mirror of
https://github.com/shaka-project/shaka-player.git
synced 2026-06-16 16:16:40 +03:00
Fix encoding issues with Chinese subs
In many places we tried to guess the encoding of a piece of text. This guess fails for Chinese UTF-8 text, and probably for text in many other languages as well. However, DASH manifests, TTML files, WebVTT files, and VTTC box payloads are all specified to be in UTF-8. Rather than guess and possibly fail, treat all text in these contexts as UTF-8.

Change-Id: I00c652a9f1dd20855e94abfac84275e41dd9e266
This commit is contained in:
@@ -36,8 +36,15 @@ goog.require('shaka.util.Error');
|
||||
*/
|
||||
shaka.util.StringUtils.fromUTF8 = function(data) {
|
||||
if (!data) return '';
|
||||
|
||||
var uint8 = new Uint8Array(data);
|
||||
// If present, strip off the UTF-8 BOM.
|
||||
if (uint8[0] == 0xef && uint8[1] == 0xbb && uint8[2] == 0xbf) {
|
||||
uint8 = uint8.subarray(3);
|
||||
}
|
||||
|
||||
// http://stackoverflow.com/a/13691499
|
||||
var utf8 = shaka.util.StringUtils.fromCharCode_(new Uint8Array(data));
|
||||
var utf8 = shaka.util.StringUtils.fromCharCode_(uint8);
|
||||
// This converts each character in the string to an escape sequence. If the
|
||||
// character is in the ASCII range, it is not converted; otherwise it is
|
||||
// converted to a URI escape sequence.
|
||||
@@ -109,7 +116,7 @@ shaka.util.StringUtils.fromBytesAutoDetect = function(data) {
|
||||
|
||||
var uint8 = new Uint8Array(data);
|
||||
if (uint8[0] == 0xef && uint8[1] == 0xbb && uint8[2] == 0xbf)
|
||||
return StringUtils.fromUTF8(uint8.subarray(3));
|
||||
return StringUtils.fromUTF8(uint8);
|
||||
else if (uint8[0] == 0xfe && uint8[1] == 0xff)
|
||||
return StringUtils.fromUTF16(uint8.subarray(2), false /* littleEndian */);
|
||||
else if (uint8[0] == 0xff && uint8[1] == 0xfe)
|
||||
|
||||
Reference in New Issue
Block a user