mirror of
https://github.com/shaka-project/shaka-player.git
synced 2026-06-15 16:06:41 +03:00
Fix encoding issues with Chinese subs
In many places we tried to guess the encoding of a piece of text. This guess fails for Chinese UTF-8 text, and probably text in many other languages. However, DASH manifests, TTML files, WebVTT files, and VTTC box payloads are all specified to be in UTF-8. Rather than guess and possibly fail, treat all text in these contexts as UTF-8. Change-Id: I00c652a9f1dd20855e94abfac84275e41dd9e266
This commit is contained in:
@@ -30,6 +30,14 @@ describe('StringUtils', function() {
|
||||
expect(StringUtils.fromUTF8(buffer)).toBe('F\u20ac \ud800\udf48');
|
||||
});
|
||||
|
||||
it('strips the BOM in fromUTF8', function() {
|
||||
// This is the 3-byte UTF-8 byte order mark (EF BB BF) followed by the
|
||||
// ASCII text 'text'; fromUTF8 should drop the BOM and return only 'text'.
|
||||
var arr = [0xef, 0xbb, 0xbf, 0x74, 0x65, 0x78, 0x74];
|
||||
var buffer = new Uint8Array(arr).buffer;
|
||||
expect(StringUtils.fromUTF8(buffer)).toBe('text');
|
||||
});
|
||||
|
||||
it('parses fromUTF16 big-endian', function() {
|
||||
// This is big-endian pairs of 16-bit numbers. This translates into 3
|
||||
// Unicode characters where the last is split into a surrogate pair.
|
||||
|
||||
Reference in New Issue
Block a user