mirror of
https://github.com/shaka-project/shaka-player.git
synced 2026-07-02 18:49:36 +03:00
04fc0d47c3
In PR #4324, we lifted the requirement to have a native or polyfilled TextDecoder implementation. However, we forgot to remove the check for it in isBrowserSupported(). This led to tests being skipped entirely on Xbox, as Xbox was determined to be an unsupported platform by Player. To fix this, the check for TextDecoder/TextEncoder in isBrowserSupported() has been removed. When the TextDecoder polyfill was removed, we left a reference to it in karma.conf.js. This didn't hurt anything per se, but this has now been cleaned up. Finally, TextDecoder was originally introduced to give us a way to recover from errors instead of throwing. The fallback that was reintroduced in #4324 was the original code that throws on error. This led to a test failure on Xbox, which represents a complete subtitle failure in real content with an encoding issue. To fix this, we replace the utf-8 decoding fallback based on decodeURIComponent with a plain JS implementation. This adds only 477 bytes to Shaka Player, which is pretty good compared to the 2315 byte polyfill we used to recommend for this. To better verify these text decoding features, a test that checked two things has been split into two, comments around the tests have been improved, and an additional test case has been added.
144 lines
5.4 KiB
JavaScript
144 lines
5.4 KiB
JavaScript
/*! @license
|
|
* Shaka Player
|
|
* Copyright 2016 Google LLC
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
// Unit tests for shaka.util.StringUtils: decoding bytes into strings
// (fromUTF8, fromUTF16, fromBytesAutoDetect) and encoding strings into bytes
// (toUTF8, toUTF16).
describe('StringUtils', () => {
  const StringUtils = shaka.util.StringUtils;

  it('parses fromUTF8', () => {
    // This is 4 Unicode characters, the last will be split into a surrogate
    // pair.
    const arr = [0x46, 0xe2, 0x82, 0xac, 0x20, 0xf0, 0x90, 0x8d, 0x88];
    expect(StringUtils.fromUTF8(new Uint8Array(arr)))
        .toBe('F\u20ac \ud800\udf48');
  });

  it('won\'t break if given cut-off UTF8 character', () => {
    // Baseline: with the complete 2-byte sequence (0xc3 0xa9 = é) at the end,
    // the string decodes without any replacement character.
    const complete = [0x53, 0x61, 0x6e, 0x20, 0x4a, 0x6f, 0x73, 0xc3, 0xa9];
    expect(StringUtils.fromUTF8(new Uint8Array(complete)))
        .toBe('San Jos\u00E9');

    // This array contains the first half of a 2-byte UTF8 character
    // (0xc3 0xa9 = é).  The half-character is stranded at the very end of the
    // string, and is expected to decode to U+FFFD instead of throwing.
    const truncated = [0x53, 0x61, 0x6e, 0x20, 0x4a, 0x6f, 0x73, 0xc3];
    expect(StringUtils.fromUTF8(new Uint8Array(truncated)))
        .toBe('San Jos\uFFFD');
  });

  it('won\'t break if given an invalid UTF-8 sequence', () => {
    // 0xe9 0x33 0x33 is an invalid UTF-8 sequence.  Only the bad lead byte
    // is expected to be replaced with U+FFFD; the two 0x33 bytes ("33") that
    // follow it must survive.
    const arr = [0x4a, 0x6f, 0x73, 0xE9, 0x33, 0x33, 0x20, 0x53, 0x61, 0x6e];
    expect(StringUtils.fromUTF8(new Uint8Array(arr)))
        .toBe('Jos\uFFFD33 San');
  });

  it('can handle an 8-byte character', () => {
    // This is the UTF-8 encoding of the US flag emoji.
    // It decodes into two Unicode codepoints, which becomes 4 JavaScript
    // UTF-16 characters.
    const arr = [0xf0, 0x9f, 0x87, 0xba, 0xf0, 0x9f, 0x87, 0xb8];
    expect(StringUtils.fromUTF8(new Uint8Array(arr)))
        .toBe('\uD83C\uDDFA\uD83C\uDDF8');
  });

  it('strips the BOM in fromUTF8', () => {
    // The first three bytes (0xef 0xbb 0xbf) are the UTF-8 byte order mark.
    // They must be dropped, leaving only the four ASCII characters "text".
    const arr = [0xef, 0xbb, 0xbf, 0x74, 0x65, 0x78, 0x74];
    // Compare against a literal.  The expected value is simply the decoded
    // payload; it is unrelated to any content-type enum that happens to share
    // the same spelling.
    expect(StringUtils.fromUTF8(new Uint8Array(arr))).toBe('text');
  });

  it('parses fromUTF16 big-endian', () => {
    // This is big-endian pairs of 16-bit numbers.  This translates into 3
    // Unicode characters where the last is split into a surrogate pair.
    const arr = [0x00, 0x46, 0x38, 0x01, 0xd8, 0x01, 0xdc, 0x37];
    expect(StringUtils.fromUTF16(new Uint8Array(arr), false))
        .toBe('F\u3801\ud801\udc37');
  });

  it('parses fromUTF16 little-endian', () => {
    // This is little-endian pairs of 16-bit numbers.  This translates into 3
    // Unicode characters where the last is split into a surrogate pair.
    const arr = [0x46, 0x00, 0x01, 0x38, 0x01, 0xd8, 0x37, 0xdc];
    expect(StringUtils.fromUTF16(new Uint8Array(arr), true))
        .toBe('F\u3801\ud801\udc37');
  });

  describe('fromBytesAutoDetect', () => {
    // The "detects" cases start with a byte order mark; the "guesses" cases
    // contain only the bytes of "Foo" in the given encoding, with no mark.

    it('detects UTF-8 BOM', () => {
      const arr = [0xef, 0xbb, 0xbf, 0x46, 0x6f, 0x6f];
      expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo');
    });

    it('detects UTF-16 BE BOM', () => {
      const arr = [0xfe, 0xff, 0x00, 0x46, 0x00, 0x6f, 0x00, 0x6f];
      expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo');
    });

    it('detects UTF-16 LE BOM', () => {
      const arr = [0xff, 0xfe, 0x46, 0x00, 0x6f, 0x00, 0x6f, 0x00];
      expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo');
    });

    it('guesses UTF-8', () => {
      const arr = [0x46, 0x6f, 0x6f];
      expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo');
    });

    it('guesses UTF-16 BE', () => {
      const arr = [0x00, 0x46, 0x00, 0x6f, 0x00, 0x6f];
      expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo');
    });

    it('guesses UTF-16 LE', () => {
      const arr = [0x46, 0x00, 0x6f, 0x00, 0x6f, 0x00];
      expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo');
    });

    it('fails if unable to guess', () => {
      const expected = shaka.test.Util.jasmineError(new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.TEXT,
          shaka.util.Error.Code.UNABLE_TO_DETECT_ENCODING));
      const arr = [0x01, 0x02, 0x03, 0x04];
      expect(() => StringUtils.fromBytesAutoDetect(new Uint8Array(arr)))
          .toThrow(expected);
    });
  });

  it('converts toUTF8', () => {
    const str = 'Xe\u4524\u1952';
    const arr = [0x58, 0x65, 0xe4, 0x94, 0xa4, 0xe1, 0xa5, 0x92];
    const buffer = StringUtils.toUTF8(str);
    expect(shaka.util.BufferUtils.toUint8(buffer))
        .toEqual(new Uint8Array(arr));
  });

  it('converts toUTF16-LE', () => {
    const str = 'Xe\u4524\u1952';
    const arr = [0x58, 0, 0x65, 0, 0x24, 0x45, 0x52, 0x19];
    const buffer = StringUtils.toUTF16(str, /* littleEndian= */ true);
    expect(shaka.util.BufferUtils.toUint8(buffer))
        .toEqual(new Uint8Array(arr));
  });

  it('converts toUTF16-BE', () => {
    const str = 'Xe\u4524\u1952';
    const arr = [0, 0x58, 0, 0x65, 0x45, 0x24, 0x19, 0x52];
    const buffer = StringUtils.toUTF16(str, /* littleEndian= */ false);
    expect(shaka.util.BufferUtils.toUint8(buffer))
        .toEqual(new Uint8Array(arr));
  });

  it('does not cause stack overflow, #335', () => {
    // The buffer is zero-filled.  The expectations below require one output
    // character per byte for UTF-8 and one per byte pair for UTF-16.
    const buffer = new Uint8Array(8e5);  // Well above arg count limit.
    expect(StringUtils.fromUTF8(buffer).length).toBe(buffer.byteLength);
    expect(StringUtils.fromUTF16(buffer, true).length)
        .toBe(buffer.byteLength / 2);
  });
});
|