Files
shaka-player/test/util/string_utils_unit.js
T
Joey Parrish 04fc0d47c3 fix: Fix TextDecoder fallback and browser support check (#4403)
In PR #4324, we lifted the requirement to have a native or polyfilled
TextDecoder implementation.  However, we forgot to remove the check
for it in isBrowserSupported().  This led to tests being skipped
entirely on Xbox, as Xbox was determined to be an unsupported platform
by Player.

To fix this, the check for TextDecoder/TextEncoder in
isBrowserSupported() has been removed.

When the TextDecoder polyfill was removed, we left a reference to it
in karma.conf.js.  This didn't hurt anything per se, but this has now
been cleaned up.

Finally, TextDecoder was originally introduced to give us a way to
recover from errors instead of throwing.  The fallback that was
reintroduced in #4324 was the original code that throws on error.
This led to a test failure on Xbox, which represents a complete
subtitle failure in real content with an encoding issue.

To fix this, we replace the utf-8 decoding fallback based on
decodeURIComponent with a plain JS implementation.  This adds only 477
bytes to Shaka Player, which is pretty good compared to the 2315 byte
polyfill we used to recommend for this.

To better verify these text decoding features, a test that checked two
things has been split into two, comments around the tests have been
improved, and an additional test case has been added.
2022-08-12 10:48:14 -07:00

144 lines
5.4 KiB
JavaScript

/*! @license
* Shaka Player
* Copyright 2016 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
/**
 * Unit tests for shaka.util.StringUtils, covering decoding bytes into
 * strings (fromUTF8, fromUTF16, fromBytesAutoDetect) and encoding strings
 * into bytes (toUTF8, toUTF16).
 */
describe('StringUtils', () => {
  const StringUtils = shaka.util.StringUtils;

  it('parses fromUTF8', () => {
    // This is 4 Unicode characters, the last will be split into a surrogate
    // pair.
    const arr = [0x46, 0xe2, 0x82, 0xac, 0x20, 0xf0, 0x90, 0x8d, 0x88];
    expect(StringUtils.fromUTF8(new Uint8Array(arr)))
        .toBe('F\u20ac \ud800\udf48');
  });

  it('won\'t break if given cut-off UTF8 character', () => {
    // Sanity check: the complete sequence, ending in the full 2-byte
    // character (0xc3 0xa9 = é), decodes without any replacement.
    const arr1 = [0x53, 0x61, 0x6e, 0x20, 0x4a, 0x6f, 0x73, 0xc3, 0xa9];
    expect(StringUtils.fromUTF8(new Uint8Array(arr1)))
        .toBe('San Jos\u00E9');

    // This array contains the first half of a 2-byte UTF8 character
    // (0xc3 0xa9 = é). The half-character is stranded at the very end of the
    // string, and should decode to U+FFFD (the replacement character) rather
    // than throwing.
    const arr = [0x53, 0x61, 0x6e, 0x20, 0x4a, 0x6f, 0x73, 0xc3];
    expect(StringUtils.fromUTF8(new Uint8Array(arr)))
        .toBe('San Jos\uFFFD');
  });

  it('won\'t break if given an invalid UTF-8 sequence', () => {
    // 0xe9 0x33 0x33 is an invalid UTF-8 sequence.  The bad lead byte should
    // become U+FFFD, while the two ASCII bytes after it ("33") are kept.
    const arr = [0x4a, 0x6f, 0x73, 0xE9, 0x33, 0x33, 0x20, 0x53, 0x61, 0x6e];
    expect(StringUtils.fromUTF8(new Uint8Array(arr)))
        .toBe('Jos\uFFFD33 San');
  });

  it('can handle an 8-byte character', () => {
    // This is the UTF-8 encoding of the US flag emoji.
    // It decodes into two Unicode codepoints, which becomes 4 JavaScript
    // UTF-16 characters.
    const arr = [0xf0, 0x9f, 0x87, 0xba, 0xf0, 0x9f, 0x87, 0xb8];
    expect(StringUtils.fromUTF8(new Uint8Array(arr)))
        .toBe('\uD83C\uDDFA\uD83C\uDDF8');
  });

  it('strips the BOM in fromUTF8', () => {
    // This is the UTF-8 byte order mark (0xef 0xbb 0xbf) followed by the
    // ASCII bytes for "text".  Only the characters after the BOM should
    // appear in the output.
    const arr = [0xef, 0xbb, 0xbf, 0x74, 0x65, 0x78, 0x74];
    expect(StringUtils.fromUTF8(new Uint8Array(arr))).toBe('text');
  });

  it('parses fromUTF16 big-endian', () => {
    // This is big-endian pairs of 16-bit numbers.  This translates into 3
    // Unicode characters where the last is split into a surrogate pair.
    const arr = [0x00, 0x46, 0x38, 0x01, 0xd8, 0x01, 0xdc, 0x37];
    expect(StringUtils.fromUTF16(new Uint8Array(arr), false))
        .toBe('F\u3801\ud801\udc37');
  });

  it('parses fromUTF16 little-endian', () => {
    // This is little-endian pairs of 16-bit numbers.  This translates into 3
    // Unicode characters where the last is split into a surrogate pair.
    const arr = [0x46, 0x00, 0x01, 0x38, 0x01, 0xd8, 0x37, 0xdc];
    expect(StringUtils.fromUTF16(new Uint8Array(arr), true))
        .toBe('F\u3801\ud801\udc37');
  });

  describe('fromBytesAutoDetect', () => {
    it('detects UTF-8 BOM', () => {
      // 0xef 0xbb 0xbf (UTF-8 BOM), then "Foo" as single bytes.
      const arr = [0xef, 0xbb, 0xbf, 0x46, 0x6f, 0x6f];
      expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo');
    });

    it('detects UTF-16 BE BOM', () => {
      // 0xfe 0xff (UTF-16 BE BOM), then "Foo" as big-endian 16-bit units.
      const arr = [0xfe, 0xff, 0x00, 0x46, 0x00, 0x6f, 0x00, 0x6f];
      expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo');
    });

    it('detects UTF-16 LE BOM', () => {
      // 0xff 0xfe (UTF-16 LE BOM), then "Foo" as little-endian 16-bit units.
      const arr = [0xff, 0xfe, 0x46, 0x00, 0x6f, 0x00, 0x6f, 0x00];
      expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo');
    });

    it('guesses UTF-8', () => {
      // "Foo" as single bytes, with no BOM to identify the encoding.
      const arr = [0x46, 0x6f, 0x6f];
      expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo');
    });

    it('guesses UTF-16 BE', () => {
      // "Foo" as big-endian 16-bit units, with no BOM.
      const arr = [0x00, 0x46, 0x00, 0x6f, 0x00, 0x6f];
      expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo');
    });

    it('guesses UTF-16 LE', () => {
      // "Foo" as little-endian 16-bit units, with no BOM.
      const arr = [0x46, 0x00, 0x6f, 0x00, 0x6f, 0x00];
      expect(StringUtils.fromBytesAutoDetect(new Uint8Array(arr))).toBe('Foo');
    });

    it('fails if unable to guess', () => {
      const expected = shaka.test.Util.jasmineError(new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.TEXT,
          shaka.util.Error.Code.UNABLE_TO_DETECT_ENCODING));

      // No BOM, and the bytes are expected to be rejected by the detection
      // logic rather than decoded as any encoding.
      const arr = [0x01, 0x02, 0x03, 0x04];
      expect(() => StringUtils.fromBytesAutoDetect(new Uint8Array(arr)))
          .toThrow(expected);
    });
  });

  it('converts toUTF8', () => {
    // Two ASCII characters followed by two characters that each take 3 bytes
    // in UTF-8.
    const str = 'Xe\u4524\u1952';
    const arr = [0x58, 0x65, 0xe4, 0x94, 0xa4, 0xe1, 0xa5, 0x92];
    const buffer = StringUtils.toUTF8(str);
    expect(shaka.util.BufferUtils.toUint8(buffer))
        .toEqual(new Uint8Array(arr));
  });

  it('converts toUTF16-LE', () => {
    // Each character becomes one 16-bit unit, low byte first.
    const str = 'Xe\u4524\u1952';
    const arr = [0x58, 0, 0x65, 0, 0x24, 0x45, 0x52, 0x19];
    const buffer = StringUtils.toUTF16(str, /* littleEndian= */ true);
    expect(shaka.util.BufferUtils.toUint8(buffer))
        .toEqual(new Uint8Array(arr));
  });

  it('converts toUTF16-BE', () => {
    // Each character becomes one 16-bit unit, high byte first.
    const str = 'Xe\u4524\u1952';
    const arr = [0, 0x58, 0, 0x65, 0x45, 0x24, 0x19, 0x52];
    const buffer = StringUtils.toUTF16(str, /* littleEndian= */ false);
    expect(shaka.util.BufferUtils.toUint8(buffer))
        .toEqual(new Uint8Array(arr));
  });

  it('does not cause stack overflow, #335', () => {
    // A zero-filled buffer: every byte is one character in UTF-8, and every
    // pair of bytes is one character in UTF-16.
    const buffer = new Uint8Array(8e5);  // Well above arg count limit.
    expect(StringUtils.fromUTF8(buffer).length).toBe(buffer.byteLength);
    expect(StringUtils.fromUTF16(buffer, true).length)
        .toBe(buffer.byteLength / 2);
  });
});