Files
shaka-player/test/text/mp4_ttml_parser_unit.js
T
Juliane Holzt 2562384055 fix(TTML): Correctly handle multiple samples in a segment (#8088)
Fixes https://github.com/shaka-project/shaka-player/issues/8087

Implements handling of multiple samples in an MP4/ISOBMFF/DASH TTML
segment/fragment. Such segments are allowed by ISO/IEC 14496-12 and
ISO/IEC 23000-19, and gpac creates them. The prior code treated the
full MDAT as a single TTML XML document and tried to parse it as a
whole, without accounting for the individual sample(s). A test case is
included; it was created by taking the test data from ttml-segment.mp4
and splitting the subtitles into two independent TTML XML documents,
which were then stored as individual samples.

The test data for the pre-existing multiple-MDAT test case was invalid.
It was created by taking the same ttml-segment.mp4 as a source and just
duplicating the MDAT box, but without then also fixing the TRUN box. The
duplicated data was thus not referenced. The test case still passed
because the prior code did not look at the TRUN box or the sample
specification at all and simply treated each full MDAT box as one sample.
The test data was replaced with a new file, which is essentially the same
as the one for the multiple-samples case, but with the two samples split
into two MDAT boxes.
2025-02-17 12:39:10 +01:00

221 lines
6.9 KiB
JavaScript

/*! @license
* Shaka Player
* Copyright 2016 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
describe('Mp4TtmlParser', () => {
// Test assets. Each URI is declared next to the buffer that beforeAll fills
// with the bytes fetched from it.

const ttmlInitSegmentUri = '/base/test/test/assets/ttml-init.mp4';
/**
 * Bytes fetched from ttmlInitSegmentUri.
 * @type {!Uint8Array}
 */
let ttmlInitSegment;

const ttmlSegmentUri = '/base/test/test/assets/ttml-segment.mp4';
/**
 * Bytes fetched from ttmlSegmentUri.
 * @type {!Uint8Array}
 */
let ttmlSegment;

const ttmlSegmentMultipleMDATUri =
    '/base/test/test/assets/ttml-segment-multiple-mdat.mp4';
/**
 * Bytes fetched from ttmlSegmentMultipleMDATUri.
 * @type {!Uint8Array}
 */
let ttmlSegmentMultipleMDAT;

const ttmlSegmentMultipleSampleUri =
    '/base/test/test/assets/ttml-segment-multiple-sample.mp4';
/**
 * Bytes fetched from ttmlSegmentMultipleSampleUri.
 * @type {!Uint8Array}
 */
let ttmlSegmentMultipleSample;

const imscImageInitSegmentUri =
    '/base/test/test/assets/imsc-image-init.cmft';
/**
 * Bytes fetched from imscImageInitSegmentUri.
 * @type {!Uint8Array}
 */
let imscImageInitSegment;

const imscImageSegmentUri =
    '/base/test/test/assets/imsc-image-segment.cmft';
/**
 * Bytes fetched from imscImageSegmentUri.
 * @type {!Uint8Array}
 */
let imscImageSegment;

const audioInitSegmentUri = '/base/test/test/assets/sintel-audio-init.mp4';
/**
 * Bytes fetched from audioInitSegmentUri; used by the spec that expects
 * parseInit to reject a non-TTML init segment.
 * @type {!Uint8Array}
 */
let audioInitSegment;
beforeAll(async () => {
  // Start every download at once. The names in the destructuring pattern
  // follow the same order as the requests passed to Promise.all.
  const [
    ttmlInitData,
    ttmlMediaData,
    multipleMdatData,
    multipleSampleData,
    imscInitData,
    imscMediaData,
    audioInitData,
  ] = await Promise.all([
    shaka.test.Util.fetch(ttmlInitSegmentUri),
    shaka.test.Util.fetch(ttmlSegmentUri),
    shaka.test.Util.fetch(ttmlSegmentMultipleMDATUri),
    shaka.test.Util.fetch(ttmlSegmentMultipleSampleUri),
    shaka.test.Util.fetch(imscImageInitSegmentUri),
    shaka.test.Util.fetch(imscImageSegmentUri),
    shaka.test.Util.fetch(audioInitSegmentUri),
  ]);

  // Single-argument wrapper around the conversion used for every asset.
  const asUint8 = (data) => shaka.util.BufferUtils.toUint8(data);

  ttmlInitSegment = asUint8(ttmlInitData);
  ttmlSegment = asUint8(ttmlMediaData);
  ttmlSegmentMultipleMDAT = asUint8(multipleMdatData);
  ttmlSegmentMultipleSample = asUint8(multipleSampleData);
  imscImageInitSegment = asUint8(imscInitData);
  imscImageSegment = asUint8(imscMediaData);
  audioInitSegment = asUint8(audioInitData);
});
it('parses init segment', () => {
  // Parsing a valid TTML init segment must complete without throwing.
  // The call is wrapped in an explicit expectation so the spec asserts
  // something; a spec without any expectation can be flagged (or failed)
  // by Jasmine as having no expectations.
  const parseTtmlInit = () => {
    new shaka.text.Mp4TtmlParser().parseInit(ttmlInitSegment);
  };
  expect(parseTtmlInit).not.toThrow();
});
it('handles media segments with multiple mdats', () => {
  const timeContext = {
    periodStart: 0,
    segmentStart: 0,
    segmentEnd: 60,
    vttOffset: 0,
  };
  const ttmlParser = new shaka.text.Mp4TtmlParser();
  ttmlParser.parseInit(ttmlInitSegment);
  const bodies =
      ttmlParser.parseMedia(ttmlSegmentMultipleMDAT, timeContext, null);

  // Two top-level (body) cues are expected.
  expect(bodies.length).toBe(2);

  // Each body holds exactly one div.
  for (const index of [0, 1]) {
    expect(bodies[index].nestedCues.length).toBe(1);
  }

  // Each div holds five cues.
  for (const index of [0, 1]) {
    expect(bodies[index].nestedCues[0].nestedCues.length).toBe(5);
  }
});
it('handles media segments with multiple sample', () => {
  const timeContext = {
    periodStart: 0,
    segmentStart: 0,
    segmentEnd: 60,
    vttOffset: 0,
  };
  const ttmlParser = new shaka.text.Mp4TtmlParser();
  ttmlParser.parseInit(ttmlInitSegment);
  const bodies =
      ttmlParser.parseMedia(ttmlSegmentMultipleSample, timeContext, null);

  // Two top-level (body) cues are expected.
  expect(bodies.length).toBe(2);

  // Each body holds exactly one div.
  for (const index of [0, 1]) {
    expect(bodies[index].nestedCues.length).toBe(1);
  }

  // Each div holds five cues.
  for (const index of [0, 1]) {
    expect(bodies[index].nestedCues[0].nestedCues.length).toBe(5);
  }
});
it('accounts for offset', () => {
  // The same segment is parsed twice: once without an offset and once with
  // the period start and VTT offset both moved by this many seconds.
  const offset = 7;
  const baseTime = {
    periodStart: 0,
    segmentStart: 0,
    segmentEnd: 70,
    vttOffset: 0,
  };
  const shiftedTime = {
    periodStart: offset,
    segmentStart: 0,
    segmentEnd: 70,
    vttOffset: offset,
  };

  const ttmlParser = new shaka.text.Mp4TtmlParser();
  ttmlParser.parseInit(ttmlInitSegment);

  const baseCues = ttmlParser.parseMedia(ttmlSegment, baseTime, null);
  expect(baseCues.length).toBeGreaterThan(0);

  const shiftedCues = ttmlParser.parseMedia(ttmlSegment, shiftedTime, null);
  expect(shiftedCues.length).toBeGreaterThan(0);

  // The first cue of the shifted parse must be displaced by the offset.
  expect(shiftedCues[0].startTime).toBe(baseCues[0].startTime + offset);
  expect(shiftedCues[0].endTime).toBe(baseCues[0].endTime + offset);
});
it('rejects init segment with no ttml', () => {
  // The error parseInit is expected to throw for the audio init segment.
  const invalidTtmlError = new shaka.util.Error(
      shaka.util.Error.Severity.CRITICAL,
      shaka.util.Error.Category.TEXT,
      shaka.util.Error.Code.INVALID_MP4_TTML);
  const expectedError = shaka.test.Util.jasmineError(invalidTtmlError);

  const parseAudioInit = () =>
    new shaka.text.Mp4TtmlParser().parseInit(audioInitSegment);

  expect(parseAudioInit).toThrow(expectedError);
});
it('parses media segment', () => {
  // Builds an expected cue whose text is a single payload.
  const plainCue = (startTime, endTime, payload) => {
    return {startTime, endTime, payload};
  };

  // Builds an expected cue whose text is two nested payload cues separated
  // by a line-break cue. A fresh object is created on every call.
  const twoLineCue = (startTime, endTime, firstLine, secondLine) => {
    return {
      startTime,
      endTime,
      nestedCues: [
        {payload: firstLine},
        {lineBreak: true},
        {payload: secondLine},
      ],
    };
  };

  const expectedCues = [
    plainCue(23, 24.5, 'You\'re a jerk, Thom.'),
    plainCue(25, 27, 'Look Celia, we have to follow our passions;'),
    twoLineCue(
        27, 30.5,
        '...you have your robotics, and I',
        'just want to be awesome in space.'),
    twoLineCue(
        30.8, 34,
        'Why don\'t you just admit that',
        'you\'re freaked out by my robot hand?'),
    plainCue(34.5, 36, 'I\'m not freaked out by- it\'s...'),
    plainCue(37, 38, '...alright! Fine!'),
    twoLineCue(
        38, 41,
        'I\'m freaked out! I have nightmares',
        'that I\'m being chased...'),
    plainCue(41, 42, '...by these giant robotic claws of death...'),
    twoLineCue(
        42.2, 45,
        // cspell:disable-next-line
        '"Fourty years later"',
        'Whatever, Thom. We\'re done.'),
    plainCue(50, 53.5, 'Robot\'s memory synced and locked!'),
  ];

  const timeContext = {
    periodStart: 0,
    segmentStart: 0,
    segmentEnd: 60,
    vttOffset: 0,
  };
  const ttmlParser = new shaka.text.Mp4TtmlParser();
  ttmlParser.parseInit(ttmlInitSegment);
  const parsedCues = ttmlParser.parseMedia(ttmlSegment, timeContext, null);

  // NOTE(review): the third argument spans the first cue's start to the
  // last cue's end; presumably the expected timing of the enclosing body
  // cue -- confirm against shaka.test.TtmlUtils.verifyHelper.
  const bodyTiming = {startTime: 23, endTime: 53.5};
  shaka.test.TtmlUtils.verifyHelper(expectedCues, parsedCues, bodyTiming);
});
it('handles IMSC1 (CMAF) image subtitle', () => {
  const timeContext = {
    periodStart: 0,
    segmentStart: 0,
    segmentEnd: 60,
    vttOffset: 0,
  };
  const imscParser = new shaka.text.Mp4TtmlParser();
  imscParser.parseInit(imscImageInitSegment);
  const bodies = imscParser.parseMedia(imscImageSegment, timeContext, null);

  // Exactly one top-level (body) cue.
  expect(bodies.length).toBe(1);

  // The body holds exactly one div.
  const body = bodies[0];
  expect(body.nestedCues.length).toBe(1);

  // The div carries a background image.
  const div = body.nestedCues[0];
  expect(div.backgroundImage).toBeDefined();
});
});