mirror of
https://github.com/shaka-project/shaka-player.git
synced 2026-06-26 17:46:26 +03:00
849bff1db3
Make cloning buffers (or not) explicit in readBytes. When we use a range of bytes temporarily for further parsing, we pass clone=false and get a view on the existing memory buffer. When we want to store the range of bytes, we pass clone=true and avoid holding a reference to an entire segment in memory. The call for the EMSG parser in MediaSourceEngine had an explicit clone, but now uses the new clone parameter. This is not a functional change, though. The only readBytes call that changed in this audit was in the UI seek bar. The rest all appear to be values for temporary usage, and so are not being cloned. The new `clone` parameter will require future callers of `readBytes()` to think about their purpose and make a choice.
447 lines
15 KiB
JavaScript
447 lines
15 KiB
JavaScript
/*! @license
 * Shaka Player
 * Copyright 2016 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
|
|
|
|
goog.provide('shaka.cea.Mp4CeaParser');
|
|
|
|
goog.require('goog.asserts');
|
|
goog.require('shaka.cea.CeaUtils');
|
|
goog.require('shaka.cea.SeiProcessor');
|
|
goog.require('shaka.log');
|
|
goog.require('shaka.media.ClosedCaptionParser');
|
|
goog.require('shaka.util.DataViewReader');
|
|
goog.require('shaka.util.Error');
|
|
goog.require('shaka.util.Mp4Parser');
|
|
goog.require('shaka.util.Mp4BoxParsers');
|
|
|
|
/**
 * MPEG4 stream parser used for extracting 708 closed captions data.
 *
 * Usage: call init() with the init segment first (it records per-track
 * timescales, TREX defaults and the codec's bitstream format), then call
 * parse() for each media segment to obtain the caption packets.
 *
 * @implements {shaka.extern.ICeaParser}
 * @export
 */
shaka.cea.Mp4CeaParser = class {
  constructor() {
    /**
     * SEI data processor.
     * @private
     * @const {!shaka.cea.SeiProcessor}
     */
    this.seiProcessor_ = new shaka.cea.SeiProcessor();

    /**
     * Map of track id to corresponding timescale.
     * @private {!Map<number, number>}
     */
    this.trackIdToTimescale_ = new Map();

    /**
     * Default sample duration, as specified by the TREX box.
     * @private {number}
     */
    this.defaultSampleDuration_ = 0;

    /**
     * Default sample size, as specified by the TREX box.
     * @private {number}
     */
    this.defaultSampleSize_ = 0;

    /**
     * Bitstream format derived from the codec box found by init().  Stays
     * UNKNOWN until a recognized codec fourcc has been seen.
     * @private {shaka.cea.Mp4CeaParser.BitstreamFormat}
     */
    this.bitstreamFormat_ = shaka.cea.Mp4CeaParser.BitstreamFormat.UNKNOWN;
  }

  /**
   * Parses the init segment. Gets Default Sample Duration and Size from the
   * TREX box, and constructs a map of Track IDs to timescales. Each TRAK box
   * contains a track header (TKHD) containing track ID, and a media header box
   * (MDHD) containing the timescale for the track.  The codec box inside STSD
   * (or the FRMA box of an encrypted sample entry) selects the bitstream
   * format used later by parse().
   *
   * Throws a CRITICAL shaka.util.Error with code INVALID_MP4_CEA when no
   * track is found or when the number of track ids and timescales differ.
   *
   * @override
   */
  init(initSegment) {
    const Mp4Parser = shaka.util.Mp4Parser;
    const BitstreamFormat = shaka.cea.Mp4CeaParser.BitstreamFormat;
    const trackIds = [];
    const timescales = [];

    // For plain codec boxes, the box name itself is the codec fourcc.
    const codecBoxParser = (box) => this.setBitstreamFormat_(box.name);

    new Mp4Parser()
        .box('moov', Mp4Parser.children)
        .box('mvex', Mp4Parser.children)
        .fullBox('trex', (box) => {
          const parsedTREXBox = shaka.util.Mp4BoxParsers.parseTREX(
              box.reader);

          this.defaultSampleDuration_ = parsedTREXBox.defaultSampleDuration;
          this.defaultSampleSize_ = parsedTREXBox.defaultSampleSize;
        })
        .box('trak', Mp4Parser.children)
        .fullBox('tkhd', (box) => {
          goog.asserts.assert(
              box.version != null,
              'TKHD is a full box and should have a valid version.');
          const parsedTKHDBox = shaka.util.Mp4BoxParsers.parseTKHD(
              box.reader, box.version);
          trackIds.push(parsedTKHDBox.trackId);
        })
        .box('mdia', Mp4Parser.children)
        .fullBox('mdhd', (box) => {
          goog.asserts.assert(
              box.version != null,
              'MDHD is a full box and should have a valid version.');
          const parsedMDHDBox = shaka.util.Mp4BoxParsers.parseMDHD(
              box.reader, box.version);
          timescales.push(parsedMDHDBox.timescale);
        })
        .box('minf', Mp4Parser.children)
        .box('stbl', Mp4Parser.children)
        .fullBox('stsd', Mp4Parser.sampleDescription)

        // These are the various boxes that signal a codec.
        .box('avc1', codecBoxParser)
        .box('avc3', codecBoxParser)
        .box('dvav', codecBoxParser)
        .box('dva1', codecBoxParser)
        .box('hev1', codecBoxParser)
        .box('hvc1', codecBoxParser)
        .box('dvh1', codecBoxParser)
        .box('dvhe', codecBoxParser)
        .box('vvc1', codecBoxParser)
        .box('vvi1', codecBoxParser)
        .box('dvc1', codecBoxParser)
        .box('dvi1', codecBoxParser)

        // This signals an encrypted sample, which we can go inside of to find
        // the codec used.
        .box('encv', Mp4Parser.visualSampleEntry)
        .box('sinf', Mp4Parser.children)
        .box('frma', (box) => {
          const {codec} = shaka.util.Mp4BoxParsers.parseFRMA(box.reader);
          this.setBitstreamFormat_(codec);
        })

        .parse(initSegment, /* partialOkay= */ true, /* stopOnPartial= */ true);

    // At least one track should exist, and each track should have a
    // corresponding Id in TKHD box, and timescale in its MDHD box
    if (!trackIds.length || !timescales.length ||
        trackIds.length != timescales.length) {
      throw new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.TEXT,
          shaka.util.Error.Code.INVALID_MP4_CEA);
    }

    if (this.bitstreamFormat_ == BitstreamFormat.UNKNOWN) {
      shaka.log.alwaysWarn(
          'Unable to determine bitstream format for CEA parsing!');
    }

    // Populate the map from track Id to timescale.  The two arrays are
    // paired by index, i.e. by the order in which the boxes were parsed.
    trackIds.forEach((trackId, idx) => {
      this.trackIdToTimescale_.set(trackId, timescales[idx]);
    });
  }

  /**
   * Parses each video segment. In fragmented MP4s, MOOF and MDAT come in
   * pairs. The following logic gets the necessary info from MOOFs to parse
   * MDATs (base media decode time, sample sizes/offsets/durations, etc),
   * and then parses the MDAT boxes for CEA-708 packets using this information.
   * The caption packets found are returned as an array, which is empty when
   * the bitstream format is unknown.
   *
   * @override
   */
  parse(mediaSegment) {
    const Mp4Parser = shaka.util.Mp4Parser;
    const BitstreamFormat = shaka.cea.Mp4CeaParser.BitstreamFormat;

    if (this.bitstreamFormat_ == BitstreamFormat.UNKNOWN) {
      // We don't know how to extract SEI from this.
      return [];
    }

    /** @type {!Array<!shaka.extern.ICeaParser.CaptionPacket>} **/
    const captionPackets = [];

    let moofOffset = 0;

    /** @type {!Array<!shaka.cea.Mp4CeaParser.ParsedTRAF>} */
    let parsedTRAFs = [];

    new Mp4Parser()
        .box('moof', (box) => {
          moofOffset = box.start;
          // traf box parsing is reset on each moof.
          parsedTRAFs = [];
          Mp4Parser.children(box);
        })
        .box('traf', (box) => {
          // Start a new record seeded with the TREX defaults; the TFHD, TFDT
          // and TRUN handlers below fill in the most recently pushed record.
          parsedTRAFs.push({
            baseMediaDecodeTime: null,
            defaultSampleDuration: this.defaultSampleDuration_,
            defaultSampleSize: this.defaultSampleSize_,
            parsedTRUNs: [],
            timescale: shaka.cea.CeaUtils.DEFAULT_TIMESCALE_VALUE,
          });
          Mp4Parser.children(box);
        })
        .fullBox('trun', (box) => {
          goog.asserts.assert(
              box.version != null && box.flags != null,
              'TRUN is a full box and should have a valid version & flags.');

          const lastTRAF = parsedTRAFs[parsedTRAFs.length - 1];

          const parsedTRUN = shaka.util.Mp4BoxParsers.parseTRUN(
              box.reader, box.version, box.flags);
          lastTRAF.parsedTRUNs.push(parsedTRUN);
        })
        .fullBox('tfhd', (box) => {
          goog.asserts.assert(
              box.flags != null,
              'TFHD is a full box and should have valid flags.');

          const lastTRAF = parsedTRAFs[parsedTRAFs.length - 1];

          const parsedTFHD = shaka.util.Mp4BoxParsers.parseTFHD(
              box.reader, box.flags);

          // If specified, defaultSampleDuration and defaultSampleSize
          // override the ones specified in the TREX box
          lastTRAF.defaultSampleDuration = parsedTFHD.defaultSampleDuration ||
              this.defaultSampleDuration_;

          lastTRAF.defaultSampleSize = parsedTFHD.defaultSampleSize ||
              this.defaultSampleSize_;

          const trackId = parsedTFHD.trackId;

          // Get the timescale from the track Id
          if (this.trackIdToTimescale_.has(trackId)) {
            lastTRAF.timescale = this.trackIdToTimescale_.get(trackId);
          }
        })
        .fullBox('tfdt', (box) => {
          goog.asserts.assert(
              box.version != null,
              'TFDT is a full box and should have a valid version.');

          const lastTRAF = parsedTRAFs[parsedTRAFs.length - 1];

          const parsedTFDT = shaka.util.Mp4BoxParsers.parseTFDTInaccurate(
              box.reader, box.version);

          lastTRAF.baseMediaDecodeTime = parsedTFDT.baseMediaDecodeTime;
        })
        .box('mdat', (box) => {
          // parseMdat_() adds this to the first TRUN's data offset to find
          // the first sample from the reader's current position.  The 8 is
          // presumably the size of the mdat box header -- TODO confirm.
          const offset = moofOffset - box.start - 8;
          const initialPosition = box.reader.getPosition();
          for (const parsedTRAF of parsedTRAFs) {
            if (parsedTRAF.baseMediaDecodeTime === null) {
              // This field should have been populated by the Base Media Decode
              // Time in the tfdt box.
              shaka.log.alwaysWarn(
                  'Unable to find base media decode time for CEA captions!');
              continue;
            }
            // Every traf is parsed from the same starting position.
            box.reader.seek(initialPosition);
            this.parseMdat_(box.reader,
                parsedTRAF.baseMediaDecodeTime,
                parsedTRAF.timescale,
                parsedTRAF.defaultSampleDuration,
                parsedTRAF.defaultSampleSize,
                offset,
                parsedTRAF.parsedTRUNs,
                captionPackets);
          }
        })
        .parse(mediaSegment, /* partialOkay= */ false,
            /* stopOnPartial= */ true);

    return captionPackets;
  }

  /**
   * Parse MDAT box.  Walks the length-prefixed NAL units of one track
   * fragment, hands SEI NAL units to the SEI processor and appends the
   * resulting packets, stamped with their presentation time, to
   * captionPackets.
   *
   * @param {!shaka.util.DataViewReader} reader Reader over the MDAT box.
   * @param {number} time Base media decode time of the fragment, in
   *   timescale units.
   * @param {number} timescale Divisor that converts time into a pts.
   * @param {number} defaultSampleDuration Used when a sample has no duration.
   * @param {number} defaultSampleSize Used when a sample has no size.
   * @param {number} offset Added to the first TRUN's data offset to find the
   *   first sample from the reader's current position.
   * @param {!Array<shaka.util.ParsedTRUNBox>} parsedTRUNs
   * @param {!Array<!shaka.extern.ICeaParser.CaptionPacket>} captionPackets
   *   Output array; packets are appended in place.
   * @private
   */
  parseMdat_(reader, time, timescale, defaultSampleDuration,
      defaultSampleSize, offset, parsedTRUNs, captionPackets) {
    const BitstreamFormat = shaka.cea.Mp4CeaParser.BitstreamFormat;
    const CeaUtils = shaka.cea.CeaUtils;

    if (!parsedTRUNs.length) {
      // Without a TRUN there is no data offset that locates the samples, so
      // there is nothing we can parse for this track fragment.
      return;
    }

    let sampleIndex = 0;

    // The fields in each ParsedTRUNSample contained in the sampleData
    // array are nullable. In the case of sample data and sample duration,
    // we use the defaults provided by the TREX/TFHD boxes. For sample
    // composition time offset, we default to 0.
    let sampleSize = defaultSampleSize;

    // Combine all sample data. This assumes that the samples described across
    // multiple trun boxes are still continuous in the mdat box.
    const sampleDatas = parsedTRUNs.map((t) => t.sampleData);
    const sampleData = [].concat(...sampleDatas);

    if (sampleData.length) {
      sampleSize = sampleData[0].sampleSize || defaultSampleSize;
    }

    // A missing data offset is treated as 0.
    reader.skip(offset + (parsedTRUNs[0].dataOffset || 0));

    while (reader.hasMoreData()) {
      // Each NAL unit is prefixed by a 32-bit size that counts the header.
      const naluSize = reader.readUint32();
      const naluHeader = reader.readUint8();
      let naluType = null;
      let isSeiMessage = false;
      let naluHeaderSize = 1;

      goog.asserts.assert(this.bitstreamFormat_ != BitstreamFormat.UNKNOWN,
          'Bitstream format should have been checked before now!');
      switch (this.bitstreamFormat_) {
        case BitstreamFormat.H264:
          // One-byte header; the type is in the low five bits.
          naluType = naluHeader & 0x1f;
          isSeiMessage = naluType == CeaUtils.H264_NALU_TYPE_SEI;
          break;

        case BitstreamFormat.H265:
          // Two-byte header; the type is in bits 1-6 of the first byte, so
          // the second byte is skipped unread.
          naluHeaderSize = 2;
          reader.skip(1);
          naluType = (naluHeader >> 1) & 0x3f;
          isSeiMessage =
              naluType == CeaUtils.H265_PREFIX_NALU_TYPE_SEI ||
              naluType == CeaUtils.H265_SUFFIX_NALU_TYPE_SEI;
          break;

        case BitstreamFormat.H266: {
          // Two-byte header.  Unlike H.265, the H.266 spec puts the 5-bit
          // nal_unit_type in the upper bits of the SECOND header byte; the
          // first byte holds only the reserved bits and the layer id.
          // NOTE(review): assumes the CeaUtils.H266_* constants hold the
          // spec's nal_unit_type values -- confirm.
          naluHeaderSize = 2;
          const secondHeaderByte = reader.readUint8();
          naluType = (secondHeaderByte >> 3) & 0x1f;
          isSeiMessage =
              naluType == CeaUtils.H266_PREFIX_NALU_TYPE_SEI ||
              naluType == CeaUtils.H266_SUFFIX_NALU_TYPE_SEI;
          break;
        }

        default:
          return;
      }

      if (isSeiMessage) {
        let timeOffset = 0;

        if (sampleIndex < sampleData.length) {
          timeOffset = sampleData[sampleIndex].sampleCompositionTimeOffset || 0;
        }

        const pts = (time + timeOffset) / timescale;
        const nalu = reader.readBytes(
            naluSize - naluHeaderSize,
            // Don't clone. The nalu is temporary and is not stored.
            /* clone= */ false);
        for (const packet of this.seiProcessor_.process(nalu)) {
          captionPackets.push({
            packet,
            pts,
          });
        }
      } else {
        try {
          reader.skip(naluSize - naluHeaderSize);
        } catch (e) {
          // It is necessary to ignore this error because it can break the start
          // of playback even if the user does not want to see the subtitles.
          break;
        }
      }

      // Account for the NAL unit just consumed (payload plus its 4-byte size
      // prefix).  Once the current sample is used up, advance the clock by
      // that sample's duration and move on to the next sample.
      sampleSize -= (naluSize + 4);
      if (sampleSize == 0) {
        if (sampleIndex < sampleData.length) {
          time += sampleData[sampleIndex].sampleDuration ||
              defaultSampleDuration;
        } else {
          time += defaultSampleDuration;
        }

        sampleIndex++;

        if (sampleIndex < sampleData.length) {
          sampleSize = sampleData[sampleIndex].sampleSize || defaultSampleSize;
        } else {
          sampleSize = defaultSampleSize;
        }
      }
    }
  }

  /**
   * Records the bitstream format that corresponds to the given codec.
   * Codecs that are not present in CodecBitstreamMap_ leave the current
   * format untouched.
   *
   * @param {string} codec A fourcc for a codec.
   * @private
   */
  setBitstreamFormat_(codec) {
    if (shaka.cea.Mp4CeaParser.CodecBitstreamMap_.has(codec)) {
      this.bitstreamFormat_ =
          shaka.cea.Mp4CeaParser.CodecBitstreamMap_.get(codec);
    }
  }
};
|
|
|
|
/**
 * Video bitstream formats this parser distinguishes when reading NAL unit
 * headers.
 *
 * @enum {number}
 */
shaka.cea.Mp4CeaParser.BitstreamFormat = {
  /** No recognized codec has been seen; parse() returns no packets. */
  UNKNOWN: 0,
  /** One-byte NAL unit header. */
  H264: 1,
  /** Two-byte NAL unit header. */
  H265: 2,
  /** Two-byte NAL unit header. */
  H266: 3,
};
|
|
|
|
/**
 * Maps a codec fourcc, as found in the sample description of an init
 * segment, to the bitstream format used for locating SEI NAL units.
 *
 * @private {Map<string, shaka.cea.Mp4CeaParser.BitstreamFormat>}
 */
shaka.cea.Mp4CeaParser.CodecBitstreamMap_ = (() => {
  const Format = shaka.cea.Mp4CeaParser.BitstreamFormat;
  const codecToFormat = new Map();

  // AVC, followed by Dolby Vision based in AVC.
  for (const fourcc of ['avc1', 'avc3', 'dvav', 'dva1']) {
    codecToFormat.set(fourcc, Format.H264);
  }

  // HEVC, followed by Dolby Vision based in HEVC.
  for (const fourcc of ['hev1', 'hvc1', 'dvh1', 'dvhe']) {
    codecToFormat.set(fourcc, Format.H265);
  }

  // VVC, followed by Dolby Vision based in VVC.
  for (const fourcc of ['vvc1', 'vvi1', 'dvc1', 'dvi1']) {
    codecToFormat.set(fourcc, Format.H266);
  }

  return codecToFormat;
})();
|
|
|
|
/**
 * @typedef {{
 *   baseMediaDecodeTime: ?number,
 *   defaultSampleDuration: number,
 *   defaultSampleSize: number,
 *   parsedTRUNs: !Array<shaka.util.ParsedTRUNBox>,
 *   timescale: number,
 * }}
 *
 * @description
 * Information gathered from a single TRAF box while parsing a media segment.
 *
 * @property {?number} baseMediaDecodeTime
 *   The base media decode time read from the TFDT box, or null if no TFDT
 *   box has been parsed for this TRAF.
 * @property {number} defaultSampleDuration
 *   The default sample duration from the TFHD box, falling back to the value
 *   from the TREX box.
 * @property {number} defaultSampleSize
 *   The default sample size from the TFHD box, falling back to the value
 *   from the TREX box.
 * @property {!Array<shaka.util.ParsedTRUNBox>} parsedTRUNs
 *   The TRUN boxes parsed inside this TRAF, in the order they were found.
 * @property {number} timescale
 *   The timescale of the track named by the TFHD box, or
 *   shaka.cea.CeaUtils.DEFAULT_TIMESCALE_VALUE if that track is unknown.
 */
shaka.cea.Mp4CeaParser.ParsedTRAF;
|
|
|
|
// Register a factory that creates this parser for the 'video/mp4' MIME type.
shaka.media.ClosedCaptionParser.registerParser(
    'video/mp4', () => new shaka.cea.Mp4CeaParser());
|