mirror of
https://github.com/shaka-project/shaka-player.git
synced 2026-06-16 16:16:40 +03:00
70fad8de8f
Closes https://github.com/shaka-project/shaka-player/issues/3674 Co-authored-by: Joey Parrish <joeyparrish@google.com>
642 lines
18 KiB
JavaScript
642 lines
18 KiB
JavaScript
/*! @license
 * Shaka Player
 * Copyright 2016 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
|
||
|
||
goog.provide('shaka.util.TsParser');
|
||
|
||
goog.require('goog.asserts');
|
||
goog.require('shaka.log');
|
||
goog.require('shaka.util.Id3Utils');
|
||
goog.require('shaka.util.Uint8ArrayUtils');
|
||
|
||
|
||
/**
 * Lightweight parser for MPEG-2 Transport Stream (TS) segments.
 *
 * A call to parse() walks the fixed-size TS packets of a segment, reads the
 * PAT and the PMT to discover the video, audio and ID3 PIDs, and collects the
 * payload of those PIDs.  The getters can then be used to query the detected
 * codecs, the earliest start times, the ID3 metadata and the AVC NAL units.
 *
 * @see https://en.wikipedia.org/wiki/MPEG_transport_stream
 * @export
 */
shaka.util.TsParser = class {
  /** */
  constructor() {
    /**
     * PID of the PMT, as announced by the PAT.
     * @private {?number}
     */
    this.pmtId_ = null;

    /**
     * True once at least one PMT packet has been processed.
     * @private {boolean}
     */
    this.pmtParsed_ = false;

    /**
     * Earliest video timestamp seen so far, in seconds.
     * @private {?number}
     */
    this.videoStartTime_ = null;

    /**
     * PID of the video elementary stream; null until found in a PMT.
     * @private {?number}
     */
    this.videoPid_ = null;

    /**
     * Short video codec name ('avc' or 'hvc'); null until found in a PMT.
     * @private {?string}
     */
    this.videoCodec_ = null;

    /**
     * Payload of every video TS packet, in stream order.
     * @private {!Array.<Uint8Array>}
     */
    this.videoData_ = [];

    /**
     * Earliest audio timestamp seen so far, in seconds.
     * @private {?number}
     */
    this.audioStartTime_ = null;

    /**
     * PID of the audio elementary stream; null until found in a PMT.
     * @private {?number}
     */
    this.audioPid_ = null;

    /**
     * Short audio codec name ('aac' or 'mp3'); null until found in a PMT.
     * @private {?string}
     */
    this.audioCodec_ = null;

    /**
     * Payload of every audio TS packet, in stream order.
     * @private {!Array.<Uint8Array>}
     */
    this.audioData_ = [];

    /**
     * PID of the ID3 metadata stream; null until found in a PMT.
     * @private {?number}
     */
    this.id3Pid_ = null;

    /**
     * Payload of every ID3 TS packet, in stream order.
     * @private {!Array.<Uint8Array>}
     */
    this.id3Data_ = [];
  }

  /**
   * Parse the given data.
   *
   * Buffers shorter than three TS packets are ignored.  The collected state
   * is cumulative: calling parse() again on the same instance appends to the
   * data gathered by previous calls.
   *
   * @param {Uint8Array} data
   * @return {!shaka.util.TsParser} This parser, to allow call chaining.
   */
  parse(data) {
    const timescale = shaka.util.TsParser.Timescale_;
    const packetLength = shaka.util.TsParser.PacketLength_;

    // A TS fragment should contain at least 3 TS packets, a PAT, a PMT, and
    // one PID.
    if (data.length < 3 * packetLength) {
      return this;
    }
    // If no sync point is found, fall back to the start of the buffer.
    const syncOffset = Math.max(0, shaka.util.TsParser.syncOffset(data));

    // Only walk whole packets: drop the trailing bytes that do not form a
    // complete packet when counting from the sync offset.
    const length = data.length - (data.length - syncOffset) % packetLength;

    let unknownPIDs = false;

    // loop through TS packets
    for (let start = syncOffset; start < length; start += packetLength) {
      if (data[start] == 0x47) {
        const payloadUnitStartIndicator = !!(data[start + 1] & 0x40);
        // pid is a 13-bit field starting at the last 5 bits of TS[1]
        const pid = ((data[start + 1] & 0x1f) << 8) + data[start + 2];
        const adaptationFieldControl = (data[start + 3] & 0x30) >> 4;

        // if an adaption field is present, its length is specified by the
        // fifth byte of the TS packet header.
        let offset;
        if (adaptationFieldControl > 1) {
          offset = start + 5 + data[start + 4];
          // continue if there is only adaptation field (or if a malformed
          // length points past the end of this packet).
          if (offset >= start + packetLength) {
            continue;
          }
        } else {
          offset = start + 4;
        }
        switch (pid) {
          case 0:
            // PAT.  When a section starts in this packet, the payload begins
            // with a pointer field that must be skipped.
            if (payloadUnitStartIndicator) {
              offset += data[offset] + 1;
            }

            this.pmtId_ = this.getPmtId_(data, offset);
            break;
          case 17:
          case 0x1fff:
            // Packets on these PIDs are ignored.
            break;
          case this.pmtId_: {
            if (payloadUnitStartIndicator) {
              offset += data[offset] + 1;
            }

            const parsedPIDs = this.parsePMT_(data, offset);

            // only update track id if track PID found while parsing PMT
            // this is to avoid resetting the PID to -1 in case
            // track PID transiently disappears from the stream
            // this could happen in case of transient missing audio samples
            // for example
            // NOTE this is only the PID of the track as found in TS,
            // but we are not using this for MP4 track IDs.
            if (this.videoPid_ == null && parsedPIDs.video != -1) {
              this.videoPid_ = parsedPIDs.video;
              this.videoCodec_ = parsedPIDs.videoCodec;
            }
            if (this.audioPid_ == null && parsedPIDs.audio != -1) {
              this.audioPid_ = parsedPIDs.audio;
              this.audioCodec_ = parsedPIDs.audioCodec;
            }
            if (this.id3Pid_ == null && parsedPIDs.id3 != -1) {
              this.id3Pid_ = parsedPIDs.id3;
            }

            if (unknownPIDs && !this.pmtParsed_) {
              shaka.log.debug('reparse from beginning');
              unknownPIDs = false;
              // Step back one packet before the sync offset; the
              // "+= packetLength" of the for loop will then restart the walk
              // at the first packet.
              start = syncOffset - packetLength;
            }
            this.pmtParsed_ = true;
            break;
          }
          case this.videoPid_: {
            const videoData = data.subarray(offset, start + packetLength);
            // parsePES_() returns null for packets that continue a PES
            // started earlier; only PES starts carry timestamps.
            const pes = this.parsePES_(videoData);
            if (pes && pes.pts != null) {
              const startTime = Math.min(pes.dts, pes.pts) / timescale;
              if (this.videoStartTime_ == null ||
                  this.videoStartTime_ > startTime) {
                this.videoStartTime_ = startTime;
              }
            }
            this.videoData_.push(videoData);
            break;
          }
          case this.audioPid_: {
            const audioData = data.subarray(offset, start + packetLength);
            const pes = this.parsePES_(audioData);
            if (pes && pes.pts != null) {
              const startTime = Math.min(pes.dts, pes.pts) / timescale;
              if (this.audioStartTime_ == null ||
                  this.audioStartTime_ > startTime) {
                this.audioStartTime_ = startTime;
              }
            }
            this.audioData_.push(audioData);
            break;
          }
          case this.id3Pid_:
            this.id3Data_.push(data.subarray(offset, start + packetLength));
            break;
          default:
            // Remember that something was skipped so the segment can be
            // re-walked once the PMT has told us which PIDs matter.
            unknownPIDs = true;
            break;
        }
      } else {
        shaka.log.warning('Found TS packet that do not start with 0x47');
      }
    }
    return this;
  }

  /**
   * Get the PMT ID from the PAT.
   *
   * @param {Uint8Array} data
   * @param {number} offset Offset of the first byte of the PAT section.
   * @return {number} The 13-bit PID read from the first PAT entry.
   * @private
   */
  getPmtId_(data, offset) {
    // skip the PSI header and parse the first PMT entry
    return ((data[offset + 10] & 0x1f) << 8) | data[offset + 11];
  }

  /**
   * Parse PMT.
   *
   * Walks the elementary stream entries of the PMT section and records the
   * first PID found for each supported kind of stream.  PIDs that are not
   * found are reported as -1 and their codec as the empty string.
   *
   * @param {Uint8Array} data
   * @param {number} offset Offset of the first byte of the PMT section.
   * @return {!shaka.util.TsParser.PMT}
   * @private
   */
  parsePMT_(data, offset) {
    const result = {
      audio: -1,
      video: -1,
      id3: -1,
      audioCodec: '',
      videoCodec: '',
    };
    const sectionLength = ((data[offset + 1] & 0x0f) << 8) | data[offset + 2];
    // The section length counts the bytes after the 3-byte header; the last
    // 4 bytes are excluded from the entry table.
    const tableEnd = offset + 3 + sectionLength - 4;
    // to determine where the table is, we have to figure out how
    // long the program info descriptors are
    const programInfoLength =
        ((data[offset + 10] & 0x0f) << 8) | data[offset + 11];
    // advance the offset to the first entry in the mapping table
    offset += 12 + programInfoLength;
    while (offset < tableEnd) {
      const pid = ((data[offset + 1] & 0x1f) << 8) | data[offset + 2];
      switch (data[offset]) {
        // SAMPLE-AES AAC
        case 0xcf:
          break;
        // ISO/IEC 13818-7 ADTS AAC (MPEG-2 lower bit-rate audio)
        case 0x0f:
          if (result.audio == -1) {
            result.audio = pid;
            result.audioCodec = 'aac';
          }
          break;
        // Packetized metadata (ID3)
        case 0x15:
          if (result.id3 == -1) {
            result.id3 = pid;
          }
          break;
        // SAMPLE-AES AVC
        case 0xdb:
          break;
        // ITU-T Rec. H.264 and ISO/IEC 14496-10 (lower bit-rate video)
        case 0x1b:
          if (result.video == -1) {
            result.video = pid;
            result.videoCodec = 'avc';
          }
          break;
        // ISO/IEC 11172-3 (MPEG-1 audio)
        // or ISO/IEC 13818-3 (MPEG-2 halved sample rate audio)
        case 0x03:
        case 0x04:
          if (result.audio == -1) {
            result.audio = pid;
            result.audioCodec = 'mp3';
          }
          break;
        // HEVC
        case 0x24:
          if (result.video == -1) {
            result.video = pid;
            result.videoCodec = 'hvc';
          }
          break;
        default:
          // shaka.log.warning('Unknown stream type:', data[offset]);
          break;
      }
      // move to the next table entry
      // skip past the elementary stream descriptors, if present
      offset += (((data[offset + 3] & 0x0f) << 8) | data[offset + 4]) + 5;
    }
    return result;
  }

  /**
   * Parse PES.
   *
   * @param {Uint8Array} data Payload that is expected to begin with a PES
   *   header.
   * @return {?shaka.util.TsParser.PES} The parsed PES, or null if the data
   *   does not begin with the PES start code prefix (0x000001).
   * @private
   */
  parsePES_(data) {
    const startPrefix = (data[0] << 16) | (data[1] << 8) | data[2];
    // In certain live streams, the start of a TS fragment has ts packets
    // that are frame data that is continuing from the previous fragment. This
    // is to check that the pes data is the start of a new pes data
    if (startPrefix !== 1) {
      return null;
    }
    /** @type {shaka.util.TsParser.PES} */
    const pes = {
      data: new Uint8Array(0),
      // get the packet length, this will be 0 for video
      packetLength: 6 + ((data[4] << 8) | data[5]),
      pts: null,
      dts: null,
    };

    // PES packets may be annotated with a PTS value, or a PTS value
    // and a DTS value. Determine what combination of values is
    // available to work with.
    const ptsDtsFlags = data[7];

    // PTS and DTS are normally stored as a 33-bit number.  Javascript
    // performs all bitwise operations on 32-bit integers but javascript
    // supports a much greater range (52-bits) of integer using standard
    // mathematical operations.
    // We construct a 31-bit value using bitwise operators over the 31
    // most significant bits and then multiply by 4 (equal to a left-shift
    // of 2) before we add the final 2 least significant bits of the
    // timestamp (equal to an OR.)
    if (ptsDtsFlags & 0xC0) {
      // the PTS and DTS are not written out directly. For information
      // on how they are encoded, see
      // http://dvd.sourceforge.net/dvdinfo/pes-hdr.html
      pes.pts =
        (data[9] & 0x0e) * 536870912 + // 1 << 29
        (data[10] & 0xff) * 4194304 + // 1 << 22
        (data[11] & 0xfe) * 16384 + // 1 << 14
        (data[12] & 0xff) * 128 + // 1 << 7
        (data[13] & 0xfe) / 2;

      // Without an explicit DTS, the DTS is the same as the PTS.
      pes.dts = pes.pts;
      if (ptsDtsFlags & 0x40) {
        pes.dts =
          (data[14] & 0x0e) * 536870912 + // 1 << 29
          (data[15] & 0xff) * 4194304 + // 1 << 22
          (data[16] & 0xfe) * 16384 + // 1 << 14
          (data[17] & 0xff) * 128 + // 1 << 7
          (data[18] & 0xfe) / 2;
      }
    }
    // the data section starts immediately after the PES header.
    // pes_header_data_length specifies the number of header bytes
    // that follow the last byte of the field.
    pes.data = data.subarray(9 + data[8]);

    return pes;
  }

  /**
   * Parse AVC Nalus
   *
   * The code is based on hls.js
   * Credit to https://github.com/video-dev/hls.js/blob/master/src/demux/tsdemuxer.ts
   *
   * @param {shaka.util.TsParser.PES} pes
   * @return {!Array.<shaka.util.TsParser.AvcNalu>} The NALUs found in the PES
   *   payload, in stream order.  Each one carries the PES presentation time.
   * @private
   */
  parseAvcNalus_(pes) {
    const timescale = shaka.util.TsParser.Timescale_;
    // A PTS of 0 is a valid timestamp, so compare against null instead of
    // relying on truthiness.
    const time = pes.pts != null ? pes.pts / timescale : null;
    const data = pes.data;
    const len = data.byteLength;

    // A NALU does not contain its size.
    // The Annex B specification solves this by requiring 'Start Codes' to
    // precede each NALU. A start code is 2 or 3 0x00 bytes followed with a
    // 0x01 byte. e.g. 0x000001 or 0x00000001.
    // More info in: https://stackoverflow.com/questions/24884827/possible-locations-for-sequence-picture-parameter-sets-for-h-264-stream/24890903#24890903
    let numZeros = 0;

    /** @type {!Array.<shaka.util.TsParser.AvcNalu>} */
    const nalus = [];

    // Start position includes the first byte where we read the type.
    // The data we extract begins at the next byte.
    let lastNaluStart = -1;
    // Extracted from the first byte.
    let lastNaluType = 0;

    for (let i = 0; i < len; ++i) {
      const value = data[i];
      if (!value) {
        numZeros++;
      } else if (numZeros >= 2 && value == 1) {
        // We just read a start code.  Consume the NALU we passed, if any.
        if (lastNaluStart >= 0) {
          // Because the start position includes the type, skip the first byte.
          const firstByteToKeep = lastNaluStart + 1;

          // Compute the last byte to keep.  The start code is at most 3 zeros.
          // Any earlier zeros are not part of the start code.
          const startCodeSize = (numZeros > 3 ? 3 : numZeros) + 1;
          const lastByteToKeep = i - startCodeSize;

          /** @type {shaka.util.TsParser.AvcNalu} */
          const nalu = {
            // subarray's end position is exclusive, so add one.
            data: data.subarray(firstByteToKeep, lastByteToKeep + 1),
            type: lastNaluType,
            time: time,
          };
          nalus.push(nalu);
        }

        // We just read a start code, so there should be another byte here, at
        // least, for the NALU type.  Check just in case.
        if (i >= len - 1) {
          shaka.log.warning('Malformed TS, incomplete NALU, ignoring.');
          return nalus;
        }

        // Advance and read the type of the next NALU.
        i++;
        lastNaluStart = i;
        lastNaluType = data[i] & 0x1f;
        numZeros = 0;
      } else {
        numZeros = 0;
      }
    }

    if (lastNaluStart >= 0) {
      // The rest of the buffer was a NALU.
      // Because the start position includes the type, skip the first byte.
      const firstByteToKeep = lastNaluStart + 1;
      /** @type {shaka.util.TsParser.AvcNalu} */
      const nalu = {
        data: data.subarray(firstByteToKeep, len),
        type: lastNaluType,
        time: time,
      };
      nalus.push(nalu);
    }
    return nalus;
  }

  /**
   * Return the ID3 metadata
   *
   * @return {!Array.<shaka.extern.ID3Metadata>} One entry per ID3 PES found
   *   by parse(), in stream order.
   */
  getMetadata() {
    const timescale = shaka.util.TsParser.Timescale_;
    const Uint8ArrayUtils = shaka.util.Uint8ArrayUtils;
    const metadata = [];
    let prevId3Data = new Uint8Array(0);
    // parsePES_() only works if the data begins on a PES boundary.
    // Try the last data blob first, and if it doesn't begin on a
    // PES boundary, prepend the previous blob and try again.
    // This way, a successful parse will always begin and end on
    // the correct boundary, and no data will be skipped.
    for (let i = this.id3Data_.length - 1; i >= 0; i--) {
      const data = this.id3Data_[i];
      goog.asserts.assert(data, 'We should have a data');
      const id3Data = Uint8ArrayUtils.concat(data, prevId3Data);
      const pes = this.parsePES_(id3Data);
      if (pes) {
        // We walk backwards, so insert at the front to keep stream order.
        metadata.unshift({
          // A PTS of 0 is a valid timestamp, so compare against null.
          cueTime: pes.pts != null ? pes.pts / timescale : null,
          data: pes.data,
          frames: shaka.util.Id3Utils.getID3Frames(pes.data),
          dts: pes.dts,
          pts: pes.pts,
        });
        prevId3Data = new Uint8Array(0);
      } else {
        prevId3Data = id3Data;
      }
    }
    return metadata;
  }

  /**
   * Return the start time for the audio and video
   *
   * @return {{audio: ?number, video: ?number}} The earliest timestamp seen
   *   for each stream, in seconds, or null if none was seen.
   */
  getStartTime() {
    return {
      audio: this.audioStartTime_,
      video: this.videoStartTime_,
    };
  }

  /**
   * Return the audio and video codecs
   *
   * @return {{audio: ?string, video: ?string}} The short codec names found
   *   in the PMT, or null for a stream that was not found.
   */
  getCodecs() {
    return {
      audio: this.audioCodec_,
      video: this.videoCodec_,
    };
  }

  /**
   * Return the video data
   *
   * NALUs are only extracted when the video codec is 'avc'; for any other
   * codec the result is empty.
   *
   * @return {!Array.<shaka.util.TsParser.AvcNalu>} The NALUs in stream order.
   */
  getVideoNalus() {
    const Uint8ArrayUtils = shaka.util.Uint8ArrayUtils;
    let nalus = [];
    let prevVideoData = new Uint8Array(0);
    // parsePES_() only works if the data begins on a PES boundary.
    // Try the last data blob first, and if it doesn't begin on a
    // PES boundary, prepend the previous blob and try again.
    // This way, a successful parse will always begin and end on
    // the correct boundary, and no data will be skipped.
    for (let i = this.videoData_.length - 1; i >= 0; i--) {
      const data = this.videoData_[i];
      goog.asserts.assert(data, 'We should have a data');
      const videoData = Uint8ArrayUtils.concat(data, prevVideoData);
      const pes = this.parsePES_(videoData);
      if (pes) {
        if (this.videoCodec_ == 'avc') {
          // PES packets are discovered from last to first, but the NALUs
          // inside each PES are already in stream order.  Prepend them as a
          // group so that the final array is in stream order, both across
          // PES packets and inside each of them.
          nalus = this.parseAvcNalus_(pes).concat(nalus);
        }
        prevVideoData = new Uint8Array(0);
      } else {
        prevVideoData = videoData;
      }
    }
    return nalus;
  }

  /**
   * Check if the passed data corresponds to an MPEG2-TS
   *
   * @param {Uint8Array} data
   * @return {boolean} True if a TS sync point was found in the data.
   */
  static probe(data) {
    const syncOffset = shaka.util.TsParser.syncOffset(data);
    if (syncOffset < 0) {
      return false;
    }
    if (syncOffset > 0) {
      shaka.log.warning('MPEG2-TS detected but first sync word found @ ' +
          'offset ' + syncOffset + ', junk ahead ?');
    }
    return true;
  }

  /**
   * Returns the synchronization offset
   *
   * The synchronization offset is the first position, within the scanned
   * window, at which three consecutive packets all start with the 0x47 sync
   * byte.
   *
   * @param {Uint8Array} data
   * @return {number} The offset of the first sync byte, or -1 if none was
   *   found.
   */
  static syncOffset(data) {
    const packetLength = shaka.util.TsParser.PacketLength_;
    // Scan at most the first 1000 bytes, and never start so late that fewer
    // than three whole packets remain.  The "+ 1" keeps the last valid start
    // position (data.length - 3 * packetLength) inside the window, so that a
    // buffer of exactly three packets can be detected.
    const scanwindow = Math.min(1000, data.length - 3 * packetLength + 1);
    let i = 0;
    while (i < scanwindow) {
      // a TS fragment should contain at least 3 TS packets, a PAT, a PMT, and
      // one PID, each starting with 0x47
      if (data[i] == 0x47 &&
          data[i + packetLength] == 0x47 &&
          data[i + 2 * packetLength] == 0x47) {
        return i;
      } else {
        i++;
      }
    }
    return -1;
  }
};
|
||
|
||
|
||
/**
 * Size of a TS packet, in bytes.
 *
 * @const {number}
 * @private
 */
shaka.util.TsParser.PacketLength_ = 188;
|
||
|
||
|
||
/**
 * Number of PTS/DTS ticks per second.  Timestamps read from PES headers are
 * divided by this value to obtain times in seconds.
 *
 * @const {number}
 * @private
 */
shaka.util.TsParser.Timescale_ = 90000;
|
||
|
||
|
||
/**
 * @typedef {{
 *   audio: number,
 *   video: number,
 *   id3: number,
 *   audioCodec: string,
 *   videoCodec: string
 * }}
 *
 * @summary PMT.
 * @property {number} audio
 *   Audio PID, or -1 if the PMT lists no supported audio stream.
 * @property {number} video
 *   Video PID, or -1 if the PMT lists no supported video stream.
 * @property {number} id3
 *   ID3 PID, or -1 if the PMT lists no ID3 stream.
 * @property {string} audioCodec
 *   Audio codec ('aac' or 'mp3'), or the empty string if none was found.
 * @property {string} videoCodec
 *   Video codec ('avc' or 'hvc'), or the empty string if none was found.
 */
shaka.util.TsParser.PMT;
|
||
|
||
|
||
/**
 * @typedef {{
 *   data: Uint8Array,
 *   packetLength: number,
 *   pts: ?number,
 *   dts: ?number
 * }}
 *
 * @summary PES.
 * @property {Uint8Array} data
 *   The PES payload, i.e. the bytes that follow the PES header.
 * @property {number} packetLength
 *   The value of the 16-bit PES packet length field, plus 6.
 * @property {?number} pts
 *   Presentation timestamp in timescale units, or null if the PES header
 *   carries no timestamp.
 * @property {?number} dts
 *   Decode timestamp in timescale units.  Equal to the PTS when the header
 *   carries a PTS only; null if the PES header carries no timestamp.
 */
shaka.util.TsParser.PES;
|
||
|
||
|
||
/**
 * @typedef {{
 *   data: !Uint8Array,
 *   type: number,
 *   time: ?number
 * }}
 *
 * @summary AvcNalu.
 * @property {!Uint8Array} data
 *   The bytes of the NALU that follow its first (type) byte.
 * @property {number} type
 *   The NALU type, read from the low 5 bits of the first byte of the NALU.
 * @property {?number} time
 *   The presentation time of the PES that carried the NALU, in seconds, or
 *   null if that PES had no timestamp.
 */
shaka.util.TsParser.AvcNalu;
|
||
|