Files
shaka-player/lib/msf/loc_parser.js
T
2026-05-06 09:40:59 +02:00

381 lines
13 KiB
JavaScript
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/*! @license
* Shaka Player
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
goog.provide('shaka.msf.LOCParser');
goog.require('shaka.log');
goog.require('shaka.util.BufferUtils');
goog.requireType('shaka.msf.Utils');
/**
* Parser for Low Overhead Container (LOC) media objects.
*
* ## Timing strategy
*
* The LOC spec (draft-ietf-moq-loc-02 §2.3.1) defines an optional Timestamp
* property (ID 0x06) and an optional Timescale property (ID 0x08).
* `parse()` resolves `startTime` through three sources in priority order:
*
* 1. **Public properties** — `obj.extensions` (MOQ Object Header).
* Readable by relays; preferred when present.
*
* 2. **Private properties** — the key-value prefix of `obj.data`
* (LOC §2.2). May be end-to-end encrypted.
*
* 3. **Fallback** — `Number(obj.location.group) × frameDuration`.
* Used when neither source carries a Timestamp.
*
* When a Timestamp is found, `startTime` is computed as:
*
* startTime = Number(timestamp) / Number(timescale)
*
* where `timescale` defaults to 1 000 000 (microseconds) per the spec when
* the Timescale property is absent (§2.3.1.1).
*
* `frameDuration` is a fixed per-frame duration in seconds, derived from
* the MSF catalog fields (framerate / samplerate / codec). Callers obtain
* it from `LOCParser.frameDurationFromTrack(track)` and pass it to the
* constructor.
*
* Every call to `parse()` is stateless and always returns a non-null result.
*
* ## Payload extraction
*
* Per LOC §2.2 the MOQ Object Payload layout is:
*
* LOC Private Properties (count vi64 + key-value pairs) [optional]
* LOC Payload (raw elementary bitstream)
*
* `parse()` strips the Private Properties prefix if present and returns
* only the bare bitstream in `result.payload`. If parsing the prefix fails
* (e.g. the buffer starts with raw codec data that looks like count=0) the
* full buffer is returned unchanged.
*
* @see https://www.ietf.org/archive/id/draft-ietf-moq-loc-02.html
* @final
*/
shaka.msf.LOCParser = class {
  /**
   * @param {number} frameDuration Fixed duration, in seconds, reported for
   *   every parsed object and used by the GroupID-based start-time fallback.
   *   Typically obtained from `LOCParser.frameDurationFromTrack()`.
   */
  constructor(frameDuration) {
    /** @private {number} */
    this.frameDuration_ = frameDuration;
  }

  /**
   * Parses a single LOC MoQ object.
   *
   * Resolves `startTime` in priority order:
   *   1. Timestamp property (ID 0x06) in public extensions (`obj.extensions`)
   *   2. Timestamp property in private properties (LOC payload prefix)
   *   3. Fallback: `groupId × frameDuration`
   *
   * A Timestamp that does not convert to a finite number of seconds (for
   * example because the accompanying Timescale is 0) is treated as absent and
   * the next source is tried.
   *
   * `duration` is always the frame duration passed to the constructor, and
   * `payload` is `obj.data` with the Private Properties prefix removed (or
   * the whole buffer when no valid prefix was found).
   *
   * @param {!shaka.msf.Utils.MOQObject} obj
   * @return {!{startTime: number, duration: number, payload: !Uint8Array}}
   */
  parse(obj) {
    // The Private Properties prefix has to be stripped from the payload no
    // matter which timing source wins, so it is always parsed first.
    const {props: privateProps, payloadOffset} =
        this.parsePrivateProperties_(obj.data);
    const payload = shaka.util.BufferUtils.toUint8(obj.data, payloadOffset);

    /** @type {?number} */
    let startTime = null;

    // 1. Public properties (MOQ Object Header Extensions).  The buffer is only
    //    parsed when it is present and non-empty.
    if (obj.extensions && obj.extensions.byteLength > 0) {
      startTime = this.startTimeFromProps_(
          this.parseExtensions_(obj.extensions));
    }

    // 2. Private properties (LOC payload prefix).
    if (startTime === null) {
      startTime = this.startTimeFromProps_(privateProps);
    }

    // 3. Fallback: GroupID × frameDuration.
    if (startTime === null) {
      startTime = Number(obj.location.group) * this.frameDuration_;
    }

    return {startTime, duration: this.frameDuration_, payload};
  }

  /**
   * Extracts a start time, in seconds, from a parsed property map.
   *
   * @param {!Map<bigint, bigint|!Uint8Array>} props
   * @return {?number} The start time, or `null` when the map has no vi64
   *   Timestamp property or the Timestamp/Timescale pair does not produce a
   *   finite value.
   * @private
   */
  startTimeFromProps_(props) {
    // ID of the LOC Timestamp property (§2.3.1.1, even → bigint value).
    const TIMESTAMP_ID = BigInt(0x06);
    // ID of the LOC Timescale property (§2.3.1.2, even → bigint value).
    const TIMESCALE_ID = BigInt(0x08);

    const timestamp = props.get(TIMESTAMP_ID);
    if (typeof timestamp !== 'bigint') {
      return null;
    }
    const timescale = props.get(TIMESCALE_ID);
    const seconds = this.timestampToSeconds_(
        timestamp, typeof timescale === 'bigint' ? timescale : undefined);
    if (!Number.isFinite(seconds)) {
      // A zero Timescale divides to Infinity (or NaN for a zero Timestamp).
      // Such a value cannot be placed on a media timeline, so report the
      // Timestamp as unusable and let the caller try the next source.
      shaka.log.v2('LOCParser: ignoring Timestamp with unusable Timescale',
          timescale);
      return null;
    }
    return seconds;
  }

  /**
   * Converts a raw LOC Timestamp value to presentation seconds.
   *
   * Per LOC §2.3.1.1–§2.3.1.2:
   *   - When `timescale` is present: `seconds = timestamp / timescale`
   *   - When `timescale` is absent: the timestamp is wall-clock µs since the
   *     Unix epoch, so the implicit timescale is 1 000 000.
   *
   * No validation is performed here: a `timescale` of 0 yields `Infinity` or
   * `NaN`, which callers must reject.
   *
   * @param {bigint} timestamp Raw vi64 timestamp value.
   * @param {bigint|undefined} timescale Raw vi64 timescale, or `undefined`
   *   if the Timescale property was absent.
   * @return {number}
   * @private
   */
  timestampToSeconds_(timestamp, timescale) {
    const scale = timescale !== undefined ? Number(timescale) : 1e6;
    return Number(timestamp) / scale;
  }

  /**
   * Decodes one type+value pair starting at `offset` and stores it in
   * `props`.
   *
   * Per-pair wire format (shared by public extensions and private
   * properties):
   *
   *   type (vi64)
   *   value: vi64                  when type is even
   *          length (vi64) + bytes when type is odd
   *
   * @param {!Uint8Array} data
   * @param {number} offset Byte offset of the pair's type field.
   * @param {!Map<bigint, bigint|!Uint8Array>} props Map that receives the
   *   decoded entry.  Left untouched if decoding throws.
   * @return {number} Byte offset immediately after the pair.
   * @throws {Error} If the pair is truncated or its declared length runs
   *   past the end of `data`.
   * @private
   */
  readPropertyAt_(data, offset, props) {
    const typeResult = this.readVi64At_(data, offset);
    const type = typeResult.value;
    let pos = offset + typeResult.bytesRead;

    if (type % BigInt(2) === BigInt(0)) {
      // Even type → single vi64 value.
      const valResult = this.readVi64At_(data, pos);
      props.set(type, valResult.value);
      return pos + valResult.bytesRead;
    }

    // Odd type → length-prefixed byte sequence.
    const lenResult = this.readVi64At_(data, pos);
    pos += lenResult.bytesRead;
    const len = Number(lenResult.value);
    // Reject a declared length that does not fit in the remaining bytes.
    // NOTE(review): BufferUtils.toUint8 appears to clamp an out-of-range view
    // instead of throwing - confirm.  Either way, without this check the
    // returned offset could point past the end of the buffer.
    if (len > data.byteLength - pos) {
      throw new Error(
          `LOCParser.readPropertyAt: ${len}-byte value at offset ${pos} ` +
          'overruns the buffer');
    }
    props.set(type, shaka.util.BufferUtils.toUint8(data, pos, len));
    return pos + len;
  }

  /**
   * Parses the raw MOQ Object Header Extensions buffer into a property map.
   *
   * Wire format — flat sequence of type+value pairs until buffer end
   * (no leading count field; the total-length prefix was already consumed
   * by the transport layer before storing the bytes in `obj.extensions`).
   *
   * This differs from `parsePrivateProperties_()`, whose buffer begins with
   * a count vi64.  Both share the same per-pair encoding
   * (see `readPropertyAt_()`).
   *
   * If parsing throws at any point the partial map built so far is returned,
   * so callers always receive a valid (possibly empty) map.  A pair that is
   * truncated or overruns the buffer is not included.
   *
   * @param {!Uint8Array} data Raw bytes from `obj.extensions`.
   * @return {!Map<bigint, bigint|!Uint8Array>}
   * @private
   */
  parseExtensions_(data) {
    /** @type {!Map<bigint, bigint|!Uint8Array>} */
    const props = new Map();
    try {
      let offset = 0;
      // An empty buffer simply skips the loop and yields an empty map.
      while (offset < data.byteLength) {
        offset = this.readPropertyAt_(data, offset, props);
      }
    } catch (e) {
      shaka.log.v2('LOCParser: failed to parse extension headers, ' +
          'returning partial map', e);
    }
    return props;
  }

  /**
   * Parses the optional LOC Private Properties block at the start of the raw
   * MOQ Object Payload and returns the byte offset at which the actual media
   * bitstream begins together with the parsed properties map.
   *
   * Wire format (mirrors msf_classes.js Reader.keyValuePairs()):
   *   count (vi64)
   *   For each of `count` pairs: one type+value pair
   *     (see `readPropertyAt_()`)
   *   <LOC Payload starts here>
   *
   * The prefix is rejected — `payloadOffset` is 0 and the returned map is
   * empty — when:
   *   - the buffer is empty,
   *   - the count is 0 or larger than 64, or
   *   - any pair is truncated or overruns the buffer.
   * In all of those cases the whole buffer is the payload, so nothing
   * decoded from it may be used as a property.
   *
   * @param {!Uint8Array} data Raw MOQ Object Payload (obj.data).
   * @return {{
   *   props: !Map<bigint, bigint|!Uint8Array>,
   *   payloadOffset: number,
   * }}
   * @private
   */
  parsePrivateProperties_(data) {
    // Upper bound on a plausible property count; anything larger is assumed
    // to be raw codec data rather than a real count field.
    const MAX_PRIVATE_PROPERTIES = 64;

    /** @type {!Map<bigint, bigint|!Uint8Array>} */
    const props = new Map();
    if (data.byteLength === 0) {
      return {props, payloadOffset: 0};
    }
    try {
      const countResult = this.readVi64At_(data, 0);
      const count = Number(countResult.value);
      // Sanity guard.  A count of 0 carries no properties and is also what a
      // leading 0x00 byte of raw codec data decodes to (e.g. an H.264
      // start-code 0x00 0x00 0x00 0x01), so the buffer is left untouched.
      // An implausibly large count likewise means the first bytes are raw
      // codec data.
      if (count === 0 || count > MAX_PRIVATE_PROPERTIES) {
        return {props, payloadOffset: 0};
      }
      let offset = countResult.bytesRead;
      for (let i = 0; i < count; i++) {
        offset = this.readPropertyAt_(data, offset, props);
      }
      return {props, payloadOffset: offset};
    } catch (e) {
      shaka.log.v2('LOCParser: failed to parse private properties prefix, ' +
          'using full buffer as payload', e);
      // The prefix was not valid, so the entries decoded so far came from
      // payload bytes.  Return a fresh empty map so they cannot be mistaken
      // for a Timestamp/Timescale.
      return {props: new Map(), payloadOffset: 0};
    }
  }

  /**
   * Reads one QUIC variable-length integer (vi64, up to 62 bits) from
   * `buffer` at byte `offset`.
   *
   * Top-two-bit size tag of the first byte:
   *   00 → 1 byte  (6-bit value,  mask 0x3f)
   *   01 → 2 bytes (14-bit value, mask 0x3fff)
   *   10 → 4 bytes (30-bit value, mask 0x3fffffff)
   *   11 → 8 bytes (62-bit value, mask 0x3fffffffffffffff)
   *
   * Multi-byte values are read big-endian (the DataView default).
   *
   * Synchronous equivalent of `Reader.u62WithSize()` in msf_classes.js.
   *
   * @param {!Uint8Array} buffer
   * @param {number} offset
   * @return {{value: bigint, bytesRead: number}}
   * @throws {Error} If `offset` is at or past the end of `buffer`, or the
   *   encoded integer extends past the end of `buffer`.
   * @private
   */
  readVi64At_(buffer, offset) {
    if (offset >= buffer.length) {
      throw new Error(
          `LOCParser.readVi64At: underflow at offset ${offset}`);
    }
    const first = buffer[offset];
    // The tag 0..3 maps to an encoded size of 1, 2, 4 or 8 bytes.
    const size = 1 << (first >> 6);
    if (size === 1) {
      return {value: BigInt(first & 0x3f), bytesRead: 1};
    }
    if (offset + size > buffer.length) {
      throw new Error(`LOCParser.readVi64At: need ${size} bytes`);
    }
    const view = shaka.util.BufferUtils.toDataView(buffer, offset, size);
    /** @type {bigint} */
    let value;
    switch (size) {
      case 2:
        value = BigInt(view.getUint16(0)) & BigInt('0x3fff');
        break;
      case 4:
        value = BigInt(view.getUint32(0)) & BigInt('0x3fffffff');
        break;
      default:
        // size === 8
        value = view.getBigUint64(0) & BigInt('0x3fffffffffffffff');
        break;
    }
    return {value, bytesRead: size};
  }

  /**
   * Computes the fixed frame duration (seconds) for a LOC track from MSF
   * catalog fields.
   *
   * Rules, in the order they are applied:
   *   - Track has a `framerate` (video): `1 / track.framerate`
   *   - Track has no `samplerate`: `null` (a warning is logged)
   *   - Codec starts with `mp4a.40` (AAC): `1024 / track.samplerate`
   *   - Codec is `opus`: 20 ms (960 samples on Opus's 48 kHz clock),
   *     independent of `track.samplerate`
   *   - Anything else: `null` (a warning is logged)
   *
   * @param {msfCatalog.Track} track
   * @return {?number} Frame duration in seconds, or `null` when it cannot be
   *   determined.
   */
  static frameDurationFromTrack(track) {
    // Samples in a standard 20 ms Opus frame, counted on the 48 kHz clock.
    const OPUS_FRAME_SAMPLES = 960;
    const OPUS_CLOCK_RATE = 48000;
    // PCM samples per AAC-LC frame.
    const AAC_FRAME_SAMPLES = 1024;

    // Video
    if (track.framerate) {
      return 1 / track.framerate;
    }

    // Audio
    if (!track.samplerate) {
      shaka.log.warning('LOCParser.frameDurationFromTrack: ' +
          `track "${track.name}" has no samplerate`);
      return null;
    }

    const codec = (track.codec || '').toLowerCase();

    // AAC (mp4a.40.x family).  AAC-LC encodes 1024 PCM samples per frame.
    // NOTE(review): the prefix also matches SBR-based profiles such as
    // HE-AAC; whether 1024 / samplerate is right for those depends on which
    // sample rate the catalog reports - confirm against real catalogs.
    if (codec.startsWith('mp4a.40')) {
      return AAC_FRAME_SAMPLES / track.samplerate;
    }

    // Opus - the standard frame size is 20 ms, i.e. 960 samples at 48 kHz.
    // The frame duration does not depend on the nominal sample rate, so the
    // catalog value must not be used as the divisor: 960 / samplerate is
    // 20 ms only when samplerate happens to be 48000.
    if (codec === 'opus') {
      return OPUS_FRAME_SAMPLES / OPUS_CLOCK_RATE;
    }

    shaka.log.warning('LOCParser.frameDurationFromTrack: ' +
        `unrecognised codec "${track.codec}" for track "${track.name}"`);
    return null;
  }
};