mirror of
https://github.com/shaka-project/shaka-player.git
synced 2026-06-26 17:46:26 +03:00
2146c81f28
These changes were made in an attempt to address issues with Shaka on Chromecast getting stuck with buffer underruns while parsing large manifests (~7k segments). Instead of parsing once, it gets stuck parsing a manifest over and over on encrypted streams, with each manifest parse taking over ~1.2s. The changes here did not solve the issue, but [I was asked to make a PR](https://github.com/martinstark/shaka-player/commit/786d1370e566e0bdecf3ec8a847b62c6ebc1f7af). While the manifest parsing is slow, it appears not to be the root cause of the issue we've been seeing. In synthetic testing, with HLS manifests ranging from 100 to 100,000,000 segments on an AMD processor, Node 22, Arch Linux, the updated manifest parser uses somewhere between 12% and 40% less processor time, assuming my tests were representative. I don't have enough real-world data to say whether this makes any significant difference. I haven't run isolated manifest parsing tests on CC hardware (yet, and I'm not sure if I will have time).
346 lines
11 KiB
JavaScript
346 lines
11 KiB
JavaScript
/*! @license
|
|
* Shaka Player
|
|
* Copyright 2016 Google LLC
|
|
* SPDX-License-Identifier: Apache-2.0
|
|
*/
|
|
|
|
goog.provide('shaka.hls.ManifestTextParser');
|
|
|
|
goog.require('shaka.hls.Attribute');
|
|
goog.require('shaka.hls.Playlist');
|
|
goog.require('shaka.hls.PlaylistType');
|
|
goog.require('shaka.hls.Segment');
|
|
goog.require('shaka.hls.Tag');
|
|
goog.require('shaka.hls.Utils');
|
|
goog.require('shaka.util.Error');
|
|
goog.require('shaka.util.StringUtils');
|
|
goog.require('shaka.util.TextParser');
|
|
|
|
|
|
/**
 * HLS manifest text parser.
 *
 * Converts the raw bytes of an HLS playlist into a shaka.hls.Playlist built
 * from shaka.hls.Tag and shaka.hls.Segment objects.
 */
shaka.hls.ManifestTextParser = class {
  constructor() {
    /**
     * Id given to the next tag created through parseTag_().  It is
     * incremented on every call, so tags parsed by one instance get distinct
     * ids.
     * @private {number}
     */
    this.globalId_ = 0;
  }

  /**
   * Parses the text of a playlist.
   *
   * The header part of the playlist is walked once.  The playlist type starts
   * out as MASTER and is settled by the first tag that identifies it.  As soon
   * as a segment tag is reached, the remaining lines are handed to
   * parseSegments_() and the result is returned.
   *
   * @param {BufferSource} data
   * @return {!shaka.hls.Playlist}
   * @throws {shaka.util.Error} HLS_PLAYLIST_HEADER_MISSING when the first
   *   line is not the #EXTM3U header, HLS_INVALID_PLAYLIST_HIERARCHY when a
   *   segment tag follows tags that identified a master playlist, and
   *   INVALID_HLS_TAG (from parseTag) when a line is not a well-formed tag.
   */
  parsePlaylist(data) {
    const ManifestTextParser = shaka.hls.ManifestTextParser;

    // Get the input as a string.  Normalize newlines to \n.
    let str = shaka.util.StringUtils.fromUTF8(data);
    str = str.replace(/\r\n|\r(?=[^\n]|$)/gm, '\n').trim();

    // Splitting on runs of newlines also discards blank lines.
    const lines = str.split(/\n+/m);

    if (!ManifestTextParser.STATIC_PATTERNS.header.test(lines[0])) {
      throw new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.MANIFEST,
          shaka.util.Error.Code.HLS_PLAYLIST_HEADER_MISSING);
    }

    let playlistType = shaka.hls.PlaylistType.MASTER;
    let typeDetected = false;

    /** @type {!Array<shaka.hls.Tag>} */
    const tags = [];

    // Whether to skip the current line.  Starts as true so the header line is
    // skipped, and is set again after an EXT-X-STREAM-INF tag so that the URI
    // line following it is not parsed as a tag.
    let skip = true;
    for (let i = 0; i < lines.length; i++) {
      const line = lines[i];
      const next = lines[i + 1];
      // Skip comments.
      if (shaka.hls.Utils.isComment(line) || skip) {
        skip = false;
        continue;
      }

      const tag = this.parseTag_(line);
      const isSegmentTag = ManifestTextParser.SEGMENT_TAGS.has(tag.name);

      // Detect the playlist type on the first tag that gives it away.
      if (!typeDetected) {
        if (isSegmentTag ||
            ManifestTextParser.MEDIA_PLAYLIST_TAGS.has(tag.name)) {
          // Media playlist tags and segment tags both indicate MEDIA.
          playlistType = shaka.hls.PlaylistType.MEDIA;
          typeDetected = true;
        } else if (tag.name == 'EXT-X-STREAM-INF' ||
            tag.name == 'EXT-X-MEDIA') {
          // Master playlist detected, keep as MASTER.
          typeDetected = true;
        }
      }

      // Transition to segment parsing.
      if (isSegmentTag) {
        if (playlistType != shaka.hls.PlaylistType.MEDIA) {
          // Only media playlists should contain segment tags.
          throw new shaka.util.Error(
              shaka.util.Error.Severity.CRITICAL,
              shaka.util.Error.Category.MANIFEST,
              shaka.util.Error.Code.HLS_INVALID_PLAYLIST_HIERARCHY);
        }

        // parseSegments_() starts again at line i, so the tag parsed above is
        // deliberately not added to the playlist tags here.
        const segments = this.parseSegments_(lines, i, tags);
        return new shaka.hls.Playlist(playlistType, tags, segments);
      }

      tags.push(tag);

      // An EXT-X-STREAM-INF tag is followed by a URI of a media playlist.
      // Add the URI to the tag object.
      if (tag.name == 'EXT-X-STREAM-INF') {
        const tagUri = new shaka.hls.Attribute('URI', next);
        tag.addAttribute(tagUri);
        skip = true;
      }
    }

    return new shaka.hls.Playlist(playlistType, tags);
  }

  /**
   * Parses the lines from startIndex onwards into an array of HLS Segment
   * objects.
   *
   * Tags listed in MEDIA_PLAYLIST_TAGS that appear between segments are
   * appended to playlistTags instead of being attached to a segment.
   *
   * @param {!Array<string>} lines
   * @param {number} startIndex Index of the first line to parse.
   * @param {!Array<!shaka.hls.Tag>} playlistTags Modified in place.
   * @return {!Array<shaka.hls.Segment>}
   * @private
   */
  parseSegments_(lines, startIndex, playlistTags) {
    // Grow the list with push() instead of pre-sizing it with new Array(n).
    // A pre-sized array starts out as a run of holes and has to be copied
    // with slice() whenever the estimate overshoots; appending to an array
    // literal yields the same contents without either cost.
    /** @type {!Array<shaka.hls.Segment>} */
    const segments = [];

    /** @type {!Array<shaka.hls.Tag>} */
    let segmentTags = [];

    /** @type {!Array<shaka.hls.Tag>} */
    let partialSegmentTags = [];

    // The last parsed EXT-X-MAP tag.
    /** @type {?shaka.hls.Tag} */
    let currentMapTag = null;

    for (let lineIndex = startIndex; lineIndex < lines.length; lineIndex++) {
      const line = lines[lineIndex];
      if (line.startsWith('#EXT')) {
        const tag = this.parseTag_(line);
        if (shaka.hls.ManifestTextParser.MEDIA_PLAYLIST_TAGS.has(tag.name)) {
          playlistTags.push(tag);
        } else if (tag.name == 'EXT-X-MAP') {
          // Mark the EXT-X-MAP tag, and add it to the segment tags
          // following it later.
          currentMapTag = tag;
        } else if (tag.name == 'EXT-X-PART') {
          partialSegmentTags.push(tag);
        } else if (tag.name == 'EXT-X-PRELOAD-HINT') {
          // Hints of any other TYPE are not recorded.
          if (tag.getAttributeValue('TYPE') == 'PART') {
            partialSegmentTags.push(tag);
          } else if (tag.getAttributeValue('TYPE') == 'MAP') {
            // Rename the Preload Hint tag to be a Map tag.
            tag.setName('EXT-X-MAP');
            currentMapTag = tag;
          }
        } else {
          segmentTags.push(tag);
        }
      } else if (shaka.hls.Utils.isComment(line)) {
        // Skip comments.
      } else {
        const verbatimSegmentUri = line.trim();
        // Attach the last parsed EXT-X-MAP tag to the segment.
        if (currentMapTag) {
          segmentTags.push(currentMapTag);
        }
        // The URI appears after all of the tags describing the segment.
        segments.push(new shaka.hls.Segment(
            verbatimSegmentUri, segmentTags, partialSegmentTags));
        // Start fresh lists; the segment keeps the ones it was given.
        segmentTags = [];
        partialSegmentTags = [];
      }
    }

    // After all the partial segments of a regular segment are published,
    // an EXTINF tag and URI for a regular segment containing the same media
    // content will get published at last.
    // If no EXTINF tag follows the list of partial segment tags at the end,
    // create a segment to wrap the partial segment tags.
    if (partialSegmentTags.length) {
      if (currentMapTag) {
        segmentTags.push(currentMapTag);
      }
      segments.push(
          new shaka.hls.Segment('', segmentTags, partialSegmentTags));
    }

    return segments;
  }

  /**
   * Parses a string into an HLS Tag object while tracking what id to use next.
   *
   * @param {string} word
   * @return {!shaka.hls.Tag}
   * @private
   */
  parseTag_(word) {
    return shaka.hls.ManifestTextParser.parseTag(this.globalId_++, word);
  }

  /**
   * Parses a string into an HLS Tag object.
   *
   * @param {number} id
   * @param {string} word
   * @return {!shaka.hls.Tag}
   * @throws {shaka.util.Error} INVALID_HLS_TAG when the string does not have
   *   the form of a tag.
   */
  static parseTag(id, word) {
    /* HLS tags start with '#EXT'. A tag can have a set of attributes
      (#EXT-<tagname>:<attribute list>) and/or a value (#EXT-<tagname>:<value>).
      An attribute's format is 'AttributeName=AttributeValue'.
      The parsing logic goes like this:
       1. Everything before ':' is a name (we ignore '#').
       2. Everything after ':' is a list of comma-separated items,
            2a. The first item might be a value, if it does not contain '='.
            2b. Otherwise, items are attributes.
       3. If there is no ":", it's a simple tag with no attributes and no value.
    */
    const patterns = shaka.hls.ManifestTextParser.STATIC_PATTERNS;

    const blocks = word.match(patterns.tagBlocks);
    if (!blocks) {
      throw new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.MANIFEST,
          shaka.util.Error.Code.INVALID_HLS_TAG,
          word);
    }
    const name = blocks[1];
    const data = blocks[2];
    const attributes = [];
    let value;

    if (data) {
      const parser = new shaka.util.TextParser(data);

      // The shared regex has the g flag and keeps state: reset lastIndex.
      patterns.valueRegex.lastIndex = 0;
      const blockValue = parser.readRegex(patterns.valueRegex);
      if (blockValue) {
        value = blockValue[1];
      }

      // The shared regex has the g flag and keeps state: reset lastIndex.
      patterns.attributeRegex.lastIndex = 0;
      let blockAttrs = parser.readRegex(patterns.attributeRegex);
      while (blockAttrs) {
        const attrName = blockAttrs[1];
        // Group 2 is the quoted form and group 3 the unquoted one.  Compare
        // against undefined rather than relying on truthiness, so that an
        // empty quoted value (KEY="") is kept as '' instead of falling
        // through to the unmatched group 3.
        const attrValue =
            blockAttrs[2] !== undefined ? blockAttrs[2] : blockAttrs[3];
        attributes.push(new shaka.hls.Attribute(attrName, attrValue));
        parser.skipWhitespace();
        blockAttrs = parser.readRegex(patterns.attributeRegex);
      }
    }

    return new shaka.hls.Tag(id, name, attributes, value);
  }
};
|
|
|
|
/**
 * Regular expressions used by the parser, built once and shared by every
 * parser instance instead of being re-created on each call.
 *
 * valueRegex and attributeRegex carry the g flag, so their lastIndex is
 * mutable state shared by all users; parseTag() resets it before the first
 * read of each.
 *
 * @const {{tagBlocks: !RegExp, valueRegex: !RegExp, attributeRegex: !RegExp,
 *          header: !RegExp}}
 */
shaka.hls.ManifestTextParser.STATIC_PATTERNS = {
  // The #EXTM3U header, followed by the end of the line or by whitespace.
  header: /^#EXTM3U($|[ \t\n])/m,

  // A whole tag line: the name without the leading '#' ([1]) and, when a ':'
  // is present, everything after it ([2]).
  tagBlocks: /^#(EXT[^:]*)(?::(.*))?$/,

  // A leading value ([1]): one or more characters that are neither a comma
  // nor an equals sign, terminated by a comma or the end of the line.
  valueRegex: /^([^,=]+)(?:,|$)/g,

  // One attribute:
  //   [1]  the key name, up to the equals sign
  //   [2]  the value when it is quoted (everything up to the next quote)
  //   [3]  the value when it is unquoted (everything up to the next comma
  //        or the end of the line)
  // followed by a comma or the end of the line.
  attributeRegex: /([^=]+)=(?:"([^"]*)"|([^",]*))(?:,|$)/g,
};
|
|
|
|
|
|
/**
 * Names of the HLS tags that are only found in Media Playlists.
 *
 * Seeing one of them identifies the playlist as a media playlist.  While
 * segments are being parsed, tags with these names are collected as
 * playlist-level tags rather than attached to a segment.  Kept as a Set so
 * that membership checks are constant-time.
 *
 * @const {!Set<string>}
 */
shaka.hls.ManifestTextParser.MEDIA_PLAYLIST_TAGS = new Set([
  'EXT-X-TARGETDURATION',
  'EXT-X-MEDIA-SEQUENCE',
  'EXT-X-DISCONTINUITY-SEQUENCE',
  'EXT-X-PLAYLIST-TYPE',
  'EXT-X-I-FRAMES-ONLY',
  'EXT-X-ENDLIST',
  'EXT-X-SERVER-CONTROL',
  'EXT-X-SKIP',
  'EXT-X-PART-INF',
  'EXT-X-DATERANGE',
]);
|
|
|
|
|
|
/**
 * Names of the HLS tags that only describe segments of a Media Playlist.
 *
 * The first tag with one of these names marks where the segment information
 * of a playlist begins.  Kept as a Set so that membership checks are
 * constant-time.
 *
 * @const {!Set<string>}
 */
shaka.hls.ManifestTextParser.SEGMENT_TAGS = new Set([
  'EXTINF',
  'EXT-X-BYTERANGE',
  'EXT-X-DISCONTINUITY',
  'EXT-X-PROGRAM-DATE-TIME',
  'EXT-X-KEY',
  'EXT-X-DATERANGE',
  'EXT-X-MAP',
  'EXT-X-GAP',
  'EXT-X-TILES',
]);
|