Files
shaka-player/lib/text/vtt_text_parser.js
T
Theodore Abshire 3a67802ce2 Fix parseFirstCue test on some platforms.
It turns out that, on some browsers, incomplete XML does not parse
properly. This means the parseFirstCue tests did not work on those platforms.
This modifies those tests on platforms that do not support partial XML;
when no support is detected, it instead tries to get the first cue
from a non-partial segment.
This also modifies the HLS parser to only load partial text segments if
the text parser supports it; otherwise, the HLS parser will load full
captions in order to extract start times.

b/137945635
Issue #2037

Change-Id: I58112864a818baf155b365a65fba1c8a56eb9520
2019-07-24 11:23:03 -07:00

470 lines
14 KiB
JavaScript

/**
* @license
* Copyright 2016 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
goog.provide('shaka.text.VttTextParser');
goog.require('goog.asserts');
goog.require('shaka.log');
goog.require('shaka.text.Cue');
goog.require('shaka.text.CueRegion');
goog.require('shaka.text.TextEngine');
goog.require('shaka.util.Error');
goog.require('shaka.util.StringUtils');
goog.require('shaka.util.TextParser');
/**
 * Text parser for WebVTT.
 *
 * The input is decoded as UTF-8, split into blocks separated by blank lines,
 * and checked for a WEBVTT header.  Region definitions are read from the
 * header block, and every following block that is not empty, a NOTE or a
 * STYLE block is converted into a shaka.text.Cue.
 *
 * @implements {shaka.extern.TextParser}
 */
shaka.text.VttTextParser = class {
  /**
   * VTT has no init segments, so any call here trips an assertion.
   *
   * @override
   */
  parseInit(data) {
    goog.asserts.assert(false, 'VTT does not have init segments');
  }

  /**
   * Parses the segment in partial mode, which stops after the first cue, and
   * returns that cue.  The result is undefined if the segment has no cue.
   *
   * @override
   * @throws {shaka.util.Error}
   */
  parseFirstCue(data, time) {
    const cues = this.parseMediaInternal_(data, time, /* partial= */ true);
    return cues[0];
  }

  /**
   * Parses every cue in the segment.
   *
   * @override
   * @throws {shaka.util.Error}
   */
  parseMedia(data, time) {
    return this.parseMediaInternal_(data, time, /* partial= */ false);
  }

  /** @override */
  supportsPartial() {
    return true;
  }

  /**
   * Shared implementation of parseMedia and parseFirstCue.
   *
   * @param {!Uint8Array} data
   * @param {shaka.extern.TextParser.TimeContext} time
   * @param {boolean} partial If true, parsing stops after the first cue.
   * @return {!Array.<!shaka.extern.Cue>}
   * @throws {shaka.util.Error} INVALID_TEXT_HEADER if the first block does
   *   not contain a WEBVTT line or has an unusable X-TIMESTAMP-MAP LOCAL
   *   time; INVALID_TEXT_CUE if a cue block is malformed.
   * @private
   */
  parseMediaInternal_(data, time, partial) {
    const VttTextParser = shaka.text.VttTextParser;

    // Get the input as a string.  Normalize newlines to \n.
    let str = shaka.util.StringUtils.fromUTF8(data);
    str = str.replace(/\r\n|\r(?=[^\n]|$)/gm, '\n');

    // Blocks are separated by one or more blank lines.  The first block is
    // the header.
    const blocks = str.split(/\n{2,}/m);
    const header = blocks[0];

    if (!/^WEBVTT($|[ \t\n])/m.test(header)) {
      throw new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.TEXT,
          shaka.util.Error.Code.INVALID_TEXT_HEADER);
    }

    const offset = VttTextParser.getTimeOffset_(header, time);

    // Parse VTT regions.
    /** @type {!Array.<!shaka.extern.CueRegion>} */
    const regions = [];
    for (const line of header.split('\n')) {
      if (/^Region:/.test(line)) {
        regions.push(VttTextParser.parseRegion_(line));
      }
    }

    // Parse cues.
    const ret = [];
    for (const block of blocks.slice(1)) {
      const cue = VttTextParser.parseCue_(block.split('\n'), offset, regions);
      if (!cue) {
        // Empty, NOTE and STYLE blocks produce no cue.
        continue;
      }
      ret.push(cue);
      if (partial) {
        break;
      }
    }
    return ret;
  }

  /**
   * Computes the offset, in seconds, that is added to every cue time.
   *
   * @param {string} header The first block of the file.
   * @param {shaka.extern.TextParser.TimeContext} time
   * @return {number}
   * @throws {shaka.util.Error} INVALID_TEXT_HEADER if the LOCAL time of an
   *   X-TIMESTAMP-MAP header cannot be parsed.
   * @private
   */
  static getTimeOffset_(header, time) {
    const VttTextParser = shaka.text.VttTextParser;

    const segmentStart = time.segmentStart;
    if (segmentStart != null) {
      return segmentStart;
    }

    // This is a probe, such as the HLS parser makes.  We don't know the
    // segment start time, so we will use the X-TIMESTAMP-MAP header, if
    // present, to get the segment start time.  By only doing this when
    // segmentStart == null, we protect against rollover in the MPEGTS field.
    // In case the attempt doesn't work out, assume an offset of 0.
    if (!header.includes('X-TIMESTAMP-MAP')) {
      return 0;
    }

    // https://bit.ly/2K92l7y
    // The 'X-TIMESTAMP-MAP' header is used in HLS to align text with
    // the rest of the media.
    // The header format is 'X-TIMESTAMP-MAP=MPEGTS:n,LOCAL:m'
    // (the attributes can go in any order)
    // where n is MPEG-2 time and m is cue time it maps to.
    // For example 'X-TIMESTAMP-MAP=LOCAL:00:00:00.000,MPEGTS:900000'
    // means an offset of 10 seconds
    // 900000/MPEG_TIMESCALE - cue time.
    const cueTimeMatch =
        header.match(/LOCAL:((?:(\d{1,}):)?(\d{2}):(\d{2})\.(\d{3}))/m);
    const mpegTimeMatch = header.match(/MPEGTS:(\d+)/m);
    if (!cueTimeMatch || !mpegTimeMatch) {
      return 0;
    }

    const parser = new shaka.util.TextParser(cueTimeMatch[1]);
    const cueTime = VttTextParser.parseTime_(parser);
    if (cueTime == null) {
      throw new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.TEXT,
          shaka.util.Error.Code.INVALID_TEXT_HEADER);
    }

    const mpegTime = Number(mpegTimeMatch[1]);
    const mpegTimescale = VttTextParser.MPEG_TIMESCALE_;

    // Apple-encoded HLS content uses absolute timestamps, so assume the
    // presence of the map tag means the content uses absolute timestamps.
    return time.periodStart + (mpegTime / mpegTimescale - cueTime);
  }

  /**
   * Parses a string into a Region object.
   *
   * @param {string} text A header line starting with 'Region:'.
   * @return {!shaka.extern.CueRegion}
   * @private
   */
  static parseRegion_(text) {
    const VttTextParser = shaka.text.VttTextParser;
    const parser = new shaka.util.TextParser(text);
    // The region string looks like this:
    // Region: id=fred width=50% lines=3 regionanchor=0%,100%
    //         viewportanchor=10%,90% scroll=up
    const region = new shaka.text.CueRegion();

    // Skip 'Region:'
    parser.readWord();
    parser.skipWhitespace();

    // Every remaining word is one setting.  Unknown settings are logged and
    // skipped.
    for (let word = parser.readWord(); word; word = parser.readWord()) {
      if (!VttTextParser.parseRegionSetting_(region, word)) {
        shaka.log.warning(
            'VTT parser encountered an invalid VTTRegion setting: ', word,
            ' The setting will be ignored.');
      }
      parser.skipWhitespace();
    }

    return region;
  }

  /**
   * Parses a text block into a Cue object.
   *
   * @param {!Array.<string>} text The lines of one block.  The array is not
   *   modified.
   * @param {number} timeOffset Seconds added to the cue's start and end.
   * @param {!Array.<!shaka.extern.CueRegion>} regions
   * @return {shaka.text.Cue} The cue, or null for empty, NOTE and STYLE
   *   blocks.
   * @throws {shaka.util.Error} INVALID_TEXT_CUE if the block has no timing
   *   line or the timing line cannot be parsed.
   * @private
   */
  static parseCue_(text, timeOffset, regions) {
    const VttTextParser = shaka.text.VttTextParser;

    // Skip empty blocks.
    if (text.length == 1 && !text[0]) {
      return null;
    }

    // Skip comment blocks.
    if (/^NOTE($|[ \t])/.test(text[0])) {
      return null;
    }

    // Skip style blocks.
    if (text[0] == 'STYLE') {
      return null;
    }

    // A first line without '-->' is the cue identifier, and the timing line
    // follows it.  An index is used rather than removing the identifier so
    // that the caller's array is left untouched.
    let id = null;
    let timingIndex = 0;
    if (!text[0].includes('-->')) {
      id = text[0];
      timingIndex = 1;
    }

    // A block made of a lone identifier line has no timing line at all.
    // Report it as an invalid cue, just like an unparsable timing line,
    // instead of handing undefined to the text parser.
    const timingLine = text[timingIndex];
    if (timingLine == null) {
      throw new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.TEXT,
          shaka.util.Error.Code.INVALID_TEXT_CUE);
    }

    // Parse the times.
    const parser = new shaka.util.TextParser(timingLine);
    let start = VttTextParser.parseTime_(parser);
    const expect = parser.readRegex(/[ \t]+-->[ \t]+/g);
    let end = VttTextParser.parseTime_(parser);

    if (start == null || expect == null || end == null) {
      throw new shaka.util.Error(
          shaka.util.Error.Severity.CRITICAL,
          shaka.util.Error.Category.TEXT,
          shaka.util.Error.Code.INVALID_TEXT_CUE);
    }

    start += timeOffset;
    end += timeOffset;

    // Get the payload: everything after the timing line.
    const payload = text.slice(timingIndex + 1).join('\n').trim();
    const cue = new shaka.text.Cue(start, end, payload);

    // Parse optional settings, which follow the end time on the timing line.
    parser.skipWhitespace();
    for (let word = parser.readWord(); word; word = parser.readWord()) {
      if (!VttTextParser.parseCueSetting(cue, word, regions)) {
        shaka.log.warning('VTT parser encountered an invalid VTT setting: ',
            word,
            ' The setting will be ignored.');
      }
      parser.skipWhitespace();
    }

    if (id != null) {
      cue.id = id;
    }
    return cue;
  }

  /**
   * Parses a WebVTT setting from the given word.
   *
   * Recognized settings are align, vertical, size, position, region and
   * line.  A region setting naming an unknown region still counts as
   * success, but leaves the cue's region unchanged.
   *
   * @param {!shaka.text.Cue} cue
   * @param {string} word
   * @param {!Array.<!shaka.text.CueRegion>} regions
   * @return {boolean} True on success.
   */
  static parseCueSetting(cue, word, regions) {
    const VttTextParser = shaka.text.VttTextParser;

    const align = /^align:(start|middle|center|end|left|right)$/.exec(word);
    if (align) {
      VttTextParser.setTextAlign_(cue, align[1]);
      return true;
    }

    const vertical = /^vertical:(lr|rl)$/.exec(word);
    if (vertical) {
      VttTextParser.setVerticalWritingMode_(cue, vertical[1]);
      return true;
    }

    const size = /^size:([\d.]+)%$/.exec(word);
    if (size) {
      cue.size = Number(size[1]);
      return true;
    }

    const position =
        /^position:([\d.]+)%(?:,(line-left|line-right|center|start|end))?$/
            .exec(word);
    if (position) {
      cue.position = Number(position[1]);
      if (position[2]) {
        VttTextParser.setPositionAlign_(cue, position[2]);
      }
      return true;
    }

    const regionSetting = /^region:(.*)$/.exec(word);
    if (regionSetting) {
      const region = VttTextParser.getRegionById_(regions, regionSetting[1]);
      if (region) {
        cue.region = region;
      }
      return true;
    }

    // The only remaining possibility is a line setting.
    return VttTextParser.parsedLineValueAndInterpretation_(cue, word);
  }

  /**
   * Looks up a region by id.  Logs a warning if there is none.
   *
   * @param {!Array.<!shaka.text.CueRegion>} regions
   * @param {string} id
   * @return {?shaka.text.CueRegion} The first region with the given id, or
   *   null if there is none.
   * @private
   */
  static getRegionById_(regions, id) {
    const matches = regions.filter((region) => region.id == id);
    if (!matches.length) {
      shaka.log.warning('VTT parser could not find a region with id: ',
          id,
          ' The region will be ignored.');
      return null;
    }

    goog.asserts.assert(matches.length == 1,
        'VTTRegion ids should be unique!');
    return matches[0];
  }

  /**
   * Parses a WebVTTRegion setting from the given word.
   *
   * @param {!shaka.text.CueRegion} region
   * @param {string} word
   * @return {boolean} True on success.
   * @private
   */
  static parseRegionSetting_(region, word) {
    const id = /^id=(.*)$/.exec(word);
    if (id) {
      region.id = id[1];
      return true;
    }

    const width = /^width=(\d{1,2}|100)%$/.exec(word);
    if (width) {
      region.width = Number(width[1]);
      return true;
    }

    const lines = /^lines=(\d+)$/.exec(word);
    if (lines) {
      region.height = Number(lines[1]);
      region.heightUnits = shaka.text.CueRegion.units.LINES;
      return true;
    }

    const regionAnchor =
        /^regionanchor=(\d{1,2}|100)%,(\d{1,2}|100)%$/.exec(word);
    if (regionAnchor) {
      region.regionAnchorX = Number(regionAnchor[1]);
      region.regionAnchorY = Number(regionAnchor[2]);
      return true;
    }

    const viewportAnchor =
        /^viewportanchor=(\d{1,2}|100)%,(\d{1,2}|100)%$/.exec(word);
    if (viewportAnchor) {
      region.viewportAnchorX = Number(viewportAnchor[1]);
      region.viewportAnchorY = Number(viewportAnchor[2]);
      return true;
    }

    if (/^scroll=up$/.test(word)) {
      region.scroll = shaka.text.CueRegion.scrollMode.UP;
      return true;
    }

    return false;
  }

  /**
   * Sets the cue's text alignment from a WebVTT align value.  'middle' is
   * mapped to CENTER; any other value is looked up by its upper-case name.
   *
   * @param {!shaka.text.Cue} cue
   * @param {string} align
   * @private
   */
  static setTextAlign_(cue, align) {
    const Cue = shaka.text.Cue;
    if (align == 'middle') {
      cue.textAlign = Cue.textAlign.CENTER;
      return;
    }

    const key = align.toUpperCase();
    goog.asserts.assert(key in Cue.textAlign,
        key + ' Should be in Cue.textAlign values!');
    cue.textAlign = Cue.textAlign[key];
  }

  /**
   * Sets the cue's position alignment.  'line-left' and 'start' map to
   * LEFT, 'line-right' and 'end' map to RIGHT, and anything else to CENTER.
   *
   * @param {!shaka.text.Cue} cue
   * @param {string} align
   * @private
   */
  static setPositionAlign_(cue, align) {
    const Cue = shaka.text.Cue;
    if (align == 'line-left' || align == 'start') {
      cue.positionAlign = Cue.positionAlign.LEFT;
    } else if (align == 'line-right' || align == 'end') {
      cue.positionAlign = Cue.positionAlign.RIGHT;
    } else {
      cue.positionAlign = Cue.positionAlign.CENTER;
    }
  }

  /**
   * Sets the cue's writing mode.  'lr' selects vertical left-to-right; any
   * other value selects vertical right-to-left.
   *
   * @param {!shaka.text.Cue} cue
   * @param {string} value
   * @private
   */
  static setVerticalWritingMode_(cue, value) {
    const Cue = shaka.text.Cue;
    cue.writingMode = value == 'lr' ?
        Cue.writingMode.VERTICAL_LEFT_TO_RIGHT :
        Cue.writingMode.VERTICAL_RIGHT_TO_LEFT;
  }

  /**
   * Parses a 'line:' setting, which is either a percentage or a (possibly
   * negative) line number, optionally followed by an alignment.
   *
   * @param {!shaka.text.Cue} cue
   * @param {string} word
   * @return {boolean} True if the word was a valid line setting.
   * @private
   */
  static parsedLineValueAndInterpretation_(cue, word) {
    const Cue = shaka.text.Cue;

    // Try the percentage form first, then fall back to the line-number form.
    let interpretation = Cue.lineInterpretation.PERCENTAGE;
    let results = /^line:([\d.]+)%(?:,(start|end|center))?$/.exec(word);
    if (!results) {
      interpretation = Cue.lineInterpretation.LINE_NUMBER;
      results = /^line:(-?\d+)(?:,(start|end|center))?$/.exec(word);
    }
    if (!results) {
      return false;
    }

    cue.lineInterpretation = interpretation;
    cue.line = Number(results[1]);

    // The alignment is optional and handled the same way for both forms.
    if (results[2]) {
      const key = results[2].toUpperCase();
      goog.asserts.assert(
          key in Cue.lineAlign,
          key + ' Should be in Cue.lineAlign values!');
      cue.lineAlign = Cue.lineAlign[key];
    }
    return true;
  }

  /**
   * Parses a WebVTT time from the given parser.
   *
   * @param {!shaka.util.TextParser} parser
   * @return {?number} The time in seconds, or null if no time could be read
   *   or if the minutes or seconds exceed 59.
   * @private
   */
  static parseTime_(parser) {
    // 00:00.000 or 00:00:00.000 or 0:00:00.000
    const results = parser.readRegex(/(?:(\d{1,}):)?(\d{2}):(\d{2})\.(\d{3})/g);
    if (results == null) {
      return null;
    }

    // This capture is optional, but will still be in the array as undefined,
    // in which case it is 0.
    const hours = Number(results[1]) || 0;
    const minutes = Number(results[2]);
    const seconds = Number(results[3]);
    const milliseconds = Number(results[4]);
    if (minutes > 59 || seconds > 59) {
      return null;
    }

    return (milliseconds / 1000) + seconds + (minutes * 60) + (hours * 3600);
  }
};
/**
 * Number of MPEGTS ticks per second.  The MPEGTS value of an
 * X-TIMESTAMP-MAP header is divided by this to convert it to seconds.
 *
 * @const {number}
 * @private
 */
shaka.text.VttTextParser.MPEG_TIMESCALE_ = 90000;

// Register this parser for each MIME type it handles.
for (const mimeType of ['text/vtt', 'text/vtt; codecs="vtt"']) {
  shaka.text.TextEngine.registerParser(mimeType, shaka.text.VttTextParser);
}