mirror of
https://github.com/shaka-project/shaka-player.git
synced 2026-06-13 15:46:46 +03:00
feat: Add automatic subtitles (#9123)
Internally this uses Web Speech API https://webaudio.github.io/web-speech-api/ and Translator APIs https://webmachinelearning.github.io/translation-api/ The feature is experimental and disabled by default since Chrome is the only browser that currently supports it. Closes https://github.com/shaka-project/shaka-player/issues/9110 --------- Co-authored-by: Wojciech Tyczyński <tykus160@gmail.com> Co-authored-by: Joey Parrish <joeyparrish@google.com> Co-authored-by: Theodore Abshire <TheodoreAbshire@Gmail.com> Co-authored-by: Joey Parrish <joeyparrish@users.noreply.github.com>
This commit is contained in:
committed by
GitHub
parent
d9e9da3f08
commit
61d80f6e59
@@ -324,6 +324,8 @@ requirement: {
|
||||
"Using \"AbortController\" directly is not allowed; "
|
||||
"use PendingRequest.abort instead."
|
||||
whitelist_regexp: "lib/net/http_fetch_plugin.js"
|
||||
whitelist_regexp: "lib/text/speech_to_text.js"
|
||||
whitelist_regexp: "test/text/speech_to_text_unit.js"
|
||||
}
|
||||
|
||||
# Disallow the use of generators, which are a major performance issue. See
|
||||
|
||||
@@ -72,6 +72,7 @@
|
||||
+../../lib/text/cue_region.js
|
||||
+../../lib/text/native_text_displayer.js
|
||||
+../../lib/text/simple_text_displayer.js
|
||||
+../../lib/text/speech_to_text.js
|
||||
+../../lib/text/stub_text_displayer.js
|
||||
+../../lib/text/text_engine.js
|
||||
+../../lib/text/text_utils.js
|
||||
|
||||
@@ -644,6 +644,7 @@ shakaDemo.Config = class {
|
||||
'streaming.returnToEndOfLiveWindowWhenOutside');
|
||||
this.addRetrySection_('streaming', 'Streaming Retry Parameters');
|
||||
this.addLiveSyncSection_();
|
||||
this.addSpeechToTextSection_();
|
||||
}
|
||||
|
||||
/** @private */
|
||||
@@ -685,6 +686,19 @@ shakaDemo.Config = class {
|
||||
'streaming.liveSync.dynamicTargetLatency.minLatency');
|
||||
}
|
||||
|
||||
/** @private */
|
||||
addSpeechToTextSection_() {
|
||||
const docLink = this.resolveExternLink_('.SpeechToTextConfiguration');
|
||||
this.addSection_('Speech to text', docLink);
|
||||
this.addBoolInput_('Speech to text', 'streaming.speechToText.enabled')
|
||||
.addNumberInput_('Max text length (characters)',
|
||||
'streaming.speechToText.maxTextLength')
|
||||
.addBoolInput_('Performed locally on the user’s device',
|
||||
'streaming.speechToText.processLocally')
|
||||
.addArrayStringInput_('Languages to translate into',
|
||||
'streaming.speechToText.languagesToTranslate');
|
||||
}
|
||||
|
||||
/** @private */
|
||||
addNetworkingSection_() {
|
||||
const docLink = this.resolveExternLink_('.NetworkingConfiguration');
|
||||
|
||||
@@ -1035,6 +1035,11 @@ shakaDemo.Main = class {
|
||||
params.get('preferredTextFormats').split(','));
|
||||
}
|
||||
|
||||
if (params.has('streaming.speechToText.languagesToTranslate')) {
|
||||
this.configure('streaming.speechToText.languagesToTranslate',
|
||||
params.get('streaming.speechToText.languagesToTranslate').split(','));
|
||||
}
|
||||
|
||||
// Add compiled/uncompiled links.
|
||||
this.makeVersionLinks_();
|
||||
|
||||
@@ -1562,6 +1567,7 @@ shakaDemo.Main = class {
|
||||
'preferredVideoCodecs',
|
||||
'preferredAudioCodecs',
|
||||
'preferredTextFormats',
|
||||
'streaming.speechToText.languagesToTranslate',
|
||||
];
|
||||
|
||||
for (const key of preferredArray) {
|
||||
|
||||
@@ -28,5 +28,6 @@
|
||||
{ "faq": { "title": "Frequently Asked Questions" } },
|
||||
{ "upgrade": { "title": "Upgrade Guide" } },
|
||||
{ "upgrade-manifest": { "title": "ManifestParser Upgrade Guide" } },
|
||||
{ "lcevc": { "title": "LCEVC Quick Start" } }
|
||||
{ "lcevc": { "title": "LCEVC Quick Start" } },
|
||||
{ "speech-to-text": { "title": "Speech to text" } }
|
||||
]
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
# Speech to text
|
||||
|
||||
#### Requirements
|
||||
|
||||
- Use `setVideoContainer` method in the player.
|
||||
- [Speech Recognition API][] with support for `start(MediaStreamTrack audioTrack)` method.
|
||||
|
||||
Note: The required APIs must be present and functional in the browser.
|
||||
|
||||
#### Optional requirements
|
||||
|
||||
- [Translator API][].
|
||||
|
||||
|
||||
### How does it work?
|
||||
|
||||
Using the Web Audio API, the audio track is passed to the Speech Recognition module, which returns what was said in the audio at that moment.
|
||||
|
||||
There are two options here, depending on the configuration:
|
||||
- Display the text as is
|
||||
- If a translation was chosen, the text is sent to the Translator module, which returns the translation.
|
||||
|
||||
When this module is activated, only Speech to Text is used by default. If you need it to be translated, you must specify the languages in the configuration.
|
||||
|
||||
The text is rendered inside a container element with the class `shaka-speech-to-text-container`, which is created inside the video container passed to `setVideoContainer`.
|
||||
|
||||
The text is truncated by default, and the number of characters can be configured with `streaming.speechToText.maxTextLength`.
|
||||
|
||||
|
||||
### Configuration
|
||||
- `enabled`: Enable this module.
|
||||
- `maxTextLength`: Number of characters before truncation.
|
||||
- `processLocally`: Indicates a requirement that the speech recognition process MUST be performed locally on the user’s device. If set to false, the user agent can choose between local and remote processing. Note: remote processing is done by the browser and we have no control over what 3rd parties are involved.
|
||||
- `languagesToTranslate`: List of languages to translate into.
|
||||
|
||||
|
||||
### How to differentiate these tracks
|
||||
|
||||
All these tracks have `originalLanguage` equal to `speech-to-text`.
|
||||
|
||||
The track without any translation has `language` equal to `''`.
|
||||
|
||||
A translated track has `language` set to the language it is translated into.
|
||||
|
||||
|
||||
### Why don't I see the text track that the translations should have?
|
||||
|
||||
The browser must support the [Translator API][]. If it does not, the translated tracks are not created, because the translation part of this module cannot be used.
|
||||
|
||||
|
||||
### Why don't I see the translation?
|
||||
|
||||
The translation module must support both the input and output languages. If it doesn't, then nothing will be displayed.
|
||||
|
||||
|
||||
[Speech Recognition API]: https://webaudio.github.io/web-speech-api/
|
||||
[Translator API]: https://webmachinelearning.github.io/translation-api/
|
||||
+59
-1
@@ -1861,6 +1861,61 @@ shaka.extern.DynamicTargetLatencyConfiguration;
|
||||
shaka.extern.LiveSyncConfiguration;
|
||||
|
||||
|
||||
/**
|
||||
* @typedef {{
|
||||
* enabled: boolean,
|
||||
* maxTextLength: number,
|
||||
* processLocally: boolean,
|
||||
* languagesToTranslate: !Array<string>,
|
||||
* }}
|
||||
*
|
||||
* @description
|
||||
* Speech to text configuration options.
|
||||
*
|
||||
* @property {boolean} enabled
|
||||
* If true, creates a new text track that allows speech to text if
|
||||
* supported by the browser.
|
||||
* <br>
|
||||
* This can be useful if your stream doesn't have subtitles and you want them.
|
||||
* <br>
|
||||
* This feature creates a text track that works like any other, but only
|
||||
* renders when a <code>videoContainer</code> is provided to the player.
|
||||
* You can recognize this track by its originalLanguage which is
|
||||
* 'speech-to-text'.
|
||||
* <br>
|
||||
* This functionality might work with SW DRM, but it will never work with
|
||||
* HW DRM.
|
||||
* <br>
|
||||
* This feature is experimental and may not work properly.
|
||||
* <br>
|
||||
* Defaults to <code>false</code>.
|
||||
* @property {number} maxTextLength
|
||||
* Indicates the limit of characters in the text rendered, ensuring that
|
||||
* only complete words are included. If a word is cut at the limit, it is
|
||||
* included in text rendered. Adds '...' at the start if truncation occurs.
|
||||
* <br>
|
||||
* Defaults to <code>140</code>.
|
||||
* @property {boolean} processLocally
|
||||
* When set to true, indicates a requirement that the speech recognition
|
||||
* process MUST be performed locally on the user’s device. If set to false,
|
||||
* the user agent can choose between local and remote processing.
|
||||
* Note: remote processing is done by the browser and we have no control
|
||||
* over what 3rd parties are involved.
|
||||
* <br>
|
||||
* Defaults to <code>false</code>.
|
||||
* @property {!Array<string>} languagesToTranslate
|
||||
* List of languages to translate into if the browser supports translation
|
||||
* APIs.
|
||||
* <br>
|
||||
* Each language in this list will create a new track.
|
||||
* <br>
|
||||
* Defaults to <code>[]</code>.
|
||||
*
|
||||
* @exportDoc
|
||||
*/
|
||||
shaka.extern.SpeechToTextConfiguration;
|
||||
|
||||
|
||||
/**
|
||||
* @typedef {{
|
||||
* retryParameters: shaka.extern.RetryParameters,
|
||||
@@ -1906,7 +1961,8 @@ shaka.extern.LiveSyncConfiguration;
|
||||
* shouldFixTimestampOffset: boolean,
|
||||
* avoidEvictionOnQuotaExceededError: boolean,
|
||||
* crossBoundaryStrategy: shaka.config.CrossBoundaryStrategy,
|
||||
* returnToEndOfLiveWindowWhenOutside: boolean
|
||||
* returnToEndOfLiveWindowWhenOutside: boolean,
|
||||
* speechToText: shaka.extern.SpeechToTextConfiguration,
|
||||
* }}
|
||||
*
|
||||
* @description
|
||||
@@ -2153,6 +2209,8 @@ shaka.extern.LiveSyncConfiguration;
|
||||
* it will be moved to the end of the live window, instead of the start.
|
||||
* <br>
|
||||
* Defaults to <code>false</code>.
|
||||
* @property {shaka.extern.SpeechToTextConfiguration} speechToText
|
||||
* The speech to text configuration.
|
||||
* @exportDoc
|
||||
*/
|
||||
shaka.extern.StreamingConfiguration;
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
/*! @license
|
||||
* Shaka Player
|
||||
* Copyright 2016 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* @fileoverview Externs for new SpeechRecognition APIs in Chrome.
|
||||
* @see https://webaudio.github.io/web-speech-api/#dom-speechrecognition-start
|
||||
* @externs
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
* @type {boolean}
|
||||
*/
|
||||
SpeechRecognition.prototype.processLocally;
|
||||
|
||||
/**
|
||||
* Not a real class, but Chrome is incubating a new parameter on start(), and
|
||||
* the only way we found to override the method from the existing Closure
|
||||
* externs is to define a subclass and use override.
|
||||
* @override
|
||||
* @param {MediaStreamTrack=} mediaStreamTrack
|
||||
*/
|
||||
var ChromeSpeechRecognition = class extends SpeechRecognition {};
|
||||
|
||||
/**
|
||||
* @override
|
||||
* @param {MediaStreamTrack=} mediaStreamTrack
|
||||
*/
|
||||
ChromeSpeechRecognition.prototype.start = function(mediaStreamTrack) {};
|
||||
@@ -0,0 +1,120 @@
|
||||
/*! @license
|
||||
* Shaka Player
|
||||
* Copyright 2025 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
/**
|
||||
* @fileoverview Externs for Translator and Language Detector APIs.
|
||||
*
|
||||
* @externs
|
||||
*/
|
||||
|
||||
/**
|
||||
* @constructor
|
||||
*/
|
||||
function Translator() {}
|
||||
|
||||
/**
|
||||
@type {number}
|
||||
*/
|
||||
Translator.prototype.inputQuota;
|
||||
|
||||
/**
|
||||
@type {string}
|
||||
*/
|
||||
Translator.prototype.sourceLanguage;
|
||||
|
||||
/**
|
||||
@type {string}
|
||||
*/
|
||||
Translator.prototype.targetLanguage;
|
||||
|
||||
/**
|
||||
@return {void}
|
||||
*/
|
||||
Translator.prototype.destroy = function() {};
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @return {!Promise<number>}
|
||||
*/
|
||||
Translator.prototype.measureInputUsage = function(text) {};
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @return {!Promise<string>}
|
||||
*/
|
||||
Translator.prototype.translate = function(text) {};
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @return {!ReadableStream<string>}
|
||||
*/
|
||||
Translator.prototype.translateStreaming = function(text) {};
|
||||
|
||||
/**
|
||||
* @param {(Object|null)=} options
|
||||
* @return {!Promise<string>}
|
||||
*/
|
||||
Translator.availability = function(options) {};
|
||||
|
||||
/**
|
||||
* @param {(Object|null)=} options
|
||||
* @return {!Promise<!Translator>}
|
||||
*/
|
||||
Translator.create = function(options) {};
|
||||
|
||||
/**
|
||||
* @constructor
|
||||
*/
|
||||
function LanguageDetector() {}
|
||||
|
||||
/**
|
||||
@type {number}
|
||||
*/
|
||||
LanguageDetector.prototype.inputQuota;
|
||||
|
||||
/**
|
||||
@return {void}
|
||||
*/
|
||||
LanguageDetector.prototype.destroy = function() {};
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @return {!Promise<number>}
|
||||
*/
|
||||
LanguageDetector.prototype.measureInputUsage = function(text) {};
|
||||
|
||||
/**
|
||||
* @param {string} text
|
||||
* @return {!Promise<Array<{detectedLanguage: string, confidence: number}>>}
|
||||
*/
|
||||
LanguageDetector.prototype.detect = function(text) {};
|
||||
|
||||
/**
|
||||
* @param {(Object|null)=} options
|
||||
* @return {!Promise<string>}
|
||||
*/
|
||||
LanguageDetector.availability = function(options) {};
|
||||
|
||||
/**
|
||||
* @param {(Object|null)=} options
|
||||
* @return {!Promise<!LanguageDetector>}
|
||||
*/
|
||||
LanguageDetector.create = function(options) {};
|
||||
|
||||
/**
|
||||
* @constructor
|
||||
*/
|
||||
function CreateMonitor() {}
|
||||
|
||||
/**
|
||||
* @constructor
|
||||
*/
|
||||
function DownloadProgressEvent() {}
|
||||
|
||||
/**
|
||||
@type {number}
|
||||
*/
|
||||
DownloadProgressEvent.prototype.loaded;
|
||||
+63
-3
@@ -37,6 +37,7 @@ goog.require('shaka.net.NetworkingUtils');
|
||||
goog.require('shaka.text.Cue');
|
||||
goog.require('shaka.text.NativeTextDisplayer');
|
||||
goog.require('shaka.text.SimpleTextDisplayer');
|
||||
goog.require('shaka.text.SpeechToText');
|
||||
goog.require('shaka.text.StubTextDisplayer');
|
||||
goog.require('shaka.text.TextEngine');
|
||||
goog.require('shaka.text.Utils');
|
||||
@@ -1048,6 +1049,9 @@ shaka.Player = class extends shaka.util.FakeEventTarget {
|
||||
|
||||
/** @private {?shaka.extern.TextDisplayer} */
|
||||
this.textDisplayer_ = null;
|
||||
|
||||
/** @private {?shaka.text.SpeechToText} */
|
||||
this.speechToText_ = null;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -1180,6 +1184,11 @@ shaka.Player = class extends shaka.util.FakeEventTarget {
|
||||
this.queueManager_ = null;
|
||||
}
|
||||
|
||||
if (this.speechToText_) {
|
||||
this.speechToText_.release();
|
||||
this.speechToText_ = null;
|
||||
}
|
||||
|
||||
// FakeEventTarget implements IReleasable
|
||||
super.release();
|
||||
}
|
||||
@@ -1592,6 +1601,10 @@ shaka.Player = class extends shaka.util.FakeEventTarget {
|
||||
}
|
||||
this.isTextVisible_ = false;
|
||||
|
||||
if (this.speechToText_) {
|
||||
this.speechToText_.disable();
|
||||
}
|
||||
|
||||
if (this.video_) {
|
||||
// The life cycle of tracks that created by addTextTrackAsync() and
|
||||
// their associated resources should be the same as the loaded video.
|
||||
@@ -4736,6 +4749,28 @@ shaka.Player = class extends shaka.util.FakeEventTarget {
|
||||
if (this.queueManager_) {
|
||||
this.queueManager_.configure(this.config_.queue);
|
||||
}
|
||||
|
||||
const loaded = this.loadMode_ == shaka.Player.LoadMode.MEDIA_SOURCE ||
|
||||
this.loadMode_ == shaka.Player.LoadMode.SRC_EQUALS;
|
||||
if (this.config_.streaming.speechToText.enabled) {
|
||||
if (!this.speechToText_) {
|
||||
this.speechToText_ = new shaka.text.SpeechToText(this);
|
||||
this.speechToText_.configure(this.config_.streaming.speechToText);
|
||||
if (loaded) {
|
||||
this.onTextChanged_();
|
||||
}
|
||||
} else {
|
||||
this.speechToText_.configure(this.config_.streaming.speechToText);
|
||||
}
|
||||
} else {
|
||||
if (this.speechToText_) {
|
||||
this.speechToText_.release();
|
||||
this.speechToText_ = null;
|
||||
if (loaded) {
|
||||
this.onTextChanged_();
|
||||
}
|
||||
}
|
||||
}
|
||||
this.applyCriteriaConfigChanges_(prevConfig);
|
||||
}
|
||||
|
||||
@@ -5442,13 +5477,20 @@ shaka.Player = class extends shaka.util.FakeEventTarget {
|
||||
|
||||
tracks.push(track);
|
||||
}
|
||||
|
||||
if (this.speechToText_) {
|
||||
tracks.push(...this.speechToText_.getTextTracks());
|
||||
}
|
||||
return tracks;
|
||||
}
|
||||
} else if (this.video_ && this.video_.src && this.video_.textTracks) {
|
||||
const textTracks = this.getFilteredTextTracks_();
|
||||
const StreamUtils = shaka.util.StreamUtils;
|
||||
return textTracks.map((text) => StreamUtils.html5TextTrackToTrack(text));
|
||||
const tracks =
|
||||
textTracks.map((text) => StreamUtils.html5TextTrackToTrack(text));
|
||||
if (this.speechToText_) {
|
||||
tracks.push(...this.speechToText_.getTextTracks());
|
||||
}
|
||||
return tracks;
|
||||
} else {
|
||||
return [];
|
||||
}
|
||||
@@ -5692,7 +5734,18 @@ shaka.Player = class extends shaka.util.FakeEventTarget {
|
||||
* @export
|
||||
*/
|
||||
selectTextTrack(track) {
|
||||
if (track == null) {
|
||||
let isSpeechToText = false;
|
||||
if (this.speechToText_ && track != null) {
|
||||
const speechToTextTracks = this.speechToText_.getTextTracks();
|
||||
isSpeechToText = speechToTextTracks.includes(track);
|
||||
}
|
||||
if (this.speechToText_ && !isSpeechToText) {
|
||||
this.speechToText_.disable();
|
||||
}
|
||||
if (track == null || isSpeechToText) {
|
||||
if (this.speechToText_ && isSpeechToText && track != null) {
|
||||
this.speechToText_.enable(track);
|
||||
}
|
||||
this.onTextChanged_();
|
||||
this.setTextTrackVisibility(false);
|
||||
return;
|
||||
@@ -7764,6 +7817,13 @@ shaka.Player = class extends shaka.util.FakeEventTarget {
|
||||
this.videoContainer_ = videoContainer;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return {HTMLElement}
|
||||
*/
|
||||
getVideoContainer() {
|
||||
return this.videoContainer_;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {!shaka.util.Error} error
|
||||
* @private
|
||||
|
||||
@@ -0,0 +1,599 @@
|
||||
/*! @license
|
||||
* Shaka Player
|
||||
* Copyright 2016 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
goog.provide('shaka.text.SpeechToText');
|
||||
|
||||
goog.require('goog.asserts');
|
||||
goog.require('shaka.log');
|
||||
goog.require('shaka.util.ArrayUtils');
|
||||
goog.require('shaka.util.Dom');
|
||||
goog.require('shaka.util.EventManager');
|
||||
goog.require('shaka.util.FakeEvent');
|
||||
goog.require('shaka.util.IReleasable');
|
||||
goog.require('shaka.util.ManifestParserUtils');
|
||||
goog.require('shaka.util.Lazy');
|
||||
goog.require('shaka.util.Timer');
|
||||
goog.requireType('shaka.Player');
|
||||
|
||||
|
||||
/**
|
||||
* @implements {shaka.util.IReleasable}
|
||||
*/
|
||||
shaka.text.SpeechToText = class {
|
||||
/**
|
||||
* @param {shaka.Player} player
|
||||
*/
|
||||
constructor(player) {
|
||||
/** @private {?shaka.Player} */
|
||||
this.player_ = player;
|
||||
|
||||
/** @private {?shaka.extern.SpeechToTextConfiguration} */
|
||||
this.config_ = null;
|
||||
|
||||
/** @private {!shaka.util.EventManager} */
|
||||
this.eventManager_ = new shaka.util.EventManager();
|
||||
|
||||
/** @private {boolean} */
|
||||
this.supported_ =
|
||||
shaka.text.SpeechToText.isMediaStreamTrackSupported.value();
|
||||
|
||||
/** @type {HTMLElement} */
|
||||
this.textContainer_ = this.getTextContainer_();
|
||||
|
||||
/** @private {boolean} */
|
||||
this.enabled_ = false;
|
||||
|
||||
/** @private {?ChromeSpeechRecognition} */
|
||||
this.recognition_ = null;
|
||||
|
||||
/** @private {?Translator} */
|
||||
this.translator_ = null;
|
||||
|
||||
/** @private {?AbortController} */
|
||||
this.translatorAbortController_ = null;
|
||||
|
||||
/** @private {boolean} */
|
||||
this.needTranslator_ = false;
|
||||
|
||||
/** @private {!shaka.util.EventManager} */
|
||||
this.recognitionEventManager_ = new shaka.util.EventManager();
|
||||
|
||||
/** @private {shaka.util.Timer} */
|
||||
this.recognitionTimer_ = new shaka.util.Timer(() => {
|
||||
this.stopRecognition_();
|
||||
this.onAudioTrackChange_();
|
||||
});
|
||||
|
||||
/** @private {number} */
|
||||
this.nextTextTrackId_ = 1e15;
|
||||
|
||||
/** @private {shaka.extern.TextTrack} */
|
||||
this.basicTextTrack_ = this.createTextTrack_();
|
||||
|
||||
/** @private {!Array<shaka.extern.TextTrack>} */
|
||||
this.textTracks_ = [
|
||||
this.basicTextTrack_,
|
||||
];
|
||||
|
||||
/** @private {?number} */
|
||||
this.activeTrackId_ = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {shaka.extern.SpeechToTextConfiguration} config
|
||||
*/
|
||||
configure(config) {
|
||||
this.config_ = config;
|
||||
this.checkTextTrackChanges_();
|
||||
}
|
||||
|
||||
/**
|
||||
* @override
|
||||
*/
|
||||
release() {
|
||||
this.activeTrackId_ = null;
|
||||
this.eventManager_.removeAll();
|
||||
this.stopRecognition_();
|
||||
this.player_ = null;
|
||||
this.eventManager_.release();
|
||||
|
||||
// Remove the text container element from the UI.
|
||||
if (this.textContainer_ && this.textContainer_.parentElement) {
|
||||
this.textContainer_.remove();
|
||||
this.textContainer_ = null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Enable speech to text.
|
||||
*
|
||||
* @param {!shaka.extern.TextTrack} track
|
||||
*/
|
||||
enable(track) {
|
||||
if (!this.supported_) {
|
||||
return;
|
||||
}
|
||||
if (!this.textContainer_) {
|
||||
this.textContainer_ = this.getTextContainer_();
|
||||
}
|
||||
if (!this.textContainer_) {
|
||||
return;
|
||||
}
|
||||
if (this.enabled_ && track.id == this.activeTrackId_) {
|
||||
return;
|
||||
}
|
||||
|
||||
this.enabled_ = true;
|
||||
this.activeTrackId_ = track.id;
|
||||
|
||||
this.eventManager_.listen(this.player_, 'audiotrackschanged', () => {
|
||||
this.onAudioTrackChange_();
|
||||
});
|
||||
|
||||
const mediaElement = this.player_.getMediaElement();
|
||||
|
||||
this.eventManager_.listen(mediaElement, 'seeking', () => {
|
||||
this.stopRecognition_();
|
||||
this.onAudioTrackChange_();
|
||||
});
|
||||
|
||||
this.eventManager_.listen(mediaElement, 'pause', () => {
|
||||
this.stopRecognition_(/* removeRendered= */ false);
|
||||
});
|
||||
|
||||
this.eventManager_.listen(mediaElement, 'play', () => {
|
||||
this.onAudioTrackChange_();
|
||||
});
|
||||
|
||||
if (!mediaElement.paused) {
|
||||
this.onAudioTrackChange_();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Disable speech to text.
|
||||
*/
|
||||
disable() {
|
||||
if (!this.enabled_) {
|
||||
return;
|
||||
}
|
||||
this.enabled_ = false;
|
||||
this.activeTrackId_ = null;
|
||||
this.eventManager_.removeAll();
|
||||
this.stopRecognition_();
|
||||
}
|
||||
|
||||
/**
|
||||
* @return {boolean}
|
||||
*/
|
||||
isEnabled() {
|
||||
return this.enabled_;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return {boolean}
|
||||
*/
|
||||
isSupported() {
|
||||
if (!this.supported_) {
|
||||
return false;
|
||||
}
|
||||
if (!this.textContainer_) {
|
||||
this.textContainer_ = this.getTextContainer_();
|
||||
}
|
||||
if (!this.textContainer_) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return {!Array<shaka.extern.TextTrack>}
|
||||
*/
|
||||
getTextTracks() {
|
||||
if (!this.isSupported()) {
|
||||
return [];
|
||||
}
|
||||
for (const textTrack of this.textTracks_) {
|
||||
textTrack.active = textTrack.id == this.activeTrackId_;
|
||||
}
|
||||
return this.textTracks_;
|
||||
}
|
||||
|
||||
/**
|
||||
* @private
|
||||
*/
|
||||
onAudioTrackChange_() {
|
||||
this.removeRenderedText_();
|
||||
const audioTracks = this.player_.getAudioTracks();
|
||||
if (audioTracks.length) {
|
||||
const mediaStreamTrack = this.getAudioTrackFromMediaElement_();
|
||||
if (!mediaStreamTrack) {
|
||||
return;
|
||||
}
|
||||
const activeAudioTrack = audioTracks.find((t) => t.active);
|
||||
let sourceLanguage = 'en';
|
||||
if (activeAudioTrack && activeAudioTrack.language &&
|
||||
activeAudioTrack.language != 'und') {
|
||||
sourceLanguage = activeAudioTrack.language;
|
||||
}
|
||||
const activeTextTrack =
|
||||
this.textTracks_.find((t) => t.id == this.activeTrackId_);
|
||||
let targetLanguage = '';
|
||||
if (activeTextTrack && activeTextTrack.language &&
|
||||
activeTextTrack.language != 'und') {
|
||||
targetLanguage = activeTextTrack.language;
|
||||
}
|
||||
|
||||
if (this.recognition_ && this.recognition_.lang == sourceLanguage) {
|
||||
this.setupTranslator_(sourceLanguage, targetLanguage).catch(() => {});
|
||||
return;
|
||||
}
|
||||
this.initRecognition_(mediaStreamTrack, sourceLanguage, targetLanguage);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {!MediaStreamTrack} mediaStreamTrack
|
||||
* @param {string} sourceLanguage
|
||||
* @param {string} targetLanguage
|
||||
* @private
|
||||
*/
|
||||
initRecognition_(mediaStreamTrack, sourceLanguage, targetLanguage) {
|
||||
goog.asserts.assert(this.config_, 'Config must not be null!');
|
||||
|
||||
this.stopRecognition_();
|
||||
|
||||
this.setupTranslator_(sourceLanguage, targetLanguage).catch(() => {});
|
||||
|
||||
const SpeechRecognition =
|
||||
window.SpeechRecognition || window.webkitSpeechRecognition;
|
||||
|
||||
this.recognition_ = /** @type {ChromeSpeechRecognition} */(
|
||||
new SpeechRecognition());
|
||||
|
||||
this.recognition_.lang = sourceLanguage;
|
||||
this.recognition_.continuous = true;
|
||||
this.recognition_.interimResults = true;
|
||||
this.recognition_.processLocally = this.config_.processLocally;
|
||||
|
||||
this.recognitionEventManager_.listen(this.recognition_, 'start', () => {
|
||||
shaka.log.debug('Speech to text: start', sourceLanguage);
|
||||
this.recognitionTimer_.tickAfter(/* seconds= */ 5);
|
||||
});
|
||||
this.recognitionEventManager_.listen(this.recognition_, 'result',
|
||||
async (e) => {
|
||||
goog.asserts.assert(this.config_, 'Config must not be null!');
|
||||
const event = /** @type {SpeechRecognitionEvent} */(e);
|
||||
let text = '';
|
||||
for (let i = event.resultIndex; i < event.results.length; i++) {
|
||||
// The Web Speech API adds appropriate leading/trailing
|
||||
// whitespace.
|
||||
text += event.results[i][0].transcript;
|
||||
}
|
||||
if (this.needTranslator_) {
|
||||
if (this.translator_) {
|
||||
try {
|
||||
text = await this.translator_.translate(text);
|
||||
} catch (e) {
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
if (this.textContainer_) {
|
||||
this.removeRenderedText_();
|
||||
const elem = shaka.util.Dom.createHTMLElement('span');
|
||||
elem.setAttribute('translate', 'no');
|
||||
elem.style.backgroundColor = 'rgba(0, 0, 0, 0.8)';
|
||||
elem.style.padding = '0px 5px';
|
||||
elem.style.margin = '2.5% 5%';
|
||||
elem.textContent =
|
||||
this.truncateLastWords_(text, this.config_.maxTextLength);
|
||||
this.textContainer_.appendChild(elem);
|
||||
}
|
||||
this.recognitionTimer_.tickAfter(/* seconds= */ 0.75);
|
||||
});
|
||||
this.recognitionEventManager_.listen(this.recognition_, 'error', (e) => {
|
||||
this.removeRenderedText_();
|
||||
shaka.log.debug('Speech to text: error', e);
|
||||
});
|
||||
this.recognitionEventManager_.listen(this.recognition_, 'end', () => {
|
||||
shaka.log.debug('Speech to text: end', sourceLanguage);
|
||||
this.initRecognition_(mediaStreamTrack, sourceLanguage, targetLanguage);
|
||||
});
|
||||
this.recognition_.start(mediaStreamTrack);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {string} sourceLanguage
|
||||
* @param {string} targetLanguage
|
||||
* @return {!Promise}
|
||||
* @private
|
||||
*/
|
||||
async setupTranslator_(sourceLanguage, targetLanguage) {
|
||||
if (this.translatorAbortController_) {
|
||||
this.translatorAbortController_.abort();
|
||||
this.translatorAbortController_ = null;
|
||||
}
|
||||
if (this.translator_) {
|
||||
this.translator_.destroy();
|
||||
this.translator_ = null;
|
||||
}
|
||||
if (targetLanguage && sourceLanguage != targetLanguage &&
|
||||
'Translator' in window) {
|
||||
this.needTranslator_ = true;
|
||||
this.translatorAbortController_ = new AbortController();
|
||||
const signal = this.translatorAbortController_.signal;
|
||||
try {
|
||||
this.translator_ = await Translator.create({
|
||||
sourceLanguage: sourceLanguage,
|
||||
targetLanguage: targetLanguage,
|
||||
signal: signal,
|
||||
});
|
||||
} catch (err) {
|
||||
if (!err.name || err.name !== 'AbortError') {
|
||||
const languages = {
|
||||
sourceLanguage: sourceLanguage,
|
||||
targetLanguage: targetLanguage,
|
||||
};
|
||||
shaka.log.error('Error creating Translator', languages, err);
|
||||
if (err.name == 'NotSupportedError') {
|
||||
this.stopRecognition_();
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
this.needTranslator_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param {boolean=} removeRendered
|
||||
* @private
|
||||
*/
|
||||
stopRecognition_(removeRendered = true) {
|
||||
this.recognitionEventManager_.removeAll();
|
||||
this.recognitionTimer_.stop();
|
||||
this.needTranslator_ = false;
|
||||
if (this.translatorAbortController_) {
|
||||
this.translatorAbortController_.abort();
|
||||
this.translatorAbortController_ = null;
|
||||
}
|
||||
if (this.translator_) {
|
||||
this.translator_.destroy();
|
||||
this.translator_ = null;
|
||||
}
|
||||
if (this.recognition_) {
|
||||
this.recognition_.stop();
|
||||
this.recognition_ = null;
|
||||
}
|
||||
if (removeRendered) {
|
||||
this.removeRenderedText_();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @private
|
||||
*/
|
||||
removeRenderedText_() {
|
||||
if (this.textContainer_) {
|
||||
shaka.util.Dom.removeAllChildren(this.textContainer_);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Truncates a string to the last `limit` characters, ensuring that only
|
||||
* complete words are included. If a word is cut at the limit, it is included
|
||||
* in full. Adds '...' at the start if truncation occurs.
|
||||
*
|
||||
* @param {string} text - The input string to truncate.
|
||||
* @param {number} limit - The maximum number of characters to consider from
|
||||
* the end of the string.
|
||||
* @return {string} The truncated string, starting at the first complete word
|
||||
* within the limit, and prefixed with '...' if truncation
|
||||
* was applied.
|
||||
* @private
|
||||
*/
|
||||
truncateLastWords_(text, limit) {
|
||||
if (text.length <= limit) {
|
||||
return text;
|
||||
}
|
||||
|
||||
// Start from the position where the last `limit` characters begin
|
||||
let start = text.length - limit;
|
||||
|
||||
// Move backwards to the start of the word if we are in the middle of one
|
||||
while (start > 0 && text[start - 1] !== ' ') {
|
||||
start--;
|
||||
}
|
||||
|
||||
// Take the substring from the found position to the end
|
||||
const result = text.slice(start).trimStart();
|
||||
|
||||
// Add '...' at the start to indicate truncation
|
||||
return '...' + result;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return {?MediaStreamTrack}
|
||||
* @private
|
||||
*/
|
||||
getAudioTrackFromMediaElement_() {
|
||||
const mediaElement = this.player_.getMediaElement();
|
||||
if (!mediaElement) {
|
||||
return null;
|
||||
}
|
||||
if (!shaka.text.SpeechToText.audioObjectMap_.has(mediaElement)) {
|
||||
const AudioContext = window.AudioContext || window.webkitAudioContext;
|
||||
const audioContext = new AudioContext();
|
||||
const sourceNode = audioContext.createMediaElementSource(mediaElement);
|
||||
const destinationNode = audioContext.createMediaStreamDestination();
|
||||
sourceNode.connect(destinationNode);
|
||||
sourceNode.connect(audioContext.destination);
|
||||
const audioTrack = destinationNode.stream.getAudioTracks()[0];
|
||||
shaka.text.SpeechToText.audioObjectMap_.set(mediaElement, {
|
||||
audioContext,
|
||||
sourceNode,
|
||||
destinationNode,
|
||||
audioTrack,
|
||||
});
|
||||
}
|
||||
const audioObject =
|
||||
shaka.text.SpeechToText.audioObjectMap_.get(mediaElement);
|
||||
return audioObject.audioTrack;
|
||||
}
|
||||
|
||||
/**
|
||||
* @return {?HTMLElement}
|
||||
* @private
|
||||
*/
|
||||
getTextContainer_() {
|
||||
const videoContainer = this.player_.getVideoContainer();
|
||||
if (!videoContainer) {
|
||||
return null;
|
||||
}
|
||||
|
||||
/** @type {HTMLElement} */
|
||||
const textContainer = shaka.util.Dom.createHTMLElement('div');
|
||||
textContainer.classList.add('shaka-speech-to-text-container');
|
||||
|
||||
// Set the subtitles text-centered by default.
|
||||
textContainer.style.textAlign = 'center';
|
||||
|
||||
// Set the captions in the middle horizontally by default.
|
||||
textContainer.style.display = 'flex';
|
||||
textContainer.style.flexDirection = 'column';
|
||||
textContainer.style.alignItems = 'center';
|
||||
|
||||
// Set the captions at the bottom by default.
|
||||
textContainer.style.justifyContent = 'flex-end';
|
||||
|
||||
videoContainer.appendChild(textContainer);
|
||||
|
||||
return textContainer;
|
||||
}
|
||||
|
||||
/** @private */
|
||||
checkTextTrackChanges_() {
|
||||
goog.asserts.assert(this.config_, 'Config must not be null!');
|
||||
|
||||
const existingTrackLanguages =
|
||||
this.textTracks_.map((t) => t.language).filter((t) => t);
|
||||
const languageChanges = !shaka.util.ArrayUtils.hasSameElements(
|
||||
this.config_.languagesToTranslate, existingTrackLanguages);
|
||||
|
||||
if (languageChanges && 'Translator' in window) {
|
||||
this.textTracks_ = this.textTracks_.filter((t) => {
|
||||
if (t.id == this.basicTextTrack_.id) {
|
||||
return true;
|
||||
}
|
||||
if (this.config_.languagesToTranslate.includes(t.language)) {
|
||||
return true;
|
||||
}
|
||||
if (t.id == this.activeTrackId_) {
|
||||
this.disable();
|
||||
}
|
||||
return false;
|
||||
});
|
||||
for (const language of this.config_.languagesToTranslate) {
|
||||
let track = this.textTracks_.find((t) => t.language == language);
|
||||
if (!track) {
|
||||
track = this.createTextTrack_();
|
||||
track.language = language;
|
||||
this.textTracks_.push(track);
|
||||
}
|
||||
}
|
||||
|
||||
const event = new shaka.util.FakeEvent(
|
||||
shaka.util.FakeEvent.EventName.TextChanged);
|
||||
this.player_.dispatchEvent(event);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @return {shaka.extern.TextTrack}
|
||||
* @private
|
||||
*/
|
||||
createTextTrack_() {
|
||||
return {
|
||||
id: this.nextTextTrackId_++,
|
||||
active: false,
|
||||
type: shaka.util.ManifestParserUtils.ContentType.TEXT,
|
||||
bandwidth: 0,
|
||||
language: '',
|
||||
label: null,
|
||||
kind: null,
|
||||
mimeType: null,
|
||||
codecs: null,
|
||||
primary: false,
|
||||
roles: [],
|
||||
accessibilityPurpose: null,
|
||||
forced: false,
|
||||
originalTextId: null,
|
||||
originalLanguage: 'speech-to-text',
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * The Web Audio objects created for one media element by
 * getAudioTrackFromMediaElement_():
 *  - audioContext: the context that owns the nodes below.
 *  - sourceNode: the node created from the media element.
 *  - destinationNode: the media-stream destination the source is connected
 *    to.
 *  - audioTrack: the first audio track of the destination node's stream.
 *
 * @typedef {{
 *   audioContext: AudioContext,
 *   sourceNode: MediaElementAudioSourceNode,
 *   destinationNode: MediaStreamAudioDestinationNode,
 *   audioTrack: MediaStreamTrack,
 * }}
 */
shaka.text.SpeechToText.AudioObject;
|
||||
|
||||
/**
 * Audio objects that have been created so far, keyed by the media element
 * they were created for.  Shared by all SpeechToText instances.
 *
 * For now, entries are never cleaned up: if we close the context and
 * disconnect from the source, the audio from the video element never works
 * again.
 *
 * @const {!Map<!HTMLMediaElement, shaka.text.SpeechToText.AudioObject>}
 * @private
 */
shaka.text.SpeechToText.audioObjectMap_ = new Map();
|
||||
|
||||
/**
 * Lazily-computed flag telling whether SpeechRecognition.start() checks its
 * (new) MediaStreamTrack parameter.  Evaluates to false when no
 * SpeechRecognition constructor is available at all.
 *
 * @const {!shaka.util.Lazy.<boolean>}
 */
shaka.text.SpeechToText.isMediaStreamTrackSupported =
    new shaka.util.Lazy(() => {
      // The probe runs in a temporary iframe to avoid a permission prompt.
      // Lazy() makes sure it only happens once, and only on demand.
      /** @type {HTMLIFrameElement} */
      const probeFrame = shaka.util.Dom.asHTMLIFrameElement(
          document.body.appendChild(document.createElement('iframe')));
      const probeWindow = probeFrame.contentWindow;
      const RecognitionCtor = probeWindow.SpeechRecognition ||
          probeWindow.webkitSpeechRecognition;

      // Construct the recognizer (when there is a constructor) while the
      // iframe is still attached, then detach the iframe: the start() call
      // below must run with the iframe detached from the DOM.
      const recognition = RecognitionCtor ?
          /** @type {ChromeSpeechRecognition} */(new RecognitionCtor()) :
          null;
      probeFrame.remove();
      if (!recognition) {
        return false;
      }

      // Deliberately not a MediaStreamTrack.
      const bogusTrack =
          /** @type {MediaStreamTrack} */(/** @type {?} */(0));

      let supported = false;
      try {
        // If the new parameter is not used, this start() call succeeds,
        // because the 0 gets ignored.  In the main window this would show a
        // microphone permission prompt, but the iframe keeps it silent by
        // denying permission immediately.
        recognition.start(bogusTrack);
        recognition.stop();
      } catch (error) {
        // If the new parameter is checked, we get a TypeError because 0
        // isn't a MediaStreamTrack.
        supported = error.name == 'TypeError';
      }
      return supported;
    });
|
||||
@@ -126,6 +126,17 @@ shaka.util.Dom = class {
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Cast a Node/Element to an HTMLIFrameElement
|
||||
*
|
||||
* @param {!Node|!Element} original
|
||||
* @return {!HTMLIFrameElement}
|
||||
*/
|
||||
static asHTMLIFrameElement(original) {
|
||||
return /** @type {!HTMLIFrameElement}*/ (original);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Returns the element with a given class name.
|
||||
* Assumes the class name to be unique for a given parent.
|
||||
|
||||
@@ -267,6 +267,12 @@ shaka.util.PlayerConfiguration = class {
|
||||
avoidEvictionOnQuotaExceededError: false,
|
||||
crossBoundaryStrategy: shaka.config.CrossBoundaryStrategy.KEEP,
|
||||
returnToEndOfLiveWindowWhenOutside: false,
|
||||
speechToText: {
|
||||
enabled: false,
|
||||
maxTextLength: 140,
|
||||
processLocally: false,
|
||||
languagesToTranslate: [],
|
||||
},
|
||||
};
|
||||
|
||||
const networking = {
|
||||
|
||||
@@ -67,6 +67,7 @@ goog.require('shaka.text.Mp4TtmlParser');
|
||||
goog.require('shaka.text.Mp4VttParser');
|
||||
goog.require('shaka.text.TextEngine');
|
||||
goog.require('shaka.text.SbvTextParser');
|
||||
goog.require('shaka.text.SpeechToText');
|
||||
goog.require('shaka.text.SrtTextParser');
|
||||
goog.require('shaka.text.SsaTextParser');
|
||||
goog.require('shaka.text.TtmlTextParser');
|
||||
|
||||
@@ -26,6 +26,7 @@ describe('CastUtils', () => {
|
||||
'getManifest', // Too large to proxy
|
||||
'getManifestParserFactory', // Would not serialize.
|
||||
'setVideoContainer',
|
||||
'getVideoContainer',
|
||||
'getActiveSessionsMetadata',
|
||||
'releaseAllMutexes', // Very specific to the inner workings of the player.
|
||||
'detachAndSavePreload',
|
||||
|
||||
@@ -38,7 +38,8 @@ describe('Demo', () => {
|
||||
const exceptions = new Set()
|
||||
.add('preferredAudioCodecs')
|
||||
.add('preferredVideoCodecs')
|
||||
.add('preferredTextFormats');
|
||||
.add('preferredTextFormats')
|
||||
.add('streaming.speechToText.languagesToTranslate');
|
||||
// We determine whether a config option has been made or not by looking at
|
||||
// which config values have been queried (via the fake main object's
|
||||
// |getCurrentConfigValue| method).
|
||||
|
||||
@@ -0,0 +1,374 @@
|
||||
/*! @license
|
||||
* Shaka Player
|
||||
* Copyright 2016 Google LLC
|
||||
* SPDX-License-Identifier: Apache-2.0
|
||||
*/
|
||||
|
||||
describe('SpeechToText', () => {
  /** @type {!HTMLVideoElement} */
  let video;
  /** @type {!shaka.Player} */
  let player;
  /** @type {shaka.extern.SpeechToTextConfiguration} */
  let config;
  /** @type {shaka.text.SpeechToText} */
  let speechToText;

  // Snapshots of everything global that the tests below replace, so that it
  // can be put back after each test.
  const originalSpeechRecognition = window.SpeechRecognition;
  const originalTranslator = window.Translator;

  // eslint-disable-next-line no-restricted-syntax
  const originalAppendChild = Node.prototype.appendChild;

  /**
   * Puts a property of |window| back to its pre-test state.  A property
   * that did not exist before is deleted rather than assigned |undefined|,
   * because an assigned |undefined| would still satisfy checks such as
   * |'Translator' in window| in suites that run afterwards.
   *
   * @param {string} name
   * @param {*} original
   */
  const restoreGlobal = (name, original) => {
    if (original === undefined) {
      delete window[name];
    } else {
      window[name] = original;
    }
  };

  beforeAll(() => {
    video = shaka.test.UiUtils.createVideoElement();
    document.body.appendChild(video);
  });

  beforeEach(async () => {
    // Force the feature detection to run again for every test, since the
    // tests change which SpeechRecognition implementation is visible.
    shaka.text.SpeechToText.isMediaStreamTrackSupported.reset();

    player = new shaka.Player();
    await player.attach(video);

    const defaultConfig = shaka.util.PlayerConfiguration.createDefault();

    config = defaultConfig.streaming.speechToText;
  });

  afterEach(async () => {
    restoreGlobal('SpeechRecognition', originalSpeechRecognition);
    restoreGlobal('Translator', originalTranslator);
    // eslint-disable-next-line no-restricted-syntax
    Node.prototype.appendChild = originalAppendChild;
    if (speechToText) {
      speechToText.release();
      // Do not carry a released instance over into the next test.
      speechToText = null;
    }
    await player.unload();
    await player.destroy();
  });

  afterAll(() => {
    document.body.removeChild(video);
  });

  describe('when no SpeechRecognition support', () => {
    beforeEach(() => {
      delete window.SpeechRecognition;
    });

    it('isSupported returns false', () => {
      speechToText = new shaka.text.SpeechToText(player);
      expect(speechToText.isSupported()).toBe(false);
    });

    it('isEnabled returns false', () => {
      speechToText = new shaka.text.SpeechToText(player);
      expect(speechToText.isEnabled()).toBe(false);
    });

    it('getTextTracks returns empty', () => {
      speechToText = new shaka.text.SpeechToText(player);
      expect(speechToText.getTextTracks()).toEqual([]);
    });

    // No expectation: the test passes as long as disable() does not throw.
    it('disable do nothing', () => {
      speechToText = new shaka.text.SpeechToText(player);
      speechToText.disable();
    });

    // No expectation: the test passes as long as configure() does not throw.
    it('configure works', () => {
      speechToText = new shaka.text.SpeechToText(player);
      speechToText.configure(config);
    });
  });

  describe('when SpeechRecognition support', () => {
    /** @type {!HTMLElement} */
    let container;

    beforeEach(() => {
      container = /** @type {!HTMLElement} */(document.createElement('div'));
      player.setVideoContainer(container);

      /** @type {(typeof SpeechRecognition)} */
      const mock = /** @type {?} */ (MockSpeechRecognition);

      window.SpeechRecognition = mock;

      // The feature detection looks for SpeechRecognition inside a freshly
      // appended iframe, so the mock also has to be installed on the window
      // of every iframe that gets appended.
      // eslint-disable-next-line no-restricted-syntax
      Node.prototype.appendChild = function(child) {
        // eslint-disable-next-line no-restricted-syntax
        const result = originalAppendChild.call(this, child);
        if (child instanceof HTMLIFrameElement) {
          const iframe = /** @type {!HTMLIFrameElement} */ (child);
          const contentWindow = iframe.contentWindow;
          if (contentWindow) {
            contentWindow.SpeechRecognition = mock;
          }
        }
        return result;
      };
    });

    it('isSupported returns true', () => {
      speechToText = new shaka.text.SpeechToText(player);
      expect(speechToText.isSupported()).toBe(true);
    });

    it('isSupported returns false if no videoContainer', () => {
      player.setVideoContainer(null);
      speechToText = new shaka.text.SpeechToText(player);
      expect(speechToText.isSupported()).toBe(false);
    });

    it('getTextTracks returns the correct result', () => {
      speechToText = new shaka.text.SpeechToText(player);
      const tracks = speechToText.getTextTracks();
      expect(tracks.length).toBe(1);
    });

    it('getTextTracks returns the correct result with Translator API', () => {
      /** @type {(typeof Translator)} */
      window.Translator = /** @type {?} */ (MockTranslator);

      speechToText = new shaka.text.SpeechToText(player);
      config.languagesToTranslate = ['en', 'es'];
      speechToText.configure(config);
      const tracks = speechToText.getTextTracks();
      // One basic track plus one per language to translate.
      expect(tracks.length).toBe(3);
    });

    // eslint-disable-next-line @stylistic/max-len
    it('getTextTracks returns the correct result without Translator API', () => {
      delete window.Translator;

      speechToText = new shaka.text.SpeechToText(player);
      config.languagesToTranslate = ['en', 'es'];
      speechToText.configure(config);
      const tracks = speechToText.getTextTracks();
      // Without a Translator, only the basic track is offered.
      expect(tracks.length).toBe(1);
    });

    it('create shaka-speech-to-text-container', () => {
      speechToText = new shaka.text.SpeechToText(player);
      const elements =
          container.getElementsByClassName('shaka-speech-to-text-container');
      expect(elements.length).toBe(1);
    });

    it('release remove shaka-speech-to-text-container', () => {
      speechToText = new shaka.text.SpeechToText(player);
      let elements =
          container.getElementsByClassName('shaka-speech-to-text-container');
      expect(elements.length).toBe(1);
      speechToText.release();
      elements =
          container.getElementsByClassName('shaka-speech-to-text-container');
      expect(elements.length).toBe(0);
    });

    it('enable and disable works', () => {
      speechToText = new shaka.text.SpeechToText(player);
      let tracks = speechToText.getTextTracks();
      expect(tracks.length).toBe(1);
      expect(tracks[0].active).toBe(false);
      speechToText.enable(tracks[0]);
      expect(speechToText.isEnabled()).toBe(true);
      tracks = speechToText.getTextTracks();
      expect(tracks.length).toBe(1);
      expect(tracks[0].active).toBe(true);
      speechToText.disable();
      expect(speechToText.isEnabled()).toBe(false);
    });
  });
});
|
||||
|
||||
/**
 * Test double for the browser's SpeechRecognition object.  start() and
 * stop() immediately report 'start' / 'end', and the simulate*() helpers
 * let a test inject results and errors.  Every notification goes to the
 * matching on* handler first (when one is set) and is then dispatched as a
 * DOM event to listeners registered through addEventListener().
 *
 * @implements {EventTarget}
 */
class MockSpeechRecognition {
  constructor() {
    /** @type {?function()} */
    this.onstart = null;
    /** @type {?function(!SpeechRecognitionEvent)} */
    this.onresult = null;
    /** @type {?function(!SpeechRecognitionError)} */
    this.onerror = null;
    /** @type {?function():void} */
    this.onend = null;

    /** @type {string} */
    this.lang = 'en-US';
    /** @type {boolean} */
    this.continuous = false;
    /** @type {boolean} */
    this.interimResults = false;
    /** @type {boolean} */
    this.processLocally = false;

    // A document fragment serves as a safe, stand-alone EventTarget.
    /** @private {!EventTarget} */
    this.eventTarget_ = document.createDocumentFragment();
  }

  /**
   * Reports that recognition started.  Throws a TypeError when an argument
   * is given that is not an object.
   *
   * @param {!MediaStreamTrack=} mediaStreamTrack
   */
  start(mediaStreamTrack) {
    const wasGiven = mediaStreamTrack !== undefined;
    if (wasGiven && typeof mediaStreamTrack !== 'object') {
      throw new TypeError();
    }

    if (this.onstart) {
      this.onstart();
    }
    const started = new Event('start');
    this.eventTarget_.dispatchEvent(started);
  }

  /** Reports that recognition ended. */
  stop() {
    if (this.onend) {
      this.onend();
    }
    const ended = new Event('end');
    this.eventTarget_.dispatchEvent(ended);
  }

  /**
   * Delivers a fake recognition result.
   *
   * @param {string} transcript
   * @param {boolean=} isFinal
   */
  simulateResult(transcript, isFinal = true) {
    const results = [{transcript, confidence: 0.95}];
    const payload = /** @type {!SpeechRecognitionEvent} */ ({
      resultIndex: 0,
      results,
      isFinal,
    });

    if (this.onresult) {
      this.onresult(payload);
    }
    // Listeners receive the same payload as the |detail| of a CustomEvent.
    const wrapped = new CustomEvent('result', {detail: payload});
    this.eventTarget_.dispatchEvent(wrapped);
  }

  /**
   * Delivers a fake recognition error.
   *
   * @param {string} errorType
   */
  simulateError(errorType) {
    const payload = /** @type {!SpeechRecognitionError} */ ({
      error: errorType,
    });

    if (this.onerror) {
      this.onerror(payload);
    }
    // Listeners receive the same payload as the |detail| of a CustomEvent.
    const wrapped = new CustomEvent('error', {detail: payload});
    this.eventTarget_.dispatchEvent(wrapped);
  }

  /**
   * Forwards to the internal event target.
   *
   * @override
   */
  addEventListener(type, listener) {
    this.eventTarget_.addEventListener(type, listener);
  }

  /**
   * Forwards to the internal event target.
   *
   * @override
   */
  removeEventListener(type, listener) {
    this.eventTarget_.removeEventListener(type, listener);
  }

  /**
   * Forwards to the internal event target.
   *
   * @override
   */
  dispatchEvent(event) {
    return this.eventTarget_.dispatchEvent(event);
  }
}
|
||||
|
||||
/**
 * Test double for the Translator API.  Translations are faked by prefixing
 * the input with the target language in square brackets.
 *
 * The promise-returning members (translate() and create()) report an aborted
 * signal or a destroyed instance through a rejected promise instead of a
 * synchronous throw, so they behave the same for callers that use `await`
 * and for callers that chain `.catch()`.
 */
class MockTranslator {
  /**
   * @param {{
   *   sourceLanguage: string,
   *   targetLanguage: string,
   *   signal: (!AbortSignal|undefined)
   * }} options
   */
  constructor(options) {
    this.sourceLanguage = options.sourceLanguage;
    this.targetLanguage = options.targetLanguage;
    // Without a caller-supplied signal, use one that never aborts.
    this.signal = options.signal || new AbortController().signal;
    this.inputQuota = 100000;
    this.destroyed = false;
  }

  /**
   * Resolves with the fake translation, or rejects with an AbortError when
   * the instance was destroyed or its signal was aborted.
   *
   * @param {string} text
   * @return {!Promise<string>}
   */
  translate(text) {
    if (this.destroyed || this.signal.aborted) {
      return Promise.reject(new DOMException('Aborted', 'AbortError'));
    }
    return Promise.resolve(`[${this.targetLanguage}] ${text}`);
  }

  /**
   * Streams the fake translation one chunk at a time: first the bracketed
   * target language, then each space-separated word of the input.  Every
   * chunk ends with a space.
   *
   * @param {string} text
   * @return {!ReadableStream<string>}
   */
  translateStreaming(text) {
    const chunks = [`[${this.targetLanguage}]`, ...text.split(' ')];
    let index = 0;
    return new ReadableStream({
      // eslint-disable-next-line no-restricted-syntax
      pull(controller) {
        if (index < chunks.length) {
          controller.enqueue(chunks[index++] + ' ');
        } else {
          controller.close();
        }
      },
    });
  }

  /**
   * Resolves with the length of the text.
   *
   * @param {string} text
   * @return {!Promise<number>}
   */
  measureInputUsage(text) {
    return Promise.resolve(text.length);
  }

  /**
   * Marks the instance as destroyed; later translate() calls reject.
   *
   * @return {void}
   */
  destroy() {
    this.destroyed = true;
  }

  /**
   * Always resolves with 'available'; the options are ignored.
   *
   * @param {Object|null=} options
   * @return {!Promise<string>}
   */
  static availability(options) {
    return Promise.resolve('available');
  }

  /**
   * Resolves with a new mock translator, or rejects with an AbortError when
   * the given signal is already aborted.
   *
   * @param {{
   *   sourceLanguage: string,
   *   targetLanguage: string,
   *   signal: (!AbortSignal|undefined)
   * }} options
   * @return {!Promise<!MockTranslator>}
   */
  static create(options) {
    if (options.signal && options.signal.aborted) {
      return Promise.reject(new DOMException('Aborted', 'AbortError'));
    }
    return Promise.resolve(new MockTranslator(options));
  }
}
|
||||
|
||||
Vendored
+15
-4
@@ -1802,14 +1802,25 @@ shaka.ui.Controls = class extends shaka.util.FakeEventTarget {
|
||||
* @private
|
||||
*/
|
||||
computeShakaTextContainerSize_() {
|
||||
const elements = [];
|
||||
const shakaTextContainer = this.videoContainer_.getElementsByClassName(
|
||||
'shaka-text-container')[0];
|
||||
const shakaSpeechToTextContainer =
|
||||
this.videoContainer_.getElementsByClassName(
|
||||
'shaka-speech-to-text-container')[0];
|
||||
if (shakaTextContainer) {
|
||||
elements.push(shakaTextContainer);
|
||||
}
|
||||
if (shakaSpeechToTextContainer) {
|
||||
elements.push(shakaSpeechToTextContainer);
|
||||
}
|
||||
if (elements.length) {
|
||||
let bottom = '0px';
|
||||
if (this.isOpaque()) {
|
||||
shakaTextContainer.style.bottom =
|
||||
this.bottomControls_.clientHeight + 'px';
|
||||
} else {
|
||||
shakaTextContainer.style.bottom = '0px';
|
||||
bottom = this.bottomControls_.clientHeight + 'px';
|
||||
}
|
||||
for (const element of elements) {
|
||||
element.style.bottom = bottom;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+18
-2
@@ -315,8 +315,24 @@ shaka.ui.LanguageUtils = class {
|
||||
const span = shaka.util.Dom.createHTMLElement('span');
|
||||
button.appendChild(span);
|
||||
|
||||
span.textContent =
|
||||
shaka.ui.LanguageUtils.getLanguageName(language, localization);
|
||||
if (track.originalLanguage == 'speech-to-text') {
|
||||
// Necessary when there are multiple speech-to-text tracks and they
|
||||
// translate into different languages.
|
||||
if (language) {
|
||||
span.textContent = [
|
||||
shaka.ui.LanguageUtils.getLanguageName(language, localization),
|
||||
' (',
|
||||
localization.resolve(shaka.ui.Locales.Ids.AUTO_GENERATED),
|
||||
')',
|
||||
].join('');
|
||||
} else {
|
||||
span.textContent =
|
||||
localization.resolve(shaka.ui.Locales.Ids.AUTO_GENERATED);
|
||||
}
|
||||
} else {
|
||||
span.textContent =
|
||||
shaka.ui.LanguageUtils.getLanguageName(language, localization);
|
||||
}
|
||||
switch (trackLabelFormat) {
|
||||
case shaka.ui.Overlay.TrackLabelFormat.LANGUAGE:
|
||||
if (forced) {
|
||||
|
||||
@@ -42,7 +42,8 @@
|
||||
|
||||
background-color: @general-background-color-opaque;
|
||||
|
||||
.shaka-text-container {
|
||||
.shaka-text-container,
|
||||
.shaka-speech-to-text-container {
|
||||
/* In fullscreen mode, the text displayer's font size should be relative to
|
||||
* the either window height or width (whichever is smaller), instead of a
|
||||
* fixed size. */
|
||||
@@ -403,7 +404,8 @@
|
||||
background: linear-gradient(rgba(0, 0, 0, 0%) 0, rgba(0, 0, 0, 50%) 100%);
|
||||
}
|
||||
|
||||
.shaka-text-container {
|
||||
.shaka-text-container,
|
||||
.shaka-speech-to-text-container {
|
||||
.absolute-position();
|
||||
|
||||
/* Make sure the text container doesn't steal pointer events from another
|
||||
@@ -436,7 +438,8 @@
|
||||
}
|
||||
}
|
||||
|
||||
.shaka-controls-container[shown="true"] ~ .shaka-text-container {
|
||||
.shaka-controls-container[shown="true"] ~ .shaka-text-container,
|
||||
.shaka-controls-container[shown="true"] ~ .shaka-speech-to-text-container {
|
||||
/* Disable the transition delay when moving the captions up, so that the
|
||||
* controls don't appear over the captions. */
|
||||
transition-delay: 0ms;
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
"AD_STATISTICS": "إحصاءات الإعلانات",
|
||||
"AD_TIME": "الإعلان: [AD_TIME]",
|
||||
"AIRPLAY": "AirPlay",
|
||||
"AUTO_GENERATED": "مولّد تلقائيّا",
|
||||
"AUTO_QUALITY": "تلقائي",
|
||||
"BACK": "رجوع",
|
||||
"CAPTIONS": "الترجمة والشرح",
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
"AD_STATISTICS": "Anzeigenstatistiken",
|
||||
"AD_TIME": "Werbeanzeige: [AD_TIME]",
|
||||
"AIRPLAY": "AirPlay",
|
||||
"AUTO_GENERATED": "Automatisch erzeugt",
|
||||
"AUTO_QUALITY": "Automatisch",
|
||||
"BACK": "Zurück",
|
||||
"CAPTIONS": "Untertitel",
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
"AD_STATISTICS": "Ad statistics",
|
||||
"AD_TIME": "Ad: [AD_TIME]",
|
||||
"AIRPLAY": "AirPlay",
|
||||
"AUTO_GENERATED": "Auto generated",
|
||||
"AUTO_QUALITY": "Auto",
|
||||
"BACK": "Back",
|
||||
"CAPTIONS": "Captions",
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
"AD_STATISTICS": "Estadísticas del anuncio",
|
||||
"AD_TIME": "Anuncio: [AD_TIME]",
|
||||
"AIRPLAY": "AirPlay",
|
||||
"AUTO_GENERATED": "Generados automáticamente",
|
||||
"AUTO_QUALITY": "Automático",
|
||||
"BACK": "Atrás",
|
||||
"CAPTIONS": "Subtítulos",
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
"AD_STATISTICS": "Statistiques sur les annonces",
|
||||
"AD_TIME": "Annonce : [AD_TIME]",
|
||||
"AIRPLAY": "AirPlay",
|
||||
"AUTO_GENERATED": "Générés automatiquement",
|
||||
"AUTO_QUALITY": "Automatique",
|
||||
"BACK": "Retour",
|
||||
"CAPTIONS": "Sous-titres",
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
"AD_STATISTICS": "Advertentiestatistieken",
|
||||
"AD_TIME": "Advertentie: [AD_TIME]",
|
||||
"AIRPLAY": "AirPlay",
|
||||
"AUTO_GENERATED": "Automatisch gegenereerd",
|
||||
"AUTO_QUALITY": "Automatisch",
|
||||
"BACK": "Terug",
|
||||
"CAPTIONS": "Ondertiteling",
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
"AD_STATISTICS": "Annonsestatistikk",
|
||||
"AD_TIME": "Annonse: [AD_TIME]",
|
||||
"AIRPLAY": "AirPlay",
|
||||
"AUTO_GENERATED": "Automatisk generert",
|
||||
"AUTO_QUALITY": "Automatisk",
|
||||
"BACK": "Tilbake",
|
||||
"CAPTIONS": "Teksting",
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
"AD_STATISTICS": "Statystyki reklam",
|
||||
"AD_TIME": "Reklama: [AD_TIME]",
|
||||
"AIRPLAY": "AirPlay",
|
||||
"AUTO_GENERATED": "Wygenerowane automatycznie",
|
||||
"AUTO_QUALITY": "Automatycznie",
|
||||
"BACK": "Wstecz",
|
||||
"CAPTIONS": "Napisy",
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
"AD_STATISTICS": "Estatísticas de anúncios",
|
||||
"AD_TIME": "Anúncio: [AD_TIME]",
|
||||
"AIRPLAY": "AirPlay",
|
||||
"AUTO_GENERATED": "Gerada automaticamente",
|
||||
"AUTO_QUALITY": "Automática",
|
||||
"BACK": "Voltar",
|
||||
"CAPTIONS": "Legendas",
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
"AD_STATISTICS": "Estatísticas do anúncio",
|
||||
"AD_TIME": "Anúncio: [AD_TIME]",
|
||||
"AIRPLAY": "AirPlay",
|
||||
"AUTO_GENERATED": "Gerada automaticamente",
|
||||
"AUTO_QUALITY": "Automático",
|
||||
"BACK": "Anterior",
|
||||
"CAPTIONS": "Legendas",
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
"AD_STATISTICS": "Статистика объявлений",
|
||||
"AD_TIME": "Реклама: [AD_TIME]",
|
||||
"AIRPLAY": "AirPlay",
|
||||
"AUTO_GENERATED": "Автоматически сгенерированные",
|
||||
"AUTO_QUALITY": "Авто",
|
||||
"BACK": "Назад",
|
||||
"CAPTIONS": "Субтитры",
|
||||
|
||||
@@ -19,6 +19,10 @@
|
||||
"description": "Label for a button used to open the native AirPlay dialog in the browser and select a destination to AirPlay to.",
|
||||
"message": "AirPlay"
|
||||
},
|
||||
"AUTO_GENERATED": {
|
||||
"description": "Label for a button used to select an auto generated text track.",
|
||||
"message": "Auto generated"
|
||||
},
|
||||
"AUTO_QUALITY": {
|
||||
"description": "Label for a button used to allow the video player to select the resolution/quality of the video automatically.",
|
||||
"meaning": "Automatic",
|
||||
|
||||
Reference in New Issue
Block a user