mirror of
https://github.com/shaka-project/shaka-player.git
synced 2026-06-14 15:56:38 +03:00
e9551b3ee2
Related to https://github.com/moq-wg/msf/pull/133
494 lines
17 KiB
JavaScript
494 lines
17 KiB
JavaScript
/*! @license
 * Shaka Player
 * Copyright 2016 Google LLC
 * SPDX-License-Identifier: Apache-2.0
 */
|
|
|
|
goog.provide('shaka.util.LanguageUtils');
|
|
|
|
goog.require('goog.asserts');
|
|
|
|
|
|
/**
 * @summary A set of language utility functions.
 *
 * Locales handled here have the normalized shape
 * "language[-REGION][-x-privateuse]" (see |normalize|).  The relationship
 * helpers (parent / sibling / language-compatible) look only at the language
 * and region components and ignore any private-use suffix.
 *
 * @final
 * @export
 */
shaka.util.LanguageUtils = class {
  /**
   * Check if |locale1| and |locale2| are locale-compatible.
   *
   * Locale-compatible is defined as all components in each locale match.
   * Both locales are normalized first and the normalized strings are then
   * compared for equality, so the language, the region and any private-use
   * suffix must all match.
   *
   * Examples:
   *  Locale A | Locale B | Locale Compatible
   *  ---------------------------------------
   *  en-US    | en-US    | true
   *  en       | en-US    | false
   *  en-US    | en-CA    | false
   *
   * @param {string} locale1
   * @param {string} locale2
   * @return {boolean}
   * @export
   */
  static areLocaleCompatible(locale1, locale2) {
    const LanguageUtils = shaka.util.LanguageUtils;

    // Even though they SHOULD already be normalized, let's just be safe and
    // do it again.
    return LanguageUtils.normalize(locale1) ===
        LanguageUtils.normalize(locale2);
  }

  /**
   * Check if |locale1| and |locale2| are language-compatible.
   *
   * Language compatible is when the language component of each locale matches.
   * This means that no matter what region they have (or don't have) as long as
   * the language components match, they are language-compatible.
   *
   * Examples:
   *  Locale A | Locale B | Language-Compatible
   *  -----------------------------------------
   *  en-US    | en-US    | true
   *  en-US    | en       | true
   *  en-US    | en-CA    | true
   *  en-CA    | fr-CA    | false
   *
   * @param {string} locale1
   * @param {string} locale2
   * @return {boolean}
   * @export
   */
  static areLanguageCompatible(locale1, locale2) {
    const LanguageUtils = shaka.util.LanguageUtils;

    // Even though they SHOULD already be normalized, let's just be safe and
    // do it again before breaking them into components.
    /** @type {!Array<string>} */
    const locale1Components =
        LanguageUtils.disassembleLocale_(LanguageUtils.normalize(locale1));
    /** @type {!Array<string>} */
    const locale2Components =
        LanguageUtils.disassembleLocale_(LanguageUtils.normalize(locale2));

    // We are language compatible if we have the same language.
    return locale1Components[0] === locale2Components[0];
  }

  /**
   * Check if |possibleParent| is the parent locale of |possibleChild|. Because
   * we do not support dialects, the parent-child relationship is a lot simpler.
   * In a parent child relationship:
   *    - The parent and child have the same language-component
   *    - The parent has no region-component
   *    - The child has a region-component
   *
   * Example:
   *  Locale A | Locale B | Is A The parent of B?
   *  --------------------------------------------
   *  en-US    | en-US    | no
   *  en-US    | en       | no
   *  en       | en-US    | yes
   *  en       | en       | no
   *  en       | fr       | no
   *
   * @param {string} possibleParent
   * @param {string} possibleChild
   * @return {boolean}
   * @export
   */
  static isParentOf(possibleParent, possibleChild) {
    const LanguageUtils = shaka.util.LanguageUtils;

    // Even though they SHOULD already be normalized, let's just be safe and
    // do it again.  The components are only language and region.
    /** @type {!Array<string>} */
    const parentComponents = LanguageUtils.disassembleLocale_(
        LanguageUtils.normalize(possibleParent));
    /** @type {!Array<string>} */
    const childComponents = LanguageUtils.disassembleLocale_(
        LanguageUtils.normalize(possibleChild));

    return parentComponents[0] === childComponents[0] &&
        parentComponents.length === 1 &&
        childComponents.length === 2;
  }

  /**
   * Check if |localeA| shares the same parent with |localeB|. Since we don't
   * support dialect, we will only look at language and region. For two locales
   * to be siblings:
   *    - Both must have language-components
   *    - Both must have region-components
   *    - Both must have the same language-component
   *
   * Example:
   *  Locale A | Locale B | Siblings?
   *  --------------------------------------------
   *  en-US    | en-US    | yes
   *  en-US    | en-CA    | yes
   *  en-US    | en       | no
   *  en       | en-US    | no
   *  en       | en       | no
   *  en       | fr       | no
   *
   * @param {string} localeA
   * @param {string} localeB
   * @return {boolean}
   * @export
   */
  static isSiblingOf(localeA, localeB) {
    const LanguageUtils = shaka.util.LanguageUtils;

    // Even though they SHOULD already be normalized, let's just be safe and
    // do it again.  The components are only language and region.
    /** @type {!Array<string>} */
    const localeAComponents =
        LanguageUtils.disassembleLocale_(LanguageUtils.normalize(localeA));
    /** @type {!Array<string>} */
    const localeBComponents =
        LanguageUtils.disassembleLocale_(LanguageUtils.normalize(localeB));

    return localeAComponents.length === 2 &&
        localeBComponents.length === 2 &&
        localeAComponents[0] === localeBComponents[0];
  }

  /**
   * Normalize a locale. This will take a locale and canonicalize it to a state
   * that we are prepared to work with.
   *
   * The normalized form is one of:
   *    - language
   *    - language-REGION
   *    - either of the above followed by "-x-" and a private-use suffix
   *      (as per https://datatracker.ietf.org/doc/html/rfc5646)
   *
   * The first subtag is taken as the language and the second one (if any) as
   * the region; any further subtags before the private-use part are
   * discarded.  3-character language codes that have an entry in |isoMap_|
   * are converted to their 2-character code.  The language is forced to lower
   * case and the region to upper case.  The private-use suffix is kept as
   * given.
   *
   * @param {string} locale
   * @return {string}
   * @export
   */
  static normalize(locale) {
    const LanguageUtils = shaka.util.LanguageUtils;

    const privateUsePrefix = 'x-';

    // Everything after the first "-x-" singleton is private use, so split
    // only once (a later "-x-" belongs to the suffix and must not be lost).
    // Language tags are case-insensitive, so "-X-" is accepted too.
    const privateUseStart = locale.search(/-x-/i);
    const hasPrivateUse = privateUseStart >= 0;
    const languageRegion =
        hasPrivateUse ? locale.substring(0, privateUseStart) : locale;
    const privateUseSuffix = hasPrivateUse ?
        locale.substring(privateUseStart + privateUsePrefix.length + 1) : '';

    // We are only going to use the language, the region and the private use
    // part.  Anything else is thrown away.
    const [languageCode = '', regionCode = ''] = languageRegion.split('-');
    const privateUse = privateUseSuffix ?
        `${privateUsePrefix}${privateUseSuffix}` : '';

    // Convert the language to lower case.  It is standard for the language
    // code to be in lower case, but it will also make the map look-up easier.
    const lowerLanguage = languageCode.toLowerCase();
    const language = LanguageUtils.isoMap_.get(lowerLanguage) || lowerLanguage;

    // Convert the region to upper case.  It is standard for the region to be
    // in upper case.  If there is no region code, then it will be an empty
    // string and this will be a no-op.
    const region = regionCode.toUpperCase();

    const languageAndRegion = region ? `${language}-${region}` : language;
    return privateUse ?
        `${languageAndRegion}-${privateUse}` : languageAndRegion;
  }

  /**
   * Check if two language codes are siblings. Language codes are siblings if
   * they share the same base language while neither one is the base language.
   *
   * For example, "en-US" and "en-CA" are siblings but "en-US" and "en" are not
   * siblings.
   *
   * Both codes are normalized before the comparison so that an un-normalized
   * spelling of a base language (e.g. "EN" or "eng") is still recognized as
   * the base language.
   *
   * @param {string} a
   * @param {string} b
   * @return {boolean}
   * @export
   */
  static areSiblings(a, b) {
    const LanguageUtils = shaka.util.LanguageUtils;

    const normalizedA = LanguageUtils.normalize(a);
    const normalizedB = LanguageUtils.normalize(b);

    const baseA = LanguageUtils.getBase(normalizedA);
    const baseB = LanguageUtils.getBase(normalizedB);

    return normalizedA !== baseA && normalizedB !== baseB && baseA === baseB;
  }

  /**
   * Compute a numerical relatedness for language codes.  Language codes with a
   * higher relatedness are a better match.  Unrelated language codes have a
   * relatedness score of 0.
   *
   * Scores, from best to worst:
   *    4 - |candidate| is an exact match of |target|
   *    3 - |candidate| is the parent of |target|
   *    2 - |candidate| is a sibling of |target|
   *    1 - |candidate| is a child of |target|
   *    0 - unrelated
   *
   * @param {string} target
   * @param {string} candidate
   * @return {number}
   * @export
   */
  static relatedness(target, candidate) {
    const LanguageUtils = shaka.util.LanguageUtils;

    const safeTarget = LanguageUtils.normalize(target);
    const safeCandidate = LanguageUtils.normalize(candidate);

    // An exact match is the top score.
    if (safeCandidate === safeTarget) {
      return 4;
    }

    // Next is a parent of the target language.
    if (LanguageUtils.isParentOf(safeCandidate, safeTarget)) {
      return 3;
    }

    // Next is a sibling of the target language.
    if (LanguageUtils.isSiblingOf(safeCandidate, safeTarget)) {
      return 2;
    }

    // Next is a child of the target language.
    if (LanguageUtils.isParentOf(safeTarget, safeCandidate)) {
      return 1;
    }

    // Otherwise, they are unrelated.
    return 0;
  }

  /**
   * Get the normalized base language for a language code.  The base language
   * is everything before the first "-", lower-cased and mapped through
   * |isoMap_| when an entry exists.
   *
   * @param {string} lang
   * @return {string}
   * @export
   */
  static getBase(lang) {
    const LanguageUtils = shaka.util.LanguageUtils;

    const splitAt = lang.indexOf('-');
    const rawMajor = splitAt >= 0 ? lang.substring(0, splitAt) : lang;

    // Convert the major code to lower case.  It is standard for the major code
    // to be in lower case, but it will also make the map look-up easier.
    const major = rawMajor.toLowerCase();
    return LanguageUtils.isoMap_.get(major) || major;
  }

  /**
   * Get the normalized language of the given text stream. Will return 'und' if
   * a language is not found on the text stream.
   *
   * This should always be used to get the language from a text stream.
   *
   * @param {shaka.extern.Stream} stream
   * @return {string}
   * @export
   */
  static getLocaleForText(stream) {
    const LanguageUtils = shaka.util.LanguageUtils;

    // Any falsy language (including the empty string) counts as missing.
    return LanguageUtils.normalize(stream.language || 'und');
  }

  /**
   * Get the normalized locale for the given variant. This will look through
   * the variant to find the locale that represents the content in the variant.
   * This will return 'und' if no language can be found.
   *
   * This should always be used to get the locale from a variant.
   *
   * @param {shaka.extern.Variant} variant
   * @return {string}
   * @export
   */
  static getLocaleForVariant(variant) {
    const LanguageUtils = shaka.util.LanguageUtils;

    // Our preference order is:
    //  1. Variant
    //  2. Audio Stream
    //  3. Video Stream
    //
    // We are going to consider all falsy strings to be invalid locales, this
    // will include empty strings.
    const language = variant.language ||
        (variant.audio && variant.audio.language) ||
        (variant.video && variant.video.language);

    // No language was found, but we still want to return a valid string.
    return language ? LanguageUtils.normalize(language) : 'und';
  }

  /**
   * Find the locale in |searchSpace| that comes closest to |target|. If no
   * locale is found to be close to |target|, then |null| will be returned.
   *
   * Both |target| and the options are normalized before matching, and the
   * returned value is the normalized form of the matching option.
   *
   * @param {string} target
   * @param {!Iterable<string>} searchSpace
   * @return {?string}
   * @export
   */
  static findClosestLocale(target, searchSpace) {
    const LanguageUtils = shaka.util.LanguageUtils;

    /** @type {string} */
    const safeTarget = LanguageUtils.normalize(target);
    /** @type {!Set<string>} */
    const safeSearchSpace = new Set();
    for (const option of searchSpace) {
      safeSearchSpace.add(LanguageUtils.normalize(option));
    }

    // Ordered from most to least preferred.  Every option is tried against a
    // preference before moving on to the next preference.
    /** @type {!Array<function(string): boolean>} */
    const preferences = [
      // Preference 1 - The option is an exact match.  For example, "en-US" is
      // an exact match of "en-US".  So if there is an option that is an exact
      // match, it would be the best match possible.
      (option) => option === safeTarget,

      // Preference 2 - The option is the parent of the target.  For example,
      // "en" is the parent of "en-US".  So if there is an option with
      // "en", it should be good enough when our preference is "en-US".
      (option) => LanguageUtils.isParentOf(option, safeTarget),

      // Preference 3 - The option is a sibling of the target.  For example,
      // "en-US" is a sibling of "en-CA".  So if there is an option with
      // "en-CA", it should be good enough when our preference is "en-US".
      (option) => LanguageUtils.isSiblingOf(option, safeTarget),

      // Preference 4 - The option is a child of the target.  For example,
      // "en-US" is the child of "en".  So if there is an option with
      // "en-US", it should be good enough when our preference is "en".
      (option) => LanguageUtils.isParentOf(safeTarget, option),
    ];

    for (const matches of preferences) {
      for (const option of safeSearchSpace) {
        if (matches(option)) {
          return option;
        }
      }
    }

    // Failed to find anything.
    return null;
  }

  /**
   * Take a locale string and break it into its language and region
   * components.  Check that the components match what we would expect
   * internally for locales.  This should ONLY be used on locales that have
   * been normalized.
   *
   * A private-use suffix ("-x-...", which |normalize| preserves) is not a
   * language or region component, so it is stripped before splitting.
   *
   * @param {string} locale
   * @return {!Array<string>} [language] or [language, region]
   * @private
   */
  static disassembleLocale_(locale) {
    // |normalize| always emits the private-use marker as lower-case "-x-",
    // while regions are upper-cased, so a plain search is unambiguous.
    const privateUseStart = locale.indexOf('-x-');
    const languageRegion = privateUseStart >= 0 ?
        locale.substring(0, privateUseStart) : locale;

    const components = languageRegion.split('-');

    goog.asserts.assert(
        components.length <= 2,
        `Locales should not have more than 2 components. ${locale} ` +
        'has too many components.');

    return components;
  }
};
|
|
|
|
|
|
/**
 * Lookup table from 3-letter language codes (ISO 639-2) to the matching
 * 2-letter language codes (ISO 639-1), covering the languages that have both
 * in the registry.  Some languages are listed under two different 3-letter
 * codes (for example 'fra' and 'fre' both map to 'fr').
 *
 * Keys are lower case; entries are grouped by the first letter of the
 * 3-letter code.
 *
 * @const {!Map<string, string>}
 * @private
 */
shaka.util.LanguageUtils.isoMap_ = new Map([
  // a
  ['aar', 'aa'], ['abk', 'ab'], ['afr', 'af'], ['aka', 'ak'], ['alb', 'sq'],
  ['amh', 'am'], ['ara', 'ar'], ['arg', 'an'], ['arm', 'hy'], ['asm', 'as'],
  ['ava', 'av'], ['ave', 'ae'], ['aym', 'ay'], ['aze', 'az'],
  // b
  ['bak', 'ba'], ['bam', 'bm'], ['baq', 'eu'], ['bel', 'be'], ['ben', 'bn'],
  ['bih', 'bh'], ['bis', 'bi'], ['bod', 'bo'], ['bos', 'bs'], ['bre', 'br'],
  ['bul', 'bg'], ['bur', 'my'],
  // c
  ['cat', 'ca'], ['ces', 'cs'], ['cha', 'ch'], ['che', 'ce'], ['chi', 'zh'],
  ['chu', 'cu'], ['chv', 'cv'], ['cor', 'kw'], ['cos', 'co'], ['cre', 'cr'],
  ['cym', 'cy'], ['cze', 'cs'],
  // d
  ['dan', 'da'], ['deu', 'de'], ['div', 'dv'], ['dut', 'nl'], ['dzo', 'dz'],
  // e
  ['ell', 'el'], ['eng', 'en'], ['epo', 'eo'], ['est', 'et'], ['eus', 'eu'],
  ['ewe', 'ee'],
  // f
  ['fao', 'fo'], ['fas', 'fa'], ['fij', 'fj'], ['fin', 'fi'], ['fra', 'fr'],
  ['fre', 'fr'], ['fry', 'fy'], ['ful', 'ff'],
  // g
  ['geo', 'ka'], ['ger', 'de'], ['gla', 'gd'], ['gle', 'ga'], ['glg', 'gl'],
  ['glv', 'gv'], ['gre', 'el'], ['grn', 'gn'], ['guj', 'gu'],
  // h
  ['hat', 'ht'], ['hau', 'ha'], ['heb', 'he'], ['her', 'hz'], ['hin', 'hi'],
  ['hmo', 'ho'], ['hrv', 'hr'], ['hun', 'hu'], ['hye', 'hy'],
  // i
  ['ibo', 'ig'], ['ice', 'is'], ['ido', 'io'], ['iii', 'ii'], ['iku', 'iu'],
  ['ile', 'ie'], ['ina', 'ia'], ['ind', 'id'], ['ipk', 'ik'], ['isl', 'is'],
  ['ita', 'it'],
  // j
  ['jav', 'jv'], ['jpn', 'ja'],
  // k
  ['kal', 'kl'], ['kan', 'kn'], ['kas', 'ks'], ['kat', 'ka'], ['kau', 'kr'],
  ['kaz', 'kk'], ['khm', 'km'], ['kik', 'ki'], ['kin', 'rw'], ['kir', 'ky'],
  ['kom', 'kv'], ['kon', 'kg'], ['kor', 'ko'], ['kua', 'kj'], ['kur', 'ku'],
  // l
  ['lao', 'lo'], ['lat', 'la'], ['lav', 'lv'], ['lim', 'li'], ['lin', 'ln'],
  ['lit', 'lt'], ['ltz', 'lb'], ['lub', 'lu'], ['lug', 'lg'],
  // m
  ['mac', 'mk'], ['mah', 'mh'], ['mal', 'ml'], ['mao', 'mi'], ['mar', 'mr'],
  ['may', 'ms'], ['mkd', 'mk'], ['mlg', 'mg'], ['mlt', 'mt'], ['mon', 'mn'],
  ['mri', 'mi'], ['msa', 'ms'], ['mya', 'my'],
  // n
  ['nau', 'na'], ['nav', 'nv'], ['nbl', 'nr'], ['nde', 'nd'], ['ndo', 'ng'],
  ['nep', 'ne'], ['nld', 'nl'], ['nno', 'nn'], ['nob', 'nb'], ['nor', 'no'],
  ['nya', 'ny'],
  // o
  ['oci', 'oc'], ['oji', 'oj'], ['ori', 'or'], ['orm', 'om'], ['oss', 'os'],
  // p
  ['pan', 'pa'], ['per', 'fa'], ['pli', 'pi'], ['pol', 'pl'], ['por', 'pt'],
  ['pus', 'ps'],
  // q
  ['que', 'qu'],
  // r
  ['roh', 'rm'], ['ron', 'ro'], ['rum', 'ro'], ['run', 'rn'], ['rus', 'ru'],
  // s
  ['sag', 'sg'], ['san', 'sa'], ['sin', 'si'], ['slk', 'sk'], ['slo', 'sk'],
  ['slv', 'sl'], ['sme', 'se'], ['smo', 'sm'], ['sna', 'sn'], ['snd', 'sd'],
  ['som', 'so'], ['sot', 'st'], ['spa', 'es'], ['sqi', 'sq'], ['srd', 'sc'],
  ['srp', 'sr'], ['ssw', 'ss'], ['sun', 'su'], ['swa', 'sw'], ['swe', 'sv'],
  // t
  ['tah', 'ty'], ['tam', 'ta'], ['tat', 'tt'], ['tel', 'te'], ['tgk', 'tg'],
  ['tgl', 'tl'], ['tha', 'th'], ['tib', 'bo'], ['tir', 'ti'], ['ton', 'to'],
  ['tsn', 'tn'], ['tso', 'ts'], ['tuk', 'tk'], ['tur', 'tr'], ['twi', 'tw'],
  // u
  ['uig', 'ug'], ['ukr', 'uk'], ['urd', 'ur'], ['uzb', 'uz'],
  // v
  ['ven', 've'], ['vie', 'vi'], ['vol', 'vo'],
  // w
  ['wel', 'cy'], ['wln', 'wa'], ['wol', 'wo'],
  // x
  ['xho', 'xh'],
  // y
  ['yid', 'yi'], ['yor', 'yo'],
  // z
  ['zha', 'za'], ['zho', 'zh'], ['zul', 'zu'],
]);
|