feat(SRT): Improve SRT to WebVTT conversion and color handling (#9624)

- Full SRT → WebVTT conversion now supports timestamps, alignment,
position, and basic styles (bold, italic, underline).
- Added proper handling for <font color="..."> tags:
  - Converts known colors to WebVTT <c.color> classes.
  - Safely removes unknown colors without leaving orphaned </c> tags.
- Supports Aegisub cues ({\anX} → line & align, {\pos(x,y)} → position &
line).
- Normalizes timestamps (MM:SS,mmm → 00:MM:SS.mmm) for WebVTT
compliance.
This commit is contained in:
Álvaro Velad Galván
2026-01-29 12:39:38 +01:00
committed by GitHub
parent 674e71d064
commit afd24224ce
4 changed files with 165 additions and 21 deletions
+114 -15
View File
@@ -8,6 +8,7 @@ goog.provide('shaka.text.SrtTextParser');
goog.require('goog.asserts');
goog.require('shaka.text.TextEngine');
goog.require('shaka.text.Utils');
goog.require('shaka.text.VttTextParser');
goog.require('shaka.util.BufferUtils');
goog.require('shaka.util.StringUtils');
@@ -90,32 +91,130 @@ shaka.text.SrtTextParser = class {
}
/**
* Convert a single SRT cue into a WebVTT cue.
* Handles: timestamps, alignment, position, styles, colors.
*
* @param {string} caption
* @return {string} WebVTT cue
* @private
*/
convertSrtCue_(caption) {
const lines = caption.split(/\n/);
// Split cue into non-empty trimmed lines
const lines = caption.split('\n').map((l) => l.trim()).filter(Boolean);
if (lines.length < 2) {
return '';
}
// detect and skip numeric identifier
if (lines[0].match(/\d+/)) {
// 1. Remove numeric ID if present
if (/^\d+$/.test(lines[0])) {
lines.shift();
}
// convert time codes
lines[0] = lines[0].replace(/,/g, '.');
if (lines.length < 2) {
return '';
}
const webvttCue = lines.join('\n')
.replace(/{b}/g, '<b>')
.replace(/{\/b}/g, '</b>')
.replace(/{i}/g, '<i>')
.replace(/{\/i}/g, '</i>')
.replace(/{u}/g, '<u>')
.replace(/{\/u}/g, '</u>');
// 2. Parse time line (start --> end [settings])
const timeRegex = /^([\d:,]+)\s*-->\s*([\d:,]+)(.*)?$/;
const match = lines[0].match(timeRegex);
if (!match) {
return '';
}
return webvttCue + '\n\n';
const start = this.normalizeTime_(match[1]);
const end = this.normalizeTime_(match[2]);
let settings = '';
// 3. Combine remaining lines as cue text
let text = lines.slice(1).join('\n');
// 4. Aegisub alignment {\anX} → WebVTT line & align settings
const alignMatch = text.match(/{\\an(\d)}/);
if (alignMatch) {
const map = {
1: 'line:-1 align:left',
2: 'line:-1 align:center',
3: 'line:-1 align:right',
7: 'line:0 align:left',
8: 'line:0 align:center',
9: 'line:0 align:right',
};
settings += map[alignMatch[1]] ? ` ${map[alignMatch[1]]}` : '';
}
// 5. Aegisub position {\pos(x,y)} → WebVTT position & line
const posMatch = text.match(/{\\pos\((\d+),(\d+)\)}/);
if (posMatch) {
// Convert coordinates to percentages (approximation)
const x = Math.min(100, Math.round(parseFloat(posMatch[1]) / 19.2));
const y = Math.min(100, Math.round(parseFloat(posMatch[2]) / 10.8));
settings += ` position:${x}% line:${y}%`;
}
// 6. Remove all remaining Aegisub/unsupported tags
text = text.replace(/{\\.*?}/g, '');
// 7. Convert basic SRT style tags {b}{/b}, {i}{/i}, {u}{/u} → HTML
text = text
.replace(/{b}/gi, '<b>')
.replace(/{\/b}/gi, '</b>')
.replace(/{i}/gi, '<i>')
.replace(/{\/i}/gi, '</i>')
.replace(/{u}/gi, '<u>')
.replace(/{\/u}/gi, '</u>');
// 8. Convert <font color="#XXXXXX"> → <c.colorName> (WebVTT spec)
text = this.convertColors_(text);
// 9. Return formatted WebVTT cue
return `${start} --> ${end}${settings}\n${text}\n\n`;
}
/**
* Normalize timestamp for WebVTT
* Supports MM:SS,mmm → 00:MM:SS.mmm
*
* @param {string} time
* @return {string}
* @private
*/
normalizeTime_(time) {
if (/^\d{2}:\d{2},\d{3}$/.test(time)) {
return '00:' + time.replace(',', '.');
}
return time.replace(',', '.');
}
/**
* Convert SRT <font color="#XXXXXX"> or <font color="name"> tags
* into WebVTT <c.colorName>. Unknown colors are removed safely.
*
* @param {string} text
* @return {string}
* @private
*/
convertColors_(text) {
const openColors = [];
text = text.replace(/<font color=["']?([^"'>]+)["']?>/gi, (_, color) => {
const key = color.toLowerCase();
const colorName = shaka.text.Utils.getColorName(key);
if (colorName) {
openColors.push(colorName);
return `<c.${colorName}>`;
}
return '';
});
text = text.replace(/<\/font>/gi, () => {
if (openColors.length) {
openColors.pop();
return '</c>';
}
return '';
});
return text;
}
};
+3 -4
View File
@@ -58,7 +58,7 @@ shaka.text.Utils = class {
color = parentCue.color;
}
let classes = '';
const colorName = shaka.text.Utils.getColorName_(color);
const colorName = shaka.text.Utils.getColorName(color);
if (colorName) {
classes += `.${colorName}`;
}
@@ -66,7 +66,7 @@ shaka.text.Utils = class {
if (bgColor == '' && parentCue) {
bgColor = parentCue.backgroundColor;
}
const bgColorName = shaka.text.Utils.getColorName_(bgColor);
const bgColorName = shaka.text.Utils.getColorName(bgColor);
if (bgColorName) {
classes += `.bg_${bgColorName}`;
}
@@ -84,9 +84,8 @@ shaka.text.Utils = class {
*
* @param {string} string
* @return {?string}
* @private
*/
static getColorName_(string) {
static getColorName(string) {
let colorString = string.toLowerCase();
const rgb = colorString.replace(/\s/g, '')
.match(/^rgba?\((\d+),(\d+),(\d+),?([^,\s)]+)?/i);
+1
View File
@@ -500,6 +500,7 @@ cmsds
cdnb
# other
Aegisub
autoglottonym
avinfo
awesomplete
+47 -2
View File
@@ -121,6 +121,39 @@ describe('SrtTextParser', () => {
},
],
},
{
startTime: 50,
endTime: 60,
payload: '',
nestedCues: [
{
startTime: 50,
endTime: 60,
payload: 'Hex color',
color: 'yellow',
},
],
},
{
startTime: 60,
endTime: 70,
payload: 'Unknown color',
},
{
startTime: 70,
endTime: 80,
payload: 'Aligned bottom-left',
line: -1,
lineInterpretation: Cue.lineInterpretation.LINE_NUMBER,
textAlign: 'left',
},
{
startTime: 80,
endTime: 90,
payload: 'Positioned cue',
line: 50,
position: 50,
},
],
'1\n' +
'00:00:10,000 --> 00:00:20,000\n' +
@@ -130,10 +163,22 @@ describe('SrtTextParser', () => {
'{i}Test2{/i}\n\n' +
'3\n' +
'00:00:30,000 --> 00:00:40,000\n' +
'{u}Test3{/u}\n\n'+
'{u}Test3{/u}\n\n' +
'4\n' +
'00:00:40,000 --> 00:00:50,000\n' +
'<font color="red">Test4</font>',
'<font color="red">Test4</font>\n\n' +
'5\n' +
'00:00:50,000 --> 00:01:00,000\n' +
'<font color="#FFFF00">Hex color</font>\n\n' +
'6\n' +
'00:01:00,000 --> 00:01:10,000\n' +
'<font color="unknown">Unknown color</font>\n\n' +
'7\n' +
'00:01:10,000 --> 00:01:20,000\n' +
'{\\an1}Aligned bottom-left\n\n' +
'8\n' +
'00:01:20,000 --> 00:01:30,000\n' +
'{\\pos(960,540)}Positioned cue',
{periodStart: 0, segmentStart: 0, segmentEnd: 0, vttOffset: 0});
});