diff --git a/lib/text/srt_text_parser.js b/lib/text/srt_text_parser.js index e5f5afa76..aaf0b5c3b 100644 --- a/lib/text/srt_text_parser.js +++ b/lib/text/srt_text_parser.js @@ -8,6 +8,7 @@ goog.provide('shaka.text.SrtTextParser'); goog.require('goog.asserts'); goog.require('shaka.text.TextEngine'); +goog.require('shaka.text.Utils'); goog.require('shaka.text.VttTextParser'); goog.require('shaka.util.BufferUtils'); goog.require('shaka.util.StringUtils'); @@ -90,32 +91,130 @@ shaka.text.SrtTextParser = class { } /** - * Convert a SRT cue into WebVTT cue + * Convert a single SRT cue into a WebVTT cue + * Handles: timestamps, alignment, position, styles, colors. * * @param {string} caption - * @return {string} + * @return {string} WebVTT cue * @private */ convertSrtCue_(caption) { - const lines = caption.split(/\n/); + // Split cue into non-empty trimmed lines + const lines = caption.split('\n').map((l) => l.trim()).filter(Boolean); + if (lines.length < 2) { + return ''; + } - // detect and skip numeric identifier - if (lines[0].match(/\d+/)) { + // 1. Remove numeric ID if present + if (/^\d+$/.test(lines[0])) { lines.shift(); } - // convert time codes - lines[0] = lines[0].replace(/,/g, '.'); + if (lines.length < 2) { + return ''; + } - const webvttCue = lines.join('\n') - .replace(/{b}/g, '<b>') - .replace(/{\/b}/g, '</b>') - .replace(/{i}/g, '<i>') - .replace(/{\/i}/g, '</i>') - .replace(/{u}/g, '<u>') - .replace(/{\/u}/g, '</u>'); + // 2. Parse time line (start --> end [settings]) + const timeRegex = /^([\d:,]+)\s*-->\s*([\d:,]+)(.*)?$/; + const match = lines[0].match(timeRegex); + if (!match) { + return ''; + } - return webvttCue + '\n\n'; + const start = this.normalizeTime_(match[1]); + const end = this.normalizeTime_(match[2]); + let settings = ''; + + // 3. Combine remaining lines as cue text + let text = lines.slice(1).join('\n'); + + // 4. 
Aegisub alignment {\anX} → WebVTT line & align settings + const alignMatch = text.match(/{\\an(\d)}/); + if (alignMatch) { + const map = { + 1: 'line:-1 align:left', + 2: 'line:-1 align:center', + 3: 'line:-1 align:right', + 7: 'line:0 align:left', + 8: 'line:0 align:center', + 9: 'line:0 align:right', + }; + settings += map[alignMatch[1]] ? ` ${map[alignMatch[1]]}` : ''; + } + + // 5. Aegisub position {\pos(x,y)} → WebVTT position & line + const posMatch = text.match(/{\\pos\((\d+),(\d+)\)}/); + if (posMatch) { + // Convert coordinates to percentages (approximation) + const x = Math.min(100, Math.round(parseFloat(posMatch[1]) / 19.2)); + const y = Math.min(100, Math.round(parseFloat(posMatch[2]) / 10.8)); + settings += ` position:${x}% line:${y}%`; + } + + // 6. Remove all remaining Aegisub/unsupported tags + text = text.replace(/{\\.*?}/g, ''); + + // 7. Convert basic SRT style tags {b}{/b}, {i}{/i}, {u}{/u} → HTML + text = text + .replace(/{b}/gi, '<b>') + .replace(/{\/b}/gi, '</b>') + .replace(/{i}/gi, '<i>') + .replace(/{\/i}/gi, '</i>') + .replace(/{u}/gi, '<u>') + .replace(/{\/u}/gi, '</u>'); + + // 8. Convert <font color> tags to <c.color> (WebVTT spec) + text = this.convertColors_(text); + + // 9. Return formatted WebVTT cue + return `${start} --> ${end}${settings}\n${text}\n\n`; + } + + /** + * Normalize timestamp for WebVTT + * Supports MM:SS,mmm → 00:MM:SS.mmm + * + * @param {string} time + * @return {string} + * @private + */ + normalizeTime_(time) { + if (/^\d{2}:\d{2},\d{3}$/.test(time)) { + return '00:' + time.replace(',', '.'); + } + return time.replace(',', '.'); + } + + /** + * Convert SRT <font color="..."> or <font color=...> tags + * into WebVTT <c.color>. Unknown colors are removed safely. 
+ * + * @param {string} text + * @return {string} + * @private + */ + convertColors_(text) { + const openColors = []; + + text = text.replace(/<font\s+color=["']?([^"'>]+)["']?>/gi, (_, color) => { + const key = color.toLowerCase(); + const colorName = shaka.text.Utils.getColorName(key); + if (colorName) { + openColors.push(colorName); + return `<c.${colorName}>`; + } + return ''; + }); + + text = text.replace(/<\/font>/gi, () => { + if (openColors.length) { + openColors.pop(); + return '</c>'; + } + return ''; + }); + + return text; } }; diff --git a/lib/text/text_utils.js b/lib/text/text_utils.js index c49cf4987..c1d03fffc 100644 --- a/lib/text/text_utils.js +++ b/lib/text/text_utils.js @@ -58,7 +58,7 @@ shaka.text.Utils = class { color = parentCue.color; } let classes = ''; - const colorName = shaka.text.Utils.getColorName_(color); + const colorName = shaka.text.Utils.getColorName(color); if (colorName) { classes += `.${colorName}`; } @@ -66,7 +66,7 @@ shaka.text.Utils = class { if (bgColor == '' && parentCue) { bgColor = parentCue.backgroundColor; } - const bgColorName = shaka.text.Utils.getColorName_(bgColor); + const bgColorName = shaka.text.Utils.getColorName(bgColor); if (bgColorName) { classes += `.bg_${bgColorName}`; } @@ -84,9 +84,8 @@ shaka.text.Utils = class { * * @param {string} string * @return {?string} - * @private */ - static getColorName_(string) { + static getColorName(string) { let colorString = string.toLowerCase(); const rgb = colorString.replace(/\s/g, '') .match(/^rgba?\((\d+),(\d+),(\d+),?([^,\s)]+)?/i); diff --git a/project-words.txt b/project-words.txt index d2a00f3f4..b3ad0756b 100644 --- a/project-words.txt +++ b/project-words.txt @@ -500,6 +500,7 @@ cmsds cdnb # other +Aegisub autoglottonym avinfo awesomplete diff --git a/test/text/srt_text_parser_unit.js b/test/text/srt_text_parser_unit.js index ea905dddd..1c37faa95 100644 --- a/test/text/srt_text_parser_unit.js +++ b/test/text/srt_text_parser_unit.js @@ -121,6 +121,39 @@ describe('SrtTextParser', () => { }, ], }, + { + 
startTime: 50, + endTime: 60, + payload: '', + nestedCues: [ + { + startTime: 50, + endTime: 60, + payload: 'Hex color', + color: 'yellow', + }, + ], + }, + { + startTime: 60, + endTime: 70, + payload: 'Unknown color', + }, + { + startTime: 70, + endTime: 80, + payload: 'Aligned bottom-left', + line: -1, + lineInterpretation: Cue.lineInterpretation.LINE_NUMBER, + textAlign: 'left', + }, + { + startTime: 80, + endTime: 90, + payload: 'Positioned cue', + line: 50, + position: 50, + }, ], '1\n' + '00:00:10,000 --> 00:00:20,000\n' + @@ -130,10 +163,22 @@ describe('SrtTextParser', () => { '{i}Test2{/i}\n\n' + '3\n' + '00:00:30,000 --> 00:00:40,000\n' + - '{u}Test3{/u}\n\n'+ + '{u}Test3{/u}\n\n' + '4\n' + '00:00:40,000 --> 00:00:50,000\n' + - 'Test4', + 'Test4\n\n' + + '5\n' + + '00:00:50,000 --> 00:01:00,000\n' + + '<font color="#FFFF00">Hex color</font>\n\n' + + '6\n' + + '00:01:00,000 --> 00:01:10,000\n' + + '<font color="unknowncolor">Unknown color</font>\n\n' + + '7\n' + + '00:01:10,000 --> 00:01:20,000\n' + + '{\\an1}Aligned bottom-left\n\n' + + '8\n' + + '00:01:20,000 --> 00:01:30,000\n' + + '{\\pos(960,540)}Positioned cue', {periodStart: 0, segmentStart: 0, segmentEnd: 0, vttOffset: 0}); });