Skip to content

Commit

Permalink
Add support for inline timestamps
Browse files Browse the repository at this point in the history
  • Loading branch information
WithoutPants committed Mar 20, 2024
1 parent 6c644f7 commit 84b15ee
Show file tree
Hide file tree
Showing 4 changed files with 112 additions and 18 deletions.
1 change: 1 addition & 0 deletions subtitles.go
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ func (l Line) String() string {
// LineItem represents one run of text inside a Line, together with the
// styling and (optional) timing that apply to that run.
type LineItem struct {
// InlineStyle holds style attributes attached directly to this item. The
// WebVTT parser fills it from the stack of currently open tags and leaves
// it nil when no tag is open.
InlineStyle *StyleAttributes
// Style is a reference to a style definition.
// NOTE(review): presumably a shared/named style declared elsewhere in the
// document — not exercised by the WebVTT code visible here; confirm.
Style *Style
// StartAt is the time carried by a WebVTT inline timestamp tag
// (e.g. <00:01:02.000>) that immediately precedes this item's text. The
// zero value means no inline timestamp; the WebVTT writer only emits the
// tag when StartAt > 0.
StartAt time.Duration
// Text is the item's text content. The WebVTT parser stores it trimmed of
// surrounding whitespace and unescaped.
Text string
}

Expand Down
89 changes: 73 additions & 16 deletions webvtt.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ var (
bytesWebVTTItalicStartTag = []byte("<i>")
bytesWebVTTTimeBoundariesSeparator = []byte(webvttTimeBoundariesSeparator)
webVTTRegexpTag = regexp.MustCompile(`(</*\s*([^\.\s]+)(\.[^\s/]*)*\s*([^/]*)\s*/*>)`)
inlineTimestampRegexp = regexp.MustCompile(`<((?:\d{2,}:)?\d{2}:\d{2}\.\d{3})>`)
webVTTEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;")
webVTTUnescaper = strings.NewReplacer("&amp;", "&", "&lt;", "<")
)
Expand Down Expand Up @@ -357,29 +358,80 @@ func parseTextWebVTT(i string) (o Line) {
}

case html.TextToken:
if s := strings.TrimSpace(string(tr.Raw())); s != "" {
// Get style attribute
var sa *StyleAttributes
if len(webVTTTagStack) > 0 {
tags := make([]WebVTTTag, len(webVTTTagStack))
copy(tags, webVTTTagStack)
sa = &StyleAttributes{
WebVTTTags: tags,
}
sa.propagateWebVTTAttributes()
s := string(tr.Raw())
// Get style attribute
var sa *StyleAttributes
if len(webVTTTagStack) > 0 {
tags := make([]WebVTTTag, len(webVTTTagStack))
copy(tags, webVTTTagStack)
sa = &StyleAttributes{
WebVTTTags: tags,
}

// Append item
o.Items = append(o.Items, LineItem{
InlineStyle: sa,
Text: unescapeWebVTT(s),
})
sa.propagateWebVTTAttributes()
}

// Append item
items := parseTextWebVTTText(sa, s)
o.Items = append(o.Items, items...)
}
}
return
}

// parseTextWebVTTText converts one raw WebVTT text chunk into line items,
// cutting the chunk at every inline timestamp tag (e.g. <00:01:02.000>).
//
// sa is the inline style shared by every item produced from this chunk; it may
// be nil. line is the raw (still escaped) text.
//
// Text that precedes the first timestamp yields an item without StartAt. Each
// timestamp is attached to the text that follows it, up to the next timestamp
// or the end of the chunk. Segments that are blank after trimming are dropped,
// so a trailing timestamp, or one directly followed by another timestamp,
// produces no item. A timestamp that fails to parse is logged and its text is
// still emitted with whatever duration the parser returned.
//
// The result is nil when the chunk holds no non-blank text.
func parseTextWebVTTText(sa *StyleAttributes, line string) []LineItem {
	var items []LineItem

	// Locate every inline timestamp; each match carries the bounds of the
	// whole tag ([0], [1]) and of the captured time value ([2], [3]).
	matches := inlineTimestampRegexp.FindAllStringSubmatchIndex(line, -1)

	// No timestamps at all: the whole chunk is a single untimed item.
	if len(matches) == 0 {
		if text := strings.TrimSpace(line); text != "" {
			items = append(items, LineItem{
				InlineStyle: sa,
				Text:        unescapeWebVTT(text),
			})
		}
		return items
	}

	// Untimed text that sits in front of the first timestamp.
	if text := strings.TrimSpace(line[:matches[0][0]]); text != "" {
		items = append(items, LineItem{
			InlineStyle: sa,
			Text:        unescapeWebVTT(text),
		})
	}

	last := len(matches) - 1
	for idx := range matches {
		m := matches[idx]

		// The segment runs from the end of this tag to the start of the next
		// tag, or to the end of the chunk for the final tag.
		stop := len(line)
		if idx < last {
			stop = matches[idx+1][0]
		}

		text := strings.TrimSpace(line[m[1]:stop])
		if text == "" {
			// Nothing to attach this timestamp to.
			continue
		}

		// Parse the captured time value; a failure is reported but does not
		// discard the text.
		rawTS := line[m[2]:m[3]]
		startAt, err := parseDurationWebVTT(rawTS)
		if err != nil {
			log.Printf("astisub: parsing webvtt duration %s failed, ignoring: %v", rawTS, err)
		}

		items = append(items, LineItem{
			InlineStyle: sa,
			Text:        unescapeWebVTT(text),
			StartAt:     startAt,
		})
	}

	return items
}

// formatDurationWebVTT formats a .vtt duration
func formatDurationWebVTT(i time.Duration) string {
return formatDuration(i, ".", 3)
Expand Down Expand Up @@ -559,6 +611,11 @@ func (l Line) webVTTBytes() (c []byte) {
}

func (li LineItem) webVTTBytes() (c []byte) {
// Add timestamp
if li.StartAt > 0 {
c = append(c, []byte("<"+formatDurationWebVTT(li.StartAt)+">")...)
}

// Get color
var color string
if li.InlineStyle != nil && li.InlineStyle.TTMLColor != nil {
Expand Down
29 changes: 29 additions & 0 deletions webvtt_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,35 @@ func TestParseTextWebVTT(t *testing.T) {
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "Incorrect end tag", s.Items[0].Text)
})

t.Run("When inline timestamps are included", func(t *testing.T) {
testData := `<00:01:01.000>With inline <00:01:02.000>timestamps`

s := parseTextWebVTT(testData)
assert.Equal(t, 2, len(s.Items))
assert.Equal(t, "With inline", s.Items[0].Text)
assert.Equal(t, time.Minute+time.Second, s.Items[0].StartAt)
assert.Equal(t, "timestamps", s.Items[1].Text)
assert.Equal(t, time.Minute+2*time.Second, s.Items[1].StartAt)
})

t.Run("When inline timestamps together", func(t *testing.T) {
testData := `<00:01:01.000><00:01:02.000>With timestamp tags together`

s := parseTextWebVTT(testData)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "With timestamp tags together", s.Items[0].Text)
assert.Equal(t, time.Minute+2*time.Second, s.Items[0].StartAt)
})

t.Run("When inline timestamps is at end", func(t *testing.T) {
testData := `With end timestamp<00:01:02.000>`

s := parseTextWebVTT(testData)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "With end timestamp", s.Items[0].Text)
assert.Equal(t, time.Duration(0), s.Items[0].StartAt)
})
}

func TestTimestampMap(t *testing.T) {
Expand Down
11 changes: 9 additions & 2 deletions webvtt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,15 @@ func TestWebVTTTags(t *testing.T) {
<customed_tag.class1.class2>Text here</customed_tag>
00:05:00.000 --> 00:06:00.000
<v Joe>Joe says something</v> <v Bob>Bob says something</v>`
<v Joe>Joe says something</v> <v Bob>Bob says something</v>
00:06:00.000 --> 00:07:00.000
Text with a <00:06:30.000>timestamp in the middle`

s, err := astisub.ReadFromWebVTT(strings.NewReader(testData))
require.NoError(t, err)

require.Len(t, s.Items, 5)
require.Len(t, s.Items, 6)

b := &bytes.Buffer{}
err = s.WriteToWebVTT(b)
Expand All @@ -210,5 +213,9 @@ func TestWebVTTTags(t *testing.T) {
5
00:05:00.000 --> 00:06:00.000
<v Joe>Joe says something Bob says something
6
00:06:00.000 --> 00:07:00.000
Text with a <00:06:30.000>timestamp in the middle
`, b.String())
}

0 comments on commit 84b15ee

Please sign in to comment.