Skip to content

Commit

Permalink
Add support for inline timestamps (#102)
Browse files Browse the repository at this point in the history
* Add support for inline timestamps

* Adjustments per review

* Return slice
  • Loading branch information
WithoutPants authored Mar 22, 2024
1 parent 967700d commit 90151b9
Show file tree
Hide file tree
Showing 4 changed files with 105 additions and 18 deletions.
1 change: 1 addition & 0 deletions subtitles.go
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ func (l Line) String() string {
// LineItem represents a formatted line item: one run of text within a
// subtitle line, together with its styling and optional start time.
type LineItem struct {
// InlineStyle holds style attributes attached directly to this item. The
// WebVTT parser fills it from the tags open around the text and leaves it
// nil when no tag is open.
InlineStyle *StyleAttributes
// StartAt is the time carried by an inline timestamp (for example
// <00:01:02.000> in WebVTT) that precedes this item's text. It is left at
// zero when the text has no preceding timestamp, and the WebVTT writer only
// emits a timestamp tag when the value is greater than zero.
StartAt time.Duration
// Style is presumably a reference to a named style shared across items.
// NOTE(review): not exercised by the code visible here; confirm.
Style *Style
// Text is the item's textual content. The WebVTT parser stores it trimmed
// of surrounding whitespace and with entities unescaped.
Text string
}
Expand Down
82 changes: 66 additions & 16 deletions webvtt.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ var (
bytesWebVTTItalicEndTag = []byte("</i>")
bytesWebVTTItalicStartTag = []byte("<i>")
bytesWebVTTTimeBoundariesSeparator = []byte(webvttTimeBoundariesSeparator)
webVTTRegexpInlineTimestamp = regexp.MustCompile(`<((?:\d{2,}:)?\d{2}:\d{2}\.\d{3})>`)
webVTTRegexpTag = regexp.MustCompile(`(</*\s*([^\.\s]+)(\.[^\s/]*)*\s*([^/]*)\s*/*>)`)
webVTTEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;")
webVTTUnescaper = strings.NewReplacer("&amp;", "&", "&lt;", "<")
Expand Down Expand Up @@ -357,26 +358,70 @@ func parseTextWebVTT(i string) (o Line) {
}

case html.TextToken:
if s := strings.TrimSpace(string(tr.Raw())); s != "" {
// Get style attribute
var sa *StyleAttributes
if len(webVTTTagStack) > 0 {
tags := make([]WebVTTTag, len(webVTTTagStack))
copy(tags, webVTTTagStack)
sa = &StyleAttributes{
WebVTTTags: tags,
}
sa.propagateWebVTTAttributes()
// Get style attribute
var sa *StyleAttributes
if len(webVTTTagStack) > 0 {
tags := make([]WebVTTTag, len(webVTTTagStack))
copy(tags, webVTTTagStack)
sa = &StyleAttributes{
WebVTTTags: tags,
}

// Append item
o.Items = append(o.Items, LineItem{
InlineStyle: sa,
Text: unescapeWebVTT(s),
})
sa.propagateWebVTTAttributes()
}

// Append items
o.Items = append(o.Items, parseTextWebVTTTextToken(sa, string(tr.Raw()))...)
}
}
return
}

// parseTextWebVTTTextToken converts the raw content of a single text token
// into line items, cutting it at every inline timestamp (e.g. <00:01:02.000>).
//
// Each returned item carries sa as its inline style and holds trimmed,
// unescaped text. Text located before the first timestamp (or the whole token
// when it contains no timestamp) gets no StartAt; text following a timestamp
// gets that timestamp as StartAt. Segments that are blank after trimming are
// dropped, so a timestamp directly followed by another timestamp, or sitting
// at the very end of the token, yields no item. The result is nil when
// nothing is left to emit.
func parseTextWebVTTTextToken(sa *StyleAttributes, line string) (ret []LineItem) {
	// Every match holds [fullStart, fullEnd, groupStart, groupEnd], where the
	// capture group is the timestamp without its angle brackets.
	matches := webVTTRegexpInlineTimestamp.FindAllStringSubmatchIndex(line, -1)

	// Leading text: everything up to the first timestamp, or the whole token
	// when there is no timestamp at all.
	leading := line
	if len(matches) > 0 {
		leading = line[:matches[0][0]]
	}
	if text := strings.TrimSpace(leading); text != "" {
		ret = append(ret, LineItem{
			InlineStyle: sa,
			Text:        unescapeWebVTT(text),
		})
	}

	for idx := range matches {
		current := matches[idx]

		// The segment runs from the end of this timestamp up to the start of
		// the next one, or to the end of the token for the last timestamp.
		stop := len(line)
		if idx < len(matches)-1 {
			stop = matches[idx+1][0]
		}

		text := strings.TrimSpace(line[current[1]:stop])
		if text == "" {
			// Nothing to attach this timestamp to
			continue
		}

		// A parsing failure is only logged: the item is still emitted with
		// whatever duration the parser handed back.
		rawTimestamp := line[current[2]:current[3]]
		startAt, err := parseDurationWebVTT(rawTimestamp)
		if err != nil {
			log.Printf("astisub: parsing webvtt duration %s failed, ignoring: %v", rawTimestamp, err)
		}

		ret = append(ret, LineItem{
			InlineStyle: sa,
			StartAt:     startAt,
			Text:        unescapeWebVTT(text),
		})
	}

	return ret
}

Expand Down Expand Up @@ -559,6 +604,11 @@ func (l Line) webVTTBytes() (c []byte) {
}

func (li LineItem) webVTTBytes() (c []byte) {
// Add timestamp
if li.StartAt > 0 {
c = append(c, []byte("<"+formatDurationWebVTT(li.StartAt)+">")...)
}

// Get color
var color string
if li.InlineStyle != nil && li.InlineStyle.TTMLColor != nil {
Expand Down
29 changes: 29 additions & 0 deletions webvtt_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,35 @@ func TestParseTextWebVTT(t *testing.T) {
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "Incorrect end tag", s.Items[0].Text)
})

t.Run("When inline timestamps are included", func(t *testing.T) {
testData := `<00:01:01.000>With inline <00:01:02.000>timestamps`

s := parseTextWebVTT(testData)
assert.Equal(t, 2, len(s.Items))
assert.Equal(t, "With inline", s.Items[0].Text)
assert.Equal(t, time.Minute+time.Second, s.Items[0].StartAt)
assert.Equal(t, "timestamps", s.Items[1].Text)
assert.Equal(t, time.Minute+2*time.Second, s.Items[1].StartAt)
})

t.Run("When inline timestamps together", func(t *testing.T) {
testData := `<00:01:01.000><00:01:02.000>With timestamp tags together`

s := parseTextWebVTT(testData)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "With timestamp tags together", s.Items[0].Text)
assert.Equal(t, time.Minute+2*time.Second, s.Items[0].StartAt)
})

t.Run("When inline timestamps is at end", func(t *testing.T) {
testData := `With end timestamp<00:01:02.000>`

s := parseTextWebVTT(testData)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "With end timestamp", s.Items[0].Text)
assert.Equal(t, time.Duration(0), s.Items[0].StartAt)
})
}

func TestTimestampMap(t *testing.T) {
Expand Down
11 changes: 9 additions & 2 deletions webvtt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,15 @@ func TestWebVTTTags(t *testing.T) {
<customed_tag.class1.class2>Text here</customed_tag>
00:05:00.000 --> 00:06:00.000
<v Joe>Joe says something</v> <v Bob>Bob says something</v>`
<v Joe>Joe says something</v> <v Bob>Bob says something</v>
00:06:00.000 --> 00:07:00.000
Text with a <00:06:30.000>timestamp in the middle`

s, err := astisub.ReadFromWebVTT(strings.NewReader(testData))
require.NoError(t, err)

require.Len(t, s.Items, 5)
require.Len(t, s.Items, 6)

b := &bytes.Buffer{}
err = s.WriteToWebVTT(b)
Expand All @@ -210,5 +213,9 @@ func TestWebVTTTags(t *testing.T) {
5
00:05:00.000 --> 00:06:00.000
<v Joe>Joe says something Bob says something
6
00:06:00.000 --> 00:07:00.000
Text with a <00:06:30.000>timestamp in the middle
`, b.String())
}

0 comments on commit 90151b9

Please sign in to comment.