Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for inline timestamps #102

Merged
merged 3 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions subtitles.go
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ func (l Line) String() string {
// LineItem represents a formatted line item
type LineItem struct {
InlineStyle *StyleAttributes
StartAt time.Duration
WithoutPants marked this conversation as resolved.
Show resolved Hide resolved
Style *Style
Text string
}
Expand Down
82 changes: 66 additions & 16 deletions webvtt.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ var (
bytesWebVTTItalicEndTag = []byte("</i>")
bytesWebVTTItalicStartTag = []byte("<i>")
bytesWebVTTTimeBoundariesSeparator = []byte(webvttTimeBoundariesSeparator)
webVTTRegexpInlineTimestamp = regexp.MustCompile(`<((?:\d{2,}:)?\d{2}:\d{2}\.\d{3})>`)
webVTTRegexpTag = regexp.MustCompile(`(</*\s*([^\.\s]+)(\.[^\s/]*)*\s*([^/]*)\s*/*>)`)
webVTTEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;")
webVTTUnescaper = strings.NewReplacer("&amp;", "&", "&lt;", "<")
Expand Down Expand Up @@ -357,26 +358,70 @@ func parseTextWebVTT(i string) (o Line) {
}

case html.TextToken:
if s := strings.TrimSpace(string(tr.Raw())); s != "" {
// Get style attribute
var sa *StyleAttributes
if len(webVTTTagStack) > 0 {
tags := make([]WebVTTTag, len(webVTTTagStack))
copy(tags, webVTTTagStack)
sa = &StyleAttributes{
WebVTTTags: tags,
}
sa.propagateWebVTTAttributes()
// Get style attribute
var sa *StyleAttributes
if len(webVTTTagStack) > 0 {
tags := make([]WebVTTTag, len(webVTTTagStack))
copy(tags, webVTTTagStack)
sa = &StyleAttributes{
WebVTTTags: tags,
}

// Append item
o.Items = append(o.Items, LineItem{
InlineStyle: sa,
Text: unescapeWebVTT(s),
})
sa.propagateWebVTTAttributes()
}

// Append items
o.Items = append(o.Items, parseTextWebVTTTextToken(sa, string(tr.Raw()))...)
}
}
return
}

// parseTextWebVTTTextToken converts the raw content of one text token into
// line items, using inline timestamps (e.g. <00:01:02.000>) as separators.
//
// Text located before the first timestamp (or the whole token when it holds
// no timestamp) becomes an item without StartAt. Every timestamp followed by
// non-blank text becomes an item whose StartAt is the parsed timestamp; a
// timestamp followed only by whitespace, another timestamp, or the end of the
// token yields no item. Each item shares the provided inline style and has its
// text trimmed and unescaped.
func parseTextWebVTTTextToken(sa *StyleAttributes, line string) (ret []LineItem) {
	// Locate every inline timestamp along with its capture group
	matches := webVTTRegexpInlineTimestamp.FindAllStringSubmatchIndex(line, -1)

	// Leading text stops at the first timestamp, or spans the whole token
	// when there is no timestamp at all
	leadingEnd := len(line)
	if len(matches) > 0 {
		leadingEnd = matches[0][0]
	}
	if text := strings.TrimSpace(line[:leadingEnd]); text != "" {
		ret = append(ret, LineItem{
			InlineStyle: sa,
			Text:        unescapeWebVTT(text),
		})
	}

	for idx := range matches {
		cur := matches[idx]

		// The segment runs up to the next timestamp, or to the end of the
		// token for the last one
		segmentEnd := len(line)
		if idx < len(matches)-1 {
			segmentEnd = matches[idx+1][0]
		}

		// Timestamps without any text attached are dropped
		text := strings.TrimSpace(line[cur[1]:segmentEnd])
		if text == "" {
			continue
		}

		// cur[2]:cur[3] is the first capture group, i.e. the timestamp
		// without its angle brackets. A parsing failure is only logged: the
		// item is still emitted with whatever duration the parser returned.
		rawTimestamp := line[cur[2]:cur[3]]
		startAt, err := parseDurationWebVTT(rawTimestamp)
		if err != nil {
			log.Printf("astisub: parsing webvtt duration %s failed, ignoring: %v", rawTimestamp, err)
		}

		ret = append(ret, LineItem{
			InlineStyle: sa,
			StartAt:     startAt,
			Text:        unescapeWebVTT(text),
		})
	}

	return ret
}

Expand Down Expand Up @@ -559,6 +604,11 @@ func (l Line) webVTTBytes() (c []byte) {
}

func (li LineItem) webVTTBytes() (c []byte) {
// Add timestamp
if li.StartAt > 0 {
c = append(c, []byte("<"+formatDurationWebVTT(li.StartAt)+">")...)
}

// Get color
var color string
if li.InlineStyle != nil && li.InlineStyle.TTMLColor != nil {
Expand Down
29 changes: 29 additions & 0 deletions webvtt_internal_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,35 @@ func TestParseTextWebVTT(t *testing.T) {
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "Incorrect end tag", s.Items[0].Text)
})

t.Run("When inline timestamps are included", func(t *testing.T) {
testData := `<00:01:01.000>With inline <00:01:02.000>timestamps`

s := parseTextWebVTT(testData)
assert.Equal(t, 2, len(s.Items))
assert.Equal(t, "With inline", s.Items[0].Text)
assert.Equal(t, time.Minute+time.Second, s.Items[0].StartAt)
assert.Equal(t, "timestamps", s.Items[1].Text)
assert.Equal(t, time.Minute+2*time.Second, s.Items[1].StartAt)
})

t.Run("When inline timestamps together", func(t *testing.T) {
testData := `<00:01:01.000><00:01:02.000>With timestamp tags together`

s := parseTextWebVTT(testData)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "With timestamp tags together", s.Items[0].Text)
assert.Equal(t, time.Minute+2*time.Second, s.Items[0].StartAt)
})

t.Run("When inline timestamps is at end", func(t *testing.T) {
testData := `With end timestamp<00:01:02.000>`

s := parseTextWebVTT(testData)
assert.Equal(t, 1, len(s.Items))
assert.Equal(t, "With end timestamp", s.Items[0].Text)
assert.Equal(t, time.Duration(0), s.Items[0].StartAt)
})
}

func TestTimestampMap(t *testing.T) {
Expand Down
11 changes: 9 additions & 2 deletions webvtt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,12 +179,15 @@ func TestWebVTTTags(t *testing.T) {
<customed_tag.class1.class2>Text here</customed_tag>

00:05:00.000 --> 00:06:00.000
<v Joe>Joe says something</v> <v Bob>Bob says something</v>`
<v Joe>Joe says something</v> <v Bob>Bob says something</v>

00:06:00.000 --> 00:07:00.000
Text with a <00:06:30.000>timestamp in the middle`

s, err := astisub.ReadFromWebVTT(strings.NewReader(testData))
require.NoError(t, err)

require.Len(t, s.Items, 5)
require.Len(t, s.Items, 6)

b := &bytes.Buffer{}
err = s.WriteToWebVTT(b)
Expand All @@ -210,5 +213,9 @@ func TestWebVTTTags(t *testing.T) {
5
00:05:00.000 --> 00:06:00.000
<v Joe>Joe says something Bob says something

6
00:06:00.000 --> 00:07:00.000
Text with a <00:06:30.000>timestamp in the middle
`, b.String())
}
Loading