Skip to content

Commit

Permalink
escape-unescape '<' character for WebVTT (#89)
Browse files (browse the repository at this point in the history)
* escape-unescape '<' character for WebVTT

* add test for escape-unescape '<' character for WebVTT

* correct places to escape-unescape webvtt text

---------

Co-authored-by: Nhan Nguyen <[email protected]>
  • Loading branch information
NhanNguyen700 and NhanNguyen700 authored Jul 26, 2023
1 parent 1e3a211 commit 95f3a94
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 12 deletions.
16 changes: 6 additions & 10 deletions webvtt.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ var (
bytesWebVTTItalicStartTag = []byte("<i>")
bytesWebVTTTimeBoundariesSeparator = []byte(webvttTimeBoundariesSeparator)
webVTTRegexpStartTag = regexp.MustCompile(`(<v([\.\w]*)(.+?)>)`)
webVTTEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;")
webVTTUnescaper = strings.NewReplacer("&amp;", "&", "&lt;", "<")
)

// parseDurationWebVTT parses a .vtt duration
Expand Down Expand Up @@ -113,9 +115,6 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
line = strings.TrimSpace(scanner.Text())
lineNum++

// Unescape line
line = unescapeWebVTT(line)

switch {
// Comment
case strings.HasPrefix(line, "NOTE "):
Expand Down Expand Up @@ -278,11 +277,11 @@ func ReadFromWebVTT(i io.Reader) (o *Subtitles, err error) {
}

// escapeWebVTT returns i with WebVTT-special characters replaced by their
// character references, for use when writing cue text.
//
// NOTE(review): this block comes from a rendered diff whose +/- markers were
// lost, so two return statements appear back to back. Presumably the first
// (strings.ReplaceAll, which only rewrites "&" to "&amp;") is the removed line
// and the second (webVTTEscaper, which rewrites "&" to "&amp;" and "<" to
// "&lt;") is the added line — confirm against the commit. Read literally as
// Go, only the first return executes and the second is unreachable.
func escapeWebVTT(i string) string {
return strings.ReplaceAll(i, "&", "&amp;")
return webVTTEscaper.Replace(i)
}

// unescapeWebVTT returns i with WebVTT character references converted back to
// the characters they stand for; it is the inverse of escapeWebVTT.
//
// NOTE(review): this block comes from a rendered diff whose +/- markers were
// lost, so two return statements appear back to back. Presumably the first
// (strings.ReplaceAll, which only rewrites "&amp;" to "&") is the removed line
// and the second (webVTTUnescaper, which rewrites "&amp;" to "&" and "&lt;" to
// "<") is the added line — confirm against the commit. Read literally as Go,
// only the first return executes and the second is unreachable.
func unescapeWebVTT(i string) string {
return strings.ReplaceAll(i, "&amp;", "&")
return webVTTUnescaper.Replace(i)
}

// parseTextWebVTT parses the input line to fill the Line
Expand Down Expand Up @@ -336,7 +335,7 @@ func parseTextWebVTT(i string) (o Line) {
// Append item
o.Items = append(o.Items, LineItem{
InlineStyle: sa,
Text: s,
Text: unescapeWebVTT(s),
})
}
}
Expand Down Expand Up @@ -487,9 +486,6 @@ func (s Subtitles) WriteToWebVTT(o io.Writer) (err error) {
// Remove last new line
c = c[:len(c)-1]

// Escape content
c = []byte(escapeWebVTT(string(c)))

// Write
if _, err = o.Write(c); err != nil {
err = fmt.Errorf("astisub: writing failed: %w", err)
Expand Down Expand Up @@ -530,7 +526,7 @@ func (li LineItem) webVTTBytes() (c []byte) {
if i {
c = append(c, []byte("<i>")...)
}
c = append(c, []byte(li.Text)...)
c = append(c, []byte(escapeWebVTT(li.Text))...)
if i {
c = append(c, []byte("</i>")...)
}
Expand Down
12 changes: 10 additions & 2 deletions webvtt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,17 @@ func TestWebVTTEscape(t *testing.T) {
testData := `WEBVTT
00:01:00.000 --> 00:02:00.000
Sentence with an &amp; in the middle`
Sentence with an &amp; in the middle
00:02:00.000 --> 00:03:00.000
Sentence with an &lt; in the middle`

s, err := astisub.ReadFromWebVTT(strings.NewReader(testData))
require.NoError(t, err)

require.Len(t, s.Items, 1)
require.Len(t, s.Items, 2)
require.Equal(t, "Sentence with an & in the middle", s.Items[0].String())
require.Equal(t, "Sentence with an < in the middle", s.Items[1].String())

b := &bytes.Buffer{}
err = s.WriteToWebVTT(b)
Expand All @@ -152,5 +156,9 @@ func TestWebVTTEscape(t *testing.T) {
1
00:01:00.000 --> 00:02:00.000
Sentence with an &amp; in the middle
2
00:02:00.000 --> 00:03:00.000
Sentence with an &lt; in the middle
`, b.String())
}

0 comments on commit 95f3a94

Please sign in to comment.