Skip to content

Commit

Permalink
Preserve br tag in ttml
Browse files Browse the repository at this point in the history
  • Loading branch information
NhanNguyen700 committed May 31, 2024
1 parent 2d03157 commit 61fc9b9
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 2 deletions.
36 changes: 35 additions & 1 deletion ttml.go
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,39 @@ func (i *TTMLInItems) UnmarshalXML(d *xml.Decoder, start xml.StartElement) (err
return nil
}

// handleBrTokenReader is an xml.TokenReader that surfaces "br" elements as
// newline character data. It is used only for decoding TTMLInItems, do not use
// it anywhere else.
type handleBrTokenReader struct {
	// Decoder is the underlying XML decoder the tokens are read from. It is
	// embedded by pointer so the decoder's internal state is never copied.
	*xml.Decoder
	// holdingToken is the "br" start element that was withheld while a newline
	// was returned in its place; the next call to Token hands it out.
	holdingToken xml.Token
}

// Compile-time check that handleBrTokenReader satisfies xml.TokenReader.
var _ xml.TokenReader = (*handleBrTokenReader)(nil)

// Token implements the xml.TokenReader interface. When it meets a "br" start
// element (the local name is matched case-insensitively), it holds that token
// back and returns a newline as character data instead; the held token is then
// returned by the following call. This works around the fact that the Go XML
// unmarshaler ignores a "br" tag that sits within a character data field.
//
// Errors from the underlying decoder are returned unchanged with a nil token.
func (r *handleBrTokenReader) Token() (xml.Token, error) {
	// Release the "br" start element withheld by the previous call.
	if r.holdingToken != nil {
		returnToken := r.holdingToken
		r.holdingToken = nil
		return returnToken, nil
	}

	t, err := r.Decoder.Token()
	if err != nil {
		return nil, err
	}

	// strings.EqualFold compares case-insensitively without allocating a
	// lowered copy of the element name.
	if se, ok := t.(xml.StartElement); ok && strings.EqualFold(se.Name.Local, "br") {
		r.holdingToken = t
		return xml.CharData("\n"), nil
	}

	return t, nil
}

// newHandleBrTokenReader returns an xml.TokenReader that reads XML from r and
// emits a newline character data token ahead of every "br" start element.
func newHandleBrTokenReader(r io.Reader) xml.TokenReader {
	return &handleBrTokenReader{Decoder: xml.NewDecoder(r)}
}

// TTMLInItem represents an input TTML item
type TTMLInItem struct {
Style string `xml:"style,attr,omitempty"`
Expand Down Expand Up @@ -380,7 +413,8 @@ func ReadFromTTML(i io.Reader) (o *Subtitles, err error) {

// Unmarshal items
var items = TTMLInItems{}
if err = xml.Unmarshal([]byte("<span>"+ts.Items+"</span>"), &items); err != nil {
decoder := xml.NewTokenDecoder(newHandleBrTokenReader(strings.NewReader("<p>" + ts.Items + "</p>")))
if err = decoder.Decode(&items); err != nil {
err = fmt.Errorf("astisub: unmarshaling items failed: %w", err)
return
}
Expand Down
20 changes: 19 additions & 1 deletion ttml_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ package astisub_test

import (
"bytes"
"github.com/asticode/go-astikit"
"io/ioutil"
"strings"
"testing"

"github.com/asticode/go-astikit"

"github.com/asticode/go-astisub"
"github.com/stretchr/testify/assert"
)
Expand Down Expand Up @@ -50,3 +52,19 @@ func TestTTML(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, string(c), w.String())
}

// TestBreakLineHandling opens the example-with-breaklines-in.ttml fixture,
// writes it back out as TTML and checks that the result equals the
// example-with-breaklines-out.ttml fixture, ignoring leading and trailing
// whitespace. Each step stops the test on failure so that later steps never
// run against the result of a failed one.
func TestBreakLineHandling(t *testing.T) {
	// Open
	s, err := astisub.OpenFile("./testdata/example-with-breaklines-in.ttml")
	if !assert.NoError(t, err) {
		// The returned subtitles cannot be relied upon once opening failed.
		return
	}

	// Write
	w := &bytes.Buffer{}
	err = s.WriteToTTML(w)
	if !assert.NoError(t, err) {
		return
	}

	// Read the expected output
	c, err := ioutil.ReadFile("./testdata/example-with-breaklines-out.ttml")
	if !assert.NoError(t, err) {
		return
	}

	// Compare, ignoring surrounding whitespace
	assert.Equal(t, strings.TrimSpace(string(c)), strings.TrimSpace(w.String()))
}

0 comments on commit 61fc9b9

Please sign in to comment.