Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support SRT styling and bold/underline in VTT #96

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
152 changes: 149 additions & 3 deletions srt.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import (
"strconv"
"strings"
"time"

"golang.org/x/net/html"
)

// Constants
Expand Down Expand Up @@ -111,7 +113,88 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
o.Items = append(o.Items, s)
} else {
// Add text
s.Lines = append(s.Lines, Line{Items: []LineItem{{Text: strings.TrimSpace(line)}}})
if l := parseTextSrt(line); len(l.Items) > 0 {
kloon15 marked this conversation as resolved.
Show resolved Hide resolved
s.Lines = append(s.Lines, l)
}
// s.Lines = append(s.Lines, Line{Items: []LineItem{{Text: strings.TrimSpace(line)}}})
}
}
return
}

func parseTextSrt(i string) (o Line) {
// Create tokenizer
tr := html.NewTokenizer(strings.NewReader(i))

// Loop
type Styles struct {
kloon15 marked this conversation as resolved.
Show resolved Hide resolved
bold bool
italic bool
underline bool
color *string
}
styles := Styles{}
for {
// Get next tag
t := tr.Next()

// Process error
if err := tr.Err(); err != nil {
break
}

// Get current token
token := tr.Token()

switch t {
case html.EndTagToken:
// Parse italic/bold/underline
switch token.Data {
case "b":
styles.bold = false
case "i":
styles.italic = false
case "u":
styles.underline = false
case "font":
styles.color = nil
}
case html.StartTagToken:
// Parse italic/bold/underline
switch token.Data {
case "b":
styles.bold = true
case "i":
styles.italic = true
case "u":
styles.underline = true
case "font":
color, _ := getAttribute(&token, "color")
kloon15 marked this conversation as resolved.
Show resolved Hide resolved
if color != "" {
styles.color = &color
}
}
case html.TextToken:
if s := strings.TrimSpace(string(tr.Raw())); s != "" {
// Get style attribute
var sa *StyleAttributes
if styles.bold || styles.italic ||
styles.underline || styles.color != nil {
sa = &StyleAttributes{
TTMLColor: styles.color,
kloon15 marked this conversation as resolved.
Show resolved Hide resolved
WebVTTBold: styles.bold,
WebVTTItalics: styles.italic,
WebVTTUnderline: styles.underline,
}
sa.propagateSRTAttributes()
}

// Append item
o.Items = append(o.Items, LineItem{
InlineStyle: sa,
Text: s,
})
}
}
}
return
Expand Down Expand Up @@ -146,8 +229,7 @@ func (s Subtitles) WriteToSRT(o io.Writer) (err error) {

// Loop through lines
for _, l := range v.Lines {
c = append(c, []byte(l.String())...)
c = append(c, bytesLineSeparator...)
c = append(c, l.srtBytes()...)
}

// Add new line
Expand All @@ -164,3 +246,67 @@ func (s Subtitles) WriteToSRT(o io.Writer) (err error) {
}
return
}

// srtBytes renders the line in SRT form: the SRT bytes of every item, joined
// by a single space, followed by the line separator.
func (l Line) srtBytes() (c []byte) {
	last := len(l.Items) - 1
	for pos := range l.Items {
		c = append(c, l.Items[pos].srtBytes()...)
		// Separate neighbouring items, but never leave a trailing space.
		if pos != last {
			c = append(c, ' ')
		}
	}
	return append(c, bytesLineSeparator...)
}

// srtBytes renders the item as SRT text wrapped in its inline style tags.
// Tags are opened in the order font, b, i, u and closed in the reverse order,
// so the emitted markup is always properly nested.
func (li LineItem) srtBytes() (c []byte) {
	// Build the opening tags left to right and the closing tags right to left.
	var prefix, suffix string
	if style := li.InlineStyle; style != nil {
		// A nil or empty color produces no font tag.
		if style.TTMLColor != nil && *style.TTMLColor != "" {
			prefix += "<font color=\"" + *style.TTMLColor + "\">"
			suffix = "</font>" + suffix
		}
		if style.WebVTTBold {
			prefix += "<b>"
			suffix = "</b>" + suffix
		}
		if style.WebVTTItalics {
			prefix += "<i>"
			suffix = "</i>" + suffix
		}
		if style.WebVTTUnderline {
			prefix += "<u>"
			suffix = "</u>" + suffix
		}
	}

	// Assemble: opening tags, raw text, closing tags.
	c = append(c, prefix...)
	c = append(c, li.Text...)
	c = append(c, suffix...)
	return
}

func getAttribute(n *html.Token, key string) (string, bool) {
kloon15 marked this conversation as resolved.
Show resolved Hide resolved

for _, attr := range n.Attr {
if attr.Key == key {
return attr.Val, true
}
}

return "", false
}
4 changes: 4 additions & 0 deletions subtitles.go
Original file line number Diff line number Diff line change
Expand Up @@ -236,7 +236,9 @@ type StyleAttributes struct {
TTMLWritingMode *string
TTMLZIndex *int
WebVTTAlign string
WebVTTBold bool
WebVTTItalics bool
WebVTTUnderline bool
WebVTTLine string
WebVTTLines int
WebVTTPosition string
Expand All @@ -248,6 +250,8 @@ type StyleAttributes struct {
WebVTTWidth string
}

// propagateSRTAttributes is currently a no-op. The SRT text parser calls it
// right after building the inline style attributes of a line item.
func (sa *StyleAttributes) propagateSRTAttributes() {}
kloon15 marked this conversation as resolved.
Show resolved Hide resolved

// propagateSSAAttributes is currently a no-op.
func (sa *StyleAttributes) propagateSSAAttributes() {}

func (sa *StyleAttributes) propagateSTLAttributes() {
Expand Down
63 changes: 48 additions & 15 deletions webvtt.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ package astisub

import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
Expand All @@ -29,8 +28,6 @@ const (

// Vars
var (
bytesWebVTTItalicEndTag = []byte("</i>")
bytesWebVTTItalicStartTag = []byte("<i>")
bytesWebVTTTimeBoundariesSeparator = []byte(webvttTimeBoundariesSeparator)
webVTTRegexpStartTag = regexp.MustCompile(`(<v([\.\w]*)(.+?)>)`)
webVTTEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;")
Expand Down Expand Up @@ -290,7 +287,12 @@ func parseTextWebVTT(i string) (o Line) {
tr := html.NewTokenizer(strings.NewReader(i))

// Loop
italic := false
type Styles struct {
kloon15 marked this conversation as resolved.
Show resolved Hide resolved
bold bool
italic bool
underline bool
}
styles := Styles{}
for {
// Get next tag
t := tr.Next()
Expand All @@ -300,12 +302,19 @@ func parseTextWebVTT(i string) (o Line) {
break
}

// Get current token
token := tr.Token()

switch t {
case html.EndTagToken:
// Parse italic
if bytes.Equal(tr.Raw(), bytesWebVTTItalicEndTag) {
italic = false
continue
// Parse italic/bold/underline
switch token.Data {
case "b":
styles.bold = false
case "i":
styles.italic = false
case "u":
styles.underline = false
}
case html.StartTagToken:
// Parse voice name
Expand All @@ -316,18 +325,24 @@ func parseTextWebVTT(i string) (o Line) {
continue
}

// Parse italic
if bytes.Equal(tr.Raw(), bytesWebVTTItalicStartTag) {
italic = true
continue
// Parse italic/bold/underline
switch token.Data {
case "b":
styles.bold = true
case "i":
styles.italic = true
case "u":
styles.underline = true
}
case html.TextToken:
if s := strings.TrimSpace(string(tr.Raw())); s != "" {
// Get style attribute
var sa *StyleAttributes
if italic {
if styles.bold || styles.italic || styles.underline {
sa = &StyleAttributes{
WebVTTItalics: italic,
WebVTTBold: styles.bold,
WebVTTItalics: styles.italic,
WebVTTUnderline: styles.underline,
}
sa.propagateWebVTTAttributes()
}
Expand Down Expand Up @@ -516,33 +531,51 @@ func (li LineItem) webVTTBytes() (c []byte) {
color = cssColor(*li.InlineStyle.TTMLColor)
}

// Get italics
// Get bold/italics/underline
b := li.InlineStyle != nil && li.InlineStyle.WebVTTBold
i := li.InlineStyle != nil && li.InlineStyle.WebVTTItalics
u := li.InlineStyle != nil && li.InlineStyle.WebVTTUnderline

// Append
if color != "" {
c = append(c, []byte("<c."+color+">")...)
}
if b {
c = append(c, []byte("<b>")...)
}
if i {
c = append(c, []byte("<i>")...)
}
if u {
c = append(c, []byte("<u>")...)
}
c = append(c, []byte(escapeWebVTT(li.Text))...)
if u {
c = append(c, []byte("</u>")...)
}
if i {
c = append(c, []byte("</i>")...)
}
if b {
c = append(c, []byte("</b>")...)
}
if color != "" {
c = append(c, []byte("</c>")...)
}
return
}

// cssColor translates an RGB hex color such as "#00FFFF" into the matching
// WebVTT default text color class name. The comparison is case-insensitive.
// Colors outside the table below yield the empty string, which is ok.
func cssColor(rgb string) string {
	// https://www.w3.org/TR/webvtt1/#default-text-color
	switch strings.ToLower(rgb) {
	case "#00ffff":
		return "cyan" // narrator, thought
	case "#ffff00":
		return "yellow" // out of vision
	case "#ff0000":
		return "red" // noises
	case "#ff00ff":
		return "magenta" // song
	case "#00ff00":
		return "lime" // foreign speak
	case "#ffffff":
		return "white"
	case "#0000ff":
		return "blue"
	case "#000000":
		return "black"
	}
	return ""
}
Loading