Skip to content

Commit

Permalink
Merge pull request #835 from googlecodelabs/image-urls
Browse files Browse the repository at this point in the history
Support data URLs in Google Doc exports.
  • Loading branch information
cassierecher committed May 25, 2023
2 parents 8b5107f + 9fff679 commit 872cbcf
Show file tree
Hide file tree
Showing 5 changed files with 113 additions and 27 deletions.
60 changes: 36 additions & 24 deletions claat/fetch/fetch.go
Original file line number Diff line number Diff line change
Expand Up @@ -229,7 +229,7 @@ func (f *Fetcher) SlurpImages(src, dir string, n []nodes.Node, images map[string
for _, imageNode := range imageNodes {
go func(imageNode *nodes.ImageNode) {
url := imageNode.Src
file, err := f.slurpBytes(src, dir, url)
file, err := f.slurpBytes(src, dir, url, imageNode.Bytes)
if err == nil {
imageNode.Src = filepath.Join(util.ImgDirname, file)
}
Expand All @@ -251,40 +251,52 @@ func (f *Fetcher) SlurpImages(src, dir string, n []nodes.Node, images map[string
return nil
}

func (f *Fetcher) slurpBytes(codelabSrc, dir, imgURL string) (string, error) {
// images can be local in Markdown cases or remote.
func (f *Fetcher) slurpBytes(codelabSrc, dir, imgURL string, imgBytes []byte) (string, error) {
// images can be data URLs, local in Markdown cases or remote.
// Only perform a simple copy for a local reference.
var b []byte
var ext string
u, err := url.Parse(imgURL)
if err != nil {
return "", err
}

// If the codelab source is being downloaded from the network, then we should interpret
// the image URL in the same way.
srcUrl, err := url.Parse(codelabSrc)
if err == nil && srcUrl.Host != "" {
u = srcUrl.ResolveReference(u)
}
var err error

if u.Host == "" {
if imgURL, err = restrictPathToParent(imgURL, filepath.Dir(codelabSrc)); err != nil {
return "", err
if len(imgBytes) > 0 {
// Slurp bytes from image URL data.
b = imgBytes
if ext, err = imgExtFromBytes(b); err != nil {
return "", fmt.Errorf("Error reading image type: %v", err)
}
if b, err = ioutil.ReadFile(imgURL); err != nil {
} else {
// Slurp bytes from local or remote URL.
u, err := url.Parse(imgURL)
if err != nil {
return "", err
}
ext = filepath.Ext(imgURL)
} else {
if b, err = f.slurpRemoteBytes(u.String(), 5); err != nil {
return "", fmt.Errorf("Error downloading image at %s: %v", u.String(), err)

// If the codelab source is being downloaded from the network, then we should interpret
// the image URL in the same way.
srcURL, err := url.Parse(codelabSrc)
if err == nil && srcURL.Host != "" {
u = srcURL.ResolveReference(u)
}
if ext, err = imgExtFromBytes(b); err != nil {
return "", fmt.Errorf("Error reading image type at %s: %v", u.String(), err)

if u.Host == "" {
if imgURL, err = restrictPathToParent(imgURL, filepath.Dir(codelabSrc)); err != nil {
return "", err
}
if b, err = ioutil.ReadFile(imgURL); err != nil {
return "", err
}
ext = filepath.Ext(imgURL)
} else {
if b, err = f.slurpRemoteBytes(u.String(), 5); err != nil {
return "", fmt.Errorf("Error downloading image at %s: %v", u.String(), err)
}
if ext, err = imgExtFromBytes(b); err != nil {
return "", fmt.Errorf("Error reading image type at %s: %v", u.String(), err)
}
}
}

// Generate image file from slurped bytes.
crc := crc64.Checksum(b, f.crcTable)
file := fmt.Sprintf("%x%s", crc, ext)
dst := filepath.Join(dir, file)
Expand Down
5 changes: 4 additions & 1 deletion claat/nodes/image.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ type NewImageNodeOptions struct {
Width float32
Alt string
Title string
Bytes []byte
}

// NewImageNode creates a new ImageNode with the given options.
Expand All @@ -18,6 +19,7 @@ func NewImageNode(opts NewImageNodeOptions) *ImageNode {
Width: opts.Width,
Alt: opts.Alt,
Title: opts.Title,
Bytes: opts.Bytes,
}
}

Expand All @@ -28,11 +30,12 @@ type ImageNode struct {
Width float32
Alt string
Title string
Bytes []byte
}

// Empty returns true if its Src is zero, excluding space runes, and its Bytes is empty.
func (in *ImageNode) Empty() bool {
return strings.TrimSpace(in.Src) == ""
return strings.TrimSpace(in.Src) == "" && len(in.Bytes) == 0
}

// ImageNodes extracts everything except NodeImage nodes, recursively.
Expand Down
21 changes: 20 additions & 1 deletion claat/nodes/image_test.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package nodes

import (
"encoding/base64"
"testing"

"github.com/google/go-cmp/cmp"
)

var testBytes, _ = base64.StdEncoding.DecodeString("R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")

func TestNewImageNode(t *testing.T) {
tests := []struct {
name string
Expand All @@ -19,7 +22,7 @@ func TestNewImageNode(t *testing.T) {
},
},
{
name: "NonEmpty",
name: "StandardURL",
inOpts: NewImageNodeOptions{
Src: "https://www.google.com/images/branding/googlelogo/1x/googlelogo_color_272x92dp.png",
Width: 1.0,
Expand All @@ -34,6 +37,22 @@ func TestNewImageNode(t *testing.T) {
Alt: "bar",
},
},
{
name: "DataURL",
inOpts: NewImageNodeOptions{
Width: 1.0,
Title: "foo",
Alt: "bar",
Bytes: testBytes,
},
out: &ImageNode{
node: node{typ: NodeImage},
Width: 1.0,
Title: "foo",
Alt: "bar",
Bytes: testBytes,
},
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
Expand Down
23 changes: 22 additions & 1 deletion claat/parser/gdoc/parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package gdoc

import (
"bytes"
"encoding/base64"
"fmt"
"io"
"net/url"
Expand Down Expand Up @@ -697,12 +698,32 @@ func image(ds *docState) nodes.Node {
errorAlt = "The domain of the requested iframe (" + u.Hostname() + ") has not been whitelisted."
fmt.Fprint(os.Stderr, errorAlt+"\n")
}

var imageBytes []byte
var imageSrc string
s := nodeAttr(ds.cur, "src")
if s == "" {
return nil
} else if strings.HasPrefix(s, "data:") {
_, data, ok := strings.Cut(s, ",")
if !ok {
fmt.Fprint(os.Stderr, "Failed to decode data URL: "+s+" \n")
return nil
}
b, err := base64.StdEncoding.DecodeString(data)
if err != nil {
fmt.Fprint(os.Stderr, "Failed to decode data URL: "+s+"\n"+err.Error()+"\n")
return nil
}
imageSrc = ""
imageBytes = b
} else {
imageSrc = s
imageBytes = []byte{}
}
n := nodes.NewImageNode(nodes.NewImageNodeOptions{
Src: s,
Src: imageSrc,
Bytes: imageBytes,
Width: styleFloatValue(ds.cur, "width"),
})
n.MutateBlock(findBlockParent(ds.cur))
Expand Down
31 changes: 31 additions & 0 deletions claat/parser/gdoc/parse_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ package gdoc

import (
"bytes"
"encoding/base64"
"io"
"reflect"
"strings"
Expand Down Expand Up @@ -295,6 +296,9 @@ func TestParseDoc(t *testing.T) {
<p><span>[[</span><span class="bold">import</span><span>&nbsp;</span><span><a href="https://example.com/import">shared</a></span><span>]]</span></p>
<img src="https://host/image.png" alt="alt text" title="title text">
<p><img alt="JPEG" src="data:image/jpeg;base64,/9j/2wBDAP//////////////////////////////////////////////////////////////////////////////////////wAALCAABAAEBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAAA//EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEAAD8AN//Z"></p>
<p><img alt="GIF" src="data:image/gif;base64,R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7"></p>
<p><img alt="PNG" src="data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVQYV2NgYAAAAAMAAWgmWQ0AAAAASUVORK5CYII="></p>
<p><img src="https://host/small.png" style="height: 10px; width: 25.5px"> icon.</p>
<p><img alt="https://www.youtube.com/watch?v=vid" src="https://yt.com/vid.jpg"></p>
Expand Down Expand Up @@ -405,6 +409,33 @@ func TestParseDoc(t *testing.T) {
para.MutateBlock(true)
content.Append(para)

bytes, _ := base64.StdEncoding.DecodeString("/9j/2wBDAP//////////////////////////////////////////////////////////////////////////////////////wAALCAABAAEBAREA/8QAFAABAAAAAAAAAAAAAAAAAAAAA//EABQQAQAAAAAAAAAAAAAAAAAAAAD/2gAIAQEAAD8AN//Z")
img = nodes.NewImageNode(nodes.NewImageNodeOptions{
Bytes: bytes,
Alt: "JPEG",
})
para = nodes.NewListNode(img)
para.MutateBlock(true)
content.Append(para)

bytes, _ = base64.StdEncoding.DecodeString("R0lGODlhAQABAIAAAAAAAP///yH5BAEAAAAALAAAAAABAAEAAAIBRAA7")
img = nodes.NewImageNode(nodes.NewImageNodeOptions{
Bytes: bytes,
Alt: "GIF",
})
para = nodes.NewListNode(img)
para.MutateBlock(true)
content.Append(para)

bytes, _ = base64.StdEncoding.DecodeString("iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVQYV2NgYAAAAAMAAWgmWQ0AAAAASUVORK5CYII=")
img = nodes.NewImageNode(nodes.NewImageNodeOptions{
Bytes: bytes,
Alt: "PNG",
})
para = nodes.NewListNode(img)
para.MutateBlock(true)
content.Append(para)

img = nodes.NewImageNode(nodes.NewImageNodeOptions{
Src: "https://host/small.png",
Width: 25.5,
Expand Down

0 comments on commit 872cbcf

Please sign in to comment.