diff --git a/go.mod b/go.mod index c27382a..ffc7671 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,17 @@ module github.com/hashicorp/go-slug -go 1.15 +go 1.20 + +require ( + github.com/apparentlymart/go-versions v1.0.1 + github.com/google/go-cmp v0.5.9 + github.com/hashicorp/terraform-registry-address v0.2.0 + github.com/hashicorp/terraform-svchost v0.0.1 + golang.org/x/mod v0.10.0 +) + +require ( + github.com/go-test/deep v1.0.3 // indirect + golang.org/x/net v0.5.0 // indirect + golang.org/x/text v0.6.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..e61f2e4 --- /dev/null +++ b/go.sum @@ -0,0 +1,21 @@ +github.com/apparentlymart/go-versions v1.0.1 h1:ECIpSn0adcYNsBfSRwdDdz9fWlL+S/6EUd9+irwkBgU= +github.com/apparentlymart/go-versions v1.0.1/go.mod h1:YF5j7IQtrOAOnsGkniupEA5bfCjzd7i14yu0shZavyM= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-test/deep v1.0.1/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= +github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68= +github.com/go-test/deep v1.0.3/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/hashicorp/terraform-registry-address v0.2.0 h1:92LUg03NhfgZv44zpNTLBGIbiyTokQCDcdH5BhVHT3s= +github.com/hashicorp/terraform-registry-address v0.2.0/go.mod h1:478wuzJPzdmqT6OGbB/iH82EDcI8VFM4yujknh/1nIs= +github.com/hashicorp/terraform-svchost v0.0.1 h1:Zj6fR5wnpOHnJUmLyWozjMeDaVuE+cstMPj41/eKmSQ= +github.com/hashicorp/terraform-svchost v0.0.1/go.mod h1:ut8JaH0vumgdCfJaihdcZULqkAwHdQNwNH7taIDdsZM= +github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348 h1:MtvEpTB6LX3vkb4ax0b5D2DHbNAUsen0Gx5wZoq3lV4= +github.com/kylelemons/godebug 
v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k= +golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk= +golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= +golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw= +golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= +golang.org/x/text v0.6.0 h1:3XmdazWV+ubf7QgHSTWeykHOci5oeekaGJBLkrkaw4k= +golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= diff --git a/internal/ignorefiles/ignorerules.go b/internal/ignorefiles/ignorerules.go new file mode 100644 index 0000000..25c2677 --- /dev/null +++ b/internal/ignorefiles/ignorerules.go @@ -0,0 +1,100 @@ +// Package ignorefiles deals with the ".terraformignore" file format, which +// is a convention similar to ".gitignore" that specifies path patterns that +// match files Terraform should discard or ignore when interpreting a package +// fetched from a remote location. +package ignorefiles + +import ( + "fmt" + "io" + "os" + "path/filepath" +) + +// A Ruleset is the result of reading, parsing, and compiling a +// ".terraformignore" file. +type Ruleset struct { + rules []rule +} + +// ParseIgnoreFileContent takes a reader over the content of a .terraformignore +// file and returns the Ruleset described by that file, or an error if the +// file is invalid. +func ParseIgnoreFileContent(r io.Reader) (*Ruleset, error) { + rules, err := readRules(r) + if err != nil { + return nil, err + } + return &Ruleset{rules: rules}, nil +} + +// LoadPackageIgnoreRules implements reasonable default behavior for finding +// ignore rules for a particular package root directory: if .terraformignore is +// present then use it, or otherwise just return DefaultRuleset. +// +// This function will return an error only if an ignore file is present but +// unreadable, or if an ignore file is present but contains invalid syntax. 
+func LoadPackageIgnoreRules(packageDir string) (*Ruleset, error) { + file, err := os.Open(filepath.Join(packageDir, ".terraformignore")) + if err != nil { + if os.IsNotExist(err) { + return DefaultRuleset, nil + } + return nil, fmt.Errorf("cannot read .terraformignore: %s", err) + } + defer file.Close() + + ret, err := ParseIgnoreFileContent(file) + if err != nil { + // The parse errors already mention that they were parsing ignore rules, + // so don't need an additional prefix added. + return nil, err + } + return ret, nil +} + +// Excludes tests whether the given path matches the set of paths that are +// excluded by the rules in the ruleset. +// +// If any of the rules in the ruleset have invalid syntax then Excludes will +// return an error, but it will also still return a boolean result which +// considers all of the remaining valid rules, to support callers that want to +// just ignore invalid exclusions. Such callers can safely ignore the error +// result: +// +// exc, _ = ruleset.Excludes(path) +func (r *Ruleset) Excludes(path string) (bool, error) { + if r == nil { + return false, nil + } + + var retErr error + foundMatch := false + for _, rule := range r.rules { + match, err := rule.match(path) + if err != nil { + // We'll remember the first error we encounter, but continue + // matching anyway to support callers that want to ignore invalid + // lines and just match with whatever's left. + if retErr == nil { + retErr = fmt.Errorf("invalid ignore rule %q", rule.val) + } + } + if match { + foundMatch = !rule.excluded + } + } + return foundMatch, retErr +} + +// Includes is the inverse of [Ruleset.Excludes]. 
+func (r *Ruleset) Includes(path string) (bool, error) { + notRet, err := r.Excludes(path) + return !notRet, err +} + +var DefaultRuleset *Ruleset + +func init() { + DefaultRuleset = &Ruleset{rules: defaultExclusions} +} diff --git a/internal/ignorefiles/terraformignore.go b/internal/ignorefiles/terraformignore.go new file mode 100644 index 0000000..0eda3a0 --- /dev/null +++ b/internal/ignorefiles/terraformignore.go @@ -0,0 +1,186 @@ +package ignorefiles + +import ( + "bufio" + "fmt" + "io" + "os" + "path/filepath" + "regexp" + "strings" + "text/scanner" +) + +func readRules(input io.Reader) ([]rule, error) { + rules := defaultExclusions + scanner := bufio.NewScanner(input) + scanner.Split(bufio.ScanLines) + + for scanner.Scan() { + pattern := scanner.Text() + // Ignore blank lines + if len(pattern) == 0 { + continue + } + // Trim spaces + pattern = strings.TrimSpace(pattern) + // Ignore comments + if pattern[0] == '#' { + continue + } + // New rule structure + rule := rule{} + // Exclusions + if pattern[0] == '!' { + rule.excluded = true + pattern = pattern[1:] + } + // If it is a directory, add ** so we catch descendants + if pattern[len(pattern)-1] == os.PathSeparator { + pattern = pattern + "**" + } + // If it starts with /, it is absolute + if pattern[0] == os.PathSeparator { + pattern = pattern[1:] + } else { + // Otherwise prepend **/ + pattern = "**" + string(os.PathSeparator) + pattern + } + rule.val = pattern + rule.dirs = strings.Split(pattern, string(os.PathSeparator)) + rules = append(rules, rule) + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("syntax error in .terraformignore: %w", err) + } + return rules, nil +} + +type rule struct { + val string // the value of the rule itself + excluded bool // ! 
is present, an exclusion rule + dirs []string // directories of the rule + regex *regexp.Regexp // regular expression to match for the rule +} + +func (r *rule) match(path string) (bool, error) { + if r.regex == nil { + if err := r.compile(); err != nil { + return false, filepath.ErrBadPattern + } + } + + b := r.regex.MatchString(path) + return b, nil +} + +func (r *rule) compile() error { + regStr := "^" + pattern := r.val + // Go through the pattern and convert it to a regexp. + // Use a scanner to support utf-8 chars. + var scan scanner.Scanner + scan.Init(strings.NewReader(pattern)) + + sl := string(os.PathSeparator) + escSL := sl + if sl == `\` { + escSL += `\` + } + + for scan.Peek() != scanner.EOF { + ch := scan.Next() + if ch == '*' { + if scan.Peek() == '*' { + // is some flavor of "**" + scan.Next() + + // Treat **/ as ** so eat the "/" + if string(scan.Peek()) == sl { + scan.Next() + } + + if scan.Peek() == scanner.EOF { + // is "**EOF" - to align with .gitignore just accept all + regStr += ".*" + } else { + // is "**" + // Note that this allows for any # of /'s (even 0) because + // the .* will eat everything, even /'s + regStr += "(.*" + escSL + ")?" + } + } else { + // is "*" so map it to anything but "/" + regStr += "[^" + escSL + "]*" + } + } else if ch == '?' { + // "?" is any char except "/" + regStr += "[^" + escSL + "]" + } else if ch == '.' || ch == '$' { + // Escape some regexp special chars that have no meaning + // in golang's filepath.Match + regStr += `\` + string(ch) + } else if ch == '\\' { + // escape next char. 
Note that a trailing \ in the pattern + // will be left alone (but need to escape it) + if sl == `\` { + // On windows map "\" to "\\", meaning an escaped backslash, + // and then just continue because filepath.Match on + // Windows doesn't allow escaping at all + regStr += escSL + continue + } + if scan.Peek() != scanner.EOF { + regStr += `\` + string(scan.Next()) + } else { + regStr += `\` + } + } else { + regStr += string(ch) + } + } + + regStr += "$" + re, err := regexp.Compile(regStr) + if err != nil { + return err + } + + r.regex = re + return nil +} + +/* + Default rules as they would appear in .terraformignore: + .git/ + .terraform/ + !.terraform/modules/ +*/ + +var defaultExclusions = []rule{ + { + val: strings.Join([]string{"**", ".git", "**"}, string(os.PathSeparator)), + excluded: false, + }, + { + val: strings.Join([]string{"**", ".terraform", "**"}, string(os.PathSeparator)), + excluded: false, + }, + { + val: strings.Join([]string{"**", ".terraform", "modules", "**"}, string(os.PathSeparator)), + excluded: true, + }, +} + +func init() { + // We'll precompile all of the default rules at initialization, so we + // don't need to recompile them every time we encounter a package that + // doesn't have any rules (the common case). 
+ for _, r := range defaultExclusions { + err := r.compile() + if err != nil { + panic(fmt.Sprintf("invalid default rule %q: %s", r.val, err)) + } + } +} diff --git a/terraformignore_test.go b/internal/ignorefiles/terraformignore_test.go similarity index 72% rename from terraformignore_test.go rename to internal/ignorefiles/terraformignore_test.go index 5ca2750..1ca5ba1 100644 --- a/terraformignore_test.go +++ b/internal/ignorefiles/terraformignore_test.go @@ -1,4 +1,4 @@ -package slug +package ignorefiles import ( "testing" @@ -6,13 +6,19 @@ import ( func TestTerraformIgnore(t *testing.T) { // path to directory without .terraformignore - p := parseIgnoreFile("testdata") - if len(p) != 4 { + rs, err := LoadPackageIgnoreRules("testdata/external-dir") + if err != nil { + t.Fatal(err) + } + if len(rs.rules) != 3 { t.Fatal("A directory without .terraformignore should get the default patterns") } // load the .terraformignore file's patterns - ignoreRules := parseIgnoreFile("testdata/archive-dir") + rs, err = LoadPackageIgnoreRules("testdata/archive-dir") + if err != nil { + t.Fatal(err) + } type file struct { // the actual path, should be file path format /dir/subdir/file.extension path string @@ -20,93 +26,97 @@ func TestTerraformIgnore(t *testing.T) { match bool } paths := []file{ - { + 0: { path: ".terraform/", match: true, }, - { + 1: { path: "included.txt", match: false, }, - { + 2: { path: ".terraform/foo/bar", match: true, }, - { + 3: { path: ".terraform/foo/bar/more/directories/so/many", match: true, }, - { + 4: { path: ".terraform/foo/ignored-subdirectory/", match: true, }, - { + 5: { path: "baz.txt", match: true, }, - { + 6: { path: "parent/foo/baz.txt", match: true, }, - { + 7: { path: "parent/foo/bar.tf", match: true, }, - { + 8: { path: "parent/bar/bar.tf", match: false, }, // baz.txt is ignored, but a file name including it should not be - { + 9: { path: "something/with-baz.txt", match: false, }, - { + 10: { path: "something/baz.x", match: false, }, // 
Getting into * patterns - { + 11: { path: "foo/ignored-doc.md", match: true, }, // Should match [a-z] group - { + 12: { path: "bar/something-a.txt", match: true, }, - // ignore sub- terraform.d paths - { + // ignore sub- terraform.d paths... + 13: { path: "some-module/terraform.d/x", match: true, }, - // but not the root one - { + // ...but not the root one + 14: { path: "terraform.d/", match: false, }, - { + 15: { path: "terraform.d/foo", match: false, }, // We ignore the directory, but a file of the same name could exist - { + 16: { path: "terraform.d", match: false, }, - // boop.text is ignored everywhere - { + // boop.txt is ignored everywhere... + 17: { path: "baz/boop.txt", match: true, }, - // except at current directory - { + // ...except in root directory + 18: { path: "boop.txt", match: false, }, } for i, p := range paths { - match := matchIgnoreRule(p.path, ignoreRules) + match, err := rs.Excludes(p.path) + if err != nil { + t.Errorf("invalid rule syntax when checking %s at index %d", p.path, i) + continue + } if match != p.match { t.Fatalf("%s at index %d should be %t", p.path, i, p.match) } diff --git a/internal/ignorefiles/testdata/archive-dir/.terraform/file.txt b/internal/ignorefiles/testdata/archive-dir/.terraform/file.txt new file mode 100644 index 0000000..e69de29 diff --git a/internal/ignorefiles/testdata/archive-dir/.terraform/modules/README b/internal/ignorefiles/testdata/archive-dir/.terraform/modules/README new file mode 100644 index 0000000..8c1ea48 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/.terraform/modules/README @@ -0,0 +1,2 @@ +Keep this file and directory here to test if its properly ignored + diff --git a/internal/ignorefiles/testdata/archive-dir/.terraform/plugins/README b/internal/ignorefiles/testdata/archive-dir/.terraform/plugins/README new file mode 100644 index 0000000..8c1ea48 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/.terraform/plugins/README @@ -0,0 +1,2 @@ +Keep this file and 
directory here to test if its properly ignored + diff --git a/internal/ignorefiles/testdata/archive-dir/.terraformignore b/internal/ignorefiles/testdata/archive-dir/.terraformignore new file mode 100644 index 0000000..3503ae9 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/.terraformignore @@ -0,0 +1,20 @@ +# comments are ignored + # extra spaces are irrelevant +# ignore a file + baz.txt +# below is an empty line + +# ignore a directory +terraform.d/ +# exclude ignoring a directory at the root +!/terraform.d/ +# ignore a file at a subpath +**/foo/bar.tf +# ignore files with specific endings +foo/*.md +# character groups +bar/something-[a-z].txt +# ignore a file +boop.txt +# but not one at the current directory +!/boop.txt \ No newline at end of file diff --git a/internal/ignorefiles/testdata/archive-dir/.terraformrc b/internal/ignorefiles/testdata/archive-dir/.terraformrc new file mode 100644 index 0000000..e69de29 diff --git a/internal/ignorefiles/testdata/archive-dir/bar.txt b/internal/ignorefiles/testdata/archive-dir/bar.txt new file mode 100644 index 0000000..5716ca5 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/bar.txt @@ -0,0 +1 @@ +bar diff --git a/internal/ignorefiles/testdata/archive-dir/baz.txt b/internal/ignorefiles/testdata/archive-dir/baz.txt new file mode 100644 index 0000000..3f95386 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/baz.txt @@ -0,0 +1 @@ +baz \ No newline at end of file diff --git a/internal/ignorefiles/testdata/archive-dir/exe b/internal/ignorefiles/testdata/archive-dir/exe new file mode 100755 index 0000000..e69de29 diff --git a/internal/ignorefiles/testdata/archive-dir/foo.terraform/bar.txt b/internal/ignorefiles/testdata/archive-dir/foo.terraform/bar.txt new file mode 100644 index 0000000..e69de29 diff --git a/internal/ignorefiles/testdata/archive-dir/foo.txt b/internal/ignorefiles/testdata/archive-dir/foo.txt new file mode 120000 index 0000000..b3b9b2f --- /dev/null +++ 
b/internal/ignorefiles/testdata/archive-dir/foo.txt @@ -0,0 +1 @@ +../external-dir/foo.txt \ No newline at end of file diff --git a/internal/ignorefiles/testdata/archive-dir/sub/bar.txt b/internal/ignorefiles/testdata/archive-dir/sub/bar.txt new file mode 120000 index 0000000..315e865 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/sub/bar.txt @@ -0,0 +1 @@ +../bar.txt \ No newline at end of file diff --git a/internal/ignorefiles/testdata/archive-dir/sub/zip.txt b/internal/ignorefiles/testdata/archive-dir/sub/zip.txt new file mode 100644 index 0000000..d0513b2 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/sub/zip.txt @@ -0,0 +1 @@ +zip diff --git a/internal/ignorefiles/testdata/external-dir/foo.txt b/internal/ignorefiles/testdata/external-dir/foo.txt new file mode 100644 index 0000000..257cc56 --- /dev/null +++ b/internal/ignorefiles/testdata/external-dir/foo.txt @@ -0,0 +1 @@ +foo diff --git a/slug.go b/slug.go index fd28d27..09d8fbe 100644 --- a/slug.go +++ b/slug.go @@ -8,6 +8,8 @@ import ( "os" "path/filepath" "strings" + + "github.com/hashicorp/go-slug/internal/ignorefiles" ) // Meta provides detailed information about a slug. 
@@ -151,7 +153,7 @@ func (p *Packer) Pack(src string, w io.Writer) (*Meta, error) { // Load the ignore rule configuration, which will use // defaults if no .terraformignore is configured - var ignoreRules []rule + var ignoreRules *ignorefiles.Ruleset if p.applyTerraformIgnore { ignoreRules = parseIgnoreFile(src) } @@ -175,7 +177,7 @@ func (p *Packer) Pack(src string, w io.Writer) (*Meta, error) { return meta, nil } -func (p *Packer) packWalkFn(root, src, dst string, tarW *tar.Writer, meta *Meta, ignoreRules []rule) filepath.WalkFunc { +func (p *Packer) packWalkFn(root, src, dst string, tarW *tar.Writer, meta *Meta, ignoreRules *ignorefiles.Ruleset) filepath.WalkFunc { return func(path string, info os.FileInfo, err error) error { if err != nil { return err @@ -190,14 +192,14 @@ func (p *Packer) packWalkFn(root, src, dst string, tarW *tar.Writer, meta *Meta, return nil } - if m := matchIgnoreRule(subpath, ignoreRules); m { + if m := matchIgnoreRules(subpath, ignoreRules); m { return nil } // Catch directories so we don't end up with empty directories, // the files are ignored correctly if info.IsDir() { - if m := matchIgnoreRule(subpath+string(os.PathSeparator), ignoreRules); m { + if m := matchIgnoreRules(subpath+string(os.PathSeparator), ignoreRules); m { return nil } } diff --git a/slug_test.go b/slug_test.go index 9ff5758..44b6878 100644 --- a/slug_test.go +++ b/slug_test.go @@ -1217,6 +1217,7 @@ func verifyFile(t *testing.T, path string, mode os.FileMode, expect string) { } func verifyPerms(t *testing.T, path string, expect os.FileMode) { + t.Helper() fi, err := os.Stat(path) if err != nil { t.Fatal(err) diff --git a/sourceaddrs/doc.go b/sourceaddrs/doc.go new file mode 100644 index 0000000..b0f3c91 --- /dev/null +++ b/sourceaddrs/doc.go @@ -0,0 +1,9 @@ +// Package sourceaddrs deals with the various types of source code address +// that Terraform can gather into a source bundle via the sibling package +// "sourcebundle". 
+// +// NOTE WELL: Everything in this package is currently experimental and subject +// to breaking changes even in patch releases. We will make stronger commitments +// to backward-compatibility once we have more experience using this +// functionality in real contexts. +package sourceaddrs diff --git a/sourceaddrs/package_remote.go b/sourceaddrs/package_remote.go new file mode 100644 index 0000000..701b770 --- /dev/null +++ b/sourceaddrs/package_remote.go @@ -0,0 +1,87 @@ +package sourceaddrs + +import ( + "fmt" + "net/url" +) + +type RemotePackage struct { + sourceType string + + // NOTE: A remote package URL may never have a "userinfo" portion, and + // all relevant fields are comparable, so it's safe to compare + // RemotePackage using the == operator. + url url.URL +} + +// ParseRemotePackage parses a standalone remote package address, which is a +// remote source address without any sub-path portion. +func ParseRemotePackage(given string) (RemotePackage, error) { + srcAddr, err := ParseRemoteSource(given) + if err != nil { + return RemotePackage{}, err + } + if srcAddr.subPath != "" { + return RemotePackage{}, fmt.Errorf("remote package address may not have a sub-path") + } + return srcAddr.pkg, nil +} + +func (p RemotePackage) String() string { + // Our address normalization rules are a bit odd since we inherited the + // fundamentals of this addressing scheme from go-getter. + if p.url.Scheme == p.sourceType { + // When scheme and source type match we don't actually mention the + // source type in the stringification, because it looks redundant + // and confusing. + return p.url.String() + } + return p.sourceType + "::" + p.url.String() +} + +// SourceAddr returns a remote source address referring to the given sub-path +// inside the recieving package. +// +// subPath must be a valid sub-path (as defined by [ValidSubPath]) or this +// function will panic. An empty string is a valid sub-path representing the +// root directory of the package. 
+func (p RemotePackage) SourceAddr(subPath string) RemoteSource { + finalPath, err := normalizeSubpath(subPath) + if err != nil { + panic(fmt.Sprintf("invalid subPath: %s", subPath)) + } + return RemoteSource{ + pkg: p, + subPath: finalPath, + } +} + +func (p RemotePackage) subPathString(subPath string) string { + if subPath == "" { + // Easy case... the package address is also the source address + return p.String() + } + + // The weird syntax we've inherited from go-getter expects the URL's + // query string to appear after the subpath portion, so we need to + // now tweak the package URL to be a sub-path URL instead. + subURL := p.url // shallow copy + subURL.Path += "//" + subPath + if subURL.Scheme == p.sourceType { + return subURL.String() + } + return p.sourceType + "::" + subURL.String() +} + +// SourceType returns the source type component of the package address. +func (p RemotePackage) SourceType() string { + return p.sourceType +} + +// URL returns the URL component of the package address. +// +// Callers MUST NOT mutate anything accessible through the returned pointer, +// even though the Go type system cannot enforce that. +func (p RemotePackage) URL() *url.URL { + return &p.url +} diff --git a/sourceaddrs/source.go b/sourceaddrs/source.go new file mode 100644 index 0000000..2b8586b --- /dev/null +++ b/sourceaddrs/source.go @@ -0,0 +1,150 @@ +package sourceaddrs + +import ( + "fmt" + "path" + "strings" +) + +// Source acts as a tagged union over the three possible source address types, +// for situations where all three are acceptable. +// +// Source is used to specify source addresses for installation. Once packages +// have been resolved and installed we use [SourceFinal] instead to represent +// those finalized selections, which allows capturing the selected version +// number for a module registry source address. +// +// Only address types within this package can implement Source. 
+type Source interface { + sourceSigil() + + String() string + SupportsVersionConstraints() bool +} + +// ParseSource attempts to parse the given string as any one of the three +// supported source address types, recognizing which type it belongs to based +// on the syntax differences between the address forms. +func ParseSource(given string) (Source, error) { + if strings.TrimSpace(given) != given { + return nil, fmt.Errorf("source address must not have leading or trailing spaces") + } + if len(given) == 0 { + return nil, fmt.Errorf("a valid source address is required") + } + switch { + case looksLikeLocalSource(given): + ret, err := ParseLocalSource(given) + if err != nil { + return nil, fmt.Errorf("invalid local source address %q: %w", given, err) + } + return ret, nil + case looksLikeRegistrySource(given): + ret, err := ParseRegistrySource(given) + if err != nil { + return nil, fmt.Errorf("invalid module registry source address %q: %w", given, err) + } + return ret, nil + default: + // If it's neither a local source nor a module registry source then + // we'll assume it's intended to be a remote source. + // (This parser will return a suitable error if the given string + // is not of any of the supported address types.) + ret, err := ParseRemoteSource(given) + if err != nil { + return nil, fmt.Errorf("invalid remote source address %q: %w", given, err) + } + return ret, nil + } +} + +// MustParseSource is a thin wrapper around [ParseSource] that panics if it +// returns an error, or returns its result if not. +func MustParseSource(given string) Source { + ret, err := ParseSource(given) + if err != nil { + panic(err) + } + return ret +} + +// ResolveRelativeSource calculates a new source address from the combination +// of two other source addresses. +// +// If "b" is already an absolute source address then the result is "b" verbatim. 
+// +// If "b" is a relative source then the result is an address of the same type +// as "a", but with a different path component. If "a" is an absolute address +// type then the result is guaranteed to also be an absolute address type. +// +// Returns an error if "b" is a relative path that attempts to traverse out +// of the package of an absolute address given in "a". +func ResolveRelativeSource(a, b Source) (Source, error) { + if sourceIsAbs(b) { + return b, nil + } + // If we get here then b is definitely a local source, because + // otherwise it would have been absolute. + bRaw := b.(LocalSource).relPath + + switch a := a.(type) { + case LocalSource: + aRaw := a.relPath + new := path.Join(aRaw, bRaw) + if !looksLikeLocalSource(new) { + new = "./" + new // preserve LocalSource's prefix invariant + } + return LocalSource{relPath: new}, nil + case RegistrySource: + aSub := a.subPath + newSub, err := joinSubPath(aSub, bRaw) + if err != nil { + return nil, fmt.Errorf("invalid traversal from %s: %w", a.String(), err) + } + return RegistrySource{ + pkg: a.pkg, + subPath: newSub, + }, nil + case RemoteSource: + aSub := a.subPath + newSub, err := joinSubPath(aSub, bRaw) + if err != nil { + return nil, fmt.Errorf("invalid traversal from %s: %w", a.String(), err) + } + return RemoteSource{ + pkg: a.pkg, + subPath: newSub, + }, nil + default: + // Should not get here, because the cases above are exhaustive for + // all of our defined Source implementations. + panic(fmt.Sprintf("unsupported Source implementation %T", a)) + } +} + +// SourceFilename returns the base name (in the same sense as [path.Base]) +// of the sub-path or local path portion of the given source address. +// +// This only really makes sense for a source address that refers to an +// individual file, and is intended for needs such as using the suffix of +// the filename to decide how to parse a particular file. 
Passing a source +// address that refers to a directory will not fail but its result is +// unlikely to be useful. +func SourceFilename(addr Source) string { + switch addr := addr.(type) { + case LocalSource: + return path.Base(addr.RelativePath()) + case RemoteSource: + return path.Base(addr.SubPath()) + case RegistrySource: + return path.Base(addr.SubPath()) + default: + // above should be exhaustive for all source types + panic(fmt.Sprintf("cannot SourceFilename for %T", addr)) + } +} + +func sourceIsAbs(source Source) bool { + _, isLocal := source.(LocalSource) + return !isLocal +} diff --git a/sourceaddrs/source_final.go b/sourceaddrs/source_final.go new file mode 100644 index 0000000..ad85942 --- /dev/null +++ b/sourceaddrs/source_final.go @@ -0,0 +1,94 @@ +package sourceaddrs + +import ( + "fmt" + "path" +) + +// FinalSource is a variant of [Source] that always refers to a single +// specific package. +// +// Specifically this models the annoying oddity that while [LocalSource] and +// [RemoteSource] fully specify what they refer to, [RegistrySource] only +// gives partial information and must be qualified with a selected version +// number to determine exactly what it refers to. +type FinalSource interface { + finalSourceSigil() + + String() string +} + +// FinalSourceFilename returns the base name (in the same sense as [path.Base]) +// of the sub-path or local path portion of the given final source address. +// +// This only really makes sense for a source address that refers to an +// individual file, and is intended for needs such as using the suffix of +// the filename to decide how to parse a particular file. Passing a source +// address that refers to a directory will not fail but its result is +// unlikely to be useful. 
+func FinalSourceFilename(addr FinalSource) string { + switch addr := addr.(type) { + case LocalSource: + return path.Base(addr.RelativePath()) + case RemoteSource: + return path.Base(addr.SubPath()) + case RegistrySourceFinal: + return path.Base(addr.SubPath()) + default: + // above should be exhaustive for all final source types + panic(fmt.Sprintf("cannot FinalSourceFilename for %T", addr)) + } +} + +// ResolveRelativeFinalSource is like [ResolveRelativeSource] but for +// [FinalSource] addresses instead of [Source] addresses. +// +// Aside from the address type difference its meaning and behavior rules +// are the same. +func ResolveRelativeFinalSource(a, b FinalSource) (FinalSource, error) { + if finalSourceIsAbs(b) { + return b, nil + } + // If we get here then b is definitely a local source, because + // otherwise it would have been absolute. + bRaw := b.(LocalSource).relPath + + switch a := a.(type) { + case LocalSource: + aRaw := a.relPath + new := path.Join(aRaw, bRaw) + if !looksLikeLocalSource(new) { + new = "./" + new // preserve LocalSource's prefix invariant + } + return LocalSource{relPath: new}, nil + case RegistrySourceFinal: + aSub := a.src.subPath + newSub, err := joinSubPath(aSub, bRaw) + if err != nil { + return nil, fmt.Errorf("invalid traversal from %s: %w", a.String(), err) + } + return RegistrySource{ + pkg: a.Package(), + subPath: newSub, + }.Versioned(a.version), nil + case RemoteSource: + aSub := a.subPath + newSub, err := joinSubPath(aSub, bRaw) + if err != nil { + return nil, fmt.Errorf("invalid traversal from %s: %w", a.String(), err) + } + return RemoteSource{ + pkg: a.pkg, + subPath: newSub, + }, nil + default: + // Should not get here, because the cases above are exhaustive for + // all of our defined Source implementations. 
+ panic(fmt.Sprintf("unsupported Source implementation %T", a)) + } +} + +func finalSourceIsAbs(source FinalSource) bool { + _, isLocal := source.(LocalSource) + return !isLocal +} diff --git a/sourceaddrs/source_final_test.go b/sourceaddrs/source_final_test.go new file mode 100644 index 0000000..b2250a4 --- /dev/null +++ b/sourceaddrs/source_final_test.go @@ -0,0 +1,108 @@ +package sourceaddrs + +import ( + "fmt" + "reflect" + "testing" + + "github.com/apparentlymart/go-versions/versions" +) + +func TestResolveRelativeFinalSource(t *testing.T) { + onePointOh := versions.MustParseVersion("1.0.0") + + tests := []struct { + Base FinalSource + Rel FinalSource + Want FinalSource + WantErr string + }{ + { + Base: MustParseSource("./a/b").(FinalSource), + Rel: MustParseSource("../c").(FinalSource), + Want: MustParseSource("./a/c").(FinalSource), + }, + { + Base: MustParseSource("./a").(FinalSource), + Rel: MustParseSource("../c").(FinalSource), + Want: MustParseSource("./c").(FinalSource), + }, + { + Base: MustParseSource("./a").(FinalSource), + Rel: MustParseSource("../../c").(FinalSource), + Want: MustParseSource("../c").(FinalSource), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop").(FinalSource), + Rel: MustParseSource("git::https://github.com/hashicorp/go-slug.git//blah/blah").(FinalSource), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//blah/blah").(FinalSource), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop").(FinalSource), + Rel: MustParseSource("git::https://example.com/foo.git").(FinalSource), + Want: MustParseSource("git::https://example.com/foo.git").(FinalSource), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop").(FinalSource), + Rel: MustParseSource("../bloop").(FinalSource), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/bloop").(FinalSource), + }, + { + Base: 
MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop").(FinalSource), + Rel: MustParseSource("../").(FinalSource), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep").(FinalSource), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop").(FinalSource), + Rel: MustParseSource("../..").(FinalSource), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git").(FinalSource), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop").(FinalSource), + Rel: MustParseSource("../../../baz").(FinalSource), + WantErr: `invalid traversal from git::https://github.com/hashicorp/go-slug.git//beep/boop: relative path ../../../baz traverses up too many levels from source path beep/boop`, + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git").(FinalSource), + Rel: MustParseSource("./boop").(FinalSource), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//boop").(FinalSource), + }, + { + Base: MustParseSource("example.com/foo/bar/baz//beep/boop").(RegistrySource).Versioned(onePointOh), + Rel: MustParseSource("../").(FinalSource), + Want: MustParseSource("example.com/foo/bar/baz//beep").(RegistrySource).Versioned(onePointOh), + }, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("%s + %s", test.Base, test.Rel), func(t *testing.T) { + got, gotErr := ResolveRelativeFinalSource(test.Base, test.Rel) + + if test.WantErr != "" { + if gotErr == nil { + t.Fatalf("unexpected success\ngot result: %s (%T)\nwant error: %s", got, got, test.WantErr) + } + if got, want := gotErr.Error(), test.WantErr; got != want { + t.Fatalf("wrong error\ngot error: %s\nwant error: %s", got, want) + } + return + } + + if gotErr != nil { + t.Fatalf("unexpected error: %s", gotErr) + } + + // Two addresses are equal if they have the same string representation + // and the same dynamic type. 
+ gotStr := got.String() + wantStr := test.Want.String() + if gotStr != wantStr { + t.Errorf("wrong result\ngot: %s\nwant: %s", gotStr, wantStr) + } + + if gotType, wantType := reflect.TypeOf(got), reflect.TypeOf(test.Want); gotType != wantType { + t.Errorf("wrong result type\ngot: %s\nwant: %s", gotType, wantType) + } + }) + } +} diff --git a/sourceaddrs/source_local.go b/sourceaddrs/source_local.go new file mode 100644 index 0000000..d002dba --- /dev/null +++ b/sourceaddrs/source_local.go @@ -0,0 +1,88 @@ +package sourceaddrs + +import ( + "fmt" + "path" + "strings" +) + +// LocalSource represents a relative traversal to another path within the same +// source package as whatever source artifact included this path. +// +// LocalSource sources will typically need to be resolved into either +// [RemoteSource] or [RegistrySource] addresses by reference to the address +// of whatever artifact declared them, because otherwise they cannot be +// mapped onto any real source location. +type LocalSource struct { + // relPath is a slash-separate path in the style of the Go standard + // library package "path", which should always be stored in its "Clean" + // form, aside from the mandatory "./" or "../" prefixes. + relPath string +} + +var _ Source = LocalSource{} +var _ FinalSource = LocalSource{} + +// sourceSigil implements Source +func (s LocalSource) sourceSigil() {} + +// finalSourceSigil implements FinalSource +func (s LocalSource) finalSourceSigil() {} + +func looksLikeLocalSource(given string) bool { + return strings.HasPrefix(given, "./") || strings.HasPrefix(given, "../") +} + +// ParseLocalSource interprets the given path as a local source address, or +// returns an error if it cannot be interpreted as such. 
+func ParseLocalSource(given string) (LocalSource, error) { + // First we'll catch some situations that seem likely to suggest that + // the caller was trying to use a real filesystem path instead of + // just a virtual relative path within a source package. + if strings.ContainsAny(given, ":\\") { + return LocalSource{}, fmt.Errorf("must be a relative path using forward-slash separators between segments, like in a relative URL") + } + + // We distinguish local source addresses from other address types by them + // starting with some kind of relative path prefix. + if !looksLikeLocalSource(given) { + return LocalSource{}, fmt.Errorf("must start with either ./ or ../ to indicate a local path") + } + + clean := path.Clean(given) + + // We use the "path" package's definition of "clean" aside from two + // exceptions: + // - we need to retain the leading "./", if it was originally present, to + // disambiguate from module registry addresses. + // - If the cleaned path is just ".." then we need a slash on the end + // because that's part of how we recognize an address as a relative path. + if clean == ".." { + clean = "../" + } + if !looksLikeLocalSource(clean) { + clean = "./" + clean + } + + if clean != given { + return LocalSource{}, fmt.Errorf("relative path must be written in canonical form %q", clean) + } + + return LocalSource{relPath: clean}, nil +} + +// String implements Source +func (s LocalSource) String() string { + return s.relPath +} + +// SupportsVersionConstraints implements Source +func (s LocalSource) SupportsVersionConstraints() bool { + return false +} + +// RelativePath returns the effective relative path for this source address, +// in our platform-agnostic slash-separated canonical syntax. 
+func (s LocalSource) RelativePath() string { + return s.relPath +} diff --git a/sourceaddrs/source_registry.go b/sourceaddrs/source_registry.go new file mode 100644 index 0000000..000526a --- /dev/null +++ b/sourceaddrs/source_registry.go @@ -0,0 +1,133 @@ +package sourceaddrs + +import ( + "fmt" + "path" + + "github.com/apparentlymart/go-versions/versions" + regaddr "github.com/hashicorp/terraform-registry-address" +) + +// RegistrySource represents a source address referring to a set of versions +// published in a Module Registry. +// +// A RegistrySource is an extra indirection over a set of [RemoteSource] +// addresses, which Terraform chooses from based on version constraints given +// alongside the registry source address. +type RegistrySource struct { + pkg regaddr.ModulePackage + + // subPath is an optional subdirectory or sub-file path beneath the + // prefix of the selected underlying source address. + // + // Sub-paths are always slash-separated paths interpreted relative to + // the root of the package, and may not include ".." or "." segments. + // The sub-path is empty to indicate the root directory of the package. + subPath string +} + +// sourceSigil implements Source +func (s RegistrySource) sourceSigil() {} + +var _ Source = RegistrySource{} + +func looksLikeRegistrySource(given string) bool { + _, err := regaddr.ParseModuleSource(given) + return err == nil +} + +// ParseRegistrySource parses the given string as a registry source address, +// or returns an error if it does not use the correct syntax for interpretation +// as a registry source address. 
+func ParseRegistrySource(given string) (RegistrySource, error) { + pkgRaw, subPathRaw := splitSubPath(given) + subPath, err := normalizeSubpath(subPathRaw) + if err != nil { + return RegistrySource{}, fmt.Errorf("invalid sub-path: %w", err) + } + + // We delegate the package address parsing to the shared library + // terraform-registry-address, but then we'll impose some additional + // validation and normalization over that since we're intentionally + // being a little stricter than Terraform has historically been, + // prioritizing "one obvious way to do it" over many esoteric variations. + pkgOnlyAddr, err := regaddr.ParseModuleSource(pkgRaw) + if err != nil { + return RegistrySource{}, err + } + if pkgOnlyAddr.Subdir != "" { + // Should never happen, because we split the subpath off above. + panic("post-split registry address still has subdir") + } + + return RegistrySource{ + pkg: pkgOnlyAddr.Package, + subPath: subPath, + }, nil +} + +// ParseRegistryPackage parses the given string as a registry package address, +// which is the same syntax as a registry source address with no sub-path +// portion. 
+func ParseRegistryPackage(given string) (regaddr.ModulePackage, error) { + srcAddr, err := ParseRegistrySource(given) + if err != nil { + return regaddr.ModulePackage{}, err + } + if srcAddr.subPath != "" { + return regaddr.ModulePackage{}, fmt.Errorf("remote package address may not have a sub-path") + } + return srcAddr.pkg, nil +} + +func (s RegistrySource) String() string { + if s.subPath != "" { + return s.pkg.String() + "//" + s.subPath + } + return s.pkg.String() +} + +func (s RegistrySource) SupportsVersionConstraints() bool { + return true +} + +func (s RegistrySource) Package() regaddr.ModulePackage { + return s.pkg +} + +func (s RegistrySource) SubPath() string { + return s.subPath +} + +// Versioned combines the receiver with a specific selected version number to +// produce a final source address that can be used to resolve to a single +// source package. +func (s RegistrySource) Versioned(selectedVersion versions.Version) RegistrySourceFinal { + return RegistrySourceFinal{ + src: s, + version: selectedVersion, + } +} + +// FinalSourceAddr takes the result of looking up the package portion of the +// receiver in a module registry and appends the receiver's sub-path to the +// returned sub-path to produce the final fully-qualified remote source address. +func (s RegistrySource) FinalSourceAddr(realSource RemoteSource) RemoteSource { + if s.subPath == "" { + return realSource // Easy case + } + if realSource.subPath == "" { + return RemoteSource{ + pkg: realSource.pkg, + subPath: s.subPath, + } + } + // If we get here then both addresses have a sub-path, so we need to + // combine them together. This assumes that the "real source" from the + // module registry will always refer to a directory, which is a fundamental + // assumption of the module registry protocol.
+ return RemoteSource{ + pkg: realSource.pkg, + subPath: path.Join(realSource.subPath, s.subPath), + } +} diff --git a/sourceaddrs/source_registry_final.go b/sourceaddrs/source_registry_final.go new file mode 100644 index 0000000..7ed9338 --- /dev/null +++ b/sourceaddrs/source_registry_final.go @@ -0,0 +1,64 @@ +package sourceaddrs + +import ( + "github.com/apparentlymart/go-versions/versions" + regaddr "github.com/hashicorp/terraform-registry-address" +) + +// RegistrySourceFinal annotates a [RegistrySource] with a specific version +// selection, thereby making it sufficient for selecting a single real source +// package. +// +// Registry sources are weird in comparison to others in that they must be +// combined with a version constraint to select from possibly many available +// versions. After completing the version selection process, the result can +// be represented as a RegistrySourceFinal that carries the selected version +// number along with the originally-specified source address. +type RegistrySourceFinal struct { + src RegistrySource + version versions.Version +} + +// NOTE: RegistrySourceFinal is intentionally not a Source, because it isn't +// possible to represent a final registry source as a single source address +// string. +var _ FinalSource = RegistrySourceFinal{} + +// finalSourceSigil implements FinalSource +func (s RegistrySourceFinal) finalSourceSigil() {} + +// Unversioned returns the address of the registry package that this final +// address is a version of. 
+func (s RegistrySourceFinal) Unversioned() RegistrySource { + return s.src +} + +func (s RegistrySourceFinal) Package() regaddr.ModulePackage { + return s.src.Package() +} + +func (s RegistrySourceFinal) SubPath() string { + return s.src.SubPath() +} + +func (s RegistrySourceFinal) SelectedVersion() versions.Version { + return s.version +} + +func (s RegistrySourceFinal) String() string { + pkgAddr := s.src.Package() + subPath := s.src.SubPath() + if subPath != "" { + return pkgAddr.String() + "@" + s.version.String() + "//" + subPath + } + return pkgAddr.String() + "@" + s.version.String() +} + +// FinalSourceAddr takes the result of looking up the package portion of the +// receiver in a module registry and appends the receiver's sub-path to the +// returned sub-path to produce the final fully-qualified remote source address. +func (s RegistrySourceFinal) FinalSourceAddr(realSource RemoteSource) RemoteSource { + // The version number doesn't have any impact on how we combine the + // paths together, so we can just delegate to our unversioned equivalent. + return s.Unversioned().FinalSourceAddr(realSource) +} diff --git a/sourceaddrs/source_remote.go b/sourceaddrs/source_remote.go new file mode 100644 index 0000000..8ba5858 --- /dev/null +++ b/sourceaddrs/source_remote.go @@ -0,0 +1,221 @@ +package sourceaddrs + +import ( + "fmt" + "net/url" + "regexp" + "strings" +) + +type RemoteSource struct { + pkg RemotePackage + subPath string +} + +var _ Source = RemoteSource{} +var _ FinalSource = RemoteSource{} + +// sourceSigil implements Source +func (RemoteSource) sourceSigil() {} + +// finalSourceSigil implements FinalSource +func (RemoteSource) finalSourceSigil() {} + +// ParseRemoteSource parses the given string as a remote source address, +// or returns an error if it does not use the correct syntax for interpretation +// as a remote source address.
+func ParseRemoteSource(given string) (RemoteSource, error) { + expandedGiven := given + for _, shorthand := range remoteSourceShorthands { + replacement, ok, err := shorthand(given) + if err != nil { + return RemoteSource{}, err + } + if ok { + expandedGiven = replacement + } + } + + pkgRaw, subPathRaw := splitSubPath(expandedGiven) + subPath, err := normalizeSubpath(subPathRaw) + if err != nil { + return RemoteSource{}, fmt.Errorf("invalid sub-path: %w", err) + } + + // Once we've dealt with all the "shorthand" business, our address + // should be in the form sourcetype::url, where "sourcetype::" is + // optional and defaults to matching the URL scheme if not present. + var sourceType string + if matches := remoteSourceTypePattern.FindStringSubmatch(pkgRaw); len(matches) != 0 { + sourceType = matches[1] + pkgRaw = matches[2] + } + + u, err := url.Parse(pkgRaw) + if err != nil { + return RemoteSource{}, fmt.Errorf("invalid URL syntax in %q: %w", pkgRaw, err) + } + if u.Scheme == "" { + return RemoteSource{}, fmt.Errorf("must contain an absolute URL with a scheme") + } + if u.User != nil { + return RemoteSource{}, fmt.Errorf("must not use username or password in URL portion") + } + + u.Scheme = strings.ToLower(u.Scheme) + sourceType = strings.ToLower(sourceType) + + if sourceType == "" { + // sourceType defaults to the URL scheme if not explicitly set. + sourceType = u.Scheme + } else if sourceType == u.Scheme { + // This catches weirdo constructions like: https::https://example.com/ + return RemoteSource{}, fmt.Errorf("don't specify redundant %q source type for %q URL", sourceType, u.Scheme) + } + + _, err = url.ParseQuery(u.RawQuery) + if err != nil { + return RemoteSource{}, fmt.Errorf("invalid URL query string syntax in %q: %w", pkgRaw, err) + } + + return makeRemoteSource(sourceType, u, subPath) +} + +// MakeRemoteSource constructs a [RemoteSource] from its component parts. 
+// +// This is useful for deriving one remote source from another, by disassembling +// the original address into its component parts, modifying those parts, and +// then combining the modified parts back together with this function. +func MakeRemoteSource(sourceType string, u *url.URL, subPath string) (RemoteSource, error) { + var err error + subPath, err = normalizeSubpath(subPath) + if err != nil { + return RemoteSource{}, fmt.Errorf("invalid sub-path: %w", err) + } + + copyU := *u // shallow copy so we can safely modify + + return makeRemoteSource(sourceType, &copyU, subPath) +} + +func makeRemoteSource(sourceType string, u *url.URL, subPath string) (RemoteSource, error) { + typeImpl, ok := remoteSourceTypes[sourceType] + if !ok { + if sourceType == u.Scheme { + // In this case the user didn't actually specify a source type, + // so we won't confuse them by mentioning it. + return RemoteSource{}, fmt.Errorf("unsupported URL scheme %q", u.Scheme) + } else { + return RemoteSource{}, fmt.Errorf("unsupported package source type %q", sourceType) + } + } + + err := typeImpl.PrepareURL(u) + if err != nil { + return RemoteSource{}, err + } + + return RemoteSource{ + pkg: RemotePackage{ + sourceType: sourceType, + url: *u, + }, + subPath: subPath, + }, nil +} + +// String implements Source +func (s RemoteSource) String() string { + return s.pkg.subPathString(s.subPath) +} + +func (s RemoteSource) SupportsVersionConstraints() bool { + return false +} + +func (s RemoteSource) Package() RemotePackage { + return s.pkg +} + +func (s RemoteSource) SubPath() string { + return s.subPath +} + +type remoteSourceShorthand func(given string) (normed string, ok bool, err error) + +var remoteSourceShorthands = []remoteSourceShorthand{ + func(given string) (string, bool, error) { + // Allows a github.com repository to be presented in a scheme-less + // format like github.com/organization/repository/path, which we'll + // turn into a git:: source string selecting the repository's main +
// branch. + // + // This is intentionally compatible with what's accepted by the + // "GitHub detector" in the go-getter library, so that module authors + // can specify GitHub repositories in the same way both for the + // old Terraform module installer and the newer source bundle builder. + + if !strings.HasPrefix(given, "github.com/") { + return "", false, nil + } + + parts := strings.Split(given, "/") + if len(parts) < 3 { + return "", false, fmt.Errorf("GitHub.com shorthand addresses must start with github.com/organization/repository") + } + + urlStr := "https://" + strings.Join(parts[:3], "/") + if !strings.HasSuffix(urlStr, ".git") { + urlStr += ".git" + } + + if len(parts) > 3 { + // The remaining parts will become the sub-path portion, since + // the repository as a whole is the source package. + urlStr += "//" + strings.Join(parts[3:], "/") + } + + return "git::" + urlStr, true, nil + }, + func(given string) (string, bool, error) { + // Allows a gitlab.com repository to be presented in a scheme-less + // format like gitlab.com/organization/repository/path, which we'll + // turn into a git:: source string selecting the repository's main + // branch. + // + // This is intentionally compatible with what's accepted by the + // "GitLab detector" in the go-getter library, so that module authors + // can specify GitLab repositories in the same way both for the + // old Terraform module installer and the newer source bundle builder. + + if !strings.HasPrefix(given, "gitlab.com/") { + return "", false, nil + } + + parts := strings.Split(given, "/") + if len(parts) < 3 { + return "", false, fmt.Errorf("GitLab.com shorthand addresses must start with gitlab.com/organization/repository") + } + + urlStr := "https://" + strings.Join(parts[:3], "/") + if !strings.HasSuffix(urlStr, ".git") { + urlStr += ".git" + } + + if len(parts) > 3 { + // The remaining parts will become the sub-path portion, since + // the repository as a whole is the source package.
+ urlStr += "//" + strings.Join(parts[3:], "/") + // NOTE: We can't actually get here if there are exactly four + // parts, because gitlab.com is also a Terraform module registry + // and so gitlab.com/a/b/c must be interpreted as a registry + // module address instead of a GitLab repository address. Users + // must write an explicit git source address if they intend to + // refer to a Git repository. + } + + return "git::" + urlStr, true, nil + }, +} + +var remoteSourceTypePattern = regexp.MustCompile(`^([A-Za-z0-9]+)::(.+)$`) diff --git a/sourceaddrs/source_remote_types.go b/sourceaddrs/source_remote_types.go new file mode 100644 index 0000000..1c284a0 --- /dev/null +++ b/sourceaddrs/source_remote_types.go @@ -0,0 +1,120 @@ +package sourceaddrs + +import ( + "fmt" + "net/url" + "strings" +) + +type remoteSourceType interface { + PrepareURL(u *url.URL) error +} + +var remoteSourceTypes = map[string]remoteSourceType{ + "git": gitSourceType{}, + "http": httpSourceType{}, + "https": httpSourceType{}, +} + +type gitSourceType struct{} + +func (gitSourceType) PrepareURL(u *url.URL) error { + // The Git source type requires one of the URL schemes that Git itself + // supports. We're also currently being more rigid than Git to ease + // initial implementation. We will extend this over time as the source + // bundle mechanism graduates from experimental to real use. + + if u.Scheme != "ssh" && u.Scheme != "https" { + // NOTE: We don't support "git" or "http" here because we require + // source code to originate from sources that can support + // authentication and encryption, to reduce the risk of mitm attacks + // introducing malicious code. 
+ return fmt.Errorf("a Git repository URL must use either the https or ssh scheme") + } + + qs := u.Query() + for k, vs := range qs { + if k != "ref" { + return fmt.Errorf("a Git repository URL's query string may include only the argument 'ref'") + } + if len(vs) > 1 { + return fmt.Errorf("a Git repository URL's query string may include only one 'ref' argument") + } + } + + return nil +} + +type httpSourceType struct{} + +func (httpSourceType) PrepareURL(u *url.URL) error { + if u.Scheme == "http" { + return fmt.Errorf("source package addresses may not use unencrypted HTTP") + } + if u.Scheme != "https" { + return fmt.Errorf("invalid scheme %q for https source type", u.Scheme) + } + + // For our initial implementation the address must be something that + // go-getter would've recognized as referring to a gzipped tar archive, + // to reduce the scope of the initial source bundler fetcher + // implementations. We may extend this later, but if we do then we should + // use go-getter's syntax for anything go-getter also supports. + // + // Go-getter's treatment of HTTP is quite odd, because by default it does + // an extra module-registry-like indirection where it expects the + // given URL to return a header pointing to another source address type. + // We don't intend to support that here, but we do want to support the + // behavior of go-getter's special case for URLs whose paths end with + // suffixes that match those typically used for archives, and its magical + // treatment of the "archive" query string argument as a way to force + // treatment of archives. This does mean that we can't fetch from any + // URL that _really_ needs an "archive" query string parameter, but that's + // been true for Terraform for many years and hasn't been a problem, so + // we'll accept that for now and wait to see if any need for it arises. 
+ // + // Ideally we'd just make an HTTP request and then decide what to do based + // on the Content-Type of the response, like a sensible HTTP client would, + // but for now compatibility with go-getter is more important than being + // sensible. + + qs := u.Query() + if vs := qs["archive"]; len(vs) > 0 { + if len(vs) > 1 { + return fmt.Errorf("a HTTPS URL's query string may include only one 'archive' argument") + } + if vs[0] != "tar.gz" && vs[0] != "tgz" { + return fmt.Errorf("the special 'archive' query string argument must be set to 'tgz' if present") + } + if vs[0] == "tar.gz" { + qs.Set("archive", "tgz") // normalize on the shorter form + } + // NOTE: We don't remove the "archive" argument here because the code + // which eventually fetches this will need it to understand what kind + // of archive it's supposed to be fetching, but that final client ought + // to remove this argument itself to avoid potentially confusing the + // remote server, since this is an argument reserved for go-getter and + // for the subset of go-getter's syntax we're implementing here. + u.RawQuery = qs.Encode() + } else { + p := u.EscapedPath() + if !(strings.HasSuffix(p, ".tar.gz") || strings.HasSuffix(p, ".tgz")) { + return fmt.Errorf("a HTTPS URL's path must end with either .tar.gz or .tgz") + } + } + + if len(qs["checksum"]) != 0 { + // This is another go-getter oddity. go-getter would treat this as + // a request to verify that the result matches the given checksum + // and not send this argument to the server. However, go-getter actually + // doesn't support this (it returns an error) when it's dealing with + // an archive. We'll explicitly reject it to avoid folks being + // misled into thinking that it _is_ working, and thus believing + // they've achieved a verification that isn't present, though we + // might relax this later since go-getter wouldn't have allowed this + // anyway. 
+ return fmt.Errorf("a HTTPS URL's query string must not include 'checksum' argument") + } + + return nil +} diff --git a/sourceaddrs/source_test.go b/sourceaddrs/source_test.go new file mode 100644 index 0000000..1b003d8 --- /dev/null +++ b/sourceaddrs/source_test.go @@ -0,0 +1,559 @@ +package sourceaddrs + +import ( + "fmt" + "net/url" + "reflect" + "testing" + + regaddr "github.com/hashicorp/terraform-registry-address" + svchost "github.com/hashicorp/terraform-svchost" +) + +func TestParseSource(t *testing.T) { + tests := []struct { + Given string + Want Source + WantErr string + }{ + { + Given: "", + WantErr: `a valid source address is required`, + }, + { + Given: " hello", + WantErr: `source address must not have leading or trailing spaces`, + }, + { + Given: "hello ", + WantErr: `source address must not have leading or trailing spaces`, + }, + { + Given: "./boop", + Want: LocalSource{ + relPath: "./boop", + }, + }, + { + Given: "./boop/../beep", + WantErr: `invalid local source address "./boop/../beep": relative path must be written in canonical form "./beep"`, + }, + { + Given: "../boop", + Want: LocalSource{ + relPath: "../boop", + }, + }, + { + Given: "../boop/../beep", + WantErr: `invalid local source address "../boop/../beep": relative path must be written in canonical form "../beep"`, + }, + { + Given: "hashicorp/subnets/cidr", + Want: RegistrySource{ + pkg: regaddr.ModulePackage{ + Host: regaddr.DefaultModuleRegistryHost, + Namespace: "hashicorp", + Name: "subnets", + TargetSystem: "cidr", + }, + }, + }, + { + Given: "hashicorp/subnets/cidr//blah/blah", + Want: RegistrySource{ + pkg: regaddr.ModulePackage{ + Host: regaddr.DefaultModuleRegistryHost, + Namespace: "hashicorp", + Name: "subnets", + TargetSystem: "cidr", + }, + subPath: "blah/blah", + }, + }, + { + Given: "hashicorp/subnets/cidr//blah/blah/../bloop", + WantErr: `invalid module registry source address "hashicorp/subnets/cidr//blah/blah/../bloop": invalid sub-path: must be slash-separated 
relative path without any .. or . segments`, + }, + { + Given: "terraform.example.com/bleep/bloop/blorp", + Want: RegistrySource{ + pkg: regaddr.ModulePackage{ + Host: svchost.Hostname("terraform.example.com"), + Namespace: "bleep", + Name: "bloop", + TargetSystem: "blorp", + }, + }, + }, + { + Given: "テラフォーム.example.com/bleep/bloop/blorp", + Want: RegistrySource{ + pkg: regaddr.ModulePackage{ + Host: svchost.Hostname("xn--jckxc1b4b2b6g.example.com"), + Namespace: "bleep", + Name: "bloop", + TargetSystem: "blorp", + }, + }, + }, + { + Given: "git::https://github.com/hashicorp/go-slug.git", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git"), + }, + }, + }, + { + Given: "git::https://github.com/hashicorp/go-slug.git//blah/blah", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git"), + }, + subPath: "blah/blah", + }, + }, + { + Given: "git::https://github.com/hashicorp/go-slug.git?ref=main", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git?ref=main"), + }, + }, + }, + { + Given: "git::https://github.com/hashicorp/go-slug.git?ref=main&ref=main", + WantErr: `invalid remote source address "git::https://github.com/hashicorp/go-slug.git?ref=main&ref=main": a Git repository URL's query string may include only one 'ref' argument`, + }, + { + Given: "git::https://github.com/hashicorp/go-slug.git//blah/blah?ref=main", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git?ref=main"), + }, + subPath: "blah/blah", + }, + }, + { + Given: "git::https://github.com/hashicorp/go-slug.git?sshkey=blahblah", + WantErr: `invalid remote source address "git::https://github.com/hashicorp/go-slug.git?sshkey=blahblah": a Git repository URL's query string may include only the argument 'ref'`, 
+ }, + { + Given: "git::https://github.com/hashicorp/go-slug.git?depth=1", + WantErr: `invalid remote source address "git::https://github.com/hashicorp/go-slug.git?depth=1": a Git repository URL's query string may include only the argument 'ref'`, + }, + { + Given: "git::https://git@github.com/hashicorp/go-slug.git", + WantErr: `invalid remote source address "git::https://git@github.com/hashicorp/go-slug.git": must not use username or password in URL portion`, + }, + { + Given: "git::https://git:blit@github.com/hashicorp/go-slug.git", + WantErr: `invalid remote source address "git::https://git:blit@github.com/hashicorp/go-slug.git": must not use username or password in URL portion`, + }, + { + Given: "git::https://:blit@github.com/hashicorp/go-slug.git", + WantErr: `invalid remote source address "git::https://:blit@github.com/hashicorp/go-slug.git": must not use username or password in URL portion`, + }, + { + Given: "git::ssh://github.com/hashicorp/go-slug.git", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("ssh://github.com/hashicorp/go-slug.git"), + }, + }, + }, + { + Given: "git::ssh://github.com/hashicorp/go-slug.git//blah/blah?ref=main", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("ssh://github.com/hashicorp/go-slug.git?ref=main"), + }, + subPath: "blah/blah", + }, + }, + { + Given: "git://github.com/hashicorp/go-slug.git", + WantErr: `invalid remote source address "git://github.com/hashicorp/go-slug.git": a Git repository URL must use either the https or ssh scheme`, + }, + { + Given: "git::git://github.com/hashicorp/go-slug.git", + WantErr: `invalid remote source address "git::git://github.com/hashicorp/go-slug.git": don't specify redundant "git" source type for "git" URL`, + }, + { + Given: "github.com/hashicorp/go-slug.git", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git"), + }, + }, + }, + { + 
Given: "github.com/hashicorp/go-slug", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git"), + }, + }, + }, + { + Given: "github.com/hashicorp/go-slug/bleep", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git"), + }, + subPath: "bleep", + }, + }, + { + Given: "gitlab.com/hashicorp/go-slug.git", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://gitlab.com/hashicorp/go-slug.git"), + }, + }, + }, + { + Given: "gitlab.com/hashicorp/go-slug", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://gitlab.com/hashicorp/go-slug.git"), + }, + }, + }, + { + Given: "gitlab.com/hashicorp/go-slug/bleep", + // NOTE: gitlab.com _also_ hosts a Terraform Module registry, and so + // the registry address interpretation takes precedence if it + // matches. Users must write an explicit git:: source address if + // they want this to be interpreted as a Git source address. + Want: RegistrySource{ + pkg: regaddr.ModulePackage{ + Host: svchost.Hostname("gitlab.com"), + Namespace: "hashicorp", + Name: "go-slug", + TargetSystem: "bleep", + }, + }, + }, + { + // This is the explicit Git source address version of the previous + // case, overriding the default interpretation as module registry. + Given: "git::https://gitlab.com/hashicorp/go-slug//bleep", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://gitlab.com/hashicorp/go-slug"), + }, + subPath: "bleep", + }, + }, + { + Given: "gitlab.com/hashicorp/go-slug/bleep/bloop", + // Two or more subpath portions is fine for Git interpretation, + // because that's not ambigious with module registry. This is + // an annoying inconsistency but necessary for backward + // compatibility with go-getter's interpretations. 
+ Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://gitlab.com/hashicorp/go-slug.git"), + }, + subPath: "bleep/bloop", + }, + }, + { + Given: "https://example.com/foo.tar.gz", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "https", + url: *mustParseURL("https://example.com/foo.tar.gz"), + }, + }, + }, + { + Given: "https://example.com/foo.tar.gz//bleep/bloop", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "https", + url: *mustParseURL("https://example.com/foo.tar.gz"), + }, + subPath: "bleep/bloop", + }, + }, + { + Given: "https://example.com/foo.tar.gz?something=anything", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "https", + url: *mustParseURL("https://example.com/foo.tar.gz?something=anything"), + }, + }, + }, + { + Given: "https://example.com/foo.tar.gz//bleep/bloop?something=anything", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "https", + url: *mustParseURL("https://example.com/foo.tar.gz?something=anything"), + }, + subPath: "bleep/bloop", + }, + }, + { + Given: "https://example.com/foo.tgz", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "https", + url: *mustParseURL("https://example.com/foo.tgz"), + }, + }, + }, + { + Given: "https://example.com/foo?archive=tar.gz", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "https", + url: *mustParseURL("https://example.com/foo?archive=tgz"), + }, + }, + }, + { + Given: "https://example.com/foo?archive=tgz", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "https", + url: *mustParseURL("https://example.com/foo?archive=tgz"), + }, + }, + }, + { + Given: "https://example.com/foo.zip", + WantErr: `invalid remote source address "https://example.com/foo.zip": a HTTPS URL's path must end with either .tar.gz or .tgz`, + }, + { + Given: "https://example.com/foo?archive=zip", + WantErr: `invalid remote source address "https://example.com/foo?archive=zip": the special 'archive' query string argument must 
be set to 'tgz' if present`, + }, + { + Given: "http://example.com/foo.tar.gz", + WantErr: `invalid remote source address "http://example.com/foo.tar.gz": source package addresses may not use unencrypted HTTP`, + }, + { + Given: "http::http://example.com/foo.tar.gz", + WantErr: `invalid remote source address "http::http://example.com/foo.tar.gz": don't specify redundant "http" source type for "http" URL`, + }, + { + Given: "https::https://example.com/foo.tar.gz", + WantErr: `invalid remote source address "https::https://example.com/foo.tar.gz": don't specify redundant "https" source type for "https" URL`, + }, + { + Given: "https://foo@example.com/foo.tgz", + WantErr: `invalid remote source address "https://foo@example.com/foo.tgz": must not use username or password in URL portion`, + }, + { + Given: "https://foo:bar@example.com/foo.tgz", + WantErr: `invalid remote source address "https://foo:bar@example.com/foo.tgz": must not use username or password in URL portion`, + }, + { + Given: "https://:bar@example.com/foo.tgz", + WantErr: `invalid remote source address "https://:bar@example.com/foo.tgz": must not use username or password in URL portion`, + }, + } + + for _, test := range tests { + t.Run(test.Given, func(t *testing.T) { + got, gotErr := ParseSource(test.Given) + + if test.WantErr != "" { + if gotErr == nil { + t.Fatalf("unexpected success\ngot result: %s (%T)\nwant error: %s", got, got, test.WantErr) + } + if got, want := gotErr.Error(), test.WantErr; got != want { + t.Fatalf("wrong error\ngot error: %s\nwant error: %s", got, want) + } + return + } + + if gotErr != nil { + t.Fatalf("unexpected error: %s", gotErr) + } + + // Two addresses are equal if they have the same string representation + // and the same dynamic type. 
+ gotStr := got.String() + wantStr := test.Want.String() + if gotStr != wantStr { + t.Errorf("wrong result\ngot: %s\nwant: %s", gotStr, wantStr) + } + + if gotType, wantType := reflect.TypeOf(got), reflect.TypeOf(test.Want); gotType != wantType { + t.Errorf("wrong result type\ngot: %s\nwant: %s", gotType, wantType) + } + }) + } +} + +func TestResolveRelativeSource(t *testing.T) { + tests := []struct { + Base Source + Rel Source + Want Source + WantErr string + }{ + { + Base: MustParseSource("./a/b"), + Rel: MustParseSource("../c"), + Want: MustParseSource("./a/c"), + }, + { + Base: MustParseSource("./a"), + Rel: MustParseSource("../c"), + Want: MustParseSource("./c"), + }, + { + Base: MustParseSource("./a"), + Rel: MustParseSource("../../c"), + Want: MustParseSource("../c"), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop"), + Rel: MustParseSource("git::https://github.com/hashicorp/go-slug.git//blah/blah"), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//blah/blah"), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop"), + Rel: MustParseSource("git::https://example.com/foo.git"), + Want: MustParseSource("git::https://example.com/foo.git"), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop"), + Rel: MustParseSource("../bloop"), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/bloop"), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop"), + Rel: MustParseSource("../"), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep"), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop"), + Rel: MustParseSource("../.."), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git"), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop"), + Rel: 
MustParseSource("../../../baz"), + WantErr: `invalid traversal from git::https://github.com/hashicorp/go-slug.git//beep/boop: relative path ../../../baz traverses up too many levels from source path beep/boop`, + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git"), + Rel: MustParseSource("./boop"), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//boop"), + }, + { + Base: MustParseSource("example.com/foo/bar/baz//beep/boop"), + Rel: MustParseSource("../"), + Want: MustParseSource("example.com/foo/bar/baz//beep"), + }, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("%s + %s", test.Base, test.Rel), func(t *testing.T) { + got, gotErr := ResolveRelativeSource(test.Base, test.Rel) + + if test.WantErr != "" { + if gotErr == nil { + t.Fatalf("unexpected success\ngot result: %s (%T)\nwant error: %s", got, got, test.WantErr) + } + if got, want := gotErr.Error(), test.WantErr; got != want { + t.Fatalf("wrong error\ngot error: %s\nwant error: %s", got, want) + } + return + } + + if gotErr != nil { + t.Fatalf("unexpected error: %s", gotErr) + } + + // Two addresses are equal if they have the same string representation + // and the same dynamic type. 
+ gotStr := got.String() + wantStr := test.Want.String() + if gotStr != wantStr { + t.Errorf("wrong result\ngot: %s\nwant: %s", gotStr, wantStr) + } + + if gotType, wantType := reflect.TypeOf(got), reflect.TypeOf(test.Want); gotType != wantType { + t.Errorf("wrong result type\ngot: %s\nwant: %s", gotType, wantType) + } + }) + } +} + +func TestSourceFilename(t *testing.T) { + tests := []struct { + Addr Source + Want string + }{ + { + MustParseSource("./foo.tf"), + "foo.tf", + }, + { + MustParseSource("./boop/foo.tf"), + "foo.tf", + }, + { + MustParseSource("git::https://example.com/foo.git//foo.tf"), + "foo.tf", + }, + { + MustParseSource("git::https://example.com/foo.git//boop/foo.tf"), + "foo.tf", + }, + { + MustParseSource("git::https://example.com/foo.git//boop/foo.tf?ref=main"), + "foo.tf", + }, + { + MustParseSource("hashicorp/subnets/cidr//main.tf"), + "main.tf", + }, + { + MustParseSource("hashicorp/subnets/cidr//test/simple.tf"), + "simple.tf", + }, + } + + for _, test := range tests { + t.Run(test.Addr.String(), func(t *testing.T) { + got := SourceFilename(test.Addr) + if got != test.Want { + t.Errorf( + "wrong result\naddr: %s\ngot: %s\nwant: %s", + test.Addr, got, test.Want, + ) + } + }) + } +} + +func mustParseURL(s string) *url.URL { + ret, err := url.Parse(s) + if err != nil { + panic(err) + } + return ret +} diff --git a/sourceaddrs/subpath.go b/sourceaddrs/subpath.go new file mode 100644 index 0000000..492cbfe --- /dev/null +++ b/sourceaddrs/subpath.go @@ -0,0 +1,128 @@ +package sourceaddrs + +import ( + "fmt" + "io/fs" + "path" + "strings" +) + +// ValidSubPath returns true if the given string is a valid sub-path string +// as could be included in either a remote or registry source address. +// +// A sub-path is valid if it's a slash-separated sequence of path segments +// without a leading or trailing slash and without any "." or ".." segments, +// since a sub-path can only traverse downwards from the root of a package. 
+func ValidSubPath(s string) bool {
+	_, err := normalizeSubpath(s)
+	return err == nil
+}
+
+// normalizeSubpath interprets the given string as a package "sub-path",
+// returning a normalized form of the path or an error if the string does
+// not use correct syntax.
+func normalizeSubpath(given string) (string, error) {
+	if given == "" {
+		// The empty string is how we represent the absence of a subpath,
+		// which represents the root directory of a package.
+		return "", nil
+	}
+
+	// Our definition of "sub-path" aligns with the definition used by Go's
+	// virtual filesystem abstraction, since our "module package" idea
+	// is also essentially just a virtual filesystem.
+	// This definition prohibits "." and ".." segments and therefore prevents
+	// upward path traversal.
+	if !fs.ValidPath(given) {
+		return "", fmt.Errorf("must be slash-separated relative path without any .. or . segments")
+	}
+
+	clean := path.Clean(given)
+
+	// Go's path wrangling uses "." to represent "root directory", but
+	// we represent that by omitting the subpath entirely, so we forbid that
+	// too even though Go would consider it valid.
+	if clean == "." {
+		return "", fmt.Errorf("must be slash-separated relative path without any .. or . segments")
+	}
+
+	return clean, nil
+}
+
+// subPathAsLocalSource interprets the given subpath (which should be a value
+// previously returned from [normalizeSubpath]) as a local source address
+// relative to the root of the package that the sub-path was presented against.
+func subPathAsLocalSource(p string) LocalSource {
+	// Local source addresses are _mostly_ a superset of what we allow in
+	// sub-paths, except that downward traversals must always start with
+	// "./" to disambiguate from other address types.
+	return LocalSource{relPath: "./" + p}
+}
+
+// splitSubPath takes a source address that would be accepted either as a
+// remote source address or a registry source address and returns a tuple of
+// its package address and its sub-path portion.
+//
+// For example:
+//	dom.com/path/?q=p		=> "dom.com/path/?q=p", ""
+//	proto://dom.com/path//*?q=p	=> "proto://dom.com/path?q=p", "*"
+//	proto://dom.com/path//path2?q=p	=> "proto://dom.com/path?q=p", "path2"
+//
+// This function DOES NOT validate or normalize the sub-path. Pass the second
+// return value to [normalizeSubpath] to check if it is valid and to obtain
+// its normalized form.
+func splitSubPath(src string) (string, string) {
+	// This is careful to handle the query string portion of a remote source
+	// address. That's not actually necessary for a module registry address
+	// because those don't have query strings anyway, but it doesn't _hurt_
+	// to check for a query string in that case and allows us to reuse this
+	// function for both cases.
+
+	// The URL might contain another URL in its query parameters, so we only
+	// scan for the subdir delimiter before the first "?".
+	stop := len(src)
+	if idx := strings.Index(src, "?"); idx > -1 {
+		stop = idx
+	}
+
+	// Calculate an offset to avoid accidentally marking the scheme
+	// as the dir.
+	var offset int
+	if idx := strings.Index(src[:stop], "://"); idx > -1 {
+		offset = idx + 3
+	}
+
+	// First see if we even have an explicit subdir
+	idx := strings.Index(src[offset:stop], "//")
+	if idx == -1 {
+		return src, ""
+	}
+
+	idx += offset
+	subdir := src[idx+2:]
+	src = src[:idx]
+
+	// Next, check if we have query parameters and push them onto the
+	// URL.
+	if idx = strings.Index(subdir, "?"); idx > -1 {
+		query := subdir[idx:]
+		subdir = subdir[:idx]
+		src += query
+	}
+
+	return src, subdir
+}
+
+// joinSubPath joins the relative path "rel" onto the package sub-path
+// "subPath", returning an error if the result would traverse above the
+// root of the package. An empty result string represents the package root.
+func joinSubPath(subPath, rel string) (string, error) {
+	// NOTE(review): the local variable "new" shadows the builtin of the
+	// same name within this function.
+	new := path.Join(subPath, rel)
+	if new == "." {
+		return "", nil // the root of the package
+	}
+	// If subPath was a valid sub-path (no "." or ".."
segments) then the + // appearance of such segments in our result suggests that "rel" has + // too many upward traversals and would thus escape from its containing + // package. + if !fs.ValidPath(new) { + return "", fmt.Errorf("relative path %s traverses up too many levels from source path %s", rel, subPath) + } + return new, nil +} diff --git a/sourcebundle/builder.go b/sourcebundle/builder.go new file mode 100644 index 0000000..06734ba --- /dev/null +++ b/sourcebundle/builder.go @@ -0,0 +1,700 @@ +package sourcebundle + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "sort" + "strings" + "sync" + + "github.com/apparentlymart/go-versions/versions" + "github.com/hashicorp/go-slug/internal/ignorefiles" + "github.com/hashicorp/go-slug/sourceaddrs" + regaddr "github.com/hashicorp/terraform-registry-address" + "golang.org/x/mod/sumdb/dirhash" +) + +// Builder deals with the process of gathering source code +type Builder struct { + // targetDir is the base directory of the source bundle we're writing + // into. + targetDir string + + // fetcher is the package fetching callback we'll use to fetch remote + // packages into subdirectories of the bundle directory. + fetcher PackageFetcher + + // registryClient is the module registry client we'll use to resolve + // any module registry sources into their underlying remote package + // addresses which we can then fetch using "fetcher". + registryClient RegistryClient + + // pendingRemote is an unordered set of remote artifacts that we've + // discovered we need to analyze but have not yet done so. + pendingRemote []remoteArtifact + + // analyzed is a set of remote artifacts that we've already analyzed and + // thus already found the dependencies of. + analyzed map[remoteArtifact]struct{} + + // remotePackageDirs tracks the local directory name for each remote + // package we've already fetched. 
The keys of this map also serve as our
+	// memory of which packages we've already fetched and therefore don't need
+	// to fetch again if we find more source addresses in those packages.
+	//
+	// In our current implementation these directory names are always checksums
+	// of the content of the package, and we rely on that when building a
+	// manifest file so if a future update changes the directory naming scheme
+	// then we'll need a different solution for tracking the checksums for
+	// use in the manifest file. For external callers the local directory
+	// naming scheme is always an implementation detail that they may not
+	// rely on.
+	remotePackageDirs map[sourceaddrs.RemotePackage]string
+
+	// remotePackageMeta tracks the package metadata of each remote package
+	// we've fetched so far. This does not include any packages for which
+	// the fetcher returned no metadata.
+	remotePackageMeta map[sourceaddrs.RemotePackage]*PackageMeta
+
+	// pendingRegistry is an unordered set of registry artifacts that need to
+	// be translated into remote artifacts before further processing.
+	pendingRegistry []registryArtifact
+
+	// resolvedRegistry tracks the underlying remote source address for each
+	// selected version of each module registry package.
+	resolvedRegistry map[registryPackageVersion]sourceaddrs.RemoteSource
+
+	// registryPackageVersions caches responses from module registry calls to
+	// look up the available versions for a particular module package. Although
+	// these could potentially change while we're running, we assume that the
+	// lifetime of a particular Builder is short enough for that not to
+	// matter.
+	registryPackageVersions map[regaddr.ModulePackage]versions.List
+
+	mu sync.Mutex
+}
+
+// NewBuilder creates a new builder that will construct a source bundle in the
+// given target directory, which must already exist and be empty before any
+// work begins.
+//
+// During the lifetime of a builder the target directory must not be modified
+// or moved by anything other than the builder, including other concurrent
+// processes running on the system. The target directory is not a valid source
+// bundle until a call to [Builder.Close] returns successfully; the directory
+// may appear in an inconsistent state while the builder is working.
+func NewBuilder(targetDir string, fetcher PackageFetcher, registryClient RegistryClient) (*Builder, error) {
+	// We'll lock in our absolute path here just in case someone changes the
+	// process working directory out from under us for some reason.
+	absDir, err := filepath.Abs(targetDir)
+	if err != nil {
+		return nil, fmt.Errorf("invalid target directory: %w", err)
+	}
+	return &Builder{
+		targetDir:               absDir,
+		fetcher:                 fetcher,
+		registryClient:          registryClient,
+		analyzed:                make(map[remoteArtifact]struct{}),
+		remotePackageDirs:       make(map[sourceaddrs.RemotePackage]string),
+		remotePackageMeta:       make(map[sourceaddrs.RemotePackage]*PackageMeta),
+		resolvedRegistry:        make(map[registryPackageVersion]sourceaddrs.RemoteSource),
+		registryPackageVersions: make(map[regaddr.ModulePackage]versions.List),
+	}, nil
+}
+
+// AddRemoteSource incorporates the package containing the given remote source
+// into the bundle, and then analyzes the source artifact for dependencies
+// using the given dependency finder.
+//
+// If the returned diagnostics contains errors then the bundle is left in an
+// inconsistent state and must not be used for any other calls.
+func (b *Builder) AddRemoteSource(ctx context.Context, addr sourceaddrs.RemoteSource, depFinder DependencyFinder) Diagnostics {
+	if b.targetDir == "" {
+		// The builder has been closed, so cannot be modified further.
+		// This is always a bug in the caller, which should discard a builder
+		// as soon as it's been closed.
+ panic("AddRemoteSource on closed sourcebundle.Builder") + } + + af := remoteArtifact{addr, depFinder} + b.mu.Lock() + if _, exists := b.analyzed[af]; exists { + // Nothing further to do with this one, then. + // NOTE: This early check is just an optimization; b.resolvePending + // will re-check whether each queued item has already been analyzed + // anyway, so this just avoids growing b.pendingRemote if possible, + // since once something has become analyzed it never becomes + // "un-analyzed" again. + b.mu.Unlock() + return nil + } + b.pendingRemote = append(b.pendingRemote, af) + b.mu.Unlock() + + return b.resolvePending(ctx) +} + +// AddRegistrySource incorporates the registry metadata for the given address +// and the package associated with the latest version in allowedVersions +// into the bundle, and then analyzes the new artifact for dependencies +// using the given dependency finder. +// +// If you have already selected a specific version to install, consider using +// [Builder.AddFinalRegistrySource] instead. +// +// If the returned diagnostics contains errors then the bundle is left in an +// inconsistent state and must not be used for any other calls. +func (b *Builder) AddRegistrySource(ctx context.Context, addr sourceaddrs.RegistrySource, allowedVersions versions.Set, depFinder DependencyFinder) Diagnostics { + if b.targetDir == "" { + // The builder has been closed, so cannot be modified further. + // This is always a bug in the caller, which should discard a builder + // as soon as it's been closed. 
+ panic("AddRegistrySource on closed sourcebundle.Builder") + } + + b.mu.Lock() + b.pendingRegistry = append(b.pendingRegistry, registryArtifact{addr, allowedVersions, depFinder}) + b.mu.Unlock() + + return b.resolvePending(ctx) +} + +// AddFinalRegistrySource is a variant of [Builder.AddRegistrySource] which +// takes an already-selected version of a registry source, instead of taking +// a version constraint and then selecting the latest available version +// matching that constraint. +// +// This function still asks the registry for its set of available versions for +// the unversioned package first, to ensure that the results from installing +// from a final source will always be consistent with those from installing +// from a not-yet-resolved registry source. +func (b *Builder) AddFinalRegistrySource(ctx context.Context, addr sourceaddrs.RegistrySourceFinal, depFinder DependencyFinder) Diagnostics { + // We handle this just by turning the version selection into an exact + // version set and then installing from that as normal. + allowedVersions := versions.Only(addr.SelectedVersion()) + return b.AddRegistrySource(ctx, addr.Unversioned(), allowedVersions, depFinder) +} + +// Close ensures that the target directory is in a valid and consistent state +// to be used as a source bundle and then returns an object providing the +// read-only API for that bundle. +// +// After calling Close the receiving builder becomes invalid and must not be +// used any further. +func (b *Builder) Close() (*Bundle, error) { + b.mu.Lock() + if b.targetDir == "" { + b.mu.Unlock() + panic("Close on already-closed sourcebundle.Builder") + } + baseDir := b.targetDir + b.targetDir = "" // makes the Add... methods panic when called, to avoid mutating the finalized bundle + b.mu.Unlock() + + // We need to freeze all of the metadata we've been tracking into the + // manifest file so that OpenDir can discover equivalent metadata itself + // when opening the finalized bundle. 
+ err := b.writeManifest(filepath.Join(baseDir, manifestFilename)) + if err != nil { + return nil, fmt.Errorf("failed to generate source bundle manifest: %w", err) + } + + ret, err := OpenDir(baseDir) + if err != nil { + // If we get here then it suggests that we've left the bundle directory + // in an inconsistent state which therefore made OpenDir fail its + // early checks. + return nil, fmt.Errorf("failed to open bundle after Close: %w", err) + } + return ret, nil +} + +// resolvePending depletes the queues of pending source artifacts, making sure +// that everything required is present in the bundle directory, both directly +// and indirectly. +func (b *Builder) resolvePending(ctx context.Context) (diags Diagnostics) { + b.mu.Lock() + defer func() { + // If anything we do here generates any errors then the bundle + // directory is in an inconsistent state and must not be used + // any further. This will make all subsequent calls panic. + if diags.HasErrors() { + b.targetDir = "" + } + + b.mu.Unlock() + }() + + trace := buildTraceFromContext(ctx) + + // We'll just keep iterating until we've depleted our queues. + // Note that the order of operations isn't actually important here and + // so we're consuming the "queues" in LIFO order instead of FIFO order, + // since that is easier to model using a Go slice. + for len(b.pendingRemote) > 0 || len(b.pendingRegistry) > 0 { + // We'll consume items from the "registry" queue first because resolving + // this will contribute additional items to the "remote" queue. 
+ for len(b.pendingRegistry) > 0 { + next, remain := b.pendingRegistry[len(b.pendingRegistry)-1], b.pendingRegistry[:len(b.pendingRegistry)-1] + b.pendingRegistry = remain + + realSource, err := b.findRegistryPackageSource(ctx, next.sourceAddr, next.versions) + if err != nil { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Cannot resolve module registry package", + detail: fmt.Sprintf("Error resolving module registry source %s: %s.", next.sourceAddr, err), + }) + continue + } + + b.pendingRemote = append(b.pendingRemote, remoteArtifact{ + sourceAddr: realSource, + depFinder: next.depFinder, + }) + } + + // Now we'll consume items from the "remote" queue, which might have + // grown as a result of resolving some registry queue items. + for len(b.pendingRemote) > 0 { + next, remain := b.pendingRemote[len(b.pendingRemote)-1], b.pendingRemote[:len(b.pendingRemote)-1] + b.pendingRemote = remain + + pkgAddr := next.sourceAddr.Package() + pkgLocalDir, err := b.ensureRemotePackage(ctx, pkgAddr) + if err != nil { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Cannot install source package", + detail: fmt.Sprintf("Error installing %s: %s.", next.sourceAddr.Package(), err), + }) + continue + } + + // localDirPath now refers to the local equivalent of whatever + // sub-path or sub-file the source address referred to, so we + // can ask the dependency finder to analyze it and possibly + // contribute more items to our queues. 
+ artifact := remoteArtifact{ + sourceAddr: next.sourceAddr, + depFinder: next.depFinder, + } + if _, exists := b.analyzed[artifact]; !exists { + fsys := os.DirFS(filepath.Join(b.targetDir, pkgLocalDir)) + subPath := next.sourceAddr.SubPath() + depFinder := next.depFinder + + deps := Dependencies{ + baseAddr: next.sourceAddr, + + remoteCb: func(source sourceaddrs.RemoteSource, depFinder DependencyFinder) { + b.pendingRemote = append(b.pendingRemote, remoteArtifact{ + sourceAddr: source, + depFinder: depFinder, + }) + }, + registryCb: func(source sourceaddrs.RegistrySource, allowedVersions versions.Set, depFinder DependencyFinder) { + b.pendingRegistry = append(b.pendingRegistry, registryArtifact{ + sourceAddr: source, + versions: allowedVersions, + depFinder: depFinder, + }) + }, + localResolveErrCb: func(err error) { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Invalid relative source address", + detail: fmt.Sprintf("Invalid relative path from %s: %s.", next.sourceAddr, err), + }) + }, + } + moreDiags := depFinder.FindDependencies(fsys, subPath, &deps) + deps.disable() + b.analyzed[artifact] = struct{}{} + if len(moreDiags) != 0 { + moreDiags = moreDiags.inRemoteSourcePackage(pkgAddr) + if cb := trace.Diagnostics; cb != nil { + cb(ctx, moreDiags) + } + } + diags = diags.Append(moreDiags) + if diags.HasErrors() { + continue + } + } + } + } + + return diags +} + +func (b *Builder) findRegistryPackageSource(ctx context.Context, sourceAddr sourceaddrs.RegistrySource, allowedVersions versions.Set) (sourceaddrs.RemoteSource, error) { + // NOTE: This expects to be called while b.mu is already locked. 
+ + trace := buildTraceFromContext(ctx) + + pkgAddr := sourceAddr.Package() + availableVersions, ok := b.registryPackageVersions[pkgAddr] + if !ok { + var reqCtx context.Context + if cb := trace.RegistryPackageVersionsStart; cb != nil { + reqCtx = cb(ctx, pkgAddr) + } + if reqCtx == nil { + reqCtx = ctx + } + + vs, err := b.registryClient.ModulePackageVersions(reqCtx, pkgAddr) + if err != nil { + if cb := trace.RegistryPackageVersionsFailure; cb != nil { + cb(reqCtx, pkgAddr, err) + } + return sourceaddrs.RemoteSource{}, fmt.Errorf("failed to query available versions for %s: %w", pkgAddr, err) + } + vs.Sort() + availableVersions = vs + b.registryPackageVersions[pkgAddr] = availableVersions + if cb := trace.RegistryPackageVersionsSuccess; cb != nil { + cb(reqCtx, pkgAddr, availableVersions) + } + } else { + if cb := trace.RegistryPackageVersionsAlready; cb != nil { + cb(ctx, pkgAddr, availableVersions) + } + } + + selectedVersion := availableVersions.NewestInSet(allowedVersions) + if selectedVersion == versions.Unspecified { + return sourceaddrs.RemoteSource{}, fmt.Errorf("no available version of %s matches the specified version constraint", pkgAddr) + } + + pkgVer := registryPackageVersion{ + pkg: pkgAddr, + version: selectedVersion, + } + realSourceAddr, ok := b.resolvedRegistry[pkgVer] + if !ok { + var reqCtx context.Context + if cb := trace.RegistryPackageSourceStart; cb != nil { + reqCtx = cb(ctx, pkgAddr, selectedVersion) + } + if reqCtx == nil { + reqCtx = ctx + } + + sa, err := b.registryClient.ModulePackageSourceAddr(reqCtx, pkgAddr, selectedVersion) + if err != nil { + if cb := trace.RegistryPackageSourceFailure; cb != nil { + cb(reqCtx, pkgAddr, selectedVersion, err) + } + return sourceaddrs.RemoteSource{}, fmt.Errorf("failed to find real source address for %s %s: %w", pkgAddr, selectedVersion, err) + } + realSourceAddr = sa + b.resolvedRegistry[pkgVer] = realSourceAddr + if cb := trace.RegistryPackageSourceSuccess; cb != nil { + cb(reqCtx, pkgAddr, 
selectedVersion, realSourceAddr)
		}
	} else {
		if cb := trace.RegistryPackageSourceAlready; cb != nil {
			cb(ctx, pkgAddr, selectedVersion, realSourceAddr)
		}
	}

	// If our original source address had its own sub-path component then we
	// need to combine that with the one in realSourceAddr to get the correct
	// final path: the sourceAddr subpath is relative to the realSourceAddr
	// subpath.
	realSourceAddr = sourceAddr.FinalSourceAddr(realSourceAddr)

	return realSourceAddr, nil
}

// ensureRemotePackage makes sure the given remote package is present in the
// bundle's target directory, downloading and preparing it if necessary, and
// returns the package's local directory name (relative to b.targetDir).
//
// Identical package content fetched from different addresses is coalesced
// into a single directory, because directories are named after a checksum of
// their content.
func (b *Builder) ensureRemotePackage(ctx context.Context, pkgAddr sourceaddrs.RemotePackage) (localDir string, err error) {
	// NOTE: This expects to be called while b.mu is already locked.

	trace := buildTraceFromContext(ctx)

	existingDir, ok := b.remotePackageDirs[pkgAddr]
	if ok {
		// We already have this package, so there's nothing more to do.
		if cb := trace.RemotePackageDownloadAlready; cb != nil {
			cb(ctx, pkgAddr)
		}
		return existingDir, nil
	}

	var reqCtx context.Context
	if cb := trace.RemotePackageDownloadStart; cb != nil {
		reqCtx = cb(ctx, pkgAddr)
	}
	if reqCtx == nil {
		reqCtx = ctx
	}
	// Report success or failure to the tracer based on the named return
	// value "err" as it stands when this function finally returns.
	defer func() {
		if err == nil {
			if cb := trace.RemotePackageDownloadSuccess; cb != nil {
				cb(reqCtx, pkgAddr)
			}
		} else {
			if cb := trace.RemotePackageDownloadFailure; cb != nil {
				cb(reqCtx, pkgAddr, err)
			}
		}
	}()

	// We'll eventually name our local directory after a checksum of its
	// content, but we don't know its content yet so we'll use a temporary
	// name while we work on getting it populated.
	workDir, err := ioutil.TempDir(b.targetDir, ".tmp-")
	if err != nil {
		return "", fmt.Errorf("failed to create new package directory: %w", err)
	}

	meta, err := b.fetcher.FetchSourcePackage(reqCtx, pkgAddr.SourceType(), pkgAddr.URL(), workDir)
	if err != nil {
		return "", fmt.Errorf("failed to fetch package: %w", err)
	}
	if meta != nil {
		// We'll remember the meta so we can use it when building a manifest later.
		b.remotePackageMeta[pkgAddr] = meta
	}

	// If the package has a .terraformignore file then we now need to remove
	// everything that we've been instructed to ignore.
	ignoreRules, err := ignorefiles.LoadPackageIgnoreRules(workDir)
	if err != nil {
		return "", fmt.Errorf("invalid .terraformignore file: %w", err)
	}

	// NOTE: The checks in packagePrepareWalkFn are safe only if we are sure
	// that no other process is concurrently modifying our temporary directory.
	// Source bundle building should only occur on hosts that are trusted by
	// whoever will ultimately be using the generated bundle.
	err = filepath.Walk(workDir, packagePrepareWalkFn(workDir, ignoreRules))
	if err != nil {
		// "%w" here, not "%#w": the "#" flag is not valid for the %w verb
		// and go vet's printf check rejects it.
		return "", fmt.Errorf("failed to prepare package directory: %w", err)
	}

	// If we got here then our tmpDir contains the final source code of a valid
	// module package. We'll compute a hash of its contents so we can notice
	// if it is identical to some other package we already installed, and then
	// if not rename it into its final directory name.
	// For this purpose we reuse the same directory tree hashing scheme that
	// Go uses for its own modules, although that's an implementation detail
	// subject to change in future versions: callers should always resolve
	// paths through the source bundle's manifest rather than assuming a path.
	//
	// FIXME: We should implement our own thing similar to Go's dirhash but
	// which can preserve file metadata at least to the level of detail that
	// Git can, so that we can e.g. avoid coalescing two packages that differ
	// only in whether a particular file is executable, or similar.
	//
	// We do currently _internally_ rely on the temporary directory being a
	// hash when we build the final manifest for the bundle, so if you change
	// this naming scheme you'll need to devise a new way for the manifest
	// to learn about the checksum. External callers are forbidden from relying
	// on it though, so you only have to worry about making the internals of
	// this package self-consistent in how they deal with naming and hashes.
	hash, err := dirhash.HashDir(workDir, "", dirhash.Hash1)
	if err != nil {
		return "", fmt.Errorf("failed to calculate package checksum: %w", err)
	}
	dirName := strings.TrimPrefix(hash, "h1:")

	// dirhash produces standard base64 encoding, but we need URL-friendly
	// base64 encoding since we're using these as filenames.
	rawChecksum, err := base64.StdEncoding.DecodeString(dirName)
	if err != nil {
		// Should not get here
		return "", fmt.Errorf("package has invalid checksum: %w", err)
	}
	dirName = base64.RawURLEncoding.EncodeToString(rawChecksum)

	b.remotePackageDirs[pkgAddr] = dirName

	// We might already have a directory with the same hash if we have two
	// different package addresses that happen to return the same source code.
	// For example, this could happen if one Git source leaves ref unspecified
	// and another explicitly specifies the main branch, therefore causing us
	// to fetch the same source code in two different ways. If a directory
	// already exists then we'll assume that it's suitable for this package
	// and discard the temporary directory we've been working on here, thereby
	// making the final bundle smaller.
	finalDir := filepath.Join(b.targetDir, dirName)
	if info, err := os.Lstat(finalDir); err == nil && info.IsDir() {
		err := os.RemoveAll(workDir)
		if err != nil {
			return "", fmt.Errorf("failed to clean temporary directory: %w", err)
		}
		return dirName, nil
	}

	// If a directory isn't already present then we'll now rename our
	// temporary directory to its final name.
	err = os.Rename(workDir, finalDir)
	if err != nil {
		return "", fmt.Errorf("failed to place final package directory: %w", err)
	}

	return dirName, nil
}

// writeManifest serializes the builder's accumulated package and registry
// metadata as a JSON manifest and writes it to the given filename.
//
// Both the package list and the registry metadata list are sorted by source
// address so that the manifest content is deterministic regardless of map
// iteration order.
func (b *Builder) writeManifest(filename string) error {
	var root manifestRoot
	root.FormatVersion = 1

	for pkgAddr, localDirName := range b.remotePackageDirs {
		pkgMeta := b.remotePackageMeta[pkgAddr]

		manifestPkg := manifestRemotePackage{
			SourceAddr: pkgAddr.String(),
			LocalDir:   localDirName,
		}
		if pkgMeta != nil && pkgMeta.gitCommitID != "" {
			manifestPkg.Meta.GitCommitID = pkgMeta.gitCommitID
		}

		root.Packages = append(root.Packages, manifestPkg)
	}
	sort.Slice(root.Packages, func(i, j int) bool {
		return root.Packages[i].SourceAddr < root.Packages[j].SourceAddr
	})

	registryObjs := make(map[regaddr.ModulePackage]*manifestRegistryMeta)
	for rpv, sourceAddr := range b.resolvedRegistry {
		manifestMeta, ok := registryObjs[rpv.pkg]
		if !ok {
			root.RegistryMeta = append(root.RegistryMeta, manifestRegistryMeta{
				SourceAddr: rpv.pkg.String(),
				Versions:   make(map[string]manifestRegistryVersion),
			})
			// NOTE: later appends may reallocate root.RegistryMeta and leave
			// this pointer referring to a stale backing array, but that's
			// safe here because we only ever write through the Versions map
			// header, which is shared with the live element.
			manifestMeta = &root.RegistryMeta[len(root.RegistryMeta)-1]
			registryObjs[rpv.pkg] = manifestMeta
		}
		manifestMeta.Versions[rpv.version.String()] = manifestRegistryVersion{
			SourceAddr: sourceAddr.String(),
		}
	}
	// BUG FIX: this previously compared root.Packages[i].SourceAddr, sorting
	// RegistryMeta by an unrelated slice's keys and panicking with an
	// out-of-range index whenever len(RegistryMeta) > len(Packages).
	sort.Slice(root.RegistryMeta, func(i, j int) bool {
		return root.RegistryMeta[i].SourceAddr < root.RegistryMeta[j].SourceAddr
	})

	buf, err := json.MarshalIndent(&root, "", "  ")
	if err != nil {
		return fmt.Errorf("failed to serialize to JSON: %w", err)
	}
	err = os.WriteFile(filename, buf, 0664)
	if err != nil {
		return fmt.Errorf("failed to write file: %w", err)
	}

	return nil
}

// remoteArtifact is a queue entry recording a remote source whose
// dependencies still need to be analyzed.
type remoteArtifact struct {
	sourceAddr sourceaddrs.RemoteSource
	depFinder  DependencyFinder
}

// registryArtifact is a queue entry recording a registry source (plus the
// acceptable version set) whose dependencies still need to be analyzed.
type registryArtifact struct {
	sourceAddr sourceaddrs.RegistrySource
	versions   versions.Set
	depFinder  DependencyFinder
}

// registryPackageVersion pairs a registry package address with one specific
// version, for use as a map key.
type registryPackageVersion struct {
pkg regaddr.ModulePackage + version versions.Version +} + +func packagePrepareWalkFn(root string, ignoreRules *ignorefiles.Ruleset) filepath.WalkFunc { + return func(absPath string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // Get the relative path from the current src directory. + relPath, err := filepath.Rel(root, absPath) + if err != nil { + return fmt.Errorf("failed to get relative path for file %q: %w", absPath, err) + } + if relPath == "." { + return nil + } + + ignored, err := ignoreRules.Excludes(relPath) + if err != nil { + return fmt.Errorf("invalid .terraformignore rules: %#w", err) + } + if ignored { + err := os.RemoveAll(absPath) + if err != nil { + return fmt.Errorf("failed to remove ignored file %s: %s", relPath, err) + } + return nil + } + + // For directories we also need to check with a path separator on the + // end, which ignores entire subtrees. + if info.IsDir() { + ignored, err := ignoreRules.Excludes(relPath + string(os.PathSeparator)) + if err != nil { + return fmt.Errorf("invalid .terraformignore rules: %#w", err) + } + if ignored { + err := os.RemoveAll(absPath) + if err != nil { + return fmt.Errorf("failed to remove ignored file %s: %s", relPath, err) + } + return nil + } + } + + // If we get here then we have a file or directory that isn't + // covered by the ignore rules, but we still need to make sure it's + // valid for inclusion in a source bundle. + // We only allow regular files, directories, and symlinks to either + // of those as long as they are under the root directory prefix. 
+ absRoot, err := filepath.Abs(root) + if err != nil { + return fmt.Errorf("failed to get absolute path for root directory %q: %w", root, err) + } + absRoot, err = filepath.EvalSymlinks(absRoot) + if err != nil { + return fmt.Errorf("failed to get absolute path for root directory %q: %w", root, err) + } + reAbsPath := filepath.Join(absRoot, relPath) + realPath, err := filepath.EvalSymlinks(reAbsPath) + if err != nil { + return fmt.Errorf("failed to get real path for sub-path %q: %w", relPath, err) + } + realPathRel, err := filepath.Rel(absRoot, realPath) + if err != nil { + return fmt.Errorf("failed to get real relative path for sub-path %q: %w", relPath, err) + } + + // After all of the above we can finally safely test whether the + // transformed path is "local", meaning that it only descends down + // from the real root. + if !filepath.IsLocal(realPathRel) { + return fmt.Errorf("module package path %q is symlink traversing out of the package root", relPath) + } + + // The real referent must also be either a regular file or a directory. + // (Not, for example, a Unix device node or socket or other such oddities.) 
+ lInfo, err := os.Lstat(realPath) + if err != nil { + return fmt.Errorf("failed to stat %q: %w", realPath, err) + } + if !(lInfo.Mode().IsRegular() || lInfo.Mode().IsDir()) { + return fmt.Errorf("module package path %q is not a regular file or directory", relPath) + } + + return nil + } +} diff --git a/sourcebundle/builder_test.go b/sourcebundle/builder_test.go new file mode 100644 index 0000000..e0db29e --- /dev/null +++ b/sourcebundle/builder_test.go @@ -0,0 +1,948 @@ +package sourcebundle + +import ( + "bufio" + "context" + "errors" + "fmt" + "io" + "io/fs" + "net/url" + "os" + "path" + "path/filepath" + "strings" + "testing" + + "github.com/apparentlymart/go-versions/versions" + "github.com/apparentlymart/go-versions/versions/constraints" + "github.com/google/go-cmp/cmp" + "github.com/hashicorp/go-slug/sourceaddrs" + regaddr "github.com/hashicorp/terraform-registry-address" +) + +func TestBuilderSimple(t *testing.T) { + // This tests the common pattern of specifying a module registry address + // to start, having that translated into a real remote source address, + // and then downloading from that real source address. There are no + // oddities or edge-cases here. 
+ + tracer := testBuildTracer{} + ctx := tracer.OnContext(context.Background()) + + targetDir := t.TempDir() + builder := testingBuilder( + t, targetDir, + map[string]string{ + "https://example.com/foo.tgz": "testdata/pkgs/hello", + }, + map[string]map[string]string{ + "example.com/foo/bar/baz": map[string]string{ + "1.0.0": "https://example.com/foo.tgz", + }, + }, + ) + + realSource := sourceaddrs.MustParseSource("https://example.com/foo.tgz").(sourceaddrs.RemoteSource) + regSource := sourceaddrs.MustParseSource("example.com/foo/bar/baz").(sourceaddrs.RegistrySource) + diags := builder.AddRegistrySource(ctx, regSource, versions.All, noDependencyFinder) + if len(diags) > 0 { + t.Fatal("unexpected diagnostics") + } + + wantLog := []string{ + "start requesting versions for example.com/foo/bar/baz", + "success requesting versions for example.com/foo/bar/baz", + "start requesting source address for example.com/foo/bar/baz 1.0.0", + "source address for example.com/foo/bar/baz 1.0.0 is https://example.com/foo.tgz", + "start downloading https://example.com/foo.tgz", + "downloaded https://example.com/foo.tgz", + } + gotLog := tracer.log + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Errorf("wrong trace events\n%s", diff) + } + + bundle, err := builder.Close() + if err != nil { + t.Fatalf("failed to close bundle: %s", err) + } + + localPkgDir, err := bundle.LocalPathForRemoteSource(realSource) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", realSource.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "hello")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + + // Looking up the original registry address at the selected version + // should return the same directory, because the 
registry address is just + // an indirection over the same source address. + registryPkgDir, err := bundle.LocalPathForRegistrySource(regSource, versions.MustParseVersion("1.0.0")) + if err != nil { + t.Fatalf("builder does not know a local directory for %s: %s", regSource.Package(), err) + } + if registryPkgDir != localPkgDir { + t.Errorf("local dir for %s doesn't match local dir for %s", regSource, realSource) + } +} + +func TestBuilderSubdirs(t *testing.T) { + tracer := testBuildTracer{} + ctx := tracer.OnContext(context.Background()) + + targetDir := t.TempDir() + builder := testingBuilder( + t, targetDir, + map[string]string{ + "https://example.com/subdirs.tgz": "testdata/pkgs/subdirs", + }, + map[string]map[string]string{ + "example.com/foo/bar/baz": map[string]string{ + // NOTE: The registry response points to a sub-directory of + // this package, not to the root of the package. + "1.0.0": "https://example.com/subdirs.tgz//a", + }, + }, + ) + + // NOTE: We're asking for subdir "b" of the registry address. That combines + // with the registry's own "b" subdir to produce "a/b" as the final + // subdirectory path. 
+ regSource := sourceaddrs.MustParseSource("example.com/foo/bar/baz//b").(sourceaddrs.RegistrySource) + realSource := sourceaddrs.MustParseSource("https://example.com/subdirs.tgz//a/b").(sourceaddrs.RemoteSource) + diags := builder.AddRegistrySource(ctx, regSource, versions.All, noDependencyFinder) + if len(diags) > 0 { + t.Fatal("unexpected diagnostics") + } + + wantLog := []string{ + "start requesting versions for example.com/foo/bar/baz", + "success requesting versions for example.com/foo/bar/baz", + "start requesting source address for example.com/foo/bar/baz 1.0.0", + "source address for example.com/foo/bar/baz 1.0.0 is https://example.com/subdirs.tgz//a", + "start downloading https://example.com/subdirs.tgz", + "downloaded https://example.com/subdirs.tgz", + } + gotLog := tracer.log + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Errorf("wrong trace events\n%s", diff) + } + + bundle, err := builder.Close() + if err != nil { + t.Fatalf("failed to close bundle: %s", err) + } + + localPkgDir, err := bundle.LocalPathForRemoteSource(realSource) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", realSource.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "beepbeep")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + + // Looking up the original registry address at the selected version + // should return the same directory, because the registry address is just + // an indirection over the same source address. 
+ registryPkgDir, err := bundle.LocalPathForRegistrySource(regSource, versions.MustParseVersion("1.0.0")) + if err != nil { + t.Fatalf("builder does not know a local directory for %s: %s", regSource.Package(), err) + } + if registryPkgDir != localPkgDir { + t.Errorf("local dir for %s doesn't match local dir for %s", regSource, realSource) + } +} + +func TestBuilderRemoteDeps(t *testing.T) { + tracer := testBuildTracer{} + ctx := tracer.OnContext(context.Background()) + + targetDir := t.TempDir() + builder := testingBuilder( + t, targetDir, + map[string]string{ + "https://example.com/with-deps.tgz": "testdata/pkgs/with-remote-deps", + "https://example.com/dependency1.tgz": "testdata/pkgs/hello", + "https://example.com/dependency2.tgz": "testdata/pkgs/terraformignore", + }, + nil, + ) + + startSource := sourceaddrs.MustParseSource("https://example.com/with-deps.tgz").(sourceaddrs.RemoteSource) + dep1Source := sourceaddrs.MustParseSource("https://example.com/dependency1.tgz").(sourceaddrs.RemoteSource) + dep2Source := sourceaddrs.MustParseSource("https://example.com/dependency2.tgz").(sourceaddrs.RemoteSource) + diags := builder.AddRemoteSource(ctx, startSource, stubDependencyFinder{filename: "dependencies"}) + if len(diags) > 0 { + for _, diag := range diags { + t.Errorf("unexpected diagnostic\nSummary: %s\nDetail: %s", diag.Description().Summary, diag.Description().Detail) + } + t.Fatal("unexpected diagnostics") + } + + wantLog := []string{ + "start downloading https://example.com/with-deps.tgz", + "downloaded https://example.com/with-deps.tgz", + + // NOTE: The exact ordering of these two pairs is an implementation + // detail of Builder: it consumes its "queues" in LIFO order. If you've + // changed that implementation to a different order then it's expected + // for this to mismatch and you can just reorder these as long as + // all of the same events appear in any sensible order. 
Callers are + // not allowed to depend on the relative ordering of events relating + // to different packages. + "start downloading https://example.com/dependency2.tgz", + "downloaded https://example.com/dependency2.tgz", + "start downloading https://example.com/dependency1.tgz", + "downloaded https://example.com/dependency1.tgz", + } + gotLog := tracer.log + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Errorf("wrong trace events\n%s", diff) + } + + bundle, err := builder.Close() + if err != nil { + t.Fatalf("failed to close bundle: %s", err) + } + + t.Run("starting package", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(startSource) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", startSource.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "dependencies")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) + t.Run("dependency 1", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(dep1Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", dep1Source.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "hello")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) + t.Run("dependency 2", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(dep2Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not 
know a local directory for %s: %s", dep2Source.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "included")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) +} + +func TestBuilderRemoteDepsDifferingTypes(t *testing.T) { + tracer := testBuildTracer{} + ctx := tracer.OnContext(context.Background()) + + targetDir := t.TempDir() + builder := testingBuilder( + t, targetDir, + map[string]string{ + "https://example.com/self_dependency.tgz": "testdata/pkgs/with-remote-deps", + "https://example.com/dependency1.tgz": "testdata/pkgs/hello", + "https://example.com/dependency2.tgz": "testdata/pkgs/terraformignore", + }, + nil, + ) + + startSource := sourceaddrs.MustParseSource("https://example.com/self_dependency.tgz").(sourceaddrs.RemoteSource) + dep1Source := sourceaddrs.MustParseSource("https://example.com/dependency1.tgz").(sourceaddrs.RemoteSource) + dep2Source := sourceaddrs.MustParseSource("https://example.com/dependency2.tgz").(sourceaddrs.RemoteSource) + diags := builder.AddRemoteSource(ctx, startSource, stubDependencyFinder{ + filename: "self_dependency", + nextFilename: "dependencies", + }) + if len(diags) > 0 { + for _, diag := range diags { + t.Errorf("unexpected diagnostic\nSummary: %s\nDetail: %s", diag.Description().Summary, diag.Description().Detail) + } + t.Fatal("unexpected diagnostics") + } + + wantLog := []string{ + "start downloading https://example.com/self_dependency.tgz", + "downloaded https://example.com/self_dependency.tgz", + "reusing existing local copy of https://example.com/self_dependency.tgz", + + // NOTE: The exact ordering of these two pairs is an implementation + // detail of Builder: it consumes its "queues" in LIFO order. 
If you've + // changed that implementation to a different order then it's expected + // for this to mismatch and you can just reorder these as long as + // all of the same events appear in any sensible order. Callers are + // not allowed to depend on the relative ordering of events relating + // to different packages. + "start downloading https://example.com/dependency2.tgz", + "downloaded https://example.com/dependency2.tgz", + "start downloading https://example.com/dependency1.tgz", + "downloaded https://example.com/dependency1.tgz", + } + gotLog := tracer.log + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Errorf("wrong trace events\n%s", diff) + } + + bundle, err := builder.Close() + if err != nil { + t.Fatalf("failed to close bundle: %s", err) + } + + t.Run("starting package", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(startSource) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", startSource.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "dependencies")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) + t.Run("dependency 1", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(dep1Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", dep1Source.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "hello")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) + t.Run("dependency 2", func(t *testing.T) { + localPkgDir, err := 
bundle.LocalPathForRemoteSource(dep2Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", dep2Source.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "included")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) +} + +func TestBuilderTerraformIgnore(t *testing.T) { + tracer := testBuildTracer{} + ctx := tracer.OnContext(context.Background()) + + targetDir := t.TempDir() + builder := testingBuilder( + t, targetDir, + map[string]string{ + "https://example.com/ignore.tgz": "testdata/pkgs/terraformignore", + }, + nil, + ) + + startSource := sourceaddrs.MustParseSource("https://example.com/ignore.tgz").(sourceaddrs.RemoteSource) + diags := builder.AddRemoteSource(ctx, startSource, noDependencyFinder) + if len(diags) > 0 { + for _, diag := range diags { + t.Errorf("unexpected diagnostic\nSummary: %s\nDetail: %s", diag.Description().Summary, diag.Description().Detail) + } + t.Fatal("unexpected diagnostics") + } + + wantLog := []string{ + "start downloading https://example.com/ignore.tgz", + "downloaded https://example.com/ignore.tgz", + } + gotLog := tracer.log + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Errorf("wrong trace events\n%s", diff) + } + + bundle, err := builder.Close() + if err != nil { + t.Fatalf("failed to close bundle: %s", err) + } + + localPkgDir, err := bundle.LocalPathForRemoteSource(startSource) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", startSource.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "included")); err != nil { + t.Errorf("problem with output file: %s", 
err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + + if _, err := os.Lstat(filepath.Join(localPkgDir, "excluded")); err == nil { + t.Errorf("excluded file exists; should have been removed") + } else if !errors.Is(err, fs.ErrNotExist) { + t.Errorf("excluded file exists but is not readable; should have been removed altogether") + } +} + +func TestBuilderCoalescePackages(t *testing.T) { + tracer := testBuildTracer{} + ctx := tracer.OnContext(context.Background()) + + targetDir := t.TempDir() + builder := testingBuilder( + t, targetDir, + map[string]string{ + "https://example.com/with-deps.tgz": "testdata/pkgs/with-remote-deps", + "https://example.com/dependency1.tgz": "testdata/pkgs/hello", + "https://example.com/dependency2.tgz": "testdata/pkgs/hello", + }, + nil, + ) + + startSource := sourceaddrs.MustParseSource("https://example.com/with-deps.tgz").(sourceaddrs.RemoteSource) + dep1Source := sourceaddrs.MustParseSource("https://example.com/dependency1.tgz").(sourceaddrs.RemoteSource) + dep2Source := sourceaddrs.MustParseSource("https://example.com/dependency2.tgz").(sourceaddrs.RemoteSource) + diags := builder.AddRemoteSource(ctx, startSource, stubDependencyFinder{filename: "dependencies"}) + if len(diags) > 0 { + for _, diag := range diags { + t.Errorf("unexpected diagnostic\nSummary: %s\nDetail: %s", diag.Description().Summary, diag.Description().Detail) + } + t.Fatal("unexpected diagnostics") + } + + wantLog := []string{ + "start downloading https://example.com/with-deps.tgz", + "downloaded https://example.com/with-deps.tgz", + + // NOTE: The exact ordering of these two pairs is an implementation + // detail of Builder: it consumes its "queues" in LIFO order. If you've + // changed that implementation to a different order then it's expected + // for this to mismatch and you can just reorder these as long as + // all of the same events appear in any sensible order. 
Callers are + // not allowed to depend on the relative ordering of events relating + // to different packages. + "start downloading https://example.com/dependency2.tgz", + "downloaded https://example.com/dependency2.tgz", + "start downloading https://example.com/dependency1.tgz", + "downloaded https://example.com/dependency1.tgz", + } + gotLog := tracer.log + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Errorf("wrong trace events\n%s", diff) + } + + bundle, err := builder.Close() + if err != nil { + t.Fatalf("failed to close bundle: %s", err) + } + + t.Run("starting package", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(startSource) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", startSource.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "dependencies")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) + t.Run("dependency 1", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForSource(dep1Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", dep1Source.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "hello")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) + t.Run("dependency 2", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(dep2Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a 
local directory for %s: %s", dep2Source.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "hello")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + + // The package directory for dependency 2 should be the same as for + // dependency 1 because they both have identical content, despite + // having different source addresses. + otherLocalPkgDir, err := bundle.LocalPathForRemoteSource(dep1Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s", dep1Source.Package()) + } + if otherLocalPkgDir != localPkgDir { + t.Errorf("'hello' packages were not coalesced\ndep1 path: %s\ndep2 path: %s", otherLocalPkgDir, localPkgDir) + } + }) +} + +func testingBuilder(t *testing.T, targetDir string, remotePackages map[string]string, registryPackages map[string]map[string]string) *Builder { + t.Helper() + + type fakeRemotePackage struct { + sourceType string + url *url.URL + localDir string + } + type fakeRegistryPackage struct { + pkgAddr regaddr.ModulePackage + versions map[versions.Version]sourceaddrs.RemoteSource + } + + remotePkgs := make([]fakeRemotePackage, 0, len(remotePackages)) + registryPkgs := make([]fakeRegistryPackage, 0, len(registryPackages)) + + for pkgAddrRaw, localDir := range remotePackages { + pkgAddr, err := sourceaddrs.ParseRemotePackage(pkgAddrRaw) + if err != nil { + t.Fatalf("invalid remote package address %q: %s", pkgAddrRaw, err) + } + remotePkgs = append(remotePkgs, fakeRemotePackage{ + sourceType: pkgAddr.SourceType(), + url: pkgAddr.URL(), + localDir: localDir, + }) + } + + for pkgAddrRaw, versionsRaw := range registryPackages { + pkgAddr, err := sourceaddrs.ParseRegistryPackage(pkgAddrRaw) + if err != nil { + t.Fatalf("invalid registry package address %q: %s", pkgAddrRaw, 
err)
		}
		pkg := fakeRegistryPackage{
			pkgAddr:  pkgAddr,
			versions: make(map[versions.Version]sourceaddrs.RemoteSource),
		}
		for versionRaw, sourceAddrRaw := range versionsRaw {
			version, err := versions.ParseVersion(versionRaw)
			if err != nil {
				t.Fatalf("invalid registry package version %q for %s: %s", versionRaw, pkgAddr, err)
			}
			sourceAddr, err := sourceaddrs.ParseRemoteSource(sourceAddrRaw)
			if err != nil {
				t.Fatalf("invalid registry package source address %q for %s %s: %s", sourceAddrRaw, pkgAddr, version, err)
			}
			pkg.versions[version] = sourceAddr
		}
		registryPkgs = append(registryPkgs, pkg)
	}

	fetcher := packageFetcherFunc(func(ctx context.Context, sourceType string, url *url.URL, targetDir string) (*PackageMeta, error) {
		// Our fake implementation of "fetching" is to just copy one local
		// directory into another.
		for _, pkg := range remotePkgs {
			if pkg.sourceType != sourceType {
				continue
			}
			if pkg.url.String() != url.String() {
				continue
			}
			localDir := pkg.localDir
			err := copyDir(targetDir, localDir)
			if err != nil {
				return nil, fmt.Errorf("copying %s to %s: %w", localDir, targetDir, err)
			}
			return nil, nil
		}
		return nil, fmt.Errorf("no fake remote package matches %s %s", sourceType, url)
	})

	registryClient := registryClientFuncs{
		modulePackageVersions: func(ctx context.Context, pkgAddr regaddr.ModulePackage) (versions.List, error) {
			for _, pkg := range registryPkgs {
				if pkg.pkgAddr != pkgAddr {
					continue
				}
				// BUG FIX: this was make(versions.List, len(pkg.versions)),
				// which combined with append below returned a list with
				// len(pkg.versions) zero-value versions (0.0.0) prepended
				// to the real ones. Length zero, capacity n is what we want.
				ret := make(versions.List, 0, len(pkg.versions))
				for version := range pkg.versions {
					ret = append(ret, version)
				}
				return ret, nil
			}
			return nil, fmt.Errorf("no fake registry package matches %s", pkgAddr)
		},
		modulePackageSourceAddr: func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version) (sourceaddrs.RemoteSource, error) {
			for _, pkg := range registryPkgs {
				if pkg.pkgAddr != pkgAddr {
					continue
				}
				sourceAddr, ok := pkg.versions[version]
				if
!ok {
					return sourceaddrs.RemoteSource{}, fmt.Errorf("no fake registry package matches %s %s", pkgAddr, version)
				}
				return sourceAddr, nil
			}
			return sourceaddrs.RemoteSource{}, fmt.Errorf("no fake registry package matches %s", pkgAddr)
		},
	}

	builder, err := NewBuilder(targetDir, fetcher, registryClient)
	if err != nil {
		t.Fatalf("failed to create builder: %s", err)
	}
	return builder
}

// testBuildTracer is a BuildTracer that just remembers calls in memory
// as strings, for relatively-easy comparison in tests.
type testBuildTracer struct {
	// log accumulates one human-readable entry per traced event, in the
	// order the callbacks were invoked.
	log []string
}

// OnContext attaches a BuildTracer to the given context whose callbacks all
// append human-readable entries to t.log, so a test can assert on the exact
// sequence of build events afterwards.
func (t *testBuildTracer) OnContext(ctx context.Context) context.Context {
	trace := BuildTracer{
		RegistryPackageVersionsStart: func(ctx context.Context, pkgAddr regaddr.ModulePackage) context.Context {
			t.appendLogf("start requesting versions for %s", pkgAddr)
			return ctx
		},
		RegistryPackageVersionsSuccess: func(ctx context.Context, pkgAddr regaddr.ModulePackage, versions versions.List) {
			t.appendLogf("success requesting versions for %s", pkgAddr)
		},
		RegistryPackageVersionsFailure: func(ctx context.Context, pkgAddr regaddr.ModulePackage, err error) {
			t.appendLogf("error requesting versions for %s: %s", pkgAddr, err)
		},
		RegistryPackageVersionsAlready: func(ctx context.Context, pkgAddr regaddr.ModulePackage, versions versions.List) {
			t.appendLogf("reusing existing versions for %s", pkgAddr)
		},

		RegistryPackageSourceStart: func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version) context.Context {
			t.appendLogf("start requesting source address for %s %s", pkgAddr, version)
			return ctx
		},
		RegistryPackageSourceSuccess: func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version, sourceAddr sourceaddrs.RemoteSource) {
			t.appendLogf("source address for %s %s is %s", pkgAddr, version, sourceAddr)
		},
		RegistryPackageSourceFailure: func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version, err error) {
			t.appendLogf("error requesting source address for %s %s: %s", pkgAddr, version, err)
		},
		RegistryPackageSourceAlready: func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version, sourceAddr sourceaddrs.RemoteSource) {
			t.appendLogf("reusing existing source address for %s %s: %s", pkgAddr, version, sourceAddr)
		},

		RemotePackageDownloadStart: func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage) context.Context {
			t.appendLogf("start downloading %s", pkgAddr)
			return ctx
		},
		RemotePackageDownloadSuccess: func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage) {
			t.appendLogf("downloaded %s", pkgAddr)
		},
		RemotePackageDownloadFailure: func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage, err error) {
			t.appendLogf("failed to download %s: %s", pkgAddr, err)
		},
		RemotePackageDownloadAlready: func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage) {
			t.appendLogf("reusing existing local copy of %s", pkgAddr)
		},

		Diagnostics: func(ctx context.Context, diags Diagnostics) {
			// Record each diagnostic with a severity-specific prefix so
			// tests can distinguish errors from warnings.
			for _, diag := range diags {
				switch diag.Severity() {
				case DiagError:
					t.appendLogf("Error: %s", diag.Description().Summary)
				case DiagWarning:
					t.appendLogf("Warning: %s", diag.Description().Summary)
				default:
					t.appendLogf("Diagnostic with invalid severity: %s", diag.Description().Summary)
				}
			}
		},
	}
	return trace.OnContext(ctx)
}

// appendLogf formats a message and records it in the in-memory log.
func (t *testBuildTracer) appendLogf(f string, v ...interface{}) {
	t.log = append(t.log, fmt.Sprintf(f, v...))
}

// packageFetcherFunc adapts a bare function to whatever package-fetcher
// interface NewBuilder expects, for convenient use of closures in tests.
type packageFetcherFunc func(ctx context.Context, sourceType string, url *url.URL, targetDir string) (*PackageMeta, error)

// FetchSourcePackage implements the fetcher interface by delegating to f.
func (f packageFetcherFunc) FetchSourcePackage(ctx context.Context, sourceType string, url *url.URL, targetDir string) (*PackageMeta, error) {
	return f(ctx, sourceType, url, targetDir)
}

// registryClientFuncs adapts a pair of bare functions to the module-registry
// client interface expected by NewBuilder, for convenient use in tests.
type registryClientFuncs struct {
	modulePackageVersions   func(ctx context.Context, pkgAddr regaddr.ModulePackage) (versions.List, error)
	modulePackageSourceAddr func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version) (sourceaddrs.RemoteSource, error)
}

// ModulePackageVersions delegates to the modulePackageVersions closure.
func (f registryClientFuncs) ModulePackageVersions(ctx context.Context, pkgAddr regaddr.ModulePackage) (versions.List, error) {
	return f.modulePackageVersions(ctx, pkgAddr)
}

// ModulePackageSourceAddr delegates to the modulePackageSourceAddr closure.
func (f registryClientFuncs) ModulePackageSourceAddr(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version) (sourceaddrs.RemoteSource, error) {
	return f.modulePackageSourceAddr(ctx, pkgAddr, version)
}

// noopDependencyFinder is a DependencyFinder that never reports any
// dependencies at all.
type noopDependencyFinder struct{}

// FindDependencies implements DependencyFinder by doing nothing.
func (f noopDependencyFinder) FindDependencies(fsys fs.FS, subPath string, deps *Dependencies) Diagnostics {
	return nil
}

var noDependencyFinder = noopDependencyFinder{}

// stubDependencyFinder is a test-only [DependencyFinder] which just reads
// lines of text from a given filename and tries to treat each one as a source
// address, which it then reports as a dependency.
+type stubDependencyFinder struct { + filename string + nextFilename string +} + +func (f stubDependencyFinder) FindDependencies(fsys fs.FS, subPath string, deps *Dependencies) Diagnostics { + var diags Diagnostics + filePath := path.Join(subPath, f.filename) + file, err := fsys.Open(filePath) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Missing stub dependency file", + detail: fmt.Sprintf("There is no file %q in the package.", filePath), + }) + } else { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Invalid stub dependency file", + detail: fmt.Sprintf("Cannot open %q in the package: %s.", filePath, err), + }) + } + return diags + } + + sc := bufio.NewScanner(file) // defaults to scanning for lines + for sc.Scan() { + line := strings.TrimSpace(sc.Text()) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + sourceAddrRaw, versionsRaw, hasVersions := strings.Cut(line, " ") + sourceAddr, err := sourceaddrs.ParseSource(sourceAddrRaw) + if err != nil { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Invalid source address in stub dependency file", + detail: fmt.Sprintf("Cannot use %q as a source address: %s.", sourceAddrRaw, err), + }) + continue + } + if hasVersions && !sourceAddr.SupportsVersionConstraints() { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Invalid source address in stub dependency file", + detail: fmt.Sprintf("Cannot specify a version constraint string for %s.", sourceAddr), + }) + continue + } + var allowedVersions versions.Set + if hasVersions { + cnsts, err := constraints.ParseRubyStyleMulti(versionsRaw) + if err != nil { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Invalid version constraints in stub dependency file", + detail: fmt.Sprintf("Cannot use %q as version constraints for %s: %s.", versionsRaw, sourceAddrRaw, 
err), + }) + continue + } + allowedVersions = versions.MeetingConstraints(cnsts) + } else { + allowedVersions = versions.All + } + + depFinder := DependencyFinder(noDependencyFinder) + if f.nextFilename != "" { + // If a next filename is specified then we're chaining to another + // dependency file for all of the discovered dependencies. + depFinder = stubDependencyFinder{filename: f.nextFilename} + } + + switch sourceAddr := sourceAddr.(type) { + case sourceaddrs.RemoteSource: + deps.AddRemoteSource(sourceAddr, depFinder) + case sourceaddrs.RegistrySource: + deps.AddRegistrySource(sourceAddr, allowedVersions, depFinder) + case sourceaddrs.LocalSource: + deps.AddLocalSource(sourceAddr, depFinder) + default: + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Unsupported source address type", + detail: fmt.Sprintf("stubDependencyFinder doesn't support %T addresses", sourceAddr), + }) + continue + } + } + if err := sc.Err(); err != nil { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Invalid stub dependency file", + detail: fmt.Sprintf("Failed to read %s in the package: %s.", filePath, err), + }) + return diags + } + + return diags +} + +func copyDir(dst, src string) error { + src, err := filepath.EvalSymlinks(src) + if err != nil { + return err + } + + walkFn := func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if path == src { + return nil + } + + // The "path" has the src prefixed to it. We need to join our + // destination with the path without the src on it. + dstPath := filepath.Join(dst, path[len(src):]) + + // we don't want to try and copy the same file over itself. + if eq, err := sameFile(path, dstPath); eq { + return nil + } else if err != nil { + return err + } + + // If we have a directory, make that subdirectory, then continue + // the walk. + if info.IsDir() { + if path == filepath.Join(src, dst) { + // dst is in src; don't walk it. 
+ return nil + } + + if err := os.MkdirAll(dstPath, 0755); err != nil { + return err + } + + return nil + } + + // If the current path is a symlink, recreate the symlink relative to + // the dst directory + if info.Mode()&os.ModeSymlink == os.ModeSymlink { + target, err := os.Readlink(path) + if err != nil { + return err + } + + return os.Symlink(target, dstPath) + } + + // If we have a file, copy the contents. + srcF, err := os.Open(path) + if err != nil { + return err + } + defer srcF.Close() + + dstF, err := os.Create(dstPath) + if err != nil { + return err + } + defer dstF.Close() + + if _, err := io.Copy(dstF, srcF); err != nil { + return err + } + + // Chmod it + return os.Chmod(dstPath, info.Mode()) + } + + return filepath.Walk(src, walkFn) +} + +func sameFile(a, b string) (bool, error) { + if a == b { + return true, nil + } + + aInfo, err := os.Lstat(a) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + + bInfo, err := os.Lstat(b) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + + return os.SameFile(aInfo, bInfo), nil +} diff --git a/sourcebundle/bundle.go b/sourcebundle/bundle.go new file mode 100644 index 0000000..576ec7a --- /dev/null +++ b/sourcebundle/bundle.go @@ -0,0 +1,391 @@ +package sourcebundle + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "io/fs" + "os" + "path" + "path/filepath" + "sort" + "strings" + + "github.com/apparentlymart/go-versions/versions" + "github.com/hashicorp/go-slug" + "github.com/hashicorp/go-slug/sourceaddrs" + regaddr "github.com/hashicorp/terraform-registry-address" +) + +const manifestFilename = "terraform-sources.json" + +type Bundle struct { + rootDir string + + manifestChecksum string + + remotePackageDirs map[sourceaddrs.RemotePackage]string + remotePackageMeta map[sourceaddrs.RemotePackage]*PackageMeta + + registryPackageSources 
map[regaddr.ModulePackage]map[versions.Version]sourceaddrs.RemoteSource +} + +// OpenDir opens a bundle rooted at the given base directory. +// +// If OpenDir succeeds then nothing else (inside or outside the calling program) +// may modify anything under the given base directory for the lifetime of +// the returned [Bundle] object. If the bundle directory is modified while the +// object is still alive then behavior is undefined. +func OpenDir(baseDir string) (*Bundle, error) { + // We'll take the absolute form of the directory to be resilient in case + // something else in this program rudely changes the current working + // directory while the bundle is still alive. + rootDir, err := filepath.Abs(baseDir) + if err != nil { + return nil, fmt.Errorf("cannot resolve base directory: %w", err) + } + + ret := &Bundle{ + rootDir: rootDir, + remotePackageDirs: make(map[sourceaddrs.RemotePackage]string), + remotePackageMeta: make(map[sourceaddrs.RemotePackage]*PackageMeta), + registryPackageSources: make(map[regaddr.ModulePackage]map[versions.Version]sourceaddrs.RemoteSource), + } + + manifestSrc, err := os.ReadFile(filepath.Join(rootDir, manifestFilename)) + if err != nil { + return nil, fmt.Errorf("cannot read manifest: %w", err) + } + + hash := sha256.New() + ret.manifestChecksum = hex.EncodeToString(hash.Sum(manifestSrc)) + + var manifest manifestRoot + err = json.Unmarshal(manifestSrc, &manifest) + if err != nil { + return nil, fmt.Errorf("invalid manifest: %w", err) + } + if manifest.FormatVersion != 1 { + return nil, fmt.Errorf("invalid manifest: unsupported format version %d", manifest.FormatVersion) + } + + for _, rpm := range manifest.Packages { + // We'll be quite fussy about the local directory name to avoid a + // crafted manifest sending us to other random places in the filesystem. + // It must be just a single directory name, without any path separators + // or any traversals. 
+ localDir := filepath.ToSlash(rpm.LocalDir) + if !fs.ValidPath(localDir) || localDir == "." || strings.IndexByte(localDir, '/') >= 0 { + return nil, fmt.Errorf("invalid package directory name %q", rpm.LocalDir) + } + + pkgAddr, err := sourceaddrs.ParseRemotePackage(rpm.SourceAddr) + if err != nil { + return nil, fmt.Errorf("invalid remote package address %q: %w", rpm.SourceAddr, err) + } + ret.remotePackageDirs[pkgAddr] = localDir + + if rpm.Meta.GitCommitID != "" { + ret.remotePackageMeta[pkgAddr] = PackageMetaWithGitCommit(rpm.Meta.GitCommitID) + } + } + + for _, rpm := range manifest.RegistryMeta { + pkgAddr, err := sourceaddrs.ParseRegistryPackage(rpm.SourceAddr) + if err != nil { + return nil, fmt.Errorf("invalid registry package address %q: %w", rpm.SourceAddr, err) + } + vs := ret.registryPackageSources[pkgAddr] + if vs == nil { + vs = make(map[versions.Version]sourceaddrs.RemoteSource) + ret.registryPackageSources[pkgAddr] = vs + } + for versionStr, mv := range rpm.Versions { + version, err := versions.ParseVersion(versionStr) + if err != nil { + return nil, fmt.Errorf("invalid registry package version %q: %w", versionStr, err) + } + sourceAddr, err := sourceaddrs.ParseRemoteSource(mv.SourceAddr) + if err != nil { + return nil, fmt.Errorf("invalid registry package source address %q: %w", mv.SourceAddr, err) + } + vs[version] = sourceAddr + } + } + + return ret, nil +} + +// LocalPathForSource takes either a remote or registry final source address +// and returns the local path within the bundle that corresponds with it. +// +// It doesn't make sense to pass a [sourceaddrs.LocalSource] to this function +// because a source bundle cannot contain anything other than remote packages, +// but as a concession to convenience this function will return a +// filepath-shaped relative path in that case, assuming that the source was +// intended to be a local filesystem path relative to the current working +// directory. 
// The result will therefore not necessarily be a subdirectory of
// the receiving bundle in that case.
func (b *Bundle) LocalPathForSource(addr sourceaddrs.FinalSource) (string, error) {
	switch addr := addr.(type) {
	case sourceaddrs.RemoteSource:
		return b.LocalPathForRemoteSource(addr)
	case sourceaddrs.RegistrySourceFinal:
		// Registry sources resolve indirectly through the recorded
		// version-to-remote-source mapping.
		return b.LocalPathForRegistrySource(addr.Unversioned(), addr.SelectedVersion())
	case sourceaddrs.LocalSource:
		// Convenience case described in the doc comment above.
		return filepath.FromSlash(addr.RelativePath()), nil
	default:
		// If we get here then it's probably a bug: the above cases should be
		// exhaustive for all sourceaddrs.FinalSource implementations.
		return "", fmt.Errorf("cannot produce local path for source address of type %T", addr)
	}
}

// LocalPathForRemoteSource returns the local path within the bundle that
// corresponds with the given source address, or an error if the source address
// is within a source package not included in the bundle.
func (b *Bundle) LocalPathForRemoteSource(addr sourceaddrs.RemoteSource) (string, error) {
	pkgAddr := addr.Package()
	localName, ok := b.remotePackageDirs[pkgAddr]
	if !ok {
		return "", fmt.Errorf("source bundle does not include %s", pkgAddr)
	}
	// Sub-paths are stored slash-separated; convert for the host filesystem.
	subPath := filepath.FromSlash(addr.SubPath())
	return filepath.Join(b.rootDir, localName, subPath), nil
}

// LocalPathForRegistrySource returns the local path within the bundle that
// corresponds with the given registry address and version, or an error if the
// source address is within a source package not included in the bundle.
func (b *Bundle) LocalPathForRegistrySource(addr sourceaddrs.RegistrySource, version versions.Version) (string, error) {
	pkgAddr := addr.Package()
	vs, ok := b.registryPackageSources[pkgAddr]
	if !ok {
		return "", fmt.Errorf("source bundle does not include %s", pkgAddr)
	}
	baseSourceAddr, ok := vs[version]
	if !ok {
		return "", fmt.Errorf("source bundle does not include %s v%s", pkgAddr, version)
	}

	// The address we were given might have its own source address, so we need
	// to incorporate that into our result.
	finalSourceAddr := addr.FinalSourceAddr(baseSourceAddr)
	return b.LocalPathForRemoteSource(finalSourceAddr)
}

// LocalPathForFinalRegistrySource is a variant of
// [Bundle.LocalPathForRegistrySource] which passes the source address and
// selected version together as a single address value.
func (b *Bundle) LocalPathForFinalRegistrySource(addr sourceaddrs.RegistrySourceFinal) (string, error) {
	return b.LocalPathForRegistrySource(addr.Unversioned(), addr.SelectedVersion())
}

// SourceForLocalPath is the inverse of [Bundle.LocalPathForSource],
// translating a local path beneath the bundle's base directory back into
// a source address that it's a snapshot of.
//
// Returns an error if the given directory is not within the bundle's base
// directory, or is not within one of the subdirectories of the bundle
// that represents a source package. A caller using this to present more
// user-friendly file paths in error messages etc could reasonably choose
// to just retain the source string if this function returns an error, and
// not show the error to the user.
+// +// The [Bundle] implementation is optimized for forward lookups from source +// address to local path rather than the other way around, so this function +// may be considerably more expensive than the forward lookup and is intended +// primarily for reporting friendly source locations in diagnostic messages +// instead of exposing the opaque internal directory names from the source +// bundle. This function should not typically be used in performance-sensitive +// portions of the happy path. +func (b *Bundle) SourceForLocalPath(p string) (sourceaddrs.FinalSource, error) { + // This implementation is a best effort sort of thing, and might not + // always succeed in awkward cases. + + // We'll start by making our path absolute because that'll make it + // more comparable with b.rootDir, which is also absolute. + absPath, err := filepath.Abs(p) + if err != nil { + return nil, fmt.Errorf("can't determine absolute path for %q: %w", p, err) + } + + // Now we'll reinterpret the path as relative to our base directory, + // so we can see what local directory name it starts with. + relPath, err := filepath.Rel(b.rootDir, absPath) + if err != nil { + // If the path can't be made relative then that suggests it's on a + // different volume, such as a different drive letter on Windows. + return nil, fmt.Errorf("path %q does not belong to the source bundle", absPath) + } + + // We'll do all of our remaining work in the abstract "forward-slash-path" + // mode, matching how we represent "sub-paths" for our source addresses. + subPath := path.Clean(filepath.ToSlash(relPath)) + if !fs.ValidPath(subPath) || subPath == "." { + // If the path isn't "valid" by now then that suggests it's a + // path outside of our source bundle which would appear as a + // path with a ".." segment on the front, or to the root of + // our source bundle which would appear as "." and isn't part + // of any particular package. 
+ return nil, fmt.Errorf("path %q does not belong to the source bundle", absPath) + } + + // If all of the above passed then we should now have one or more + // slash-separated path segments. The first one should be one of the + // local directories we know from our manifest, and then the rest is + // the sub-path in the associated package. + localDir, subPath, _ := strings.Cut(subPath, "/") + + // There can be potentially several packages all referring to the same + // directory, so to make the result deterministic we'll just take the + // one whose stringified source address is shortest. + var pkgAddr sourceaddrs.RemotePackage + found := false + for candidateAddr, candidateDir := range b.remotePackageDirs { + if candidateDir != localDir { + continue + } + if found { + // We've found multiple possible source addresses, so we + // need to decide which one to keep. + if len(candidateAddr.String()) > len(pkgAddr.String()) { + continue + } + } + pkgAddr = candidateAddr + found = true + } + + if !found { + return nil, fmt.Errorf("path %q does not belong to the source bundle", absPath) + } + + return pkgAddr.SourceAddr(subPath), nil +} + +// ChecksumV1 returns a checksum of the contents of the source bundle that +// can be used to determine if another source bundle is equivalent to this one. +// +// "Equivalent" means that it contains all of the same source packages with +// identical content each. +// +// A successful result is a string with the prefix "h1:" to indicate that +// it was built with checksum algorithm version 1. Future versions may +// introduce other checksum formats. +func (b *Bundle) ChecksumV1() (string, error) { + // Our first checksum format assumes that the checksum of the manifest + // is sufficient to cover the entire archive, which in turn assumes that + // the builder either directly or indirectly encodes the checksum of + // each package into the manifest. 
For the initial implementation of + // Builder we achieve that by using the checksum as the directory name + // for each package, which avoids the need to redundantly store the + // checksum again. If a future Builder implementation moves away from + // using checksums as directory names then the builder will need to + // introduce explicit checksums as a separate property into the manifest + // in order to preserve our assumptions here. + return "h1:" + b.manifestChecksum, nil +} + +// RemotePackages returns a slice of all of the remote source packages that +// contributed to this source bundle. +// +// The result is sorted into a consistent but unspecified order. +func (b *Bundle) RemotePackages() []sourceaddrs.RemotePackage { + ret := make([]sourceaddrs.RemotePackage, 0, len(b.remotePackageDirs)) + for pkgAddr := range b.remotePackageDirs { + ret = append(ret, pkgAddr) + } + sort.Slice(ret, func(i, j int) bool { + return ret[i].String() < ret[j].String() + }) + return ret +} + +// RemotePackageMeta returns the package metadata for the given package address, +// or nil if there is no metadata for that package tracked in the bundle. +func (b *Bundle) RemotePackageMeta(pkgAddr sourceaddrs.RemotePackage) *PackageMeta { + return b.remotePackageMeta[pkgAddr] +} + +// RegistryPackages returns a list of all of the distinct registry packages +// that contributed to this bundle. +// +// The result is in a consistent but unspecified sorted order. +func (b *Bundle) RegistryPackages() []regaddr.ModulePackage { + ret := make([]regaddr.ModulePackage, 0, len(b.remotePackageDirs)) + for pkgAddr := range b.registryPackageSources { + ret = append(ret, pkgAddr) + } + sort.Slice(ret, func(i, j int) bool { + return ret[i].String() < ret[j].String() + }) + return ret +} + +// RegistryPackageVersions returns a list of all of the versions of the given +// module registry package that this bundle has package content for. 
+// +// This result can be used as a substitute for asking the remote registry which +// versions are available in any situation where a caller is interested only +// in what's bundled, and will not consider installing anything new from +// the origin registry. +// +// The result is guaranteed to be sorted with lower-precedence version numbers +// placed earlier in the list. +func (b *Bundle) RegistryPackageVersions(pkgAddr regaddr.ModulePackage) versions.List { + vs := b.registryPackageSources[pkgAddr] + if len(vs) == 0 { + return nil + } + ret := make(versions.List, 0, len(vs)) + for v := range vs { + ret = append(ret, v) + } + ret.Sort() + return ret +} + +// RegistryPackageSourceAddr returns the remote source address corresponding +// to the given version of the given module package, or sets its second return +// value to false if no such version is included in the bundle. +func (b *Bundle) RegistryPackageSourceAddr(pkgAddr regaddr.ModulePackage, version versions.Version) (sourceaddrs.RemoteSource, bool) { + sourceAddr, ok := b.registryPackageSources[pkgAddr][version] + return sourceAddr, ok +} + +// WriteArchive writes a source bundle archive containing the same contents +// as the bundle to the given writer. +// +// A source bundle archive is a gzip-compressed tar stream that can then +// be extracted in some other location to produce an equivalent source +// bundle directory. +func (b *Bundle) WriteArchive(w io.Writer) error { + // For this part we just delegate to the main slug packer, since a + // source bundle archive is effectively just a slug with multiple packages + // (and a manifest) inside it. 
+ packer, err := slug.NewPacker(slug.DereferenceSymlinks()) + if err != nil { + return fmt.Errorf("can't instantiate archive packer: %w", err) + } + _, err = packer.Pack(b.rootDir, w) + return err +} + +// ExtractArchive reads a source bundle archive from the given reader and +// extracts it into the given target directory, which must already exist and +// must be empty. +// +// If successful, it returns a [Bundle] value representing the created bundle, +// as if the given target directory were passed to [OpenDir]. +func ExtractArchive(r io.Reader, targetDir string) (*Bundle, error) { + // A bundle archive is just a slug archive created over a bundle + // directory, so we can use the normal unpack function to deal with it. + err := slug.Unpack(r, targetDir) + if err != nil { + return nil, err + } + return OpenDir(targetDir) +} diff --git a/sourcebundle/dependency_finder.go b/sourcebundle/dependency_finder.go new file mode 100644 index 0000000..4acdbab --- /dev/null +++ b/sourcebundle/dependency_finder.go @@ -0,0 +1,92 @@ +package sourcebundle + +import ( + "io/fs" + + "github.com/apparentlymart/go-versions/versions" + "github.com/hashicorp/go-slug/sourceaddrs" +) + +// A DependencyFinder analyzes a file or directory inside a source package +// and reports any dependencies described in that location. +// +// The same location could potentially be analyzed by multiple different +// DependencyFinder implementations if e.g. it's a directory containing +// a mixture of different kinds of artifact where each artifact has a +// disjoint set of relevant files. +// +// All DependencyFinder implementations must be comparable in the sense of +// supporting the == operator without panicking, and should typically be +// singletons, because [Builder] will use values of this type as part of +// the unique key for tracking whether a particular dependency has already +// been analyzed. 
// A typical DependencyFinder implementation is an empty
// struct type with the FindDependencies method implemented on it.
type DependencyFinder interface {
	// FindDependencies should analyze the file or directory at the given
	// sub-path of the given filesystem and then call the given callback
	// once for each detected dependency, providing both its source
	// address and the appropriate [DependencyFinder] for whatever kind
	// of source artifact is expected at that source address.
	//
	// The same source address can potentially contain artifacts of multiple
	// different types. The calling [Builder] will visit each distinct
	// (source, finder) pair only once for analysis, and will also aim to
	// avoid redundantly re-fetching the same source package more than once.
	//
	// If an implementer sends a local source address to the callback function,
	// the calling [Builder] will automatically resolve that relative to
	// the source address being analyzed. Implementers should typically first
	// validate that the local address does not traverse up (with "..") more
	// levels than are included in subPath, because implementers can return
	// higher-quality error diagnostics (with source location information)
	// than the calling Builder can.
	//
	// If the implementer emits diagnostics with source location information
	// then the filenames in the source ranges must be strings that would
	// pass [fs.ValidPath] describing a path from the root of the given fs
	// to the file containing the error. The builder will then translate those
	// paths into remote source address strings within the containing package.
	FindDependencies(fsys fs.FS, subPath string, deps *Dependencies) Diagnostics
}

// Dependencies is part of the callback API for [DependencyFinder]. Dependency
// finders use the methods of this type to report the dependencies they find
// in the source artifact being analyzed.
type Dependencies struct {
	// baseAddr is the remote source address of the artifact currently being
	// analyzed; local sources are resolved relative to it.
	baseAddr sourceaddrs.RemoteSource

	// The callbacks below are supplied by the caller (the Builder) and are
	// set to nil by disable once analysis of the artifact has finished.
	remoteCb          func(source sourceaddrs.RemoteSource, depFinder DependencyFinder)
	registryCb        func(source sourceaddrs.RegistrySource, allowedVersions versions.Set, depFinder DependencyFinder)
	localResolveErrCb func(err error)
}

// AddRemoteSource reports a dependency on the given remote source.
func (d *Dependencies) AddRemoteSource(source sourceaddrs.RemoteSource, depFinder DependencyFinder) {
	d.remoteCb(source, depFinder)
}

// AddRegistrySource reports a dependency on the given registry source,
// constrained to the given set of acceptable versions.
func (d *Dependencies) AddRegistrySource(source sourceaddrs.RegistrySource, allowedVersions versions.Set, depFinder DependencyFinder) {
	d.registryCb(source, allowedVersions, depFinder)
}

// AddLocalSource reports a dependency on a local source, which is resolved
// against the artifact's own address before being reported as remote.
func (d *Dependencies) AddLocalSource(source sourceaddrs.LocalSource, depFinder DependencyFinder) {
	// A local source always becomes a remote source in the same package as
	// the current base address.
	realSource, err := sourceaddrs.ResolveRelativeSource(d.baseAddr, source)
	if err != nil {
		d.localResolveErrCb(err)
		return
	}
	// realSource is guaranteed to be a RemoteSource because source is
	// a LocalSource and so the ResolveRelativeSource address is guaranteed
	// to have the same source type as d.baseAddr.
	d.remoteCb(realSource.(sourceaddrs.RemoteSource), depFinder)
}

// disable ensures that a [DependencyFinder] implementation can't incorrectly
// hold on to its given Dependencies object and continue calling it after it
// returns.
func (d *Dependencies) disable() {
	d.remoteCb = nil
	d.registryCb = nil
	d.localResolveErrCb = nil
}
diff --git a/sourcebundle/diagnostics.go b/sourcebundle/diagnostics.go
new file mode 100644
index 0000000..df2b932
--- /dev/null
+++ b/sourcebundle/diagnostics.go
@@ -0,0 +1,181 @@
package sourcebundle

import (
	"fmt"

	"github.com/hashicorp/go-slug/sourceaddrs"
)

// Diagnostics is a collection of problems (errors and warnings) that occurred
// during an operation.
+type Diagnostics []Diagnostic + +// Diagnostics represents a single problem (error or warning) that has occurred +// during an operation. +// +// This interface has no concrete implementations in this package. +// Implementors of [DependencyFinder] will need to implement this interface +// to report any problems they find while analyzing the designated source +// artifact. For example, a [DependencyFinder] that uses the HCL library +// to analyze an HCL-based language would probably implement this interface +// in terms of HCL's Diagnostic type. +type Diagnostic interface { + Severity() DiagSeverity + Description() DiagDescription + Source() DiagSource + + // ExtraInfo returns the raw extra information value. This is a low-level + // API which requires some work on the part of the caller to properly + // access associated information. This convention comes from HCL and + // Terraform and this is here primarily for their benefit; sourcebundle + // passes through these values verbatim without trying to interpret them. + ExtraInfo() interface{} +} + +func (diags Diagnostics) HasErrors() bool { + for _, diag := range diags { + if diag.Severity() == DiagError { + return true + } + } + return false +} + +func (diags Diagnostics) Append(more ...interface{}) Diagnostics { + for _, item := range more { + if item == nil { + continue + } + + switch item := item.(type) { + case Diagnostic: + diags = append(diags, item) + case Diagnostics: + diags = append(diags, item...) + default: + panic(fmt.Errorf("can't construct diagnostic(s) from %T", item)) + } + } + return diags +} + +type DiagSeverity rune + +const ( + DiagError DiagSeverity = 'E' + DiagWarning DiagSeverity = 'W' +) + +type DiagDescription struct { + Summary string + Detail string +} + +type DiagSource struct { + Subject *SourceRange + Context *SourceRange +} + +type SourceRange struct { + // Filename is a human-oriented label for the file that the range belongs + // to. 
This is often the string representation of a source address, but + // isn't guaranteed to be. + Filename string + Start, End SourcePos +} + +type SourcePos struct { + Line, Column, Byte int +} + +// diagnosticInSourcePackage is a thin wrapper around diagnostic that +// reinterprets the filenames in any source ranges to be relative to a +// particular remote source package, so it's unambiguous which remote +// source package the diagnostic originated in. +type diagnosticInSourcePackage struct { + wrapped Diagnostic + pkg sourceaddrs.RemotePackage +} + +// inRemoteSourcePackage modifies the reciever in-place so that all of the +// diagnostics will have their source filenames (if any) interpreted as +// sub-paths within the given source package. +// +// For convenience, returns the same diags slice whose backing array has now +// been modified with different diagnostics. +func (diags Diagnostics) inRemoteSourcePackage(pkg sourceaddrs.RemotePackage) Diagnostics { + for i, diag := range diags { + diags[i] = diagnosticInSourcePackage{ + wrapped: diag, + pkg: pkg, + } + } + return diags +} + +var _ Diagnostic = diagnosticInSourcePackage{} + +func (diag diagnosticInSourcePackage) Description() DiagDescription { + return diag.wrapped.Description() +} + +func (diag diagnosticInSourcePackage) ExtraInfo() interface{} { + return diag.wrapped.ExtraInfo() +} + +func (diag diagnosticInSourcePackage) Severity() DiagSeverity { + return diag.wrapped.Severity() +} + +func (diag diagnosticInSourcePackage) Source() DiagSource { + ret := diag.Source() + if ret.Subject != nil && sourceaddrs.ValidSubPath(ret.Subject.Filename) { + newRng := *ret.Subject // shallow copy + newRng.Filename = diag.pkg.SourceAddr(newRng.Filename).String() + ret.Subject = &newRng + } + if ret.Context != nil && sourceaddrs.ValidSubPath(ret.Context.Filename) { + newRng := *ret.Context // shallow copy + newRng.Filename = diag.pkg.SourceAddr(newRng.Filename).String() + ret.Context = &newRng + } + return ret +} + +// 
internalDiagnostic is a diagnostic type used to report this package's own +// errors as diagnostics. +// +// This package doesn't ever work directly with individual source file contents, +// so an internal diagnostic never has source location information. +type internalDiagnostic struct { + severity DiagSeverity + summary string + detail string +} + +var _ Diagnostic = (*internalDiagnostic)(nil) + +// Description implements Diagnostic +func (d *internalDiagnostic) Description() DiagDescription { + return DiagDescription{ + Summary: d.summary, + Detail: d.detail, + } +} + +// ExtraInfo implements Diagnostic +func (d *internalDiagnostic) ExtraInfo() interface{} { + return nil +} + +// Severity implements Diagnostic +func (d *internalDiagnostic) Severity() DiagSeverity { + return d.severity +} + +// Source implements Diagnostic +func (d *internalDiagnostic) Source() DiagSource { + return DiagSource{ + // Never any source location information for internal diagnostics. + } +} diff --git a/sourcebundle/doc.go b/sourcebundle/doc.go new file mode 100644 index 0000000..47ad1b4 --- /dev/null +++ b/sourcebundle/doc.go @@ -0,0 +1,17 @@ +// Package sourcebundle deals with the construction of and later consumption of +// "source bundles", which are in some sense "meta-slugs" that capture a +// variety of different source packages together into a single working +// directory, which can optionally be bundled up into an archive for insertion +// into a blob storage system. +// +// Whereas single slugs (as implemented in the parent package) have very little +// predefined structure aside from the possibility of a .terraformignore file, +// source bundles have a more prescriptive structure that allows callers to +// use a source bundle as a direct substitute for fetching the individual +// source packages it was built from. +// +// NOTE WELL: Everything in this package is currently experimental and subject +// to breaking changes even in patch releases. 
We will make stronger commitments +// to backward-compatibility once we have more experience using this +// functionality in real contexts. +package sourcebundle diff --git a/sourcebundle/manifest_json.go b/sourcebundle/manifest_json.go new file mode 100644 index 0000000..c2257fb --- /dev/null +++ b/sourcebundle/manifest_json.go @@ -0,0 +1,48 @@ +package sourcebundle + +// This file contains some internal-only types used to help with marshalling +// and unmarshalling our manifest file format. The manifest format is not +// itself a public interface, so these should stay unexported and any caller +// that needs to interact with previously-generated source bundle manifests +// should do so via the Bundle type. + +type manifestRoot struct { + // FormatVersion should always be 1 for now, because there is only + // one version of this format. + FormatVersion uint64 `json:"terraform_source_bundle"` + + Packages []manifestRemotePackage `json:"packages,omitempty"` + RegistryMeta []manifestRegistryMeta `json:"registry,omitempty"` +} + +type manifestRemotePackage struct { + // SourceAddr is the address of an entire remote package, meaning that + // it must not have a sub-path portion. + SourceAddr string `json:"source"` + + // LocalDir is the name of the subdirectory of the bundle containing the + // source code for this package. + LocalDir string `json:"local"` + + Meta manifestPackageMeta `json:"meta,omitempty"` +} + +type manifestRegistryMeta struct { + // SourceAddr is the address of an entire registry package, meaning that + // it must not have a sub-path portion. + SourceAddr string `json:"source"` + + // Versions is a map from string representations of [versions.Version]. + Versions map[string]manifestRegistryVersion `json:"versions,omitempty"` +} + +type manifestRegistryVersion struct { + // This SourceAddr is a full source address, so it might potentially + // have a sub-path portion. 
If it does then it must be combined with
+	// any sub-path included in the user's registry module source address.
+	SourceAddr string `json:"source"`
+}
+
+type manifestPackageMeta struct {
+	GitCommitID string `json:"git_commit_id,omitempty"`
+}
diff --git a/sourcebundle/package_fetcher.go b/sourcebundle/package_fetcher.go
new file mode 100644
index 0000000..229ac40
--- /dev/null
+++ b/sourcebundle/package_fetcher.go
@@ -0,0 +1,36 @@
+package sourcebundle
+
+import (
+	"context"
+	"net/url"
+)
+
+// A PackageFetcher knows how to fetch remote source packages into a local
+// filesystem directory.
+//
+type PackageFetcher interface {
+	// FetchSourcePackage retrieves a source package from the given
+	// location and extracts it into the given local filesystem directory.
+	//
+	// A package fetcher is responsible for ensuring that nothing gets written
+	// outside of the given target directory. However, a fetcher can assume that
+	// nothing should be modifying or moving targetDir or any of its contents
+	// concurrently with the fetcher running.
+	//
+	// If the function returns with a nil error then the target directory must be
+	// a complete copy of the designated remote package, ready for further analysis.
+	//
+	// Package fetchers should respond to cancellation of the given
+	// [context.Context] to a reasonable extent, so that the source bundle build
+	// process can be interrupted relatively promptly. Return a non-nil error when
+	// cancelled to allow the caller to detect that the target directory might not
+	// be in a consistent state.
+	//
+	// PackageFetchers should not have any persistent mutable state: each call
+	// should be independent of all past, concurrent, and future calls.
In
+	// particular, a fetcher should not attempt to implement any caching behavior,
+	// because it's [Builder]'s responsibility to handle caching and request
+	// coalescing during bundle construction to ensure that it will happen
+	// consistently across different fetcher implementations.
+	FetchSourcePackage(ctx context.Context, sourceType string, url *url.URL, targetDir string) (*PackageMeta, error)
+}
diff --git a/sourcebundle/package_meta.go b/sourcebundle/package_meta.go
new file mode 100644
index 0000000..a195d0d
--- /dev/null
+++ b/sourcebundle/package_meta.go
@@ -0,0 +1,37 @@
+package sourcebundle
+
+// PackageMeta is a collection of metadata about how the content of a
+// particular remote package was derived.
+//
+// A nil value of this type represents no metadata. A non-nil value will
+// typically omit some or all of the fields if they are not relevant.
+type PackageMeta struct {
+	// NOTE: Everything in here is unexported for now because it's not clear
+	// how this is going to evolve in future and whether it's a good idea
+	// to just have a separate field for each piece of metadata. This will
+	// give some freedom to switch to other storage strategies in future if
+	// this struct ends up getting too big and is only sparsely used by most
+	// fetchers.
+
+	gitCommitID string
+}
+
+// PackageMetaWithGitCommit returns a [PackageMeta] object with a Git Commit
+// ID tracked. The given commit ID must be a fully-qualified ID, and never an
+// abbreviated commit ID, the name of a ref, or any other proxy-for-commit
+// identifier.
+func PackageMetaWithGitCommit(commitID string) *PackageMeta {
+	return &PackageMeta{
+		gitCommitID: commitID,
+	}
+}
+
+// GitCommitID returns the fully-qualified ID of the particular Git commit
+// that the content of this package was derived from, if any. The result is
+// never an abbreviated commit ID, the name of a ref,
+// or anything else that could serve as a proxy for a commit ID.
+// +// If there is no relevant commit ID for this package, returns an empty string. +func (m *PackageMeta) GitCommitID() string { + return m.gitCommitID +} diff --git a/sourcebundle/registry_client.go b/sourcebundle/registry_client.go new file mode 100644 index 0000000..e811334 --- /dev/null +++ b/sourcebundle/registry_client.go @@ -0,0 +1,28 @@ +package sourcebundle + +import ( + "context" + + "github.com/apparentlymart/go-versions/versions" + "github.com/hashicorp/go-slug/sourceaddrs" + regaddr "github.com/hashicorp/terraform-registry-address" +) + +// RegistryClient provides a minimal client for the Terraform module registry +// protocol, sufficient to find the available versions for a particular +// registry entry and then to find the real remote package for a particular +// version. +// +// An implementation should not itself attempt to cache the direct results of +// the client methods, but it can (and probably should) cache prerequisite +// information such as the results of performing service discovery against +// the hostname in a module package address. +type RegistryClient interface { + // ModulePackageVersions returns all of the known exact versions + // available for the given package in its module registry. + ModulePackageVersions(ctx context.Context, pkgAddr regaddr.ModulePackage) (versions.List, error) + + // ModulePackageSourceAddr returns the real remote source address for the + // given version of the given module registry package. + ModulePackageSourceAddr(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version) (sourceaddrs.RemoteSource, error) +} diff --git a/sourcebundle/testdata/pkgs/hello/hello b/sourcebundle/testdata/pkgs/hello/hello new file mode 100644 index 0000000..af5626b --- /dev/null +++ b/sourcebundle/testdata/pkgs/hello/hello @@ -0,0 +1 @@ +Hello, world! 
diff --git a/sourcebundle/testdata/pkgs/subdirs/a/b/beepbeep b/sourcebundle/testdata/pkgs/subdirs/a/b/beepbeep new file mode 100644 index 0000000..461ea4a --- /dev/null +++ b/sourcebundle/testdata/pkgs/subdirs/a/b/beepbeep @@ -0,0 +1 @@ +BEEP! diff --git a/sourcebundle/testdata/pkgs/terraformignore/.terraformignore b/sourcebundle/testdata/pkgs/terraformignore/.terraformignore new file mode 100644 index 0000000..bbde3dc --- /dev/null +++ b/sourcebundle/testdata/pkgs/terraformignore/.terraformignore @@ -0,0 +1 @@ +excluded diff --git a/sourcebundle/testdata/pkgs/terraformignore/excluded b/sourcebundle/testdata/pkgs/terraformignore/excluded new file mode 100644 index 0000000..9a10460 --- /dev/null +++ b/sourcebundle/testdata/pkgs/terraformignore/excluded @@ -0,0 +1 @@ +This file is ignored. diff --git a/sourcebundle/testdata/pkgs/terraformignore/included b/sourcebundle/testdata/pkgs/terraformignore/included new file mode 100644 index 0000000..f2834fd --- /dev/null +++ b/sourcebundle/testdata/pkgs/terraformignore/included @@ -0,0 +1 @@ +This file is included. 
diff --git a/sourcebundle/testdata/pkgs/with-remote-deps/dependencies b/sourcebundle/testdata/pkgs/with-remote-deps/dependencies new file mode 100644 index 0000000..557aba6 --- /dev/null +++ b/sourcebundle/testdata/pkgs/with-remote-deps/dependencies @@ -0,0 +1,2 @@ +https://example.com/dependency1.tgz +https://example.com/dependency2.tgz diff --git a/sourcebundle/testdata/pkgs/with-remote-deps/self_dependency b/sourcebundle/testdata/pkgs/with-remote-deps/self_dependency new file mode 100644 index 0000000..2a71619 --- /dev/null +++ b/sourcebundle/testdata/pkgs/with-remote-deps/self_dependency @@ -0,0 +1 @@ +https://example.com/self_dependency.tgz \ No newline at end of file diff --git a/sourcebundle/trace.go b/sourcebundle/trace.go new file mode 100644 index 0000000..748e301 --- /dev/null +++ b/sourcebundle/trace.go @@ -0,0 +1,94 @@ +package sourcebundle + +import ( + "context" + + "github.com/apparentlymart/go-versions/versions" + "github.com/hashicorp/go-slug/sourceaddrs" + regaddr "github.com/hashicorp/terraform-registry-address" +) + +// BuildTracer contains a set of callbacks that a caller can optionally provide +// to [Builder] methods via their [context.Context] arguments to be notified +// when various long-running events are starting and stopping, to allow both +// for debugging and for UI feedback about progress. +// +// Any or all of the callbacks may be left as nil, in which case no event +// will be delivered for the corresponding event. +// +// The [context.Context] passed to each trace function is guaranteed to be a +// child of the one passed to whatever [Builder] method caused the event +// to occur, and so it can carry cross-cutting information such as distributed +// tracing clients. +// +// The "Start"-suffixed methods all allow returning a new context which will +// then be passed to the corresponding "Success"-suffixed or "Failure"-suffixed +// function, and also used for outgoing requests within the scope of that +// operation. 
This allows carrying values such as tracing spans between the +// start and end, so they can properly bracket the operation in question. If +// your tracer doesn't need this then just return the given context. +type BuildTracer struct { + // The RegistryPackageVersions... callbacks frame any requests to + // fetch the list of available versions for a module registry package. + RegistryPackageVersionsStart func(ctx context.Context, pkgAddr regaddr.ModulePackage) context.Context + RegistryPackageVersionsSuccess func(ctx context.Context, pkgAddr regaddr.ModulePackage, versions versions.List) + RegistryPackageVersionsFailure func(ctx context.Context, pkgAddr regaddr.ModulePackage, err error) + RegistryPackageVersionsAlready func(ctx context.Context, pkgAddr regaddr.ModulePackage, versions versions.List) + + // The RegistryPackageSource... callbacks frame any requests to fetch + // the real underlying source address for a selected registry package + // version. + RegistryPackageSourceStart func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version) context.Context + RegistryPackageSourceSuccess func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version, sourceAddr sourceaddrs.RemoteSource) + RegistryPackageSourceFailure func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version, err error) + RegistryPackageSourceAlready func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version, sourceAddr sourceaddrs.RemoteSource) + + // The RemotePackageDownload... callbacks frame any requests to download + // remote source packages. 
+	RemotePackageDownloadStart   func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage) context.Context
+	RemotePackageDownloadSuccess func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage)
+	RemotePackageDownloadFailure func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage, err error)
+	RemotePackageDownloadAlready func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage)
+
+	// Diagnostics will be called for any diagnostics that describe problems
+	// that aren't also reported by calling one of the "Failure" callbacks
+	// above. A recipient that is going to report the errors itself using
+	// the Failure callbacks anyway should consume diagnostics from this
+	// event, rather than from the return values of the [Builder] methods,
+	// to avoid redundantly reporting the same errors twice.
+	//
+	// Diagnostics might be called multiple times during an operation. Callers
+	// should consider each new call to represent additional diagnostics,
+	// not replacing any previously returned.
+	Diagnostics func(ctx context.Context, diags Diagnostics)
+}
+
+// OnContext takes a context and returns a derived context which has everything
+// the given context already had plus also the receiving BuildTracer object,
+// so that passing the resulting context to methods of [Builder] will cause
+// the tracer's callbacks to be called.
+//
+// Each context can have only one tracer, so if the given context already has
+// a tracer then it will be overridden by the new one.
+func (bt *BuildTracer) OnContext(ctx context.Context) context.Context {
+	return context.WithValue(ctx, buildTraceKey, bt)
+}
+
+func buildTraceFromContext(ctx context.Context) *BuildTracer {
+	ret, ok := ctx.Value(buildTraceKey).(*BuildTracer)
+	if !ok {
+		// We'll always return a non-nil pointer just because that reduces
+		// the amount of boilerplate required in the caller when announcing
+		// events.
+ ret = &noopBuildTrace + } + return ret +} + +type buildTraceKeyType int + +const buildTraceKey buildTraceKeyType = 0 + +// noopBuildTrace is an all-nil [BuildTracer] we return a pointer to if we're +// asked for a BuildTrace from a context that doesn't have one. +var noopBuildTrace BuildTracer diff --git a/terraformignore.go b/terraformignore.go index 6803313..0863167 100644 --- a/terraformignore.go +++ b/terraformignore.go @@ -1,17 +1,14 @@ package slug import ( - "bufio" "fmt" - "io" "os" "path/filepath" - "regexp" - "strings" - "text/scanner" + + "github.com/hashicorp/go-slug/internal/ignorefiles" ) -func parseIgnoreFile(rootPath string) []rule { +func parseIgnoreFile(rootPath string) *ignorefiles.Ruleset { // Look for .terraformignore at our root path/src file, err := os.Open(filepath.Join(rootPath, ".terraformignore")) defer file.Close() @@ -22,209 +19,22 @@ func parseIgnoreFile(rootPath string) []rule { if !os.IsNotExist(err) { fmt.Fprintf(os.Stderr, "Error reading .terraformignore, default exclusions will apply: %v \n", err) } - return defaultExclusions - } - return readRules(file) -} - -func readRules(input io.Reader) []rule { - rules := defaultExclusions - scanner := bufio.NewScanner(input) - scanner.Split(bufio.ScanLines) - - for scanner.Scan() { - pattern := scanner.Text() - // Ignore blank lines - if len(pattern) == 0 { - continue - } - // Trim spaces - pattern = strings.TrimSpace(pattern) - // Ignore comments - if pattern[0] == '#' { - continue - } - // New rule structure - rule := rule{} - // Exclusions - if pattern[0] == '!' 
{ - rule.excluded = true - pattern = pattern[1:] - } - // If it is a directory, add ** so we catch descendants - if pattern[len(pattern)-1] == os.PathSeparator { - pattern = pattern + "**" - } - // If it starts with /, it is absolute - if pattern[0] == os.PathSeparator { - pattern = pattern[1:] - } else { - // Otherwise prepend **/ - pattern = "**" + string(os.PathSeparator) + pattern - } - rule.val = pattern - rule.dirs = strings.Split(pattern, string(os.PathSeparator)) - rules = append(rules, rule) + return ignorefiles.DefaultRuleset } - if err := scanner.Err(); err != nil { - fmt.Fprintf(os.Stderr, "Error reading .terraformignore, default exclusions will apply: %v \n", err) - return defaultExclusions - } - return rules -} - -func matchIgnoreRule(path string, rules []rule) bool { - matched := false - path = filepath.FromSlash(path) - for _, rule := range rules { - match, _ := rule.match(path) - - if match { - matched = !rule.excluded - } - } - - if matched { - debug(true, path, "Skipping excluded path:", path) - } - - return matched -} - -type rule struct { - val string // the value of the rule itself - excluded bool // ! is present, an exclusion rule - dirs []string // directories of the rule - regex *regexp.Regexp // regular expression to match for the rule -} - -func (r *rule) match(path string) (bool, error) { - if r.regex == nil { - if err := r.compile(); err != nil { - return false, filepath.ErrBadPattern - } - } - - b := r.regex.MatchString(path) - debug(false, path, path, r.regex, b) - return b, nil -} - -func (r *rule) compile() error { - regStr := "^" - pattern := r.val - // Go through the pattern and convert it to a regexp. - // Use a scanner to support utf-8 chars. 
- var scan scanner.Scanner - scan.Init(strings.NewReader(pattern)) - - sl := string(os.PathSeparator) - escSL := sl - if sl == `\` { - escSL += `\` - } - - for scan.Peek() != scanner.EOF { - ch := scan.Next() - if ch == '*' { - if scan.Peek() == '*' { - // is some flavor of "**" - scan.Next() - - // Treat **/ as ** so eat the "/" - if string(scan.Peek()) == sl { - scan.Next() - } - - if scan.Peek() == scanner.EOF { - // is "**EOF" - to align with .gitignore just accept all - regStr += ".*" - } else { - // is "**" - // Note that this allows for any # of /'s (even 0) because - // the .* will eat everything, even /'s - regStr += "(.*" + escSL + ")?" - } - } else { - // is "*" so map it to anything but "/" - regStr += "[^" + escSL + "]*" - } - } else if ch == '?' { - // "?" is any char except "/" - regStr += "[^" + escSL + "]" - } else if ch == '.' || ch == '$' { - // Escape some regexp special chars that have no meaning - // in golang's filepath.Match - regStr += `\` + string(ch) - } else if ch == '\\' { - // escape next char. 
Note that a trailing \ in the pattern - // will be left alone (but need to escape it) - if sl == `\` { - // On windows map "\" to "\\", meaning an escaped backslash, - // and then just continue because filepath.Match on - // Windows doesn't allow escaping at all - regStr += escSL - continue - } - if scan.Peek() != scanner.EOF { - regStr += `\` + string(scan.Next()) - } else { - regStr += `\` - } - } else { - regStr += string(ch) - } - } - - regStr += "$" - re, err := regexp.Compile(regStr) + ret, err := ignorefiles.ParseIgnoreFileContent(file) if err != nil { - return err + fmt.Fprintf(os.Stderr, "Error reading .terraformignore, default exclusions will apply: %v \n", err) + return ignorefiles.DefaultRuleset } - r.regex = re - return nil + return ret } -/* - Default rules as they would appear in .terraformignore: - .git/ - .terraform/ - !.terraform/modules/ - terraform.tfstate -*/ - -var defaultExclusions = []rule{ - { - val: filepath.Join("**", ".git", "**"), - excluded: false, - }, - { - val: filepath.Join("**", ".terraform", "**"), - excluded: false, - }, - { - val: filepath.Join("**", ".terraform", "modules", "**"), - excluded: true, - }, - { - val: filepath.Join("**", "terraform.tfstate"), - excluded: false, - }, -} - -func debug(printAll bool, path string, message ...interface{}) { - logLevel := os.Getenv("TF_IGNORE") == "trace" - debugPath := os.Getenv("TF_IGNORE_DEBUG") - isPath := debugPath != "" - if isPath { - isPath = strings.Contains(path, debugPath) - } - - if logLevel { - if printAll || isPath { - fmt.Println(message...) - } - } +func matchIgnoreRules(path string, ruleset *ignorefiles.Ruleset) bool { + // Ruleset.Excludes explicitly allows ignoring its error, in which + // case we are ignoring any individual invalid rules in the set + // but still taking all of the others into account. + ret, _ := ruleset.Excludes(path) + return ret }