From ccbbeeefcd661b65b9b2141ca5254c38bae49fe6 Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Fri, 28 Apr 2023 14:06:17 -0700 Subject: [PATCH 1/7] ignorefiles: .terraformignore handling in a separate package The handling of .terraformignore files was previously just embedded directly in the root package of this repository as a bunch of unexported symbols. In future commits we'll introduce another package to this codebase that will also need to deal with .terraformignore files, but in a slightly different context. In preparation, here we factor out that handling into a separate internal package and adapt the root package to call into that package instead of using its own inline implementation. This carefully preserves the existing caller's ability to ignore various errors, while now allowing new callers to handle errors differently. --- internal/ignorefiles/ignorerules.go | 100 ++++++++ internal/ignorefiles/terraformignore.go | 186 +++++++++++++++ .../ignorefiles/terraformignore_test.go | 66 +++--- .../testdata/archive-dir/.terraform/file.txt | 0 .../archive-dir/.terraform/modules/README | 2 + .../archive-dir/.terraform/plugins/README | 2 + .../testdata/archive-dir/.terraformignore | 20 ++ .../testdata/archive-dir/.terraformrc | 0 .../ignorefiles/testdata/archive-dir/bar.txt | 1 + .../ignorefiles/testdata/archive-dir/baz.txt | 1 + internal/ignorefiles/testdata/archive-dir/exe | 0 .../archive-dir/foo.terraform/bar.txt | 0 .../ignorefiles/testdata/archive-dir/foo.txt | 1 + .../testdata/archive-dir/sub/bar.txt | 1 + .../testdata/archive-dir/sub/zip.txt | 1 + .../ignorefiles/testdata/external-dir/foo.txt | 1 + slug.go | 10 +- slug_test.go | 1 + terraformignore.go | 218 ++---------------- 19 files changed, 375 insertions(+), 236 deletions(-) create mode 100644 internal/ignorefiles/ignorerules.go create mode 100644 internal/ignorefiles/terraformignore.go rename terraformignore_test.go => internal/ignorefiles/terraformignore_test.go (72%) create mode 100644 
internal/ignorefiles/testdata/archive-dir/.terraform/file.txt create mode 100644 internal/ignorefiles/testdata/archive-dir/.terraform/modules/README create mode 100644 internal/ignorefiles/testdata/archive-dir/.terraform/plugins/README create mode 100644 internal/ignorefiles/testdata/archive-dir/.terraformignore create mode 100644 internal/ignorefiles/testdata/archive-dir/.terraformrc create mode 100644 internal/ignorefiles/testdata/archive-dir/bar.txt create mode 100644 internal/ignorefiles/testdata/archive-dir/baz.txt create mode 100755 internal/ignorefiles/testdata/archive-dir/exe create mode 100644 internal/ignorefiles/testdata/archive-dir/foo.terraform/bar.txt create mode 120000 internal/ignorefiles/testdata/archive-dir/foo.txt create mode 120000 internal/ignorefiles/testdata/archive-dir/sub/bar.txt create mode 100644 internal/ignorefiles/testdata/archive-dir/sub/zip.txt create mode 100644 internal/ignorefiles/testdata/external-dir/foo.txt diff --git a/internal/ignorefiles/ignorerules.go b/internal/ignorefiles/ignorerules.go new file mode 100644 index 0000000..25c2677 --- /dev/null +++ b/internal/ignorefiles/ignorerules.go @@ -0,0 +1,100 @@ +// Package ignorefiles deals with the ".terraformignore" file format, which +// is a convention similar to ".gitignore" that specifies path patterns that +// match files Terraform should discard or ignore when interpreting a package +// fetched from a remote location. +package ignorefiles + +import ( + "fmt" + "io" + "os" + "path/filepath" +) + +// A Ruleset is the result of reading, parsing, and compiling a +// ".terraformignore" file. +type Ruleset struct { + rules []rule +} + +// ParseIgnoreFileContent takes a reader over the content of a .terraformignore +// file and returns the Ruleset described by that file, or an error if the +// file is invalid. 
+func ParseIgnoreFileContent(r io.Reader) (*Ruleset, error) { + rules, err := readRules(r) + if err != nil { + return nil, err + } + return &Ruleset{rules: rules}, nil +} + +// LoadPackageIgnoreRules implements reasonable default behavior for finding +// ignore rules for a particular package root directory: if .terraformignore is +// present then use it, or otherwise just return DefaultRuleset. +// +// This function will return an error only if an ignore file is present but +// unreadable, or if an ignore file is present but contains invalid syntax. +func LoadPackageIgnoreRules(packageDir string) (*Ruleset, error) { + file, err := os.Open(filepath.Join(packageDir, ".terraformignore")) + if err != nil { + if os.IsNotExist(err) { + return DefaultRuleset, nil + } + return nil, fmt.Errorf("cannot read .terraformignore: %s", err) + } + defer file.Close() + + ret, err := ParseIgnoreFileContent(file) + if err != nil { + // The parse errors already mention that they were parsing ignore rules, + // so don't need an additional prefix added. + return nil, err + } + return ret, nil +} + +// Excludes tests whether the given path matches the set of paths that are +// excluded by the rules in the ruleset. +// +// If any of the rules in the ruleset have invalid syntax then Excludes will +// return an error, but it will also still return a boolean result which +// considers all of the remaining valid rules, to support callers that want to +// just ignore invalid exclusions. Such callers can safely ignore the error +// result: +// +// exc, _ = ruleset.Excludes(path) +func (r *Ruleset) Excludes(path string) (bool, error) { + if r == nil { + return false, nil + } + + var retErr error + foundMatch := false + for _, rule := range r.rules { + match, err := rule.match(path) + if err != nil { + // We'll remember the first error we encounter, but continue + // matching anyway to support callers that want to ignore invalid + // lines and just match with whatever's left. 
+ if retErr == nil { + retErr = fmt.Errorf("invalid ignore rule %q", rule.val) + } + } + if match { + foundMatch = !rule.excluded + } + } + return foundMatch, retErr +} + +// Includes is the inverse of [Ruleset.Excludes]. +func (r *Ruleset) Includes(path string) (bool, error) { + notRet, err := r.Excludes(path) + return !notRet, err +} + +var DefaultRuleset *Ruleset + +func init() { + DefaultRuleset = &Ruleset{rules: defaultExclusions} +} diff --git a/internal/ignorefiles/terraformignore.go b/internal/ignorefiles/terraformignore.go new file mode 100644 index 0000000..0eda3a0 --- /dev/null +++ b/internal/ignorefiles/terraformignore.go @@ -0,0 +1,186 @@ +package ignorefiles + +import ( + "bufio" + "fmt" + "io" + "os" + "path/filepath" + "regexp" + "strings" + "text/scanner" +) + +func readRules(input io.Reader) ([]rule, error) { + rules := defaultExclusions + scanner := bufio.NewScanner(input) + scanner.Split(bufio.ScanLines) + + for scanner.Scan() { + pattern := scanner.Text() + // Ignore blank lines + if len(pattern) == 0 { + continue + } + // Trim spaces + pattern = strings.TrimSpace(pattern) + // Ignore comments + if pattern[0] == '#' { + continue + } + // New rule structure + rule := rule{} + // Exclusions + if pattern[0] == '!' { + rule.excluded = true + pattern = pattern[1:] + } + // If it is a directory, add ** so we catch descendants + if pattern[len(pattern)-1] == os.PathSeparator { + pattern = pattern + "**" + } + // If it starts with /, it is absolute + if pattern[0] == os.PathSeparator { + pattern = pattern[1:] + } else { + // Otherwise prepend **/ + pattern = "**" + string(os.PathSeparator) + pattern + } + rule.val = pattern + rule.dirs = strings.Split(pattern, string(os.PathSeparator)) + rules = append(rules, rule) + } + + if err := scanner.Err(); err != nil { + return nil, fmt.Errorf("syntax error in .terraformignore: %w", err) + } + return rules, nil +} + +type rule struct { + val string // the value of the rule itself + excluded bool // ! 
is present, an exclusion rule + dirs []string // directories of the rule + regex *regexp.Regexp // regular expression to match for the rule +} + +func (r *rule) match(path string) (bool, error) { + if r.regex == nil { + if err := r.compile(); err != nil { + return false, filepath.ErrBadPattern + } + } + + b := r.regex.MatchString(path) + return b, nil +} + +func (r *rule) compile() error { + regStr := "^" + pattern := r.val + // Go through the pattern and convert it to a regexp. + // Use a scanner to support utf-8 chars. + var scan scanner.Scanner + scan.Init(strings.NewReader(pattern)) + + sl := string(os.PathSeparator) + escSL := sl + if sl == `\` { + escSL += `\` + } + + for scan.Peek() != scanner.EOF { + ch := scan.Next() + if ch == '*' { + if scan.Peek() == '*' { + // is some flavor of "**" + scan.Next() + + // Treat **/ as ** so eat the "/" + if string(scan.Peek()) == sl { + scan.Next() + } + + if scan.Peek() == scanner.EOF { + // is "**EOF" - to align with .gitignore just accept all + regStr += ".*" + } else { + // is "**" + // Note that this allows for any # of /'s (even 0) because + // the .* will eat everything, even /'s + regStr += "(.*" + escSL + ")?" + } + } else { + // is "*" so map it to anything but "/" + regStr += "[^" + escSL + "]*" + } + } else if ch == '?' { + // "?" is any char except "/" + regStr += "[^" + escSL + "]" + } else if ch == '.' || ch == '$' { + // Escape some regexp special chars that have no meaning + // in golang's filepath.Match + regStr += `\` + string(ch) + } else if ch == '\\' { + // escape next char. 
Note that a trailing \ in the pattern + // will be left alone (but need to escape it) + if sl == `\` { + // On windows map "\" to "\\", meaning an escaped backslash, + // and then just continue because filepath.Match on + // Windows doesn't allow escaping at all + regStr += escSL + continue + } + if scan.Peek() != scanner.EOF { + regStr += `\` + string(scan.Next()) + } else { + regStr += `\` + } + } else { + regStr += string(ch) + } + } + + regStr += "$" + re, err := regexp.Compile(regStr) + if err != nil { + return err + } + + r.regex = re + return nil +} + +/* + Default rules as they would appear in .terraformignore: + .git/ + .terraform/ + !.terraform/modules/ +*/ + +var defaultExclusions = []rule{ + { + val: strings.Join([]string{"**", ".git", "**"}, string(os.PathSeparator)), + excluded: false, + }, + { + val: strings.Join([]string{"**", ".terraform", "**"}, string(os.PathSeparator)), + excluded: false, + }, + { + val: strings.Join([]string{"**", ".terraform", "modules", "**"}, string(os.PathSeparator)), + excluded: true, + }, +} + +func init() { + // We'll precompile all of the default rules at initialization, so we + // don't need to recompile them every time we encounter a package that + // doesn't have any rules (the common case). 
+ for _, r := range defaultExclusions { + err := r.compile() + if err != nil { + panic(fmt.Sprintf("invalid default rule %q: %s", r.val, err)) + } + } +} diff --git a/terraformignore_test.go b/internal/ignorefiles/terraformignore_test.go similarity index 72% rename from terraformignore_test.go rename to internal/ignorefiles/terraformignore_test.go index 5ca2750..1ca5ba1 100644 --- a/terraformignore_test.go +++ b/internal/ignorefiles/terraformignore_test.go @@ -1,4 +1,4 @@ -package slug +package ignorefiles import ( "testing" @@ -6,13 +6,19 @@ import ( func TestTerraformIgnore(t *testing.T) { // path to directory without .terraformignore - p := parseIgnoreFile("testdata") - if len(p) != 4 { + rs, err := LoadPackageIgnoreRules("testdata/external-dir") + if err != nil { + t.Fatal(err) + } + if len(rs.rules) != 3 { t.Fatal("A directory without .terraformignore should get the default patterns") } // load the .terraformignore file's patterns - ignoreRules := parseIgnoreFile("testdata/archive-dir") + rs, err = LoadPackageIgnoreRules("testdata/archive-dir") + if err != nil { + t.Fatal(err) + } type file struct { // the actual path, should be file path format /dir/subdir/file.extension path string @@ -20,93 +26,97 @@ func TestTerraformIgnore(t *testing.T) { match bool } paths := []file{ - { + 0: { path: ".terraform/", match: true, }, - { + 1: { path: "included.txt", match: false, }, - { + 2: { path: ".terraform/foo/bar", match: true, }, - { + 3: { path: ".terraform/foo/bar/more/directories/so/many", match: true, }, - { + 4: { path: ".terraform/foo/ignored-subdirectory/", match: true, }, - { + 5: { path: "baz.txt", match: true, }, - { + 6: { path: "parent/foo/baz.txt", match: true, }, - { + 7: { path: "parent/foo/bar.tf", match: true, }, - { + 8: { path: "parent/bar/bar.tf", match: false, }, // baz.txt is ignored, but a file name including it should not be - { + 9: { path: "something/with-baz.txt", match: false, }, - { + 10: { path: "something/baz.x", match: false, }, // 
Getting into * patterns - { + 11: { path: "foo/ignored-doc.md", match: true, }, // Should match [a-z] group - { + 12: { path: "bar/something-a.txt", match: true, }, - // ignore sub- terraform.d paths - { + // ignore sub- terraform.d paths... + 13: { path: "some-module/terraform.d/x", match: true, }, - // but not the root one - { + // ...but not the root one + 14: { path: "terraform.d/", match: false, }, - { + 15: { path: "terraform.d/foo", match: false, }, // We ignore the directory, but a file of the same name could exist - { + 16: { path: "terraform.d", match: false, }, - // boop.text is ignored everywhere - { + // boop.txt is ignored everywhere... + 17: { path: "baz/boop.txt", match: true, }, - // except at current directory - { + // ...except in root directory + 18: { path: "boop.txt", match: false, }, } for i, p := range paths { - match := matchIgnoreRule(p.path, ignoreRules) + match, err := rs.Excludes(p.path) + if err != nil { + t.Errorf("invalid rule syntax when checking %s at index %d", p.path, i) + continue + } if match != p.match { t.Fatalf("%s at index %d should be %t", p.path, i, p.match) } diff --git a/internal/ignorefiles/testdata/archive-dir/.terraform/file.txt b/internal/ignorefiles/testdata/archive-dir/.terraform/file.txt new file mode 100644 index 0000000..e69de29 diff --git a/internal/ignorefiles/testdata/archive-dir/.terraform/modules/README b/internal/ignorefiles/testdata/archive-dir/.terraform/modules/README new file mode 100644 index 0000000..8c1ea48 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/.terraform/modules/README @@ -0,0 +1,2 @@ +Keep this file and directory here to test if its properly ignored + diff --git a/internal/ignorefiles/testdata/archive-dir/.terraform/plugins/README b/internal/ignorefiles/testdata/archive-dir/.terraform/plugins/README new file mode 100644 index 0000000..8c1ea48 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/.terraform/plugins/README @@ -0,0 +1,2 @@ +Keep this file and 
directory here to test if its properly ignored + diff --git a/internal/ignorefiles/testdata/archive-dir/.terraformignore b/internal/ignorefiles/testdata/archive-dir/.terraformignore new file mode 100644 index 0000000..3503ae9 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/.terraformignore @@ -0,0 +1,20 @@ +# comments are ignored + # extra spaces are irrelevant +# ignore a file + baz.txt +# below is an empty line + +# ignore a directory +terraform.d/ +# exclude ignoring a directory at the root +!/terraform.d/ +# ignore a file at a subpath +**/foo/bar.tf +# ignore files with specific endings +foo/*.md +# character groups +bar/something-[a-z].txt +# ignore a file +boop.txt +# but not one at the current directory +!/boop.txt \ No newline at end of file diff --git a/internal/ignorefiles/testdata/archive-dir/.terraformrc b/internal/ignorefiles/testdata/archive-dir/.terraformrc new file mode 100644 index 0000000..e69de29 diff --git a/internal/ignorefiles/testdata/archive-dir/bar.txt b/internal/ignorefiles/testdata/archive-dir/bar.txt new file mode 100644 index 0000000..5716ca5 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/bar.txt @@ -0,0 +1 @@ +bar diff --git a/internal/ignorefiles/testdata/archive-dir/baz.txt b/internal/ignorefiles/testdata/archive-dir/baz.txt new file mode 100644 index 0000000..3f95386 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/baz.txt @@ -0,0 +1 @@ +baz \ No newline at end of file diff --git a/internal/ignorefiles/testdata/archive-dir/exe b/internal/ignorefiles/testdata/archive-dir/exe new file mode 100755 index 0000000..e69de29 diff --git a/internal/ignorefiles/testdata/archive-dir/foo.terraform/bar.txt b/internal/ignorefiles/testdata/archive-dir/foo.terraform/bar.txt new file mode 100644 index 0000000..e69de29 diff --git a/internal/ignorefiles/testdata/archive-dir/foo.txt b/internal/ignorefiles/testdata/archive-dir/foo.txt new file mode 120000 index 0000000..b3b9b2f --- /dev/null +++ 
b/internal/ignorefiles/testdata/archive-dir/foo.txt @@ -0,0 +1 @@ +../external-dir/foo.txt \ No newline at end of file diff --git a/internal/ignorefiles/testdata/archive-dir/sub/bar.txt b/internal/ignorefiles/testdata/archive-dir/sub/bar.txt new file mode 120000 index 0000000..315e865 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/sub/bar.txt @@ -0,0 +1 @@ +../bar.txt \ No newline at end of file diff --git a/internal/ignorefiles/testdata/archive-dir/sub/zip.txt b/internal/ignorefiles/testdata/archive-dir/sub/zip.txt new file mode 100644 index 0000000..d0513b2 --- /dev/null +++ b/internal/ignorefiles/testdata/archive-dir/sub/zip.txt @@ -0,0 +1 @@ +zip diff --git a/internal/ignorefiles/testdata/external-dir/foo.txt b/internal/ignorefiles/testdata/external-dir/foo.txt new file mode 100644 index 0000000..257cc56 --- /dev/null +++ b/internal/ignorefiles/testdata/external-dir/foo.txt @@ -0,0 +1 @@ +foo diff --git a/slug.go b/slug.go index fd28d27..09d8fbe 100644 --- a/slug.go +++ b/slug.go @@ -8,6 +8,8 @@ import ( "os" "path/filepath" "strings" + + "github.com/hashicorp/go-slug/internal/ignorefiles" ) // Meta provides detailed information about a slug. 
@@ -151,7 +153,7 @@ func (p *Packer) Pack(src string, w io.Writer) (*Meta, error) { // Load the ignore rule configuration, which will use // defaults if no .terraformignore is configured - var ignoreRules []rule + var ignoreRules *ignorefiles.Ruleset if p.applyTerraformIgnore { ignoreRules = parseIgnoreFile(src) } @@ -175,7 +177,7 @@ func (p *Packer) Pack(src string, w io.Writer) (*Meta, error) { return meta, nil } -func (p *Packer) packWalkFn(root, src, dst string, tarW *tar.Writer, meta *Meta, ignoreRules []rule) filepath.WalkFunc { +func (p *Packer) packWalkFn(root, src, dst string, tarW *tar.Writer, meta *Meta, ignoreRules *ignorefiles.Ruleset) filepath.WalkFunc { return func(path string, info os.FileInfo, err error) error { if err != nil { return err @@ -190,14 +192,14 @@ func (p *Packer) packWalkFn(root, src, dst string, tarW *tar.Writer, meta *Meta, return nil } - if m := matchIgnoreRule(subpath, ignoreRules); m { + if m := matchIgnoreRules(subpath, ignoreRules); m { return nil } // Catch directories so we don't end up with empty directories, // the files are ignored correctly if info.IsDir() { - if m := matchIgnoreRule(subpath+string(os.PathSeparator), ignoreRules); m { + if m := matchIgnoreRules(subpath+string(os.PathSeparator), ignoreRules); m { return nil } } diff --git a/slug_test.go b/slug_test.go index 9ff5758..44b6878 100644 --- a/slug_test.go +++ b/slug_test.go @@ -1217,6 +1217,7 @@ func verifyFile(t *testing.T, path string, mode os.FileMode, expect string) { } func verifyPerms(t *testing.T, path string, expect os.FileMode) { + t.Helper() fi, err := os.Stat(path) if err != nil { t.Fatal(err) diff --git a/terraformignore.go b/terraformignore.go index 6803313..0863167 100644 --- a/terraformignore.go +++ b/terraformignore.go @@ -1,17 +1,14 @@ package slug import ( - "bufio" "fmt" - "io" "os" "path/filepath" - "regexp" - "strings" - "text/scanner" + + "github.com/hashicorp/go-slug/internal/ignorefiles" ) -func parseIgnoreFile(rootPath string) []rule { 
+func parseIgnoreFile(rootPath string) *ignorefiles.Ruleset { // Look for .terraformignore at our root path/src file, err := os.Open(filepath.Join(rootPath, ".terraformignore")) defer file.Close() @@ -22,209 +19,22 @@ func parseIgnoreFile(rootPath string) []rule { if !os.IsNotExist(err) { fmt.Fprintf(os.Stderr, "Error reading .terraformignore, default exclusions will apply: %v \n", err) } - return defaultExclusions - } - return readRules(file) -} - -func readRules(input io.Reader) []rule { - rules := defaultExclusions - scanner := bufio.NewScanner(input) - scanner.Split(bufio.ScanLines) - - for scanner.Scan() { - pattern := scanner.Text() - // Ignore blank lines - if len(pattern) == 0 { - continue - } - // Trim spaces - pattern = strings.TrimSpace(pattern) - // Ignore comments - if pattern[0] == '#' { - continue - } - // New rule structure - rule := rule{} - // Exclusions - if pattern[0] == '!' { - rule.excluded = true - pattern = pattern[1:] - } - // If it is a directory, add ** so we catch descendants - if pattern[len(pattern)-1] == os.PathSeparator { - pattern = pattern + "**" - } - // If it starts with /, it is absolute - if pattern[0] == os.PathSeparator { - pattern = pattern[1:] - } else { - // Otherwise prepend **/ - pattern = "**" + string(os.PathSeparator) + pattern - } - rule.val = pattern - rule.dirs = strings.Split(pattern, string(os.PathSeparator)) - rules = append(rules, rule) + return ignorefiles.DefaultRuleset } - if err := scanner.Err(); err != nil { - fmt.Fprintf(os.Stderr, "Error reading .terraformignore, default exclusions will apply: %v \n", err) - return defaultExclusions - } - return rules -} - -func matchIgnoreRule(path string, rules []rule) bool { - matched := false - path = filepath.FromSlash(path) - for _, rule := range rules { - match, _ := rule.match(path) - - if match { - matched = !rule.excluded - } - } - - if matched { - debug(true, path, "Skipping excluded path:", path) - } - - return matched -} - -type rule struct { - val string // 
the value of the rule itself - excluded bool // ! is present, an exclusion rule - dirs []string // directories of the rule - regex *regexp.Regexp // regular expression to match for the rule -} - -func (r *rule) match(path string) (bool, error) { - if r.regex == nil { - if err := r.compile(); err != nil { - return false, filepath.ErrBadPattern - } - } - - b := r.regex.MatchString(path) - debug(false, path, path, r.regex, b) - return b, nil -} - -func (r *rule) compile() error { - regStr := "^" - pattern := r.val - // Go through the pattern and convert it to a regexp. - // Use a scanner to support utf-8 chars. - var scan scanner.Scanner - scan.Init(strings.NewReader(pattern)) - - sl := string(os.PathSeparator) - escSL := sl - if sl == `\` { - escSL += `\` - } - - for scan.Peek() != scanner.EOF { - ch := scan.Next() - if ch == '*' { - if scan.Peek() == '*' { - // is some flavor of "**" - scan.Next() - - // Treat **/ as ** so eat the "/" - if string(scan.Peek()) == sl { - scan.Next() - } - - if scan.Peek() == scanner.EOF { - // is "**EOF" - to align with .gitignore just accept all - regStr += ".*" - } else { - // is "**" - // Note that this allows for any # of /'s (even 0) because - // the .* will eat everything, even /'s - regStr += "(.*" + escSL + ")?" - } - } else { - // is "*" so map it to anything but "/" - regStr += "[^" + escSL + "]*" - } - } else if ch == '?' { - // "?" is any char except "/" - regStr += "[^" + escSL + "]" - } else if ch == '.' || ch == '$' { - // Escape some regexp special chars that have no meaning - // in golang's filepath.Match - regStr += `\` + string(ch) - } else if ch == '\\' { - // escape next char. 
Note that a trailing \ in the pattern - // will be left alone (but need to escape it) - if sl == `\` { - // On windows map "\" to "\\", meaning an escaped backslash, - // and then just continue because filepath.Match on - // Windows doesn't allow escaping at all - regStr += escSL - continue - } - if scan.Peek() != scanner.EOF { - regStr += `\` + string(scan.Next()) - } else { - regStr += `\` - } - } else { - regStr += string(ch) - } - } - - regStr += "$" - re, err := regexp.Compile(regStr) + ret, err := ignorefiles.ParseIgnoreFileContent(file) if err != nil { - return err + fmt.Fprintf(os.Stderr, "Error reading .terraformignore, default exclusions will apply: %v \n", err) + return ignorefiles.DefaultRuleset } - r.regex = re - return nil + return ret } -/* - Default rules as they would appear in .terraformignore: - .git/ - .terraform/ - !.terraform/modules/ - terraform.tfstate -*/ - -var defaultExclusions = []rule{ - { - val: filepath.Join("**", ".git", "**"), - excluded: false, - }, - { - val: filepath.Join("**", ".terraform", "**"), - excluded: false, - }, - { - val: filepath.Join("**", ".terraform", "modules", "**"), - excluded: true, - }, - { - val: filepath.Join("**", "terraform.tfstate"), - excluded: false, - }, -} - -func debug(printAll bool, path string, message ...interface{}) { - logLevel := os.Getenv("TF_IGNORE") == "trace" - debugPath := os.Getenv("TF_IGNORE_DEBUG") - isPath := debugPath != "" - if isPath { - isPath = strings.Contains(path, debugPath) - } - - if logLevel { - if printAll || isPath { - fmt.Println(message...) - } - } +func matchIgnoreRules(path string, ruleset *ignorefiles.Ruleset) bool { + // Ruleset.Excludes explicitly allows ignoring its error, in which + // case we are ignoring any individual invalid rules in the set + // but still taking all of the others into account. 
+ ret, _ := ruleset.Excludes(path) + return ret } From e7304bd53505aa6cd99f3656ab8b6e26dd8d3e2b Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Fri, 28 Apr 2023 18:09:13 -0700 Subject: [PATCH 2/7] sourceaddrs: Parsers and models for source package addresses This package models the source address types we'll support for modules when working with the forthcoming "source bundle" mechanism, which is essentially a "meta-slug" capturing the contents of many different source packages all at once. For the resulting source bundle to be useful we need to be able to talk about the source addresses it was built from, so that callers can request the bundle path equivalent to a particular remote source address path. This address syntax is intentionally a subset of the go-getter-based syntax used in today's Terraform CLI module installer, because we want to have a set of commonly-used address types that are treated equivalently in both implementations. However, we won't be actually using go-getter here because its high level of flexibility (and quirkiness) is a poor fit for safely constructing persistable source code bundles. 
--- go.mod | 2 + go.sum | 64 ++++++++++ sourceaddrs/doc.go | 4 + sourceaddrs/source.go | 62 +++++++++ sourceaddrs/source_local.go | 68 ++++++++++ sourceaddrs/source_registry.go | 72 +++++++++++ sourceaddrs/source_remote.go | 194 +++++++++++++++++++++++++++++ sourceaddrs/source_remote_types.go | 116 +++++++++++++++++ sourceaddrs/subpath.go | 98 +++++++++++++++ 9 files changed, 680 insertions(+) create mode 100644 go.sum create mode 100644 sourceaddrs/doc.go create mode 100644 sourceaddrs/source.go create mode 100644 sourceaddrs/source_local.go create mode 100644 sourceaddrs/source_registry.go create mode 100644 sourceaddrs/source_remote.go create mode 100644 sourceaddrs/source_remote_types.go create mode 100644 sourceaddrs/subpath.go diff --git a/go.mod b/go.mod index c27382a..1e76ccc 100644 --- a/go.mod +++ b/go.mod @@ -1,3 +1,5 @@ module github.com/hashicorp/go-slug go 1.15 + +require github.com/hashicorp/terraform-registry-address v0.2.0 // indirect diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..87d05b8 --- /dev/null +++ b/go.sum @@ -0,0 +1,64 @@ +cloud.google.com/go/compute/metadata v0.2.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= +github.com/apparentlymart/go-textseg/v13 v13.0.0/go.mod h1:ZK2fH7c4NqDTLtiYLvIkEghdlcqw7yxLeM89kiTRPUo= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= +github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= +github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.8/go.mod 
h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= +github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= +github.com/hashicorp/terraform-registry-address v0.2.0 h1:92LUg03NhfgZv44zpNTLBGIbiyTokQCDcdH5BhVHT3s= +github.com/hashicorp/terraform-registry-address v0.2.0/go.mod h1:478wuzJPzdmqT6OGbB/iH82EDcI8VFM4yujknh/1nIs= +github.com/hashicorp/terraform-svchost v0.0.1 h1:Zj6fR5wnpOHnJUmLyWozjMeDaVuE+cstMPj41/eKmSQ= +github.com/hashicorp/terraform-svchost v0.0.1/go.mod h1:ut8JaH0vumgdCfJaihdcZULqkAwHdQNwNH7taIDdsZM= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= +github.com/vmihailenco/msgpack/v4 v4.3.12/go.mod h1:gborTTJjAo/GWTqqRjrLCn9pgNN+NXzzngzBKDPIqw4= +github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= +github.com/zclconf/go-cty v1.12.1/go.mod h1:s9IfD1LK5ccNMSWCVFCE2rJfHiZgi7JijgeWIMfhLvA= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod 
h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw= +golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= +golang.org/x/oauth2 v0.4.0/go.mod h1:RznEsdpjGAINPTOF0UH/t+xJ75L18YO3Ho6Pyn+uRec= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= +golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= +golang.org/x/text v0.6.0 
h1:3XmdazWV+ubf7QgHSTWeykHOci5oeekaGJBLkrkaw4k= +golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= +google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= +gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/sourceaddrs/doc.go b/sourceaddrs/doc.go new file mode 100644 index 0000000..ad57135 --- /dev/null +++ b/sourceaddrs/doc.go @@ -0,0 +1,4 @@ +// Package sourceaddrs deals with the various types of source code address +// that Terraform can gather into a source bundle via the sibling package +// "sourcebundle". +package sourceaddrs diff --git a/sourceaddrs/source.go b/sourceaddrs/source.go new file mode 100644 index 0000000..087c5ca --- /dev/null +++ b/sourceaddrs/source.go @@ -0,0 +1,62 @@ +package sourceaddrs + +import ( + "fmt" +) + +// Source acts as a tagged union over the three possible source address types, +// for situations where all three are acceptable. 
+// +// Only address types within this package can implement Source. +type Source interface { + sourceSigil() + + String() string + SupportsVersionConstraints() bool +} + +// ParseSource attempts to parse the given string as any one of the three supported +// source address types, recognizing which type it belongs to based on the +// syntax differences between the address forms. +func ParseSource(given string) (Source, error) { + switch { + case looksLikeLocalSource(given): + ret, err := ParseLocalSource(given) + if err != nil { + return nil, fmt.Errorf("invalid local source address %q: %w", given, err) + } + return ret, nil + case looksLikeRegistrySource(given): + ret, err := ParseRegistrySource(given) + if err != nil { + return nil, fmt.Errorf("invalid module registry source address %q: %w", given, err) + } + return ret, nil + default: + // If it's neither a local source nor a module registry source then + // we'll assume it's intended to be a remote source. + // (This parser will return a suitable error if the given string + // is not of any of the supported address types.) + ret, err := ParseRemoteSource(given) + if err != nil { + return nil, fmt.Errorf("invalid remote source address %q: %w", given, err) + } + return ret, nil + } +} + +// ResolveRelativeSource calculates a new source address from the combination +// of two other source addresses. +// +// If "b" is already an absolute source address then the result is "b" verbatim. +// +// If "b" is a relative source then the result is an address of the same type +// as "a", but with a different path component. If "a" is an absolute address +// type then the result is guaranteed to also be an absolute address type. +// +// Returns an error if "b" is a relative path that attempts to traverse out +// of the package of an absolute address given in "a". 
+func ResolveRelativeSource(a, b Source) (Source, error) { + // TODO: implement + panic("unimplemented") +} diff --git a/sourceaddrs/source_local.go b/sourceaddrs/source_local.go new file mode 100644 index 0000000..09ab8a4 --- /dev/null +++ b/sourceaddrs/source_local.go @@ -0,0 +1,68 @@ +package sourceaddrs + +import ( + "fmt" + "path" + "strings" +) + +// LocalSource represents a relative traversal to another path within the same +// source package as whatever source artifact included this path. +// +// LocalSource sources will typically need to be resolved into either +// [RemoteSource] or [RegistrySource] addresses by reference to the address +// of whatever artifact declared them, because otherwise they cannot be +// mapped onto any real source location. +type LocalSource struct { + // relPath is a slash-separate path in the style of the Go standard + // library package "path", which should always be stored in its "Clean" + // form, aside from the mandatory "./" or "../" prefixes. + relPath string +} + +var _ Source = LocalSource{} + +// sourceSigil implements Source +func (s LocalSource) sourceSigil() {} + +func looksLikeLocalSource(given string) bool { + return strings.HasPrefix(given, "./") || strings.HasPrefix(given, "../") +} + +// ParseLocalSource interprets the given path as a local source address, or +// returns an error if it cannot be interpreted as such. +func ParseLocalSource(given string) (LocalSource, error) { + // First we'll catch some situations that seem likely to suggest that + // the caller was trying to use a real filesystem path instead of + // just a virtual relative path within a source package. + if strings.ContainsAny(given, ":\\") { + return LocalSource{}, fmt.Errorf("must be a relative path using forward-slash separators between segments, like in a relative URL") + } + + // We distinguish local source addresses from other address types by them + // starting with some kind of relative path prefix. 
+ if !looksLikeLocalSource(given) { + return LocalSource{}, fmt.Errorf("must start with either ./ or ../ to indicate a local path") + } + + clean := path.Clean(given) + + // We use the "path" package's definition of "clean" aside from one + // exception: we need to retain the leading "./", if it was originally + // present, to disambiguate from module registry addresses. + if !looksLikeLocalSource(clean) { + clean = "./" + clean + } + + return LocalSource{relPath: clean}, nil +} + +// String implements Source +func (s LocalSource) String() string { + return s.relPath +} + +// SupportsVersionConstraints implements Source +func (s LocalSource) SupportsVersionConstraints() bool { + return false +} diff --git a/sourceaddrs/source_registry.go b/sourceaddrs/source_registry.go new file mode 100644 index 0000000..deadc18 --- /dev/null +++ b/sourceaddrs/source_registry.go @@ -0,0 +1,72 @@ +package sourceaddrs + +import ( + "fmt" + + regaddr "github.com/hashicorp/terraform-registry-address" +) + +// RegistrySource represents a source address referring to a set of versions +// published in a Module Registry. +// +// A RegistrySource is an extra indirection over a set of [RemoteSource] +// addresses, which Terraform chooses from based on version constraints given +// alongside the registry source address. +type RegistrySource struct { + pkg regaddr.ModulePackage + + // subPath is an optional subdirectory or sub-file path beneath the + // prefix of the selected underlying source address. + // + // Sub-paths are always slash-separated paths interpreted relative to + // the root of the package, and may not include ".." or "." segments. + // The sub-path is empty to indicate the root directory of the package. 
+ subPath string +} + +// sourceSigil implements Source +func (s RegistrySource) sourceSigil() {} + +var _ Source = RegistrySource{} + +func looksLikeRegistrySource(given string) bool { + _, err := regaddr.ParseModuleSource(given) + return err == nil +} + +// ParseRegistrySource parses the given string as a registry source address, +// or returns an error if it does not use the correct syntax for interpretation +// as a registry source address. +func ParseRegistrySource(given string) (RegistrySource, error) { + // We delegate the first level of parsing to the shared library + // terraform-registry-address, but then we'll impose some additional + // validation and normalization over that since we're intentionally + // being a little stricter than Terraform has historically been, + // prioritizing "one obvious way to do it" over many esoteric variations. + + startingAddr, err := regaddr.ParseModuleSource(given) + if err != nil { + return RegistrySource{}, err + } + + subPath, err := normalizeSubpath(startingAddr.Subdir) + if err != nil { + return RegistrySource{}, fmt.Errorf("invalid sub-path: %w", err) + } + + return RegistrySource{ + pkg: startingAddr.Package, + subPath: subPath, + }, nil +} + +func (s RegistrySource) String() string { + if s.subPath != "" { + return s.pkg.String() + "//" + s.subPath + } + return s.pkg.String() +} + +func (s RegistrySource) SupportsVersionConstraints() bool { + return true +} diff --git a/sourceaddrs/source_remote.go b/sourceaddrs/source_remote.go new file mode 100644 index 0000000..9acaad4 --- /dev/null +++ b/sourceaddrs/source_remote.go @@ -0,0 +1,194 @@ +package sourceaddrs + +import ( + "fmt" + "net/url" + "regexp" + "strings" +) + +type RemoteSource struct { + sourceType string + url *url.URL + subPath string +} + +// sourceSigil implements Source +func (RemoteSource) sourceSigil() {} + +var _ Source = RemoteSource{} + +// ParseRemoteSource parses the given string as a remote source address, +// or returns an error if it does 
not use the correct syntax for interpretation +// as a remote source address. +func ParseRemoteSource(given string) (RemoteSource, error) { + pkgRaw, subPathRaw := splitSubPath(given) + subPath, err := normalizeSubpath(subPathRaw) + if err != nil { + return RemoteSource{}, fmt.Errorf("invalid sub-path: %w", err) + } + + for _, shorthand := range remoteSourceShorthands { + replacement, ok, err := shorthand(pkgRaw) + if err != nil { + return RemoteSource{}, err + } + if ok { + pkgRaw = replacement + } + } + + // Once we've dealt with all the "shorthand" business, our address + // should be in the form sourcetype::url, where "sourcetype::" is + // optional and defaults to matching the URL scheme if not present. + var sourceType string + if matches := remoteSourceTypePattern.FindStringSubmatch(pkgRaw); len(matches) != 0 { + sourceType = matches[1] + pkgRaw = matches[2] + } + + u, err := url.Parse(pkgRaw) + if err != nil { + return RemoteSource{}, fmt.Errorf("invalid URL syntax in %q: %w", pkgRaw, err) + } + if u.Scheme == "" { + return RemoteSource{}, fmt.Errorf("must contain an absolute URL with a scheme") + } + if u.User != nil { + return RemoteSource{}, fmt.Errorf("must not use username or password in URL portion") + } + + u.Scheme = strings.ToLower(u.Scheme) + sourceType = strings.ToLower(sourceType) + + if sourceType == "" { + // sourceType defaults to the URL scheme if not explicitly set. 
+ sourceType = u.Scheme + } else if sourceType == u.Scheme { + // This catches weirdo constructions like: https::https://example.com/ + return RemoteSource{}, fmt.Errorf("don't specify redundant %q source type for %q URL", sourceType, u.Scheme) + } + + _, err = url.ParseQuery(u.RawQuery) + if err != nil { + return RemoteSource{}, fmt.Errorf("invalid URL query string syntax in %q: %w", pkgRaw, err) + } + + typeImpl, ok := remoteSourceTypes[sourceType] + if !ok { + if sourceType == u.Scheme { + // In this case the user didn't actually specify a source type, + // so we won't confuse them by mentioning it. + return RemoteSource{}, fmt.Errorf("unsupported URL scheme %q", u.Scheme) + } else { + return RemoteSource{}, fmt.Errorf("unsupported package source type %q", sourceType) + } + } + + err = typeImpl.PrepareURL(u) + if err != nil { + return RemoteSource{}, err + } + + return RemoteSource{ + sourceType: sourceType, + url: u, + subPath: subPath, + }, nil +} + +// String implements Source +func (s RemoteSource) String() string { + // Our address normalization rules are a bit odd since we inherited the + // fundamentals of this addressing scheme from go-getter. + if s.url.Scheme == s.sourceType { + // When scheme and source type match we don't actually mention the + // source type in the stringification, because it looks redundant + // and confusing. 
+ if s.subPath != "" { + return s.url.String() + "//" + s.subPath + } + return s.url.String() + } + if s.subPath != "" { + return s.sourceType + "::" + s.url.String() + "//" + s.subPath + } + return s.sourceType + "::" + s.url.String() +} + +func (s RemoteSource) SupportsVersionConstraints() bool { + return false +} + +type remoteSourceShorthand func(given string) (normed string, ok bool, err error) + +var remoteSourceShorthands = []remoteSourceShorthand{ + func(given string) (string, bool, error) { + // Allows a github.com repository to be presented in a scheme-less + // format like github.com/organization/repository/path, which we'll + // turn into a git:: source string selecting the repository's main + // branch. + // + // This is intentionally compatible with what's accepted by the + // "GitHub detector" in the go-getter library, so that module authors + // can specify GitHub repositories in the same way both for the + // old Terraform module installer and the newer source bundle builder. + + if !strings.HasPrefix(given, "github.com/") { + return "", false, nil + } + + parts := strings.Split(given, "/") + if len(parts) < 3 { + return "", false, fmt.Errorf("GitHub.com shorthand addresses must start with github.com/organization/repository") + } + + urlStr := "https://" + strings.Join(parts[:3], "/") + if !strings.HasSuffix(urlStr, "git") { + urlStr += ".git" + } + + if len(parts) > 3 { + // The remaining parts will become the sub-path portion, since + // the repository as a whole is the source package. + urlStr += "//" + strings.Join(parts[3:], "/") + } + + return "git::" + urlStr, true, nil + }, + func(given string) (string, bool, error) { + // Allows a gitlab.com repository to be presented in a scheme-less + // format like gitlab.com/organization/repository/path, which we'll + // turn into a git:: source string selecting the repository's main + // branch. 
+ // + // This is intentionally compatible with what's accepted by the + // "GitLab detector" in the go-getter library, so that module authors + // can specify GitLab repositories in the same way both for the + // old Terraform module installer and the newer source bundle builder. + + if !strings.HasPrefix(given, "gitlab.com/") { + return "", false, nil + } + + parts := strings.Split(given, "/") + if len(parts) < 3 { + return "", false, fmt.Errorf("GitLab.com shorthand addresses must start with gitlab.com/organization/repository") + } + + urlStr := "https://" + strings.Join(parts[:3], "/") + if !strings.HasSuffix(urlStr, "git") { + urlStr += ".git" + } + + if len(parts) > 3 { + // The remaining parts will become the sub-path portion, since + // the repository as a whole is the source package. + urlStr += "//" + strings.Join(parts[3:], "/") + } + + return "git::" + urlStr, true, nil + }, +} + +var remoteSourceTypePattern = regexp.MustCompile(`^([A-Za-z0-9]+)::(.+)$`) diff --git a/sourceaddrs/source_remote_types.go b/sourceaddrs/source_remote_types.go new file mode 100644 index 0000000..abb632f --- /dev/null +++ b/sourceaddrs/source_remote_types.go @@ -0,0 +1,116 @@ +package sourceaddrs + +import ( + "fmt" + "net/url" + "strings" +) + +type remoteSourceType interface { + PrepareURL(u *url.URL) error +} + +var remoteSourceTypes = map[string]remoteSourceType{ + "git": gitSourceType{}, + "http": httpSourceType{}, + "https": httpSourceType{}, +} + +type gitSourceType struct{} + +func (gitSourceType) PrepareURL(u *url.URL) error { + // The Git source type requires one of the URL schemes that Git itself + // supports. We're also currently being more rigid than Git to ease + // initial implementation. We will extend this over time as the source + // bundle mechanism graduates from experimental to real use. 
+ + if u.Scheme != "ssh" && u.Scheme != "https" { + // NOTE: We don't support "git" or "http" here because we require + // source code to originate from sources that can support + // authentication and encryption, to reduce the risk of mitm attacks + // introducing malicious code. + return fmt.Errorf("a Git repository URL must use either the https or ssh scheme") + } + + qs := u.Query() + for k, vs := range qs { + if k != "ref" { + return fmt.Errorf("a Git repository URL's query string may include only the argument 'ref'") + } + if len(vs) > 1 { + return fmt.Errorf("a Git repository URL's query string may include only one 'ref' argument") + } + } + + return nil +} + +type httpSourceType struct{} + +func (httpSourceType) PrepareURL(u *url.URL) error { + if u.Scheme == "http" { + return fmt.Errorf("source package addresses may not use unencrypted HTTP") + } + if u.Scheme != "https" { + return fmt.Errorf("invalid scheme %q for https source type", u.Scheme) + } + + // For our initial implementation the address must be something that + // go-getter would've recognized as referring to a gzipped tar archive, + // to reduce the scope of the initial source bundler fetcher + // implementations. We may extend this later, but if we do then we should + // use go-getter's syntax for anything go-getter also supports. + // + // Go-getter's treatment of HTTP is quite odd, because by default it does + // an extra module-registry-like indirection where it expects the + // given URL to return a header pointing to another source address type. + // We don't intend to support that here, but we do want to support the + // behavior of go-getter's special case for URLs whose paths end with + // suffixes that match those typically used for archives, and its magical + // treatment of the "archive" query string argument as a way to force + // treatment of archives. 
This does mean that we can't fetch from any + // URL that _really_ needs an "archive" query string parameter, but that's + // been true for Terraform for many years and hasn't been a problem, so + // we'll accept that for now and wait to see if any need for it arises. + // + // Ideally we'd just make an HTTP request and then decide what to do based + // on the Content-Type of the response, like a sensible HTTP client would, + // but for now compatibility with go-getter is more important than being + // sensible. + + qs := u.Query() + if vs := qs["archive"]; len(vs) > 0 { + if len(vs) > 1 { + return fmt.Errorf("a HTTPS URL's query string may include only one 'archive' argument") + } + if vs[0] != "tar.gz" && vs[0] != "tgz" { + return fmt.Errorf("the special 'archive' query string argument must be set to 'tgz' if present") + } + // We need to remove the special "archive" argument now so that we + // won't confuse the remote server with it. We preserve all of the + // other arguments because they might actually be intended for the + // server. + qs.Del("archive") + u.RawQuery = qs.Encode() + } else { + p := u.EscapedPath() + if !(strings.HasSuffix(p, ".tar.gz") || strings.HasSuffix(p, ".tgz")) { + return fmt.Errorf("a HTTPS URL's path must end with either .tar.gz or .tgz") + } + } + + if len(qs["checksum"]) != 0 { + // This is another go-getter oddity. go-getter would treat this as + // a request to verify that the result matches the given checksum + // and not send this argument to the server. However, go-getter actually + // doesn't support this (it returns an error) when it's dealing with + // an archive. We'll explicitly reject it to avoid folks being + // misled into thinking that it _is_ working, and thus believing + // they've achieved a verification that isn't present, though we + // might relax this later since go-getter wouldn't have allowed this + // anyway. 
+ return fmt.Errorf("a HTTPS URL's query string must not include 'checksum' argument") + } + + return nil +} diff --git a/sourceaddrs/subpath.go b/sourceaddrs/subpath.go new file mode 100644 index 0000000..4475cea --- /dev/null +++ b/sourceaddrs/subpath.go @@ -0,0 +1,98 @@ +package sourceaddrs + +import ( + "fmt" + "io/fs" + "path" + "strings" +) + +// normalizeSubpath interprets the given string as a package "sub-path", +// returning a normalized form of the path or an error if the string does +// not use correct syntax. +func normalizeSubpath(given string) (string, error) { + if given == "" { + // The empty string is how we represent the absence of a subpath, + // which represents the root directory of a package. + return "", nil + } + + clean := path.Clean(given) + + // Our definition of "sub-path" aligns with the definition used by Go's + // virtual filesystem abstraction, since our "module package" idea + // is also essentially just a virtual filesystem. + // This definition prohibits "." and ".." segments and therefore prevents + // upward path traversal. + // Go's path wrangling uses "." to represent "root directory", but + // we represent that by omitting the subpath entirely, so we forbid that + // too even though Go would consider it valid. + if clean == "." || !fs.ValidPath(clean) { + return "", fmt.Errorf("must be slash-separated relative path without any .. or . segments") + } + + return clean, nil +} + +// subPathAsLocalSource interprets the given subpath (which should be a value +// previously returned from [normalizeSubpath]) as a local source address +// relative to the root of the package that the sub-path was presented against. +func subPathAsLocalSource(p string) LocalSource { + // Local source addresses are _mostly_ a superset of what we allow in + // sub-paths, except that downward traversals must always start with + // "./" to disambiguate from other address types. 
+ return LocalSource{relPath: "./" + p} +} + +// splitSubPath takes a source address that would be accepted either as a +// remote source address or a registry source address and returns a tuple of +// its package address and its sub-path portion. +// +// For example: +// dom.com/path/?q=p => "dom.com/path/?q=p", "" +// proto://dom.com/path//*?q=p => "proto://dom.com/path?q=p", "*" +// proto://dom.com/path//path2?q=p => "proto://dom.com/path?q=p", "path2" +// +// This function DOES NOT validate or normalize the sub-path. Pass the second +// return value to [normalizeSubpath] to check if it is valid and to obtain +// its normalized form. +func splitSubPath(src string) (string, string) { + // This is careful to handle the query string portion of a remote source + // address. That's not actually necessary for a module registry address + // because those don't have query strings anyway, but it doesn't _hurt_ + // to check for a query string in that case and allows us to reuse this + // function for both cases. + + // URL might contains another url in query parameters + stop := len(src) + if idx := strings.Index(src, "?"); idx > -1 { + stop = idx + } + + // Calculate an offset to avoid accidentally marking the scheme + // as the dir. + var offset int + if idx := strings.Index(src[:stop], "://"); idx > -1 { + offset = idx + 3 + } + + // First see if we even have an explicit subdir + idx := strings.Index(src[offset:stop], "//") + if idx == -1 { + return src, "" + } + + idx += offset + subdir := src[idx+2:] + src = src[:idx] + + // Next, check if we have query parameters and push them onto the + // URL. 
+ if idx = strings.Index(subdir, "?"); idx > -1 { + query := subdir[idx:] + subdir = subdir[:idx] + src += query + } + + return src, subdir +} From 7791976f6c33042f554b2ccf2ea5121fff24501c Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Fri, 28 Apr 2023 18:09:38 -0700 Subject: [PATCH 3/7] sourcebundle: Build "meta-slugs" containing many different source packages --- go.mod | 16 +- go.sum | 67 +- sourceaddrs/package_remote.go | 87 ++ sourceaddrs/source.go | 71 +- sourceaddrs/source_local.go | 22 +- sourceaddrs/source_registry.go | 62 +- sourceaddrs/source_remote.go | 87 +- sourceaddrs/source_remote_types.go | 14 +- sourceaddrs/source_test.go | 506 ++++++++++ sourceaddrs/subpath.go | 36 +- sourcebundle/builder.go | 681 +++++++++++++ sourcebundle/builder_test.go | 948 ++++++++++++++++++ sourcebundle/bundle.go | 364 +++++++ sourcebundle/dependency_finder.go | 92 ++ sourcebundle/diagnostics.go | 181 ++++ sourcebundle/doc.go | 12 + sourcebundle/manifest_json.go | 48 + sourcebundle/package_fetcher.go | 36 + sourcebundle/package_meta.go | 37 + sourcebundle/registry_client.go | 28 + sourcebundle/testdata/pkgs/hello/hello | 1 + .../testdata/pkgs/subdirs/a/b/beepbeep | 1 + .../pkgs/terraformignore/.terraformignore | 1 + .../testdata/pkgs/terraformignore/excluded | 1 + .../testdata/pkgs/terraformignore/included | 1 + .../pkgs/with-remote-deps/dependencies | 2 + .../pkgs/with-remote-deps/self_dependency | 1 + sourcebundle/trace.go | 94 ++ 28 files changed, 3384 insertions(+), 113 deletions(-) create mode 100644 sourceaddrs/package_remote.go create mode 100644 sourceaddrs/source_test.go create mode 100644 sourcebundle/builder.go create mode 100644 sourcebundle/builder_test.go create mode 100644 sourcebundle/bundle.go create mode 100644 sourcebundle/dependency_finder.go create mode 100644 sourcebundle/diagnostics.go create mode 100644 sourcebundle/doc.go create mode 100644 sourcebundle/manifest_json.go create mode 100644 sourcebundle/package_fetcher.go create mode 100644 
sourcebundle/package_meta.go create mode 100644 sourcebundle/registry_client.go create mode 100644 sourcebundle/testdata/pkgs/hello/hello create mode 100644 sourcebundle/testdata/pkgs/subdirs/a/b/beepbeep create mode 100644 sourcebundle/testdata/pkgs/terraformignore/.terraformignore create mode 100644 sourcebundle/testdata/pkgs/terraformignore/excluded create mode 100644 sourcebundle/testdata/pkgs/terraformignore/included create mode 100644 sourcebundle/testdata/pkgs/with-remote-deps/dependencies create mode 100644 sourcebundle/testdata/pkgs/with-remote-deps/self_dependency create mode 100644 sourcebundle/trace.go diff --git a/go.mod b/go.mod index 1e76ccc..ffc7671 100644 --- a/go.mod +++ b/go.mod @@ -1,5 +1,17 @@ module github.com/hashicorp/go-slug -go 1.15 +go 1.20 -require github.com/hashicorp/terraform-registry-address v0.2.0 // indirect +require ( + github.com/apparentlymart/go-versions v1.0.1 + github.com/google/go-cmp v0.5.9 + github.com/hashicorp/terraform-registry-address v0.2.0 + github.com/hashicorp/terraform-svchost v0.0.1 + golang.org/x/mod v0.10.0 +) + +require ( + github.com/go-test/deep v1.0.3 // indirect + golang.org/x/net v0.5.0 // indirect + golang.org/x/text v0.6.0 // indirect +) diff --git a/go.sum b/go.sum index 87d05b8..e61f2e4 100644 --- a/go.sum +++ b/go.sum @@ -1,64 +1,21 @@ -cloud.google.com/go/compute/metadata v0.2.0/go.mod h1:zFmK7XCadkQkj6TtorcaGlCW1hT1fIilQDwofLpJ20k= -github.com/apparentlymart/go-textseg/v13 v13.0.0/go.mod h1:ZK2fH7c4NqDTLtiYLvIkEghdlcqw7yxLeM89kiTRPUo= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= -github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= 
-github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.8/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/apparentlymart/go-versions v1.0.1 h1:ECIpSn0adcYNsBfSRwdDdz9fWlL+S/6EUd9+irwkBgU= +github.com/apparentlymart/go-versions v1.0.1/go.mod h1:YF5j7IQtrOAOnsGkniupEA5bfCjzd7i14yu0shZavyM= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/go-test/deep v1.0.1/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= +github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68= +github.com/go-test/deep v1.0.3/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA= +github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38= github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= -github.com/hashicorp/go-version v1.6.0/go.mod h1:fltr4n8CU8Ke44wwGCBoEymUuxUHl09ZGVZPK5anwXA= github.com/hashicorp/terraform-registry-address v0.2.0 h1:92LUg03NhfgZv44zpNTLBGIbiyTokQCDcdH5BhVHT3s= github.com/hashicorp/terraform-registry-address v0.2.0/go.mod h1:478wuzJPzdmqT6OGbB/iH82EDcI8VFM4yujknh/1nIs= github.com/hashicorp/terraform-svchost v0.0.1 h1:Zj6fR5wnpOHnJUmLyWozjMeDaVuE+cstMPj41/eKmSQ= github.com/hashicorp/terraform-svchost v0.0.1/go.mod h1:ut8JaH0vumgdCfJaihdcZULqkAwHdQNwNH7taIDdsZM= -github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= -github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= -github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= -github.com/vmihailenco/msgpack/v4 v4.3.12/go.mod h1:gborTTJjAo/GWTqqRjrLCn9pgNN+NXzzngzBKDPIqw4= 
-github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= -github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= -github.com/zclconf/go-cty v1.12.1/go.mod h1:s9IfD1LK5ccNMSWCVFCE2rJfHiZgi7JijgeWIMfhLvA= -golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= -golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= -golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348 h1:MtvEpTB6LX3vkb4ax0b5D2DHbNAUsen0Gx5wZoq3lV4= +github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k= +golang.org/x/mod v0.10.0 h1:lFO9qtOdlre5W1jxS3r/4szv2/6iXxScdzjoBMXNhYk= +golang.org/x/mod v0.10.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs= golang.org/x/net v0.5.0 h1:GyT4nK/YDHSqa1c4753ouYCDajOYKTja9Xb/OHtgvSw= golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws= -golang.org/x/oauth2 v0.4.0/go.mod h1:RznEsdpjGAINPTOF0UH/t+xJ75L18YO3Ho6Pyn+uRec= -golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 
-golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.4.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= -golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ= -golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= -golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= -golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.6.0 h1:3XmdazWV+ubf7QgHSTWeykHOci5oeekaGJBLkrkaw4k= golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= -golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -google.golang.org/appengine v1.6.5/go.mod 
h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/appengine v1.6.7/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= -gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/sourceaddrs/package_remote.go b/sourceaddrs/package_remote.go new file mode 100644 index 0000000..701b770 --- /dev/null +++ b/sourceaddrs/package_remote.go @@ -0,0 +1,87 @@ +package sourceaddrs + +import ( + "fmt" + "net/url" +) + +type RemotePackage struct { + sourceType string + + // NOTE: A remote package URL may never have a "userinfo" portion, and + // all relevant fields are comparable, so it's safe to compare + // RemotePackage using the == operator. + url url.URL +} + +// ParseRemotePackage parses a standalone remote package address, which is a +// remote source address without any sub-path portion. +func ParseRemotePackage(given string) (RemotePackage, error) { + srcAddr, err := ParseRemoteSource(given) + if err != nil { + return RemotePackage{}, err + } + if srcAddr.subPath != "" { + return RemotePackage{}, fmt.Errorf("remote package address may not have a sub-path") + } + return srcAddr.pkg, nil +} + +func (p RemotePackage) String() string { + // Our address normalization rules are a bit odd since we inherited the + // fundamentals of this addressing scheme from go-getter. + if p.url.Scheme == p.sourceType { + // When scheme and source type match we don't actually mention the + // source type in the stringification, because it looks redundant + // and confusing. 
+ return p.url.String() + } + return p.sourceType + "::" + p.url.String() +} + +// SourceAddr returns a remote source address referring to the given sub-path +// inside the receiving package. +// +// subPath must be a valid sub-path (as defined by [ValidSubPath]) or this +// function will panic. An empty string is a valid sub-path representing the +// root directory of the package. +func (p RemotePackage) SourceAddr(subPath string) RemoteSource { + finalPath, err := normalizeSubpath(subPath) + if err != nil { + panic(fmt.Sprintf("invalid subPath: %s", subPath)) + } + return RemoteSource{ + pkg: p, + subPath: finalPath, + } +} + +func (p RemotePackage) subPathString(subPath string) string { + if subPath == "" { + // Easy case... the package address is also the source address + return p.String() + } + + // The weird syntax we've inherited from go-getter expects the URL's + // query string to appear after the subpath portion, so we need to + // now tweak the package URL to be a sub-path URL instead. + subURL := p.url // shallow copy + subURL.Path += "//" + subPath + if subURL.Scheme == p.sourceType { + return subURL.String() + } + return p.sourceType + "::" + subURL.String() +} + +// SourceType returns the source type component of the package address. +func (p RemotePackage) SourceType() string { + return p.sourceType +} + +// URL returns the URL component of the package address. +// +// Callers MUST NOT mutate anything accessible through the returned pointer, +// even though the Go type system cannot enforce that. 
+func (p RemotePackage) URL() *url.URL { + return &p.url +} diff --git a/sourceaddrs/source.go b/sourceaddrs/source.go index 087c5ca..6f08eef 100644 --- a/sourceaddrs/source.go +++ b/sourceaddrs/source.go @@ -2,6 +2,8 @@ package sourceaddrs import ( "fmt" + "path" + "strings" ) // Source acts as a tagged union over the three possible source address types, @@ -15,10 +17,16 @@ type Source interface { SupportsVersionConstraints() bool } -// Source attempts to parse the given string as any one of the three supported -// source address types, recognizing which type it belongs to based on the -// syntax differences between the address forms. +// ParseSource attempts to parse the given string as any one of the three +// supported source address types, recognizing which type it belongs to based +// on the syntax differences between the address forms. func ParseSource(given string) (Source, error) { + if strings.TrimSpace(given) != given { + return nil, fmt.Errorf("source address must not have leading or trailing spaces") + } + if len(given) == 0 { + return nil, fmt.Errorf("a valid source address is required") + } switch { case looksLikeLocalSource(given): ret, err := ParseLocalSource(given) @@ -45,6 +53,16 @@ func ParseSource(given string) (Source, error) { } } +// MustParseSource is a thin wrapper around [ParseSource] that panics if it +// returns an error, or returns its result if not. +func MustParseSource(given string) Source { + ret, err := ParseSource(given) + if err != nil { + panic(err) + } + return ret +} + // ResolveRelativeSource calculates a new source address from the combination // of two other source addresses. // @@ -57,6 +75,49 @@ func ParseSource(given string) (Source, error) { // Returns an error if "b" is a relative path that attempts to traverse out // of the package of an absolute address given in "a". 
func ResolveRelativeSource(a, b Source) (Source, error) { - // TODO: implement - panic("unimplemented") + if sourceIsAbs(b) { + return b, nil + } + // If we get here then b is definitely a local source, because + // otherwise it would have been absolute. + bRaw := b.(LocalSource).relPath + + switch a := a.(type) { + case LocalSource: + aRaw := a.relPath + new := path.Join(aRaw, bRaw) + if !looksLikeLocalSource(new) { + new = "./" + new // preserve LocalSource's prefix invariant + } + return LocalSource{relPath: new}, nil + case RegistrySource: + aSub := a.subPath + newSub, err := joinSubPath(aSub, bRaw) + if err != nil { + return nil, fmt.Errorf("invalid traversal from %s: %w", a.String(), err) + } + return RegistrySource{ + pkg: a.pkg, + subPath: newSub, + }, nil + case RemoteSource: + aSub := a.subPath + newSub, err := joinSubPath(aSub, bRaw) + if err != nil { + return nil, fmt.Errorf("invalid traversal from %s: %w", a.String(), err) + } + return RemoteSource{ + pkg: a.pkg, + subPath: newSub, + }, nil + default: + // Should not get here, because the cases above are exhaustive for + // all of our defined Source implementations. + panic(fmt.Sprintf("unsupported Source implementation %T", a)) + } +} + +func sourceIsAbs(source Source) bool { + _, isLocal := source.(LocalSource) + return !isLocal } diff --git a/sourceaddrs/source_local.go b/sourceaddrs/source_local.go index 09ab8a4..9552954 100644 --- a/sourceaddrs/source_local.go +++ b/sourceaddrs/source_local.go @@ -47,13 +47,23 @@ func ParseLocalSource(given string) (LocalSource, error) { clean := path.Clean(given) - // We use the "path" package's definition of "clean" aside from one - // exception: we need to retain the leading "./", if it was originally - // present, to disambiguate from module registry addresses. 
+ // We use the "path" package's definition of "clean" aside from two + // exceptions: + // - we need to retain the leading "./", if it was originally present, to + // disambiguate from module registry addresses. + // - If the cleaned path is just ".." then we need a slash on the end + // because that's part of how we recognize an address as a relative path. + if clean == ".." { + clean = "../" + } if !looksLikeLocalSource(clean) { clean = "./" + clean } + if clean != given { + return LocalSource{}, fmt.Errorf("relative path must be written in canonical form %q", clean) + } + return LocalSource{relPath: clean}, nil } @@ -66,3 +76,9 @@ func (s LocalSource) String() string { func (s LocalSource) SupportsVersionConstraints() bool { return false } + +// RelativePath returns the effective relative path for this source address, +// in our platform-agnostic slash-separated canonical syntax. +func (s LocalSource) RelativePath() string { + return s.relPath +} diff --git a/sourceaddrs/source_registry.go b/sourceaddrs/source_registry.go index deadc18..e44a402 100644 --- a/sourceaddrs/source_registry.go +++ b/sourceaddrs/source_registry.go @@ -2,6 +2,7 @@ package sourceaddrs import ( "fmt" + "path" regaddr "github.com/hashicorp/terraform-registry-address" ) @@ -38,28 +39,46 @@ func looksLikeRegistrySource(given string) bool { // or returns an error if it does not use the correct syntax for interpretation // as a registry source address. 
func ParseRegistrySource(given string) (RegistrySource, error) { - // We delegate the first level of parsing to the shared library + pkgRaw, subPathRaw := splitSubPath(given) + subPath, err := normalizeSubpath(subPathRaw) + if err != nil { + return RegistrySource{}, fmt.Errorf("invalid sub-path: %w", err) + } + + // We delegate the package address parsing to the shared library // terraform-registry-address, but then we'll impose some additional // validation and normalization over that since we're intentionally // being a little stricter than Terraform has historically been, // prioritizing "one obvious way to do it" over many esoteric variations. - - startingAddr, err := regaddr.ParseModuleSource(given) + pkgOnlyAddr, err := regaddr.ParseModuleSource(pkgRaw) if err != nil { return RegistrySource{}, err } - - subPath, err := normalizeSubpath(startingAddr.Subdir) - if err != nil { - return RegistrySource{}, fmt.Errorf("invalid sub-path: %w", err) + if pkgOnlyAddr.Subdir != "" { + // Should never happen, because we split the subpath off above. + panic("post-split registry address still has subdir") } return RegistrySource{ - pkg: startingAddr.Package, + pkg: pkgOnlyAddr.Package, subPath: subPath, }, nil } +// ParseRegistryPackage parses the given string as a registry package address, +// which is the same syntax as a registry source address with no sub-path +// portion. 
+func ParseRegistryPackage(given string) (regaddr.ModulePackage, error) { + srcAddr, err := ParseRegistrySource(given) + if err != nil { + return regaddr.ModulePackage{}, err + } + if srcAddr.subPath != "" { + return regaddr.ModulePackage{}, fmt.Errorf("remote package address may not have a sub-path") + } + return srcAddr.pkg, nil +} + func (s RegistrySource) String() string { if s.subPath != "" { return s.pkg.String() + "//" + s.subPath @@ -70,3 +89,30 @@ func (s RegistrySource) String() string { func (s RegistrySource) SupportsVersionConstraints() bool { return true } + +func (s RegistrySource) Package() regaddr.ModulePackage { + return s.pkg +} + +// FinalSourceAddr takes the result of looking up the package portion of the +// receiver in a module registry and appends the receiver's sub-path to the +// returned sub-path to produce the final fully-qualified remote source address. +func (s RegistrySource) FinalSourceAddr(realSource RemoteSource) RemoteSource { + if s.subPath == "" { + return realSource // Easy case + } + if realSource.subPath == "" { + return RemoteSource{ + pkg: realSource.pkg, + subPath: s.subPath, + } + } + // If we get here then both addresses have a sub-path, so we need to + // combine them together. This assumes that the "real source" from the + // module registry will always refer to a directory, which is a fundamental + // assumption of the module registry protocol. 
+ return RemoteSource{ + pkg: realSource.pkg, + subPath: path.Join(realSource.subPath, s.subPath), + } +} diff --git a/sourceaddrs/source_remote.go b/sourceaddrs/source_remote.go index 9acaad4..9a2e6f2 100644 --- a/sourceaddrs/source_remote.go +++ b/sourceaddrs/source_remote.go @@ -8,9 +8,8 @@ import ( ) type RemoteSource struct { - sourceType string - url *url.URL - subPath string + pkg RemotePackage + subPath string } // sourceSigil implements Source @@ -22,29 +21,30 @@ var _ Source = RemoteSource{} // or returns an error if it does not use the correct syntax for interpretation // as a remote source address. func ParseRemoteSource(given string) (RemoteSource, error) { - pkgRaw, subPathRaw := splitSubPath(given) - subPath, err := normalizeSubpath(subPathRaw) - if err != nil { - return RemoteSource{}, fmt.Errorf("invalid sub-path: %w", err) - } - + expandedGiven := given for _, shorthand := range remoteSourceShorthands { - replacement, ok, err := shorthand(pkgRaw) + replacement, ok, err := shorthand(given) if err != nil { return RemoteSource{}, err } if ok { - pkgRaw = replacement + expandedGiven = replacement } } + pkgRaw, subPathRaw := splitSubPath(expandedGiven) + subPath, err := normalizeSubpath(subPathRaw) + if err != nil { + return RemoteSource{}, fmt.Errorf("invalid sub-path: %w", err) + } + // Once we've dealt with all the "shorthand" business, our address // should be in the form sourcetype::url, where "sourcetype::" is // optional and defaults to matching the URL scheme if not present. 
var sourceType string if matches := remoteSourceTypePattern.FindStringSubmatch(pkgRaw); len(matches) != 0 { - sourceType = matches[0] - pkgRaw = matches[1] + sourceType = matches[1] + pkgRaw = matches[2] } u, err := url.Parse(pkgRaw) @@ -74,6 +74,27 @@ func ParseRemoteSource(given string) (RemoteSource, error) { return RemoteSource{}, fmt.Errorf("invalid URL query string syntax in %q: %w", pkgRaw, err) } + return makeRemoteSource(sourceType, u, subPath) +} + +// MakeRemoteSource constructs a [RemoteSource] from its component parts. +// +// This is useful for deriving one remote source from another, by disassembling +// the original address into its component parts, modifying those parts, and +// then combining the modified parts back together with this function. +func MakeRemoteSource(sourceType string, u *url.URL, subPath string) (RemoteSource, error) { + var err error + subPath, err = normalizeSubpath(subPath) + if err != nil { + return RemoteSource{}, fmt.Errorf("invalid sub-path: %w", err) + } + + copyU := *u // shallow copy so we can safely modify + + return makeRemoteSource(sourceType, &copyU, subPath) +} + +func makeRemoteSource(sourceType string, u *url.URL, subPath string) (RemoteSource, error) { typeImpl, ok := remoteSourceTypes[sourceType] if !ok { if sourceType == u.Scheme { @@ -85,41 +106,37 @@ func ParseRemoteSource(given string) (RemoteSource, error) { } } - err = typeImpl.PrepareURL(u) + err := typeImpl.PrepareURL(u) if err != nil { return RemoteSource{}, err } return RemoteSource{ - sourceType: sourceType, - url: u, - subPath: subPath, + pkg: RemotePackage{ + sourceType: sourceType, + url: *u, + }, + subPath: subPath, }, nil } // String implements Source func (s RemoteSource) String() string { - // Our address normalization rules are a bit odd since we inherited the - // fundamentals of this addressing scheme from go-getter. 
- if s.url.Scheme == s.sourceType { - // When scheme and source type match we don't actually mention the - // source type in the stringification, because it looks redundant - // and confusing. - if s.subPath != "" { - return s.url.String() + "//" + s.subPath - } - return s.url.String() - } - if s.subPath != "" { - return s.sourceType + "::" + s.url.String() + "//" + s.subPath - } - return s.sourceType + "::" + s.url.String() + return s.pkg.subPathString(s.subPath) } func (s RemoteSource) SupportsVersionConstraints() bool { return false } +func (s RemoteSource) Package() RemotePackage { + return s.pkg +} + +func (s RemoteSource) SubPath() string { + return s.subPath +} + type remoteSourceShorthand func(given string) (normed string, ok bool, err error) var remoteSourceShorthands = []remoteSourceShorthand{ @@ -185,6 +202,12 @@ var remoteSourceShorthands = []remoteSourceShorthand{ // The remaining parts will become the sub-path portion, since // the repository as a whole is the source package. urlStr += "//" + strings.Join(parts[3:], "/") + // NOTE: We can't actually get here if there are exactly four + // parts, because gitlab.com is also a Terraform module registry + // and so gitlab.com/a/b/c must be interpreted as a registry + // module address instead of a GitLab repository address. Users + // must write an explicit git source address if they intend to + // refer to a Git repository. } return "git::" + urlStr, true, nil diff --git a/sourceaddrs/source_remote_types.go b/sourceaddrs/source_remote_types.go index abb632f..1c284a0 100644 --- a/sourceaddrs/source_remote_types.go +++ b/sourceaddrs/source_remote_types.go @@ -86,11 +86,15 @@ func (httpSourceType) PrepareURL(u *url.URL) error { if vs[0] != "tar.gz" && vs[0] != "tgz" { return fmt.Errorf("the special 'archive' query string argument must be set to 'tgz' if present") } - // We need to remove the special "archive" argument now so that we - // won't confuse the remote server with it. 
We preserve all of the - // other arguments because they might actually be intended for the - // server. - qs.Del("archive") + if vs[0] == "tar.gz" { + qs.Set("archive", "tgz") // normalize on the shorter form + } + // NOTE: We don't remove the "archive" argument here because the code + // which eventually fetches this will need it to understand what kind + // of archive it's supposed to be fetching, but that final client ought + // to remove this argument itself to avoid potentially confusing the + // remote server, since this is an argument reserved for go-getter and + // for the subset of go-getter's syntax we're implementing here. u.RawQuery = qs.Encode() } else { p := u.EscapedPath() diff --git a/sourceaddrs/source_test.go b/sourceaddrs/source_test.go new file mode 100644 index 0000000..7b7183a --- /dev/null +++ b/sourceaddrs/source_test.go @@ -0,0 +1,506 @@ +package sourceaddrs + +import ( + "fmt" + "net/url" + "reflect" + "testing" + + regaddr "github.com/hashicorp/terraform-registry-address" + svchost "github.com/hashicorp/terraform-svchost" +) + +func TestParseSource(t *testing.T) { + tests := []struct { + Given string + Want Source + WantErr string + }{ + { + Given: "", + WantErr: `a valid source address is required`, + }, + { + Given: " hello", + WantErr: `source address must not have leading or trailing spaces`, + }, + { + Given: "hello ", + WantErr: `source address must not have leading or trailing spaces`, + }, + { + Given: "./boop", + Want: LocalSource{ + relPath: "./boop", + }, + }, + { + Given: "./boop/../beep", + WantErr: `invalid local source address "./boop/../beep": relative path must be written in canonical form "./beep"`, + }, + { + Given: "../boop", + Want: LocalSource{ + relPath: "../boop", + }, + }, + { + Given: "../boop/../beep", + WantErr: `invalid local source address "../boop/../beep": relative path must be written in canonical form "../beep"`, + }, + { + Given: "hashicorp/subnets/cidr", + Want: RegistrySource{ + pkg: 
regaddr.ModulePackage{ + Host: regaddr.DefaultModuleRegistryHost, + Namespace: "hashicorp", + Name: "subnets", + TargetSystem: "cidr", + }, + }, + }, + { + Given: "hashicorp/subnets/cidr//blah/blah", + Want: RegistrySource{ + pkg: regaddr.ModulePackage{ + Host: regaddr.DefaultModuleRegistryHost, + Namespace: "hashicorp", + Name: "subnets", + TargetSystem: "cidr", + }, + subPath: "blah/blah", + }, + }, + { + Given: "hashicorp/subnets/cidr//blah/blah/../bloop", + WantErr: `invalid module registry source address "hashicorp/subnets/cidr//blah/blah/../bloop": invalid sub-path: must be slash-separated relative path without any .. or . segments`, + }, + { + Given: "terraform.example.com/bleep/bloop/blorp", + Want: RegistrySource{ + pkg: regaddr.ModulePackage{ + Host: svchost.Hostname("terraform.example.com"), + Namespace: "bleep", + Name: "bloop", + TargetSystem: "blorp", + }, + }, + }, + { + Given: "テラフォーム.example.com/bleep/bloop/blorp", + Want: RegistrySource{ + pkg: regaddr.ModulePackage{ + Host: svchost.Hostname("xn--jckxc1b4b2b6g.example.com"), + Namespace: "bleep", + Name: "bloop", + TargetSystem: "blorp", + }, + }, + }, + { + Given: "git::https://github.com/hashicorp/go-slug.git", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git"), + }, + }, + }, + { + Given: "git::https://github.com/hashicorp/go-slug.git//blah/blah", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git"), + }, + subPath: "blah/blah", + }, + }, + { + Given: "git::https://github.com/hashicorp/go-slug.git?ref=main", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git?ref=main"), + }, + }, + }, + { + Given: "git::https://github.com/hashicorp/go-slug.git?ref=main&ref=main", + WantErr: `invalid remote source address 
"git::https://github.com/hashicorp/go-slug.git?ref=main&ref=main": a Git repository URL's query string may include only one 'ref' argument`, + }, + { + Given: "git::https://github.com/hashicorp/go-slug.git//blah/blah?ref=main", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git?ref=main"), + }, + subPath: "blah/blah", + }, + }, + { + Given: "git::https://github.com/hashicorp/go-slug.git?sshkey=blahblah", + WantErr: `invalid remote source address "git::https://github.com/hashicorp/go-slug.git?sshkey=blahblah": a Git repository URL's query string may include only the argument 'ref'`, + }, + { + Given: "git::https://github.com/hashicorp/go-slug.git?depth=1", + WantErr: `invalid remote source address "git::https://github.com/hashicorp/go-slug.git?depth=1": a Git repository URL's query string may include only the argument 'ref'`, + }, + { + Given: "git::https://git@github.com/hashicorp/go-slug.git", + WantErr: `invalid remote source address "git::https://git@github.com/hashicorp/go-slug.git": must not use username or password in URL portion`, + }, + { + Given: "git::https://git:blit@github.com/hashicorp/go-slug.git", + WantErr: `invalid remote source address "git::https://git:blit@github.com/hashicorp/go-slug.git": must not use username or password in URL portion`, + }, + { + Given: "git::https://:blit@github.com/hashicorp/go-slug.git", + WantErr: `invalid remote source address "git::https://:blit@github.com/hashicorp/go-slug.git": must not use username or password in URL portion`, + }, + { + Given: "git::ssh://github.com/hashicorp/go-slug.git", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("ssh://github.com/hashicorp/go-slug.git"), + }, + }, + }, + { + Given: "git::ssh://github.com/hashicorp/go-slug.git//blah/blah?ref=main", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: 
*mustParseURL("ssh://github.com/hashicorp/go-slug.git?ref=main"), + }, + subPath: "blah/blah", + }, + }, + { + Given: "git://github.com/hashicorp/go-slug.git", + WantErr: `invalid remote source address "git://github.com/hashicorp/go-slug.git": a Git repository URL must use either the https or ssh scheme`, + }, + { + Given: "git::git://github.com/hashicorp/go-slug.git", + WantErr: `invalid remote source address "git::git://github.com/hashicorp/go-slug.git": don't specify redundant "git" source type for "git" URL`, + }, + { + Given: "github.com/hashicorp/go-slug.git", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git"), + }, + }, + }, + { + Given: "github.com/hashicorp/go-slug", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git"), + }, + }, + }, + { + Given: "github.com/hashicorp/go-slug/bleep", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://github.com/hashicorp/go-slug.git"), + }, + subPath: "bleep", + }, + }, + { + Given: "gitlab.com/hashicorp/go-slug.git", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://gitlab.com/hashicorp/go-slug.git"), + }, + }, + }, + { + Given: "gitlab.com/hashicorp/go-slug", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://gitlab.com/hashicorp/go-slug.git"), + }, + }, + }, + { + Given: "gitlab.com/hashicorp/go-slug/bleep", + // NOTE: gitlab.com _also_ hosts a Terraform Module registry, and so + // the registry address interpretation takes precedence if it + // matches. Users must write an explicit git:: source address if + // they want this to be interpreted as a Git source address. 
+ Want: RegistrySource{ + pkg: regaddr.ModulePackage{ + Host: svchost.Hostname("gitlab.com"), + Namespace: "hashicorp", + Name: "go-slug", + TargetSystem: "bleep", + }, + }, + }, + { + // This is the explicit Git source address version of the previous + // case, overriding the default interpretation as module registry. + Given: "git::https://gitlab.com/hashicorp/go-slug//bleep", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://gitlab.com/hashicorp/go-slug"), + }, + subPath: "bleep", + }, + }, + { + Given: "gitlab.com/hashicorp/go-slug/bleep/bloop", + // Two or more subpath portions is fine for Git interpretation, + // because that's not ambiguous with module registry. This is + // an annoying inconsistency but necessary for backward + // compatibility with go-getter's interpretations. + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "git", + url: *mustParseURL("https://gitlab.com/hashicorp/go-slug.git"), + }, + subPath: "bleep/bloop", + }, + }, + { + Given: "https://example.com/foo.tar.gz", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "https", + url: *mustParseURL("https://example.com/foo.tar.gz"), + }, + }, + }, + { + Given: "https://example.com/foo.tar.gz//bleep/bloop", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "https", + url: *mustParseURL("https://example.com/foo.tar.gz"), + }, + subPath: "bleep/bloop", + }, + }, + { + Given: "https://example.com/foo.tar.gz?something=anything", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "https", + url: *mustParseURL("https://example.com/foo.tar.gz?something=anything"), + }, + }, + }, + { + Given: "https://example.com/foo.tar.gz//bleep/bloop?something=anything", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "https", + url: *mustParseURL("https://example.com/foo.tar.gz?something=anything"), + }, + subPath: "bleep/bloop", + }, + }, + { + Given: "https://example.com/foo.tgz", + Want: RemoteSource{ + pkg: RemotePackage{ + 
sourceType: "https", + url: *mustParseURL("https://example.com/foo.tgz"), + }, + }, + }, + { + Given: "https://example.com/foo?archive=tar.gz", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "https", + url: *mustParseURL("https://example.com/foo?archive=tgz"), + }, + }, + }, + { + Given: "https://example.com/foo?archive=tgz", + Want: RemoteSource{ + pkg: RemotePackage{ + sourceType: "https", + url: *mustParseURL("https://example.com/foo?archive=tgz"), + }, + }, + }, + { + Given: "https://example.com/foo.zip", + WantErr: `invalid remote source address "https://example.com/foo.zip": a HTTPS URL's path must end with either .tar.gz or .tgz`, + }, + { + Given: "https://example.com/foo?archive=zip", + WantErr: `invalid remote source address "https://example.com/foo?archive=zip": the special 'archive' query string argument must be set to 'tgz' if present`, + }, + { + Given: "http://example.com/foo.tar.gz", + WantErr: `invalid remote source address "http://example.com/foo.tar.gz": source package addresses may not use unencrypted HTTP`, + }, + { + Given: "http::http://example.com/foo.tar.gz", + WantErr: `invalid remote source address "http::http://example.com/foo.tar.gz": don't specify redundant "http" source type for "http" URL`, + }, + { + Given: "https::https://example.com/foo.tar.gz", + WantErr: `invalid remote source address "https::https://example.com/foo.tar.gz": don't specify redundant "https" source type for "https" URL`, + }, + { + Given: "https://foo@example.com/foo.tgz", + WantErr: `invalid remote source address "https://foo@example.com/foo.tgz": must not use username or password in URL portion`, + }, + { + Given: "https://foo:bar@example.com/foo.tgz", + WantErr: `invalid remote source address "https://foo:bar@example.com/foo.tgz": must not use username or password in URL portion`, + }, + { + Given: "https://:bar@example.com/foo.tgz", + WantErr: `invalid remote source address "https://:bar@example.com/foo.tgz": must not use username or password in URL 
portion`, + }, + } + + for _, test := range tests { + t.Run(test.Given, func(t *testing.T) { + got, gotErr := ParseSource(test.Given) + + if test.WantErr != "" { + if gotErr == nil { + t.Fatalf("unexpected success\ngot result: %s (%T)\nwant error: %s", got, got, test.WantErr) + } + if got, want := gotErr.Error(), test.WantErr; got != want { + t.Fatalf("wrong error\ngot error: %s\nwant error: %s", got, want) + } + return + } + + if gotErr != nil { + t.Fatalf("unexpected error: %s", gotErr) + } + + // Two addresses are equal if they have the same string representation + // and the same dynamic type. + gotStr := got.String() + wantStr := test.Want.String() + if gotStr != wantStr { + t.Errorf("wrong result\ngot: %s\nwant: %s", gotStr, wantStr) + } + + if gotType, wantType := reflect.TypeOf(got), reflect.TypeOf(test.Want); gotType != wantType { + t.Errorf("wrong result type\ngot: %s\nwant: %s", gotType, wantType) + } + }) + } +} + +func TestResolveRelativeSource(t *testing.T) { + tests := []struct { + Base Source + Rel Source + Want Source + WantErr string + }{ + { + Base: MustParseSource("./a/b"), + Rel: MustParseSource("../c"), + Want: MustParseSource("./a/c"), + }, + { + Base: MustParseSource("./a"), + Rel: MustParseSource("../c"), + Want: MustParseSource("./c"), + }, + { + Base: MustParseSource("./a"), + Rel: MustParseSource("../../c"), + Want: MustParseSource("../c"), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop"), + Rel: MustParseSource("git::https://github.com/hashicorp/go-slug.git//blah/blah"), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//blah/blah"), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop"), + Rel: MustParseSource("git::https://example.com/foo.git"), + Want: MustParseSource("git::https://example.com/foo.git"), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop"), + Rel: MustParseSource("../bloop"), 
+ Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/bloop"), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop"), + Rel: MustParseSource("../"), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep"), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop"), + Rel: MustParseSource("../.."), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git"), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop"), + Rel: MustParseSource("../../../baz"), + WantErr: `invalid traversal from git::https://github.com/hashicorp/go-slug.git//beep/boop: relative path ../../../baz traverses up too many levels from source path beep/boop`, + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git"), + Rel: MustParseSource("./boop"), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//boop"), + }, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("%s + %s", test.Base, test.Rel), func(t *testing.T) { + got, gotErr := ResolveRelativeSource(test.Base, test.Rel) + + if test.WantErr != "" { + if gotErr == nil { + t.Fatalf("unexpected success\ngot result: %s (%T)\nwant error: %s", got, got, test.WantErr) + } + if got, want := gotErr.Error(), test.WantErr; got != want { + t.Fatalf("wrong error\ngot error: %s\nwant error: %s", got, want) + } + return + } + + if gotErr != nil { + t.Fatalf("unexpected error: %s", gotErr) + } + + // Two addresses are equal if they have the same string representation + // and the same dynamic type. 
+ gotStr := got.String() + wantStr := test.Want.String() + if gotStr != wantStr { + t.Errorf("wrong result\ngot: %s\nwant: %s", gotStr, wantStr) + } + + if gotType, wantType := reflect.TypeOf(got), reflect.TypeOf(test.Want); gotType != wantType { + t.Errorf("wrong result type\ngot: %s\nwant: %s", gotType, wantType) + } + }) + } +} + +func mustParseURL(s string) *url.URL { + ret, err := url.Parse(s) + if err != nil { + panic(err) + } + return ret +} diff --git a/sourceaddrs/subpath.go b/sourceaddrs/subpath.go index 4475cea..492cbfe 100644 --- a/sourceaddrs/subpath.go +++ b/sourceaddrs/subpath.go @@ -7,6 +7,17 @@ import ( "strings" ) +// ValidSubPath returns true if the given string is a valid sub-path string +// as could be included in either a remote or registry source address. +// +// A sub-path is valid if it's a slash-separated sequence of path segments +// without a leading or trailing slash and without any "." or ".." segments, +// since a sub-path can only traverse downwards from the root of a package. +func ValidSubPath(s string) bool { + _, err := normalizeSubpath(s) + return err == nil +} + // normalizeSubpath interprets the given string as a package "sub-path", // returning a normalized form of the path or an error if the string does // not use correct syntax. @@ -17,17 +28,21 @@ func normalizeSubpath(given string) (string, error) { return "", nil } - clean := path.Clean(given) - // Our definition of "sub-path" aligns with the definition used by Go's // virtual filesystem abstraction, since our "module package" idea // is also essentially just a virtual filesystem. // This definition prohibits "." and ".." segments and therefore prevents // upward path traversal. + if !fs.ValidPath(given) { + return "", fmt.Errorf("must be slash-separated relative path without any .. or . segments") + } + + clean := path.Clean(given) + // Go's path wrangling uses "." 
to represent "root directory", but // we represent that by omitting the subpath entirely, so we forbid that // too even though Go would consider it valid. - if clean == "." || !fs.ValidPath(clean) { + if clean == "." { return "", fmt.Errorf("must be slash-separated relative path without any .. or . segments") } @@ -96,3 +111,18 @@ func splitSubPath(src string) (string, string) { return src, subdir } + +func joinSubPath(subPath, rel string) (string, error) { + new := path.Join(subPath, rel) + if new == "." { + return "", nil // the root of the package + } + // If subPath was a valid sub-path (no "." or ".." segments) then the + // appearance of such segments in our result suggests that "rel" has + // too many upward traversals and would thus escape from its containing + // package. + if !fs.ValidPath(new) { + return "", fmt.Errorf("relative path %s traverses up too many levels from source path %s", rel, subPath) + } + return new, nil +} diff --git a/sourcebundle/builder.go b/sourcebundle/builder.go new file mode 100644 index 0000000..bc6ec68 --- /dev/null +++ b/sourcebundle/builder.go @@ -0,0 +1,681 @@ +package sourcebundle + +import ( + "context" + "encoding/base64" + "encoding/json" + "fmt" + "io/ioutil" + "os" + "path/filepath" + "sort" + "strings" + "sync" + + "github.com/apparentlymart/go-versions/versions" + "github.com/hashicorp/go-slug/internal/ignorefiles" + "github.com/hashicorp/go-slug/sourceaddrs" + regaddr "github.com/hashicorp/terraform-registry-address" + "golang.org/x/mod/sumdb/dirhash" +) + +// Builder deals with the process of gathering source code +type Builder struct { + // targetDir is the base directory of the source bundle we're writing + // into. + targetDir string + + // fetcher is the package fetching callback we'll use to fetch remote + // packages into subdirectories of the bundle directory. 
+ fetcher PackageFetcher + + // registryClient is the module registry client we'll use to resolve + // any module registry sources into their underlying remote package + // addresses which we can then fetch using "fetcher". + registryClient RegistryClient + + // pendingRemote is an unordered set of remote artifacts that we've + // discovered we need to analyze but have not yet done so. + pendingRemote []remoteArtifact + + // analyzed is a set of remote artifacts that we've already analyzed and + // thus already found the dependencies of. + analyzed map[remoteArtifact]struct{} + + // remotePackageDirs tracks the local directory name for each remote + // package we've already fetched. The keys of this map also serve as our + // memory of which packages we've already fetched and therefore don't need + // to fetch again if we find more source addresses in those packages. + // + // In our current implementation these directory names are always checksums + // of the content of the package, and we rely on that when building a + // manifest file so if a future update changes the directory naming scheme + // then we'll need a different solution for tracking the checksums for + // use in the manifest file. For external callers the local directory + // naming scheme is always an implementation detail that they may not + // rely on. + remotePackageDirs map[sourceaddrs.RemotePackage]string + + // remotePackageMeta tracks the package metadata of each remote package + // we've fetched so far. This does not include any packages for which + // the fetcher returned no metadata. + remotePackageMeta map[sourceaddrs.RemotePackage]*PackageMeta + + // pendingRegistry is an unordered set of registry artifacts that need to + // be translated into remote artifacts before further processing. + pendingRegistry []registryArtifact + + // resolvedRegistry tracks the underlying remote source address for each + // selected version of each module registry package.
+ resolvedRegistry map[registryPackageVersion]sourceaddrs.RemoteSource + + // registryPackageVersions caches responses from module registry calls to + // look up the available versions for a particular module package. Although + // these could potentially change while we're running, we assume that the + // lifetime of a particular Builder is short enough for that not to + // matter. + registryPackageVersions map[regaddr.ModulePackage]versions.List + + mu sync.Mutex +} + +// NewBuilder creates a new builder that will construct a source bundle in the +// given target directory, which must already exist and be empty before any +// work begins. +// +// During the lifetime of a builder the target directory must not be modified +// or moved by anything other than the builder, including other concurrent +// processes running on the system. The target directory is not a valid source +// bundle until a call to [Builder.Close] returns successfully; the directory +// may appear to be in an inconsistent state while the builder is working. +func NewBuilder(targetDir string, fetcher PackageFetcher, registryClient RegistryClient) (*Builder, error) { + // We'll lock in our absolute path here just in case someone changes the + // process working directory out from under us for some reason.
+ absDir, err := filepath.Abs(targetDir) + if err != nil { + return nil, fmt.Errorf("invalid target directory: %w", err) + } + return &Builder{ + targetDir: absDir, + fetcher: fetcher, + registryClient: registryClient, + analyzed: make(map[remoteArtifact]struct{}), + remotePackageDirs: make(map[sourceaddrs.RemotePackage]string), + remotePackageMeta: make(map[sourceaddrs.RemotePackage]*PackageMeta), + resolvedRegistry: make(map[registryPackageVersion]sourceaddrs.RemoteSource), + registryPackageVersions: make(map[regaddr.ModulePackage]versions.List), + }, nil +} + +// AddRemoteSource incorporates the package containing the given remote source +// into the bundle, and then analyzes the source artifact for dependencies +// using the given dependency finder. +// +// If the returned diagnostics contains errors then the bundle is left in an +// inconsistent state and must not be used for any other calls. +func (b *Builder) AddRemoteSource(ctx context.Context, addr sourceaddrs.RemoteSource, depFinder DependencyFinder) Diagnostics { + if b.targetDir == "" { + // The builder has been closed, so cannot be modified further. + // This is always a bug in the caller, which should discard a builder + // as soon as it's been closed. + panic("AddRemoteSource on closed sourcebundle.Builder") + } + + af := remoteArtifact{addr, depFinder} + b.mu.Lock() + if _, exists := b.analyzed[af]; exists { + // Nothing further to do with this one, then. + // NOTE: This early check is just an optimization; b.resolvePending + // will re-check whether each queued item has already been analyzed + // anyway, so this just avoids growing b.pendingRemote if possible, + // since once something has become analyzed it never becomes + // "un-analyzed" again. 
+ b.mu.Unlock() + return nil + } + b.pendingRemote = append(b.pendingRemote, af) + b.mu.Unlock() + + return b.resolvePending(ctx) +} + +// AddRegistrySource incorporates the registry metadata for the given address +// and the package associated with the latest version in allowedVersions +// into the bundle, and then analyzes the new artifact for dependencies +// using the given dependency finder. +// +// If the returned diagnostics contains errors then the bundle is left in an +// inconsistent state and must not be used for any other calls. +func (b *Builder) AddRegistrySource(ctx context.Context, addr sourceaddrs.RegistrySource, allowedVersions versions.Set, depFinder DependencyFinder) Diagnostics { + if b.targetDir == "" { + // The builder has been closed, so cannot be modified further. + // This is always a bug in the caller, which should discard a builder + // as soon as it's been closed. + panic("AddRegistrySource on closed sourcebundle.Builder") + } + + b.mu.Lock() + b.pendingRegistry = append(b.pendingRegistry, registryArtifact{addr, allowedVersions, depFinder}) + b.mu.Unlock() + + return b.resolvePending(ctx) +} + +// Close ensures that the target directory is in a valid and consistent state +// to be used as a source bundle and then returns an object providing the +// read-only API for that bundle. +// +// After calling Close the receiving builder becomes invalid and must not be +// used any further. +func (b *Builder) Close() (*Bundle, error) { + b.mu.Lock() + if b.targetDir == "" { + b.mu.Unlock() + panic("Close on already-closed sourcebundle.Builder") + } + baseDir := b.targetDir + b.targetDir = "" // makes the Add... methods panic when called, to avoid mutating the finalized bundle + b.mu.Unlock() + + // We need to freeze all of the metadata we've been tracking into the + // manifest file so that OpenDir can discover equivalent metadata itself + // when opening the finalized bundle. 
+ err := b.writeManifest(filepath.Join(baseDir, manifestFilename)) + if err != nil { + return nil, fmt.Errorf("failed to generate source bundle manifest: %w", err) + } + + ret, err := OpenDir(baseDir) + if err != nil { + // If we get here then it suggests that we've left the bundle directory + // in an inconsistent state which therefore made OpenDir fail its + // early checks. + return nil, fmt.Errorf("failed to open bundle after Close: %w", err) + } + return ret, nil +} + +// resolvePending depletes the queues of pending source artifacts, making sure +// that everything required is present in the bundle directory, both directly +// and indirectly. +func (b *Builder) resolvePending(ctx context.Context) (diags Diagnostics) { + b.mu.Lock() + defer func() { + // If anything we do here generates any errors then the bundle + // directory is in an inconsistent state and must not be used + // any further. This will make all subsequent calls panic. + if diags.HasErrors() { + b.targetDir = "" + } + + b.mu.Unlock() + }() + + trace := buildTraceFromContext(ctx) + + // We'll just keep iterating until we've depleted our queues. + // Note that the order of operations isn't actually important here and + // so we're consuming the "queues" in LIFO order instead of FIFO order, + // since that is easier to model using a Go slice. + for len(b.pendingRemote) > 0 || len(b.pendingRegistry) > 0 { + // We'll consume items from the "registry" queue first because resolving + // this will contribute additional items to the "remote" queue. 
+ for len(b.pendingRegistry) > 0 { + next, remain := b.pendingRegistry[len(b.pendingRegistry)-1], b.pendingRegistry[:len(b.pendingRegistry)-1] + b.pendingRegistry = remain + + realSource, err := b.findRegistryPackageSource(ctx, next.sourceAddr, next.versions) + if err != nil { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Cannot resolve module registry package", + detail: fmt.Sprintf("Error resolving module registry source %s: %s.", next.sourceAddr, err), + }) + continue + } + + b.pendingRemote = append(b.pendingRemote, remoteArtifact{ + sourceAddr: realSource, + depFinder: next.depFinder, + }) + } + + // Now we'll consume items from the "remote" queue, which might have + // grown as a result of resolving some registry queue items. + for len(b.pendingRemote) > 0 { + next, remain := b.pendingRemote[len(b.pendingRemote)-1], b.pendingRemote[:len(b.pendingRemote)-1] + b.pendingRemote = remain + + pkgAddr := next.sourceAddr.Package() + pkgLocalDir, err := b.ensureRemotePackage(ctx, pkgAddr) + if err != nil { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Cannot install source package", + detail: fmt.Sprintf("Error installing %s: %s.", next.sourceAddr.Package(), err), + }) + continue + } + + // localDirPath now refers to the local equivalent of whatever + // sub-path or sub-file the source address referred to, so we + // can ask the dependency finder to analyze it and possibly + // contribute more items to our queues. 
+ artifact := remoteArtifact{ + sourceAddr: next.sourceAddr, + depFinder: next.depFinder, + } + if _, exists := b.analyzed[artifact]; !exists { + fsys := os.DirFS(filepath.Join(b.targetDir, pkgLocalDir)) + subPath := next.sourceAddr.SubPath() + depFinder := next.depFinder + + deps := Dependencies{ + baseAddr: next.sourceAddr, + + remoteCb: func(source sourceaddrs.RemoteSource, depFinder DependencyFinder) { + b.pendingRemote = append(b.pendingRemote, remoteArtifact{ + sourceAddr: source, + depFinder: depFinder, + }) + }, + registryCb: func(source sourceaddrs.RegistrySource, allowedVersions versions.Set, depFinder DependencyFinder) { + b.pendingRegistry = append(b.pendingRegistry, registryArtifact{ + sourceAddr: source, + versions: allowedVersions, + depFinder: depFinder, + }) + }, + localResolveErrCb: func(err error) { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Invalid relative source address", + detail: fmt.Sprintf("Invalid relative path from %s: %s.", next.sourceAddr, err), + }) + }, + } + moreDiags := depFinder.FindDependencies(fsys, subPath, &deps) + deps.disable() + b.analyzed[artifact] = struct{}{} + if len(moreDiags) != 0 { + moreDiags = moreDiags.inRemoteSourcePackage(pkgAddr) + if cb := trace.Diagnostics; cb != nil { + cb(ctx, moreDiags) + } + } + diags = diags.Append(moreDiags) + if diags.HasErrors() { + continue + } + } + } + } + + return diags +} + +func (b *Builder) findRegistryPackageSource(ctx context.Context, sourceAddr sourceaddrs.RegistrySource, allowedVersions versions.Set) (sourceaddrs.RemoteSource, error) { + // NOTE: This expects to be called while b.mu is already locked. 
+ + trace := buildTraceFromContext(ctx) + + pkgAddr := sourceAddr.Package() + availableVersions, ok := b.registryPackageVersions[pkgAddr] + if !ok { + var reqCtx context.Context + if cb := trace.RegistryPackageVersionsStart; cb != nil { + reqCtx = cb(ctx, pkgAddr) + } + if reqCtx == nil { + reqCtx = ctx + } + + vs, err := b.registryClient.ModulePackageVersions(reqCtx, pkgAddr) + if err != nil { + if cb := trace.RegistryPackageVersionsFailure; cb != nil { + cb(reqCtx, pkgAddr, err) + } + return sourceaddrs.RemoteSource{}, fmt.Errorf("failed to query available versions for %s: %w", pkgAddr, err) + } + vs.Sort() + availableVersions = vs + b.registryPackageVersions[pkgAddr] = availableVersions + if cb := trace.RegistryPackageVersionsSuccess; cb != nil { + cb(reqCtx, pkgAddr, availableVersions) + } + } else { + if cb := trace.RegistryPackageVersionsAlready; cb != nil { + cb(ctx, pkgAddr, availableVersions) + } + } + + selectedVersion := availableVersions.NewestInSet(allowedVersions) + if selectedVersion == versions.Unspecified { + return sourceaddrs.RemoteSource{}, fmt.Errorf("no available version of %s matches the specified version constraint", pkgAddr) + } + + pkgVer := registryPackageVersion{ + pkg: pkgAddr, + version: selectedVersion, + } + realSourceAddr, ok := b.resolvedRegistry[pkgVer] + if !ok { + var reqCtx context.Context + if cb := trace.RegistryPackageSourceStart; cb != nil { + reqCtx = cb(ctx, pkgAddr, selectedVersion) + } + if reqCtx == nil { + reqCtx = ctx + } + + sa, err := b.registryClient.ModulePackageSourceAddr(reqCtx, pkgAddr, selectedVersion) + if err != nil { + if cb := trace.RegistryPackageSourceFailure; cb != nil { + cb(reqCtx, pkgAddr, selectedVersion, err) + } + return sourceaddrs.RemoteSource{}, fmt.Errorf("failed to find real source address for %s %s: %w", pkgAddr, selectedVersion, err) + } + realSourceAddr = sa + b.resolvedRegistry[pkgVer] = realSourceAddr + if cb := trace.RegistryPackageSourceSuccess; cb != nil { + cb(reqCtx, pkgAddr, 
selectedVersion, realSourceAddr) + } + } else { + if cb := trace.RegistryPackageSourceAlready; cb != nil { + cb(ctx, pkgAddr, selectedVersion, realSourceAddr) + } + } + + // If our original source address had its own sub-path component then we + // need to combine that with the one in realSourceAddr to get the correct + // final path: the sourceAddr subpath is relative to the realSourceAddr + // subpath. + realSourceAddr = sourceAddr.FinalSourceAddr(realSourceAddr) + + return realSourceAddr, nil +} + +func (b *Builder) ensureRemotePackage(ctx context.Context, pkgAddr sourceaddrs.RemotePackage) (localDir string, err error) { + // NOTE: This expects to be called while b.mu is already locked. + + trace := buildTraceFromContext(ctx) + + existingDir, ok := b.remotePackageDirs[pkgAddr] + if ok { + // We already have this package, so there's nothing more to do. + if cb := trace.RemotePackageDownloadAlready; cb != nil { + cb(ctx, pkgAddr) + } + return existingDir, nil + } + + var reqCtx context.Context + if cb := trace.RemotePackageDownloadStart; cb != nil { + reqCtx = cb(ctx, pkgAddr) + } + if reqCtx == nil { + reqCtx = ctx + } + defer func() { + if err == nil { + if cb := trace.RemotePackageDownloadSuccess; cb != nil { + cb(reqCtx, pkgAddr) + } + } else { + if cb := trace.RemotePackageDownloadFailure; cb != nil { + cb(reqCtx, pkgAddr, err) + } + } + }() + + // We'll eventually name our local directory after a checksum of its + // content, but we don't know its content yet so we'll use a temporary + // name while we work on getting it populated. + workDir, err := ioutil.TempDir(b.targetDir, ".tmp-") + if err != nil { + return "", fmt.Errorf("failed to create new package directory: %w", err) + } + + meta, err := b.fetcher.FetchSourcePackage(reqCtx, pkgAddr.SourceType(), pkgAddr.URL(), workDir) + if err != nil { + return "", fmt.Errorf("failed to fetch package: %w", err) + } + if meta != nil { + // We'll remember the meta so we can use it when building a manifest later. 
+ b.remotePackageMeta[pkgAddr] = meta + } + + // If the package has a .terraformignore file then we now need to remove + // everything that we've been instructed to ignore. + ignoreRules, err := ignorefiles.LoadPackageIgnoreRules(workDir) + if err != nil { + return "", fmt.Errorf("invalid .terraformignore file: %w", err) + } + + // NOTE: The checks in packagePrepareWalkFn are safe only if we are sure + // that no other process is concurrently modifying our temporary directory. + // Source bundle building should only occur on hosts that are trusted by + // whoever will ultimately be using the generated bundle. + err = filepath.Walk(workDir, packagePrepareWalkFn(workDir, ignoreRules)) + if err != nil { + return "", fmt.Errorf("failed to prepare package directory: %#w", err) + } + + // If we got here then our tmpDir contains the final source code of a valid + // module package. We'll compute a hash of its contents so we can notice + // if it is identical to some other package we already installed, and then + // if not rename it into its final directory name. + // For this purpose we reuse the same directory tree hashing scheme that + // Go uses for its own modules, although that's an implementation detail + // subject to change in future versions: callers should always resolve + // paths through the source bundle's manifest rather than assuming a path. + // + // FIXME: We should implement our own thing similar to Go's dirhash but + // which can preserve file metadata at least to the level of detail that + // Git can, so that we can e.g. avoid coalescing two packages that differ + // only in whether a particular file is executable, or similar. + // + // We do currently _internally_ rely on the temporary directory being a + // hash when we build the final manifest for the bundle, so if you change + // this naming scheme you'll need to devise a new way for the manifest + // to learn about the checksum. 
External callers are forbidden from relying + // on it though, so you only have to worry about making the internals of + // this package self-consistent in how they deal with naming and hashes. + hash, err := dirhash.HashDir(workDir, "", dirhash.Hash1) + if err != nil { + return "", fmt.Errorf("failed to calculate package checksum: %w", err) + } + dirName := strings.TrimPrefix(hash, "h1:") + + // dirhash produces standard base64 encoding, but we need URL-friendly + // base64 encoding since we're using these as filenames. + rawChecksum, err := base64.StdEncoding.DecodeString(dirName) + if err != nil { + // Should not get here + return "", fmt.Errorf("package has invalid checksum: %w", err) + } + dirName = base64.RawURLEncoding.EncodeToString(rawChecksum) + + b.remotePackageDirs[pkgAddr] = dirName + + // We might already have a directory with the same hash if we have two + // different package addresses that happen to return the same source code. + // For example, this could happen if one Git source leaves ref unspecified + // and another explicitly specifies the main branch, therefore causing us + // to fetch the same source code in two different ways. If a directory + // already exists then we'll assume that it's suitable for this package + // and discard the temporary directory we've been working on here, thereby + // making the final bundle smaller. + finalDir := filepath.Join(b.targetDir, dirName) + if info, err := os.Lstat(finalDir); err == nil && info.IsDir() { + err := os.RemoveAll(workDir) + if err != nil { + return "", fmt.Errorf("failed to clean temporary directory: %w", err) + } + return dirName, nil + } + + // If a directory isn't already present then we'll now rename our + // temporary directory to its final name. 
+ err = os.Rename(workDir, finalDir) + if err != nil { + return "", fmt.Errorf("failed to place final package directory: %w", err) + } + + return dirName, nil +} + +func (b *Builder) writeManifest(filename string) error { + var root manifestRoot + root.FormatVersion = 1 + + for pkgAddr, localDirName := range b.remotePackageDirs { + pkgMeta := b.remotePackageMeta[pkgAddr] + + manifestPkg := manifestRemotePackage{ + SourceAddr: pkgAddr.String(), + LocalDir: localDirName, + } + if pkgMeta != nil && pkgMeta.gitCommitID != "" { + manifestPkg.Meta.GitCommitID = pkgMeta.gitCommitID + } + + root.Packages = append(root.Packages, manifestPkg) + } + sort.Slice(root.Packages, func(i, j int) bool { + return root.Packages[i].SourceAddr < root.Packages[j].SourceAddr + }) + + registryObjs := make(map[regaddr.ModulePackage]*manifestRegistryMeta) + for rpv, sourceAddr := range b.resolvedRegistry { + manifestMeta, ok := registryObjs[rpv.pkg] + if !ok { + root.RegistryMeta = append(root.RegistryMeta, manifestRegistryMeta{ + SourceAddr: rpv.pkg.String(), + Versions: make(map[string]manifestRegistryVersion), + }) + manifestMeta = &root.RegistryMeta[len(root.RegistryMeta)-1] + registryObjs[rpv.pkg] = manifestMeta + } + manifestMeta.Versions[rpv.version.String()] = manifestRegistryVersion{ + SourceAddr: sourceAddr.String(), + } + } + sort.Slice(root.RegistryMeta, func(i, j int) bool { + return root.Packages[i].SourceAddr < root.Packages[j].SourceAddr + }) + + buf, err := json.MarshalIndent(&root, "", " ") + if err != nil { + return fmt.Errorf("failed to serialize to JSON: %#w", err) + } + err = os.WriteFile(filename, buf, 0664) + if err != nil { + return fmt.Errorf("failed to write file: %#w", err) + } + + return nil +} + +type remoteArtifact struct { + sourceAddr sourceaddrs.RemoteSource + depFinder DependencyFinder +} + +type registryArtifact struct { + sourceAddr sourceaddrs.RegistrySource + versions versions.Set + depFinder DependencyFinder +} + +type registryPackageVersion struct { + 
pkg regaddr.ModulePackage + version versions.Version +} + +func packagePrepareWalkFn(root string, ignoreRules *ignorefiles.Ruleset) filepath.WalkFunc { + return func(absPath string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + // Get the relative path from the current src directory. + relPath, err := filepath.Rel(root, absPath) + if err != nil { + return fmt.Errorf("failed to get relative path for file %q: %w", absPath, err) + } + if relPath == "." { + return nil + } + + ignored, err := ignoreRules.Excludes(relPath) + if err != nil { + return fmt.Errorf("invalid .terraformignore rules: %#w", err) + } + if ignored { + err := os.RemoveAll(absPath) + if err != nil { + return fmt.Errorf("failed to remove ignored file %s: %s", relPath, err) + } + return nil + } + + // For directories we also need to check with a path separator on the + // end, which ignores entire subtrees. + if info.IsDir() { + ignored, err := ignoreRules.Excludes(relPath + string(os.PathSeparator)) + if err != nil { + return fmt.Errorf("invalid .terraformignore rules: %#w", err) + } + if ignored { + err := os.RemoveAll(absPath) + if err != nil { + return fmt.Errorf("failed to remove ignored file %s: %s", relPath, err) + } + return nil + } + } + + // If we get here then we have a file or directory that isn't + // covered by the ignore rules, but we still need to make sure it's + // valid for inclusion in a source bundle. + // We only allow regular files, directories, and symlinks to either + // of those as long as they are under the root directory prefix. 
+ absRoot, err := filepath.Abs(root) + if err != nil { + return fmt.Errorf("failed to get absolute path for root directory %q: %w", root, err) + } + absRoot, err = filepath.EvalSymlinks(absRoot) + if err != nil { + return fmt.Errorf("failed to get absolute path for root directory %q: %w", root, err) + } + reAbsPath := filepath.Join(absRoot, relPath) + realPath, err := filepath.EvalSymlinks(reAbsPath) + if err != nil { + return fmt.Errorf("failed to get real path for sub-path %q: %w", relPath, err) + } + realPathRel, err := filepath.Rel(absRoot, realPath) + if err != nil { + return fmt.Errorf("failed to get real relative path for sub-path %q: %w", relPath, err) + } + + // After all of the above we can finally safely test whether the + // transformed path is "local", meaning that it only descends down + // from the real root. + if !filepath.IsLocal(realPathRel) { + return fmt.Errorf("module package path %q is symlink traversing out of the package root", relPath) + } + + // The real referent must also be either a regular file or a directory. + // (Not, for example, a Unix device node or socket or other such oddities.) 
+ lInfo, err := os.Lstat(realPath) + if err != nil { + return fmt.Errorf("failed to stat %q: %w", realPath, err) + } + if !(lInfo.Mode().IsRegular() || lInfo.Mode().IsDir()) { + return fmt.Errorf("module package path %q is not a regular file or directory", relPath) + } + + return nil + } +} diff --git a/sourcebundle/builder_test.go b/sourcebundle/builder_test.go new file mode 100644 index 0000000..9735e1a --- /dev/null +++ b/sourcebundle/builder_test.go @@ -0,0 +1,948 @@ +package sourcebundle + +import ( + "bufio" + "context" + "errors" + "fmt" + "io" + "io/fs" + "net/url" + "os" + "path" + "path/filepath" + "strings" + "testing" + + "github.com/apparentlymart/go-versions/versions" + "github.com/apparentlymart/go-versions/versions/constraints" + "github.com/google/go-cmp/cmp" + "github.com/hashicorp/go-slug/sourceaddrs" + regaddr "github.com/hashicorp/terraform-registry-address" +) + +func TestBuilderSimple(t *testing.T) { + // This tests the common pattern of specifying a module registry address + // to start, having that translated into a real remote source address, + // and then downloading from that real source address. There are no + // oddities or edge-cases here. 
+ + tracer := testBuildTracer{} + ctx := tracer.OnContext(context.Background()) + + targetDir := t.TempDir() + builder := testingBuilder( + t, targetDir, + map[string]string{ + "https://example.com/foo.tgz": "testdata/pkgs/hello", + }, + map[string]map[string]string{ + "example.com/foo/bar/baz": map[string]string{ + "1.0.0": "https://example.com/foo.tgz", + }, + }, + ) + + realSource := sourceaddrs.MustParseSource("https://example.com/foo.tgz").(sourceaddrs.RemoteSource) + regSource := sourceaddrs.MustParseSource("example.com/foo/bar/baz").(sourceaddrs.RegistrySource) + diags := builder.AddRegistrySource(ctx, regSource, versions.All, noDependencyFinder) + if len(diags) > 0 { + t.Fatal("unexpected diagnostics") + } + + wantLog := []string{ + "start requesting versions for example.com/foo/bar/baz", + "success requesting versions for example.com/foo/bar/baz", + "start requesting source address for example.com/foo/bar/baz 1.0.0", + "source address for example.com/foo/bar/baz 1.0.0 is https://example.com/foo.tgz", + "start downloading https://example.com/foo.tgz", + "downloaded https://example.com/foo.tgz", + } + gotLog := tracer.log + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Errorf("wrong trace events\n%s", diff) + } + + bundle, err := builder.Close() + if err != nil { + t.Fatalf("failed to close bundle: %s", err) + } + + localPkgDir, err := bundle.LocalPathForRemoteSource(realSource) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", realSource.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "hello")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + + // Looking up the original registry address at the selected version + // should return the same directory, because the 
registry address is just + // an indirection over the same source address. + registryPkgDir, err := bundle.LocalPathForRegistrySource(regSource, versions.MustParseVersion("1.0.0")) + if err != nil { + t.Fatalf("builder does not know a local directory for %s: %s", regSource.Package(), err) + } + if registryPkgDir != localPkgDir { + t.Errorf("local dir for %s doesn't match local dir for %s", regSource, realSource) + } +} + +func TestBuilderSubdirs(t *testing.T) { + tracer := testBuildTracer{} + ctx := tracer.OnContext(context.Background()) + + targetDir := t.TempDir() + builder := testingBuilder( + t, targetDir, + map[string]string{ + "https://example.com/subdirs.tgz": "testdata/pkgs/subdirs", + }, + map[string]map[string]string{ + "example.com/foo/bar/baz": map[string]string{ + // NOTE: The registry response points to a sub-directory of + // this package, not to the root of the package. + "1.0.0": "https://example.com/subdirs.tgz//a", + }, + }, + ) + + // NOTE: We're asking for subdir "b" of the registry address. That combines + // with the registry's own "a" subdir to produce "a/b" as the final + // subdirectory path.
+ regSource := sourceaddrs.MustParseSource("example.com/foo/bar/baz//b").(sourceaddrs.RegistrySource) + realSource := sourceaddrs.MustParseSource("https://example.com/subdirs.tgz//a/b").(sourceaddrs.RemoteSource) + diags := builder.AddRegistrySource(ctx, regSource, versions.All, noDependencyFinder) + if len(diags) > 0 { + t.Fatal("unexpected diagnostics") + } + + wantLog := []string{ + "start requesting versions for example.com/foo/bar/baz", + "success requesting versions for example.com/foo/bar/baz", + "start requesting source address for example.com/foo/bar/baz 1.0.0", + "source address for example.com/foo/bar/baz 1.0.0 is https://example.com/subdirs.tgz//a", + "start downloading https://example.com/subdirs.tgz", + "downloaded https://example.com/subdirs.tgz", + } + gotLog := tracer.log + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Errorf("wrong trace events\n%s", diff) + } + + bundle, err := builder.Close() + if err != nil { + t.Fatalf("failed to close bundle: %s", err) + } + + localPkgDir, err := bundle.LocalPathForRemoteSource(realSource) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", realSource.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "beepbeep")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + + // Looking up the original registry address at the selected version + // should return the same directory, because the registry address is just + // an indirection over the same source address. 
+ registryPkgDir, err := bundle.LocalPathForRegistrySource(regSource, versions.MustParseVersion("1.0.0")) + if err != nil { + t.Fatalf("builder does not know a local directory for %s: %s", regSource.Package(), err) + } + if registryPkgDir != localPkgDir { + t.Errorf("local dir for %s doesn't match local dir for %s", regSource, realSource) + } +} + +func TestBuilderRemoteDeps(t *testing.T) { + tracer := testBuildTracer{} + ctx := tracer.OnContext(context.Background()) + + targetDir := t.TempDir() + builder := testingBuilder( + t, targetDir, + map[string]string{ + "https://example.com/with-deps.tgz": "testdata/pkgs/with-remote-deps", + "https://example.com/dependency1.tgz": "testdata/pkgs/hello", + "https://example.com/dependency2.tgz": "testdata/pkgs/terraformignore", + }, + nil, + ) + + startSource := sourceaddrs.MustParseSource("https://example.com/with-deps.tgz").(sourceaddrs.RemoteSource) + dep1Source := sourceaddrs.MustParseSource("https://example.com/dependency1.tgz").(sourceaddrs.RemoteSource) + dep2Source := sourceaddrs.MustParseSource("https://example.com/dependency2.tgz").(sourceaddrs.RemoteSource) + diags := builder.AddRemoteSource(ctx, startSource, stubDependencyFinder{filename: "dependencies"}) + if len(diags) > 0 { + for _, diag := range diags { + t.Errorf("unexpected diagnostic\nSummary: %s\nDetail: %s", diag.Description().Summary, diag.Description().Detail) + } + t.Fatal("unexpected diagnostics") + } + + wantLog := []string{ + "start downloading https://example.com/with-deps.tgz", + "downloaded https://example.com/with-deps.tgz", + + // NOTE: The exact ordering of these two pairs is an implementation + // detail of Builder: it consumes its "queues" in LIFO order. If you've + // changed that implementation to a different order then it's expected + // for this to mismatch and you can just reorder these as long as + // all of the same events appear in any sensible order. 
Callers are + // not allowed to depend on the relative ordering of events relating + // to different packages. + "start downloading https://example.com/dependency2.tgz", + "downloaded https://example.com/dependency2.tgz", + "start downloading https://example.com/dependency1.tgz", + "downloaded https://example.com/dependency1.tgz", + } + gotLog := tracer.log + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Errorf("wrong trace events\n%s", diff) + } + + bundle, err := builder.Close() + if err != nil { + t.Fatalf("failed to close bundle: %s", err) + } + + t.Run("starting package", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(startSource) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", startSource.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "dependencies")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) + t.Run("dependency 1", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(dep1Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", dep1Source.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "hello")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) + t.Run("dependency 2", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(dep2Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not 
know a local directory for %s: %s", dep2Source.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "included")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) +} + +func TestBuilderRemoteDepsDifferingTypes(t *testing.T) { + tracer := testBuildTracer{} + ctx := tracer.OnContext(context.Background()) + + targetDir := t.TempDir() + builder := testingBuilder( + t, targetDir, + map[string]string{ + "https://example.com/self_dependency.tgz": "testdata/pkgs/with-remote-deps", + "https://example.com/dependency1.tgz": "testdata/pkgs/hello", + "https://example.com/dependency2.tgz": "testdata/pkgs/terraformignore", + }, + nil, + ) + + startSource := sourceaddrs.MustParseSource("https://example.com/self_dependency.tgz").(sourceaddrs.RemoteSource) + dep1Source := sourceaddrs.MustParseSource("https://example.com/dependency1.tgz").(sourceaddrs.RemoteSource) + dep2Source := sourceaddrs.MustParseSource("https://example.com/dependency2.tgz").(sourceaddrs.RemoteSource) + diags := builder.AddRemoteSource(ctx, startSource, stubDependencyFinder{ + filename: "self_dependency", + nextFilename: "dependencies", + }) + if len(diags) > 0 { + for _, diag := range diags { + t.Errorf("unexpected diagnostic\nSummary: %s\nDetail: %s", diag.Description().Summary, diag.Description().Detail) + } + t.Fatal("unexpected diagnostics") + } + + wantLog := []string{ + "start downloading https://example.com/self_dependency.tgz", + "downloaded https://example.com/self_dependency.tgz", + "reusing existing local copy of https://example.com/self_dependency.tgz", + + // NOTE: The exact ordering of these two pairs is an implementation + // detail of Builder: it consumes its "queues" in LIFO order. 
If you've + // changed that implementation to a different order then it's expected + // for this to mismatch and you can just reorder these as long as + // all of the same events appear in any sensible order. Callers are + // not allowed to depend on the relative ordering of events relating + // to different packages. + "start downloading https://example.com/dependency2.tgz", + "downloaded https://example.com/dependency2.tgz", + "start downloading https://example.com/dependency1.tgz", + "downloaded https://example.com/dependency1.tgz", + } + gotLog := tracer.log + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Errorf("wrong trace events\n%s", diff) + } + + bundle, err := builder.Close() + if err != nil { + t.Fatalf("failed to close bundle: %s", err) + } + + t.Run("starting package", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(startSource) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", startSource.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "dependencies")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) + t.Run("dependency 1", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(dep1Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", dep1Source.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "hello")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) + t.Run("dependency 2", func(t *testing.T) { + localPkgDir, err := 
bundle.LocalPathForRemoteSource(dep2Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", dep2Source.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "included")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) +} + +func TestBuilderTerraformIgnore(t *testing.T) { + tracer := testBuildTracer{} + ctx := tracer.OnContext(context.Background()) + + targetDir := t.TempDir() + builder := testingBuilder( + t, targetDir, + map[string]string{ + "https://example.com/ignore.tgz": "testdata/pkgs/terraformignore", + }, + nil, + ) + + startSource := sourceaddrs.MustParseSource("https://example.com/ignore.tgz").(sourceaddrs.RemoteSource) + diags := builder.AddRemoteSource(ctx, startSource, noDependencyFinder) + if len(diags) > 0 { + for _, diag := range diags { + t.Errorf("unexpected diagnostic\nSummary: %s\nDetail: %s", diag.Description().Summary, diag.Description().Detail) + } + t.Fatal("unexpected diagnostics") + } + + wantLog := []string{ + "start downloading https://example.com/ignore.tgz", + "downloaded https://example.com/ignore.tgz", + } + gotLog := tracer.log + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Errorf("wrong trace events\n%s", diff) + } + + bundle, err := builder.Close() + if err != nil { + t.Fatalf("failed to close bundle: %s", err) + } + + localPkgDir, err := bundle.LocalPathForRemoteSource(startSource) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", startSource.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "included")); err != nil { + t.Errorf("problem with output file: %s", 
err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + + if _, err := os.Lstat(filepath.Join(localPkgDir, "excluded")); err == nil { + t.Errorf("excluded file exists; should have been removed") + } else if !errors.Is(err, fs.ErrNotExist) { + t.Errorf("excluded file exists but is not readable; should have been removed altogether") + } +} + +func TestBuilderCoalescePackages(t *testing.T) { + tracer := testBuildTracer{} + ctx := tracer.OnContext(context.Background()) + + targetDir := t.TempDir() + builder := testingBuilder( + t, targetDir, + map[string]string{ + "https://example.com/with-deps.tgz": "testdata/pkgs/with-remote-deps", + "https://example.com/dependency1.tgz": "testdata/pkgs/hello", + "https://example.com/dependency2.tgz": "testdata/pkgs/hello", + }, + nil, + ) + + startSource := sourceaddrs.MustParseSource("https://example.com/with-deps.tgz").(sourceaddrs.RemoteSource) + dep1Source := sourceaddrs.MustParseSource("https://example.com/dependency1.tgz").(sourceaddrs.RemoteSource) + dep2Source := sourceaddrs.MustParseSource("https://example.com/dependency2.tgz").(sourceaddrs.RemoteSource) + diags := builder.AddRemoteSource(ctx, startSource, stubDependencyFinder{filename: "dependencies"}) + if len(diags) > 0 { + for _, diag := range diags { + t.Errorf("unexpected diagnostic\nSummary: %s\nDetail: %s", diag.Description().Summary, diag.Description().Detail) + } + t.Fatal("unexpected diagnostics") + } + + wantLog := []string{ + "start downloading https://example.com/with-deps.tgz", + "downloaded https://example.com/with-deps.tgz", + + // NOTE: The exact ordering of these two pairs is an implementation + // detail of Builder: it consumes its "queues" in LIFO order. If you've + // changed that implementation to a different order then it's expected + // for this to mismatch and you can just reorder these as long as + // all of the same events appear in any sensible order. 
Callers are + // not allowed to depend on the relative ordering of events relating + // to different packages. + "start downloading https://example.com/dependency2.tgz", + "downloaded https://example.com/dependency2.tgz", + "start downloading https://example.com/dependency1.tgz", + "downloaded https://example.com/dependency1.tgz", + } + gotLog := tracer.log + if diff := cmp.Diff(wantLog, gotLog); diff != "" { + t.Errorf("wrong trace events\n%s", diff) + } + + bundle, err := builder.Close() + if err != nil { + t.Fatalf("failed to close bundle: %s", err) + } + + t.Run("starting package", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(startSource) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", startSource.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "dependencies")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) + t.Run("dependency 1", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(dep1Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s: %s", dep1Source.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "hello")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + }) + t.Run("dependency 2", func(t *testing.T) { + localPkgDir, err := bundle.LocalPathForRemoteSource(dep2Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not 
know a local directory for %s: %s", dep2Source.Package(), err) + } + + if info, err := os.Lstat(filepath.Join(localPkgDir, "hello")); err != nil { + t.Errorf("problem with output file: %s", err) + } else if !info.Mode().IsRegular() { + t.Errorf("output file is not a regular file") + } + + // The package directory for dependency 2 should be the same as for + // dependency 1 because they both have identical content, despite + // having different source addresses. + otherLocalPkgDir, err := bundle.LocalPathForRemoteSource(dep1Source) + if err != nil { + for pkgAddr, localDir := range builder.remotePackageDirs { + t.Logf("contents of %s are in %s", pkgAddr, localDir) + } + t.Fatalf("builder does not know a local directory for %s", dep1Source.Package()) + } + if otherLocalPkgDir != localPkgDir { + t.Errorf("'hello' packages were not coalesced\ndep1 path: %s\ndep2 path: %s", otherLocalPkgDir, localPkgDir) + } + }) +} + +func testingBuilder(t *testing.T, targetDir string, remotePackages map[string]string, registryPackages map[string]map[string]string) *Builder { + t.Helper() + + type fakeRemotePackage struct { + sourceType string + url *url.URL + localDir string + } + type fakeRegistryPackage struct { + pkgAddr regaddr.ModulePackage + versions map[versions.Version]sourceaddrs.RemoteSource + } + + remotePkgs := make([]fakeRemotePackage, 0, len(remotePackages)) + registryPkgs := make([]fakeRegistryPackage, 0, len(registryPackages)) + + for pkgAddrRaw, localDir := range remotePackages { + pkgAddr, err := sourceaddrs.ParseRemotePackage(pkgAddrRaw) + if err != nil { + t.Fatalf("invalid remote package address %q: %s", pkgAddrRaw, err) + } + remotePkgs = append(remotePkgs, fakeRemotePackage{ + sourceType: pkgAddr.SourceType(), + url: pkgAddr.URL(), + localDir: localDir, + }) + } + + for pkgAddrRaw, versionsRaw := range registryPackages { + pkgAddr, err := sourceaddrs.ParseRegistryPackage(pkgAddrRaw) + if err != nil { + t.Fatalf("invalid registry package address %q: %s", 
pkgAddrRaw, err) + } + pkg := fakeRegistryPackage{ + pkgAddr: pkgAddr, + versions: make(map[versions.Version]sourceaddrs.RemoteSource), + } + for versionRaw, sourceAddrRaw := range versionsRaw { + version, err := versions.ParseVersion(versionRaw) + if err != nil { + t.Fatalf("invalid registry package version %q for %s: %s", versionRaw, pkgAddr, err) + } + sourceAddr, err := sourceaddrs.ParseRemoteSource(sourceAddrRaw) + if err != nil { + t.Fatalf("invalid registry package source address %q for %s %s: %s", sourceAddrRaw, pkgAddr, version, err) + } + pkg.versions[version] = sourceAddr + } + registryPkgs = append(registryPkgs, pkg) + } + + fetcher := packageFetcherFunc(func(ctx context.Context, sourceType string, url *url.URL, targetDir string) (*PackageMeta, error) { + // Our fake implementation of "fetching" is to just copy one local + // directory into another. + for _, pkg := range remotePkgs { + if pkg.sourceType != sourceType { + continue + } + if pkg.url.String() != url.String() { + continue + } + localDir := pkg.localDir + err := copyDir(targetDir, localDir) + if err != nil { + return nil, fmt.Errorf("copying %s to %s: %w", localDir, targetDir, err) + } + return nil, nil + } + return nil, fmt.Errorf("no fake remote package matches %s %s", sourceType, url) + }) + + registryClient := registryClientFuncs{ + modulePackageVersions: func(ctx context.Context, pkgAddr regaddr.ModulePackage) (versions.List, error) { + for _, pkg := range registryPkgs { + if pkg.pkgAddr != pkgAddr { + continue + } + ret := make(versions.List, 0, len(pkg.versions)) // length 0, capacity only: append must not follow zero-value versions + for version := range pkg.versions { + ret = append(ret, version) + } + return ret, nil + } + return nil, fmt.Errorf("no fake registry package matches %s", pkgAddr) + }, + modulePackageSourceAddr: func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version) (sourceaddrs.RemoteSource, error) { + for _, pkg := range registryPkgs { + if pkg.pkgAddr != pkgAddr { + continue + } + sourceAddr, ok := 
pkg.versions[version] + if !ok { + return sourceaddrs.RemoteSource{}, fmt.Errorf("no fake registry package matches %s %s", pkgAddr, version) + } + return sourceAddr, nil + } + return sourceaddrs.RemoteSource{}, fmt.Errorf("no fake registry package matches %s", pkgAddr) + }, + } + + builder, err := NewBuilder(targetDir, fetcher, registryClient) + if err != nil { + t.Fatalf("failed to create builder: %s", err) + } + return builder +} + +// testBuildTracer is a BuildTracer that just remembers calls in memory +// as strings, for relatively-easy comparison in tests. +type testBuildTracer struct { + log []string +} + +func (t *testBuildTracer) OnContext(ctx context.Context) context.Context { + trace := BuildTracer{ + RegistryPackageVersionsStart: func(ctx context.Context, pkgAddr regaddr.ModulePackage) context.Context { + t.appendLogf("start requesting versions for %s", pkgAddr) + return ctx + }, + RegistryPackageVersionsSuccess: func(ctx context.Context, pkgAddr regaddr.ModulePackage, versions versions.List) { + t.appendLogf("success requesting versions for %s", pkgAddr) + }, + RegistryPackageVersionsFailure: func(ctx context.Context, pkgAddr regaddr.ModulePackage, err error) { + t.appendLogf("error requesting versions for %s: %s", pkgAddr, err) + }, + RegistryPackageVersionsAlready: func(ctx context.Context, pkgAddr regaddr.ModulePackage, versions versions.List) { + t.appendLogf("reusing existing versions for %s", pkgAddr) + }, + + RegistryPackageSourceStart: func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version) context.Context { + t.appendLogf("start requesting source address for %s %s", pkgAddr, version) + return ctx + }, + RegistryPackageSourceSuccess: func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version, sourceAddr sourceaddrs.RemoteSource) { + t.appendLogf("source address for %s %s is %s", pkgAddr, version, sourceAddr) + }, + RegistryPackageSourceFailure: func(ctx context.Context, pkgAddr 
regaddr.ModulePackage, version versions.Version, err error) { + t.appendLogf("error requesting source address for %s %s: %s", pkgAddr, version, err) + }, + RegistryPackageSourceAlready: func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version, sourceAddr sourceaddrs.RemoteSource) { + t.appendLogf("reusing existing source address for %s %s: %s", pkgAddr, version, sourceAddr) + }, + + RemotePackageDownloadStart: func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage) context.Context { + t.appendLogf("start downloading %s", pkgAddr) + return ctx + }, + RemotePackageDownloadSuccess: func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage) { + t.appendLogf("downloaded %s", pkgAddr) + }, + RemotePackageDownloadFailure: func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage, err error) { + t.appendLogf("failed to download %s: %s", pkgAddr, err) + }, + RemotePackageDownloadAlready: func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage) { + t.appendLogf("reusing existing local copy of %s", pkgAddr) + }, + + Diagnostics: func(ctx context.Context, diags Diagnostics) { + for _, diag := range diags { + switch diag.Severity() { + case DiagError: + t.appendLogf("Error: %s", diag.Description().Summary) + case DiagWarning: + t.appendLogf("Warning: %s", diag.Description().Summary) + default: + t.appendLogf("Diagnostic with invalid severity: %s", diag.Description().Summary) + } + } + }, + } + return trace.OnContext(ctx) +} + +func (t *testBuildTracer) appendLogf(f string, v ...interface{}) { + t.log = append(t.log, fmt.Sprintf(f, v...)) +} + +type packageFetcherFunc func(ctx context.Context, sourceType string, url *url.URL, targetDir string) (*PackageMeta, error) + +func (f packageFetcherFunc) FetchSourcePackage(ctx context.Context, sourceType string, url *url.URL, targetDir string) (*PackageMeta, error) { + return f(ctx, sourceType, url, targetDir) +} + +type registryClientFuncs struct { + modulePackageVersions func(ctx context.Context, 
pkgAddr regaddr.ModulePackage) (versions.List, error) + modulePackageSourceAddr func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version) (sourceaddrs.RemoteSource, error) +} + +func (f registryClientFuncs) ModulePackageVersions(ctx context.Context, pkgAddr regaddr.ModulePackage) (versions.List, error) { + return f.modulePackageVersions(ctx, pkgAddr) +} + +func (f registryClientFuncs) ModulePackageSourceAddr(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version) (sourceaddrs.RemoteSource, error) { + return f.modulePackageSourceAddr(ctx, pkgAddr, version) +} + +type noopDependencyFinder struct{} + +func (f noopDependencyFinder) FindDependencies(fsys fs.FS, subPath string, deps *Dependencies) Diagnostics { + return nil +} + +var noDependencyFinder = noopDependencyFinder{} + +// stubDependencyFinder is a test-only [DependencyFinder] which just reads +// lines of text from a given filename and tries to treat each one as a source +// address, which it then reports as a dependency. 
+type stubDependencyFinder struct { + filename string + nextFilename string +} + +func (f stubDependencyFinder) FindDependencies(fsys fs.FS, subPath string, deps *Dependencies) Diagnostics { + var diags Diagnostics + filePath := path.Join(subPath, f.filename) + file, err := fsys.Open(filePath) + if err != nil { + if errors.Is(err, fs.ErrNotExist) { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Missing stub dependency file", + detail: fmt.Sprintf("There is no file %q in the package.", filePath), + }) + } else { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Invalid stub dependency file", + detail: fmt.Sprintf("Cannot open %q in the package: %s.", filePath, err), + }) + } + return diags + } + defer file.Close() // the file is only read here, so release it when the finder returns + sc := bufio.NewScanner(file) // defaults to scanning for lines + for sc.Scan() { + line := strings.TrimSpace(sc.Text()) + if line == "" || strings.HasPrefix(line, "#") { + continue + } + sourceAddrRaw, versionsRaw, hasVersions := strings.Cut(line, " ") + sourceAddr, err := sourceaddrs.ParseSource(sourceAddrRaw) + if err != nil { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Invalid source address in stub dependency file", + detail: fmt.Sprintf("Cannot use %q as a source address: %s.", sourceAddrRaw, err), + }) + continue + } + if hasVersions && !sourceAddr.SupportsVersionConstraints() { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Invalid source address in stub dependency file", + detail: fmt.Sprintf("Cannot specify a version constraint string for %s.", sourceAddr), + }) + continue + } + var allowedVersions versions.Set + if hasVersions { + cnsts, err := constraints.ParseRubyStyleMulti(versionsRaw) + if err != nil { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Invalid version constraints in stub dependency file", + detail: fmt.Sprintf("Cannot use %q as version constraints for %s: %s.", versionsRaw, sourceAddrRaw, 
err), + }) + continue + } + allowedVersions = versions.MeetingConstraints(cnsts) + } else { + allowedVersions = versions.All + } + + depFinder := DependencyFinder(noDependencyFinder) + if f.nextFilename != "" { + // If a next filename is specified then we're chaining to another + // dependency file for all of the discovered dependencies. + depFinder = stubDependencyFinder{filename: f.nextFilename} + } + + switch sourceAddr := sourceAddr.(type) { + case sourceaddrs.RemoteSource: + deps.AddRemoteSource(sourceAddr, depFinder) + case sourceaddrs.RegistrySource: + deps.AddRegistrySource(sourceAddr, allowedVersions, depFinder) + case sourceaddrs.LocalSource: + deps.AddLocalSource(sourceAddr, depFinder) + default: + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Unsupported source address type", + detail: fmt.Sprintf("stubDependencyFinder doesn't support %T addresses", sourceAddr), + }) + continue + } + } + if err := sc.Err(); err != nil { + diags = diags.Append(&internalDiagnostic{ + severity: DiagError, + summary: "Invalid stub dependency file", + detail: fmt.Sprintf("Failed to read %s in the package: %s.", filePath, err), + }) + return diags + } + + return diags +} + +func copyDir(dst, src string) error { + src, err := filepath.EvalSymlinks(src) + if err != nil { + return err + } + + walkFn := func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + if path == src { + return nil + } + + // The "path" has the src prefixed to it. We need to join our + // destination with the path without the src on it. + dstPath := filepath.Join(dst, path[len(src):]) + + // we don't want to try and copy the same file over itself. + if eq, err := sameFile(path, dstPath); eq { + return nil + } else if err != nil { + return err + } + + // If we have a directory, make that subdirectory, then continue + // the walk. + if info.IsDir() { + if path == filepath.Join(src, dst) { + // dst is in src; don't walk it. 
+ return nil + } + + if err := os.MkdirAll(dstPath, 0755); err != nil { + return err + } + + return nil + } + + // If the current path is a symlink, recreate the symlink relative to + // the dst directory + if info.Mode()&os.ModeSymlink == os.ModeSymlink { + target, err := os.Readlink(path) + if err != nil { + return err + } + + return os.Symlink(target, dstPath) + } + + // If we have a file, copy the contents. + srcF, err := os.Open(path) + if err != nil { + return err + } + defer srcF.Close() + + dstF, err := os.Create(dstPath) + if err != nil { + return err + } + defer dstF.Close() + + if _, err := io.Copy(dstF, srcF); err != nil { + return err + } + + // Chmod it + return os.Chmod(dstPath, info.Mode()) + } + + return filepath.Walk(src, walkFn) +} + +func sameFile(a, b string) (bool, error) { + if a == b { + return true, nil + } + + aInfo, err := os.Lstat(a) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + + bInfo, err := os.Lstat(b) + if err != nil { + if os.IsNotExist(err) { + return false, nil + } + return false, err + } + + return os.SameFile(aInfo, bInfo), nil +} diff --git a/sourcebundle/bundle.go b/sourcebundle/bundle.go new file mode 100644 index 0000000..166f525 --- /dev/null +++ b/sourcebundle/bundle.go @@ -0,0 +1,364 @@ +package sourcebundle + +import ( + "crypto/sha256" + "encoding/hex" + "encoding/json" + "fmt" + "io" + "io/fs" + "os" + "path" + "path/filepath" + "sort" + "strings" + + "github.com/apparentlymart/go-versions/versions" + "github.com/hashicorp/go-slug" + "github.com/hashicorp/go-slug/sourceaddrs" + regaddr "github.com/hashicorp/terraform-registry-address" +) + +const manifestFilename = "terraform-sources.json" + +type Bundle struct { + rootDir string + + manifestChecksum string + + remotePackageDirs map[sourceaddrs.RemotePackage]string + remotePackageMeta map[sourceaddrs.RemotePackage]*PackageMeta + + registryPackageSources 
map[regaddr.ModulePackage]map[versions.Version]sourceaddrs.RemoteSource +} + +// OpenDir opens a bundle rooted at the given base directory. +// +// If OpenDir succeeds then nothing else (inside or outside the calling program) +// may modify anything under the given base directory for the lifetime of +// the returned [Bundle] object. If the bundle directory is modified while the +// object is still alive then behavior is undefined. +func OpenDir(baseDir string) (*Bundle, error) { + // We'll take the absolute form of the directory to be resilient in case + // something else in this program rudely changes the current working + // directory while the bundle is still alive. + rootDir, err := filepath.Abs(baseDir) + if err != nil { + return nil, fmt.Errorf("cannot resolve base directory: %w", err) + } + + ret := &Bundle{ + rootDir: rootDir, + remotePackageDirs: make(map[sourceaddrs.RemotePackage]string), + remotePackageMeta: make(map[sourceaddrs.RemotePackage]*PackageMeta), + registryPackageSources: make(map[regaddr.ModulePackage]map[versions.Version]sourceaddrs.RemoteSource), + } + + manifestSrc, err := os.ReadFile(filepath.Join(rootDir, manifestFilename)) + if err != nil { + return nil, fmt.Errorf("cannot read manifest: %w", err) + } + + manifestSum := sha256.Sum256(manifestSrc) // SHA-256 of the manifest bytes; hash.Sum(b) would only append the empty-input digest to b + ret.manifestChecksum = hex.EncodeToString(manifestSum[:]) + + var manifest manifestRoot + err = json.Unmarshal(manifestSrc, &manifest) + if err != nil { + return nil, fmt.Errorf("invalid manifest: %w", err) + } + if manifest.FormatVersion != 1 { + return nil, fmt.Errorf("invalid manifest: unsupported format version %d", manifest.FormatVersion) + } + + for _, rpm := range manifest.Packages { + // We'll be quite fussy about the local directory name to avoid a + // crafted manifest sending us to other random places in the filesystem. + // It must be just a single directory name, without any path separators + // or any traversals. 
+ localDir := filepath.ToSlash(rpm.LocalDir) + if !fs.ValidPath(localDir) || localDir == "." || strings.IndexByte(localDir, '/') >= 0 { + return nil, fmt.Errorf("invalid package directory name %q", rpm.LocalDir) + } + + pkgAddr, err := sourceaddrs.ParseRemotePackage(rpm.SourceAddr) + if err != nil { + return nil, fmt.Errorf("invalid remote package address %q: %w", rpm.SourceAddr, err) + } + ret.remotePackageDirs[pkgAddr] = localDir + + if rpm.Meta.GitCommitID != "" { + ret.remotePackageMeta[pkgAddr] = PackageMetaWithGitCommit(rpm.Meta.GitCommitID) + } + } + + for _, rpm := range manifest.RegistryMeta { + pkgAddr, err := sourceaddrs.ParseRegistryPackage(rpm.SourceAddr) + if err != nil { + return nil, fmt.Errorf("invalid registry package address %q: %w", rpm.SourceAddr, err) + } + vs := ret.registryPackageSources[pkgAddr] + if vs == nil { + vs = make(map[versions.Version]sourceaddrs.RemoteSource) + ret.registryPackageSources[pkgAddr] = vs + } + for versionStr, mv := range rpm.Versions { + version, err := versions.ParseVersion(versionStr) + if err != nil { + return nil, fmt.Errorf("invalid registry package version %q: %w", versionStr, err) + } + sourceAddr, err := sourceaddrs.ParseRemoteSource(mv.SourceAddr) + if err != nil { + return nil, fmt.Errorf("invalid registry package source address %q: %w", mv.SourceAddr, err) + } + vs[version] = sourceAddr + } + } + + return ret, nil +} + +// LocalPathForRemoteSource returns the local path within the bundle that +// corresponds with the given source address, or an error if the source address +// is within a source package not included in the bundle. 
+func (b *Bundle) LocalPathForRemoteSource(addr sourceaddrs.RemoteSource) (string, error) { + pkgAddr := addr.Package() + localName, ok := b.remotePackageDirs[pkgAddr] + if !ok { + return "", fmt.Errorf("source bundle does not include %s", pkgAddr) + } + subPath := filepath.FromSlash(addr.SubPath()) + return filepath.Join(b.rootDir, localName, subPath), nil +} + +// LocalPathForRegistrySource returns the local path within the bundle that +// corresponds with the given registry address and version, or an error if the +// source address is within a source package not included in the bundle. +// +// A source bundle does not have any direct representation of local source +// addresses -- they are always relative to a location in a remote source +// package -- so this function will always fail when given a local source +// address. +func (b *Bundle) LocalPathForRegistrySource(addr sourceaddrs.RegistrySource, version versions.Version) (string, error) { + pkgAddr := addr.Package() + vs, ok := b.registryPackageSources[pkgAddr] + if !ok { + return "", fmt.Errorf("source bundle does not include %s", pkgAddr) + } + baseSourceAddr, ok := vs[version] + if !ok { + return "", fmt.Errorf("source bundle does not include %s v%s", pkgAddr, version) + } + + // The address we were given might have its own source address, so we need + // to incorporate that into our result. + finalSourceAddr := addr.FinalSourceAddr(baseSourceAddr) + return b.LocalPathForRemoteSource(finalSourceAddr) +} + +// SourceForLocalPath is the inverse of [Bundle.LocalPathForSource], +// translating a local path beneath the bundle's base directory back into +// a source address that it's a snapshot of. +// +// Returns an error if the given directory is not within the bundle's base +// directory, or is not within one of the subdirectories of the bundle +// that represents a source package. 
A caller using this to present more +// user-friendly file paths in error messages etc could reasonably choose +// to just retain the source string if this function returns an error, and +// not show the error to the user. +// +// The [Bundle] implementation is optimized for forward lookups from source +// address to local path rather than the other way around, so this function +// may be considerably more expensive than the forward lookup and is intended +// primarily for reporting friendly source locations in diagnostic messages +// instead of exposing the opaque internal directory names from the source +// bundle. This function should not typically be used in performance-sensitive +// portions of the happy path. +func (b *Bundle) SourceForLocalPath(p string) (sourceaddrs.Source, error) { + // This implementation is a best effort sort of thing, and might not + // always succeed in awkward cases. + + // We'll start by making our path absolute because that'll make it + // more comparable with b.rootDir, which is also absolute. + absPath, err := filepath.Abs(p) + if err != nil { + return nil, fmt.Errorf("can't determine absolute path for %q: %w", p, err) + } + + // Now we'll reinterpret the path as relative to our base directory, + // so we can see what local directory name it starts with. + relPath, err := filepath.Rel(b.rootDir, absPath) + if err != nil { + // If the path can't be made relative then that suggests it's on a + // different volume, such as a different drive letter on Windows. + return nil, fmt.Errorf("path %q does not belong to the source bundle", absPath) + } + + // We'll do all of our remaining work in the abstract "forward-slash-path" + // mode, matching how we represent "sub-paths" for our source addresses. + subPath := path.Clean(filepath.ToSlash(relPath)) + if !fs.ValidPath(subPath) || subPath == "." 
{ + // If the path isn't "valid" by now then that suggests it's a + // path outside of our source bundle which would appear as a + // path with a ".." segment on the front, or to the root of + // our source bundle which would appear as "." and isn't part + // of any particular package. + return nil, fmt.Errorf("path %q does not belong to the source bundle", absPath) + } + + // If all of the above passed then we should now have one or more + // slash-separated path segments. The first one should be one of the + // local directories we know from our manifest, and then the rest is + // the sub-path in the associated package. + localDir, subPath, _ := strings.Cut(subPath, "/") + + // There can be potentially several packages all referring to the same + // directory, so to make the result deterministic we'll just take the + // one whose stringified source address is shortest. + var pkgAddr sourceaddrs.RemotePackage + found := false + for candidateAddr, candidateDir := range b.remotePackageDirs { + if candidateDir != localDir { + continue + } + if found { + // We've found multiple possible source addresses, so we + // need to decide which one to keep. + if len(candidateAddr.String()) > len(pkgAddr.String()) { + continue + } + } + pkgAddr = candidateAddr + found = true + } + + if !found { + return nil, fmt.Errorf("path %q does not belong to the source bundle", absPath) + } + + return pkgAddr.SourceAddr(subPath), nil +} + +// ChecksumV1 returns a checksum of the contents of the source bundle that +// can be used to determine if another source bundle is equivalent to this one. +// +// "Equivalent" means that it contains all of the same source packages with +// identical content each. +// +// A successful result is a string with the prefix "h1:" to indicate that +// it was built with checksum algorithm version 1. Future versions may +// introduce other checksum formats. 
+func (b *Bundle) ChecksumV1() (string, error) { + // Our first checksum format assumes that the checksum of the manifest + // is sufficient to cover the entire archive, which in turn assumes that + // the builder either directly or indirectly encodes the checksum of + // each package into the manifest. For the initial implementation of + // Builder we achieve that by using the checksum as the directory name + // for each package, which avoids the need to redundantly store the + // checksum again. If a future Builder implementation moves away from + // using checksums as directory names then the builder will need to + // introduce explicit checksums as a separate property into the manifest + // in order to preserve our assumptions here. + return "h1:" + b.manifestChecksum, nil +} + +// RemotePackages returns a slice of all of the remote source packages that +// contributed to this source bundle. +// +// The result is sorted into a consistent but unspecified order. +func (b *Bundle) RemotePackages() []sourceaddrs.RemotePackage { + ret := make([]sourceaddrs.RemotePackage, 0, len(b.remotePackageDirs)) + for pkgAddr := range b.remotePackageDirs { + ret = append(ret, pkgAddr) + } + sort.Slice(ret, func(i, j int) bool { + return ret[i].String() < ret[j].String() + }) + return ret +} + +// RemotePackageMeta returns the package metadata for the given package address, +// or nil if there is no metadata for that package tracked in the bundle. +func (b *Bundle) RemotePackageMeta(pkgAddr sourceaddrs.RemotePackage) *PackageMeta { + return b.remotePackageMeta[pkgAddr] +} + +// RegistryPackages returns a list of all of the distinct registry packages +// that contributed to this bundle. +// +// The result is in a consistent but unspecified sorted order. 
+func (b *Bundle) RegistryPackages() []regaddr.ModulePackage { + ret := make([]regaddr.ModulePackage, 0, len(b.remotePackageDirs)) + for pkgAddr := range b.registryPackageSources { + ret = append(ret, pkgAddr) + } + sort.Slice(ret, func(i, j int) bool { + return ret[i].String() < ret[j].String() + }) + return ret +} + +// RegistryPackageVersions returns a list of all of the versions of the given +// module registry package that this bundle has package content for. +// +// This result can be used as a substitute for asking the remote registry which +// versions are available in any situation where a caller is interested only +// in what's bundled, and will not consider installing anything new from +// the origin registry. +// +// The result is guaranteed to be sorted with lower-precedence version numbers +// placed earlier in the list. +func (b *Bundle) RegistryPackageVersions(pkgAddr regaddr.ModulePackage) versions.List { + vs := b.registryPackageSources[pkgAddr] + if len(vs) == 0 { + return nil + } + ret := make(versions.List, 0, len(vs)) + for v := range vs { + ret = append(ret, v) + } + ret.Sort() + return ret +} + +// RegistryPackageSourceAddr returns the remote source address corresponding +// to the given version of the given module package, or sets its second return +// value to false if no such version is included in the bundle. +func (b *Bundle) RegistryPackageSourceAddr(pkgAddr regaddr.ModulePackage, version versions.Version) (sourceaddrs.RemoteSource, bool) { + sourceAddr, ok := b.registryPackageSources[pkgAddr][version] + return sourceAddr, ok +} + +// WriteArchive writes a source bundle archive containing the same contents +// as the bundle to the given writer. +// +// A source bundle archive is a gzip-compressed tar stream that can then +// be extracted in some other location to produce an equivalent source +// bundle directory. 
+func (b *Bundle) WriteArchive(w io.Writer) error { + // For this part we just delegate to the main slug packer, since a + // source bundle archive is effectively just a slug with multiple packages + // (and a manifest) inside it. + packer, err := slug.NewPacker(slug.DereferenceSymlinks()) + if err != nil { + return fmt.Errorf("can't instantiate archive packer: %w", err) + } + _, err = packer.Pack(b.rootDir, w) + return err +} + +// ExtractArchive reads a source bundle archive from the given reader and +// extracts it into the given target directory, which must already exist and +// must be empty. +// +// If successful, it returns a [Bundle] value representing the created bundle, +// as if the given target directory were passed to [OpenDir]. +func ExtractArchive(r io.Reader, targetDir string) (*Bundle, error) { + // A bundle archive is just a slug archive created over a bundle + // directory, so we can use the normal unpack function to deal with it. + err := slug.Unpack(r, targetDir) + if err != nil { + return nil, err + } + return OpenDir(targetDir) +} diff --git a/sourcebundle/dependency_finder.go b/sourcebundle/dependency_finder.go new file mode 100644 index 0000000..4acdbab --- /dev/null +++ b/sourcebundle/dependency_finder.go @@ -0,0 +1,92 @@ +package sourcebundle + +import ( + "io/fs" + + "github.com/apparentlymart/go-versions/versions" + "github.com/hashicorp/go-slug/sourceaddrs" +) + +// A DependencyFinder analyzes a file or directory inside a source package +// and reports any dependencies described in that location. +// +// The same location could potentially be analyzed by multiple different +// DependencyFinder implementations if e.g. it's a directory containing +// a mixture of different kinds of artifact where each artifact has a +// disjoint set of relevant files. 
+// +// All DependencyFinder implementations must be comparable in the sense of +// supporting the == operator without panicking, and should typically be +// singletons, because [Builder] will use values of this type as part of +// the unique key for tracking whether a particular dependency has already +// been analyzed. A typical DependencyFinder implementation is an empty +// struct type with the FindDependency method implemented on it. +type DependencyFinder interface { + // FindDependencies should analyze the file or directory at the given + // sub-path of the given filesystem and then call the given callback + // once for each detected dependency, providing both its source + // address and the appropriate [DependencyFinder] for whatever kind + // of source artifact is expected at that source address. + // + // The same source address can potentially contain artifacts of multiple + // different types. The calling [Builder] will visit each distinct + // (source, finder) pair only once for analysis, and will also aim to + // avoid redundantly re-fetching the same source package more than once. + // + // If an implementer sends a local source address to the callback function, + // the calling [Builder] will automatically resolve that relative to + // the source address being analyzed. Implementers should typically first + // validate that the local address does not traverse up (with "..") more + // levels than are included in subPath, because implementers can return + // higher-quality error diagnostics (with source location information) + // than the calling Builder can. + // + // If the implementer emits diagnostics with source location information + // then the filenames in the source ranges must be strings that would + // pass [fs.ValidPath] describing a path from the root of the given fs + // to the file containing the error. The builder will then translate those + // paths into remote source address strings within the containing package. 
+ FindDependencies(fsys fs.FS, subPath string, deps *Dependencies) Diagnostics +} + +// Dependencies is part of the callback API for [DependencyFinder]. Dependency +// finders use the methods of this type to report the dependencies they find +// in the source artifact being analyzed. +type Dependencies struct { + baseAddr sourceaddrs.RemoteSource + + remoteCb func(source sourceaddrs.RemoteSource, depFinder DependencyFinder) + registryCb func(source sourceaddrs.RegistrySource, allowedVersions versions.Set, depFinder DependencyFinder) + localResolveErrCb func(err error) +} + +func (d *Dependencies) AddRemoteSource(source sourceaddrs.RemoteSource, depFinder DependencyFinder) { + d.remoteCb(source, depFinder) +} + +func (d *Dependencies) AddRegistrySource(source sourceaddrs.RegistrySource, allowedVersions versions.Set, depFinder DependencyFinder) { + d.registryCb(source, allowedVersions, depFinder) +} + +func (d *Dependencies) AddLocalSource(source sourceaddrs.LocalSource, depFinder DependencyFinder) { + // A local source always becomes a remote source in the same package as + // the current base address. + realSource, err := sourceaddrs.ResolveRelativeSource(d.baseAddr, source) + if err != nil { + d.localResolveErrCb(err) + return + } + // realSource is guaranteed to be a RemoteSource because source is + // a LocalSource and so the ResolveRelativeSource address is guaranteed + // to have the same source type as d.baseAddr. + d.remoteCb(realSource.(sourceaddrs.RemoteSource), depFinder) +} + +// disable ensures that a [DependencyFinder] implementation can't incorrectly +// hold on to its given Dependencies object and continue calling it after it +// returns. 
+func (d *Dependencies) disable() { + d.remoteCb = nil + d.registryCb = nil + d.localResolveErrCb = nil +} diff --git a/sourcebundle/diagnostics.go b/sourcebundle/diagnostics.go new file mode 100644 index 0000000..df2b932 --- /dev/null +++ b/sourcebundle/diagnostics.go @@ -0,0 +1,181 @@ +package sourcebundle + +import ( + "fmt" + + "github.com/hashicorp/go-slug/sourceaddrs" +) + +// Diagnostics is a collection of problems (errors and warnings) that occurred +// during an operation. +type Diagnostics []Diagnostic + +// Diagnostics represents a single problem (error or warning) that has occurred +// during an operation. +// +// This interface has no concrete implementations in this package. +// Implementors of [DependencyFinder] will need to implement this interface +// to report any problems they find while analyzing the designated source +// artifact. For example, a [DependencyFinder] that uses the HCL library +// to analyze an HCL-based language would probably implement this interface +// in terms of HCL's Diagnostic type. +type Diagnostic interface { + Severity() DiagSeverity + Description() DiagDescription + Source() DiagSource + + // ExtraInfo returns the raw extra information value. This is a low-level + // API which requires some work on the part of the caller to properly + // access associated information. This convention comes from HCL and + // Terraform and this is here primarily for their benefit; sourcebundle + // passes through these values verbatim without trying to interpret them. + ExtraInfo() interface{} +} + +func (diags Diagnostics) HasErrors() bool { + for _, diag := range diags { + if diag.Severity() == DiagError { + return true + } + } + return false +} + +func (diags Diagnostics) Append(more ...interface{}) Diagnostics { + for _, item := range more { + if item == nil { + continue + } + + switch item := item.(type) { + case Diagnostic: + diags = append(diags, item) + case Diagnostics: + diags = append(diags, item...) 
+ default: + panic(fmt.Errorf("can't construct diagnostic(s) from %T", item)) + } + } + return diags +} + +type DiagSeverity rune + +const ( + DiagError DiagSeverity = 'E' + DiagWarning DiagSeverity = 'W' +) + +type DiagDescription struct { + Summary string + Detail string +} + +type DiagSource struct { + Subject *SourceRange + Context *SourceRange +} + +type SourceRange struct { + // Filename is a human-oriented label for the file that the range belongs + // to. This is often the string representation of a source address, but + // isn't guaranteed to be. + Filename string + Start, End SourcePos +} + +type SourcePos struct { + Line, Column, Byte int +} + +// diagnosticInSourcePackage is a thin wrapper around diagnostic that +// reinterprets the filenames in any source ranges to be relative to a +// particular remote source package, so it's unambiguous which remote +// source package the diagnostic originated in. +type diagnosticInSourcePackage struct { + wrapped Diagnostic + pkg sourceaddrs.RemotePackage +} + +// inRemoteSourcePackage modifies the reciever in-place so that all of the +// diagnostics will have their source filenames (if any) interpreted as +// sub-paths within the given source package. +// +// For convenience, returns the same diags slice whose backing array has now +// been modified with different diagnostics. 
+func (diags Diagnostics) inRemoteSourcePackage(pkg sourceaddrs.RemotePackage) Diagnostics { + for i, diag := range diags { + diags[i] = diagnosticInSourcePackage{ + wrapped: diag, + pkg: pkg, + } + } + return diags +} + +var _ Diagnostic = diagnosticInSourcePackage{} + +func (diag diagnosticInSourcePackage) Description() DiagDescription { + return diag.wrapped.Description() +} + +func (diag diagnosticInSourcePackage) ExtraInfo() interface{} { + return diag.wrapped.ExtraInfo() +} + +func (diag diagnosticInSourcePackage) Severity() DiagSeverity { + return diag.wrapped.Severity() +} + +func (diag diagnosticInSourcePackage) Source() DiagSource { + ret := diag.Source() + if ret.Subject != nil && sourceaddrs.ValidSubPath(ret.Subject.Filename) { + newRng := *ret.Subject // shallow copy + newRng.Filename = diag.pkg.SourceAddr(newRng.Filename).String() + ret.Subject = &newRng + } + if ret.Context != nil && sourceaddrs.ValidSubPath(ret.Context.Filename) { + newRng := *ret.Context // shallow copy + newRng.Filename = diag.pkg.SourceAddr(newRng.Filename).String() + ret.Context = &newRng + } + return ret +} + +// internalDiagnostic is a diagnostic type used to report this package's own +// errors as diagnostics. +// +// This package doesn't ever work directly with individual source file contents, +// so an internal diagnostic never has source location information. 
+type internalDiagnostic struct { + severity DiagSeverity + summary string + detail string +} + +var _ Diagnostic = (*internalDiagnostic)(nil) + +// Description implements Diagnostic +func (d *internalDiagnostic) Description() DiagDescription { + return DiagDescription{ + Summary: d.summary, + Detail: d.detail, + } +} + +// ExtraInfo implements Diagnostic +func (d *internalDiagnostic) ExtraInfo() interface{} { + return nil +} + +// Severity implements Diagnostic +func (d *internalDiagnostic) Severity() DiagSeverity { + return d.severity +} + +// Source implements Diagnostic +func (d *internalDiagnostic) Source() DiagSource { + return DiagSource{ + // Never any source location information for internal diagnostics. + } +} diff --git a/sourcebundle/doc.go b/sourcebundle/doc.go new file mode 100644 index 0000000..5331fd1 --- /dev/null +++ b/sourcebundle/doc.go @@ -0,0 +1,12 @@ +// Package sourcebundle deals with the construction of and later consumption of +// "source bundles", which are in some sense "meta-slugs" that capture a +// variety of different source packages together into a single working +// directory, which can optionally be bundled up into an archive for insertion +// into a blob storage system. +// +// Whereas single slugs (as implemented in the parent package) have very little +// predefined structure aside from the possibility of a .terraformignore file, +// source bundles have a more prescriptive structure that allows callers to +// use a source bundle as a direct substitute for fetching the individual +// source packages it was built from. +package sourcebundle diff --git a/sourcebundle/manifest_json.go b/sourcebundle/manifest_json.go new file mode 100644 index 0000000..c2257fb --- /dev/null +++ b/sourcebundle/manifest_json.go @@ -0,0 +1,48 @@ +package sourcebundle + +// This file contains some internal-only types used to help with marshalling +// and unmarshalling our manifest file format. 
The manifest format is not +// itself a public interface, so these should stay unexported and any caller +// that needs to interact with previously-generated source bundle manifests +// should do so via the Bundle type. + +type manifestRoot struct { + // FormatVersion should always be 1 for now, because there is only + // one version of this format. + FormatVersion uint64 `json:"terraform_source_bundle"` + + Packages []manifestRemotePackage `json:"packages,omitempty"` + RegistryMeta []manifestRegistryMeta `json:"registry,omitempty"` +} + +type manifestRemotePackage struct { + // SourceAddr is the address of an entire remote package, meaning that + // it must not have a sub-path portion. + SourceAddr string `json:"source"` + + // LocalDir is the name of the subdirectory of the bundle containing the + // source code for this package. + LocalDir string `json:"local"` + + Meta manifestPackageMeta `json:"meta,omitempty"` +} + +type manifestRegistryMeta struct { + // SourceAddr is the address of an entire registry package, meaning that + // it must not have a sub-path portion. + SourceAddr string `json:"source"` + + // Versions is a map from string representations of [versions.Version]. + Versions map[string]manifestRegistryVersion `json:"versions,omitempty"` +} + +type manifestRegistryVersion struct { + // This SourceAddr is a full source address, so it might potentially + // have a sub-path portion. If it does then it must be combined with + // any sub-path included in the user's registry module source address. 
+ SourceAddr string `json:"source"` +} + +type manifestPackageMeta struct { + GitCommitID string `json:"git_commit_id,omitempty"` +} diff --git a/sourcebundle/package_fetcher.go b/sourcebundle/package_fetcher.go new file mode 100644 index 0000000..229ac40 --- /dev/null +++ b/sourcebundle/package_fetcher.go @@ -0,0 +1,36 @@ +package sourcebundle + +import ( + "context" + "net/url" +) + +// A PackageFetcher knows how to fetch remote source packages into a local +// filesystem directory. +// +type PackageFetcher interface { + // FetchSourcePackage retrieves the a source package from the given + // location and extracts it into the given local filesystem directory. + // + // A package fetcher is responsible for ensuring that nothing gets written + // outside of the given target directory. However, a fetcher can assume that + // nothing should be modifying or moving targetDir and or any of its contents + // concurrently with the fetcher running. + // + // If the function returns with a nil error then the target directory must be + // a complete copy of the designated remote package, ready for further analysis. + // + // Package fetchers should respond to cancellation of the given + // [context.Context] to a reasonable extent, so that the source bundle build + // process can be interrupted relatively promptly. Return a non-nil error when + // cancelled to allow the caller to detect that the target directory might not + // be in a consistent state. + // + // PackageFetchers should not have any persistent mutable state: each call + // should be independent of all past, concurrent, and future calls. In + // particular, a fetcher should not attempt to implement any caching behavior, + // because it's [Builder]'s responsibility to handle caching and request + // coalescing during bundle construction to ensure that it will happen + // consistently across different fetcher implementations. 
+ FetchSourcePackage(ctx context.Context, sourceType string, url *url.URL, targetDir string) (*PackageMeta, error) +} diff --git a/sourcebundle/package_meta.go b/sourcebundle/package_meta.go new file mode 100644 index 0000000..a195d0d --- /dev/null +++ b/sourcebundle/package_meta.go @@ -0,0 +1,37 @@ +package sourcebundle + +// PackageMeta is a collection of metadata about how the content of a +// particular remote package was derived. +// +// A nil value of this type represents no metadata. A non-nil value will +// typically omit some or all of the fields if they are not relevant. +type PackageMeta struct { + // NOTE: Everything in here is unexported for now because it's not clear + // how this is going to evolve in future and whether it's a good idea + // to just have a separate field for each piece of metadata. This will + // give some freedom to switch to other storage strategies in future if + // this struct ends up getting too big and is only sparsely used by most + // fetchers. + + gitCommitID string +} + +// PackageMetaWithGitCommit returns a [PackageMeta] object with a Git Commit +// ID tracked. The given commit ID must be a fully-qualified ID, and never an +// abbreviated commit ID, the name of a ref, or anything other proxy-for-commit +// identifier. +func PackageMetaWithGitCommit(commitID string) *PackageMeta { + return &PackageMeta{ + gitCommitID: commitID, + } +} + +// If the content of this package was derived from a particular commit +// from a Git repository, GitCommitID returns the fully-qualified ID of +// that commit. This is never an abbreviated commit ID, the name of a ref, +// or anything else that could serve as a proxy for a commit ID. +// +// If there is no relevant commit ID for this package, returns an empty string. 
+func (m *PackageMeta) GitCommitID() string { + return m.gitCommitID +} diff --git a/sourcebundle/registry_client.go b/sourcebundle/registry_client.go new file mode 100644 index 0000000..e811334 --- /dev/null +++ b/sourcebundle/registry_client.go @@ -0,0 +1,28 @@ +package sourcebundle + +import ( + "context" + + "github.com/apparentlymart/go-versions/versions" + "github.com/hashicorp/go-slug/sourceaddrs" + regaddr "github.com/hashicorp/terraform-registry-address" +) + +// RegistryClient provides a minimal client for the Terraform module registry +// protocol, sufficient to find the available versions for a particular +// registry entry and then to find the real remote package for a particular +// version. +// +// An implementation should not itself attempt to cache the direct results of +// the client methods, but it can (and probably should) cache prerequisite +// information such as the results of performing service discovery against +// the hostname in a module package address. +type RegistryClient interface { + // ModulePackageVersions returns all of the known exact versions + // available for the given package in its module registry. + ModulePackageVersions(ctx context.Context, pkgAddr regaddr.ModulePackage) (versions.List, error) + + // ModulePackageSourceAddr returns the real remote source address for the + // given version of the given module registry package. + ModulePackageSourceAddr(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version) (sourceaddrs.RemoteSource, error) +} diff --git a/sourcebundle/testdata/pkgs/hello/hello b/sourcebundle/testdata/pkgs/hello/hello new file mode 100644 index 0000000..af5626b --- /dev/null +++ b/sourcebundle/testdata/pkgs/hello/hello @@ -0,0 +1 @@ +Hello, world! 
diff --git a/sourcebundle/testdata/pkgs/subdirs/a/b/beepbeep b/sourcebundle/testdata/pkgs/subdirs/a/b/beepbeep new file mode 100644 index 0000000..461ea4a --- /dev/null +++ b/sourcebundle/testdata/pkgs/subdirs/a/b/beepbeep @@ -0,0 +1 @@ +BEEP! diff --git a/sourcebundle/testdata/pkgs/terraformignore/.terraformignore b/sourcebundle/testdata/pkgs/terraformignore/.terraformignore new file mode 100644 index 0000000..bbde3dc --- /dev/null +++ b/sourcebundle/testdata/pkgs/terraformignore/.terraformignore @@ -0,0 +1 @@ +excluded diff --git a/sourcebundle/testdata/pkgs/terraformignore/excluded b/sourcebundle/testdata/pkgs/terraformignore/excluded new file mode 100644 index 0000000..9a10460 --- /dev/null +++ b/sourcebundle/testdata/pkgs/terraformignore/excluded @@ -0,0 +1 @@ +This file is ignored. diff --git a/sourcebundle/testdata/pkgs/terraformignore/included b/sourcebundle/testdata/pkgs/terraformignore/included new file mode 100644 index 0000000..f2834fd --- /dev/null +++ b/sourcebundle/testdata/pkgs/terraformignore/included @@ -0,0 +1 @@ +This file is included. 
diff --git a/sourcebundle/testdata/pkgs/with-remote-deps/dependencies b/sourcebundle/testdata/pkgs/with-remote-deps/dependencies new file mode 100644 index 0000000..557aba6 --- /dev/null +++ b/sourcebundle/testdata/pkgs/with-remote-deps/dependencies @@ -0,0 +1,2 @@ +https://example.com/dependency1.tgz +https://example.com/dependency2.tgz diff --git a/sourcebundle/testdata/pkgs/with-remote-deps/self_dependency b/sourcebundle/testdata/pkgs/with-remote-deps/self_dependency new file mode 100644 index 0000000..2a71619 --- /dev/null +++ b/sourcebundle/testdata/pkgs/with-remote-deps/self_dependency @@ -0,0 +1 @@ +https://example.com/self_dependency.tgz \ No newline at end of file diff --git a/sourcebundle/trace.go b/sourcebundle/trace.go new file mode 100644 index 0000000..748e301 --- /dev/null +++ b/sourcebundle/trace.go @@ -0,0 +1,94 @@ +package sourcebundle + +import ( + "context" + + "github.com/apparentlymart/go-versions/versions" + "github.com/hashicorp/go-slug/sourceaddrs" + regaddr "github.com/hashicorp/terraform-registry-address" +) + +// BuildTracer contains a set of callbacks that a caller can optionally provide +// to [Builder] methods via their [context.Context] arguments to be notified +// when various long-running events are starting and stopping, to allow both +// for debugging and for UI feedback about progress. +// +// Any or all of the callbacks may be left as nil, in which case no event +// will be delivered for the corresponding event. +// +// The [context.Context] passed to each trace function is guaranteed to be a +// child of the one passed to whatever [Builder] method caused the event +// to occur, and so it can carry cross-cutting information such as distributed +// tracing clients. +// +// The "Start"-suffixed methods all allow returning a new context which will +// then be passed to the corresponding "Success"-suffixed or "Failure"-suffixed +// function, and also used for outgoing requests within the scope of that +// operation. 
This allows carrying values such as tracing spans between the +// start and end, so they can properly bracket the operation in question. If +// your tracer doesn't need this then just return the given context. +type BuildTracer struct { + // The RegistryPackageVersions... callbacks frame any requests to + // fetch the list of available versions for a module registry package. + RegistryPackageVersionsStart func(ctx context.Context, pkgAddr regaddr.ModulePackage) context.Context + RegistryPackageVersionsSuccess func(ctx context.Context, pkgAddr regaddr.ModulePackage, versions versions.List) + RegistryPackageVersionsFailure func(ctx context.Context, pkgAddr regaddr.ModulePackage, err error) + RegistryPackageVersionsAlready func(ctx context.Context, pkgAddr regaddr.ModulePackage, versions versions.List) + + // The RegistryPackageSource... callbacks frame any requests to fetch + // the real underlying source address for a selected registry package + // version. + RegistryPackageSourceStart func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version) context.Context + RegistryPackageSourceSuccess func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version, sourceAddr sourceaddrs.RemoteSource) + RegistryPackageSourceFailure func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version, err error) + RegistryPackageSourceAlready func(ctx context.Context, pkgAddr regaddr.ModulePackage, version versions.Version, sourceAddr sourceaddrs.RemoteSource) + + // The RemotePackageDownload... callbacks frame any requests to download + // remote source packages. 
+ RemotePackageDownloadStart func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage) context.Context + RemotePackageDownloadSuccess func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage) + RemotePackageDownloadFailure func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage, err error) + RemotePackageDownloadAlready func(ctx context.Context, pkgAddr sourceaddrs.RemotePackage) + + // Diagnostics will be called for any diagnostics that describe problems + // that aren't also reported by calling one of the "Failure" callbacks + // above. A recipient that is going to report the errors itself using + // the Failure callbacks anyway should consume diagnostics from this + // event, rather than from the return values of the [Builder] methods, + // to avoid redundantly reporting the same errors twice. + // + // Diagnostics might be called multiple times during an operation. Callers + // should consider each new call to represent additional diagnostics, + // not replacing any previously returned. + Diagnostics func(ctx context.Context, diags Diagnostics) +} + +// OnContext takes a context and returns a derived context which has everything +// the given context already had plus also the receiving BuildTrace object, +// so that passing the resulting context to methods of [Builder] will cause +// the trace object's callbacks to be called. +// +// Each context can have only one tracer, so if the given context already has +// a tracer then it will be overridden by the new one. +func (bt *BuildTracer) OnContext(ctx context.Context) context.Context { + return context.WithValue(ctx, buildTraceKey, bt) +} + +func buildTraceFromContext(ctx context.Context) *BuildTracer { + ret, ok := ctx.Value(buildTraceKey).(*BuildTracer) + if !ok { + // We'll always return a non-nil pointer just because that reduces + // the amount of boilerplate required in the caller when announcing + // events. 
+ ret = &noopBuildTrace + } + return ret +} + +type buildTraceKeyType int + +const buildTraceKey buildTraceKeyType = 0 + +// noopBuildTrace is an all-nil [BuildTracer] we return a pointer to if we're +// asked for a BuildTrace from a context that doesn't have one. +var noopBuildTrace BuildTracer From 4fbce8ee92cdd14970c4df8c46356d72e33ea320 Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Wed, 24 May 2023 10:04:48 -0700 Subject: [PATCH 4/7] sourceaddrs: SourceFilename helper function We typically decide between using HCL native syntax vs. JSON HCL syntax based on filename suffixes, and so it's helpful to have a robust way to retrieve just the filename portion of an arbitrary source address (as long as it's referring to a file) which takes into account edge-cases such as the query string portion of remote source addresses. --- sourceaddrs/source.go | 22 ++++++++++++++++ sourceaddrs/source_registry.go | 4 +++ sourceaddrs/source_test.go | 48 ++++++++++++++++++++++++++++++++++ 3 files changed, 74 insertions(+) diff --git a/sourceaddrs/source.go b/sourceaddrs/source.go index 6f08eef..8890c45 100644 --- a/sourceaddrs/source.go +++ b/sourceaddrs/source.go @@ -117,6 +117,28 @@ func ResolveRelativeSource(a, b Source) (Source, error) { } } +// SourceFilename returns the base name (in the same sense as [path.Base]) +// of the sub-path or local path portion of the given source address. +// +// This only really makes sense for a source address that refers to an +// individual file, and is intended for needs such as using the suffix of +// the filename to decide how to parse a particular file. Passing a source +// address that refers to a directory will not fail but its result is +// unlikely to be useful. 
+func SourceFilename(addr Source) string { + switch addr := addr.(type) { + case LocalSource: + return path.Base(addr.RelativePath()) + case RemoteSource: + return path.Base(addr.SubPath()) + case RegistrySource: + return path.Base(addr.SubPath()) + default: + // above should be exhaustive for all source types + panic(fmt.Sprintf("cannot SourceFilename for %T", addr)) + } +} + func sourceIsAbs(source Source) bool { _, isLocal := source.(LocalSource) return !isLocal diff --git a/sourceaddrs/source_registry.go b/sourceaddrs/source_registry.go index e44a402..bedd1ab 100644 --- a/sourceaddrs/source_registry.go +++ b/sourceaddrs/source_registry.go @@ -94,6 +94,10 @@ func (s RegistrySource) Package() regaddr.ModulePackage { return s.pkg } +func (s RegistrySource) SubPath() string { + return s.subPath +} + // FinalSourceAddr takes the result of looking up the package portion of the // receiver in a module registry and appends the reciever's sub-path to the // returned sub-path to produce the final fully-qualified remote source address. 
diff --git a/sourceaddrs/source_test.go b/sourceaddrs/source_test.go index 7b7183a..0906b25 100644 --- a/sourceaddrs/source_test.go +++ b/sourceaddrs/source_test.go @@ -497,6 +497,54 @@ func TestResolveRelativeSource(t *testing.T) { } } +func TestSourceFilename(t *testing.T) { + tests := []struct { + Addr Source + Want string + }{ + { + MustParseSource("./foo.tf"), + "foo.tf", + }, + { + MustParseSource("./boop/foo.tf"), + "foo.tf", + }, + { + MustParseSource("git::https://example.com/foo.git//foo.tf"), + "foo.tf", + }, + { + MustParseSource("git::https://example.com/foo.git//boop/foo.tf"), + "foo.tf", + }, + { + MustParseSource("git::https://example.com/foo.git//boop/foo.tf?ref=main"), + "foo.tf", + }, + { + MustParseSource("hashicorp/subnets/cidr//main.tf"), + "main.tf", + }, + { + MustParseSource("hashicorp/subnets/cidr//test/simple.tf"), + "simple.tf", + }, + } + + for _, test := range tests { + t.Run(test.Addr.String(), func(t *testing.T) { + got := SourceFilename(test.Addr) + if got != test.Want { + t.Errorf( + "wrong result\naddr: %s\ngot: %s\nwant: %s", + test.Addr, got, test.Want, + ) + } + }) + } +} + func mustParseURL(s string) *url.URL { ret, err := url.Parse(s) if err != nil { From 10843fe5d6381e1cd1c34746e93226b05f42e301 Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Wed, 24 May 2023 11:08:45 -0700 Subject: [PATCH 5/7] sourceaddrs: FinalSource interface type Terraform module source addresses have a historical design oddity where both local and remote source addresses are self-contained but registry source addresses must always be combined with a version constraint and then resolved through an extra step of selecting the latest available version that matches that version constraint. This means that we cannot use the same type to represent both a package to be installed and a package that has already been resolved. 
To fill that gap here we introduce a variant address type sourceaddrs.FinalSource which represents addresses of packages that have already been installed, rather than addresses of packages that are requested to be installed. LocalSource and RemoteSource implement both interfaces, while RegistrySource only implements Source and the new RegistrySourceFinal only implements FinalSource. This then allows other subsystems that are built in terms of these address types to be explicit about whether they are expecting an address of something to be installed or an address of something that has already been installed, and thus the Go compiler can help ensure that we handle both cases fully rather than forgetting about the need for the extra version selection information for already-installed registry module source addresses. --- sourceaddrs/source.go | 5 ++ sourceaddrs/source_final.go | 94 +++++++++++++++++++++++ sourceaddrs/source_final_test.go | 108 +++++++++++++++++++++++++++ sourceaddrs/source_local.go | 4 + sourceaddrs/source_registry.go | 11 +++ sourceaddrs/source_registry_final.go | 64 ++++++++++++++++ sourceaddrs/source_remote.go | 6 +- sourceaddrs/source_test.go | 5 ++ sourcebundle/builder.go | 19 +++++ sourcebundle/builder_test.go | 2 +- sourcebundle/bundle.go | 37 +++++++-- 11 files changed, 348 insertions(+), 7 deletions(-) create mode 100644 sourceaddrs/source_final.go create mode 100644 sourceaddrs/source_final_test.go create mode 100644 sourceaddrs/source_registry_final.go diff --git a/sourceaddrs/source.go b/sourceaddrs/source.go index 8890c45..2b8586b 100644 --- a/sourceaddrs/source.go +++ b/sourceaddrs/source.go @@ -9,6 +9,11 @@ import ( // Source acts as a tagged union over the three possible source address types, // for situations where all three are acceptable. // +// Source is used to specify source addresses for installation.
Once packages +// have been resolved and installed we use [FinalSource] instead to represent +// those finalized selections, which allows capturing the selected version +// number for a module registry source address. +// // Only address types within this package can implement Source. type Source interface { sourceSigil() diff --git a/sourceaddrs/source_final.go b/sourceaddrs/source_final.go new file mode 100644 index 0000000..ad85942 --- /dev/null +++ b/sourceaddrs/source_final.go @@ -0,0 +1,94 @@ +package sourceaddrs + +import ( + "fmt" + "path" +) + +// FinalSource is a variant of [Source] that always refers to a single +// specific package. +// +// Specifically this models the annoying oddity that while [LocalSource] and +// [RemoteSource] fully specify what they refer to, [RegistrySource] only +// gives partial information and must be qualified with a selected version +// number to determine exactly what it refers to. +type FinalSource interface { + finalSourceSigil() + + String() string +} + +// FinalSourceFilename returns the base name (in the same sense as [path.Base]) +// of the sub-path or local path portion of the given final source address. +// +// This only really makes sense for a source address that refers to an +// individual file, and is intended for needs such as using the suffix of +// the filename to decide how to parse a particular file. Passing a source +// address that refers to a directory will not fail but its result is +// unlikely to be useful.
+func FinalSourceFilename(addr FinalSource) string { + switch addr := addr.(type) { + case LocalSource: + return path.Base(addr.RelativePath()) + case RemoteSource: + return path.Base(addr.SubPath()) + case RegistrySourceFinal: + return path.Base(addr.SubPath()) + default: + // above should be exhaustive for all final source types + panic(fmt.Sprintf("cannot FinalSourceFilename for %T", addr)) + } +} + +// ResolveRelativeFinalSource is like [ResolveRelativeSource] but for +// [FinalSource] addresses instead of [Source] addresses. +// +// Aside from the address type difference its meaning and behavior rules +// are the same. +func ResolveRelativeFinalSource(a, b FinalSource) (FinalSource, error) { + if finalSourceIsAbs(b) { + return b, nil + } + // If we get here then b is definitely a local source, because + // otherwise it would have been absolute. + bRaw := b.(LocalSource).relPath + + switch a := a.(type) { + case LocalSource: + aRaw := a.relPath + new := path.Join(aRaw, bRaw) + if !looksLikeLocalSource(new) { + new = "./" + new // preserve LocalSource's prefix invariant + } + return LocalSource{relPath: new}, nil + case RegistrySourceFinal: + aSub := a.src.subPath + newSub, err := joinSubPath(aSub, bRaw) + if err != nil { + return nil, fmt.Errorf("invalid traversal from %s: %w", a.String(), err) + } + return RegistrySource{ + pkg: a.Package(), + subPath: newSub, + }.Versioned(a.version), nil + case RemoteSource: + aSub := a.subPath + newSub, err := joinSubPath(aSub, bRaw) + if err != nil { + return nil, fmt.Errorf("invalid traversal from %s: %w", a.String(), err) + } + return RemoteSource{ + pkg: a.pkg, + subPath: newSub, + }, nil + default: + // Should not get here, because the cases above are exhaustive for + // all of our defined Source implementations. 
+ panic(fmt.Sprintf("unsupported Source implementation %T", a)) + } +} + +func finalSourceIsAbs(source FinalSource) bool { + _, isLocal := source.(LocalSource) + return !isLocal +} diff --git a/sourceaddrs/source_final_test.go b/sourceaddrs/source_final_test.go new file mode 100644 index 0000000..b2250a4 --- /dev/null +++ b/sourceaddrs/source_final_test.go @@ -0,0 +1,108 @@ +package sourceaddrs + +import ( + "fmt" + "reflect" + "testing" + + "github.com/apparentlymart/go-versions/versions" +) + +func TestResolveRelativeFinalSource(t *testing.T) { + onePointOh := versions.MustParseVersion("1.0.0") + + tests := []struct { + Base FinalSource + Rel FinalSource + Want FinalSource + WantErr string + }{ + { + Base: MustParseSource("./a/b").(FinalSource), + Rel: MustParseSource("../c").(FinalSource), + Want: MustParseSource("./a/c").(FinalSource), + }, + { + Base: MustParseSource("./a").(FinalSource), + Rel: MustParseSource("../c").(FinalSource), + Want: MustParseSource("./c").(FinalSource), + }, + { + Base: MustParseSource("./a").(FinalSource), + Rel: MustParseSource("../../c").(FinalSource), + Want: MustParseSource("../c").(FinalSource), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop").(FinalSource), + Rel: MustParseSource("git::https://github.com/hashicorp/go-slug.git//blah/blah").(FinalSource), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//blah/blah").(FinalSource), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop").(FinalSource), + Rel: MustParseSource("git::https://example.com/foo.git").(FinalSource), + Want: MustParseSource("git::https://example.com/foo.git").(FinalSource), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop").(FinalSource), + Rel: MustParseSource("../bloop").(FinalSource), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/bloop").(FinalSource), + }, + { + Base: 
MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop").(FinalSource), + Rel: MustParseSource("../").(FinalSource), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep").(FinalSource), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop").(FinalSource), + Rel: MustParseSource("../..").(FinalSource), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git").(FinalSource), + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git//beep/boop").(FinalSource), + Rel: MustParseSource("../../../baz").(FinalSource), + WantErr: `invalid traversal from git::https://github.com/hashicorp/go-slug.git//beep/boop: relative path ../../../baz traverses up too many levels from source path beep/boop`, + }, + { + Base: MustParseSource("git::https://github.com/hashicorp/go-slug.git").(FinalSource), + Rel: MustParseSource("./boop").(FinalSource), + Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//boop").(FinalSource), + }, + { + Base: MustParseSource("example.com/foo/bar/baz//beep/boop").(RegistrySource).Versioned(onePointOh), + Rel: MustParseSource("../").(FinalSource), + Want: MustParseSource("example.com/foo/bar/baz//beep").(RegistrySource).Versioned(onePointOh), + }, + } + + for _, test := range tests { + t.Run(fmt.Sprintf("%s + %s", test.Base, test.Rel), func(t *testing.T) { + got, gotErr := ResolveRelativeFinalSource(test.Base, test.Rel) + + if test.WantErr != "" { + if gotErr == nil { + t.Fatalf("unexpected success\ngot result: %s (%T)\nwant error: %s", got, got, test.WantErr) + } + if got, want := gotErr.Error(), test.WantErr; got != want { + t.Fatalf("wrong error\ngot error: %s\nwant error: %s", got, want) + } + return + } + + if gotErr != nil { + t.Fatalf("unexpected error: %s", gotErr) + } + + // Two addresses are equal if they have the same string representation + // and the same dynamic type. 
+ gotStr := got.String() + wantStr := test.Want.String() + if gotStr != wantStr { + t.Errorf("wrong result\ngot: %s\nwant: %s", gotStr, wantStr) + } + + if gotType, wantType := reflect.TypeOf(got), reflect.TypeOf(test.Want); gotType != wantType { + t.Errorf("wrong result type\ngot: %s\nwant: %s", gotType, wantType) + } + }) + } +} diff --git a/sourceaddrs/source_local.go b/sourceaddrs/source_local.go index 9552954..d002dba 100644 --- a/sourceaddrs/source_local.go +++ b/sourceaddrs/source_local.go @@ -21,10 +21,14 @@ type LocalSource struct { } var _ Source = LocalSource{} +var _ FinalSource = LocalSource{} // sourceSigil implements Source func (s LocalSource) sourceSigil() {} +// finalSourceSigil implements FinalSource +func (s LocalSource) finalSourceSigil() {} + func looksLikeLocalSource(given string) bool { return strings.HasPrefix(given, "./") || strings.HasPrefix(given, "../") } diff --git a/sourceaddrs/source_registry.go b/sourceaddrs/source_registry.go index bedd1ab..000526a 100644 --- a/sourceaddrs/source_registry.go +++ b/sourceaddrs/source_registry.go @@ -4,6 +4,7 @@ import ( "fmt" "path" + "github.com/apparentlymart/go-versions/versions" regaddr "github.com/hashicorp/terraform-registry-address" ) @@ -98,6 +99,16 @@ func (s RegistrySource) SubPath() string { return s.subPath } +// Versioned combines the receiver with a specific selected version number to +// produce a final source address that can be used to resolve to a single +// source package. +func (s RegistrySource) Versioned(selectedVersion versions.Version) RegistrySourceFinal { + return RegistrySourceFinal{ + src: s, + version: selectedVersion, + } +} + // FinalSourceAddr takes the result of looking up the package portion of the // receiver in a module registry and appends the reciever's sub-path to the // returned sub-path to produce the final fully-qualified remote source address. 
diff --git a/sourceaddrs/source_registry_final.go b/sourceaddrs/source_registry_final.go new file mode 100644 index 0000000..7ed9338 --- /dev/null +++ b/sourceaddrs/source_registry_final.go @@ -0,0 +1,64 @@ +package sourceaddrs + +import ( + "github.com/apparentlymart/go-versions/versions" + regaddr "github.com/hashicorp/terraform-registry-address" +) + +// RegistrySourceFinal annotates a [RegistrySource] with a specific version +// selection, thereby making it sufficient for selecting a single real source +// package. +// +// Registry sources are weird in comparison to others in that they must be +// combined with a version constraint to select from possibly many available +// versions. After completing the version selection process, the result can +// be represented as a RegistrySourceFinal that carries the selected version +// number along with the originally-specified source address. +type RegistrySourceFinal struct { + src RegistrySource + version versions.Version +} + +// NOTE: RegistrySourceFinal is intentionally not a Source, because it isn't +// possible to represent a final registry source as a single source address +// string. +var _ FinalSource = RegistrySourceFinal{} + +// finalSourceSigil implements FinalSource +func (s RegistrySourceFinal) finalSourceSigil() {} + +// Unversioned returns the address of the registry package that this final +// address is a version of. 
+func (s RegistrySourceFinal) Unversioned() RegistrySource { + return s.src +} + +func (s RegistrySourceFinal) Package() regaddr.ModulePackage { + return s.src.Package() +} + +func (s RegistrySourceFinal) SubPath() string { + return s.src.SubPath() +} + +func (s RegistrySourceFinal) SelectedVersion() versions.Version { + return s.version +} + +func (s RegistrySourceFinal) String() string { + pkgAddr := s.src.Package() + subPath := s.src.SubPath() + if subPath != "" { + return pkgAddr.String() + "@" + s.version.String() + "//" + subPath + } + return pkgAddr.String() + "@" + s.version.String() +} + +// FinalSourceAddr takes the result of looking up the package portion of the +// receiver in a module registry and appends the receiver's sub-path to the +// returned sub-path to produce the final fully-qualified remote source address. +func (s RegistrySourceFinal) FinalSourceAddr(realSource RemoteSource) RemoteSource { + // The version number doesn't have any impact on how we combine the + // paths together, so we can just delegate to our unversioned equivalent.
+ return s.Unversioned().FinalSourceAddr(realSource) +} diff --git a/sourceaddrs/source_remote.go b/sourceaddrs/source_remote.go index 9a2e6f2..8ba5858 100644 --- a/sourceaddrs/source_remote.go +++ b/sourceaddrs/source_remote.go @@ -12,10 +12,14 @@ type RemoteSource struct { subPath string } +var _ Source = RemoteSource{} +var _ FinalSource = RemoteSource{} + // sourceSigil implements Source func (RemoteSource) sourceSigil() {} -var _ Source = RemoteSource{} +// finalSourceSigil implements FinalSource +func (RemoteSource) finalSourceSigil() {} // ParseRemoteSource parses the given string as a remote source address, // or returns an error if it does not use the correct syntax for interpretation diff --git a/sourceaddrs/source_test.go b/sourceaddrs/source_test.go index 0906b25..1b003d8 100644 --- a/sourceaddrs/source_test.go +++ b/sourceaddrs/source_test.go @@ -462,6 +462,11 @@ func TestResolveRelativeSource(t *testing.T) { Rel: MustParseSource("./boop"), Want: MustParseSource("git::https://github.com/hashicorp/go-slug.git//boop"), }, + { + Base: MustParseSource("example.com/foo/bar/baz//beep/boop"), + Rel: MustParseSource("../"), + Want: MustParseSource("example.com/foo/bar/baz//beep"), + }, } for _, test := range tests { diff --git a/sourcebundle/builder.go b/sourcebundle/builder.go index bc6ec68..06734ba 100644 --- a/sourcebundle/builder.go +++ b/sourcebundle/builder.go @@ -144,6 +144,9 @@ func (b *Builder) AddRemoteSource(ctx context.Context, addr sourceaddrs.RemoteSo // into the bundle, and then analyzes the new artifact for dependencies // using the given dependency finder. // +// If you have already selected a specific version to install, consider using +// [Builder.AddFinalRegistrySource] instead. +// // If the returned diagnostics contains errors then the bundle is left in an // inconsistent state and must not be used for any other calls. 
func (b *Builder) AddRegistrySource(ctx context.Context, addr sourceaddrs.RegistrySource, allowedVersions versions.Set, depFinder DependencyFinder) Diagnostics { @@ -161,6 +164,22 @@ func (b *Builder) AddRegistrySource(ctx context.Context, addr sourceaddrs.Regist return b.resolvePending(ctx) } +// AddFinalRegistrySource is a variant of [Builder.AddRegistrySource] which +// takes an already-selected version of a registry source, instead of taking +// a version constraint and then selecting the latest available version +// matching that constraint. +// +// This function still asks the registry for its set of available versions for +// the unversioned package first, to ensure that the results from installing +// from a final source will always be consistent with those from installing +// from a not-yet-resolved registry source. +func (b *Builder) AddFinalRegistrySource(ctx context.Context, addr sourceaddrs.RegistrySourceFinal, depFinder DependencyFinder) Diagnostics { + // We handle this just by turning the version selection into an exact + // version set and then installing from that as normal. + allowedVersions := versions.Only(addr.SelectedVersion()) + return b.AddRegistrySource(ctx, addr.Unversioned(), allowedVersions, depFinder) +} + // Close ensures that the target directory is in a valid and consistent state // to be used as a source bundle and then returns an object providing the // read-only API for that bundle. 
diff --git a/sourcebundle/builder_test.go b/sourcebundle/builder_test.go index 9735e1a..e0db29e 100644 --- a/sourcebundle/builder_test.go +++ b/sourcebundle/builder_test.go @@ -494,7 +494,7 @@ func TestBuilderCoalescePackages(t *testing.T) { } }) t.Run("dependency 1", func(t *testing.T) { - localPkgDir, err := bundle.LocalPathForRemoteSource(dep1Source) + localPkgDir, err := bundle.LocalPathForSource(dep1Source) if err != nil { for pkgAddr, localDir := range builder.remotePackageDirs { t.Logf("contents of %s are in %s", pkgAddr, localDir) diff --git a/sourcebundle/bundle.go b/sourcebundle/bundle.go index 166f525..8067c76 100644 --- a/sourcebundle/bundle.go +++ b/sourcebundle/bundle.go @@ -118,6 +118,31 @@ func OpenDir(baseDir string) (*Bundle, error) { return ret, nil } +// LocalPathForSource takes either a remote or registry final source address +// and returns the local path within the bundle that corresponds with it. +// +// It doesn't make sense to pass a [sourceaddrs.LocalSource] to this function +// because a source bundle cannot contain anything other than remote packages, +// but as a concession to convenience this function will return a +// filepath-shaped relative path in that case, assuming that the source was +// intended to be a local filesystem path relative to the current working +// directory. The result will therefore not necessarily be a subdirectory of +// the receiving bundle in that case. +func (b *Bundle) LocalPathForSource(addr sourceaddrs.FinalSource) (string, error) { + switch addr := addr.(type) { + case sourceaddrs.RemoteSource: + return b.LocalPathForRemoteSource(addr) + case sourceaddrs.RegistrySourceFinal: + return b.LocalPathForRegistrySource(addr.Unversioned(), addr.SelectedVersion()) + case sourceaddrs.LocalSource: + return filepath.FromSlash(addr.RelativePath()), nil + default: + // If we get here then it's probably a bug: the above cases should be + // exhaustive for all sourceaddrs.FinalSource implementations.
+ return "", fmt.Errorf("cannot produce local path for source address of type %T", addr) + } +} + // LocalPathForRemoteSource returns the local path within the bundle that // corresponds with the given source address, or an error if the source address // is within a source package not included in the bundle. @@ -134,11 +159,6 @@ func (b *Bundle) LocalPathForRemoteSource(addr sourceaddrs.RemoteSource) (string // LocalPathForRegistrySource returns the local path within the bundle that // corresponds with the given registry address and version, or an error if the // source address is within a source package not included in the bundle. -// -// A source bundle does not have any direct representation of local source -// addresses -- they are always relative to a location in a remote source -// package -- so this function will always fail when given a local source -// address. func (b *Bundle) LocalPathForRegistrySource(addr sourceaddrs.RegistrySource, version versions.Version) (string, error) { pkgAddr := addr.Package() vs, ok := b.registryPackageSources[pkgAddr] @@ -156,6 +176,13 @@ func (b *Bundle) LocalPathForRegistrySource(addr sourceaddrs.RegistrySource, ver return b.LocalPathForRemoteSource(finalSourceAddr) } +// LocalPathForFinalRegistrySource is a variant of +// [Bundle.LocalPathForRegistrySource] which passes the source address and +// selected version together as a single address value. +func (b *Bundle) LocalPathForFinalRegistrySource(addr sourceaddrs.RegistrySourceFinal) (string, error) { + return b.LocalPathForRegistrySource(addr.Unversioned(), addr.SelectedVersion()) +} + // SourceForLocalPath is the inverse of [Bundle.LocalPathForSource], // translating a local path beneath the bundle's base directory back into // a source address that it's a snapshot of. 
From f2e204a63e3c413b1ce79ff09a1dde56db0913ca Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Mon, 5 Jun 2023 08:37:42 -0700 Subject: [PATCH 6/7] sourcebundle and sourceaddrs: Both are experimental Since this is entirely new functionality that wasn't previously in this codebase's scope, we'll need to gather some real experience using this new API before we can be confident that it's suitable to commit to. At the time of this commit this entire codebase is pre-1.0 and so subject to breaking changes anyway, but these extra notes are here just in case the root package functionality gets stabilized before these new packages are ready to be stabilized. We'll remove these warnings at a later date once we've got enough experience with these new packages in internal projects to confidently commit to a public-facing API. --- sourceaddrs/doc.go | 5 +++++ sourcebundle/doc.go | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/sourceaddrs/doc.go b/sourceaddrs/doc.go index ad57135..b0f3c91 100644 --- a/sourceaddrs/doc.go +++ b/sourceaddrs/doc.go @@ -1,4 +1,9 @@ // Package sourceaddrs deals with the various types of source code address // that Terraform can gather into a source bundle via the sibling package // "sourcebundle". +// +// NOTE WELL: Everything in this package is currently experimental and subject +// to breaking changes even in patch releases. We will make stronger commitments +// to backward-compatibility once we have more experience using this +// functionality in real contexts. package sourceaddrs diff --git a/sourcebundle/doc.go b/sourcebundle/doc.go index 5331fd1..47ad1b4 100644 --- a/sourcebundle/doc.go +++ b/sourcebundle/doc.go @@ -9,4 +9,9 @@ // source bundles have a more prescriptive structure that allows callers to // use a source bundle as a direct substitute for fetching the individual // source packages it was built from. 
+// +// NOTE WELL: Everything in this package is currently experimental and subject +// to breaking changes even in patch releases. We will make stronger commitments +// to backward-compatibility once we have more experience using this +// functionality in real contexts. package sourcebundle From 386bf9ba1ed7e9e2c1cfa9f7aee1aebacca8153a Mon Sep 17 00:00:00 2001 From: Martin Atkins Date: Mon, 5 Jun 2023 09:04:58 -0700 Subject: [PATCH 7/7] sourcebundle: Bundle.SourceForLocalPath returns FinalSource Currently this function can only possibly return RemoteSource values anyway and so the distinction between Source and FinalSource is moot, but semantically it's more correct to say that we're returning a finalized source address here and so using the more correct type will make this compose better with other functionality which works generically with final source addresses, and will also allow us to potentially make this return registry addresses sometimes if we decide that gives better results for the assumed use-case of producing diagnostic messages. --- sourcebundle/bundle.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sourcebundle/bundle.go b/sourcebundle/bundle.go index 8067c76..576ec7a 100644 --- a/sourcebundle/bundle.go +++ b/sourcebundle/bundle.go @@ -201,7 +201,7 @@ func (b *Bundle) LocalPathForFinalRegistrySource(addr sourceaddrs.RegistrySource // instead of exposing the opaque internal directory names from the source // bundle. This function should not typically be used in performance-sensitive // portions of the happy path. -func (b *Bundle) SourceForLocalPath(p string) (sourceaddrs.Source, error) { +func (b *Bundle) SourceForLocalPath(p string) (sourceaddrs.FinalSource, error) { // This implementation is a best effort sort of thing, and might not // always succeed in awkward cases.