Skip to content

Commit

Permalink
Optimize Pack for deep, ignored directories
Browse files Browse the repository at this point in the history
When ignore rules match a directory, and certainly no exclusion rules would include subdirectories, short circuit further descent into that directory.

The optimization supports a certain case where no exclusion rules of any kind follow a matching rule. For rules where this is the case, the rule is said to be 'dominant' in the result. Dominant rules can be certain that they won't be undone by additional rules because all the following rules ignore more files.

I couldn't expand the definition of Dominant because globbing, character classes, and non-absolute rules conspire to make it difficult to know whether a particular exclusion rule would countermand a particular previous rule.
  • Loading branch information
brandonc committed Nov 24, 2023
1 parent ffdd023 commit ff9113c
Show file tree
Hide file tree
Showing 11 changed files with 96 additions and 19 deletions.
29 changes: 22 additions & 7 deletions internal/ignorefiles/ignorerules.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ type Ruleset struct {
rules []rule
}

// ExcludesResult is the result of matching a path against a Ruleset. A result
// is a Match if it matches a set of paths that are excluded by the rules in a
// Ruleset. A matching result is Dominating if none of the rules that follow it
// contain an exclusion, implying that if the rule excludes a directory,
// everything below that directory may be ignored.
type ExcludesResult struct {
Match bool
Dominating bool
}

// ParseIgnoreFileContent takes a reader over the content of a .terraformignore
// file and returns the Ruleset described by that file, or an error if the
// file is invalid.
Expand Down Expand Up @@ -57,19 +67,20 @@ func LoadPackageIgnoreRules(packageDir string) (*Ruleset, error) {
// excluded by the rules in the ruleset.
//
// If any of the rules in the ruleset have invalid syntax then Excludes will
// return an error, but it will also still return a boolean result which
// return an error, but it will also still return a result which
// considers all of the remaining valid rules, to support callers that want to
// just ignore invalid exclusions. Such callers can safely ignore the error
// result:
//
// exc, _ = ruleset.Excludes(path)
func (r *Ruleset) Excludes(path string) (bool, error) {
// exc, matching, _ = ruleset.Excludes(path)
func (r *Ruleset) Excludes(path string) (ExcludesResult, error) {
if r == nil {
return false, nil
return ExcludesResult{}, nil
}

var retErr error
foundMatch := false
dominating := false
for _, rule := range r.rules {
match, err := rule.match(path)
if err != nil {
Expand All @@ -82,15 +93,19 @@ func (r *Ruleset) Excludes(path string) (bool, error) {
}
if match {
foundMatch = !rule.excluded
dominating = foundMatch && !rule.exclusionsAfter
}
}
return foundMatch, retErr
return ExcludesResult{
Match: foundMatch,
Dominating: dominating,
}, retErr
}

// Includes is the inverse of [Ruleset.Excludes].
func (r *Ruleset) Includes(path string) (bool, error) {
notRet, err := r.Excludes(path)
return !notRet, err
result, err := r.Excludes(path)
return !result.Match, err
}

var DefaultRuleset *Ruleset
Expand Down
18 changes: 14 additions & 4 deletions internal/ignorefiles/terraformignore.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ func readRules(input io.Reader) ([]rule, error) {
rules := defaultExclusions
scanner := bufio.NewScanner(input)
scanner.Split(bufio.ScanLines)
currentRuleIndex := len(defaultExclusions) - 1

for scanner.Scan() {
pattern := scanner.Text()
Expand All @@ -34,6 +35,13 @@ func readRules(input io.Reader) ([]rule, error) {
if pattern[0] == '!' {
rule.excluded = true
pattern = pattern[1:]
// Mark all previous rules as having exclusions after it
for i := currentRuleIndex; i >= 0; i-- {
if rules[i].exclusionsAfter {
break
}
rules[i].exclusionsAfter = true
}
}
// If it is a directory, add ** so we catch descendants
if pattern[len(pattern)-1] == os.PathSeparator {
Expand All @@ -49,6 +57,7 @@ func readRules(input io.Reader) ([]rule, error) {
rule.val = pattern
rule.dirs = strings.Split(pattern, string(os.PathSeparator))
rules = append(rules, rule)
currentRuleIndex += 1
}

if err := scanner.Err(); err != nil {
Expand All @@ -58,10 +67,11 @@ func readRules(input io.Reader) ([]rule, error) {
}

type rule struct {
val string // the value of the rule itself
excluded bool // ! is present, an exclusion rule
dirs []string // directories of the rule
regex *regexp.Regexp // regular expression to match for the rule
val string // the value of the rule itself
excluded bool // ! is present, an exclusion rule
exclusionsAfter bool // exclusion rules appear after this rule
dirs []string // directories of the rule
regex *regexp.Regexp // regular expression to match for the rule
}

func (r *rule) match(path string) (bool, error) {
Expand Down
38 changes: 36 additions & 2 deletions internal/ignorefiles/terraformignore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ func TestTerraformIgnore(t *testing.T) {
if err != nil {
t.Fatal(err)
}

type file struct {
// the actual path, should be file path format /dir/subdir/file.extension
path string
Expand Down Expand Up @@ -112,13 +113,46 @@ func TestTerraformIgnore(t *testing.T) {
},
}
for i, p := range paths {
match, err := rs.Excludes(p.path)
result, err := rs.Excludes(p.path)
if err != nil {
t.Errorf("invalid rule syntax when checking %s at index %d", p.path, i)
continue
}
if match != p.match {
if result.Match != p.match {
t.Fatalf("%s at index %d should be %t", p.path, i, p.match)
}
}
}

func TestTerraformExclusionOptimization(t *testing.T) {
rs, err := LoadPackageIgnoreRules("testdata/with-exclusion")
if err != nil {
t.Fatal(err)
}
if len(rs.rules) != 7 {
t.Fatalf("Expected 7 rules, got %d", len(rs.rules))
}

// reflects that no exclusions follow the last rule
afterValue := false
for i := len(rs.rules) - 1; i >= 0; i-- {
r := rs.rules[i]
if r.exclusionsAfter != afterValue {
t.Errorf("Expected exclusionsAfter to be %v at index %d", afterValue, i)
}
if r.excluded {
afterValue = true
}
}

// last two will be dominating
for _, r := range []string{"logs/", "tmp/"} {
result, err := rs.Excludes(r)
if err != nil {
t.Fatal(err)
}
if !result.Dominating {
t.Errorf("Expected %q to be a dominating rule", r)
}
}
}
5 changes: 5 additions & 0 deletions internal/ignorefiles/testdata/with-exclusion/.terraformignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
src/**/*
# except at one directory
!src/foo/bar.txt
logs/
tmp/
1 change: 1 addition & 0 deletions internal/ignorefiles/testdata/with-exclusion/logs/foo.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
foo
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
ignored
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
bar
1 change: 1 addition & 0 deletions internal/ignorefiles/testdata/with-exclusion/tmp/tmp.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
tmp
10 changes: 7 additions & 3 deletions slug.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,15 +199,19 @@ func (p *Packer) packWalkFn(root, src, dst string, tarW *tar.Writer, meta *Meta,
return nil
}

if m := matchIgnoreRules(subpath, ignoreRules); m {
if r := matchIgnoreRules(subpath, ignoreRules); r.Match {
return nil
}

// Catch directories so we don't end up with empty directories,
// the files are ignored correctly
if info.IsDir() {
if m := matchIgnoreRules(subpath+string(os.PathSeparator), ignoreRules); m {
return nil
if r := matchIgnoreRules(subpath+string(os.PathSeparator), ignoreRules); r.Match {
if r.Dominating {
return filepath.SkipDir
} else {
return nil
}
}
}

Expand Down
9 changes: 7 additions & 2 deletions sourcebundle/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -631,7 +631,7 @@ func packagePrepareWalkFn(root string, ignoreRules *ignorefiles.Ruleset) filepat
if err != nil {
return fmt.Errorf("invalid .terraformignore rules: %#w", err)
}
if ignored {
if ignored.Match {
err := os.RemoveAll(absPath)
if err != nil {
return fmt.Errorf("failed to remove ignored file %s: %s", relPath, err)
Expand All @@ -641,12 +641,17 @@ func packagePrepareWalkFn(root string, ignoreRules *ignorefiles.Ruleset) filepat

// For directories we also need to check with a path separator on the
// end, which ignores entire subtrees.
//
// TODO: What about exclusion rules that follow a matching directory?
// Example:
// /logs
// !/logs/production/*
if info.IsDir() {
ignored, err := ignoreRules.Excludes(relPath + string(os.PathSeparator))
if err != nil {
return fmt.Errorf("invalid .terraformignore rules: %#w", err)
}
if ignored {
if ignored.Match {
err := os.RemoveAll(absPath)
if err != nil {
return fmt.Errorf("failed to remove ignored file %s: %s", relPath, err)
Expand Down
2 changes: 1 addition & 1 deletion terraformignore.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func parseIgnoreFile(rootPath string) *ignorefiles.Ruleset {
return ret
}

func matchIgnoreRules(path string, ruleset *ignorefiles.Ruleset) bool {
func matchIgnoreRules(path string, ruleset *ignorefiles.Ruleset) ignorefiles.ExcludesResult {
// Ruleset.Excludes explicitly allows ignoring its error, in which
// case we are ignoring any individual invalid rules in the set
// but still taking all of the others into account.
Expand Down

0 comments on commit ff9113c

Please sign in to comment.