Skip to content

Commit

Permalink
internal/url: Improve scheme splitting
Browse files Browse the repository at this point in the history
  • Loading branch information
tkw1536 committed Oct 7, 2024
1 parent 3f5bbc8 commit e107598
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 95 deletions.
10 changes: 2 additions & 8 deletions env/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,14 +66,8 @@ func ParseURL(s string) (repo URL) {
// normalize for windows
s = windowsReplacer.Replace(s)

// Trim off a leading scheme (as separated by '://') and (if it is valid) store it.
{
scheme, rest := split.Before(s, "://")
if url.IsValidURLScheme(scheme) {
repo.Scheme = scheme
s = rest
}
}
// split off the url scheme
repo.Scheme, s = url.SplitURLScheme(s)

// Next, we split of the authentication if we have an '@' sign.
// Technically the if { } clause isn't required, the code will work fine without it.
Expand Down
15 changes: 0 additions & 15 deletions internal/split/split.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,24 +9,9 @@ import (
"unicode/utf8"
)

// Before splits the string s into a part before a separator, called the prefix, and a part after the separator, called the suffix.
// If separator is not contained in the source string, prefix is empty and suffix is equal to the input string.
//
// See also AfterRune.
func Before(s, sep string) (prefix, suffix string) {
if i := strings.Index(s, sep); i >= 0 {
return s[:i], s[i+len(sep):]
}
return "", s
}

// AfterRune splits the string s into a part before a separator, called the prefix, and a part after the separator, called the suffix.
// If separator is not contained in the source string, suffix is empty and prefix is equal to the input string.
//
// See also Before.
func AfterRune(s string, sep rune) (prefix, suffix string) {
// NOTE: This uses sep as a rune, because nothing else is required.
// And that turns out to be the most efficient variant.
if i := strings.IndexRune(s, sep); i >= 0 {
return s[:i], s[i+utf8.RuneLen(sep):]
}
Expand Down
36 changes: 0 additions & 36 deletions internal/split/split_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,42 +6,6 @@ import (
"testing"
)

func TestBefore(t *testing.T) {
type args struct {
s string
sep string
}
tests := []struct {
name string
args args
wantPrefix string
wantSuffix string
}{
{"splitFoundOnce", args{"a;b", ";"}, "a", "b"},
{"splitFoundMultiple", args{"a;b;c", ";"}, "a", "b;c"},
{"splitNotFound", args{"aaa", ";"}, "", "aaa"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
gotPrefix, gotSuffix := Before(tt.args.s, tt.args.sep)
if gotPrefix != tt.wantPrefix {
t.Errorf("Before() gotPrefix = %v, want %v", gotPrefix, tt.wantPrefix)
}
if gotSuffix != tt.wantSuffix {
t.Errorf("Before() gotSuffix = %v, want %v", gotSuffix, tt.wantSuffix)
}
})
}
}

func Benchmark_Before(b *testing.B) {
for i := 0; i < b.N; i++ {
Before("a;b", ";")
Before("a;b;c", ";")
Before("aaa", ";")
}
}

func TestAfterRune(t *testing.T) {
type args struct {
s string
Expand Down
43 changes: 34 additions & 9 deletions internal/url/url.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,20 +62,23 @@ func ParsePort(s string) (port uint16, err error) {
return port, nil
}

// IsValidURLScheme checks if a string represents a valid URL scheme.
// SplitURLScheme splits off the URL scheme from s, returning the rest of the string in rest.
// If it does not contain a valid scheme, returns "", s.
//
// A valid url scheme is one that matches the regex [a-zA-Z][a-zA-Z0-9+\-\.]*.
func IsValidURLScheme(s string) bool {
// An obvious implementation of this function would be:
// regexp.MustCompile(`^[a-zA-Z][a-zA-Z0-9+\-\.]*$`).MatchString(s)
// A scheme is of the form 'scheme://rest'.
// Scheme must match the regular expression `^[a-zA-Z][a-zA-Z0-9+\-\.]*$`.
func SplitURLScheme(s string) (scheme string, rest string) {
// An obvious implementation of this function would simply match against the
// regular expression.
//
// However such an implementation would be a lot slower when called repeatedly.
// Instead we directly build code that implements this regex.
// Instead we directly build code that directly implements the trimming.

// Iterate over the bytes in this string.
// We can do this safely, because any valid scheme must be ascii.
bytes := []byte(s)
if len(bytes) == 0 {
return false
return "", s
}

var (
Expand All @@ -92,12 +95,18 @@ nextLetter:
// or be done
idx++
if idx >= len(bytes) {
return true
goto noScheme
}

// get the current letter
c = bytes[idx]

// reached end of the scheme
// we can make use of this because no valid scheme has a ':'.
if c == ':' {
goto scheme
}

if '0' <= c && c <= '9' {
goto nextLetter
}
Expand All @@ -115,5 +124,21 @@ start:
goto nextLetter
}

return false
noScheme:
// not a valid letter
return "", s

scheme:
// split into scheme and rest
scheme = string(bytes[:idx])
rest = string(bytes[idx+1:])

// check that the rest starts with "//"
if len(rest) < 2 || rest[0] != '/' || rest[1] != '/' {
goto noScheme
}

// and trim off the valid prefix
rest = rest[2:]
return
}
57 changes: 30 additions & 27 deletions internal/url/url_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,42 +78,45 @@ func Benchmark_ParsePort(b *testing.B) {
}
}

func Test_IsValidURLScheme(t *testing.T) {
func Test_SplitURLScheme(t *testing.T) {
type args struct {
scheme string
input string
}
tests := []struct {
name string
args args
want bool
name string
args args
wantScheme string
wantRest string
}{
{"http scheme is valid", args{"http"}, true},
{"https scheme is valid", args{"https"}, true},
{"git scheme is valid", args{"git"}, true},
{"ssh scheme is valid", args{"ssh"}, true},
{"ssh2 scheme is valid", args{"ssh2"}, true},
{"file scheme is valid", args{"file"}, true},
{"combined with + scheme is valid", args{"ssh+git"}, true},
{"combined with - scheme is valid", args{"ssh-git"}, true},
{"combined with . scheme is valid", args{"ssh.git"}, true},
{name: "http scheme is valid", args: args{"http://rest"}, wantScheme: "http", wantRest: "rest"},
{name: "https scheme is valid", args: args{"https://rest"}, wantScheme: "https", wantRest: "rest"},
{name: "git scheme is valid", args: args{"git://rest"}, wantScheme: "git", wantRest: "rest"},
{name: "ssh scheme is valid", args: args{"ssh://rest"}, wantScheme: "ssh", wantRest: "rest"},
{name: "ssh2 scheme is valid", args: args{"ssh2://rest"}, wantScheme: "ssh2", wantRest: "rest"},
{name: "file scheme is valid", args: args{"file://rest"}, wantScheme: "file", wantRest: "rest"},
{name: "combined with + scheme is valid", args: args{"ssh+git://rest"}, wantScheme: "ssh+git", wantRest: "rest"},
{name: "combined with - scheme is valid", args: args{"ssh-git://rest"}, wantScheme: "ssh-git", wantRest: "rest"},
{name: "combined with . scheme is valid", args: args{"ssh.git://rest"}, wantScheme: "ssh.git", wantRest: "rest"},

{"empty scheme is invalid", args{""}, false},
{"numerical scheme is invalid", args{"01234"}, false},
{"numerical starting scheme is invalid", args{"01git"}, false},
{"non-scheme is invalid", args{"://"}, false},
{name: "valid scheme without valid ending", args: args{"valid"}, wantScheme: "", wantRest: "valid"},
{name: "valid scheme with only :", args: args{"valid:"}, wantScheme: "", wantRest: "valid:"},
{name: "valid scheme with only one /", args: args{"valid:/"}, wantScheme: "", wantRest: "valid:/"},
{name: "valid scheme without rest", args: args{"valid://"}, wantScheme: "valid", wantRest: ""},

{name: "empty input passed through", args: args{""}, wantScheme: "", wantRest: ""},
{name: "numerical scheme is invalid", args: args{"01234"}, wantScheme: "", wantRest: "01234"},
{name: "numerical starting scheme is invalid", args: args{"01git"}, wantScheme: "", wantRest: "01git"},
{name: "non-scheme is invalid", args: args{"://"}, wantScheme: "", wantRest: "://"},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if got := IsValidURLScheme(tt.args.scheme); got != tt.want {
t.Errorf("validateScheme() = %v, want %v", got, tt.want)
gotScheme, gotRest := SplitURLScheme(tt.args.input)
if gotScheme != tt.wantScheme {
t.Errorf("SplitURLScheme() scheme = %v, want %v", gotScheme, tt.wantScheme)
}
if gotRest != tt.wantRest {
t.Errorf("SplitURLScheme() rest = %v, want %v", gotRest, tt.wantRest)
}
})
}
}

func Benchmark_IsValidURLScheme(b *testing.B) {
for i := 0; i < b.N; i++ {
IsValidURLScheme("0http")
IsValidURLScheme("http")
}
}

0 comments on commit e107598

Please sign in to comment.