From dcdd196b886317097c686f0b5501a9d27657b8b3 Mon Sep 17 00:00:00 2001 From: Kristian Lyngstol Date: Thu, 26 Sep 2024 15:40:36 +0200 Subject: [PATCH] Add regular expressions to ban transformer This will only work on text or byte arrays. Also includes some basic benchmarks to demonstrate performance, though this could be more specific. It's important to note that we need to be very careful about types. --- data_test.go | 38 ++++++++++++++++ transformer/ban.go | 96 +++++++++++++++++++++++++++++++++++++++-- transformer/ban_test.go | 48 +++++++++++++++++++-- 3 files changed, 175 insertions(+), 7 deletions(-) diff --git a/data_test.go b/data_test.go index cd7600b3..ad713b44 100644 --- a/data_test.go +++ b/data_test.go @@ -30,6 +30,8 @@ import ( "github.com/telenornms/skogul" "github.com/telenornms/skogul/parser" "log" + "regexp" + "strings" "testing" "time" ) @@ -203,6 +205,42 @@ func BenchmarkValidate(b *testing.B) { } +func BenchmarkCompareText(b *testing.B) { + data := []string{"the fox jumps over the some-variable=na something", "this is fine, nothing is on fire", "only 1337 allowed"} + str := "only 1337 allowed" + for i := 0; i < b.N; i++ { + for x := 0; x < 3; x++ { + if data[x] == str { + continue + } + } + } +} +func BenchmarkCompareRegexp(b *testing.B) { + data := []string{"the fox jumps over the some-variable=na something", "this is fine, nothing is on fire", "only 1337 allowed"} + exp, err := regexp.Compile(".*some-variable=.*") + if err != nil { + b.Fatalf("Couldn't compile regexp: %v", err) + } + for i := 0; i < b.N; i++ { + for x := 0; x < 3; x++ { + if exp.Match([]byte(data[x])) { + continue + } + } + } +} +func BenchmarkCompareSubstr(b *testing.B) { + data := []string{"the fox jumps over the some-variable=na something", "this is fine, nothing is on fire", "only 1337 allowed"} + for i := 0; i < b.N; i++ { + for x := 0; x < 3; x++ { + if strings.Contains(data[x], "1337") { + continue + } + } + } +} + func TestString_invalid(t *testing.T) { c := 
skogul.Container{} metric1 := skogul.Metric{} diff --git a/transformer/ban.go b/transformer/ban.go index ae1fb775..8b86988d 100644 --- a/transformer/ban.go +++ b/transformer/ban.go @@ -1,23 +1,51 @@ package transformer import ( + "fmt" "github.com/dolmen-go/jsonptr" "github.com/telenornms/skogul" + "regexp" + "sync" ) type Ban struct { - LookupData map[string]interface{} `doc:"Map of key value pairs to lookup in metrics. Looks in data fields. Key is json pointer, value any. E.g. /foo/bar: \"bar\""` - LookupMetadata map[string]interface{} `doc:"Map of key value pairs to lookup in metrics. Looks in metadata fields. Key is json pointer, value any. E.g. /foo/bar: \"bar\""` + LookupData map[string]interface{} `doc:"Map of key value pairs to lookup in metrics. Looks in data fields. Key is json pointer, value any. E.g. /foo/bar: \"bar\". This is an exact match and can use any data type."` + LookupMetadata map[string]interface{} `doc:"Map of key value pairs to lookup in metrics. Looks in metadata fields. Key is json pointer, value any. E.g. /foo/bar: \"bar\". This is an exact match and can use any data type."` + RegexpData map[string]string `doc:"Map of key value pairs to lookup in metrics. Looks in data fields. Key is json pointer, value is a regular expression. E.g. /foo/bar: \"bar\". This uses a regular expression and only works on strings or byte arrays."` + RegexpMetadata map[string]string `doc:"Map of key value pairs to lookup in metrics. Looks in metadata fields. Key is json pointer, value is a regular expression. E.g. /foo/bar: \"bar\". 
This uses a regular expression and only works on strings or byte arrays."` + dataRegexps map[string]*regexp.Regexp + metadataRegexps map[string]*regexp.Regexp + err error + init sync.Once } func (b *Ban) Transform(c *skogul.Container) error { + b.init.Do(func() { + b.dataRegexps = make(map[string]*regexp.Regexp) + b.metadataRegexps = make(map[string]*regexp.Regexp) + for pathKey, pathValue := range b.RegexpData { + b.dataRegexps[pathKey], b.err = regexp.Compile(pathValue) + if b.err != nil { + return + } + } + for pathKey, pathValue := range b.RegexpMetadata { + b.metadataRegexps[pathKey], b.err = regexp.Compile(pathValue) + if b.err != nil { + return + } + } + }) + + if b.err != nil { + return fmt.Errorf("unable to compile regexp: %w", b.err) + } for pathKey, pathValue := range b.LookupData { newMetrics := make([]*skogul.Metric, 0, len(c.Metrics)) for _, mi := range c.Metrics { var ptr interface{} ptr, _ = jsonptr.Get(mi.Data, pathKey) - if ptr != pathValue { newMetrics = append(newMetrics, mi) } @@ -29,7 +57,6 @@ func (b *Ban) Transform(c *skogul.Container) error { newMetrics := make([]*skogul.Metric, 0, len(c.Metrics)) for _, mi := range c.Metrics { var ptr interface{} - ptr, _ = jsonptr.Get(mi.Metadata, pathKey) if ptr != pathValue { newMetrics = append(newMetrics, mi) @@ -37,8 +64,69 @@ func (b *Ban) Transform(c *skogul.Container) error { } c.Metrics = newMetrics } + + for pathKey, pathValue := range b.dataRegexps { + newMetrics := make([]*skogul.Metric, 0, len(c.Metrics)) + for _, mi := range c.Metrics { + var ptr interface{} + ptr, _ = jsonptr.Get(mi.Data, pathKey) + var sptr []byte + switch ptr.(type) { + case string: + sptr = []byte(ptr.(string)) + case []byte: + sptr = ptr.([]byte) + default: + newMetrics = append(newMetrics, mi) + continue + } + if !pathValue.Match(sptr) { + newMetrics = append(newMetrics, mi) + } + } + c.Metrics = newMetrics + } + + for pathKey, pathValue := range b.metadataRegexps { + newMetrics := make([]*skogul.Metric, 0, 
len(c.Metrics)) + for _, mi := range c.Metrics { + var ptr interface{} + + ptr, _ = jsonptr.Get(mi.Metadata, pathKey) + var sptr []byte + switch ptr.(type) { + case string: + sptr = []byte(ptr.(string)) + case []byte: + sptr = ptr.([]byte) + default: + newMetrics = append(newMetrics, mi) + continue + } + if !pathValue.Match(sptr) { + newMetrics = append(newMetrics, mi) + } + } + c.Metrics = newMetrics + } newMetrics := make([]*skogul.Metric, len(c.Metrics)) copy(newMetrics, c.Metrics) c.Metrics = newMetrics return nil } + +func (b *Ban) Verify() error { + for _, pathValue := range b.RegexpData { + _, err := regexp.Compile(pathValue) + if err != nil { + return fmt.Errorf("unable to compile regexp `%s': %w", pathValue, err) + } + } + for _, pathValue := range b.RegexpMetadata { + _, err := regexp.Compile(pathValue) + if err != nil { + return fmt.Errorf("unable to compile regexp `%s': %w", pathValue, err) + } + } + return nil +} diff --git a/transformer/ban_test.go b/transformer/ban_test.go index 393efae9..dc0259d2 100644 --- a/transformer/ban_test.go +++ b/transformer/ban_test.go @@ -109,9 +109,41 @@ func TestBan(t *testing.T) { "funny": "", }, } + metric7 := skogul.Metric{ + Metadata: map[string]interface{}{ + "bar2": "hmm", + }, + Data: map[string]interface{}{ + "foo2": "dette er 1339, two steps ahead", + }, + } + metric8 := skogul.Metric{ + Metadata: map[string]interface{}{ + "bar2": "hmm", + }, + Data: map[string]interface{}{ + "foo2": "dette er 1337, akkurat passe", + }, + } + metric9 := skogul.Metric{ + Metadata: map[string]interface{}{ + "bar2": "1234578901337890", + }, + Data: map[string]interface{}{ + "foo2": "dette er 1335, litt veikt", + }, + } + metric10 := skogul.Metric{ + Metadata: map[string]interface{}{ + "bar2": []byte("1234578901337890"), + }, + Data: map[string]interface{}{ + "foo2": "dette er 1335, litt veikt", + }, + } c := skogul.Container{} - c.Metrics = []*skogul.Metric{&metric, &metric2, &metric3, &metric4, &metric5, &metric6} + c.Metrics = 
[]*skogul.Metric{&metric, &metric2, &metric3, &metric4, &metric5, &metric6, &metric7, &metric8, &metric9, &metric10} ban := &transformer.Ban{} @@ -125,14 +157,24 @@ func TestBan(t *testing.T) { "/funny": "", } + ban.RegexpData = map[string]string{ + "/foo2": ".*1337.*", + } + ban.RegexpMetadata = map[string]string{ + "/bar2": ".*1337.*", + } + err := ban.Transform(&c) if err != nil { t.Fatalf("error occurred %v", err.Error()) } - if len(c.Metrics) != 1 { + if len(c.Metrics) != 2 { + for _, x := range c.Metrics { + t.Logf("metric left: %#v", x) + } t.Fatalf("expected exactly 1 metric to remain, got %d", len(c.Metrics)) } - if cap(c.Metrics) != 1 { + if cap(c.Metrics) != 2 { t.Fatalf("expected exactly len(metrics) == 1, got %d", cap(c.Metrics)) } }