Skip to content

Commit

Permalink
Add regular expressions to ban transformer
Browse files Browse the repository at this point in the history
This will only work on text or byte arrays. Also includes some basic
benchmarks to demonstrate performance, though this could be more
specific. It's important to note that we need to be very careful about
types.
  • Loading branch information
KristianLyng committed Sep 26, 2024
1 parent 0b68707 commit dcdd196
Show file tree
Hide file tree
Showing 3 changed files with 175 additions and 7 deletions.
38 changes: 38 additions & 0 deletions data_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ import (
"github.com/telenornms/skogul"
"github.com/telenornms/skogul/parser"
"log"
"regexp"
"strings"
"testing"
"time"
)
Expand Down Expand Up @@ -203,6 +205,42 @@ func BenchmarkValidate(b *testing.B) {

}

// BenchmarkCompareText measures exact string equality checks of a fixed
// target against a small set of sample strings. It serves as the baseline
// for the regular expression and substring benchmarks in this file.
func BenchmarkCompareText(b *testing.B) {
	data := []string{"the fox jumps over the some-variable=na something", "this is fine, nothing is on fire", "only 1337 allowed"}
	str := "only 1337 allowed"
	// Count the hits and verify the total afterwards so that the comparison
	// has an observable result; a discarded comparison may be optimized away.
	matches := 0
	for i := 0; i < b.N; i++ {
		for _, candidate := range data {
			if candidate == str {
				matches++
			}
		}
	}
	// Exactly one sample equals the target, so each iteration adds one hit.
	if matches != b.N {
		b.Fatalf("expected %d matches, got %d", b.N, matches)
	}
}
// BenchmarkCompareRegexp measures matching a pre-compiled regular expression
// against a small set of sample strings, including the string-to-[]byte
// conversion needed to call Match.
func BenchmarkCompareRegexp(b *testing.B) {
	data := []string{"the fox jumps over the some-variable=na something", "this is fine, nothing is on fire", "only 1337 allowed"}
	exp, err := regexp.Compile(".*some-variable=.*")
	if err != nil {
		b.Fatalf("Couldn't compile regexp: %v", err)
	}
	matches := 0
	// Compiling the expression is setup, not the operation being measured.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for _, candidate := range data {
			// The conversion is deliberately kept inside the loop: it is part
			// of what this benchmark has always measured.
			if exp.Match([]byte(candidate)) {
				matches++
			}
		}
	}
	// Only the first sample contains "some-variable=", so each iteration
	// adds exactly one hit.
	if matches != b.N {
		b.Fatalf("expected %d matches, got %d", b.N, matches)
	}
}
// BenchmarkCompareSubstr measures substring searches with strings.Contains
// over a small set of sample strings.
func BenchmarkCompareSubstr(b *testing.B) {
	data := []string{"the fox jumps over the some-variable=na something", "this is fine, nothing is on fire", "only 1337 allowed"}
	// Count the hits and verify the total afterwards so the search result is
	// actually used and the benchmark fails loudly if the samples change.
	matches := 0
	for i := 0; i < b.N; i++ {
		for _, candidate := range data {
			if strings.Contains(candidate, "1337") {
				matches++
			}
		}
	}
	// Only the last sample contains "1337", so each iteration adds one hit.
	if matches != b.N {
		b.Fatalf("expected %d matches, got %d", b.N, matches)
	}
}

func TestString_invalid(t *testing.T) {
c := skogul.Container{}
metric1 := skogul.Metric{}
Expand Down
96 changes: 92 additions & 4 deletions transformer/ban.go
Original file line number Diff line number Diff line change
@@ -1,23 +1,51 @@
package transformer

import (
"fmt"
"github.com/dolmen-go/jsonptr"
"github.com/telenornms/skogul"
"regexp"
"sync"
)

// Ban is a transformer that drops metrics from a container when the value
// found at a configured JSON pointer matches. The Lookup* maps compare for
// equality; the Regexp* maps match string or []byte values against a
// regular expression.
type Ban struct {
LookupData map[string]interface{} `doc:"Map of key value pairs to lookup in metrics. Looks in data fields. Key is json pointer, value any. E.g. /foo/bar: \"bar\""`
LookupMetadata map[string]interface{} `doc:"Map of key value pairs to lookup in metrics. Looks in metadata fields. Key is json pointer, value any. E.g. /foo/bar: \"bar\""`
LookupData map[string]interface{} `doc:"Map of key value pairs to lookup in metrics. Looks in data fields. Key is json pointer, value any. E.g. /foo/bar: \"bar\". This is an exact match and can use any data type."`
LookupMetadata map[string]interface{} `doc:"Map of key value pairs to lookup in metrics. Looks in metadata fields. Key is json pointer, value any. E.g. /foo/bar: \"bar\". This is an exact match and can use any data type."`
RegexpData map[string]string `doc:"Map of key value pairs to lookup in metrics. Looks in data fields. Key is json pointer, value any. E.g. /foo/bar: \"bar\". This uses regular expression and only works on strings or byte arrays."`
RegexpMetadata map[string]string `doc:"Map of key value pairs to lookup in metrics. Looks in metadata fields. Key is json pointer, value any. E.g. /foo/bar: \"bar\". This uses a regular expression and only works on strings or byte arrays."`
// dataRegexps and metadataRegexps hold the compiled forms of RegexpData and
// RegexpMetadata, keyed by the same JSON pointer. Transform fills them on
// its first call.
dataRegexps map[string]*regexp.Regexp
metadataRegexps map[string]*regexp.Regexp
// err records a regular expression compilation failure from that first
// call; Transform returns it wrapped.
err error
// init guards the one-time compilation performed in Transform.
init sync.Once
}

func (b *Ban) Transform(c *skogul.Container) error {
b.init.Do(func() {
b.dataRegexps = make(map[string]*regexp.Regexp)
b.metadataRegexps = make(map[string]*regexp.Regexp)
for pathKey, pathValue := range b.RegexpData {
b.dataRegexps[pathKey], b.err = regexp.Compile(pathValue)
if b.err != nil {
return
}
}
for pathKey, pathValue := range b.RegexpMetadata {
b.metadataRegexps[pathKey], b.err = regexp.Compile(pathValue)
if b.err != nil {
return
}
}
})

if b.err != nil {
return fmt.Errorf("unable to compile regexp: %w", b.err)
}

for pathKey, pathValue := range b.LookupData {
newMetrics := make([]*skogul.Metric, 0, len(c.Metrics))
for _, mi := range c.Metrics {
var ptr interface{}
ptr, _ = jsonptr.Get(mi.Data, pathKey)

if ptr != pathValue {
newMetrics = append(newMetrics, mi)
}
Expand All @@ -29,16 +57,76 @@ func (b *Ban) Transform(c *skogul.Container) error {
newMetrics := make([]*skogul.Metric, 0, len(c.Metrics))
for _, mi := range c.Metrics {
var ptr interface{}

ptr, _ = jsonptr.Get(mi.Metadata, pathKey)
if ptr != pathValue {
newMetrics = append(newMetrics, mi)
}
}
c.Metrics = newMetrics
}

for pathKey, pathValue := range b.dataRegexps {
newMetrics := make([]*skogul.Metric, 0, len(c.Metrics))
for _, mi := range c.Metrics {
var ptr interface{}
ptr, _ = jsonptr.Get(mi.Data, pathKey)
var sptr []byte
switch ptr.(type) {
case string:
sptr = []byte(ptr.(string))
case []byte:
sptr = ptr.([]byte)
default:
newMetrics = append(newMetrics, mi)
continue
}
if !pathValue.Match(sptr) {
newMetrics = append(newMetrics, mi)
}
}
c.Metrics = newMetrics
}

for pathKey, pathValue := range b.metadataRegexps {
newMetrics := make([]*skogul.Metric, 0, len(c.Metrics))
for _, mi := range c.Metrics {
var ptr interface{}

ptr, _ = jsonptr.Get(mi.Metadata, pathKey)
var sptr []byte
switch ptr.(type) {
case string:
sptr = []byte(ptr.(string))
case []byte:
sptr = ptr.([]byte)
default:
newMetrics = append(newMetrics, mi)
continue
}
if !pathValue.Match(sptr) {
newMetrics = append(newMetrics, mi)
}
}
c.Metrics = newMetrics
}
newMetrics := make([]*skogul.Metric, len(c.Metrics))
copy(newMetrics, c.Metrics)
c.Metrics = newMetrics
return nil
}

// Verify checks that every pattern configured in RegexpData and
// RegexpMetadata compiles as a regular expression. Data patterns are checked
// before metadata patterns. It returns an error describing a pattern that
// fails to compile, or nil when all of them are valid.
func (b *Ban) Verify() error {
	patternSets := []map[string]string{b.RegexpData, b.RegexpMetadata}
	for _, patterns := range patternSets {
		for _, expr := range patterns {
			if _, compileErr := regexp.Compile(expr); compileErr != nil {
				return fmt.Errorf("unable to compile regexp `%s': %w", expr, compileErr)
			}
		}
	}
	return nil
}
48 changes: 45 additions & 3 deletions transformer/ban_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,9 +109,41 @@ func TestBan(t *testing.T) {
"funny": "",
},
}
metric7 := skogul.Metric{
Metadata: map[string]interface{}{
"bar2": "hmm",
},
Data: map[string]interface{}{
"foo2": "dette er 1339, two steps ahead",
},
}
metric8 := skogul.Metric{
Metadata: map[string]interface{}{
"bar2": "hmm",
},
Data: map[string]interface{}{
"foo2": "dette er 1337, akkurat passe",
},
}
metric9 := skogul.Metric{
Metadata: map[string]interface{}{
"bar2": "1234578901337890",
},
Data: map[string]interface{}{
"foo2": "dette er 1335, litt veikt",
},
}
metric10 := skogul.Metric{
Metadata: map[string]interface{}{
"bar2": []byte("1234578901337890"),
},
Data: map[string]interface{}{
"foo2": "dette er 1335, litt veikt",
},
}

c := skogul.Container{}
c.Metrics = []*skogul.Metric{&metric, &metric2, &metric3, &metric4, &metric5, &metric6}
c.Metrics = []*skogul.Metric{&metric, &metric2, &metric3, &metric4, &metric5, &metric6, &metric7, &metric8, &metric9, &metric10}

ban := &transformer.Ban{}

Expand All @@ -125,14 +157,24 @@ func TestBan(t *testing.T) {
"/funny": "",
}

ban.RegexpData = map[string]string{
"/foo2": ".*1337.*",
}
ban.RegexpMetadata = map[string]string{
"/bar2": ".*1337.*",
}

err := ban.Transform(&c)
if err != nil {
t.Fatalf("error occurred %v", err.Error())
}
if len(c.Metrics) != 1 {
if len(c.Metrics) != 2 {
for _, x := range c.Metrics {
t.Logf("metric left: %#v", x)
}
t.Fatalf("expected exactly 1 metric to remain, got %d", len(c.Metrics))
}
if cap(c.Metrics) != 1 {
if cap(c.Metrics) != 2 {
t.Fatalf("expected exactly len(metrics) == 1, got %d", cap(c.Metrics))
}
}

0 comments on commit dcdd196

Please sign in to comment.