From a1efafdad22f18c960f6a3bc273072f24f988121 Mon Sep 17 00:00:00 2001 From: benclive Date: Wed, 3 Jul 2024 11:33:55 +0100 Subject: [PATCH] perf: Limit tokens per pattern to 128 (#13376) --- pkg/pattern/drain/drain_test.go | 9 ++++----- pkg/pattern/drain/line_tokenizer.go | 23 ++++++++++------------- pkg/pattern/drain/line_tokenizer_test.go | 17 +++++++++++++++++ 3 files changed, 31 insertions(+), 18 deletions(-) diff --git a/pkg/pattern/drain/drain_test.go b/pkg/pattern/drain/drain_test.go index 0f9313391c1e..16e2841e794e 100644 --- a/pkg/pattern/drain/drain_test.go +++ b/pkg/pattern/drain/drain_test.go @@ -44,7 +44,7 @@ func TestDrain_TrainExtractsPatterns(t *testing.T) { `ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:181 level=info component=logs logs_config=default msg="received file watcher event" name=/var/log/pods/<_><_><_>/<_><_><_>.<_> op=CREATE`, `ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:181 level=info component=logs logs_config=default msg="received file watcher event" name=/var/log/pods/<_><_><_>/<_><_><_>.<_>.<_> op=CREATE`, `ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:181 level=info component=logs logs_config=default msg="received file watcher event" name=/var/log/pods/hosted-grafana_.<_>/<_>/0.log.<_>.<_> op=CREATE`, - `ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:<_> level=info component=logs logs_config=default msg="<_> target" key="/var/log/pods/*<_>/<_>/*.log:{app=\"grafana\", conprof=\"true\", container=\"<_>\", instanceId=\"<_>\", job=\"hosted-grafana/grafana\", name=\"grafana\", namespace=\"hosted-grafana\", org=\"<_>\", plan=\"free\", pod=\"<_>\", pod_template_hash=\"<_>\", resource_version=\"<_>\", slug=\"<_>\", stackId=\"<_>\"}"`, + `ts=2024-04-16T15:10:<_>.<_> caller=filetargetmanager.go:<_> level=info component=logs logs_config=default msg="<_> target" key="/var/log/pods/*<_>/<_>/*.log:{app=\"grafana\", conprof=\"true\", container=\"<_>\", instanceId=\"<_>\", job=\"hosted-grafana/grafana\", name=\"grafana\", namespace=\"hosted-grafana\", org=\"<_>\", plan=\"free\", pod=<_>`, `ts=2024-04-16T15:10:<_>.<_> caller=log.go:168 component=logs logs_config=default level=info msg="Re-opening moved/deleted file /var/log/pods/<_>/<_>/<_>.log ..."`, `ts=2024-04-16T15:10:<_>.<_> caller=log.go:168 component=logs logs_config=default level=info msg="Re-opening moved/deleted file /var/log/pods/hosted-grafana_.<_>/<_>/0.log ..."`, `ts=2024-04-16T15:10:<_>.<_> caller=log.go:168 component=logs logs_config=default level=info msg="Seeked /var/log/pods/<_>/<_>/0.log - &{Offset:0 Whence:0}"`, @@ -118,12 +118,12 @@ func TestDrain_TrainExtractsPatterns(t *testing.T) { `E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"<_>\" with CreateContainerConfigError: \"secret \\\"<_>\\\" not found\"" pod="<_>/<_>" podUID="<_>"`, `E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"<_>\" with ImagePullBackOff: \"Back-off pulling image \\\"us.gcr.io/hosted-grafana/<_>:<_>.<_>.<_>\\\"\"" pod="<_>/<_>" podUID="<_>"`, `E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"<_>\" with ImagePullBackOff: \"Back-off pulling image \\\"us.gcr.io/kubernetes-dev/<_>:<_>\\\"\"" pod="<_>/<_>" podUID="<_>"`, - `E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"grafana\" with ErrImagePull: \"[rpc error: code = NotFound desc = failed to pull and unpack image \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": failed to resolve reference \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>: not found, failed to pull and unpack image \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": failed to resolve reference \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": unexpected status from HEAD request to https://us.gcr.io/v2/hosted-grafana/hosted-grafana-pro/manifests/<_>.1.<_>: 403 Forbidden]\"" pod="hosted-grafana/<_>" podUID="<_>"`, + `E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"grafana\" with ErrImagePull: \"[rpc error: code = NotFound desc = failed to pull and unpack image \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": failed to resolve reference \\\"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\\\": us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>: not <_>`, `E0507 11:59:<_>.<_> <_> pod_workers.go:1300] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"pdc\" with ErrImageNeverPull: \"Container image \\\"us.gcr.io/hosted-grafana/pdc:0.1.415\\\" is not present with pull policy of Never\"" pod="pdc/<_>" podUID="<_>"`, `E0507 11:59:<_>.<_> <_> prober.go:104] "Probe errored" err="rpc error: code = NotFound desc = failed to exec in container: failed to load task: no running task found: task <_> not found: not found" probeType="Readiness" pod="hosted-grafana/<_>" podUID="<_>" containerName="grafana"`, `E0507 11:59:<_>.<_> <_> prober.go:239] "Unable to write all bytes from execInContainer" err="short write" expectedBytes=<_> actualBytes=10240`, `E0507 11:59:<_>.<_> <_> remote_image.go:180] "PullImage from image service failed" err="rpc error: code = NotFound desc = failed to pull and unpack image \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": failed to resolve reference \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>: not found" image="us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>"`, - `E0507 11:59:<_>.<_> <_> remote_image.go:180] "PullImage from image service failed" err="rpc error: code = Unknown desc = failed to pull and unpack image \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": failed to resolve reference \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": unexpected status from HEAD request to https://us.gcr.io/v2/hosted-grafana/hosted-grafana-pro/manifests/<_>.1.<_>: 403 Forbidden" image="us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>"`, + `E0507 11:59:<_>.<_> <_> remote_image.go:180] "PullImage from image service failed" err="rpc error: code = Unknown desc = failed to pull and unpack image \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": failed to resolve reference \"us.gcr.io/hosted-grafana/hosted-grafana-pro:<_>.1.<_>\": unexpected status from HEAD request to https://us.gcr.io/v2/hosted-grafana/hosted-grafana-pro/manifests/<_>.1.<_>: 403 Forbidden" image="us.gcr.io/hosted-grafana/<_>`, `E0507 11:59:<_>.<_> <_> remote_runtime.go:432] "ContainerStatus from runtime service failed" err="rpc error: code = NotFound desc = an error occurred when try to find container \"<_>\": not found" containerID="<_>"`, `E0507 11:59:<_>.<_> <_> remote_runtime.go:496] "ExecSync cmd from runtime service failed" err="rpc error: code = NotFound desc = failed to exec in container: failed to load task: no running task found: task <_> not found: not found" containerID="<_>" cmd=["/bin/hgrun","check"]`, `I0507 11:59:31.815514 2791 azure_credentials.go:220] image(us.gcr.io/hosted-grafana/hosted-grafana-pro) is not from ACR, return empty authentication`, @@ -311,10 +311,9 @@ func TestDrain_TrainExtractsPatterns(t *testing.T) { `2024-05-08 15:23:56.<_> [DEBUG][76] felix/route_table.go 686: Reconcile against kernel programming ifaceName="<_>" ifaceRegex="^azv.*" ipVersion=0x4 tableIndex=0`, `2024-05-08 15:23:56.<_> [DEBUG][76] felix/route_table.go 880: Processing route: 254 <_> 10.68.10.<_>/32 ifaceName="<_>" ifaceRegex="^azv.*" ipVersion=0x4 tableIndex=0`, `2024-05-08 15:23:56.<_> [DEBUG][76] felix/route_table.go 915: Route is correct dest=10.68.10.<_>/32 ifaceName="<_>" ifaceRegex="^azv.*" ipVersion=0x4 tableIndex=0`, - `2024-05-08 15:23:57.942 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "", "", "", "", "", "", "", "", "tVnHkvAo15HuiPy0", "", ""} chainName="OUTPUT" expectedRuleIDs=[]string{"tVnHkvAo15HuiPy0", "", "", "", "", "", "", "", "", "", "", "", "", "", ""} ipVersion=0x4 table="raw"`, `2024-05-08 15:23:57.942 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "6gwbT8clXdHdC1b1"} chainName="PREROUTING" expectedRuleIDs=[]string{"6gwbT8clXdHdC1b1", "", "", "", ""} ipVersion=0x4 table="raw"`, - `2024-05-08 15:23:57.969 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "Cz_u1IQiXIMmKD4c", "", "", "", "", "", "", "", "", "", "", "", ""} chainName="INPUT" expectedRuleIDs=[]string{"Cz_u1IQiXIMmKD4c", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""} ipVersion=0x4 table="filter"`, `2024-05-08 15:23:57.969 [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "tVnHkvAo15HuiPy0", "", "", "", "", ""} chainName="OUTPUT" expectedRuleIDs=[]string{"tVnHkvAo15HuiPy0", "", "", "", "", "", "", "", "", ""} ipVersion=0x4 table="filter"`, + `2024-05-08 15:23:57.<_> [WARNING][56] felix/table.go 654: Detected out-of-sync inserts, marking for resync actualRuleIDs=[]string{"", "", "", "", "<_><_><_> "<_><_> "<_><_> "<_><_> "<_><_> "<_><_> "<_><_> "<_><_> <_>", "", ""<_> <_><_><_> <_>"<_> <_><_><_><_><_> <_><_><_><_><_><_><_><_>", "", "", "", "", "", "", "", "", ""<_>`, `2024-05-08 15:23:58.169 [INFO][2333] felix/summary.go 100: Summarising 35 dataplane reconciliation loops over 1m2s: avg=12ms longest=46ms (resync-filter-v4,resync-filter-v6,resync-mangle-v4,resync-mangle-v6,update-filter-v4,update-filter-v6)`, `2024-05-08 15:23:58.566 [DEBUG][3576126] felix/int_dataplane.go 957: Examining link for MTU calculation mtu=1500 name="eth0"`, `2024-05-08 15:23:58.680 [DEBUG][216945] felix/int_dataplane.go 1785: Reschedule kick received`, diff --git a/pkg/pattern/drain/line_tokenizer.go b/pkg/pattern/drain/line_tokenizer.go index 89bf34a5569b..9a366edc6fe8 100644 --- a/pkg/pattern/drain/line_tokenizer.go +++ b/pkg/pattern/drain/line_tokenizer.go @@ -38,39 +38,36 @@ func newPunctuationTokenizer() *punctuationTokenizer { } func (p *punctuationTokenizer) Tokenize(line string) ([]string, interface{}) { - tokens := make([]string, len(line)) // Maximum size is every character is punctuation - spacesAfter := make([]int, strings.Count(line, " ")) // Could be a bitmap, but it's not worth it for a few bytes. + tokens := make([]string, 0, 128) + spacesAfter := make([]int, 0, 64) start := 0 - nextTokenIdx := 0 - nextSpaceIdx := 0 for i, char := range line { + if len(tokens) >= cap(tokens)-1 { + break + } if unicode.IsLetter(char) || unicode.IsNumber(char) || char < 128 && p.excludeDelimiters[char] != 0 { continue } included := char < 128 && p.includeDelimiters[char] != 0 if char == ' ' || included || unicode.IsPunct(char) { if i > start { - tokens[nextTokenIdx] = line[start:i] - nextTokenIdx++ + tokens = append(tokens, line[start:i]) } if char == ' ' { - spacesAfter[nextSpaceIdx] = nextTokenIdx - 1 - nextSpaceIdx++ + spacesAfter = append(spacesAfter, len(tokens)-1) } else { - tokens[nextTokenIdx] = line[i : i+1] - nextTokenIdx++ + tokens = append(tokens, line[i:i+1]) } start = i + 1 } } if start < len(line) { - tokens[nextTokenIdx] = line[start:] - nextTokenIdx++ + tokens = append(tokens, line[start:]) } - return tokens[:nextTokenIdx], spacesAfter[:nextSpaceIdx] + return tokens, spacesAfter } func (p *punctuationTokenizer) Join(tokens []string, state interface{}) string { diff --git a/pkg/pattern/drain/line_tokenizer_test.go b/pkg/pattern/drain/line_tokenizer_test.go index 1eda1b51068a..7aac061d28f3 100644 --- a/pkg/pattern/drain/line_tokenizer_test.go +++ b/pkg/pattern/drain/line_tokenizer_test.go @@ -1,6 +1,7 @@ package drain import ( + "strings" "testing" "github.com/stretchr/testify/require" @@ -88,6 +89,22 @@ var testCases = []TestCase{ typeSplitting: {"ts=", "2024-05-30T12:50:36.648377186Z", "caller=", "scheduler_processor.go:143", "level=", "warn", "msg=", "\"error", "contacting", "scheduler\"", "err=", "\"rpc", "error:", "code", "=", ``, "Unavailable", "desc", "=", ``, "connection", "error:", "desc", "=", ``, `\"error`, "reading", "server", "preface:", `EOF\""`, "addr=", "10.0.151.101:9095"}, }, }, + { + name: "Exactly 128 tokens are not combined", + line: strings.Repeat(`A `, 126) + "127 128", + want: map[string][]string{ + typePunctuation: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128"), + typeSplitting: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128"), + }, + }, + { + name: "More than 128 tokens combined suffix into one token", + line: strings.Repeat(`A `, 126) + "127 128 129", + want: map[string][]string{ + typePunctuation: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128 129"), + typeSplitting: append(strings.Split(strings.Repeat(`A `, 126), " ")[:126], "127", "128", "129"), + }, + }, { name: "Only punctation", line: `!@£$%^&*()`,