Skip to content

Commit

Permalink
[metrics] Fix overhead_program metrics for return probes
Browse files Browse the repository at this point in the history
Let's assume the following example:

$ cat pol.yaml
apiVersion: cilium.io/v1alpha1
kind: TracingPolicy
metadata:
  name: "file-monitoring-mmap"
spec:
  kprobes:
  - call: "security_mmap_file"
    syscall: false
    return: true
    args:
    - index: 0
      type: "file" # (struct file *) used for getting the path
    - index: 1
      type: "uint32" # the prot flags PROT_READ(0x01), PROT_WRITE(0x02), PROT_EXEC(0x04)
    - index: 2
      type: "nop" # the mmap flags (i.e. MAP_SHARED, ...)
    returnArg:
      index: 0
      type: "int"
    returnArgAction: "Post"
    selectors:
    - matchArgs:
      - index: 0
        operator: "Prefix"
        values:
        - "/etc/" # filenames to filter for
$ sudo ./tetragon --btf /sys/kernel/btf/vmlinux  --bpf-lib ./bpf/objs/ --metrics-server ':2112' --tracing-policy ./pol.yaml  --disable-kprobe-multi

After that, if we try to get the metrics from another terminal we get
the following errors:

$ curl http://localhost:2112/metrics
An error has occurred while serving metrics:

2 error(s) occurred:
* collected metric "tetragon_overhead_program_seconds_total" { label:{name:"attach"  value:"security_mmap_file"}  label:{name:"policy"  value:"file-monitoring-mmap"}  label:{name:"policy_namespace"  value:""}  label:{name:"sensor"  value:"generic_kprobe"}  counter:{value:0}} was collected before with the same name and label values
* collected metric "tetragon_overhead_program_runs_total" { label:{name:"attach"  value:"security_mmap_file"}  label:{name:"policy"  value:"file-monitoring-mmap"}  label:{name:"policy_namespace"  value:""}  label:{name:"sensor"  value:"generic_kprobe"}  counter:{value:0}} was collected before with the same name and label values

The issue here is that we get two metrics with the same labels. This
happens because we need the retprobe as well (i.e. returnArg), and it
has the same name as the kprobe.

To fix that, we need to add another label for the section that we use to
attach. This patch adds that label; with it, the metrics from the previous
example become:

tetragon_overhead_program_seconds_total{attach="security_mmap_file",policy="file-monitoring-mmap",policy_namespace="",section="kprobe/generic_kprobe",sensor="generic_kprobe"} 0
tetragon_overhead_program_seconds_total{attach="security_mmap_file",policy="file-monitoring-mmap",policy_namespace="",section="kprobe/generic_retkprobe",sensor="generic_kprobe"} 0

This reports both the attach function (i.e. security_mmap_file) and the
program that we use to attach (i.e. kprobe/generic_kprobe and
kprobe/generic_retkprobe).

Signed-off-by: Anastasios Papagiannis <[email protected]>
  • Loading branch information
tpapagian authored and kkourt committed Nov 5, 2024
1 parent 718db2c commit 0e789ea
Show file tree
Hide file tree
Showing 4 changed files with 10 additions and 4 deletions.
2 changes: 2 additions & 0 deletions docs/content/en/docs/reference/metrics.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions pkg/metrics/overhead/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ func collect(ch chan<- prometheus.Metric) {
}

for _, ovh := range overheads {
ch <- time.MustMetric(float64(ovh.RunTime), ovh.Namespace, ovh.Policy, ovh.Sensor, ovh.Attach)
ch <- runs.MustMetric(float64(ovh.RunCnt), ovh.Namespace, ovh.Policy, ovh.Sensor, ovh.Attach)
ch <- time.MustMetric(float64(ovh.RunTime), ovh.Namespace, ovh.Policy, ovh.Sensor, ovh.Attach, ovh.Label)
ch <- runs.MustMetric(float64(ovh.RunCnt), ovh.Namespace, ovh.Policy, ovh.Sensor, ovh.Attach, ovh.Label)
}
}

func collectForDocs(ch chan<- prometheus.Metric) {
ch <- time.MustMetric(0, "ns", "enforce", "generic_kprobe", "sys_open")
ch <- runs.MustMetric(0, "ns", "enforce", "generic_kprobe", "sys_open")
ch <- time.MustMetric(0, "ns", "enforce", "generic_kprobe", "sys_open", "kprobe/sys_open")
ch <- runs.MustMetric(0, "ns", "enforce", "generic_kprobe", "sys_open", "kprobe/sys_open")
}
2 changes: 2 additions & 0 deletions pkg/metrics/overhead/overhead.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ var (
metrics.LabelPolicy,
metrics.UnconstrainedLabel{Name: "sensor", ExampleValue: "generic_kprobe"},
metrics.UnconstrainedLabel{Name: "attach", ExampleValue: "sys_open"},
metrics.UnconstrainedLabel{Name: "section", ExampleValue: "kprobe/sys_open"},
},
))

Expand All @@ -28,6 +29,7 @@ var (
metrics.LabelPolicy,
metrics.UnconstrainedLabel{Name: "sensor", ExampleValue: "generic_kprobe"},
metrics.UnconstrainedLabel{Name: "attach", ExampleValue: "sys_open"},
metrics.UnconstrainedLabel{Name: "section", ExampleValue: "kprobe/sys_open"},
},
))
)
2 changes: 2 additions & 0 deletions pkg/sensors/sensors.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ type ProgOverhead struct {
Policy string
Sensor string
Attach string
Label string
RunTime uint64
RunCnt uint64
}
Expand Down Expand Up @@ -127,6 +128,7 @@ func (s *Sensor) Overhead() ([]ProgOverhead, bool) {

list = append(list, ProgOverhead{
Attach: p.Attach,
Label: p.Label,
Sensor: s.Name,
RunTime: uint64(runTime),
RunCnt: runCnt,
Expand Down

0 comments on commit 0e789ea

Please sign in to comment.