Skip to content

Commit

Permalink
WIP Refactor metrics to avoid using promauto package
Browse files Browse the repository at this point in the history
Signed-off-by: Anna Kapuscinska <[email protected]>
  • Loading branch information
lambdanis committed Aug 1, 2023
1 parent e99f864 commit 254ffe1
Show file tree
Hide file tree
Showing 14 changed files with 128 additions and 42 deletions.
7 changes: 5 additions & 2 deletions pkg/grpc/tracing/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,21 @@ package tracing
import (
"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
LoaderStats = promauto.NewCounterVec(prometheus.CounterOpts{
LoaderStats = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "process_loader_stats",
Help: "Process Loader event statistics. For internal use only.",
ConstLabels: nil,
}, []string{"count"})
)

// InitMetrics registers the LoaderStats counter vector with registry.
// Registration goes through MustRegister, so a failure surfaces as a panic
// rather than a returned error.
func InitMetrics(registry *prometheus.Registry) {
registry.MustRegister(LoaderStats)
}

type LoaderType int

const (
Expand Down
10 changes: 7 additions & 3 deletions pkg/metrics/errormetrics/errormetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

type ErrorType string
Expand All @@ -35,21 +34,26 @@ var (
)

var (
ErrorTotal = promauto.NewCounterVec(prometheus.CounterOpts{
ErrorTotal = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "errors_total",
Help: "The total number of Tetragon errors. For internal use only.",
ConstLabels: nil,
}, []string{"type"})

HandlerErrors = promauto.NewCounterVec(prometheus.CounterOpts{
HandlerErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "handler_errors_total",
Help: "The total number of event handler errors. For internal use only.",
ConstLabels: nil,
}, []string{"opcode", "error_type"})
)

// InitMetrics registers the error counters declared in this package
// (ErrorTotal, then HandlerErrors) with registry. MustRegister processes its
// arguments in order and panics on the first collector that fails to register.
func InitMetrics(registry *prometheus.Registry) {
	registry.MustRegister(
		ErrorTotal,
		HandlerErrors,
	)
}

// Get a new handle on an ErrorTotal metric for an ErrorType
func GetErrorTotal(t ErrorType) prometheus.Counter {
return ErrorTotal.WithLabelValues(string(t))
Expand Down
13 changes: 9 additions & 4 deletions pkg/metrics/eventcachemetrics/eventcachemetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,35 @@ package eventcachemetrics
import (
"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
processInfoErrors = promauto.NewCounterVec(prometheus.CounterOpts{
processInfoErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "event_cache_process_info_errors_total",
Help: "The total of times we failed to fetch cached process info for a given event type.",
ConstLabels: nil,
}, []string{"event_type"})
podInfoErrors = promauto.NewCounterVec(prometheus.CounterOpts{
podInfoErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "event_cache_pod_info_errors_total",
Help: "The total of times we failed to fetch cached pod info for a given event type.",
ConstLabels: nil,
}, []string{"event_type"})
EventCacheCount = promauto.NewCounter(prometheus.CounterOpts{
EventCacheCount = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "event_cache_accesses_total",
Help: "The total number of Tetragon event cache accesses. For internal use only.",
ConstLabels: nil,
})
)

// InitMetrics registers the event cache collectors declared in this package
// with registry: processInfoErrors, podInfoErrors and EventCacheCount, in that
// order. MustRegister panics on the first collector that fails to register.
func InitMetrics(registry *prometheus.Registry) {
	registry.MustRegister(
		processInfoErrors,
		podInfoErrors,
		EventCacheCount,
	)
}

// Get a new handle on a processInfoErrors metric for an eventType
func ProcessInfoError(eventType string) prometheus.Counter {
return processInfoErrors.WithLabelValues(eventType)
Expand Down
16 changes: 11 additions & 5 deletions pkg/metrics/eventmetrics/eventmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,37 +16,43 @@ import (
"github.com/cilium/tetragon/pkg/reader/exec"
"github.com/cilium/tetragon/pkg/tracingpolicy"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
EventsProcessed = promauto.NewCounterVec(prometheus.CounterOpts{
EventsProcessed = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "events_total",
Help: "The total number of Tetragon events",
ConstLabels: nil,
}, []string{"type", "namespace", "pod", "binary"})
FlagCount = promauto.NewCounterVec(prometheus.CounterOpts{
FlagCount = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "flags_total",
Help: "The total number of Tetragon flags. For internal use only.",
ConstLabels: nil,
}, []string{"type"})
NotifyOverflowedEvents = promauto.NewCounter(prometheus.CounterOpts{
NotifyOverflowedEvents = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "notify_overflowed_events_total",
Help: "The total number of events dropped because listener buffer was full",
ConstLabels: nil,
})

policyStats = promauto.NewCounterVec(prometheus.CounterOpts{
policyStats = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "policy_events_total",
Help: "Policy events calls observed.",
ConstLabels: nil,
}, []string{"policy", "hook", "namespace", "pod", "binary"})
)

// InitMetrics registers the event collectors declared in this package with
// registry: EventsProcessed, FlagCount, NotifyOverflowedEvents and policyStats,
// in that order. MustRegister panics on the first collector that fails to
// register.
func InitMetrics(registry *prometheus.Registry) {
	registry.MustRegister(
		EventsProcessed,
		FlagCount,
		NotifyOverflowedEvents,
		policyStats,
	)
}

func GetProcessInfo(process *tetragon.Process) (binary, pod, namespace string) {
if process != nil {
binary = process.Binary
Expand Down
13 changes: 9 additions & 4 deletions pkg/metrics/kprobemetrics/kprobemetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,30 +6,35 @@ package kprobemetrics
import (
"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
MergeErrors = promauto.NewCounterVec(prometheus.CounterOpts{
MergeErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "generic_kprobe_merge_errors_total",
Help: "The total number of failed attempts to merge a kprobe and kretprobe event.",
ConstLabels: nil,
}, []string{"curr_fn", "curr_type", "prev_fn", "prev_type"})
MergeOkTotal = promauto.NewCounter(prometheus.CounterOpts{
MergeOkTotal = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "generic_kprobe_merge_ok_total",
Help: "The total number of successful attempts to merge a kprobe and kretprobe event.",
ConstLabels: nil,
})
MergePushed = promauto.NewCounter(prometheus.CounterOpts{
MergePushed = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "generic_kprobe_merge_pushed_total",
Help: "The total number of pushed events for later merge.",
ConstLabels: nil,
})
)

// InitMetrics registers the kprobe merge collectors declared in this package
// with registry: MergeErrors, MergeOkTotal and MergePushed, in that order.
// MustRegister panics on the first collector that fails to register.
func InitMetrics(registry *prometheus.Registry) {
	registry.MustRegister(
		MergeErrors,
		MergeOkTotal,
		MergePushed,
	)
}

// Get a new handle on the MergeErrors metric for a current and previous function
// name and probe type
func GetMergeErrors(currFn, currType, prevFn, prevType string) prometheus.Counter {
Expand Down
10 changes: 7 additions & 3 deletions pkg/metrics/mapmetrics/mapmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,25 +8,29 @@ import (

"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
MapSize = promauto.NewGaugeVec(prometheus.GaugeOpts{
MapSize = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: consts.MetricsNamespace,
Name: "map_in_use_gauge",
Help: "The total number of in-use entries per map.",
ConstLabels: nil,
}, []string{"map", "total"})

MapDrops = promauto.NewCounterVec(prometheus.CounterOpts{
MapDrops = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "map_drops_total",
Help: "The total number of entries dropped per LRU map.",
ConstLabels: nil,
}, []string{"map"})
)

// InitMetrics registers the map collectors declared in this package with
// registry: MapSize first, then MapDrops. MustRegister panics on the first
// collector that fails to register.
func InitMetrics(registry *prometheus.Registry) {
	registry.MustRegister(
		MapSize,
		MapDrops,
	)
}

// Get a new handle on a MapSize metric for a mapName and totalCapacity
func GetMapSize(mapName string, totalCapacity int) prometheus.Gauge {
return MapSize.WithLabelValues(mapName, fmt.Sprint(totalCapacity))
Expand Down
34 changes: 33 additions & 1 deletion pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,44 @@ package metrics
import (
"net/http"

"github.com/cilium/tetragon/pkg/grpc/tracing"
"github.com/cilium/tetragon/pkg/logger"
"github.com/cilium/tetragon/pkg/metrics/errormetrics"
"github.com/cilium/tetragon/pkg/metrics/eventcachemetrics"
"github.com/cilium/tetragon/pkg/metrics/eventmetrics"
"github.com/cilium/tetragon/pkg/metrics/kprobemetrics"
"github.com/cilium/tetragon/pkg/metrics/mapmetrics"
"github.com/cilium/tetragon/pkg/metrics/opcodemetrics"
pfmetrics "github.com/cilium/tetragon/pkg/metrics/policyfilter"
"github.com/cilium/tetragon/pkg/metrics/processexecmetrics"
"github.com/cilium/tetragon/pkg/metrics/ringbufmetrics"
"github.com/cilium/tetragon/pkg/metrics/syscallmetrics"
"github.com/cilium/tetragon/pkg/metrics/watchermetrics"
"github.com/cilium/tetragon/pkg/observer"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
)

// InitAllMetrics passes registry to the InitMetrics function of every metrics
// package imported by this file, one after another in the order written below.
// Each call is expected to register that package's collectors on registry.
// NOTE(review): the InitMetrics functions shown in this change use
// MustRegister, so calling InitAllMetrics twice with the same registry would
// presumably panic on the duplicate registration - confirm before reusing a
// registry.
func InitAllMetrics(registry *prometheus.Registry) {
errormetrics.InitMetrics(registry)
eventcachemetrics.InitMetrics(registry)
eventmetrics.InitMetrics(registry)
kprobemetrics.InitMetrics(registry)
mapmetrics.InitMetrics(registry)
opcodemetrics.InitMetrics(registry)
pfmetrics.InitMetrics(registry)
processexecmetrics.InitMetrics(registry)
ringbufmetrics.InitMetrics(registry)
syscallmetrics.InitMetrics(registry)
watchermetrics.InitMetrics(registry)
observer.InitMetrics(registry)
tracing.InitMetrics(registry)
}

// EnableMetrics serves Tetragon's metrics over HTTP on address under the
// /metrics path. It blocks for as long as the server is running.
//
// A dedicated registry is created here, populated through InitAllMetrics, and
// is the only registry exposed by the handler. The handler is installed on
// http.DefaultServeMux, so EnableMetrics must be called at most once per
// process: registering the same pattern a second time panics.
//
// NOTE(review): a registry created with prometheus.NewRegistry starts empty,
// whereas the default registry used before this change carries the Go runtime
// and process collectors. Confirm whether those metrics should be registered
// on reg as well.
func EnableMetrics(address string) {
	reg := prometheus.NewRegistry()
	InitAllMetrics(reg)

	log := logger.GetLogger().WithField("addr", address)
	log.Info("Starting metrics server")

	// Serve only what was registered on reg. Exactly one handler may be
	// installed for "/metrics"; http.Handle panics on a duplicate pattern.
	http.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg}))

	// ListenAndServe always returns a non-nil error once it stops (for
	// example when the address is already in use). Report it instead of
	// dropping it so a dead metrics endpoint is visible in the logs.
	if err := http.ListenAndServe(address, nil); err != nil {
		log.WithError(err).Error("Metrics server stopped")
	}
}
10 changes: 7 additions & 3 deletions pkg/metrics/opcodemetrics/opcodemetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,17 @@ import (

"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
MsgOpsCount = promauto.NewCounterVec(prometheus.CounterOpts{
MsgOpsCount = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "msg_op_total",
Help: "The total number of times we encounter a given message opcode. For internal use only.",
ConstLabels: nil,
}, []string{"msg_op"})

LatencyStats = promauto.NewHistogramVec(prometheus.HistogramOpts{
LatencyStats = prometheus.NewHistogramVec(prometheus.HistogramOpts{
Namespace: consts.MetricsNamespace,
Name: "handling_latency",
Help: "The latency of handling messages in us.",
Expand All @@ -28,6 +27,11 @@ var (
}, []string{"op"})
)

// InitMetrics registers the opcode collectors declared in this package with
// registry: MsgOpsCount first, then LatencyStats. MustRegister panics on the
// first collector that fails to register.
func InitMetrics(registry *prometheus.Registry) {
	registry.MustRegister(
		MsgOpsCount,
		LatencyStats,
	)
}

// Get a new handle on a MsgOpsCount metric for an OpCode
func GetOpTotal(op int) prometheus.Counter {
return MsgOpsCount.WithLabelValues(fmt.Sprint(op))
Expand Down
7 changes: 5 additions & 2 deletions pkg/metrics/policyfilter/policyfilter.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,21 @@ import (
"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/pkg/errors"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
PolicyFilterOpMetrics = promauto.NewCounterVec(prometheus.CounterOpts{
PolicyFilterOpMetrics = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "policyflter_metrics_total",
Help: "Policy filter metrics. For internal use only.",
ConstLabels: nil,
}, []string{"subsys", "op", "error_type"})
)

// InitMetrics registers the PolicyFilterOpMetrics counter vector with
// registry. Registration goes through MustRegister, so a failure surfaces as
// a panic rather than a returned error.
func InitMetrics(registry *prometheus.Registry) {
registry.MustRegister(PolicyFilterOpMetrics)
}

func OpInc(subsys, op string, err error) {
PolicyFilterOpMetrics.WithLabelValues(
subsys, op,
Expand Down
10 changes: 7 additions & 3 deletions pkg/metrics/processexecmetrics/processexecmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,28 @@ package processexecmetrics
import (
"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
MissingParentErrors = promauto.NewCounterVec(prometheus.CounterOpts{
MissingParentErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "exec_missing_parent_errors_total",
Help: "The total of times a given parent exec id could not be found in an exec event.",
ConstLabels: nil,
}, []string{"parent_exec_id"})
SameExecIdErrors = promauto.NewCounterVec(prometheus.CounterOpts{
SameExecIdErrors = prometheus.NewCounterVec(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "exec_parent_child_same_id_errors_total",
Help: "The total of times an error occurs due to a parent and child process have the same exec id.",
ConstLabels: nil,
}, []string{"exec_id"})
)

// InitMetrics registers the exec error counters declared in this package with
// registry: MissingParentErrors first, then SameExecIdErrors. MustRegister
// panics on the first collector that fails to register.
func InitMetrics(registry *prometheus.Registry) {
	registry.MustRegister(
		MissingParentErrors,
		SameExecIdErrors,
	)
}

// Get a new handle on the MissingParentErrors metric for an execId
func GetMissingParent(execId string) prometheus.Counter {
return MissingParentErrors.WithLabelValues(execId)
Expand Down
13 changes: 9 additions & 4 deletions pkg/metrics/ringbufmetrics/ringbufmetrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,26 +6,31 @@ package ringbufmetrics
import (
"github.com/cilium/tetragon/pkg/metrics/consts"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)

var (
PerfEventReceived = promauto.NewCounter(prometheus.CounterOpts{
PerfEventReceived = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "ringbuf_perf_event_received_total",
Help: "The total number of Tetragon ringbuf perf events received.",
ConstLabels: nil,
})
PerfEventLost = promauto.NewCounter(prometheus.CounterOpts{
PerfEventLost = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "ringbuf_perf_event_lost_total",
Help: "The total number of Tetragon ringbuf perf events lost.",
ConstLabels: nil,
})
PerfEventErrors = promauto.NewCounter(prometheus.CounterOpts{
PerfEventErrors = prometheus.NewCounter(prometheus.CounterOpts{
Namespace: consts.MetricsNamespace,
Name: "ringbuf_perf_event_errors_total",
Help: "The total number of errors when reading the Tetragon ringbuf.",
ConstLabels: nil,
})
)

// InitMetrics registers the ring buffer counters declared in this package
// with registry: PerfEventReceived, PerfEventLost and PerfEventErrors, in that
// order. MustRegister panics on the first collector that fails to register.
func InitMetrics(registry *prometheus.Registry) {
	registry.MustRegister(
		PerfEventReceived,
		PerfEventLost,
		PerfEventErrors,
	)
}
Loading

0 comments on commit 254ffe1

Please sign in to comment.