Skip to content

Commit

Permalink
Add crd metrics usage information (#2825)
Browse files Browse the repository at this point in the history
* Add crd metrics usage information

Signed-off-by: Ruben Vargas <[email protected]>

* Add mode metric

Signed-off-by: Ruben Vargas <[email protected]>

* Refactor CR metrics

Signed-off-by: Ruben Vargas <[email protected]>

* Add annotation to avoid generate Metrics

Signed-off-by: Ruben Vargas <[email protected]>

* Add unit tests

Signed-off-by: Ruben Vargas <[email protected]>

* remove space

Signed-off-by: Ruben Vargas <[email protected]>

* remove global provider

Signed-off-by: Ruben Vargas <[email protected]>

* Update main.go

Co-authored-by: Israel Blancas <[email protected]>

* revert kusttomization.yaml

Signed-off-by: Ruben Vargas <[email protected]>

* rename some constants

Signed-off-by: Ruben Vargas <[email protected]>

* Add connectors metrics

Signed-off-by: Ruben Vargas <[email protected]>

* Update chlog

Signed-off-by: Ruben Vargas <[email protected]>

* merge new with init, rename some functions, improve changelog entry

Signed-off-by: Ruben Vargas <[email protected]>

* improve todo comment

Signed-off-by: Ruben Vargas <[email protected]>

* fix tests

Signed-off-by: Ruben Vargas <[email protected]>

* set flag to default false

Signed-off-by: Ruben Vargas <[email protected]>

* fix lint issues

Signed-off-by: Ruben Vargas <[email protected]>

* breaking line

Signed-off-by: Ruben Vargas <[email protected]>

* Use api reader to avoid cache issues

Signed-off-by: Ruben Vargas <[email protected]>

* Add info metric to changelog entry

Signed-off-by: Ruben Vargas <[email protected]>

---------

Signed-off-by: Ruben Vargas <[email protected]>
Co-authored-by: Israel Blancas <[email protected]>
  • Loading branch information
rubenvp8510 and iblancasa committed May 30, 2024
1 parent 9a186e1 commit dac4774
Show file tree
Hide file tree
Showing 11 changed files with 1,166 additions and 9 deletions.
25 changes: 25 additions & 0 deletions .chloggen/usage_metrics.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: enhancement

# The name of the component, or a single word describing the area of concern, (e.g. collector, target allocator, auto-instrumentation, opamp, github action)
component: collector

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Add usage metrics for the collector

# One or more tracking issues related to the change
issues: [2829]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext: |
This change will add metrics to the OpenTelemetry operator about how the collector is used in the cluster,
it will add the following metrics to the opentelemetry-operator metrics endpoint
```
opentelemetry_collector_receivers{collector_name="collector_name", namespace="ns", type="otlp"} 1
opentelemetry_collector_exporters{collector_name="collector_name", namespace="ns", type="otlp"} 1
opentelemetry_collector_processors{collector_name="collector_name", namespace="ns", type="otlp"} 1
opentelemetry_collector_connectors{collector_name="collector_name", namespace="ns", type="myconnector"} 0
opentelemetry_collector_info{collector_name="simplest",namespace="default", type="deployment"} 1
```
46 changes: 41 additions & 5 deletions apis/v1beta1/collector_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ type CollectorWebhook struct {
cfg config.Config
scheme *runtime.Scheme
reviewer *rbac.Reviewer
metrics *Metrics
}

func (c CollectorWebhook) Default(_ context.Context, obj runtime.Object) error {
Expand Down Expand Up @@ -166,23 +167,57 @@ func (c CollectorWebhook) ValidateCreate(ctx context.Context, obj runtime.Object
if !ok {
return nil, fmt.Errorf("expected an OpenTelemetryCollector, received %T", obj)
}
return c.validate(ctx, otelcol)

warnings, err := c.validate(ctx, otelcol)
if err != nil {
return warnings, err
}
if c.metrics != nil {
c.metrics.create(ctx, otelcol)
}

return warnings, nil
}

func (c CollectorWebhook) ValidateUpdate(ctx context.Context, _, newObj runtime.Object) (admission.Warnings, error) {
func (c CollectorWebhook) ValidateUpdate(ctx context.Context, oldObj, newObj runtime.Object) (admission.Warnings, error) {
otelcol, ok := newObj.(*OpenTelemetryCollector)
if !ok {
return nil, fmt.Errorf("expected an OpenTelemetryCollector, received %T", newObj)
}
return c.validate(ctx, otelcol)

otelcolOld, ok := oldObj.(*OpenTelemetryCollector)
if !ok {
return nil, fmt.Errorf("expected an OpenTelemetryCollector, received %T", oldObj)
}

warnings, err := c.validate(ctx, otelcol)
if err != nil {
return warnings, err
}

if c.metrics != nil {
c.metrics.update(ctx, otelcolOld, otelcol)
}

return warnings, nil
}

func (c CollectorWebhook) ValidateDelete(ctx context.Context, obj runtime.Object) (admission.Warnings, error) {
otelcol, ok := obj.(*OpenTelemetryCollector)
if !ok || otelcol == nil {
return nil, fmt.Errorf("expected an OpenTelemetryCollector, received %T", obj)
}
return c.validate(ctx, otelcol)

warnings, err := c.validate(ctx, otelcol)
if err != nil {
return warnings, err
}

if c.metrics != nil {
c.metrics.delete(ctx, otelcol)
}

return warnings, nil
}

func (c CollectorWebhook) validate(ctx context.Context, r *OpenTelemetryCollector) (admission.Warnings, error) {
Expand Down Expand Up @@ -419,12 +454,13 @@ func checkAutoscalerSpec(autoscaler *AutoscalerSpec) error {
return nil
}

func SetupCollectorWebhook(mgr ctrl.Manager, cfg config.Config, reviewer *rbac.Reviewer) error {
func SetupCollectorWebhook(mgr ctrl.Manager, cfg config.Config, reviewer *rbac.Reviewer, metrics *Metrics) error {
cvw := &CollectorWebhook{
reviewer: reviewer,
logger: mgr.GetLogger().WithValues("handler", "CollectorWebhook", "version", "v1beta1"),
scheme: mgr.GetScheme(),
cfg: cfg,
metrics: metrics,
}
return ctrl.NewWebhookManagedBy(mgr).
For(&OpenTelemetryCollector{}).
Expand Down
231 changes: 231 additions & 0 deletions apis/v1beta1/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,231 @@
// Copyright The OpenTelemetry Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package v1beta1

import (
"context"
"fmt"
"strings"

"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/metric"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/metrics"
)

const (
meterName = "crd-metrics"
)

// Metric labels

const (
prefix = "opentelemetry_collector_"
receiversMetricName = prefix + "receivers"
exportersMetricName = prefix + "exporters"
processorsMetricName = prefix + "processors"
extensionsMetricName = prefix + "extensions"
connectorsMetricName = prefix + "connectors"
modeMetricName = prefix + "info"
)

// TODO: Refactor this logic, centralize it. See: https://github.com/open-telemetry/opentelemetry-operator/issues/2603
type components struct {
receivers []string
processors []string
exporters []string
extensions []string
connectors []string
}

// Metrics hold all gauges for the different metrics related to the CRs
// +kubebuilder:object:generate=false
type Metrics struct {
modeCounter metric.Int64UpDownCounter
receiversCounter metric.Int64UpDownCounter
exporterCounter metric.Int64UpDownCounter
processorCounter metric.Int64UpDownCounter
extensionsCounter metric.Int64UpDownCounter
connectorsCounter metric.Int64UpDownCounter
}

// BootstrapMetrics configures the OpenTelemetry meter provider with the Prometheus exporter.
func BootstrapMetrics() (metric.MeterProvider, error) {
exporter, err := prometheus.New(prometheus.WithRegisterer(metrics.Registry))
if err != nil {
return nil, err
}
return sdkmetric.NewMeterProvider(sdkmetric.WithReader(exporter)), err
}

func NewMetrics(prv metric.MeterProvider, ctx context.Context, cl client.Reader) (*Metrics, error) {
meter := prv.Meter(meterName)
modeCounter, err := meter.Int64UpDownCounter(modeMetricName)
if err != nil {
return nil, err
}
receiversCounter, err := meter.Int64UpDownCounter(receiversMetricName)
if err != nil {
return nil, err
}

exporterCounter, err := meter.Int64UpDownCounter(exportersMetricName)
if err != nil {
return nil, err
}

processorCounter, err := meter.Int64UpDownCounter(processorsMetricName)
if err != nil {
return nil, err
}

extensionsCounter, err := meter.Int64UpDownCounter(extensionsMetricName)
if err != nil {
return nil, err
}

connectorsCounter, err := meter.Int64UpDownCounter(connectorsMetricName)
if err != nil {
return nil, err
}

m := &Metrics{
modeCounter: modeCounter,
receiversCounter: receiversCounter,
exporterCounter: exporterCounter,
processorCounter: processorCounter,
extensionsCounter: extensionsCounter,
connectorsCounter: connectorsCounter,
}

err = m.init(ctx, cl)
if err != nil {
return nil, err
}
return m, nil
}

// Init metrics from the first time the operator starts.
func (m *Metrics) init(ctx context.Context, cl client.Reader) error {
opts := []client.ListOption{
client.MatchingLabels(map[string]string{
"app.kubernetes.io/managed-by": "opentelemetry-operator",
}),
}
list := &OpenTelemetryCollectorList{}
if err := cl.List(ctx, list, opts...); err != nil {
return fmt.Errorf("failed to list: %w", err)
}

for i := range list.Items {
m.create(ctx, &list.Items[i])
}
return nil
}

func (m *Metrics) create(ctx context.Context, collector *OpenTelemetryCollector) {
m.updateComponentCounters(ctx, collector, true)
m.updateGeneralCRMetricsComponents(ctx, collector, true)
}

func (m *Metrics) delete(ctx context.Context, collector *OpenTelemetryCollector) {
m.updateComponentCounters(ctx, collector, false)
m.updateGeneralCRMetricsComponents(ctx, collector, false)
}

func (m *Metrics) update(ctx context.Context, oldCollector *OpenTelemetryCollector, newCollector *OpenTelemetryCollector) {
m.delete(ctx, oldCollector)
m.create(ctx, newCollector)
}

func (m *Metrics) updateGeneralCRMetricsComponents(ctx context.Context, collector *OpenTelemetryCollector, up bool) {

inc := 1
if !up {
inc = -1
}
m.modeCounter.Add(ctx, int64(inc), metric.WithAttributes(
attribute.Key("collector_name").String(collector.Name),
attribute.Key("namespace").String(collector.Namespace),
attribute.Key("type").String(string(collector.Spec.Mode)),
))
}
func (m *Metrics) updateComponentCounters(ctx context.Context, collector *OpenTelemetryCollector, up bool) {
components := getComponentsFromConfig(collector.Spec.Config)
moveCounter(ctx, collector, components.receivers, m.receiversCounter, up)
moveCounter(ctx, collector, components.exporters, m.exporterCounter, up)
moveCounter(ctx, collector, components.processors, m.processorCounter, up)
moveCounter(ctx, collector, components.extensions, m.extensionsCounter, up)
moveCounter(ctx, collector, components.connectors, m.connectorsCounter, up)

}

func extractElements(elements map[string]interface{}) []string {
// TODO: we should get rid of this method and centralize the parse logic
// see https://github.com/open-telemetry/opentelemetry-operator/issues/2603
if elements == nil {
return []string{}
}

itemsMap := map[string]struct{}{}
var items []string
for key := range elements {
itemName := strings.SplitN(key, "/", 2)[0]
itemsMap[itemName] = struct{}{}
}
for key := range itemsMap {
items = append(items, key)
}
return items
}

func getComponentsFromConfig(yamlContent Config) *components {

info := &components{
receivers: extractElements(yamlContent.Receivers.Object),
exporters: extractElements(yamlContent.Exporters.Object),
}

if yamlContent.Processors != nil {
info.processors = extractElements(yamlContent.Processors.Object)
}

if yamlContent.Extensions != nil {
info.extensions = extractElements(yamlContent.Extensions.Object)
}

if yamlContent.Connectors != nil {
info.connectors = extractElements(yamlContent.Connectors.Object)
}

return info
}

func moveCounter(
ctx context.Context, collector *OpenTelemetryCollector, types []string, upDown metric.Int64UpDownCounter, up bool) {
for _, exporter := range types {
inc := 1
if !up {
inc = -1
}
upDown.Add(ctx, int64(inc), metric.WithAttributes(
attribute.Key("collector_name").String(collector.Name),
attribute.Key("namespace").String(collector.Namespace),
attribute.Key("type").String(exporter),
))
}
}
Loading

0 comments on commit dac4774

Please sign in to comment.