Skip to content

Commit

Permalink
[service/proctelemetry] Offer to override the HOST_PROC environment v…
Browse files Browse the repository at this point in the history
…ariable with a programmatic value (#7998)

Reprising
#7434

**Description:** 

While debugging the following error in a Kubernetes environment:
````
Error: failed to register process metrics: process does not exist
2023/03/23 03:44:47 main.go:115: application run finished with error: failed to register process metrics: process does not exist
````
I noticed that the metric server calls gopsutil while the
`HOST_PROC` variable is set. This causes gopsutil's `PidExistsWithContext`
to look up the process on the host instead of in the container:

````
func PidExistsWithContext(ctx context.Context, pid int32) (bool, error) {
	if pid <= 0 {
		return false, fmt.Errorf("invalid pid %v", pid)
	}
	proc, err := os.FindProcess(int(pid))
	if err != nil {
		return false, err
	}

	if isMount(common.HostProc()) { // if /<HOST_PROC>/proc exists and is mounted, check if /<HOST_PROC>/proc/<PID> folder exists
		_, err := os.Stat(common.HostProc(strconv.Itoa(int(pid))))
		if os.IsNotExist(err) {
			return false, nil
		}
		return err == nil, err
	}
````
This PR unsets and resets the `HOST_PROC` variable and introduces an
option that lets callers use `HOST_PROC` if, for whatever reason, they need to.

**Link to tracking Issue:**
Fixes #7435

**Testing:**
unit tests

---------

Signed-off-by: Dani Louca <[email protected]>
Co-authored-by: Dani Louca <[email protected]>
Co-authored-by: Alex Boten <[email protected]>
  • Loading branch information
3 people authored Aug 15, 2023
1 parent 5b47503 commit 55902b6
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 4 deletions.
40 changes: 36 additions & 4 deletions service/internal/proctelemetry/process_telemetry.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (
"sync"
"time"

"github.com/shirou/gopsutil/v3/common"
"github.com/shirou/gopsutil/v3/process"
"go.opencensus.io/metric"
"go.opencensus.io/stats"
Expand All @@ -27,6 +28,7 @@ type processMetrics struct {
startTimeUnixNano int64
ballastSizeBytes uint64
proc *process.Process
context context.Context

processUptime *metric.Float64DerivedCumulative
allocMem *metric.Int64DerivedGauge
Expand All @@ -49,17 +51,47 @@ type processMetrics struct {
ms *runtime.MemStats
}

// RegisterOption is an optional setting accepted by RegisterProcessMetrics.
// Each option mutates the internal registerOption value through apply; because
// apply is unexported, options can only be implemented inside this package.
type RegisterOption interface {
// apply writes this option's setting into the given registerOption.
apply(*registerOption)
}

// registerOption collects the settings produced by the RegisterOption values
// passed to RegisterProcessMetrics.
type registerOption struct {
// hostProc is the proc directory handed to gopsutil through the context.
// An empty string means no override is installed.
hostProc string
}

// registerOptionFunc adapts a plain function to the RegisterOption interface.
type registerOptionFunc func(*registerOption)

// apply implements RegisterOption by invoking the wrapped function on the
// supplied option set.
func (f registerOptionFunc) apply(o *registerOption) {
	f(o)
}

// WithHostProc returns a RegisterOption that overrides the /proc folder on
// Linux used by process telemetry with hostProc.
func WithHostProc(hostProc string) RegisterOption {
	setHostProc := func(o *registerOption) {
		o.hostProc = hostProc
	}
	return registerOptionFunc(setHostProc)
}

// RegisterProcessMetrics creates a new set of processMetrics (mem, cpu) that can be used to measure
// basic information about this process.
func RegisterProcessMetrics(ocRegistry *metric.Registry, mp otelmetric.MeterProvider, useOtel bool, ballastSizeBytes uint64) error {
func RegisterProcessMetrics(ocRegistry *metric.Registry, mp otelmetric.MeterProvider, useOtel bool, ballastSizeBytes uint64, opts ...RegisterOption) error {
set := registerOption{}
for _, opt := range opts {
opt.apply(&set)
}
var err error
pm := &processMetrics{
startTimeUnixNano: time.Now().UnixNano(),
ballastSizeBytes: ballastSizeBytes,
ms: &runtime.MemStats{},
}

pm.proc, err = process.NewProcess(int32(os.Getpid()))
ctx := context.Background()
if set.hostProc != "" {
ctx = context.WithValue(ctx, common.EnvKey, common.EnvMap{common.HostProcEnvKey: set.hostProc})
}
pm.context = ctx
pm.proc, err = process.NewProcessWithContext(pm.context, int32(os.Getpid()))
if err != nil {
return err
}
Expand Down Expand Up @@ -231,7 +263,7 @@ func (pm *processMetrics) updateSysMem() int64 {
}

func (pm *processMetrics) updateCPUSeconds() float64 {
times, err := pm.proc.Times()
times, err := pm.proc.TimesWithContext(pm.context)
if err != nil {
return 0
}
Expand All @@ -241,7 +273,7 @@ func (pm *processMetrics) updateCPUSeconds() float64 {
}

func (pm *processMetrics) updateRSSMemory() int64 {
mem, err := pm.proc.MemoryInfo()
mem, err := pm.proc.MemoryInfoWithContext(pm.context)
if err != nil {
return 0
}
Expand Down
53 changes: 53 additions & 0 deletions service/internal/proctelemetry/process_telemetry_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Copyright The OpenTelemetry Authors
// SPDX-License-Identifier: Apache-2.0

//go:build linux
// +build linux

package proctelemetry

import (
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opencensus.io/metric"
"go.opentelemetry.io/otel/metric/noop"
)

// TestOCProcessTelemetryWithHostProc verifies that a proc directory supplied
// through WithHostProc is honored even when the HOST_PROC environment variable
// points somewhere else, and that every expected metric gets reported.
func TestOCProcessTelemetryWithHostProc(t *testing.T) {
	// Set a bogus HOST_PROC so the test only passes if the environment
	// variable value is not used.
	t.Setenv("HOST_PROC", "foo/bar")

	registry := metric.NewRegistry()
	require.NoError(t, RegisterProcessMetrics(registry, noop.NewMeterProvider(), false, 0, WithHostProc("/proc")))

	// Give the metrics a moment to be filled in before reading them.
	time.Sleep(200 * time.Millisecond)

	collected := registry.Read()

	for _, metricName := range expectedMetrics {
		found := findMetric(collected, metricName)
		require.NotNil(t, found)
		require.Len(t, found.TimeSeries, 1)

		series := found.TimeSeries[0]
		assert.Len(t, series.LabelValues, 0)
		require.Len(t, series.Points, 1)

		point := series.Points[0]
		switch metricName {
		case "process/uptime", "process/cpu_seconds":
			// Float-valued metrics; these will likely still be zero when
			// the test runs, so only require a non-negative value.
			assert.GreaterOrEqual(t, point.Value.(float64), float64(0), metricName)
		default:
			// Integer-valued metrics must report a strictly positive value.
			assert.Greater(t, float64(point.Value.(int64)), float64(0), metricName)
		}
	}
}

0 comments on commit 55902b6

Please sign in to comment.