Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

PLATFORM-7278 | Fix Jaeger trace propagation #6

Merged
merged 4 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 24 additions & 13 deletions otelx/jaeger.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,15 @@ import (
"go.opentelemetry.io/otel/trace"
)

// Optionally, Config.Providers.Jaeger.LocalAgentAddress can be set.
// NOTE: If Config.Providers.Jaeger.Sampling.ServerURL is not specfied,
// AlwaysSample is used.
// SetupJaeger configures and returns a Jaeger tracer.
//
// The returned tracer will by default attempt to send spans to a local Jaeger agent.
// Optionally, [otelx.JaegerConfig.LocalAgentAddress] can be set to specify a different target.
//
// By default, unless a parent sampler has taken a sampling decision, every span is sampled.
// [otelx.JaegerSampling.TraceIdRatio] may be used to customize the sampling probability,
// optionally alongside [otelx.JaegerSampling.ServerURL] to consult a remote server
// for the sampling strategy to be used.
func SetupJaeger(t *Tracer, tracerName string, c *Config) (trace.Tracer, error) {
host, port, err := net.SplitHostPort(c.Providers.Jaeger.LocalAgentAddress)
if err != nil {
Expand All @@ -45,30 +51,35 @@ func SetupJaeger(t *Tracer, tracerName string, c *Config) (trace.Tracer, error)
}

samplingServerURL := c.Providers.Jaeger.Sampling.ServerURL
traceIdRatio := c.Providers.Jaeger.Sampling.TraceIdRatio

sampler := sdktrace.TraceIDRatioBased(traceIdRatio)

if samplingServerURL != "" {
jaegerRemoteSampler := jaegerremote.New(
sampler = jaegerremote.New(
"jaegerremote",
jaegerremote.WithSamplingServerURL(samplingServerURL),
jaegerremote.WithInitialSampler(sdktrace.TraceIDRatioBased(c.Providers.Jaeger.Sampling.TraceIdRatio)),
jaegerremote.WithInitialSampler(sampler),
)
tpOpts = append(tpOpts, sdktrace.WithSampler(jaegerRemoteSampler))
} else {
tpOpts = append(tpOpts, sdktrace.WithSampler(sdktrace.AlwaysSample()))
}

// Respect any sampling decision taken by the client.
sampler = sdktrace.ParentBased(sampler)
tpOpts = append(tpOpts, sdktrace.WithSampler(sampler))

tp := sdktrace.NewTracerProvider(tpOpts...)
otel.SetTracerProvider(tp)

// At the moment, software across our cloud stack only support Zipkin (B3)
// and Jaeger propagation formats. Proposals for standardized formats for
// context propagation are in the works (ref: https://www.w3.org/TR/trace-context/
// and Jaeger propagation formats. For interoperability with other setups,
// we also configure propagation using standardized formats for
// context propagation (ref: https://www.w3.org/TR/trace-context/
// and https://www.w3.org/TR/baggage/).
//
// Simply add propagation.TraceContext{} and propagation.Baggage{}
// here to enable those as well.
prop := propagation.NewCompositeTextMapPropagator(
jaegerPropagator.Jaeger{},
b3.New(b3.WithInjectEncoding(b3.B3MultipleHeader|b3.B3SingleHeader)),
propagation.TraceContext{},
propagation.Baggage{},
)
otel.SetTextMapPropagator(prop)
return tp.Tracer(tracerName), nil
Expand Down
70 changes: 65 additions & 5 deletions otelx/otel_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@ import (
"net/http"
"net/http/httptest"
"net/url"
"strings"
"testing"
"time"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/propagation"
"go.opentelemetry.io/otel/trace"
"golang.org/x/sync/errgroup"
"google.golang.org/protobuf/proto"

Expand Down Expand Up @@ -65,8 +69,8 @@ type zipkinSpanRequest struct {
Tags map[string]string
}

func TestJaegerTracer(t *testing.T) {
done := make(chan struct{})
// runTestJaegerAgent starts a mock server listening on a random port for Jaeger spans sent over UDP.
func runTestJaegerAgent(t *testing.T, errs *errgroup.Group, done chan<- struct{}) net.Conn {
addr := "127.0.0.1:0"

udpAddr, err := net.ResolveUDPAddr("udp", addr)
Expand All @@ -75,8 +79,6 @@ func TestJaegerTracer(t *testing.T) {
srv, err := net.ListenUDP("udp", udpAddr)
require.NoError(t, err)

errs := errgroup.Group{}

errs.Go(func() error {
t.Logf("Starting test UDP server for Jaeger spans on %s", srv.LocalAddr().String())

Expand All @@ -91,20 +93,32 @@ func TestJaegerTracer(t *testing.T) {
continue
}
if len(buf) != 0 {
t.Log("recieved span!")
t.Log("received span!")
done <- struct{}{}
}
break
}
return nil
})

return srv
}

func TestJaegerTracer(t *testing.T) {
done := make(chan struct{})
errs := errgroup.Group{}

srv := runTestJaegerAgent(t, &errs, done)

jt, err := New(testTracingComponent, logrusx.New("ory/x", "1"), &Config{
ServiceName: "Ory X",
Provider: "jaeger",
Providers: ProvidersConfig{
Jaeger: JaegerConfig{
LocalAgentAddress: srv.LocalAddr().String(),
Sampling: JaegerSampling{
TraceIdRatio: 1,
},
},
},
})
Expand All @@ -123,6 +137,52 @@ func TestJaegerTracer(t *testing.T) {
require.NoError(t, errs.Wait())
}

func TestJaegerTracerRespectsParentSamplingDecision(t *testing.T) {
done := make(chan struct{})
errs := errgroup.Group{}

srv := runTestJaegerAgent(t, &errs, done)

jt, err := New(testTracingComponent, logrusx.New("ory/x", "1"), &Config{
ServiceName: "Ory X",
Provider: "jaeger",
Providers: ProvidersConfig{
Jaeger: JaegerConfig{
LocalAgentAddress: srv.LocalAddr().String(),
Sampling: JaegerSampling{
// Effectively disable local sampling.
TraceIdRatio: 0,
},
},
},
})
require.NoError(t, err)

traceId := strings.Repeat("a", 32)
spanId := strings.Repeat("b", 16)
sampledFlag := "1"
traceHeaders := map[string]string{"uber-trace-id": traceId + ":" + spanId + ":0:" + sampledFlag}

ctx := otel.GetTextMapPropagator().Extract(context.Background(), propagation.MapCarrier(traceHeaders))
spanContext := trace.SpanContextFromContext(ctx)

assert.True(t, spanContext.IsValid())
assert.True(t, spanContext.IsSampled())
assert.True(t, spanContext.IsRemote())

trc := jt.Tracer()
_, span := trc.Start(ctx, "testSpan", trace.WithLinks(trace.Link{SpanContext: spanContext}))
span.SetAttributes(attribute.Bool("testAttribute", true))
span.End()

select {
case <-done:
case <-time.After(15 * time.Second):
t.Fatalf("Test server did not receive spans")
}
require.NoError(t, errs.Wait())
}

func TestZipkinTracer(t *testing.T) {
done := make(chan struct{})
ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
Expand Down
Loading