-
Notifications
You must be signed in to change notification settings - Fork 2.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[jaeger-v2] Implement UTF-8 sanitizer for OTLP #6078
base: main
Are you sure you want to change the base?
Changes from all commits
29e53b8
9f936e3
0fc9df1
b799ffe
c4797d5
d363e9d
349e8df
9b1e665
8212eb2
aad0bc5
13f6f82
bf3f7ad
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
// Copyright (c) 2024 The Jaeger Authors. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package sanitizer | ||
|
||
import ( | ||
"unicode/utf8" | ||
|
||
"go.opentelemetry.io/collector/pdata/pcommon" | ||
"go.opentelemetry.io/collector/pdata/ptrace" | ||
) | ||
|
||
const ( | ||
invalidSpanName = "invalid-span-name" | ||
invalidTagKey = "invalid-tag-key" | ||
) | ||
|
||
// NewUTF8Sanitizer returns a sanitizer function that performs the following sanitizations | ||
// on the trace data: | ||
// - Checks all attributes of the span to ensure that the keys are valid UTF-8 strings | ||
// and all string typed values are valid UTF-8 strings. If any keys or values are invalid, | ||
// they are replaced with a valid UTF-8 string containing debugging data related to the invalidations. | ||
// - Explicitly checks that all span names are valid UTF-8 strings. If they are not, they are replaced | ||
// with a valid UTF-8 string and debugging information is put into the attributes of the span. | ||
// - Explicitly checks that the service name is a valid UTF-8 string. If it is not, it is replaced with | ||
// a valid UTF-8 string that contains debugging information related to the invalidation. | ||
func NewUTF8Sanitizer() Func { | ||
return sanitizeUTF8 | ||
} | ||
|
||
func sanitizeUTF8(traces ptrace.Traces) ptrace.Traces { | ||
resourceSpans := traces.ResourceSpans() | ||
for i := 0; i < resourceSpans.Len(); i++ { | ||
resourceSpan := resourceSpans.At(i) | ||
sanitizeAttributes(resourceSpan.Resource().Attributes()) | ||
|
||
scopeSpans := resourceSpan.ScopeSpans() | ||
for j := 0; j < scopeSpans.Len(); j++ { | ||
scopeSpan := scopeSpans.At(j) | ||
sanitizeAttributes(scopeSpan.Scope().Attributes()) | ||
|
||
spans := scopeSpan.Spans() | ||
|
||
for k := 0; k < spans.Len(); k++ { | ||
span := spans.At(k) | ||
|
||
if !utf8.ValidString(span.Name()) { | ||
sanitized := []byte(span.Name()) | ||
newVal := span.Attributes().PutEmptyBytes(invalidSpanName) | ||
newVal.Append(sanitized...) | ||
span.SetName(invalidSpanName) | ||
} | ||
|
||
sanitizeAttributes(span.Attributes()) | ||
} | ||
} | ||
} | ||
|
||
return traces | ||
} | ||
|
||
func sanitizeAttributes(attributes pcommon.Map) { | ||
// collect invalid keys during iteration to avoid changing the keys of the map | ||
// while iterating over the map using Range | ||
invalidKeys := make(map[string]pcommon.Value) | ||
|
||
attributes.Range(func(k string, v pcommon.Value) bool { | ||
if !utf8.ValidString(k) { | ||
invalidKeys[k] = v | ||
} | ||
|
||
if v.Type() == pcommon.ValueTypeStr && !utf8.ValidString(v.Str()) { | ||
sanitized := []byte(v.Str()) | ||
newVal := attributes.PutEmptyBytes(k) | ||
newVal.Append(sanitized...) | ||
} | ||
return true | ||
}) | ||
|
||
for k, v := range invalidKeys { | ||
sanitized := []byte(k + ":") | ||
switch v.Type() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. From the godoc of // Type returns the type of the value for this Value.
// Calling this function on zero-initialized Value will cause a panic. Can we run into a situation where we call this on a zero-initialized value in the regular exporter flow? |
||
case pcommon.ValueTypeBytes: | ||
sanitized = append(sanitized, v.Bytes().AsRaw()...) | ||
default: | ||
sanitized = append(sanitized, []byte(v.AsString())...) | ||
} | ||
|
||
attributes.Remove(k) | ||
newVal := attributes.PutEmptyBytes(invalidTagKey) | ||
newVal.Append(sanitized...) | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,250 @@ | ||
// Copyright (c) 2024 The Jaeger Authors. | ||
// SPDX-License-Identifier: Apache-2.0 | ||
|
||
package sanitizer | ||
|
||
import ( | ||
"encoding/hex" | ||
"fmt" | ||
"testing" | ||
|
||
"github.com/stretchr/testify/require" | ||
"go.opentelemetry.io/collector/pdata/pcommon" | ||
"go.opentelemetry.io/collector/pdata/ptrace" | ||
) | ||
|
||
func invalidUTF8() string { | ||
s, _ := hex.DecodeString("fefeffff") | ||
return string(s) | ||
} | ||
|
||
func getBytesValueFromString(s string) pcommon.Value { | ||
b := pcommon.NewValueBytes() | ||
b.Bytes().Append([]byte(s)...) | ||
return b | ||
} | ||
|
||
var utf8EncodingTests = []struct { | ||
name string | ||
key string | ||
value string | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It seems if your tests only allow string values you cannot test other attribute types. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @yurishkuro I added some separate tests for non-string attributes |
||
expectedKey string | ||
expectedValue pcommon.Value | ||
}{ | ||
{ | ||
name: "valid key + valid value", | ||
key: "key", | ||
value: "value", | ||
expectedKey: "key", | ||
expectedValue: pcommon.NewValueStr("value"), | ||
}, | ||
{ | ||
name: "invalid key + valid value", | ||
key: invalidUTF8(), | ||
value: "value", | ||
expectedKey: "invalid-tag-key", | ||
expectedValue: getBytesValueFromString(fmt.Sprintf("%s:value", invalidUTF8())), | ||
}, | ||
{ | ||
name: "valid key + invalid value", | ||
key: "key", | ||
value: invalidUTF8(), | ||
expectedKey: "key", | ||
expectedValue: getBytesValueFromString(invalidUTF8()), | ||
}, | ||
{ | ||
name: "invalid key + invalid value", | ||
key: invalidUTF8(), | ||
value: invalidUTF8(), | ||
expectedKey: "invalid-tag-key", | ||
expectedValue: getBytesValueFromString(fmt.Sprintf("%s:%s", invalidUTF8(), invalidUTF8())), | ||
}, | ||
} | ||
|
||
func TestUTF8Sanitizer_SanitizesResourceSpanAttributes(t *testing.T) { | ||
tests := utf8EncodingTests | ||
for _, test := range tests { | ||
t.Run(test.name, func(t *testing.T) { | ||
traces := ptrace.NewTraces() | ||
traces. | ||
ResourceSpans(). | ||
AppendEmpty(). | ||
Resource(). | ||
Attributes(). | ||
PutStr(test.key, test.value) | ||
sanitizer := NewUTF8Sanitizer() | ||
sanitized := sanitizer(traces) | ||
value, ok := sanitized. | ||
ResourceSpans(). | ||
At(0). | ||
Resource(). | ||
Attributes(). | ||
Get(test.expectedKey) | ||
require.True(t, ok) | ||
require.Equal(t, test.expectedValue, value) | ||
}) | ||
} | ||
} | ||
|
||
func TestUTF8Sanitizer_SanitizesScopeSpanAttributes(t *testing.T) { | ||
tests := utf8EncodingTests | ||
for _, test := range tests { | ||
t.Run(test.name, func(t *testing.T) { | ||
traces := ptrace.NewTraces() | ||
traces. | ||
ResourceSpans(). | ||
AppendEmpty(). | ||
ScopeSpans(). | ||
AppendEmpty(). | ||
Scope(). | ||
Attributes(). | ||
PutStr(test.key, test.value) | ||
sanitizer := NewUTF8Sanitizer() | ||
sanitized := sanitizer(traces) | ||
value, ok := sanitized. | ||
ResourceSpans(). | ||
At(0). | ||
ScopeSpans(). | ||
At(0). | ||
Scope(). | ||
Attributes(). | ||
Get(test.expectedKey) | ||
require.True(t, ok) | ||
require.Equal(t, test.expectedValue, value) | ||
}) | ||
} | ||
} | ||
|
||
func TestUTF8Sanitizer_SanitizesSpanAttributes(t *testing.T) { | ||
tests := utf8EncodingTests | ||
for _, test := range tests { | ||
t.Run(test.name, func(t *testing.T) { | ||
traces := ptrace.NewTraces() | ||
traces. | ||
ResourceSpans(). | ||
AppendEmpty(). | ||
ScopeSpans(). | ||
AppendEmpty(). | ||
Spans(). | ||
AppendEmpty(). | ||
Attributes(). | ||
PutStr(test.key, test.value) | ||
sanitizer := NewUTF8Sanitizer() | ||
sanitized := sanitizer(traces) | ||
value, ok := sanitized. | ||
ResourceSpans(). | ||
At(0). | ||
ScopeSpans(). | ||
At(0). | ||
Spans(). | ||
At(0). | ||
Attributes(). | ||
Get(test.expectedKey) | ||
require.True(t, ok) | ||
require.Equal(t, test.expectedValue, value) | ||
}) | ||
} | ||
} | ||
|
||
func TestUTF8Sanitizer_SanitizesInvalidSpanName(t *testing.T) { | ||
traces := ptrace.NewTraces() | ||
traces. | ||
ResourceSpans(). | ||
AppendEmpty(). | ||
ScopeSpans(). | ||
AppendEmpty(). | ||
Spans(). | ||
AppendEmpty(). | ||
SetName(invalidUTF8()) | ||
sanitizer := NewUTF8Sanitizer() | ||
sanitized := sanitizer(traces) | ||
name := sanitized. | ||
ResourceSpans(). | ||
At(0). | ||
ScopeSpans(). | ||
At(0). | ||
Spans(). | ||
At(0). | ||
Name() | ||
require.Equal(t, "invalid-span-name", name) | ||
} | ||
|
||
func TestUTF8Sanitizer_DoesNotSanitizeInvalidSpanName(t *testing.T) { | ||
traces := ptrace.NewTraces() | ||
traces. | ||
ResourceSpans(). | ||
AppendEmpty(). | ||
ScopeSpans(). | ||
AppendEmpty(). | ||
Spans(). | ||
AppendEmpty(). | ||
SetName("name") | ||
sanitizer := NewUTF8Sanitizer() | ||
sanitized := sanitizer(traces) | ||
name := sanitized. | ||
ResourceSpans(). | ||
At(0). | ||
ScopeSpans(). | ||
At(0). | ||
Spans(). | ||
At(0). | ||
Name() | ||
require.Equal(t, "name", name) | ||
} | ||
|
||
func TestUTF8Sanitizer_RemovesInvalidKeys(t *testing.T) { | ||
traces := ptrace.NewTraces() | ||
traces. | ||
ResourceSpans(). | ||
AppendEmpty(). | ||
Resource(). | ||
Attributes(). | ||
PutStr(invalidUTF8(), "value") | ||
sanitizer := NewUTF8Sanitizer() | ||
sanitized := sanitizer(traces) | ||
_, ok := sanitized. | ||
ResourceSpans(). | ||
At(0). | ||
Resource(). | ||
Attributes(). | ||
Get(invalidUTF8()) | ||
require.False(t, ok) | ||
} | ||
|
||
func TestUTF8Sanitizer_DoesNotSanitizeNonStringAttributeValue(t *testing.T) { | ||
traces := ptrace.NewTraces() | ||
traces. | ||
ResourceSpans(). | ||
AppendEmpty(). | ||
Resource(). | ||
Attributes().PutInt("key", 1) | ||
sanitizer := NewUTF8Sanitizer() | ||
sanitized := sanitizer(traces) | ||
value, ok := sanitized. | ||
ResourceSpans(). | ||
At(0). | ||
Resource(). | ||
Attributes(). | ||
Get("key") | ||
require.True(t, ok) | ||
require.EqualValues(t, 1, value.Int()) | ||
} | ||
|
||
func TestUTF8Sanitizer_SanitizesNonStringAttributeValueWithInvalidKey(t *testing.T) { | ||
traces := ptrace.NewTraces() | ||
traces. | ||
ResourceSpans(). | ||
AppendEmpty(). | ||
Resource(). | ||
Attributes().PutInt(invalidUTF8(), 1) | ||
sanitizer := NewUTF8Sanitizer() | ||
sanitized := sanitizer(traces) | ||
value, ok := sanitized. | ||
ResourceSpans(). | ||
At(0). | ||
Resource(). | ||
Attributes(). | ||
Get("invalid-tag-key") | ||
require.True(t, ok) | ||
require.EqualValues(t, getBytesValueFromString(fmt.Sprintf("%s:1", invalidUTF8())), value) | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This contradicts the comment above about modifying the attributes while iterating over them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@yurishkuro the comment was for modifying the keys of the map. is there anything wrong with modifying the value associated with a key?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I don't know, the command is put(), it may depend on the implementation.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks to just be changing the value if one already exists. We could do this in two passes if you prefer however.