Skip to content

Commit

Permalink
Set status code base on GRPC status code for OpenTelemetry tracers (#…
Browse files Browse the repository at this point in the history
…36173)

<!--
!!!ATTENTION!!!

If you are fixing *any* crash or *any* potential security issue, *do
not*
open a pull request in this repo. Please report the issue via emailing
[email protected] where the issue will be triaged
appropriately.
Thank you in advance for helping to keep Envoy secure.

!!!ATTENTION!!!

For an explanation of how to fill out the fields, please see the
relevant section
in
[PULL_REQUESTS.md](https://github.com/envoyproxy/envoy/blob/main/PULL_REQUESTS.md)
-->

Commit Message: Set status code base on GRPC status code for
OpenTelemetry tracers
Additional Description: related to
istio/istio#53025
Risk Level:
Testing:
Docs Changes:
Release Notes:
Platform Specific Features:
[Optional Runtime guard:]
[Optional Fixes #Issue]
[Optional Fixes commit #PR or SHA]
[Optional Deprecated:]
[Optional [API
Considerations](https://github.com/envoyproxy/envoy/blob/main/api/review_checklist.md):]

---------

Signed-off-by: zirain <[email protected]>
  • Loading branch information
zirain authored Sep 26, 2024
1 parent 4074450 commit b69d1f5
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 1 deletion.
3 changes: 3 additions & 0 deletions changelogs/current.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,9 @@ minor_behavior_changes:
change: |
Enhanced listener filter chain execution to include the case that listener filter has maxReadBytes() of 0,
but may return StopIteration in onAccept to wait for asynchronous callback.
- area: tracers
change: |
Set status code based on GRPC status code for OpenTelemetry tracers (previously unset).
- area: http2
change: |
Changes the default value of ``envoy.reloadable_features.http2_use_oghttp2`` to ``false``. This changes the codec used for HTTP/2
Expand Down
37 changes: 36 additions & 1 deletion source/extensions/tracers/opentelemetry/tracer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,43 @@ void Span::setAttribute(absl::string_view name, const OTelAttribute& attribute_v
*span_.add_attributes() = key_value;
}

::opentelemetry::proto::trace::v1::Status_StatusCode
convertGrpcStatusToTraceStatusCode(::opentelemetry::proto::trace::v1::Span_SpanKind kind,
absl::string_view value) {
uint64_t grpc_status_code;
if (!absl::SimpleAtoi(value, &grpc_status_code)) {
// If the value is not a number, we can't map it to a status code.
// In this case, we should leave the status code unset.
return ::opentelemetry::proto::trace::v1::Status::STATUS_CODE_UNSET;
}

Grpc::Status::GrpcStatus grpc_status = static_cast<Grpc::Status::GrpcStatus>(grpc_status_code);
// Check mapping https://opentelemetry.io/docs/specs/semconv/rpc/grpc/#grpc-status
if (kind == ::opentelemetry::proto::trace::v1::Span::SPAN_KIND_CLIENT) {
if (grpc_status == Grpc::Status::WellKnownGrpcStatus::Ok) {
return ::opentelemetry::proto::trace::v1::Status::STATUS_CODE_UNSET;
}
return ::opentelemetry::proto::trace::v1::Status::STATUS_CODE_ERROR;
}

// SPAN_KIND_SERVER
switch (grpc_status) {
case Grpc::Status::WellKnownGrpcStatus::Unknown:
case Grpc::Status::WellKnownGrpcStatus::DeadlineExceeded:
case Grpc::Status::WellKnownGrpcStatus::Unimplemented:
case Grpc::Status::WellKnownGrpcStatus::Internal:
case Grpc::Status::WellKnownGrpcStatus::Unavailable:
case Grpc::Status::WellKnownGrpcStatus::DataLoss:
return ::opentelemetry::proto::trace::v1::Status::STATUS_CODE_ERROR;
default:
return ::opentelemetry::proto::trace::v1::Status::STATUS_CODE_UNSET;
}
}

void Span::setTag(absl::string_view name, absl::string_view value) {
if (name == Tracing::Tags::get().HttpStatusCode) {
if (name == Tracing::Tags::get().GrpcStatusCode) {
span_.mutable_status()->set_code(convertGrpcStatusToTraceStatusCode(span_.kind(), value));
} else if (name == Tracing::Tags::get().HttpStatusCode) {
uint64_t status_code;
// For HTTP status codes in the 5xx range, as well as any other code the client failed to
// interpret, span status MUST be set to Error.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -686,6 +686,89 @@ TEST_F(OpenTelemetryDriverTest, ExportOTLPSpanWithAttributesAndStatus) {
EXPECT_EQ(1U, stats_.counter("tracing.opentelemetry.spans_sent").value());
}

// Verifies Grpc spans are exported with their attributes and status
TEST_F(OpenTelemetryDriverTest, ExportOTLPGRPCSpanWithAttributesAndStatus) {
setupValidDriver();
Tracing::TestTraceContextImpl request_headers{
{":authority", "test.com"}, {":path", "/"}, {":method", "GET"}};
NiceMock<Random::MockRandomGenerator>& mock_random_generator_ =
context_.server_factory_context_.api_.random_;
int64_t generated_int = 1;
EXPECT_CALL(mock_random_generator_, random()).Times(3).WillRepeatedly(Return(generated_int));
SystemTime timestamp = time_system_.systemTime();
ON_CALL(stream_info_, startTime()).WillByDefault(Return(timestamp));

Tracing::SpanPtr span = driver_->startSpan(mock_tracing_config_, request_headers, stream_info_,
operation_name_, {Tracing::Reason::Sampling, true});
EXPECT_NE(span.get(), nullptr);

span->setTag("first_tag_name", "first_tag_value");
span->setTag("second_tag_name", "second_tag_value");
// Try an empty tag.
span->setTag("", "empty_tag_value");
// Overwrite a tag.
span->setTag("first_tag_name", "first_tag_new_value");
span->setTag("http.status_code", "200");
span->setTag("grpc.status_code", "13");
span->setTag("grpc.message", "connect Canceled randomly");

// Note the placeholders for the bytes - cleaner to manually set after.
constexpr absl::string_view request_yaml = R"(
resource_spans:
resource:
attributes:
key: "service.name"
value:
string_value: "unknown_service:envoy"
key: "key1"
value:
string_value: "val1"
scope_spans:
spans:
trace_id: "AAA"
span_id: "AAA"
name: "test"
kind: SPAN_KIND_SERVER
start_time_unix_nano: {}
end_time_unix_nano: {}
status:
code: STATUS_CODE_ERROR
attributes:
- key: "first_tag_name"
value:
string_value: "first_tag_new_value"
- key: "second_tag_name"
value:
string_value: "second_tag_value"
- key: "http.status_code"
value:
string_value: "200"
- key: "grpc.status_code"
value:
string_value: "13"
- key: "grpc.message"
value:
string_value: "connect Canceled randomly"
)";
opentelemetry::proto::collector::trace::v1::ExportTraceServiceRequest request_proto;
int64_t timestamp_ns = std::chrono::nanoseconds(timestamp.time_since_epoch()).count();
TestUtility::loadFromYaml(fmt::format(request_yaml, timestamp_ns, timestamp_ns), request_proto);
std::string generated_int_hex = Hex::uint64ToHex(generated_int);
auto* expected_span =
request_proto.mutable_resource_spans(0)->mutable_scope_spans(0)->mutable_spans(0);
expected_span->set_trace_id(
absl::HexStringToBytes(absl::StrCat(generated_int_hex, generated_int_hex)));
expected_span->set_span_id(absl::HexStringToBytes(absl::StrCat(generated_int_hex)));

EXPECT_CALL(runtime_.snapshot_, getInteger("tracing.opentelemetry.min_flush_spans", 5U))
.Times(1)
.WillRepeatedly(Return(1));
EXPECT_CALL(*mock_stream_ptr_,
sendMessageRaw_(Grpc::ProtoBufferEqIgnoreRepeatedFieldOrdering(request_proto), _));
span->finishSpan();
EXPECT_EQ(1U, stats_.counter("tracing.opentelemetry.spans_sent").value());
}

// Not sampled spans are ignored
TEST_F(OpenTelemetryDriverTest, IgnoreNotSampledSpan) {
setupValidDriver();
Expand Down

0 comments on commit b69d1f5

Please sign in to comment.