Skip to content

Commit

Permalink
Rewrite dd shim classes (#66)
Browse files Browse the repository at this point in the history
* Rewrite dd shim classes

* Fix logging filters/processors

* Up version

* Allow to reuse otel span processors

* Cleanup

* Fix linting

* Fix linting

* Fix uvicorn logging

* Some performance improvements

* Bump version
  • Loading branch information
Guðmundur Björn Birkisson authored Feb 25, 2023
1 parent 18c6f0a commit f43d20f
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 261 deletions.
20 changes: 0 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,6 @@
* [Receive context](#receive-context)
* [Trace sampling](#trace-sampling)
* [Trace debugging](#trace-debugging)
* [Context detach exceptions](#context-detach-exceptions)
* [Profiling](#profiling)
* [Setup endpoint](#setup-endpoint)
* [Enable scraping](#enable-scraping)
Expand Down Expand Up @@ -465,25 +464,6 @@ DD_TRACE_SAMPLE_RATE=0.05

You can enable trace debugging by setting the environment variable `OTEL_TRACE_DEBUG=true`. That will print all spans to the console. If you would rather get the spans in a file you can also provide the variable `OTEL_TRACE_DEBUG_FILE=/tmp/traces`.

### Context detach exceptions

We have observed the following exception in some services:

```console
Traceback (most recent call last):
File "opentelemetry/context/__init__.py", line 157, in detach
_RUNTIME_CONTEXT.detach(token) # type: ignore
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "opentelemetry/context/contextvars_context.py", line 50, in detach
self._current_context.reset(token) # type: ignore
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
ValueError: <Token> was created in a different Context
```

This is caused by a [bug in OTEL](https://github.com/open-telemetry/opentelemetry-python/issues/2606). In these cases the first (or root) span of an incoming starlette request fails to detach its context when it finishes. This does not seem to affect tracing, nor to cause any memory leaks. It just floods the logs with exceptions.

The solution is to set the environment variable `TRONCOS_OMIT_ROOT_CONTEXT_DETACH=true`. You can also set up the logging filter `ContextDetachExceptionDropFilter` to suppress those exceptions in the logs.

## Profiling

> **Warning**: Profiling while using Python 3.11 is [not yet fully supported](https://github.com/DataDog/dd-trace-py/issues/4149).
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "troncos"
version = "3.0.5"
version = "3.1.0"
description = "Collection of Python logging, tracing and profiling tools"
authors = [
"Guðmundur Björn Birkisson <[email protected]>",
Expand Down
7 changes: 4 additions & 3 deletions troncos/frameworks/starlette/uvicorn.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import logging

from starlette.middleware import Middleware

from troncos.frameworks.asgi.middleware import AsgiLoggingMiddleware
from troncos.logs.filters import HttpPathFilter

Expand Down Expand Up @@ -100,7 +102,6 @@ def init_uvicorn_logging(
app_logger_error.setLevel(logging.ERROR)

# Add our middleware to starlette
app.add_middleware(
AsgiLoggingMiddleware,
logger_name=logger_name,
app.user_middleware.append(
Middleware(AsgiLoggingMiddleware, logger_name=logger_name)
)
17 changes: 8 additions & 9 deletions troncos/frameworks/structlog/processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
the filters to allow for parallel feature parity while we finish the current troncos
adoption.
"""
import opentelemetry.trace as trace

from troncos._ddlazy import ddlazy

try:
Expand Down Expand Up @@ -53,14 +51,15 @@ def trace_injection_processor(
Simple logging processor that adds a trace_id to the log record if available.
"""

span = trace.get_current_span()
if not isinstance(span, trace.NonRecordingSpan):
event_dict["trace_id"] = f"{span.get_span_context().trace_id:x}"
event_dict["span_id"] = f"{span.get_span_context().span_id:x}"
if not ddlazy.dd_initialized():
return event_dict

dd_context = ddlazy.dd_tracer().current_trace_context()
if dd_context:
event_dict["trace_id"] = f"{dd_context.trace_id:x}".zfill(32)
event_dict["span_id"] = f"{dd_context.span_id:x}"

if ddlazy.dd_trace_export_enabled():
dd_context = ddlazy.dd_tracer().current_trace_context()
if dd_context:
if ddlazy.dd_trace_export_enabled():
event_dict["dd_trace_id"] = dd_context.trace_id
event_dict["dd_span_id"] = dd_context.span_id

Expand Down
20 changes: 10 additions & 10 deletions troncos/logs/filters.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
import logging

import opentelemetry.trace as trace

from troncos._ddlazy import ddlazy

logger = logging.getLogger(__name__)
Expand All @@ -21,14 +19,15 @@ def __init__(self, name: str = "TraceIdFilter") -> None:
super().__init__(name)

def filter(self, record: logging.LogRecord) -> bool:
otel_span = trace.get_current_span()
if not isinstance(otel_span, trace.NonRecordingSpan):
record.trace_id = f"{otel_span.get_span_context().trace_id:x}"
record.span_id = f"{otel_span.get_span_context().span_id:x}"

if ddlazy.dd_trace_export_enabled():
dd_context = ddlazy.dd_tracer().current_trace_context()
if dd_context:
if not ddlazy.dd_initialized():
return True

dd_context = ddlazy.dd_tracer().current_trace_context()
if dd_context:
record.trace_id = f"{dd_context.trace_id:x}".zfill(32)
record.span_id = f"{dd_context.span_id:x}"

if ddlazy.dd_trace_export_enabled():
record.dd_trace_id = dd_context.trace_id
record.dd_span_id = dd_context.span_id

Expand Down Expand Up @@ -81,6 +80,7 @@ class ContextDetachExceptionDropFilter(logging.Filter):
def __init__(self, name: str = "ContextDetachExceptionDropFilter") -> None:
super().__init__(name)
self._count = 0
logger.warning("ContextDetachExceptionDropFilter is no longer needed!")

def filter(self, record: logging.LogRecord) -> bool:
if (
Expand Down
45 changes: 24 additions & 21 deletions troncos/traces/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,16 @@
import sys
from typing import Any

import opentelemetry
from opentelemetry.sdk.trace import SpanProcessor
from opentelemetry.sdk.trace.export import (
BatchSpanProcessor,
ConsoleSpanExporter,
SimpleSpanProcessor,
)
from opentelemetry.sdk.trace.export.in_memory_span_exporter import InMemorySpanExporter

from troncos._ddlazy import ddlazy
from troncos.traces.dd_shim import DDSpanProcessor, OtelTracerProvider
from troncos.traces.dd_shim import DDSpanProcessor

logger = logging.getLogger(__name__)
TRACE_HEADERS = [
Expand Down Expand Up @@ -59,12 +59,12 @@ def _create_span_processor(
service_attributes: dict[str, str] | None = None,
endpoint: str | None = None,
endpoint_dd: str | None = None,
otel_omit_root_context_detach: bool | None = None,
) -> DDSpanProcessor:
"""
Creates a DD span processor that converts DD spans into OTEL spans
"""

flush_on_shutdown = True
otel_span_processors: list[SpanProcessor] = []
if endpoint:
# Create an exporter
Expand All @@ -87,14 +87,20 @@ def _create_span_processor(
otel_span_processors.append(
BatchSpanProcessor(trace_exporter.OTLPSpanExporter(endpoint=endpoint))
)
else:
if _bool_from_string(os.environ.get("TRONCOS_REUSE_OTEL_PROCESSOR", "false")):
tp = opentelemetry.trace.get_tracer_provider()
if hasattr(tp, "_active_span_processor"):
logger.info("Reusing OTEL span processor")
otel_span_processors.append(tp._active_span_processor)
flush_on_shutdown = False

# Fallback to InMemorySpanExporter
if len(otel_span_processors) == 0:
logger.warning("No OTEL span processor configured")
otel_span_processors.append(SimpleSpanProcessor(InMemorySpanExporter())) # type: ignore[no-untyped-call] # noqa: E501

# Setup OTEL debug processor
otel_trace_debug = os.environ.get("OTEL_TRACE_DEBUG")
otel_trace_debug = _bool_from_string(os.environ.get("OTEL_TRACE_DEBUG", "false"))
otel_trace_debug_file = os.environ.get("OTEL_TRACE_DEBUG_FILE")
if otel_trace_debug:
logger.info(f"OTEL debug processor to {otel_trace_debug_file or 'stdout'}")
Expand All @@ -107,21 +113,20 @@ def _create_span_processor(
SimpleSpanProcessor(ConsoleSpanExporter(out=debug_out))
)

# Set up a dynamic tracer provider
otel_trace_provider = OtelTracerProvider(
span_processors=otel_span_processors,
service=service_name,
attributes=service_attributes,
env=service_env,
version=service_version,
)
service_attributes = service_attributes or {}

if service_env:
service_attributes["environment"] = service_env

if service_version:
service_attributes["version"] = service_version

# Return the span processor
return DDSpanProcessor(
otel_tracer_provider=otel_trace_provider,
tracer_attributes=service_attributes,
service_name,
service_attributes,
otel_span_processors,
dd_traces_exported=endpoint_dd is not None,
omit_root_context_detach=otel_omit_root_context_detach or False,
flush_on_shutdown=flush_on_shutdown,
)


Expand Down Expand Up @@ -164,9 +169,8 @@ def init_tracing_basic(
logger.warning("Function 'init_tracing_basic' called multiple times!")
return

otel_omit_root_context_detach = _bool_from_string(
os.environ.get("TRONCOS_OMIT_ROOT_CONTEXT_DETACH", "false")
)
if _bool_from_string(os.environ.get("TRONCOS_OMIT_ROOT_CONTEXT_DETACH", "false")):
logger.warning("TRONCOS_OMIT_ROOT_CONTEXT_DETACH is no longer needed!")

# Create custom span processor
custom_dd_span_processor = _create_span_processor(
Expand All @@ -176,7 +180,6 @@ def init_tracing_basic(
service_attributes=service_attributes,
endpoint=endpoint,
endpoint_dd=endpoint_dd,
otel_omit_root_context_detach=otel_omit_root_context_detach,
)

# Set service info
Expand Down
Loading

0 comments on commit f43d20f

Please sign in to comment.