Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(er): capture manual exceptions #10430

Merged
merged 10 commits into from
Aug 30, 2024
4 changes: 2 additions & 2 deletions ddtrace/bootstrap/preload.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ def register_post_preload(func: t.Callable) -> None:
DynamicInstrumentation.enable()

if er_config.enabled: # Exception Replay
from ddtrace.debugging._exception.replay import SpanExceptionProcessor
from ddtrace.debugging._exception.replay import SpanExceptionHandler

SpanExceptionProcessor().register()
SpanExceptionHandler().enable()

if config._runtime_metrics_enabled:
RuntimeWorker.enable()
Expand Down
7 changes: 3 additions & 4 deletions ddtrace/debugging/_debugger.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
from ddtrace import config as ddconfig
from ddtrace._trace.tracer import Tracer
from ddtrace.debugging._config import di_config
from ddtrace.debugging._exception.replay import SpanExceptionProcessor
from ddtrace.debugging._exception.replay import SpanExceptionHandler
from ddtrace.debugging._function.discovery import FunctionDiscovery
from ddtrace.debugging._function.store import FullyNamedWrappedFunction
from ddtrace.debugging._function.store import FunctionStore
Expand Down Expand Up @@ -280,7 +280,7 @@ def __exit__(
class Debugger(Service):
_instance: Optional["Debugger"] = None
_probe_meter = _probe_metrics.get_meter("probe")
_span_processor: Optional[SpanExceptionProcessor] = None
_span_exc_handler: Optional[SpanExceptionHandler] = None

__rc_adapter__ = ProbeRCAdapter
__uploader__ = LogsIntakeUploaderV1
Expand Down Expand Up @@ -334,8 +334,7 @@ def disable(cls, join: bool = True) -> None:
atexit.unregister(cls.disable)
unregister_post_run_module_hook(cls._on_run_module)

if cls._instance._span_processor:
cls._instance._span_processor.unregister()
# TODO: Currently there is no way of disabling a core event handler
P403n1x87 marked this conversation as resolved.
Show resolved Hide resolved

cls._instance.stop(join=join)
cls._instance = None
Expand Down
31 changes: 14 additions & 17 deletions ddtrace/debugging/_exception/replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,28 @@
from dataclasses import dataclass
from itertools import count
from pathlib import Path
import sys
from threading import current_thread
from types import FrameType
from types import TracebackType
import typing as t
import uuid

from ddtrace._trace.processor import SpanProcessor
from ddtrace._trace.span import Span
from ddtrace.debugging._probe.model import LiteralTemplateSegment
from ddtrace.debugging._probe.model import LogLineProbe
from ddtrace.debugging._signal.snapshot import DEFAULT_CAPTURE_LIMITS
from ddtrace.debugging._signal.snapshot import Snapshot
from ddtrace.debugging._uploader import LogsIntakeUploaderV1
from ddtrace.debugging._uploader import UploaderProduct
from ddtrace.internal import core
from ddtrace.internal.logger import get_logger
from ddtrace.internal.packages import is_user_code
from ddtrace.internal.rate_limiter import BudgetRateLimiterWithJitter as RateLimiter
from ddtrace.internal.rate_limiter import RateLimitExceeded


log = get_logger(__name__)
P403n1x87 marked this conversation as resolved.
Show resolved Hide resolved

GLOBAL_RATE_LIMITER = RateLimiter(
limit_rate=1, # one trace per second
raise_on_exceed=False,
Expand Down Expand Up @@ -142,19 +144,16 @@ def can_capture(span: Span) -> bool:


@dataclass
class SpanExceptionProcessor(SpanProcessor):
class SpanExceptionHandler:
P403n1x87 marked this conversation as resolved.
Show resolved Hide resolved
P403n1x87 marked this conversation as resolved.
Show resolved Hide resolved
__uploader__ = LogsIntakeUploaderV1

def on_span_start(self, span: Span) -> None:
pass

def on_span_finish(self, span: Span) -> None:
if not (span.error and can_capture(span)):
# No error or budget to capture
def on_span_exception(
self, span: Span, _exc_type: t.Type[BaseException], exc: BaseException, _tb: t.Optional[TracebackType]
) -> None:
if span.get_tag(DEBUG_INFO_TAG) == "true" or not can_capture(span):
# Debug info for span already captured or no budget to capture
return

_, exc, _tb = sys.exc_info()

chain, exc_id = unwind_exception_chain(exc, _tb)
if not chain or exc_id is None:
# No exceptions to capture
Expand Down Expand Up @@ -208,12 +207,10 @@ def on_span_finish(self, span: Span) -> None:
span.set_tag_str(DEBUG_INFO_TAG, "true")
span.set_tag_str(EXCEPTION_ID_TAG, str(exc_id))

def register(self) -> None:
super().register()

def enable(self) -> None:
self.__uploader__.register(UploaderProduct.EXCEPTION_REPLAY)
core.on("span.exception", self.on_span_exception, name=__name__)

def unregister(self) -> None:
def disable(self) -> None:
core.reset_listeners("span.exception", self.on_span_exception)
self.__uploader__.unregister(UploaderProduct.EXCEPTION_REPLAY)

return super().unregister()
32 changes: 30 additions & 2 deletions tests/debugging/exception/test_replay.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from contextlib import contextmanager
import sys

import pytest

Expand Down Expand Up @@ -79,7 +80,7 @@ def c(foo=42):
snapshots = {str(s.uuid): s for s in uploader.collector.queue}

for n, span in enumerate(self.spans):
assert span.get_tag("error.debug_info_captured") == "true"
assert span.get_tag(replay.DEBUG_INFO_TAG) == "true"

exc_id = span.get_tag("_dd.debug.error.exception_id")

Expand Down Expand Up @@ -146,7 +147,7 @@ def c(foo=42):
number_of_exc_ids = 1

for n, span in enumerate(self.spans):
assert span.get_tag("error.debug_info_captured") == "true"
assert span.get_tag(replay.DEBUG_INFO_TAG) == "true"

exc_id = span.get_tag("_dd.debug.error.exception_id")

Expand Down Expand Up @@ -184,3 +185,30 @@ def c(foo=42):
self.assert_span_count(6)
# no new snapshots
assert len(uploader.collector.queue) == 3

def test_debugger_capture_exception(self):
    """Check that an exception recorded manually on a span is captured once.

    The exception is raised and handled inside span "a", so it never
    propagates out of either span; it is only attached to span "a" through
    explicit ``set_exc_info`` calls. The test expects exactly one snapshot in
    the uploader queue, the debug-info tag on span "a", and no such tag on
    the enclosing span "b".
    """

    def a(v):
        with self.trace("a") as span:
            try:
                raise ValueError("hello", v)
            except Exception:
                # Attach the handled exception to the span by hand.
                # NOTE(review): presumably this is what triggers the
                # "span.exception" handler -- confirm against Span.set_exc_info.
                span.set_exc_info(*sys.exc_info())
                # Check that we don't capture multiple times
                span.set_exc_info(*sys.exc_info())

    def b():
        # Outer span: the exception is swallowed inside a(), so nothing is
        # recorded on this span.
        with self.trace("b"):
            a(42)

    with exception_replay() as uploader:
        with with_rate_limiter(RateLimiter(limit_rate=1, raise_on_exceed=False)):
            b()

        self.assert_span_count(2)
        # Two set_exc_info calls, but only one snapshot is expected.
        assert len(uploader.collector.queue) == 1

        # The unpacking relies on self.spans listing the outer span "b" first.
        span_b, span_a = self.spans

        assert span_a.name == "a"
        assert span_a.get_tag(replay.DEBUG_INFO_TAG) == "true"
        assert span_b.get_tag(replay.DEBUG_INFO_TAG) is None
12 changes: 6 additions & 6 deletions tests/debugging/mocking.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from ddtrace.debugging._config import di_config
from ddtrace.debugging._debugger import Debugger
from ddtrace.debugging._exception.replay import SpanExceptionProcessor
from ddtrace.debugging._exception.replay import SpanExceptionHandler
from ddtrace.debugging._probe.model import Probe
from ddtrace.debugging._probe.remoteconfig import ProbePollerEvent
from ddtrace.debugging._probe.remoteconfig import _filter_by_env_and_version
Expand Down Expand Up @@ -196,15 +196,15 @@ def debugger(**config_overrides: Any) -> Generator[TestDebugger, None, None]:
yield debugger


class MockSpanExceptionProcessor(SpanExceptionProcessor):
class MockSpanExceptionHandler(SpanExceptionHandler):
__uploader__ = MockLogsIntakeUploaderV1


@contextmanager
def exception_replay(**config_overrides: Any) -> Generator[MockLogsIntakeUploaderV1, None, None]:
processor = MockSpanExceptionProcessor()
processor.register()
handler = MockSpanExceptionHandler()
handler.enable()
try:
yield processor.__uploader__._instance
yield handler.__uploader__._instance
finally:
processor.unregister()
handler.disable()
Loading