Skip to content

Commit

Permalink
feat(er): capture manual exceptions (#10430)
Browse files Browse the repository at this point in the history
When Exception Replay is enabled, we now register a `span.exception` event
listener so that any exception information manually attached to a span
(for example with a call to `set_exc_info`) is captured as well.

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))

## Reviewer Checklist
- [ ] Reviewer has checked that all the criteria below are met 
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
  • Loading branch information
P403n1x87 authored Aug 30, 2024
1 parent 3282493 commit 0a49f52
Show file tree
Hide file tree
Showing 6 changed files with 81 additions and 32 deletions.
4 changes: 2 additions & 2 deletions ddtrace/bootstrap/preload.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,9 @@ def register_post_preload(func: t.Callable) -> None:
DynamicInstrumentation.enable()

if er_config.enabled: # Exception Replay
from ddtrace.debugging._exception.replay import SpanExceptionProcessor
from ddtrace.debugging._exception.replay import SpanExceptionHandler

SpanExceptionProcessor().register()
SpanExceptionHandler.enable()

if config._runtime_metrics_enabled:
RuntimeWorker.enable()
Expand Down
5 changes: 0 additions & 5 deletions ddtrace/debugging/_debugger.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
from ddtrace import config as ddconfig
from ddtrace._trace.tracer import Tracer
from ddtrace.debugging._config import di_config
from ddtrace.debugging._exception.replay import SpanExceptionProcessor
from ddtrace.debugging._function.discovery import FunctionDiscovery
from ddtrace.debugging._function.store import FullyNamedWrappedFunction
from ddtrace.debugging._function.store import FunctionStore
Expand Down Expand Up @@ -274,7 +273,6 @@ def __exit__(
class Debugger(Service):
_instance: Optional["Debugger"] = None
_probe_meter = _probe_metrics.get_meter("probe")
_span_processor: Optional[SpanExceptionProcessor] = None

__rc_adapter__ = ProbeRCAdapter
__uploader__ = LogsIntakeUploaderV1
Expand Down Expand Up @@ -328,9 +326,6 @@ def disable(cls, join: bool = True) -> None:
atexit.unregister(cls.disable)
unregister_post_run_module_hook(cls._on_run_module)

if cls._instance._span_processor:
cls._instance._span_processor.unregister()

cls._instance.stop(join=join)
cls._instance = None

Expand Down
54 changes: 37 additions & 17 deletions ddtrace/debugging/_exception/replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,28 @@
from dataclasses import dataclass
from itertools import count
from pathlib import Path
import sys
from threading import current_thread
from types import FrameType
from types import TracebackType
import typing as t
import uuid

from ddtrace._trace.processor import SpanProcessor
from ddtrace._trace.span import Span
from ddtrace.debugging._probe.model import LiteralTemplateSegment
from ddtrace.debugging._probe.model import LogLineProbe
from ddtrace.debugging._signal.snapshot import DEFAULT_CAPTURE_LIMITS
from ddtrace.debugging._signal.snapshot import Snapshot
from ddtrace.debugging._uploader import LogsIntakeUploaderV1
from ddtrace.debugging._uploader import UploaderProduct
from ddtrace.internal import core
from ddtrace.internal.logger import get_logger
from ddtrace.internal.packages import is_user_code
from ddtrace.internal.rate_limiter import BudgetRateLimiterWithJitter as RateLimiter
from ddtrace.internal.rate_limiter import RateLimitExceeded


log = get_logger(__name__)

GLOBAL_RATE_LIMITER = RateLimiter(
limit_rate=1, # one trace per second
raise_on_exceed=False,
Expand Down Expand Up @@ -141,20 +143,18 @@ def can_capture(span: Span) -> bool:
raise ValueError(msg)


@dataclass
class SpanExceptionProcessor(SpanProcessor):
class SpanExceptionHandler:
__uploader__ = LogsIntakeUploaderV1

def on_span_start(self, span: Span) -> None:
pass
_instance: t.Optional["SpanExceptionHandler"] = None

def on_span_finish(self, span: Span) -> None:
if not (span.error and can_capture(span)):
# No error or budget to capture
def on_span_exception(
self, span: Span, _exc_type: t.Type[BaseException], exc: BaseException, _tb: t.Optional[TracebackType]
) -> None:
if span.get_tag(DEBUG_INFO_TAG) == "true" or not can_capture(span):
# Debug info for span already captured or no budget to capture
return

_, exc, _tb = sys.exc_info()

chain, exc_id = unwind_exception_chain(exc, _tb)
if not chain or exc_id is None:
# No exceptions to capture
Expand Down Expand Up @@ -208,12 +208,32 @@ def on_span_finish(self, span: Span) -> None:
span.set_tag_str(DEBUG_INFO_TAG, "true")
span.set_tag_str(EXCEPTION_ID_TAG, str(exc_id))

def register(self) -> None:
super().register()
@classmethod
def enable(cls) -> None:
    """Install the singleton handler for ``span.exception`` events.

    When an instance is already stored on the class, only a debug message
    is logged. Otherwise a new instance is created, its uploader is
    registered for the Exception Replay product, and its
    ``on_span_exception`` method is subscribed to the ``span.exception``
    core event.
    """
    if cls._instance is None:
        log.debug("Enabling SpanExceptionHandler")

        handler = cls()
        uploader = handler.__uploader__
        uploader.register(UploaderProduct.EXCEPTION_REPLAY)
        core.on("span.exception", handler.on_span_exception, name=__name__)

        # Store the instance only once it has been fully wired up.
        cls._instance = handler
    else:
        log.debug("SpanExceptionHandler already enabled")

@classmethod
def disable(cls) -> None:
if cls._instance is None:
log.debug("SpanExceptionHandler already disabled")
return

log.debug("Disabling SpanExceptionHandler")

self.__uploader__.register(UploaderProduct.EXCEPTION_REPLAY)
instance = cls._instance

def unregister(self) -> None:
self.__uploader__.unregister(UploaderProduct.EXCEPTION_REPLAY)
core.reset_listeners("span.exception", instance.on_span_exception)
instance.__uploader__.unregister(UploaderProduct.EXCEPTION_REPLAY)

return super().unregister()
cls._instance = None
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
features:
- |
Exception Replay will capture any exceptions that are manually attached to
a span with a call to ``set_exc_info``.
32 changes: 30 additions & 2 deletions tests/debugging/exception/test_replay.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from contextlib import contextmanager
import sys

import pytest

Expand Down Expand Up @@ -79,7 +80,7 @@ def c(foo=42):
snapshots = {str(s.uuid): s for s in uploader.collector.queue}

for n, span in enumerate(self.spans):
assert span.get_tag("error.debug_info_captured") == "true"
assert span.get_tag(replay.DEBUG_INFO_TAG) == "true"

exc_id = span.get_tag("_dd.debug.error.exception_id")

Expand Down Expand Up @@ -146,7 +147,7 @@ def c(foo=42):
number_of_exc_ids = 1

for n, span in enumerate(self.spans):
assert span.get_tag("error.debug_info_captured") == "true"
assert span.get_tag(replay.DEBUG_INFO_TAG) == "true"

exc_id = span.get_tag("_dd.debug.error.exception_id")

Expand Down Expand Up @@ -184,3 +185,30 @@ def c(foo=42):
self.assert_span_count(6)
# no new snapshots
assert len(uploader.collector.queue) == 3

# Verify that an exception attached to a span by hand via ``set_exc_info``
# is captured while exception replay is active, and only once per span.
def test_debugger_capture_exception(self):
def a(v):
with self.trace("a") as span:
try:
raise ValueError("hello", v)
except Exception:
# The exception is handled here and not re-raised, so the only way it
# reaches the span is through the explicit ``set_exc_info`` call.
span.set_exc_info(*sys.exc_info())
# Check that we don't capture multiple times
span.set_exc_info(*sys.exc_info())

def b():
# Outer span wrapping ``a``; no exception is attached to it in this test.
with self.trace("b"):
a(42)

with exception_replay() as uploader:
with with_rate_limiter(RateLimiter(limit_rate=1, raise_on_exceed=False)):
b()

self.assert_span_count(2)
# Two ``set_exc_info`` calls on the same span must yield a single snapshot.
assert len(uploader.collector.queue) == 1

span_b, span_a = self.spans

assert span_a.name == "a"
# Only the span the exception was attached to carries the debug-info tag.
assert span_a.get_tag(replay.DEBUG_INFO_TAG) == "true"
assert span_b.get_tag(replay.DEBUG_INFO_TAG) is None
13 changes: 7 additions & 6 deletions tests/debugging/mocking.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

from ddtrace.debugging._config import di_config
from ddtrace.debugging._debugger import Debugger
from ddtrace.debugging._exception.replay import SpanExceptionProcessor
from ddtrace.debugging._exception.replay import SpanExceptionHandler
from ddtrace.debugging._probe.model import Probe
from ddtrace.debugging._probe.remoteconfig import ProbePollerEvent
from ddtrace.debugging._probe.remoteconfig import _filter_by_env_and_version
Expand Down Expand Up @@ -196,15 +196,16 @@ def debugger(**config_overrides: Any) -> Generator[TestDebugger, None, None]:
yield debugger


class MockSpanExceptionProcessor(SpanExceptionProcessor):
class MockSpanExceptionHandler(SpanExceptionHandler):
__uploader__ = MockLogsIntakeUploaderV1


@contextmanager
def exception_replay(**config_overrides: Any) -> Generator[MockLogsIntakeUploaderV1, None, None]:
processor = MockSpanExceptionProcessor()
processor.register()
MockSpanExceptionHandler.enable()

handler = MockSpanExceptionHandler._instance
try:
yield processor.__uploader__._instance
yield handler.__uploader__._instance
finally:
processor.unregister()
MockSpanExceptionHandler.disable()

0 comments on commit 0a49f52

Please sign in to comment.