Skip to content

Commit

Permalink
fix(llmobs): add parent ID handling for integration-generated spans (#…
Browse files Browse the repository at this point in the history
…9417)

This PR adds a fix to add handling for integration-generated spans for
LLMObs parent ID propagation. #9152 added a edge case handling in
`LLMObs._start_span()` if the span was part of the first service in a
distributed trace, in which case we would need to check the
`span.get_tag(PROPAGATED_PARENT_KEY)` due to the distributed header
being propagated upwards to the local root of the original service at
span finish time (but would always be propagated to all spans in
subsequent services at span start time).

Integration (openai, bedrock, langchain) generated spans use
`BaseLLMIntegration.trace(...)` instead of `LLMObs._start_span()` so we
needed to add handling here.

## Checklist

- [x] Change(s) are motivated and described in the PR description
- [x] Testing strategy is described if automated tests are not included
in the PR
- [x] Risks are described (performance impact, potential for breakage,
maintainability)
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] [Library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
are followed or label `changelog/no-changelog` is set
- [x] Documentation is included (in-code, generated user docs, [public
corp docs](https://github.com/DataDog/documentation/))
- [x] Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))
- [x] If this PR changes the public interface, I've notified
`@DataDog/apm-tees`.

## Reviewer Checklist

- [x] Title is accurate
- [x] All changes are related to the pull request's stated goal
- [x] Description motivates each change
- [x] Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- [x] Testing strategy adequately addresses listed risks
- [x] Change is maintainable (easy to change, telemetry, documentation)
- [x] Release note makes sense to a user of the library
- [x] Author has acknowledged and discussed the performance implications
of this PR as reported in the benchmarks PR comment
- [x] Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)
  • Loading branch information
Yun-Kim authored May 29, 2024
1 parent 0d695e6 commit 538a024
Show file tree
Hide file tree
Showing 5 changed files with 25 additions and 5 deletions.
3 changes: 0 additions & 3 deletions ddtrace/contrib/openai/_endpoint_hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from ddtrace.contrib.openai.utils import _loop_handler
from ddtrace.contrib.openai.utils import _process_finished_stream
from ddtrace.contrib.openai.utils import _tag_tool_calls
from ddtrace.ext import SpanTypes
from ddtrace.internal.utils.version import parse_version
from ddtrace.llmobs._constants import SPAN_KIND

Expand Down Expand Up @@ -190,7 +189,6 @@ class _CompletionHook(_BaseCompletionHook):

def _record_request(self, pin, integration, span, args, kwargs):
super()._record_request(pin, integration, span, args, kwargs)
span.span_type = SpanTypes.LLM
if integration.is_pc_sampled_llmobs(span):
span.set_tag_str(SPAN_KIND, "llm")
if integration.is_pc_sampled_span(span):
Expand Down Expand Up @@ -249,7 +247,6 @@ class _ChatCompletionHook(_BaseCompletionHook):

def _record_request(self, pin, integration, span, args, kwargs):
super()._record_request(pin, integration, span, args, kwargs)
span.span_type = SpanTypes.LLM
if integration.is_pc_sampled_llmobs(span):
span.set_tag_str(SPAN_KIND, "llm")
for idx, m in enumerate(kwargs.get("messages", [])):
Expand Down
9 changes: 9 additions & 0 deletions ddtrace/llmobs/_integrations/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
from ddtrace.internal.dogstatsd import get_dogstatsd_client
from ddtrace.internal.hostname import get_hostname
from ddtrace.internal.utils.formats import asbool
from ddtrace.llmobs._constants import PARENT_ID_KEY
from ddtrace.llmobs._constants import PROPAGATED_PARENT_ID_KEY
from ddtrace.llmobs._llmobs import LLMObs
from ddtrace.llmobs._log_writer import V2LogWriter
from ddtrace.llmobs._utils import _get_llmobs_parent_id
from ddtrace.sampler import RateSampler
from ddtrace.settings import IntegrationConfig

Expand Down Expand Up @@ -123,6 +126,12 @@ def trace(self, pin: Pin, operation_id: str, submit_to_llmobs: bool = False, **k
self._set_base_span_tags(span, **kwargs)
if submit_to_llmobs:
span.span_type = SpanTypes.LLM
if self.llmobs_enabled and span.get_tag(PROPAGATED_PARENT_ID_KEY) is None:
# For non-distributed traces or spans in the first service of a distributed trace,
# The LLMObs parent ID tag is not set at span start time. We need to manually set the parent ID tag now
# in these cases to avoid conflicting with the later propagated tags.
parent_id = _get_llmobs_parent_id(span) or "undefined"
span.set_tag_str(PARENT_ID_KEY, str(parent_id))
return span

@classmethod
Expand Down
6 changes: 6 additions & 0 deletions ddtrace/llmobs/_integrations/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,11 @@
from ddtrace.llmobs._constants import MODEL_NAME
from ddtrace.llmobs._constants import MODEL_PROVIDER
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
from ddtrace.llmobs._constants import PARENT_ID_KEY
from ddtrace.llmobs._constants import PROPAGATED_PARENT_ID_KEY
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._integrations import BaseLLMIntegration
from ddtrace.llmobs._utils import _get_llmobs_parent_id


log = get_logger(__name__)
Expand All @@ -31,6 +34,9 @@ def llmobs_set_tags(
"""Extract prompt/response tags from a completion and set them as temporary "_ml_obs.*" tags."""
if not self.llmobs_enabled:
return
if span.get_tag(PROPAGATED_PARENT_ID_KEY) is None:
parent_id = _get_llmobs_parent_id(span) or "undefined"
span.set_tag(PARENT_ID_KEY, parent_id)
parameters = {"temperature": float(span.get_tag("bedrock.request.temperature") or 0.0)}
max_tokens = int(span.get_tag("bedrock.request.max_tokens") or 0)
if max_tokens:
Expand Down
6 changes: 6 additions & 0 deletions ddtrace/llmobs/_integrations/openai.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._integrations.base import BaseLLMIntegration
from ddtrace.pin import Pin


class OpenAIIntegration(BaseLLMIntegration):
Expand Down Expand Up @@ -44,6 +45,11 @@ def user_api_key(self, value: str) -> None:
# Match the API key representation that OpenAI uses in their UI.
self._user_api_key = "sk-...%s" % value[-4:]

def trace(self, pin: Pin, operation_id: str, submit_to_llmobs: bool = False, **kwargs: Dict[str, Any]) -> Span:
if operation_id.endswith("Completion"):
submit_to_llmobs = True
return super().trace(pin, operation_id, submit_to_llmobs, **kwargs)

def _set_base_span_tags(self, span: Span, **kwargs) -> None:
span.set_tag_str(COMPONENT, self.integration_config.integration_name)
if self._user_api_key is not None:
Expand Down
6 changes: 4 additions & 2 deletions ddtrace/llmobs/_trace_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from ddtrace.llmobs._constants import OUTPUT_DOCUMENTS
from ddtrace.llmobs._constants import OUTPUT_MESSAGES
from ddtrace.llmobs._constants import OUTPUT_VALUE
from ddtrace.llmobs._constants import PARENT_ID_KEY
from ddtrace.llmobs._constants import SESSION_ID
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._constants import TAGS
Expand Down Expand Up @@ -100,11 +101,12 @@ def _llmobs_span_event(self, span: Span) -> Dict[str, Any]:
span.set_tag_str(ML_APP, ml_app)
session_id = _get_session_id(span)
span.set_tag_str(SESSION_ID, session_id)

parent_id = str(_get_llmobs_parent_id(span) or "undefined")
span._meta.pop(PARENT_ID_KEY, None)
return {
"trace_id": "{:x}".format(span.trace_id),
"span_id": str(span.span_id),
"parent_id": str(_get_llmobs_parent_id(span) or "undefined"),
"parent_id": parent_id,
"session_id": session_id,
"name": _get_span_name(span),
"tags": self._llmobs_tags(span, ml_app=ml_app, session_id=session_id),
Expand Down

0 comments on commit 538a024

Please sign in to comment.