Skip to content

Commit

Permalink
feat(llmobs): support annotating primitive prompt templates (#10677)
Browse files Browse the repository at this point in the history
Support the ability for users to annotate prompt template data via the
annotate function.

Users can pass in either of two kinds of object as a prompt:
- a plain dictionary that adheres to our `Prompt` typed dict
- a `Prompt` typed dict imported from `ddtrace.llmobs.utils`

Follow up: be able to pass in prompt template data to annotation_context

## Checklist
- [x] PR author has checked that all the criteria below are met
- The PR description includes an overview of the change
- The PR description articulates the motivation for the change
- The change includes tests OR the PR description describes a testing
strategy
- The PR description notes risks associated with the change, if any
- Newly-added code is easy to change
- The change follows the [library release note
guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html)
- The change includes or references documentation updates if necessary
- Backport labels are set (if
[applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting))



## Reviewer Checklist
- [x] Reviewer has checked that all the criteria below are met 
- Title is accurate
- All changes are related to the pull request's stated goal
- Avoids breaking
[API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces)
changes
- Testing strategy adequately addresses listed risks
- Newly-added code is easy to change
- Release note makes sense to a user of the library
- If necessary, author has acknowledged and discussed the performance
implications of this PR as reported in the benchmarks PR comment
- Backport labels are set in a manner that is consistent with the
[release branch maintenance
policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)

---------

Co-authored-by: lievan <[email protected]>
Co-authored-by: Yun Kim <[email protected]>
  • Loading branch information
3 people authored Sep 20, 2024
1 parent 0d44103 commit 6c8aadd
Show file tree
Hide file tree
Showing 8 changed files with 137 additions and 0 deletions.
1 change: 1 addition & 0 deletions ddtrace/llmobs/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
INPUT_MESSAGES = "_ml_obs.meta.input.messages"
INPUT_VALUE = "_ml_obs.meta.input.value"
INPUT_PARAMETERS = "_ml_obs.meta.input.parameters"
INPUT_PROMPT = "_ml_obs.meta.input.prompt"

OUTPUT_DOCUMENTS = "_ml_obs.meta.output.documents"
OUTPUT_MESSAGES = "_ml_obs.meta.output.messages"
Expand Down
21 changes: 21 additions & 0 deletions ddtrace/llmobs/_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ddtrace.llmobs._constants import INPUT_DOCUMENTS
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_PARAMETERS
from ddtrace.llmobs._constants import INPUT_PROMPT
from ddtrace.llmobs._constants import INPUT_VALUE
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
Expand All @@ -46,6 +47,7 @@
from ddtrace.llmobs._utils import _get_session_id
from ddtrace.llmobs._utils import _inject_llmobs_parent_id
from ddtrace.llmobs._utils import safe_json
from ddtrace.llmobs._utils import validate_prompt
from ddtrace.llmobs._writer import LLMObsEvalMetricWriter
from ddtrace.llmobs._writer import LLMObsSpanWriter
from ddtrace.llmobs.utils import Documents
Expand Down Expand Up @@ -475,6 +477,7 @@ def annotate(
cls,
span: Optional[Span] = None,
parameters: Optional[Dict[str, Any]] = None,
prompt: Optional[dict] = None,
input_data: Optional[Any] = None,
output_data: Optional[Any] = None,
metadata: Optional[Dict[str, Any]] = None,
Expand All @@ -487,6 +490,8 @@ def annotate(
:param Span span: Span to annotate. If no span is provided, the current active span will be used.
Must be an LLMObs-type span, i.e. generated by the LLMObs SDK.
:param prompt: A dictionary that represents the prompt used for an LLM call in the following form:
{"template": "...", "id": "...", "version": "...", "variables": {"variable_1": "value_1", ...}}.
:param input_data: A single input string, dictionary, or a list of dictionaries based on the span kind:
- llm spans: accepts a string, or a dictionary of form {"content": "...", "role": "..."},
or a list of dictionaries with the same signature.
Expand Down Expand Up @@ -532,6 +537,12 @@ def annotate(
if not span_kind:
log.debug("Span kind not specified, skipping annotation for input/output data")
return
if prompt is not None:
if span_kind == "llm":
cls._tag_prompt(span, prompt)
else:
log.warning("Annotating prompts are only supported for LLM span kinds.")

if input_data or output_data:
if span_kind == "llm":
cls._tag_llm_io(span, input_messages=input_data, output_messages=output_data)
Expand All @@ -542,6 +553,16 @@ def annotate(
else:
cls._tag_text_io(span, input_value=input_data, output_value=output_data)

@staticmethod
def _tag_prompt(span, prompt: dict) -> None:
    """Validate ``prompt`` and store it, serialized, under the ``INPUT_PROMPT`` tag of ``span``.

    A ``TypeError`` raised while validating, serializing or tagging is logged
    as a warning (with traceback) instead of being propagated to the caller.
    """
    try:
        span.set_tag_str(INPUT_PROMPT, safe_json(validate_prompt(prompt)))
    except TypeError:
        log.warning("Failed to validate prompt with error: ", exc_info=True)

@staticmethod
def _tag_params(span: Span, params: Dict[str, Any]) -> None:
"""Tags input parameters for a given LLMObs span.
Expand Down
3 changes: 3 additions & 0 deletions ddtrace/llmobs/_trace_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from ddtrace.llmobs._constants import INPUT_DOCUMENTS
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_PARAMETERS
from ddtrace.llmobs._constants import INPUT_PROMPT
from ddtrace.llmobs._constants import INPUT_VALUE
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
Expand Down Expand Up @@ -85,6 +86,8 @@ def _llmobs_span_event(self, span: Span) -> Dict[str, Any]:
meta["output"]["value"] = span._meta.pop(OUTPUT_VALUE)
if span_kind == "retrieval" and span.get_tag(OUTPUT_DOCUMENTS) is not None:
meta["output"]["documents"] = json.loads(span._meta.pop(OUTPUT_DOCUMENTS))
if span_kind == "llm" and span.get_tag(INPUT_PROMPT) is not None:
meta["input"]["prompt"] = json.loads(span._meta.pop(INPUT_PROMPT))
if span.error:
meta[ERROR_MSG] = span.get_tag(ERROR_MSG)
meta[ERROR_STACK] = span.get_tag(ERROR_STACK)
Expand Down
31 changes: 31 additions & 0 deletions ddtrace/llmobs/_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
from typing import Dict
from typing import Optional
from typing import Union

import ddtrace
from ddtrace import Span
Expand All @@ -18,6 +20,35 @@
log = get_logger(__name__)


def validate_prompt(prompt: dict) -> Dict[str, Union[str, dict]]:
    """Validate a user-supplied prompt and return a new dict with only the recognized fields.

    The recognized keys are ``variables``, ``template``, ``version`` and ``id``.
    Each one is optional: a key that is absent or set to ``None`` is left out of
    the result, and any other key in ``prompt`` is ignored.

    :param prompt: Dictionary of the form
        ``{"template": "...", "id": "...", "version": "...", "variables": {"name": "value", ...}}``.
    :return: A new dictionary holding the validated fields. The ``variables``
        value is the same dict object that was passed in, not a copy.
    :raises TypeError: If ``prompt`` is not a dictionary, if ``variables`` is not
        a dictionary or contains a non-string key or value, or if ``template``,
        ``version`` or ``id`` is not a string.
    """
    if not isinstance(prompt, dict):
        raise TypeError("Prompt must be a dictionary")

    validated_prompt = {}  # type: Dict[str, Union[str, dict]]
    variables = prompt.get("variables")
    template = prompt.get("template")
    version = prompt.get("version")
    prompt_id = prompt.get("id")

    if variables is not None:
        if not isinstance(variables, dict):
            raise TypeError("Prompt variables must be a dictionary.")
        # Every key AND every value must be a string. `all` over an empty dict is
        # True, so an empty `variables` mapping is accepted.
        if not all(isinstance(k, str) and isinstance(v, str) for k, v in variables.items()):
            raise TypeError("Prompt variable keys and values must be strings.")
        validated_prompt["variables"] = variables
    if template is not None:
        if not isinstance(template, str):
            raise TypeError("Prompt template must be a string")
        validated_prompt["template"] = template
    if version is not None:
        if not isinstance(version, str):
            raise TypeError("Prompt version must be a string.")
        validated_prompt["version"] = version
    if prompt_id is not None:
        if not isinstance(prompt_id, str):
            raise TypeError("Prompt id must be a string.")
        validated_prompt["id"] = prompt_id
    return validated_prompt


class AnnotationContext:
def __init__(self, _tracer, _annotation_callback):
self._tracer = _tracer
Expand Down
1 change: 1 addition & 0 deletions ddtrace/llmobs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
ExportedLLMObsSpan = TypedDict("ExportedLLMObsSpan", {"span_id": str, "trace_id": str})
Document = TypedDict("Document", {"name": str, "id": str, "text": str, "score": float}, total=False)
Message = TypedDict("Message", {"content": str, "role": str}, total=False)
Prompt = TypedDict("Prompt", {"variables": Dict[str, str], "template": str, "id": str, "version": str}, total=False)


class Messages:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
features:
- |
LLM Observability: Introduces prompt template annotation, which can be passed as an argument to `LLMObs.annotate(prompt={...})` for LLM span kinds.
For more information on prompt annotations, see https://docs.datadoghq.com/llm_observability/setup/sdk/#annotating-a-span.
60 changes: 60 additions & 0 deletions tests/llmobs/test_llmobs_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from ddtrace.llmobs._constants import INPUT_DOCUMENTS
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_PARAMETERS
from ddtrace.llmobs._constants import INPUT_PROMPT
from ddtrace.llmobs._constants import INPUT_VALUE
from ddtrace.llmobs._constants import METADATA
from ddtrace.llmobs._constants import METRICS
Expand All @@ -28,6 +29,7 @@
from ddtrace.llmobs._constants import SPAN_START_WHILE_DISABLED_WARNING
from ddtrace.llmobs._constants import TAGS
from ddtrace.llmobs._llmobs import LLMObsTraceProcessor
from ddtrace.llmobs.utils import Prompt
from tests.llmobs._utils import _expected_llmobs_eval_metric_event
from tests.llmobs._utils import _expected_llmobs_llm_span_event
from tests.llmobs._utils import _expected_llmobs_non_llm_span_event
Expand Down Expand Up @@ -746,6 +748,64 @@ def test_annotate_metrics_unserializable_uses_placeholder(LLMObs, mock_logs):
assert "[Unserializable object: <object object at" in metrics["content"]


def test_annotate_prompt_dict(LLMObs):
"""Annotating an LLM span with a plain-dict prompt stores that prompt under the INPUT_PROMPT tag."""
with LLMObs.llm(model_name="test_model") as span:
LLMObs.annotate(
span=span,
prompt={
"template": "{var1} {var3}",
"variables": {"var1": "var1", "var2": "var3"},
"version": "1.0.0",
"id": "test_prompt",
},
)
# The tag value is a JSON string, so decode it before comparing with the input dict.
assert json.loads(span.get_tag(INPUT_PROMPT)) == {
"template": "{var1} {var3}",
"variables": {"var1": "var1", "var2": "var3"},
"version": "1.0.0",
"id": "test_prompt",
}


def test_annotate_prompt_typed_dict(LLMObs):
"""Annotating an LLM span with a ``Prompt`` typed dict stores the same data as an equivalent plain dict."""
with LLMObs.llm(model_name="test_model") as span:
LLMObs.annotate(
span=span,
prompt=Prompt(
template="{var1} {var3}",
variables={"var1": "var1", "var2": "var3"},
version="1.0.0",
id="test_prompt",
),
)
# The tag value is a JSON string, so decode it before comparing with the expected dict.
assert json.loads(span.get_tag(INPUT_PROMPT)) == {
"template": "{var1} {var3}",
"variables": {"var1": "var1", "var2": "var3"},
"version": "1.0.0",
"id": "test_prompt",
}


def test_annotate_prompt_wrong_type(LLMObs, mock_logs):
"""An invalid prompt (a non-dict, or a dict with a non-string template) logs a validation warning."""
with LLMObs.llm(model_name="test_model") as span:
# Case 1: the prompt itself is not a dictionary; no prompt tag must be set.
LLMObs.annotate(span=span, prompt="prompt")
assert span.get_tag(INPUT_PROMPT) is None
mock_logs.warning.assert_called_once_with("Failed to validate prompt with error: ", exc_info=True)
mock_logs.reset_mock()

# Case 2: the prompt is a dictionary but its template is not a string.
LLMObs.annotate(span=span, prompt={"template": 1})
mock_logs.warning.assert_called_once_with("Failed to validate prompt with error: ", exc_info=True)
mock_logs.reset_mock()


def test_annotate_prompt_wrong_kind(LLMObs, mock_logs):
"""Annotating a prompt on a non-LLM (task) span sets no prompt tag and logs a warning."""
with LLMObs.task(name="dummy") as span:
# No span is passed here; annotate() is documented to fall back to the current active span.
LLMObs.annotate(prompt={"variables": {"var1": "var1"}})
assert span.get_tag(INPUT_PROMPT) is None
mock_logs.warning.assert_called_once_with("Annotating prompts are only supported for LLM span kinds.")
mock_logs.reset_mock()


def test_span_error_sets_error(LLMObs, mock_llmobs_span_writer):
with pytest.raises(ValueError):
with LLMObs.llm(model_name="test_model", model_provider="test_model_provider") as span:
Expand Down
15 changes: 15 additions & 0 deletions tests/llmobs/test_llmobs_trace_processor.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import json

import mock
import pytest

from ddtrace._trace.span import Span
from ddtrace.ext import SpanTypes
from ddtrace.llmobs._constants import INPUT_MESSAGES
from ddtrace.llmobs._constants import INPUT_PARAMETERS
from ddtrace.llmobs._constants import INPUT_PROMPT
from ddtrace.llmobs._constants import INPUT_VALUE
from ddtrace.llmobs._constants import LANGCHAIN_APM_SPAN_NAME
from ddtrace.llmobs._constants import METADATA
Expand Down Expand Up @@ -326,6 +329,18 @@ def test_output_value_is_set():
assert tp._llmobs_span_event(llm_span)["meta"]["output"]["value"] == "value"


def test_prompt_is_set():
"""Test that the prompt is set on the span event if it is present on the span."""
dummy_tracer = DummyTracer()
mock_llmobs_span_writer = mock.MagicMock()
with override_global_config(dict(_llmobs_ml_app="unnamed-ml-app")):
with dummy_tracer.trace("root_llm_span", span_type=SpanTypes.LLM) as llm_span:
llm_span.set_tag(SPAN_KIND, "llm")
# The prompt is stored on the span as a JSON string; the span event should carry the decoded dict.
llm_span.set_tag(INPUT_PROMPT, json.dumps({"variables": {"var1": "var2"}}))
tp = LLMObsTraceProcessor(llmobs_span_writer=mock_llmobs_span_writer)
assert tp._llmobs_span_event(llm_span)["meta"]["input"]["prompt"] == {"variables": {"var1": "var2"}}


def test_metadata_is_set():
"""Test that metadata is set on the span event if it is present on the span."""
dummy_tracer = DummyTracer()
Expand Down

0 comments on commit 6c8aadd

Please sign in to comment.