feat(llmobs): support annotating primitive prompt templates (#10677)

Support the ability for users to annotate prompt template data via the annotate function. Users can pass in two types of objects as a prompt: (1) a plain dictionary that adheres to our `Prompt` typed dict, or (2) a `Prompt` typed dict built by importing `ddtrace.llmobs.utils.Prompt`. Follow-up: be able to pass in prompt template data to annotation_context. ## Checklist - [x] PR author has checked that all the criteria below are met - The PR description includes an overview of the change - The PR description articulates the motivation for the change - The change includes tests OR the PR description describes a testing strategy - The PR description notes risks associated with the change, if any - Newly-added code is easy to change - The change follows the [library release note guidelines](https://ddtrace.readthedocs.io/en/stable/releasenotes.html) - The change includes or references documentation updates if necessary - Backport labels are set (if [applicable](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting)) ## Reviewer Checklist - [x] Reviewer has checked that all the criteria below are met - Title is accurate - All changes are related to the pull request's stated goal - Avoids breaking [API](https://ddtrace.readthedocs.io/en/stable/versioning.html#interfaces) changes - Testing strategy adequately addresses listed risks - Newly-added code is easy to change - Release note makes sense to a user of the library - If necessary, author has acknowledged and discussed the performance implications of this PR as reported in the benchmarks PR comment - Backport labels are set in a manner that is consistent with the [release branch maintenance policy](https://ddtrace.readthedocs.io/en/latest/contributing.html#backporting) --------- Co-authored-by: lievan <[email protected]> Co-authored-by: Yun Kim <[email protected]>
DataDog · Sep 20, 2024 · 6c8aadd · 6c8aadd
1 parent 0d44103
commit 6c8aadd
Show file tree

Hide file tree

Showing 8 changed files with 137 additions and 0 deletions.
diff --git a/ddtrace/llmobs/_constants.py b/ddtrace/llmobs/_constants.py
@@ -14,6 +14,7 @@
 INPUT_MESSAGES = "_ml_obs.meta.input.messages"
 INPUT_VALUE = "_ml_obs.meta.input.value"
 INPUT_PARAMETERS = "_ml_obs.meta.input.parameters"
+INPUT_PROMPT = "_ml_obs.meta.input.prompt"
 
 OUTPUT_DOCUMENTS = "_ml_obs.meta.output.documents"
 OUTPUT_MESSAGES = "_ml_obs.meta.output.messages"

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
@@ -24,6 +24,7 @@
 from ddtrace.llmobs._constants import INPUT_DOCUMENTS
 from ddtrace.llmobs._constants import INPUT_MESSAGES
 from ddtrace.llmobs._constants import INPUT_PARAMETERS
+from ddtrace.llmobs._constants import INPUT_PROMPT
 from ddtrace.llmobs._constants import INPUT_VALUE
 from ddtrace.llmobs._constants import METADATA
 from ddtrace.llmobs._constants import METRICS
@@ -46,6 +47,7 @@
 from ddtrace.llmobs._utils import _get_session_id
 from ddtrace.llmobs._utils import _inject_llmobs_parent_id
 from ddtrace.llmobs._utils import safe_json
+from ddtrace.llmobs._utils import validate_prompt
 from ddtrace.llmobs._writer import LLMObsEvalMetricWriter
 from ddtrace.llmobs._writer import LLMObsSpanWriter
 from ddtrace.llmobs.utils import Documents
@@ -475,6 +477,7 @@ def annotate(
         cls,
         span: Optional[Span] = None,
         parameters: Optional[Dict[str, Any]] = None,
+        prompt: Optional[dict] = None,
         input_data: Optional[Any] = None,
         output_data: Optional[Any] = None,
         metadata: Optional[Dict[str, Any]] = None,
@@ -487,6 +490,8 @@ def annotate(
 
         :param Span span: Span to annotate. If no span is provided, the current active span will be used.
                           Must be an LLMObs-type span, i.e. generated by the LLMObs SDK.
+        :param prompt: A dictionary that represents the prompt used for an LLM call in the following form:
+                    {"template": "...", "id": "...", "version": "...", "variables": {"variable_1": "value_1", ...}}.
         :param input_data: A single input string, dictionary, or a list of dictionaries based on the span kind:
                            - llm spans: accepts a string, or a dictionary of form {"content": "...", "role": "..."},
                                         or a list of dictionaries with the same signature.
@@ -532,6 +537,12 @@ def annotate(
         if not span_kind:
             log.debug("Span kind not specified, skipping annotation for input/output data")
             return
+        if prompt is not None:
+            if span_kind == "llm":
+                cls._tag_prompt(span, prompt)
+            else:
+                log.warning("Annotating prompts are only supported for LLM span kinds.")
+
         if input_data or output_data:
             if span_kind == "llm":
                 cls._tag_llm_io(span, input_messages=input_data, output_messages=output_data)
@@ -542,6 +553,16 @@ def annotate(
             else:
                 cls._tag_text_io(span, input_value=input_data, output_value=output_data)
 
+    @staticmethod
+    def _tag_prompt(span, prompt: dict) -> None:
+        """Validate ``prompt`` and store it, JSON-encoded, under the ``INPUT_PROMPT`` tag of ``span``.
+
+        If a ``TypeError`` is raised while doing so, a warning is logged instead of propagating the error.
+        """
+        try:
+            span.set_tag_str(INPUT_PROMPT, safe_json(validate_prompt(prompt)))
+        except TypeError:
+            log.warning("Failed to validate prompt with error: ", exc_info=True)
+
     @staticmethod
     def _tag_params(span: Span, params: Dict[str, Any]) -> None:
         """Tags input parameters for a given LLMObs span.

diff --git a/ddtrace/llmobs/_trace_processor.py b/ddtrace/llmobs/_trace_processor.py
@@ -16,6 +16,7 @@
 from ddtrace.llmobs._constants import INPUT_DOCUMENTS
 from ddtrace.llmobs._constants import INPUT_MESSAGES
 from ddtrace.llmobs._constants import INPUT_PARAMETERS
+from ddtrace.llmobs._constants import INPUT_PROMPT
 from ddtrace.llmobs._constants import INPUT_VALUE
 from ddtrace.llmobs._constants import METADATA
 from ddtrace.llmobs._constants import METRICS
@@ -85,6 +86,8 @@ def _llmobs_span_event(self, span: Span) -> Dict[str, Any]:
             meta["output"]["value"] = span._meta.pop(OUTPUT_VALUE)
         if span_kind == "retrieval" and span.get_tag(OUTPUT_DOCUMENTS) is not None:
             meta["output"]["documents"] = json.loads(span._meta.pop(OUTPUT_DOCUMENTS))
+        if span_kind == "llm" and span.get_tag(INPUT_PROMPT) is not None:
+            meta["input"]["prompt"] = json.loads(span._meta.pop(INPUT_PROMPT))
         if span.error:
             meta[ERROR_MSG] = span.get_tag(ERROR_MSG)
             meta[ERROR_STACK] = span.get_tag(ERROR_STACK)

diff --git a/ddtrace/llmobs/_utils.py b/ddtrace/llmobs/_utils.py
@@ -1,5 +1,7 @@
 import json
+from typing import Dict
 from typing import Optional
+from typing import Union
 
 import ddtrace
 from ddtrace import Span
@@ -18,6 +20,35 @@
 log = get_logger(__name__)
 
 
+def validate_prompt(prompt: dict) -> Dict[str, Union[str, dict]]:
+    """Validate a prompt annotation and return a new dict holding only the recognized fields.
+
+    Recognized keys are ``variables`` (a dict mapping strings to strings) and ``template``,
+    ``version`` and ``id`` (strings). Keys that are missing or set to ``None`` are left out
+    of the result, and unrecognized keys are dropped.
+
+    :param prompt: The user-supplied prompt dictionary.
+    :return: A new dictionary containing the validated fields.
+    :raises TypeError: If ``prompt`` is not a dictionary or a recognized field has the wrong type.
+    """
+    validated_prompt = {}  # type: Dict[str, Union[str, dict]]
+    if not isinstance(prompt, dict):
+        raise TypeError("Prompt must be a dictionary")
+    variables = prompt.get("variables")
+    template = prompt.get("template")
+    version = prompt.get("version")
+    prompt_id = prompt.get("id")
+    if variables is not None:
+        if not isinstance(variables, dict):
+            raise TypeError("Prompt variables must be a dictionary.")
+        # Every key and every value must be a string. ``all`` is vacuously true for an empty
+        # dict, so ``{"variables": {}}`` is accepted rather than rejected.
+        if not all(isinstance(k, str) and isinstance(v, str) for k, v in variables.items()):
+            raise TypeError("Prompt variable keys and values must be strings.")
+        validated_prompt["variables"] = variables
+    if template is not None:
+        if not isinstance(template, str):
+            raise TypeError("Prompt template must be a string")
+        validated_prompt["template"] = template
+    if version is not None:
+        if not isinstance(version, str):
+            raise TypeError("Prompt version must be a string.")
+        validated_prompt["version"] = version
+    if prompt_id is not None:
+        if not isinstance(prompt_id, str):
+            raise TypeError("Prompt id must be a string.")
+        validated_prompt["id"] = prompt_id
+    return validated_prompt
+
+
 class AnnotationContext:
     def __init__(self, _tracer, _annotation_callback):
         self._tracer = _tracer

diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py
@@ -19,6 +19,7 @@
 ExportedLLMObsSpan = TypedDict("ExportedLLMObsSpan", {"span_id": str, "trace_id": str})
 Document = TypedDict("Document", {"name": str, "id": str, "text": str, "score": float}, total=False)
 Message = TypedDict("Message", {"content": str, "role": str}, total=False)
+Prompt = TypedDict("Prompt", {"variables": Dict[str, str], "template": str, "id": str, "version": str}, total=False)
 
 
 class Messages:

diff --git a/releasenotes/notes/support-prompt-annotations-b8e406261197f61a.yaml b/releasenotes/notes/support-prompt-annotations-b8e406261197f61a.yaml
@@ -0,0 +1,5 @@
+---
+features:
+  - |
+    LLM Observability: Introduces prompt template annotation, which can be passed as an argument to `LLMObs.annotate(prompt={...})` for LLM span kinds.
+    For more information on prompt annotations, see https://docs.datadoghq.com/llm_observability/setup/sdk/#annotating-a-span.
diff --git a/tests/llmobs/test_llmobs_service.py b/tests/llmobs/test_llmobs_service.py
@@ -14,6 +14,7 @@
 from ddtrace.llmobs._constants import INPUT_DOCUMENTS
 from ddtrace.llmobs._constants import INPUT_MESSAGES
 from ddtrace.llmobs._constants import INPUT_PARAMETERS
+from ddtrace.llmobs._constants import INPUT_PROMPT
 from ddtrace.llmobs._constants import INPUT_VALUE
 from ddtrace.llmobs._constants import METADATA
 from ddtrace.llmobs._constants import METRICS
@@ -28,6 +29,7 @@
 from ddtrace.llmobs._constants import SPAN_START_WHILE_DISABLED_WARNING
 from ddtrace.llmobs._constants import TAGS
 from ddtrace.llmobs._llmobs import LLMObsTraceProcessor
+from ddtrace.llmobs.utils import Prompt
 from tests.llmobs._utils import _expected_llmobs_eval_metric_event
 from tests.llmobs._utils import _expected_llmobs_llm_span_event
 from tests.llmobs._utils import _expected_llmobs_non_llm_span_event
@@ -746,6 +748,64 @@ def test_annotate_metrics_unserializable_uses_placeholder(LLMObs, mock_logs):
         assert "[Unserializable object: <object object at" in metrics["content"]
 
 
+def test_annotate_prompt_dict(LLMObs):
+    """A plain dict prompt is stored on the LLM span as JSON under INPUT_PROMPT."""
+    prompt_fields = {
+        "template": "{var1} {var3}",
+        "variables": {"var1": "var1", "var2": "var3"},
+        "version": "1.0.0",
+        "id": "test_prompt",
+    }
+    with LLMObs.llm(model_name="test_model") as llm_span:
+        LLMObs.annotate(span=llm_span, prompt=dict(prompt_fields))
+        tagged_prompt = json.loads(llm_span.get_tag(INPUT_PROMPT))
+        assert tagged_prompt == prompt_fields
+
+
+def test_annotate_prompt_typed_dict(LLMObs):
+    """A ``Prompt`` typed dict is stored on the LLM span as JSON under INPUT_PROMPT."""
+    typed_prompt = Prompt(
+        template="{var1} {var3}",
+        variables={"var1": "var1", "var2": "var3"},
+        version="1.0.0",
+        id="test_prompt",
+    )
+    expected_prompt = {
+        "template": "{var1} {var3}",
+        "variables": {"var1": "var1", "var2": "var3"},
+        "version": "1.0.0",
+        "id": "test_prompt",
+    }
+    with LLMObs.llm(model_name="test_model") as llm_span:
+        LLMObs.annotate(span=llm_span, prompt=typed_prompt)
+        tagged_prompt = json.loads(llm_span.get_tag(INPUT_PROMPT))
+        assert tagged_prompt == expected_prompt
+
+
+def test_annotate_prompt_wrong_type(LLMObs, mock_logs):
+    """Invalid prompts are rejected: a warning is logged and no prompt tag is written."""
+    with LLMObs.llm(model_name="test_model") as span:
+        # First a non-dict prompt, then a dict whose template is not a string.
+        for invalid_prompt in ("prompt", {"template": 1}):
+            LLMObs.annotate(span=span, prompt=invalid_prompt)
+            # Checked for every case so a rejected prompt can never end up on the span.
+            assert span.get_tag(INPUT_PROMPT) is None
+            mock_logs.warning.assert_called_once_with("Failed to validate prompt with error: ", exc_info=True)
+            mock_logs.reset_mock()
+
+
+def test_annotate_prompt_wrong_kind(LLMObs, mock_logs):
+    """Prompt annotations on a non-LLM span are skipped and a warning is logged."""
+    expected_warning = "Annotating prompts are only supported for LLM span kinds."
+    with LLMObs.task(name="dummy") as task_span:
+        LLMObs.annotate(prompt={"variables": {"var1": "var1"}})
+        assert task_span.get_tag(INPUT_PROMPT) is None
+        mock_logs.warning.assert_called_once_with(expected_warning)
+        mock_logs.reset_mock()
+
+
 def test_span_error_sets_error(LLMObs, mock_llmobs_span_writer):
     with pytest.raises(ValueError):
         with LLMObs.llm(model_name="test_model", model_provider="test_model_provider") as span:

diff --git a/tests/llmobs/test_llmobs_trace_processor.py b/tests/llmobs/test_llmobs_trace_processor.py
@@ -1,10 +1,13 @@
+import json
+
 import mock
 import pytest
 
 from ddtrace._trace.span import Span
 from ddtrace.ext import SpanTypes
 from ddtrace.llmobs._constants import INPUT_MESSAGES
 from ddtrace.llmobs._constants import INPUT_PARAMETERS
+from ddtrace.llmobs._constants import INPUT_PROMPT
 from ddtrace.llmobs._constants import INPUT_VALUE
 from ddtrace.llmobs._constants import LANGCHAIN_APM_SPAN_NAME
 from ddtrace.llmobs._constants import METADATA
@@ -326,6 +329,18 @@ def test_output_value_is_set():
         assert tp._llmobs_span_event(llm_span)["meta"]["output"]["value"] == "value"
 
 
+def test_prompt_is_set():
+    """Test that the prompt is set on the span event if it is present on the span."""
+    prompt = {"variables": {"var1": "var2"}}
+    tracer = DummyTracer()
+    span_writer = mock.MagicMock()
+    with override_global_config(dict(_llmobs_ml_app="unnamed-ml-app")):
+        with tracer.trace("root_llm_span", span_type=SpanTypes.LLM) as llm_span:
+            llm_span.set_tag(SPAN_KIND, "llm")
+            llm_span.set_tag(INPUT_PROMPT, json.dumps(prompt))
+        processor = LLMObsTraceProcessor(llmobs_span_writer=span_writer)
+        span_event = processor._llmobs_span_event(llm_span)
+        assert span_event["meta"]["input"]["prompt"] == prompt
+
+
 def test_metadata_is_set():
     """Test that metadata is set on the span event if it is present on the span."""
     dummy_tracer = DummyTracer()