feat(LLMObs): update the prompt annotation (#12551)

yahya-mouman · Yun-Kim · Kyle-Verhoog · web-flow · commit 8fbf8366ffcc · 2025-09-19T13:07:28.000-04:00
This PR updates the prompt typed dict, extending it with a few fields:
- name: name of the prompt
- chat_template: list of role/content pairs, where content is a string
template of a prompt
- tags: dictionary of string key-value tags for the prompt run.

It also adds the strict validation mode. Strict validation adds the
following checks :
- id is mandatory
- either a template or a chat_template should be provided


Co-authored-by: Yun Kim <35776586+Yun-Kim@users.noreply.github.com>
Co-authored-by: kyle <kyle@verhoog.ca>
diff --git a/ddtrace/llmobs/_constants.py b/ddtrace/llmobs/_constants.py
@@ -86,6 +86,10 @@
 
 SPAN_LINKS = "_ml_obs.span_links"
 NAME = "_ml_obs.name"
+
+# Prompt constants
+DEFAULT_PROMPT_NAME = "unnamed-prompt"
+
 DECORATOR = "_ml_obs.decorator"
 INTEGRATION = "_ml_obs.integration"
 
diff --git a/ddtrace/llmobs/_integrations/langchain.py b/ddtrace/llmobs/_integrations/langchain.py
@@ -40,8 +40,8 @@
 from ddtrace.llmobs._integrations.utils import update_proxy_workflow_input_output_value
 from ddtrace.llmobs._utils import _get_attr
 from ddtrace.llmobs._utils import _get_nearest_llmobs_ancestor
+from ddtrace.llmobs._utils import _validate_prompt
 from ddtrace.llmobs._utils import safe_json
-from ddtrace.llmobs._utils import validate_prompt
 from ddtrace.llmobs.utils import Document
 from ddtrace.trace import Span
 
@@ -885,7 +885,7 @@ def llmobs_set_prompt_tag(self, instance, span: Span):
         if prompt_value_meta is not None:
             prompt = prompt_value_meta
             try:
-                prompt = validate_prompt(prompt)
+                prompt = _validate_prompt(prompt, strict_validation=True)
                 span._set_ctx_item(INPUT_PROMPT, prompt)
             except Exception as e:
                 log.debug("Failed to validate langchain prompt", e)
diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
@@ -100,9 +100,9 @@
 from ddtrace.llmobs._utils import _get_session_id
 from ddtrace.llmobs._utils import _get_span_name
 from ddtrace.llmobs._utils import _is_evaluation_span
+from ddtrace.llmobs._utils import _validate_prompt
 from ddtrace.llmobs._utils import enforce_message_role
 from ddtrace.llmobs._utils import safe_json
-from ddtrace.llmobs._utils import validate_prompt
 from ddtrace.llmobs._writer import LLMObsEvalMetricWriter
 from ddtrace.llmobs._writer import LLMObsEvaluationMetricEvent
 from ddtrace.llmobs._writer import LLMObsExperimentsClient
@@ -112,6 +112,7 @@
 from ddtrace.llmobs.utils import Documents
 from ddtrace.llmobs.utils import ExportedLLMObsSpan
 from ddtrace.llmobs.utils import Messages
+from ddtrace.llmobs.utils import Prompt
 from ddtrace.llmobs.utils import extract_tool_definitions
 from ddtrace.propagation.http import HTTPPropagator
 
@@ -841,7 +842,10 @@ def _tag_span_links(self, span, span_links):
 
     @classmethod
     def annotation_context(
-        cls, tags: Optional[Dict[str, Any]] = None, prompt: Optional[dict] = None, name: Optional[str] = None
+        cls,
+        tags: Optional[Dict[str, Any]] = None,
+        prompt: Optional[Union[dict, Prompt]] = None,
+        name: Optional[str] = None,
     ) -> AnnotationContext:
         """
         Sets specified attributes on all LLMObs spans created while the returned AnnotationContext is active.
@@ -850,10 +854,16 @@ def annotation_context(
         :param tags: Dictionary of JSON serializable key-value tag pairs to set or update on the LLMObs span
                      regarding the span's context.
         :param prompt: A dictionary that represents the prompt used for an LLM call in the following form:
-                        `{"template": "...", "id": "...", "version": "...", "variables": {"variable_1": "...", ...}}`.
+                        `{
+                            "id": "...",
+                            "version": "...",
+                            "chat_template": [{"content": "...", "role": "..."}, ...],
+                            "variables": {"variable_1": "...", ...},
+                            "tags": {"key1": "value1", "key2": "value2"},
+                        }`.
                         Can also be set using the `ddtrace.llmobs.utils.Prompt` constructor class.
                         - This argument is only applicable to LLM spans.
-                        - The dictionary may contain two optional keys relevant to RAG applications:
+                        - The dictionary may contain optional keys relevant to Templates and RAG applications:
                             `rag_context_variables` - a list of variable key names that contain ground
                                                         truth context information
                             `rag_query_variables` - a list of variable key names that contains query
@@ -1289,7 +1299,14 @@ def annotate(
         :param Span span: Span to annotate. If no span is provided, the current active span will be used.
                           Must be an LLMObs-type span, i.e. generated by the LLMObs SDK.
         :param prompt: A dictionary that represents the prompt used for an LLM call in the following form:
-                        `{"template": "...", "id": "...", "version": "...", "variables": {"variable_1": "...", ...}}`.
+                        `{
+                            "id": "...",
+                            "template": "...",
+                            "chat_template": [{"content": "...", "role": "..."}, ...],
+                            "version": "...",
+                            "variables": {"variable_1": "...", ...},
+                            "tags": {"tag_1": "...", ...},
+                        }`.
                         Can also be set using the `ddtrace.llmobs.utils.Prompt` constructor class.
                         - This argument is only applicable to LLM spans.
                         - The dictionary may contain two optional keys relevant to RAG applications:
@@ -1373,11 +1390,11 @@ def annotate(
                 span.name = _name
             if prompt is not None:
                 try:
-                    validated_prompt = validate_prompt(prompt)
+                    validated_prompt = _validate_prompt(prompt, strict_validation=False)
                     cls._set_dict_attribute(span, INPUT_PROMPT, validated_prompt)
-                except TypeError:
+                except (ValueError, TypeError) as e:
                     error = "invalid_prompt"
-                    log.warning("Failed to validate prompt with error: ", exc_info=True)
+                    log.warning("Failed to validate prompt with error:", str(e), exc_info=True)
             if not span_kind:
                 log.debug("Span kind not specified, skipping annotation for input/output data")
                 return
diff --git a/ddtrace/llmobs/_utils.py b/ddtrace/llmobs/_utils.py
@@ -2,6 +2,7 @@
 from dataclasses import dataclass
 from dataclasses import is_dataclass
 import json
+from typing import Any
 from typing import Dict
 from typing import List
 from typing import Optional
@@ -14,6 +15,7 @@
 from ddtrace.internal.logger import get_logger
 from ddtrace.internal.utils.formats import format_trace_id
 from ddtrace.llmobs._constants import CREWAI_APM_SPAN_NAME
+from ddtrace.llmobs._constants import DEFAULT_PROMPT_NAME
 from ddtrace.llmobs._constants import GEMINI_APM_SPAN_NAME
 from ddtrace.llmobs._constants import INTERNAL_CONTEXT_VARIABLE_KEYS
 from ddtrace.llmobs._constants import INTERNAL_QUERY_VARIABLE_KEYS
@@ -27,11 +29,14 @@
 from ddtrace.llmobs._constants import SESSION_ID
 from ddtrace.llmobs._constants import SPAN_LINKS
 from ddtrace.llmobs._constants import VERTEXAI_APM_SPAN_NAME
+from ddtrace.llmobs.utils import Message
+from ddtrace.llmobs.utils import Prompt
 from ddtrace.trace import Span
 
 
 log = get_logger(__name__)
 
+ValidatedPromptDict = Dict[str, Union[str, Dict[str, Any], List[str], List[Dict[str, str]], List[Message]]]
 
 STANDARD_INTEGRATION_SPAN_NAMES = (
     CREWAI_APM_SPAN_NAME,
@@ -42,50 +47,100 @@
 )
 
 
-def validate_prompt(prompt: dict) -> Dict[str, Union[str, dict, List[str]]]:
-    validated_prompt = {}  # type: Dict[str, Union[str, dict, List[str]]]
+def _validate_prompt(prompt: Union[Dict[str, Any], Prompt], strict_validation: bool) -> ValidatedPromptDict:
     if not isinstance(prompt, dict):
-        raise TypeError("Prompt must be a dictionary")
+        raise TypeError(f"Prompt must be a dictionary, received {type(prompt).__name__}.")
+
+    ml_app = config._llmobs_ml_app
+    prompt_id = prompt.get("id")
+    version = prompt.get("version")
+    tags = prompt.get("tags")
     variables = prompt.get("variables")
     template = prompt.get("template")
-    version = prompt.get("version")
-    prompt_id = prompt.get("id")
+    chat_template = prompt.get("chat_template")
     ctx_variable_keys = prompt.get("rag_context_variables")
-    rag_query_variable_keys = prompt.get("rag_query_variables")
-    if variables is not None:
+    query_variable_keys = prompt.get("rag_query_variables")
+
+    if strict_validation:
+        if prompt_id is None:
+            raise ValueError("'id' must be provided")
+        if template is None and chat_template is None:
+            raise ValueError("One of 'template' or 'chat_template' must be provided to annotate a prompt.")
+
+    if template and chat_template:
+        raise ValueError("Only one of 'template' or 'chat_template' can be provided, not both.")
+
+    final_prompt_id = prompt_id or f"{ml_app}_{DEFAULT_PROMPT_NAME}"
+    final_ctx_variable_keys = ctx_variable_keys or ["context"]
+    final_query_variable_keys = query_variable_keys or ["question"]
+
+    if not isinstance(final_prompt_id, str):
+        raise TypeError(f"prompt_id {final_prompt_id} must be a string, received {type(final_prompt_id).__name__}")
+
+    if not (isinstance(final_ctx_variable_keys, list) and all(isinstance(i, str) for i in final_ctx_variable_keys)):
+        raise TypeError(f"ctx_variables must be a list of strings, received {type(final_ctx_variable_keys).__name__}")
+
+    if not (isinstance(final_query_variable_keys, list) and all(isinstance(i, str) for i in final_query_variable_keys)):
+        raise TypeError(
+            f"query_variables must be a list of strings, received {type(final_query_variable_keys).__name__}"
+        )
+
+    if version and not isinstance(version, str):
+        raise TypeError(f"version: {version} must be a string, received {type(version).__name__}")
+
+    if tags:
+        if not isinstance(tags, dict):
+            raise TypeError(
+                f"tags: {tags} must be a dictionary of string key-value pairs, received {type(tags).__name__}"
+            )
+        if not all(isinstance(k, str) for k in tags):
+            raise TypeError("Keys of 'tags' must all be strings.")
+        if not all(isinstance(k, str) for k in tags.values()):
+            raise TypeError("Values of 'tags' must all be strings.")
+
+    if template and not isinstance(template, str):
+        raise TypeError(f"template: {template} must be a string, received {type(template).__name__}")
+
+    if chat_template:
+        if not isinstance(chat_template, list):
+            raise TypeError("chat_template must be a list of dictionaries with string-string key value pairs.")
+        for ct in chat_template:
+            if not (isinstance(ct, dict) and all(k in ct for k in ("role", "content"))):
+                raise TypeError(
+                    "Each 'chat_template' entry should be a string-string dictionary with role and content keys."
+                )
+
+    if variables:
         if not isinstance(variables, dict):
-            raise TypeError("Prompt variables must be a dictionary.")
-        if not any(isinstance(k, str) or isinstance(v, str) for k, v in variables.items()):
-            raise TypeError("Prompt variable keys and values must be strings.")
+            raise TypeError(
+                f"variables: {variables} must be a dictionary with string keys, received {type(variables).__name__}"
+            )
+        if not all(isinstance(k, str) for k in variables):
+            raise TypeError("Keys of 'variables' must all be strings.")
+
+    final_chat_template = []
+    if chat_template:
+        for msg in chat_template:
+            final_chat_template.append(Message(role=msg["role"], content=msg["content"]))
+
+    validated_prompt: ValidatedPromptDict = {}
+    if final_prompt_id:
+        validated_prompt["id"] = final_prompt_id
+    if version:
+        validated_prompt["version"] = version
+    if variables:
         validated_prompt["variables"] = variables
-    if template is not None:
-        if not isinstance(template, str):
-            raise TypeError("Prompt template must be a string")
+    if template:
         validated_prompt["template"] = template
-    if version is not None:
-        if not isinstance(version, str):
-            raise TypeError("Prompt version must be a string.")
-        validated_prompt["version"] = version
-    if prompt_id is not None:
-        if not isinstance(prompt_id, str):
-            raise TypeError("Prompt id must be a string.")
-        validated_prompt["id"] = prompt_id
-    if ctx_variable_keys is not None:
-        if not isinstance(ctx_variable_keys, list):
-            raise TypeError("Prompt field `context_variable_keys` must be a list of strings.")
-        if not all(isinstance(k, str) for k in ctx_variable_keys):
-            raise TypeError("Prompt field `context_variable_keys` must be a list of strings.")
-        validated_prompt[INTERNAL_CONTEXT_VARIABLE_KEYS] = ctx_variable_keys
-    else:
-        validated_prompt[INTERNAL_CONTEXT_VARIABLE_KEYS] = ["context"]
-    if rag_query_variable_keys is not None:
-        if not isinstance(rag_query_variable_keys, list):
-            raise TypeError("Prompt field `rag_query_variables` must be a list of strings.")
-        if not all(isinstance(k, str) for k in rag_query_variable_keys):
-            raise TypeError("Prompt field `rag_query_variables` must be a list of strings.")
-        validated_prompt[INTERNAL_QUERY_VARIABLE_KEYS] = rag_query_variable_keys
-    else:
-        validated_prompt[INTERNAL_QUERY_VARIABLE_KEYS] = ["question"]
+    if final_chat_template:
+        validated_prompt["chat_template"] = final_chat_template
+    if tags:
+        validated_prompt["tags"] = tags
+    if final_ctx_variable_keys:
+        validated_prompt[INTERNAL_CONTEXT_VARIABLE_KEYS] = final_ctx_variable_keys
+    if final_query_variable_keys:
+        validated_prompt[INTERNAL_QUERY_VARIABLE_KEYS] = final_query_variable_keys
+
     return validated_prompt
 
 
diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py
@@ -75,20 +75,6 @@ def _extract_tool_result(tool_result: Dict[str, Any]) -> "ToolResult":
     {"content": str, "role": str, "tool_calls": List["ToolCall"], "tool_results": List["ToolResult"]},
     total=False,
 )
-Prompt = TypedDict(
-    "Prompt",
-    {
-        "variables": Dict[str, str],
-        "template": str,
-        "id": str,
-        "version": str,
-        "rag_context_variables": List[
-            str
-        ],  # a list of variable key names that contain ground truth context information
-        "rag_query_variables": List[str],  # a list of variable key names that contains query information
-    },
-    total=False,
-)
 ToolCall = TypedDict(
     "ToolCall",
     {
@@ -163,6 +149,33 @@ def extract_tool_definitions(tool_definitions: List[Dict[str, Any]]) -> List[Too
     return validated_tool_definitions
 
 
+class Prompt(TypedDict, total=False):
+    """
+    A Prompt object that contains the information needed to render a prompt.
+        id: str - the id of the prompt set by the user. Should be unique per ml_app.
+        version: str - user tag for the version of the prompt.
+        variables: Dict[str, str] - a dictionary of variables that will be used to render the prompt
+        chat_template: Optional[Union[List[Dict[str, str]], List[Message]]]
+            - A list of dicts with "role" and "content" keys,
+            where role is the role of the message and content is the template string
+        template: Optional[str]
+            - It also accepts a string that represents the template for the prompt. Will default to "user" for a role
+        tags: Optional[Dict[str, str]]
+            - Dictionary of string key-value tags to add to the prompt run.
+        rag_context_variables: List[str] - a list of variable key names that contain ground truth context information
+        rag_query_variables: List[str] - a list of variable key names that contains query information
+    """
+
+    version: str
+    id: str
+    template: str
+    chat_template: Union[List[Dict[str, str]], List[Message]]
+    variables: Dict[str, str]
+    tags: Dict[str, str]
+    rag_context_variables: List[str]
+    rag_query_variables: List[str]
+
+
 class Messages:
     def __init__(self, messages: Union[List[Dict[str, Any]], Dict[str, Any], str]):
         self.messages = []
diff --git a/releasenotes/notes/update-prompt-annotation-0fa90edf6829fe1d.yaml b/releasenotes/notes/update-prompt-annotation-0fa90edf6829fe1d.yaml
@@ -0,0 +1,5 @@
+---
+features:
+  - |
+    LLM Observability: Extends the prompt structure to add ``tags`` and ``chat_template``.
+    Adds a new ``Prompt`` TypedDict class that can be used in ``annotate`` and ``annotation_context``.
diff --git a/tests/llmobs/test_llmobs.py b/tests/llmobs/test_llmobs.py
@@ -12,6 +12,7 @@
 from ddtrace.llmobs._constants import PARENT_ID_KEY
 from ddtrace.llmobs._constants import ROOT_PARENT_ID
 from ddtrace.llmobs._utils import _get_session_id
+from ddtrace.llmobs.utils import Prompt
 from tests.llmobs._utils import _expected_llmobs_llm_span_event
 
 
@@ -460,15 +461,42 @@ def test_structured_io_data(llmobs, llmobs_backend):
 
 def test_structured_prompt_data(llmobs, llmobs_backend):
     with llmobs.llm() as span:
-        llmobs.annotate(span, prompt={"template": "test {{value}}"})
+        llmobs.annotate(span, input_data={"data": "test1"}, prompt={"template": "test {{value}}"})
+    events = llmobs_backend.wait_for_num_events(num=1)
+    assert len(events) == 1
+    assert events[0][0]["spans"][0]["meta"]["input"]["prompt"] == {
+        "id": "unnamed-ml-app_unnamed-prompt",
+        "template": "test {{value}}",
+        "_dd_context_variable_keys": ["context"],
+        "_dd_query_variable_keys": ["question"],
+    }
+
+
+def test_structured_prompt_data_v2(llmobs, llmobs_backend):
+    prompt = Prompt(
+        id="test",
+        chat_template=[{"role": "user", "content": "test {{value}}"}],
+        variables={"value": "test", "context": "test", "question": "test"},
+        tags={"env": "prod", "llm": "openai"},
+        rag_context_variables=["context"],
+        rag_query_variables=["question"],
+    )
+    with llmobs.llm() as span:
+        llmobs.annotate(
+            span,
+            prompt=prompt,
+        )
     events = llmobs_backend.wait_for_num_events(num=1)
     assert len(events) == 1
     assert events[0][0]["spans"][0]["meta"]["input"] == {
         "prompt": {
-            "template": "test {{value}}",
+            "id": "test",
+            "chat_template": [{"role": "user", "content": "test {{value}}"}],
+            "variables": {"value": "test", "context": "test", "question": "test"},
+            "tags": {"env": "prod", "llm": "openai"},
             "_dd_context_variable_keys": ["context"],
             "_dd_query_variable_keys": ["question"],
-        },
+        }
     }
 
 
diff --git a/tests/llmobs/test_llmobs_service.py b/tests/llmobs/test_llmobs_service.py