From dc014efe4132a9752212eda2bcd74f067289850d Mon Sep 17 00:00:00 2001
From: yahya-mouman <yahya.mouman@datadoghq.com>
Date: Wed, 19 Feb 2025 18:22:17 +0100
Subject: [PATCH 01/11] Added Prompt class and updated submission logic

---
 ddtrace/llmobs/_llmobs.py |  25 +++---
 ddtrace/llmobs/_utils.py  |  51 +-----------
 ddtrace/llmobs/utils.py   | 171 ++++++++++++++++++++++++++++++++++----
 3 files changed, 168 insertions(+), 79 deletions(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 58b233f00a2..f7094114e8d 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -66,10 +66,10 @@
 from ddtrace.llmobs._utils import _get_span_name
 from ddtrace.llmobs._utils import _is_evaluation_span
 from ddtrace.llmobs._utils import safe_json
-from ddtrace.llmobs._utils import validate_prompt
 from ddtrace.llmobs._writer import LLMObsEvalMetricWriter
 from ddtrace.llmobs._writer import LLMObsSpanWriter
 from ddtrace.llmobs.utils import Documents
+from ddtrace.llmobs.utils import Prompt
 from ddtrace.llmobs.utils import ExportedLLMObsSpan
 from ddtrace.llmobs.utils import Messages
 from ddtrace.propagation.http import HTTPPropagator
@@ -463,7 +463,7 @@ def _tag_span_links(self, span, span_links):
 
     @classmethod
     def annotation_context(
-        cls, tags: Optional[Dict[str, Any]] = None, prompt: Optional[dict] = None, name: Optional[str] = None
+        cls, tags: Optional[Dict[str, Any]] = None, prompt: Optional[Prompt] = None, name: Optional[str] = None
     ) -> AnnotationContext:
         """
         Sets specified attributes on all LLMObs spans created while the returned AnnotationContext is active.
@@ -809,7 +809,7 @@ def retrieval(
     def annotate(
         cls,
         span: Optional[Span] = None,
-        prompt: Optional[dict] = None,
+        prompt: Optional[Prompt] = None,
         input_data: Optional[Any] = None,
         output_data: Optional[Any] = None,
         metadata: Optional[Dict[str, Any]] = None,
@@ -823,15 +823,8 @@ def annotate(
 
         :param Span span: Span to annotate. If no span is provided, the current active span will be used.
                           Must be an LLMObs-type span, i.e. generated by the LLMObs SDK.
-        :param prompt: A dictionary that represents the prompt used for an LLM call in the following form:
-                        `{"template": "...", "id": "...", "version": "...", "variables": {"variable_1": "...", ...}}`.
-                        Can also be set using the `ddtrace.llmobs.utils.Prompt` constructor class.
-                        - This argument is only applicable to LLM spans.
-                        - The dictionary may contain two optional keys relevant to RAG applications:
-                            `rag_context_variables` - a list of variable key names that contain ground
-                                                        truth context information
-                            `rag_query_variables` - a list of variable key names that contains query
-                                                        information for an LLM call
+        :param prompt: An instance of the `ddtrace.llmobs.utils.Prompt` class that represents the prompt used for an LLM call.
+            - This argument is only applicable to LLM spans.
         :param input_data: A single input string, dictionary, or a list of dictionaries based on the span kind:
                            - llm spans: accepts a string, or a dictionary of form {"content": "...", "role": "..."},
                                         or a list of dictionaries with the same signature.
@@ -883,8 +876,12 @@ def annotate(
             span.name = _name
         if prompt is not None:
             try:
-                validated_prompt = validate_prompt(prompt)
-                cls._set_dict_attribute(span, INPUT_PROMPT, validated_prompt)
+                ml_app = _get_ml_app(span)
+                if ml_app is not None:
+                    prompt.regenerate_ids(ml_app)
+                prompt.validate()
+                dict_prompt = prompt.to_dict()
+                cls._set_dict_attribute(span, INPUT_PROMPT, dict_prompt)
             except TypeError:
                 log.warning("Failed to validate prompt with error: ", exc_info=True)
         if not span_kind:
diff --git a/ddtrace/llmobs/_utils.py b/ddtrace/llmobs/_utils.py
index f178582f518..92a54bba54d 100644
--- a/ddtrace/llmobs/_utils.py
+++ b/ddtrace/llmobs/_utils.py
@@ -20,59 +20,12 @@
 from ddtrace.llmobs._constants import OPENAI_APM_SPAN_NAME
 from ddtrace.llmobs._constants import SESSION_ID
 from ddtrace.llmobs._constants import VERTEXAI_APM_SPAN_NAME
+from ddtrace.llmobs.utils import Prompt
 from ddtrace.trace import Span
 
 
 log = get_logger(__name__)
 
-
-def validate_prompt(prompt: dict) -> Dict[str, Union[str, dict, List[str]]]:
-    validated_prompt = {}  # type: Dict[str, Union[str, dict, List[str]]]
-    if not isinstance(prompt, dict):
-        raise TypeError("Prompt must be a dictionary")
-    variables = prompt.get("variables")
-    template = prompt.get("template")
-    version = prompt.get("version")
-    prompt_id = prompt.get("id")
-    ctx_variable_keys = prompt.get("rag_context_variables")
-    rag_query_variable_keys = prompt.get("rag_query_variables")
-    if variables is not None:
-        if not isinstance(variables, dict):
-            raise TypeError("Prompt variables must be a dictionary.")
-        if not any(isinstance(k, str) or isinstance(v, str) for k, v in variables.items()):
-            raise TypeError("Prompt variable keys and values must be strings.")
-        validated_prompt["variables"] = variables
-    if template is not None:
-        if not isinstance(template, str):
-            raise TypeError("Prompt template must be a string")
-        validated_prompt["template"] = template
-    if version is not None:
-        if not isinstance(version, str):
-            raise TypeError("Prompt version must be a string.")
-        validated_prompt["version"] = version
-    if prompt_id is not None:
-        if not isinstance(prompt_id, str):
-            raise TypeError("Prompt id must be a string.")
-        validated_prompt["id"] = prompt_id
-    if ctx_variable_keys is not None:
-        if not isinstance(ctx_variable_keys, list):
-            raise TypeError("Prompt field `context_variable_keys` must be a list of strings.")
-        if not all(isinstance(k, str) for k in ctx_variable_keys):
-            raise TypeError("Prompt field `context_variable_keys` must be a list of strings.")
-        validated_prompt[INTERNAL_CONTEXT_VARIABLE_KEYS] = ctx_variable_keys
-    else:
-        validated_prompt[INTERNAL_CONTEXT_VARIABLE_KEYS] = ["context"]
-    if rag_query_variable_keys is not None:
-        if not isinstance(rag_query_variable_keys, list):
-            raise TypeError("Prompt field `rag_query_variables` must be a list of strings.")
-        if not all(isinstance(k, str) for k in rag_query_variable_keys):
-            raise TypeError("Prompt field `rag_query_variables` must be a list of strings.")
-        validated_prompt[INTERNAL_QUERY_VARIABLE_KEYS] = rag_query_variable_keys
-    else:
-        validated_prompt[INTERNAL_QUERY_VARIABLE_KEYS] = ["question"]
-    return validated_prompt
-
-
 class LinkTracker:
     def __init__(self, object_span_links=None):
         self._object_span_links = object_span_links or {}
@@ -185,7 +138,7 @@ def _get_session_id(span: Span) -> Optional[str]:
 def _inject_llmobs_parent_id(span_context):
     """Inject the LLMObs parent ID into the span context for reconnecting distributed LLMObs traces."""
     span = ddtrace.tracer.current_span()
-    
+
     if span is None:
         log.warning("No active span to inject LLMObs parent ID info.")
         return
diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py
index dac1f3149c9..c3861148bf3 100644
--- a/ddtrace/llmobs/utils.py
+++ b/ddtrace/llmobs/utils.py
@@ -1,8 +1,8 @@
-from typing import Dict
+from re import match
+from typing import Dict, Tuple, Optional
 from typing import List
 from typing import Union
 
-
 # TypedDict was added to typing in python 3.8
 try:
     from typing import TypedDict  # noqa:F401
@@ -19,20 +19,159 @@
 ExportedLLMObsSpan = TypedDict("ExportedLLMObsSpan", {"span_id": str, "trace_id": str})
 Document = TypedDict("Document", {"name": str, "id": str, "text": str, "score": float}, total=False)
 Message = TypedDict("Message", {"content": str, "role": str}, total=False)
-Prompt = TypedDict(
-    "Prompt",
-    {
-        "variables": Dict[str, str],
-        "template": str,
-        "id": str,
-        "version": str,
-        "rag_context_variables": List[
-            str
-        ],  # a list of variable key names that contain ground truth context information
-        "rag_query_variables": List[str],  # a list of variable key names that contains query information
-    },
-    total=False,
-)
+
+class Prompt:
+    """
+    Represents a prompt used for an LLM call.
+
+    Attributes:
+        name (str): The name of the prompt.
+        version (str): The version of the prompt.
+        prompt_template_id (int): A hash of name and ml_app, used to identify the prompt template.
+        prompt_instance_id (int): A hash of all prompt attributes, used to identify the prompt instance.
+        template (Union[List[Tuple[str, str]], str]): The template used for the prompt, which can be a list of tuples or a string.
+        variables (Dict[str, str]): A dictionary of variables used in the prompt.
+        example_variables (List[str]): A list of variables names denoting examples. Examples are used to improve accuracy for the prompt.
+        constraint_variables (List[str]): A list of variables names denoting constraints. Constraints are limitations on how the prompt result is displayed.
+        rag_context_variables (List[str]): A list of variable key names that contain ground truth context information.
+        rag_query_variables (List[str]): A list of variable key names that contain query information for an LLM call.
+    """
+    name: str
+    version: Optional[str]
+    prompt_template_id: int
+    prompt_instance_id: int
+    template: Optional[List[Tuple[str, str]]]
+    variables: Optional[Dict[str, str]]
+    example_variables: Optional[List[str]]
+    constraint_variables: Optional[List[str]]
+    rag_context_variables: Optional[List[str]]
+    rag_query_variables: Optional[List[str]]
+
+    def __init__(self,
+                 name,
+                 version = "1.0.0",
+                 template = None,
+                 variables = None,
+                 example_variables = None,
+                 constraint_variables = None,
+                 rag_context_variables = None,
+                 rag_query_variables = None):
+
+        if name is None:
+            raise TypeError("Prompt name of type String is mandatory.")
+
+        self.name = name
+
+        # Default values
+        template = template or []
+        variables = variables or {}
+        example_variables = example_variables or []
+        constraint_variables = constraint_variables or []
+        rag_context_variables = rag_context_variables or ["context"]
+        rag_query_variables = rag_query_variables or ["question"]
+        version = version or "1.0.0"
+
+        if version is not None:
+            # Add minor and patch version if not present
+            version_parts = (version.split(".") + ["0", "0"])[:3]
+            version = ".".join(version_parts)
+
+        # Accept simple string templates
+        if isinstance(template, str):
+            template = [("user", template)]
+
+        self.prompt_template_id = hash(name)
+        self.prompt_instance_id = hash(
+            (name, version, tuple(template), tuple(variables.keys()), tuple(variables.values()),
+             tuple(example_variables), tuple(constraint_variables),
+             tuple(rag_context_variables), tuple(rag_query_variables)))
+
+        self.version = version
+        self.template = template
+        self.variables = variables
+        self.example_variables = example_variables
+        self.constraint_variables = constraint_variables
+        self.rag_context_variables = rag_context_variables
+        self.rag_query_variables = rag_query_variables
+
+    def to_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]:
+        return {
+            "name": self.name,
+            "version": self.version,
+            "prompt_template_id": self.prompt_template_id,
+            "prompt_instance_id": self.prompt_instance_id,
+            "template": self.template,
+            "variables": self.variables,
+            "example_variables": self.example_variables,
+            "constraint_variables": self.constraint_variables,
+            "rag_context_variables": self.rag_context_variables,
+            "rag_query_variables": self.rag_query_variables,
+        }
+
+    def regenerate_ids(self, ml_app: str):
+        self.prompt_instance_id = hash((ml_app, self.name, self.version, tuple(self.template), tuple(self.variables.keys()), tuple(self.variables.values()), tuple(self.example_variables), tuple(self.constraint_variables), tuple(self.rag_context_variables), tuple(self.rag_query_variables)))
+        self.prompt_template_id = hash((ml_app, self.name))
+        pass
+
+    def validate(self):
+        errors = []
+
+        name = self.name
+        version = self.version
+        template = self.template
+        variables = self.variables
+        example_variables = self.example_variables
+        constraint_variables = self.constraint_variables
+        rag_context_variables = self.rag_context_variables
+        rag_query_variables = self.rag_query_variables
+
+        if name is None:
+            errors.append("Prompt name of type String is mandatory.")
+        elif not isinstance(name, str):
+            errors.append("Prompt name must be a string.")
+
+        if version is not None:
+            # Add minor and patch version if not present
+            version_parts = (version.split(".") + ["0", "0"])[:3]
+            version = ".".join(version_parts)
+            # Official semver regex from https://semver.org/
+            semver_regex = (
+                r'^(?P<major>0|[1-9]\d*)\.'
+                r'(?P<minor>0|[1-9]\d*)\.'
+                r'(?P<patch>0|[1-9]\d*)'
+                r'(?:-(?P<prerelease>(?:0|[1-9]\d*|\d*[a-zA-Z-]'
+                r'[0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-]'
+                r'[0-9a-zA-Z-]*))*))?'
+                r'(?:\+(?P<buildmetadata>[0-9a-zA-Z-]+'
+                r'(?:\.[0-9a-zA-Z-]+)*))?$'
+            )
+            if not bool(match(semver_regex, version)):
+                errors.append(
+                    "Prompt version must be semver compatible. Please check https://semver.org/ for more information.")
+
+        # Accept simple string templates
+        if isinstance(template, str):
+            template = [("user", template)]
+
+        # validate template
+        if not (isinstance(template, list) and all(isinstance(t, tuple) for t in template)):
+            errors.append("Prompt template must be a list of tuples.")
+        if not all(len(t) == 2 for t in template):
+            errors.append("Prompt template tuples must have exactly two elements.")
+        if not all(isinstance(item[0], str) and isinstance(item[1], str) for item in template):
+            errors.append("Prompt template tuple elements must be strings.")
+
+        if not isinstance(variables, dict):
+            errors.append("Prompt variables must be a dictionary.")
+        if not all(isinstance(k, str) and isinstance(v, str) for k, v in variables.items()):
+            errors.append("Prompt variable keys and values must be strings.")
+
+        for var_list in [example_variables, constraint_variables, rag_context_variables, rag_query_variables]:
+            if not all(isinstance(var, str) for var in var_list):
+                errors.append("All variable lists must contain strings only.")
+
+        if errors:
+            raise TypeError("\n".join(errors))
 
 
 class Messages:

From d0845acfa3a210469e774e1bb8e12fbbab02860f Mon Sep 17 00:00:00 2001
From: yahya-mouman <yahya.mouman@datadoghq.com>
Date: Wed, 19 Feb 2025 20:47:18 +0100
Subject: [PATCH 02/11] Switch to sha-1 hash and only generate ids before
 submission to llmobs

---
 ddtrace/llmobs/_llmobs.py |  4 +--
 ddtrace/llmobs/utils.py   | 54 ++++++++++++++++++++++++++++-----------
 2 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index f7094114e8d..95f3cb5dee5 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -876,9 +876,7 @@ def annotate(
             span.name = _name
         if prompt is not None:
             try:
-                ml_app = _get_ml_app(span)
-                if ml_app is not None:
-                    prompt.regenerate_ids(ml_app)
+                prompt.generate_ids(_get_ml_app(span) or "")
                 prompt.validate()
                 dict_prompt = prompt.to_dict()
                 cls._set_dict_attribute(span, INPUT_PROMPT, dict_prompt)
diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py
index c3861148bf3..673d3be6307 100644
--- a/ddtrace/llmobs/utils.py
+++ b/ddtrace/llmobs/utils.py
@@ -1,4 +1,5 @@
 from re import match
+from hashlib import sha1
 from typing import Dict, Tuple, Optional
 from typing import List
 from typing import Union
@@ -38,8 +39,8 @@ class Prompt:
     """
     name: str
     version: Optional[str]
-    prompt_template_id: int
-    prompt_instance_id: int
+    prompt_template_id: str
+    prompt_instance_id: str
     template: Optional[List[Tuple[str, str]]]
     variables: Optional[Dict[str, str]]
     example_variables: Optional[List[str]]
@@ -65,8 +66,8 @@ def __init__(self,
         # Default values
         template = template or []
         variables = variables or {}
-        example_variables = example_variables or []
-        constraint_variables = constraint_variables or []
+        example_variables = example_variables or ["example"]
+        constraint_variables = constraint_variables or ["constraint"]
         rag_context_variables = rag_context_variables or ["context"]
         rag_query_variables = rag_query_variables or ["question"]
         version = version or "1.0.0"
@@ -80,12 +81,6 @@ def __init__(self,
         if isinstance(template, str):
             template = [("user", template)]
 
-        self.prompt_template_id = hash(name)
-        self.prompt_instance_id = hash(
-            (name, version, tuple(template), tuple(variables.keys()), tuple(variables.values()),
-             tuple(example_variables), tuple(constraint_variables),
-             tuple(rag_context_variables), tuple(rag_query_variables)))
-
         self.version = version
         self.template = template
         self.variables = variables
@@ -108,14 +103,31 @@ def to_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[T
             "rag_query_variables": self.rag_query_variables,
         }
 
-    def regenerate_ids(self, ml_app: str):
-        self.prompt_instance_id = hash((ml_app, self.name, self.version, tuple(self.template), tuple(self.variables.keys()), tuple(self.variables.values()), tuple(self.example_variables), tuple(self.constraint_variables), tuple(self.rag_context_variables), tuple(self.rag_query_variables)))
-        self.prompt_template_id = hash((ml_app, self.name))
-        pass
+    def generate_ids(self, ml_app=""):
+        """
+        Generates prompt_template_id and prompt_instance_id based on the prompt attributes.
+        The prompt_template_id is a sha-1 hash of the prompt name and ml_app
+        The prompt_instance_id is a sha-1 hash of all prompt attributes.
+        """
+        name = str(self.name)
+        version = str(self.version)
+        template = str(self.template)
+        variables = str(self.variables)
+        example_variables = str(self.example_variables)
+        constraint_variables = str(self.constraint_variables)
+        rag_context_variables = str(self.rag_context_variables)
+        rag_query_variables = str(self.rag_query_variables)
+
+        template_id_str = f"[{ml_app}]{name}"
+        instance_id_str = f"[{ml_app}]{name}{version}{template}{variables}{example_variables}{constraint_variables}{rag_context_variables}{rag_query_variables}"
+
+        self.prompt_template_id = sha1(template_id_str.encode()).hexdigest()
+        self.prompt_instance_id = sha1(instance_id_str.encode()).hexdigest()
 
     def validate(self):
         errors = []
-
+        prompt_template_id = self.prompt_template_id
+        prompt_instance_id = self.prompt_instance_id
         name = self.name
         version = self.version
         template = self.template
@@ -125,6 +137,16 @@ def validate(self):
         rag_context_variables = self.rag_context_variables
         rag_query_variables = self.rag_query_variables
 
+
+        if prompt_template_id is None:
+            self.generate_ids()
+        elif not isinstance(prompt_template_id, str):
+            errors.append("Prompt template id must be a string.")
+        if prompt_instance_id is None:
+            self.generate_ids()
+        elif not isinstance(prompt_instance_id, str):
+            errors.append("Prompt instance id must be a string.")
+
         if name is None:
             errors.append("Prompt name of type String is mandatory.")
         elif not isinstance(name, str):
@@ -173,6 +195,8 @@ def validate(self):
         if errors:
             raise TypeError("\n".join(errors))
 
+        return errors
+
 
 class Messages:
     def __init__(self, messages: Union[List[Dict[str, str]], Dict[str, str], str]):

From f3c38e1fceb4aa05495c15bb084a235294183ed2 Mon Sep 17 00:00:00 2001
From: yahya-mouman <yahya.mouman@datadoghq.com>
Date: Tue, 25 Feb 2025 11:54:22 +0100
Subject: [PATCH 03/11] Rename Prompt *_variables to *_variable_keys and to_dict to to_tags_dict

---
 ddtrace/llmobs/utils.py | 52 +++++++++++++++++++++++------------------
 1 file changed, 29 insertions(+), 23 deletions(-)

diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py
index 673d3be6307..a8c87678737 100644
--- a/ddtrace/llmobs/utils.py
+++ b/ddtrace/llmobs/utils.py
@@ -3,6 +3,8 @@
 from typing import Dict, Tuple, Optional
 from typing import List
 from typing import Union
+from ddtrace.llmobs._constants import INTERNAL_CONTEXT_VARIABLE_KEYS
+from ddtrace.llmobs._constants import INTERNAL_QUERY_VARIABLE_KEYS
 
 # TypedDict was added to typing in python 3.8
 try:
@@ -43,20 +45,20 @@ class Prompt:
     prompt_instance_id: str
     template: Optional[List[Tuple[str, str]]]
     variables: Optional[Dict[str, str]]
-    example_variables: Optional[List[str]]
-    constraint_variables: Optional[List[str]]
-    rag_context_variables: Optional[List[str]]
-    rag_query_variables: Optional[List[str]]
+    example_variable_keys: Optional[List[str]]
+    constraint_variable_keys: Optional[List[str]]
+    rag_context_variable_keys: Optional[List[str]]
+    rag_query_variable_keys: Optional[List[str]]
 
     def __init__(self,
                  name,
                  version = "1.0.0",
                  template = None,
                  variables = None,
-                 example_variables = None,
-                 constraint_variables = None,
-                 rag_context_variables = None,
-                 rag_query_variables = None):
+                 example_variable_keys = None,
+                 constraint_variable_keys = None,
+                 rag_context_variable_keys = None,
+                 rag_query_variable_keys = None):
 
         if name is None:
             raise TypeError("Prompt name of type String is mandatory.")
@@ -70,6 +72,10 @@ def __init__(self,
         constraint_variables = constraint_variables or ["constraint"]
         rag_context_variables = rag_context_variables or ["context"]
         rag_query_variables = rag_query_variables or ["question"]
+        example_variable_keys = example_variable_keys or ["example"]
+        constraint_variable_keys = constraint_variable_keys or ["constraint"]
+        rag_context_variable_keys = rag_context_variable_keys or ["context"]
+        rag_query_variable_keys = rag_query_variable_keys or ["question"]
         version = version or "1.0.0"
 
         if version is not None:
@@ -89,7 +95,7 @@ def __init__(self,
         self.rag_context_variables = rag_context_variables
         self.rag_query_variables = rag_query_variables
 
-    def to_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]:
+    def to_tags_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]:
         return {
             "name": self.name,
             "version": self.version,
@@ -97,10 +103,10 @@ def to_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[T
             "prompt_instance_id": self.prompt_instance_id,
             "template": self.template,
             "variables": self.variables,
-            "example_variables": self.example_variables,
-            "constraint_variables": self.constraint_variables,
-            "rag_context_variables": self.rag_context_variables,
-            "rag_query_variables": self.rag_query_variables,
+            "example_variable_keys": self.example_variable_keys,
+            "constraint_variable_keys": self.constraint_variable_keys,
+            INTERNAL_CONTEXT_VARIABLE_KEYS: self.rag_context_variable_keys,
+            INTERNAL_QUERY_VARIABLE_KEYS: self.rag_query_variable_keys,
         }
 
     def generate_ids(self, ml_app=""):
@@ -113,13 +119,13 @@ def generate_ids(self, ml_app=""):
         version = str(self.version)
         template = str(self.template)
         variables = str(self.variables)
-        example_variables = str(self.example_variables)
-        constraint_variables = str(self.constraint_variables)
-        rag_context_variables = str(self.rag_context_variables)
-        rag_query_variables = str(self.rag_query_variables)
+        example_variable_keys = str(self.example_variable_keys)
+        constraint_variable_keys = str(self.constraint_variable_keys)
+        rag_context_variable_keys = str(self.rag_context_variable_keys)
+        rag_query_variable_keys = str(self.rag_query_variable_keys)
 
         template_id_str = f"[{ml_app}]{name}"
-        instance_id_str = f"[{ml_app}]{name}{version}{template}{variables}{example_variables}{constraint_variables}{rag_context_variables}{rag_query_variables}"
+        instance_id_str = f"[{ml_app}]{name}{version}{template}{variables}{example_variable_keys}{constraint_variable_keys}{rag_context_variable_keys}{rag_query_variable_keys}"
 
         self.prompt_template_id = sha1(template_id_str.encode()).hexdigest()
         self.prompt_instance_id = sha1(instance_id_str.encode()).hexdigest()
@@ -132,10 +138,10 @@ def validate(self):
         version = self.version
         template = self.template
         variables = self.variables
-        example_variables = self.example_variables
-        constraint_variables = self.constraint_variables
-        rag_context_variables = self.rag_context_variables
-        rag_query_variables = self.rag_query_variables
+        example_variable_keys = self.example_variable_keys
+        constraint_variable_keys = self.constraint_variable_keys
+        rag_context_variable_keys = self.rag_context_variable_keys
+        rag_query_variable_keys = self.rag_query_variable_keys
 
 
         if prompt_template_id is None:
@@ -188,7 +194,7 @@ def validate(self):
         if not all(isinstance(k, str) and isinstance(v, str) for k, v in variables.items()):
             errors.append("Prompt variable keys and values must be strings.")
 
-        for var_list in [example_variables, constraint_variables, rag_context_variables, rag_query_variables]:
+        for var_list in [example_variable_keys, constraint_variable_keys, rag_context_variable_keys, rag_query_variable_keys]:
             if not all(isinstance(var, str) for var in var_list):
                 errors.append("All variable lists must contain strings only.")
 

From c9e14f0fd4c9f9293928afbb46f243afd91f485b Mon Sep 17 00:00:00 2001
From: yahya-mouman <yahya.mouman@datadoghq.com>
Date: Tue, 25 Feb 2025 11:54:32 +0100
Subject: [PATCH 04/11] Update Prompt docstring to *_variable_keys and drop stale *_variables defaults

---
 ddtrace/llmobs/utils.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py
index a8c87678737..ccaa1efcad0 100644
--- a/ddtrace/llmobs/utils.py
+++ b/ddtrace/llmobs/utils.py
@@ -34,10 +34,10 @@ class Prompt:
         prompt_instance_id (int): A hash of all prompt attributes, used to identify the prompt instance.
         template (Union[List[Tuple[str, str]], str]): The template used for the prompt, which can be a list of tuples or a string.
         variables (Dict[str, str]): A dictionary of variables used in the prompt.
-        example_variables (List[str]): A list of variables names denoting examples. Examples are used to improve accuracy for the prompt.
-        constraint_variables (List[str]): A list of variables names denoting constraints. Constraints are limitations on how the prompt result is displayed.
-        rag_context_variables (List[str]): A list of variable key names that contain ground truth context information.
-        rag_query_variables (List[str]): A list of variable key names that contain query information for an LLM call.
+        example_variable_keys (List[str]): A list of variables names denoting examples. Examples are used to improve accuracy for the prompt.
+        constraint_variable_keys (List[str]): A list of variables names denoting constraints. Constraints are limitations on how the prompt result is displayed.
+        rag_context_variable_keys (List[str]): A list of variable key names that contain ground truth context information.
+        rag_query_variable_keys (List[str]): A list of variable key names that contain query information for an LLM call.
     """
     name: str
     version: Optional[str]
@@ -68,10 +68,7 @@ def __init__(self,
         # Default values
         template = template or []
         variables = variables or {}
-        example_variables = example_variables or ["example"]
-        constraint_variables = constraint_variables or ["constraint"]
-        rag_context_variables = rag_context_variables or ["context"]
-        rag_query_variables = rag_query_variables or ["question"]
+        # TODO remove default keys when not in variables
         example_variable_keys = example_variable_keys or ["example"]
         constraint_variable_keys = constraint_variable_keys or ["constraint"]
         rag_context_variable_keys = rag_context_variable_keys or ["context"]

From 1f19033a8a57f13dc7b6754afe5df56763e3642e Mon Sep 17 00:00:00 2001
From: yahya-mouman <yahya.mouman@datadoghq.com>
Date: Tue, 25 Feb 2025 11:54:45 +0100
Subject: [PATCH 05/11] Assign Prompt instance attributes under the *_variable_keys names

---
 ddtrace/llmobs/utils.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py
index ccaa1efcad0..6d34116e456 100644
--- a/ddtrace/llmobs/utils.py
+++ b/ddtrace/llmobs/utils.py
@@ -87,10 +87,10 @@ def __init__(self,
         self.version = version
         self.template = template
         self.variables = variables
-        self.example_variables = example_variables
-        self.constraint_variables = constraint_variables
-        self.rag_context_variables = rag_context_variables
-        self.rag_query_variables = rag_query_variables
+        self.example_variable_keys = example_variable_keys
+        self.constraint_variable_keys = constraint_variable_keys
+        self.rag_context_variable_keys = rag_context_variable_keys
+        self.rag_query_variable_keys = rag_query_variable_keys
 
     def to_tags_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]:
         return {

From a553ff6414e4b71e43b2de7cdb7b671b61bc5165 Mon Sep 17 00:00:00 2001
From: yahya-mouman <yahya.mouman@datadoghq.com>
Date: Tue, 25 Feb 2025 13:42:32 +0100
Subject: [PATCH 06/11] rename method

---
 ddtrace/llmobs/_llmobs.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 95f3cb5dee5..d3ae5a85b65 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -878,7 +878,7 @@ def annotate(
             try:
                 prompt.generate_ids(_get_ml_app(span) or "")
                 prompt.validate()
-                dict_prompt = prompt.to_dict()
+                dict_prompt = prompt.to_tags_dict()
                 cls._set_dict_attribute(span, INPUT_PROMPT, dict_prompt)
             except TypeError:
                 log.warning("Failed to validate prompt with error: ", exc_info=True)

From 89ba50ec2ce3ee78c96ed04266f8e2ea2062bdd8 Mon Sep 17 00:00:00 2001
From: yahya-mouman <yahya.mouman@datadoghq.com>
Date: Tue, 25 Feb 2025 14:35:14 +0100
Subject: [PATCH 07/11] Refactor quality of usage

---
 ddtrace/llmobs/_llmobs.py |  4 +-
 ddtrace/llmobs/utils.py   | 87 ++++++++++++++++++++++++++++-----------
 2 files changed, 64 insertions(+), 27 deletions(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index d3ae5a85b65..b2eb36c7ba2 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -876,9 +876,7 @@ def annotate(
             span.name = _name
         if prompt is not None:
             try:
-                prompt.generate_ids(_get_ml_app(span) or "")
-                prompt.validate()
-                dict_prompt = prompt.to_tags_dict()
+                dict_prompt = prompt.prepare_prompt(ml_app=_get_ml_app(span) or "")
                 cls._set_dict_attribute(span, INPUT_PROMPT, dict_prompt)
             except TypeError:
                 log.warning("Failed to validate prompt with error: ", exc_info=True)
diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py
index 6d34116e456..3c35bb8eb79 100644
--- a/ddtrace/llmobs/utils.py
+++ b/ddtrace/llmobs/utils.py
@@ -1,7 +1,10 @@
 from re import match
 from hashlib import sha1
-from typing import Dict, Tuple, Optional
+from typing import Any
+from typing import Dict
 from typing import List
+from typing import Optional
+from typing import Tuple
 from typing import Union
 from ddtrace.llmobs._constants import INTERNAL_CONTEXT_VARIABLE_KEYS
 from ddtrace.llmobs._constants import INTERNAL_QUERY_VARIABLE_KEYS
@@ -29,6 +32,7 @@ class Prompt:
 
     Attributes:
         name (str): The name of the prompt.
+        ml_app (str): The name of the service, retrieved from the active span when not specified.
         version (str): The version of the prompt.
         prompt_template_id (int): A hash of name and ml_app, used to identify the prompt template.
         prompt_instance_id (int): A hash of all prompt attributes, used to identify the prompt instance.
@@ -44,11 +48,12 @@ class Prompt:
     prompt_template_id: str
     prompt_instance_id: str
     template: Optional[List[Tuple[str, str]]]
-    variables: Optional[Dict[str, str]]
+    variables: Optional[Dict[str, Any]]
     example_variable_keys: Optional[List[str]]
     constraint_variable_keys: Optional[List[str]]
     rag_context_variable_keys: Optional[List[str]]
     rag_query_variable_keys: Optional[List[str]]
+    ml_app: str
 
     def __init__(self,
                  name,
@@ -58,7 +63,8 @@ def __init__(self,
                  example_variable_keys = None,
                  constraint_variable_keys = None,
                  rag_context_variable_keys = None,
-                 rag_query_variable_keys = None):
+                 rag_query_variable_keys = None,
+                 ml_app=""):
 
         if name is None:
             raise TypeError("Prompt name of type String is mandatory.")
@@ -68,9 +74,8 @@ def __init__(self,
         # Default values
         template = template or []
         variables = variables or {}
-        # TODO remove default keys when not in variables
-        example_variable_keys = example_variable_keys or ["example"]
-        constraint_variable_keys = constraint_variable_keys or ["constraint"]
+        example_variable_keys = example_variable_keys or ["example", "examples"]
+        constraint_variable_keys = constraint_variable_keys or ["constraint", "constraints"]
         rag_context_variable_keys = rag_context_variable_keys or ["context"]
         rag_query_variable_keys = rag_query_variable_keys or ["question"]
         version = version or "1.0.0"
@@ -80,10 +85,11 @@ def __init__(self,
             version_parts = (version.split(".") + ["0", "0"])[:3]
             version = ".".join(version_parts)
 
-        # Accept simple string templates
+        # Accept simple string templates as user role
         if isinstance(template, str):
             template = [("user", template)]
 
+        self.ml_app = ml_app
         self.version = version
         self.template = template
         self.variables = variables
@@ -92,27 +98,14 @@ def __init__(self,
         self.rag_context_variable_keys = rag_context_variable_keys
         self.rag_query_variable_keys = rag_query_variable_keys
 
-    def to_tags_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]:
-        return {
-            "name": self.name,
-            "version": self.version,
-            "prompt_template_id": self.prompt_template_id,
-            "prompt_instance_id": self.prompt_instance_id,
-            "template": self.template,
-            "variables": self.variables,
-            "example_variable_keys": self.example_variable_keys,
-            "constraint_variable_keys": self.constraint_variable_keys,
-            INTERNAL_CONTEXT_VARIABLE_KEYS: self.rag_context_variable_keys,
-            INTERNAL_QUERY_VARIABLE_KEYS: self.rag_query_variable_keys,
-        }
-
-    def generate_ids(self, ml_app=""):
+    def generate_ids(self):
         """
         Generates prompt_template_id and prompt_instance_id based on the prompt attributes.
         The prompt_template_id is a sha-1 hash of the prompt name and ml_app
         The prompt_instance_id is a sha-1 hash of all prompt attributes.
         """
         name = str(self.name)
+        ml_app = str(self.ml_app)
         version = str(self.version)
         template = str(self.template)
         variables = str(self.variables)
@@ -188,8 +181,8 @@ def validate(self):
 
         if not isinstance(variables, dict):
             errors.append("Prompt variables must be a dictionary.")
-        if not all(isinstance(k, str) and isinstance(v, str) for k, v in variables.items()):
-            errors.append("Prompt variable keys and values must be strings.")
+        if not all(isinstance(k, str) for k in variables):
+            errors.append("Prompt variable keys must be strings.")
 
         for var_list in [example_variable_keys, constraint_variable_keys, rag_context_variable_keys, rag_query_variable_keys]:
             if not all(isinstance(var, str) for var in var_list):
@@ -200,6 +193,52 @@ def validate(self):
 
         return errors
 
+    def to_tags_dict(self) -> Dict[str, Union[str, List[str], Dict[str, str], List[Tuple[str, str]]]]:
+        name = self.name
+        version = self.version
+        prompt_template_id = self.prompt_template_id
+        prompt_instance_id = self.prompt_instance_id
+        template = self.template
+        variables = self.variables
+        example_variable_keys = self.example_variable_keys
+        constraint_variable_keys = self.constraint_variable_keys
+        rag_context_variable_keys = self.rag_context_variable_keys
+        rag_query_variable_keys = self.rag_query_variable_keys
+
+        # Clean up keys and remove those that are not in variables, including default keys.
+        example_variable_keys_set = {key for key in example_variable_keys if key in variables}
+        constraint_variable_keys_set = {key for key in constraint_variable_keys if key in variables}
+        rag_context_variable_keys_set = {key for key in rag_context_variable_keys if key in variables}
+        rag_query_variable_keys_set = {key for key in rag_query_variable_keys if key in variables}
+
+        return {
+            "name": name,
+            "version": version,
+            "prompt_template_id": prompt_template_id,
+            "prompt_instance_id": prompt_instance_id,
+            "template": template,
+            "variables": variables,
+            "example_variable_keys": example_variable_keys_set,
+            "constraint_variable_keys": constraint_variable_keys_set,
+            "rag_context_variable_keys": rag_context_variable_keys_set,
+            "rag_query_variable_keys": rag_query_variable_keys_set,
+            # also using internal constants to keep hallucination functionality
+            INTERNAL_CONTEXT_VARIABLE_KEYS: rag_context_variable_keys_set,
+            INTERNAL_QUERY_VARIABLE_KEYS: rag_query_variable_keys_set,
+        }
+
+    def prepare_prompt(self, ml_app=None) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]:
+        if ml_app:
+            self.ml_app = ml_app
+        self.validate()
+        return self.to_tags_dict()
+
+    def __setattr__(self, name, value):
+        super().__setattr__(name, value)
+        self.generate_ids()
+
+
+
 
 class Messages:
     def __init__(self, messages: Union[List[Dict[str, str]], Dict[str, str], str]):

From 83236bd29b7f22fe844fb51861ff66d0ab2a04f4 Mon Sep 17 00:00:00 2001
From: yahya-mouman <yahya.mouman@datadoghq.com>
Date: Tue, 25 Feb 2025 15:28:26 +0100
Subject: [PATCH 08/11] remove redundant tags

---
 ddtrace/llmobs/utils.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py
index 3c35bb8eb79..0e1b797951f 100644
--- a/ddtrace/llmobs/utils.py
+++ b/ddtrace/llmobs/utils.py
@@ -220,8 +220,6 @@ def to_tags_dict(self) -> Dict[str, Union[str, List[str], Dict[str, str], List[T
             "variables": variables,
             "example_variable_keys": example_variable_keys_set,
             "constraint_variable_keys": constraint_variable_keys_set,
-            "rag_context_variable_keys": rag_context_variable_keys_set,
-            "rag_query_variable_keys": rag_query_variable_keys_set,
             # also using internal constants to keep hallucination functionality
             INTERNAL_CONTEXT_VARIABLE_KEYS: rag_context_variable_keys_set,
             INTERNAL_QUERY_VARIABLE_KEYS: rag_query_variable_keys_set,

From 81dc59a4ba7c2f2a942a6f65333430eb62d26700 Mon Sep 17 00:00:00 2001
From: yahya-mouman <yahya.mouman@datadoghq.com>
Date: Tue, 25 Feb 2025 16:45:14 +0100
Subject: [PATCH 09/11] update dict signature

---
 ddtrace/llmobs/utils.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py
index 0e1b797951f..75a544119c0 100644
--- a/ddtrace/llmobs/utils.py
+++ b/ddtrace/llmobs/utils.py
@@ -4,6 +4,7 @@
 from typing import Dict
 from typing import List
 from typing import Optional
+from typing import Set
 from typing import Tuple
 from typing import Union
 from ddtrace.llmobs._constants import INTERNAL_CONTEXT_VARIABLE_KEYS
@@ -193,7 +194,7 @@ def validate(self):
 
         return errors
 
-    def to_tags_dict(self) -> Dict[str, Union[str, List[str], Dict[str, str], List[Tuple[str, str]]]]:
+    def to_tags_dict(self) -> Dict[str, Union[str, Set[str], Dict[str, str], List[Tuple[str, str]]]]:
         name = self.name
         version = self.version
         prompt_template_id = self.prompt_template_id
@@ -225,7 +226,7 @@ def to_tags_dict(self) -> Dict[str, Union[str, List[str], Dict[str, str], List[T
             INTERNAL_QUERY_VARIABLE_KEYS: rag_query_variable_keys_set,
         }
 
-    def prepare_prompt(self, ml_app=None) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]:
+    def prepare_prompt(self, ml_app=None) -> Dict[str, Union[str, List[str], Dict[str, str], List[Tuple[str, str]]]]:
         if ml_app:
             self.ml_app = ml_app
         self.validate()

From 430024d66e5aea9a538fb11905122d21c60ced6a Mon Sep 17 00:00:00 2001
From: yahya-mouman <yahya.mouman@datadoghq.com>
Date: Tue, 25 Feb 2025 17:29:43 +0100
Subject: [PATCH 10/11] dynamic id generation in case prompt object is updated

---
 ddtrace/llmobs/_llmobs.py | 21 ++++++++++++++++++++-
 ddtrace/llmobs/utils.py   | 37 ++++++++++++++++++++++++-------------
 2 files changed, 44 insertions(+), 14 deletions(-)

diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index b2eb36c7ba2..fd5d3967678 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -1,7 +1,7 @@
 import json
 import os
 import time
-from typing import Any
+from typing import Any, Tuple
 from typing import Dict
 from typing import List
 from typing import Optional
@@ -805,6 +805,25 @@ def retrieval(
             log.warning(SPAN_START_WHILE_DISABLED_WARNING)
         return cls._instance._start_span("retrieval", name=name, session_id=session_id, ml_app=ml_app)
 
+    @classmethod
+    def prompt_context(cls,
+        name: str,
+        version: Optional[str]="1.0.0",
+        template: Optional[List[Tuple[str, str]]]=None,
+        variables: Optional[Dict[str, Any]]=None,
+        example_variable_keys: Optional[List[str]]=None,
+        constraint_variable_keys: Optional[List[str]]=None,
+        rag_context_variable_keys: Optional[List[str]]=None,
+        rag_query_variable_keys: Optional[List[str]]=None,
+        ml_app: str="") -> AnnotationContext:
+        """
+        Shortcut that builds a Prompt from the given arguments and returns an annotation context carrying it.
+        """
+        # TODO: check whether a prompt already exists within the span and, if so, update it
+        prompt = Prompt(name, version, template, variables, example_variable_keys, constraint_variable_keys,
+                        rag_context_variable_keys, rag_query_variable_keys, ml_app)
+        return cls.annotation_context(prompt=prompt)
+
     @classmethod
     def annotate(
         cls,
diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py
index 75a544119c0..231c9cb752f 100644
--- a/ddtrace/llmobs/utils.py
+++ b/ddtrace/llmobs/utils.py
@@ -67,10 +67,12 @@ def __init__(self,
                  rag_query_variable_keys = None,
                  ml_app=""):
 
+        self.__dict__["_is_initialized"] = False
+
         if name is None:
             raise TypeError("Prompt name of type String is mandatory.")
 
-        self.name = name
+        self.__dict__["name"] = name
 
         # Default values
         template = template or []
@@ -90,14 +92,17 @@ def __init__(self,
         if isinstance(template, str):
             template = [("user", template)]
 
-        self.ml_app = ml_app
-        self.version = version
-        self.template = template
-        self.variables = variables
-        self.example_variable_keys = example_variable_keys
-        self.constraint_variable_keys = constraint_variable_keys
-        self.rag_context_variable_keys = rag_context_variable_keys
-        self.rag_query_variable_keys = rag_query_variable_keys
+        self.__dict__["ml_app"] = ml_app
+        self.__dict__["version"] = version
+        self.__dict__["template"] = template
+        self.__dict__["variables"] = variables
+        self.__dict__["example_variable_keys"] = example_variable_keys
+        self.__dict__["constraint_variable_keys"] = constraint_variable_keys
+        self.__dict__["rag_context_variable_keys"] = rag_context_variable_keys
+        self.__dict__["rag_query_variable_keys"] = rag_query_variable_keys
+
+        # Unlocks the id regeneration at each setattr call
+        self.__dict__["_is_initialized"] = True
 
     def generate_ids(self):
         """
@@ -118,8 +123,8 @@ def generate_ids(self):
         template_id_str = f"[{ml_app}]{name}"
         instance_id_str = f"[{ml_app}]{name}{version}{template}{variables}{example_variable_keys}{constraint_variable_keys}{rag_context_variable_keys}{rag_query_variable_keys}"
 
-        self.prompt_template_id = sha1(template_id_str.encode()).hexdigest()
-        self.prompt_instance_id = sha1(instance_id_str.encode()).hexdigest()
+        self.__dict__["prompt_template_id"] = sha1(template_id_str.encode()).hexdigest()
+        self.__dict__["prompt_instance_id"] = sha1(instance_id_str.encode()).hexdigest()
 
     def validate(self):
         errors = []
@@ -228,13 +233,19 @@ def to_tags_dict(self) -> Dict[str, Union[str, Set[str], Dict[str, str], List[Tu
 
     def prepare_prompt(self, ml_app=None) -> Dict[str, Union[str, List[str], Dict[str, str], List[Tuple[str, str]]]]:
         if ml_app:
-            self.ml_app = ml_app
+            # write through __dict__ to bypass __setattr__; ids are regenerated unconditionally below
+            self.__dict__["ml_app"] = ml_app
+        self.generate_ids()
         self.validate()
         return self.to_tags_dict()
 
     def __setattr__(self, name, value):
+        """
+        Overrides attribute assignment to regenerate the prompt ids whenever an attribute is set after initialization.
+        """
         super().__setattr__(name, value)
-        self.generate_ids()
+        if self.__dict__.get("_is_initialized"):
+            self.generate_ids()
 
 
 

From 2f0801d02228548b0cc22ee2bcf47accd4e4c535 Mon Sep 17 00:00:00 2001
From: yahya-mouman <yahya.mouman@datadoghq.com>
Date: Wed, 26 Feb 2025 15:35:04 +0100
Subject: [PATCH 11/11] Add support for adding prompts into experiments

---
 ddtrace/llmobs/experimentation/_experiments.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ddtrace/llmobs/experimentation/_experiments.py b/ddtrace/llmobs/experimentation/_experiments.py
index 590d3caae4a..b2c5091abf5 100644
--- a/ddtrace/llmobs/experimentation/_experiments.py
+++ b/ddtrace/llmobs/experimentation/_experiments.py
@@ -13,6 +13,7 @@
 
 from .._utils import HTTPResponse
 from .._utils import http_request
+from ..utils import Prompt
 
 from ..decorators import agent
 from .._llmobs import LLMObs  
@@ -624,6 +625,7 @@ class Experiment:
         name (str): Name of the experiment
         task (Callable): Function that processes each dataset record
         dataset (Dataset): Dataset to run the experiment on
+        prompt (Prompt): Prompt template for the experiment
         evaluators (List[Callable]): Functions that evaluate task outputs
         tags (List[str]): Tags for organizing experiments
         description (str): Description of the experiment
@@ -640,6 +642,7 @@ def __init__(
         name: str,
         task: Callable,
         dataset: Dataset,
+        prompt: Prompt,
         evaluators: List[Callable],
         tags: List[str] = [],
         description: str = "",
@@ -649,6 +652,7 @@ def __init__(
         self.name = name
         self.task = task
         self.dataset = dataset
+        self.prompt = prompt
         self.evaluators = evaluators
         self.tags = tags
         self.project_name = ENV_PROJECT_NAME
@@ -997,6 +1001,7 @@ def process_row(idx_row):
 
                     LLMObs.annotate(
                         span,
+                        prompt=self.prompt,
                         input_data=input_data,
                         output_data=output,
                         tags={
@@ -1033,6 +1038,7 @@ def process_row(idx_row):
                     LLMObs.annotate(
                         span,
                         input_data=input_data,
+                        prompt=self.prompt,
                         tags={
                             "dataset_id": self.dataset._datadog_dataset_id,
                             "dataset_record_id": row["record_id"],