From dc014efe4132a9752212eda2bcd74f067289850d Mon Sep 17 00:00:00 2001 From: yahya-mouman Date: Wed, 19 Feb 2025 18:22:17 +0100 Subject: [PATCH 01/11] Added Prompt class and updated submission logic --- ddtrace/llmobs/_llmobs.py | 25 +++--- ddtrace/llmobs/_utils.py | 51 +----------- ddtrace/llmobs/utils.py | 171 ++++++++++++++++++++++++++++++++++---- 3 files changed, 168 insertions(+), 79 deletions(-) diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index 58b233f00a2..f7094114e8d 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -66,10 +66,10 @@ from ddtrace.llmobs._utils import _get_span_name from ddtrace.llmobs._utils import _is_evaluation_span from ddtrace.llmobs._utils import safe_json -from ddtrace.llmobs._utils import validate_prompt from ddtrace.llmobs._writer import LLMObsEvalMetricWriter from ddtrace.llmobs._writer import LLMObsSpanWriter from ddtrace.llmobs.utils import Documents +from ddtrace.llmobs.utils import Prompt from ddtrace.llmobs.utils import ExportedLLMObsSpan from ddtrace.llmobs.utils import Messages from ddtrace.propagation.http import HTTPPropagator @@ -463,7 +463,7 @@ def _tag_span_links(self, span, span_links): @classmethod def annotation_context( - cls, tags: Optional[Dict[str, Any]] = None, prompt: Optional[dict] = None, name: Optional[str] = None + cls, tags: Optional[Dict[str, Any]] = None, prompt: Optional[Prompt] = None, name: Optional[str] = None ) -> AnnotationContext: """ Sets specified attributes on all LLMObs spans created while the returned AnnotationContext is active. @@ -809,7 +809,7 @@ def retrieval( def annotate( cls, span: Optional[Span] = None, - prompt: Optional[dict] = None, + prompt: Optional[Prompt] = None, input_data: Optional[Any] = None, output_data: Optional[Any] = None, metadata: Optional[Dict[str, Any]] = None, @@ -823,15 +823,8 @@ def annotate( :param Span span: Span to annotate. If no span is provided, the current active span will be used. Must be an LLMObs-type span, i.e. generated by the LLMObs SDK. - :param prompt: A dictionary that represents the prompt used for an LLM call in the following form: - `{"template": "...", "id": "...", "version": "...", "variables": {"variable_1": "...", ...}}`. - Can also be set using the `ddtrace.llmobs.utils.Prompt` constructor class. - - This argument is only applicable to LLM spans. - - The dictionary may contain two optional keys relevant to RAG applications: - `rag_context_variables` - a list of variable key names that contain ground - truth context information - `rag_query_variables` - a list of variable key names that contains query - information for an LLM call + :param prompt: An instance of the `ddtrace.llmobs.utils.Prompt` class that represents the prompt used for an LLM call. + - This argument is only applicable to LLM spans. :param input_data: A single input string, dictionary, or a list of dictionaries based on the span kind: - llm spans: accepts a string, or a dictionary of form {"content": "...", "role": "..."}, or a list of dictionaries with the same signature. @@ -883,8 +876,12 @@ def annotate( span.name = _name if prompt is not None: try: - validated_prompt = validate_prompt(prompt) - cls._set_dict_attribute(span, INPUT_PROMPT, validated_prompt) + ml_app = _get_ml_app(span) + if ml_app is not None: + prompt.regenerate_ids(ml_app) + prompt.validate() + dict_prompt = prompt.to_dict() + cls._set_dict_attribute(span, INPUT_PROMPT, dict_prompt) except TypeError: log.warning("Failed to validate prompt with error: ", exc_info=True) if not span_kind: diff --git a/ddtrace/llmobs/_utils.py b/ddtrace/llmobs/_utils.py index f178582f518..92a54bba54d 100644 --- a/ddtrace/llmobs/_utils.py +++ b/ddtrace/llmobs/_utils.py @@ -20,59 +20,12 @@ from ddtrace.llmobs._constants import OPENAI_APM_SPAN_NAME from ddtrace.llmobs._constants import SESSION_ID from ddtrace.llmobs._constants import VERTEXAI_APM_SPAN_NAME +from ddtrace.llmobs.utils import Prompt from ddtrace.trace import Span log = get_logger(__name__) - -def validate_prompt(prompt: dict) -> Dict[str, Union[str, dict, List[str]]]: - validated_prompt = {} # type: Dict[str, Union[str, dict, List[str]]] - if not isinstance(prompt, dict): - raise TypeError("Prompt must be a dictionary") - variables = prompt.get("variables") - template = prompt.get("template") - version = prompt.get("version") - prompt_id = prompt.get("id") - ctx_variable_keys = prompt.get("rag_context_variables") - rag_query_variable_keys = prompt.get("rag_query_variables") - if variables is not None: - if not isinstance(variables, dict): - raise TypeError("Prompt variables must be a dictionary.") - if not any(isinstance(k, str) or isinstance(v, str) for k, v in variables.items()): - raise TypeError("Prompt variable keys and values must be strings.") - validated_prompt["variables"] = variables - if template is not None: - if not isinstance(template, str): - raise TypeError("Prompt template must be a string") - validated_prompt["template"] = template - if version is not None: - if not isinstance(version, str): - raise TypeError("Prompt version must be a string.") - validated_prompt["version"] = version - if prompt_id is not None: - if not isinstance(prompt_id, str): - raise TypeError("Prompt id must be a string.") - validated_prompt["id"] = prompt_id - if ctx_variable_keys is not None: - if not isinstance(ctx_variable_keys, list): - raise TypeError("Prompt field `context_variable_keys` must be a list of strings.") - if not all(isinstance(k, str) for k in ctx_variable_keys): - raise TypeError("Prompt field `context_variable_keys` must be a list of strings.") - validated_prompt[INTERNAL_CONTEXT_VARIABLE_KEYS] = ctx_variable_keys - else: - validated_prompt[INTERNAL_CONTEXT_VARIABLE_KEYS] = ["context"] - if rag_query_variable_keys is not None: - if not isinstance(rag_query_variable_keys, list): - raise TypeError("Prompt field `rag_query_variables` must be a list of strings.") - if not all(isinstance(k, str) for k in rag_query_variable_keys): - raise TypeError("Prompt field `rag_query_variables` must be a list of strings.") - validated_prompt[INTERNAL_QUERY_VARIABLE_KEYS] = rag_query_variable_keys - else: - validated_prompt[INTERNAL_QUERY_VARIABLE_KEYS] = ["question"] - return validated_prompt - - class LinkTracker: def __init__(self, object_span_links=None): self._object_span_links = object_span_links or {} @@ -185,7 +138,7 @@ def _get_session_id(span: Span) -> Optional[str]: def _inject_llmobs_parent_id(span_context): """Inject the LLMObs parent ID into the span context for reconnecting distributed LLMObs traces.""" span = ddtrace.tracer.current_span() - + if span is None: log.warning("No active span to inject LLMObs parent ID info.") return diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py index dac1f3149c9..c3861148bf3 100644 --- a/ddtrace/llmobs/utils.py +++ b/ddtrace/llmobs/utils.py @@ -1,8 +1,8 @@ -from typing import Dict +from re import match +from typing import Dict, Tuple, Optional from typing import List from typing import Union - # TypedDict was added to typing in python 3.8 try: from typing import TypedDict # noqa:F401 @@ -19,20 +19,159 @@ ExportedLLMObsSpan = TypedDict("ExportedLLMObsSpan", {"span_id": str, "trace_id": str}) Document = TypedDict("Document", {"name": str, "id": str, "text": str, "score": float}, total=False) Message = TypedDict("Message", {"content": str, "role": str}, total=False) -Prompt = TypedDict( - "Prompt", - { - "variables": Dict[str, str], - "template": str, - "id": str, - "version": str, - "rag_context_variables": List[ - str - ], # a list of variable key names that contain ground truth context information - "rag_query_variables": List[str], # a list of variable key names that contains query information - }, - total=False, -) + +class Prompt: + """ + Represents a prompt used for an LLM call. + + Attributes: + name (str): The name of the prompt. + version (str): The version of the prompt. + prompt_template_id (int): A hash of name and ml_app, used to identify the prompt template. + prompt_instance_id (int): A hash of all prompt attributes, used to identify the prompt instance. + template (Union[List[Tuple[str, str]], str]): The template used for the prompt, which can be a list of tuples or a string. + variables (Dict[str, str]): A dictionary of variables used in the prompt. + example_variables (List[str]): A list of variables names denoting examples. Examples are used to improve accuracy for the prompt. + constraint_variables (List[str]): A list of variables names denoting constraints. Constraints are limitations on how the prompt result is displayed. + rag_context_variables (List[str]): A list of variable key names that contain ground truth context information. + rag_query_variables (List[str]): A list of variable key names that contain query information for an LLM call. + """ + name: str + version: Optional[str] + prompt_template_id: int + prompt_instance_id: int + template: Optional[List[Tuple[str, str]]] + variables: Optional[Dict[str, str]] + example_variables: Optional[List[str]] + constraint_variables: Optional[List[str]] + rag_context_variables: Optional[List[str]] + rag_query_variables: Optional[List[str]] + + def __init__(self, + name, + version = "1.0.0", + template = None, + variables = None, + example_variables = None, + constraint_variables = None, + rag_context_variables = None, + rag_query_variables = None): + + if name is None: + raise TypeError("Prompt name of type String is mandatory.") + + self.name = name + + # Default values + template = template or [] + variables = variables or {} + example_variables = example_variables or [] + constraint_variables = constraint_variables or [] + rag_context_variables = rag_context_variables or ["context"] + rag_query_variables = rag_query_variables or ["question"] + version = version or "1.0.0" + + if version is not None: + # Add minor and patch version if not present + version_parts = (version.split(".") + ["0", "0"])[:3] + version = ".".join(version_parts) + + # Accept simple string templates + if isinstance(template, str): + template = [("user", template)] + + self.prompt_template_id = hash(name) + self.prompt_instance_id = hash( + (name, version, tuple(template), tuple(variables.keys()), tuple(variables.values()), + tuple(example_variables), tuple(constraint_variables), + tuple(rag_context_variables), tuple(rag_query_variables))) + + self.version = version + self.template = template + self.variables = variables + self.example_variables = example_variables + self.constraint_variables = constraint_variables + self.rag_context_variables = rag_context_variables + self.rag_query_variables = rag_query_variables + + def to_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]: + return { + "name": self.name, + "version": self.version, + "prompt_template_id": self.prompt_template_id, + "prompt_instance_id": self.prompt_instance_id, + "template": self.template, + "variables": self.variables, + "example_variables": self.example_variables, + "constraint_variables": self.constraint_variables, + "rag_context_variables": self.rag_context_variables, + "rag_query_variables": self.rag_query_variables, + } + + def regenerate_ids(self, ml_app: str): + self.prompt_instance_id = hash((ml_app, self.name, self.version, tuple(self.template), tuple(self.variables.keys()), tuple(self.variables.values()), tuple(self.example_variables), tuple(self.constraint_variables), tuple(self.rag_context_variables), tuple(self.rag_query_variables))) + self.prompt_template_id = hash((ml_app, self.name)) + pass + + def validate(self): + errors = [] + + name = self.name + version = self.version + template = self.template + variables = self.variables + example_variables = self.example_variables + constraint_variables = self.constraint_variables + rag_context_variables = self.rag_context_variables + rag_query_variables = self.rag_query_variables + + if name is None: + errors.append("Prompt name of type String is mandatory.") + elif not isinstance(name, str): + errors.append("Prompt name must be a string.") + + if version is not None: + # Add minor and patch version if not present + version_parts = (version.split(".") + ["0", "0"])[:3] + version = ".".join(version_parts) + # Official semver regex from https://semver.org/ + semver_regex = ( + r'^(?P0|[1-9]\d*)\.' + r'(?P0|[1-9]\d*)\.' + r'(?P0|[1-9]\d*)' + r'(?:-(?P(?:0|[1-9]\d*|\d*[a-zA-Z-]' + r'[0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-]' + r'[0-9a-zA-Z-]*))*))?' + r'(?:\+(?P[0-9a-zA-Z-]+' + r'(?:\.[0-9a-zA-Z-]+)*))?$' + ) + if not bool(match(semver_regex, version)): + errors.append( + "Prompt version must be semver compatible. Please check https://semver.org/ for more information.") + + # Accept simple string templates + if isinstance(template, str): + template = [("user", template)] + + # validate template + if not (isinstance(template, list) and all(isinstance(t, tuple) for t in template)): + errors.append("Prompt template must be a list of tuples.") + if not all(len(t) == 2 for t in template): + errors.append("Prompt template tuples must have exactly two elements.") + if not all(isinstance(item[0], str) and isinstance(item[1], str) for item in template): + errors.append("Prompt template tuple elements must be strings.") + + if not isinstance(variables, dict): + errors.append("Prompt variables must be a dictionary.") + if not all(isinstance(k, str) and isinstance(v, str) for k, v in variables.items()): + errors.append("Prompt variable keys and values must be strings.") + + for var_list in [example_variables, constraint_variables, rag_context_variables, rag_query_variables]: + if not all(isinstance(var, str) for var in var_list): + errors.append("All variable lists must contain strings only.") + + if errors: + raise TypeError("\n".join(errors)) class Messages: From d0845acfa3a210469e774e1bb8e12fbbab02860f Mon Sep 17 00:00:00 2001 From: yahya-mouman Date: Wed, 19 Feb 2025 20:47:18 +0100 Subject: [PATCH 02/11] Switch to sha-1 hash and only generate ids before submission to llmobs --- ddtrace/llmobs/_llmobs.py | 4 +-- ddtrace/llmobs/utils.py | 54 ++++++++++++++++++++++++++++----------- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index f7094114e8d..95f3cb5dee5 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -876,9 +876,7 @@ def annotate( span.name = _name if prompt is not None: try: - ml_app = _get_ml_app(span) - if ml_app is not None: - prompt.regenerate_ids(ml_app) + prompt.generate_ids(_get_ml_app(span) or "") prompt.validate() dict_prompt = prompt.to_dict() cls._set_dict_attribute(span, INPUT_PROMPT, dict_prompt) diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py index c3861148bf3..673d3be6307 100644 --- a/ddtrace/llmobs/utils.py +++ b/ddtrace/llmobs/utils.py @@ -1,4 +1,5 @@ from re import match +from hashlib import sha1 from typing import Dict, Tuple, Optional from typing import List from typing import Union @@ -38,8 +39,8 @@ class Prompt: """ name: str version: Optional[str] - prompt_template_id: int - prompt_instance_id: int + prompt_template_id: str + prompt_instance_id: str template: Optional[List[Tuple[str, str]]] variables: Optional[Dict[str, str]] example_variables: Optional[List[str]] @@ -65,8 +66,8 @@ def __init__(self, # Default values template = template or [] variables = variables or {} - example_variables = example_variables or [] - constraint_variables = constraint_variables or [] + example_variables = example_variables or ["example"] + constraint_variables = constraint_variables or ["constraint"] rag_context_variables = rag_context_variables or ["context"] rag_query_variables = rag_query_variables or ["question"] version = version or "1.0.0" @@ -80,12 +81,6 @@ def __init__(self, if isinstance(template, str): template = [("user", template)] - self.prompt_template_id = hash(name) - self.prompt_instance_id = hash( - (name, version, tuple(template), tuple(variables.keys()), tuple(variables.values()), - tuple(example_variables), tuple(constraint_variables), - tuple(rag_context_variables), tuple(rag_query_variables))) - self.version = version self.template = template self.variables = variables @@ -108,14 +103,31 @@ def to_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[T "rag_query_variables": self.rag_query_variables, } - def regenerate_ids(self, ml_app: str): - self.prompt_instance_id = hash((ml_app, self.name, self.version, tuple(self.template), tuple(self.variables.keys()), tuple(self.variables.values()), tuple(self.example_variables), tuple(self.constraint_variables), tuple(self.rag_context_variables), tuple(self.rag_query_variables))) - self.prompt_template_id = hash((ml_app, self.name)) - pass + def generate_ids(self, ml_app=""): + """ + Generates prompt_template_id and prompt_instance_id based on the prompt attributes. + The prompt_template_id is a sha-1 hash of the prompt name and ml_app + The prompt_instance_id is a sha-1 hash of all prompt attributes. + """ + name = str(self.name) + version = str(self.version) + template = str(self.template) + variables = str(self.variables) + example_variables = str(self.example_variables) + constraint_variables = str(self.constraint_variables) + rag_context_variables = str(self.rag_context_variables) + rag_query_variables = str(self.rag_query_variables) + + template_id_str = f"[{ml_app}]{name}" + instance_id_str = f"[{ml_app}]{name}{version}{template}{variables}{example_variables}{constraint_variables}{rag_context_variables}{rag_query_variables}" + + self.prompt_template_id = sha1(template_id_str.encode()).hexdigest() + self.prompt_instance_id = sha1(instance_id_str.encode()).hexdigest() def validate(self): errors = [] - + prompt_template_id = self.prompt_template_id + prompt_instance_id = self.prompt_instance_id name = self.name version = self.version template = self.template @@ -125,6 +137,16 @@ def validate(self): rag_context_variables = self.rag_context_variables rag_query_variables = self.rag_query_variables + + if prompt_template_id is None: + self.generate_ids() + elif not isinstance(prompt_template_id, str): + errors.append("Prompt template id must be a string.") + if prompt_instance_id is None: + self.generate_ids() + elif not isinstance(prompt_instance_id, str): + errors.append("Prompt instance id must be a string.") + if name is None: errors.append("Prompt name of type String is mandatory.") elif not isinstance(name, str): @@ -173,6 +195,8 @@ def validate(self): if errors: raise TypeError("\n".join(errors)) + return errors + class Messages: def __init__(self, messages: Union[List[Dict[str, str]], Dict[str, str], str]): From f3c38e1fceb4aa05495c15bb084a235294183ed2 Mon Sep 17 00:00:00 2001 From: yahya-mouman Date: Tue, 25 Feb 2025 11:54:22 +0100 Subject: [PATCH 03/11] rename vars --- ddtrace/llmobs/utils.py | 52 +++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py index 673d3be6307..a8c87678737 100644 --- a/ddtrace/llmobs/utils.py +++ b/ddtrace/llmobs/utils.py @@ -3,6 +3,8 @@ from typing import Dict, Tuple, Optional from typing import List from typing import Union +from ddtrace.llmobs._constants import INTERNAL_CONTEXT_VARIABLE_KEYS +from ddtrace.llmobs._constants import INTERNAL_QUERY_VARIABLE_KEYS # TypedDict was added to typing in python 3.8 try: @@ -43,20 +45,20 @@ class Prompt: prompt_instance_id: str template: Optional[List[Tuple[str, str]]] variables: Optional[Dict[str, str]] - example_variables: Optional[List[str]] - constraint_variables: Optional[List[str]] - rag_context_variables: Optional[List[str]] - rag_query_variables: Optional[List[str]] + example_variable_keys: Optional[List[str]] + constraint_variable_keys: Optional[List[str]] + rag_context_variable_keys: Optional[List[str]] + rag_query_variable_keys: Optional[List[str]] def __init__(self, name, version = "1.0.0", template = None, variables = None, - example_variables = None, - constraint_variables = None, - rag_context_variables = None, - rag_query_variables = None): + example_variable_keys = None, + constraint_variable_keys = None, + rag_context_variable_keys = None, + rag_query_variable_keys = None): if name is None: raise TypeError("Prompt name of type String is mandatory.") @@ -70,6 +72,10 @@ def __init__(self, constraint_variables = constraint_variables or ["constraint"] rag_context_variables = rag_context_variables or ["context"] rag_query_variables = rag_query_variables or ["question"] + example_variable_keys = example_variable_keys or ["example"] + constraint_variable_keys = constraint_variable_keys or ["constraint"] + rag_context_variable_keys = rag_context_variable_keys or ["context"] + rag_query_variable_keys = rag_query_variable_keys or ["question"] version = version or "1.0.0" if version is not None: @@ -89,7 +95,7 @@ def __init__(self, self.rag_context_variables = rag_context_variables self.rag_query_variables = rag_query_variables - def to_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]: + def to_tags_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]: return { "name": self.name, "version": self.version, @@ -97,10 +103,10 @@ def to_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[T "prompt_instance_id": self.prompt_instance_id, "template": self.template, "variables": self.variables, - "example_variables": self.example_variables, - "constraint_variables": self.constraint_variables, - "rag_context_variables": self.rag_context_variables, - "rag_query_variables": self.rag_query_variables, + "example_variable_keys": self.example_variable_keys, + "constraint_variable_keys": self.constraint_variable_keys, + INTERNAL_CONTEXT_VARIABLE_KEYS: self.rag_context_variable_keys, + INTERNAL_QUERY_VARIABLE_KEYS: self.rag_query_variable_keys, } def generate_ids(self, ml_app=""): @@ -113,13 +119,13 @@ def generate_ids(self, ml_app=""): version = str(self.version) template = str(self.template) variables = str(self.variables) - example_variables = str(self.example_variables) - constraint_variables = str(self.constraint_variables) - rag_context_variables = str(self.rag_context_variables) - rag_query_variables = str(self.rag_query_variables) + example_variable_keys = str(self.example_variable_keys) + constraint_variable_keys = str(self.constraint_variable_keys) + rag_context_variable_keys = str(self.rag_context_variable_keys) + rag_query_variable_keys = str(self.rag_query_variable_keys) template_id_str = f"[{ml_app}]{name}" - instance_id_str = f"[{ml_app}]{name}{version}{template}{variables}{example_variables}{constraint_variables}{rag_context_variables}{rag_query_variables}" + instance_id_str = f"[{ml_app}]{name}{version}{template}{variables}{example_variable_keys}{constraint_variable_keys}{rag_context_variable_keys}{rag_query_variable_keys}" self.prompt_template_id = sha1(template_id_str.encode()).hexdigest() self.prompt_instance_id = sha1(instance_id_str.encode()).hexdigest() @@ -132,10 +138,10 @@ def validate(self): version = self.version template = self.template variables = self.variables - example_variables = self.example_variables - constraint_variables = self.constraint_variables - rag_context_variables = self.rag_context_variables - rag_query_variables = self.rag_query_variables + example_variable_keys = self.example_variable_keys + constraint_variable_keys = self.constraint_variable_keys + rag_context_variable_keys = self.rag_context_variable_keys + rag_query_variable_keys = self.rag_query_variable_keys if prompt_template_id is None: @@ -188,7 +194,7 @@ def validate(self): if not all(isinstance(k, str) and isinstance(v, str) for k, v in variables.items()): errors.append("Prompt variable keys and values must be strings.") - for var_list in [example_variables, constraint_variables, rag_context_variables, rag_query_variables]: + for var_list in [example_variable_keys, constraint_variable_keys, rag_context_variable_keys, rag_query_variable_keys]: if not all(isinstance(var, str) for var in var_list): errors.append("All variable lists must contain strings only.") From c9e14f0fd4c9f9293928afbb46f243afd91f485b Mon Sep 17 00:00:00 2001 From: yahya-mouman Date: Tue, 25 Feb 2025 11:54:32 +0100 Subject: [PATCH 04/11] rename vars --- ddtrace/llmobs/utils.py | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py index a8c87678737..ccaa1efcad0 100644 --- a/ddtrace/llmobs/utils.py +++ b/ddtrace/llmobs/utils.py @@ -34,10 +34,10 @@ class Prompt: prompt_instance_id (int): A hash of all prompt attributes, used to identify the prompt instance. template (Union[List[Tuple[str, str]], str]): The template used for the prompt, which can be a list of tuples or a string. variables (Dict[str, str]): A dictionary of variables used in the prompt. - example_variables (List[str]): A list of variables names denoting examples. Examples are used to improve accuracy for the prompt. - constraint_variables (List[str]): A list of variables names denoting constraints. Constraints are limitations on how the prompt result is displayed. - rag_context_variables (List[str]): A list of variable key names that contain ground truth context information. - rag_query_variables (List[str]): A list of variable key names that contain query information for an LLM call. + example_variable_keys (List[str]): A list of variables names denoting examples. Examples are used to improve accuracy for the prompt. + constraint_variable_keys (List[str]): A list of variables names denoting constraints. Constraints are limitations on how the prompt result is displayed. + rag_context_variable_keys (List[str]): A list of variable key names that contain ground truth context information. + rag_query_variable_keys (List[str]): A list of variable key names that contain query information for an LLM call. """ name: str version: Optional[str] @@ -68,10 +68,7 @@ def __init__(self, # Default values template = template or [] variables = variables or {} - example_variables = example_variables or ["example"] - constraint_variables = constraint_variables or ["constraint"] - rag_context_variables = rag_context_variables or ["context"] - rag_query_variables = rag_query_variables or ["question"] + # TODO remove default keys when not in variables example_variable_keys = example_variable_keys or ["example"] constraint_variable_keys = constraint_variable_keys or ["constraint"] rag_context_variable_keys = rag_context_variable_keys or ["context"] From 1f19033a8a57f13dc7b6754afe5df56763e3642e Mon Sep 17 00:00:00 2001 From: yahya-mouman Date: Tue, 25 Feb 2025 11:54:45 +0100 Subject: [PATCH 05/11] rename vars --- ddtrace/llmobs/utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py index ccaa1efcad0..6d34116e456 100644 --- a/ddtrace/llmobs/utils.py +++ b/ddtrace/llmobs/utils.py @@ -87,10 +87,10 @@ def __init__(self, self.version = version self.template = template self.variables = variables - self.example_variables = example_variables - self.constraint_variables = constraint_variables - self.rag_context_variables = rag_context_variables - self.rag_query_variables = rag_query_variables + self.example_variable_keys = example_variable_keys + self.constraint_variable_keys = constraint_variable_keys + self.rag_context_variable_keys = rag_context_variable_keys + self.rag_query_variable_keys = rag_query_variable_keys def to_tags_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]: return { From a553ff6414e4b71e43b2de7cdb7b671b61bc5165 Mon Sep 17 00:00:00 2001 From: yahya-mouman Date: Tue, 25 Feb 2025 13:42:32 +0100 Subject: [PATCH 06/11] rename method --- ddtrace/llmobs/_llmobs.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index 95f3cb5dee5..d3ae5a85b65 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -878,7 +878,7 @@ def annotate( try: prompt.generate_ids(_get_ml_app(span) or "") prompt.validate() - dict_prompt = prompt.to_dict() + dict_prompt = prompt.to_tags_dict() cls._set_dict_attribute(span, INPUT_PROMPT, dict_prompt) except TypeError: log.warning("Failed to validate prompt with error: ", exc_info=True) From 89ba50ec2ce3ee78c96ed04266f8e2ea2062bdd8 Mon Sep 17 00:00:00 2001 From: yahya-mouman Date: Tue, 25 Feb 2025 14:35:14 +0100 Subject: [PATCH 07/11] Refactor quality of usage --- ddtrace/llmobs/_llmobs.py | 4 +- ddtrace/llmobs/utils.py | 87 ++++++++++++++++++++++++++++----------- 2 files changed, 64 insertions(+), 27 deletions(-) diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index d3ae5a85b65..b2eb36c7ba2 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -876,9 +876,7 @@ def annotate( span.name = _name if prompt is not None: try: - prompt.generate_ids(_get_ml_app(span) or "") - prompt.validate() - dict_prompt = prompt.to_tags_dict() + dict_prompt = prompt.prepare_prompt(ml_app=_get_ml_app(span) or "") cls._set_dict_attribute(span, INPUT_PROMPT, dict_prompt) except TypeError: log.warning("Failed to validate prompt with error: ", exc_info=True) diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py index 6d34116e456..3c35bb8eb79 100644 --- a/ddtrace/llmobs/utils.py +++ b/ddtrace/llmobs/utils.py @@ -1,7 +1,10 @@ from re import match from hashlib import sha1 -from typing import Dict, Tuple, Optional +from typing import Any +from typing import Dict from typing import List +from typing import Optional +from typing import Tuple from typing import Union from ddtrace.llmobs._constants import INTERNAL_CONTEXT_VARIABLE_KEYS from ddtrace.llmobs._constants import INTERNAL_QUERY_VARIABLE_KEYS @@ -29,6 +32,7 @@ class Prompt: Attributes: name (str): The name of the prompt. + ml_app (str): The name of the service, retrieved from the active span when not specified. version (str): The version of the prompt. prompt_template_id (int): A hash of name and ml_app, used to identify the prompt template. prompt_instance_id (int): A hash of all prompt attributes, used to identify the prompt instance. @@ -44,11 +48,12 @@ class Prompt: prompt_template_id: str prompt_instance_id: str template: Optional[List[Tuple[str, str]]] - variables: Optional[Dict[str, str]] + variables: Optional[Dict[str, Any]] example_variable_keys: Optional[List[str]] constraint_variable_keys: Optional[List[str]] rag_context_variable_keys: Optional[List[str]] rag_query_variable_keys: Optional[List[str]] + ml_app: str def __init__(self, name, @@ -58,7 +63,8 @@ def __init__(self, example_variable_keys = None, constraint_variable_keys = None, rag_context_variable_keys = None, - rag_query_variable_keys = None): + rag_query_variable_keys = None, + ml_app=""): if name is None: raise TypeError("Prompt name of type String is mandatory.") @@ -68,9 +74,8 @@ def __init__(self, # Default values template = template or [] variables = variables or {} - # TODO remove default keys when not in variables - example_variable_keys = example_variable_keys or ["example"] - constraint_variable_keys = constraint_variable_keys or ["constraint"] + example_variable_keys = example_variable_keys or ["example", "examples"] + constraint_variable_keys = constraint_variable_keys or ["constraint", "constraints"] rag_context_variable_keys = rag_context_variable_keys or ["context"] rag_query_variable_keys = rag_query_variable_keys or ["question"] version = version or "1.0.0" @@ -80,10 +85,11 @@ def __init__(self, version_parts = (version.split(".") + ["0", "0"])[:3] version = ".".join(version_parts) - # Accept simple string templates + # Accept simple string templates as user role if isinstance(template, str): template = [("user", template)] + self.ml_app = ml_app self.version = version self.template = template self.variables = variables @@ -92,27 +98,14 @@ def __init__(self, self.rag_context_variable_keys = rag_context_variable_keys self.rag_query_variable_keys = rag_query_variable_keys - def to_tags_dict(self) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]: - return { - "name": self.name, - "version": self.version, - "prompt_template_id": self.prompt_template_id, - "prompt_instance_id": self.prompt_instance_id, - "template": self.template, - "variables": self.variables, - "example_variable_keys": self.example_variable_keys, - "constraint_variable_keys": self.constraint_variable_keys, - INTERNAL_CONTEXT_VARIABLE_KEYS: self.rag_context_variable_keys, - INTERNAL_QUERY_VARIABLE_KEYS: self.rag_query_variable_keys, - } - - def generate_ids(self, ml_app=""): + def generate_ids(self): """ Generates prompt_template_id and prompt_instance_id based on the prompt attributes. The prompt_template_id is a sha-1 hash of the prompt name and ml_app The prompt_instance_id is a sha-1 hash of all prompt attributes. """ name = str(self.name) + ml_app = str(self.ml_app) version = str(self.version) template = str(self.template) variables = str(self.variables) @@ -188,8 +181,8 @@ def validate(self): if not isinstance(variables, dict): errors.append("Prompt variables must be a dictionary.") - if not all(isinstance(k, str) and isinstance(v, str) for k, v in variables.items()): - errors.append("Prompt variable keys and values must be strings.") + if not all(isinstance(k, str) for k in variables): + errors.append("Prompt variable keys must be strings.") for var_list in [example_variable_keys, constraint_variable_keys, rag_context_variable_keys, rag_query_variable_keys]: if not all(isinstance(var, str) for var in var_list): @@ -200,6 +193,52 @@ def validate(self): return errors + def to_tags_dict(self) -> Dict[str, Union[str, List[str], Dict[str, str], List[Tuple[str, str]]]]: + name = self.name + version = self.version + prompt_template_id = self.prompt_template_id + prompt_instance_id = self.prompt_instance_id + template = self.template + variables = self.variables + example_variable_keys = self.example_variable_keys + constraint_variable_keys = self.constraint_variable_keys + rag_context_variable_keys = self.rag_context_variable_keys + rag_query_variable_keys = self.rag_query_variable_keys + + # Clean up keys and remove those that are not in variables, including default keys. + example_variable_keys_set = {key for key in example_variable_keys if key in variables} + constraint_variable_keys_set = {key for key in constraint_variable_keys if key in variables} + rag_context_variable_keys_set = {key for key in rag_context_variable_keys if key in variables} + rag_query_variable_keys_set = {key for key in rag_query_variable_keys if key in variables} + + return { + "name": name, + "version": version, + "prompt_template_id": prompt_template_id, + "prompt_instance_id": prompt_instance_id, + "template": template, + "variables": variables, + "example_variable_keys": example_variable_keys_set, + "constraint_variable_keys": constraint_variable_keys_set, + "rag_context_variable_keys": rag_context_variable_keys_set, + "rag_query_variable_keys": rag_query_variable_keys_set, + # also using internal constants to keep hallucination functionality + INTERNAL_CONTEXT_VARIABLE_KEYS: rag_context_variable_keys_set, + INTERNAL_QUERY_VARIABLE_KEYS: rag_query_variable_keys_set, + } + + def prepare_prompt(self, ml_app=None) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]: + if ml_app: + self.ml_app = ml_app + self.validate() + return self.to_tags_dict() + + def __setattr__(self, name, value): + super().__setattr__(name, value) + self.generate_ids() + + + class Messages: def __init__(self, messages: Union[List[Dict[str, str]], Dict[str, str], str]): From 83236bd29b7f22fe844fb51861ff66d0ab2a04f4 Mon Sep 17 00:00:00 2001 From: yahya-mouman Date: Tue, 25 Feb 2025 15:28:26 +0100 Subject: [PATCH 08/11] remove redundant tags --- ddtrace/llmobs/utils.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py index 3c35bb8eb79..0e1b797951f 100644 --- a/ddtrace/llmobs/utils.py +++ b/ddtrace/llmobs/utils.py @@ -220,8 +220,6 @@ def to_tags_dict(self) -> Dict[str, Union[str, List[str], Dict[str, str], List[T "variables": variables, "example_variable_keys": example_variable_keys_set, "constraint_variable_keys": constraint_variable_keys_set, - "rag_context_variable_keys": rag_context_variable_keys_set, - "rag_query_variable_keys": rag_query_variable_keys_set, # also using internal constants to keep hallucination functionality INTERNAL_CONTEXT_VARIABLE_KEYS: rag_context_variable_keys_set, INTERNAL_QUERY_VARIABLE_KEYS: rag_query_variable_keys_set, From 81dc59a4ba7c2f2a942a6f65333430eb62d26700 Mon Sep 17 00:00:00 2001 From: yahya-mouman Date: Tue, 25 Feb 2025 16:45:14 +0100 Subject: [PATCH 09/11] update dict signature --- ddtrace/llmobs/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py index 0e1b797951f..75a544119c0 100644 --- a/ddtrace/llmobs/utils.py +++ b/ddtrace/llmobs/utils.py @@ -4,6 +4,7 @@ from typing import Dict from typing import List from typing import Optional +from typing import Set from typing import Tuple from typing import Union from ddtrace.llmobs._constants import INTERNAL_CONTEXT_VARIABLE_KEYS @@ -193,7 +194,7 @@ def validate(self): return errors - def to_tags_dict(self) -> Dict[str, Union[str, List[str], Dict[str, str], List[Tuple[str, str]]]]: + def to_tags_dict(self) -> Dict[str, Union[str, Set[str], Dict[str, str], List[Tuple[str, str]]]]: name = self.name version = self.version prompt_template_id = self.prompt_template_id @@ -225,7 +226,7 @@ def to_tags_dict(self) -> Dict[str, Union[str, List[str], Dict[str, str], List[T INTERNAL_QUERY_VARIABLE_KEYS: rag_query_variable_keys_set, } - def prepare_prompt(self, ml_app=None) -> Dict[str, Union[str, int, List[str], Dict[str, str], List[Tuple[str, str]]]]: + def prepare_prompt(self, ml_app=None) -> Dict[str, Union[str, List[str], Dict[str, str], List[Tuple[str, str]]]]: if ml_app: self.ml_app = ml_app self.validate() From 430024d66e5aea9a538fb11905122d21c60ced6a Mon Sep 17 00:00:00 2001 From: yahya-mouman Date: Tue, 25 Feb 2025 17:29:43 +0100 Subject: [PATCH 10/11] dynamic id generation in case prompt object is updated --- ddtrace/llmobs/_llmobs.py | 21 ++++++++++++++++++++- ddtrace/llmobs/utils.py | 37 ++++++++++++++++++++++++------------- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py index b2eb36c7ba2..fd5d3967678 100644 --- a/ddtrace/llmobs/_llmobs.py +++ b/ddtrace/llmobs/_llmobs.py @@ -1,7 +1,7 @@ import json import os import time -from typing import Any +from typing import Any, Tuple from typing import Dict from typing import List from typing import Optional @@ -805,6 +805,25 @@ def retrieval( log.warning(SPAN_START_WHILE_DISABLED_WARNING) return cls._instance._start_span("retrieval", name=name, session_id=session_id, ml_app=ml_app) + @classmethod + def prompt_context(cls, + name: str, + version: Optional[str]="1.0.0", + template: Optional[List[Tuple[str, str]]]=None, + variables: Optional[Dict[str, Any]]=None, + example_variable_keys: Optional[List[str]]=None, + constraint_variable_keys: Optional[List[str]]=None, + rag_context_variable_keys: Optional[List[str]]=None, + rag_query_variable_keys: Optional[List[str]]=None, + ml_app: str="") -> AnnotationContext: + """ + shortcut to create a prompt object and annotate it + """ + # TODO try to check for if the prompt already exists within the span and update it + prompt = Prompt(name, version, template, variables, example_variable_keys, constraint_variable_keys, + rag_context_variable_keys, rag_query_variable_keys, ml_app) + return cls.annotation_context(prompt=prompt) + @classmethod def annotate( cls, diff --git a/ddtrace/llmobs/utils.py b/ddtrace/llmobs/utils.py index 75a544119c0..231c9cb752f 100644 --- a/ddtrace/llmobs/utils.py +++ b/ddtrace/llmobs/utils.py @@ -67,10 +67,12 @@ def __init__(self, rag_query_variable_keys = None, ml_app=""): + self.__dict__["_is_initialized"] = False + if name is None: raise TypeError("Prompt name of type String is mandatory.") - self.name = name + self.__dict__["name"] = name # Default values template = template or [] @@ -90,14 +92,17 @@ def __init__(self, if isinstance(template, str): template = [("user", template)] - self.ml_app = ml_app - self.version = version - self.template = template - self.variables = variables - self.example_variable_keys = example_variable_keys - self.constraint_variable_keys = constraint_variable_keys - self.rag_context_variable_keys = rag_context_variable_keys - self.rag_query_variable_keys = rag_query_variable_keys + self.__dict__["ml_app"] = ml_app + self.__dict__["version"] = version + self.__dict__["template"] = template + self.__dict__["variables"] = variables + self.__dict__["example_variable_keys"] = example_variable_keys + self.__dict__["constraint_variable_keys"] = constraint_variable_keys + self.__dict__["rag_context_variable_keys"] = rag_context_variable_keys + self.__dict__["rag_query_variable_keys"] = rag_query_variable_keys + + # Unlocks the id regeneration at each setattr call + self.__dict__["_is_initialized"] = True def generate_ids(self): """ @@ -118,8 +123,8 @@ def generate_ids(self): template_id_str = f"[{ml_app}]{name}" instance_id_str = f"[{ml_app}]{name}{version}{template}{variables}{example_variable_keys}{constraint_variable_keys}{rag_context_variable_keys}{rag_query_variable_keys}" - self.prompt_template_id = sha1(template_id_str.encode()).hexdigest() - self.prompt_instance_id = sha1(instance_id_str.encode()).hexdigest() + self.__dict__["prompt_template_id"] = sha1(template_id_str.encode()).hexdigest() + self.__dict__["prompt_instance_id"] = sha1(instance_id_str.encode()).hexdigest() def validate(self): errors = [] @@ -228,13 +233,19 @@ def to_tags_dict(self) -> Dict[str, Union[str, Set[str], Dict[str, str], List[Tu def prepare_prompt(self, ml_app=None) -> Dict[str, Union[str, List[str], Dict[str, str], List[Tuple[str, str]]]]: if ml_app: - self.ml_app = ml_app + # regenerate ids if ml_app is changed + self.__dict__["ml_app"] = ml_app + self.generate_ids() self.validate() return self.to_tags_dict() def __setattr__(self, name, value): + """ + Overrides Set attribute value to regenerate prompt ids if attributes change. + """ super().__setattr__(name, value) - self.generate_ids() + if self.__dict__.get("_is_initialized"): + self.generate_ids() From 2f0801d02228548b0cc22ee2bcf47accd4e4c535 Mon Sep 17 00:00:00 2001 From: yahya-mouman Date: Wed, 26 Feb 2025 15:35:04 +0100 Subject: [PATCH 11/11] Add support for adding prompts into experiments --- ddtrace/llmobs/experimentation/_experiments.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ddtrace/llmobs/experimentation/_experiments.py b/ddtrace/llmobs/experimentation/_experiments.py index 590d3caae4a..b2c5091abf5 100644 --- a/ddtrace/llmobs/experimentation/_experiments.py +++ b/ddtrace/llmobs/experimentation/_experiments.py @@ -13,6 +13,7 @@ from .._utils import HTTPResponse from .._utils import http_request +from ..utils import Prompt from ..decorators import agent from .._llmobs import LLMObs @@ -624,6 +625,7 @@ class Experiment: name (str): Name of the experiment task (Callable): Function that processes each dataset record dataset (Dataset): Dataset to run the experiment on + prompt (Prompt): Prompt template for the experiment evaluators (List[Callable]): Functions that evaluate task outputs tags (List[str]): Tags for organizing experiments description (str): Description of the experiment @@ -640,6 +642,7 @@ def __init__( name: str, task: Callable, dataset: Dataset, + prompt: Prompt, evaluators: List[Callable], tags: List[str] = [], description: str = "", @@ -649,6 +652,7 @@ def __init__( self.name = name self.task = task self.dataset = dataset + self.prompt = prompt self.evaluators = evaluators self.tags = tags self.project_name = ENV_PROJECT_NAME @@ -997,6 +1001,7 @@ def process_row(idx_row): LLMObs.annotate( span, + prompt=self.prompt, input_data=input_data, output_data=output, tags={ @@ -1033,6 +1038,7 @@ def process_row(idx_row): LLMObs.annotate( span, input_data=input_data, + prompt=self.prompt, tags={ "dataset_id": self.dataset._datadog_dataset_id, "dataset_record_id": row["record_id"],