Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Jonathan.chavez/llm experiments 2 #12500

Open
wants to merge 47 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
b4d6082
Add main classes for experiments sdk
jjxct Oct 24, 2024
f9e9296
Added more things but don't remember what
jjxct Oct 24, 2024
60f3ba5
Add network calls for main methods
jjxct Oct 28, 2024
d48942d
Add docstring
jjxct Oct 29, 2024
88f05d3
Format code
jjxct Oct 29, 2024
e73a897
Add custom exception classes
jjxct Oct 29, 2024
f8c9ef0
Move code to another directory
jjxct Oct 29, 2024
59577e1
Change experiments module export
jjxct Oct 29, 2024
402d402
Use f strings
jjxct Oct 29, 2024
2c281c5
Decouple running from evaluating
jjxct Oct 29, 2024
0e421da
Change parametrize function to make it simpler
jjxct Nov 4, 2024
173d2ae
Add test file, export the top level classes
Kyle-Verhoog Nov 5, 2024
044d696
fmt
Kyle-Verhoog Nov 5, 2024
ac634fa
Simplify http client, remove stdout printing
Kyle-Verhoog Nov 5, 2024
d29f081
fmt
Kyle-Verhoog Nov 5, 2024
dc119d0
more stdout cleanup, http status code checking
Kyle-Verhoog Nov 5, 2024
f018298
Add feedback from sync
jjxct Nov 8, 2024
351cd7a
Add error handling on tasks
jjxct Nov 10, 2024
2608ba5
fix import
jjxct Nov 10, 2024
5cbfd70
docstring
jjxct Nov 10, 2024
bed1261
Custom Exception classes
jjxct Nov 10, 2024
0cbc487
Merge remote-tracking branch 'origin/main' into jonathan.chavez/llm-e…
Kyle-Verhoog Nov 11, 2024
0928224
handle duration errors
jjxct Nov 11, 2024
cac1476
more stuff
jjxct Nov 12, 2024
436b1b6
Merge branch 'jonathan.chavez/llm-experiments' of github.com:DataDog/…
jjxct Nov 12, 2024
9024e14
support polymorphic i/o
jjxct Nov 14, 2024
a228c30
structure changes
jjxct Dec 6, 2024
b29fa1d
modifications to types
jjxct Dec 9, 2024
738cc07
remove unnecessary comments
jjxct Dec 9, 2024
1059172
fix code quality violations
jjxct Dec 9, 2024
965bdcb
add test comments
jjxct Dec 13, 2024
ba8e807
add error fields on evals
jjxct Jan 13, 2025
8e237e1
Merge branch 'main' of github.com:DataDog/dd-trace-py into jonathan.c…
jjxct Jan 13, 2025
2a73462
encode llm events in utf-8
jjxct Jan 15, 2025
9497ea8
tracing works
jjxct Jan 15, 2025
c05deb8
two buffers temporary
jjxct Jan 27, 2025
1b0800b
switch trace ingestion path
jjxct Jan 30, 2025
b8a2472
tracing stable
jjxct Feb 5, 2025
9ecc788
stabilize errors
jjxct Feb 11, 2025
9a46b2d
start of cleanup
jjxct Feb 14, 2025
ac72eaf
latest polish changes
jjxct Feb 19, 2025
82ff293
fixes
jjxct Feb 19, 2025
f29ef96
migrate from experiment task to task
jjxct Feb 20, 2025
d23b9db
change imports
jjxct Feb 20, 2025
4727bb7
new logic
jjxct Feb 20, 2025
244c003
fix run parameters
jjxct Feb 21, 2025
cbf4d2d
offline mode
jjxct Feb 24, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion ddtrace/llmobs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from ddtrace.llmobs import LLMObs
LLMObs.enable()
"""

from ._llmobs import LLMObs


Expand Down
4 changes: 3 additions & 1 deletion ddtrace/llmobs/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@
INPUT_VALUE = "_ml_obs.meta.input.value"
INPUT_PARAMETERS = "_ml_obs.meta.input.parameters"
INPUT_PROMPT = "_ml_obs.meta.input.prompt"

EXPECTED_OUTPUT = "_ml_obs.meta.input.expected_output"
EXPERIMENT_INPUT = "_ml_obs.meta.input"
EXPERIMENT_OUTPUT = "_ml_obs.meta.output"
OUTPUT_DOCUMENTS = "_ml_obs.meta.output.documents"
OUTPUT_MESSAGES = "_ml_obs.meta.output.messages"
OUTPUT_VALUE = "_ml_obs.meta.output.value"
Expand Down
3 changes: 2 additions & 1 deletion ddtrace/llmobs/_integrations/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,8 @@ def llmobs_set_tags(
return
try:
self._llmobs_set_tags(span, args, kwargs, response, operation)
except Exception:
except Exception as e:
print(e)
log.error("Error extracting LLMObs fields for span %s, likely due to malformed data", span, exc_info=True)

@abc.abstractmethod
Expand Down
49 changes: 49 additions & 0 deletions ddtrace/llmobs/_llmobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@
from ddtrace.llmobs._constants import SPAN_KIND
from ddtrace.llmobs._constants import SPAN_START_WHILE_DISABLED_WARNING
from ddtrace.llmobs._constants import TAGS
from ddtrace.llmobs._constants import EXPECTED_OUTPUT
from ddtrace.llmobs._constants import EXPERIMENT_INPUT
from ddtrace.llmobs._constants import EXPERIMENT_OUTPUT
from ddtrace.llmobs._evaluators.runner import EvaluatorRunner
from ddtrace.llmobs._utils import AnnotationContext
from ddtrace.llmobs._utils import _get_llmobs_parent_id
Expand Down Expand Up @@ -193,6 +196,14 @@ def _llmobs_span_event(cls, span: Span) -> Tuple[Dict[str, Any], bool]:
span._set_ctx_item(ML_APP, ml_app)
parent_id = str(_get_llmobs_parent_id(span) or "undefined")

# Experiments related
if span._get_ctx_item(EXPECTED_OUTPUT) is not None:
meta["expected_output"] = span._get_ctx_item(EXPECTED_OUTPUT)
if span._get_ctx_item(EXPERIMENT_INPUT) is not None:
meta["input"] = span._get_ctx_item(EXPERIMENT_INPUT)
if span._get_ctx_item(EXPERIMENT_OUTPUT) is not None:
meta["output"] = span._get_ctx_item(EXPERIMENT_OUTPUT)

llmobs_span_event = {
"trace_id": "{:x}".format(span.trace_id),
"span_id": str(span.span_id),
Expand All @@ -212,6 +223,7 @@ def _llmobs_span_event(cls, span: Span) -> Tuple[Dict[str, Any], bool]:
llmobs_span_event["tags"] = cls._llmobs_tags(
span, ml_app, session_id, is_ragas_integration_span=is_ragas_integration_span
)

return llmobs_span_event, is_ragas_integration_span

@staticmethod
Expand Down Expand Up @@ -626,6 +638,22 @@ def agent(cls, name: Optional[str] = None, session_id: Optional[str] = None, ml_
if cls.enabled is False:
log.warning(SPAN_START_WHILE_DISABLED_WARNING)
return cls._instance._start_span("agent", name=name, session_id=session_id, ml_app=ml_app)

@classmethod
def _experiment(
    cls,
    name: Optional[str] = None,
    session_id: Optional[str] = None,
    ml_app: Optional[str] = None,
) -> Span:
    """
    Start a span of kind "experiment" through the active LLMObs instance.

    :param str name: The name of the traced operation; forwarded unchanged to ``_start_span``.
    :param str session_id: The ID of the underlying user session; forwarded unchanged to ``_start_span``.
    :param str ml_app: The name of the ML application; forwarded unchanged to ``_start_span``.

    :returns: The value returned by ``cls._instance._start_span`` (annotated as a Span).

    A warning is logged when LLMObs is disabled, but the span is still started.
    """
    llmobs_disabled = cls.enabled is False
    if llmobs_disabled:
        log.warning(SPAN_START_WHILE_DISABLED_WARNING)
    experiment_span = cls._instance._start_span("experiment", name=name, session_id=session_id, ml_app=ml_app)
    return experiment_span

@classmethod
def workflow(
Expand Down Expand Up @@ -785,9 +813,20 @@ def annotate(
cls._tag_embedding_io(span, input_documents=input_data, output_text=output_data)
elif span_kind == "retrieval":
cls._tag_retrieval_io(span, input_text=input_data, output_documents=output_data)
elif span_kind == "experiment":
cls._tag_experiment_io(span, input_data=input_data, output_data=output_data)
else:
cls._tag_text_io(span, input_value=input_data, output_value=output_data)

@staticmethod
def _tag_expected_output(span, expected_output: dict) -> None:
    """Store an experiment's expected output on the given LLMObs span.

    The value is written to the span's context under the ``EXPECTED_OUTPUT`` key.
    A ``TypeError`` raised while storing it is logged as a warning instead of
    being propagated to the caller.
    """
    try:
        span._set_ctx_item(EXPECTED_OUTPUT, expected_output)
    except TypeError:
        # Best-effort tagging: report the failure and fall through (returns None).
        log.warning("Failed to validate expected output with error: ", exc_info=True)

@staticmethod
def _tag_prompt(span, prompt: dict) -> None:
"""Tags a given LLMObs span with a prompt"""
Expand Down Expand Up @@ -877,6 +916,16 @@ def _tag_text_io(cls, span, input_value=None, output_value=None):
if output_value is not None:
span._set_ctx_item(OUTPUT_VALUE, str(output_value))

@classmethod
def _tag_experiment_io(cls, span, input_data=None, output_data=None):
    """Tag input/output data for experiment kind spans.

    Each value that is not None is stored as-is (no string conversion) in the
    span's context: ``input_data`` under ``EXPERIMENT_INPUT`` and ``output_data``
    under ``EXPERIMENT_OUTPUT``. A None value leaves the corresponding item untouched.
    """
    io_items = (
        (EXPERIMENT_INPUT, input_data),
        (EXPERIMENT_OUTPUT, output_data),
    )
    for ctx_key, payload in io_items:
        if payload is None:
            continue
        span._set_ctx_item(ctx_key, payload)

@staticmethod
def _tag_span_tags(span: Span, span_tags: Dict[str, Any]) -> None:
"""Tags a given LLMObs span with a dictionary of key-value tag pairs.
Expand Down
51 changes: 51 additions & 0 deletions ddtrace/llmobs/_utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,11 @@
import http.client
import json
from typing import Dict
from typing import List
from typing import Optional
from typing import Union
import urllib.request
from urllib.error import HTTPError

import ddtrace
from ddtrace import Span
Expand Down Expand Up @@ -164,6 +167,7 @@ def _get_session_id(span: Span) -> Optional[str]:
def _inject_llmobs_parent_id(span_context):
"""Inject the LLMObs parent ID into the span context for reconnecting distributed LLMObs traces."""
span = ddtrace.tracer.current_span()

if span is None:
log.warning("No active span to inject LLMObs parent ID info.")
return
Expand All @@ -175,6 +179,7 @@ def _inject_llmobs_parent_id(span_context):
llmobs_parent_id = str(span.span_id)
else:
llmobs_parent_id = _get_llmobs_parent_id(span)

span_context._meta[PROPAGATED_PARENT_ID_KEY] = llmobs_parent_id or "undefined"


Expand All @@ -191,3 +196,49 @@ def safe_json(obj):
return json.dumps(obj, ensure_ascii=False, skipkeys=True, default=_unserializable_default_repr)
except Exception:
log.error("Failed to serialize object to JSON.", exc_info=True)


class HTTPResponse:
    """Thin wrapper around a response-like object exposing status, bytes, text and JSON.

    The wrapped object must provide ``read()`` for body access and one of
    ``status``, ``code`` or ``getcode()`` for the status code.
    """

    def __init__(self, resp) -> None:
        """Wrap ``resp``; raises ValueError when ``resp`` is None."""
        if resp is None:
            raise ValueError("Response object cannot be None")
        self._resp = resp
        # Body bytes, filled in by the first read() so the underlying object is read only once.
        self._content = None

    @property
    def status_code(self) -> int:
        """Return the status from ``status``, then ``code``, then ``getcode()``.

        Raises AttributeError when the wrapped object offers none of the three.
        """
        for attr_name in ("status", "code"):
            if hasattr(self._resp, attr_name):
                return getattr(self._resp, attr_name)
        if hasattr(self._resp, "getcode"):
            return self._resp.getcode()
        raise AttributeError(f"Could not find status code in response object of type {type(self._resp)}")

    def read(self) -> bytes:
        """Return the response body, reading the wrapped object on first use and caching the result."""
        cached = self._content
        if cached is None:
            cached = self._content = self._resp.read()
        return cached

    def text(self) -> str:
        """Return the body decoded as UTF-8."""
        raw = self.read()
        return raw.decode("utf-8")

    def json(self) -> dict:
        """Return the body parsed as JSON."""
        return json.loads(self.text())


def http_request(
    method: str,
    url: str,
    headers: Optional[Dict[str, str]] = None,
    body: Optional[bytes] = None,
    *,
    timeout: Optional[float] = None,
) -> HTTPResponse:
    """Make an HTTP request and return an HTTPResponse object.

    :param str method: HTTP method passed to ``urllib.request.Request``.
    :param str url: Target URL.
    :param headers: Optional request headers.
    :param bytes body: Optional request body, sent as the request data.
    :param float timeout: Optional timeout in seconds for the request. When omitted,
        ``urlopen`` is called without a timeout argument, exactly as before.

    :returns: An HTTPResponse wrapping the response. HTTP error statuses do not raise:
        the ``HTTPError`` is wrapped and returned so callers can inspect its status code and body.
    :raises urllib.error.URLError: For failures that are not ``HTTPError`` (only ``HTTPError`` is caught here).
    """
    # Pass headers through the constructor so Request normalizes the keys via add_header();
    # mutating req.headers directly bypasses that and breaks has_header()/get_header() lookups.
    req = urllib.request.Request(url, data=body, headers=headers or {}, method=method)
    # Only forward a timeout when one was given, so the default behavior is unchanged.
    open_kwargs = {} if timeout is None else {"timeout": timeout}
    try:
        return HTTPResponse(urllib.request.urlopen(req, **open_kwargs))
    except HTTPError as e:
        # HTTPError is itself a readable response object; wrap it instead of raising.
        return HTTPResponse(e)
8 changes: 8 additions & 0 deletions ddtrace/llmobs/_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
from ddtrace.llmobs._constants import EVP_SUBDOMAIN_HEADER_NAME
from ddtrace.llmobs._constants import EVP_SUBDOMAIN_HEADER_VALUE
from ddtrace.llmobs._utils import safe_json
from ddtrace.internal.utils.formats import asbool
import os


logger = get_logger(__name__)
Expand Down Expand Up @@ -208,12 +210,18 @@ def encode(self):
events = self._buffer
self._init_buffer()
data = {"_dd.stage": "raw", "_dd.tracer_version": ddtrace.__version__, "event_type": "span", "spans": events}
if asbool(os.getenv("DD_EXPERIMENTS_RUNNER_ENABLED")):
data["_dd.scope"] = "experiments"
try:
enc_llm_events = safe_json(data)
if isinstance(enc_llm_events, str):
enc_llm_events = enc_llm_events.encode('utf-8')
logger.debug("encode %d LLMObs span events to be sent", len(events))

except TypeError:
logger.error("failed to encode %d LLMObs span events", len(events), exc_info=True)
return None, 0
# print(enc_llm_events)
return enc_llm_events, len(events)


Expand Down
11 changes: 11 additions & 0 deletions ddtrace/llmobs/experimentation/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
"""
LLM Datasets and Experiments.
"""
from ._experiments import Dataset
from ._experiments import Experiment
from ._experiments import task
from ._experiments import evaluator
from ._experiments import init


__all__ = ["Dataset", "Experiment", "task", "evaluator", "init"]
Loading
Loading