Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions integrations/opentelemetry/.env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
## This is an example of a .env file
## This file should be renamed to .env and filled with the appropriate values

## This file is used to store environment variables for the project
## This file should not be committed to the repository

## OpenAI API Key
OPENAI_API_KEY=

## OpenAI API URL Override (Optional)
OPENAI_API_URL=

## OpenTelemetry endpoint (Optional)
OTEL_EXPORTER_OTLP_ENDPOINT=

## Anthropic API Key (Optional)
ANTHROPIC_API_KEY=
14 changes: 14 additions & 0 deletions integrations/opentelemetry/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
import sys
from pathlib import Path

from dotenv import load_dotenv

# Read variables (API keys, endpoints) from a local .env file into the environment.
load_dotenv()

# Make this example's "src" and "tests" folders importable by bare module name.
root_path = Path(__file__).parent
subfolders = ["src", "tests"]
for subfolder in subfolders:
    directory = str(root_path.joinpath(subfolder).resolve())
    print(f"appending folder: {subfolder}, as directory: {directory}")
    sys.path.append(directory)
40 changes: 40 additions & 0 deletions integrations/opentelemetry/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Purpose
> To provide an example of validating LLM responses through observability. This example does **not** actually use _any_ of the CAT python library functionality. It's more of an example of how one _might_ do continuous alignment through observability. Perhaps we could call that CAO, Continuous Alignment in Observability.
# Overview
> This example uses [OpenLIT](https://github.com/openlit/openlit) to auto-instrument calls to an LLM's API, providing **[OpenTelemetry](https://opentelemetry.io)-native** observability. [OpenTelemetry Collector](https://github.com/open-telemetry/opentelemetry-collector-contrib) is used to process the traces and attach validations on the fly to the traces, ready for downstream visualization.

> This is a simple, hardcoded example to prove out the possibility. There are a number of places for automation and further development that _might_ be quite time consuming.
# Running the example
## Setup (if needed)
> Clone this repo locally
```shell
git clone https://github.com/thisisartium/continuous-alignment-testing
```
> Install dependencies
#### Install package manager
* install [uv](https://docs.astral.sh/uv/getting-started/installation) - Python package manager
* `brew install uv`
#### Install dependencies
```shell
uv pip install openlit
uv sync
```
#### Setup environment
> populate your new `.env` file with required values
```shell
cp .env.example .env
```

> Setup environment
## Running OpenTelemetry Collector
> Run the following command
```shell
docker run -p 4317:4317 -p 4318:4318 -v $(pwd)/integrations/opentelemetry/src/config.yaml:/etc/otelcol/config.yaml otel/opentelemetry-collector-contrib:latest --config /etc/otelcol/config.yaml
```
## Executing LLM calls using a test
> Run one of the tests found in `/integrations/opentelemetry/tests/test_responses_available_in_opentelemetry.py`
## See the results
> Look at the logs from the OpenTelemetry Collector. At the end you'll see a line similar to the following.
```shell
validations: Map({"correct_developer_suggested":true,"no_developer_name_is_hallucinated":true,"not_empty_response":true})
```
91 changes: 91 additions & 0 deletions integrations/opentelemetry/src/config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
# OpenTelemetry Collector configuration for the validation example.
# Receives OTLP traces, validates the LLM completion found on span events with
# OTTL statements, attaches the results as a "validations" attribute, and
# prints everything through the debug exporter.
receivers:
  otlp:
    protocols:
      grpc:
        endpoint: "0.0.0.0:4317"
      http:
        endpoint: "0.0.0.0:4318"

processors:
  transform:
    # A failing statement (e.g. a completion that is not valid JSON) is skipped
    # instead of dropping the telemetry.
    error_mode: ignore
    trace_statements:
      - context: spanevent
        statements:
          # useful variables
          - set(cache["response"], ParseJSON(attributes["gen_ai.completion"]))
          - set(cache["developers"], cache["response"]["developers"])
          - set(cache["count"], Len(cache["developers"]))

          # validation variables
          - set(cache["not_empty_response"], false)
          - set(cache["correct_developer_suggested"], false)
          - set(cache["no_developer_name_is_hallucinated"], false)

          # not empty response
          - set(cache["not_empty_response"], true) where cache["count"] > 0

          # validate developers
          # Only the first ten entries (indexes 0-9) are inspected.
          # correct_developer_suggested: true when ANY inspected name is an expected one.
          # no_developer_name_is_hallucinated: starts true for a non-empty list and is
          # flipped to false by ANY inspected entry whose name is not in the known
          # roster, so a single unknown name anywhere in the list fails the check.
          - set(cache["no_developer_name_is_hallucinated"], true) where cache["count"] > 0

          - set(cache["0_name"], cache["developers"][0]["name"]) where cache["count"] > 0
          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["0_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["0_name"] != nil
          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 0 and not IsMatch(cache["0_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson")

          - set(cache["1_name"], cache["developers"][1]["name"]) where cache["count"] > 1
          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["1_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["1_name"] != nil
          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 1 and not IsMatch(cache["1_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson")

          - set(cache["2_name"], cache["developers"][2]["name"]) where cache["count"] > 2
          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["2_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["2_name"] != nil
          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 2 and not IsMatch(cache["2_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson")

          - set(cache["3_name"], cache["developers"][3]["name"]) where cache["count"] > 3
          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["3_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["3_name"] != nil
          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 3 and not IsMatch(cache["3_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson")

          - set(cache["4_name"], cache["developers"][4]["name"]) where cache["count"] > 4
          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["4_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["4_name"] != nil
          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 4 and not IsMatch(cache["4_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson")

          - set(cache["5_name"], cache["developers"][5]["name"]) where cache["count"] > 5
          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["5_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["5_name"] != nil
          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 5 and not IsMatch(cache["5_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson")

          - set(cache["6_name"], cache["developers"][6]["name"]) where cache["count"] > 6
          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["6_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["6_name"] != nil
          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 6 and not IsMatch(cache["6_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson")

          - set(cache["7_name"], cache["developers"][7]["name"]) where cache["count"] > 7
          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["7_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["7_name"] != nil
          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 7 and not IsMatch(cache["7_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson")

          - set(cache["8_name"], cache["developers"][8]["name"]) where cache["count"] > 8
          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["8_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["8_name"] != nil
          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 8 and not IsMatch(cache["8_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson")

          - set(cache["9_name"], cache["developers"][9]["name"]) where cache["count"] > 9
          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["9_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["9_name"] != nil
          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 9 and not IsMatch(cache["9_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson")

          # save results
          - set(attributes["validations"]["correct_developer_suggested"], cache["correct_developer_suggested"])
          - set(attributes["validations"]["no_developer_name_is_hallucinated"], cache["no_developer_name_is_hallucinated"])
          - set(attributes["validations"]["not_empty_response"], cache["not_empty_response"])

          # cleanup
          - set(cache, {})

exporters:
  debug:
    verbosity: detailed

service:
  telemetry:
    logs:
      level: debug

  pipelines:
    traces:
      receivers: [otlp]
      processors: [transform]
      exporters: [debug]

Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import json

import anthropic
import openlit
from helpers import load_json_fixture
from openai import OpenAI

# Initialise OpenLIT once at import time so the LLM client calls made by the
# tests below are instrumented.
# NOTE(review): presumably exports to OTEL_EXPORTER_OTLP_ENDPOINT from .env — confirm.
openlit.init()


def test_anthropic_to_opentelemetry():
    """Call Anthropic's Messages API and check the reply lists a developer.

    The reply text is parsed as JSON and must contain at least one entry
    under ``"developers"``. A reply that is not valid JSON is only reported
    via ``print`` and does not fail the test.
    """
    client = anthropic.Anthropic()
    assert client is not None

    message = client.messages.create(
        max_tokens=8192,
        model="claude-3-7-sonnet-20250219",
        system=system_prompt(),
        messages=[
            {"role": "user", "content": user_prompt()},
        ],
    )
    raw_text = message.content[0].text

    try:
        payload = json.loads(raw_text)
    except json.JSONDecodeError as err:
        print(f"JSON Exception: {err}")
        # Unparseable output is tolerated: the check stays at its passing default.
        has_developers = True
    else:
        print(payload)
        names = {entry["name"] for entry in payload["developers"]}
        has_developers = bool(names)

    assert has_developers


def test_openai_to_opentelemetry():
    """Call OpenAI's chat completions API and check the reply lists a developer.

    The completion is requested in JSON-object mode, parsed, and must contain
    at least one entry under ``"developers"``. A reply that is not valid JSON
    is only reported via ``print`` and does not fail the test.
    """
    client = OpenAI()
    assert client is not None

    completion = client.chat.completions.create(
        model="gpt-4-1106-preview",
        messages=[
            {"role": "system", "content": system_prompt()},
            {"role": "user", "content": user_prompt()},
        ],
        response_format={"type": "json_object"},
    )
    raw_text = completion.choices[0].message.content

    try:
        payload = json.loads(raw_text)
    except json.JSONDecodeError as err:
        print(f"JSON Exception: {err}")
        # Unparseable output is tolerated: the check stays at its passing default.
        has_developers = True
    else:
        names = {entry["name"] for entry in payload["developers"]}
        has_developers = bool(names)

    assert has_developers


def system_prompt():
    """Build the system prompt sent to the model.

    The prompt is the fixed task instructions with the ``example_output.json``
    fixture embedded as the expected response structure, followed by the
    string form of the ``skills.json`` fixture.
    """
    skills_data = load_json_fixture("skills.json")
    example_output = load_json_fixture("example_output.json")

    instructions = f"""
You will get a description of a project, and your task is
to tell me the best developers from the given list for the project based on their skills.
Today's date is April 15th, 2025.
Pick only developers who are available after the project start date.
Pick people with higher skill levels first.
Respond in json with this structure:
{example_output}

Here is the skills data:
"""

    return "".join((instructions, str(skills_data)))


def user_prompt():
    """Return the fixed project description used as the user message."""
    project_brief = """
This is a mobile project for telecommunication company. The project starts June 3rd.
It will find exciting moments from sports highlights videos.
"""
    return project_brief
24 changes: 24 additions & 0 deletions integrations/opentelemetry/tests/fixtures/example_output.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{
"developers": [
{
"name": "Bob",
"availableStartDate": "2025-05-19T00:00:00Z",
"relevantSkills": [
{
"skill": "Javascript",
"level": "3"
}
]
},
{
"name": "Alice",
"availableStartDate": "2025-05-19T00:00:00Z",
"relevantSkills": [
{
"skill": "Python",
"level": "4"
}
]
}
]
}
50 changes: 50 additions & 0 deletions integrations/opentelemetry/tests/fixtures/output_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"type": "object",
"properties": {
"developers": {
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {
"type": "string"
},
"availableStartDate": {
"type": "string",
"format": "date-time"
},
"relevantSkills": {
"type": "array",
"items": {
"type": "object",
"properties": {
"skill": {
"type": "string"
},
"level": {
"type": "string"
}
},
"required": [
"skill",
"level"
],
"additionalProperties": false
}
}
},
"required": [
"name",
"availableStartDate",
"relevantSkills"
],
"additionalProperties": false
}
}
},
"required": [
"developers"
],
"additionalProperties": false
}
Loading