thisisartium · tkersey · Mar 14, 2025
diff --git a/integrations/opentelemetry/.env.example b/integrations/opentelemetry/.env.example
@@ -0,0 +1,17 @@
+## This is an example of a .env file
+## This file should be renamed to .env and filled with the appropriate values
+
+## This file is used to store environment variables for the project
+## The resulting .env file should not be committed to the repository
+
+## OpenAI API Key
+OPENAI_API_KEY=
+
+## OpenAI API URL Override (Optional)
+OPENAI_API_URL=
+
+## OpenTelemetry endpoint (Optional)
+OTEL_EXPORTER_OTLP_ENDPOINT=
+
+## Anthropic API Key (Optional)
+ANTHROPIC_API_KEY=
diff --git a/integrations/opentelemetry/conftest.py b/integrations/opentelemetry/conftest.py
@@ -0,0 +1,14 @@
+import sys
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+root_path = (Path(__file__)).parent
+subfolders = ["src", "tests"]
+for subfolder in subfolders:
+    directory = str((root_path / subfolder).resolve())
+    print(f"appending folder: {subfolder}, as directory: {directory}")
+    sys.path.append(directory)
diff --git a/integrations/opentelemetry/readme.md b/integrations/opentelemetry/readme.md
@@ -0,0 +1,40 @@
+# Purpose
+> To provide an example of validating LLM responses through observability. This example does **not** actually use _any_ of the CAT python library functionality. It's more of an example of how one _might_ do continuous alignment through observability. Perhaps we could call that CAO, Continuous Alignment in Observability.
+# Overview
+> This example uses [OpenLIT](https://github.com/openlit/openlit) to auto-instrument calls to an LLM's API, providing **[OpenTelemetry](https://opentelemetry.io)-native** observability. [OpenTelemetry Collector](https://github.com/open-telemetry/opentelemetry-collector-contrib) is used to process the traces and attach validations on the fly to the traces, ready for downstream visualization.
+
+> This is a simple, hardcoded example to prove out the possibility. There are a number of places for automation and further development that _might_ be quite time consuming.
+# Running the example
+## Setup (if needed)
+> Clone this repo locally
+```shell
+git clone https://github.com/thisisartium/continuous-alignment-testing
+```
+> Install dependencies
+#### Install package manager
+* install [uv](https://docs.astral.sh/uv/getting-started/installation) - Python package manager
+  * `brew install uv`
+#### Install dependencies
+```shell
+uv pip install openlit
+uv sync
+```
+#### Setup environment
+> Copy the example file to `.env`, then populate your new `.env` file with the required values
+```shell
+cp .env.example .env
+```
+
+> Setup environment
+## Running OpenTelemetry Collector
+> Run the following command from the repository root (the mounted config path is relative to it)
+```shell
+docker run -p 4317:4317 -p 4318:4318 -v $(pwd)/integrations/opentelemetry/src/config.yaml:/etc/otelcol/config.yaml otel/opentelemetry-collector-contrib:latest --config /etc/otelcol/config.yaml
+```
+## Executing LLM calls using a test
+> Run one of the tests found in `integrations/opentelemetry/tests/example_1_opentelemetry_integration/test_responses_available_in_opentelemetry.py`
+## See the results
+> Look at the logs from the OpenTelemetry Collector. At the end you'll see a line similar to the following.
+```shell
+validations: Map({"correct_developer_suggested":true,"no_developer_name_is_hallucinated":true,"not_empty_response":true})
+```
diff --git a/integrations/opentelemetry/src/config.yaml b/integrations/opentelemetry/src/config.yaml
@@ -0,0 +1,91 @@
+# Collector configuration: receive OTLP traces, attach "validations" to each
+# span event via OTTL statements, and print the result with the debug exporter.
+receivers:
+  otlp:
+    protocols:
+      grpc:
+        endpoint: "0.0.0.0:4317"
+      http:
+        endpoint: "0.0.0.0:4318"
+
+processors:
+  transform:
+    # A failing statement (e.g. ParseJSON on a missing attribute) is skipped
+    # and the remaining statements still run.
+    error_mode: ignore
+    trace_statements:
+      - context: spanevent
+        statements:
+          # useful variables
+          - set(cache["response"], ParseJSON(attributes["gen_ai.completion"]))
+          - set(cache["developers"], cache["response"]["developers"])
+          - set(cache["count"], Len(cache["developers"]))
+
+          # validation variables
+          - set(cache["not_empty_response"], false)
+          - set(cache["correct_developer_suggested"], false)
+          - set(cache["no_developer_name_is_hallucinated"], false)
+
+          # not empty response
+          - set(cache["not_empty_response"], true) where cache["count"] > 0
+
+          # A non-empty list starts out as "nothing hallucinated"; every entry
+          # below may then clear the flag. The flag is therefore true only when
+          # ALL listed developers have a name from the known list. (Setting it
+          # to true per entry cannot express this: once true, a later unknown
+          # name would never turn it back to false.)
+          - set(cache["no_developer_name_is_hallucinated"], true) where cache["count"] > 0
+
+          # validate developers (up to the first 10 entries)
+          # correct_developer_suggested: true when ANY entry is an expected developer
+          # no_developer_name_is_hallucinated: cleared when an entry has no name or an unknown name
+          - set(cache["0_name"], cache["developers"][0]["name"]) where cache["count"] > 0
+          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["0_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["0_name"] != nil
+          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 0 and (cache["0_name"] == nil or not IsMatch(cache["0_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson"))
+
+          - set(cache["1_name"], cache["developers"][1]["name"]) where cache["count"] > 1
+          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["1_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["1_name"] != nil
+          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 1 and (cache["1_name"] == nil or not IsMatch(cache["1_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson"))
+
+          - set(cache["2_name"], cache["developers"][2]["name"]) where cache["count"] > 2
+          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["2_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["2_name"] != nil
+          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 2 and (cache["2_name"] == nil or not IsMatch(cache["2_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson"))
+
+          - set(cache["3_name"], cache["developers"][3]["name"]) where cache["count"] > 3
+          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["3_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["3_name"] != nil
+          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 3 and (cache["3_name"] == nil or not IsMatch(cache["3_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson"))
+
+          - set(cache["4_name"], cache["developers"][4]["name"]) where cache["count"] > 4
+          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["4_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["4_name"] != nil
+          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 4 and (cache["4_name"] == nil or not IsMatch(cache["4_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson"))
+
+          - set(cache["5_name"], cache["developers"][5]["name"]) where cache["count"] > 5
+          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["5_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["5_name"] != nil
+          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 5 and (cache["5_name"] == nil or not IsMatch(cache["5_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson"))
+
+          - set(cache["6_name"], cache["developers"][6]["name"]) where cache["count"] > 6
+          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["6_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["6_name"] != nil
+          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 6 and (cache["6_name"] == nil or not IsMatch(cache["6_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson"))
+
+          - set(cache["7_name"], cache["developers"][7]["name"]) where cache["count"] > 7
+          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["7_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["7_name"] != nil
+          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 7 and (cache["7_name"] == nil or not IsMatch(cache["7_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson"))
+
+          - set(cache["8_name"], cache["developers"][8]["name"]) where cache["count"] > 8
+          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["8_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["8_name"] != nil
+          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 8 and (cache["8_name"] == nil or not IsMatch(cache["8_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson"))
+
+          - set(cache["9_name"], cache["developers"][9]["name"]) where cache["count"] > 9
+          - set(cache["correct_developer_suggested"], true) where IsMatch(cache["9_name"], "Sam\\sThomas|Drew\\sAnderson|Alex\\sWilson|Alex\\sJohnson") and cache["9_name"] != nil
+          - set(cache["no_developer_name_is_hallucinated"], false) where cache["count"] > 9 and (cache["9_name"] == nil or not IsMatch(cache["9_name"], "Alex\\sAnderson|Alex\\sJohnson|Alex\\sWilson|Blake\\sDavis|Blake\\sJohnson|Blake\\sWilson|Casey\\sMoore|Casey\\sThomas|Casey\\sWilson|Drew\\sAnderson|Jamie\\sJohnson|Jamie\\sMiller|Jamie\\sMoore|Morgan\\sBrown|Sam\\sJohnson|Sam\\sMiller|Sam\\sThomas|Sam\\sWilson|Taylor\\sAnderson|Taylor\\sBrown|Taylor\\sJohnson|Taylor\\sWilson"))
+
+          # save results
+          - set(attributes["validations"]["correct_developer_suggested"], cache["correct_developer_suggested"])
+          - set(attributes["validations"]["no_developer_name_is_hallucinated"], cache["no_developer_name_is_hallucinated"])
+          - set(attributes["validations"]["not_empty_response"], cache["not_empty_response"])
+
+          # cleanup
+          - set(cache, {})
+
+exporters:
+  debug:
+    verbosity: detailed
+
+service:
+  telemetry:
+    logs:
+      level: debug
+
+  pipelines:
+    traces:
+      receivers: [otlp]
+      processors: [transform]
+      exporters: [debug]
+
diff --git a/...ry/tests/example_1_opentelemetry_integration/test_responses_available_in_opentelemetry.py b/...ry/tests/example_1_opentelemetry_integration/test_responses_available_in_opentelemetry.py
@@ -0,0 +1,93 @@
+import json
+
+import anthropic
+import openlit
+from helpers import load_json_fixture
+from openai import OpenAI
+
+openlit.init()
+
+
+def test_anthropic_to_opentelemetry():
+    client = anthropic.Anthropic()
+    assert client is not None
+
+    responses = (
+        client.messages.create(
+            max_tokens=8192,
+            model="claude-3-7-sonnet-20250219",
+            system=system_prompt(),
+            messages=[
+                {"role": "user", "content": user_prompt()},
+            ],
+        )
+        .content[0]
+        .text
+    )
+
+    not_empty_response = True
+
+    try:
+        json_object = json.loads(responses)
+        print(json_object)
+        developer_names = {developer["name"] for developer in json_object["developers"]}
+        not_empty_response = len(developer_names) != 0
+    except json.JSONDecodeError as e:
+        print(f"JSON Exception: {e}")
+
+    assert not_empty_response
+
+
+def test_openai_to_opentelemetry():
+    client = OpenAI()
+    assert client is not None
+
+    responses = (
+        client.chat.completions.create(
+            model="gpt-4-1106-preview",
+            messages=[
+                {"role": "system", "content": system_prompt()},
+                {"role": "user", "content": user_prompt()},
+            ],
+            response_format={"type": "json_object"},
+        )
+        .choices[0]
+        .message.content
+    )
+
+    not_empty_response = True
+
+    try:
+        json_object = json.loads(responses)
+        developer_names = {developer["name"] for developer in json_object["developers"]}
+        not_empty_response = len(developer_names) != 0
+    except json.JSONDecodeError as e:
+        print(f"JSON Exception: {e}")
+
+    assert not_empty_response
+
+
+def system_prompt():
+    skills_data = load_json_fixture("skills.json")
+    example_output = load_json_fixture("example_output.json")
+
+    system_prompt = f"""
+        You will get a description of a project, and your task is
+        to tell me the best developers from the given list for the project based on their skills.
+        Today's date is April 15th, 2025.
+        Pick only developers who are available after the project start date.
+        Pick people with higher skill levels first.
+        Respond in json with this structure:
+            {example_output}
+
+        Here is the skills data:
+        """
+
+    return system_prompt + str(skills_data)
+
+
+def user_prompt():
+    """Return the fixed project description sent as the user message by both tests."""
+    return """
+    This is a mobile project for telecommunication company. The project starts June 3rd.
+    It will find exciting moments from sports highlights videos.
+    """
diff --git a/integrations/opentelemetry/tests/fixtures/example_output.json b/integrations/opentelemetry/tests/fixtures/example_output.json
@@ -0,0 +1,24 @@
+{
+  "developers": [
+    {
+      "name": "Bob",
+      "availableStartDate": "2025-05-19T00:00:00Z",
+      "relevantSkills": [
+        {
+          "skill": "Javascript",
+          "level": "3"
+        }
+      ]
+    },
+    {
+      "name": "Alice",
+      "availableStartDate": "2025-05-19T00:00:00Z",
+      "relevantSkills": [
+        {
+          "skill": "Python",
+          "level": "4"
+        }
+      ]
+    }
+  ]
+}
diff --git a/integrations/opentelemetry/tests/fixtures/output_schema.json b/integrations/opentelemetry/tests/fixtures/output_schema.json
@@ -0,0 +1,50 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "type": "object",
+  "properties": {
+    "developers": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "name": {
+            "type": "string"
+          },
+          "availableStartDate": {
+            "type": "string",
+            "format": "date-time"
+          },
+          "relevantSkills": {
+            "type": "array",
+            "items": {
+              "type": "object",
+              "properties": {
+                "skill": {
+                  "type": "string"
+                },
+                "level": {
+                  "type": "string"
+                }
+              },
+              "required": [
+                "skill",
+                "level"
+              ],
+              "additionalProperties": false
+            }
+          }
+        },
+        "required": [
+          "name",
+          "availableStartDate",
+          "relevantSkills"
+        ],
+        "additionalProperties": false
+      }
+    }
+  },
+  "required": [
+    "developers"
+  ],
+  "additionalProperties": false
+}