diff --git a/Gemfile b/Gemfile
new file mode 100644
index 0000000..c20eb13
--- /dev/null
+++ b/Gemfile
@@ -0,0 +1,34 @@
+source "https://rubygems.org"
+# Hello! This is where you manage which Jekyll version is used to run.
+# When you want to use a different version, change it below, save the
+# file and run `bundle install`. Run Jekyll with `bundle exec`, like so:
+#
+# bundle exec jekyll serve
+#
+# This will help ensure the proper Jekyll version is running.
+# Happy Jekylling!
+#gem "jekyll", "~> 4.4.1"
+# This is the default theme for new Jekyll sites. You may change this to anything you like.
+gem "minima", "~> 2.5"
+# If you want to use GitHub Pages, remove the "gem "jekyll"" above and
+# uncomment the line below. To upgrade, run `bundle update github-pages`.
+# gem "github-pages", group: :jekyll_plugins
+gem "github-pages", "~> 232", group: :jekyll_plugins
+# If you have any plugins, put them here!
+group :jekyll_plugins do
+ gem "jekyll-feed", "~> 0.12"
+end
+
+# Windows and JRuby does not include zoneinfo files, so bundle the tzinfo-data gem
+# and associated library.
+platforms :mingw, :x64_mingw, :mswin, :jruby do
+ gem "tzinfo", ">= 1", "< 3"
+ gem "tzinfo-data"
+end
+
+# Performance-booster for watching directories on Windows
+gem "wdm", "~> 0.1", :platforms => [:mingw, :x64_mingw, :mswin]
+
+# Lock `http_parser.rb` gem to `v0.6.x` on JRuby builds since newer versions of the gem
+# do not have a Java counterpart.
+gem "http_parser.rb", "~> 0.6.0", :platforms => [:jruby]
diff --git a/LICENSE b/LICENSE
deleted file mode 100644
index 261eeb9..0000000
--- a/LICENSE
+++ /dev/null
@@ -1,201 +0,0 @@
- Apache License
- Version 2.0, January 2004
- http://www.apache.org/licenses/
-
- TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
- 1. Definitions.
-
- "License" shall mean the terms and conditions for use, reproduction,
- and distribution as defined by Sections 1 through 9 of this document.
-
- "Licensor" shall mean the copyright owner or entity authorized by
- the copyright owner that is granting the License.
-
- "Legal Entity" shall mean the union of the acting entity and all
- other entities that control, are controlled by, or are under common
- control with that entity. For the purposes of this definition,
- "control" means (i) the power, direct or indirect, to cause the
- direction or management of such entity, whether by contract or
- otherwise, or (ii) ownership of fifty percent (50%) or more of the
- outstanding shares, or (iii) beneficial ownership of such entity.
-
- "You" (or "Your") shall mean an individual or Legal Entity
- exercising permissions granted by this License.
-
- "Source" form shall mean the preferred form for making modifications,
- including but not limited to software source code, documentation
- source, and configuration files.
-
- "Object" form shall mean any form resulting from mechanical
- transformation or translation of a Source form, including but
- not limited to compiled object code, generated documentation,
- and conversions to other media types.
-
- "Work" shall mean the work of authorship, whether in Source or
- Object form, made available under the License, as indicated by a
- copyright notice that is included in or attached to the work
- (an example is provided in the Appendix below).
-
- "Derivative Works" shall mean any work, whether in Source or Object
- form, that is based on (or derived from) the Work and for which the
- editorial revisions, annotations, elaborations, or other modifications
- represent, as a whole, an original work of authorship. For the purposes
- of this License, Derivative Works shall not include works that remain
- separable from, or merely link (or bind by name) to the interfaces of,
- the Work and Derivative Works thereof.
-
- "Contribution" shall mean any work of authorship, including
- the original version of the Work and any modifications or additions
- to that Work or Derivative Works thereof, that is intentionally
- submitted to Licensor for inclusion in the Work by the copyright owner
- or by an individual or Legal Entity authorized to submit on behalf of
- the copyright owner. For the purposes of this definition, "submitted"
- means any form of electronic, verbal, or written communication sent
- to the Licensor or its representatives, including but not limited to
- communication on electronic mailing lists, source code control systems,
- and issue tracking systems that are managed by, or on behalf of, the
- Licensor for the purpose of discussing and improving the Work, but
- excluding communication that is conspicuously marked or otherwise
- designated in writing by the copyright owner as "Not a Contribution."
-
- "Contributor" shall mean Licensor and any individual or Legal Entity
- on behalf of whom a Contribution has been received by Licensor and
- subsequently incorporated within the Work.
-
- 2. Grant of Copyright License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- copyright license to reproduce, prepare Derivative Works of,
- publicly display, publicly perform, sublicense, and distribute the
- Work and such Derivative Works in Source or Object form.
-
- 3. Grant of Patent License. Subject to the terms and conditions of
- this License, each Contributor hereby grants to You a perpetual,
- worldwide, non-exclusive, no-charge, royalty-free, irrevocable
- (except as stated in this section) patent license to make, have made,
- use, offer to sell, sell, import, and otherwise transfer the Work,
- where such license applies only to those patent claims licensable
- by such Contributor that are necessarily infringed by their
- Contribution(s) alone or by combination of their Contribution(s)
- with the Work to which such Contribution(s) was submitted. If You
- institute patent litigation against any entity (including a
- cross-claim or counterclaim in a lawsuit) alleging that the Work
- or a Contribution incorporated within the Work constitutes direct
- or contributory patent infringement, then any patent licenses
- granted to You under this License for that Work shall terminate
- as of the date such litigation is filed.
-
- 4. Redistribution. You may reproduce and distribute copies of the
- Work or Derivative Works thereof in any medium, with or without
- modifications, and in Source or Object form, provided that You
- meet the following conditions:
-
- (a) You must give any other recipients of the Work or
- Derivative Works a copy of this License; and
-
- (b) You must cause any modified files to carry prominent notices
- stating that You changed the files; and
-
- (c) You must retain, in the Source form of any Derivative Works
- that You distribute, all copyright, patent, trademark, and
- attribution notices from the Source form of the Work,
- excluding those notices that do not pertain to any part of
- the Derivative Works; and
-
- (d) If the Work includes a "NOTICE" text file as part of its
- distribution, then any Derivative Works that You distribute must
- include a readable copy of the attribution notices contained
- within such NOTICE file, excluding those notices that do not
- pertain to any part of the Derivative Works, in at least one
- of the following places: within a NOTICE text file distributed
- as part of the Derivative Works; within the Source form or
- documentation, if provided along with the Derivative Works; or,
- within a display generated by the Derivative Works, if and
- wherever such third-party notices normally appear. The contents
- of the NOTICE file are for informational purposes only and
- do not modify the License. You may add Your own attribution
- notices within Derivative Works that You distribute, alongside
- or as an addendum to the NOTICE text from the Work, provided
- that such additional attribution notices cannot be construed
- as modifying the License.
-
- You may add Your own copyright statement to Your modifications and
- may provide additional or different license terms and conditions
- for use, reproduction, or distribution of Your modifications, or
- for any such Derivative Works as a whole, provided Your use,
- reproduction, and distribution of the Work otherwise complies with
- the conditions stated in this License.
-
- 5. Submission of Contributions. Unless You explicitly state otherwise,
- any Contribution intentionally submitted for inclusion in the Work
- by You to the Licensor shall be under the terms and conditions of
- this License, without any additional terms or conditions.
- Notwithstanding the above, nothing herein shall supersede or modify
- the terms of any separate license agreement you may have executed
- with Licensor regarding such Contributions.
-
- 6. Trademarks. This License does not grant permission to use the trade
- names, trademarks, service marks, or product names of the Licensor,
- except as required for reasonable and customary use in describing the
- origin of the Work and reproducing the content of the NOTICE file.
-
- 7. Disclaimer of Warranty. Unless required by applicable law or
- agreed to in writing, Licensor provides the Work (and each
- Contributor provides its Contributions) on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
- implied, including, without limitation, any warranties or conditions
- of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
- PARTICULAR PURPOSE. You are solely responsible for determining the
- appropriateness of using or redistributing the Work and assume any
- risks associated with Your exercise of permissions under this License.
-
- 8. Limitation of Liability. In no event and under no legal theory,
- whether in tort (including negligence), contract, or otherwise,
- unless required by applicable law (such as deliberate and grossly
- negligent acts) or agreed to in writing, shall any Contributor be
- liable to You for damages, including any direct, indirect, special,
- incidental, or consequential damages of any character arising as a
- result of this License or out of the use or inability to use the
- Work (including but not limited to damages for loss of goodwill,
- work stoppage, computer failure or malfunction, or any and all
- other commercial damages or losses), even if such Contributor
- has been advised of the possibility of such damages.
-
- 9. Accepting Warranty or Additional Liability. While redistributing
- the Work or Derivative Works thereof, You may choose to offer,
- and charge a fee for, acceptance of support, warranty, indemnity,
- or other liability obligations and/or rights consistent with this
- License. However, in accepting such obligations, You may act only
- on Your own behalf and on Your sole responsibility, not on behalf
- of any other Contributor, and only if You agree to indemnify,
- defend, and hold each Contributor harmless for any liability
- incurred by, or claims asserted against, such Contributor by reason
- of your accepting any such warranty or additional liability.
-
- END OF TERMS AND CONDITIONS
-
- APPENDIX: How to apply the Apache License to your work.
-
- To apply the Apache License to your work, attach the following
- boilerplate notice, with the fields enclosed by brackets "[]"
- replaced with your own identifying information. (Don't include
- the brackets!) The text should be enclosed in the appropriate
- comment syntax for the file format. We also recommend that a
- file or class name and description of purpose be included on the
- same "printed page" as the copyright notice for easier
- identification within third-party archives.
-
- Copyright [yyyy] [name of copyright owner]
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
diff --git a/README.md b/README.md
deleted file mode 100644
index 67172e2..0000000
--- a/README.md
+++ /dev/null
@@ -1 +0,0 @@
-Monocle Doc page
diff --git a/_config.yml b/_config.yml
new file mode 100644
index 0000000..fb257d2
--- /dev/null
+++ b/_config.yml
@@ -0,0 +1,58 @@
+# Welcome to Jekyll!
+#
+# This config file is meant for settings that affect your whole blog, values
+# which you are expected to set up once and rarely edit after that. If you find
+# yourself editing this file very often, consider using Jekyll's data files
+# feature for the data you need to update frequently.
+#
+# For technical reasons, this file is *NOT* reloaded automatically when you use
+# 'bundle exec jekyll serve'. If you change this file, please restart the server process.
+#
+# If you need help with YAML syntax, here are some quick references for you:
+# https://learn-the-web.algonquindesign.ca/topics/markdown-yaml-cheat-sheet/#yaml
+# https://learnxinyminutes.com/docs/yaml/
+#
+# Site settings
+# These are used to personalize your new site. If you look in the HTML files,
+# you will see them accessed via {{ site.title }}, {{ site.email }}, and so on.
+# You can create any custom variable you would like, and they will be accessible
+# in the templates via {{ site.myvariable }}.
+
+title: Project Monocle
+logo: /assets/img/Monocle-Logo-Color.png
+description: >- # this means to ignore newlines until "baseurl:"
+ Monocle helps developers and platform engineers building or managing GenAI apps monitor these in prod by
+ making it easy to instrument their code to capture traces that are compliant with the open-source cloud-native
+ observability ecosystem.
+show_downloads: false
+#baseurl: "" # the subpath of your site, e.g. /blog
+url: "https://monocle2ai.org" # the base hostname & protocol for your site, e.g. http://example.com
+#twitter_username: jekyllrb
+#github_username: jekyll
+
+# Build settings
+#theme: minimal
+remote_theme: pages-themes/minimal@v0.2.0
+plugins:
+- jekyll-remote-theme # add this line to the plugins list if you already have one
+# - jekyll-feed
+
+# Exclude from processing.
+# The following items will not be processed, by default.
+# Any item listed under the `exclude:` key here will be automatically added to
+# the internal "default list".
+#
+# Excluded items can be processed by explicitly listing the directories or
+# their entries' file path in the `include:` list.
+#
+# exclude:
+# - .sass-cache/
+# - .jekyll-cache/
+# - gemfiles/
+# - Gemfile
+# - Gemfile.lock
+# - node_modules/
+# - vendor/bundle/
+# - vendor/cache/
+# - vendor/gems/
+# - vendor/ruby/
diff --git a/_layouts/default.html b/_layouts/default.html
new file mode 100644
index 0000000..eb88ef8
--- /dev/null
+++ b/_layouts/default.html
@@ -0,0 +1,43 @@
+
+
+
+
+
+
+
+{% seo %}
+
+
+ {% include head-custom.html %}
+
+
+
+
+
+
\ No newline at end of file
diff --git a/assets/.DS_Store b/assets/.DS_Store
new file mode 100644
index 0000000..26319bd
Binary files /dev/null and b/assets/.DS_Store differ
diff --git a/assets/img/Monocle-Logo-Color.png b/assets/img/Monocle-Logo-Color.png
new file mode 100644
index 0000000..312b35c
Binary files /dev/null and b/assets/img/Monocle-Logo-Color.png differ
diff --git a/assets/img/cncf.png b/assets/img/cncf.png
new file mode 100644
index 0000000..b2ed6dc
Binary files /dev/null and b/assets/img/cncf.png differ
diff --git a/assets/img/lfaidata-horizontal-color.png b/assets/img/lfaidata-horizontal-color.png
new file mode 100644
index 0000000..6eebeef
Binary files /dev/null and b/assets/img/lfaidata-horizontal-color.png differ
diff --git a/assets/img/lfaidata-stacked-color.png b/assets/img/lfaidata-stacked-color.png
new file mode 100644
index 0000000..2e8cb47
Binary files /dev/null and b/assets/img/lfaidata-stacked-color.png differ
diff --git a/assets/img/monocle_chatbot_aws.png b/assets/img/monocle_chatbot_aws.png
new file mode 100644
index 0000000..c519c25
Binary files /dev/null and b/assets/img/monocle_chatbot_aws.png differ
diff --git a/demos.md b/demos.md
new file mode 100644
index 0000000..bae7f44
--- /dev/null
+++ b/demos.md
@@ -0,0 +1,5 @@
+---
+layout: default
+---
+
+# Okahu demo
\ No newline at end of file
diff --git a/documentation/Extending_monocle_python.md b/documentation/Extending_monocle_python.md
new file mode 100644
index 0000000..58a0d36
--- /dev/null
+++ b/documentation/Extending_monocle_python.md
@@ -0,0 +1,260 @@
+# Custom Instrumentation with Monocle
+
+This guide demonstrates how to use Monocle to instrument OpenAI and Vector DB interactions, collecting telemetry data to analyze and monitor their performance.
+
+## Overview
+
+The example includes the following components:
+
+- **OpenAI Client (`openai_client.py`)**: A client for interacting with OpenAI's Chat API
+- **Vector Database (`vector_db.py`)**: An in-memory vector database with OpenAI embeddings
+- **Output Processors**: Configuration files that define how to extract and structure telemetry data
+- **Example script**: Shows how to instrument and run the application
+
+## Component Details
+
+### OpenAI Client
+
+`OpenAIClient` is a wrapper around the OpenAI API that provides methods for:
+
+- Making chat completion requests via the `chat()` method
+- Formatting messages for the API using `format_messages()`
+- Handling API responses and errors
+
+```python
+# Initialize client
+client = OpenAIClient()
+
+# Format messages and send to OpenAI
+messages = client.format_messages(
+ system_prompts=["You are a helpful assistant."],
+ user_prompts=["Tell me a joke about programming."]
+)
+response = client.chat(messages=messages, model="gpt-3.5-turbo")
+```
+
+### Vector Database
+
+`InMemoryVectorDB` is a simple vector database implementation that:
+
+- Converts text to vector embeddings using OpenAI's embedding API
+- Stores vectors with associated metadata
+- Performs similarity searches using cosine similarity
+
+```python
+# Initialize vector database
+vector_db = InMemoryVectorDB()
+
+# Store documents
+vector_db.store_text("doc1", "Python is a programming language", {"source": "docs"})
+
+# Search for similar documents
+results = vector_db.search_by_text("programming languages", top_k=2)
+```
+
+## Instrumenting Your Code with Monocle
+
+### 1. Define Output Processors
+
+Output processors define what data to extract from your methods. Two examples are provided:
+
+#### Inference Output Processor
+
+`output_processor_inference.py` defines how to extract data from OpenAI chat completions:
+
+```python
+INFERENCE_OUTPUT_PROCESSOR = {
+ "type": "inference",
+ "attributes": [
+ [
+ # Entity attributes for the provider
+ {
+ "attribute": "type",
+ "accessor": lambda arguments: "openai"
+ },
+ {
+ "attribute": "deployment",
+ "accessor": lambda arguments: arguments['kwargs'].get('model', 'unknown')
+ },
+ # More attributes...
+ ]
+ ],
+ "events": [
+ {
+ "name": "data.input",
+ "attributes": [
+ {
+ "attribute": "input",
+ "accessor": lambda arguments: [
+ msg["content"]
+ for msg in arguments['kwargs'].get('messages', [])
+ ] if isinstance(arguments['kwargs'].get('messages'), list) else []
+ }
+ ]
+ },
+ # More events...
+ ]
+}
+```
+
+#### Vector DB Output Processor
+
+`output_processor_vector.py` defines how to extract data from vector database operations:
+
+```python
+VECTOR_OUTPUT_PROCESSOR = {
+ "type": "retrieval",
+ "attributes": [
+ [
+ # Vector store attributes
+ {
+ "attribute": "name",
+ "accessor": lambda arguments: type(arguments["instance"]).__name__,
+ },
+ # More attributes...
+ ]
+ ],
+ "events": [
+ {
+ "name": "data.input",
+ "attributes": [
+ {
+ "attribute": "input",
+ "accessor": lambda arguments: arguments["args"][0] if arguments["args"] else None
+ }
+ ]
+ },
+ # More events...
+ ]
+}
+```
+
+### 2. Accessor Functions
+
+The key to instrumentation is the `accessor` function, which extracts data from method calls:
+
+- `arguments["instance"]`: The object instance (e.g., the OpenAIClient or InMemoryVectorDB)
+- `arguments["args"]`: Positional arguments passed to the method
+- `arguments["kwargs"]`: Keyword arguments passed to the method
+- `arguments["result"]`: The return value from the method call
+
+These give you access to all inputs, outputs, and context of the instrumented methods.
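+
+As a sketch, an accessor should fail soft when a field is missing. The example below assumes the instrumented `chat()` method returns an OpenAI `ChatCompletion`-style object; adjust the attribute names if your client returns a plain dictionary instead.
+
+```python
+def extract_response(arguments):
+    """Pull the first choice's text out of the method result, if present."""
+    result = arguments.get("result")
+    try:
+        return result.choices[0].message.content
+    except (AttributeError, IndexError, TypeError):
+        return None
+
+def extract_total_tokens(arguments):
+    """Read token usage from the result, returning None when unavailable."""
+    usage = getattr(arguments.get("result"), "usage", None)
+    return getattr(usage, "total_tokens", None)
+
+OUTPUT_EVENT = {
+    "name": "data.output",
+    "attributes": [
+        {"attribute": "response", "accessor": extract_response},
+        {"attribute": "total_tokens", "accessor": extract_total_tokens},
+    ],
+}
+```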
+
+### 3. Configure Instrumentation
+
+Set up Monocle's telemetry system with your output processors:
+
+```python
+from monocle_apptrace.instrumentation.common.wrapper_method import WrapperMethod
+from monocle_apptrace.instrumentation.common.instrumentor import setup_monocle_telemetry
+
+setup_monocle_telemetry(
+ workflow_name="openai.app",
+ wrapper_methods=[
+ WrapperMethod(
+ package="openai_client", # Module name
+ object_name="OpenAIClient", # Class name
+ method="chat", # Method to instrument
+ span_name="openai_client.chat", # Span name in telemetry
+ output_processor=INFERENCE_OUTPUT_PROCESSOR
+ ),
+ # More method wrappers...
+ ]
+)
+```
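+
+As a sketch, the vector database search can be wrapped the same way. The module and method names below are taken from the components described earlier (`vector_db.py`, `InMemoryVectorDB.search_by_text`); adjust them if your layout differs.
+
+```python
+vector_search_wrapper = WrapperMethod(
+    package="vector_db",                    # Module containing InMemoryVectorDB
+    object_name="InMemoryVectorDB",         # Class name
+    method="search_by_text",                # Method to instrument
+    span_name="vector_db.search_by_text",   # Span name in telemetry
+    output_processor=VECTOR_OUTPUT_PROCESSOR
+)
+# Append vector_search_wrapper to the wrapper_methods list passed to setup_monocle_telemetry().
+```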
+
+## Running the Example
+
+1. Ensure you have your **OpenAI API key** available:
+
+```bash
+export OPENAI_API_KEY=your_api_key_here
+```
+
+2. Install the required packages:
+
+```bash
+pip install -r requirements.txt
+```
+
+3. Run the example script:
+
+```bash
+python example.py
+# Or use the provided shell script
+./run_example.sh
+```
+
+## Understanding the Telemetry Output
+
+Monocle generates JSON trace files in your directory with names like:
+`monocle_trace_openai.app__.json`
+
+### Output Format
+
+The trace files contain structured telemetry data:
+
+```json
+{
+ "name": "openai_client.chat",
+ "context": { /* trace context */ },
+ "attributes": {
+ "entity.2.type": "openai",
+ "entity.2.provider_name": "OpenAI",
+ "entity.2.deployment": "gpt-3.5-turbo",
+ "entity.2.inference_endpoint": "https://api.openai.com/v1",
+ "entity.3.name": "gpt-3.5-turbo",
+ "entity.3.type": "model.llm.gpt-3.5-turbo"
+ },
+ "events": [
+ {
+ "name": "data.input",
+ "timestamp": "2025-02-27T10:36:49.985586Z",
+ "attributes": {
+ "input": [
+ "You are a helpful AI assistant.",
+ "Tell me a short joke about programming."
+ ]
+ }
+ },
+ {
+ "name": "data.output",
+ "attributes": {
+ "response": "Why do programmers prefer dark mode? Because the light attracts bugs!"
+ }
+ },
+ {
+ "name": "metadata",
+ "attributes": {
+ "prompt_tokens": 26,
+ "completion_tokens": 14,
+ "total_tokens": 40
+ }
+ }
+ ]
+}
+```
+
+### Key Elements
+
+1. **Attributes**: Contains information about the instrumented entity:
+ - Model name and type
+ - Deployment details
+ - API endpoints
+ - Provider information
+
+2. **Events**: Contains captured events during the method execution:
+ - `data.input`: The inputs provided to the method
+ - `data.output`: The response or results from the method
+ - `metadata`: Additional information like token usage
+
+## Customizing for Your Application
+
+To instrument your own code:
+
+1. Create output processors tailored to your methods
+2. Use accessor functions to extract the data you need
+3. Set up telemetry with your method wrappers
+4. Run your application and analyze the generated traces
+
+By customizing the output processors, you can collect exactly the telemetry data you need from any Python method.
diff --git a/documentation/Extending_monocle_ts.md b/documentation/Extending_monocle_ts.md
new file mode 100644
index 0000000..a0db46a
--- /dev/null
+++ b/documentation/Extending_monocle_ts.md
@@ -0,0 +1,228 @@
+# Monocle Custom Instrumentation Guide
+
+Monocle allows you to easily instrument your GenAI applications to capture telemetry for both custom code and third-party libraries. This guide explains how to instrument your code, create output processors, and analyze the resulting telemetry.
+
+## Instrumenting Custom Code
+
+Monocle allows you to instrument your own custom wrappers around GenAI services. The `setupMonocle` function is used to configure instrumentation for your application.
+
+### Basic Setup
+
+```javascript
+const { setupMonocle } = require('monocle2ai');
+
+setupMonocle(
+ "myapp.name", // Service name
+ [], // Custom hooks array (empty here)
+ [ // Instrumentation configurations array
+ {
+ "package": require.resolve('./path/to/your/module'),
+ "object": "YourClass",
+ "method": "yourMethod",
+ "spanName": "customSpanName",
+ "output_processor": [
+ YOUR_OUTPUT_PROCESSOR
+ ]
+ }
+ ]
+);
+```
+
+### Configuration Parameters
+
+- **package**: Path to the module containing the class to instrument
+- **object**: Name of the class or object to instrument
+- **method**: Method name to instrument
+- **spanName**: Name of the span created when this method is called
+- **output_processor**: Array of processors that extract and format telemetry data
+
+## Output Processors
+
+Output processors define how to extract and format telemetry data from method calls. They have access to:
+
+- **arguments**: All arguments passed to the method
+- **instance**: The object instance (this)
+- **response**: The return value from the method
+
+### Output Processor Structure
+
+```javascript
+const EXAMPLE_OUTPUT_PROCESSOR = {
+ type: "inference", // Type of span (inference, retrieval, etc.)
+ attributes: [ // Arrays of attribute definitions
+ [
+ {
+ attribute: "name",
+ accessor: arguments => arguments.instance.someProperty
+ },
+ // More attributes...
+ ]
+ ],
+ events: [ // Events to capture
+ {
+ name: "data.input",
+ attributes: [
+ {
+ attribute: "input",
+ accessor: arguments => arguments.args[0] || null
+ }
+ ]
+ },
+ // More events...
+ ]
+};
+```
+
+## Example: Instrumenting Custom OpenAI Client
+
+Here's how we instrument a custom OpenAI client:
+
+```javascript
+setupMonocle(
+ "openai.app",
+ [],
+ [
+ {
+ "package": require.resolve('./custom_ai_code/openaiClient'),
+ "object": "OpenAIClient",
+ "method": "chat",
+ "spanName": "openaiClient.chat",
+ "output_processor": [
+ INFERENCE_OUTPUT_PROCESSOR
+ ]
+ }
+ ]
+);
+```
+
+The `INFERENCE_OUTPUT_PROCESSOR` extracts information like:
+- Model name and type from function arguments
+- Input prompts from method arguments
+- Response text from the method's return value
+- Usage metadata from the response object
+
+## Example: Instrumenting Vector Database
+
+```javascript
+{
+ "package": require.resolve('./custom_ai_code/vectorDb'),
+ "object": "InMemoryVectorDB",
+ "method": "searchByText",
+ "spanName": "vectorDb.searchByText",
+ "output_processor": [
+ VECTOR_OUTPUT_PROCESSOR
+ ]
+}
+```
+
+The `VECTOR_OUTPUT_PROCESSOR` captures:
+- Vector store name and type from the instance
+- Embedding model information
+- Query inputs and search results
+
+## Instrumenting NPM Modules
+
+You can also instrument third-party NPM modules like Google's Generative AI SDK:
+
+```javascript
+{
+ "package": "@google/generative-ai",
+ "object": "GenerativeModel",
+ "method": "generateContent",
+ "spanName": "gemini.generateContent",
+ "output_processor": [
+ GEMINI_OUTPUT_PROCESSOR
+ ]
+}
+```
+
+For NPM modules, specify the package name directly instead of using `require.resolve()`.
+
+## Output Processor to Trace Correlation
+
+Let's see how output processors translate to actual traces:
+
+### Vector DB Processor & Trace
+
+The Vector DB processor extracts:
+- Vector store name: `accessor: arguments => arguments.instance.constructor.name`
+- Query text: `accessor: arguments => arguments.args[0] || null`
+- Results: `accessor: arguments => arguments.response.map(...).join(", ")`
+
+This produces the following trace data:
+```json
+{
+ "name": "vectorDb.searchByText",
+ "attributes": {
+ "span.type": "retrieval",
+ "entity.2.name": "InMemoryVectorDB",
+ "entity.2.type": "vectorstore.InMemoryVectorDB",
+ "entity.3.name": "text-embedding-ada-002",
+ "entity.3.type": "model.embedding.text-embedding-ada-002"
+ },
+ "events": [
+ {
+ "name": "data.input",
+ "attributes": { "input": "programming languages" }
+ },
+ {
+ "name": "data.output",
+ "attributes": {
+ "response": "JavaScript is a high-level programming language, Machine learning is a subset of artificial intelligence"
+ }
+ }
+ ]
+}
+```
+
+### Gemini Output Processor & Trace
+
+The Gemini output processor extracts:
+- Model name: `accessor: arguments => arguments.instance.model`
+- Input: `accessor: arguments => ...input text extraction logic...`
+- Response: `accessor: arguments => arguments.response.response.text()`
+- Usage metrics: Extracting token counts from response metadata
+
+This produces the following trace data:
+```json
+{
+ "name": "gemini.generateContent",
+ "attributes": {
+ "span.type": "inference",
+ "entity.2.type": "gemini",
+ "entity.2.provider_name": "Google",
+ "entity.2.deployment": "models/gemini-1.5-flash",
+ "entity.3.name": "models/gemini-1.5-flash",
+ "entity.3.type": "model.llm.models/gemini-1.5-flash"
+ },
+ "events": [
+ {
+ "name": "data.input",
+ "attributes": { "input": ["Tell me a short joke about programming."] }
+ },
+ {
+ "name": "data.output",
+ "attributes": { "response": "Why do programmers prefer dark mode? Because light attracts bugs!\n" }
+ },
+ {
+ "name": "metadata",
+ "attributes": {
+ "prompt_tokens": 8,
+ "completion_tokens": 14,
+ "total_tokens": 22
+ }
+ }
+ ]
+}
+```
+
+## Best Practices
+
+1. **Accessor Functions**: Write robust accessor functions that handle missing or malformed data
+2. **Attribute Organization**: Group related attributes within the same array in the `attributes` section
+3. **Events**: Use standard event names like `data.input`, `data.output`, and `metadata`
+4. **Error Handling**: Add proper error handling in accessors to avoid instrumentation failures
+
+## Conclusion
+
+Monocle's custom instrumentation provides a flexible way to track your GenAI application's behavior. By defining output processors, you can extract meaningful telemetry data from any GenAI component, whether it's your custom code or a third-party library.
\ No newline at end of file
diff --git a/documentation/Monocle_Cookbook_python.md b/documentation/Monocle_Cookbook_python.md
new file mode 100644
index 0000000..33d2a4a
--- /dev/null
+++ b/documentation/Monocle_Cookbook_python.md
@@ -0,0 +1,118 @@
+# This cookbook provides recipes for various instrumentation solutions with Monocle
+
+## Generate out-of-the-box telemetry, without any code change
+If you have a Python app that runs locally, i.e. ```python my-app.py [args]``` (as opposed to being hosted in a cloud serverless container like AWS Lambda or Azure Functions), you can use the Monocle package to enable telemetry without any code change:
+```shell
+python -m monocle_apptrace my-app.py [args]
+```
+This will generate the trace files `monocle_trace_*.json` in the local directory.
+
+## Instrument your app to enable Monocle telemetry
+- Python
+Install the Monocle package, or add `monocle_apptrace` to your ```requirements.txt``` file.
+```shell
+pip install monocle_apptrace
+```
+Import the package and add a single line of code to enable Monocle telemetry:
+```python
+ from monocle_apptrace import setup_monocle_telemetry
+ setup_monocle_telemetry(workflow_name="your-app-name")
+```
+Now when you run the application, it will generate the trace files `monocle_trace_*.json` in the directory from which the application is run.
+
+## Combining multiple APIs under a single traceID
+By default, Monocle instrumentation generates a separate trace for every chain or API call that your application makes. If you want to combine the traces for multiple APIs under a single traceID, you can:
+- Use `start_trace()` and `stop_trace()` APIs
+```python
+token = start_trace()
+try:
+    embedding_api()
+    inference_api()
+finally:
+    stop_trace(token)
+```
+- Wrap the code in `monocle_trace`
+```python
+with monocle_trace():
+    embedding_api()
+    inference_api()
+```
+
+## Track application business logic coded in a top level application method/API
+Consider a chatbot application with a method called `conversation()` that implements a chat conversation thread with the end user. This method in turn calls other APIs like OpenAI and Langchain to use LLMs and generate responses.
+```python
+def conversation():
+    ...
+    message = input("How can I help you:")
+    cleaned_message = openai.chat.completions.create(message)   # ==> GenAI code
+    result = rag_chat_chain.invoke(cleaned_message)              # ==> GenAI code
+```
+By default, Monocle instrumentation generates a unique trace ID for every chain or API call that your application makes. This is very useful to track how your app is using GenAI services. However, that's often not sufficient. As an app developer or owner, you might want to look at the bigger picture from a logical or business context. For example, you may want to look at prompts or latency at the conversation level rather than at the API level. Monocle has a notion of [scopes](Monocle_User_Guide.md#scopes) which allows you to tie multiple traces/spans to a unique ID so you can group them.
+- Enabling a scope programmatically at the method level
+```python
+with monocle_trace_scope("conversation"):
+    message = input("How can I help you:")
+    cleaned_message = openai.chat.completions.create(message)   # ==> GenAI code
+    result = rag_chat_chain.invoke(cleaned_message)              # ==> GenAI code
+```
+- By adding a decorator `monocle_trace_scope_method` to this `conversation()` method
+```python
+@monocle_trace_scope_method("conversation")
+def conversation():
+    ...
+```
+- Configuring the method name in the ```monocle_scope.json``` file placed in the working directory of the application
+```json
+ {
+ "package": "myapp.bot",
+ "object": "chat",
+ "method": "conversation",
+ "scope_name": "conversation"
+ }
+```
+The above code will generate two traces (one per chain invocation). All the spans in these traces will have an attribute called `scope.conversation` with a unique value.
+```json
+"attributes": {
+ "span.type": "inference",
+ ...
+ "scope.conversation": "0xcb80e6f772968ed50ead80657b09cf52",
+```
+
+## Build on existing application logic to capture scope
+Imagine you have a chatbot where the frontend app is running in the browser and the backend GenAI code is running in a REST framework like Flask, or is hosted in a serverless cloud service like Azure Functions or AWS Lambda. Let's say the application has a notion of conversations, a chat thread that goes back and forth between the end user and the chatbot. A conversation ID is generated in the frontend to track each conversation and is sent as a REST header to the stateless backend to retrieve the right context. Monocle enables you to track this conversation ID as a scope, so all the GenAI APIs called during a conversation are marked with this unique conversation ID.
+- GenAI code running in Flask
+Monocle supports Flask instrumentation out of the box. All you need to do is add `setup_monocle_telemetry()` to your Flask app and specify the HTTP headers you want to track in the `monocle_scope.json` file.
+```python
+from flask import Flask, request, jsonify
+from monocle_apptrace.instrumentation.common.instrumentor import setup_monocle_telemetry
+
+web_app = Flask(__name__)
+setup_monocle_telemetry(workflow_name = "my-chatbot-webapp")
+
+def main():
+    web_app.run(host="0.0.0.0", port=8096, debug=False)
+
+@web_app.route('/chat', methods=["POST"])
+def chat():
+    try:
+        conversation_id = request.headers["conversation-id"]
+        question = request.args["question"]
+        response = answer_question(question, conversation_id)  # answer_question(): your GenAI chat logic
+        return response
+    except KeyError:
+        return jsonify({"error": "missing conversation-id header or question"}), 400
+
+```
+Save the `monocle_scope.json` file in the folder from which you run the Flask application:
+```json
+ {
+ "http_header": "client-id",
+ "scope_name": "conversation"
+ }
+```
+The above configuration attaches the scope to every trace generated while serving a request. All the spans in these traces will have an attribute called `scope.conversation`, set from the incoming header value.
+```json
+"attributes": {
+ "span.type": "inference",
+ ...
+ "scope.conversation": "conversion-id: 0xcb80e6f772968ed50ead80657b09cf52",
+```
diff --git a/documentation/Monocle_User_Guide.md b/documentation/Monocle_User_Guide.md
new file mode 100644
index 0000000..c21faec
--- /dev/null
+++ b/documentation/Monocle_User_Guide.md
@@ -0,0 +1,427 @@
+# Monocle User Guide
+
+## Monocle Concepts
+
+### Span
+A span is an observation of a code/method execution. Each span has a unique ID. It records the start time and end time of the code's execution along with additional information relevant to that operation. Before the code execution starts, a span object is created in the memory of the host process executing this code. It captures the current time as the start time of the span. At this stage the span is considered active, and it stays active until the code execution ends. Once the code execution is complete, it records the current time as the end time and captures any additional relevant information (e.g. arguments, return value, environment settings, etc.). Now the span is marked as closed and it will be queued to be saved to some configured storage.
+Note that the code that generated this span could in turn call other methods that are also instrumented. Those will generate spans of their own. These will be "child" spans, which refer to the span ID of the calling code as their "parent" span. An initial span which has no parent is referred to as the "root" span.
+
+### Trace
+A trace is a collection of spans with a common ID called the traceID. When the first active span gets created, a new unique traceID is generated and assigned to that span. All the child spans generated by the execution of other instrumented code/methods will share the same traceID. Once this top span ends, the trace ends. This way all the code executed as part of the top-level instrumented code will have a common traceID to group it together. For example, consider the following sequence where `f1()` is the first instrumented method executed; it calls other instrumented methods `f2(), f3(), f4()` and `f5()`:
+```
+f1()--> f2() --> f3()
+ --> f4() --> f5()
+```
+In the above sequence, each method execution generates a span, and they all share a common traceID. Now if a new instrumented method is executed after `f1()` finishes, it will create the first active span in the process's execution context and will get a new traceID.
+
+#### Trace ID propagation
+Each child span inherits the parent's trace ID. When spans run in the same process, the trace ID is picked up from the process memory/context. But consider the above example again, where the `f4()-->f5()` code is not part of the process that is executing `f1()`; it's a remote call, say over REST. From the overall application's point of view, the work done in `f4()` and `f5()` is part of `f1()`, and you want the same traceID associated with all spans. You want the instrumentation to seamlessly pass the traceID over such remote calls and continue it, instead of generating a new one. It's Monocle's responsibility to provide such a mechanism and make this trace ID propagation transparent to the application logic and architecture.
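+
+As a minimal sketch, if the `f4()`/`f5()` work above lives behind a Flask route, the route can be decorated with `monocle_trace_http_route` (documented in the API reference at the end of this guide) so that a trace ID arriving in the request headers is continued rather than replaced. The route and helper names here are hypothetical; the import paths follow the API reference links.
+
+```python
+from flask import Flask, request
+from monocle_apptrace.instrumentation.common.instrumentor import (
+    setup_monocle_telemetry,
+    monocle_trace_http_route,
+)
+
+app = Flask(__name__)
+setup_monocle_telemetry(workflow_name="moderator")
+
+@app.route("/chat", methods=["POST"])
+@monocle_trace_http_route          # continue the caller's trace ID from the incoming headers
+def chat():
+    prompt = request.args["question"]
+    ...                            # f4()/f5()-style GenAI calls here share the caller's trace ID
+    return "ok"
+```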
+
+### Types of spans in Monocle
+Monocle builds on generic OpenTelemetry spans by enriching them with additional attributes/data for genAI-specific operations.
+#### GenAI spans
+These are the core spans that capture details of genAI component operations, like a call to an LLM or a vector store. The purpose of these spans is to capture the details of the application's interaction with core genAI components. These spans are triggered by pre-instrumented methods that handle such operations.
+- Inference span
+Represents interactions with LLMs; captures details like the model, prompts, response and other metadata (e.g. tokens)
+- Retrieval span
+Represents interactions with vector stores, like embedding creation, vector retrieval etc. Captures the model, search query, response, vector embeddings etc.
+- HTTP span
+Represents communication between two different workflows where the traceID or scopes are propagated.
+
+#### Generic spans
+These are spans created by a top-level method that anchors a higher level of abstraction over the underlying core genAI APIs, for example a `langchain.invoke()` which under the covers calls `langchain.llm_invoke()` or `langchain.vector_retrieval()`. Consider the following pseudo code of a langchain RAG pattern API:
+```
+response = rag_chain.invoke(prompt)
+ --> cleaned_prompt = llm1.chat(prompt)
+ --> context = vector_store.retrieve(cleaned_prompt)
+ --> response = llm2.chat(system_prompt+context+cleaned_prompt)
+ --> return response
+```
+If we only instrument the top-level invoke call, then we'll trace the top-level prompt and response interaction between the application and langchain, but we'll miss details like how a system prompt was added and sent to multiple LLMs, and what context was extracted from the vector store. On the other hand, if we only instrument the low-level calls to the LLM and the vector store, then we'll miss the fact that those are part of the same RAG invocation. Hence we instrument all of them. This example would generate an anchor span for the `invoke()` method, a retrieval span for the `retrieve()` method and an inference span for each `chat()` call. All of these will have a common traceID.
+The anchor spans also provide an observation window into your application's interaction with a high-level SDK or service. They illustrate facts such as how much time is taken by the genAI service invocation compared to other local logic.
+
+#### Workflow spans
+Workflow spans are synthetic spans created to describe the full trace. A workflow span captures a summary of the trace including the time window, the process running the code (set as `workflow_name` in the API call that enables Monocle instrumentation) and runtime environment details such as the hosting service (Azure Functions, AWS Lambda, etc.).
+A workflow span is generated when a new trace starts or when a trace is propagated. Workflow spans provide the baseline observation window for the entire trace, or for the fragment of a trace executed in a process.
+Consider the following example:
+```
+setup_monocle_telemetry(workflow='bot')
+rag_chain.invoke()
+ --> context = retrieval()
+ --> new_prompt = REST --> azure.func.chat(prompt) -->
+ setup_monocle_telemetry(workflow='moderator')
+ return llm(moderator_system_prompt+prompt)
+ --> response = llm(new_prompt)
+```
+This will generate the following spans:
+```
+Span{name='workflow.bot', type= workflow, traceID = xx1, spanID = yy0, parentID=None} ==> Workflow for new trace start
+Span{name='chain.invoke', type=anchor, traceID = xx1, spanID = yy1, parentID=yy0} ==> anchor span for chain invoke
+Span{name='chain.retrieval', type=retrieval, traceID = xx1, spanID = yy2, parentID = yy1} ==> Retrieval API span
+Span{name='workflow.moderator', type=workflow, traceID = xx1, spanID = zz1, parentID=yy1} ==> Workflow for propagated trace fragment
+Span{name='az.func.chat', type=anchor, traceID = xx1, spanID = zz2, parentID=zz1} ==> anchor span for az function invoke
+Span{name='chain.infer', type=inference, traceID = xx1, spanID = zz3, parentID=zz2} ==> inference
+Span{name='chain.infer',type=inference, traceID = xx1, spanID = yy3, parentID=yy1} ==> inference
+```
+
+### Metamodel
+The Monocle metamodel is the way to manage standardization across all supported GenAI component stacks. It includes the list of components that Monocle can identify and extract metadata from. This helps in understanding and analyzing traces from applications that include multiple components and can evolve over time. This is one of the core values that Monocle provides to its user community.
+
+### Exporters
+The spans generated by Monocle need to be stored for future analysis. An exporter is the mechanism that sends finished spans to a destination such as the local file system, console, memory, an S3 bucket, an Azure Blob container or the Okahu service. Monocle provides multiple exporters, described in the [Exporting traces](#exporting-traces) section below.
+
+### Scopes
+While a trace is a physical/technical tracking of the APIs invoked by your application, a scope is a logical stage of your application that can be tracked with Monocle. For example, an OpenAI inference API invocation would map to a trace, while the series of inference and vector store API calls that facilitate a conversation in a chatbot app would be a scope. Monocle provides programmatic and declarative mechanisms to track scopes across traces.
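+
+As a sketch, the `monocle_trace_scope` context manager (see the API reference below) groups the traces produced inside it under one scope; `answer()` here is a hypothetical stand-in for your instrumented GenAI calls.
+
+```python
+from monocle_apptrace.instrumentation.common.instrumentor import monocle_trace_scope
+
+def answer(question: str) -> str:
+    return "..."  # placeholder for instrumented LLM / vector store calls
+
+with monocle_trace_scope("conversation"):        # value defaults to a random UUID
+    answer("What is an americano?")              # trace 1
+    answer("How is a latte different?")          # trace 2
+# every span in both traces carries the same scope.conversation attribute
+```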
+
+
+## Setup Monocle
+Monocle supports tracing GenAI applications coded in Python and TypeScript.
+
+
+### Instrument TypeScript GenAI code
+- Get the Monocle package
+
+```
+ npm install --save monocle2ai
+```
+- Instrument your app code
+```js
+ const { setupMonocle } = require("monocle2ai")
+ setupMonocle("your-app-name")
+```
+
+### Instrument Python GenAI code
+- Get the Monocle package
+
+```
+ pip install monocle_apptrace
+```
+- Import the Monocle package
+```python
+ from monocle_apptrace import setup_monocle_telemetry
+```
+- Setup instrumentation in your ```main()``` function
+```python
+ setup_monocle_telemetry(workflow_name="your-app-name")
+```
+## Understanding Monocle traces and spans
+Monocle spans provide details of each genAI operation executed by your application in a consistent metamodel format. A Monocle trace is an OpenTelemetry-compatible collection of [spans](https://opentelemetry.io/docs/specs/otel/trace/api/#span) with a common trace ID. Each span has a JSON structure that includes a traceID, a unique span ID and timestamps, as per the OpenTelemetry spec. There are three types of spans that Monocle generates:
+- `inference`: When an API is called to generate model inference
+- `retrieval`: When an API is called to generate embeddings and communicate with a vector store
+- `workflow`: A summary of the full trace
+The genAI-related information captured by Monocle is in the ```attributes``` and ```events``` sections of this span JSON. The ```attributes``` section lists the various entities that were part of the operation/API which generated this span, e.g. Azure OpenAI as a model inference provider, gpt-4o-mini as an LLM, etc. The ```events``` section includes the data and metadata from this operation, for example the prompt to the LLM, the response from the LLM and token details. Here's a [complete example](examples/monocle_trace.json) of traces generated by this [sample python application](examples/chatbot.py) instrumented with Monocle.
+### OpenTelemetry compatible span headers
+These span headers are included in every span.
+```json
+ "context": {
+ "trace_id": "0x62672060b60c246e5c7bfdf46d93e2b3", ==> Trace id common to all spans of this trace
+ "span_id": "0xfbd245d1509ef554", ==> Span id, unique to this span
+ "trace_state": "[]"
+ },
+ "kind": "SpanKind.INTERNAL",
+ "parent_id": "0x34fc562203a4a926",
+ "start_time": "2025-03-12T17:05:57.256058Z", ==> timestamp of span start
+ "end_time": "2025-03-12T17:05:57.720410Z", ==> timestamp of span end
+```
+
+### Inference span
+The inference span includes details of the genAI components used in the inference operation. The information is divided into two sections, attributes and events. A given trace can have multiple inference spans, one per inference call.
+#### Attribute
+The attributes part of the span provides details of components like the model and the model hosting service.
+```json
+"attributes": {
+ "monocle_apptrace.version": "0.3.0b6",
+ "span.type": "inference",
+ "entity.1.type": "inference.azure_openai", ==> Inference service type
+ "entity.1.deployment": "gpt-4o-mini",
+ "entity.1.inference_endpoint": "https://my-az-openai.openai.azure.com/",
+ "entity.2.name": "gpt-35-turbo", ==> ILLM
+ "entity.2.type": "model.llm.gpt-35-turbo",
+ "entity.count": 2
+}
+```
+#### Events
+```json
+"events": [
+ {
+ "name": "data.input", ==> Inputs to LLM
+ "timestamp": "2025-03-12T17:05:59.165628Z",
+ "attributes": {
+ "input": [
+ "{'system': \"You are an expert Q&A system that is trusted around the world.\\nAlways answer the query using the provided context information, and not prior knowledge.\\nSome rules to follow:\\n1. Never directly reference the given context in your answer.\\n2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.\"}",
+ "{'user': 'What is an americano?'}",
+ "[ChatMessage(role=, additional_kwargs={}, blocks=[TextBlock(block_type='text', text=\"You are an expert Q&A system that is trusted around the world.\\nAlways answer the query using the provided context information, and not prior knowledge.\\nSome rules to follow:\\n1. Never directly reference the given context in your answer.\\n2. Avoid statements like 'Based on the context, ...' or 'The context information ...' or anything along those lines.\")]), ChatMessage(role=, additional_kwargs={}, blocks=[TextBlock(block_type='text', text='Context information is below.\\n---------------------\\nfile_path: coffee.txt\\n\\nCoffee is a hot drink made from the roasted and ground seeds (coffee beans) of a tropical shrub\\nA latte consists of one or more shots of espresso, served in a glass (or sometimes a cup), into which hot steamed milk is added\\nAmericano is a type of coffee drink prepared by diluting an espresso shot with hot water at a 1:3 to 1:4 ratio, resulting in a drink that retains the complex flavors of espresso, but in a lighter way\\n---------------------\\nGiven the context information and not prior knowledge, answer the query.\\nQuery: What is an americano?\\nAnswer: ')])]"
+ ]
+ }
+ },
+ {
+ "name": "data.output", ==> Responses from LLM
+ "timestamp": "2025-03-12T17:05:59.165655Z",
+ "attributes": {
+ "response": [
+ "An Americano is a type of coffee drink prepared by diluting an espresso shot with hot water at a ratio of 1:3 to 1:4. This process results in a drink that retains the complex flavors of espresso while being lighter in taste."
+ ]
+ }
+ },
+ {
+ "name": "metadata", ==> Token metadata from LLM
+ "timestamp": "2025-03-12T17:05:59.165675Z",
+ "attributes": {
+ "temperature": 0.1,
+ "completion_tokens": 52,
+ "prompt_tokens": 220,
+ "total_tokens": 272
+ }
+ }
+]
+```
+
+### Retrieval span
+The retrieval span includes details of the genAI components used in the retrieval operation. The information is divided into two sections, attributes and events. A given trace could have multiple retrieval spans.
+#### Attribute
+The attributes describe the embedding model and the vector store used.
+```json
+"attributes": {
+ "monocle_apptrace.version": "0.3.0b6",
+ "span.type": "retrieval",
+ "entity.1.name": "ChromaVectorStore", ==> Vector store
+ "entity.1.type": "vectorstore.ChromaVectorStore",
+ "entity.2.name": "text-embedding-3-large", ==> Embedding model
+ "entity.2.type": "model.embedding.text-embedding-3-large",
+ "entity.count": 2
+}
+```
+
+#### Events
+The events capture the search and retrieval of vector data
+```json
+"events": [
+ {
+ "name": "data.input", ==> prompts to search
+ "timestamp": "2025-03-12T17:05:57.720379Z",
+ "attributes": {
+ "input": "What is an americano?"
+ }
+ },
+ {
+ "name": "data.output", ==> Context retrieved
+ "timestamp": "2025-03-12T17:05:57.720398Z",
+ "attributes": {
+ "response": "Coffee is a hot drink made from the roasted and ground seeds (coffee beans) of a tropical shrub\nA la..."
+ }
+ }
+]
+```
+
+### Workflow span
+A workflow span captures a summary of the trace, like the start and end of the full trace, the type of client tools, etc. Note that there's only one workflow span per trace fragment executed in a process.
+```json
+"attributes": {
+ "monocle_apptrace.version": "0.3.0b6",
+ "span.type": "workflow",entity.1.name": "my-chatbot", ==> workflow name set in setup_monocle_telemetry()
+ "entity.1.type": "workflow.llamaindex", ==> Type of framework
+ "entity.2.type": "app_hosting.github_codespace", ==> Application hosting environment
+ "entity.2.name": "my-chatbot-container-xyz",
+ "entity.count": 2
+}
+```
+
+## Exporting traces
+Monocle exporters handle storing traces for future analysis. By default each trace is stored as a JSON file in the directory where the app runs. You can configure the exporter by setting the environment variable MONOCLE_EXPORTER to one of the exporter settings listed below, e.g. ```MONOCLE_EXPORTER=file```.
+By default Monocle flushes the traces in batches of 10. Note that the traces are written to their destination asynchronously, so exporting doesn't impact the application's response. The following are the supported exporters.
+|Exporter Name| Exporter Setting|Description|Format|Trace destination|Additional configuration|
+|-|-|-|-|-|-|
+|File (default)| `file`|Export to local file system| JSON|local directory||
+|Console|`console`|Export to console|text|Console/stdout||
+|Memory|`memory`|keep in memory|string|Process memory||
+|s3|`s3`|Export to AWS S3 bucket|ND JSON|S3 bucket|Install Monocle aws package dependencies: `pip install monocle_apptrace[aws]` Env variables for s3 exporter: MONOCLE_AWS_ACCESS_KEY_ID or AWS_ACCESS_KEY_ID : AWS access key MONOCLE_AWS_SECRET_ACCESS_KEY or AWS_SECRET_ACCESS_KEY: AWS secret MONOCLE_S3_BUCKET_NAME: S3 bucket where traces will be stored MONOCLE_S3_KEY_PREFIX: ND JSON file name prefix (default: monocle_trace_)|
+|blob|`blob`|Export to Azure blob store|ND JSON|Blob container|Install Monocle Azure package dependencies: `pip install monocle_apptrace[azure]` Env variables for blob exporter: MONOCLE_BLOB_CONNECTION_STRING: Connection string for Azure blob store MONOCLE_BLOB_CONTAINER_NAME : Blob container to store the trace ndjson files|
+|okahu|`okahu`|Export to Okahu.ai service|JSON|Okahu Tenant|Env variables for Okahu exporter: OKAHU_API_KEY : API key for Okahu tenant|
+
+## Monocle coverage
+### GenAI application frameworks
+|Framework|Python|Typescript|
+|-|-|-|
+|Langchain|✅|✅|
+|Llama Index|✅|✅|
+|Haystack|✅|Not Applicable|
+
+### Inference Services API
+|API|Python|Typescript|
+|-|-|-|
+|OpenAI|✅|✅|
+|AWS Boto|✅|✅|
+|Anthropic|✅|✅|
+
+### Inference providers
+|Service|Python|Typescript|
+|-|-|-|
+|OpenAI|✅|✅|
+|Azure OpenAI|✅|✅|
+|AWS SageMaker|✅|✅|
+|AWS Bedrock|✅|✅|
+|Anthropic|✅|✅|
+|NVIDIA Triton|✅|❌|
+
+### Vector stores
+|Service|Python|Typescript|
+|-|-|-|
+|Chroma|✅|✅|
+|OpenSearch|✅|✅|
+
+## Using scopes
+Imagine you have a chatbot application that supports a long conversation, i.e. multiple question/answer exchanges back and forth between the end user and the bot. It uses various genAI tech components/services like LLMs and vector stores. A simple instrumentation will generate a trace per genAI API call (e.g. an invocation of a framework chat or a direct OpenAI API call). As the app developer or owner, you are more interested in tracking the conversations than just the APIs. Scopes in Monocle enable that use case.
+You can set the scope in your application either programmatically or declaratively. You can specify a value for the scope, or Monocle will generate a unique value (GUID), which gives you the option to choose what's best suited for your use case. Please see the [Monocle python cookbook](./Monocle_Cookbook_python.md) for details and examples.
+
+## Extending Monocle
+If you are using a genAI technology that's not yet supported by Monocle out of the box, or have your own proprietary code, you can extend Monocle to generate traces in the Monocle format.
+- [Extending monocle guide for python](Extending_monocle_python.md) and [example](./examples/custom/custom_instrumentation_python/)
+- [Extending monocle guide for typescript](Extending_monocle_ts.md) and [example](./examples/custom/custom_instrumentation_ts/)
+
+## Monocle API Reference
+### Python APIs
+#### [`setup_monocle_telemetry`](https://github.com/monocle2ai/monocle/blob/main/src/monocle_apptrace/instrumentation/common/instrumentor.py#L153)
+```python3
+def setup_monocle_telemetry(
+ workflow_name: str,
+ span_processors: List[opentelemetry.sdk.trace.SpanProcessor] = None,
+ span_handlers: Dict[str, monocle_apptrace.instrumentation.common.span_handler.SpanHandler] = None,
+ wrapper_methods: List[Union[dict, monocle_apptrace.instrumentation.common.wrapper_method.WrapperMethod]] = None,
+ union_with_default_methods: bool = True
+) -> None
+```
+
+Set up Monocle telemetry for the application.
+
+**Parameters:**
+
+| Name | Type | Description | Default |
+|---|---|---|---|
+| workflow_name | str | The name of the workflow to be used as the service name in telemetry. | None |
+| span_processors | List[SpanProcessor] | Custom span processors to use instead of the default ones. If None, BatchSpanProcessors with Monocle exporters will be used. | None |
+| span_handlers | Dict[str, SpanHandler] | Dictionary of span handlers to be used by the instrumentor, mapping handler names to handler objects. | None |
+| wrapper_methods | List[Union[dict, WrapperMethod]] | Custom wrapper methods for instrumentation. If None, default methods will be used. | None |
+| monocle_exporters_list | str, optional | Comma-separated list of exporters to use. This will override the env setting MONOCLE_EXPORTERS. Supported exporters are: s3, blob, okahu, file, memory, console. This can't be combined with `span_processors`. | None |
+| union_with_default_methods | bool, default=True | If True, combine the provided wrapper_methods with the default methods. If False, only use the provided wrapper_methods. | True |
+
+#### [`start_trace`](https://github.com/monocle2ai/monocle/blob/main/src/monocle_apptrace/instrumentation/common/instrumentor.py#L196)
+
+```python3
+def start_trace()
+```
+
+Starts a new trace. All the spans created after this call will be part of the same trace.
+
+**Returns:**
+
+| Type | Description |
+|---|---|
+| Token | A token representing the attached context for the workflow span. This token is to be used later to stop the current trace. Returns None if tracing fails. |
+
+**Raises:**
+
+| Type | Description |
+|---|---|
+| Exception | The function catches all exceptions internally and logs a warning. |
+
+#### [`stop_trace`](https://github.com/monocle2ai/monocle/blob/main/src/monocle_apptrace/instrumentation/common/instrumentor.py#L209)
+
+```python3
+def stop_trace(
+ token: object
+) -> None
+```
+
+Stops the current trace. Spans created after this call will no longer be part of the trace.
+
+**Parameters:**
+
+| Name | Type | Description | Default |
+|---|---|---|---|
+| token | object | The token that was returned when the trace was started. | required |
+
+**Returns:**
+
+| Type | Description |
+|---|---|
+| None | None |
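+
+A sketch of grouping two back-to-back calls into one trace, assuming the `start_trace`/`stop_trace` pair above (the `summarize` and `answer` helpers are placeholders for your own instrumented calls):
+
+```python3
+from monocle_apptrace.instrumentation.common.instrumentor import start_trace, stop_trace
+
+token = start_trace()
+try:
+    # Both calls below land in the same trace instead of one trace per call.
+    summary = summarize(document_text)  # placeholder
+    reply = answer(summary)             # placeholder
+finally:
+    stop_trace(token)
+```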
+
+#### [`start_scope`](https://github.com/monocle2ai/monocle/blob/main/src/monocle_apptrace/instrumentation/common/instrumentor.py#L229)
+
+```python3
+def start_scope(
+ scope_name: str,
+ scope_value: str = None
+) -> object
+```
+
+Start a new scope with the given name and an optional value. If no value is provided, a random UUID will be generated.
+
+All spans created after this call, across traces, will have the scope attached until the scope is stopped.
+
+**Parameters:**
+
+| Name | Type | Description | Default |
+|---|---|---|---|
+| scope_name | str | The name of the scope. | required |
+| scope_value | str | Optional value of the scope. If None, a random UUID will be generated. | None |
+
+**Returns:**
+
+| Type | Description |
+|---|---|
+| Token | A token representing the attached context for the scope. This token is to be used later to stop the current scope. |
+
+#### [`stop_scope`](https://github.com/monocle2ai/monocle/blob/main/src/monocle_apptrace/instrumentation/common/instrumentor.py#L232)
+
+```python3
+def stop_scope(
+ token: object
+) -> None
+```
+
+Stop the active scope. All the spans created after this will not have the scope attached.
+
+**Parameters:**
+
+| Name | Type | Description | Default |
+|---|---|---|---|
+| token | object | The token that was returned when the scope was started. | required |
+
+**Returns:**
+
+| Type | Description |
+|---|---|
+| None | None |
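+
+A sketch of the programmatic pairing (the `fetch_answer` call is a placeholder for your own instrumented code):
+
+```python3
+from monocle_apptrace.instrumentation.common.instrumentor import start_scope, stop_scope
+
+# Every span created between these two calls carries the "conversation" scope.
+token = start_scope("conversation", "conv-42")
+try:
+    reply = fetch_answer("What is Monocle?")  # placeholder
+finally:
+    stop_scope(token)
+```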
+
+#### [`monocle_trace_scope`](https://github.com/monocle2ai/monocle/blob/main/src/monocle_apptrace/instrumentation/common/instrumentor.py#L244)
+
+```python3
+def monocle_trace_scope(
+ scope_name: str,
+ scope_value: str = None
+)
+```
+
+Context manager to start and stop a scope. All spans created within the encapsulated code, across traces, will have the scope attached.
+
+**Parameters:**
+
+| Name | Type | Description | Default |
+|---|---|---|---|
+| scope_name | str | The name of the scope. | required |
+| scope_value | str | Optional value of the scope. If None, a random UUID will be generated. | None |
+
+#### [`monocle_trace_http_route`](https://github.com/monocle2ai/monocle/blob/main/src/monocle_apptrace/instrumentation/common/instrumentor.py#L264)
+
+```python3
+def monocle_trace_http_route(
+ func
+)
+```
+
+Decorator to continue traces and scopes across an HTTP route. It will also initiate new scopes from the HTTP headers if configured in `monocle_scopes.json`.
+
+All spans created in the route, across traces, will have the scope attached.
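+
+A sketch with Flask (the Flask app and route are assumptions for illustration; any HTTP framework route can be decorated the same way):
+
+```python3
+from flask import Flask, request
+from monocle_apptrace.instrumentation.common.instrumentor import monocle_trace_http_route
+
+app = Flask(__name__)
+
+@app.route("/chat", methods=["POST"])
+@monocle_trace_http_route
+def chat():
+    # Spans created while serving this request share the route's trace, and any
+    # scopes configured in monocle_scopes.json are picked up from the request headers.
+    question = request.json.get("question", "")
+    return {"answer": answer_question(question)}  # answer_question is a placeholder
+```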
diff --git a/documentation/Monocle_contributor_guide.md b/documentation/Monocle_contributor_guide.md
new file mode 100644
index 0000000..78a0d59
--- /dev/null
+++ b/documentation/Monocle_contributor_guide.md
@@ -0,0 +1 @@
+Coming soon ...
\ No newline at end of file
diff --git a/documentation/What-is-monocle.md b/documentation/What-is-monocle.md
new file mode 100644
index 0000000..a61657d
--- /dev/null
+++ b/documentation/What-is-monocle.md
@@ -0,0 +1,40 @@
+---
+layout: default
+---
+
+# Monocle for tracing GenAI app code
+
+Monocle is built for:
+- **app developers** to trace their app code in any environment without lots of custom code decoration
+- **platform engineers** to instrument apps in prod through wrapping instead of asking app devs to recode
+- **GenAI component providers** to add observability features to their products
+- **enterprises** to consume traces from GenAI apps in their existing open-source observability stack
+
+Benefits:
+- Monocle provides an implementation + package, not just a spec
+ - No expertise in OpenTelemetry spec required
+ - No bespoke implementation of that spec required
+ - No last-mile GenAI domain specific code required to instrument your app
+- Monocle provides consistency
+ - Connect traces across app code executions, model inference or data retrievals
+ - No cleansing of telemetry data across GenAI component providers required
+ - Works the same in personal lab dev or org cloud prod environments
+ - Send traces to location that fits your scale, budget and observability stack
+- Monocle is fully open source and community driven
+ - No vendor lock-in
+ - Implementation is transparent
+ - You can freely use or customize it to fit your needs
+
+## What Monocle provides
+
+- Easy to use code instrumentation.
+- OpenTelemetry compatible format for spans and traces.
+- Community-curated and extensible metamodel for consistent tracing of GenAI components.
+- Export of telemetry to local and cloud storage.
+
+## Get involved
+### Provide feedback
+- Submit issues and enhancement requests via GitHub Issues
+
+### Contribute
+- Monocle is a community-based open source project. We welcome your contributions.
diff --git a/documentation/examples/chatbot.py b/documentation/examples/chatbot.py
new file mode 100644
index 0000000..ecf53ad
--- /dev/null
+++ b/documentation/examples/chatbot.py
@@ -0,0 +1,73 @@
+import os, sys
+import logging
+from typing import Any, Dict, List
+import chromadb
+from chromadb.errors import InvalidCollectionException
+from llama_index.vector_stores.chroma import ChromaVectorStore
+from llama_index.core import Settings
+from llama_index.core import StorageContext
+from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
+from llama_index.llms.azure_openai import AzureOpenAI
+from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
+from monocle_apptrace.instrumentation.common.instrumentor import setup_monocle_telemetry
+setup_monocle_telemetry(workflow_name = "my-chatbot")
+chroma_collection_name="monocle_demo"
+
+# Create vector store and load data
+def setup_embedding(chroma_vector_store: ChromaVectorStore, embed_model):
+ documents = SimpleDirectoryReader(input_files= ["coffee.txt"]).load_data()
+
+ storage_context = StorageContext.from_defaults(vector_store=chroma_vector_store)
+ index = VectorStoreIndex.from_documents(
+ documents, storage_context=storage_context, embed_model=embed_model
+ )
+ index.storage_context.persist(persist_dir="vector_store")
+
+def get_vector_index() -> VectorStoreIndex:
+ chroma_client = chromadb.PersistentClient(path="vector_store")
+ create_embedding = False
+ embed_model = AzureOpenAIEmbedding(
+ model_name="text-embedding-3-large",
+ azure_deployment=os.environ.get("AZURE_OPENAI_EMBED_API_DEPLOYMENT"),
+ api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
+ api_version=os.environ.get("AZURE_OPENAI_API_VERSION"),
+ azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT")
+ )
+ try:
+ chroma_collection = chroma_client.get_collection(chroma_collection_name)
+ except InvalidCollectionException:
+ chroma_collection = chroma_client.create_collection(chroma_collection_name)
+ create_embedding = True
+ # construct vector store
+ chroma_vector_store = ChromaVectorStore(
+ chroma_collection=chroma_collection,
+ )
+ if create_embedding == True:
+ setup_embedding(chroma_vector_store, embed_model)
+ return VectorStoreIndex.from_vector_store(vector_store=chroma_vector_store, embed_model=embed_model)
+
+def run(index: VectorStoreIndex):
+ az_llm = AzureOpenAI(deployment_id=os.environ.get("AZURE_OPENAI_API_DEPLOYMENT"),
+ api_key=os.environ.get("AZURE_OPENAI_API_KEY"),
+ api_version=os.environ.get("AZURE_OPENAI_API_VERSION"),
+ azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"))
+
+ query_engine = index.as_query_engine(llm=az_llm)
+
+ while True:
+ prompt = input("\nAsk a coffee question [Press return to exit]: ")
+ if prompt == "":
+ break
+ response = query_engine.query(prompt)
+ print(response)
+
+def main():
+ logger = logging.getLogger()
+ logger.setLevel(logging.ERROR)
+ index = get_vector_index()
+ run(index)
+
+if __name__ == "__main__":
+ main()
+
diff --git a/documentation/examples/custom/custom_instrumentation_python/__init__.py b/documentation/examples/custom/custom_instrumentation_python/__init__.py
new file mode 100644
index 0000000..2b52acf
--- /dev/null
+++ b/documentation/examples/custom/custom_instrumentation_python/__init__.py
@@ -0,0 +1,3 @@
+from .openai_client import OpenAIClient
+
+__all__ = ['OpenAIClient']
diff --git a/documentation/examples/custom/custom_instrumentation_python/custom_ai_code/openai_client.py b/documentation/examples/custom/custom_instrumentation_python/custom_ai_code/openai_client.py
new file mode 100644
index 0000000..0f164a7
--- /dev/null
+++ b/documentation/examples/custom/custom_instrumentation_python/custom_ai_code/openai_client.py
@@ -0,0 +1,91 @@
+import os
+import requests
+import json
+from typing import List, Dict, Any, Optional
+
+
+class OpenAIClient:
+ """Client for interacting with OpenAI's Chat API."""
+
+ def __init__(self, api_key: Optional[str] = None):
+ """
+ Initialize the OpenAI client.
+
+ Args:
+ api_key: OpenAI API key. If not provided, will look for OPENAI_API_KEY env variable.
+ """
+ self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
+ if not self.api_key:
+ raise ValueError("OpenAI API key is required. Either pass it explicitly or set OPENAI_API_KEY environment variable.")
+
+ self.base_url = "https://api.openai.com/v1"
+
+ def chat(self, messages: List[Dict[str, str]], model: str = "gpt-3.5-turbo",
+ temperature: float = 0.7, max_tokens: Optional[int] = None,
+ top_p: float = 1.0, frequency_penalty: float = 0.0,
+ presence_penalty: float = 0.0) -> Dict[str, Any]:
+ """
+ Call OpenAI's chat completion API.
+
+ Args:
+ messages: List of message dictionaries with 'role' and 'content' keys
+ model: OpenAI model identifier to use
+ temperature: Sampling temperature (0-2)
+ max_tokens: Maximum tokens to generate
+ top_p: Nucleus sampling parameter
+ frequency_penalty: Penalty for token frequency
+ presence_penalty: Penalty for token presence
+
+ Returns:
+ Complete API response including content and metadata
+ """
+ url = f"{self.base_url}/chat/completions"
+
+ # Prepare request payload
+ payload = {
+ "model": model,
+ "messages": messages,
+ "temperature": temperature,
+ "top_p": top_p,
+ "frequency_penalty": frequency_penalty,
+ "presence_penalty": presence_penalty
+ }
+
+ if max_tokens:
+ payload["max_tokens"] = max_tokens
+
+ headers = {
+ "Content-Type": "application/json",
+ "Authorization": f"Bearer {self.api_key}"
+ }
+
+ # Make API request
+ response = requests.post(url, headers=headers, json=payload)
+
+ if response.status_code != 200:
+ raise Exception(f"OpenAI API request failed with status {response.status_code}: {response.text}")
+
+ return response.json()
+
+ def format_messages(self, system_prompts: List[str], user_prompts: List[str]) -> List[Dict[str, str]]:
+ """
+ Format system and user prompts into the message format required by OpenAI API.
+
+ Args:
+ system_prompts: List of system prompts
+ user_prompts: List of user prompts
+
+ Returns:
+ List of formatted message dictionaries
+ """
+ messages = []
+
+ # Add system messages
+ for system_prompt in system_prompts:
+ messages.append({"role": "system", "content": system_prompt})
+
+ # Add user messages
+ for user_prompt in user_prompts:
+ messages.append({"role": "user", "content": user_prompt})
+
+ return messages
diff --git a/documentation/examples/custom/custom_instrumentation_python/custom_ai_code/vector_db.py b/documentation/examples/custom/custom_instrumentation_python/custom_ai_code/vector_db.py
new file mode 100644
index 0000000..7b8e301
--- /dev/null
+++ b/documentation/examples/custom/custom_instrumentation_python/custom_ai_code/vector_db.py
@@ -0,0 +1,80 @@
+import os
+import numpy as np
+from typing import List, Dict, Any
+import requests
+import json
+
+class InMemoryVectorDB:
+ def __init__(self, api_key: str = None):
+ self.vectors: Dict[str, np.ndarray] = {}
+ self.metadata: Dict[str, Dict[str, Any]] = {}
+ self.api_key = api_key or os.environ.get("OPENAI_API_KEY")
+ self.embedding_endpoint = "https://api.openai.com/v1/embeddings"
+ self.model = "text-embedding-ada-002"
+
+
+ def _get_embedding(self, text: str) -> List[float]:
+ """Get embeddings from OpenAI API using direct HTTP request"""
+ headers = {
+ "Authorization": f"Bearer {self.api_key}",
+ "Content-Type": "application/json"
+ }
+ # Add the model name as a class field
+
+ data = {
+ "input": text,
+ "model": self.model
+ }
+
+ response = requests.post(
+ self.embedding_endpoint,
+ headers=headers,
+ data=json.dumps(data)
+ )
+
+ if response.status_code != 200:
+ raise Exception(f"API request failed: {response.text}")
+
+ return response.json()['data'][0]['embedding']
+
+ def store_text(self, id: str, text: str, metadata: Dict[str, Any] = None) -> None:
+ """Store text by converting it to a vector first"""
+ vector = self._get_embedding(text)
+ self.vectors[id] = np.array(vector)
+ if metadata:
+ self.metadata[id] = metadata
+ self.metadata.setdefault(id, {})['text'] = text
+
+ def search_by_text(self, query_text: str, top_k: int = 5) -> List[Dict[str, Any]]:
+ """Search using text query"""
+ query_vector = self._get_embedding(query_text)
+ return self.search(query_vector, top_k)
+
+ def store(self, id: str, vector: List[float], metadata: Dict[str, Any] = None) -> None:
+ """Store a vector with optional metadata"""
+ self.vectors[id] = np.array(vector)
+ if metadata:
+ self.metadata[id] = metadata
+
+ def search(self, query_vector: List[float], top_k: int = 5) -> List[Dict[str, Any]]:
+ """Search for similar vectors using cosine similarity"""
+ if not self.vectors:
+ return []
+
+ query_vec = np.array(query_vector)
+ similarities = {}
+
+ for id, vec in self.vectors.items():
+ similarity = np.dot(query_vec, vec) / (np.linalg.norm(query_vec) * np.linalg.norm(vec))
+ similarities[id] = similarity
+
+ sorted_results = sorted(similarities.items(), key=lambda x: x[1], reverse=True)[:top_k]
+
+ return [
+ {
+ "id": id,
+ "similarity": score,
+ "metadata": self.metadata.get(id, {})
+ }
+ for id, score in sorted_results
+ ]
diff --git a/documentation/examples/custom/custom_instrumentation_python/example_custom.py b/documentation/examples/custom/custom_instrumentation_python/example_custom.py
new file mode 100644
index 0000000..e211258
--- /dev/null
+++ b/documentation/examples/custom/custom_instrumentation_python/example_custom.py
@@ -0,0 +1,64 @@
+from output_processor_inference import INFERENCE_OUTPUT_PROCESSOR
+from output_processor_vector import VECTOR_OUTPUT_PROCESSOR
+from monocle_apptrace.instrumentation.common.wrapper_method import WrapperMethod
+from monocle_apptrace.instrumentation.common.instrumentor import setup_monocle_telemetry
+
+setup_monocle_telemetry(
+ workflow_name="openai.app",
+ wrapper_methods=[
+ WrapperMethod(
+ package="custom_ai_code.openai_client",
+ object_name="OpenAIClient",
+ method="chat",
+ span_name="openai_client.chat",
+ output_processor=INFERENCE_OUTPUT_PROCESSOR
+ ),
+ WrapperMethod(
+ package="custom_ai_code.vector_db",
+ object_name="InMemoryVectorDB",
+ method="search_by_text",
+ span_name="vector_db.search_by_text",
+ output_processor=VECTOR_OUTPUT_PROCESSOR
+ )
+ ],
+)
+
+from custom_ai_code.openai_client import OpenAIClient
+from custom_ai_code.vector_db import InMemoryVectorDB
+
+
+
+def main():
+ # Initialize clients
+ client = OpenAIClient()
+    vector_db = InMemoryVectorDB()  # Reads OPENAI_API_KEY from the environment by default
+
+ # Store some example texts
+ vector_db.store_text(
+ "doc1",
+ "Python is a high-level programming language",
+ {"source": "programming-docs"}
+ )
+ vector_db.store_text(
+ "doc2",
+ "Machine learning is a subset of artificial intelligence",
+ {"source": "ml-docs"}
+ )
+
+ # Search using text query
+ results = vector_db.search_by_text("programming languages", top_k=2)
+ print("\nVector search results:")
+ for result in results:
+ print(f"ID: {result['id']}, Similarity: {result['similarity']:.3f}")
+ print(f"Text: {result['metadata'].get('text', '')}")
+ print(", ".join([result['metadata'].get('text', '') for result in results]))
+ # Original OpenAI example
+ system_prompts = ["You are a helpful AI assistant."]
+ user_prompts = ["Tell me a short joke about programming."]
+ messages = client.format_messages(system_prompts, user_prompts)
+ response = client.chat(messages=messages, model="gpt-3.5-turbo", temperature=0.7)
+ print("\nOpenAI response:")
+ print(response["choices"][0]["message"]["content"])
+
+if __name__ == "__main__":
+ main()
diff --git a/documentation/examples/custom/custom_instrumentation_python/example_gemini.py b/documentation/examples/custom/custom_instrumentation_python/example_gemini.py
new file mode 100644
index 0000000..6e801d7
--- /dev/null
+++ b/documentation/examples/custom/custom_instrumentation_python/example_gemini.py
@@ -0,0 +1,49 @@
+import os
+import google.generativeai as genai
+from output_processor_gemini import GEMINI_OUTPUT_PROCESSOR
+from monocle_apptrace.instrumentation.common.wrapper_method import WrapperMethod
+from monocle_apptrace.instrumentation.common.instrumentor import setup_monocle_telemetry
+
+# Set up Monocle telemetry with instrumentation for Gemini
+setup_monocle_telemetry(
+ workflow_name="gemini.app",
+ wrapper_methods=[
+ WrapperMethod(
+ package="google.generativeai.generative_models",
+ object_name="GenerativeModel",
+ method="generate_content",
+ span_name="gemini.generate_content",
+ output_processor=GEMINI_OUTPUT_PROCESSOR
+ )
+ ],
+)
+
+def setup_gemini():
+ """Set up the Gemini API with API key."""
+ api_key = os.environ.get("GOOGLE_API_KEY")
+ if not api_key:
+ raise ValueError("Google API key is required. Please set GOOGLE_API_KEY environment variable.")
+
+ genai.configure(api_key=api_key)
+ return genai.GenerativeModel('gemini-1.5-flash')
+
+def main():
+ """Run examples using the Gemini API."""
+ try:
+ # Initialize Gemini model
+ model = setup_gemini()
+
+ # Example 1: Simple question
+ prompt = "Explain quantum computing in simple terms in 3 lines."
+ print(f"\nSending prompt to Gemini: {prompt}")
+
+ response = model.generate_content(contents=prompt)
+
+ print("\nGemini response:")
+ print(response.text)
+
+ except Exception as e:
+ print(f"Error occurred: {e}")
+
+if __name__ == "__main__":
+ main()
diff --git a/documentation/examples/custom/custom_instrumentation_python/output_processor_gemini.py b/documentation/examples/custom/custom_instrumentation_python/output_processor_gemini.py
new file mode 100644
index 0000000..87bff1c
--- /dev/null
+++ b/documentation/examples/custom/custom_instrumentation_python/output_processor_gemini.py
@@ -0,0 +1,78 @@
+GEMINI_OUTPUT_PROCESSOR = {
+ "type": "inference",
+ "attributes": [
+ [
+ {
+ "_comment": "provider type, name, deployment",
+ "attribute": "type",
+ "accessor": lambda arguments: "google",
+ },
+ {"attribute": "provider_name", "accessor": lambda arguments: "Google"},
+ {
+ "attribute": "deployment",
+ "accessor": lambda arguments: arguments["instance"]._model_name,
+ },
+ ],
+ [
+ {
+ "_comment": "LLM Model",
+ "attribute": "name",
+ "accessor": lambda arguments: arguments["instance"]._model_name,
+ },
+ {
+ "attribute": "type",
+ "_comment": "model.llm.",
+ "accessor": lambda arguments: f"model.llm.{arguments['instance']._model_name}",
+ },
+ ],
+ ],
+ "events": [
+ {
+ "name": "data.input",
+ "_comment": "input to Gemini",
+ "attributes": [
+ {
+ "attribute": "input",
+ "accessor": lambda arguments: (
+ [
+ (
+ arguments["args"][0]
+ if arguments["args"]
+ else arguments["kwargs"].get("contents", "")
+ )
+ ]
+ ),
+ }
+ ],
+ },
+ {
+ "name": "data.output",
+ "_comment": "output from Gemini",
+ "attributes": [
+ {
+ "attribute": "response",
+ "accessor": lambda arguments: [
+ (
+ arguments["result"].text
+ if hasattr(arguments["result"], "text")
+ else str(arguments["result"])
+ )
+ ],
+ }
+ ],
+ },
+ {
+ "name": "metadata",
+ "attributes": [
+ {
+ "_comment": "metadata from Gemini response",
+ "accessor": lambda arguments: {
+ "prompt_tokens": arguments["result"].usage_metadata.prompt_token_count,
+ "completion_tokens": arguments["result"].usage_metadata.candidates_token_count,
+ "total_tokens": arguments["result"].usage_metadata.total_token_count,
+ },
+ }
+ ],
+ },
+ ],
+}
diff --git a/documentation/examples/custom/custom_instrumentation_python/output_processor_inference.py b/documentation/examples/custom/custom_instrumentation_python/output_processor_inference.py
new file mode 100644
index 0000000..358f33b
--- /dev/null
+++ b/documentation/examples/custom/custom_instrumentation_python/output_processor_inference.py
@@ -0,0 +1,71 @@
+INFERENCE_OUTPUT_PROCESSOR = {
+ "type": "inference",
+ "attributes": [
+ [
+ {
+ "_comment": "provider type ,name , deployment , inference_endpoint",
+ "attribute": "type",
+ "accessor": lambda arguments: "openai"
+ },
+ {
+ "attribute": "provider_name",
+ "accessor": lambda arguments: "OpenAI"
+ },
+ {
+ "attribute": "deployment",
+ "accessor": lambda arguments: arguments['kwargs'].get('model', 'unknown')
+ },
+ {
+ "attribute": "inference_endpoint",
+ "accessor": lambda arguments: arguments['instance'].base_url
+ }
+ ],
+ [
+ {
+ "_comment": "LLM Model",
+ "attribute": "name",
+ "accessor": lambda arguments: arguments['kwargs'].get('model', 'unknown')
+ },
+ {
+ "attribute": "type",
+ "_comment": "model.llm.",
+ "accessor": lambda arguments: f"model.llm.{arguments['kwargs'].get('model', 'unknown')}"
+ }
+ ]
+ ],
+ "events": [
+ {"name": "data.input",
+ "_comment": "",
+ "attributes": [
+ {
+ "_comment": "this is input to LLM, the accessor extracts only the message contents",
+ "attribute": "input",
+ "accessor": lambda arguments: [
+ msg["content"]
+ for msg in arguments['kwargs'].get('messages', [])
+ ] if isinstance(arguments['kwargs'].get('messages'), list) else []
+ }
+ ]
+ },
+ {
+ "name": "data.output",
+ "_comment": "",
+ "attributes": [
+ {
+ "_comment": "this is output from LLM, it includes the string response which is part of a list",
+ "attribute": "response",
+ "accessor": lambda arguments: arguments['result']['choices'][0]['message']['content'] if 'result' in arguments and 'choices' in arguments['result'] else None
+ }
+ ]
+ },
+ {
+ "name": "metadata",
+ "attributes": [
+ {
+ "_comment": "this is metadata usage from LLM",
+ "accessor": lambda arguments: arguments['result']['usage'] if 'result' in arguments and 'usage' in arguments['result'] else {}
+ }
+ ]
+ }
+ ]
+}
diff --git a/documentation/examples/custom/custom_instrumentation_python/output_processor_vector.py b/documentation/examples/custom/custom_instrumentation_python/output_processor_vector.py
new file mode 100644
index 0000000..62bd741
--- /dev/null
+++ b/documentation/examples/custom/custom_instrumentation_python/output_processor_vector.py
@@ -0,0 +1,56 @@
+
+VECTOR_OUTPUT_PROCESSOR = {
+ "type": "retrieval",
+ "attributes": [
+ [
+ {
+ "_comment": "vector store name and type",
+ "attribute": "name",
+ "accessor": lambda arguments: type(arguments["instance"]).__name__,
+ },
+ {
+ "attribute": "type",
+ "accessor": lambda arguments: "vectorstore."
+ + type(arguments["instance"]).__name__,
+ },
+ {
+ "attribute": "deployment",
+ "accessor": lambda arguments: ""
+ },
+ ],
+ [
+ {
+ "_comment": "embedding model name and type",
+ "attribute": "name",
+ "accessor": lambda arguments: arguments["instance"].model
+ },
+ {
+ "attribute": "type",
+ "accessor": lambda arguments: "model.embedding." + arguments["instance"].model
+ },
+ ],
+ ],
+ "events": [
+ {
+ "name": "data.input",
+ "_comment": "query input to vector store",
+ "attributes": [
+ {
+ "attribute": "input",
+ "accessor": lambda arguments: arguments["args"][0] if arguments["args"] else None
+ }
+ ],
+ },
+ {
+ "name": "data.output",
+ "_comment": "results from vector store search",
+ "attributes": [
+ {
+ "attribute": "response",
+ "accessor": lambda arguments: ", ".join([resultItem['metadata'].get('text', '') for resultItem in arguments["result"]])
+
+ }
+ ],
+ }
+ ],
+}
diff --git a/documentation/examples/custom/custom_instrumentation_python/requirements.txt b/documentation/examples/custom/custom_instrumentation_python/requirements.txt
new file mode 100644
index 0000000..6d6a81e
--- /dev/null
+++ b/documentation/examples/custom/custom_instrumentation_python/requirements.txt
@@ -0,0 +1,4 @@
+requests>=2.28.0
+monocle-apptrace==0.3.0b4
+numpy==2.2.3
+google-generativeai==0.8.4
\ No newline at end of file
diff --git a/documentation/examples/custom/custom_instrumentation_python/run_example_custom.sh b/documentation/examples/custom/custom_instrumentation_python/run_example_custom.sh
new file mode 100755
index 0000000..86eec50
--- /dev/null
+++ b/documentation/examples/custom/custom_instrumentation_python/run_example_custom.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+
+# Check if OPENAI_API_KEY is already set
+if [ -z "$OPENAI_API_KEY" ]; then
+ echo "OPENAI_API_KEY not found in environment."
+ echo "Please enter your OpenAI API Key: "
+ read -s OPENAI_API_KEY
+ export OPENAI_API_KEY
+ echo "API Key set."
+else
+ echo "Using existing OPENAI_API_KEY from environment."
+fi
+
+# Check if Python 3 is installed
+if ! command -v python3 &> /dev/null; then
+ echo "Python 3 is required but not found. Please install Python 3 and try again."
+ exit 1
+fi
+
+# Check if virtualenv is installed, install if needed
+if ! command -v pip3 &> /dev/null; then
+ echo "pip3 not found. Please install pip and try again."
+ exit 1
+fi
+
+if ! python3 -m pip show virtualenv &> /dev/null; then
+ echo "Installing virtualenv package..."
+ python3 -m pip install virtualenv
+fi
+
+# Create virtual environment if not exists
+if [ ! -d "monocle_custom_env" ]; then
+ echo "Creating virtual environment monocle_custom_env..."
+ python3 -m virtualenv monocle_custom_env
+fi
+
+# Activate virtual environment
+echo "Activating virtual environment..."
+source monocle_custom_env/bin/activate
+
+# Check activation worked
+if [ "$VIRTUAL_ENV" == "" ]; then
+ echo "Failed to activate virtual environment."
+ exit 1
+fi
+
+# Install requirements
+if [ -f "requirements.txt" ]; then
+ echo "Installing requirements from requirements.txt..."
+ pip install -r requirements.txt
+else
+ echo "Warning: requirements.txt file not found."
+fi
+
+# Run example.py
+if [ -f "example_custom.py" ]; then
+ echo "Running example_custom.py..."
+ python example_custom.py
+else
+ echo "Error: example_custom.py file not found."
+ exit 1
+fi
+
+# Deactivate virtual environment
+deactivate
+echo "Script execution completed."
\ No newline at end of file
diff --git a/documentation/examples/custom/custom_instrumentation_python/run_example_gemini.sh b/documentation/examples/custom/custom_instrumentation_python/run_example_gemini.sh
new file mode 100755
index 0000000..1fc4acc
--- /dev/null
+++ b/documentation/examples/custom/custom_instrumentation_python/run_example_gemini.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+
+# Check if OPENAI_API_KEY is already set
+if [ -z "$OPENAI_API_KEY" ]; then
+ echo "OPENAI_API_KEY not found in environment."
+ echo "Please enter your OpenAI API Key: "
+ read -s OPENAI_API_KEY
+ export OPENAI_API_KEY
+ echo "API Key set."
+else
+ echo "Using existing OPENAI_API_KEY from environment."
+fi
+
+# Check if GOOGLE_API_KEY is already set
+if [ -z "$GOOGLE_API_KEY" ]; then
+ echo "GOOGLE_API_KEY not found in environment."
+ echo "Please enter your Gemini API Key: "
+ read -s GOOGLE_API_KEY
+ export GOOGLE_API_KEY
+ echo "Gemini API Key set."
+else
+ echo "Using existing GOOGLE_API_KEY from environment."
+fi
+
+# Check if Python 3 is installed
+if ! command -v python3 &> /dev/null; then
+ echo "Python 3 is required but not found. Please install Python 3 and try again."
+ exit 1
+fi
+
+# Check if virtualenv is installed, install if needed
+if ! command -v pip3 &> /dev/null; then
+ echo "pip3 not found. Please install pip and try again."
+ exit 1
+fi
+
+if ! python3 -m pip show virtualenv &> /dev/null; then
+ echo "Installing virtualenv package..."
+ python3 -m pip install virtualenv
+fi
+
+# Create virtual environment if not exists
+if [ ! -d "monocle_custom_env" ]; then
+ echo "Creating virtual environment monocle_custom_env..."
+ python3 -m virtualenv monocle_custom_env
+fi
+
+# Activate virtual environment
+echo "Activating virtual environment..."
+source monocle_custom_env/bin/activate
+
+# Check activation worked
+if [ "$VIRTUAL_ENV" == "" ]; then
+ echo "Failed to activate virtual environment."
+ exit 1
+fi
+
+# Install requirements
+if [ -f "requirements.txt" ]; then
+ echo "Installing requirements from requirements.txt..."
+ pip install -r requirements.txt
+else
+ echo "Warning: requirements.txt file not found."
+fi
+
+# Run example.py
+if [ -f "example_gemini.py" ]; then
+ echo "Running example_gemini.py..."
+ python example_gemini.py
+else
+ echo "Error: example_gemini.py file not found."
+ exit 1
+fi
+
+# Deactivate virtual environment
+deactivate
+echo "Script execution completed."
\ No newline at end of file
diff --git a/documentation/examples/custom/custom_instrumentation_ts/HOW_TO_RUN.MD b/documentation/examples/custom/custom_instrumentation_ts/HOW_TO_RUN.MD
new file mode 100644
index 0000000..e6a42e7
--- /dev/null
+++ b/documentation/examples/custom/custom_instrumentation_ts/HOW_TO_RUN.MD
@@ -0,0 +1,64 @@
+
+# Running the Examples
+
+This directory contains examples of how to use Monocle for custom instrumentation with different AI providers and vector databases.
+
+## OpenAI Example
+To run the OpenAI custom instrumentation example:
+
+```bash
+# Make the script executable
+chmod +x run_custom_code_example.sh
+
+# Run the script
+./run_custom_code_example.sh
+```
+
+This script will:
+1. Set up the MONOCLE_EXPORTER environment variable
+2. Prompt for your OpenAI API key if not already set
+3. Install necessary dependencies
+4. Run the OpenAI example code
+
+## Gemini Example
+To run the Gemini API instrumentation example:
+
+```bash
+# Make the script executable
+chmod +x run_gemini_sdk_code_example.sh
+
+# Run the script
+./run_gemini_sdk_code_example.sh
+```
+
+This script will:
+1. Set up the MONOCLE_EXPORTER environment variable
+2. Prompt for your OpenAI and Gemini API keys if not already set
+3. Install necessary dependencies
+4. Run the Gemini example code
+
+## What These Examples Demonstrate
+
+1. **Custom Instrumentation Setup**: How to set up Monocle's instrumentation for different AI libraries
+2. **Output Processors**: How to define custom output processors for various AI services
+3. **Vector Database Integration**: How to instrument vector database operations
+4. **Multiple Service Tracing**: How to trace operations across multiple AI services in a single application
+
+## Requirements
+
+- Node.js and npm
+- OpenAI API key (for OpenAI example)
+- Google Gemini API key (for Gemini example)
+
+## File Structure
+
+### Output Processors
+Output processors define how Monocle should process and format data from different AI services:
+
+- [`outputProcessorInference.js`](./monocle_output_processor/outputProcessorInference.js) - Processor for OpenAI API calls
+- [`outputProcessorGemini.js`](./monocle_output_processor/outputProcessorGemini.js) - Processor for Google's Gemini API calls
+- [`outputProcessorVector.js`](./monocle_output_processor/outputProcessorVector.js) - Processor for vector database operations
+
+### Example Implementations
+- [`exampleCustom.js`](./exampleCustom.js) - Example of custom instrumentation with OpenAI and a vector database
+- [`exampleGemini.js`](./exampleGemini.js) - Example of custom instrumentation with Google's Gemini API and a vector database
\ No newline at end of file
diff --git a/documentation/examples/custom/custom_instrumentation_ts/custom_ai_code/openaiClient.js b/documentation/examples/custom/custom_instrumentation_ts/custom_ai_code/openaiClient.js
new file mode 100644
index 0000000..a621bd6
--- /dev/null
+++ b/documentation/examples/custom/custom_instrumentation_ts/custom_ai_code/openaiClient.js
@@ -0,0 +1,91 @@
+const axios = require('axios');
+class OpenAIClient {
+ /**
+ * Client for interacting with OpenAI's Chat API
+ */
+
+ constructor(apiKey = null) {
+ /**
+ * Initialize the OpenAI client.
+ *
+ * @param {string} apiKey - OpenAI API key. If not provided, will look for OPENAI_API_KEY env variable.
+ */
+ this.apiKey = apiKey || process.env.OPENAI_API_KEY;
+ if (!this.apiKey) {
+ throw new Error("OpenAI API key is required. Either pass it explicitly or set OPENAI_API_KEY environment variable.");
+ }
+
+ this.baseUrl = "https://api.openai.com/v1";
+ }
+
+ async chat(messages, model = "gpt-3.5-turbo", temperature = 0.7, maxTokens = null,
+ topP = 1.0, frequencyPenalty = 0.0, presencePenalty = 0.0) {
+ /**
+ * Call OpenAI's chat completion API.
+ *
+ * @param {Array