Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@ AZURE_OPENAI_API_KEY=your_azure_api_key_here
AZURE_OPENAI_DEPLOYMENT=gpt-4o-mini
AZURE_OPENAI_API_VERSION=2024-10-21

# LangSmith Tracing (optional)
LANGSMITH_API_KEY=your_langsmith_api_key_here
LANGSMITH_PROJECT=title-abstractor-enterprise
LANGSMITH_ENDPOINT=https://api.smith.langchain.com
LANGSMITH_TRACING=false
LANGSMITH_SAMPLE_RATE=1.0
LANGSMITH_LOG_PROMPTS=false
LANGSMITH_LOG_RESPONSES=false

# File Storage
UPLOAD_DIR=./uploads
MAX_UPLOAD_SIZE=104857600
Expand All @@ -34,3 +43,9 @@ CORS_ORIGINS=http://localhost:3000,http://localhost:8000

# JWT (for future auth - generate with: openssl rand -hex 32)
SECRET_KEY=your_secret_key_here

# System of Record Webhook (optional)
SYSTEM_OF_RECORD_ENABLED=false
SYSTEM_OF_RECORD_WEBHOOK_URL=https://your-system-of-record.example.com/webhooks/abstracts
SYSTEM_OF_RECORD_WEBHOOK_TOKEN=your_system_of_record_token
SYSTEM_OF_RECORD_WEBHOOK_TIMEOUT=10
24 changes: 24 additions & 0 deletions backend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,30 @@ Edit the `.env` files in both the project root and backend directory to set your

**IMPORTANT**: The system uses `gemini-2.5-pro` model (Gemini 2.5 Pro) which requires billing enabled but provides high RPM quota.

#### Optional: LangSmith Tracing

Set the following environment variables to enable LangSmith tracing for LLM calls:

```
LANGSMITH_API_KEY=your_langsmith_api_key
LANGSMITH_PROJECT=title-abstractor-enterprise
LANGSMITH_TRACING=true
LANGSMITH_SAMPLE_RATE=1.0
LANGSMITH_LOG_PROMPTS=false
LANGSMITH_LOG_RESPONSES=false
```

#### Optional: System of Record Webhook

Enable the webhook to notify downstream systems (e.g., Stewart system-of-record) when abstracts complete:

```
SYSTEM_OF_RECORD_ENABLED=true
SYSTEM_OF_RECORD_WEBHOOK_URL=https://your-system-of-record.example.com/webhooks/abstracts
SYSTEM_OF_RECORD_WEBHOOK_TOKEN=your_webhook_token
SYSTEM_OF_RECORD_WEBHOOK_TIMEOUT=10
```

### Starting the Services

**RECOMMENDED**: Use the provided startup scripts which ensure the correct Gemini model is used:
Expand Down
47 changes: 47 additions & 0 deletions backend/app/core/azure_openai_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os
from io import BytesIO
from pdf2image import convert_from_path
from app.core.langsmith import build_llm_inputs, build_llm_outputs, start_langsmith_run


class APITimeoutError(Exception):
Expand Down Expand Up @@ -159,6 +160,14 @@ async def _process_with_images(self, images: List, prompt: str, timeout: int = 1
"""
print(f" Processing with {len(images)} images via Azure OpenAI (timeout: {timeout}s)...")

run = start_langsmith_run(
name="AzureOpenAI.chat.completions",
run_type="llm",
inputs=build_llm_inputs(prompt, "images", {"deployment": self.deployment, "image_count": len(images)}),
metadata={"provider": "azure-openai", "timeout": timeout, "api_version": self.api_version},
tags=["azure-openai", "images"]
)

try:
# Build content with prompt + all images
content_parts = [
Expand Down Expand Up @@ -210,20 +219,35 @@ async def _process_with_images(self, images: List, prompt: str, timeout: int = 1
input_tokens = response.usage.prompt_tokens or 0
output_tokens = response.usage.completion_tokens or 0

if run:
run.end(outputs=build_llm_outputs(
response_text,
{
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"finish_reason": response.choices[0].finish_reason
}
))
return {
'text': response_text,
'input_tokens': input_tokens,
'output_tokens': output_tokens
}
except asyncio.TimeoutError:
if run:
run.end(error=f"Timeout after {timeout}s")
raise APITimeoutError(f"Azure OpenAI API call exceeded {timeout}s timeout")
except Exception as e:
# Check if this is a rate limit error
if self._is_rate_limit_error(e):
retry_after = self._extract_retry_after(e)
print(f"⚠️ Rate limit hit! Suggested retry after {retry_after}s")
if run:
run.end(error=f"Rate limit: {str(e)}")
raise RateLimitError(f"Azure OpenAI API rate limit exceeded: {str(e)}", retry_after=retry_after)
# Re-raise other exceptions
if run:
run.end(error=str(e))
raise

async def process_text(self, prompt: str, temperature: float = 0, timeout: int = 120) -> Dict:
Expand All @@ -241,6 +265,14 @@ async def process_text(self, prompt: str, temperature: float = 0, timeout: int =
"""
print(f" Processing text-only prompt via Azure OpenAI (timeout: {timeout}s)...")

run = start_langsmith_run(
name="AzureOpenAI.chat.completions",
run_type="llm",
inputs=build_llm_inputs(prompt, "text", {"deployment": self.deployment}),
metadata={"provider": "azure-openai", "timeout": timeout, "temperature": temperature},
tags=["azure-openai", "text"]
)

try:
messages = [
{
Expand Down Expand Up @@ -270,20 +302,35 @@ async def process_text(self, prompt: str, temperature: float = 0, timeout: int =
input_tokens = response.usage.prompt_tokens or 0
output_tokens = response.usage.completion_tokens or 0

if run:
run.end(outputs=build_llm_outputs(
response_text,
{
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"finish_reason": response.choices[0].finish_reason
}
))
return {
'text': response_text,
'input_tokens': input_tokens,
'output_tokens': output_tokens
}
except asyncio.TimeoutError:
if run:
run.end(error=f"Timeout after {timeout}s")
raise APITimeoutError(f"Azure OpenAI API call exceeded {timeout}s timeout")
except Exception as e:
# Check if this is a rate limit error
if self._is_rate_limit_error(e):
retry_after = self._extract_retry_after(e)
print(f"⚠️ Rate limit hit! Suggested retry after {retry_after}s")
if run:
run.end(error=f"Rate limit: {str(e)}")
raise RateLimitError(f"Azure OpenAI API rate limit exceeded: {str(e)}", retry_after=retry_after)
# Re-raise other exceptions
if run:
run.end(error=str(e))
raise

def estimate_cost(self, num_pages: int) -> float:
Expand Down
70 changes: 70 additions & 0 deletions backend/app/core/claude_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Dict, Union, List
import base64
import asyncio
from app.core.langsmith import build_llm_inputs, build_llm_outputs, start_langsmith_run


class APITimeoutError(Exception):
Expand Down Expand Up @@ -85,6 +86,14 @@ async def _process_with_base64_pdf(self, base64_data: str, prompt: str, timeout:
"""
print(f" Processing with base64 PDF via Claude (timeout: {timeout}s)...")

run = start_langsmith_run(
name="Claude.messages.create",
run_type="llm",
inputs=build_llm_inputs(prompt, "base64_pdf", {"model": self.model_name}),
metadata={"provider": "anthropic", "timeout": timeout},
tags=["claude", "pdf"]
)

try:
# Claude expects PDF documents in specific format
message = await asyncio.wait_for(
Expand Down Expand Up @@ -127,20 +136,35 @@ async def _process_with_base64_pdf(self, base64_data: str, prompt: str, timeout:
input_tokens = message.usage.input_tokens or 0
output_tokens = message.usage.output_tokens or 0

if run:
run.end(outputs=build_llm_outputs(
response_text,
{
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"finish_reason": message.stop_reason
}
))
return {
'text': response_text,
'input_tokens': input_tokens,
'output_tokens': output_tokens
}
except asyncio.TimeoutError:
if run:
run.end(error=f"Timeout after {timeout}s")
raise APITimeoutError(f"Claude API call exceeded {timeout}s timeout")
except Exception as e:
# Check if this is a rate limit error
if self._is_rate_limit_error(e):
retry_after = self._extract_retry_after(e)
print(f"⚠️ Rate limit hit! Suggested retry after {retry_after}s")
if run:
run.end(error=f"Rate limit: {str(e)}")
raise RateLimitError(f"Claude API rate limit exceeded: {str(e)}", retry_after=retry_after)
# Re-raise other exceptions
if run:
run.end(error=str(e))
raise

async def _process_with_images(self, images: List, prompt: str, timeout: int = 180) -> str:
Expand All @@ -154,6 +178,14 @@ async def _process_with_images(self, images: List, prompt: str, timeout: int = 1
"""
print(f" Processing with {len(images)} images via Claude (timeout: {timeout}s)...")

run = start_langsmith_run(
name="Claude.messages.create",
run_type="llm",
inputs=build_llm_inputs(prompt, "images", {"model": self.model_name, "image_count": len(images)}),
metadata={"provider": "anthropic", "timeout": timeout},
tags=["claude", "images"]
)

try:
# Build content with prompt + all images
content_parts = []
Expand Down Expand Up @@ -208,20 +240,35 @@ async def _process_with_images(self, images: List, prompt: str, timeout: int = 1
input_tokens = message.usage.input_tokens or 0
output_tokens = message.usage.output_tokens or 0

if run:
run.end(outputs=build_llm_outputs(
response_text,
{
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"finish_reason": message.stop_reason
}
))
return {
'text': response_text,
'input_tokens': input_tokens,
'output_tokens': output_tokens
}
except asyncio.TimeoutError:
if run:
run.end(error=f"Timeout after {timeout}s")
raise APITimeoutError(f"Claude API call exceeded {timeout}s timeout")
except Exception as e:
# Check if this is a rate limit error
if self._is_rate_limit_error(e):
retry_after = self._extract_retry_after(e)
print(f"⚠️ Rate limit hit! Suggested retry after {retry_after}s")
if run:
run.end(error=f"Rate limit: {str(e)}")
raise RateLimitError(f"Claude API rate limit exceeded: {str(e)}", retry_after=retry_after)
# Re-raise other exceptions
if run:
run.end(error=str(e))
raise

async def process_text(self, prompt: str, temperature: float = 0, timeout: int = 120) -> str:
Expand All @@ -239,6 +286,14 @@ async def process_text(self, prompt: str, temperature: float = 0, timeout: int =
"""
print(f" Processing text-only prompt via Claude (timeout: {timeout}s)...")

run = start_langsmith_run(
name="Claude.messages.create",
run_type="llm",
inputs=build_llm_inputs(prompt, "text", {"model": self.model_name}),
metadata={"provider": "anthropic", "timeout": timeout, "temperature": temperature},
tags=["claude", "text"]
)

try:
message = await asyncio.wait_for(
self.client.messages.create(
Expand Down Expand Up @@ -266,20 +321,35 @@ async def process_text(self, prompt: str, temperature: float = 0, timeout: int =
input_tokens = message.usage.input_tokens or 0
output_tokens = message.usage.output_tokens or 0

if run:
run.end(outputs=build_llm_outputs(
response_text,
{
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"finish_reason": message.stop_reason
}
))
return {
'text': response_text,
'input_tokens': input_tokens,
'output_tokens': output_tokens
}
except asyncio.TimeoutError:
if run:
run.end(error=f"Timeout after {timeout}s")
raise APITimeoutError(f"Claude API call exceeded {timeout}s timeout")
except Exception as e:
# Check if this is a rate limit error
if self._is_rate_limit_error(e):
retry_after = self._extract_retry_after(e)
print(f"⚠️ Rate limit hit! Suggested retry after {retry_after}s")
if run:
run.end(error=f"Rate limit: {str(e)}")
raise RateLimitError(f"Claude API rate limit exceeded: {str(e)}", retry_after=retry_after)
# Re-raise other exceptions
if run:
run.end(error=str(e))
raise

def estimate_cost(self, num_pages: int) -> float:
Expand Down
23 changes: 23 additions & 0 deletions backend/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,29 @@ def default_celery_to_redis(cls, v, info):
PACER_AUTH_URL: str = "https://pacer.login.uscourts.gov/services/cso-auth"
PACER_API_URL: str = "https://pcl.uscourts.gov"

# LangSmith Tracing (optional observability for LLM calls; consumed by
# app.core.langsmith — presumably tracing is a no-op unless both
# LANGSMITH_TRACING is true and an API key is set; verify in that module).
LANGSMITH_API_KEY: Optional[str] = None  # API key; None means no authenticated tracing
LANGSMITH_PROJECT: str = "title-abstractor-enterprise"  # project name runs are grouped under
LANGSMITH_ENDPOINT: Optional[str] = None  # custom API endpoint; None = library default
LANGSMITH_TRACING: bool = False  # master on/off switch for tracing
LANGSMITH_SAMPLE_RATE: float = 1.0  # fraction of runs traced; validated to [0, 1] by validator below
LANGSMITH_LOG_PROMPTS: bool = False  # include prompt text in traces — off by default (prompts may contain sensitive data)
LANGSMITH_LOG_RESPONSES: bool = False  # include model responses in traces — off by default for the same reason

@field_validator('LANGSMITH_SAMPLE_RATE', mode='after')
@classmethod
def validate_langsmith_sample_rate(cls, v):
    """Validate that the LangSmith sample rate is a fraction in [0.0, 1.0].

    Args:
        v: the parsed sample-rate value from the environment/.env file.

    Returns:
        The value unchanged when it is within range.

    Raises:
        ValueError: if v is outside [0, 1] or is NaN. The previous check
            (`v < 0 or v > 1`) silently accepted NaN because every
            comparison against NaN evaluates to False.
    """
    # A chained comparison rejects NaN as well: `0.0 <= nan` is False,
    # so `not (...)` is True and we raise instead of letting NaN through.
    if not (0.0 <= v <= 1.0):
        raise ValueError("LANGSMITH_SAMPLE_RATE must be between 0 and 1")
    return v

# System of Record Webhook (optional)
# When enabled, completed abstracts are pushed to a downstream
# system-of-record endpoint (see backend/README.md). Disabled by default
# so local development requires no external endpoint.
SYSTEM_OF_RECORD_ENABLED: bool = False  # master switch for the webhook
SYSTEM_OF_RECORD_WEBHOOK_URL: Optional[str] = None  # destination URL; presumably required when enabled — TODO confirm where that is enforced
SYSTEM_OF_RECORD_WEBHOOK_TOKEN: Optional[str] = None  # auth token sent with the webhook — presumably a bearer token; verify against the sender
SYSTEM_OF_RECORD_WEBHOOK_TIMEOUT: int = 10  # HTTP request timeout, in seconds

# CourtListener API (free public court records - for viewing case details)
# SECURITY FIX: a live API token was previously hardcoded here and committed
# to version control; that token must be treated as compromised and rotated.
# The default is now empty — supply the token via the COURTLISTENER_API_TOKEN
# environment variable (pydantic-settings reads it automatically).
COURTLISTENER_API_TOKEN: str = ""
COURTLISTENER_API_URL: str = "https://www.courtlistener.com/api/rest/v4"
Expand Down
Loading