Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@ AZURE_OPENAI_API_KEY=your_azure_api_key_here
AZURE_OPENAI_DEPLOYMENT=gpt-4o-mini
AZURE_OPENAI_API_VERSION=2024-10-21

# LangSmith Tracing (optional)
LANGSMITH_API_KEY=your_langsmith_api_key_here
LANGSMITH_PROJECT=title-abstractor-enterprise
LANGSMITH_ENDPOINT=https://api.smith.langchain.com
LANGSMITH_TRACING=false
LANGSMITH_SAMPLE_RATE=1.0
LANGSMITH_LOG_PROMPTS=false
LANGSMITH_LOG_RESPONSES=false

# File Storage
UPLOAD_DIR=./uploads
MAX_UPLOAD_SIZE=104857600
Expand All @@ -34,3 +43,9 @@ CORS_ORIGINS=http://localhost:3000,http://localhost:8000

# JWT (for future auth - generate with: openssl rand -hex 32)
SECRET_KEY=your_secret_key_here

# System of Record Webhook (optional)
SYSTEM_OF_RECORD_ENABLED=false
SYSTEM_OF_RECORD_WEBHOOK_URL=https://your-system-of-record.example.com/webhooks/abstracts
SYSTEM_OF_RECORD_WEBHOOK_TOKEN=your_system_of_record_token
SYSTEM_OF_RECORD_WEBHOOK_TIMEOUT=10
24 changes: 24 additions & 0 deletions backend/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,30 @@ Edit the `.env` files in both the project root and backend directory to set your

**IMPORTANT**: The system uses `gemini-2.5-pro` model (Gemini 2.5 Pro) which requires billing enabled but provides high RPM quota.

#### Optional: LangSmith Tracing

Set the following environment variables to enable LangSmith tracing for LLM calls:

```
LANGSMITH_API_KEY=your_langsmith_api_key
LANGSMITH_PROJECT=title-abstractor-enterprise
LANGSMITH_TRACING=true
LANGSMITH_SAMPLE_RATE=1.0
LANGSMITH_LOG_PROMPTS=false
LANGSMITH_LOG_RESPONSES=false
```

#### Optional: System of Record Webhook

Enable the webhook to notify downstream systems (e.g., Stewart system-of-record) when abstracts complete:

```
SYSTEM_OF_RECORD_ENABLED=true
SYSTEM_OF_RECORD_WEBHOOK_URL=https://your-system-of-record.example.com/webhooks/abstracts
SYSTEM_OF_RECORD_WEBHOOK_TOKEN=your_webhook_token
SYSTEM_OF_RECORD_WEBHOOK_TIMEOUT=10
```

### Starting the Services

**RECOMMENDED**: Use the provided startup scripts which ensure the correct Gemini model is used:
Expand Down
47 changes: 47 additions & 0 deletions backend/app/core/azure_openai_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os
from io import BytesIO
from pdf2image import convert_from_path
from app.core.langsmith import build_llm_inputs, build_llm_outputs, start_langsmith_run


class APITimeoutError(Exception):
Expand Down Expand Up @@ -159,6 +160,14 @@ async def _process_with_images(self, images: List, prompt: str, timeout: int = 1
"""
print(f" Processing with {len(images)} images via Azure OpenAI (timeout: {timeout}s)...")

run = start_langsmith_run(
name="AzureOpenAI.chat.completions",
run_type="llm",
inputs=build_llm_inputs(prompt, "images", {"deployment": self.deployment, "image_count": len(images)}),
metadata={"provider": "azure-openai", "timeout": timeout, "api_version": self.api_version},
tags=["azure-openai", "images"]
)

try:
# Build content with prompt + all images
content_parts = [
Expand Down Expand Up @@ -210,20 +219,35 @@ async def _process_with_images(self, images: List, prompt: str, timeout: int = 1
input_tokens = response.usage.prompt_tokens or 0
output_tokens = response.usage.completion_tokens or 0

if run:
run.end(outputs=build_llm_outputs(
response_text,
{
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"finish_reason": response.choices[0].finish_reason
}
))
return {
'text': response_text,
'input_tokens': input_tokens,
'output_tokens': output_tokens
}
except asyncio.TimeoutError:
if run:
run.end(error=f"Timeout after {timeout}s")
raise APITimeoutError(f"Azure OpenAI API call exceeded {timeout}s timeout")
except Exception as e:
# Check if this is a rate limit error
if self._is_rate_limit_error(e):
retry_after = self._extract_retry_after(e)
print(f"⚠️ Rate limit hit! Suggested retry after {retry_after}s")
if run:
run.end(error=f"Rate limit: {str(e)}")
raise RateLimitError(f"Azure OpenAI API rate limit exceeded: {str(e)}", retry_after=retry_after)
# Re-raise other exceptions
if run:
run.end(error=str(e))
raise

async def process_text(self, prompt: str, temperature: float = 0, timeout: int = 120) -> Dict:
Expand All @@ -241,6 +265,14 @@ async def process_text(self, prompt: str, temperature: float = 0, timeout: int =
"""
print(f" Processing text-only prompt via Azure OpenAI (timeout: {timeout}s)...")

run = start_langsmith_run(
name="AzureOpenAI.chat.completions",
run_type="llm",
inputs=build_llm_inputs(prompt, "text", {"deployment": self.deployment}),
metadata={"provider": "azure-openai", "timeout": timeout, "temperature": temperature},
tags=["azure-openai", "text"]
)

try:
messages = [
{
Expand Down Expand Up @@ -270,20 +302,35 @@ async def process_text(self, prompt: str, temperature: float = 0, timeout: int =
input_tokens = response.usage.prompt_tokens or 0
output_tokens = response.usage.completion_tokens or 0

if run:
run.end(outputs=build_llm_outputs(
response_text,
{
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"finish_reason": response.choices[0].finish_reason
}
))
return {
'text': response_text,
'input_tokens': input_tokens,
'output_tokens': output_tokens
}
except asyncio.TimeoutError:
if run:
run.end(error=f"Timeout after {timeout}s")
raise APITimeoutError(f"Azure OpenAI API call exceeded {timeout}s timeout")
except Exception as e:
# Check if this is a rate limit error
if self._is_rate_limit_error(e):
retry_after = self._extract_retry_after(e)
print(f"⚠️ Rate limit hit! Suggested retry after {retry_after}s")
if run:
run.end(error=f"Rate limit: {str(e)}")
raise RateLimitError(f"Azure OpenAI API rate limit exceeded: {str(e)}", retry_after=retry_after)
# Re-raise other exceptions
if run:
run.end(error=str(e))
raise

def estimate_cost(self, num_pages: int) -> float:
Expand Down
70 changes: 70 additions & 0 deletions backend/app/core/claude_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from typing import Dict, Union, List
import base64
import asyncio
from app.core.langsmith import build_llm_inputs, build_llm_outputs, start_langsmith_run


class APITimeoutError(Exception):
Expand Down Expand Up @@ -85,6 +86,14 @@ async def _process_with_base64_pdf(self, base64_data: str, prompt: str, timeout:
"""
print(f" Processing with base64 PDF via Claude (timeout: {timeout}s)...")

run = start_langsmith_run(
name="Claude.messages.create",
run_type="llm",
inputs=build_llm_inputs(prompt, "base64_pdf", {"model": self.model_name}),
metadata={"provider": "anthropic", "timeout": timeout},
tags=["claude", "pdf"]
)

try:
# Claude expects PDF documents in specific format
message = await asyncio.wait_for(
Expand Down Expand Up @@ -127,20 +136,35 @@ async def _process_with_base64_pdf(self, base64_data: str, prompt: str, timeout:
input_tokens = message.usage.input_tokens or 0
output_tokens = message.usage.output_tokens or 0

if run:
run.end(outputs=build_llm_outputs(
response_text,
{
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"finish_reason": message.stop_reason
}
))
return {
'text': response_text,
'input_tokens': input_tokens,
'output_tokens': output_tokens
}
except asyncio.TimeoutError:
if run:
run.end(error=f"Timeout after {timeout}s")
raise APITimeoutError(f"Claude API call exceeded {timeout}s timeout")
except Exception as e:
# Check if this is a rate limit error
if self._is_rate_limit_error(e):
retry_after = self._extract_retry_after(e)
print(f"⚠️ Rate limit hit! Suggested retry after {retry_after}s")
if run:
run.end(error=f"Rate limit: {str(e)}")
raise RateLimitError(f"Claude API rate limit exceeded: {str(e)}", retry_after=retry_after)
# Re-raise other exceptions
if run:
run.end(error=str(e))
raise

async def _process_with_images(self, images: List, prompt: str, timeout: int = 180) -> str:
Expand All @@ -154,6 +178,14 @@ async def _process_with_images(self, images: List, prompt: str, timeout: int = 1
"""
print(f" Processing with {len(images)} images via Claude (timeout: {timeout}s)...")

run = start_langsmith_run(
name="Claude.messages.create",
run_type="llm",
inputs=build_llm_inputs(prompt, "images", {"model": self.model_name, "image_count": len(images)}),
metadata={"provider": "anthropic", "timeout": timeout},
tags=["claude", "images"]
)

try:
# Build content with prompt + all images
content_parts = []
Expand Down Expand Up @@ -208,20 +240,35 @@ async def _process_with_images(self, images: List, prompt: str, timeout: int = 1
input_tokens = message.usage.input_tokens or 0
output_tokens = message.usage.output_tokens or 0

if run:
run.end(outputs=build_llm_outputs(
response_text,
{
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"finish_reason": message.stop_reason
}
))
return {
'text': response_text,
'input_tokens': input_tokens,
'output_tokens': output_tokens
}
except asyncio.TimeoutError:
if run:
run.end(error=f"Timeout after {timeout}s")
raise APITimeoutError(f"Claude API call exceeded {timeout}s timeout")
except Exception as e:
# Check if this is a rate limit error
if self._is_rate_limit_error(e):
retry_after = self._extract_retry_after(e)
print(f"⚠️ Rate limit hit! Suggested retry after {retry_after}s")
if run:
run.end(error=f"Rate limit: {str(e)}")
raise RateLimitError(f"Claude API rate limit exceeded: {str(e)}", retry_after=retry_after)
# Re-raise other exceptions
if run:
run.end(error=str(e))
raise

async def process_text(self, prompt: str, temperature: float = 0, timeout: int = 120) -> str:
Expand All @@ -239,6 +286,14 @@ async def process_text(self, prompt: str, temperature: float = 0, timeout: int =
"""
print(f" Processing text-only prompt via Claude (timeout: {timeout}s)...")

run = start_langsmith_run(
name="Claude.messages.create",
run_type="llm",
inputs=build_llm_inputs(prompt, "text", {"model": self.model_name}),
metadata={"provider": "anthropic", "timeout": timeout, "temperature": temperature},
tags=["claude", "text"]
)

try:
message = await asyncio.wait_for(
self.client.messages.create(
Expand Down Expand Up @@ -266,20 +321,35 @@ async def process_text(self, prompt: str, temperature: float = 0, timeout: int =
input_tokens = message.usage.input_tokens or 0
output_tokens = message.usage.output_tokens or 0

if run:
run.end(outputs=build_llm_outputs(
response_text,
{
"input_tokens": input_tokens,
"output_tokens": output_tokens,
"finish_reason": message.stop_reason
}
))
return {
'text': response_text,
'input_tokens': input_tokens,
'output_tokens': output_tokens
}
except asyncio.TimeoutError:
if run:
run.end(error=f"Timeout after {timeout}s")
raise APITimeoutError(f"Claude API call exceeded {timeout}s timeout")
except Exception as e:
# Check if this is a rate limit error
if self._is_rate_limit_error(e):
retry_after = self._extract_retry_after(e)
print(f"⚠️ Rate limit hit! Suggested retry after {retry_after}s")
if run:
run.end(error=f"Rate limit: {str(e)}")
raise RateLimitError(f"Claude API rate limit exceeded: {str(e)}", retry_after=retry_after)
# Re-raise other exceptions
if run:
run.end(error=str(e))
raise

def estimate_cost(self, num_pages: int) -> float:
Expand Down
23 changes: 23 additions & 0 deletions backend/app/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,29 @@ def default_celery_to_redis(cls, v, info):
PACER_AUTH_URL: str = "https://pacer.login.uscourts.gov/services/cso-auth"
PACER_API_URL: str = "https://pcl.uscourts.gov"

# LangSmith Tracing (optional observability for LLM calls; consumed by
# app.core.langsmith — presumably tracing is a no-op unless both
# LANGSMITH_TRACING is true and an API key is set; verify in that module).
LANGSMITH_API_KEY: Optional[str] = None  # API key; None means no authenticated tracing
LANGSMITH_PROJECT: str = "title-abstractor-enterprise"  # project name runs are grouped under
LANGSMITH_ENDPOINT: Optional[str] = None  # custom API endpoint; None = library default
LANGSMITH_TRACING: bool = False  # master on/off switch for tracing
LANGSMITH_SAMPLE_RATE: float = 1.0  # fraction of runs traced; validated to [0, 1] by validator below
LANGSMITH_LOG_PROMPTS: bool = False  # include prompt text in traces — off by default (prompts may contain sensitive data)
LANGSMITH_LOG_RESPONSES: bool = False  # include model responses in traces — off by default for the same reason

@field_validator('LANGSMITH_SAMPLE_RATE', mode='after')
@classmethod
def validate_langsmith_sample_rate(cls, v):
    """Validate that the LangSmith sample rate is a fraction in [0.0, 1.0].

    Args:
        v: the parsed sample-rate value from the environment/.env file.

    Returns:
        The value unchanged when it is within range.

    Raises:
        ValueError: if v is outside [0, 1] or is NaN. The previous check
            (`v < 0 or v > 1`) silently accepted NaN because every
            comparison against NaN evaluates to False.
    """
    # A chained comparison rejects NaN as well: `0.0 <= nan` is False,
    # so `not (...)` is True and we raise instead of letting NaN through.
    if not (0.0 <= v <= 1.0):
        raise ValueError("LANGSMITH_SAMPLE_RATE must be between 0 and 1")
    return v

# System of Record Webhook (optional)
# When enabled, completed abstracts are pushed to a downstream
# system-of-record endpoint (see backend/README.md). Disabled by default
# so local development requires no external endpoint.
SYSTEM_OF_RECORD_ENABLED: bool = False  # master switch for the webhook
SYSTEM_OF_RECORD_WEBHOOK_URL: Optional[str] = None  # destination URL; presumably required when enabled — TODO confirm where that is enforced
SYSTEM_OF_RECORD_WEBHOOK_TOKEN: Optional[str] = None  # auth token sent with the webhook — presumably a bearer token; verify against the sender
SYSTEM_OF_RECORD_WEBHOOK_TIMEOUT: int = 10  # HTTP request timeout, in seconds

# CourtListener API (free public court records - for viewing case details)
# SECURITY FIX: a live API token was previously hardcoded here and committed
# to version control; that token must be treated as compromised and rotated.
# The default is now empty — supply the token via the COURTLISTENER_API_TOKEN
# environment variable (pydantic-settings reads it automatically).
COURTLISTENER_API_TOKEN: str = ""
COURTLISTENER_API_URL: str = "https://www.courtlistener.com/api/rest/v4"
Expand Down
Loading