# Source: headroom/pyproject.toml at main · azedinez/headroom (GitHub)
# PEP 517 build configuration: maturin is both the only build requirement
# and the build backend.
[build-system]
build-backend = "maturin"
requires = [
    "maturin>=1.5,<2.0",
]

# Core package metadata. The distribution is published as `headroom-ai`.
[project]
name = "headroom-ai"
version = "0.22.4"
description = "The Context Optimization Layer for LLM Applications - Cut costs by 50-90%"
readme = "README.md"
# SPDX license expression (PEP 639). Because the license is declared this
# way, no `License ::` trove classifier is listed under `classifiers`: those
# classifiers are deprecated, and build tools are permitted to raise an error
# when a project declares both.
license = "Apache-2.0"
requires-python = ">=3.10"
authors = [
    { name = "Headroom Contributors" }
]
maintainers = [
    { name = "Headroom Contributors" }
]
keywords = [
    "llm",
    "openai",
    "anthropic",
    "claude",
    "gpt",
    "context",
    "token",
    "optimization",
    "compression",
    "caching",
    "proxy",
    "ai",
    "machine-learning",
]
# NOTE(review): the `image` extra carries `python_version>='3.13'` markers,
# but no 3.13 classifier is listed here — confirm whether 3.13 is officially
# supported before adding one.
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Typing :: Typed",
]
# Runtime requirements installed with every `pip install headroom-ai`,
# with or without extras.
dependencies = [
    # Core: lightweight compression (SmartCrusher, ContentRouter, CCR, TOIN)
    "tiktoken>=0.5.0",            # Tokenizer for all compressors
    "pydantic>=2.0.0",            # Config and data models
    "litellm==1.82.3",            # Model registry, pricing, and provider support
    "click>=8.1.0",               # CLI framework
    "rich>=13.0.0",               # Rich terminal output
    "opentelemetry-api>=1.24.0",  # Safe no-op OTEL API for instrumentation
    "ast-grep-cli>=0.30.0",       # AST-aware code slicing (CodeCompressor); binary wheel
    "tomli>=2.0.0; python_version < '3.11'",  # tomllib backport for helper scripts
]

[project.optional-dependencies]
# Optional feature sets, installed as `headroom-ai[<extra>]`. Several extras
# can be combined in one requirement, e.g. `headroom-ai[proxy,code]`.
#
# Proxy server (most common install: pip install headroom-ai[proxy])
proxy = [
    "fastapi>=0.100.0",
    "uvicorn>=0.23.0",
    "httpx[http2]>=0.24.0",
    "openai>=2.14.0",             # OpenAI API format support
    "mcp>=1.0.0",                 # MCP server (headroom_compress, retrieve, stats)
    "magika>=0.6.0",              # ML content detection for ContentRouter
    "zstandard>=0.20.0",          # Decompress zstd request bodies (Codex, etc.)
    "websockets>=13.0",           # WebSocket proxy for /v1/responses (Codex gpt-5.4+)
    "onnxruntime>=1.16.0",        # Kompress ONNX INT8 text compression (no torch needed)
    "transformers>=4.30.0",       # Tokenizer only (for Kompress)
    "watchdog>=4.0.0",            # File watcher for live code graph reindexing (--code-graph)
    "sqlite-vec>=0.1.6",          # Vector index for memory (--memory). Lightweight, no torch.
]
# AST-based code compression (tree-sitter)
code = [
    "tree-sitter-language-pack>=0.10.0",
]
# ML-based compression with Kompress (ModernBERT).
# (The legacy [llmlingua] extra was removed in 0.9.x — no live code path used it.
# Use [ml] for the supported ML compression dependencies.)
ml = [
    "torch>=2.0.0",
    "transformers>=4.30.0",
    # transformers >= 5.x requires huggingface-hub >= 1.5.0,<2.0; pinning
    # the floor here prevents Kompress from silently falling back to
    # "unavailable" when a sibling install (e.g. `pip install
    # strands-agents`) drags huggingface-hub backwards.
    "huggingface-hub>=1.5.0,<2.0",
]
# Memory system (hierarchical memory with vector search)
# `sqlite-vec` is also part of [proxy]; this extra adds hnswlib and
# sentence-transformers on top of it.
memory = [
    "hnswlib>=0.8.0",
    "sqlite-vec>=0.1.6",
    "sentence-transformers>=2.2.0",
]
# Qdrant + Neo4j memory backend helpers
memory-stack = [
    "mem0ai>=0.1.100",
    "qdrant-client>=1.9.0",
    "neo4j>=5.20.0",
]
# Semantic relevance scoring with embeddings.
# Uses `fastembed` (BAAI/bge-small-en-v1.5 by default — 33M params,
# 384 dims, ~30 MB int8-quantized ONNX). Same library + model used by
# the Rust SmartCrusher (`fastembed` crate), giving byte-equal embeddings
# across the language boundary. Replaced sentence-transformers in
# Stage 3c.1 — fastembed is faster (~2-3x), smaller (no torch
# dependency), and outranks all-MiniLM-L6-v2 on MTEB by ~6 points.
relevance = [
    "fastembed>=0.4.0",
    "numpy>=1.24.0",
]
# Image compression (ML-based routing + OCR)
#
# OCR backend uses ONNX Runtime regardless of Python version. The
# rapidocr ecosystem split into two flavors after 1.4.x:
#   * rapidocr-onnxruntime 1.4.x — bundled-ORT package, capped at
#     Python <3.13 by its requires-python metadata. Drop-in for our
#     existing v1 tuple-shaped API call.
#   * rapidocr 3.x — engine-agnostic core, supports Python 3.13+.
#     Returns a RapidOCROutput dataclass (txts, scores, boxes, ...).
#     Needs `onnxruntime` installed separately to use the ORT backend.
#
# `headroom/image/compressor.py` adapts both API shapes at runtime via
# a try/except cascade. See issue #372 for context.
#
# The environment markers below are mutually exclusive, so exactly one of
# the two OCR flavors is installed for any given interpreter.
image = [
    "pillow>=10.0.0",
    "sentencepiece>=0.1.99",  # Required by SigLIP tokenizer (SiglipTokenizer)
    # Python 3.6–3.12 (effectively 3.10–3.12 here, since this project's
    # requires-python is >=3.10): keep the proven ORT-bundled package directly.
    # ~15 MB ONNX models auto-downloaded on first use.
    "rapidocr-onnxruntime>=1.4.0,<2; python_version<'3.13'",
    # Python 3.13+: rapidocr-onnxruntime is unavailable (its wheels
    # declare requires-python<3.13). Use the successor `rapidocr` 3.x
    # core + `onnxruntime` engine; same ORT backend, just split into
    # two packages. Total install size and inference speed unchanged.
    "rapidocr>=3.0,<4; python_version>='3.13'",
    # NOTE(review): this floor (>=1.7) is lower than the >=1.16.0 floor used
    # by the [proxy] and [voice] extras; the higher floor wins whenever those
    # extras are installed alongside this one — confirm the difference is
    # intentional.
    "onnxruntime>=1.7,<2; python_version>='3.13'",
]
# Report generation
reports = [
    "jinja2>=3.0.0",
]
# OpenTelemetry metrics export
# (The core dependencies only include `opentelemetry-api`; this extra adds
# the SDK and the OTLP/HTTP exporter.)
otel = [
    "opentelemetry-sdk>=1.24.0",
    "opentelemetry-exporter-otlp-proto-http>=1.24.0",
]
# any-llm multi-provider backend (requires Python 3.11+)
# On Python 3.10 the marker filters the requirement out, so this extra
# installs nothing there.
anyllm = [
    "any-llm-sdk>=1.0.0; python_version >= '3.11'",
]
# LangChain integration
langchain = [
    "langchain-core>=0.2.0",
    "langchain-openai>=0.1.0",
]
# Agno agent framework integration
agno = [
    "agno>=1.0.0",
]
# AWS Strands Agents SDK integration
strands = [
    "strands-agents>=0.1.0",
]
# MCP server for Claude Code integration
# (`mcp` is also bundled in [proxy]; this extra is the two-package
# standalone install.)
mcp = [
    "mcp>=1.0.0",
    "httpx>=0.24.0",
]
# Voice filler detection
voice = [
    "onnxruntime>=1.16.0",
    "transformers>=4.30.0",
    "torch>=2.0.0",
]
# Voice training (includes voice deps + training extras)
# The first entry is a self-reference that pulls in the [voice] extra.
voice-train = [
    "headroom-ai[voice]",
    "datasets>=2.14.0",
    "accelerate>=0.20.0",
]
# Evaluation framework
# NOTE(review): the openai floor here (>=1.0.0) is lower than [proxy]'s
# (>=2.14.0); the higher floor applies when both extras are installed.
evals = [
    "datasets>=2.14.0",
    "sentence-transformers>=2.2.0",
    "numpy>=1.24.0",
    "scikit-learn>=1.3.0",
    "anthropic>=0.18.0",
    "openai>=1.0.0",
]
# AWS Bedrock backend
bedrock = [
    "boto3>=1.28.0",
]
# HTML content extraction
html = [
    "trafilatura>=1.6.0",
]
# Comprehensive LLM benchmarks
benchmark = [
    "lm-eval[api]>=0.4.0",
    "openai>=1.0.0",
    "anthropic>=0.18.0",
]
# Development dependencies: test runner and plugins, linters/type checker,
# plus assorted optional runtime packages.
# The core requirements from `[project].dependencies` — including the exact
# `litellm` pin — are installed together with any extra, so they are
# deliberately not repeated here: a second copy of an `==` pin would have to
# be bumped in lockstep, and a mismatch would make this extra unresolvable.
dev = [
    "pytest>=7.0.0",
    "pytest-cov>=4.0.0",
    "pytest-asyncio>=0.21.0",
    "ruff>=0.1.0",
    "mypy>=1.0.0",
    "pre-commit>=3.0.0",
    "openai>=1.0.0",
    "anthropic>=0.18.0",
    "fastapi>=0.100.0",
    "uvicorn>=0.23.0",
    "httpx[http2]>=0.24.0",
    "websockets>=13.0",
    "opentelemetry-sdk>=1.24.0",
    "opentelemetry-exporter-otlp-proto-http>=1.24.0",
    "ollama>=0.4.0",
    "langchain-ollama>=0.2.0",
    "hnswlib>=0.8.0",
    "sqlite-vec>=0.1.6",
    "sentence-transformers>=2.2.0",
    "numpy>=1.24.0",
]
# Convenience bundle of the main optional dependencies, expressed as a
# self-reference to this package's own extras.
# Not included: memory-stack, anyllm, langchain, agno, strands, bedrock,
# voice-train, dev — request those explicitly when needed.
# NOTE(review): confirm these omissions are intentional.
all = [
    "headroom-ai[proxy,code,ml,memory,relevance,image,reports,otel,evals,voice,html,benchmark,mcp]",
]

[project.scripts]
# Console entry point: the `headroom` command invokes `main` from the
# `headroom.cli` module.
headroom = "headroom.cli:main"

[project.urls]
# Project links. Documentation is served from headroom-docs.vercel.app;
# source, issue tracker and changelog point at the GitHub repository.
Homepage = "https://headroom-docs.vercel.app"
Documentation = "https://headroom-docs.vercel.app/docs"
Repository = "https://github.com/chopratejas/headroom"
Issues = "https://github.com/chopratejas/headroom/issues"
Changelog = "https://github.com/chopratejas/headroom/blob/main/CHANGELOG.md"
# llms.txt convention (llmstxt.org) — point AI agents / LLM crawlers
# at the auto-generated docs index so they can resolve install paths
# and entry points without a follow-up fetch.
# The key is quoted because its label contains spaces and a slash, which
# bare TOML keys do not allow.
"AI / LLM Index" = "https://headroom-docs.vercel.app/llms.txt"

# Packaging note (maturin itself is configured under `[tool.maturin]` below):
# Maturin builds a single wheel containing both the Python source under
# `headroom/` AND the compiled Rust extension `headroom/_core.so` (cdylib
# from `crates/headroom-py`). One `pip install headroom-ai` ships everything
# atomically — no separate `headroom-core-py` package, no chicken-and-egg,
# no PIP_FIND_LINKS plumbing. Phase A0's runtime fail-loud check still
# exists but only fires if someone forces an sdist install on a platform
# without a wheel and the Rust toolchain isn't available to compile it.
#
# uv package index: always resolve against public PyPI.
# Left unset, `uv lock` picks up whatever index the developer configured in
# their user-level `~/.config/uv/uv.toml` — possibly a private/internal
# mirror such as `pypi.netflix.net/simple` — and writes those URLs into
# uv.lock, after which CI breaks on every public runner that cannot reach
# the mirror. Declaring the index here makes the project authoritative no
# matter who runs `uv lock`.
[[tool.uv.index]]
default = true
name = "pypi"
url = "https://pypi.org/simple/"

[tool.maturin]
# Rust side. The cdylib crate lives under `crates/headroom-py`; maturin runs
# `cargo build` against this manifest and injects the resulting library into
# the wheel as `headroom/_core.so`.
manifest-path = "crates/headroom-py/Cargo.toml"
module-name = "headroom._core"
# Always build with the crate's `extension-module` feature — a bare
# `cargo build` without it won't produce a usable Python extension.
features = ["extension-module"]
# "pyo3" is maturin's default binding type and is the correct one here
# (see `crates/headroom-py/src/`); it is spelled out explicitly.
bindings = "pyo3"

# Python side. With the source root at "." and the package directory
# `headroom/` at the repo root, every file under `headroom/` goes into the
# wheel, which covers the dashboard HTML templates and bundled YAML configs.
python-source = "."

# sdist-only additions. Sdists do not get the package-directory treatment
# that wheels do, and PEP 639 auto-discovery emits both files below into the
# `License-File:` metadata. PyPI rejects an sdist whose declared license
# files are missing from the tarball (`400 License-File X does not exist in
# distribution file`), so they are listed explicitly.
include = [
    { path = "LICENSE", format = "sdist" },
    { path = "NOTICE", format = "sdist" },
]

# Ruff linter and formatter settings.
[tool.ruff]
line-length = 100
target-version = "py310"

[tool.ruff.lint]
# Enabled rule families:
#   E    pycodestyle errors
#   W    pycodestyle warnings
#   F    pyflakes
#   I    isort
#   B    flake8-bugbear
#   C4   flake8-comprehensions
#   UP   pyupgrade
select = ["E", "W", "F", "I", "B", "C4", "UP"]
# Suppressed rules:
#   E501   line too long (handled by formatter)
#   B008   do not perform function calls in argument defaults
#   B905   zip without strict parameter
ignore = ["E501", "B008", "B905"]

[tool.ruff.lint.isort]
known-first-party = [
    "headroom",
]

[tool.ruff.format]
indent-style = "space"
quote-style = "double"

# mypy baseline: every definition must be typed; the override tables that
# follow relax individual checks for specific modules.
[tool.mypy]
python_version = "3.10"
disallow_untyped_defs = true
ignore_missing_imports = true
warn_return_any = true
warn_unused_configs = true

# Override 1 — modules with dynamic typing patterns: untyped defs allowed.
[[tool.mypy.overrides]]
disallow_untyped_defs = false
module = [
    "headroom.proxy.server",
    "headroom.proxy.cost",
    "headroom.proxy.prometheus_metrics",
    "headroom.proxy.semantic_cache",
    "headroom.proxy.rate_limiter",
    "headroom.proxy.request_logger",
    "headroom.proxy.helpers",
    "headroom.integrations.langchain",
    "headroom.integrations.mcp",
    "headroom.ccr.mcp_server",
    "headroom.relevance.embedding",
    "headroom.reporting.generator",
]

# Override 2 — untyped defs allowed and `Any` returns not reported.
[[tool.mypy.overrides]]
disallow_untyped_defs = false
warn_return_any = false
module = ["headroom.tokenizers.*", "headroom.providers.litellm", "headroom.providers.google"]

# Override 3 — handler mixins reach `self.*` attributes of HeadroomProxy
# through duck typing, which mypy can't resolve; errors are ignored entirely.
[[tool.mypy.overrides]]
ignore_errors = true
disallow_untyped_defs = false
module = [
    "headroom.proxy.handlers.*",
]

# Override 4 — third-party stubs that contain syntax errors.
[[tool.mypy.overrides]]
ignore_errors = true
module = [
    "mlx.*",
]

# pytest: collect `test_*` functions from `test_*.py` files under `tests/`,
# with verbose output and short tracebacks.
[tool.pytest.ini_options]
addopts = "-v --tb=short"
asyncio_mode = "auto"
testpaths = [
    "tests",
]
python_files = [
    "test_*.py",
]
python_functions = [
    "test_*",
]
# Custom markers registered for this suite.
markers = [
    "slow: slow tests (model loads, large fixtures)",
    "real_llm: tests that hit real LLM APIs; skipped unless explicitly enabled",
    "live: opt-in multi-turn tests that hit real upstream APIs; require provider keys",
]

# coverage.py: measure the `headroom` package with branch coverage, leaving
# out the CLI module and anything under a `tests` directory.
[tool.coverage.run]
branch = true
source = [
    "headroom",
]
omit = ["headroom/cli.py", "*/tests/*"]

# Lines matching any of these patterns are left out of the coverage report.
[tool.coverage.report]
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "raise NotImplementedError",
    "if TYPE_CHECKING:",
    "if __name__ == .__main__.:",
]