pyproject.toml
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"
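# With a hatchling backend, any PEP 517 frontend can build the sdist and
# wheel, e.g.: python -m build  (assumes the `build` package is installed)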

[project]
name = "headroom-ai"
version = "0.9.1"
description = "The Context Optimization Layer for LLM Applications - Cut costs by 50-90%"
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.10"
authors = [
    { name = "Headroom Contributors" }
]
maintainers = [
    { name = "Headroom Contributors" }
]
keywords = [
    "llm",
    "openai",
    "anthropic",
    "claude",
    "gpt",
    "context",
    "token",
    "optimization",
    "compression",
    "caching",
    "proxy",
    "ai",
    "machine-learning",
]
classifiers = [
    "Development Status :: 4 - Beta",
    "Intended Audience :: Developers",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Programming Language :: Python :: 3",
    "Programming Language :: Python :: 3.10",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Software Development :: Libraries :: Python Modules",
    "Typing :: Typed",
]
dependencies = [
    # Core: lightweight compression (SmartCrusher, ContentRouter, CCR, TOIN)
    "tiktoken>=0.5.0",  # Tokenizer for all compressors
    "pydantic>=2.0.0",  # Config and data models
    "litellm==1.82.3",  # Model registry, pricing, and provider support
    "click>=8.1.0",  # CLI framework
    "rich>=13.0.0",  # Rich terminal output
    "opentelemetry-api>=1.24.0",  # Safe no-op OTEL API for instrumentation
    "ast-grep-cli>=0.30.0",  # AST-aware code slicing (CodeCompressor); binary wheel
    "tomli>=2.0.0; python_version < '3.11'",  # tomllib backport for helper scripts
]
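# The base install pulls in only the core list above; proxy, ML, and memory
# features live in the extras below:
#   pip install headroom-ai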

[project.optional-dependencies]
# Proxy server (most common install: pip install headroom-ai[proxy])
proxy = [
    "fastapi>=0.100.0",
    "uvicorn>=0.23.0",
    "httpx[http2]>=0.24.0",
    "openai>=2.14.0",  # OpenAI API format support
    "mcp>=1.0.0",  # MCP server (headroom_compress, retrieve, stats)
    "magika>=0.6.0",  # ML content detection for ContentRouter
    "zstandard>=0.20.0",  # Decompress zstd request bodies (Codex, etc.)
    "websockets>=13.0",  # WebSocket proxy for /v1/responses (Codex gpt-5.4+)
    "onnxruntime>=1.16.0",  # Kompress ONNX INT8 text compression (no torch needed)
    "transformers>=4.30.0",  # Tokenizer only (for Kompress)
    "watchdog>=4.0.0",  # File watcher for live code graph reindexing (--code-graph)
    "sqlite-vec>=0.1.6",  # Vector index for memory (--memory). Lightweight, no torch.
]
# AST-based code compression (tree-sitter)
code = [
    "tree-sitter-language-pack>=0.10.0",
]
# ML-based compression with Kompress (ModernBERT)
ml = [
    "torch>=2.0.0",
    "transformers>=4.30.0",
]
# Legacy ML compression (LLMLingua-2 — use [ml] instead for Kompress)
llmlingua = [
    "llmlingua>=0.2.0",
    "torch>=2.0.0",
    "transformers>=4.30.0",
]
# Memory system (hierarchical memory with vector search)
memory = [
    "hnswlib>=0.8.0",
    "sqlite-vec>=0.1.6",
    "sentence-transformers>=2.2.0",
]
# Qdrant + Neo4j memory backend helpers
memory-stack = [
    "mem0ai>=0.1.100",
    "qdrant-client>=1.9.0",
    "neo4j>=5.20.0",
]
# Semantic relevance scoring with embeddings.
# Uses `fastembed` (BAAI/bge-small-en-v1.5 by default — 33M params,
# 384 dims, ~30 MB int8-quantized ONNX). Same library + model used by
# the Rust SmartCrusher (`fastembed` crate), giving byte-equal embeddings
# across the language boundary. Replaced sentence-transformers in
# Stage 3c.1 — fastembed is faster (~2-3x), smaller (no torch
# dependency), and outranks all-MiniLM-L6-v2 on MTEB by ~6 points.
relevance = [
    "fastembed>=0.4.0",
    "numpy>=1.24.0",
]
# Image compression (ML-based routing + OCR)
image = [
    "pillow>=10.0.0",
    "sentencepiece>=0.1.99",  # Required by SigLIP tokenizer (SiglipTokenizer)
    "rapidocr-onnxruntime>=1.4.0",  # ONNX-native OCR for text extraction from images (~15MB models)
]
# Report generation
reports = [
    "jinja2>=3.0.0",
]
# OpenTelemetry metrics export
otel = [
    "opentelemetry-sdk>=1.24.0",
    "opentelemetry-exporter-otlp-proto-http>=1.24.0",
]
# any-llm multi-provider backend (requires Python 3.11+)
anyllm = [
    "any-llm-sdk>=1.0.0; python_version >= '3.11'",
]
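# Note: per PEP 508 marker semantics, on Python 3.10 the marker above filters
# the dependency out, so installing headroom-ai[anyllm] there adds nothing.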
# LangChain integration
langchain = [
    "langchain-core>=0.2.0",
    "langchain-openai>=0.1.0",
]
# Agno agent framework integration
agno = [
    "agno>=1.0.0",
]
# AWS Strands Agents SDK integration
strands = [
    "strands-agents>=0.1.0",
]
# MCP server for Claude Code integration
mcp = [
    "mcp>=1.0.0",
    "httpx>=0.24.0",
]
# Voice filler detection
voice = [
    "onnxruntime>=1.16.0",
    "transformers>=4.30.0",
    "torch>=2.0.0",
]
# Voice training (includes voice deps + training extras)
voice-train = [
    "headroom-ai[voice]",
    "datasets>=2.14.0",
    "accelerate>=0.20.0",
]
# Evaluation framework
evals = [
    "datasets>=2.14.0",
    "sentence-transformers>=2.2.0",
    "numpy>=1.24.0",
    "scikit-learn>=1.3.0",
    "anthropic>=0.18.0",
    "openai>=1.0.0",
]
# AWS Bedrock backend
bedrock = [
    "boto3>=1.28.0",
]
# HTML content extraction
html = [
    "trafilatura>=1.6.0",
]
# Comprehensive LLM benchmarks
benchmark = [
    "lm-eval>=0.4.0",
    "openai>=1.0.0",
    "anthropic>=0.18.0",
]
# Development dependencies
dev = [
    "pytest>=7.0.0",
    "pytest-cov>=4.0.0",
    "pytest-asyncio>=0.21.0",
    "ruff>=0.1.0",
    "mypy>=1.0.0",
    "pre-commit>=3.0.0",
    "openai>=1.0.0",
    "anthropic>=0.18.0",
    "litellm==1.82.3",
    "fastapi>=0.100.0",
    "uvicorn>=0.23.0",
    "httpx[http2]>=0.24.0",
    "websockets>=13.0",
    "opentelemetry-sdk>=1.24.0",
    "opentelemetry-exporter-otlp-proto-http>=1.24.0",
    "ollama>=0.4.0",
    "langchain-ollama>=0.2.0",
    "hnswlib>=0.8.0",
    "sqlite-vec>=0.1.6",
    "sentence-transformers>=2.2.0",
    "numpy>=1.24.0",
]
# Bundle of the core runtime extras. Note: excludes dev, voice-train, and the
# provider/framework-specific extras (llmlingua, memory-stack, anyllm,
# langchain, agno, strands, bedrock).
all = [
    "headroom-ai[proxy,code,ml,memory,relevance,image,reports,otel,evals,voice,html,benchmark,mcp]",
]
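# The self-referencing "headroom-ai[...]" spec resolves against this same
# package, so extras compose without duplicating pins. Typical install
# (quote the brackets under zsh):
#   pip install "headroom-ai[all]"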

[project.scripts]
headroom = "headroom.cli:main"
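# Installs the `headroom` console command (a Click app per the core deps, so
# --help is generated automatically):
#   headroom --help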

[project.urls]
Homepage = "https://github.com/chopratejas/headroom"
Documentation = "https://github.com/chopratejas/headroom#readme"
Repository = "https://github.com/chopratejas/headroom"
Issues = "https://github.com/chopratejas/headroom/issues"
Changelog = "https://github.com/chopratejas/headroom/blob/main/CHANGELOG.md"

[tool.hatch.build.targets.wheel]
packages = ["headroom"]
# Include non-Python files (dashboard templates, etc.)
artifacts = [
    "headroom/dashboard/templates/*.html",
]

[tool.hatch.build.targets.sdist]
include = [
    "/headroom",
    "/tests",
    "/LICENSE",
    "/NOTICE",
    "/README.md",
    "/CHANGELOG.md",
]

[tool.ruff]
target-version = "py310"
line-length = 100

[tool.ruff.lint]
select = [
    "E",   # pycodestyle errors
    "W",   # pycodestyle warnings
    "F",   # pyflakes
    "I",   # isort
    "B",   # flake8-bugbear
    "C4",  # flake8-comprehensions
    "UP",  # pyupgrade
]
ignore = [
    "E501",  # line too long (handled by formatter)
    "B008",  # do not perform function calls in argument defaults
    "B905",  # zip without strict parameter
]

[tool.ruff.lint.isort]
known-first-party = ["headroom"]

[tool.ruff.format]
quote-style = "double"
indent-style = "space"
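# Lint and format against this config from the repo root:
#   ruff check . && ruff format .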

[tool.mypy]
python_version = "3.10"
warn_return_any = true
warn_unused_configs = true
disallow_untyped_defs = true
ignore_missing_imports = true

# Per-module overrides for modules with dynamic typing patterns
[[tool.mypy.overrides]]
module = [
    "headroom.proxy.server",
    "headroom.proxy.cost",
    "headroom.proxy.prometheus_metrics",
    "headroom.proxy.semantic_cache",
    "headroom.proxy.rate_limiter",
    "headroom.proxy.request_logger",
    "headroom.proxy.helpers",
    "headroom.integrations.langchain",
    "headroom.integrations.mcp",
    "headroom.ccr.mcp_server",
    "headroom.relevance.embedding",
    "headroom.reporting.generator",
]
disallow_untyped_defs = false

[[tool.mypy.overrides]]
module = [
    "headroom.tokenizers.*",
    "headroom.providers.litellm",
    "headroom.providers.google",
]
disallow_untyped_defs = false
warn_return_any = false

# Handler mixins use self.* from HeadroomProxy via duck typing — mypy can't resolve these
[[tool.mypy.overrides]]
module = ["headroom.proxy.handlers.*"]
disallow_untyped_defs = false
ignore_errors = true

# Ignore third-party stubs with syntax errors
[[tool.mypy.overrides]]
module = ["mlx.*"]
ignore_errors = true
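# Type-check the package (the overrides above relax strictness where the code
# relies on dynamic typing):
#   mypy headroom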

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = ["test_*.py"]
python_functions = ["test_*"]
addopts = "-v --tb=short"
asyncio_mode = "auto"
markers = [
    "slow: slow tests (model loads, large fixtures)",
    "real_llm: tests that hit real LLM APIs; skipped unless explicitly enabled",
]
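# Typical local run, excluding the opt-in marker groups defined above:
#   pytest -m "not slow and not real_llm"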

[tool.coverage.run]
source = ["headroom"]
branch = true
omit = [
    "headroom/cli.py",
    "*/tests/*",
]

[tool.coverage.report]
exclude_lines = [
    "pragma: no cover",
    "def __repr__",
    "raise NotImplementedError",
    "if TYPE_CHECKING:",
    "if __name__ == .__main__.:",
]
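# Collect coverage (with branch = true above) via the pytest-cov dev
# dependency, which picks up this config:
#   pytest --cov=headroom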