Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion headroom/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,8 @@ class AnchorConfig:
# Tool outputs that are reference data and must NOT be compressed.
# Read/Glob/Grep contain exact file contents/search results the agent needs for edits.
# Write/Edit record what changes were made — compressing them causes duplicate/conflicting edits.
# Bash is NOT excluded — its outputs (build logs, test output) are ideal compression targets.
# Bash IS excluded by design: RTK (Rust Token Killer) handles Bash output
# compression upstream of headroom. Compressing here would double-compress.
DEFAULT_EXCLUDE_TOOLS: frozenset[str] = frozenset(
{
"Read",
Expand Down
27 changes: 25 additions & 2 deletions headroom/transforms/content_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -1243,20 +1243,34 @@ def eager_load_compressors(self) -> dict[str, str]:
status: dict[str, str] = {}

# 1. ML text compressor: Kompress
# Wrapper construction alone does NOT load the ONNX session — that
# happens lazily inside compress(). Run a tiny dummy compress() so
# _load_kompress() runs AND the ONNX graph is optimized at startup,
# not on the first real request (was costing ~3-9s of opt_ms).
if self.config.enable_kompress:
compressor = self._get_kompress()
if compressor:
logger.info("Kompress model pre-loaded at startup")
try:
compressor.compress("warmup " * 16)
logger.info("Kompress model pre-loaded at startup")
except Exception as exc:
logger.warning("Kompress warmup forward pass failed: %s", exc)
status["kompress"] = "enabled"
else:
status["kompress"] = "unavailable"

# 2. Magika content detector (avoids 100-200ms on first content detection)
# Singleton init alone defers the model's first inference cost; run a
# dummy identify_bytes so the predictor is fully warm.
try:
from ..compression.detector import _get_magika, _magika_available

if _magika_available():
_get_magika() # Initializes the singleton
_magika_inst = _get_magika()
try:
_magika_inst.identify_bytes(b"warmup")
except Exception as exc:
logger.debug("Magika warmup inference failed: %s", exc)
logger.info("Magika content detector pre-loaded at startup")
status["magika"] = "enabled"
else:
Expand Down Expand Up @@ -1299,6 +1313,15 @@ def eager_load_compressors(self) -> dict[str, str]:
except Exception as e:
logger.debug("Tree-sitter pre-load skipped: %s", e)
status["tree_sitter"] = "skipped"
# Force one parse + compress on the most common language so
# the AST visitor JIT paths are hot before request traffic.
try:
code_compressor.compress(
"def warmup():\n return 1\n",
language="python",
)
except Exception as exc:
logger.debug("Code-Aware warmup compress failed: %s", exc)
else:
status["code_aware"] = "not installed"

Expand Down
Loading