# Source: hecate/.env.example at master · hecatehq/hecate · GitHub
# Hecate runtime configuration (example file).
# Copy this file to `.env` and keep your real secrets there, not in this example.
# `just dev` sources `.env` directly.

# Server
# GATEWAY_ADDRESS is the host:port the gateway listens on. It is the
# client-facing gateway address for Codex/Claude-style traffic.
# The PROVIDER_*_BASE_URL values below are upstream model endpoints, not this gateway URL.
# Binds to loopback by default. Override to 0.0.0.0:8765 to expose on
# all interfaces (and put a reverse proxy / firewall in front of it).
# The Docker image overrides to 0.0.0.0 because the container's network
# is itself the boundary.
GATEWAY_ADDRESS=127.0.0.1:8765

# Optional client-facing URL written to `hecate.runtime.json` for helper
# processes such as `hecate-acp`. Leave empty for normal local dev; the gateway
# then derives http://127.0.0.1:<port> from GATEWAY_ADDRESS. Set this when the
# listen address is not the URL clients should use, e.g. with Docker or behind
# a reverse proxy.
# GATEWAY_PUBLIC_URL=http://127.0.0.1:8765

# GATEWAY_DATA_DIR (DataDir) is where the gateway puts auto-generated state.
# The bootstrap file stores the generated settings encryption key here unless
# GATEWAY_BOOTSTRAP_FILE is set explicitly.
#
# Leave GATEWAY_DATA_DIR commented out unless you need to override it:
#   - Local source dev (`just dev`, `just run`): defaults to `.data` —
#     relative to the repo root, gitignored.
#   - Docker (`docker compose up`): the image bakes in `/data` as the
#     mounted volume; setting this here would override that and break
#     the bootstrap-file path inside the container.
# GATEWAY_DATA_DIR=.data
# NOTE(review): an empty value presumably counts as "not set explicitly",
# so the bootstrap file stays at its default location — confirm.
GATEWAY_BOOTSTRAP_FILE=

# Settings store
# Backend: memory | sqlite. `sqlite` opts this subsystem into the shared
# database file (see GATEWAY_SQLITE_PATH).
GATEWAY_CONTROL_PLANE_BACKEND=memory
# NOTE(review): presumably the key/namespace the settings record is stored
# under — confirm.
GATEWAY_CONTROL_PLANE_KEY=control-plane
# Encryption key for persisted provider API keys. Leave empty to let the
# gateway auto-generate one on first run (also stored in the bootstrap
# file). Set explicitly to override.
GATEWAY_CONTROL_PLANE_SECRET_KEY=

# Providers
# `PROVIDER_<NAME>_*` env vars seed the runtime provider registry on
# boot and are also auto-imported into the persisted Providers tab so
# operators can see and manage them through the UI. On subsequent
# boots the auto-import skips any provider already present in the
# Providers tab, so operator edits made via the UI are never
# overwritten by environment values. Env vars are a first-boot
# convenience; the Providers tab is the source of truth thereafter.
# See docs/providers.md for the full lifecycle.
#
# For client integration recipes (Codex / Claude Code), see docs/client-integration.md.
#
# Built-in preset names (<NAME> is the preset name in upper case, e.g.
# together_ai -> PROVIDER_TOGETHER_AI_*):
#   anthropic, deepseek, gemini, groq, llamacpp, lmstudio, localai, mistral, ollama, openai, perplexity, together_ai, xai
#
# Runtime support today:
#   - OpenAI-compatible chat + /v1/models discovery
#   - Anthropic native Messages API + /v1/models discovery
#
# Model lists are not configured in env. Hecate discovers provider catalogs
# from provider APIs and falls back to the DEFAULT_MODEL settings below
# when discovery is unavailable.
#
GATEWAY_DEFAULT_MODEL=gpt-5.4-mini

# Cloud providers
# One API key and one default model per preset. The keys are intentionally
# blank in this example; put real keys in `.env` only.
PROVIDER_ANTHROPIC_API_KEY=
PROVIDER_ANTHROPIC_DEFAULT_MODEL=claude-sonnet-4-6

PROVIDER_DEEPSEEK_API_KEY=
PROVIDER_DEEPSEEK_DEFAULT_MODEL=deepseek-chat

PROVIDER_GEMINI_API_KEY=
PROVIDER_GEMINI_DEFAULT_MODEL=gemini-2.5-flash

PROVIDER_GROQ_API_KEY=
PROVIDER_GROQ_DEFAULT_MODEL=llama-3.3-70b-versatile

PROVIDER_MISTRAL_API_KEY=
PROVIDER_MISTRAL_DEFAULT_MODEL=mistral-small-latest

PROVIDER_OPENAI_API_KEY=
PROVIDER_OPENAI_DEFAULT_MODEL=gpt-5.4-mini

PROVIDER_PERPLEXITY_API_KEY=
PROVIDER_PERPLEXITY_DEFAULT_MODEL=sonar

PROVIDER_TOGETHER_AI_API_KEY=
PROVIDER_TOGETHER_AI_DEFAULT_MODEL=meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo

PROVIDER_XAI_API_KEY=
PROVIDER_XAI_DEFAULT_MODEL=grok-3-mini

# Local providers
# Set PROVIDER_<NAME>_BASE_URL to point at each local runtime's endpoint.
# NOTE: the LLAMACPP and LOCALAI URLs below both use port 8080; change one
# of them if you run both runtimes on the same host.
PROVIDER_LLAMACPP_BASE_URL=http://127.0.0.1:8080/v1
PROVIDER_LMSTUDIO_BASE_URL=http://127.0.0.1:1234/v1
PROVIDER_LOCALAI_BASE_URL=http://127.0.0.1:8080/v1
PROVIDER_OLLAMA_BASE_URL=http://127.0.0.1:11434/v1

# Provider execution
# Retry and failover knobs.
# NOTE(review): the duration values here (200ms, 30s) presumably use the same
# Go duration syntax as the other duration settings in this file — confirm.
GATEWAY_PROVIDER_MAX_ATTEMPTS=2
GATEWAY_PROVIDER_RETRY_BACKOFF=200ms
GATEWAY_PROVIDER_FAILOVER_ENABLED=true
# Failure threshold + cooldown drive the healthy/unhealthy state.
GATEWAY_PROVIDER_HEALTH_FAILURE_THRESHOLD=3
GATEWAY_PROVIDER_HEALTH_COOLDOWN=30s
# Mark a provider `degraded` (route diagnostics surface it as
# `provider_slow`) when successful calls take at least this long.
# Degraded providers stay routable but lose to healthy peers in router
# scoring. 0 disables the latency tier; healthy/unhealthy is then driven
# only by the failure threshold + cooldown above. See docs/providers.md.
GATEWAY_PROVIDER_HEALTH_LATENCY_DEGRADED_THRESHOLD=0

# Provider health history store. Backs the persisted event log behind
# GET /hecate/v1/providers/history (state transitions: success, slow_success,
# failure, cooldown_opened, cooldown_recovered, failover_triggered,
# failover_selected). `memory` resets on restart; `sqlite` persists
# across restarts. GATEWAY_PROVIDER_HISTORY_LIMIT is the default page size
# for that endpoint.
GATEWAY_PROVIDER_HISTORY_BACKEND=memory
GATEWAY_PROVIDER_HISTORY_LIMIT=100

# Anthropic prompt-cache markers. When enabled (default), the Anthropic
# adapter auto-attaches `cache_control: {"type":"ephemeral"}` to the
# last `system` block and the last `tools` entry on every outbound
# Messages-API request. Anthropic then caches the static prefix
# (system instructions + tool catalog) and serves it back at ~10% of
# the fresh-input rate on subsequent requests in the same session.
# Long agent_loop runs and Hecate Chat threads see a 60-90% input-cost
# drop with no latency penalty. Caller-supplied cache_control on
# message blocks is preserved untouched.
# Set to `false` to disable auto-marking for cost-tier comparisons or
# when debugging a suspected cache-related upstream issue. Caching never
# harms correctness; the toggle exists for cost analysis, not safety.
GATEWAY_PROVIDER_ANTHROPIC_CACHE_ENABLED=true

# Chat sessions
# GATEWAY_CHAT_SESSIONS_BACKEND is the single selector for the entire
# agent-chat state bundle:
#   - regular chat sessions + messages
#   - external-adapter chat sessions + messages
#   - external-adapter approvals + grants
# These records move together so agent-chat state can't go split-brain
# (e.g. sessions on disk while approvals stay in memory). When set to
# `sqlite`, the gateway runs a startup reconcile pass that flips any
# pending approvals from a prior process to status=timed_out,
# path=startup_reconcile. Process-local waiters can't be resurrected,
# so the operator UI is never shown stale "actionable" rows.
GATEWAY_CHAT_SESSIONS_BACKEND=memory
# NOTE(review): KEY is presumably the storage key/namespace and LIMIT a
# list/page size — neither is described here; confirm.
GATEWAY_CHAT_SESSIONS_KEY=chat-sessions
GATEWAY_CHAT_SESSIONS_LIMIT=50

# Task runtime
# Use `sqlite` for durable task/run/step/artifact/approval/event state.
GATEWAY_TASKS_BACKEND=memory
# Comma-separated approval classes: shell_exec,git_exec,file_write,network_egress,read_file,all_tools
# The default gates shell, git, and file tasks. Operators who relied on git/file tasks
# running unattended must set this explicitly (e.g. shell_exec, or empty for no gates).
GATEWAY_TASK_APPROVAL_POLICIES=shell_exec,git_exec,file_write
# Task queue: backend, worker count, buffer size, and lease length.
# The lease length is in seconds (per the key name).
# NOTE(review): presumably this is the lease duration the reconciler below
# multiplies by 3 — confirm.
GATEWAY_TASK_QUEUE_BACKEND=memory
GATEWAY_TASK_QUEUE_WORKERS=1
GATEWAY_TASK_QUEUE_BUFFER=128
GATEWAY_TASK_QUEUE_LEASE_SECONDS=30
# How often the periodic reconciler scans for runs stuck in "running"
# past 3× the lease duration and re-queues them automatically.
# Accepts Go duration strings (e.g. "30s", "1m"). Default 30s.
GATEWAY_TASK_RECONCILE_INTERVAL=30s
# Maximum concurrent tasks per tenant (per the key name). 0 means unlimited.
GATEWAY_TASK_MAX_CONCURRENT_PER_TENANT=0
# Cap on agent_loop LLM round-trips per run. Runaway-cost safety net.
GATEWAY_TASK_AGENT_LOOP_MAX_TURNS=8
# Cap on how many `mcp_servers` an agent_loop task may declare. Each
# entry produces one MCP client (subprocess for stdio, persistent
# connection for http) so this is a per-task resource ceiling, not a
# global one.
GATEWAY_TASK_MAX_MCP_SERVERS_PER_TASK=16
# Shared MCP client cache amortizes subprocess spawn cost across runs
# that share the same upstream config.
# _MAX_ENTRIES: distinct cached upstreams the cache holds at once;
# inserts at-or-over the cap evict the least-recently-used entry.
# _PING_INTERVAL: how often the cache proactively pings idle entries
# to detect wedged-but-alive subprocesses; 0 disables the loop.
# _PING_TIMEOUT: per-ping deadline; failure or timeout evicts.
GATEWAY_TASK_MCP_CLIENT_CACHE_MAX_ENTRIES=256
GATEWAY_TASK_MCP_CLIENT_CACHE_PING_INTERVAL=60s
GATEWAY_TASK_MCP_CLIENT_CACHE_PING_TIMEOUT=5s
# Global agent_loop system prompt — broadest layer of the three-layer
# composition (global → workspace CLAUDE.md|AGENTS.md → per-task).
# Empty disables the global layer; the others still apply.
# NOTE(review): `just dev` sources `.env`, so a multi-word prompt presumably
# needs shell quoting — confirm how other loaders treat the quotes.
GATEWAY_TASK_AGENT_SYSTEM_PROMPT=

# External-adapter approval mode (Codex / Claude Code / Cursor Agent).
# One of:
#   prompt — ask the operator (default; safe).
#   auto   — auto-approve every adapter RequestPermission. Danger mode,
#            kept for batch / CI / smoke runs. Logged at WARN on startup.
#   deny   — auto-reject every adapter RequestPermission. For audit /
#            compliance use.
# In prompt mode, approval requests appear in Chats and can also be
# resolved through the Agent Chat approval REST endpoints. If nobody
# resolves the request before the timeout below, the adapter receives
# ACP Cancelled. Operators who depend on headless auto-approve behavior
# must set this to `auto` explicitly.
GATEWAY_AGENT_ADAPTER_APPROVAL_MODE=prompt
# How long a pending approval waits before resolving to ACP Cancelled.
# Accepts Go duration strings (e.g. "5m", "30s"). Default 5m.
GATEWAY_AGENT_ADAPTER_APPROVAL_TIMEOUT=5m
# Per-session turn ceiling for agent-chat sessions. 0 = unlimited (default).
# When exceeded, POST /hecate/v1/agent-chat/sessions/{id}/messages returns HTTP 422
# with code "agent_chat.session_limit_exceeded".
GATEWAY_AGENT_CHAT_MAX_TURNS_PER_SESSION=0
# Optional wall-clock ceiling for an agent-chat session. 0s = unlimited
# (default). When exceeded, POST /hecate/v1/agent-chat/sessions/{id}/messages returns
# HTTP 422 with code "agent_chat.session_duration_limit_exceeded".
GATEWAY_AGENT_CHAT_MAX_SESSION_DURATION=0s
# Optional idle auto-close timeout for agent-chat sessions. 0s disables both
# the sweeper and the request-time idle guard (default). If a session exceeds
# the timeout before a sweep runs,
# POST /hecate/v1/agent-chat/sessions/{id}/messages returns HTTP 422 with code
# "agent_chat.session_idle_timeout"; the sweeper closes stale sessions as
# cancelled.
GATEWAY_AGENT_CHAT_IDLE_TIMEOUT=0s
# Optional override for the directory of Hecate-managed ACP launcher scripts
# used by Codex / Claude Code when direct adapter binaries are not on PATH.
# Empty uses the platform user-cache location.
HECATE_AGENT_ADAPTERS_DIR=

# Rate limiting — process-local token bucket on /v1/chat/completions
# and /v1/messages. Off by default. When enabled, every response
# (allowed or 429) carries X-RateLimit-Limit / X-RateLimit-Remaining /
# X-RateLimit-Reset. In the single-user no-auth runtime, requests share
# the same local-operator bucket. See the "Rate-limit headers" section of
# docs/runtime-api.md for the header shape.
GATEWAY_RATE_LIMIT_ENABLED=false
# Requests per minute.
GATEWAY_RATE_LIMIT_RPM=60
# Burst size. 0 falls back to RPM (one minute's worth of refill as the burst).
GATEWAY_RATE_LIMIT_BURST=0

# agent_loop `http_request` tool — outbound HTTP from agents.
# Defaults are conservative; broaden by config.
GATEWAY_TASK_HTTP_TIMEOUT=30s
# Response size cap in bytes (262144 = 256 KiB).
GATEWAY_TASK_HTTP_MAX_RESPONSE_BYTES=262144
# Private IPs (10/8, 172.16/12, 192.168/16, loopback, link-local) are
# blocked while this is false. Set to true to permit agents to reach
# internal sidecars / the gateway's own admin API. Document the threat
# model before flipping this on.
GATEWAY_TASK_HTTP_ALLOW_PRIVATE_IPS=false
# Comma-separated allowlist of exact hostnames the agent can reach.
# Empty = all public hosts allowed. No subdomain wildcarding.
GATEWAY_TASK_HTTP_ALLOWED_HOSTS=

# Sandbox per-call output cap (Layer 1 defensive hardening).
# Applied to each shell / git / file subprocess the gateway spawns.
# The cap covers combined stdout+stderr per command, in bytes. Commands
# that exceed it are killed and return an error. 0 disables the cap.
# Default: 4 MiB (4194304 bytes). See docs/sandbox.md.
GATEWAY_TASK_MAX_OUTPUT_BYTES=4194304
#
# CPU / file-descriptor / address-space caps are not configured here —
# RLIMIT_* applied per-call would shrink the long-running gateway
# itself. Run the gateway under systemd (CPUQuota=, LimitNOFILE=,
# MemoryMax=) or in a container (docker run --cpus= --memory=) to cap
# the gateway and every subprocess it spawns.

# Layer 2 (OS-level isolation via bwrap / sandbox-exec) is auto-detected
# at gateway startup — no env var. On Linux it activates when
# /usr/bin/bwrap is present and a probe call succeeds; on macOS it is
# always active (sandbox-exec ships on every macOS install). The active
# mode is logged at startup and exposed on /healthz under
# sandbox.os_isolation. See docs/sandbox.md for the layer model.

# Shell network egress — applies to shell_exec / git_exec when a
# task has SandboxNetwork=true. The default deny-all gate stays in
# force when SandboxNetwork=false; these settings only refine WHICH
# destinations are reachable when network IS allowed. Same semantics
# as the GATEWAY_TASK_HTTP_* knobs above. Enforcement is best-effort
# static URL parsing, so clever obfuscation can bypass it.
GATEWAY_TASK_SHELL_ALLOW_PRIVATE_IPS=false
GATEWAY_TASK_SHELL_ALLOWED_HOSTS=

# Governance / balance tracking
# NOTE(review): the keys in this section carry no documentation in this
# file. The notes below are read off the key names and values only —
# confirm against the project docs before relying on them.
GATEWAY_DENY_ALL=false
GATEWAY_MAX_PROMPT_TOKENS=64000
# Micro-USD, going by the key name: 5000000 would be 5 USD.
GATEWAY_MAX_BUDGET_MICROS_USD=5000000
GATEWAY_MODEL_REWRITE_TO=
GATEWAY_BUDGET_BACKEND=memory
GATEWAY_BUDGET_KEY=global
GATEWAY_BUDGET_SCOPE=global
GATEWAY_BUDGET_TENANT_FALLBACK=anonymous
GATEWAY_ROUTE_MODE=any
# Allow / deny lists, all empty in this example. Presumably comma-separated
# like the other list settings in this file — confirm.
GATEWAY_ALLOWED_PROVIDERS=
GATEWAY_DENIED_PROVIDERS=
GATEWAY_ALLOWED_MODELS=
GATEWAY_DENIED_MODELS=
GATEWAY_ALLOWED_PROVIDER_KINDS=
# Presumably percentages of the budget at which warnings fire — confirm.
GATEWAY_BUDGET_WARNING_THRESHOLDS=50,80,95
GATEWAY_BUDGET_HISTORY_LIMIT=20

# Retention
# Sweep that prunes old records; disabled in this example.
# MAX_AGE values are ages in hours (24h = 1 day, 168h = 7 days,
# 720h = 30 days); MAX_COUNT values are row caps.
# NOTE(review): INTERVAL is presumably the time between sweeps — confirm.
GATEWAY_RETENTION_ENABLED=false
GATEWAY_RETENTION_INTERVAL=15m
GATEWAY_RETENTION_HISTORY_BACKEND=memory
GATEWAY_RETENTION_TRACES_MAX_AGE=24h
GATEWAY_RETENTION_TRACES_MAX_COUNT=2000
GATEWAY_RETENTION_BUDGET_EVENTS_MAX_AGE=720h
GATEWAY_RETENTION_BUDGET_EVENTS_MAX_COUNT=200
GATEWAY_RETENTION_AUDIT_EVENTS_MAX_AGE=720h
GATEWAY_RETENTION_AUDIT_EVENTS_MAX_COUNT=500
# Provider health history rows accumulate one per state transition per
# provider (success/failure/cooldown/failover). The sweep deletes rows
# older than MAX_AGE and trims to the most-recent MAX_COUNT globally.
GATEWAY_RETENTION_PROVIDER_HISTORY_MAX_AGE=168h
GATEWAY_RETENTION_PROVIDER_HISTORY_MAX_COUNT=10000
# `turn.completed` events are emitted once per LLM round-trip,
# so a long agent run accumulates them quickly. The sweep deletes turn
# rows older than MAX_AGE, then trims to the most-recent MAX_COUNT
# globally. Other event types (run.started/finished, approval.*) are
# never touched. Set MAX_COUNT=0 to disable the count cap (TTL only).
GATEWAY_RETENTION_TURN_EVENTS_MAX_AGE=168h
GATEWAY_RETENTION_TURN_EVENTS_MAX_COUNT=100000
# External-adapter approval history. Only RESOLVED rows are pruned;
# pending rows are caller state, not history, and stay until the
# startup reconcile pass flips them. Operator-authored grants are
# NEVER pruned by this subsystem — only their own ExpiresAt drives
# deletion (operator intent outlives normal retention windows).
GATEWAY_RETENTION_AGENT_CHAT_APPROVALS_MAX_AGE=720h
GATEWAY_RETENTION_AGENT_CHAT_APPROVALS_MAX_COUNT=10000

# OpenTelemetry
# Service identity.
# NOTE(review): presumably these map to the OTel service.name /
# service.version / service.instance.id / deployment.environment resource
# attributes — confirm.
GATEWAY_OTEL_SERVICE_NAME=hecate-gateway
GATEWAY_OTEL_SERVICE_VERSION=
GATEWAY_OTEL_SERVICE_INSTANCE_ID=
GATEWAY_OTEL_DEPLOYMENT_ENVIRONMENT=
# Optional shared OTLP settings. When set, Hecate derives per-signal HTTP
# endpoints as /v1/traces, /v1/metrics, and /v1/logs. With grpc transport,
# the same host:port endpoint is used for every enabled signal.
GATEWAY_OTEL_ENDPOINT=
GATEWAY_OTEL_HEADERS=
GATEWAY_OTEL_TIMEOUT=5s
# Transport: http or grpc.
GATEWAY_OTEL_TRANSPORT=http
# Per-signal settings (traces / metrics / logs). All three signals are
# disabled in this example.
# NOTE(review): a per-signal ENDPOINT / HEADERS / TIMEOUT / TRANSPORT
# presumably overrides the shared value above when set — confirm.
GATEWAY_OTEL_TRACES_ENABLED=false
GATEWAY_OTEL_TRACES_ENDPOINT=
GATEWAY_OTEL_TRACES_HEADERS=
GATEWAY_OTEL_TRACES_TIMEOUT=5s
GATEWAY_OTEL_TRACES_TRANSPORT=http
GATEWAY_OTEL_TRACES_SAMPLER=parentbased_always_on
GATEWAY_OTEL_TRACES_SAMPLER_ARG=1.0
GATEWAY_OTEL_METRICS_ENABLED=false
GATEWAY_OTEL_METRICS_ENDPOINT=
GATEWAY_OTEL_METRICS_HEADERS=
GATEWAY_OTEL_METRICS_TIMEOUT=5s
GATEWAY_OTEL_METRICS_TRANSPORT=http
GATEWAY_OTEL_METRICS_INTERVAL=30s
# Optional SDK exemplar filter for trace-from-metric pivots in supporting backends:
# trace_based (default), always_on, or always_off.
GATEWAY_OTEL_METRICS_EXEMPLAR_FILTER=trace_based
GATEWAY_OTEL_LOGS_ENABLED=false
GATEWAY_OTEL_LOGS_ENDPOINT=
GATEWAY_OTEL_LOGS_HEADERS=
GATEWAY_OTEL_LOGS_TIMEOUT=5s
GATEWAY_OTEL_LOGS_TRANSPORT=http

# Trace body capture — when true, the gateway records (redacted)
# request and response bodies as span attributes. Off by default
# because bodies can be large and include PII. See docs/telemetry.md
# for the redaction rules and where the bodies surface.
GATEWAY_TRACE_BODIES=false
# Per-body cap in bytes when capture is on (4096 = 4 KiB). Bodies past
# this limit are truncated with an ellipsis marker on the span attribute.
GATEWAY_TRACE_BODY_MAX_BYTES=4096

# SQLite — single-file durable store. One file is shared across every
# subsystem that opts in via GATEWAY_*_BACKEND=sqlite. The parent
# directory is auto-created. Pure-Go driver, no CGO needed.
#
# Leave GATEWAY_SQLITE_PATH commented unless overriding (same logic as
# GATEWAY_DATA_DIR above): source dev defaults to `.data/hecate.db`,
# the docker image bakes in `/data/hecate.db`.
# GATEWAY_SQLITE_PATH=.data/hecate.db
GATEWAY_SQLITE_TABLE_PREFIX=hecate
GATEWAY_SQLITE_BUSY_TIMEOUT=5s

# Pricebook auto-import — when set to a positive duration, the gateway
# pulls fresh prices from LiteLLM on this interval. Manual rows are
# always preserved (operator-protected); only imported / new rows land.
# Empty or 0 disables auto-import. Examples: 24h, 6h, 30m.
GATEWAY_PRICEBOOK_AUTO_IMPORT_INTERVAL=

# Logging
# NOTE(review): the accepted level names are not listed in this file;
# confirm them against the gateway's logging docs.
LOG_LEVEL=INFO