fbi-crime-analyst-agent/agent.yaml at main · redhat-ai-americas/fbi-crime-analyst-agent · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
# agent.yaml -- My Agent
#
# Framework: fipsagents (BaseAgent)
#   Monorepo: packages/fipsagents/   |  PyPI: pip install fipsagents
#   Docs:     see packages/fipsagents/README.md for the full API reference
#
# Configuration for a research-assistant agent built on BaseAgent.
# Every value supports ${VAR:-default} env-var substitution so the same
# file works unchanged for local development and OpenShift deployment.
#
# Local dev:  sensible defaults work out of the box -- just run the agent.
# Production: set env vars via ConfigMap / Secrets.  Override only what
#             differs from the defaults below.

# -- Agent identity ------------------------------------------------------------
#
# Name, description, and version for logging and the /v1/agent-info endpoint.
# Populated by /create-agent from AGENT_PLAN.md.
#
# Production env vars:
#   AGENT_NAME -- override the agent name per deployment
agent:
  name: ${AGENT_NAME:-fbi-crime-analyst}
  description: "FBI UCR crime data analyst — forecasts and historical analysis via MCP tools"
  version: 0.1.0

# -- LLM provider and generation settings ------------------------------------
#
# `provider` selects the LLM backend.  Defaults to "openai" (any OpenAI-
# compatible endpoint: vLLM, LlamaStack, llm-d).  Set to "anthropic",
# "bedrock", or "azure" to route traffic through the adapter sidecar at
# localhost:8081/v1.  The adapter sidecar must be enabled in
# chart/values.yaml (llm_adapter.enabled: true).
#
# When provider is non-openai, MODEL_ENDPOINT is ignored -- the framework
# routes traffic to the sidecar automatically.
#
# `name` is the model identifier passed to the endpoint (or adapter).
# `endpoint` points at vLLM, LlamaStack, llm-d, or any OpenAI-compatible API.
#
# Production env vars:
#   MODEL_PROVIDER  -- openai | anthropic | bedrock | azure
#   MODEL_ENDPOINT  -- vLLM / LlamaStack URL (ignored for non-openai providers)
#   MODEL_NAME      -- deployed model identifier
#   OPENAI_API_KEY  -- required by the openai SDK even for unauthenticated vLLM
#                      endpoints; set to any non-empty string (e.g. "not-required")
model:
  provider: ${MODEL_PROVIDER:-openai}
  # Default points at gpt-oss-20b deployed by ../fbi-ucr-demo into the
  # gpt-oss-model namespace. Override MODEL_ENDPOINT for local dev.
  endpoint: ${MODEL_ENDPOINT:-http://gpt-oss-20b.gpt-oss-model.svc.cluster.local:80/v1}
  name: ${MODEL_NAME:-RedHatAI/gpt-oss-20b}
  temperature: 0.3
  max_tokens: 4096

# -- Platform mode (optional, OGX / LlamaStack delegation) -------------------
#
# Opt in to OGX's /v1/responses endpoint instead of /v1/chat/completions.
# OGX (the rebrand of LlamaStack) orchestrates MCP tool calls, shield
# enforcement, and the inference loop server-side; the agent makes a
# single call_model_responses() per turn and skips its own tool-call loop.
#
# Default is off. Set PLATFORM_MODE=true and point OGX_ENDPOINT at your
# OGX base URL to enable. To pass MCP servers, guardrails, or moderation
# config, expand this block — see CLAUDE.md ("Platform Mode") and
# docs/architecture.md for the full schema.
#
# Production env vars:
#   PLATFORM_MODE  -- "true" to opt in
#   OGX_ENDPOINT   -- OGX base URL (typically ending in /v1)
platform:
  enabled: ${PLATFORM_MODE:-false}
  endpoint: ${OGX_ENDPOINT:-}

# -- MCP servers --------------------------------------------------------------
#
# Client-side MCP servers (used when platform.enabled=false).  In platform
# mode, OGX orchestrates MCP server-side via platform.mcp -- this block is
# ignored.
#
# Remote tool servers the agent connects to at startup.  BaseAgent discovers
# three capability types from each server:
#
#   Tools:     Auto-registered with llm_only visibility.  The LLM decides
#              when to call them, just like local tools.
#
#   Prompts:   Available via self.get_mcp_prompt(name, arguments).
#              Use self.list_mcp_prompts() to see what a server provides.
#
#   Resources: Available via self.read_resource(uri).
#              Use self.list_mcp_resources() for discovery.
#              Resource templates: self.list_mcp_resource_templates().
#
# Each entry needs either a `url` (HTTP transport) or a `command` (stdio):
#
#   - url: http://search-mcp:8080/mcp              # HTTP
#   - command: /path/to/mcp-server                  # stdio
#     args: [--verbose]
#     env: {API_KEY: "${SEARCH_API_KEY}"}
#
# Production env vars (example):
#   MCP_SEARCH_URL  -- URL for a web-search MCP server
#
# Default points at the FBI Crime Stats MCP server deployed by
# ../fbi-ucr-demo into the fbi-mcp namespace. Override MCP_FBI_URL for
# local dev (e.g. http://localhost:8080/mcp/ when running the MCP server
# locally with `make run-server`).
mcp_servers:
  - url: ${MCP_FBI_URL:-http://fbi-crime-stats-mcp.fbi-mcp.svc.cluster.local:8080/mcp/}

# -- Local tools ---------------------------------------------------------------
#
# Tools are auto-discovered from `local_dir` at startup.
# `visibility_default` controls the default plane for tools that don't
# declare their own visibility via the @tool decorator:
#   agent_only  -- callable from agent code only (plane 1)
#   llm_only    -- surfaced to the LLM for tool calling (plane 2)
#   both        -- accessible from either plane
tools:
  local_dir: ./tools
  visibility_default: agent_only

# -- Prompt templates ----------------------------------------------------------
#
# Markdown files with YAML frontmatter, one per file.
# Loaded via self.load_prompt(name, **variables).
# `system` designates which prompt becomes the system prompt.  Defaults to
# "system" (i.e. prompts/system.md).  Supports env-var substitution.
prompts:
  dir: ./prompts
  system: system          # prompt file to load as the system prompt (prompts/system.md)

# -- Agent loop ----------------------------------------------------------------
#
# max_iterations: hard ceiling on step() calls per run() invocation.
#                 Prevents runaway loops.  100 is generous for most agents;
#                 lower it for tightly scoped tasks.
#
# backoff: exponential backoff applied when step() raises a retryable error.
#   initial    -- first delay in seconds
#   max        -- ceiling on delay (seconds)
#   multiplier -- factor applied after each retry (must be > 1.0)
#
# Production env vars:
#   MAX_ITERATIONS -- override the iteration cap per deployment
loop:
  max_iterations: ${MAX_ITERATIONS:-100}
  backoff:
    initial: 1.0
    max: 30.0
    multiplier: 2.0

# -- Logging -------------------------------------------------------------------
#
# Standard Python log levels: DEBUG, INFO, WARNING, ERROR, CRITICAL.
# Set to DEBUG locally for verbose output; keep INFO or WARNING in production.
#
# Production env vars:
#   LOG_LEVEL -- e.g. WARNING
logging:
  level: ${LOG_LEVEL:-INFO}

# -- Server --------------------------------------------------------------------
#
# HTTP server binding for the OpenAI-compatible API.  The Helm chart's
# service.port should match `port` here.
#
# Production env vars:
#   HOST -- bind address (useful for local-only testing with 127.0.0.1)
#   PORT -- bind port (some platforms inject this automatically)
server:
  host: ${HOST:-0.0.0.0}
  port: ${PORT:-8080}

  # -- Storage backend (optional) -----------------------------------------------
  #
  # Shared storage for session persistence and trace collection.
  # When backend is null (default), both features degrade to no-ops --
  # fully backward-compatible.
  #
  # Production env vars:
  #   STORAGE_BACKEND -- "sqlite" or "postgres"
  #   SQLITE_PATH     -- path to SQLite file (default: ./agent.db)
  #   DATABASE_URL    -- PostgreSQL connection string (for postgres backend)
  storage:
    backend: ${STORAGE_BACKEND:-}
    sqlite_path: ${SQLITE_PATH:-./agent.db}
  #   database_url: ${DATABASE_URL:-}

  # -- Session persistence (optional) -------------------------------------------
  #
  # Enables conversation continuity across requests.  Requires a storage
  # backend (sqlite or postgres) to persist session state.
  #
  # REST endpoints:  POST /v1/sessions, GET /v1/sessions/{id},
  #                  DELETE /v1/sessions/{id}
  # Per-request:     pass session_id on ChatCompletionRequest to reuse context
  # Auto-create:     sessions are created on first use (explicit POST optional)
  sessions:
    enabled: ${SESSIONS_ENABLED:-false}
    max_age_hours: 168           # sessions expire after 7 days

  # -- Trace collection (optional) ----------------------------------------------
  #
  # TraceCollector observes astep_stream() and builds span trees from
  # StreamEvents.  Traces are queryable via GET /v1/traces.
  #
  # exporter:
  #   store  -- persist to the storage backend (default when enabled)
  #   otel   -- export to OpenTelemetry Collector via OTLP (requires [otel] extra)
  #
  # Production env vars:
  #   TRACES_ENABLED  -- "true" to enable
  #   OTEL_ENDPOINT   -- OTLP gRPC endpoint (e.g. http://otel-collector:4317)
  traces:
    enabled: ${TRACES_ENABLED:-false}
    sampling_rate: 1.0
  #   exporter: store            # or: otel
  #   otel_endpoint: ${OTEL_ENDPOINT:-}
  #   service_name: ${OTEL_SERVICE_NAME:-fipsagents}

  # -- Prometheus metrics (optional) --------------------------------------------
  #
  # Records request/tool/token counters and duration histograms.
  # Exposed at GET /metrics in Prometheus text format.
  # Requires the [metrics] extra:  pip install fipsagents[metrics]
  #
  # Available metrics:
  #   agent_requests_total, agent_request_duration_seconds,
  #   agent_model_call_duration_seconds, agent_tool_call_total,
  #   agent_tokens_total
  metrics:
    enabled: ${METRICS_ENABLED:-false}

  # -- User feedback collection (optional) --------------------------------------
  #
  # Persists thumbs-up / thumbs-down ratings, comments, and corrections
  # submitted via the UI.  Records are joined to traces (when tracing is
  # enabled) so feedback can be correlated with the conversation that
  # produced it.
  #
  # Requires a storage backend (sqlite or postgres) for persistence —
  # without one, feedback POSTs are accepted and discarded.
  #
  # REST endpoints:
  #   POST /v1/feedback            -- submit a rating
  #   GET  /v1/feedback            -- query records (filter by trace/session)
  #   GET  /v1/feedback/stats      -- aggregated counts grouped by window
  #
  # Production env vars:
  #   FEEDBACK_ENABLED  -- "true" to enable
  feedback:
    enabled: ${FEEDBACK_ENABLED:-false}
    max_age_hours: 720           # feedback expires after 30 days

  # -- File uploads (optional) --------------------------------------------------
  #
  # Accepts user-attached files via POST /v1/files (multipart). The endpoint
  # persists each upload, optionally extracts text via Docling, and lets
  # subsequent /v1/chat/completions requests reference uploads by file_id.
  #
  # Requires the [files] extra:  pip install fipsagents[files]
  # Optional virus scan: deploy a ClamAV sidecar (chart/values.yaml ->
  # files.virusScanner.enabled=true) and set scanner.url to point at it.
  #
  # Defense-in-depth: the gateway also enforces size + MIME caps before
  # bytes reach the agent.  Keep allowed_mime_types in sync between the
  # two so a request the gateway would reject doesn't surprise the agent.
  #
  # backend: persistence backend for metadata + bytes.
  #   sqlite   -- local SQLite + sharded local FS (dev / single-replica)
  #   postgres -- Postgres metadata (S3-compatible bytes backend pending)
  #   null     -- accepted-then-discarded (no persistence; for smoke testing)
  #
  # max_file_size_bytes: hard cap enforced at the agent layer. Must be set
  # to at least the gateway's cap so honest uploads aren't double-rejected.
  #
  # allowed_mime_types: empty list means allow-all (gateway is the gate).
  # Populate with explicit types to belt-and-braces the gateway allowlist.
  #
  # scanner.url: HTTP URL of a ClamAV-fronting sidecar. Empty disables
  # scanning (NullScanner). When set, every upload is POSTed to the
  # sidecar before persistence.
  # scanner.fail_mode:
  #   open   -- accept uploads when the scanner is unreachable (dev default)
  #   closed -- 503 the upload when the scanner errors (production-recommended)
  #
  # Production env vars:
  #   FILES_ENABLED          -- "true" to accept uploads
  #   FILES_BACKEND          -- "sqlite" | "postgres" | "" (Null)
  #   FILES_MAX_SIZE_BYTES   -- override the size cap (default 50 MiB)
  #   FILES_SCANNER_URL      -- ClamAV sidecar URL (empty disables scanning)
  #   FILES_SCANNER_FAIL_MODE -- "open" or "closed"
  files:
    enabled: ${FILES_ENABLED:-false}
    backend: ${FILES_BACKEND:-}
    max_file_size_bytes: ${FILES_MAX_SIZE_BYTES:-52428800}  # 50 MiB
    # bytes_dir is the local-FS path SqliteFileStore / PostgresFileStore
    # write bytes to. Mount a PVC at this path in production (the Helm
    # chart sets FILES_BYTES_DIR via files.persistence.mountPath);
    # without a PVC, uploads are wiped on every pod restart.
    bytes_dir: ${FILES_BYTES_DIR:-./files}
    # Override storage.sqlite_path for the SqliteFileStore metadata DB
    # only. When bytes_dir is on a PVC, point this at the same volume so
    # both bytes and metadata survive pod restarts (the chart sets
    # FILES_SQLITE_DB_PATH=${mountPath}/.metadata/agent.db when
    # files.persistence.enabled and backend is sqlite). Empty defers to
    # storage.sqlite_path.
    # Quoted so empty ${VAR:-} yields "" (string), not null —
    # Pydantic rejects None for str fields. Same caveat applies to
    # every str-field substitution with an empty default below.
    sqlite_path: "${FILES_SQLITE_DB_PATH:-}"
    # Bytes-storage backend (per ADR-0001). Composes with the metadata
    # ``backend`` so eg "postgres metadata + S3 bytes" is one config
    # block, not a separate FileStore class.
    #
    #   type: local_fs   — sharded local filesystem at bytes_dir
    #                      (single-replica only; backward-compatible
    #                      with 0.16.0 deployments)
    #   type: s3         — S3-compatible object storage (AWS S3, MinIO,
    #                      GCS S3-mode, Cloudflare R2, Backblaze B2).
    #                      Requires ``pip install fipsagents[s3]``.
    #                      Multi-replica safe.
    #   type: null       — accept-and-discard (tests)
    #
    # For S3, set bucket + endpoint (omit for AWS) + region. Auth
    # falls through boto3's default chain when access_key/secret_key
    # are empty (IAM role, env vars, EC2 metadata).
    bytes_backend:
      # String fields are quoted so empty ${VAR:-} substitutions yield
      # "" (string), not null — Pydantic rejects None for str fields.
      type: "${FILES_BYTES_BACKEND:-local_fs}"
      bucket: "${FILES_S3_BUCKET:-}"
      endpoint: "${FILES_S3_ENDPOINT:-}"
      region: "${FILES_S3_REGION:-us-east-1}"
      access_key: "${FILES_S3_ACCESS_KEY:-}"
      secret_key: "${FILES_S3_SECRET_KEY:-}"
      prefix: "${FILES_S3_PREFIX:-}"
      path_style: ${FILES_S3_PATH_STYLE:-false}
    max_age_hours: 720                                       # files expire after 30 days
    allowed_mime_types: []
    # Uncomment entries above to restrict at the agent layer:
    #   - application/pdf
    #   - text/plain
    #   - text/markdown
    #   - text/csv
    #   - application/json
    #   - image/png
    #   - image/jpeg
    scanner:
      url: "${FILES_SCANNER_URL:-}"
      timeout_seconds: 30.0
      fail_mode: ${FILES_SCANNER_FAIL_MODE:-open}

    # -- Docling parser pipeline knobs --------------------------------------
    #
    # Tunes the Docling DocumentConverter used by `[files]`-extra builds.
    # Currently only PDF has knobs surfaced; other formats use Docling's
    # converter defaults.
    #
    # `parser.pdf.do_ocr` defaults to `false` in 0.19.0+. Most modern PDFs
    # ship a selectable text layer; OCR adds 1-2 seconds per page on a
    # 2-CPU pod with no quality benefit. Set `FILES_PARSER_PDF_DO_OCR=true`
    # for scanned PDFs.
    #
    # `parser.pdf.do_table_structure` keeps Docling's default (`true`) so
    # tabular content survives the markdown export. Disable only if you
    # see table extraction errors on a particular corpus.
    parser:
      pdf:
        do_ocr: ${FILES_PARSER_PDF_DO_OCR:-false}
        do_table_structure: ${FILES_PARSER_PDF_DO_TABLE_STRUCTURE:-true}

    # -- Large-file chunking + retrieval (optional, per ADR-0002) ------------
    #
    # When enabled, files whose extracted text exceeds
    # `small_file_threshold_tokens` are split into chunks, embedded via the
    # configured OpenAI-compatible embedding endpoint, and stored in
    # pgvector. At chat-completion time the chunked file's content is
    # retrieved per-query (top-K nearest chunks scoped to the user's
    # `file_ids`) instead of dumping the full text into the prompt.
    #
    # Disabled by default — flip `enabled: true` and provide a pgvector URL
    # + embedding endpoint to opt in. Files smaller than the threshold and
    # files uploaded while chunking is disabled fall back to full-text
    # injection (the 0.17.0 behaviour).
    #
    # `backend: pgvector` requires the [chunking] extra in the container
    # build (`pip install fipsagents[chunking]`).
    #
    # Budget presets parallel `memory.budget` so a deployment using
    # `${MEMORY_BUDGET}` for memory naturally gets matching chunking
    # defaults — set `${CHUNKING_BUDGET}` to the same value:
    #   small   -- chunk_size_tokens=400, retrieval_top_k=3,
    #              small_file_threshold_tokens=2000
    #   medium  -- 600 / 5  / 4000     (default sizing)
    #   large   -- 800 / 8  / 8000
    #   custom  -- no preset; set the per-tier knobs explicitly
    #
    # Production env vars:
    #   CHUNKING_ENABLED          -- "true" to enable
    #   CHUNKING_BACKEND          -- "pgvector" (default "null" = disabled)
    #   PGVECTOR_URL              -- Postgres+pgvector connection string
    #                                (postgresql://user:pass@host:5432/db)
    #   EMBEDDING_URL             -- OpenAI-compatible embeddings endpoint
    #   EMBEDDING_MODEL           -- model identifier passed to the endpoint
    #   EMBEDDING_DIMENSION       -- vector dim (must match the model)
    #   CHUNKING_TABLE            -- pgvector table name (default file_chunks)
    #   CHUNKING_BUDGET           -- "small" | "medium" | "large" | "custom"
    chunking:
      enabled: ${CHUNKING_ENABLED:-false}
      # Quoted so the default stays the literal string "null" — Pydantic's
      # Literal["null", "pgvector"] rejects YAML's bare null (None).
      backend: "${CHUNKING_BACKEND:-null}"
      database_url: "${PGVECTOR_URL:-}"
      embedding_url: "${EMBEDDING_URL:-}"
      embedding_model: "${EMBEDDING_MODEL:-all-MiniLM-L6-v2}"
      embedding_dimension: ${EMBEDDING_DIMENSION:-768}
      table_name: "${CHUNKING_TABLE:-file_chunks}"
      # budget: ${CHUNKING_BUDGET:-medium}
      # Per-tier knobs (override the budget preset):
      # chunk_size_tokens: 600
      # chunk_overlap_tokens: 100
      # small_file_threshold_tokens: 4000
      # retrieval_top_k: 5
      # retrieval_min_score: 0.0

# -- Memory backend (optional) -----------------------------------------------
#
# Controls which memory backend the agent uses.
#
# backend: which backend to use.  When omitted, auto-detects by looking
#          for .memoryhub.yaml (backward compatible).
#   memoryhub -- MemoryHub SDK (default when .memoryhub.yaml exists)
#   markdown  -- Human-readable markdown file(s) (zero dependencies)
#   sqlite    -- Local SQLite with FTS5 search (zero dependencies)
#   pgvector  -- PostgreSQL + pgvector for semantic search
#   custom    -- Bring your own MemoryClientBase subclass
#   null      -- Explicitly disable memory
#
# config_path: path to the backend-specific config file.
#   For memoryhub: .memoryhub.yaml (generated by `memoryhub config init`)
#   For sqlite:    .memory-sqlite.yaml
#   For pgvector:  .memory-pgvector.yaml
#
# backend_class: dotted import path, only used when backend: custom
#   Example: myproject.memory.RedisMemoryClient
#
# Budget presets (set one and forget individual tuning):
#   budget: small   -- 500 chars, 5 results, min_weight 0.7  (8K-16K models)
#   budget: medium  -- 4K chars, 20 results, min_weight 0.5  (32K-128K models)
#   budget: large   -- 8K chars, 50 results, min_weight 0.3  (128K+ models)
#   Explicit values for max_prefix_chars/max_results/min_weight override presets.
#
# Injection mode (how memories reach the model):
#   injection_mode: prefix     -- (default) separate message before user turn
#   injection_mode: user_turn  -- append to user message inside XML tags;
#                                 required for small models (8K-16K) that
#                                 ignore system-level memory
#   injection_tag: user_memories  -- XML tag name (only with user_turn mode)
#
# Loading pattern (when to retrieve memories):
#   loading_pattern: eager           -- at setup time (default, best for prefix caching)
#   loading_pattern: lazy            -- after first user message arrives
#   loading_pattern: lazy_with_rebias -- lazy + re-retrieve after topic shift
#   loading_pattern: jit             -- just-in-time, every turn
#   For MemoryHub backends, .memoryhub.yaml pattern takes precedence unless
#   loading_pattern is set explicitly here.
#
# Production env vars:
#   MEMORY_BACKEND -- override the backend choice per deployment
memory:
  backend: ${MEMORY_BACKEND:-}
  config_path: .memoryhub.yaml
  # backend_class: null
  # budget: medium             # or: small, large, custom
  # injection_mode: prefix     # or: user_turn
  # injection_tag: user_memories
  # loading_pattern: eager     # or: lazy, lazy_with_rebias, jit
  # max_prefix_chars: 8000     # 0 disables limit
  # max_results: 50
  # min_weight: 0.0            # 0.0 = no filtering

# -- Security ------------------------------------------------------------------
#
# Controls security inspection, audit logging, and enforcement behavior.
# Global mode sets the default; per-layer mode overrides it.
#
#   mode: enforce      -- block on findings (production default)
#   mode: observe      -- log findings but allow execution (for tuning)
#
security:
  mode: ${SECURITY_MODE:-enforce}
  tool_inspection:
    enabled: ${TOOL_INSPECTION_ENABLED:-true}
#    mode: observe     # override global mode for this layer
#  guardrails:
#    mode: observe     # override global mode for guardrails

# -- Node deployment topology (optional) -------------------------------------
#
# Maps workflow node names to their deployment topology.  Nodes default to
# local (in-process).  Set type: remote to delegate processing to an
# already-deployed agent via HTTP POST.
#
# The runner auto-wraps remote nodes -- the graph definition stays
# topology-agnostic.  Same graph works for local dev and production.
#
# Remote contract:
#   POST {endpoint}{path}
#   Body:  {"state": {...}, "state_type": "fully.qualified.ClassName"}
#   Reply: {"state": {...}}
#
# Production env vars (example):
#   RESEARCH_AGENT_URL -- URL of a separately deployed research agent
#
# nodes:
#   classify:
#     type: local
#   research:
#     type: remote
#     endpoint: ${RESEARCH_AGENT_URL:-http://research-agent:8080}
#     path: /process        # default
#     timeout: 30.0         # seconds, default
#     retries: 2            # HTTP-level retries (runner also retries via node_retries)
nodes: {}

# -- Code execution sandbox (optional) ----------------------------------------
#
# The code_executor tool sends LLM-generated Python code to a sandbox sidecar
# for safe execution.  The sidecar runs in the same pod and is reachable at
# localhost:8000.  Enable it in your Helm values:
#
#   sandbox:
#     enabled: true
#     image:
#       repository: quay.io/yourorg/code-sandbox
#       tag: latest
#
# The tool reads SANDBOX_URL from the environment (set automatically by the
# Helm chart when sandbox.enabled=true).  For local development, run the
# sandbox manually from https://github.com/fips-agents/code-sandbox or
# scaffold a standalone instance with `fips-agents create sandbox <name>`.
#
# Available modules in the sandbox:
#   math, statistics, itertools, functools, re, datetime, collections,
#   json, csv, string, textwrap, decimal, fractions, random, operator, typing
#
# Production env vars:
#   SANDBOX_URL -- override the sidecar URL (default: http://localhost:8000)