diff --git a/flake.nix b/flake.nix index 960b841e4..6d6deda94 100644 --- a/flake.nix +++ b/flake.nix @@ -57,6 +57,11 @@ inputs.nixpkgs.follows = "nixpkgs"; }; + # Provides only `room-server` for images/dev/symphony-codex (through + # `pkgs.symphony-room-server` in lib/overlay.nix). The Elixir runtime + # itself lives in packages/symphony now; room-server's source moved to + # the ix monorepo, so this pin stays on the last symphony rev that still + # builds it and retires once the image's room-server seam moves too. symphony = { url = "github:indexable-inc/symphony/main"; inputs.nixpkgs.follows = "nixpkgs"; diff --git a/lib/per-system.nix b/lib/per-system.nix index ab650f8d4..473d9531d 100644 --- a/lib/per-system.nix +++ b/lib/per-system.nix @@ -633,6 +633,11 @@ let printf '%s\n' '${forced}' > "$out" ''; run-records-session = repoPackages.run.passthru.tests.recordsSession; + # Symphony's required quality lane (compile -Werror, mix format, + # credo, mix test) as a sandboxed derivation; see + # packages/symphony/default.nix. The advisory lane (dialyzer, + # sobelow, deps.audit) stays a local `make quality` run. + symphony-elixir = repoPackages.symphony.passthru.tests.elixir; # Deterministic alloc-count gate for indexbench: runs the counting- # allocator demo bench once through `indexbench assert` and fails if its # allocation count exceeds the declared budget. Reproducible, unlike @@ -809,5 +814,19 @@ in pkgs.jemalloc ]; }; + + # Dev loop for packages/symphony: the Elixir/OTP pairing the runtime pins + # (1.19 on 28) plus the host tools bin/run-nix expects. codex is the plain + # nixpkgs CLI; authenticate it before `nix run .#symphony`. 
+ symphony = pkgs.mkShellNoCC { + packages = [ + (ix.languages.elixir.toolchain pkgs { version = "1.19"; }) + (ix.languages.erlang.toolchain pkgs { version = "28"; }) + pkgs.codex + pkgs.gh + pkgs.git + pkgs.openssh + ]; + }; }; } diff --git a/modules/services/symphony/default.nix b/modules/services/symphony/default.nix new file mode 100644 index 000000000..a4bc8c19e --- /dev/null +++ b/modules/services/symphony/default.nix @@ -0,0 +1,353 @@ +# NixOS service module for the Symphony runtime. +# +# Minimal opinionated systemd unit. Reads secrets from an EnvironmentFile +# you control, so you can wire any secret manager (sops-nix, agenix, +# Bitwarden Secrets Manager, AWS Secrets Manager, etc.) underneath. For +# Bitwarden Secrets Manager specifically, set `secretsCommand` to a +# `bws run -- ...` invocation; the unit will wrap ExecStart with it. +{ + config, + lib, + pkgs, + ... +}: +let + inherit (lib) + mkEnableOption + mkIf + mkOption + optionalString + types + ; + + cfg = config.services.symphony; +in +{ + options.services.symphony = { + enable = mkEnableOption "Symphony runtime"; + + package = mkOption { + type = types.package; + description = "Symphony package (provides /bin/symphony from this flake's default output)."; + }; + + user = mkOption { + type = types.str; + default = "symphony"; + description = "Unix user the service runs as. 
The default `symphony` user and group are created by this module; any other value must name an existing user with a same-named group, which the unit and its state directories also use."; + }; + + stateDir = mkOption { + type = types.path; + default = "/var/lib/symphony"; + description = "Directory for runs, workspaces, logs, and the staged runtime copy."; + }; + + httpPort = mkOption { + type = types.port; + default = 4040; + description = "Phoenix HTTP listener port."; + }; + + primaryRepo = mkOption { + type = types.nullOr types.path; + default = null; + description = "Absolute path to the primary repository checkout (SYMPHONY_PRIMARY_REPO)."; + }; + + repoRoot = mkOption { + type = types.nullOr types.path; + default = null; + description = "Optional parent directory of sibling repository checkouts (SYMPHONY_REPO_ROOT). Defaults to the parent of primaryRepo."; + }; + + workflowPack = mkOption { + type = types.str; + default = "example"; + description = "Built-in workflow pack name; ignored when packDir is set."; + }; + + packDir = mkOption { + type = types.nullOr types.path; + default = null; + description = "Absolute path to an external workflow pack (SYMPHONY_PACK_DIR). Takes precedence over workflowPack."; + }; + + roomRegistryUrl = mkOption { + type = types.nullOr types.str; + default = null; + description = '' + Central room.ix.dev base URL each run's room-server registers its + backend with (SYMPHONY_ROOM_REGISTRY_URL). Drives both the room UI's + transcript view and the Slack "Run details" deep link. Unset disables + registration and the Slack link. The matching write token is a secret; + supply SYMPHONY_ROOM_REGISTRY_TOKEN via environmentFile. + ''; + }; + + roomAdvertiseHost = mkOption { + type = types.nullOr types.str; + default = null; + description = '' + Address a provisioned per-run room-server binds and advertises so + room.ix.dev can reach it to proxy the run's transcript + (SYMPHONY_ROOM_ADVERTISE_HOST). Set to this host's tailnet address when + room.ix.dev runs elsewhere; unset keeps the loopback default, reachable + only when room.ix.dev shares the host. 
+ ''; + }; + + roomServerUrl = mkOption { + type = types.nullOr types.str; + default = null; + description = '' + Standing room-server URL for `:local` / `{:room, url}` placements that + do not provision their own per-run server (SYMPHONY_ROOM_SERVER_URL). + ''; + }; + + extraEnvironment = mkOption { + type = types.attrsOf types.str; + default = { }; + description = '' + Additional environment variables exported to the service. Use for + non-secret config: LINEAR_WORKSPACE_SLUG, SYMPHONY_BOT_USERNAME, + SYMPHONY_BOT_EMAIL, SYMPHONY_GITHUB_APP_OWNER_REPO, + SYMPHONY_GITHUB_STATS_QUERY, SYMPHONY_SLACK_NOTIFY_CHANNEL, etc. + ''; + }; + + environmentFile = mkOption { + type = types.nullOr types.path; + default = null; + description = '' + Path to a systemd EnvironmentFile holding secrets: + LINEAR_API_KEY, GITHUB_TOKEN, LINEAR_WEBHOOK_SECRET, + GITHUB_WEBHOOK_SECRET, SLACK_SIGNING_SECRET, SLACK_BOT_OAUTH_TOKEN, + SYMPHONY_GITHUB_APP_PRIVATE_KEY_BASE64, SYMPHONY_ROOM_REGISTRY_TOKEN, + etc. + Wire this to whichever secret manager you use (sops-nix, agenix, ...). + Leave null if you use secretsCommand instead. + ''; + }; + + secretsCommand = mkOption { + type = types.nullOr (types.listOf types.str); + default = null; + example = [ + "/run/current-system/sw/bin/bws" + "run" + "--project-id" + "symphony-prod" + "--" + ]; + description = '' + Optional command that wraps ExecStart and injects secrets into the + environment. Designed for Bitwarden Secrets Manager (`bws run -- + ...`) or any compatible secret-injecting CLI. The wrapper command + must exec its trailing arguments. Give its first element as an + absolute path (e.g. `lib.getExe pkgs.bws`): systemd resolves a bare + ExecStart name on its built-in search path, not the unit's `path`. + + When set, the unit also expects BWS_ACCESS_TOKEN (or equivalent) + to be exported via environmentFile or extraEnvironment. + ''; + }; + + path = mkOption { + type = types.listOf types.package; + default = [ ]; + description = "Extra packages on the service PATH (e.g. 
pkgs.bws when using secretsCommand)."; + }; + + hostRuntime = mkOption { + default = { }; + description = '' + The host codex placement. When enabled, a workflow node that + declares `location: host` (or the run's resolved fallback) runs + codex directly on this machine as a real OS user, with no VM. The + per-run room-server and the codex process it spawns run as + `user` inside that user's home directory, launched as transient + `systemd-run --uid` units. This option wires the polkit grant, + PATH, and environment that path needs. It stays inert until + `enable` is set. + ''; + type = types.submodule { + options = { + enable = mkEnableOption "the host codex placement"; + + user = mkOption { + type = types.str; + default = ""; + description = "OS user codex runs as for host placement (SYMPHONY_HOST_USER). Must already exist with a home directory."; + }; + + group = mkOption { + type = types.nullOr types.str; + default = null; + description = "OS group for host runs (SYMPHONY_HOST_GROUP); omitted uses the user's primary group."; + }; + + workspacesDir = mkOption { + type = types.nullOr types.path; + default = null; + description = "Parent directory for run checkouts (SYMPHONY_HOST_WORKSPACES_DIR); defaults to /symphony-workspaces."; + }; + + roomServerPackage = mkOption { + type = types.nullOr types.package; + default = null; + description = "Package providing the codex-wrapped room-server launched as the host user (this flake's room-server output). 
Used by the per-run host placement."; + }; + + keep = mkOption { + type = types.bool; + default = false; + description = "Leave the unit and checkout in place after the turn for inspection (SYMPHONY_HOST_KEEP)."; + }; + }; + }; + }; + }; + + config = mkIf cfg.enable { + assertions = [ + { + assertion = !cfg.hostRuntime.enable || cfg.hostRuntime.user != ""; + message = "services.symphony.hostRuntime.user must be set when hostRuntime.enable is true."; + } + { + assertion = !cfg.hostRuntime.enable || cfg.hostRuntime.roomServerPackage != null; + message = "services.symphony.hostRuntime.roomServerPackage must be set when hostRuntime.enable is true."; + } + ]; + + # The host runtime calls systemd's StartTransientUnit over D-Bus to run + # codex as another user. A non-root service needs polkit authorization + # for that. Scope the grant to the "symphony-host-" unit-name prefix so + # the service cannot manage unrelated system units. See systemd-run(1) + # and the polkit systemd1 actions documented at + # https://www.freedesktop.org/software/systemd/man/latest/org.freedesktop.systemd1.html + security.polkit = lib.mkIf cfg.hostRuntime.enable { + enable = true; + extraConfig = '' + polkit.addRule(function(action, subject) { + if (subject.user == "${cfg.user}" && + action.id == "org.freedesktop.systemd1.manage-units") { + var unit = action.lookup("unit"); + if (unit && unit.indexOf("symphony-host-") == 0) { + return polkit.Result.YES; + } + } + }); + ''; + }; + + users.users = lib.mkIf (cfg.user == "symphony") { + symphony = { + isSystemUser = true; + group = "symphony"; + home = cfg.stateDir; + }; + }; + + users.groups = lib.mkIf (cfg.user == "symphony") { + symphony = { }; + }; + + systemd.tmpfiles.rules = [ + "d ${cfg.stateDir} 0750 ${cfg.user} ${cfg.user} -" + "d ${cfg.stateDir}/workspaces 0750 ${cfg.user} ${cfg.user} -" + "d ${cfg.stateDir}/runs 0750 ${cfg.user} ${cfg.user} -" + "d ${cfg.stateDir}/log 0750 ${cfg.user} ${cfg.user} -" + ]; + + systemd.services.symphony = { 
+ description = "Symphony runtime"; + wantedBy = [ "multi-user.target" ]; + after = [ "network-online.target" ]; + wants = [ "network-online.target" ]; + + path = + cfg.path + ++ lib.optionals cfg.hostRuntime.enable [ + pkgs.systemd + pkgs.getent + cfg.hostRuntime.roomServerPackage + ]; + + environment = { + SYMPHONY_STATE_DIR = cfg.stateDir; + SYMPHONY_WORKSPACES_DIR = "${cfg.stateDir}/workspaces"; + SYMPHONY_RUNS_DIR = "${cfg.stateDir}/runs"; + SYMPHONY_LOGS_ROOT = "${cfg.stateDir}/log"; + SYMPHONY_HTTP_PORT = toString cfg.httpPort; + SYMPHONY_WORKFLOW_PACK = cfg.workflowPack; + } + // (lib.optionalAttrs (cfg.primaryRepo != null) { + SYMPHONY_PRIMARY_REPO = toString cfg.primaryRepo; + }) + // (lib.optionalAttrs (cfg.repoRoot != null) { + SYMPHONY_REPO_ROOT = toString cfg.repoRoot; + }) + // (lib.optionalAttrs (cfg.packDir != null) { + SYMPHONY_PACK_DIR = toString cfg.packDir; + }) + // (lib.optionalAttrs (cfg.roomRegistryUrl != null) { + SYMPHONY_ROOM_REGISTRY_URL = cfg.roomRegistryUrl; + }) + // (lib.optionalAttrs (cfg.roomAdvertiseHost != null) { + SYMPHONY_ROOM_ADVERTISE_HOST = cfg.roomAdvertiseHost; + }) + // (lib.optionalAttrs (cfg.roomServerUrl != null) { + SYMPHONY_ROOM_SERVER_URL = cfg.roomServerUrl; + }) + // (lib.optionalAttrs cfg.hostRuntime.enable ( + { + SYMPHONY_HOST_USER = cfg.hostRuntime.user; + SYMPHONY_HOST_ROOM_SERVER_COMMAND = lib.getExe cfg.hostRuntime.roomServerPackage; + } + // (lib.optionalAttrs (cfg.hostRuntime.group != null) { + SYMPHONY_HOST_GROUP = cfg.hostRuntime.group; + }) + // (lib.optionalAttrs (cfg.hostRuntime.workspacesDir != null) { + SYMPHONY_HOST_WORKSPACES_DIR = toString cfg.hostRuntime.workspacesDir; + }) + // (lib.optionalAttrs cfg.hostRuntime.keep { + SYMPHONY_HOST_KEEP = "true"; + }) + )) + // cfg.extraEnvironment; + + serviceConfig = { + Type = "simple"; + User = cfg.user; + Group = cfg.user; + ExecStart = + let + symphonyBin = "${cfg.package}/bin/symphony"; + wrapper = optionalString (cfg.secretsCommand != null) ( + 
lib.escapeShellArgs cfg.secretsCommand + " " + ); + in + "${wrapper}${symphonyBin}"; + Restart = "on-failure"; + RestartSec = "10s"; + StateDirectory = lib.mkIf (lib.hasPrefix "/var/lib/" cfg.stateDir) ( + lib.removePrefix "/var/lib/" cfg.stateDir + ); + # Symphony spawns codex subprocesses and clones git repos, so + # most sandboxing options need to stay permissive. Only enable + # the cheap, safe ones. + NoNewPrivileges = true; + PrivateTmp = true; + ProtectKernelTunables = true; + ProtectKernelModules = true; + ProtectControlGroups = true; + } + // (lib.optionalAttrs (cfg.environmentFile != null) { + EnvironmentFile = cfg.environmentFile; + }); + }; + }; +} diff --git a/packages/symphony/.env.example b/packages/symphony/.env.example new file mode 100644 index 000000000..f8b0ff477 --- /dev/null +++ b/packages/symphony/.env.example @@ -0,0 +1,78 @@ +# --- Core runtime ----------------------------------------------------------- +SYMPHONY_PRIMARY_REPO= +SYMPHONY_WORKFLOW_PACK=example +# Or, to load an external pack: +# SYMPHONY_PACK_DIR=/path/to/your/pack +SYMPHONY_HTTP_PORT=4040 +SYMPHONY_STATE_DIR=/var/lib/symphony +SYMPHONY_WORKSPACES_DIR= +SYMPHONY_REPO_ROOT= + +# --- Codex placement -------------------------------------------------------- +# Each agent node names its placement in the .sym workflow with `location:` +# (local|room|ixvm|host). The placement a run retries on when its declared +# `ixvm` placement fails to provision (host|local|none); defaults to host. +# SYMPHONY_PLACEMENT_FALLBACK=host +# Host placement: runs codex on this machine as a real OS user, no VM. +# SYMPHONY_HOST_USER=hari +# SYMPHONY_HOST_GROUP= +# SYMPHONY_HOST_WORKSPACES_DIR= +# SYMPHONY_HOST_KEEP=false + +# --- Claude models ---------------------------------------------------------- +# A skill whose codex_model names a Claude model (e.g. claude-opus-4-8) runs +# through headless Claude Code instead of codex, with --dangerously-skip-permissions. +# Auth is the Anthropic API key below. 
+ANTHROPIC_API_KEY= +# SYMPHONY_CLAUDE_COMMAND=claude + +# --- Integrations ----------------------------------------------------------- +LINEAR_API_KEY= +LINEAR_TEAM_KEY= +LINEAR_WORKSPACE_SLUG= +LINEAR_WEBHOOK_SECRET= + +GITHUB_TOKEN= +GITHUB_WEBHOOK_SECRET= + +SLACK_BOT_OAUTH_TOKEN= +SLACK_SIGNING_SECRET= +SYMPHONY_SLACK_NOTIFY_CHANNEL= +# Post failed cron runs to Slack (digest, quality, idiomatic, babysit-dispatch +# and other scheduled workflows). Defaults to true. +SYMPHONY_SLACK_NOTIFY_CRON_FAILURES= +# Comma-separated workflow names whose successful cron runs also post, e.g. +# "digest,quality". Use "*" to post every cron success (expect high-frequency +# dispatchers like babysit-dispatch too). Defaults to none, so cron successes +# stay quiet. +SYMPHONY_SLACK_NOTIFY_CRON_WORKFLOWS= + +# --- Dashboard / links ------------------------------------------------------ +SYMPHONY_GITHUB_STATS_QUERY= + +# --- Activity room (optional) ----------------------------------------------- +# room.ix.dev is the central room UI that aggregates run transcripts. Each run's +# room-server registers its backend here so the UI can show (and the Slack +# "Run details" button can deep-link to) the run's transcript. Leave +# SYMPHONY_ROOM_REGISTRY_URL unset to disable registration and the Slack link. +SYMPHONY_ROOM_REGISTRY_URL= +# Bearer token the central room server requires for backend registration +# writes (matches the room-server's ROOM_BACKEND_TOKEN). Optional. +SYMPHONY_ROOM_REGISTRY_TOKEN= +# Address a provisioned per-run room-server binds and advertises so room.ix.dev +# can reach it to proxy the run's transcript. Set to this host's tailnet +# address when room.ix.dev runs elsewhere; unset keeps the loopback default +# (only reachable when room.ix.dev shares the host). +SYMPHONY_ROOM_ADVERTISE_HOST= +# A standing room-server URL for `:local` / `{:room, url}` placements that do +# not provision their own per-run server. 
+SYMPHONY_ROOM_SERVER_URL= + +# --- Optional GitHub App ---------------------------------------------------- +# Skills mint installation tokens via SymphonyElixir.GithubApp when these are +# set. The private key is base64-encoded so it fits in a single-line env file. +SYMPHONY_GITHUB_APP_ID= +SYMPHONY_GITHUB_APP_PRIVATE_KEY_BASE64= +SYMPHONY_GITHUB_APP_OWNER_REPO= +SYMPHONY_BOT_USERNAME= +SYMPHONY_BOT_EMAIL= diff --git a/packages/symphony/.gitignore b/packages/symphony/.gitignore new file mode 100644 index 000000000..f4c68754a --- /dev/null +++ b/packages/symphony/.gitignore @@ -0,0 +1,11 @@ +.env +.env.* +!.env.example +log/ +tmp/ +room.db +room.db-shm +room.db-wal +workspaces/ +runs/ +.worktrees/ diff --git a/packages/symphony/AGENTS.md b/packages/symphony/AGENTS.md new file mode 100644 index 000000000..4e3ed2ad3 --- /dev/null +++ b/packages/symphony/AGENTS.md @@ -0,0 +1,73 @@ +# symphony + +An Elixir runtime that orchestrates Codex agent sessions across one or +more git repositories. Workflows are written in the `.sym` surface +language, lowered to an IR run graph the runtime walks; hot-reloaded +`.sym` workflows and markdown skills are the configuration surface. The +room stack (`room-server` and the Tauri/Svelte client) lives in the IX +monorepo; this package is the Elixir runtime that drives it over HTTP. + +Repo-wide standards (writing style, Nix style, commit conventions) come +from the index root AGENTS.md. This file holds only the invariants that +are specific to symphony. + +Do not commit secrets. Tokens for Linear, GitHub, Slack, Codex, or any +other external system must be supplied through the runtime environment or +host secret manager. The bundled `.env.example` lists the keys the +runtime reads. + +## Self-contained operations + +Symphony's runtime behavior must not depend on out-of-repo changes to +function. 
In particular, scheduled work (cron triggers, dispatchers, +auto-healing loops) belongs inside the runtime, driven by Symphony's own +cron scheduler. Do not introduce systemd timers, host nix modules, or any +out-of-repo schedulers as load-bearing pieces of a symphony feature. A +fresh symphony deploy should bring up all of its scheduled work without +needing a paired change in any other repo. + +## Workflow packs + +The runtime is pack-agnostic. The bundled `workflows/example/` pack is the +public default and is intentionally narrow (a single manual-trigger inspect +skill). Deployers point `SYMPHONY_PACK_DIR` at their own pack to drive real +work. Keep core changes pack-agnostic: no workflow names, repo slugs, +label strings, or ticket schemes hardcoded in `elixir/lib/`. + +## Elixir style + +The Elixir runtime is the entry point for symphony itself; the room +stack it drives lives in the IX monorepo and is not owned here. Keep +`elixir/lib/` pack-agnostic, with workflow shape carried in `.sym` / +markdown under the active pack directory rather than hardcoded in source. + +Prefer Mix tasks and supervised processes over loose scripts. A new +scheduled job is a child of Symphony's cron supervisor, not a host-level +timer. + +## Tests + +Tests should protect behavior that can regress across boundaries: +module merges, generated units, pack rendering, and runtime contracts +(including the engine wire fixtures in `contracts/fixtures` shared with +the room-server in IX). Avoid asserting facts already obvious from the +literal config under test. + +The required lane (compile with warnings as errors, format, credo, +`mix test`) runs sandboxed as the `symphony-elixir` flake check; the +advisory lane is `make quality` in `elixir/`. See `docs/quality.md`. 
+ +## Layout + +``` +default.nix # symphony launcher package + the elixir check +elixir/ # Symphony runtime (.sym/IR orchestrator) +workflows/ # pack-agnostic example pack +contracts/fixtures/ # engine wire fixtures shared with room-server (IX) +docs/ # package-owned reference +../../modules/services/symphony/ # NixOS module for the runtime +``` + +Folders should preserve conceptual paths. When siblings share a real +domain, nest them under that domain instead of flattening the name +into repeated dashed prefixes. diff --git a/packages/symphony/CLAUDE.md b/packages/symphony/CLAUDE.md new file mode 120000 index 000000000..47dc3e3d8 --- /dev/null +++ b/packages/symphony/CLAUDE.md @@ -0,0 +1 @@ +AGENTS.md \ No newline at end of file diff --git a/packages/symphony/LICENSE b/packages/symphony/LICENSE new file mode 100644 index 000000000..376f10496 --- /dev/null +++ b/packages/symphony/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or Derivative + Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, excluding + those notices that do not pertain to any part of the Derivative + Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/packages/symphony/README.md b/packages/symphony/README.md new file mode 100644 index 000000000..afeac2147 --- /dev/null +++ b/packages/symphony/README.md @@ -0,0 +1,45 @@ +

+ Symphony +

+ +# symphony + +> [!IMPORTANT] +> Symphony is highly experimental software. Use it at your own risk: it can spawn Codex sessions, create branches, open PRs, and mutate Linear/GitHub state when credentials allow it. + +Symphony is a boring DAG runtime for deterministic agent workflows. Workflows are written in the `.sym` surface language, lowered to an IR run graph, and walked by a supervised Elixir/OTP runtime with a LiveView dashboard, cron/Slack/Linear/GitHub triggers, and per-run git worktrees. It moved here from the dedicated [indexable-inc/symphony](https://github.com/indexable-inc/symphony) repo (rev `c9e7092`). + +Run it from this repo: + +```sh +nix run .#symphony +``` + +The launcher requires an authenticated `codex` on PATH and refuses to start without one. It stages this source tree under `~/.local/state/symphony`, fetches mix deps, and boots the dashboard on http://127.0.0.1:4040. Point `SYMPHONY_PRIMARY_REPO` at a local checkout first; [docs/setup.md](docs/setup.md) and [.env.example](.env.example) cover the full configuration surface. + +Symphony dashboard + +## Layout + +- [`elixir/`](elixir/): the runtime (DSL parser, IR, runtime supervisor, Phoenix dashboard, triggers). +- [`workflows/example/`](workflows/example/): the bundled pack, intentionally narrow (one manual-trigger `inspect` workflow plus its read-only skill). Real deployments point `SYMPHONY_PACK_DIR` at their own pack. +- [`contracts/fixtures/`](contracts/fixtures/): engine wire fixtures shared with the room-server in the ix monorepo. The Elixir contract tests read them from `../../contracts`, so this directory stays beside `elixir/`. +- [`bin/run-nix`](bin/run-nix): the production entrypoint the `symphony` package wraps. +- [`docs/`](docs/): setup, engine contract, and quality-gate reference. + +## Neighbors + +- The room stack symphony drives over HTTP (`room-server` and the room UI) lives in the ix monorepo (`crates/room`, `packages/room`). 
+- `location: ixvm` placements provision VMs from the [`symphony-codex`](../../images/dev/symphony-codex/) image, which carries `room-server` on PATH. +- Deployment goes through the [`symphony` NixOS module](../../modules/services/symphony/) (`services.symphony.*`), with secrets supplied via `environmentFile` or `secretsCommand`. + +## Developing + +```sh +nix develop .#symphony # Elixir 1.19 / OTP 28, plus codex, gh, git +cd packages/symphony/elixir +make all # setup, compile -Werror, fmt-check, credo +mix test +``` + +CI runs the same required lane sandboxed as the `symphony-elixir` flake check (see [default.nix](default.nix)); after changing `elixir/mix.lock`, refresh the `fetchMixDeps` hash there. The advisory lane (`make quality`: sobelow, deps.audit, dialyzer, coveralls) stays a local run; see [docs/quality.md](docs/quality.md). diff --git a/packages/symphony/assets/logo.svg b/packages/symphony/assets/logo.svg new file mode 100644 index 000000000..30ee45cb5 --- /dev/null +++ b/packages/symphony/assets/logo.svg @@ -0,0 +1,6 @@ + + + + + + diff --git a/packages/symphony/bin/run-nix b/packages/symphony/bin/run-nix new file mode 100755 index 000000000..f19588fda --- /dev/null +++ b/packages/symphony/bin/run-nix @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +set -euo pipefail + +REPO_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" + +SYMPHONY_STATE_DIR="${SYMPHONY_STATE_DIR:-$HOME/.local/state/symphony}" +SYMPHONY_RUNTIME_DIR="${SYMPHONY_RUNTIME_DIR:-$SYMPHONY_STATE_DIR/runtime}" +SYMPHONY_LOGS_ROOT="${SYMPHONY_LOGS_ROOT:-$SYMPHONY_STATE_DIR/log}" +SYMPHONY_HTTP_PORT="${SYMPHONY_HTTP_PORT:-${SYMPHONY_PORT:-4040}}" + +# The new DAG runtime calls this SYMPHONY_WORKSPACES_DIR; the older +# escript called it SYMPHONY_WORKSPACE_ROOT. Accept either, prefer the +# new name. +SYMPHONY_WORKSPACES_DIR="${SYMPHONY_WORKSPACES_DIR:-${SYMPHONY_WORKSPACE_ROOT:-$SYMPHONY_STATE_DIR/workspaces}}" + +# Run history (per-run JSON snapshots that drive the dashboard). 
The +# default in SymphonyElixir.Config is $SYMPHONY_ROOT/runs, but +# SYMPHONY_ROOT here points at the runtime copy which gets wiped on +# every restart. Anchor runs/ at the stable state dir so the dashboard +# shows historical runs after rebuilds. +SYMPHONY_RUNS_DIR="${SYMPHONY_RUNS_DIR:-$SYMPHONY_STATE_DIR/runs}" + +if ! command -v codex >/dev/null 2>&1; then + echo "codex is required on PATH and must already be authenticated" >&2 + exit 1 +fi + +mkdir -p "$SYMPHONY_STATE_DIR" "$SYMPHONY_WORKSPACES_DIR" "$SYMPHONY_LOGS_ROOT" "$SYMPHONY_RUNS_DIR" + +# Stage the repo into the runtime dir so mix can write _build there +# without touching the source checkout (which is read-only when invoked +# from nix store, and live-edited when invoked from a working tree). +rm -rf "$SYMPHONY_RUNTIME_DIR.tmp" +mkdir -p "$SYMPHONY_RUNTIME_DIR.tmp" +cp -R "$REPO_ROOT/." "$SYMPHONY_RUNTIME_DIR.tmp/" +chmod -R u+w "$SYMPHONY_RUNTIME_DIR.tmp" +rm -rf "$SYMPHONY_RUNTIME_DIR" +mv "$SYMPHONY_RUNTIME_DIR.tmp" "$SYMPHONY_RUNTIME_DIR" + +# SYMPHONY_ROOT is read by SymphonyElixir.Config at boot to anchor the +# catalogs (workflows/ and skills/ live there) and to resolve relative +# paths. Anchor it at the runtime copy so a live edit of REPO_ROOT does +# not get half-loaded mid-run. +export SYMPHONY_ROOT="$SYMPHONY_RUNTIME_DIR" +export SYMPHONY_WORKSPACES_DIR +export SYMPHONY_RUNS_DIR +export SYMPHONY_LOGS_ROOT +export SYMPHONY_HTTP_PORT + +cd "$SYMPHONY_RUNTIME_DIR/elixir" + +# Production runs use MIX_ENV=prod; default to prod for the systemd +# unit invocation. Callers can override (e.g. dev iteration). +export MIX_ENV="${MIX_ENV:-prod}" + +mix deps.get --only "$MIX_ENV" +mix compile --warnings-as-errors + +# mix run --no-halt starts SymphonyElixir.Application (which boots the +# Catalog, WorkflowCatalog, the run supervisor, the LiveView endpoint, +# and the triggers) and stays in the foreground so systemd can supervise +# it. 
+exec mix run --no-halt diff --git a/packages/symphony/contracts/fixtures/agent_turn_response.json b/packages/symphony/contracts/fixtures/agent_turn_response.json new file mode 100644 index 000000000..d959ec61a --- /dev/null +++ b/packages/symphony/contracts/fixtures/agent_turn_response.json @@ -0,0 +1,12 @@ +{ + "threadId": "thread_abc", + "outcome": { "kind": "ok" }, + "eventCount": 4, + "usage": { + "tokensIn": 1200, + "tokensOut": 340, + "cacheRead": 800, + "cacheCreation": 64, + "costUsd": 0.0123 + } +} diff --git a/packages/symphony/contracts/fixtures/engine_event.json b/packages/symphony/contracts/fixtures/engine_event.json new file mode 100644 index 000000000..fefc03d9a --- /dev/null +++ b/packages/symphony/contracts/fixtures/engine_event.json @@ -0,0 +1,5 @@ +{ + "turnId": "thread_abc", + "seq": 7, + "body": { "type": "textDelta", "text": "hello" } +} diff --git a/packages/symphony/contracts/fixtures/turn_request.json b/packages/symphony/contracts/fixtures/turn_request.json new file mode 100644 index 000000000..afbb16a44 --- /dev/null +++ b/packages/symphony/contracts/fixtures/turn_request.json @@ -0,0 +1,11 @@ +{ + "engine": "codex", + "model": "gpt-5.3-codex", + "effort": "medium", + "permissions": "workspace_write", + "cwd": "/workspace/run_x/primary", + "prompt": "write FOO to ./hello.txt and stop", + "tools": [], + "runId": "run_x", + "nodeId": "n0" +} diff --git a/packages/symphony/default.nix b/packages/symphony/default.nix new file mode 100644 index 000000000..43af867ce --- /dev/null +++ b/packages/symphony/default.nix @@ -0,0 +1,176 @@ +# Symphony: the Elixir control plane for `.sym` agent workflows, absorbed +# from the dedicated indexable-inc/symphony repo at +# c9e709208c3ae161e24f625b9f3808a288c859ed. 
The launcher stages this source +# tree into the state dir at start and runs it with `mix run --no-halt` +# (see bin/run-nix), so the package is a closure around the source rather +# than a compiled BEAM release; production keeps deploying the same way it +# did from the standalone flake's `packages.default`. +# +# The room stack symphony drives over HTTP (room-server and the room UI) +# lives in the ix monorepo. The `room-server` binary baked into +# `images/dev/symphony-codex` still resolves from the pinned `symphony` +# flake input (see flake.nix); only the runtime moved here. +{ + lib, + pkgs, + ix, + writeNushellApplication, +}: +let + # mise.toml pins Elixir 1.19 on OTP 28; the launcher and the check build + # against the same pairing so a deploy never runs code the gate did not. + elixir = ix.languages.elixir.toolchain pkgs { version = "1.19"; }; + erlang = ix.languages.erlang.toolchain pkgs { version = "28"; }; + + # The tree bin/run-nix stages at service start: the mix project, the + # bundled example pack, and the engine wire fixtures. contracts/ sits + # beside elixir/ because the contract tests resolve it relatively + # (test/symphony_elixir/engine/contract_fixtures_test.exs). + src = lib.fileset.toSource { + root = ./.; + fileset = lib.fileset.unions [ + ./bin + ./contracts + ./elixir + ./workflows + ]; + }; + + # Test-env mix deps as a fixed-output derivation so the sandboxed check + # runs offline. Refresh the hash whenever elixir/mix.lock changes. + mixFodDeps = pkgs.beamPackages.fetchMixDeps { + pname = "symphony-elixir-deps"; + version = "0.2.0"; # keep in sync with elixir/mix.exs + src = lib.fileset.toSource { + root = ./elixir; + fileset = lib.fileset.unions [ + ./elixir/mix.exs + ./elixir/mix.lock + ]; + }; + inherit elixir; + mixEnv = "test"; + hash = "sha256-TLRGNPIm3zQKeFt54wrdirYYK81ribfsV92/NVLdQSM="; + }; + + # mix.lock pins lazy_html (a C++ NIF over lexbor) as a test-only dep for + # LiveView's HTML assertions. 
Compiling it either downloads this exact + # precompiled NIF over mix's httpc or git-clones lexbor, and the sandbox + # allows neither, so the check below seeds elixir_make's artifact cache + # with the upstream release tarball; elixir_make still verifies it against + # the checksum.exs pinned inside the dep before unpacking. Refresh the + # url/hash when a mix.lock bump moves lazy_html. + lazyHtmlNif = pkgs.fetchurl { + url = "https://github.com/dashbitco/lazy_html/releases/download/v0.1.10/lazy_html-nif-2.16-x86_64-linux-gnu-0.1.10.tar.gz"; + hash = "sha256-Ni0JKbP6OJqQ8rT08VnF/KWjiyigoVUjqSZ3LRU9dBo="; + }; + + # The required quality lane the standalone repo ran per PR (make ci: + # compile --warnings-as-errors, format --check-formatted, credo, test), + # now a sandboxed derivation wired into `checks` through per-system.nix. + # The advisory lane (dialyzer, sobelow, mix_audit, coveralls) stays a + # local `mix quality` run; see docs/quality.md. + elixirCheck = pkgs.stdenv.mkDerivation { + pname = "symphony-elixir-check"; + version = "0.2.0"; + inherit src; + sourceRoot = "source/elixir"; + + nativeBuildInputs = [ + erlang + elixir + (pkgs.beamPackages.hex.override { inherit elixir; }) + pkgs.git + ]; + strictDeps = true; + + env = { + MIX_ENV = "test"; + HEX_OFFLINE = "1"; + LANG = "C.UTF-8"; + LC_CTYPE = "C.UTF-8"; + # rebar-built deps (telemetry) make mix install rebar over the network + # unless the binaries are pinned, same as fetchMixDeps/mixRelease do. + MIX_REBAR = "${pkgs.beamPackages.rebar}/bin/rebar"; + MIX_REBAR3 = "${pkgs.beamPackages.rebar3}/bin/rebar3"; + # The precompiled lazy_html .so is a generic linux-gnu build, so the + # BEAM needs libstdc++ findable at dlopen time when the test suite + # loads the NIF. + LD_LIBRARY_PATH = lib.makeLibraryPath [ pkgs.stdenv.cc.cc.lib ]; + }; + + # Same deps wiring as nixpkgs mixRelease: deps come from the fixed-output + # fetch, copied writable because deps.compile builds in place. 
The + # elixir_make cache seed short-circuits the lazy_html NIF download (see + # lazyHtmlNif above; mix/tasks/compile.elixir_make.ex reuses an existing + # archive instead of fetching). + postUnpack = '' + export MIX_HOME="$TEMPDIR/mix" + export HEX_HOME="$TEMPDIR/hex" + export MIX_DEPS_PATH="$TEMPDIR/deps" + cp --no-preserve=mode -R "${mixFodDeps}" "$MIX_DEPS_PATH" + + export ELIXIR_MAKE_CACHE_DIR="$TEMPDIR/elixir-make-cache" + mkdir -p "$ELIXIR_MAKE_CACHE_DIR" + cp "${lazyHtmlNif}" "$ELIXIR_MAKE_CACHE_DIR/${lazyHtmlNif.name}" + ''; + + configurePhase = '' + runHook preConfigure + mix deps.compile --no-deps-check --skip-umbrella-children + ln -s "$MIX_DEPS_PATH" ./deps + runHook postConfigure + ''; + + buildPhase = '' + runHook preBuild + mix compile --no-deps-check --warnings-as-errors + runHook postBuild + ''; + + doCheck = true; + checkPhase = '' + runHook preCheck + mix format --check-formatted + mix credo + mix test --no-deps-check + runHook postCheck + ''; + + installPhase = '' + runHook preInstall + mkdir -p "$out" + runHook postInstall + ''; + }; +in +(writeNushellApplication { + name = "symphony"; + meta = { + description = "Elixir runtime for .sym agent workflows (control plane, LiveView dashboard, triggers)"; + license = lib.licenses.asl20; + }; + # codex is intentionally absent: bin/run-nix requires an authenticated + # codex on the operator's PATH and refuses to start otherwise, so the + # binary and its credentials stay host-owned. 
+ runtimeInputs = [ + pkgs.bash + pkgs.cacert + pkgs.coreutils + elixir + erlang + pkgs.gh + pkgs.git + pkgs.openssh + ]; + text = '' + def --wrapped main [...args] { + exec ${src}/bin/run-nix ...$args + } + ''; +}).overrideAttrs + (old: { + passthru = (old.passthru or { }) // { + tests.elixir = elixirCheck; + }; + }) diff --git a/packages/symphony/docs/engine-contract.md b/packages/symphony/docs/engine-contract.md new file mode 100644 index 000000000..a003ccb3c --- /dev/null +++ b/packages/symphony/docs/engine-contract.md @@ -0,0 +1,117 @@ +# Engine contract + +This is the source of truth for the wire shapes shared between the Elixir +runtime and the Rust room-server. It is the WS-0 seam of the overhaul: the +DSL, the runtime, and the room-server all code against these shapes, so a +change here is a deliberate cross-language change with a golden fixture to +prove both sides still agree. + +Two layers own these shapes: + +- `crates/room/src/engine.rs` (Rust, in the IX monorepo): `TurnRequest`, + `EngineEvent`, `TurnStatus`, `EngineAnswer`, and the `Engine` trait. +- `elixir/lib/symphony_elixir/engine/` and `ir/` (Elixir): the + `Engine.Envelope` that lowers to a `TurnRequest`, and the `IR.*` + durable run state that the runtime persists. + +Golden fixtures live in `contracts/fixtures/`. `turn_request.json` is the +shape Elixir produces and Rust consumes, so both sides assert it: the Rust +test in `crates/room/tests/engine_contract.rs` (in the IX monorepo) and the Elixir test +in `elixir/test/symphony_elixir/engine/contract_fixtures_test.exs`. A field +rename fails a check on both sides rather than silently at runtime. +`engine_event.json` is the shape Rust produces and Elixir will consume; only +the Rust side parses it today, because the Elixir `EngineEvent` decoder +lands with the streaming client (the synchronous `/api/agent/turns` path +returns an `AgentTurnResponse`, not an event stream). 
+`agent_turn_response.json` is the synchronous turn result Rust produces and +Elixir consumes, so both sides assert it: Rust deserializes the fixture and +Elixir feeds it through `Engine.Client.submit_turn/3` and checks the lowered +`cost`. + +## Casing and tagging + +- Field names are camelCase on the wire (`turnId`, `runId`), matching the + existing room-server JSON. +- Enum bodies carry a `type` tag (`EngineEventBody`) or a `kind` tag + (`TurnOutcome`, `EngineAnswer`). +- Scalar enums serialize as a lowercase or snake_case string + (`engine: "claude"`, `permissions: "danger_full_access"`). + +## TurnRequest + +The engine-agnostic turn the Elixir `Engine.Client` submits. The room-server +adapter lowers it to engine-native flags. + +```json +{ + "engine": "codex", + "model": "gpt-5.3-codex", + "effort": "medium", + "permissions": "workspace_write", + "cwd": "/workspace/run_x/primary", + "prompt": "write FOO to ./hello.txt and stop", + "tools": [], + "runId": "run_x", + "nodeId": "n0" +} +``` + +`effort` is omitted entirely when the envelope leaves it unset (the engine +picks its default). `permissions` is one of `read_only`, `workspace_write`, +`danger_full_access`; each adapter lowers it (Codex to sandbox + approval +policy, Claude to a permission mode or `--dangerously-skip-permissions`). + +## EngineEvent + +One normalized event for one turn. `EngineEventBody` is the superset of +what Codex emits; Claude is a subset producer and simply never emits +`approvalRequest` or `toolCallRequest` (it self-executes its tools under +`--dangerously-skip-permissions`). + +```json +{ "turnId": "thread_abc", "seq": 7, "body": { "type": "textDelta", "text": "hello" } } +``` + +Body variants: `turnStarted`, `textDelta`, `reasoningDelta`, +`toolCallStarted`, `toolCallOutput`, `fileChanged`, `statusChanged`, +`usage`, `approvalRequest`, `toolCallRequest`, `turnCompleted`. + +## AgentTurnResponse + +The synchronous result of `POST /api/agent/turns`. 
The room-server awaits +the whole turn and returns its terminal outcome, the thread id it assigned, +the event count, and the turn's terminal `usage` totals. Both engines emit +cumulative `Usage` events, so the response carries the last one as the +whole-turn total; `Engine.Client` lowers it to the `IR.Attempt.cost` shape +(`usd`, `tokens_in`, `tokens_out`, `cache_read`, `cache_creation`). + +```json +{ + "threadId": "thread_abc", + "outcome": { "kind": "ok" }, + "eventCount": 4, + "usage": { + "tokensIn": 1200, + "tokensOut": 340, + "cacheRead": 800, + "cacheCreation": 64, + "costUsd": 0.0123 + } +} +``` + +`usage` is always present (a turn that emitted none serializes a zeroed +total); `costUsd` is omitted when the engine did not price the turn, so a +present `usd` always means a real number. A response with no `usage` (an +older server) lowers to a nil cost so the attempt records "unknown" rather +than a sham zero. + +## Envelope to TurnRequest + +`Engine.Envelope` (Elixir) is the authored, validated shape; `Engine.Client` +lowers it to a `TurnRequest` (`request_body/2`). The envelope adds `location` +(`:local`, `:ixvm`, `{:host, name}`, `{:room, url}`), which the client +resolves to the room-server URL and does not put on the wire. The Elixir +fixture test asserts `request_body/2` reproduces `turn_request.json` +byte-for-byte after a JSON round-trip, so the lowering and the shared +fixture cannot drift apart. diff --git a/packages/symphony/docs/quality.md b/packages/symphony/docs/quality.md new file mode 100644 index 000000000..10b23cbde --- /dev/null +++ b/packages/symphony/docs/quality.md @@ -0,0 +1,71 @@ +# Quality gate + +Symphony runs a quality gate that reports formatting, lint, static security, +dependency-audit, type, and coverage findings. It is informational today and +does not block PRs. 
Run it locally with one command: + +```sh +cd elixir +make quality +``` + +That target runs `mix quality` (format check, Credo strict, Sobelow, deps +audit, Dialyzer) followed by `mix coveralls`. None of these are part of the +required lane: that lane (compile with warnings as errors, format check, +credo, `mix test`) runs sandboxed in index CI as the `symphony-elixir` flake +check (see `../default.nix`). + +## Tools + +- `mix format --check-formatted`: fails if any file is not formatted to the + rules in `elixir/.formatter.exs` (200-column lines). +- `mix credo --strict`: lint and refactoring analysis. Strict mode surfaces all + priorities, including the low-priority refactor checks pinned in + `elixir/.credo.exs`. +- `mix sobelow --config`: static security scanner for Phoenix apps, reading + `elixir/.sobelow-conf`. Reports common web vulnerabilities (XSS, CSRF, + config, traversal). Reporting only: it does not set an `exit` threshold. +- `mix deps.audit`: checks the dependency tree in `mix.lock` against the + Elixir security advisory database (`mix_audit`). +- `mix dialyzer`: success-typing analysis (`dialyxir`). The PLT is built under + `elixir/priv/plts/` (gitignored). +- `mix coveralls`: test-suite line coverage total (`excoveralls`). + +## CI + +The standalone repo ran the advisory lane as its own GitHub workflow. In +index, only the required lane is wired into CI (the `symphony-elixir` flake +check); the advisory lane is a local `make quality` run. Sobelow, deps.audit, +Dialyzer, and coveralls all want network access or large mutable caches, so a +sandboxed derivation is a bad fit for them today. + +## Phased rollout + +The gate ships in two phases so it never blocks PRs while the codebase is still +being brought into compliance. + +### Phase A (landed in the standalone repo, WS-8): tooling plus non-blocking reporting + +Install the tools, add the `quality` Make target and alias, and surface a +violations summary. 
Nothing here makes the required lane stricter. The point +is to see the violations, not to enforce them yet. + +### Phase B (WS-9, after the overhaul cutover): enforce + +Phase B lands only after the top-down overhaul cutover, once the module set is +final, so we do not spend effort on modules the cutover deletes. Steps: + +1. One-time Styler reformat, then enable the Styler formatter plugin in + `.formatter.exs`. +2. Add Boundary as a dep and `use Boundary` annotations encoding the layer + rules: DSL -> IR -> Runtime -> `Engine.Client`; `Engine.Client` is the only + door to the room-server; `bridge`/`state`/`http` never name a concrete + engine. +3. Fix the `credo --strict` and Dialyzer violations. +4. Fold the enforceable parts into the `symphony-elixir` flake check so they + gate PRs. + +Boundary is deferred until post-cutover on purpose. The module topology is +still changing in the overhaul, so annotating modules now would encode layer +rules onto modules the cutover removes. Boundary annotations land in Phase B +against the final module set. diff --git a/packages/symphony/docs/setup.md b/packages/symphony/docs/setup.md new file mode 100644 index 000000000..bda51d70c --- /dev/null +++ b/packages/symphony/docs/setup.md @@ -0,0 +1,135 @@ +# Setup + +## Local development + +```bash +git clone https://github.com/indexable-inc/index +cd index + +export SYMPHONY_PRIMARY_REPO=/path/to/your/repo +nix run .#symphony +``` + +Open http://127.0.0.1:4040 for the dashboard. + +The bundled `workflows/example` pack ships a single manual-trigger `.sym` +workflow (`workflows/inspect.sym`) with a read-only `inspect` skill that does +not push anything anywhere. It is intended as a starting point you can copy +into your own pack. 
+ +## Running a real workflow pack + +Drop your pack directory (a `workflows/` of `.sym` files, a `skills/` of +markdown, and a `repositories.yaml`) anywhere on the host and point Symphony +at it: + +```bash +export SYMPHONY_PACK_DIR=/path/to/your/pack +export SYMPHONY_PRIMARY_REPO=/path/to/your/primary/repo +nix run github:indexable-inc/index#symphony +``` + +Required runtime credentials depend on which triggers and tools your pack +uses; see `README.md` and `elixir/lib/symphony_elixir/config.ex` for the full +env var list. + +Symphony treats the workflow pack as read-only runtime input. Put mutable run +state under `SYMPHONY_RUNS_DIR` and worktrees under `SYMPHONY_WORKSPACES_DIR`; +both default under the runtime state directory when using the Nix wrapper. + +Common ones: + +- `LINEAR_API_KEY` (Linear graphql tool + webhook enqueue) +- `GITHUB_TOKEN` (dashboard statistics) +- `LINEAR_WEBHOOK_SECRET`, `GITHUB_WEBHOOK_SECRET`, `SLACK_SIGNING_SECRET` + (webhook receivers) +- `SYMPHONY_GITHUB_APP_ID`, `SYMPHONY_GITHUB_APP_PRIVATE_KEY_BASE64`, + `SYMPHONY_GITHUB_APP_OWNER_REPO` (commit/push as a bot identity) +- `SYMPHONY_BOT_USERNAME`, `SYMPHONY_BOT_EMAIL` (git author when the App is + configured) + +Codex must already be installed and authenticated on the host. + +## Choosing a placement + +Each agent node picks where its codex session runs with a `location:` field in +the `.sym` workflow: + +``` +implement <- agent { + engine: codex + model: "gpt-5.3-codex" + permissions: workspace_write + location: host # or: ixvm, room, local + prompt: skill "implement" +} +``` + +`host` runs codex directly on the Symphony machine as a real OS user +(`SYMPHONY_HOST_USER`) inside that user's home directory, with no VM, so the +agent can read and write that user's files. `ixvm` runs it inside a +short-lived iXVM. Both stand up a per-run room-server and register it so the +room UI can attach. `local` and `room` use the default +`SYMPHONY_ROOM_SERVER_URL`. 
+ +When a node's `ixvm` placement fails to provision before the first turn, the +run retries on the placement named by `SYMPHONY_PLACEMENT_FALLBACK` (defaults +to `host`). On NixOS, set `services.symphony.hostRuntime` to wire the polkit +grant, PATH, and `SYMPHONY_HOST_USER` the host placement needs: + +```nix +services.symphony.hostRuntime = { + enable = true; + user = "hari"; + # room-server now lives in the IX monorepo; provide its wrapped binary. + roomServerPackage = ix.packages.${pkgs.stdenv.hostPlatform.system}.room-server; +}; +``` + +## Choosing an engine: Codex or Claude + +An agent node names its engine directly in the `.sym` workflow with the +`engine:` field (`codex` or `claude`); the room-server's engine host runs the +turn through the matching adapter. + +``` +report <- agent { + engine: claude + model: haiku + permissions: read_only + prompt: inline "write a status report" +} +``` + +A Claude model means `claude-*` or the `opus` / `sonnet` / `haiku` aliases. +Claude turns are billed against `ANTHROPIC_API_KEY`. The codex-only `sandbox` +/ `approval_policy` skill fields do not apply to Claude turns. + +## Production deployment (NixOS) + +```nix +{ + inputs.index.url = "github:indexable-inc/index"; + + outputs = { self, nixpkgs, index, ... }: { + nixosConfigurations.host = nixpkgs.lib.nixosSystem { + modules = [ + index.nixosModules.symphony + ({ pkgs, ... }: { + services.symphony = { + enable = true; + package = index.packages.${pkgs.stdenv.hostPlatform.system}.symphony; + packDir = "/var/lib/symphony-pack"; + primaryRepo = "/var/lib/repos/my-app"; + environmentFile = "/run/secrets/symphony.env"; + }; + }) + ]; + }; + }; +} +``` + +Pair the module with whichever secret store you prefer (sops-nix, agenix, +Bitwarden Secrets Manager via `secretsCommand`, etc.). See the module options +in `modules/services/symphony/default.nix` at the repo root. 
diff --git a/packages/symphony/elixir/.credo.exs b/packages/symphony/elixir/.credo.exs new file mode 100644 index 000000000..d0cc61a0e --- /dev/null +++ b/packages/symphony/elixir/.credo.exs @@ -0,0 +1,30 @@ +%{ + configs: [ + %{ + name: "default", + files: %{ + included: ["lib/", "test/", "config/", "mix.exs"], + excluded: [] + }, + strict: false, + color: true, + checks: [ + # Make refactoring suggestions informational rather than CI-failing. + # CI catches real correctness issues via mix compile --warnings-as-errors; + # credo's refactor suggestions are useful local feedback but should not + # gate the build on every threshold tweak. + {Credo.Check.Refactor.CyclomaticComplexity, max_complexity: 12, priority: :low}, + {Credo.Check.Refactor.Nesting, max_nesting: 3, priority: :low}, + {Credo.Check.Refactor.WithClauses, priority: :low}, + {Credo.Check.Refactor.RedundantWithClauseResult, priority: :low}, + {Credo.Check.Refactor.CondStatements, priority: :low}, + {Credo.Check.Readability.WithSingleClause, priority: :low}, + # Config is intentionally a wide snapshot of env vars. Splitting into + # nested substructs would just push the same field count into nested + # types without making boot-time wiring clearer, and would break the + # field-name == opt-key round-trip the snapshot relies on. 
+ {Credo.Check.Warning.StructFieldAmount, max_fields: 60} + ] + } + ] +} diff --git a/packages/symphony/elixir/.formatter.exs b/packages/symphony/elixir/.formatter.exs new file mode 100644 index 000000000..59e60a5c2 --- /dev/null +++ b/packages/symphony/elixir/.formatter.exs @@ -0,0 +1,5 @@ +# Used by "mix format" +[ + inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"], + line_length: 200 +] diff --git a/packages/symphony/elixir/.gitattributes b/packages/symphony/elixir/.gitattributes new file mode 100644 index 000000000..6db95539d --- /dev/null +++ b/packages/symphony/elixir/.gitattributes @@ -0,0 +1 @@ +test/fixtures/status_dashboard_snapshots/* linguist-generated=true diff --git a/packages/symphony/elixir/.gitignore b/packages/symphony/elixir/.gitignore new file mode 100644 index 000000000..f5c8fbfa8 --- /dev/null +++ b/packages/symphony/elixir/.gitignore @@ -0,0 +1,56 @@ +# The directory Mix will write compiled artifacts to. +/_build/ + +# If you run "mix test --cover", coverage assets end up here. +/cover/ + +# Dialyzer PLT files (mix.exs sets plt_core_path/plt_local_path here). +/priv/plts/ + +# The directory Mix downloads your dependencies sources to. +/deps/ + +# Where third-party dependencies like ExDoc output generated docs. +/doc/ + +# Temporary files, for example, from tests. +/tmp/ + +# Generated browser assets. +/priv/static/assets/ + +# Local runtime logs. +/log/ +/logs/ + +# If the VM crashes, it generates a dump, let's ignore it too. +erl_crash.dump + +# Elixir language server and tooling. +/.elixir_ls/ +/.fetch/ + +# Editor / OS temporary files. +.DS_Store +*.swp +*.swo +*~ + +# IDE folders. +.idea/ +.vscode/ +/bin/ + +# Local environment and auth artifacts. +.env +.env.* +.secrets +.credentials +status.txt +.codex/original-user-prompt.txt + +# Also ignore archive artifacts (built via "mix archive.build"). +*.ez + +# Ignore package tarball (built via "mix hex.build"). 
+symphony_elixir-*.tar diff --git a/packages/symphony/elixir/.sobelow-conf b/packages/symphony/elixir/.sobelow-conf new file mode 100644 index 000000000..3e2793fb1 --- /dev/null +++ b/packages/symphony/elixir/.sobelow-conf @@ -0,0 +1,14 @@ +# Sobelow static security scanner config. +# https://hexdocs.pm/sobelow/Mix.Tasks.Sobelow.html +# +# `mix sobelow --config` reads this file. WS-8 runs sobelow as an +# informational gate only, so do not set `exit: "low"` here; that would turn +# any finding into a nonzero exit and could gate a PR once the quality job is +# promoted to required in WS-9. Keep this scan reporting-only until then. +[ + verbose: false, + private: false, + skip: false, + router: "lib/symphony_elixir_web/router.ex", + format: "txt" +] diff --git a/packages/symphony/elixir/AGENTS.md b/packages/symphony/elixir/AGENTS.md new file mode 100644 index 000000000..71cef3269 --- /dev/null +++ b/packages/symphony/elixir/AGENTS.md @@ -0,0 +1,65 @@ +# Symphony Elixir + +This directory contains the Elixir/OTP runtime that lowers `.sym` +workflows to an IR run graph and walks it. See `../README.md` for the +project overview, file layout, env vars, and API. + +## Environment + +- Elixir: `1.19.x` (OTP 28), pinned in `mise.toml`. +- Install deps: `mix deps.get`. +- Main quality gate: `make all` (which runs `make setup`, `make build`, + `make fmt-check`, `make lint`, `make test`). + +## Codebase-Specific Conventions + +- Runtime config is loaded from the process environment at boot via + `SymphonyElixir.Config`. Prefer adding new knobs there rather than + reading `System.get_env/1` ad hoc. +- Workflows (`workflows/*.sym`) are hot-reloaded by + `SymphonyElixir.WorkflowCatalog` and skills (`skills/*.md`) by + `SymphonyElixir.Catalog`, both on a 1s tick; no restart needed for + content changes. +- Workspace safety is critical: + - Never run a Codex turn with cwd inside the source repo. Every run + gets a fresh `git worktree add` under `SYMPHONY_WORKSPACES_DIR`. 
+ - `SymphonyElixir.PathSafety.canonicalize/1` is the gate; any new + code that resolves a workspace-relative path should route through + it. +- Runtime behavior is stateful and concurrency-sensitive: preserve + retry, resume-on-boot, and workspace-cleanup semantics in + `SymphonyElixir.Runtime` and `SymphonyElixir.IR.Store`. + +## Tests and Validation + +Run targeted tests while iterating, then run full gates before +handoff: + +```bash +make all +mix test +``` + +## Required Rules + +- Public functions (`def`) in `lib/` should have an adjacent `@spec`. +- `defp` specs are optional. +- `@impl` callback implementations are exempt from the `@spec` rule. +- Keep changes narrowly scoped; avoid unrelated refactors in the same + PR. +- Follow existing module/style patterns in `lib/symphony_elixir/*`. + +## CI + +The sandboxed `symphony-elixir` flake check runs the required lane +(compile with warnings as errors, `mix format --check-formatted`, +`mix credo`, `mix test`) against the deps pinned in `mix.lock`; see +`../default.nix`. After changing `mix.lock`, refresh the +`fetchMixDeps` hash there. + +## Docs Update Policy + +If behavior/config changes, update docs in the same PR: + +- `../README.md` for the project concept, file layout, env vars, API. +- `../docs/setup.md` for host setup / runtime credentials. 
diff --git a/packages/symphony/elixir/Makefile b/packages/symphony/elixir/Makefile new file mode 100644 index 000000000..68e15ec3a --- /dev/null +++ b/packages/symphony/elixir/Makefile @@ -0,0 +1,46 @@ +.PHONY: help all setup deps build fmt fmt-check lint test ci quality + +MIX ?= mix + +help: + @echo "Targets: setup, deps, fmt, fmt-check, lint, test, ci, quality" + +setup: + $(MIX) local.hex --force --if-missing + $(MIX) local.rebar --force --if-missing + $(MIX) setup + +deps: + $(MIX) deps.get + +build: + $(MIX) build + +fmt: + $(MIX) format + +fmt-check: + $(MIX) format --check-formatted + +lint: + $(MIX) lint + +test: + $(MIX) test + +ci: + $(MAKE) setup + $(MAKE) build + $(MAKE) fmt-check + $(MAKE) lint + $(MAKE) test + +all: ci + +# Non-required reporting gate (WS-8). Intentionally separate from ci/all so it +# never blocks PRs while the codebase is brought into compliance (WS-9). +# Depends on setup so a clean CI runner has Hex, rebar, and fetched deps +# before the tools run; without it `mix quality` aborts on a missing SCM. 
+quality: setup + $(MIX) quality + $(MIX) coveralls diff --git a/packages/symphony/elixir/config/config.exs b/packages/symphony/elixir/config/config.exs new file mode 100644 index 000000000..d1f0c98f5 --- /dev/null +++ b/packages/symphony/elixir/config/config.exs @@ -0,0 +1,18 @@ +import Config + +config :phoenix, :json_library, Jason + +config :symphony_elixir, SymphonyElixirWeb.Endpoint, + adapter: Bandit.PhoenixAdapter, + url: [host: "localhost"], + render_errors: [ + formats: [html: SymphonyElixirWeb.ErrorHTML, json: SymphonyElixirWeb.ErrorJSON], + layout: false + ], + pubsub_server: SymphonyElixir.PubSub, + live_view: [signing_salt: "symphony-live-view"], + check_origin: false, + server: true, + http: [ip: {127, 0, 0, 1}, port: 4040] + +import_config "#{config_env()}.exs" diff --git a/packages/symphony/elixir/config/dev.exs b/packages/symphony/elixir/config/dev.exs new file mode 100644 index 000000000..becde7693 --- /dev/null +++ b/packages/symphony/elixir/config/dev.exs @@ -0,0 +1 @@ +import Config diff --git a/packages/symphony/elixir/config/prod.exs b/packages/symphony/elixir/config/prod.exs new file mode 100644 index 000000000..becde7693 --- /dev/null +++ b/packages/symphony/elixir/config/prod.exs @@ -0,0 +1 @@ +import Config diff --git a/packages/symphony/elixir/config/runtime.exs b/packages/symphony/elixir/config/runtime.exs new file mode 100644 index 000000000..20c936f93 --- /dev/null +++ b/packages/symphony/elixir/config/runtime.exs @@ -0,0 +1,22 @@ +import Config + +# Runtime config. Boot reads these from the environment so a single binary can +# move between hosts without recompiling. 
+ +secret_key_base = + System.get_env("SYMPHONY_SECRET_KEY_BASE") || + Base.encode64(:crypto.strong_rand_bytes(48), padding: false) + +config :symphony_elixir, SymphonyElixirWeb.Endpoint, secret_key_base: secret_key_base + +if config_env() != :test do + port_string = System.get_env("SYMPHONY_HTTP_PORT", "4040") + + port = + case Integer.parse(port_string) do + {value, ""} when value in 0..65_535 -> value + _ -> raise "SYMPHONY_HTTP_PORT must be an integer between 0 and 65535, got #{inspect(port_string)}" + end + + config :symphony_elixir, SymphonyElixirWeb.Endpoint, http: [ip: {127, 0, 0, 1}, port: port] +end diff --git a/packages/symphony/elixir/config/test.exs b/packages/symphony/elixir/config/test.exs new file mode 100644 index 000000000..25bcfc719 --- /dev/null +++ b/packages/symphony/elixir/config/test.exs @@ -0,0 +1,13 @@ +import Config + +# Tests do not need to bind a real HTTP socket. Letting the endpoint +# bind 127.0.0.1:4040 means `mix test` fails whenever a real symphony +# is already running on the same host. Set `server: false` so the +# Bandit adapter is skipped; LiveView and Plug logic that the test +# suite touches still work without a live listener. +config :symphony_elixir, SymphonyElixirWeb.Endpoint, server: false + +# Tests start the bits they need from test_helper.exs. The full supervision +# tree depends on SYMPHONY_ROOT and friends being set, which test runners +# should not have to inherit from the host env. +config :symphony_elixir, auto_start: false diff --git a/packages/symphony/elixir/docs/logging.md b/packages/symphony/elixir/docs/logging.md new file mode 100644 index 000000000..2708fe72b --- /dev/null +++ b/packages/symphony/elixir/docs/logging.md @@ -0,0 +1,40 @@ +# Logging Best Practices + +This guide defines logging conventions for Symphony so Codex can diagnose failures quickly. + +## Goals + +- Make logs searchable by issue and session. +- Capture enough execution context to identify root cause without reruns. 
+- Keep messages stable so dashboards/alerts are reliable. + +## Required Context Fields + +When logging issue-related work, include both identifiers: + +- `issue_id`: Linear internal UUID (stable foreign key). +- `issue_identifier`: human ticket key (for example `MT-620`). + +When logging Codex execution lifecycle events, include: + +- `session_id`: combined Codex thread/turn identifier. + +## Message Design + +- Use explicit `key=value` pairs in message text for high-signal fields. +- Prefer deterministic wording for recurring lifecycle events. +- Include the action outcome (`completed`, `failed`, `retrying`) and the reason/error when available. +- Avoid logging large payloads unless required for debugging. + +## Scope Guidance + +- `AgentRunner`: log start/completion/failure with issue context, plus `session_id` when known. +- `Orchestrator`: log dispatch, retry, terminal/non-active transitions, and worker exits with issue context. Include `session_id` whenever running-entry data has it. +- `Runtime` / `Runtime.RoomEngineClient`: log node turn start/completion/error with run context and the engine `thread_id`. + +## Checklist For New Logs + +- Is this event tied to a Linear issue? Include `issue_id` and `issue_identifier`. +- Is this event tied to a Codex session? Include `session_id`. +- Is the failure reason present and concise? +- Is the message format consistent with existing lifecycle logs? diff --git a/packages/symphony/elixir/docs/token_accounting.md b/packages/symphony/elixir/docs/token_accounting.md new file mode 100644 index 000000000..2c6e107be --- /dev/null +++ b/packages/symphony/elixir/docs/token_accounting.md @@ -0,0 +1,304 @@ +# Codex Token Accounting + +This document explains how Codex reports token usage through the app-server protocol and how Symphony should account for it. 
+ +It is based on the current Codex source in `codex-rs`, especially: + +- `app-server/README.md` +- `protocol/src/protocol.rs` +- `app-server/src/bespoke_event_handling.rs` +- `app-server-protocol/src/protocol/v2.rs` +- `exec/src/event_processor_with_jsonl_output.rs` +- `state/src/extract.rs` + +## Short Version + +- `last_token_usage` means "the latest increment". +- `total_token_usage` means "the cumulative total so far". +- `thread/tokenUsage/updated` is the live streaming notification for token usage. +- `turn/completed` carries final turn state, and turn-level usage is exposed separately from the live thread token stream. +- Generic `usage` fields are event-specific. Do not assume every `usage` payload is a cumulative thread total. + +## Primary Source Semantics + +Codex defines `TokenUsageInfo` like this: + +```rust +pub struct TokenUsageInfo { + pub total_token_usage: TokenUsage, + pub last_token_usage: TokenUsage, + pub model_context_window: Option<i64>, +} +``` + +The important behavior is in `append_last_usage`: + +```rust +pub fn append_last_usage(&mut self, last: &TokenUsage) { + self.total_token_usage.add_assign(last); + self.last_token_usage = last.clone(); +} +``` + +That gives the core semantics: + +- `last_token_usage`: the newest chunk of usage that was just added +- `total_token_usage`: the accumulated total after adding that chunk + +This is the most important accounting rule in the Codex source. + +## Event Types + +### `codex/event/token_count` + +Codex core emits token count events containing `TokenUsageInfo`. + +These events can carry: + +- `info.total_token_usage` +- `info.last_token_usage` +- `info.model_context_window` + +Symphony sees these events wrapped inside the app-server message stream. 
+ +Meaning: + +- `total_token_usage` is an absolute cumulative snapshot +- `last_token_usage` is the delta that produced that snapshot + +### `thread/tokenUsage/updated` + +The app-server converts token count events into a dedicated thread-scoped notification: + +```rust +let notification = ThreadTokenUsageUpdatedNotification { + thread_id: conversation_id.to_string(), + turn_id, + token_usage, +}; +``` + +`ThreadTokenUsage` is defined as: + +```rust +pub struct ThreadTokenUsage { + pub total: TokenUsageBreakdown, + pub last: TokenUsageBreakdown, + pub model_context_window: Option<i64>, +} +``` + +And it is populated directly from `TokenUsageInfo`: + +```rust +impl From<CoreTokenUsageInfo> for ThreadTokenUsage { + fn from(value: CoreTokenUsageInfo) -> Self { + Self { + total: value.total_token_usage.into(), + last: value.last_token_usage.into(), + model_context_window: value.model_context_window, + } + } +} +``` + +Meaning: + +- `thread/tokenUsage/updated` is the canonical live notification for token usage +- `tokenUsage.total` is an absolute thread total +- `tokenUsage.last` is the latest increment that produced that total + +The app-server README is explicit: token usage streams separately via `thread/tokenUsage/updated`. + +### `turn/completed` + +The app-server README says `turn/completed` carries final turn state and token usage. + +There are two important details: + +1. The app-server protocol `turn/completed` notification contains a final `turn` object. +2. The `exec` event processor also emits a turn-completed event that includes a `usage` struct. 
+ +In the `exec` event processor, the turn-completed usage is built from the most recent captured `total_token_usage`: + +```rust +if let Some(info) = &ev.info { + self.last_total_token_usage = Some(info.total_token_usage.clone()); +} +``` + +Then on turn completion: + +```rust +let usage = if let Some(u) = &self.last_total_token_usage { + Usage { + input_tokens: u.input_tokens, + cached_input_tokens: u.cached_input_tokens, + output_tokens: u.output_tokens, + } +} +``` + +Important consequence: + +- a turn-completed `usage` payload is not the same schema as `ThreadTokenUsage` +- it should be interpreted in the context of the specific event that emitted it +- it must not be blindly mixed with `thread/tokenUsage/updated` accounting + +### Generic `usage` + +Codex uses the word `usage` in multiple places. + +That does not mean all `usage` maps have the same semantics. + +Examples: + +- `thread/tokenUsage/updated.tokenUsage.total`: absolute cumulative thread total +- `thread/tokenUsage/updated.tokenUsage.last`: latest delta +- turn-completed `usage`: event-specific completion usage payload + +Rule: + +- never classify a `usage` map by name alone +- classify it by event type and payload path + +## What The Metrics Mean + +### Absolute totals + +These are safe high-water-mark style counters: + +- `info.total_token_usage` +- `tokenUsage.total` on `thread/tokenUsage/updated` + +Use these when you want: + +- live dashboard totals +- stable per-thread accumulation +- recovery after missed intermediate events + +### Deltas + +These are incremental additions: + +- `info.last_token_usage` +- `tokenUsage.last` on `thread/tokenUsage/updated` + +Use these only when: + +- no absolute total is available +- you are explicitly handling additive updates + +### Context window + +`model_context_window` is not spend. It is the model's context limit. 
+ +Codex also has logic that can "fill to context window", which sets: + +- `total_token_usage.total_tokens = context_window` +- `last_token_usage.total_tokens = delta` + +So `total_tokens` can reflect context-window normalization behavior, not just a raw upstream token report. + +For Symphony, `model_context_window` should be displayed or logged separately from spend. + +## Recommended Accounting Strategy For Symphony + +Track usage per active Codex thread. + +For each thread, keep: + +- `absolute_total`: latest accepted absolute total snapshot +- `accumulated_total`: the total you expose in UI/API +- `last_seen_turn_id` + +### Preferred source order + +When a token-related event arrives, use this precedence: + +1. `thread/tokenUsage/updated.tokenUsage.total` +2. `TokenCountEvent.info.total_token_usage` + +Ignore these for accounting: + +- `thread/tokenUsage/updated.tokenUsage.last` +- `TokenCountEvent.info.last_token_usage` +- generic `usage` maps +- turn-completed `usage` + +Do not treat generic `params.usage` as equivalent to a cumulative thread total unless the event type makes that meaning explicit. + +### Algorithm + +#### If an absolute total is present + +- Treat it as a thread-level snapshot. +- If it is greater than or equal to the stored `absolute_total`, replace the stored absolute total. +- Set exposed totals from that absolute snapshot. +- Do not add the corresponding delta again. + +#### If no absolute total is present + +- Ignore the event for accounting. +- Keep the last accepted absolute high-water mark unchanged. + +### Why this matters + +If you misclassify a per-turn `usage` payload as an absolute thread total, later turns can appear to stall because a smaller per-turn number is compared against a larger cumulative baseline. + +## What Symphony Should And Should Not Do + +### Do + +- Prefer `thread/tokenUsage/updated` for live reporting. +- Treat `tokenUsage.total` as authoritative for thread totals. 
+- Key accounting by `thread_id`, not just issue id. +- Expect one thread to span multiple turns when Symphony reuses a live Codex thread. + +### Do not + +- Do not treat every `usage` map as absolute. +- Do not count `tokenUsage.last` or `last_token_usage` into dashboard totals. +- Do not add turn-completed `usage` on top of already-counted live thread totals unless you can prove it represents missing spend. +- Do not reset accounting just because a new turn starts on the same thread. + +## Practical Interpretation For Symphony Logs + +When reading raw app-server events: + +- `codex/event/token_count` + - useful if you are inspecting nested `info.total_token_usage` +- `thread/tokenUsage/updated` + - best source for live dashboard and API totals +- `turn/completed` + - best used as end-of-turn state, not as an unconditional additive token event + +## Why `total_token_usage` Is The Durable Choice + +Codex itself consistently prefers cumulative totals when it needs durable state: + +- the state extractor stores `info.total_token_usage.total_tokens` +- the exec event processor caches the last `total_token_usage` and uses that on turn completion + +That is a strong signal for Symphony: + +- use absolute totals as the main accounting surface +- ignore last/delta values for totals + +## Recommended Symphony Documentation Contract + +If Symphony documents token reporting externally, the contract should be: + +- Live token totals come from Codex thread-scoped cumulative usage. +- Incremental usage may also be emitted, but Symphony does not use it for totals. +- Turn-completed usage is event-specific and should not be assumed to be a fresh additive increment. +- Reporting is thread-based, and multiple turns can occur on one thread. 
+ +## Implementation Checklist + +- Prefer `thread/tokenUsage/updated.tokenUsage.total` +- Fall back to `info.total_token_usage` +- Ignore `last` for totals +- Key totals by `thread_id` +- Do not classify generic `usage` by field name alone +- Do not double-count turn-completed usage after live updates diff --git a/packages/symphony/elixir/lib/symphony_elixir.ex b/packages/symphony/elixir/lib/symphony_elixir.ex new file mode 100644 index 000000000..91e387cf4 --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir.ex @@ -0,0 +1,16 @@ +defmodule SymphonyElixir do + @moduledoc """ + Symphony runs workflows of agent invocations. + + - A `workflows/<name>.sym` file declares the nodes and edges of a workflow + in the `.sym` surface language, lowered to an IR run graph. + - A `skills/<name>.md` file declares the system prompt, codex policy, and + tool surface a `skill "name"` prompt resolves to. + - A trigger (Linear label, manual API call, cron tick, Slack, GitHub) + starts a run. + - Each run gets a fresh workspace from the primary repository's configured + default branch. + - The IR runtime walks the graph, executing one node at a time through the + engine host. + """ +end diff --git a/packages/symphony/elixir/lib/symphony_elixir/application.ex b/packages/symphony/elixir/lib/symphony_elixir/application.ex new file mode 100644 index 000000000..7c3e953e9 --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir/application.ex @@ -0,0 +1,84 @@ +defmodule SymphonyElixir.Application do + @moduledoc """ + OTP application entrypoint. 
+ + Boot order (one_for_one): + + Phoenix.PubSub in-process eventbus + Task.Supervisor short-lived tasks (codex turns, trigger work) + Config boot-time env snapshot + GithubApp mints and caches GitHub App installation tokens + Catalog watches skills/*.md, hot-reloads + WorkflowCatalog watches workflows/*.sym, hot-reloads the DSL ingress index + CronState persists per-workflow last_fired_at for cron workflows + Runtime.Registry name registry for per-run runtimes + Runtime.Placement per-run room-server placement registry (ixvm/host) + Runtime.Supervisor DynamicSupervisor for runs + Triggers.Slack polls Slack for completed huddles (opt-in) + Triggers.Cron fires cron-triggered workflows on a wall-clock cadence + Endpoint Phoenix HTTP + LiveView; also receives Linear webhooks + """ + + use Application + + @impl true + def start(_type, _args) do + if Application.get_env(:symphony_elixir, :auto_start, true) do + start_supervised() + else + Supervisor.start_link([], strategy: :one_for_one, name: SymphonyElixir.Supervisor) + end + end + + defp start_supervised do + :ok = SymphonyElixir.LogFile.configure() + + role = role() + children = children_for(role) + + with {:ok, pid} <- Supervisor.start_link(children, strategy: :one_for_one, name: SymphonyElixir.Supervisor) do + if role == :control_plane, do: SymphonyElixir.Runtime.Supervisor.resume_pending() + {:ok, pid} + end + end + + # Read directly from the env, not the Config snapshot: the role decides + # whether Config itself (and the rest of the tree) boots. + defp role do + case System.get_env("SYMPHONY_ROLE") do + "worker" -> :worker + _ -> :control_plane + end + end + + # The full control plane: triggers, webhooks, the run engine, the placement + # registry, and the runtime-worker registry that backs :remote placement. 
+ defp children_for(:control_plane) do + [ + {Phoenix.PubSub, name: SymphonyElixir.PubSub}, + {Task.Supervisor, name: SymphonyElixir.TaskSupervisor}, + SymphonyElixir.Config, + SymphonyElixir.GithubApp, + SymphonyElixir.Catalog, + SymphonyElixir.WorkflowCatalog, + SymphonyElixir.CronState, + {Registry, keys: :unique, name: SymphonyElixir.Runtime.Registry}, + SymphonyElixir.Runtime.Placement, + SymphonyElixir.Runtime.RuntimeRegistry, + SymphonyElixir.Runtime.Supervisor, + SymphonyElixir.Triggers.Slack, + SymphonyElixir.Triggers.Cron, + SymphonyElixirWeb.Endpoint + ] + end + + # A runtime worker: just enough to dial the control plane and provision + # per-run room-servers on this host. No DB, triggers, engine, or HTTP surface. + defp children_for(:worker) do + [ + {Task.Supervisor, name: SymphonyElixir.TaskSupervisor}, + SymphonyElixir.Config, + SymphonyElixir.Runtime.WorkerClient + ] + end +end diff --git a/packages/symphony/elixir/lib/symphony_elixir/catalog.ex b/packages/symphony/elixir/lib/symphony_elixir/catalog.ex new file mode 100644 index 000000000..6e8c97f98 --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir/catalog.ex @@ -0,0 +1,132 @@ +defmodule SymphonyElixir.Catalog do + @moduledoc """ + Watches `skills/*.md` and publishes the latest parsed skills. Polls every + `catalog_poll_ms` (default 1s) and compares hashes. + + Reload semantics: + + - A new file appears: parsed and added. + - An existing file's bytes change: re-parsed; old version is replaced. + - A file is deleted: removed from the catalog. + - A parse error: kept logged but not crashed; the previously-loaded + version (if any) stays in place until the bytes parse again. + + Skill resolution is load-bearing for the IR engine path: + `Runtime.RoomEngineClient` resolves a node's `skill "name"` prompt through + `Catalog.skill/1`, which expands shared `{{partial:_}}` includes at load + time. 
The YAML/DAG stack also watched `dags/`; that surface was deleted in + the `.sym`/IR cutover (ENG-1828), so this catalog now watches skills only. + + Active runs snapshot the skills they resolve at run start; reloads here + affect only NEW runs. + """ + + use GenServer + require Logger + + alias SymphonyElixir.{Config, Skill} + + @table :symphony_catalog + + defstruct [:skills_dir, :poll_ms] + + @spec start_link(keyword()) :: GenServer.on_start() + def start_link(opts \\ []) do + GenServer.start_link(__MODULE__, opts, name: __MODULE__) + end + + @spec skill(String.t()) :: {:ok, Skill.t()} | {:error, :not_found} + def skill(name) when is_binary(name) do + case :ets.lookup(@table, {:skill, name}) do + [{_key, skill}] -> {:ok, skill} + [] -> {:error, :not_found} + end + end + + @spec skills() :: [Skill.t()] + def skills do + :ets.match_object(@table, {{:skill, :_}, :_}) + |> Enum.map(fn {_key, skill} -> skill end) + end + + @impl true + def init(_opts) do + :ets.new(@table, [:named_table, :public, read_concurrency: true]) + config = Config.get() + + state = %__MODULE__{ + skills_dir: config.skills_dir, + poll_ms: config.catalog_poll_ms + } + + schedule_scan(0) + {:ok, state} + end + + @impl true + def handle_info(:scan, %__MODULE__{} = state) do + scan_dir(state.skills_dir, :skill, &Skill.load/1) + schedule_scan(state.poll_ms) + {:noreply, state} + end + + defp schedule_scan(after_ms) do + Process.send_after(self(), :scan, after_ms) + end + + defp scan_dir(dir, :skill, loader) do + files = Path.wildcard(Path.join(dir, "*.md")) + + seen_names = + Enum.reduce(files, MapSet.new(), fn path, acc -> + name = Path.basename(path, Path.extname(path)) + load_if_changed(:skill, name, path, loader) + MapSet.put(acc, name) + end) + + remove_missing(:skill, seen_names) + end + + defp load_if_changed(kind, name, path, loader) do + case File.read(path) do + {:ok, raw} -> + new_hash = :crypto.hash(:sha256, raw) + + case current_hash(kind, name) do + ^new_hash -> + :ok + + _ -> + case 
loader.(path) do + {:ok, parsed} -> + :ets.insert(@table, {{kind, name}, parsed}) + Logger.info("Catalog loaded #{kind}=#{name} hash=#{Base.encode16(new_hash, case: :lower) |> binary_part(0, 8)}") + + {:error, reason} -> + Logger.warning("Catalog failed to load #{kind}=#{name}: #{inspect(reason)}") + end + end + + {:error, reason} -> + Logger.warning("Catalog failed to read #{path}: #{inspect(reason)}") + end + end + + defp current_hash(kind, name) do + case :ets.lookup(@table, {kind, name}) do + [{_key, %{body_hash: hash}}] -> hash + _ -> nil + end + end + + defp remove_missing(kind, seen_names) do + @table + |> :ets.match_object({{kind, :_}, :_}) + |> Enum.each(fn {{^kind, name} = key, _value} -> + unless MapSet.member?(seen_names, name) do + :ets.delete(@table, key) + Logger.info("Catalog removed #{kind}=#{name} (file deleted)") + end + end) + end +end diff --git a/packages/symphony/elixir/lib/symphony_elixir/claude/code.ex b/packages/symphony/elixir/lib/symphony_elixir/claude/code.ex new file mode 100644 index 000000000..64060e833 --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir/claude/code.ex @@ -0,0 +1,209 @@ +defmodule SymphonyElixir.Claude.Code do + @moduledoc """ + Runs one workflow node as a headless Claude Code session in the run's + workspace, used when a skill's `codex_model` names a Claude model + (`claude-*`, or the `opus` / `sonnet` / `haiku` aliases); every other + model goes to Codex. + + This is the in-process Claude runner the YAML/DAG `NodeExecutor` used. + The `.sym`/IR engine path runs Claude turns through the room-server's + `engine_claude` adapter instead, so this module is not on the IR hot + path. It is kept (not deleted with the rest of the old stack) because it + is the only in-process Claude turn runner and removing it would orphan + Claude support outside the room-server; revisit once the IR path proves + Claude parity end to end on the room-server engine host. 
+ + There is no app-server protocol, no VM, and no Symphony dynamic-tool + surface here. This module spawns the `claude` CLI once, non-interactively, + and reads back a single JSON result: + + printf '%s' "$prompt" | claude --print --output-format json \\ + --dangerously-skip-permissions --model claude-opus-4-8 + + - `--print` runs Claude Code non-interactively and exits. + - `--dangerously-skip-permissions` lets the agent edit files and run + tools with no approval prompts. A Claude model is the opt-in for that; + there is no per-tool gate the way codex has `approval_policy`. + - `--output-format json` emits one result object on stdout whose + `result`, `session_id`, and `is_error` fields we surface. + - `--model` is the skill's `codex_model` value, passed through verbatim. + + The agent uses Claude Code's own tools (Bash, Edit, Read, ...) plus + whatever CLIs are on PATH inside the workspace (`git`, `gh`). The + GitHub App bot identity stamped into the checkout therefore applies to + claude runs exactly as it does to codex runs. + + Auth is the Anthropic API key in `ANTHROPIC_API_KEY`, the same secret + surface every other integration token flows through. It is injected + into the subprocess env, never onto the command line, so it cannot + leak into logs or run records. + + The prompt and model travel through the subprocess environment + (`SYMPHONY_CLAUDE_PROMPT`, `SYMPHONY_CLAUDE_MODEL`) and the prompt is + piped on stdin rather than passed positionally, so neither argv length + limits nor a leading dash in the prompt can corrupt the command line. + + Bad fit if: `ANTHROPIC_API_KEY` is unset (the run errors with + `:anthropic_api_key_not_configured`), or the Symphony service runs as + root, where `--dangerously-skip-permissions` refuses to start. + + This engine ignores any placement: a Claude model run through this + in-process path has no per-run room-server. 
+ """ + + alias SymphonyElixir.Config + + require Logger + + @prompt_env "SYMPHONY_CLAUDE_PROMPT" + @model_env "SYMPHONY_CLAUDE_MODEL" + + # One hour, matching the codex turn timeout. A workflow node that has + # not produced its result JSON by then is treated as hung. + @default_turn_timeout_ms 60 * 60 * 1000 + + @type env_pair :: {String.t(), String.t()} + @type context :: %{optional(:identifier) => String.t(), optional(:title) => String.t()} + + @spec run(Path.t(), String.t(), context(), keyword()) :: {:ok, map()} | {:error, term()} + def run(workspace, prompt, _context, opts) + when is_binary(workspace) and is_binary(prompt) and is_list(opts) do + config = Keyword.fetch!(opts, :config) + model = Keyword.fetch!(opts, :model) + turn_timeout_ms = Keyword.get(opts, :turn_timeout_ms, @default_turn_timeout_ms) + extra_env = Keyword.get(opts, :extra_env, []) + + with {:ok, api_key} <- fetch_api_key(config), + {:ok, bash} <- find_bash(), + :ok <- ensure_workspace(workspace) do + env = + env_charlists( + extra_env ++ + [ + {"ANTHROPIC_API_KEY", api_key}, + {@prompt_env, prompt}, + {@model_env, model} + ] + ) + + port = + Port.open( + {:spawn_executable, String.to_charlist(bash)}, + [ + :binary, + :exit_status, + args: [~c"-c", String.to_charlist(command(config.claude_command))], + cd: String.to_charlist(workspace), + env: env + ] + ) + + deadline = System.monotonic_time(:millisecond) + turn_timeout_ms + collect(port, deadline, turn_timeout_ms, []) + end + end + + # The command run under `bash -c`. The prompt and model are referenced + # from the environment (double-quoted so the shell does not re-split or + # glob them); piping the prompt on stdin keeps it off the argv entirely. 
+ @doc false + @spec command(String.t()) :: String.t() + def command(claude_command) when is_binary(claude_command) do + "printf '%s' \"$#{@prompt_env}\" | " <> + claude_command <> + " --print --output-format json --dangerously-skip-permissions" <> + " --model \"$#{@model_env}\"" + end + + defp fetch_api_key(%Config{anthropic_api_key: key}) when is_binary(key) and key != "", do: {:ok, key} + defp fetch_api_key(%Config{}), do: {:error, :anthropic_api_key_not_configured} + + defp find_bash do + case System.find_executable("bash") do + nil -> {:error, :bash_not_found} + bash -> {:ok, bash} + end + end + + defp ensure_workspace(workspace) do + if File.dir?(workspace), do: :ok, else: {:error, {:workspace_not_directory, workspace}} + end + + defp env_charlists(env) when is_list(env) do + Enum.map(env, fn {k, v} when is_binary(k) and is_binary(v) -> + {String.to_charlist(k), String.to_charlist(v)} + end) + end + + # Claude Code's json output format prints exactly one object on stdout + # at the end of the turn, so we buffer everything and parse on exit + # rather than streaming. stderr is left on the BEAM's stderr (no + # :stderr_to_stdout) so progress and diagnostics reach journald without + # polluting the JSON we have to decode. 
+ defp collect(port, deadline, timeout_ms, chunks) do + remaining_ms = max(deadline - System.monotonic_time(:millisecond), 0) + + receive do + {^port, {:data, data}} -> + collect(port, deadline, timeout_ms, [data | chunks]) + + {^port, {:exit_status, 0}} -> + parse_result(output(chunks)) + + {^port, {:exit_status, status}} -> + {:error, {:claude_exit, status, tail(output(chunks))}} + after + remaining_ms -> + kill_port(port) + {:error, {:claude_turn_timeout, timeout_ms}} + end + end + + defp parse_result(stdout) do + case Jason.decode(last_json_line(stdout)) do + {:ok, %{"is_error" => false} = result} -> + {:ok, + %{ + kind: :claude, + session_id: Map.get(result, "session_id"), + result: Map.get(result, "result"), + total_cost_usd: Map.get(result, "total_cost_usd") + }} + + {:ok, %{"is_error" => true} = result} -> + {:error, {:claude_turn_failed, Map.get(result, "subtype"), Map.get(result, "result")}} + + {:ok, other} -> + {:error, {:claude_invalid_result, other}} + + {:error, _reason} -> + {:error, {:claude_unparseable_output, tail(stdout)}} + end + end + + # Defensive: a stray non-JSON line on stdout (a tool that ignores the + # json contract, a shell notice) should not mask the result object, + # which json mode prints last. Take the last non-blank line. 
+ defp last_json_line(stdout) do + stdout + |> String.split("\n", trim: true) + |> List.last() + |> Kernel.||("") + end + + defp output(chunks), do: chunks |> Enum.reverse() |> IO.iodata_to_binary() + + defp tail(text) when is_binary(text), do: String.slice(text, max(String.length(text) - 2_000, 0), 2_000) + + defp kill_port(port) do + case Port.info(port, :os_pid) do + {:os_pid, os_pid} -> System.cmd("kill", ["-KILL", Integer.to_string(os_pid)], stderr_to_stdout: true) + _ -> :ok + end + + if Port.info(port) != nil, do: Port.close(port) + :ok + rescue + _ -> :ok + end +end diff --git a/packages/symphony/elixir/lib/symphony_elixir/codex/provision.ex b/packages/symphony/elixir/lib/symphony_elixir/codex/provision.ex new file mode 100644 index 000000000..60d77f710 --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir/codex/provision.ex @@ -0,0 +1,534 @@ +defmodule SymphonyElixir.Codex.Provision do + @moduledoc """ + Shared building blocks for the runtimes that prepare a checkout and a + room-server outside the BEAM (`Codex.IxVm` and `Codex.Host`). + + Both runtimes clone the same repositories with the same bot-identity and + GitHub auth stamping, export the same environment into the remote + room-server/Codex process, and poll the same `/api/health` endpoint. The + only thing that differs is where the script runs (an iXVM shell versus a + privilege-dropped local unit). Keeping the clone, env, and health logic + here means the load-bearing git auth header has a single owner. + """ + + alias SymphonyElixir.{Config, RepositoryCatalog} + + @ix_workspace_root "/workspace/symphony" + @ix_room_state_root "/var/lib/symphony-room" + + # The room state and workspace roots the host runtime uses inside the + # target user's home. Shared so `Codex.Host` and `Runtime.Placement` + # land the checkout and state in the same place. 
+ @host_room_state_subdir ".local/state/symphony-room" + @host_default_workspaces_subdir "symphony-workspaces" + + @doc """ + Shell-quote a value for safe interpolation into a `bash -lc` script. + """ + @spec sh(String.t()) :: String.t() + def sh(value) when is_binary(value) do + "'" <> String.replace(value, "'", "'\\''") <> "'" + end + + @doc """ + The iXVM-side root that holds every run-scoped checkout, and the + per-run subdirectory under it. The room-server runs from the primary + repo's checkout inside this tree. + """ + @spec ix_run_root(String.t()) :: Path.t() + def ix_run_root(run_id), do: Path.join(@ix_workspace_root, run_id) + + @doc "The iXVM-side primary-repo checkout for a run, where the engine turn runs." + @spec ix_primary_workspace(Config.t(), String.t()) :: Path.t() + def ix_primary_workspace(%Config{} = config, run_id) do + Path.join(ix_run_root(run_id), RepositoryCatalog.primary(config).name) + end + + @doc """ + The `ix new` argv that provisions a room-server VM for a run. The + load-bearing shape (l7-proxy port, region, ipv4, env injection) lives + here so `Codex.IxVm` and `Runtime.Placement` build it the same way and + the redaction in `sanitize_ix_args/1` keeps matching it. + """ + @spec create_vm_args(Config.t(), String.t(), [{String.t(), String.t()}]) :: [String.t()] + def create_vm_args(%Config{} = config, vm_name, env) when is_binary(vm_name) and is_list(env) do + ["new", config.ix_image, "--name", vm_name, "--l7-proxy-port", to_string(config.ix_room_port), "--no-shell"] + |> append_region(config.ix_region) + |> append_ipv4(config.ix_room_connect) + |> append_env(env) + end + + @doc """ + The `bash -lc` script that clones the run's repositories into the VM's + run root on a run-scoped branch. The caller owns running it through + `ix shell`. 
+ """ + @spec ix_workspace_script(Config.t(), String.t(), keyword()) :: String.t() + def ix_workspace_script(%Config{} = config, run_id, opts) when is_list(opts) do + token = Keyword.get(opts, :bot_token) || config.github_token + run_root = ix_run_root(run_id) + blocks = repo_blocks(config, run_root, "symphony/#{run_id}", token) + + """ + set -euo pipefail + mkdir -p #{sh(run_root)} + #{blocks} + """ + end + + @doc """ + The `bash -lc` script that boots the per-run room-server inside the VM, + exporting the runtime env first. One owner so `Codex.IxVm` and + `Runtime.Placement` start the server identically (notably the + `pkill -x room-server` that stops only the named process). + """ + @spec ix_room_start_script(Config.t(), String.t(), keyword()) :: String.t() + def ix_room_start_script(%Config{} = config, run_id, opts) when is_list(opts) do + room_state_dir = Path.join(@ix_room_state_root, run_id) + exports = env_export_lines(runtime_env(config, opts)) + + """ + set -euo pipefail + mkdir -p #{sh(room_state_dir)} + pkill -x room-server || true + #{exports} + nohup #{config.ix_room_server_command} --host 0.0.0.0 --port #{config.ix_room_port} --state-dir #{sh(room_state_dir)} --no-wt > /tmp/symphony-room-server.log 2>&1 & + """ + end + + @doc """ + The `localport:vmport` mapping a port-forward tunnel uses for a VM, and + the loopback URL that mapping exposes. The local port is the base port plus + a hash of the VM name into 1000 slots, which spreads concurrent runs across + loopback ports; two VM names can still hash to the same port. + """ + @spec port_forward_mapping(Config.t(), String.t()) :: {String.t(), String.t()} + def port_forward_mapping(%Config{} = config, vm_name) when is_binary(vm_name) do + local_port = config.ix_local_port_base + :erlang.phash2(vm_name, 1000) + {"#{local_port}:#{config.ix_room_port}", "http://127.0.0.1:#{local_port}"} + end + + @doc "The `ix port-forward` argv for a VM and `localport:vmport` mapping." 
+ @spec port_forward_args(String.t(), String.t()) :: [String.t()] + def port_forward_args(vm_name, mapping) when is_binary(vm_name) and is_binary(mapping) do + ["port-forward", vm_name, mapping] + end + + @doc "The `ix rm --force` argv for a VM." + @spec rm_vm_args(String.t()) :: [String.t()] + def rm_vm_args(vm_name) when is_binary(vm_name), do: ["rm", "--force", vm_name] + + @doc "The `ix ls --json` argv used to look a VM up by name." + @spec list_vms_args() :: [String.t()] + def list_vms_args, do: ["ls", "--json"] + + @doc "The `ix shell -- bash -lc + + + + + + {@inner_content} + + + """ + end + + @spec app(map()) :: Phoenix.LiveView.Rendered.t() + def app(assigns) do + # Every call site passes active_tab explicitly; default to :ir if + # someone forgets, using Map.put_new (not assign_new, which expects a + # socket / change-tracked assigns map and crashes on a plain one - was + # the cause of every LiveView route returning 500 after PR #21). + assigns = Map.put_new(assigns, :active_tab, :ir) + + ~H""" +
+
+
+

+ + [sym]phony +

+ +
+
+ {@inner_content} +
+    """
+  end
+end
diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/controllers/api_controller.ex b/packages/symphony/elixir/lib/symphony_elixir_web/controllers/api_controller.ex
new file mode 100644
index 000000000..e18a7ec8d
--- /dev/null
+++ b/packages/symphony/elixir/lib/symphony_elixir_web/controllers/api_controller.ex
@@ -0,0 +1,54 @@
+defmodule SymphonyElixirWeb.ApiController do
+  @moduledoc """
+  The manual-trigger enqueue producer onto the IR runtime.
+
+    POST /api/v1/runs    start IR run(s) from a manual trigger;
+                         body: {"workflow": "...", "input": {...}}
+
+  A caller naming a `workflow` starts exactly that `.sym`; a caller without
+  one fires every `on manual` workflow through the shared trigger matcher.
+  Input rides on the trigger context so a node can read it as ``.
+  """
+
+  use Phoenix.Controller, formats: [:json]
+
+  alias SymphonyElixir.Runtime.Ingress
+
+  # Start one named workflow (`workflow`, or the `dag` key when `workflow` is
+  # absent) or, when no non-empty name is given, every workflow the manual
+  # trigger matches. Responds 201 with the started run ids, 404 when the named
+  # workflow is unknown, and 422 for any other start error.
+  def enqueue_run(conn, params) do
+    trigger = %{kind: :manual, input: manual_input(params)}
+
+    case Map.get(params, "workflow") || Map.get(params, "dag") do
+      name when is_binary(name) and name != "" ->
+        Ingress.start_by_name(name, trigger, [])
+        |> respond_started(conn)
+
+      _ ->
+        Ingress.start_by_trigger(trigger, [])
+        |> respond_started(conn)
+    end
+  end
+
+  # The caller-supplied `input` rides on the trigger context. Anything that is
+  # not a JSON object (absent, null, a string, a list, a number) becomes an
+  # empty map so the trigger always carries a map-shaped input.
+  defp manual_input(%{"input" => %{} = input}), do: input
+  defp manual_input(_params), do: %{}
+
+  # Render the outcome of a start call: a single run or a list of runs is
+  # 201 with `run_ids`; errors carry the inspected reason.
+  defp respond_started({:ok, %{run_id: run_id}}, conn),
+    do: conn |> put_status(:created) |> json(%{run_ids: [run_id]})
+
+  defp respond_started({:ok, started}, conn) when is_list(started),
+    do: conn |> put_status(:created) |> json(%{run_ids: Enum.map(started, & &1.run_id)})
+
+  defp respond_started({:error, {:workflow_not_found, _}} = reason, conn),
+    do: conn |> put_status(:not_found) |> json(%{error: inspect(reason)})
+
+  defp respond_started({:error, reason}, conn),
+    do: conn |> put_status(:unprocessable_entity) |> json(%{error: inspect(reason)})
+end
diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/controllers/github_webhook_controller.ex
b/packages/symphony/elixir/lib/symphony_elixir_web/controllers/github_webhook_controller.ex new file mode 100644 index 000000000..1fd6b366a --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir_web/controllers/github_webhook_controller.ex @@ -0,0 +1,145 @@ +defmodule SymphonyElixirWeb.GithubWebhookController do + @moduledoc """ + Receives GitHub pull-request label webhooks and starts matching IR runs. + + Only pull_request.labeled events are actionable. Matching is driven by + `.sym` workflows declaring trigger.kind = github_pr_label with a + trigger.repo and trigger.label that match the incoming event, resolved + through the shared `Runtime.Trigger` matcher. + """ + + use Phoenix.Controller, formats: [:json] + + require Logger + + alias SymphonyElixir.Config + alias SymphonyElixir.Runtime.Ingress + + @spec accept(Plug.Conn.t(), map()) :: Plug.Conn.t() + def accept(conn, params) do + with :ok <- verify_signature(conn), + :ok <- verify_event(conn) do + json(conn, handle_event(params)) + else + {:error, status, reason} -> + Logger.warning("GitHub webhook rejected: #{reason}") + + conn + |> put_status(status) + |> json(%{error: reason}) + end + end + + defp verify_signature(conn) do + cond do + is_nil(Config.get().github_webhook_secret) -> + {:error, :unauthorized, "github webhook secret not configured"} + + is_nil(conn.assigns[:raw_body]) -> + {:error, :bad_request, "missing raw body"} + + true -> + provided = + conn + |> Plug.Conn.get_req_header("x-hub-signature-256") + |> List.first() + + expected = expected_signature(conn.assigns.raw_body) + + cond do + is_nil(provided) -> + {:error, :unauthorized, "missing X-Hub-Signature-256 header"} + + byte_size(provided) != byte_size(expected) -> + {:error, :unauthorized, "signature mismatch"} + + not Plug.Crypto.secure_compare(provided, expected) -> + {:error, :unauthorized, "signature mismatch"} + + true -> + :ok + end + end + end + + defp expected_signature(raw_body) do + secret = 
Config.get().github_webhook_secret + digest = :crypto.mac(:hmac, :sha256, secret, raw_body) |> Base.encode16(case: :lower) + "sha256=" <> digest + end + + defp verify_event(conn) do + case conn |> Plug.Conn.get_req_header("x-github-event") |> List.first() do + "pull_request" -> :ok + nil -> {:error, :bad_request, "missing X-GitHub-Event header"} + other -> {:error, :accepted, "ignored GitHub event #{other}"} + end + end + + defp handle_event(%{"action" => "labeled", "pull_request" => pr, "repository" => repo, "label" => label}) + when is_map(pr) and is_map(repo) and is_map(label) do + repo_name = Map.get(repo, "full_name") + label_name = label |> Map.get("name", "") |> normalize_label() + pr_number = Map.get(pr, "number") + + cond do + Map.get(pr, "state") != "open" -> + %{ok: true, results: [format_result({:ignored, "PR is not open"})]} + + not is_integer(pr_number) -> + %{ok: true, results: [format_result({:ignored, "PR number missing"})]} + + active_run_exists?(repo_name, pr_number) -> + %{ok: true, results: [format_result({:deduped, pr_number})]} + + true -> + start_label(build_trigger(repo_name, label_name, pr_number, pr), repo_name, pr_number) + end + end + + defp handle_event(_event), do: %{ok: true, ignored: true} + + defp build_trigger(repo_name, label_name, pr_number, pr) do + %{ + kind: :github_pr_label, + repo: repo_name, + label: label_name, + pr_number: pr_number, + pr_url: Map.get(pr, "html_url"), + title: Map.get(pr, "title"), + head_ref: get_in(pr, ["head", "ref"]), + head_repo: get_in(pr, ["head", "repo", "full_name"]), + base_ref: get_in(pr, ["base", "ref"]) + } + end + + defp start_label(trigger, repo_name, pr_number) do + case Ingress.start_by_trigger(trigger) do + {:ok, started} -> + Logger.info("Started runs=#{Enum.map_join(started, ",", & &1.run_id)} for #{repo_name}##{pr_number} via github label") + %{ok: true, enqueued: length(started), results: Enum.map(started, &format_result({:enqueued, &1.run_id}))} + + {:error, reason} -> + 
Logger.warning("Failed to start github label run for #{repo_name}##{pr_number}: #{inspect(reason)}") + %{ok: true, results: [format_result({:error, inspect(reason)})]} + end + end + + defp active_run_exists?(repo, pr_number) do + Ingress.seen_trigger?(fn + {status, %{kind: :github_pr_label, repo: r, pr_number: n}} -> + status in [:pending, :running] and r == repo and n == pr_number + + {_status, _trigger} -> + false + end) + end + + defp normalize_label(name) when is_binary(name), do: name |> String.trim() |> String.downcase() + defp normalize_label(_), do: "" + + defp format_result({:enqueued, run_id}), do: %{status: "enqueued", run_id: run_id} + defp format_result({:deduped, pr_number}), do: %{status: "deduped", pr_number: pr_number} + defp format_result({:ignored, reason}), do: %{status: "ignored", reason: reason} + defp format_result({:error, reason}), do: %{status: "error", reason: reason} +end diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/controllers/ir_run_controller.ex b/packages/symphony/elixir/lib/symphony_elixir_web/controllers/ir_run_controller.ex new file mode 100644 index 000000000..6a9c23a48 --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir_web/controllers/ir_run_controller.ex @@ -0,0 +1,126 @@ +defmodule SymphonyElixirWeb.IRRunController do + @moduledoc """ + Read-only JSON API over IR runs (the `RunGraph` model), and the operator + control endpoints. 
+ + GET /api/v1/ir/schema the runtime's enum vocabulary + GET /api/v1/ir/runs list IR runs (summaries) + POST /api/v1/ir/runs start a run from a workflow name + GET /api/v1/ir/runs/:run_id one IR run (full detail) + POST /api/v1/ir/runs/:run_id/cancel operator: cancel + POST /api/v1/ir/runs/:run_id/rerun operator: re-run all + POST /api/v1/ir/runs/:run_id/clear-failed operator: clear failed nodes + POST /api/v1/ir/runs/:run_id/nodes/:node_id/retry operator: retry one node + + This is parallel to the legacy `/api/v1/runs` surface (the old `Run` + model) and renders the canonical IR facts through `IR.View`, keeping the + protocol emitter out of the runtime. Reads come from `IR.Store` so a + finished or restarted run is visible; operator actions go to the live + `Runtime` process, returning 409 when the run has no live process to act + on (a succeeded or cancelled run that already stopped). + """ + + use Phoenix.Controller, formats: [:json] + + alias SymphonyElixir.DSL.Schema + alias SymphonyElixir.IR.{Store, View} + alias SymphonyElixir.Runtime + + # The runtime's single source of truth for the form's option lists: + # engines, efforts, permissions, locations, node kinds/states, effect + # kinds, and trigger kinds. A consumer drives its selects from this so a + # new enum value at its owner reaches the UI without a form edit. + def schema(conn, _params) do + json(conn, Schema.to_map()) + end + + def index(conn, _params) do + summaries = Store.load_all() |> Enum.sort_by(& &1.run_id) |> Enum.map(&View.summary/1) + json(conn, %{runs: summaries}) + end + + # Start a run from a workflow name. This is the manual/operator door onto + # the IR runtime: resolve the workflow through the catalog, materialize + # it, and start it under Runtime.Supervisor. Trigger context is optional; + # an operator-started run carries `%{kind: :manual}` plus any input the + # caller passed. 
+ def create(conn, %{"workflow" => name}) when is_binary(name) do + case Runtime.Ingress.start_by_name(name, trigger_context(conn.params), []) do + {:ok, %{run_id: run_id}} -> + conn |> put_status(:created) |> json(%{run_id: run_id}) + + {:error, {:workflow_not_found, _}} = reason -> + conn |> put_status(:not_found) |> json(%{error: inspect(reason)}) + + {:error, reason} -> + conn |> put_status(:unprocessable_entity) |> json(%{error: inspect(reason)}) + end + end + + def create(conn, _params) do + conn |> put_status(:unprocessable_entity) |> json(%{error: "missing required field: workflow"}) + end + + # Build the trigger context from request params. A manual run always + # carries `kind: :manual`; any caller-supplied `input` map rides along so + # a node can read it as ``. Absent or non-map input defaults to an + # empty map so the graph trigger shape is stable. + defp trigger_context(params) do + input = + case params["input"] do + %{} = map -> map + _ -> %{} + end + + %{kind: :manual, input: input} + end + + def show(conn, %{"run_id" => run_id}) do + case Store.load(run_id) do + {:ok, graph} -> json(conn, View.detail(graph)) + {:error, :not_found} -> not_found(conn) + {:error, reason} -> conn |> put_status(:unprocessable_entity) |> json(%{error: inspect(reason)}) + end + end + + def cancel(conn, %{"run_id" => run_id}), do: operate(conn, run_id, &Runtime.cancel(&1, actor(conn))) + + def rerun(conn, %{"run_id" => run_id}), do: operate(conn, run_id, &Runtime.rerun(&1, actor(conn))) + + def clear_failed(conn, %{"run_id" => run_id}), + do: operate(conn, run_id, &Runtime.clear_failed(&1, actor(conn))) + + def retry_node(conn, %{"run_id" => run_id, "node_id" => node_id}), + do: operate(conn, run_id, &Runtime.retry_node(&1, node_id, actor(conn))) + + # Apply an operator action to the live run, then return its current + # persisted detail. A run with no live process (already stopped) returns + # 409 with a clear reason rather than a 500 from the GenServer call. 
+ defp operate(conn, run_id, action) do + action.(run_id) + + case Store.load(run_id) do + {:ok, graph} -> json(conn, View.detail(graph)) + {:error, :not_found} -> not_found(conn) + {:error, reason} -> conn |> put_status(:unprocessable_entity) |> json(%{error: inspect(reason)}) + end + catch + :exit, {:noproc, _} -> run_not_live(conn, run_id) + :exit, {{:noproc, _}, _} -> run_not_live(conn, run_id) + end + + defp actor(conn) do + case get_req_header(conn, "x-operator") do + [value | _] when value != "" -> value + _ -> :operator + end + end + + defp not_found(conn), do: conn |> put_status(:not_found) |> json(%{error: "run not found"}) + + defp run_not_live(conn, run_id) do + conn + |> put_status(:conflict) + |> json(%{error: "run #{run_id} has no live process; it has already finished or is not running"}) + end +end diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/controllers/linear_webhook_controller.ex b/packages/symphony/elixir/lib/symphony_elixir_web/controllers/linear_webhook_controller.ex new file mode 100644 index 000000000..c8ade8615 --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir_web/controllers/linear_webhook_controller.ex @@ -0,0 +1,151 @@ +defmodule SymphonyElixirWeb.LinearWebhookController do + @moduledoc """ + Receives Linear webhook events and starts IR runs for any `.sym` + workflow whose `trigger.kind = :linear` label matches a label on the + inbound issue. + + Replaces the old `Triggers.Linear` poller. Linear's 2500-req/hr quota + is plenty when the poller is gone; webhooks add zero scheduled + traffic. + + Setup, in Linear's webhook admin: + + - URL: `https:///api/v1/triggers/linear` + - Resource types: `Issue` (at minimum) + - Copy the signing secret into `LINEAR_WEBHOOK_SECRET` on the + symphony host + + Security: + + - Every request must carry a `Linear-Signature` header that is + `hex(hmac_sha256(secret, raw_body))`. Mismatched signatures get + `401`. 
The raw body is preserved by + `SymphonyElixirWeb.RawBodyReader` so HMAC is over the exact bytes + Linear signed. + - Absent secret -> we refuse to authenticate any request and return + `401`; this fail-closed default keeps an empty-secret deployment + from silently accepting unsigned traffic. + + Dedupe: an issue with an active run (status `:pending` or + `:running`) is skipped, matching the previous poller's contract. + """ + + use Phoenix.Controller, formats: [:json] + + require Logger + + alias SymphonyElixir.Config + alias SymphonyElixir.Runtime.Ingress + + @spec accept(Plug.Conn.t(), map()) :: Plug.Conn.t() + def accept(conn, params) do + with :ok <- verify_signature(conn) do + handle_event(params) + json(conn, %{ok: true}) + else + {:error, status, reason} -> + Logger.warning("Linear webhook rejected: #{reason}") + + conn + |> put_status(status) + |> json(%{error: reason}) + end + end + + defp verify_signature(conn) do + cond do + is_nil(Config.get().linear_webhook_secret) -> + {:error, :unauthorized, "linear webhook secret not configured"} + + is_nil(conn.assigns[:raw_body]) -> + {:error, :bad_request, "missing raw body"} + + true -> + provided = + conn + |> Plug.Conn.get_req_header("linear-signature") + |> List.first() + + cond do + is_nil(provided) -> + {:error, :unauthorized, "missing Linear-Signature header"} + + not Plug.Crypto.secure_compare(provided, expected_signature(conn.assigns.raw_body)) -> + {:error, :unauthorized, "signature mismatch"} + + true -> + :ok + end + end + end + + defp expected_signature(raw_body) do + secret = Config.get().linear_webhook_secret + + :hmac + |> :crypto.mac(:sha256, secret, raw_body) + |> Base.encode16(case: :lower) + end + + defp handle_event(%{"type" => "Issue", "action" => action} = event) + when action in ["create", "update"] do + data = Map.get(event, "data", %{}) + labels = extract_labels(data) + + maybe_enqueue(data, labels) + end + + defp handle_event(_event), do: :ok + + defp extract_labels(%{"labels" => 
labels}) when is_list(labels) do
+    labels
+    |> Enum.map(fn
+      %{"name" => name} when is_binary(name) -> String.downcase(String.trim(name))
+      _ -> nil
+    end)
+    |> Enum.reject(&is_nil/1)
+  end
+
+  defp extract_labels(%{"labelIds" => _ids}) do
+    # Linear sends label ids only on some event shapes (e.g. older webhook
+    # versions). We do not have the names locally; skip these events. The
+    # next full update with a `labels` array will re-fire.
+    []
+  end
+
+  defp extract_labels(_), do: []
+
+  defp maybe_enqueue(%{"id" => issue_id} = data, labels) do
+    if active_run_exists?(issue_id) do
+      :ok
+    else
+      # The issue's labels ride on the event so the shared matcher can keep
+      # the workflows whose declared label is present, fanning out to each.
+      trigger = %{
+        kind: :linear,
+        labels: labels,
+        issue_id: issue_id,
+        identifier: Map.get(data, "identifier"),
+        title: Map.get(data, "title"),
+        url: Map.get(data, "url")
+      }
+
+      case Ingress.start_by_trigger(trigger) do
+        {:ok, started} ->
+          Logger.info("Started runs=#{Enum.map_join(started, ",", & &1.run_id)} for #{trigger.identifier} via webhook")
+
+        {:error, reason} ->
+          Logger.warning("Failed to start webhook run for #{trigger.identifier}: #{inspect(reason)}")
+      end
+    end
+  end
+
+  defp maybe_enqueue(_data, _labels), do: :ok
+
+  defp active_run_exists?(linear_issue_id) do
+    Ingress.seen_trigger?(fn
+      {status, %{kind: :linear, issue_id: id}} -> id == linear_issue_id and status in [:pending, :running]
+      {_status, _trigger} -> false
+    end)
+  end
+end
diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/controllers/slack_events_controller.ex b/packages/symphony/elixir/lib/symphony_elixir_web/controllers/slack_events_controller.ex
new file mode 100644
index 000000000..725e4a033
--- /dev/null
+++ b/packages/symphony/elixir/lib/symphony_elixir_web/controllers/slack_events_controller.ex
@@ -0,0 +1,173 @@
+defmodule SymphonyElixirWeb.SlackEventsController do
+  @moduledoc "Receives Slack Events API callbacks and starts app-mention IR runs."
+
+  use Phoenix.Controller, formats: [:json]
+
+  require Logger
+
+  alias SymphonyElixir.{Config, Slack, WorkflowCatalog}
+  alias SymphonyElixir.Runtime.Ingress
+
+  @spec accept(Plug.Conn.t(), map()) :: Plug.Conn.t()
+  def accept(conn, %{"type" => "url_verification", "challenge" => challenge}) do
+    with :ok <- verify_signature(conn) do
+      json(conn, %{challenge: challenge})
+    else
+      {:error, status, reason} ->
+        conn |> put_status(status) |> json(%{error: reason})
+    end
+  end
+
+  def accept(conn, %{"event" => %{"type" => "app_mention"} = event}) do
+    with :ok <- verify_signature(conn) do
+      json(conn, handle_app_mention(event))
+    else
+      {:error, status, reason} ->
+        Logger.warning("Slack event rejected: #{reason}")
+        conn |> put_status(status) |> json(%{error: reason})
+    end
+  end
+
+  def accept(conn, _params) do
+    with :ok <- verify_signature(conn) do
+      json(conn, %{ok: true, ignored: true})
+    else
+      {:error, status, reason} ->
+        conn |> put_status(status) |> json(%{error: reason})
+    end
+  end
+
+  defp handle_app_mention(event) do
+    channel = Map.get(event, "channel")
+    ts = Map.get(event, "ts")
+    thread_ts = Map.get(event, "thread_ts") || ts
+
+    cond do
+      not is_binary(channel) or not is_binary(ts) ->
+        %{ok: true, results: [format_result({:ignored, "missing channel or ts"})]}
+
+      active_run_exists?(channel, ts) ->
+        %{ok: true, results: [format_result({:deduped, ts})]}
+
+      true ->
+        # Stamp both the raw channel id the event carries and any declared
+        # channel name resolved to it, so the shared matcher accepts a
+        # workflow that declared either spelling.
+        trigger = %{
+          kind: :slack_app_mention,
+          channel: resolved_channel_name(channel) || channel,
+          channel_id: channel,
+          message_ts: ts,
+          thread_ts: thread_ts,
+          user: Map.get(event, "user"),
+          text: Map.get(event, "text", "")
+        }
+
+        start_mention(trigger)
+    end
+  end
+
+  defp start_mention(trigger) do
+    case Ingress.start_by_trigger(trigger) do
+      {:ok, started} ->
+        %{ok: true, enqueued: length(started), results: Enum.map(started, &format_result({:enqueued, &1.run_id}))}
+
+      {:error, reason} ->
+        %{ok: true, results: [format_result({:error, inspect(reason)})]}
+    end
+  end
+
+  # Resolve the channel id back to the `#name` a workflow's `on` clause
+  # might declare, so a name-based trigger and the event's id compare
+  # equal. The candidate names come from the loaded `:slack_app_mention`
+  # workflows, so this only resolves names symphony actually watches.
+  defp resolved_channel_name(channel_id) do
+    WorkflowCatalog.for_trigger_kind(:slack_app_mention)
+    |> Enum.map(& &1.trigger.channel)
+    |> Enum.uniq()
+    |> Enum.find(fn declared -> channel_matches?(declared, channel_id) end)
+  end
+
+  defp channel_matches?("#" <> channel_name, channel_id) do
+    case Slack.Client.resolve_channel_id(channel_name) do
+      {:ok, ^channel_id} -> true
+      _ -> false
+    end
+  end
+
+  defp channel_matches?(configured, channel_id), do: configured == channel_id
+
+  defp active_run_exists?(channel, ts) do
+    Ingress.seen_trigger?(fn
+      {status, %{kind: :slack_app_mention, channel_id: cid, message_ts: mts}} ->
+        status in [:pending, :running] and cid == channel and mts == ts
+
+      {_status, _trigger} ->
+        false
+    end)
+  end
+
+  # Slack's request-signing guidance is to reject a request whose timestamp is
+  # more than five minutes from the local clock, so a captured signed request
+  # cannot be replayed later.
+  @max_timestamp_skew_seconds 300
+
+  # Authenticate the request against the Slack signing secret. Fails closed:
+  # no configured secret, no raw body, missing headers, a stale timestamp, or
+  # a signature mismatch all return `{:error, status, reason}`.
+  defp verify_signature(conn) do
+    secret = Config.get().slack_signing_secret
+
+    cond do
+      is_nil(secret) ->
+        {:error, :unauthorized, "slack signing secret not configured"}
+
+      is_nil(conn.assigns[:raw_body]) ->
+        {:error, :bad_request, "missing raw body"}
+
+      true ->
+        timestamp = conn |> Plug.Conn.get_req_header("x-slack-request-timestamp") |> List.first()
+        provided = conn |> Plug.Conn.get_req_header("x-slack-signature") |> List.first()
+        expected = expected_signature(secret, timestamp, conn.assigns.raw_body)
+
+        cond do
+          is_nil(timestamp) or is_nil(provided) ->
+            {:error, :unauthorized, "missing Slack signature headers"}
+
+          not fresh_timestamp?(timestamp) ->
+            {:error, :unauthorized, "stale or invalid Slack request timestamp"}
+
+          byte_size(provided) != byte_size(expected) ->
+            {:error, :unauthorized, "signature mismatch"}
+
+          not Plug.Crypto.secure_compare(provided, expected) ->
+            {:error, :unauthorized, "signature mismatch"}
+
+          true ->
+            :ok
+        end
+    end
+  end
+
+  # True when `timestamp` parses as whole Unix seconds within
+  # `@max_timestamp_skew_seconds` of the local clock.
+  defp fresh_timestamp?(timestamp) do
+    case Integer.parse(timestamp) do
+      {seconds, ""} -> abs(System.system_time(:second) - seconds) <= @max_timestamp_skew_seconds
+      _ -> false
+    end
+  end
+
+  # Slack's `v0` signature: lower-case hex HMAC-SHA256 of
+  # `v0:<timestamp>:<raw body>` keyed by the signing secret.
+  defp expected_signature(secret, timestamp, body) do
+    base = "v0:" <> to_string(timestamp) <> ":" <> body
+    digest = :crypto.mac(:hmac, :sha256, secret, base) |> Base.encode16(case: :lower)
+    "v0=" <> digest
+  end
+
+  defp format_result({:enqueued, run_id}), do: %{status: "enqueued", run_id: run_id}
+  defp format_result({:deduped, ts}), do: %{status: "deduped", message_ts: ts}
+  defp format_result({:ignored, reason}), do: %{status: "ignored", reason: reason}
+  defp format_result({:error, reason}), do: %{status: "error", reason: reason}
+end
diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/controllers/static_asset_controller.ex b/packages/symphony/elixir/lib/symphony_elixir_web/controllers/static_asset_controller.ex
new file mode 100644
index 000000000..13939c1e7
--- /dev/null
+++ b/packages/symphony/elixir/lib/symphony_elixir_web/controllers/static_asset_controller.ex
@@ -0,0 +1,22 @@
+defmodule SymphonyElixirWeb.StaticAssetController do
+  @moduledoc """
+  Serves the JS bundles Phoenix LiveView needs, read directly from the
+  dep checkout. Avoids a build pipeline for v0.
+ """ + + use Phoenix.Controller, formats: [] + + def phoenix(conn, _params), do: send_dep_js(conn, :phoenix, "priv/static/phoenix.js") + def phoenix_html(conn, _params), do: send_dep_js(conn, :phoenix_html, "priv/static/phoenix_html.js") + def phoenix_live_view(conn, _params), do: send_dep_js(conn, :phoenix_live_view, "priv/static/phoenix_live_view.js") + + defp send_dep_js(conn, app, relative_path) do + priv = :code.priv_dir(app) |> to_string() + full = Path.join(Path.dirname(priv), relative_path) + + conn + |> put_resp_content_type("application/javascript") + |> put_resp_header("cache-control", "public, max-age=3600") + |> send_file(200, full) + end +end diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/endpoint.ex b/packages/symphony/elixir/lib/symphony_elixir_web/endpoint.ex new file mode 100644 index 000000000..23344911f --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir_web/endpoint.ex @@ -0,0 +1,40 @@ +defmodule SymphonyElixirWeb.Endpoint do + @moduledoc """ + Phoenix endpoint for Symphony's optional observability UI and API. + """ + + use Phoenix.Endpoint, otp_app: :symphony_elixir + + @session_options [ + store: :cookie, + key: "_symphony_elixir_key", + signing_salt: "symphony-session" + ] + + socket("/live", Phoenix.LiveView.Socket, + websocket: [connect_info: [session: @session_options]], + longpoll: false + ) + + # Runtime workers dial in here and join `worker:lobby`. `:x_headers` exposes + # the mTLS client-cert CN that the nginx boundary forwards as `x-worker-cn`. 
+ socket("/worker", SymphonyElixirWeb.WorkerSocket, + websocket: [connect_info: [:x_headers]], + longpoll: false + ) + + plug(Plug.RequestId) + plug(Plug.Telemetry, event_prefix: [:phoenix, :endpoint]) + + plug(Plug.Parsers, + parsers: [:urlencoded, :multipart, :json], + pass: ["*/*"], + json_decoder: Jason, + body_reader: {SymphonyElixirWeb.RawBodyReader, :read_body, []} + ) + + plug(Plug.MethodOverride) + plug(Plug.Head) + plug(Plug.Session, @session_options) + plug(SymphonyElixirWeb.Router) +end diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/error_html.ex b/packages/symphony/elixir/lib/symphony_elixir_web/error_html.ex new file mode 100644 index 000000000..5b2722a26 --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir_web/error_html.ex @@ -0,0 +1,8 @@ +defmodule SymphonyElixirWeb.ErrorHTML do + @moduledoc false + + @spec render(String.t(), map()) :: String.t() + def render(template, _assigns) do + Phoenix.Controller.status_message_from_template(template) + end +end diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/error_json.ex b/packages/symphony/elixir/lib/symphony_elixir_web/error_json.ex new file mode 100644 index 000000000..5babea4c2 --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir_web/error_json.ex @@ -0,0 +1,8 @@ +defmodule SymphonyElixirWeb.ErrorJSON do + @moduledoc false + + @spec render(String.t(), map()) :: map() + def render(template, _assigns) do + %{error: %{code: "request_failed", message: Phoenix.Controller.status_message_from_template(template)}} + end +end diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/live/ir_runs_live.ex b/packages/symphony/elixir/lib/symphony_elixir_web/live/ir_runs_live.ex new file mode 100644 index 000000000..3b50669bc --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir_web/live/ir_runs_live.ex @@ -0,0 +1,513 @@ +defmodule SymphonyElixirWeb.IRRunsLive do + @moduledoc """ + Dashboard LiveView over IR runs (the `RunGraph` model), the live-page 
+ counterpart to the read-only `IRRunController` JSON API. + + Two actions: + + - `:index` - every IR run as a table, plus a "run" control that starts a + workflow from the `WorkflowCatalog` by name. + - `:show` - one run in detail with per-node state pills, mirroring the + `node-grid` layout the legacy `RunsLive` uses. + + Live updates ride `Runtime.Events`: `Runtime` broadcasts an + `{:ir_run_event, run_id, summary}` on every persisted transition. This + LiveView subscribes to the index topic on mount and to the open run's + topic when navigating to `:show`, so pills move from running to succeeded + with no polling. The data shapes come straight from `IR.View` so the page + renders the same facts the JSON API serves. + """ + + use Phoenix.LiveView + + alias SymphonyElixir.IR.{Store, View} + alias SymphonyElixir.Runtime.Events + alias SymphonyElixir.{Runtime, WorkflowCatalog} + + # The runs table paginates at this many rows per page, navigated via the + # `?page=N` query param. The full sorted list still loads on every render + # (the store scan is cheap) so the total count and "latest first" order + # stay exact; only the rendered slice is bounded. + @per_page 50 + + @impl true + def mount(_params, _session, socket) do + # The index topic carries every run's transitions, so a subscriber on + # the connected mount can refresh the table from the event payload. The + # first (static) render runs disconnected; skip the subscribe there. 
+ if connected?(socket), do: Events.subscribe_index() + + {:ok, + socket + |> assign(runs: load_runs()) + |> assign(workflows: load_workflows()) + |> assign(workflow_errors: load_workflow_errors()) + |> assign(subscribed_run: nil) + |> assign(page: 1) + |> assign(path: "/") + |> assign(form_error: nil)} + end + + @impl true + def handle_params(%{"run_id" => run_id}, _uri, socket) do + socket = resubscribe_run(socket, run_id) + + detail = + case Store.load(run_id) do + {:ok, graph} -> View.detail(graph) + {:error, _} -> nil + end + + {:noreply, assign(socket, live_action: :show, run_id: run_id, detail: detail)} + end + + def handle_params(params, uri, socket) do + socket = resubscribe_run(socket, nil) + + {:noreply, + assign(socket, + live_action: :index, + page: parse_page(params["page"]), + path: URI.parse(uri).path, + runs: load_runs(), + workflows: load_workflows(), + workflow_errors: load_workflow_errors() + )} + end + + @impl true + def handle_info({:ir_run_event, run_id, _summary}, %{assigns: %{live_action: :show, run_id: run_id}} = socket) do + # A transition on the open run: re-read the store for the full detail + # view (the event payload is only the summary, and the node grid needs + # per-node state). A read miss leaves the last-good detail in place. + detail = + case Store.load(run_id) do + {:ok, graph} -> View.detail(graph) + {:error, _} -> socket.assigns[:detail] + end + + {:noreply, assign(socket, detail: detail)} + end + + def handle_info({:ir_run_event, _run_id, _summary}, %{assigns: %{live_action: :index}} = socket) do + # Any run transitioned: refresh the index table. Re-reading the store + # rather than splicing the one summary keeps sort order and the + # appearance of a brand-new run consistent without per-row bookkeeping. + # Re-read the catalog's parse errors on the same beat so the broken- + # workflow panel reflects a hot-reload that landed between navigations. 
+ {:noreply, assign(socket, runs: load_runs(), workflow_errors: load_workflow_errors())} + end + + def handle_info({:ir_run_event, _run_id, _summary}, socket), do: {:noreply, socket} + + @impl true + def handle_event("run", %{"workflow" => name}, socket) when is_binary(name) and name != "" do + case Runtime.Ingress.start_by_name(name, %{kind: :manual, input: %{}}, []) do + {:ok, %{run_id: run_id}} -> + {:noreply, push_navigate(socket, to: "/ir/" <> run_id)} + + {:error, reason} -> + {:noreply, assign(socket, form_error: "could not start #{name}: #{inspect(reason)}")} + end + end + + def handle_event("run", _params, socket) do + {:noreply, assign(socket, form_error: "pick a workflow to run")} + end + + def handle_event("cancel", _params, %{assigns: %{run_id: id}} = socket) do + try do + _ = Runtime.cancel(id, "dashboard") + catch + :exit, _ -> :ok + end + + {:noreply, assign(socket, detail: reload_detail(id))} + end + + def handle_event("retry_failed", _params, %{assigns: %{run_id: id}} = socket) do + try do + _ = Runtime.clear_failed(id, "dashboard") + catch + :exit, _ -> :ok + end + + {:noreply, assign(socket, detail: reload_detail(id))} + end + + def handle_event("rerun", _params, %{assigns: %{run_id: id}} = socket) do + try do + _ = Runtime.rerun(id, "dashboard") + catch + :exit, _ -> :ok + end + + {:noreply, assign(socket, detail: reload_detail(id))} + end + + @impl true + def render(%{live_action: :show} = assigns) do + ~H""" + {SymphonyElixirWeb.Layouts.app(%{inner_content: render_show(assigns), active_tab: :ir})} + """ + end + + def render(assigns) do + ~H""" + {SymphonyElixirWeb.Layouts.app(%{inner_content: render_index(assigns), active_tab: :ir})} + """ + end + + defp render_index(assigns) do + # Bound the rendered slice to one page. `page` is clamped against the + # live total so a stale `?page=N` (or a run count that shrank under the + # current page) falls back to the last real page instead of an empty + # table. 
+ total = length(assigns.runs) + total_pages = max(1, div(total + @per_page - 1, @per_page)) + page = assigns.page |> max(1) |> min(total_pages) + page_runs = assigns.runs |> Enum.drop((page - 1) * @per_page) |> Enum.take(@per_page) + + assigns = + assigns + |> assign(:page, page) + |> assign(:total_pages, total_pages) + |> assign(:total_runs, total) + |> assign(:page_runs, page_runs) + |> assign(:per_page, @per_page) + + ~H""" +
+ +
+ +
+
start a run
+ <%= if @workflows == [] do %> +
+ no .sym workflows loaded. drop a file under + workflows/ and the catalog will pick it up within a second. +
+ <% else %> +
+
+ + +
+ <%= if @form_error do %> +
{@form_error}
+ <% end %> +
+ +
+
+ <% end %> +
+ + <%= if @runs == [] do %> +
no IR runs yet. start one with the button above.
+ <% else %> +
+
+
{run_count_label(@runs)}
+
+ + + + + + + + + + + + <%= for run <- @page_runs do %> + + + + + + + + <% end %> + +
runstatusnodescostupdated
run["run_id"]}>{run["run_id"]} run["status"]}>{run["status"]}{node_counts(run)}{cost_label(run["cost_usd"])}{relative_time(run["updated_at"])}
+ <%= if @total_pages > 1 do %> +
+
+ showing {(@page - 1) * @per_page + 1}-{min(@page * @per_page, @total_runs)} of {@total_runs} +
+
+ <.link + class={if @page <= 1, do: "disabled", else: ""} + patch={page_path(@path, @page - 1)} + >prev + {@page} / {@total_pages} + <.link + class={if @page >= @total_pages, do: "disabled", else: ""} + patch={page_path(@path, @page + 1)} + >next +
+
+ <% end %> +
+ <% end %> + + <%= if @workflow_errors != [] do %> +
+
+
broken workflows
+
+
+ these .sym files failed to parse. the last + working version of each stays loaded; fix the location below and the + catalog reloads it within a second. +
+
+ <%= for err <- @workflow_errors do %> +
+
{error_location(err)}
+
parse error
+
{err.message}
+
+
+ <% end %> +
+
+ <% end %> + """ + end + + defp render_show(assigns) do + case assigns.detail do + nil -> + ~H""" +
run not found. back to IR runs
+ """ + + _detail -> + ~H""" +
+
+
run
+ @detail["status"]}>{@detail["status"]} +
+
+
+
run id
{@detail["run_id"]}
+
trigger
{@detail["trigger"]}
+
placement
{placement_label(@detail["placement"])}
+
nodes
{detail_node_counts(@detail)}
+
cost
{cost_label(@detail["cost_usd"])}
+
started
{@detail["created_at"] || "-"}
+
+
+
+ +
+
+
graph
+
+
+ +
+
+ +
+
+
nodes
+
+ <%= if @detail["status"] in ["pending", "running"] do %> + + <% end %> + <%= if @detail["status"] in ["failed"] do %> + + + <% end %> +
+
+
+ <%= for node <- @detail["nodes"] do %> +
+
{node["id"]}
+
+ node["state"]}>{node["state"]} +
+
{node["kind"]}{engine_label(node["envelope"])}
+
{node_cost(node)}
+
+ <% end %> +
+
+ + + """ + end + end + + # Keep at most one per-run subscription alive as the operator navigates + # between detail pages. Switching from one run to another drops the old + # topic so the LiveView is not woken by transitions on a run it no longer + # shows; the index topic (subscribed once at mount) is left untouched. + defp resubscribe_run(socket, run_id) do + if connected?(socket) do + current = socket.assigns[:subscribed_run] + + if current != run_id do + if is_binary(current), do: Phoenix.PubSub.unsubscribe(SymphonyElixir.PubSub, Events.run_topic(current)) + if is_binary(run_id), do: Events.subscribe_run(run_id) + assign(socket, subscribed_run: run_id) + else + socket + end + else + socket + end + end + + defp reload_detail(run_id) do + case Store.load(run_id) do + {:ok, graph} -> View.detail(graph) + {:error, _} -> nil + end + end + + defp load_runs do + # Latest first: the most recently updated run leads the table, matching + # the "updated" column. `sort_by/3` with `:desc` puts newest at the top; + # the run_id is a stable tiebreaker for runs that share a timestamp. + Store.load_all() + |> Enum.map(&View.summary/1) + |> Enum.sort_by(&{&1["updated_at"], &1["run_id"]}, :desc) + end + + # `?page=N` is operator-supplied, so anything that is not a positive + # integer (absent, empty, negative, garbage) falls back to the first page. + # The upper bound is clamped against the live total in render_index. + defp parse_page(raw) when is_binary(raw) do + case Integer.parse(raw) do + {n, _} when n > 0 -> n + _ -> 1 + end + end + + defp parse_page(_), do: 1 + + # Keep page 1 on the bare path so the canonical first-page URL has no query + # string; later pages carry `?page=N` on whichever index path is active + # (`/` or `/ir`). 
+  defp page_path(path, page) do
+    # Page 1 (or anything at or below it) is the bare path; later pages
+    # append the page number as a query string.
+    if page <= 1, do: path, else: path <> "?page=" <> Integer.to_string(page)
+  end
+
+  # Catalog workflows, ordered by name.
+  defp load_workflows do
+    Enum.sort_by(WorkflowCatalog.workflows(), fn workflow -> workflow.name end)
+  end
+
+  # Catalog parse errors, ordered by name.
+  defp load_workflow_errors do
+    Enum.sort_by(WorkflowCatalog.errors(), fn error -> error.name end)
+  end
+
+  # Formats a diagnostic as `file:line:column`, the shape an editor jumps to
+  # from a build log. Per the original note, the diagnostic always carries a
+  # file basename, so no byte offset is needed to find the offending token.
+  defp error_location(%{file: file, line: line, column: column}) do
+    Enum.join([file, line, column], ":")
+  end
+
+  # "1 run" for exactly one run, "<n> runs" otherwise.
+  defp run_count_label(runs) do
+    case length(runs) do
+      1 -> "1 run"
+      count -> "#{count} runs"
+    end
+  end
+
+  # Compact "succeeded/total" counter built from the run's per-state counts.
+  defp node_counts(%{"states" => states}) when is_map(states) do
+    total = Enum.sum(Map.values(states))
+    succeeded = Map.get(states, "succeeded", 0)
+    "#{succeeded}/#{total}"
+  end
+
+  defp node_counts(_), do: "0/0"
+
+  # Longer node-count summary for the run detail header: every state with a
+  # positive count is listed, in a fixed order, so the operator can read
+  # "1 succeeded - 2 running - 1 pending" without scrolling to the node grid.
+  defp detail_node_counts(%{"states" => states}) when is_map(states) do
+    order = ~w(running pending succeeded failed skipped upstream_failed stranded cancelled)
+
+    parts =
+      Enum.flat_map(order, fn state ->
+        count = Map.get(states, state, 0)
+        # A falsy count (nil/false) is dropped before the numeric comparison.
+        if count && count > 0, do: ["#{count} #{state}"], else: []
+      end)
+
+    if parts == [], do: "0 nodes", else: Enum.join(parts, " - ")
+  end
+
+  defp detail_node_counts(_), do: "0 nodes"
+
+  # Render a placement map as a human-readable label. When declared and
+  # effective differ (a fallback occurred), both are shown so the operator
+  # can see exactly what happened. Nil placement means placement was not
+  # recorded (e.g. a local-only run or a run predating the placement stamp).
+ defp placement_label(nil), do: "-" + + defp placement_label(%{"declared" => declared, "effective" => effective}) + when declared == effective or is_nil(effective) do + declared || "-" + end + + defp placement_label(%{"declared" => declared, "effective" => effective}) do + "#{declared} (fallback #{effective})" + end + + defp placement_label(_), do: "-" + + defp cost_label(nil), do: "-" + defp cost_label(usd) when is_number(usd), do: "$" <> :erlang.float_to_binary(usd / 1, decimals: 4) + + defp node_cost(%{"attempts" => attempts}) when is_list(attempts) do + usd = + for %{"cost" => %{"usd" => usd}} <- attempts, is_number(usd), reduce: nil do + acc -> (acc || 0) + usd + end + + cost_label(usd) + end + + defp node_cost(_), do: "-" + + defp engine_label(%{"engine" => engine}) when is_binary(engine), do: " - " <> engine + defp engine_label(_), do: "" + + # Delegate to the shared formatter in View so the form dropdown and the + # summary card always show the same label for a given trigger. + defp trigger_label(trigger), do: View.trigger_label(trigger) + + defp relative_time(nil), do: "" + + defp relative_time(iso) when is_binary(iso) do + case DateTime.from_iso8601(iso) do + {:ok, dt, _} -> + s = DateTime.diff(DateTime.utc_now(), dt, :second) + + cond do + s < 60 -> "#{s}s ago" + s < 3600 -> "#{div(s, 60)}m ago" + true -> "#{div(s, 3600)}h ago" + end + + _ -> + iso + end + end +end diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/live/skills_live.ex b/packages/symphony/elixir/lib/symphony_elixir_web/live/skills_live.ex new file mode 100644 index 000000000..f5201c204 --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir_web/live/skills_live.ex @@ -0,0 +1,120 @@ +defmodule SymphonyElixirWeb.SkillsLive do + @moduledoc """ + Skill catalog view. + + - `:index` lists every skill the catalog has loaded with its codex + envelope (model, sandbox, approval policy, tools). 
+ - `:show` renders the full system-prompt body for one skill so the + operator can read what the agent is being told without leaving the + dashboard. + + Reads through `Catalog`; hot-reloads when `skills/*.md` changes on + disk because Catalog re-emits the skill list on its 1s tick. + """ + + use Phoenix.LiveView + + alias SymphonyElixir.Catalog + + @impl true + def mount(_params, _session, socket) do + {:ok, assign(socket, skills: Catalog.skills())} + end + + @impl true + def handle_params(%{"name" => name}, _uri, socket) do + skill = Enum.find(socket.assigns.skills, fn s -> s.name == name end) + {:noreply, assign(socket, live_action: :show, skill: skill, skill_name: name)} + end + + def handle_params(_params, _uri, socket) do + {:noreply, assign(socket, live_action: :index)} + end + + @impl true + def render(%{live_action: :show} = assigns) do + ~H""" + {SymphonyElixirWeb.Layouts.app(%{inner_content: render_show(assigns), active_tab: :skills})} + """ + end + + def render(assigns) do + ~H""" + {SymphonyElixirWeb.Layouts.app(%{inner_content: render_index(assigns), active_tab: :skills})} + """ + end + + defp render_index(assigns) do + ~H""" + <%= if @skills == [] do %> +
+ no skills loaded. add a file under skills/ and the catalog will pick it up within a second. +
+ <% else %> +
+ <%= for skill <- @skills do %> +
+ +
{model_summary(skill)}
+
{skill.sandbox}
+
{tool_summary(skill.tools)}
+
+ <% end %> +
+ <% end %> + """ + end + + defp render_show(assigns) do + case assigns.skill do + nil -> + ~H""" +
+ no skill named {@skill_name}. back to skills +
+ """ + + _skill -> + ~H""" +
+
+
{@skill.name}
+
{Path.relative_to_cwd(@skill.path)}
+
+
+
codex model
+
{@skill.codex_model}
+
reasoning effort
+
{effort_label(@skill.reasoning_effort)}
+
sandbox
+
{@skill.sandbox}
+
approval policy
+
{@skill.approval_policy}
+
tools
+
{tool_summary(@skill.tools)}
+
+
+ +
+
+
prompt body
+
+
{SymphonyElixirWeb.Markdown.to_html(@skill.body)}
+
+ + + """ + end + end + + defp tool_summary([]), do: "(no tools)" + defp tool_summary(tools), do: Enum.join(tools, ", ") + + defp model_summary(skill) do + "#{skill.codex_model} (#{effort_label(skill.reasoning_effort)})" + end + + defp effort_label(nil), do: "default" + defp effort_label(""), do: "default" + defp effort_label(effort) when is_binary(effort), do: effort +end diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/live/statistics_live.ex b/packages/symphony/elixir/lib/symphony_elixir_web/live/statistics_live.ex new file mode 100644 index 000000000..112a73a1f --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir_web/live/statistics_live.ex @@ -0,0 +1,112 @@ +defmodule SymphonyElixirWeb.StatisticsLive do + @moduledoc "Statistics dashboard for playbook-created assignments." + + use Phoenix.LiveView + + alias SymphonyElixir.Statistics + + @impl true + def mount(_params, _session, socket) do + if connected?(socket) do + parent = self() + + Task.start(fn -> + send(parent, {:statistics_snapshot, Statistics.snapshot()}) + end) + end + + {:ok, assign(socket, loading?: true, snapshot: nil)} + end + + @impl true + def handle_info({:statistics_snapshot, snapshot}, socket) do + {:noreply, assign(socket, loading?: false, snapshot: snapshot)} + end + + @impl true + def render(assigns) do + ~H""" + {SymphonyElixirWeb.Layouts.app(%{inner_content: render_page(assigns), active_tab: :statistics})} + """ + end + + defp render_page(%{loading?: true} = assigns) do + ~H""" +
loading statistics...
+ """ + end + + defp render_page(assigns) do + ~H""" +
+ <.chart + title="GitHub review requests" + stat={@snapshot.github} + empty="no requested reviewers found" + /> + <.chart + title="Linear assignees from PR tickets" + stat={@snapshot.linear} + empty="no ticket assignees found" + /> +
+ """ + end + + defp chart(assigns) do + assigns = + assigns + |> assign(:max_count, max_count(assigns.stat.items)) + |> assign(:error, format_error(assigns.stat.error)) + + ~H""" +
+
+
+
{@title}
+
{@stat.total} refs scanned
+
+
+ + <%= cond do %> + <% @error -> %> +
{@error}
+ <% @stat.items == [] -> %> +
{@empty}
+ <% true -> %> +
+ <%= for person <- @stat.items do %> +
+
+ + {person.label} +
+ +
{person.count}
+
+ <% end %> +
+ <% end %> +
+ """ + end + + defp max_count([]), do: 1 + defp max_count(items), do: items |> Enum.map(& &1.count) |> Enum.max() + + defp bar_width(count, max_count) when max_count > 0 do + Integer.to_string(round(count / max_count * 100)) <> "%" + end + + defp fallback_avatar(label) do + "https://github.com/identicons/" <> URI.encode(label) <> ".png" + end + + defp format_error(nil), do: nil + defp format_error(:missing_github_token), do: "GITHUB_TOKEN is not configured." + defp format_error(:github_prs_unavailable), do: "GitHub PR statistics are not available." + defp format_error(:missing_linear_api_token), do: "LINEAR_API_KEY is not configured." + defp format_error(reason), do: "unable to load statistics: " <> inspect(reason) +end diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/live/workflows_live.ex b/packages/symphony/elixir/lib/symphony_elixir_web/live/workflows_live.ex new file mode 100644 index 000000000..9c22946d3 --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir_web/live/workflows_live.ex @@ -0,0 +1,173 @@ +defmodule SymphonyElixirWeb.WorkflowsLive do + @moduledoc """ + Workflow catalog view. + + - `:index` lists every workflow the catalog has loaded with its name and + trigger label, plus a panel for broken `.sym` files showing their located + parse diagnostics. + - `:show` materializes one workflow's AST into a static IR graph and renders + it with the `IRGraph` component, so an operator can inspect the DAG shape + without starting a run. + + Reads through `WorkflowCatalog`; hot-reloads on catalog ticks the same way + `IRRunsLive` re-reads errors on index transitions. 
+ """ + + use Phoenix.LiveView + + alias SymphonyElixir.IR.{Materializer, View} + alias SymphonyElixir.WorkflowCatalog + + @impl true + def mount(_params, _session, socket) do + {:ok, + socket + |> assign(workflows: load_workflows()) + |> assign(workflow_errors: load_workflow_errors())} + end + + @impl true + def handle_params(%{"name" => name}, _uri, socket) do + {:noreply, assign(socket, live_action: :show, workflow_name: name)} + end + + def handle_params(_params, _uri, socket) do + {:noreply, + assign(socket, + live_action: :index, + workflows: load_workflows(), + workflow_errors: load_workflow_errors() + )} + end + + @impl true + def render(%{live_action: :show} = assigns) do + ~H""" + {SymphonyElixirWeb.Layouts.app(%{inner_content: render_show(assigns), active_tab: :workflows})} + """ + end + + def render(assigns) do + ~H""" + {SymphonyElixirWeb.Layouts.app(%{inner_content: render_index(assigns), active_tab: :workflows})} + """ + end + + defp render_index(assigns) do + ~H""" + <%= if @workflows == [] and @workflow_errors == [] do %> +
+ no workflows loaded. add a .sym file under + workflows/ and the catalog will pick it up within a second. +
+ <% else %> + <%= if @workflow_errors != [] do %> +
+
+
broken workflows
+
+
+ these .sym files failed to parse. the last + working version of each stays loaded; fix the location below and the + catalog reloads it within a second. +
+
+ <%= for err <- @workflow_errors do %> +
+
{error_location(err)}
+
parse error
+
{err.message}
+
+
+ <% end %> +
+
+ <% end %> + + <%= if @workflows != [] do %> +
+ <%= for wf <- @workflows do %> +
+ +
{trigger_label(wf.trigger)}
+
+
+
+ <% end %> +
+ <% end %> + <% end %> + """ + end + + defp render_show(assigns) do + case Enum.find(assigns.workflows, &(&1.name == assigns.workflow_name)) do + nil -> + ~H""" +
+ no workflow named {@workflow_name}. back to workflows +
+ """ + + entry -> + assigns = + assigns + |> assign(:graph_result, preview_graph(entry)) + |> assign(:workflow_trigger, trigger_label(entry.trigger)) + + ~H""" +
+
+
{@workflow_name}
+
{@workflow_trigger}
+
+
+ +
+
+
graph
+
+
+ <%= case @graph_result do %> + <% {:ok, detail} -> %> + + <% {:error, reason} -> %> +
cannot preview: {inspect(reason)}
+ <% end %> +
+
+ + + """ + end + end + + defp preview_graph(entry) do + case Materializer.materialize("preview-#{entry.name}", entry.hash, entry.ast) do + {:ok, graph} -> {:ok, View.detail(graph)} + {:error, reason} -> {:error, reason} + end + end + + defp load_workflows do + WorkflowCatalog.workflows() |> Enum.sort_by(& &1.name) + end + + defp load_workflow_errors do + WorkflowCatalog.errors() |> Enum.sort_by(& &1.name) + end + + # `file:line:column`, the shape an editor jumps to from a build log. + defp error_location(%{file: file, line: line, column: column}) do + "#{file}:#{line}:#{column}" + end + + # Delegate to the shared formatter in View so the form dropdown and the + # workflows index always show the same label for a given trigger. + defp trigger_label(trigger), do: View.trigger_label(trigger) +end diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/markdown.ex b/packages/symphony/elixir/lib/symphony_elixir_web/markdown.ex new file mode 100644 index 000000000..68a65249e --- /dev/null +++ b/packages/symphony/elixir/lib/symphony_elixir_web/markdown.ex @@ -0,0 +1,43 @@ +defmodule SymphonyElixirWeb.Markdown do + @moduledoc """ + Render markdown source to sanitized, dashboard-safe HTML. + + Skill bodies and codex message/reasoning text are authored as + markdown; the dashboard used to print them verbatim in a `
<pre>`, so
+  headings, lists, fenced code, and emphasis showed as raw syntax. This
+  lowers that source to HTML once at render time.
+
+  Earmark defaults to `escape: true`, so raw HTML in the source is
+  neutralized; the output is still run through
+  `HtmlSanitizeEx.markdown_html/1` because the dashboard is served
+  read-only on a public host and the codex transcript text is
+  agent-authored.
+  """
+
+  @doc """
+  Turn markdown text into the `{:safe, iodata}` tuple that HEEx emits
+  without escaping it again.
+
+  `nil` and whitespace-only input both produce an empty safe value, so an
+  optional field can be passed in directly. Any other binary is rendered
+  with Earmark and then passed through `HtmlSanitizeEx.markdown_html/1`
+  before it is marked safe.
+  """
+  # Marking the result with raw/1 is the whole purpose of this module, and
+  # sobelow flags it as XSS.Raw (Low Confidence). What gets wrapped is
+  # Earmark output (escape: true) that has already been through
+  # HtmlSanitizeEx.markdown_html/1, so the sink is sanitized. Because
+  # sobelow is reporting-only per .sobelow-conf, the finding is left as a
+  # documented, expected one instead of being silenced with a skip
+  # annotation.
+  @spec to_html(String.t() | nil) :: Phoenix.HTML.safe()
+  def to_html(nil), do: Phoenix.HTML.raw("")
+
+  def to_html(source) when is_binary(source) do
+    if String.trim(source) == "" do
+      Phoenix.HTML.raw("")
+    else
+      rendered = Earmark.as_html!(source, compact_output: true)
+      sanitized = HtmlSanitizeEx.markdown_html(rendered)
+      Phoenix.HTML.raw(sanitized)
+    end
+  end
+end
diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/raw_body_reader.ex b/packages/symphony/elixir/lib/symphony_elixir_web/raw_body_reader.ex
new file mode 100644
index 000000000..86c860b24
--- /dev/null
+++ b/packages/symphony/elixir/lib/symphony_elixir_web/raw_body_reader.ex
@@ -0,0 +1,47 @@
+defmodule SymphonyElixirWeb.RawBodyReader do
+  @moduledoc """
+  A `Plug.Parsers` body reader that retains the raw request body in
+  `conn.assigns.raw_body`.
+
+  Plug.Parsers consumes the request body to decode JSON, which means
+  controllers can't recompute an HMAC over the bytes the caller signed.
+  Inserting this reader keeps the raw body around so the Linear
+  webhook controller can verify `Linear-Signature` before trusting any
+  parsed field.
+
+  Every chunk this reader hands back is appended to whatever earlier
+  calls on the same connection already retained. A body that arrives as
+  one or more `{:more, chunk, conn}` results followed by a final
+  `{:ok, chunk, conn}` is therefore retained in full instead of being
+  truncated to its last chunk.
+
+  Only requests whose path starts with `/api/v1/triggers/` are retained;
+  every other route gets its connection back unchanged.
+  """
+
+  @retain_prefix "/api/v1/triggers/"
+
+  @doc """
+  Read the next chunk of the request body through `Plug.Conn.read_body/2`.
+
+  `opts` is forwarded untouched. The result is exactly what
+  `Plug.Conn.read_body/2` returned, except that for `:ok` and `:more` the
+  returned connection carries the accumulated `:raw_body` assign when the
+  request path is under the retained prefix. Errors are returned as-is.
+  """
+  @spec read_body(Plug.Conn.t(), keyword()) :: {:ok, binary(), Plug.Conn.t()} | {:more, binary(), Plug.Conn.t()} | {:error, term()}
+  def read_body(conn, opts) do
+    case Plug.Conn.read_body(conn, opts) do
+      {:ok, chunk, conn} ->
+        {:ok, chunk, retain(conn, chunk)}
+
+      {:more, chunk, conn} ->
+        {:more, chunk, retain(conn, chunk)}
+
+      {:error, _reason} = error ->
+        error
+    end
+  end
+
+  # Append `chunk` to the retained raw body when the request is on a trigger
+  # path. Appending on `:ok` as well as `:more` is deliberate: overwriting on
+  # the final chunk would throw away everything retained from the preceding
+  # `:more` results and leave only the tail of the body.
+  defp retain(conn, chunk) do
+    if String.starts_with?(conn.request_path, @retain_prefix) do
+      Plug.Conn.assign(conn, :raw_body, (conn.assigns[:raw_body] || "") <> chunk)
+    else
+      conn
+    end
+  end
+end
diff --git a/packages/symphony/elixir/lib/symphony_elixir_web/router.ex b/packages/symphony/elixir/lib/symphony_elixir_web/router.ex
new file mode 100644
index 000000000..f856cef60
--- /dev/null
+++ b/packages/symphony/elixir/lib/symphony_elixir_web/router.ex
@@ -0,0 +1,62 @@
+defmodule SymphonyElixirWeb.Router do
+  @moduledoc "Routes for the runs dashboard and the JSON API."
+
+  use Phoenix.Router
+  import Phoenix.LiveView.Router
+
+  # Plug stack for the LiveView dashboard pages: session, live flash, the
+  # root layout, CSRF protection, and the secure browser headers.
+  pipeline :browser do
+    plug(:fetch_session)
+    plug(:fetch_live_flash)
+    plug(:put_root_layout, html: {SymphonyElixirWeb.Layouts, :root})
+    plug(:protect_from_forgery)
+    plug(:put_secure_browser_headers)
+  end
+
+  # Plug stack for the JSON API: only content negotiation for `json`. None of
+  # the session or CSRF plugs from :browser are part of this pipeline.
+  pipeline :api do
+    plug(:accepts, ["json"])
+  end
+
+  # Vendored JavaScript bundles, each served by a StaticAssetController
+  # action. This scope has no `pipe_through`, so neither pipeline above is
+  # applied to these routes.
+  scope "/", SymphonyElixirWeb do
+    get("/vendor/phoenix/phoenix.js", StaticAssetController, :phoenix)
+    get("/vendor/phoenix_html/phoenix_html.js", StaticAssetController, :phoenix_html)
+    get("/vendor/phoenix_live_view/phoenix_live_view.js", StaticAssetController, :phoenix_live_view)
+  end
+
+  # Dashboard LiveViews, all behind the :browser pipeline.
+  scope "/", SymphonyElixirWeb do
+    pipe_through(:browser)
+
+    # The IR runs view is the default dashboard. It carries the
+    # schema-driven run control, so there is no separate enqueue form.
+    # `/` and `/ir` both mount the same :index action.
+    live("/", IRRunsLive, :index)
+    live("/ir", IRRunsLive, :index)
+    live("/ir/:run_id", IRRunsLive, :show)
+
+    live("/workflows", WorkflowsLive, :index)
+    live("/workflows/:name", WorkflowsLive, :show)
+
+    live("/skills", SkillsLive, :index)
+    live("/skills/:name", SkillsLive, :show)
+    live("/statistics", StatisticsLive, :index)
+  end
+
+  # JSON API, all behind the :api pipeline.
+  scope "/api/v1", SymphonyElixirWeb do
+    pipe_through(:api)
+
+    # The manual-trigger producer onto the IR runtime.
+    post("/runs", ApiController, :enqueue_run)
+
+    # IR runs (the RunGraph model).
+    get("/ir/schema", IRRunController, :schema)
+    get("/ir/runs", IRRunController, :index)
+    post("/ir/runs", IRRunController, :create)
+    get("/ir/runs/:run_id", IRRunController, :show)
+    post("/ir/runs/:run_id/cancel", IRRunController, :cancel)
+    post("/ir/runs/:run_id/rerun", IRRunController, :rerun)
+    post("/ir/runs/:run_id/clear-failed", IRRunController, :clear_failed)
+    post("/ir/runs/:run_id/nodes/:node_id/retry", IRRunController, :retry_node)
+
+    # Trigger endpoints. They resolve to paths under /api/v1/triggers/, the
+    # prefix for which RawBodyReader retains the raw request body.
+    post("/triggers/linear", LinearWebhookController, :accept)
+    post("/triggers/github", GithubWebhookController, :accept)
+    post("/triggers/slack/events", SlackEventsController, :accept)
+  end
+end
diff --git a/packages/symphony/elixir/mise.toml b/packages/symphony/elixir/mise.toml
new file mode 100644
index 000000000..439bbb261
--- /dev/null
+++ b/packages/symphony/elixir/mise.toml
@@ -0,0 +1,3 @@
+[tools]
+erlang = "28"
+elixir = "1.19.5-otp-28"
diff --git a/packages/symphony/elixir/mix.exs b/packages/symphony/elixir/mix.exs
new file mode 100644
index 000000000..c505267ea
--- /dev/null
+++ b/packages/symphony/elixir/mix.exs
@@ -0,0 +1,88 @@
+defmodule SymphonyElixir.MixProject do
+  # Mix project definition for the :symphony_elixir application: project
+  # settings, CLI environment preferences, OTP application entry point,
+  # dependencies, and task aliases.
+  use Mix.Project
+
+  # Directory used for both the core and the project-local dialyzer PLTs.
+  @plt_dir "priv/plts"
+
+  # Compile step shared by the `setup` and `build` aliases.
+  @strict_compile "compile --warnings-as-errors"
+
+  # Project settings read by Mix.
+  def project do
+    [
+      app: :symphony_elixir,
+      version: "0.2.0",
+      elixir: "~> 1.19",
+      compilers: [:phoenix_live_view | Mix.compilers()],
+      start_permanent: Mix.env() == :prod,
+      deps: deps(),
+      aliases: aliases(),
+      test_coverage: [tool: ExCoveralls],
+      dialyzer: dialyzer()
+    ]
+  end
+
+  # Every coveralls task is run in the :test environment.
+  def cli do
+    coverage_tasks = [
+      :coveralls,
+      :"coveralls.detail",
+      :"coveralls.post",
+      :"coveralls.html",
+      :"coveralls.json"
+    ]
+
+    [preferred_envs: Enum.map(coverage_tasks, fn task -> {task, :test} end)]
+  end
+
+  # OTP application callback module and extra applications.
+  def application do
+    [
+      mod: {SymphonyElixir.Application, []},
+      extra_applications: [:logger]
+    ]
+  end
+
+  # Dialyzer options: extra apps added to the PLT, and where PLTs are stored.
+  defp dialyzer do
+    [
+      plt_add_apps: [:mix, :ex_unit],
+      plt_core_path: @plt_dir,
+      plt_local_path: @plt_dir
+    ]
+  end
+
+  # Full dependency list: runtime dependencies first, then the test and
+  # tooling dependencies, in the same order as before.
+  defp deps, do: runtime_deps() ++ tooling_deps()
+
+  defp runtime_deps do
+    [
+      {:bandit, "~> 1.8"},
+      {:phoenix, "~> 1.8.0"},
+      {:phoenix_html, "~> 4.2"},
+      {:phoenix_live_view, "~> 1.1.0"},
+      {:req, "~> 0.5"},
+      {:jason, "~> 1.4"},
+      {:yaml_elixir, "~> 2.12"},
+      # Markdown rendering and sanitizing for the dashboard, both on the
+      # BEAM only. Neither needs a NIF (earmark is pure Elixir,
+      # html_sanitize_ex rides on the pure-Erlang mochiweb), which keeps the
+      # runtime mix build portable on NixOS, where precompiled
+      # dynamically-linked NIFs break.
+      {:earmark, "~> 1.4"},
+      {:html_sanitize_ex, "~> 1.5"},
+      # Phoenix channel client: a runtime worker dials the control plane's
+      # /worker socket and serves provision/teardown over it.
+      {:slipstream, "~> 1.1"}
+    ]
+  end
+
+  defp tooling_deps do
+    [
+      {:lazy_html, ">= 0.1.0", only: :test},
+      {:credo, "~> 1.7", only: [:dev, :test], runtime: false},
+      {:dialyxir, "~> 1.4", only: [:dev, :test], runtime: false},
+      {:sobelow, "~> 0.13", only: [:dev, :test], runtime: false},
+      {:mix_audit, "~> 2.1", only: [:dev, :test], runtime: false},
+      {:excoveralls, "~> 0.18", only: :test}
+    ]
+  end
+
+  # Task aliases; the steps behind `quality` are listed in quality_steps/0.
+  defp aliases do
+    [
+      setup: ["deps.get", @strict_compile],
+      build: [@strict_compile],
+      lint: ["credo"],
+      quality: quality_steps()
+    ]
+  end
+
+  # Steps run, in order, by `mix quality`.
+  defp quality_steps do
+    [
+      "format --check-formatted",
+      # credo runs non-strict on purpose so it respects the :low priority the
+      # checks carry in .credo.exs; --strict surfaces and fails on those
+      # informational refactor/readability suggestions, defeating that
+      # config.
+      "credo",
+      "sobelow --config",
+      # decimal 2.x is pinned by ecto and solid (both require ~> 2.0), so the
+      # only patched release (3.0.0) is unreachable until they move upstream.
+      # https://github.com/advisories/GHSA-rhv4-8758-jx7v
+      "deps.audit --ignore-advisory-ids GHSA-rhv4-8758-jx7v",
+      "dialyzer"
+    ]
+  end
+end
diff --git a/packages/symphony/elixir/mix.lock b/packages/symphony/elixir/mix.lock
new file mode 100644
index 000000000..a8a6d06e6
--- /dev/null
+++ b/packages/symphony/elixir/mix.lock
@@ -0,0 +1,47 @@
+%{
+  "bandit": {:hex, :bandit, "1.11.1", "1eb33123cc3c17ae0c3447874eb83399ee530f960c39711ed240342fbd4865fa", [:mix], [{:hpax, "~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}, {:plug, "~> 1.18", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:thousand_island, "~> 1.0", [hex: :thousand_island, repo: "hexpm", optional: false]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "d4401016df9abbc6dcd325c0b78b2b193e7c7c96bb68f31e576112be025d84a5"},
+  "bunt": {:hex, :bunt, "1.0.0", "081c2c665f086849e6d57900292b3a161727ab40431219529f13c4ddcf3e7a44", [:mix], [], "hexpm", "dc5f86aa08a5f6fa6b8096f0735c4e76d54ae5c9fa2c143e5a1fc7c1cd9bb6b5"},
+  "cc_precompiler": {:hex, :cc_precompiler, "0.1.11", "8c844d0b9fb98a3edea067f94f616b3f6b29b959b6b3bf25fee94ffe34364768", [:mix], [{:elixir_make, "~> 0.7", [hex: :elixir_make, repo: "hexpm", optional: false]}], "hexpm", "3427232caf0835f94680e5bcf082408a70b48ad68a5f5c0b02a3bea9f3a075b9"},
+  "credo": {:hex, :credo, "1.7.16", "a9f1389d13d19c631cb123c77a813dbf16449a2aebf602f590defa08953309d4", [:mix], [{:bunt, "~> 0.2.1 or ~> 1.0", [hex: :bunt, repo: "hexpm", optional: false]}, {:file_system, "~> 0.2 or ~> 1.0", [hex: :file_system, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "d0562af33756b21f248f066a9119e3890722031b6d199f22e3cf95550e4f1579"},
+  "date_time_parser": {:hex, :date_time_parser, "1.3.0", "6ba16850b5ab83dd126576451023ab65349e29af2336ca5084aa1e37025b476e", [:mix], [{:kday, "~> 1.0", [hex: :kday, repo: "hexpm", optional: false]}], "hexpm", "93c8203a8ddc66b1f1531fc0e046329bf0b250c75ffa09567ef03d2c09218e8c"},
+  "decimal": {:hex, :decimal, "2.3.0", "3ad6255aa77b4a3c4f818171b12d237500e63525c2fd056699967a3e7ea20f62", [:mix], [], "hexpm", "a4d66355cb29cb47c3cf30e71329e58361cfcb37c34235ef3bf1d7bf3773aeac"},
+  "dialyxir": {:hex, :dialyxir, "1.4.7", "dda948fcee52962e4b6c5b4b16b2d8fa7d50d8645bbae8b8685c3f9ecb7f5f4d", [:mix], [{:erlex, ">= 0.2.8", [hex: :erlex, repo: "hexpm", optional: false]}], "hexpm", "b34527202e6eb8cee198efec110996c25c5898f43a4094df157f8d28f27d9efe"},
+  "earmark": {:hex, :earmark, "1.4.48", "5f41e579d85ef812351211842b6e005f6e0cef111216dea7d4b9d58af4608434", [:mix], [], "hexpm", "a461a0ddfdc5432381c876af1c86c411fd78a25790c75023c7a4c035fdc858f9"},
+  "ecto": {:hex, :ecto, "3.13.5", "9d4a69700183f33bf97208294768e561f5c7f1ecf417e0fa1006e4a91713a834", [:mix], [{:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "df9efebf70cf94142739ba357499661ef5dbb559ef902b68ea1f3c1fabce36de"},
+  "elixir_make": {:hex, :elixir_make, "0.9.0", "6484b3cd8c0cee58f09f05ecaf1a140a8c97670671a6a0e7ab4dc326c3109726", [:mix], [], "hexpm", "db23d4fd8b757462ad02f8aa73431a426fe6671c80b200d9710caf3d1dd0ffdb"},
+  "erlex": {:hex, :erlex, "0.2.8", "cd8116f20f3c0afe376d1e8d1f0ae2452337729f68be016ea544a72f767d9c12", [:mix], [], "hexpm", "9d66ff9fedf69e49dc3fd12831e12a8a37b76f8651dd21cd45fcf5561a8a7590"},
+  "excoveralls": {:hex, :excoveralls, "0.18.5", "e229d0a65982613332ec30f07940038fe451a2e5b29bce2a5022165f0c9b157e", [:mix], [{:castore, "~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "523fe8a15603f86d64852aab2abe8ddbd78e68579c8525ae765facc5eae01562"},
+  "file_system": {:hex, :file_system, "1.1.1", "31864f4685b0148f25bd3fbef2b1228457c0c89024ad67f7a81a3ffbc0bbad3a", [:mix], [], "hexpm", "7a15ff97dfe526aeefb090a7a9d3d03aa907e100e262a0f8f7746b78f8f87a5d"},
+  "finch": {:hex, :finch, "0.21.0", "b1c3b2d48af02d0c66d2a9ebfb5622be5c5ecd62937cf79a88a7f98d48a8290c", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:mint, "~> 1.6.2 or ~> 1.7", [hex: :mint, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.4 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:nimble_pool, "~> 1.1", [hex: :nimble_pool, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "87dc6e169794cb2570f75841a19da99cfde834249568f2a5b121b809588a4377"},
+  "fine": {:hex, :fine, "0.1.4", "b19a89c1476c7c57afb5f9314aed5960b5bc95d5277de4cb5ee8e1d1616ce379", [:mix], [], "hexpm", "be3324cc454a42d80951cf6023b9954e9ff27c6daa255483b3e8d608670303f5"},
+  "floki": {:hex, :floki, "0.38.0", "62b642386fa3f2f90713f6e231da0fa3256e41ef1089f83b6ceac7a3fd3abf33", [:mix], [], "hexpm", "a5943ee91e93fb2d635b612caf5508e36d37548e84928463ef9dd986f0d1abd9"},
+  "hpax": {:hex, :hpax, "1.0.3", "ed67ef51ad4df91e75cc6a1494f851850c0bd98ebc0be6e81b026e765ee535aa", [:mix], [], "hexpm", "8eab6e1cfa8d5918c2ce4ba43588e894af35dbd8e91e6e55c817bca5847df34a"},
+  "html_sanitize_ex": {:hex, :html_sanitize_ex, "1.5.1", "70d7a817eca4850b330361e1f85ca02422a25d6564fc43dd0915dadac55a16f8", [:mix], [{:mochiweb, "~> 2.15 or ~> 3.1", [hex: :mochiweb, repo: "hexpm", optional: false]}], "hexpm", "c32e0a7f1c479ee4f387a3468b3f27a89715a96e71ee4f0d6a7a9d5658a083ef"},
+  "jason": {:hex, :jason, "1.4.5", "2e3a008590b0b8d7388c20293e9dcc9cf3e5d642fd2a114e4cbbb52e595d940a", [:mix], [{:decimal, "~> 1.0 or ~> 2.0 or ~> 3.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm", "b0c823996102bcd0239b3c2444eb00409b72f6a140c1950bc8b457d836b30684"},
+  "kday": {:hex, :kday, "1.1.0", "64efac85279a12283eaaf3ad6f13001ca2dff943eda8c53288179775a8c057a0", [:mix], [{:ex_doc, "~> 0.21", [hex: :ex_doc, repo: "hexpm", optional: true]}], "hexpm", "69703055d63b8d5b260479266c78b0b3e66f7aecdd2022906cd9bf09892a266d"},
+  "lazy_html": {:hex, :lazy_html, "0.1.10", "ffe42a0b4e70859cf21a33e12a251e0c76c1dff76391609bd56702a0ef5bc429", [:make, :mix], [{:cc_precompiler, "~> 0.1", [hex: :cc_precompiler, repo: "hexpm", optional: false]}, {:elixir_make, "~> 0.9.0", [hex: :elixir_make, repo: "hexpm", optional: false]}, {:fine, "~> 0.1.0", [hex: :fine, repo: "hexpm", optional: false]}], "hexpm", "50f67e5faa09d45a99c1ddf3fac004f051997877dc8974c5797bb5ccd8e27058"},
+  "mime": {:hex, :mime, "2.0.7", "b8d739037be7cd402aee1ba0306edfdef982687ee7e9859bee6198c1e7e2f128", [:mix], [], "hexpm", "6171188e399ee16023ffc5b76ce445eb6d9672e2e241d2df6050f3c771e80ccd"},
+  "mint": {:hex, :mint, "1.7.1", "113fdb2b2f3b59e47c7955971854641c61f378549d73e829e1768de90fc1abf1", [:mix], [{:castore, "~> 0.1.0 or ~> 1.0", [hex: :castore, repo: "hexpm", optional: true]}, {:hpax, "~> 0.1.1 or ~> 0.2.0 or ~> 1.0", [hex: :hpax, repo: "hexpm", optional: false]}], "hexpm", "fceba0a4d0f24301ddee3024ae116df1c3f4bb7a563a731f45fdfeb9d39a231b"},
+  "mint_web_socket": {:hex, :mint_web_socket, "1.0.5", "60354efeb49b1eccf95dfb75f55b08d692e211970fe735a5eb3188b328be2a90", [:mix], [{:mint, ">= 1.4.1 and < 2.0.0-0", [hex: :mint, repo: "hexpm", optional: false]}], "hexpm", "04b35663448fc758f3356cce4d6ac067ca418bbafe6972a3805df984b5f12e61"},
+  "mix_audit": {:hex, :mix_audit, "2.1.5", "c0f77cee6b4ef9d97e37772359a187a166c7a1e0e08b50edf5bf6959dfe5a016", [:make, :mix], [{:jason, "~> 1.4", [hex: :jason, repo: "hexpm", optional: false]}, {:yaml_elixir, "~> 2.11", [hex: :yaml_elixir, repo: "hexpm", optional: false]}], "hexpm", "87f9298e21da32f697af535475860dc1d3617a010e0b418d2ec6142bc8b42d69"},
+  "mochiweb": {:hex, :mochiweb, "3.3.0", "2898ad0bfeee234e4cbae623c7052abc3ff0d73d499ba6e6ffef445b13ffd07a", [:rebar3], [], "hexpm", "aa85b777fb23e9972ebc424e40b5d35106f19bc998873e026dedd876df8ee50c"},
+  "nimble_options": {:hex, :nimble_options, "1.1.1", "e3a492d54d85fc3fd7c5baf411d9d2852922f66e69476317787a7b2bb000a61b", [:mix], [], "hexpm", "821b2470ca9442c4b6984882fe9bb0389371b8ddec4d45a9504f00a66f650b44"},
+  "nimble_pool": {:hex, :nimble_pool, "1.1.0", "bf9c29fbdcba3564a8b800d1eeb5a3c58f36e1e11d7b7fb2e084a643f645f06b", [:mix], [], "hexpm", "af2e4e6b34197db81f7aad230c1118eac993acc0dae6bc83bac0126d4ae0813a"},
+  "phoenix": {:hex, :phoenix, "1.8.7", "d8d755b4ff4b449f610223dd706b4ae64155cb720d3dc09c706c079ecea189e4", [:mix], [{:bandit, "~> 1.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:phoenix_pubsub, "~> 2.1", [hex: :phoenix_pubsub, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.7", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:plug_crypto, "~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}, {:websock_adapter, "~> 0.5.3", [hex: :websock_adapter, repo: "hexpm", optional: false]}], "hexpm", "47352f72d6ab31009ef77516b1b3a14745be97b54061fd458031b9d8294869d5"},
+  "phoenix_html": {:hex, :phoenix_html, "4.3.0", "d3577a5df4b6954cd7890c84d955c470b5310bb49647f0a114a6eeecc850f7ad", [:mix], [], "hexpm", "3eaa290a78bab0f075f791a46a981bbe769d94bc776869f4f3063a14f30497ad"},
+  "phoenix_live_view": {:hex, :phoenix_live_view, "1.1.25", "abc1bdf7f148d7f9a003f149834cc858b24290c433b10ef6d1cbb1d6e9a211ca", [:mix], [{:igniter, ">= 0.6.16 and < 1.0.0-0", [hex: :igniter, repo: "hexpm", optional: true]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:lazy_html, "~> 0.1.0", [hex: :lazy_html, repo: "hexpm", optional: true]}, {:phoenix, "~> 1.6.15 or ~> 1.7.0 or ~> 1.8.0-rc", [hex: :phoenix, repo: "hexpm", optional: false]}, {:phoenix_html, "~> 3.3 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: false]}, {:phoenix_template, "~> 1.0", [hex: :phoenix_template, repo: "hexpm", optional: false]}, {:phoenix_view, "~> 2.0", [hex: :phoenix_view, repo: "hexpm", optional: true]}, {:plug, "~> 1.15", [hex: :plug, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.2 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "b8946e474799da1f874eab7e9ce107502c96ca318ed46d19f811f847df270865"},
+  "phoenix_pubsub": {:hex, :phoenix_pubsub, "2.2.0", "ff3a5616e1bed6804de7773b92cbccfc0b0f473faf1f63d7daf1206c7aeaaa6f", [:mix], [], "hexpm", "adc313a5bf7136039f63cfd9668fde73bba0765e0614cba80c06ac9460ff3e96"},
+  "phoenix_template": {:hex, :phoenix_template, "1.0.4", "e2092c132f3b5e5b2d49c96695342eb36d0ed514c5b252a77048d5969330d639", [:mix], [{:phoenix_html, "~> 2.14.2 or ~> 3.0 or ~> 4.0", [hex: :phoenix_html, repo: "hexpm", optional: true]}], "hexpm", "2c0c81f0e5c6753faf5cca2f229c9709919aba34fab866d3bc05060c9c444206"},
+  "plug": {:hex, :plug, "1.19.2", "e4950525b22c6789dfb38a3f95d47171ba159da3fc5a33be9643b43d5e8adb98", [:mix], [{:mime, "~> 1.0 or ~> 2.0", [hex: :mime, repo: "hexpm", optional: false]}, {:plug_crypto, "~> 1.1.1 or ~> 1.2 or ~> 2.0", [hex: :plug_crypto, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4.3 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "b6fce20a56af5e60fa5dfecf3f907bb98ec981be43c79a3809a499bc3d133de0"},
+  "plug_crypto": {:hex, :plug_crypto, "2.1.1", "19bda8184399cb24afa10be734f84a16ea0a2bc65054e23a62bb10f06bc89491", [:mix], [], "hexpm", "6470bce6ffe41c8bd497612ffde1a7e4af67f36a15eea5f921af71cf3e11247c"},
+  "req": {:hex, :req, "0.5.17", "0096ddd5b0ed6f576a03dde4b158a0c727215b15d2795e59e0916c6971066ede", [:mix], [{:brotli, "~> 0.3.1", [hex: :brotli, repo: "hexpm", optional: true]}, {:ezstd, "~> 1.0", [hex: :ezstd, repo: "hexpm", optional: true]}, {:finch, "~> 0.17", [hex: :finch, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}, {:mime, "~> 2.0.6 or ~> 2.1", [hex: :mime, repo: "hexpm", optional: false]}, {:nimble_csv, "~> 1.0", [hex: :nimble_csv, repo: "hexpm", optional: true]}, {:plug, "~> 1.0", [hex: :plug, repo: "hexpm", optional: true]}], "hexpm", "0b8bc6ffdfebbc07968e59d3ff96d52f2202d0536f10fef4dc11dc02a2a43e39"},
+  "slipstream": {:hex, :slipstream, "1.2.2", "6b07124ac5f62a50327aa38c84edd0284920ac8aba548e04738827838f233ed0", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: true]}, {:mint_web_socket, "~> 0.2 or ~> 1.0", [hex: :mint_web_socket, repo: "hexpm", optional: false]}, {:nimble_options, "~> 0.1 or ~> 1.0", [hex: :nimble_options, repo: "hexpm", optional: false]}, {:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "ccb873ddb21aadb37c5c7745014febe6da0aa2cef0c4e73e7d08ce11d18aacd0"},
+  "sobelow": {:hex, :sobelow, "0.14.1", "2f81e8632f15574cba2402bcddff5497b413c01e6f094bc0ab94e83c2f74db81", [:mix], [{:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm", "8fac9a2bd90fdc4b15d6fca6e1608efb7f7c600fa75800813b794ee9364c87f2"},
+  "solid": {:hex, :solid, "1.2.2", "615d3fb75e12b575d99976ca49f242b1e603f98489d30bf8634b5ab47d85e33f", [:mix], [{:date_time_parser, "~> 1.2", [hex: :date_time_parser, repo: "hexpm", optional: false]}, {:decimal, "~> 2.0", [hex: :decimal, repo: "hexpm", optional: false]}], "hexpm", "410d0af6c0cdfd9d58ed2d22158f4fb0733a49f7b59b8e3bdb26f05919ae38ae"},
+  "telemetry": {:hex, :telemetry, "1.4.2", "a0cb522801dffb1c49fe6e30561badffc7b6d0e180db1300df759faa22062855", [:rebar3], [], "hexpm", "928f6495066506077862c0d1646609eed891a4326bee3126ba54b60af61febb1"},
+  "thousand_island": {:hex, :thousand_island, "1.4.3", "2158209580f633be38d43ec4e3ce0a01079592b9657afff9080d5d8ca149a3af", [:mix], [{:telemetry, "~> 0.4 or ~> 1.0", [hex: :telemetry, repo: "hexpm", optional: false]}], "hexpm", "6e4ce09b0fd761a58594d02814d40f77daff460c48a7354a15ab353bb998ea0b"},
+  "websock": {:hex, :websock, "0.5.3", "2f69a6ebe810328555b6fe5c831a851f485e303a7c8ce6c5f675abeb20ebdadc", [:mix], [], "hexpm", "6105453d7fac22c712ad66fab1d45abdf049868f253cf719b625151460b8b453"},
+  "websock_adapter": {:hex, :websock_adapter, "0.5.9", "43dc3ba6d89ef5dec5b1d0a39698436a1e856d000d84bf31a3149862b01a287f", [:mix], [{:bandit, ">= 0.6.0", [hex: :bandit, repo: "hexpm", optional: true]}, {:plug, "~> 1.14", [hex: :plug, repo: "hexpm", optional: false]}, {:plug_cowboy, "~> 2.6", [hex: :plug_cowboy, repo: "hexpm", optional: true]}, {:websock, "~> 0.5", [hex: :websock, repo: "hexpm", optional: false]}], "hexpm", "5534d5c9adad3c18a0f58a9371220d75a803bf0b9a3d87e6fe072faaeed76a08"},
+  "yamerl": {:hex, :yamerl, "0.10.0", "4ff81fee2f1f6a46f1700c0d880b24d193ddb74bd14ef42cb0bcf46e81ef2f8e", [:rebar3], [], "hexpm", "346adb2963f1051dc837a2364e4acf6eb7d80097c0f53cbdc3046ec8ec4b4e6e"},
+  "yaml_elixir": {:hex, :yaml_elixir, "2.12.0", "30343ff5018637a64b1b7de1ed2a3ca03bc641410c1f311a4dbdc1ffbbf449c7", [:mix], [{:yamerl, "~> 0.10", [hex: :yamerl, repo: "hexpm", optional: false]}], "hexpm", "ca6bacae7bac917a7155dca0ab6149088aa7bc800c94d0fe18c5238f53b313c6"},
+}
diff --git a/packages/symphony/elixir/priv/static/dashboard.css b/packages/symphony/elixir/priv/static/dashboard.css
new file mode 100644
index 000000000..bc191c0ca
--- /dev/null
+++ b/packages/symphony/elixir/priv/static/dashboard.css
@@ -0,0 +1,463 @@
+/* Design tokens for the dashboard (light scheme only): page and card
+   surfaces, text inks, hairline colors, accent/danger pairs, and two
+   shadow depths. Every rule below reads these through var(). */
+:root {
+  color-scheme: light;
+  --page: #f7f7f8;
+  --page-soft: #fbfbfc;
+  --page-deep: #ececf1;
+  --card: rgba(255, 255, 255, 0.94);
+  --card-muted: #f3f4f6;
+  --ink: #202123;
+  --muted: #6e6e80;
+  --line: #ececf1;
+  --line-strong: #d9d9e3;
+  --accent: #10a37f;
+  --accent-ink: #0f513f;
+  --accent-soft: #e8faf4;
+  --danger: #b42318;
+  --danger-soft: #fef3f2;
+  --shadow-sm: 0 1px 2px rgba(16, 24, 40, 0.05);
+  --shadow-lg: 0 20px 50px rgba(15, 23, 42, 0.08);
+}
+
+/* Base element rules: border-box sizing everywhere, page background, body
+   typography, and link colors. */
+* {
+  box-sizing: border-box;
+}
+
+html {
+  background: var(--page);
+}
+
+body {
+  margin: 0;
+  min-height: 100vh;
+  /* Two layers: a faint accent-colored glow fading out from the top, painted
+     over a vertical gradient of the page tones. */
+  background:
+    radial-gradient(circle at top, rgba(16, 163, 127, 0.12) 0%, rgba(16, 163, 127, 0) 30%),
+    linear-gradient(180deg, var(--page-soft) 0%, var(--page) 24%, #f3f4f6 100%);
+  color: var(--ink);
+  font-family: "Sohne", "SF Pro Text", "Helvetica Neue", "Segoe UI", sans-serif;
+  line-height: 1.5;
+}
+
+/* Links use the body ink color and switch to the accent on hover. */
+a {
+  color: var(--ink);
+  text-decoration: none;
+  transition: color 140ms ease;
+}
+
+a:hover {
+  color: var(--accent);
+}
+
+/* Primary button: accent-filled pill that lifts 1px and deepens its shadow
+   on hover. */
+button {
+  appearance: none;
+  border: 1px solid var(--accent);
+  background: var(--accent);
+  color: white;
+  border-radius: 999px;
+  padding: 0.72rem 1.08rem;
+  cursor: pointer;
+  font: inherit;
+  font-weight: 600;
+  letter-spacing: -0.01em;
+  box-shadow: 0 8px 20px rgba(16, 163, 127, 0.18);
+  transition:
+    transform 140ms ease,
+    box-shadow 140ms ease,
+    background 140ms ease,
+    border-color 140ms ease;
+}
+
+button:hover {
+  transform: translateY(-1px);
+  box-shadow: 0 12px 24px rgba(16, 163, 127, 0.22);
+}
+
+/* Secondary variant: card-colored fill with a neutral border and shadow. */
+button.secondary {
+  background: var(--card);
+  color: var(--ink);
+  border-color: var(--line-strong);
+  box-shadow: var(--shadow-sm);
+}
+
+button.secondary:hover {
+  box-shadow: 0 6px 16px rgba(15, 23, 42, 0.08);
+}
+
+/* Compact, low-emphasis pill. It declares its own full set of properties
+   rather than building on the `button` rule above. */
+.subtle-button {
+  appearance: none;
+  border: 1px solid var(--line-strong);
+  background: rgba(255, 255, 255, 0.72);
+  color: var(--muted);
+  border-radius: 999px;
+  padding: 0.34rem 0.72rem;
+  cursor: pointer;
+  font: inherit;
+  font-size: 0.82rem;
+  font-weight: 600;
+  letter-spacing: 0.01em;
+  box-shadow: none;
+  transition:
+    background 140ms ease,
+    border-color 140ms ease,
+    color 140ms ease;
+}
+
+/* `transform` and `box-shadow` are reset here so the lift from `button:hover`
+   is cancelled; presumably this class is applied to <button> elements. */
+.subtle-button:hover {
+  transform: none;
+  box-shadow: none;
+  background: white;
+  border-color: var(--muted);
+  color: var(--ink);
+}
+
+/* Preformatted blocks keep their line breaks but wrap instead of overflowing. */
+pre {
+  margin: 0;
+  white-space: pre-wrap;
+  /* `overflow-wrap: anywhere` is the standard way to break unbreakable runs.
+     `word-break: break-word` is the deprecated legacy keyword with the same
+     effect; it stays as a fallback for engines that predate `anywhere`. */
+  overflow-wrap: anywhere;
+  word-break: break-word;
+}
+
+/* Monospace stack shared by inline code, preformatted blocks, and `.mono`. */
+code,
+pre,
+.mono {
+  font-family: "Sohne Mono", "SFMono-Regular", "SF Mono", Consolas, "Liberation Mono", monospace;
+}
+
+/* Tabular, slashed-zero digits, requested through both the high-level
+   property and the matching low-level OpenType feature tags. */
+.mono,
+.numeric {
+  font-variant-numeric: tabular-nums slashed-zero;
+  font-feature-settings: "tnum" 1, "zero" 1;
+}
+
+/* Centered page container, capped at 1280px wide. */
+.app-shell {
+  max-width: 1280px;
+  margin: 0 auto;
+  padding: 2rem 1rem 3.5rem;
+}
+
+/* Vertical stack of dashboard cards with a uniform gap. */
+.dashboard-shell {
+  display: grid;
+  gap: 1rem;
+}
+
+/* Shared card surface: translucent fill, hairline border, small shadow, and
+   a blur of whatever sits behind the card. Individual card rules add their
+   own radius and padding. */
+.hero-card,
+.section-card,
+.metric-card,
+.error-card {
+  background: var(--card);
+  border: 1px solid rgba(217, 217, 227, 0.82);
+  box-shadow: var(--shadow-sm);
+  /* Safari before 18 only recognizes the prefixed property; the unprefixed
+     declaration follows so standards-compliant engines use it. */
+  -webkit-backdrop-filter: blur(18px);
+  backdrop-filter: blur(18px);
+}
+
+/* Hero card: larger radius, fluid padding, and the large shadow in place of
+   the small one set by the shared card rule. */
+.hero-card {
+  border-radius: 28px;
+  padding: clamp(1.25rem, 3vw, 2rem);
+  box-shadow: var(--shadow-lg);
+}
+
+/* Two columns: flexible copy on the left, content-sized column on the right. */
+.hero-grid {
+  display: grid;
+  grid-template-columns: minmax(0, 1fr) auto;
+  gap: 1.25rem;
+  align-items: start;
+}
+
+.eyebrow {
+  margin: 0;
+  color: var(--muted);
+  text-transform: uppercase;
+  letter-spacing: 0.08em;
+  font-size: 0.76rem;
+  font-weight: 600;
+}
+
+.hero-title {
+  margin: 0.35rem 0 0;
+  font-size: clamp(2rem, 4vw, 3.3rem);
+  line-height: 0.98;
+  letter-spacing: -0.04em;
+}
+
+.hero-copy {
+  margin: 0.75rem 0 0;
+  max-width: 46rem;
+  color: var(--muted);
+  font-size: 1rem;
+}
+
+/* Right-aligned column for the status badges. */
+.status-stack {
+  display: grid;
+  justify-items: end;
+  align-content: start;
+  min-width: min(100%, 9rem);
+}
+
+/* Base pill shared by the live and offline badges. */
+.status-badge {
+  display: inline-flex;
+  align-items: center;
+  gap: 0.45rem;
+  min-height: 2rem;
+  padding: 0.35rem 0.78rem;
+  border-radius: 999px;
+  border: 1px solid var(--line);
+  background: var(--card-muted);
+  color: var(--muted);
+  font-size: 0.82rem;
+  font-weight: 700;
+  letter-spacing: 0.01em;
+}
+
+/* The dot takes the badge's text color through currentColor. */
+.status-badge-dot {
+  width: 0.52rem;
+  height: 0.52rem;
+  border-radius: 999px;
+  background: currentColor;
+  opacity: 0.9;
+}
+
+/* Hidden by default; revealed by the `.phx-connected` rule further down. */
+.status-badge-live {
+  display: none;
+  background: var(--accent-soft);
+  border-color: rgba(16, 163, 127, 0.18);
+  color: var(--accent-ink);
+}
+
+.status-badge-offline {
+  background: #f5f5f7;
+  border-color: var(--line-strong);
+  color: var(--muted);
+}
+
+/* Once the `[data-phx-main]` element carries `.phx-connected`, the two badges
+   swap: live is shown and offline is hidden. */
+[data-phx-main].phx-connected .status-badge-live {
+  display: inline-flex;
+}
+
+[data-phx-main].phx-connected .status-badge-offline {
+  display: none;
+}
+
+/* Metric cards flow into as many columns of at least 180px as fit. */
+.metric-grid {
+  display: grid;
+  gap: 0.85rem;
+  grid-template-columns: repeat(auto-fit, minmax(180px, 1fr));
+}
+
+.metric-card {
+  border-radius: 22px;
+  padding: 1rem 1.05rem 1.1rem;
+}
+
+.metric-label {
+  margin: 0;
+  color: var(--muted);
+  font-size: 0.82rem;
+  font-weight: 600;
+  letter-spacing: 0.01em;
+}
+
+.metric-value {
+  margin: 0.35rem 0 0;
+  font-size: clamp(1.6rem, 2vw, 2.1rem);
+  line-height: 1.05;
+  letter-spacing: -0.03em;
+}
+
+.metric-detail {
+  margin: 0.45rem 0 0;
+  color: var(--muted);
+  font-size: 0.88rem;
+}
+
+/* Generic content card and its header row. */
+.section-card {
+  border-radius: 24px;
+  padding: 1.15rem;
+}
+
+/* Header children sit at opposite ends and wrap when space runs out. */
+.section-header {
+  display: flex;
+  justify-content: space-between;
+  align-items: flex-start;
+  gap: 1rem;
+  flex-wrap: wrap;
+}
+
+.section-title {
+  margin: 0;
+  font-size: 1.08rem;
+  line-height: 1.2;
+  letter-spacing: -0.02em;
+}
+
+.section-copy {
+  margin: 0.35rem 0 0;
+  color: var(--muted);
+  font-size: 0.94rem;
+}
+
+/* The wrapper scrolls horizontally so the table's minimum width can exceed
+   the card width. */
+.table-wrap {
+  overflow-x: auto;
+  margin-top: 1rem;
+}
+
+.data-table {
+  width: 100%;
+  min-width: 720px;
+  border-collapse: collapse;
+}
+
+/* Variant with fixed column layout and a wider minimum width. */
+.data-table-running {
+  table-layout: fixed;
+  min-width: 980px;
+}
+
+.data-table th {
+  padding: 0 0.5rem 0.75rem 0;
+  text-align: left;
+  color: var(--muted);
+  font-size: 0.78rem;
+  font-weight: 600;
+  text-transform: uppercase;
+  letter-spacing: 0.04em;
+}
+
+.data-table td {
+  padding: 0.9rem 0.5rem 0.9rem 0;
+  border-top: 1px solid var(--line);
+  vertical-align: top;
+  font-size: 0.94rem;
+}
+
+/* Tight vertical stacks. `min-width: 0` lets a stack shrink below its
+   content width so the ellipsis rules below can take effect. */
+.issue-stack,
+.session-stack,
+.detail-stack,
+.token-stack {
+  display: grid;
+  gap: 0.24rem;
+  min-width: 0;
+}
+
+/* Single-line text truncated with an ellipsis. */
+.event-text {
+  font-weight: 500;
+  line-height: 1.45;
+  max-width: 100%;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+.event-meta {
+  max-width: 100%;
+  overflow: hidden;
+  text-overflow: ellipsis;
+  white-space: nowrap;
+}
+
+/* Neutral state pill; the modifier classes below only swap its colors. */
+.state-badge {
+  display: inline-flex;
+  align-items: center;
+  min-height: 1.85rem;
+  padding: 0.3rem 0.68rem;
+  border-radius: 999px;
+  border: 1px solid var(--line);
+  background: var(--card-muted);
+  color: var(--ink);
+  font-size: 0.8rem;
+  font-weight: 600;
+  line-height: 1;
+}
+
+.state-badge-active {
+  background: var(--accent-soft);
+  border-color: rgba(16, 163, 127, 0.18);
+  color: var(--accent-ink);
+}
+
+.state-badge-warning {
+  background: #fff7e8;
+  border-color: #f1d8a6;
+  color: #8a5a00;
+}
+
+.state-badge-danger {
+  background: var(--danger-soft);
+  border-color: #f6d3cf;
+  color: var(--danger);
+}
+
+.issue-id {
+  font-weight: 600;
+  letter-spacing: -0.01em;
+}
+
+.issue-link {
+  color: var(--muted);
+  font-size: 0.86rem;
+}
+
+.muted {
+  color: var(--muted);
+}
+
+/* Inset grey panel for code-like content. */
+.code-panel {
+  margin-top: 1rem;
+  padding: 1rem;
+  border-radius: 18px;
+  background: #f5f5f7;
+  border: 1px solid var(--line);
+  color: #353740;
+  font-size: 0.9rem;
+}
+
+.empty-state {
+  margin: 1rem 0 0;
+  color: var(--muted);
+}
+
+/* Error card: replaces the shared card background and border color with a
+   danger-tinted gradient and border. */
+.error-card {
+  border-radius: 24px;
+  padding: 1.25rem;
+  background: linear-gradient(180deg, #fff8f7 0%, var(--danger-soft) 100%);
+  border-color: #f6d3cf;
+}
+
+.error-title {
+  margin: 0;
+  color: var(--danger);
+  font-size: 1.15rem;
+  letter-spacing: -0.02em;
+}
+
+.error-copy {
+  margin: 0.45rem 0 0;
+  color: var(--danger);
+}
+
+/* Up to 860px: tighter page padding, hero collapses to one column with the
+   status stack left-aligned, and metrics sit in two columns. */
+@media (max-width: 860px) {
+  .app-shell {
+    padding: 1rem 0.85rem 2rem;
+  }
+
+  .hero-grid {
+    grid-template-columns: 1fr;
+  }
+
+  .status-stack {
+    justify-items: start;
+  }
+
+  .metric-grid {
+    grid-template-columns: repeat(2, minmax(0, 1fr));
+  }
+}
+
+/* Up to 560px: metrics become a single column and the large cards get a
+   smaller radius and padding. */
+@media (max-width: 560px) {
+  .metric-grid {
+    grid-template-columns: 1fr;
+  }
+
+  .section-card,
+  .hero-card,
+  .error-card {
+    border-radius: 20px;
+    padding: 1rem;
+  }
+}
diff --git a/packages/symphony/elixir/test/catalog_assets_test.exs b/packages/symphony/elixir/test/catalog_assets_test.exs
new file mode 100644
index 000000000..905da8bf9
--- /dev/null
+++ b/packages/symphony/elixir/test/catalog_assets_test.exs
@@ -0,0 +1,48 @@
+# Guards the workflow packs shipped under `workflows/`: every `.sym` workflow
+# must parse, every skill must load, and the example pack must stay
+# manual-only with a tool-less skill.
+defmodule SymphonyElixir.CatalogAssetsTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.DSL.Parser
+  alias SymphonyElixir.Skill
+
+  # Two directories above this test directory; the globs below expect it to
+  # contain `workflows/`.
+  @root Path.expand("../..", __DIR__)
+  @example_workflows_dir Path.join([@root, "workflows", "example", "workflows"])
+  @example_skills_dir Path.join([@root, "workflows", "example", "skills"])
+
+  test "all shipped workflow files parse" do
+    results =
+      @root
+      |> Path.join("workflows/*/workflows/*.sym")
+      |> Path.wildcard()
+      |> Enum.sort()
+      |> Enum.map(fn path -> {path, Parser.parse(File.read!(path), file: path)} end)
+
+    # An empty glob means the layout moved; do not let the loop pass vacuously.
+    refute results == []
+
+    for {path, result} <- results do
+      assert {:ok, %{kind: :workflow}} = result, "expected #{path} to parse, got #{inspect(result)}"
+    end
+  end
+
+  test "all shipped skill files load" do
+    paths =
+      @root
+      |> Path.join("workflows/*/skills/*.md")
+      |> Path.wildcard()
+      |> Enum.sort()
+
+    # Enum.all?/2 is true for [], so the previous single assertion passed when
+    # the glob matched nothing. Require at least one skill, as the workflow
+    # test does for workflows.
+    refute paths == []
+
+    # Collect the offenders so a failure names the files instead of only
+    # reporting `false`.
+    failed = Enum.reject(paths, &match?({:ok, %Skill{}}, Skill.load(&1)))
+    assert failed == [], "expected every skill to load, failed: #{inspect(failed)}"
+  end
+
+  test "example workflow pack is safe and manual-only" do
+    source = File.read!(Path.join(@example_workflows_dir, "inspect.sym"))
+    assert {:ok, workflow} = Parser.parse(source, file: "inspect.sym")
+    assert {:ok, skill} = Skill.load(Path.join(@example_skills_dir, "inspect.md"))
+
+    assert workflow.name == "inspect"
+    assert workflow.trigger == %{kind: :manual}
+
+    # The workflow must bind exactly one name, "inspect".
+    binds = for {:bind, name, _expr} <- workflow.statements, do: name
+    assert binds == ["inspect"]
+
+    # The example skill must not declare any tools.
+    assert skill.tools == []
+  end
+end
diff --git a/packages/symphony/elixir/test/claude/code_test.exs b/packages/symphony/elixir/test/claude/code_test.exs
new file mode 100644
index 000000000..1a82b8625
--- /dev/null
+++ b/packages/symphony/elixir/test/claude/code_test.exs
@@ -0,0 +1,31 @@
+# Covers the shell command string built for the claude CLI and the API-key
+# guard in `Code.run/4`.
+defmodule SymphonyElixir.Claude.CodeTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.Claude.Code
+  alias SymphonyElixir.Config
+
+  test "command pipes the prompt on stdin and reads prompt/model from env" do
+    shell_line = Code.command("claude")
+
+    # The prompt is piped in rather than passed on argv, so a leading dash or
+    # an argv length limit cannot corrupt the invocation.
+    assert String.contains?(shell_line, ~s(printf '%s' "$SYMPHONY_CLAUDE_PROMPT" | claude))
+
+    expected_flags = [
+      "--print",
+      "--output-format json",
+      "--dangerously-skip-permissions",
+      ~s(--model "$SYMPHONY_CLAUDE_MODEL")
+    ]
+
+    for flag <- expected_flags do
+      assert String.contains?(shell_line, flag)
+    end
+
+    # The prompt variable is only read, never assigned inline.
+    refute String.contains?(shell_line, "SYMPHONY_CLAUDE_PROMPT=")
+  end
+
+  test "command honors an overridden claude executable" do
+    shell_line = Code.command("/opt/bin/claude")
+    assert String.contains?(shell_line, "| /opt/bin/claude --print")
+  end
+
+  test "run errors without an Anthropic API key rather than spawning claude" do
+    opts = [config: %Config{anthropic_api_key: nil}, model: "claude-opus-4-8"]
+    outcome = Code.run(File.cwd!(), "hello", %{}, opts)
+
+    assert outcome == {:error, :anthropic_api_key_not_configured}
+  end
+end
diff --git a/packages/symphony/elixir/test/codex/provision_test.exs b/packages/symphony/elixir/test/codex/provision_test.exs
new file mode 100644
index 000000000..5e209b5d3
--- /dev/null
+++ b/packages/symphony/elixir/test/codex/provision_test.exs
@@ -0,0 +1,247 @@
+defmodule SymphonyElixir.Codex.ProvisionTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.{Codex.Provision, Config, RepositoryCatalog}
+
+  # Writes a one-repository catalog (acme/app, primary, branch main) into a
+  # uniquely named temp dir and returns a %Config{} pointing at it. Keys in
+  # `extra` override the defaults; the dir is removed when the test exits.
+  defp config_with_repos(extra \\ %{}) do
+    tmp_dir = Path.join(System.tmp_dir!(), "provision_#{System.unique_integer([:positive])}")
+    File.mkdir_p!(tmp_dir)
+    catalog_path = Path.join(tmp_dir, "repositories.yaml")
+
+    File.write!(catalog_path, """
+    repositories:
+      - name: app
+        owner_repo: acme/app
+        default_branch: main
+        primary: true
+    """)
+
+    on_exit(fn -> File.rm_rf!(tmp_dir) end)
+
+    defaults = %{
+      repositories_file: catalog_path,
+      github_token: nil,
+      ix_env_passthrough: [],
+      github_app_bot_username: "acme-bot[bot]",
+      github_app_bot_email: "bot@acme.dev"
+    }
+
+    struct(Config, Map.merge(defaults, extra))
+  end
+
+  test "sh single-quotes and escapes embedded quotes" do
+    # Each pair is {input, expected shell-quoted form}.
+    cases = [{"plain", "'plain'"}, {"a'b", "'a'\\''b'"}]
+
+    for {raw, quoted} <- cases do
+      assert Provision.sh(raw) == quoted
+    end
+  end
+
+  test "env_export_lines renders a no-op for an empty env" do
+    rendered = Provision.env_export_lines([])
+
+    # With nothing to export, the helper is expected to emit a lone `:`.
+    assert rendered == ":"
+  end
+
+  test "env_export_lines quotes values" do
+    # The value contains a space, so it is expected back inside single quotes.
+    pairs = [{"K", "a b"}]
+
+    assert Provision.env_export_lines(pairs) == "export K='a b'"
+  end
+
+  test "runtime_env falls back to the static github_token when no bot token is minted" do
+    # No :bot_token option is passed, so the configured token should fill
+    # both variables.
+    env =
+      %{github_token: "ghs_main"}
+      |> config_with_repos()
+      |> Provision.runtime_env([])
+
+    for name <- ["GITHUB_TOKEN", "GH_TOKEN"] do
+      assert {name, "ghs_main"} in env
+    end
+  end
+
+  test "runtime_env: the minted bot token owns both GITHUB_TOKEN and GH_TOKEN over the static token" do
+    env =
+      %{github_token: "ghs_human"}
+      |> config_with_repos()
+      |> Provision.runtime_env(bot_token: "ghs_app")
+
+    # `gh pr create` authors as GH_TOKEN, so the App token has to occupy both
+    # variables and the static host token must not appear under any name
+    # (ENG-2012).
+    assert {"GITHUB_TOKEN", "ghs_app"} in env
+    assert {"GH_TOKEN", "ghs_app"} in env
+    refute "ghs_human" in Enum.map(env, fn {_key, value} -> value end)
+
+    # No variable name may be emitted twice.
+    names = Enum.map(env, fn entry -> elem(entry, 0) end)
+    assert Enum.uniq(names) == names
+  end
+
+  test "runtime_env: a passthrough of the same name cannot shadow the bot token" do
+    var = "GH_TOKEN"
+
+    # The OS environment is process-global and GH_TOKEN may already be
+    # exported by the developer or CI. Remember the inherited value and put
+    # it back on exit; unconditionally deleting it would leak into whatever
+    # runs after this test.
+    previous = System.get_env(var)
+    System.put_env(var, "ghs_inherited")
+
+    on_exit(fn ->
+      if previous, do: System.put_env(var, previous), else: System.delete_env(var)
+    end)
+
+    # The passthrough list names the same variable the bot token is written to.
+    config = config_with_repos(%{github_token: nil, ix_env_passthrough: [var]})
+
+    env = Provision.runtime_env(config, bot_token: "ghs_app")
+
+    assert {"GH_TOKEN", "ghs_app"} in env
+    refute {"GH_TOKEN", "ghs_inherited"} in env
+  end
+
+  test "repo_blocks stamps the auth header and bot identity when a token is given" do
+    workspace = "/home/u/symphony-workspaces/run1"
+    blocks = Provision.repo_blocks(config_with_repos(), workspace, "symphony/run1", "ghs_tok")
+
+    # Clone, auth header key, branch checkout, and bot identity must all
+    # appear in the rendered text.
+    expected_fragments = [
+      "clone --depth 1 --branch 'main' 'https://github.com/acme/app.git'",
+      "http.https://github.com/.extraheader",
+      "checkout -b 'symphony/run1'",
+      "user.name' 'acme-bot[bot]'",
+      "user.email' 'bot@acme.dev'",
+      # The token must show up as base64 of `x-access-token:<token>`.
+      Base.encode64("x-access-token:ghs_tok")
+    ]
+
+    for fragment <- expected_fragments do
+      assert blocks =~ fragment
+    end
+  end
+
+  test "repo_blocks omits the auth header when no token is available" do
+    # A nil token (fourth argument) means no extraheader may be rendered.
+    blocks =
+      config_with_repos()
+      |> Provision.repo_blocks("/home/u/symphony-workspaces/run1", "symphony/run1", nil)
+
+    refute String.contains?(blocks, "extraheader")
+  end
+
+  test "repo_blocks clones an explicit repository list, overriding the config catalog" do
+    # The catalog written by config_with_repos/1 only lists acme/app; the
+    # fifth argument supplies indexable-inc/ix instead.
+    explicit = [
+      %RepositoryCatalog{name: "ix", owner_repo: "indexable-inc/ix", default_branch: "main", primary?: true}
+    ]
+
+    blocks =
+      config_with_repos()
+      |> Provision.repo_blocks("/home/u/symphony-workspaces/run1", "symphony/run1", "ghs_tok", explicit)
+
+    assert blocks =~ "clone --depth 1 --branch 'main' 'https://github.com/indexable-inc/ix.git'"
+    refute blocks =~ "acme/app"
+  end
+
+  test "host_primary_workspace uses the explicit list's primary, falling back to the config catalog when absent" do
+    config = config_with_repos()
+    root = "/home/u/symphony-workspaces/run1"
+
+    # The primary entry is listed second, so simply taking the first element
+    # would produce the wrong directory.
+    explicit = [
+      %RepositoryCatalog{name: "docs", owner_repo: "indexable-inc/docs", default_branch: "main", primary?: false},
+      %RepositoryCatalog{name: "ix", owner_repo: "indexable-inc/ix", default_branch: "main", primary?: true}
+    ]
+
+    with_list = Provision.host_primary_workspace(config, root, "run1", explicit)
+    assert with_list == "/home/u/symphony-workspaces/run1/ix"
+
+    # Without a list, the primary from the config catalog (app) is used.
+    without_list = Provision.host_primary_workspace(config, root, "run1")
+    assert without_list == "/home/u/symphony-workspaces/run1/app"
+  end
+
+  test "backend id and name follow the symphony scheme" do
+    assert Provision.backend_id("run1", "impl") == "symphony:run1:impl"
+    assert Provision.backend_name(%{identifier: "ENG-1", title: "Do it"}, "run1", "impl") == "ENG-1: Do it / impl"
+    assert Provision.backend_name(%{identifier: "ENG-1"}, "run1", "impl") == "ENG-1 / impl"
+    assert Provision.backend_name(%{}, "run1", "impl") == "run1 / impl"
+  end
+
+  # Redaction and the room-start pkill behavior were asserted through the
+  # `Codex.IxVm` / `Codex.Host` delegates before those modules were deleted
+  # in the `.sym`/IR cutover. The behavior is owned here, so the coverage
+  # moved to the owner.
+  test "sanitize_ix_args redacts --env values in ix command args" do
+    assert Provision.sanitize_ix_args([
+             "new",
+             "ix/symphony-codex:2026-05-27",
+             "--env",
+             "GITHUB_TOKEN=ghs_secret",
+             "--env",
+             "OPENAI_API_KEY=sk-secret",
+             "--name",
+             "worker"
+           ]) == [
+             "new",
+             "ix/symphony-codex:2026-05-27",
+             "--env",
+             "GITHUB_TOKEN=",
+             "--env",
+             "OPENAI_API_KEY=",
+             "--name",
+             "worker"
+           ]
+  end
+
+  test "sanitize_ix_args redacts sensitive shell exports in ix command args" do
+    assert Provision.sanitize_ix_args([
+             "shell",
+             "worker",
+             "--",
+             "bash",
+             "-lc",
+             "export GITHUB_TOKEN='ghs_secret'\nexport OPENAI_API_KEY='sk-secret'\necho ok"
+           ]) == [
+             "shell",
+             "worker",
+             "--",
+             "bash",
+             "-lc",
+             "export GITHUB_TOKEN=''\nexport OPENAI_API_KEY=''\necho ok"
+           ]
+  end
+
+  test "sanitize_setenv_args redacts --setenv values but keeps other args" do
+    args = [
+      "--collect",
+      "--uid=hari",
+      "--setenv=GITHUB_TOKEN=ghs_secret",
+      "--setenv=PATH=/usr/bin",
+      "--unit=symphony-host-abc.service",
+      "--",
+      "room-server"
+    ]
+
+    assert Provision.sanitize_setenv_args(args) == [
+             "--collect",
+             "--uid=hari",
+             "--setenv=GITHUB_TOKEN=",
+             "--setenv=PATH=",
+             "--unit=symphony-host-abc.service",
+             "--",
+             "room-server"
+           ]
+  end
+
+  test "ix_room_start_script stops only the room-server process name" do
+    script =
+      Provision.ix_room_start_script(
+        %Config{ix_room_server_command: "room-server", ix_room_port: 8080, github_token: nil, ix_env_passthrough: []},
+        "run_test",
+        []
+      )
+
+    assert script =~ "pkill -x room-server || true"
+    refute script =~ "pkill -f room-server"
+    # The per-run engine host serves the HTTP /api surface only, so it
+    # opts out of the WebTransport listener (room-server #232).
+    assert script =~ "--no-wt"
+  end
+
+  test "host_room_server_command binds the picked port and disables WebTransport" do
+    argv =
+      Provision.host_room_server_command(
+        %Config{host_room_server_command: "room-server"},
+        "127.0.0.1",
+        54_321,
+        "/home/u/.local/state/room/run1"
+      )
+
+    assert argv == [
+             System.find_executable("room-server") || "room-server",
+             "--host",
+             "127.0.0.1",
+             "--port",
+             "54321",
+             "--state-dir",
+             "/home/u/.local/state/room/run1",
+             "--no-wt"
+           ]
+
+    # --no-wt opts out of the WebTransport listener, so per-run host
+    # servers do not collide on the fixed UDP port that the standalone
+    # server now binds by default (room-server #232).
+    assert "--no-wt" in argv
+    refute "--wt-port" in argv
+  end
+end
diff --git a/packages/symphony/elixir/test/command_test.exs b/packages/symphony/elixir/test/command_test.exs
new file mode 100644
index 000000000..efb45977c
--- /dev/null
+++ b/packages/symphony/elixir/test/command_test.exs
@@ -0,0 +1,49 @@
+defmodule SymphonyElixir.CommandTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.Command
+
+  test "captures successful command output" do
+    assert {:ok, "ok\n"} = Command.run("/bin/sh", ["-c", "printf 'ok\n'"], 1_000)
+  end
+
+  test "captures stderr with failed command output" do
+    assert {:error, {:exit, 7, "bad\n"}} = Command.run("/bin/sh", ["-c", "printf 'bad\n' >&2; exit 7"], 1_000)
+  end
+
+  test "terminates commands after the timeout" do
+    assert {:error, {:timeout, 50, _output}} = Command.run("/bin/sh", ["-c", "sleep 5"], 50)
+  end
+
+  test "kills the spawned process on timeout so it does not orphan" do
+    pid_file = Path.join(System.tmp_dir!(), "command_test_#{System.unique_integer([:positive])}.pid")
+    on_exit(fn -> File.rm(pid_file) end)
+
+    # `exec sleep` replaces the shell so $$ is the surviving process the
+    # port owns; without the kill it would outlive the 50ms timeout.
+    assert {:error, {:timeout, 50, _output}} =
+             Command.run("/bin/sh", ["-c", "echo $$ > #{pid_file}; exec sleep 30"], 50)
+
+    os_pid = wait_for_pid(pid_file)
+    assert eventually_dead?(os_pid), "spawned process #{os_pid} was left running after timeout"
+  end
+
+  defp wait_for_pid(pid_file, attempts \\ 50) do
+    case {File.read(pid_file), attempts > 0} do
+      {{:ok, recorded}, _} when recorded != "" -> String.to_integer(String.trim(recorded))
+      {_, true} -> Process.sleep(10) && wait_for_pid(pid_file, attempts - 1)
+      {_, false} -> flunk("spawned process never recorded its pid in #{pid_file}")
+    end
+  end
+
+  defp eventually_dead?(os_pid, attempts \\ 50)
+
+  defp eventually_dead?(_os_pid, 0), do: false
+
+  defp eventually_dead?(os_pid, attempts) do
+    {_output, status} = System.cmd("kill", ["-0", Integer.to_string(os_pid)], stderr_to_stdout: true)
+    # A zero status from `kill -0` is treated as "still running": pause, then probe again.
+    if status == 0, do: Process.sleep(10)
+    status != 0 or eventually_dead?(os_pid, attempts - 1)
+  end
+end
diff --git a/packages/symphony/elixir/test/config_test.exs b/packages/symphony/elixir/test/config_test.exs
new file mode 100644
index 000000000..5991f000a
--- /dev/null
+++ b/packages/symphony/elixir/test/config_test.exs
@@ -0,0 +1,132 @@
+defmodule SymphonyElixir.ConfigTest do
+  use ExUnit.Case, async: false
+
+  alias SymphonyElixir.Config
+
+  test "captures default codex runtime knobs" do
+    config = Config.get()
+
+    assert config.ix_command == "ix"
+    assert config.ix_image == "ix/symphony-codex:latest"
+    assert config.ix_room_server_command == "room-server"
+    assert config.ix_room_port == 8080
+    assert config.ix_room_connect == "direct"
+    assert config.ix_local_port_base == 18_080
+    refute config.ix_keep_vm?
+    assert config.ix_env_passthrough == ["OPENAI_API_KEY", "CODEX_API_KEY"]
+    assert config.host_user == nil
+    assert config.host_group == nil
+    assert config.host_workspaces_dir == nil
+    assert config.host_room_server_command == "room-server"
+    assert config.host_systemd_run_command == "systemd-run"
+    refute config.host_keep?
+    assert config.claude_command == "claude"
+  end
+
+  test "reads the room advertise host and registry url from the environment" do
+    original = Config.get()
+
+    on_exit(fn ->
+      System.delete_env("SYMPHONY_ROOM_ADVERTISE_HOST")
+      System.delete_env("SYMPHONY_ROOM_REGISTRY_URL")
+      restart_config!(original)
+    end)
+
+    System.put_env("SYMPHONY_ROOM_ADVERTISE_HOST", "100.0.0.7")
+    System.put_env("SYMPHONY_ROOM_REGISTRY_URL", "https://room.ix.dev")
+    restart_config!(original)
+
+    config = Config.get()
+    assert config.room.advertise_host == "100.0.0.7"
+    assert config.room.registry_url == "https://room.ix.dev"
+  end
+
+  test "creates mutable runtime dirs without mutating workflow pack assets" do
+    original = Config.get()
+    root = tmp_dir("config_pack_state")
+    pack_dir = write_pack!(Path.join(root, "pack"))
+    workspaces_dir = Path.join(root, "state/workspaces")
+    runs_dir = Path.join(root, "state/runs")
+
+    on_exit(fn -> restart_config!(original) end)
+    restart_config!(root: root, pack_dir: pack_dir, workspaces_dir: workspaces_dir, runs_dir: runs_dir)
+
+    assert File.dir?(workspaces_dir)
+    assert File.dir?(runs_dir)
+    assert File.dir?(Path.join(pack_dir, "workflows"))
+    assert File.dir?(Path.join(pack_dir, "skills"))
+  end
+
+  test "fails clearly when workflow pack assets are missing" do
+    original = Config.get()
+    root = tmp_dir("config_missing_pack_asset")
+    pack_dir = Path.join(root, "pack")
+    File.mkdir_p!(Path.join(pack_dir, "skills"))
+    File.write!(Path.join(pack_dir, "repositories.yaml"), "repositories: []\n")
+
+    on_exit(fn -> restart_config!(original) end)
+    stop_config()
+
+    previous_flag = Process.flag(:trap_exit, true)
+
+    assert {:error, {%RuntimeError{message: message}, _stack}} =
+             Config.start_link(root: root, pack_dir: pack_dir)
+
+    receive do
+      {:EXIT, _pid, {%RuntimeError{}, _stack}} -> :ok
+    after
+      0 -> :ok
+    end
+
+    Process.flag(:trap_exit, previous_flag)
+
+    assert message =~ "SYMPHONY_WORKFLOWS_DIR must point at an existing directory"
+    refute File.exists?(Path.join(pack_dir, "workflows"))
+  end
+
+  defp write_pack!(pack_dir) do
+    File.mkdir_p!(Path.join(pack_dir, "workflows"))
+    File.mkdir_p!(Path.join(pack_dir, "skills"))
+    File.write!(Path.join(pack_dir, "repositories.yaml"), "repositories: []\n")
+    pack_dir
+  end
+
+  defp restart_config!(%Config{} = snapshot) do
+    # Replay every field of the captured struct as a {key, value} option list
+    # and hand it to the list-based clause of restart_config!/1.
+    snapshot
+    |> Map.from_struct()
+    |> Map.to_list()
+    |> restart_config!()
+  end
+
+  defp restart_config!(opts) do
+    stop_config()
+    assert {:ok, pid} = Config.start_link(opts)
+    Process.unlink(pid)
+  end
+
+  defp stop_config do
+    case Process.whereis(Config) do
+      nil ->
+        :ok
+
+      pid ->
+        ref = Process.monitor(pid)
+        GenServer.stop(pid, :normal)
+
+        receive do
+          {:DOWN, ^ref, :process, ^pid, _reason} -> :ok
+        after
+          1_000 -> flunk("timed out stopping SymphonyElixir.Config")
+        end
+    end
+  end
+
+  defp tmp_dir(name) do
+    dir = Path.join(System.tmp_dir!(), "symphony_#{name}_#{System.unique_integer([:positive])}")
+    File.mkdir_p!(dir)
+    on_exit(fn -> File.rm_rf!(dir) end)
+    dir
+  end
+end
diff --git a/packages/symphony/elixir/test/cron_expression_test.exs b/packages/symphony/elixir/test/cron_expression_test.exs
new file mode 100644
index 000000000..bf0ef2637
--- /dev/null
+++ b/packages/symphony/elixir/test/cron_expression_test.exs
@@ -0,0 +1,111 @@
+defmodule SymphonyElixir.CronExpressionTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.CronExpression
+
+  describe "parse/1" do
+    test "accepts the nicknames" do
+      Enum.each(~w(@yearly @annually @monthly @weekly @daily @midnight @hourly), fn nickname ->
+        assert {:ok, parsed} = CronExpression.parse(nickname)
+        assert parsed.source == nickname
+      end)
+    end
+
+    test "trims whitespace before resolving nicknames" do
+      assert {:ok, parsed} = CronExpression.parse("  @monthly  ")
+      assert parsed.source == "@monthly"
+    end
+
+    test "accepts standard 5-field cron strings" do
+      assert {:ok, _} = CronExpression.parse("0 0 1 * *")
+      assert {:ok, _} = CronExpression.parse("*/15 * * * *")
+      assert {:ok, _} = CronExpression.parse("0 9-17 * * 1-5")
+      assert {:ok, _} = CronExpression.parse("0,15,30,45 * * * *")
+    end
+
+    test "rejects malformed expressions" do
+      assert {:error, _} = CronExpression.parse("not a cron")
+      assert {:error, _} = CronExpression.parse("0 0 1 *")
+      assert {:error, _} = CronExpression.parse("60 0 1 * *")
+      assert {:error, _} = CronExpression.parse("0 24 1 * *")
+      assert {:error, _} = CronExpression.parse("0 0 32 * *")
+      assert {:error, _} = CronExpression.parse("0 0 1 13 *")
+      assert {:error, _} = CronExpression.parse("0 0 1 * 7")
+    end
+
+    test "rejects inverted ranges" do
+      assert {:error, _} = CronExpression.parse("10-5 0 1 * *")
+    end
+
+    test "rejects non-positive step" do
+      assert {:error, _} = CronExpression.parse("*/0 0 1 * *")
+    end
+  end
+
+  describe "next_fire_after/2 with @hourly" do
+    test "advances to the next hour boundary" do
+      {:ok, parsed} = CronExpression.parse("@hourly")
+      from = ~U[2026-05-17 14:23:00Z]
+      assert {:ok, ~U[2026-05-17 15:00:00Z]} = CronExpression.next_fire_after(parsed, from)
+    end
+
+    test "never returns the from moment itself" do
+      {:ok, parsed} = CronExpression.parse("@hourly")
+      from = ~U[2026-05-17 14:00:00Z]
+      assert {:ok, ~U[2026-05-17 15:00:00Z]} = CronExpression.next_fire_after(parsed, from)
+    end
+  end
+
+  describe "next_fire_after/2 with @daily" do
+    test "advances to midnight UTC the next day" do
+      {:ok, parsed} = CronExpression.parse("@daily")
+      from = ~U[2026-05-17 14:00:00Z]
+      assert {:ok, ~U[2026-05-18 00:00:00Z]} = CronExpression.next_fire_after(parsed, from)
+    end
+  end
+
+  describe "next_fire_after/2 with @monthly" do
+    test "advances to the 1st of the next month at 00:00 UTC" do
+      {:ok, parsed} = CronExpression.parse("@monthly")
+      from = ~U[2026-05-17 14:00:00Z]
+      assert {:ok, ~U[2026-06-01 00:00:00Z]} = CronExpression.next_fire_after(parsed, from)
+    end
+
+    test "rolls into the next year correctly" do
+      {:ok, parsed} = CronExpression.parse("@monthly")
+      from = ~U[2026-12-15 09:30:00Z]
+      assert {:ok, ~U[2027-01-01 00:00:00Z]} = CronExpression.next_fire_after(parsed, from)
+    end
+  end
+
+  describe "next_fire_after/2 with explicit 5-field" do
+    test "*/15 * * * * fires on the next quarter-hour" do
+      {:ok, parsed} = CronExpression.parse("*/15 * * * *")
+      from = ~U[2026-05-17 14:07:00Z]
+      assert {:ok, ~U[2026-05-17 14:15:00Z]} = CronExpression.next_fire_after(parsed, from)
+    end
+
+    test "weekday business hours respects day-of-week" do
+      # 9am on weekdays (Mon-Fri). 2026-05-17 is a Sunday.
+      {:ok, parsed} = CronExpression.parse("0 9 * * 1-5")
+      from = ~U[2026-05-17 12:00:00Z]
+      assert {:ok, ~U[2026-05-18 09:00:00Z]} = CronExpression.next_fire_after(parsed, from)
+    end
+
+    test "POSIX OR semantics for DOM and DOW when both restricted" do
+      # 'every 1st of the month OR every Friday'
+      {:ok, parsed} = CronExpression.parse("0 0 1 * 5")
+      # Thursday May 14 2026 -> first match is Friday May 15
+      from = ~U[2026-05-14 12:00:00Z]
+      assert {:ok, ~U[2026-05-15 00:00:00Z]} = CronExpression.next_fire_after(parsed, from)
+    end
+  end
+
+  describe "matches?/2" do
+    test "@hourly matches every wall-clock hour" do
+      {:ok, parsed} = CronExpression.parse("@hourly")
+      assert CronExpression.matches?(parsed, ~U[2026-05-17 03:00:00Z])
+      refute CronExpression.matches?(parsed, ~U[2026-05-17 03:01:00Z])
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/github_app_test.exs b/packages/symphony/elixir/test/github_app_test.exs
new file mode 100644
index 000000000..a71b6214d
--- /dev/null
+++ b/packages/symphony/elixir/test/github_app_test.exs
@@ -0,0 +1,45 @@
+defmodule SymphonyElixir.GithubAppTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.GithubApp
+
+  describe "configured?/1" do
+    test "false when app id is missing" do
+      refute GithubApp.configured?(%{
+               github_app_id: nil,
+               github_app_private_key_pem: "irrelevant"
+             })
+    end
+
+    test "false when key is missing" do
+      refute GithubApp.configured?(%{
+               github_app_id: "123",
+               github_app_private_key_pem: nil
+             })
+    end
+
+    test "false when either is empty string" do
+      refute GithubApp.configured?(%{
+               github_app_id: "",
+               github_app_private_key_pem: "pem"
+             })
+
+      refute GithubApp.configured?(%{
+               github_app_id: "123",
+               github_app_private_key_pem: ""
+             })
+    end
+
+    test "true when both id and key are present" do
+      assert GithubApp.configured?(%{
+               github_app_id: "123",
+               github_app_private_key_pem: "-----BEGIN RSA PRIVATE KEY-----\n..."
+             })
+    end
+
+    test "false when passed a non-config-shaped term" do
+      refute GithubApp.configured?(nil)
+      refute GithubApp.configured?(%{})
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/slack_events_controller_test.exs b/packages/symphony/elixir/test/slack_events_controller_test.exs
new file mode 100644
index 000000000..9c70d2b9c
--- /dev/null
+++ b/packages/symphony/elixir/test/slack_events_controller_test.exs
@@ -0,0 +1,18 @@
+defmodule SymphonyElixirWeb.SlackEventsControllerTest do
+  use ExUnit.Case, async: true
+  import Plug.Conn
+  import Plug.Test
+
+  @opts SymphonyElixirWeb.Endpoint.init([])
+
+  test "rejects Slack events when the signing secret is not configured" do
+    # Only a content-type header is attached; the request carries no Slack signature.
+    payload = Jason.encode!(%{type: "event_callback"})
+    request = conn(:post, "/api/v1/triggers/slack/events", payload)
+    request = put_req_header(request, "content-type", "application/json")
+    response = SymphonyElixirWeb.Endpoint.call(request, @opts)
+
+    assert response.status == 401
+    assert Jason.decode!(response.resp_body) == %{"error" => "slack signing secret not configured"}
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/dsl/fixtures/release.sym b/packages/symphony/elixir/test/symphony_elixir/dsl/fixtures/release.sym
new file mode 100644
index 000000000..475bd783a
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/dsl/fixtures/release.sym
@@ -0,0 +1,33 @@
+# A small workflow exercising binds, an envelope, a skill prompt, an inline
+# prompt with interpolation, and a when gate. inspect and report read disjoint
+# inputs (report has no edge to inspect's output), so the interpreter is free
+# to run them in parallel; summary and the when gate read inspect's output.
+
+workflow "release" {
+  inspect <- agent {
+    engine: codex
+    model: "gpt-5.3-codex"
+    effort: medium
+    permissions: workspace_write
+    location: local
+    prompt: skill "inspect" { repo: "symphony" }
+  }
+
+  report <- agent {
+    engine: claude
+    model: haiku
+    permissions: read_only
+    prompt: inline "write a status report and stop"
+  }
+
+  summary <- agent {
+    engine: codex
+    model: "gpt-5.3-codex"
+    permissions: read_only
+    prompt: inline "summarize ${inspect.area}"
+  }
+
+  when ${inspect.changed} {
+    notify <- exec "./scripts/notify.sh" timeout 30
+  }
+}
diff --git a/packages/symphony/elixir/test/symphony_elixir/dsl/interpreter_test.exs b/packages/symphony/elixir/test/symphony_elixir/dsl/interpreter_test.exs
new file mode 100644
index 000000000..0f9fb966c
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/dsl/interpreter_test.exs
@@ -0,0 +1,455 @@
+defmodule SymphonyElixir.DSL.InterpreterTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.DSL.Interpreter
+  alias SymphonyElixir.DSL.Parser
+  alias SymphonyElixir.IR.Node
+
+  defp parse!(source) do
+    {:ok, ast} = Parser.parse(source)
+    ast
+  end
+
+  # Node carries wall-clock created_at/updated_at, which differ between
+  # expand calls and are not part of the determinism contract. Compare the
+  # structural fields that the interpreter actually decides.
+  defp structural(nodes) when is_list(nodes), do: Enum.map(nodes, &structural/1)
+
+  defp structural(%Node{} = node) do
+    Map.take(node, [:id, :ast_origin, :kind, :envelope, :prompt_ref, :inputs, :deps, :expansion_key, :state, :output])
+  end
+
+  describe "expand/3 effect emission" do
+    test "only effectful constructors become IR nodes; lets do not" do
+      ast =
+        parse!("""
+        workflow "w" {
+          label = "build-1"
+          run <- agent { engine: codex, model: "m", prompt: inline "go" }
+        }
+        """)
+
+      {delta, _pending, _log} = Interpreter.expand(ast, %{}, [])
+
+      assert [%Node{kind: :agent, id: "agent-0"}] = delta
+    end
+
+    test "agent node carries the envelope spec map and prompt ref" do
+      ast =
+        parse!("""
+        workflow "w" {
+          run <- agent {
+            engine: codex
+            model: "m"
+            permissions: read_only
+            prompt: skill "inspect" { repo: "symphony" }
+          }
+        }
+        """)
+
+      {[node], _pending, _log} = Interpreter.expand(ast, %{}, [])
+
+      assert node.envelope == %{"engine" => "codex", "model" => "m", "permissions" => "read_only"}
+      assert {:skill, "inspect", %{"repo" => "symphony"}} = node.prompt_ref
+      assert node.inputs["repo"] == {:literal, "symphony"}
+    end
+  end
+
+  describe "derived deps and parallelism" do
+    test "a downstream read of a binding becomes a node edge" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "first" }
+          b <- agent { engine: codex, model: "m", prompt: skill "next" { ctx: ${a.area} } }
+        }
+        """)
+
+      {delta, _pending, _log} = Interpreter.expand(ast, %{}, [])
+      by_id = Map.new(delta, &{&1.id, &1})
+
+      a = by_id["agent-0"]
+      b = by_id["agent-1"]
+
+      assert a.deps == []
+      assert b.inputs["ctx"] == {:node, "agent-0", ["area"]}
+      assert b.deps == ["agent-0"]
+    end
+
+    test "two data-independent binds have no edge and run in parallel" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "one" }
+          b <- agent { engine: claude, model: "haiku", prompt: inline "two" }
+        }
+        """)
+
+      {nodes, _pending, _log} = Interpreter.expand(ast, %{}, [])
+
+      assert Enum.all?(nodes, fn node -> node.deps == [] end)
+      assert nodes |> Enum.map(& &1.id) |> Enum.sort() == ["agent-0", "agent-1"]
+    end
+  end
+
+  describe "when gate" do
+    test "emits a placeholder while the gating input is unresolved" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "do" }
+          when ${a.changed} {
+            n <- exec "./n.sh"
+          }
+        }
+        """)
+
+      {delta, pending, log} = Interpreter.expand(ast, %{}, [])
+      by_kind = Enum.group_by(delta, & &1.kind)
+
+      assert [gate] = by_kind[:gate]
+      assert gate.inputs["gate"] == {:node, "agent-0", ["changed"]}
+      assert gate.deps == ["agent-0"]
+      # the body exec is not emitted yet
+      assert by_kind[:exec] == nil
+      assert {:awaiting, "when-1", ["agent-0"]} in pending
+      assert log == []
+    end
+
+    test "expands the body only when the input resolves truthy" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "do" }
+          when ${a.changed} {
+            n <- exec "./n.sh"
+          }
+        }
+        """)
+
+      known = %{"agent-0" => %{"changed" => true}}
+      {delta, _pending, log} = Interpreter.expand(ast, known, [])
+
+      assert Enum.any?(delta, &(&1.kind == :exec))
+      assert [%{observed: %{gate: :when, opened: true}}] = log
+    end
+
+    test "skips the body when the input resolves falsy" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "do" }
+          when ${a.changed} {
+            n <- exec "./n.sh"
+          }
+        }
+        """)
+
+      known = %{"agent-0" => %{"changed" => false}}
+      {delta, _pending, log} = Interpreter.expand(ast, known, [])
+
+      refute Enum.any?(delta, &(&1.kind == :exec))
+      assert [%{observed: %{gate: :when, opened: false}}] = log
+    end
+  end
+
+  describe "every_nth gate" do
+    # `every n` is one tick per run, evaluated at materialize against an
+    # empty log. A run drives `expand_dynamic/1` several times (init, then
+    # after each node success), re-feeding the grown log, so a re-pass must
+    # reproduce the materialize decision and never advance the tick. The
+    # cross-run tick advance is a separate concern (a future run would seed
+    # its counter from the prior run's terminal log); the runtime today
+    # never carries one run's log into the next run's materialize.
+    test "evaluates one tick per run at the empty-log materialize pass" do
+      ast =
+        parse!("""
+        workflow "w" {
+          every 3 of gc {
+            run <- exec "./gc.sh"
+          }
+        }
+        """)
+
+      # tick 1 (every 3): empty log -> skip.
+      {d1, _p1, _log1} = Interpreter.expand(ast, %{}, [])
+      refute Enum.any?(d1, &(&1.kind == :exec))
+
+      one =
+        parse!("""
+        workflow "w" {
+          every 1 of gc {
+            run <- exec "./gc.sh"
+          }
+        }
+        """)
+
+      # tick 1 (every 1): fires immediately on the materialize pass.
+      {d2, _p2, _log2} = Interpreter.expand(one, %{}, [])
+      assert Enum.any?(d2, &(&1.kind == :exec))
+    end
+
+    test "re-expansion within a run reproduces the tick, never advancing it" do
+      ast =
+        parse!("""
+        workflow "w" {
+          every 2 of c {
+            run <- exec "./x.sh"
+          }
+        }
+        """)
+
+      # The first (materialize) pass against an empty log skips (tick 1 of 2)
+      # and records the decision in the log.
+      {d0, _p0, log_after_skip} = Interpreter.expand(ast, %{}, [])
+      refute Enum.any?(d0, &(&1.kind == :exec))
+
+      # Re-feeding that log (what `expand_dynamic/1` does on every later
+      # pass) reproduces the recorded skip rather than advancing to a fire,
+      # so the live graph and a cold replay stay identical. This is the
+      # replay invariant from `IR.RunGraph`.
+      {a, _, log_a} = Interpreter.expand(ast, %{}, log_after_skip)
+      {b, _, log_b} = Interpreter.expand(ast, %{}, log_after_skip)
+      assert structural(a) == structural(b)
+      refute Enum.any?(a, &(&1.kind == :exec))
+      # No duplicate tick event is appended on a reproduction pass.
+      assert length(log_a) == length(log_after_skip)
+      assert log_a == log_b
+    end
+
+    test "a fired tick re-emits its body idempotently on re-expansion" do
+      ast =
+        parse!("""
+        workflow "w" {
+          every 1 of c {
+            run <- exec "./x.sh"
+          }
+        }
+        """)
+
+      # tick 1 fires; the body exec is emitted and one fire event is logged.
+      {d0, _p0, log0} = Interpreter.expand(ast, %{}, [])
+      assert Enum.any?(d0, &(&1.kind == :exec))
+
+      # A re-pass re-emits the same body (so the materializer re-derives and
+      # merges it) without appending a second fire event.
+      {d1, _p1, log1} = Interpreter.expand(ast, %{}, log0)
+      assert Enum.any?(d1, &(&1.kind == :exec))
+      assert structural(d0) == structural(d1)
+      assert length(log1) == length(log0)
+    end
+  end
+
+  describe "map fan-out" do
+    test "emits one keyed child per element once the list resolves" do
+      ast =
+        parse!("""
+        workflow "w" {
+          seed <- agent { engine: codex, model: "m", prompt: inline "list repos" }
+          map ${seed.repos} as repo {
+            child <- exec "./audit.sh" { target: ${repo} }
+          }
+        }
+        """)
+
+      # unresolved: a single placeholder
+      {d0, pending, _l0} = Interpreter.expand(ast, %{}, [])
+      assert Enum.any?(d0, &(&1.kind == :map_fanout))
+      assert {:awaiting, "map-1", ["agent-0"]} in pending
+
+      # resolved: one exec per element, each binding the element literally
+      known = %{"agent-0" => %{"repos" => ["alpha", "beta"]}}
+      {d1, _p1, log} = Interpreter.expand(ast, known, [])
+
+      execs = Enum.filter(d1, &(&1.kind == :exec))
+      assert length(execs) == 2
+
+      targets = execs |> Enum.map(& &1.inputs["target"]) |> Enum.sort()
+      assert targets == [{:literal, "alpha"}, {:literal, "beta"}]
+
+      assert [%{observed: %{gate: :map, count: 2}}] = log
+      # children carry distinct ids derived from the fan-out key
+      assert execs |> Enum.map(& &1.id) |> Enum.uniq() |> length() == 2
+    end
+
+    test "an empty list resolves to zero children and no placeholder" do
+      ast =
+        parse!("""
+        workflow "w" {
+          seed <- agent { engine: codex, model: "m", prompt: inline "list repos" }
+          map ${seed.repos} as repo {
+            child <- exec "./audit.sh" { target: ${repo} }
+          }
+        }
+        """)
+
+      known = %{"agent-0" => %{"repos" => []}}
+      {delta, pending, log} = Interpreter.expand(ast, known, [])
+
+      # No body child and no leftover placeholder: an empty fan-out emits
+      # nothing for the materializer to schedule. The count event is still
+      # logged so a replay reproduces the zero-child decision.
+      refute Enum.any?(delta, &(&1.kind in [:exec, :map_fanout]))
+      assert pending == []
+      assert [%{observed: %{gate: :map, count: 0}}] = log
+    end
+
+    test "a non-list over folds to zero children rather than crashing" do
+      ast =
+        parse!("""
+        workflow "w" {
+          seed <- agent { engine: codex, model: "m", prompt: inline "scalar" }
+          map ${seed.value} as it {
+            child <- exec "./n.sh" { v: ${it} }
+          }
+        }
+        """)
+
+      # A scalar where a list is expected is a typed mismatch surfaced as an
+      # empty fan-out, not an exception in the expand pass.
+      known = %{"agent-0" => %{"value" => "not-a-list"}}
+      {delta, _pending, log} = Interpreter.expand(ast, known, [])
+
+      refute Enum.any?(delta, &(&1.kind in [:exec, :map_fanout]))
+      assert [%{observed: %{gate: :map, over: :not_a_list}}] = log
+    end
+
+    test "re-expanding a fanned-out map re-emits identical children for an idempotent merge" do
+      ast =
+        parse!("""
+        workflow "w" {
+          seed <- agent { engine: codex, model: "m", prompt: inline "list repos" }
+          map ${seed.repos} as repo {
+            child <- exec "./audit.sh" { target: ${repo} }
+          }
+        }
+        """)
+
+      known = %{"agent-0" => %{"repos" => ["alpha", "beta"]}}
+
+      # The fan-out is a pure function of the resolved list, so two passes
+      # against the same known outputs emit byte-identical children. This is
+      # what lets the materializer re-emit on every `expand_dynamic` pass and
+      # merge by stable id without duplicating a child.
+      {d1, p1, l1} = Interpreter.expand(ast, known, [])
+      {d2, p2, l2} = Interpreter.expand(ast, known, [])
+      assert structural(d1) == structural(d2)
+      assert p1 == p2
+      assert l1 == l2
+    end
+  end
+
+  describe "determinism invariant" do
+    test "expand is a pure function of its inputs" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "go" }
+          when ${a.ok} {
+            b <- exec "./b.sh"
+          }
+          map ${a.items} as it {
+            c <- exec "./c.sh" { v: ${it} }
+          }
+        }
+        """)
+
+      known = %{"a" => nil, "agent-0" => %{"ok" => true, "items" => [1, 2, 3]}}
+
+      {d1, p1, l1} = Interpreter.expand(ast, known, [])
+      {d2, p2, l2} = Interpreter.expand(ast, known, [])
+
+      assert structural(d1) == structural(d2)
+      assert p1 == p2
+      assert l1 == l2
+    end
+  end
+
+  describe "bound gates" do
+    test "a bound when gate binds the resolved body node so downstream reads it" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "do" }
+          changed <- when ${a.flag} {
+            n <- exec "./n.sh"
+          }
+          post <- agent { engine: codex, model: "m", prompt: skill "s" { from: ${changed.path} } }
+        }
+        """)
+
+      known = %{"agent-0" => %{"flag" => true}}
+      {delta, _pending, _log} = Interpreter.expand(ast, known, [])
+
+      exec = Enum.find(delta, &(&1.kind == :exec))
+      post = Enum.find(delta, &(&1.kind == :agent and &1.id != "agent-0"))
+
+      assert exec, "the gate body exec should be emitted on the firing pass"
+      # The gate's binding (`changed`) must point at the body node, not the
+      # vanished placeholder, so the downstream edge resolves.
+      assert post.inputs["from"] == {:node, exec.id, ["path"]}
+      assert exec.id in post.deps
+    end
+
+    test "a bound every_nth gate binds the body node on the firing tick" do
+      ast =
+        parse!("""
+        workflow "w" {
+          tick <- every 1 of c {
+            n <- exec "./n.sh"
+          }
+          post <- agent { engine: codex, model: "m", prompt: skill "s" { from: ${tick.path} } }
+        }
+        """)
+
+      {delta, _pending, _log} = Interpreter.expand(ast, %{}, [])
+
+      exec = Enum.find(delta, &(&1.kind == :exec))
+      post = Enum.find(delta, &(&1.kind == :agent))
+
+      assert exec
+      assert post.inputs["from"] == {:node, exec.id, ["path"]}
+      assert exec.id in post.deps
+    end
+
+    test "the gate placeholder is gone once the when input resolves" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "do" }
+          when ${a.changed} {
+            n <- exec "./n.sh"
+          }
+        }
+        """)
+
+      known = %{"agent-0" => %{"changed" => true}}
+      {delta, _pending, _log} = Interpreter.expand(ast, known, [])
+
+      refute Enum.any?(delta, &(&1.kind == :gate))
+      assert Enum.any?(delta, &(&1.kind == :exec))
+    end
+  end
+
+  describe "deferred inline prompts" do
+    test "an inline prompt over an unresolved output defers, then folds to text" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "first" }
+          b <- agent { engine: codex, model: "m", prompt: inline "use ${a.result} now" }
+        }
+        """)
+
+      {d0, pending, _l0} = Interpreter.expand(ast, %{}, [])
+      b0 = Enum.find(d0, &(&1.id == "agent-1"))
+      assert b0.prompt_ref == {:inline, nil}
+      assert {:awaiting, "agent-1", ["agent-0"]} in pending
+
+      known = %{"agent-0" => %{"result" => "X"}}
+      {d1, _p1, _l1} = Interpreter.expand(ast, known, [])
+      b1 = Enum.find(d1, &(&1.id == "agent-1"))
+      assert b1.prompt_ref == {:inline, "use X now"}
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/dsl/parser_test.exs b/packages/symphony/elixir/test/symphony_elixir/dsl/parser_test.exs
new file mode 100644
index 000000000..0c27c897e
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/dsl/parser_test.exs
@@ -0,0 +1,195 @@
+defmodule SymphonyElixir.DSL.ParserTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.DSL.Parser
+
+  @fixture Path.join(__DIR__, "fixtures/release.sym")
+
+  describe "parse/1" do
+    test "parses the release fixture into a workflow AST" do
+      script = File.read!(@fixture)
+      assert {:ok, workflow} = Parser.parse(script)
+
+      assert workflow.kind == :workflow
+      assert workflow.name == "release"
+      # Expected statement count: three agent binds plus one when gate.
+      assert length(workflow.statements) == 4
+    end
+
+    test "binds introduce names and effects carry envelope and prompt" do
+      script = """
+      workflow "one" {
+        session <- agent {
+          engine: codex
+          model: "gpt-5.3-codex"
+          permissions: workspace_write
+          prompt: skill "inspect" { repo: "symphony" }
+        }
+      }
+      """
+
+      assert {:ok, workflow} = Parser.parse(script)
+      assert [{:bind, "session", effect}] = workflow.statements
+      assert effect.kind == :agent
+      assert effect.envelope == %{"engine" => "codex", "model" => "gpt-5.3-codex", "permissions" => "workspace_write"}
+      assert {:skill, "inspect", %{"repo" => {:literal, "symphony"}}} = effect.prompt
+    end
+
+    test "inline prompt interpolation lowers to a field read over a binding" do
+      script = """
+      workflow "two" {
+        a <- agent { engine: codex, model: "m", prompt: inline "x" }
+        b <- agent { engine: codex, model: "m", prompt: inline "use ${a.area} now" }
+      }
+      """
+
+      assert {:ok, workflow} = Parser.parse(script)
+      assert [_first, {:bind, "b", second}] = workflow.statements
+      assert {:inline, {:concat, segments}} = second.prompt
+      assert {:literal, "use "} = Enum.at(segments, 0)
+      assert {:field, {:var, "a"}, ["area"]} = Enum.at(segments, 1)
+      assert {:literal, " now"} = Enum.at(segments, 2)
+    end
+
+    test "every and map and exec parse with their combinator shape" do
+      script = """
+      workflow "combos" {
+        every 3 of gc_counter {
+          gc <- exec "./gc.sh" timeout 60
+        }
+
+        map ${seed.repos} as repo {
+          child <- subrun "audit.sym" { target: ${repo} }
+        }
+      }
+      """
+
+      assert {:ok, workflow} = Parser.parse(script)
+      assert [every_stmt, map_stmt] = workflow.statements
+      assert every_stmt.kind == :every_nth
+      assert every_stmt.n == 3
+      assert every_stmt.counter == "gc_counter"
+      assert {:bind, "gc", %{kind: :exec, timeout: {:literal, 60}}} = every_stmt.body
+
+      assert map_stmt.kind == :map
+      assert map_stmt.as == "repo"
+      assert {:field, {:var, "seed"}, ["repos"]} = map_stmt.over
+      assert {:bind, "child", %{kind: :subrun}} = map_stmt.body
+    end
+
+    test "diagnostics carry a 1-based line and column" do
+      script = """
+      workflow "bad" {
+        x <- agent {
+          engine: codex
+          model: "m"
+        }
+        oops
+      }
+      """
+
+      assert {:error, diagnostic} = Parser.parse(script)
+      assert is_binary(diagnostic.message)
+      assert is_integer(diagnostic.line) and diagnostic.line >= 1
+      assert is_integer(diagnostic.column) and diagnostic.column >= 1
+    end
+
+    test "the diagnostic carries the file name a caller passes" do
+      script = ~s(workflow "bad" { oops })
+
+      assert {:error, named} = Parser.parse(script, file: "bad.sym")
+      assert named.file == "bad.sym"
+
+      # Without a :file option the diagnostic's file is nil.
+      assert {:error, unnamed} = Parser.parse(script)
+      assert unnamed.file == nil
+    end
+
+    test "a tokenizer error also carries the caller's file name" do
+      # An unterminated string is rejected while tokenizing, before any parse
+      # state exists; the diagnostic must still report the caller's file.
+      script = ~s(workflow "u" {\n  x <- agent { engine: codex, model: "oops\n}\n)
+
+      assert {:error, diagnostic} = Parser.parse(script, file: "u.sym")
+      assert diagnostic.file == "u.sym"
+      assert diagnostic.message =~ "string"
+    end
+
+    test "a missing prompt is a load error" do
+      script = """
+      workflow "np" {
+        x <- agent { engine: codex, model: "m" }
+      }
+      """
+
+      assert {:error, diagnostic} = Parser.parse(script)
+      assert diagnostic.message =~ "prompt"
+    end
+
+    test "an unterminated string reports the open position" do
+      script = ~s(workflow "u" {\n  x <- agent { engine: codex, model: "oops\n}\n)
+      assert {:error, diagnostic} = Parser.parse(script)
+      assert diagnostic.message =~ "string"
+      assert diagnostic.line == 2
+    end
+  end
+
+  describe "trigger header" do
+    defp parse!(text) do
+      {:ok, workflow} = Parser.parse(text)
+      workflow
+    end
+
+    test "a workflow with no `on` clause has a nil trigger" do
+      assert parse!(~s(workflow "w" { a <- agent { engine: codex, model: "m", prompt: inline "go" } })).trigger ==
+               nil
+    end
+
+    test "manual" do
+      assert parse!(~s(workflow "w" on manual { a <- exec "./x.sh" })).trigger == %{kind: :manual}
+    end
+
+    test "linear normalizes the label" do
+      assert parse!(~s(workflow "w" on linear label "[Sym] Implement" { a <- exec "./x.sh" })).trigger ==
+               %{kind: :linear, label: "[sym] implement"}
+    end
+
+    test "cron carries schedule, timezone, and input" do
+      script = ~s|workflow "w" on cron "0 9 * * *" tz "UTC" input { lookback_hours: 5 } { a <- exec "./x.sh" }|
+
+      assert parse!(script).trigger == %{
+               kind: :cron,
+               schedule: "0 9 * * *",
+               timezone: "UTC",
+               input: %{"lookback_hours" => 5}
+             }
+    end
+
+    test "cron defaults the timezone and input when omitted" do
+      assert parse!(~s|workflow "w" on cron "* * * * *" { a <- exec "./x.sh" }|).trigger == %{
+               kind: :cron,
+               schedule: "* * * * *",
+               timezone: "UTC",
+               input: %{}
+             }
+    end
+
+    test "slack_huddle and slack_mention map to the runtime kinds" do
+      assert parse!(~s(workflow "w" on slack_huddle channel "focus" { a <- exec "./x.sh" })).trigger ==
+               %{kind: :slack_huddle_completed, channel: "focus"}
+
+      assert parse!(~s(workflow "w" on slack_mention channel "#playbook" { a <- exec "./x.sh" })).trigger ==
+               %{kind: :slack_app_mention, channel: "#playbook"}
+    end
+
+    test "github_pr_label carries repo and normalized label" do
+      assert parse!(~s(workflow "w" on github_pr_label repo "indexable-inc/ix" label "Review-Loop" { a <- exec "./x.sh" })).trigger ==
+               %{kind: :github_pr_label, repo: "indexable-inc/ix", label: "review-loop"}
+    end
+
+    test "an unknown trigger kind is a diagnostic" do
+      assert {:error, diagnostic} = Parser.parse(~s(workflow "w" on telepathy { a <- exec "./x.sh" }))
+      assert diagnostic.message =~ "trigger kind"
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/dsl/schema_test.exs b/packages/symphony/elixir/test/symphony_elixir/dsl/schema_test.exs
new file mode 100644
index 000000000..02f0cd511
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/dsl/schema_test.exs
@@ -0,0 +1,61 @@
+defmodule SymphonyElixir.DSL.SchemaTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.DSL.{AST, Parser, Schema}
+  alias SymphonyElixir.Engine.Envelope
+  alias SymphonyElixir.IR.Node
+
+  describe "to_map/0" do
+    test "each field is the owner's accessor verbatim" do
+      # The point of the schema is that it does not restate the enums: it
+      # reads each owner's single source of truth. Asserting equality here
+      # means adding a value at the owner flows through with no schema edit
+      # and no UI edit.
+      schema = Schema.to_map()
+
+      assert schema.engines == Envelope.engines()
+      assert schema.efforts == Envelope.efforts()
+      assert schema.permissions == Envelope.permission_levels()
+      assert schema.locations == Envelope.locations()
+      assert schema.node_kinds == Node.kinds()
+      assert schema.node_states == Node.states()
+      assert schema.effect_kinds == AST.effect_kinds()
+      assert schema.trigger_kinds == Parser.trigger_kinds()
+    end
+
+    test "every value is a list of atoms, so it encodes to JSON as strings" do
+      schema = Schema.to_map()
+
+      for {_key, values} <- schema do
+        assert is_list(values)
+        assert Enum.all?(values, &is_atom/1)
+      end
+
+      assert {:ok, _json} = Jason.encode(schema)
+    end
+  end
+
+  describe "trigger_kinds/0" do
+    test "every advertised trigger kind parses through an `on` clause" do
+      # Guard against the accessor drifting from the parser's dispatch: a
+      # kind the schema offers but the parser rejects would be a dead UI
+      # option. Each kind gets its minimal valid params.
+      params = %{
+        manual: "",
+        cron: ~s|"0 * * * *"|,
+        linear: ~s|label "ready"|,
+        slack_huddle: ~s|channel "C123"|,
+        slack_mention: ~s|channel "C123"|,
+        github_pr_label: ~s|repo "owner/name" label "ship"|
+      }
+
+      for kind <- Parser.trigger_kinds() do
+        clause = Map.fetch!(params, kind)
+        source = ~s|workflow "w" on #{kind} #{clause} { a <- exec "./x" }|
+        # assert/2 is a plain function, so match?/2 is what lets the message surface on failure.
+        parsed = Parser.parse(source)
+        assert match?({:ok, %{trigger: %{}}}, parsed), "expected #{kind} to parse, got: #{inspect(parsed)}"
+      end
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/engine/client_test.exs b/packages/symphony/elixir/test/symphony_elixir/engine/client_test.exs
new file mode 100644
index 000000000..4e3cee69e
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/engine/client_test.exs
@@ -0,0 +1,278 @@
+defmodule SymphonyElixir.Engine.ClientTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.Engine.{Client, Envelope}
+
+  describe "request_body/2" do
+    test "lowers a codex envelope to the camelCase TurnRequest wire shape" do
+      {:ok, env} =
+        Envelope.validate(%Envelope{
+          engine: :codex,
+          model: "gpt-5.3-codex",
+          effort: :high,
+          permissions: :workspace_write,
+          location: :local
+        })
+
+      assert {:ok, wire} =
+               Client.request_body(env, %{
+                 prompt: "write FOO to ./hello.txt and stop.",
+                 cwd: "/workspace",
+                 run_id: "run_1",
+                 node_id: "n0"
+               })
+
+      assert wire == %{
+               "engine" => "codex",
+               "model" => "gpt-5.3-codex",
+               "effort" => "high",
+               "permissions" => "workspace_write",
+               "cwd" => "/workspace",
+               "prompt" => "write FOO to ./hello.txt and stop.",
+               "tools" => [],
+               "runId" => "run_1",
+               "nodeId" => "n0"
+             }
+    end
+
+    test "omits effort when the envelope leaves it nil" do
+      {:ok, env} =
+        Envelope.validate(%Envelope{engine: :claude, model: "haiku", permissions: :danger_full_access, location: :local})
+
+      assert {:ok, wire} = Client.request_body(env, %{prompt: "hi", cwd: "/w"})
+      refute Map.has_key?(wire, "effort")
+      assert wire["engine"] == "claude"
+      assert wire["permissions"] == "danger_full_access"
+    end
+
+    test "drops nil correlation ids rather than sending null" do
+      {:ok, env} = Envelope.validate(%Envelope{engine: :claude, model: "haiku", location: :local})
+      assert {:ok, wire} = Client.request_body(env, %{prompt: "hi", cwd: "/w"})
+      refute Map.has_key?(wire, "runId")
+      refute Map.has_key?(wire, "nodeId")
+    end
+
+    test "rejects a turn missing the prompt or cwd" do
+      {:ok, env} = Envelope.validate(%Envelope{engine: :claude, model: "haiku", location: :local})
+      assert {:error, :missing_prompt} = Client.request_body(env, %{cwd: "/w"})
+      assert {:error, :missing_cwd} = Client.request_body(env, %{prompt: "hi"})
+    end
+  end
+
+  describe "submit_turn/3 location resolution" do
+    test "a host location resolves to the run's per-run room-server from the placement module" do
+      parent = self()
+
+      # Stands in for `Runtime.Placement`, which would have provisioned a host
+      # room-server (a systemd-run unit) and registered its loopback URL under
+      # run_id. The client looks it up as it does for :ixvm; no real unit runs.
+      defmodule HostPlacement do
+        def base_url("run_host"), do: {:ok, "http://127.0.0.1:41234"}
+        def base_url(_), do: :error
+      end
+
+      {:ok, host_env} = Envelope.validate(%Envelope{engine: :claude, model: "haiku", location: {:host, "box"}})
+
+      recorder = fn conn ->
+        send(parent, {:hit, conn.host, conn.port})
+        respond(conn, %{"threadId" => "t", "outcome" => %{"kind" => "ok"}, "eventCount" => 0})
+      end
+
+      assert {:ok, _} =
+               Client.submit_turn(host_env, %{prompt: "hi", cwd: "/w"},
+                 room_server_url: "http://ignored.default",
+                 run_id: "run_host",
+                 placement: HostPlacement,
+                 req_options: [plug: recorder]
+               )
+
+      assert_received {:hit, "127.0.0.1", 41_234}
+    end
+
+    test "a host location with no acquired placement fails loudly rather than routing to the default" do
+      defmodule UnresolvedHostPlacement do
+        def base_url(_run_id), do: :error
+      end
+
+      {:ok, host_env} = Envelope.validate(%Envelope{engine: :claude, model: "haiku", location: {:host, "box"}})
+
+      assert {:error, {:unresolved_location, {:host, "box"}}} =
+               Client.submit_turn(host_env, %{prompt: "hi", cwd: "/w"},
+                 room_server_url: "http://ignored.default",
+                 run_id: "run_unknown",
+                 placement: UnresolvedHostPlacement
+               )
+    end
+
+    test "a host location without a run_id is unresolved (no context to look up)" do
+      {:ok, host_env} = Envelope.validate(%Envelope{engine: :claude, model: "haiku", location: {:host, "box"}})
+
+      assert {:error, {:unresolved_location, {:host, "box"}}} =
+               Client.submit_turn(host_env, %{prompt: "hi", cwd: "/w"}, room_server_url: "http://ignored.default")
+    end
+
+    test "an ixvm location resolves to the run's per-run room-server from the placement module" do
+      parent = self()
+
+      # Placement stub: in a real run `Runtime.Placement` would have
+      # provisioned this URL before the first agent turn. No VM is created
+      # here; the client only reads back the resolved per-run base URL.
+      defmodule StubPlacement do
+        def base_url("run_42"), do: {:ok, "http://run-42-vm.test:8080"}
+        def base_url(_), do: :error
+      end
+
+      {:ok, vm_env} = Envelope.validate(%Envelope{engine: :claude, model: "haiku", location: :ixvm})
+
+      recorder = fn conn ->
+        send(parent, {:hit, conn.host, conn.port})
+        respond(conn, %{"threadId" => "t", "outcome" => %{"kind" => "ok"}, "eventCount" => 0})
+      end
+
+      assert {:ok, _} =
+               Client.submit_turn(vm_env, %{prompt: "hi", cwd: "/w"},
+                 room_server_url: "http://ignored.default",
+                 run_id: "run_42",
+                 placement: StubPlacement,
+                 req_options: [plug: recorder]
+               )
+
+      assert_received {:hit, "run-42-vm.test", 8080}
+    end
+
+    test "an ixvm location with no acquired placement fails loudly rather than routing to the default" do
+      defmodule UnresolvedPlacement do
+        def base_url(_run_id), do: :error
+      end
+
+      {:ok, vm_env} = Envelope.validate(%Envelope{engine: :claude, model: "haiku", location: :ixvm})
+
+      assert {:error, {:unresolved_location, :ixvm}} =
+               Client.submit_turn(vm_env, %{prompt: "hi", cwd: "/w"},
+                 room_server_url: "http://ignored.default",
+                 run_id: "run_unknown",
+                 placement: UnresolvedPlacement
+               )
+    end
+
+    test "an ixvm location without a run_id is unresolved (no context to look up)" do
+      {:ok, vm_env} = Envelope.validate(%Envelope{engine: :claude, model: "haiku", location: :ixvm})
+
+      assert {:error, {:unresolved_location, :ixvm}} =
+               Client.submit_turn(vm_env, %{prompt: "hi", cwd: "/w"}, room_server_url: "http://ignored.default")
+    end
+
+    test "a local location with no configured url is a clear error" do
+      {:ok, env} = Envelope.validate(%Envelope{engine: :claude, model: "haiku", location: :local})
+      assert {:error, :missing_room_server_url} = Client.submit_turn(env, %{prompt: "hi", cwd: "/w"})
+    end
+  end
+
+  describe "submit_turn/3 against a stub room-server" do
+    test "maps an ok outcome to {:ok, %{thread_id, event_count}}" do
+      stub = stub_plug(%{"threadId" => "thread_abc", "outcome" => %{"kind" => "ok"}, "eventCount" => 4})
+      {:ok, env} = Envelope.validate(%Envelope{engine: :codex, model: "gpt-5.3-codex", location: :local})
+
+      assert {:ok, %{thread_id: "thread_abc", event_count: 4}} =
+               Client.submit_turn(env, %{prompt: "hi", cwd: "/w"},
+                 room_server_url: "http://room.test",
+                 req_options: [plug: stub]
+               )
+    end
+
+    test "lowers the terminal usage totals to the IR.Attempt cost shape" do
+      stub =
+        stub_plug(%{
+          "threadId" => "thread_abc",
+          "outcome" => %{"kind" => "ok"},
+          "eventCount" => 4,
+          "usage" => %{
+            "tokensIn" => 1200,
+            "tokensOut" => 340,
+            "cacheRead" => 800,
+            "cacheCreation" => 64,
+            "costUsd" => 0.0123
+          }
+        })
+
+      {:ok, env} = Envelope.validate(%Envelope{engine: :codex, model: "gpt-5.3-codex", location: :local})
+
+      assert {:ok, %{cost: totals}} =
+               Client.submit_turn(env, %{prompt: "hi", cwd: "/w"},
+                 room_server_url: "http://room.test",
+                 req_options: [plug: stub]
+               )
+
+      assert totals == %{
+               usd: 0.0123,
+               tokens_in: 1200,
+               tokens_out: 340,
+               cache_read: 800,
+               cache_creation: 64
+             }
+    end
+
+    test "a response without usage records an unknown (nil) cost" do
+      stub = stub_plug(%{"threadId" => "t", "outcome" => %{"kind" => "ok"}, "eventCount" => 0})
+      {:ok, env} = Envelope.validate(%Envelope{engine: :codex, model: "gpt-5.3-codex", location: :local})
+
+      assert {:ok, %{cost: nil}} =
+               Client.submit_turn(env, %{prompt: "hi", cwd: "/w"},
+                 room_server_url: "http://room.test",
+                 req_options: [plug: stub]
+               )
+    end
+
+    test "maps an error outcome to {:error, {:turn_error, message, thread_id}}" do
+      stub =
+        stub_plug(%{
+          "threadId" => "thread_err",
+          "outcome" => %{"kind" => "error", "message" => "model refused"},
+          "eventCount" => 1
+        })
+
+      {:ok, env} = Envelope.validate(%Envelope{engine: :claude, model: "haiku", location: {:room, "http://room.test"}})
+
+      assert {:error, {:turn_error, "model refused", "thread_err"}} =
+               Client.submit_turn(env, %{prompt: "hi", cwd: "/w"}, req_options: [plug: stub])
+    end
+
+    test "an explicit {:room, url} location overrides the default url" do
+      parent = self()
+
+      recorder = fn conn ->
+        send(parent, {:hit, conn.host, conn.port})
+        respond(conn, %{"threadId" => "t", "outcome" => %{"kind" => "ok"}, "eventCount" => 0})
+      end
+
+      {:ok, env} = Envelope.validate(%Envelope{engine: :claude, model: "haiku", location: {:room, "http://chosen.test:9999"}})
+
+      assert {:ok, _} =
+               Client.submit_turn(env, %{prompt: "hi", cwd: "/w"},
+                 room_server_url: "http://default.test",
+                 req_options: [plug: recorder]
+               )
+
+      assert_received {:hit, "chosen.test", 9999}
+    end
+
+    test "a non-2xx status surfaces as an agent_turn_status error" do
+      stub = fn conn -> Plug.Conn.send_resp(conn, 503, "engine claude not configured") end
+      {:ok, env} = Envelope.validate(%Envelope{engine: :claude, model: "haiku", location: :local})
+
+      assert {:error, {:agent_turn_status, 503, _}} =
+               Client.submit_turn(env, %{prompt: "hi", cwd: "/w"},
+                 room_server_url: "http://room.test",
+                 req_options: [plug: stub]
+               )
+    end
+  end
+
+  defp stub_plug(payload), do: &respond(&1, payload)
+
+  defp respond(conn, payload) do
+    encoded = Jason.encode!(payload)
+    typed = Plug.Conn.put_resp_content_type(conn, "application/json")
+    Plug.Conn.send_resp(typed, 200, encoded)
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/engine/contract_fixtures_test.exs b/packages/symphony/elixir/test/symphony_elixir/engine/contract_fixtures_test.exs
new file mode 100644
index 000000000..2de999602
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/engine/contract_fixtures_test.exs
@@ -0,0 +1,85 @@
+defmodule SymphonyElixir.Engine.ContractFixturesTest do
+  @moduledoc """
+  The Elixir half of the cross-language contract guard (see
+  `docs/engine-contract.md`). It asserts that `Engine.Client` agrees with the
+  `contracts/fixtures` JSON shared with the Rust room-server: the request
+  body it builds and the response usage it lowers to a cost map. A field
+  rename on either side fails a check rather than drifting silently at runtime.
+  """
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.Engine.{Client, Envelope}
+
+  # contracts/ sits four directories above this test's directory (packages/symphony).
+  @fixtures Path.expand("../../../../contracts/fixtures", __DIR__)
+
+  defp fixture(name) do
+    Jason.decode!(File.read!(Path.join(@fixtures, name)))
+  end
+
+  test "request_body/2 reproduces the shared turn_request fixture" do
+    wire = fixture("turn_request.json")
+
+    {:ok, env} =
+      Envelope.from_map(%{
+        "engine" => wire["engine"],
+        "model" => wire["model"],
+        "effort" => wire["effort"],
+        "permissions" => wire["permissions"],
+        "location" => "local"
+      })
+
+    request = %{
+      prompt: wire["prompt"],
+      cwd: wire["cwd"],
+      tools: wire["tools"],
+      run_id: wire["runId"],
+      node_id: wire["nodeId"]
+    }
+
+    assert {:ok, body} = Client.request_body(env, request)
+    # Round-trip through JSON so the comparison uses string keys and JSON scalars.
+    assert Jason.decode!(Jason.encode!(body)) == wire
+  end
+
+  test "submit_turn maps the shared agent_turn_response fixture's usage to cost" do
+    reply = fixture("agent_turn_response.json")
+    usage = reply["usage"]
+
+    stub = fn conn ->
+      conn
+      |> Plug.Conn.put_resp_content_type("application/json")
+      |> Plug.Conn.send_resp(200, Jason.encode!(reply))
+    end
+
+    {:ok, env} =
+      Envelope.from_map(%{"engine" => "codex", "model" => "gpt-5.3-codex", "location" => "local"})
+
+    assert {:ok, %{thread_id: thread_id, event_count: event_count, cost: cost}} =
+             Client.submit_turn(env, %{prompt: "hi", cwd: "/w"},
+               room_server_url: "http://room.test",
+               req_options: [plug: stub]
+             )
+
+    assert thread_id == reply["threadId"]
+    assert event_count == reply["eventCount"]
+
+    assert cost == %{
+             usd: usage["costUsd"],
+             tokens_in: usage["tokensIn"],
+             tokens_out: usage["tokensOut"],
+             cache_read: usage["cacheRead"],
+             cache_creation: usage["cacheCreation"]
+           }
+  end
+
+  test "an unset effort is omitted from the wire shape" do
+    {:ok, env} =
+      Envelope.from_map(%{"engine" => "codex", "model" => "gpt-5.3-codex", "location" => "local"})
+
+    request = %{prompt: "go", cwd: "/w", tools: [], run_id: "r", node_id: "n"}
+
+    assert {:ok, body} = Client.request_body(env, request)
+    refute Map.has_key?(body, "effort")
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/engine/envelope_test.exs b/packages/symphony/elixir/test/symphony_elixir/engine/envelope_test.exs
new file mode 100644
index 000000000..4d90e0ada
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/engine/envelope_test.exs
@@ -0,0 +1,73 @@
+defmodule SymphonyElixir.Engine.EnvelopeTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.Engine.Envelope
+
+  describe "from_map/1" do
+    test "builds a valid codex envelope and defaults permissions and location" do
+      attrs = %{"engine" => "codex", "model" => "gpt-5.3-codex", "effort" => "medium"}
+      assert {:ok, built} = Envelope.from_map(attrs)
+
+      assert built.engine == :codex
+      assert built.model == "gpt-5.3-codex"
+      assert built.effort == :medium
+      assert built.permissions == :workspace_write
+      assert built.location == :local
+    end
+
+    test "builds a valid claude envelope" do
+      attrs = %{
+        "engine" => "claude",
+        "model" => "claude-opus-4-8",
+        "permissions" => "danger_full_access",
+        "location" => "local"
+      }
+      assert {:ok, built} = Envelope.from_map(attrs)
+
+      assert built.engine == :claude
+      assert built.permissions == :danger_full_access
+    end
+
+    test "parses host and room locations" do
+      assert {:ok, %{location: {:host, "hari"}}} =
+               Envelope.from_map(%{"engine" => "codex", "model" => "gpt-5.3-codex", "location" => %{"host" => "hari"}})
+
+      assert {:ok, %{location: {:room, "https://r"}}} =
+               Envelope.from_map(%{"engine" => "codex", "model" => "gpt-5.3-codex", "location" => %{"room" => "https://r"}})
+    end
+
+    test "rejects a claude model under engine: codex" do
+      assert {:error, {:engine_model_mismatch, :codex, "opus"}} =
+               Envelope.from_map(%{"engine" => "codex", "model" => "opus"})
+    end
+
+    test "rejects a non-claude model under engine: claude" do
+      assert {:error, {:engine_model_mismatch, :claude, "gpt-5.3-codex"}} =
+               Envelope.from_map(%{"engine" => "claude", "model" => "gpt-5.3-codex"})
+    end
+
+    test "rejects unknown keys instead of silently ignoring them" do
+      assert {:error, {:unknown_envelope_keys, ["sandbox"]}} =
+               Envelope.from_map(%{"engine" => "claude", "model" => "opus", "sandbox" => "workspace-write"})
+    end
+
+    test "rejects an out-of-range effort" do
+      assert {:error, {:invalid_effort, "ultra"}} =
+               Envelope.from_map(%{"engine" => "codex", "model" => "gpt-5.3-codex", "effort" => "ultra"})
+    end
+
+    test "requires engine and model" do
+      assert {:error, {:missing_envelope_field, "engine"}} = Envelope.from_map(%{"model" => "opus"})
+      assert {:error, {:missing_envelope_field, "model"}} = Envelope.from_map(%{"engine" => "claude"})
+    end
+  end
+
+  describe "claude_model?/1" do
+    test "matches claude prefixes and aliases" do
+      assert Envelope.claude_model?("claude-opus-4-8")
+      assert Envelope.claude_model?("opus")
+      assert Envelope.claude_model?("SONNET")
+      refute Envelope.claude_model?("gpt-5.3-codex")
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/ir/graph_test.exs b/packages/symphony/elixir/test/symphony_elixir/ir/graph_test.exs
new file mode 100644
index 000000000..1c93a67c2
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/ir/graph_test.exs
@@ -0,0 +1,142 @@
+defmodule SymphonyElixir.IR.GraphTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.IR.{Graph, Node, RunGraph}
+
+  defp node(id, opts) do
+    inputs = Keyword.get(opts, :inputs, %{})
+    base = [id: id, ast_origin: {:test, id}, kind: :exec, inputs: inputs]
+    state = Keyword.take(opts, [:state])
+    Node.new(base ++ state)
+  end
+
+  defp graph(nodes) do
+    "run-1" |> RunGraph.new("hash", nil) |> RunGraph.put_nodes(nodes)
+  end
+
+  describe "ready_nodes/1" do
+    test "a node with no deps is ready immediately" do
+      run = graph([node("a", state: :pending)])
+      assert [%Node{id: "a"}] = Graph.ready_nodes(run)
+    end
+
+    test "parallel-ready siblings are both returned" do
+      run = graph([node("a", state: :pending), node("b", state: :pending)])
+      ready = run |> Graph.ready_nodes() |> Enum.map(& &1.id)
+      assert ready == ["a", "b"]
+    end
+
+    test "a dependent is not ready until its dep succeeds" do
+      on_a = %{"x" => {:node, "a", []}}
+      run = graph([node("a", state: :pending), node("b", state: :pending, inputs: on_a)])
+
+      assert run |> Graph.ready_nodes() |> Enum.map(& &1.id) == ["a"]
+
+      run = Graph.apply_output(run, "a", {:ok, %{result: 1}})
+      assert run |> Graph.ready_nodes() |> Enum.map(& &1.id) == ["b"]
+    end
+
+    test "running and terminal nodes are excluded" do
+      run =
+        graph([
+          node("a", state: :running),
+          node("b", state: :succeeded),
+          node("c", state: :pending)
+        ])
+
+      assert run |> Graph.ready_nodes() |> Enum.map(& &1.id) == ["c"]
+    end
+
+    test "fan-out: two independent dependents of one parent are both ready together" do
+      on_a = %{"x" => {:node, "a", []}}
+
+      run =
+        graph([
+          node("a", state: :succeeded),
+          node("b", state: :pending, inputs: on_a),
+          node("c", state: :pending, inputs: on_a)
+        ])
+
+      assert run |> Graph.ready_nodes() |> Enum.map(& &1.id) == ["b", "c"]
+    end
+  end
+
+  describe "apply_output/3" do
+    test "success marks the node succeeded and records output" do
+      run = graph([node("a", state: :running)]) |> Graph.apply_output("a", {:ok, :done})
+      assert run.nodes["a"].state == :succeeded
+      assert run.nodes["a"].output == :done
+    end
+
+    test "failure propagates :upstream_failed to a waiting dependent" do
+      on_a = %{"x" => {:node, "a", []}}
+      run = graph([node("a", state: :running), node("b", state: :pending, inputs: on_a)])
+
+      run = Graph.apply_output(run, "a", {:error, :boom})
+
+      assert run.nodes["a"].state == :failed
+      assert run.nodes["b"].state == :upstream_failed
+    end
+
+    test "failure propagates transitively through a chain" do
+      run =
+        graph([
+          node("a", state: :running),
+          node("b", state: :pending, inputs: %{"x" => {:node, "a", []}}),
+          node("c", state: :pending, inputs: %{"y" => {:node, "b", []}})
+        ])
+
+      run = Graph.apply_output(run, "a", {:error, :boom})
+
+      assert run.nodes["b"].state == :upstream_failed
+      assert run.nodes["c"].state == :upstream_failed
+    end
+
+    test "a dependent that opts to run on failure is not propagated to" do
+      on_a = %{"x" => {:node, "a", []}, "__on_failure__" => {:literal, true}}
+      run = graph([node("a", state: :running), node("b", state: :pending, inputs: on_a)])
+
+      run = Graph.apply_output(run, "a", {:error, :boom})
+
+      assert run.nodes["b"].state == :pending
+    end
+  end
+
+  describe "reset_node/2" do
+    test "returns a terminal node to :pending and clears output" do
+      run = graph([node("a", state: :failed)])
+      run = put_in(run.nodes["a"].output, {:error, :x})
+
+      run = Graph.reset_node(run, "a")
+
+      assert run.nodes["a"].state == :pending
+      assert run.nodes["a"].output == nil
+    end
+  end
+
+  describe "finish detection" do
+    test "all_terminal? is true only when every node is terminal" do
+      refute Graph.all_terminal?(graph([node("a", state: :running)]))
+      assert Graph.all_terminal?(graph([node("a", state: :succeeded), node("b", state: :skipped)]))
+    end
+
+    test "finished_status reflects failure and success" do
+      assert Graph.finished_status(graph([node("a", state: :succeeded)])) == :succeeded
+      assert Graph.finished_status(graph([node("a", state: :failed)])) == :failed
+      assert Graph.finished_status(graph([node("a", state: :running)])) == :running
+    end
+
+    test "an empty node map is a no-op run that finishes succeeded" do
+      # When a workflow's only construct gates its body off (`when` falsy, or
+      # an `every n` that did not fire this tick) it materializes to zero
+      # nodes. That counts as a completed no-op rather than a run in progress,
+      # so the runtime can finish it instead of tripping the deadlock guard.
+      assert Graph.finished_status(graph([])) == :succeeded
+
+      # The empty map is still reported as non-terminal, so the runtime never
+      # declares a run done before its first materialization; the runtime
+      # relies on these two invariants as a deliberate pair.
+      refute Graph.all_terminal?(graph([]))
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/ir/materializer_test.exs b/packages/symphony/elixir/test/symphony_elixir/ir/materializer_test.exs
new file mode 100644
index 000000000..9702af520
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/ir/materializer_test.exs
@@ -0,0 +1,296 @@
+defmodule SymphonyElixir.IR.MaterializerTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.DSL.Parser
+  alias SymphonyElixir.Engine.Envelope
+  alias SymphonyElixir.IR.{Graph, Materializer, RunGraph}
+
+  defp parse!(source) do
+    {:ok, ast} = Parser.parse(source)
+    ast
+  end
+
+  describe "materialize/3" do
+    test "builds a running RunGraph with the static nodes and a lowered envelope" do
+      ast =
+        parse!("""
+        workflow "w" {
+          run <- agent { engine: codex, model: "gpt-5.3-codex", permissions: workspace_write, prompt: inline "go" }
+        }
+        """)
+
+      assert {:ok, graph} = Materializer.materialize("run_1", "hash", ast)
+      assert graph.status == :running
+      assert [node] = Map.values(graph.nodes)
+      assert node.kind == :agent
+      # The raw spec map is lowered to a typed Engine.Envelope at this boundary.
+      assert %Envelope{engine: :codex, model: "gpt-5.3-codex", permissions: :workspace_write} = node.envelope
+    end
+
+    test "an invalid envelope fails the whole materialization with the node id" do
+      # A claude-looking model under engine: codex is a load error.
+      ast =
+        parse!("""
+        workflow "w" {
+          run <- agent { engine: codex, model: "claude-opus-4", prompt: inline "go" }
+        }
+        """)
+
+      assert {:error, {:invalid_envelope, "agent-0", {:engine_model_mismatch, :codex, "claude-opus-4"}}} =
+               Materializer.materialize("run_1", "hash", ast)
+    end
+  end
+
+  describe "expand_dynamic/1" do
+    test "a when-gate emits its body once the gating output is known" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "first" }
+          when ${a.ok} {
+            b <- agent { engine: codex, model: "m", prompt: inline "second" }
+          }
+        }
+        """)
+
+      {:ok, graph} = Materializer.materialize("run_1", "hash", ast)
+
+      # Before a's output is known, the gate is a placeholder and the body
+      # agent node is absent (only agent-0 plus the gate exist).
+      assert Enum.any?(Map.values(graph.nodes), &(&1.kind == :gate))
+      agents_before = for {id, %{kind: :agent}} <- graph.nodes, do: id
+      assert agents_before == ["agent-0"]
+
+      # Succeed a with a truthy `ok`, then re-expand.
+      graph = Graph.apply_output(graph, "agent-0", {:ok, %{"ok" => true}})
+      assert {:ok, expanded, new_ids} = Materializer.expand_dynamic(graph)
+
+      # Exactly one new agent node (the gate body) appears.
+      assert [body_id] = new_ids
+      assert expanded.nodes[body_id].kind == :agent
+      # The resolved gate placeholder is retired so it cannot deadlock the run.
+      gate = Enum.find(Map.values(expanded.nodes), &(&1.kind == :gate))
+      assert gate.state == :skipped
+    end
+
+    test "a falsey when-gate emits no body and retires the placeholder" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "first" }
+          when ${a.ok} {
+            b <- agent { engine: codex, model: "m", prompt: inline "second" }
+          }
+        }
+        """)
+
+      {:ok, graph} = Materializer.materialize("run_1", "hash", ast)
+      graph = Graph.apply_output(graph, "agent-0", {:ok, %{"ok" => false}})
+
+      assert {:ok, expanded, new_ids} = Materializer.expand_dynamic(graph)
+      assert new_ids == []
+      refute Map.has_key?(expanded.nodes, "agent-1")
+      assert Enum.find(Map.values(expanded.nodes), &(&1.kind == :gate)).state == :skipped
+    end
+
+    test "re-expansion is idempotent: a second pass adds nothing new" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "first" }
+          when ${a.ok} {
+            b <- agent { engine: codex, model: "m", prompt: inline "second" }
+          }
+        }
+        """)
+
+      {:ok, graph} = Materializer.materialize("run_1", "hash", ast)
+      graph = Graph.apply_output(graph, "agent-0", {:ok, %{"ok" => true}})
+
+      {:ok, once, _} = Materializer.expand_dynamic(graph)
+      {:ok, twice, second_ids} = Materializer.expand_dynamic(once)
+
+      assert second_ids == []
+      assert Map.keys(once.nodes) |> Enum.sort() == Map.keys(twice.nodes) |> Enum.sort()
+    end
+
+    test "a map fan-out emits one child per element and retires the placeholder" do
+      ast =
+        parse!("""
+        workflow "w" {
+          seed <- agent { engine: codex, model: "m", prompt: inline "list" }
+          map ${seed.repos} as repo {
+            child <- exec "./audit.sh" { target: ${repo} }
+          }
+        }
+        """)
+
+      {:ok, graph} = Materializer.materialize("run_1", "hash", ast)
+
+      # Before the list is known the fan-out is a single placeholder and no
+      # body child exists.
+      assert Enum.any?(Map.values(graph.nodes), &(&1.kind == :map_fanout))
+      refute Enum.any?(Map.values(graph.nodes), &(&1.kind == :exec))
+
+      graph = Graph.apply_output(graph, "agent-0", {:ok, %{"repos" => ["alpha", "beta", "gamma"]}})
+      assert {:ok, expanded, new_ids} = Materializer.expand_dynamic(graph)
+
+      # One exec child per element, each carrying its element literally, with
+      # distinct content-derived ids.
+      children = for {_id, %{kind: :exec} = n} <- expanded.nodes, do: n
+      assert length(children) == 3
+      assert length(new_ids) == 3
+      targets = children |> Enum.map(& &1.inputs["target"]) |> Enum.sort()
+      assert targets == [{:literal, "alpha"}, {:literal, "beta"}, {:literal, "gamma"}]
+
+      # The resolved fan-out placeholder is retired so it cannot deadlock the run.
+      assert Enum.find(Map.values(expanded.nodes), &(&1.kind == :map_fanout)).state == :skipped
+    end
+
+    test "re-expanding a fanned-out map merges idempotently, adding nothing new" do
+      ast =
+        parse!("""
+        workflow "w" {
+          seed <- agent { engine: codex, model: "m", prompt: inline "list" }
+          map ${seed.repos} as repo {
+            child <- exec "./audit.sh" { target: ${repo} }
+          }
+        }
+        """)
+
+      {:ok, graph} = Materializer.materialize("run_1", "hash", ast)
+      graph = Graph.apply_output(graph, "agent-0", {:ok, %{"repos" => ["alpha", "beta"]}})
+
+      {:ok, once, first_ids} = Materializer.expand_dynamic(graph)
+      assert length(first_ids) == 2
+
+      # A second pass re-emits the same children (the interpreter re-derives
+      # them deterministically), but the merge-by-id adds nothing because the
+      # ids already exist. This pins the invariant that re-emitting children
+      # on each pass merges idempotently.
+      {:ok, twice, second_ids} = Materializer.expand_dynamic(once)
+      assert second_ids == []
+      assert Map.keys(once.nodes) |> Enum.sort() == Map.keys(twice.nodes) |> Enum.sort()
+    end
+
+    test "a map over an empty list emits no children and retires the placeholder" do
+      ast =
+        parse!("""
+        workflow "w" {
+          seed <- agent { engine: codex, model: "m", prompt: inline "list" }
+          map ${seed.repos} as repo {
+            child <- exec "./audit.sh" { target: ${repo} }
+          }
+        }
+        """)
+
+      {:ok, graph} = Materializer.materialize("run_1", "hash", ast)
+      graph = Graph.apply_output(graph, "agent-0", {:ok, %{"repos" => []}})
+
+      assert {:ok, expanded, new_ids} = Materializer.expand_dynamic(graph)
+      assert new_ids == []
+      refute Enum.any?(Map.values(expanded.nodes), &(&1.kind == :exec))
+      assert Enum.find(Map.values(expanded.nodes), &(&1.kind == :map_fanout)).state == :skipped
+    end
+
+    test "a graph without a workflow AST is returned unchanged" do
+      graph = RunGraph.new("run_1", "hash", nil)
+      assert {:ok, ^graph, []} = Materializer.expand_dynamic(graph)
+    end
+
+    test "a deferred inline prompt waits on its input and folds to text once the output arrives" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "first" }
+          b <- agent { engine: codex, model: "m", prompt: inline "use ${a.result} now" }
+        }
+        """)
+
+      {:ok, graph} = Materializer.materialize("run_1", "hash", ast)
+
+      # b interpolates a's output. The interpreter cannot fold the mixed
+      # literal/node concat into one input ref, so the edge arrives via the
+      # pending set; the materializer must still make b depend on a so it
+      # does not run with an unresolved prompt.
+      b = graph.nodes["agent-1"]
+      assert b.prompt_ref == {:inline, nil}
+      assert "agent-0" in b.deps
+      refute Enum.any?(Graph.ready_nodes(graph), &(&1.id == "agent-1"))
+
+      graph = Graph.apply_output(graph, "agent-0", {:ok, %{"result" => "X"}})
+      assert {:ok, expanded, _ids} = Materializer.expand_dynamic(graph)
+
+      b = expanded.nodes["agent-1"]
+      assert b.prompt_ref == {:inline, "use X now"}
+      assert b.state == :pending
+      # The edge is kept for provenance even though the prompt now folds to a
+      # literal; it points at the succeeded agent-0 so b is schedulable.
+      assert "agent-0" in b.deps
+      assert Enum.any?(Graph.ready_nodes(expanded), &(&1.id == "agent-1"))
+    end
+
+    test "a deferred skill binding folds to the resolved value on re-expansion" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "first" }
+          b <- agent { engine: codex, model: "m", prompt: skill "next" { ctx: ${a.area} } }
+        }
+        """)
+
+      {:ok, graph} = Materializer.materialize("run_1", "hash", ast)
+
+      # A skill binding that reads a node output already carries the edge as
+      # an input ref, so b depends on a from the first pass.
+      b = graph.nodes["agent-1"]
+      assert "agent-0" in b.deps
+      assert b.inputs["ctx"] == {:node, "agent-0", ["area"]}
+      assert {:skill, "next", %{"ctx" => unresolved}} = b.prompt_ref
+      refute unresolved == "DB"
+
+      graph = Graph.apply_output(graph, "agent-0", {:ok, %{"area" => "DB"}})
+      assert {:ok, expanded, _ids} = Materializer.expand_dynamic(graph)
+
+      b = expanded.nodes["agent-1"]
+      assert b.prompt_ref == {:skill, "next", %{"ctx" => "DB"}}
+      assert b.inputs["ctx"] == {:literal, "DB"}
+    end
+
+    test "a node already running is not clobbered by re-expansion" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "first" }
+          b <- agent { engine: codex, model: "m", prompt: inline "use ${a.result} now" }
+        }
+        """)
+
+      {:ok, graph} = Materializer.materialize("run_1", "hash", ast)
+      # Force agent-0 into a live state, then re-expand: a running/terminal
+      # node keeps its state and is never replaced by the fresh expansion.
+      graph = put_in(graph.nodes["agent-0"].state, :running)
+
+      assert {:ok, expanded, new_ids} = Materializer.expand_dynamic(graph)
+      assert new_ids == []
+      assert expanded.nodes["agent-0"].state == :running
+    end
+  end
+
+  describe "known_outputs/1" do
+    test "exposes only succeeded node outputs" do
+      ast =
+        parse!("""
+        workflow "w" {
+          a <- agent { engine: codex, model: "m", prompt: inline "first" }
+        }
+        """)
+
+      {:ok, graph} = Materializer.materialize("run_1", "hash", ast)
+      assert Materializer.known_outputs(graph) == %{}
+
+      graph = Graph.apply_output(graph, "agent-0", {:ok, %{"area" => 42}})
+      assert Materializer.known_outputs(graph) == %{"agent-0" => %{"area" => 42}}
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/ir/recovery_test.exs b/packages/symphony/elixir/test/symphony_elixir/ir/recovery_test.exs
new file mode 100644
index 000000000..f402ec856
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/ir/recovery_test.exs
@@ -0,0 +1,107 @@
+defmodule SymphonyElixir.IR.RecoveryTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.IR.{Attempt, Graph, Node, RunGraph}
+  alias SymphonyElixir.Runtime.Recovery
+
+  # Test agent node; `opts` may carry :inputs (default %{}), :state and :attempts.
+  defp node(id, opts) do
+    inputs = Keyword.get(opts, :inputs, %{})
+    overrides = Keyword.take(opts, [:state, :attempts])
+    Node.new([id: id, ast_origin: {:test, id}, kind: :agent, inputs: inputs] ++ overrides)
+  end
+
+  # A :running node whose single codex attempt carries `thread_id`; `opts` override.
+  defp running_with_thread(id, thread_id, opts \\ []) do
+    node(id, Keyword.merge([state: :running, attempts: [Attempt.start(1, :codex, thread_id)]], opts))
+  end
+
+  defp graph(nodes), do: RunGraph.put_nodes(RunGraph.new("r", "h", {:ast, []}), nodes)
+
+  describe "replay/2" do
+    test "replaying an expansion log reproduces the same node set deterministically" do
+      base = graph([node("root", state: :succeeded)])
+
+      log =
+        base
+        |> RunGraph.append_expansion({:fanout, "f"}, [:a, :b], ["child-a", "child-b"])
+
+      expand = fn {:fanout, "f"}, elements, _nodes ->
+        Enum.map(elements, fn e -> node("child-#{e}", state: :pending) end)
+      end
+
+      one = Recovery.replay(log, expand)
+      two = Recovery.replay(log, expand)
+
+      assert Map.keys(one.nodes) |> Enum.sort() == ["child-a", "child-b", "root"]
+      assert Map.keys(one.nodes) == Map.keys(two.nodes)
+    end
+
+    test "the default expander leaves a statically-materialized graph unchanged" do
+      g = graph([node("a", state: :pending), node("b", state: :pending)])
+      assert Recovery.replay(g).nodes == g.nodes
+    end
+  end
+
+  describe "reconcile/2 reattach probe" do
+    test "a :running node the engine still owns is left running" do
+      g = graph([running_with_thread("a", "t1")])
+      out = Recovery.reconcile(g, fn "t1" -> :running end)
+      assert out.nodes["a"].state == :running
+    end
+
+    test "a :running node the engine finished is harvested" do
+      g = graph([running_with_thread("a", "t1")])
+      out = Recovery.reconcile(g, fn "t1" -> {:finished, {:ok, :harvested}} end)
+      assert out.nodes["a"].state == :succeeded
+      assert out.nodes["a"].output == :harvested
+    end
+  end
+
+  describe "reconcile/2 strand policy (#90, non-idempotent safety)" do
+    test "an unknown thread with an opened thread_id is stranded, never auto-retried" do
+      # The node opted in to retry, but its attempt recorded a thread_id, so a
+      # side effect may have happened: route to human review, do not blind-retry.
+      g = graph([running_with_thread("a", "t1", inputs: %{"__retry__" => {:literal, true}})])
+      out = Recovery.reconcile(g, fn "t1" -> :unknown end)
+      assert out.nodes["a"].state == :stranded
+    end
+
+    test "an opted-in node with no observed side effect is auto-retried" do
+      attempt = Attempt.start(1, :codex, nil)
+
+      g =
+        graph([
+          node("a",
+            state: :running,
+            attempts: [attempt],
+            inputs: %{"__retry__" => {:literal, true}}
+          )
+        ])
+
+      out = Recovery.reconcile(g, fn nil -> :unknown end)
+      assert out.nodes["a"].state == :retrying
+    end
+
+    test "a node that did not opt in is stranded even with no side effect" do
+      attempt = Attempt.start(1, :codex, nil)
+      g = graph([node("a", state: :running, attempts: [attempt])])
+      out = Recovery.reconcile(g, fn nil -> :unknown end)
+      assert out.nodes["a"].state == :stranded
+    end
+
+    test "the stranded attempt is recorded on the node" do
+      g = graph([running_with_thread("a", "t1")])
+      out = Recovery.reconcile(g, fn "t1" -> :unknown end)
+      [att] = out.nodes["a"].attempts
+      assert att.state == :stranded
+      assert att.outcome == :stranded
+    end
+
+    test "after reconcile no node remains :running" do
+      g = graph([running_with_thread("a", "t1"), running_with_thread("b", "t2")])
+      out = Recovery.reconcile(g, fn _ -> :unknown end)
+      refute Enum.any?(Graph.running_nodes(out))
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/ir/run_notifier_test.exs b/packages/symphony/elixir/test/symphony_elixir/ir/run_notifier_test.exs
new file mode 100644
index 000000000..7eddf8d81
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/ir/run_notifier_test.exs
@@ -0,0 +1,157 @@
+defmodule SymphonyElixir.IR.RunNotifierTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.Config
+  alias SymphonyElixir.IR.Attempt
+  alias SymphonyElixir.IR.Node
+  alias SymphonyElixir.IR.RunGraph
+  alias SymphonyElixir.IR.RunNotifier
+
+  # RunGraph struct built from test defaults; entries in `attrs` take precedence.
+  defp graph(attrs) do
+    struct(RunGraph, Enum.into(attrs, %{run_id: "triage-1780166452589-58", source_hash: "hash", status: :succeeded, nodes: %{}}))
+  end
+
+  # A succeeded agent node with one attempt on the given room-server thread id,
+  # so run details can deep-link. `inputs` is an empty map, the shape Node uses.
+  defp agent_node(id, thread_id) do
+    %Node{
+      id: id,
+      ast_origin: {:agent, id},
+      kind: :agent,
+      inputs: %{},
+      deps: [],
+      state: :succeeded,
+      attempts: [%Attempt{n: 1, engine: :codex, thread_id: thread_id, state: :succeeded, started_at: ~U[2026-06-04 00:00:00Z]}]
+    }
+  end
+
+  # Config struct for the notifier's two cron-policy fields. Defaults mirror
+  # production (failures on, empty success allowlist); `attrs` override them.
+  defp config(attrs \\ %{}) do
+    policy = Enum.into(attrs, %{slack_notify_cron_failures: true, slack_notify_cron_workflows: []})
+    struct(Config, policy)
+  end
+
+  describe "notify?/2" do
+    test "skips non-terminal runs" do
+      refute RunNotifier.notify?(graph(status: :running, trigger: %{kind: :linear}), config())
+      refute RunNotifier.notify?(graph(status: :pending, trigger: %{kind: :linear}), config())
+    end
+
+    test "skips cancelled runs" do
+      refute RunNotifier.notify?(graph(status: :cancelled, trigger: %{kind: :linear}), config())
+    end
+
+    test "notifies on terminal non-cron runs" do
+      assert RunNotifier.notify?(graph(status: :succeeded, trigger: %{kind: :linear}), config())
+      assert RunNotifier.notify?(graph(status: :failed, trigger: %{kind: :manual}), config())
+      # Absent trigger is not cron, so it notifies.
+      assert RunNotifier.notify?(graph(status: :succeeded, trigger: nil), config())
+    end
+
+    test "suppresses cron successes unless the workflow is allowlisted" do
+      run = graph(run_id: "digest-100-2", status: :succeeded, trigger: %{kind: :cron})
+
+      refute RunNotifier.notify?(run, config())
+      assert RunNotifier.notify?(run, config(slack_notify_cron_workflows: ["digest"]))
+    end
+
+    test "notifies on cron failures by default and suppresses them when disabled" do
+      # A store round-trip leaves the kind string-keyed; it must still be
+      # treated as cron.
+      run = graph(run_id: "babysit-dispatch-100-2", status: :failed, trigger: %{"kind" => "cron"})
+
+      assert RunNotifier.notify?(run, config())
+      refute RunNotifier.notify?(run, config(slack_notify_cron_failures: false))
+    end
+
+    test "a tight-interval cron success stays quiet even when failures are enabled" do
+      run = graph(run_id: "babysit-dispatch-100-2", status: :succeeded, trigger: %{kind: :cron})
+
+      refute RunNotifier.notify?(run, config(slack_notify_cron_failures: true))
+    end
+
+    test "the wildcard allowlist notifies every cron success" do
+      run = graph(run_id: "babysit-dispatch-100-2", status: :succeeded, trigger: %{kind: :cron})
+
+      refute RunNotifier.notify?(run, config())
+      assert RunNotifier.notify?(run, config(slack_notify_cron_workflows: ["*"]))
+    end
+  end
+
+  describe "workflow_name/1" do
+    test "strips the numeric run-id suffix to recover the workflow slug" do
+      assert RunNotifier.workflow_name("babysit-dispatch-1780166452589-58") == "babysit-dispatch"
+      assert RunNotifier.workflow_name("triage-100-2") == "triage"
+    end
+  end
+
+  describe "build_payload/2" do
+    test "headers a succeeded run and links run details to the room root when no thread opened" do
+      payload =
+        RunNotifier.build_payload(
+          graph(run_id: "triage-100-2", status: :succeeded, trigger: %{kind: :manual}),
+          "https://room.ix.dev"
+        )
+
+      [header | _] = payload["blocks"]
+      assert header["type"] == "header"
+      assert header["text"]["text"] =~ "triage"
+      assert header["text"]["text"] =~ "finished"
+      assert payload["text"] =~ "Symphony: triage finished"
+
+      # No agent thread on the graph, so the link falls back to the room root.
+      run_button = button_with_text(payload, "Run details")
+      assert run_button["url"] == "https://room.ix.dev/"
+    end
+
+    test "deep-links run details to the run's room backend and latest thread" do
+      payload =
+        RunNotifier.build_payload(
+          graph(
+            run_id: "triage-100-2",
+            status: :succeeded,
+            trigger: %{kind: :manual},
+            nodes: %{"n0" => agent_node("n0", "thread_abc")}
+          ),
+          "https://room.ix.dev/"
+        )
+
+      run_button = button_with_text(payload, "Run details")
+
+      # server_id is the registered backend id (Provision.backend_id), encoded
+      # like the room client's encodeURIComponent links; the trailing slash on
+      # the base is trimmed.
+      assert run_button["url"] ==
+               "https://room.ix.dev/#/s/symphony%3Atriage-100-2%3Aroom/t/thread_abc"
+    end
+
+    test "adds a Linear button from the trigger and marks the run failed" do
+      payload =
+        RunNotifier.build_payload(
+          graph(
+            run_id: "triage-100-2",
+            status: :failed,
+            trigger: %{kind: :linear, identifier: "ENG-9", url: "https://linear.app/indexable/issue/ENG-9"}
+          ),
+          nil
+        )
+
+      [header | _] = payload["blocks"]
+      assert header["text"]["text"] =~ "failed"
+
+      linear_button = button_with_text(payload, "ENG-9")
+      assert linear_button["url"] == "https://linear.app/indexable/issue/ENG-9"
+      # No room url was given, so there is no run-details button.
+      assert is_nil(button_with_text(payload, "Run details"))
+    end
+  end
+
+  # Returns the button labelled `text` from the first "actions" block, or nil.
+  defp button_with_text(payload, text) do
+    actions = Enum.find(payload["blocks"], %{}, fn block -> block["type"] == "actions" end)
+    buttons = Map.get(actions, "elements", [])
+    Enum.find(buttons, &(&1["text"]["text"] == text))
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/ir/store_test.exs b/packages/symphony/elixir/test/symphony_elixir/ir/store_test.exs
new file mode 100644
index 000000000..ba1b5c518
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/ir/store_test.exs
@@ -0,0 +1,151 @@
+defmodule SymphonyElixir.IR.StoreTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.Engine.Envelope
+  alias SymphonyElixir.IR.{Attempt, Node, RunGraph, Store}
+
+  setup do
+    dir = Path.join(System.tmp_dir!(), "ir_store_test_#{System.unique_integer([:positive])}")
+    File.mkdir_p!(dir)
+    on_exit(fn -> File.rm_rf(dir) end)
+    {:ok, dir: dir}
+  end
+
+  defp sample_graph do
+    {:ok, env} =
+      Envelope.from_map(%{"engine" => "codex", "model" => "gpt-5.3-codex", "effort" => "medium"})
+
+    agent =
+      Node.new(
+        id: "agent-1",
+        ast_origin: {:agent, "write"},
+        kind: :agent,
+        envelope: env,
+        prompt_ref: {:skill, "writer", %{"topic" => "hello"}},
+        inputs: %{"seed" => {:literal, 42}},
+        state: :running
+      )
+
+    attempt = Attempt.start(1, :codex, "thread-abc") |> Attempt.finish(:succeeded, :ok, %{usd: 0.12, tokens_in: 100})
+    agent = %{agent | attempts: [attempt]}
+
+    exec =
+      Node.new(
+        id: "exec-1",
+        ast_origin: {:exec, "build"},
+        kind: :exec,
+        inputs: %{"from" => {:node, "agent-1", [:output]}},
+        state: :pending
+      )
+
+    RunGraph.new("run-store-1", "deadbeef", {:ast, [:root]})
+    |> RunGraph.put_nodes([agent, exec])
+    |> RunGraph.append_expansion({:gate, "g1"}, {:observed, true}, ["exec-1"])
+  end
+
+  test "round-trips a RunGraph with attempts and an expansion log", %{dir: dir} do
+    graph = sample_graph()
+
+    assert :ok = Store.persist(graph, dir: dir)
+    assert {:ok, loaded} = Store.load(graph.run_id, dir: dir)
+
+    assert loaded.run_id == graph.run_id
+    assert loaded.source_hash == graph.source_hash
+    assert loaded.ast == {:ast, [:root]}
+    assert loaded.status == graph.status
+
+    agent = loaded.nodes["agent-1"]
+    assert agent.kind == :agent
+    assert agent.state == :running
+    assert agent.envelope.engine == :codex
+    assert agent.envelope.model == "gpt-5.3-codex"
+    assert agent.prompt_ref == {:skill, "writer", %{"topic" => "hello"}}
+    assert agent.inputs == %{"seed" => {:literal, 42}}
+
+    [att] = agent.attempts
+    assert att.thread_id == "thread-abc"
+    assert att.state == :succeeded
+    assert att.outcome == :ok
+    assert att.cost == %{usd: 0.12, tokens_in: 100}
+
+    exec = loaded.nodes["exec-1"]
+    assert exec.inputs == %{"from" => {:node, "agent-1", [:output]}}
+    assert exec.deps == ["agent-1"]
+
+    [event] = loaded.expansion_log
+    assert event.origin == {:gate, "g1"}
+    assert event.observed == {:observed, true}
+    assert event.emitted == ["exec-1"]
+  end
+
+  test "load_all returns every decodable graph and quarantines a corrupt file", %{dir: dir} do
+    graph = sample_graph()
+    assert :ok = Store.persist(graph, dir: dir)
+
+    bad_path = Path.join(dir, "broken.json")
+    File.write!(bad_path, "{ not json")
+
+    loaded = Store.load_all(dir: dir)
+    assert Enum.map(loaded, & &1.run_id) == ["run-store-1"]
+
+    refute File.exists?(bad_path)
+    assert File.exists?(bad_path <> ".bad")
+  end
+
+  test "append_expansion persists the new event", %{dir: dir} do
+    graph = sample_graph()
+    assert :ok = Store.persist(graph, dir: dir)
+
+    assert {:ok, next} = Store.append_expansion(graph, {{:gate, "g2"}, {:observed, 7}, ["exec-1"]}, dir: dir)
+    assert length(next.expansion_log) == 2
+
+    assert {:ok, reloaded} = Store.load(graph.run_id, dir: dir)
+    assert length(reloaded.expansion_log) == 2
+  end
+
+  test "load returns :not_found for an unknown run", %{dir: dir} do
+    assert {:error, :not_found} = Store.load("nope", dir: dir)
+  end
+
+  test "round-trips a graph with a placement map (ixvm declared, host effective)", %{dir: dir} do
+    graph =
+      RunGraph.new("run-placement", "deadbeef", nil)
+      |> Map.put(:placement, %{declared: :ixvm, effective: :host})
+
+    assert :ok = Store.persist(graph, dir: dir)
+    assert {:ok, loaded} = Store.load("run-placement", dir: dir)
+
+    assert loaded.placement == %{declared: :ixvm, effective: :host}
+  end
+
+  test "round-trips a graph with a remote effective placement (ixvm -> remote fallback)", %{dir: dir} do
+    graph =
+      RunGraph.new("run-placement-remote", "deadbeef", nil)
+      |> Map.put(:placement, %{declared: :ixvm, effective: :remote})
+
+    assert :ok = Store.persist(graph, dir: dir)
+    assert {:ok, loaded} = Store.load("run-placement-remote", dir: dir)
+
+    assert loaded.placement == %{declared: :ixvm, effective: :remote}
+  end
+
+  test "round-trips a graph with a host-named declared placement", %{dir: dir} do
+    graph =
+      RunGraph.new("run-placement-host-named", "deadbeef", nil)
+      |> Map.put(:placement, %{declared: {:host, "box1"}, effective: :host})
+
+    assert :ok = Store.persist(graph, dir: dir)
+    assert {:ok, loaded} = Store.load("run-placement-host-named", dir: dir)
+
+    assert loaded.placement == %{declared: {:host, "box1"}, effective: :host}
+  end
+
+  test "round-trips a graph with nil placement (no placement acquired)", %{dir: dir} do
+    graph = RunGraph.new("run-no-placement", "deadbeef", nil)
+
+    assert :ok = Store.persist(graph, dir: dir)
+    assert {:ok, loaded} = Store.load("run-no-placement", dir: dir)
+
+    assert loaded.placement == nil
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/ir/view_test.exs b/packages/symphony/elixir/test/symphony_elixir/ir/view_test.exs
new file mode 100644
index 000000000..4874c086f
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/ir/view_test.exs
@@ -0,0 +1,204 @@
+defmodule SymphonyElixir.IR.ViewTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.Engine.Envelope
+  alias SymphonyElixir.IR.{Attempt, Node, RunGraph, View}
+
+  # Builds agent node "a" already in the :succeeded state: a validated codex
+  # envelope, output `%{"area" => 7}`, and one finished attempt that carries
+  # a cost map (usd 0.42).
+  defp agent_node do
+    envelope = %Envelope{engine: :codex, model: "gpt-5.3-codex", effort: :high, location: :local}
+    {:ok, env} = Envelope.validate(envelope)
+
+    started = Attempt.start(1, :codex, "thread-1")
+    finished = Attempt.finish(started, :succeeded, :ok)
+    attempt = Map.put(finished, :cost, %{usd: 0.42, tokens_in: 100, tokens_out: 20})
+
+    node = Node.new(id: "a", ast_origin: {:agent, "skill"}, kind: :agent, envelope: env, inputs: %{})
+
+    %{node | state: :succeeded, output: %{"area" => 7}, attempts: [attempt]}
+  end
+
+  # Run "run_v" holding the single agent node, marked :succeeded, with one
+  # :retry_node audit entry by "alice" targeting node "a".
+  defp graph do
+    base = RunGraph.new("run_v", "hash", nil)
+    with_nodes = RunGraph.put_nodes(base, [agent_node()])
+    finished = Map.put(with_nodes, :status, :succeeded)
+
+    RunGraph.append_audit(finished, :retry_node, "a", "alice", %{})
+  end
+
+  test "summary/1 reports status, counts, and total cost" do
+    s = View.summary(graph())
+    assert s["run_id"] == "run_v"
+    assert s["status"] == "succeeded"
+    assert s["node_count"] == 1
+    assert s["states"] == %{"succeeded" => 1}
+    assert s["cost_usd"] == 0.42
+  end
+
+  test "summary cost is nil when no attempt reported a cost" do
+    # An exec node created without passing any attempts to Node.new.
+    node = Node.new(id: "x", ast_origin: {:exec, "x"}, kind: :exec, inputs: %{})
+    g = RunGraph.put_nodes(RunGraph.new("r", "h", nil), [node])
+
+    assert View.summary(g)["cost_usd"] == nil
+  end
+
+  test "detail/1 renders nodes, attempts, envelope, and audit log as JSON-able facts" do
+    d = View.detail(graph())
+
+    assert [node] = d["nodes"]
+    assert node["id"] == "a"
+    assert node["kind"] == "agent"
+    assert node["state"] == "succeeded"
+    assert node["envelope"]["engine"] == "codex"
+    assert node["envelope"]["effort"] == "high"
+    assert node["envelope"]["location"] == "local"
+    assert node["output"] == %{"area" => 7}
+
+    assert [attempt] = node["attempts"]
+    assert attempt["n"] == 1
+    assert attempt["outcome"] == "ok"
+    assert attempt["cost"]["usd"] == 0.42
+
+    assert [audit] = d["audit_log"]
+    assert audit["action"] == "retry_node"
+    assert audit["target"] == "a"
+    assert audit["actor"] == "alice"
+  end
+
+  describe "render_node/1 label field" do
+    test "agent node with skill prompt_ref uses skill name as label" do
+      {:ok, env} = Envelope.validate(%Envelope{engine: :codex, model: "gpt-5", effort: :high, location: :local})
+
+      node =
+        Node.new(
+          id: "skill-node",
+          ast_origin: {:agent, "my_skill"},
+          kind: :agent,
+          envelope: env,
+          prompt_ref: {:skill, "my_skill", %{}},
+          inputs: %{}
+        )
+
+      assert View.render_node(node)["label"] == "my_skill"
+    end
+
+    test "agent node with inline prompt_ref uses 'inline' as label" do
+      {:ok, env} = Envelope.validate(%Envelope{engine: :codex, model: "gpt-5", effort: :high, location: :local})
+
+      node =
+        Node.new(
+          id: "inline-node",
+          ast_origin: {:agent, "inline"},
+          kind: :agent,
+          envelope: env,
+          prompt_ref: {:inline, "do something"},
+          inputs: %{}
+        )
+
+      assert View.render_node(node)["label"] == "inline"
+    end
+
+    test "exec node with literal script input uses script path as label" do
+      node =
+        Node.new(
+          id: "exec-node",
+          ast_origin: {:exec, "run"},
+          kind: :exec,
+          inputs: %{"script" => {:literal, "./scripts/deploy.sh"}}
+        )
+
+      assert View.render_node(node)["label"] == "./scripts/deploy.sh"
+    end
+
+    test "exec node without resolved script input uses 'exec' as label" do
+      node =
+        Node.new(
+          id: "exec-node",
+          ast_origin: {:exec, "run"},
+          kind: :exec,
+          inputs: %{}
+        )
+
+      assert View.render_node(node)["label"] == "exec"
+    end
+
+    test "gate node uses 'gate' as label" do
+      node =
+        Node.new(
+          id: "gate-node",
+          ast_origin: {:gate, "check"},
+          kind: :gate,
+          inputs: %{}
+        )
+
+      assert View.render_node(node)["label"] == "gate"
+    end
+
+    test "subrun node uses 'subrun' as label" do
+      node =
+        Node.new(
+          id: "sub-node",
+          ast_origin: {:subrun, "child"},
+          kind: :subrun,
+          inputs: %{}
+        )
+
+      assert View.render_node(node)["label"] == "subrun"
+    end
+  end
+
+  test "the rendered detail encodes to JSON without a custom encoder" do
+    assert {:ok, _json} = graph() |> View.detail() |> Jason.encode()
+  end
+
+  test "render_node stringifies a non-default location" do
+    {:ok, env} = Envelope.validate(%Envelope{engine: :claude, model: "haiku", location: {:room, "http://h:1"}})
+    node = Node.new(id: "n", ast_origin: {:agent, "s"}, kind: :agent, envelope: env, inputs: %{})
+    assert View.render_node(node)["envelope"]["location"] == "room:http://h:1"
+  end
+
+  describe "summary/1 trigger and placement fields" do
+    test "summary includes trigger as a string label for a manual trigger" do
+      g = graph() |> Map.put(:trigger, %{kind: :manual})
+      s = View.summary(g)
+      assert s["trigger"] == "manual"
+    end
+
+    test "summary includes trigger label for a cron trigger" do
+      g = graph() |> Map.put(:trigger, %{kind: :cron, schedule: "0 * * * *"})
+      s = View.summary(g)
+      assert s["trigger"] == "cron 0 * * * *"
+    end
+
+    test "summary defaults trigger to 'manual' when trigger is nil" do
+      g = RunGraph.new("r-nil-trigger", "h", nil)
+      assert View.summary(g)["trigger"] == "manual"
+    end
+
+    test "summary includes placement with declared and effective as strings" do
+      g = graph() |> Map.put(:placement, %{declared: :ixvm, effective: :host})
+      s = View.summary(g)
+      assert s["placement"] == %{"declared" => "ixvm", "effective" => "host"}
+    end
+
+    test "summary includes placement for an ixvm -> host fallback" do
+      g = graph() |> Map.put(:placement, %{declared: :ixvm, effective: :host})
+      s = View.summary(g)
+      # A consumer can detect a fallback by comparing declared != effective.
+      assert s["placement"]["declared"] == "ixvm"
+      assert s["placement"]["effective"] == "host"
+    end
+
+    test "summary placement is nil when no placement was acquired" do
+      g = RunGraph.new("r-no-placement", "h", nil)
+      assert View.summary(g)["placement"] == nil
+    end
+
+    test "trigger_label/1 is a public shared formatter" do
+      assert View.trigger_label(%{kind: :manual}) == "manual"
+      assert View.trigger_label(%{kind: :cron, schedule: "*/5 * * * *"}) == "cron */5 * * * *"
+      assert View.trigger_label(%{kind: :linear, label: "bug"}) == "linear: bug"
+      assert View.trigger_label(%{kind: :github_pr_label, label: "review"}) == "github: review"
+      assert View.trigger_label(nil) == "manual"
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/prompt_test.exs b/packages/symphony/elixir/test/symphony_elixir/prompt_test.exs
new file mode 100644
index 000000000..e53b18146
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/prompt_test.exs
@@ -0,0 +1,94 @@
+defmodule SymphonyElixir.PromptTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.Prompt
+
+  describe "build/2 inline" do
+    test "returns inline text verbatim" do
+      assert {:ok, "do the thing"} = Prompt.build({:inline, "do the thing"})
+    end
+
+    test "an unresolved inline prompt (nil text) is an error" do
+      assert {:error, :unresolved_inline_prompt} = Prompt.build({:inline, nil})
+    end
+  end
+
+  describe "build/2 skill" do
+    test "loads the body through the resolver and interpolates bindings" do
+      resolver = fn "inspect" -> {:ok, "Inspect ${repo} on branch ${branch}."} end
+
+      assert {:ok, "Inspect symphony on branch main."} =
+               Prompt.build({:skill, "inspect", %{"repo" => "symphony", "branch" => "main"}}, resolver: resolver)
+    end
+
+    test "reads a dotted binding path" do
+      resolver = fn _ -> {:ok, "Ticket ${ticket.id}: ${ticket.title}"} end
+      bindings = %{"ticket" => %{"id" => "ABC-1", "title" => "Fix it"}}
+
+      assert {:ok, "Ticket ABC-1: Fix it"} = Prompt.build({:skill, "impl", bindings}, resolver: resolver)
+    end
+
+    test "a placeholder with no binding fails loudly" do
+      resolver = fn _ -> {:ok, "needs ${missing}"} end
+      assert {:error, {:unbound_placeholder, "missing"}} = Prompt.build({:skill, "x", %{}}, resolver: resolver)
+    end
+
+    test "a skill ref with no resolver is an error" do
+      assert {:error, :missing_skill_resolver} = Prompt.build({:skill, "x", %{}})
+    end
+
+    test "propagates a resolver failure" do
+      resolver = fn _ -> {:error, :enoent} end
+      assert {:error, :enoent} = Prompt.build({:skill, "missing", %{}}, resolver: resolver)
+    end
+
+    test "expands {{partial:name}} includes through the partial resolver" do
+      resolver = fn _ -> {:ok, "Start.\n{{partial:pr}}\nEnd ${who}."} end
+      partial_resolver = fn "pr" -> {:ok, "Open a PR."} end
+
+      assert {:ok, rendered} =
+               Prompt.build({:skill, "impl", %{"who" => "you"}},
+                 resolver: resolver,
+                 partial_resolver: partial_resolver
+               )
+
+      assert rendered == "Start.\nOpen a PR.\nEnd you."
+    end
+
+    test "a body that references a partial with no partial resolver fails" do
+      resolver = fn _ -> {:ok, "{{partial:pr}}"} end
+      assert {:error, {:missing_partial_resolver, ["pr"]}} = Prompt.build({:skill, "x", %{}}, resolver: resolver)
+    end
+  end
+
+  describe "render/2" do
+    test "leaves a bare dollar sign untouched" do
+      # `$5` is not followed by `{`, so it must pass through unchanged while
+      # the real `${x}` placeholder in the same body is still substituted.
+      # A distinct binding value makes the two outcomes distinguishable.
+      assert {:ok, "cost is $5 and y"} = Prompt.render("cost is $5 and ${x}", %{"x" => "y"})
+
+      # Original case: the bound value itself looks like a placeholder, so
+      # the rendered text equals the template.
+      # NOTE(review): presumably this guards against re-scanning substituted
+      # text -- confirm the intent.
+      assert {:ok, "cost is $5 and ${x}"} = Prompt.render("cost is $5 and ${x}", %{"x" => "${x}"})
+    end
+
+    test "stringifies non-string bindings" do
+      assert {:ok, "count 3"} = Prompt.render("count ${n}", %{"n" => 3})
+    end
+
+    test "an escaped $${path} renders a literal ${path} with no binding" do
+      assert {:ok, "?pub_secret=${pub_secret}"} = Prompt.render("?pub_secret=$${pub_secret}", %{})
+    end
+
+    test "an escape and a real placeholder coexist in one body" do
+      assert {:ok, "url=${pub_secret} repo=symphony"} =
+               Prompt.render("url=$${pub_secret} repo=${repo}", %{"repo" => "symphony"})
+    end
+
+    test "an unescaped placeholder still fails loudly when an escape is present" do
+      assert {:error, {:unbound_placeholder, "missing"}} =
+               Prompt.render("keep $${literal} but ${missing}", %{})
+    end
+  end
+
+  describe "build/2 escape" do
+    test "a skill body with a shell $${var} reaches the engine as a literal" do
+      resolver = fn _ -> {:ok, "curl ...?pub_secret=$${pub_secret}"} end
+      assert {:ok, "curl ...?pub_secret=${pub_secret}"} = Prompt.build({:skill, "focus_route", %{}}, resolver: resolver)
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/runtime/dsl_wiring_test.exs b/packages/symphony/elixir/test/symphony_elixir/runtime/dsl_wiring_test.exs
new file mode 100644
index 000000000..8390b0ddb
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/runtime/dsl_wiring_test.exs
@@ -0,0 +1,141 @@
+defmodule SymphonyElixir.Runtime.DSLWiringTest do
+  @moduledoc """
+  End-to-end proof that a parsed `.sym` workflow drives the IR runtime:
+  Parser -> Materializer -> Runtime -> a fake engine -> terminal nodes,
+  including the dynamic expansion of a `when` gate after its dependency
+  succeeds. This is the WS-5 seam (interpreter <-> runtime) under test
+  against a fake `EngineClient`, so no room-server is required.
+  """
+  use ExUnit.Case, async: false
+
+  @moduletag capture_log: true
+
+  alias SymphonyElixir.DSL.Parser
+  alias SymphonyElixir.IR.{Materializer, Node, Store}
+  alias SymphonyElixir.Runtime
+
+  # A fake engine that returns a per-node-id scripted output. The gate's
+  # dependency returns `%{"ok" => true}` so the gate opens; every other
+  # node returns a trivial success.
+  defmodule FakeEngine do
+    @behaviour SymphonyElixir.Runtime.EngineClient
+
+    @table :dsl_wiring_fake
+
+    def setup do
+      if :ets.whereis(@table) == :undefined, do: :ets.new(@table, [:named_table, :public, :set])
+      :ets.delete_all_objects(@table)
+      :ok
+    end
+
+    def program(node_id, output), do: :ets.insert(@table, {node_id, output})
+
+    @impl true
+    def run_node(%Node{id: id}, _opts) do
+      case :ets.lookup(@table, id) do
+        [{^id, output}] -> {:ok, output, "thread-#{id}"}
+        [] -> {:ok, %{default: id}, "thread-#{id}"}
+      end
+    end
+
+    @impl true
+    def status(_thread_id), do: :unknown
+  end
+
+  setup do
+    FakeEngine.setup()
+    start_supervised!({Registry, keys: :unique, name: SymphonyElixir.Runtime.Registry})
+    start_supervised!({Task.Supervisor, name: SymphonyElixir.TaskSupervisor})
+    tmp = Path.join(System.tmp_dir!(), "dsl_wiring_#{System.unique_integer([:positive])}")
+    File.mkdir_p!(tmp)
+    on_exit(fn -> File.rm_rf(tmp) end)
+    {:ok, store_opts: [dir: tmp]}
+  end
+
+  defp materialize!(source, run_id) do
+    {:ok, ast} = Parser.parse(source)
+    {:ok, graph} = Materializer.materialize(run_id, "hash-#{run_id}", ast)
+    graph
+  end
+
+  test "a two-node linear workflow runs both nodes to succeeded", %{store_opts: store_opts} do
+    source = """
+    workflow "w" {
+      a <- agent { engine: codex, model: "m", prompt: inline "first" }
+      b <- agent { engine: codex, model: "m", prompt: skill "next" { ctx: ${a.area} } }
+    }
+    """
+
+    graph = materialize!(source, "run_lin")
+    FakeEngine.program("agent-0", %{"area" => 7})
+
+    {:ok, pid} = Runtime.start_link(graph, engine: FakeEngine, store_opts: store_opts)
+    ref = Process.monitor(pid)
+    assert_receive {:DOWN, ^ref, :process, _, _}, 2_000
+
+    # Read the persisted final graph from the store.
+    {:ok, final} = Store.load("run_lin", store_opts)
+    assert final.status == :succeeded
+    assert final.nodes["agent-0"].state == :succeeded
+    assert final.nodes["agent-1"].state == :succeeded
+    # The edge held: agent-1 only ran after agent-0 succeeded.
+    assert "agent-0" in final.nodes["agent-1"].deps
+  end
+
+  test "a when-gate expands and runs its body after the dependency succeeds", %{store_opts: store_opts} do
+    source = """
+    workflow "w" {
+      a <- agent { engine: codex, model: "m", prompt: inline "first" }
+      when ${a.ok} {
+        b <- agent { engine: codex, model: "m", prompt: inline "gated" }
+      }
+    }
+    """
+
+    graph = materialize!(source, "run_gate")
+    # The test addresses `a` by the id "agent-0"; a truthy "ok" is the value
+    # the `when ${a.ok}` gate reads.
+    FakeEngine.program("agent-0", %{"ok" => true})
+
+    {:ok, pid} = Runtime.start_link(graph, engine: FakeEngine, store_opts: store_opts)
+    ref = Process.monitor(pid)
+    assert_receive {:DOWN, ^ref, :process, _, _}, 2_000
+
+    {:ok, final} = Store.load("run_gate", store_opts)
+    assert final.status == :succeeded
+    assert final.nodes["agent-0"].state == :succeeded
+
+    # The gated body node was emitted dynamically and ran to success.
+    body = Enum.find(Map.values(final.nodes), fn n -> n.kind == :agent and n.id != "agent-0" end)
+    assert body, "gate body node was never materialized"
+    assert body.state == :succeeded
+
+    # The gate placeholder was retired, not left pending. Assert it exists
+    # first so a missing gate fails with a readable message instead of a nil
+    # field access.
+    gate = Enum.find(Map.values(final.nodes), &(&1.kind == :gate))
+    assert gate, "gate placeholder node is missing from the final graph"
+    assert gate.state == :skipped
+  end
+
+  test "a falsey when-gate skips the body and the run still succeeds", %{store_opts: store_opts} do
+    source = """
+    workflow "w" {
+      a <- agent { engine: codex, model: "m", prompt: inline "first" }
+      when ${a.ok} {
+        b <- agent { engine: codex, model: "m", prompt: inline "gated" }
+      }
+    }
+    """
+
+    graph = materialize!(source, "run_skip")
+    # A false "ok" is the value the `when ${a.ok}` gate reads.
+    FakeEngine.program("agent-0", %{"ok" => false})
+
+    {:ok, pid} = Runtime.start_link(graph, engine: FakeEngine, store_opts: store_opts)
+    ref = Process.monitor(pid)
+    assert_receive {:DOWN, ^ref, :process, _, _}, 2_000
+
+    {:ok, final} = Store.load("run_skip", store_opts)
+    assert final.status == :succeeded
+    assert final.nodes["agent-0"].state == :succeeded
+    # No body agent node was emitted.
+    refute Enum.any?(Map.values(final.nodes), fn n -> n.kind == :agent and n.id != "agent-0" end)
+
+    # The gate placeholder was retired to :skipped. Assert it exists first so
+    # a missing gate fails with a readable message instead of a nil field
+    # access.
+    gate = Enum.find(Map.values(final.nodes), &(&1.kind == :gate))
+    assert gate, "gate placeholder node is missing from the final graph"
+    assert gate.state == :skipped
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/runtime/exec_runner_test.exs b/packages/symphony/elixir/test/symphony_elixir/runtime/exec_runner_test.exs
new file mode 100644
index 000000000..ed0a4b278
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/runtime/exec_runner_test.exs
@@ -0,0 +1,85 @@
+defmodule SymphonyElixir.Runtime.ExecRunnerTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.IR.Node
+  alias SymphonyElixir.Runtime.ExecRunner
+
+  setup do
+    pack = Path.join(System.tmp_dir!(), "exec_runner_#{System.unique_integer([:positive])}")
+    File.mkdir_p!(Path.join(pack, "scripts"))
+    on_exit(fn -> File.rm_rf(pack) end)
+    {:ok, pack: pack}
+  end
+
+  # Writes `body` to `rel` under the pack dir (creating parent directories),
+  # applies `mode`, and returns `rel` unchanged.
+  defp write_script!(pack, rel, body, mode \\ 0o755) do
+    target = Path.join(pack, rel)
+
+    target |> Path.dirname() |> File.mkdir_p!()
+    File.write!(target, body)
+    File.chmod!(target, mode)
+
+    rel
+  end
+
+  # Builds a pending exec node "exec-0" whose "script" input is the literal
+  # `rel`. A non-nil `:timeout` option is added as a literal "timeout" input.
+  defp exec_node(rel, opts \\ []) do
+    script_input = %{"script" => {:literal, rel}}
+    timeout = Keyword.get(opts, :timeout)
+
+    inputs =
+      if is_nil(timeout) do
+        script_input
+      else
+        Map.put(script_input, "timeout", {:literal, timeout})
+      end
+
+    Node.new(id: "exec-0", ast_origin: "exec-0", kind: :exec, inputs: inputs, state: :pending)
+  end
+
+  test "a zero-exit script succeeds and captures output", %{pack: pack} do
+    rel = write_script!(pack, "scripts/ok.sh", "#!/bin/sh\necho hello world\n")
+
+    assert {:ok, %{kind: :exec, exit_code: 0, output: output}, nil} =
+             ExecRunner.run(exec_node(rel), %{run_id: "r", attempt: 1, pack_dir: pack})
+
+    assert output =~ "hello world"
+  end
+
+  test "a non-zero exit fails with the status and output tail", %{pack: pack} do
+    rel = write_script!(pack, "scripts/boom.sh", "#!/bin/sh\necho dying\nexit 3\n")
+
+    assert {:error, {:exec_failed, 3, output}, nil} =
+             ExecRunner.run(exec_node(rel), %{run_id: "r", attempt: 1, pack_dir: pack})
+
+    assert output =~ "dying"
+  end
+
+  test "the script path is resolved against the pack dir, not an absolute deploy path", %{pack: pack} do
+    # The script only prints its working directory so the test can observe it.
+    rel = write_script!(pack, "scripts/cwd.sh", "#!/bin/sh\npwd\n")
+
+    assert {:ok, %{output: output}, nil} =
+             ExecRunner.run(exec_node(rel), %{run_id: "r", attempt: 1, pack_dir: pack})
+
+    # The script runs with cwd = pack dir.
+    # NOTE(review): this compares `pwd` output with the unresolved temp path.
+    # Where the temp dir is reached through a symlink (e.g. /var ->
+    # /private/var on macOS) `pwd` may print the resolved path -- confirm.
+    assert String.trim(output) == pack
+  end
+
+  test "a missing script file fails loudly", %{pack: pack} do
+    assert {:error, {:exec_not_found, "scripts/ghost.sh"}, nil} =
+             ExecRunner.run(exec_node("scripts/ghost.sh"), %{run_id: "r", attempt: 1, pack_dir: pack})
+  end
+
+  test "a non-executable file fails loudly", %{pack: pack} do
+    rel = write_script!(pack, "scripts/plain.sh", "#!/bin/sh\ntrue\n", 0o644)
+
+    assert {:error, {:exec_not_executable, ^rel}, nil} =
+             ExecRunner.run(exec_node(rel), %{run_id: "r", attempt: 1, pack_dir: pack})
+  end
+
+  test "a node missing its script input fails rather than running an empty command", %{pack: pack} do
+    node = Node.new(id: "exec-0", ast_origin: "exec-0", kind: :exec, inputs: %{}, state: :pending)
+    assert {:error, :missing_exec_script, nil} = ExecRunner.run(node, %{run_id: "r", attempt: 1, pack_dir: pack})
+  end
+
+  test "a script that overruns its timeout is killed and reported", %{pack: pack} do
+    rel = write_script!(pack, "scripts/slow.sh", "#!/bin/sh\nsleep 30\n")
+
+    assert {:error, {:exec_timeout, 1, _output}, nil} =
+             ExecRunner.run(exec_node(rel, timeout: 1), %{run_id: "r", attempt: 1, pack_dir: pack})
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/runtime/ingress_test.exs b/packages/symphony/elixir/test/symphony_elixir/runtime/ingress_test.exs
new file mode 100644
index 000000000..9c4e48909
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/runtime/ingress_test.exs
@@ -0,0 +1,160 @@
+defmodule SymphonyElixir.Runtime.IngressTest do
+  use ExUnit.Case, async: false
+
+  @moduletag capture_log: true
+
+  alias SymphonyElixir.DSL.Parser
+  alias SymphonyElixir.IR.{Node, Store}
+  alias SymphonyElixir.Runtime.Ingress
+  alias SymphonyElixir.WorkflowCatalog
+
+  defmodule FakeEngine do
+    @behaviour SymphonyElixir.Runtime.EngineClient
+
+    @impl true
+    def run_node(%Node{id: id}, _opts), do: {:ok, %{ran: id}, "thread-#{id}"}
+
+    @impl true
+    def status(_thread_id), do: :unknown
+  end
+
+  setup do
+    start_supervised!({Registry, keys: :unique, name: SymphonyElixir.Runtime.Registry})
+    start_supervised!({Task.Supervisor, name: SymphonyElixir.TaskSupervisor})
+    start_supervised!(SymphonyElixir.Runtime.Supervisor)
+
+    tmp = Path.join(System.tmp_dir!(), "ingress_#{System.unique_integer([:positive])}")
+    File.mkdir_p!(tmp)
+    on_exit(fn -> File.rm_rf(tmp) end)
+
+    # A catalog over an isolated workflows dir so `start_by_trigger/2`
+    # resolves against the .sym files this test wrote, not the bundled pack.
+    workflows_dir = Path.join(System.tmp_dir!(), "ingress_wf_#{System.unique_integer([:positive])}")
+    File.mkdir_p!(workflows_dir)
+    on_exit(fn -> File.rm_rf(workflows_dir) end)
+    start_supervised!({WorkflowCatalog, workflows_dir: workflows_dir, poll_ms: 60_000})
+
+    {:ok, store_opts: [dir: tmp], workflows_dir: workflows_dir}
+  end
+
+  defp write_sym!(dir, name, body) do
+    File.write!(Path.join(dir, "#{name}.sym"), body)
+  end
+
+  # Parses `.sym` source into the entry map these tests hand to
+  # `Ingress.start_workflow/3`; `hash` is the raw SHA-256 digest of the source.
+  defp entry(source) do
+    {:ok, ast} = Parser.parse(source)
+    digest = :crypto.hash(:sha256, source)
+
+    %{name: ast.name, ast: ast, trigger: ast.trigger, source: source, hash: digest}
+  end
+
+  # `start_link` returns before the `:advance` continuation has written the
+  # first snapshot, so an early poll can find no run file at all. Any answer
+  # that is not a terminal graph (including a failed load) counts as "not
+  # yet" and is retried every 20 ms until `attempts` runs out.
+  defp wait_terminal(run_id, store_opts, attempts \\ 60) do
+    with {:ok, %{status: status} = graph} <- Store.load(run_id, store_opts),
+         true <- status in [:succeeded, :failed, :cancelled] do
+      graph
+    else
+      _not_yet ->
+        if attempts == 0 do
+          flunk("run #{run_id} never terminal")
+        else
+          Process.sleep(20)
+          wait_terminal(run_id, store_opts, attempts - 1)
+        end
+    end
+  end
+
+  test "materializes a workflow and runs it under supervision", %{store_opts: store_opts} do
+    e = entry(~s|workflow "demo" on manual { a <- agent { engine: codex, model: "m", prompt: inline "go" } }|)
+
+    assert {:ok, %{run_id: run_id, pid: pid}} =
+             Ingress.start_workflow(e, %{kind: :manual, input: %{}}, engine: FakeEngine, store_opts: store_opts)
+
+    assert is_pid(pid)
+    final = wait_terminal(run_id, store_opts)
+
+    assert final.status == :succeeded
+    assert final.source_hash == e.hash
+    # The trigger event is stamped on the run and survives the store round-trip.
+    assert final.trigger == %{kind: :manual, input: %{}}
+  end
+
+  test "the generated run id is slugged from the workflow name", %{store_opts: store_opts} do
+    e = entry(~s|workflow "Nightly GC" on manual { a <- agent { engine: codex, model: "m", prompt: inline "go" } }|)
+
+    assert {:ok, %{run_id: run_id}} =
+             Ingress.start_workflow(e, nil, engine: FakeEngine, store_opts: store_opts)
+
+    assert String.starts_with?(run_id, "nightly-gc-")
+  end
+
+  test "an explicit run_id is honored", %{store_opts: store_opts} do
+    e = entry(~s|workflow "w" on manual { a <- agent { engine: codex, model: "m", prompt: inline "go" } }|)
+
+    assert {:ok, %{run_id: "fixed-id"}} =
+             Ingress.start_workflow(e, nil, run_id: "fixed-id", engine: FakeEngine, store_opts: store_opts)
+
+    assert wait_terminal("fixed-id", store_opts).status == :succeeded
+  end
+
+  test "start_by_trigger fans out to every workflow matching the event", %{store_opts: store_opts, workflows_dir: dir} do
+    write_sym!(dir, "label-a", ~s|workflow "label-a" on github_pr_label repo "acme/app" label "ship" { a <- agent { engine: codex, model: "m", prompt: inline "go" } }|)
+    write_sym!(dir, "label-b", ~s|workflow "label-b" on github_pr_label repo "acme/app" label "ship" { b <- agent { engine: codex, model: "m", prompt: inline "go" } }|)
+    write_sym!(dir, "other-repo", ~s|workflow "other-repo" on github_pr_label repo "acme/other" label "ship" { c <- agent { engine: codex, model: "m", prompt: inline "go" } }|)
+    WorkflowCatalog.scan(dir)
+
+    event = %{kind: :github_pr_label, repo: "acme/app", label: "ship", pr_number: 7}
+
+    assert {:ok, started} = Ingress.start_by_trigger(event, engine: FakeEngine, store_opts: store_opts)
+    assert length(started) == 2
+
+    for %{run_id: run_id} <- started do
+      final = wait_terminal(run_id, store_opts)
+      assert final.status == :succeeded
+      # The inbound event is the run's trigger context.
+      assert final.trigger == event
+    end
+  end
+
+  test "start_by_trigger is a no-op when no workflow matches", %{store_opts: store_opts, workflows_dir: dir} do
+    write_sym!(dir, "label-a", ~s|workflow "label-a" on github_pr_label repo "acme/app" label "ship" { a <- agent { engine: codex, model: "m", prompt: inline "go" } }|)
+    WorkflowCatalog.scan(dir)
+
+    assert {:ok, []} =
+             Ingress.start_by_trigger(
+               %{kind: :github_pr_label, repo: "acme/app", label: "nope"},
+               engine: FakeEngine,
+               store_opts: store_opts
+             )
+  end
+
+  test "start_by_trigger matches a linear label against the event's labels", %{store_opts: store_opts, workflows_dir: dir} do
+    write_sym!(dir, "triage", ~s|workflow "triage" on linear label "[sym] triage" { a <- agent { engine: codex, model: "m", prompt: inline "go" } }|)
+    WorkflowCatalog.scan(dir)
+
+    event = %{kind: :linear, labels: ["other", "[sym] triage"], issue_id: "ISS-1"}
+
+    assert {:ok, [%{run_id: run_id}]} =
+             Ingress.start_by_trigger(event, engine: FakeEngine, store_opts: store_opts)
+
+    assert wait_terminal(run_id, store_opts).status == :succeeded
+  end
+
+  test "seen_trigger? is the producer dedup read over IR runs", %{store_opts: store_opts, workflows_dir: dir} do
+    write_sym!(dir, "triage", ~s|workflow "triage" on linear label "[sym] triage" { a <- agent { engine: codex, model: "m", prompt: inline "go" } }|)
+    WorkflowCatalog.scan(dir)
+
+    issue_match = fn
+      {_status, %{kind: :linear, issue_id: "ISS-7"}} -> true
+      {_status, _trigger} -> false
+    end
+
+    refute Ingress.seen_trigger?(issue_match, store_opts: store_opts)
+
+    event = %{kind: :linear, labels: ["[sym] triage"], issue_id: "ISS-7"}
+    assert {:ok, [%{run_id: run_id}]} = Ingress.start_by_trigger(event, engine: FakeEngine, store_opts: store_opts)
+    wait_terminal(run_id, store_opts)
+
+    # The run persisted its trigger, so the dedup read now sees the issue.
+    assert Ingress.seen_trigger?(issue_match, store_opts: store_opts)
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/runtime/operator_controls_test.exs b/packages/symphony/elixir/test/symphony_elixir/runtime/operator_controls_test.exs
new file mode 100644
index 000000000..a1657d12b
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/runtime/operator_controls_test.exs
@@ -0,0 +1,180 @@
+defmodule SymphonyElixir.Runtime.OperatorControlsTest do
+  @moduledoc """
+  The #97 operator surface: cancel, retry, rerun, and clear-failed, each
+  recording a durable audit event. Driven against a fake engine so a node
+  can be made to fail on demand.
+  """
+  use ExUnit.Case, async: false
+
+  @moduletag capture_log: true
+
+  alias SymphonyElixir.Engine.Envelope
+  alias SymphonyElixir.IR.{Graph, Node, RunGraph, Store}
+  alias SymphonyElixir.Runtime
+
+  defmodule FakeEngine do
+    @behaviour SymphonyElixir.Runtime.EngineClient
+
+    @table :operator_controls_fake
+
+    def setup do
+      if :ets.whereis(@table) == :undefined, do: :ets.new(@table, [:named_table, :public, :set])
+      :ets.delete_all_objects(@table)
+      :ok
+    end
+
+    def program(node_id, instruction), do: :ets.insert(@table, {node_id, instruction})
+
+    @impl true
+    def run_node(%Node{id: id}, _opts) do
+      case :ets.lookup(@table, id) do
+        [{^id, {:error, reason}}] -> {:error, reason, nil}
+        [{^id, {:ok, output}}] -> {:ok, output, "thread-#{id}"}
+        [] -> {:ok, %{ran: id}, "thread-#{id}"}
+      end
+    end
+
+    @impl true
+    def status(_thread_id), do: :unknown
+  end
+
+  setup do
+    FakeEngine.setup()
+    start_supervised!({Registry, keys: :unique, name: SymphonyElixir.Runtime.Registry})
+    start_supervised!({Task.Supervisor, name: SymphonyElixir.TaskSupervisor})
+
+    tmp = Path.join(System.tmp_dir!(), "op_ctrl_#{System.unique_integer([:positive])}")
+    File.mkdir_p!(tmp)
+    on_exit(fn -> File.rm_rf(tmp) end)
+    {:ok, store_opts: [dir: tmp]}
+  end
+
+  # Agent nodes so each attempt routes through the injected engine. The
+  # operator surface is engine-agnostic; an agent node is the kind that
+  # actually consults the EngineClient (exec nodes run locally).
+  defp agent_node(id, inputs \\ %{}) do
+    Node.new(
+      id: id,
+      ast_origin: {:agent, id},
+      kind: :agent,
+      envelope: %Envelope{engine: :codex, model: "m"},
+      prompt_ref: {:inline, "go"},
+      inputs: inputs,
+      state: :pending
+    )
+  end
+
+  # Two-node chain a -> b: b takes a's output as its "x" input, so b only
+  # runs after a. The graph is handed back already marked :running.
+  defp chain_graph(run_id) do
+    upstream = agent_node("a")
+    downstream = agent_node("b", %{"x" => {:node, "a", []}})
+
+    run_id
+    |> RunGraph.new("hash", nil)
+    |> RunGraph.put_nodes([upstream, downstream])
+    |> Map.put(:status, :running)
+  end
+
+  # Polls the store until `run_id` reaches a terminal status and returns that
+  # graph; flunks once `attempts` retries (25 ms apart) are used up. A load
+  # that does not return `{:ok, graph}` (for example before the first
+  # snapshot is written) is retried like any other non-terminal answer
+  # instead of crashing the test on a failed match.
+  defp wait_terminal(run_id, store_opts, attempts \\ 40) do
+    case Store.load(run_id, store_opts) do
+      {:ok, %{status: status} = graph} when status in [:succeeded, :failed, :cancelled] ->
+        graph
+
+      {:ok, graph} when attempts == 0 ->
+        flunk("run #{run_id} never terminal: #{graph.status}")
+
+      other when attempts == 0 ->
+        flunk("run #{run_id} never terminal: #{inspect(other)}")
+
+      _not_yet ->
+        # Sleep and recurse as separate statements rather than chaining on
+        # the return value of `Process.sleep/1`.
+        Process.sleep(25)
+        wait_terminal(run_id, store_opts, attempts - 1)
+    end
+  end
+
+  test "clear_failed resets failed nodes and the rerun succeeds", %{store_opts: store_opts} do
+    FakeEngine.program("a", {:error, :boom})
+    graph = chain_graph("run_clear")
+
+    {:ok, pid} = Runtime.start_link(graph, engine: FakeEngine, store_opts: store_opts)
+
+    # Wait for the run to fail (a fails, b becomes upstream_failed).
+    failed = wait_for(pid, fn g -> Graph.all_terminal?(g) end)
+    assert failed.nodes["a"].state == :failed
+    assert failed.nodes["b"].state == :upstream_failed
+
+    # Fix the cause, then clear the failed nodes. They re-run and succeed,
+    # at which point the run reaches a terminal :succeeded and the GenServer
+    # stops, so read the recovered state from the store.
+    FakeEngine.program("a", {:ok, %{fixed: true}})
+    ref = Process.monitor(pid)
+    :ok = Runtime.clear_failed(pid, "alice")
+    assert_receive {:DOWN, ^ref, :process, _, _}, 2_000
+
+    recovered = wait_terminal("run_clear", store_opts)
+    assert recovered.status == :succeeded
+    assert recovered.nodes["a"].state == :succeeded
+    assert recovered.nodes["b"].state == :succeeded
+
+    # The clear_failed action is recorded with the actor and the cleared ids.
+    event = Enum.find(recovered.audit_log, &(&1.action == :clear_failed))
+    assert event.actor == "alice"
+    assert Enum.sort(event.detail.cleared) == ["a", "b"]
+  end
+
+  test "cancel records an audit event and stops the run", %{store_opts: store_opts} do
+    # Program `a` to succeed with an empty output.
+    # NOTE(review): the fake engine answers immediately, so nothing here holds
+    # a node in flight. The test appears to rely on the cancel call being
+    # handled before the run finishes on its own -- confirm this cannot race.
+    FakeEngine.program("a", {:ok, %{}})
+    graph = chain_graph("run_cancel")
+    {:ok, pid} = Runtime.start_link(graph, engine: FakeEngine, store_opts: store_opts)
+    ref = Process.monitor(pid)
+
+    :ok = Runtime.cancel(pid, "bob")
+    assert_receive {:DOWN, ^ref, :process, _, _}, 2_000
+
+    # The runtime process is gone, so read the final state from the store.
+    {:ok, final} = Store.load("run_cancel", store_opts)
+    assert final.status == :cancelled
+    event = Enum.find(final.audit_log, &(&1.action == :cancel))
+    assert event.actor == "bob"
+  end
+
+  test "retry_node re-runs only the target node and records the audit event", %{store_opts: store_opts} do
+    # A single independent node so the surgical retry can drive the run to a
+    # clean terminal without an upstream_failed dependent lingering.
+    node = agent_node("a")
+    graph = RunGraph.new("run_retry", "hash", nil) |> RunGraph.put_nodes([node]) |> Map.put(:status, :running)
+
+    FakeEngine.program("a", {:error, :nope})
+    {:ok, pid} = Runtime.start_link(graph, engine: FakeEngine, store_opts: store_opts)
+
+    wait_for(pid, fn g -> g.nodes["a"].state == :failed end)
+    FakeEngine.program("a", {:ok, %{}})
+    ref = Process.monitor(pid)
+    :ok = Runtime.retry_node(pid, "a", "carol")
+    assert_receive {:DOWN, ^ref, :process, _, _}, 2_000
+
+    final = wait_terminal("run_retry", store_opts)
+    assert final.nodes["a"].state == :succeeded
+    event = Enum.find(final.audit_log, &(&1.action == :retry_node))
+    assert event.target == "a"
+    assert event.actor == "carol"
+  end
+
+  test "audit log survives a store round-trip", %{store_opts: store_opts} do
+    graph =
+      chain_graph("run_audit_rt")
+      |> RunGraph.append_audit(:clear_failed, nil, "dave", %{cleared: ["a"]})
+      |> RunGraph.append_audit(:cancel, "b", :system, %{})
+
+    :ok = Store.persist(graph, store_opts)
+    {:ok, loaded} = Store.load("run_audit_rt", store_opts)
+
+    assert [first, second] = loaded.audit_log
+    assert first.action == :clear_failed
+    assert first.actor == "dave"
+    assert first.detail == %{cleared: ["a"]}
+    assert second.action == :cancel
+    assert second.target == "b"
+    assert second.actor == :system
+  end
+
+  # Poll the live runtime's graph snapshot until `pred` holds, then return
+  # that snapshot. Flunks once `attempts` retries (20 ms apart) are used up.
+  defp wait_for(pid, pred, attempts \\ 80) do
+    graph = Runtime.graph(pid)
+
+    cond do
+      pred.(graph) ->
+        graph
+
+      attempts == 0 ->
+        flunk("condition never held; last status=#{graph.status}")
+
+      true ->
+        Process.sleep(20)
+        wait_for(pid, pred, attempts - 1)
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/runtime/placement_test.exs b/packages/symphony/elixir/test/symphony_elixir/runtime/placement_test.exs
new file mode 100644
index 000000000..f4e3447f7
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/runtime/placement_test.exs
@@ -0,0 +1,590 @@
+defmodule SymphonyElixir.Runtime.PlacementTest do
+  use ExUnit.Case, async: false
+
+  alias SymphonyElixir.Config
+  alias SymphonyElixir.Runtime.Placement
+
+  # A direct-connect config so the lifecycle resolves a VM address rather
+  # than opening a real port-forward Port. `room: %{registry_url: nil}` keeps
+  # the room-registry calls inert (no HTTP). No real `ix` runs: every
+  # command goes through the injected stub driver below.
+  # The example pack's single-repo manifest, so the clone script the
+  # lifecycle builds has a real catalog to render without booting Config.
+  @repositories_file Path.expand("../../../../workflows/example/repositories.yaml", __DIR__)
+
+  defp config(overrides \\ %{}) do
+    base = %Config{
+      ix_command: "ix",
+      ix_image: "ix/symphony-codex:test",
+      ix_room_server_command: "room-server",
+      ix_region: nil,
+      ix_room_port: 8080,
+      ix_room_connect: "direct",
+      ix_local_port_base: 18_080,
+      ix_keep_vm?: false,
+      ix_create_timeout_ms: 120_000,
+      ix_env_passthrough: [],
+      github_token: nil,
+      github_app_bot_username: nil,
+      github_app_bot_email: nil,
+      repositories_file: @repositories_file,
+      room: %{server_url: nil, registry_url: nil, registry_token: nil, advertise_host: nil},
+      placement_fallback: :host,
+      host_user: "agentuser",
+      host_group: nil,
+      host_workspaces_dir: nil,
+      host_room_server_command: "room-server",
+      host_systemd_run_command: "systemd-run",
+      host_keep?: false
+    }
+    # struct!/2 raises on an override key Config lacks; struct/2 would silently drop a typo.
+    struct!(base, overrides)
+  end
+
+  # Stub driver for the ixvm path: forwards every `ix` argv to `test_pid` as
+  # `{:ix_cmd, args}` and reports a fixed VM record, so acquire/release run
+  # with no VM and no shell-out. The room wait always passes.
+  defp recording_driver(test_pid) do
+    record_ix = fn _config, args, _timeout ->
+      send(test_pid, {:ix_cmd, args})
+      :ok
+    end
+
+    %{
+      ix_cmd: record_ix,
+      ix_vm_by_name: fn _config, vm_name -> {:ok, %{"name" => vm_name, "ipv4" => "10.0.0.5"}} end,
+      wait_for_room: fn _url, _timeout -> :ok end,
+      port_forward: fn _config, _vm, _mapping -> {:error, :should_not_port_forward_in_direct_mode} end,
+      stop_port_forward: fn _port -> :ok end
+    }
+  end
+
+  # Stub driver for the host path. Every `ix`, `systemd-run`, and
+  # `systemctl stop` call is forwarded to `test_pid` as a tagged message;
+  # the passwd lookup, the picked port, and the room wait answer from
+  # fixed values, so no real unit is ever started.
+  defp host_driver(test_pid) do
+    # Forward one call to the test process and report success.
+    notify = fn tag, payload ->
+      send(test_pid, {tag, payload})
+      :ok
+    end
+
+    %{
+      # Shared ixvm seam.
+      ix_cmd: fn _config, args, _timeout -> notify.(:ix_cmd, args) end,
+      ix_vm_by_name: fn _config, vm_name -> {:ok, %{"name" => vm_name, "ipv4" => "10.0.0.5"}} end,
+      wait_for_room: fn _url, _timeout -> :ok end,
+      port_forward: fn _config, _vm, _mapping -> {:error, :unused} end,
+      stop_port_forward: fn _port -> :ok end,
+      # Host lifecycle seam.
+      host_passwd: fn _config, user -> {:ok, "#{user}:x:1000:1000::/home/#{user}:/bin/bash"} end,
+      systemd_run: fn _config, args, _timeout -> notify.(:systemd_run, args) end,
+      systemctl_stop: fn unit -> notify.(:systemctl_stop, unit) end,
+      pick_port: fn -> 41_234 end
+    }
+  end
+
+  setup do
+    # Drop any leftover registry table; Placement is unsupervised here and recreates it lazily on write.
+    case :ets.whereis(:symphony_placement) do
+      :undefined -> :ok
+      _table -> :ets.delete(:symphony_placement)
+    end
+
+    :ok
+  end
+
+  test "acquire provisions a per-run room-server and resolves its base url" do
+    placement_opts = [config: config(), driver: recording_driver(self())]
+
+    assert {:ok, "http://10.0.0.5:8080"} = Placement.acquire("run_alpha", :ixvm, placement_opts)
+    assert {:ok, "http://10.0.0.5:8080"} = Placement.base_url("run_alpha")
+
+    # The recorded create command carries the run's VM name and the configured image.
+    assert_received {:ix_cmd, ["new", "ix/symphony-codex:test", "--name", created_vm, "--l7-proxy-port", "8080", "--no-shell"]}
+    assert String.starts_with?(created_vm, "sym-run-alpha-")
+  end
+
+  test "create_vm is invoked with config.ix_create_timeout_ms, not a hardcoded constant" do
+    observer = self()
+    create_timeout_ms = 30_000
+
+    # Unlike the shared recording driver, this one also forwards the timeout
+    # each ix_cmd call receives, so the value reaching the driver is observable.
+    driver = %{
+      ix_cmd: fn _config, args, timeout ->
+        send(observer, {:ix_cmd, args, timeout})
+        :ok
+      end,
+      ix_vm_by_name: fn _config, vm_name -> {:ok, %{"name" => vm_name, "ipv4" => "10.0.0.5"}} end,
+      wait_for_room: fn _url, _timeout -> :ok end,
+      port_forward: fn _config, _vm, _mapping -> {:error, :unused} end,
+      stop_port_forward: fn _port -> :ok end
+    }
+
+    cfg = config(%{ix_create_timeout_ms: create_timeout_ms})
+
+    assert {:ok, _url} = Placement.acquire("run_timeout_check", :ixvm, config: cfg, driver: driver)
+
+    # The `ix new` (create) call must carry the configured timeout rather
+    # than the old 15-minute module constant.
+    assert_received {:ix_cmd, ["new" | _], ^create_timeout_ms}
+  end
+
+  test "acquire is idempotent: a second call returns the same url without re-provisioning" do
+    placement_opts = [config: config(), driver: recording_driver(self())]
+
+    assert {:ok, first_url} = Placement.acquire("run_beta", :ixvm, placement_opts)
+
+    # Discard the ix commands the first acquire recorded.
+    drain_ix_cmds()
+
+    assert {:ok, ^first_url} = Placement.acquire("run_beta", :ixvm, placement_opts)
+
+    # The repeat acquire must not have issued any ix command.
+    refute_received {:ix_cmd, _args}
+  end
+
+  test "release tears the vm down and drops the per-run url" do
+    placement_opts = [config: config(), driver: recording_driver(self())]
+    run_id = "run_gamma"
+
+    assert {:ok, _url} = Placement.acquire(run_id, :ixvm, placement_opts)
+    drain_ix_cmds()
+
+    assert :ok = Placement.release(run_id, placement_opts)
+    assert :error = Placement.base_url(run_id)
+
+    # Release issues a forced `rm` that names the run's VM.
+    assert_received {:ix_cmd, ["rm", "--force", removed_vm]}
+    assert String.starts_with?(removed_vm, "sym-run-gamma-")
+  end
+
+  test "release is a no-op for a run that never acquired a placement" do
+    assert Placement.release("run_never", config: config(), driver: recording_driver(self())) == :ok
+    refute_received {:ix_cmd, _argv}
+  end
+
+  test "base_url is :error for an unknown run" do
+    assert Placement.base_url("run_unknown") == :error
+  end
+
+  test "a setup failure surfaces as ixvm_setup_failed and removes the partial vm" do
+    create_fails = %{
+      ix_cmd: fn _config, args, _timeout ->
+        send(self(), {:ix_cmd, args})
+
+        case args do
+          ["new" | _] -> {:error, {:ix_cli_failed, args, 1, "boom"}}
+          _other -> :ok
+        end
+      end,
+      ix_vm_by_name: fn _config, vm_name -> {:ok, %{"name" => vm_name, "ipv4" => "10.0.0.5"}} end,
+      wait_for_room: fn _url, _timeout -> :ok end,
+      port_forward: fn _config, _vm, _mapping -> {:error, :unused} end,
+      stop_port_forward: fn _port -> :ok end
+    }
+
+    # With placement_fallback: :none nothing masks the raw ixvm failure; the
+    # ixvm -> host fallback is covered by its own describe block.
+    placement_opts = [config: config(placement_fallback: :none), driver: create_fails]
+    assert {:error, {:ixvm_setup_failed, _reason}} = Placement.acquire("run_delta", :ixvm, placement_opts)
+
+    assert :error = Placement.base_url("run_delta")
+  end
+
+  describe "host placement" do
+    test "acquire provisions a per-run systemd-run room-server and resolves its loopback url" do
+      placement_opts = [config: config(), driver: host_driver(self())]
+
+      assert {:ok, "http://127.0.0.1:41234"} = Placement.acquire("run_host", {:host, "box"}, placement_opts)
+      assert {:ok, "http://127.0.0.1:41234"} = Placement.base_url("run_host")
+
+      # First comes the workspace clone: a named "-setup" unit under the
+      # polkit-scoped prefix that drops privileges to the host user.
+      assert_received {:systemd_run, clone_args}
+      assert "--uid=agentuser" in clone_args
+      assert Enum.any?(clone_args, &String.starts_with?(&1, "--unit=symphony-host-"))
+      assert Enum.any?(clone_args, &String.ends_with?(&1, "-setup.service"))
+
+      # Then the long-lived room-server unit, started without --wait.
+      assert_received {:systemd_run, server_args}
+      refute "--wait" in server_args
+      assert Enum.any?(server_args, &String.starts_with?(&1, "--unit=symphony-host-"))
+      assert "room-server" in server_args or Enum.any?(server_args, &String.ends_with?(&1, "room-server"))
+    end
+
+    test "an advertised host binds and resolves a reachable url instead of loopback" do
+      defaults = config()
+      advertised = %{defaults | room: %{defaults.room | advertise_host: "100.0.0.7"}}
+      placement_opts = [config: advertised, driver: host_driver(self())]
+
+      # Both the acquired and the looked-up url carry the advertised host, so
+      # the central room.ix.dev can reach the per-run server (not 127.0.0.1).
+      assert {:ok, "http://100.0.0.7:41234"} = Placement.acquire("run_adv", {:host, "box"}, placement_opts)
+      assert {:ok, "http://100.0.0.7:41234"} = Placement.base_url("run_adv")
+
+      # The advertised host must also appear in the room-server unit's argv,
+      # i.e. the server binds it rather than only advertising it.
+      assert_received {:systemd_run, _clone_args}
+      assert_received {:systemd_run, server_args}
+      assert "100.0.0.7" in server_args
+    end
+
+    test "the minted bot token authors the clone auth and room-server env over the static host token" do
+      placement_opts = [config: config(github_token: "human-token"), driver: host_driver(self()), bot_token: "app-token"]
+
+      assert {:ok, _url} = Placement.acquire("run_bot_token", :host, placement_opts)
+
+      # The "-setup" unit runs the clone; its script (the last argv element)
+      # must carry the App token as the git auth header, never the host token.
+      assert_received {:systemd_run, clone_args}
+      clone_script = List.last(clone_args)
+      assert clone_script =~ Base.encode64("x-access-token:app-token")
+      refute clone_script =~ Base.encode64("x-access-token:human-token")
+
+      # gh pr create authors as GH_TOKEN, so the long-lived room-server unit
+      # has to carry the App token in both GitHub vars (ENG-2012).
+      assert_received {:systemd_run, server_args}
+      assert "--setenv=GITHUB_TOKEN=app-token" in server_args
+      assert "--setenv=GH_TOKEN=app-token" in server_args
+      refute Enum.any?(server_args, &(&1 =~ "human-token"))
+    end
+
+    test "release stops the unit and removes the checkout" do
+      placement_opts = [config: config(), driver: host_driver(self())]
+
+      assert {:ok, _url} = Placement.acquire("run_host2", :host, placement_opts)
+      drain_systemd_runs()
+
+      assert :ok = Placement.release("run_host2", placement_opts)
+      assert :error = Placement.base_url("run_host2")
+
+      assert_received {:systemctl_stop, stopped_unit}
+      assert String.starts_with?(stopped_unit, "symphony-host-")
+      assert String.ends_with?(stopped_unit, ".service")
+
+      # The checkout cleanup is dispatched as a "-clean" sync unit.
+      assert_received {:systemd_run, cleanup_args}
+      assert Enum.any?(cleanup_args, &String.ends_with?(&1, "-clean.service"))
+    end
+
+    test "host_keep? leaves the unit and checkout in place on release" do
+      placement_opts = [config: config(host_keep?: true), driver: host_driver(self())]
+
+      assert {:ok, _url} = Placement.acquire("run_keep", :host, placement_opts)
+      drain_systemd_runs()
+
+      assert :ok = Placement.release("run_keep", placement_opts)
+      refute_received {:systemctl_stop, _stopped_unit}
+      refute_received {:systemd_run, _cleanup_args}
+    end
+
+    test "host setup fails fast when the host user is not configured" do
+      placement_opts = [config: config(host_user: nil), driver: host_driver(self())]
+
+      assert {:error, {:host_setup_failed, :host_user_not_configured}} =
+               Placement.acquire("run_nouser", :host, placement_opts)
+
+      assert :error = Placement.base_url("run_nouser")
+    end
+  end
+
+  describe "ixvm -> host fallback" do
+    test "an ixvm setup failure falls back to a host room-server under the same run id" do
+      failing_ixvm =
+        Map.put(host_driver(self()), :ix_cmd, fn _config, args, _timeout ->
+          send(self(), {:ix_cmd, args})
+
+          case args do
+            ["new" | _] -> {:error, {:ix_cli_failed, args, 1, "no capacity"}}
+            _other -> :ok
+          end
+        end)
+
+      placement_opts = [config: config(placement_fallback: :host), driver: failing_ixvm]
+
+      # The host driver with only `ix new` failing: the node declared :ixvm,
+      # provisioning fails, and the placement resolves to the host
+      # room-server's loopback url under the same run id. The engine turn
+      # looks the url up by run id, so it never learns that the placement
+      # fell back.
+      assert {:ok, "http://127.0.0.1:41234"} = Placement.acquire("run_fb", :ixvm, placement_opts)
+      assert {:ok, "http://127.0.0.1:41234"} = Placement.base_url("run_fb")
+    end
+
+    test "fallback :local resolves to no per-run placement (the client uses the default url)" do
+      failing_ixvm =
+        Map.put(host_driver(self()), :ix_cmd, fn _config, args, _timeout ->
+          case args do
+            ["new" | _] -> {:error, {:ix_cli_failed, args, 1, "boom"}}
+            _other -> :ok
+          end
+        end)
+
+      placement_opts = [config: config(placement_fallback: :local), driver: failing_ixvm]
+
+      # Acquire reports that no placement is needed, and nothing is
+      # registered under the run id.
+      assert {:error, {:no_placement_needed, :local}} = Placement.acquire("run_fb_local", :ixvm, placement_opts)
+      assert :error = Placement.base_url("run_fb_local")
+    end
+
+    test "fallback :none leaves the original ixvm setup failure standing" do
+      failing_ixvm =
+        Map.put(host_driver(self()), :ix_cmd, fn _config, args, _timeout ->
+          case args do
+            ["new" | _] -> {:error, {:ix_cli_failed, args, 1, "boom"}}
+            _other -> :ok
+          end
+        end)
+
+      placement_opts = [config: config(placement_fallback: :none), driver: failing_ixvm]
+
+      # The ixvm error is returned as-is and nothing is registered under
+      # the run id.
+      assert {:error, {:ixvm_setup_failed, _reason}} = Placement.acquire("run_fb_none", :ixvm, placement_opts)
+      assert :error = Placement.base_url("run_fb_none")
+    end
+  end
+
+  describe "ixvm -> remote fallback" do
+    # Driver whose `ix new` always fails, with the remote seam answered by a
+    # fake worker so the fallback needs no real registry or channel.
+    defp remote_driver(test_pid, overrides \\ %{}) do
+      worker = %{worker_id: "w1", pid: test_pid, address: "100.0.0.9", labels: [], capacity: 0, registered_at: 0}
+
+      defaults = %{
+        ix_cmd: fn _config, args, _timeout ->
+          case args do
+            ["new" | _] -> {:error, {:ix_cli_failed, args, 1, "boom"}}
+            _other -> :ok
+          end
+        end,
+        ix_vm_by_name: fn _config, vm_name -> {:ok, %{"name" => vm_name}} end,
+        wait_for_room: fn _url, _timeout -> :ok end,
+        worker_select: fn label ->
+          send(test_pid, {:worker_select, label})
+          {:ok, worker}
+        end,
+        worker_get: fn _id -> {:ok, worker} end,
+        worker_provision: fn ^worker, run_id, spec, _timeout ->
+          send(test_pid, {:worker_provision, run_id, spec})
+          {:ok, %{base_url: "http://100.0.0.9:9100", primary_workspace: "/home/hari/symphony-workspaces/#{run_id}/example"}}
+        end,
+        worker_teardown: fn ^worker, run_id, _timeout ->
+          send(test_pid, {:worker_teardown, run_id})
+          :ok
+        end
+      }
+
+      # Entries in `overrides` replace the defaults above.
+      Map.merge(defaults, overrides)
+    end
+
+    test "an ixvm failure falls back to a remote worker's room-server" do
+      placement_opts = [config: config(placement_fallback: :remote), driver: remote_driver(self())]
+
+      assert {:ok, "http://100.0.0.9:9100"} = Placement.acquire("run_rem", :ixvm, placement_opts)
+      assert {:ok, %{location: :remote, base_url: "http://100.0.0.9:9100"}} = Placement.resolved("run_rem")
+      assert_received {:worker_select, nil}
+      assert_received {:worker_provision, "run_rem", %{env: _, bot_token: _}}
+    end
+
+    test "the dispatched spec carries the run's repository catalog so the worker clones the real repos" do
+      cfg = config(placement_fallback: :remote)
+      placement_opts = [config: cfg, driver: remote_driver(self())]
+
+      assert {:ok, _url} = Placement.acquire("run_rem_repos", :ixvm, placement_opts)
+      assert_received {:worker_provision, "run_rem_repos", %{repositories: dispatched}}
+      assert dispatched == SymphonyElixir.RepositoryCatalog.all(cfg)
+      assert dispatched != []
+    end
+
+    test "the dispatched spec carries the bot commit identity so the worker clone authors as the App" do
+      placement_opts = [
+        config:
+          config(
+            placement_fallback: :remote,
+            github_app_bot_username: "ix-playbook-agent[bot]",
+            github_app_bot_email: "ix-playbook-agent[bot]@users.noreply.github.com"
+          ),
+        driver: remote_driver(self())
+      ]
+
+      assert {:ok, _url} = Placement.acquire("run_rem_bot", :ixvm, placement_opts)
+
+      assert_received {:worker_provision, "run_rem_bot",
+                       %{
+                         bot_username: "ix-playbook-agent[bot]",
+                         bot_email: "ix-playbook-agent[bot]@users.noreply.github.com"
+                       }}
+    end
+
+    test "select uses the configured worker label" do
+      placement_opts = [config: config(placement_fallback: :remote, worker_select_label: "hari"), driver: remote_driver(self())]
+
+      assert {:ok, _url} = Placement.acquire("run_rem_lbl", :ixvm, placement_opts)
+      assert_received {:worker_select, "hari"}
+    end
+
+    test "a remote placement resolves the worker-side primary checkout as its cwd" do
+      placement_opts = [config: config(placement_fallback: :remote), driver: remote_driver(self())]
+      assert {:ok, _url} = Placement.acquire("run_rem_cwd", :ixvm, placement_opts)
+
+      assert {:ok, "/home/hari/symphony-workspaces/run_rem_cwd/example"} =
+               Placement.workspace_cwd("run_rem_cwd", placement_opts)
+    end
+
+    test "release dispatches teardown to the worker" do
+      placement_opts = [config: config(placement_fallback: :remote), driver: remote_driver(self())]
+      assert {:ok, _url} = Placement.acquire("run_rem_rel", :ixvm, placement_opts)
+
+      assert :ok = Placement.release("run_rem_rel", placement_opts)
+      assert_received {:worker_teardown, "run_rem_rel"}
+      assert :error = Placement.base_url("run_rem_rel")
+    end
+
+    test "no connected worker surfaces the original ixvm failure" do
+      no_worker = remote_driver(self(), %{worker_select: fn _label -> {:error, :no_worker} end})
+      placement_opts = [config: config(placement_fallback: :remote), driver: no_worker]
+
+      assert {:error, {:ixvm_setup_failed, _reason}} = Placement.acquire("run_rem_none", :ixvm, placement_opts)
+      assert :error = Placement.base_url("run_rem_none")
+    end
+  end
+
+  describe "workspace_cwd/2" do
+    test "a host placement resolves the primary-repo checkout under the host run root" do
+      placement_opts = [config: config(), driver: host_driver(self())]
+      assert {:ok, _url} = Placement.acquire("run_cwd_host", :host, placement_opts)
+
+      assert {:ok, "/home/agentuser/symphony-workspaces/run_cwd_host/example"} =
+               Placement.workspace_cwd("run_cwd_host", placement_opts)
+    end
+
+    test "an ixvm placement resolves the VM-internal primary-repo checkout" do
+      placement_opts = [config: config(), driver: recording_driver(self())]
+      assert {:ok, _url} = Placement.acquire("run_cwd_ix", :ixvm, placement_opts)
+
+      assert {:ok, "/workspace/symphony/run_cwd_ix/example"} =
+               Placement.workspace_cwd("run_cwd_ix", placement_opts)
+    end
+
+    test "an ixvm node that fell back to host resolves the host checkout" do
+      failing_ixvm =
+        Map.put(host_driver(self()), :ix_cmd, fn _config, args, _timeout ->
+          case args do
+            ["new" | _] -> {:error, {:ix_cli_failed, args, 1, "no capacity"}}
+            _other -> :ok
+          end
+        end)
+
+      placement_opts = [config: config(placement_fallback: :host), driver: failing_ixvm]
+      assert {:ok, _url} = Placement.acquire("run_cwd_fb", :ixvm, placement_opts)
+
+      # Only `ix new` fails here, so the :host fallback takes over. The
+      # node declared :ixvm, yet the cwd must follow the effective host
+      # placement: the expected path is the host user's checkout, which is
+      # where the clone actually landed.
+      assert {:ok, "/home/agentuser/symphony-workspaces/run_cwd_fb/example"} =
+               Placement.workspace_cwd("run_cwd_fb", placement_opts)
+    end
+
+    test "a run with no acquired placement has no cwd" do
+      assert Placement.workspace_cwd("run_cwd_none") == :error
+    end
+  end
+
+  defp drain_ix_cmds do
+    receive do
+      {:ix_cmd, _argv} -> drain_ix_cmds()
+    after
+      0 -> :ok
+    end
+  end
+
+  # Discards queued `{:systemd_run, _}` and `{:ix_cmd, _}` messages without blocking.
+  defp drain_systemd_runs do
+    receive do
+      {tag, _argv} when tag in [:systemd_run, :ix_cmd] -> drain_systemd_runs()
+    after
+      0 -> :ok
+    end
+  end
+
+  describe "reconcile/2" do
+    test "reaps an orphaned host unit and re-attaches a live one" do
+      unit_table = %{
+        "symphony-host-live.service" => {"run_live", 1111},
+        "symphony-host-dead.service" => {"run_dead", 2222}
+      }
+
+      stored_runs = [graph("run_live", :running), graph("run_dead", :succeeded)]
+      reconcile_opts = [config: config(), driver: reconcile_driver(self(), unit_table)]
+
+      assert :ok = Placement.reconcile(stored_runs, reconcile_opts)
+
+      # The succeeded run's server is stopped and its checkout cleaned,
+      # while the running run's server is left alone.
+      assert_received {:systemctl_stop, "symphony-host-dead.service"}
+      refute_received {:systemctl_stop, "symphony-host-live.service"}
+      assert_received {:systemd_run, cleanup_args}
+      assert "--unit=symphony-host-dead-clean.service" in cleanup_args
+
+      # The running run is re-attached, so a resumed acquire resolves to the
+      # existing server instead of provisioning a duplicate.
+      assert {:ok, "http://127.0.0.1:1111"} = Placement.base_url("run_live")
+      # Nothing stays registered for the reaped run.
+      assert :error = Placement.base_url("run_dead")
+    end
+
+    test "reaps a unit whose run is absent from the store" do
+      unit_table = %{"symphony-host-ghost.service" => {"run_ghost", 3333}}
+      reconcile_opts = [config: config(), driver: reconcile_driver(self(), unit_table)]
+
+      assert :ok = Placement.reconcile([], reconcile_opts)
+      assert_received {:systemctl_stop, "symphony-host-ghost.service"}
+    end
+
+    test "is a no-op when the host user is unconfigured" do
+      unit_table = %{"symphony-host-x.service" => {"run_x", 4444}}
+      reconcile_opts = [config: config(%{host_user: nil}), driver: reconcile_driver(self(), unit_table)]
+
+      assert :ok = Placement.reconcile([graph("run_x", :running)], reconcile_opts)
+      refute_received {:systemctl_stop, _stopped_unit}
+    end
+  end
+
+  defp graph(run_id, status) do
+    struct!(SymphonyElixir.IR.RunGraph, run_id: run_id, source_hash: "hash", status: status, nodes: %{})
+  end
+
+  # Stub driver for the reconcile path, backed by a fixed unit table that
+  # maps each unit name to its `{run_id, port}`. `systemctl_show_exec_start`
+  # renders the `ExecStart` shape systemd reports (the run id is the
+  # `--state-dir` basename); stop and systemd-run calls are forwarded to
+  # `test_pid` so a test can assert exactly which units were reaped.
+  defp reconcile_driver(test_pid, units) do
+    notify = fn tag, payload ->
+      send(test_pid, {tag, payload})
+      :ok
+    end
+
+    %{
+      host_passwd: fn _config, user -> {:ok, "#{user}:x:1000:1000::/home/#{user}:/bin/bash"} end,
+      systemctl_list_host_units: fn -> Map.keys(units) end,
+      systemctl_show_exec_start: fn unit ->
+        {run_id, port} = Map.fetch!(units, unit)
+
+        {:ok,
+         "{ path=/n/room-server ; argv[]=/n/room-server --host 127.0.0.1 --port #{port} " <>
+           "--state-dir /home/agentuser/.local/state/symphony-room/#{run_id} ; ignore_errors=no }"}
+      end,
+      # Recorded calls.
+      systemctl_stop: fn unit -> notify.(:systemctl_stop, unit) end,
+      systemd_run: fn _config, args, _timeout -> notify.(:systemd_run, args) end
+    }
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/runtime/room_engine_client_test.exs b/packages/symphony/elixir/test/symphony_elixir/runtime/room_engine_client_test.exs
new file mode 100644
index 000000000..a291360b8
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/runtime/room_engine_client_test.exs
@@ -0,0 +1,212 @@
+defmodule SymphonyElixir.Runtime.RoomEngineClientTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.Engine.Envelope
+  alias SymphonyElixir.IR.Node
+  alias SymphonyElixir.Runtime.RoomEngineClient
+
+  defp agent_node(prompt_ref, location \\ :local) do
+    draft = %Envelope{engine: :codex, model: "gpt-5.3-codex", location: location}
+    {:ok, validated} = Envelope.validate(draft)
+    Node.new(
+      id: "n0",
+      ast_origin: {:agent, "skill"},
+      kind: :agent,
+      envelope: validated,
+      prompt_ref: prompt_ref,
+      inputs: %{}
+    )
+  end
+
+  defp ok_plug(thread_id) do
+    # Canned 200 JSON reply: an "ok" outcome for the given thread, three events.
+    reply = Jason.encode!(%{"threadId" => thread_id, "outcome" => %{"kind" => "ok"}, "eventCount" => 3})
+
+    fn conn ->
+      conn
+      |> Plug.Conn.put_resp_content_type("application/json")
+      |> Plug.Conn.send_resp(200, reply)
+    end
+  end
+
+  test "runs an inline-prompt agent node and returns {:ok, output, thread_id}" do
+    agent = agent_node({:inline, "write FOO and stop"})
+
+    turn_opts = %{
+      run_id: "run_1",
+      attempt: 1,
+      cwd: "/workspace/run_1",
+      room_server_url: "http://room.test",
+      req_options: [plug: ok_plug("thread_xyz")]
+    }
+
+    assert {:ok, %{thread_id: "thread_xyz", event_count: 3}, "thread_xyz"} =
+             RoomEngineClient.run_node(agent, turn_opts)
+  end
+
+  test "forwards the node id and run id to the room-server payload" do
+    observer = self()
+
+    capture = fn conn ->
+      {:ok, raw_body, conn} = Plug.Conn.read_body(conn)
+      send(observer, {:payload, Jason.decode!(raw_body)})
+
+      conn
+      |> Plug.Conn.put_resp_content_type("application/json")
+      |> Plug.Conn.send_resp(200, Jason.encode!(%{"threadId" => "t", "outcome" => %{"kind" => "ok"}, "eventCount" => 0}))
+    end
+
+    agent = agent_node({:inline, "do work"})
+    turn_opts = %{run_id: "run_42", attempt: 1, cwd: "/w", room_server_url: "http://room.test", req_options: [plug: capture]}
+
+    assert {:ok, _, _} = RoomEngineClient.run_node(agent, turn_opts)
+    assert_received {:payload, sent}
+    assert sent["runId"] == "run_42"
+    assert sent["nodeId"] == "n0"
+    assert sent["prompt"] == "do work"
+    assert sent["cwd"] == "/w"
+    assert sent["engine"] == "codex"
+  end
+
+  test "an error outcome carries the thread id through for a later reattach probe" do
+    # HTTP 200 whose outcome is an error, with the thread id still present.
+    failed_turn = Jason.encode!(%{"threadId" => "thread_e", "outcome" => %{"kind" => "error", "message" => "boom"}, "eventCount" => 1})
+
+    error_plug = fn conn ->
+      conn
+      |> Plug.Conn.put_resp_content_type("application/json")
+      |> Plug.Conn.send_resp(200, failed_turn)
+    end
+
+    agent = agent_node({:inline, "do work"})
+    turn_opts = %{run_id: "r", attempt: 1, cwd: "/w", room_server_url: "http://room.test", req_options: [plug: error_plug]}
+
+    assert {:error, {:turn_error, "boom", "thread_e"}, "thread_e"} = RoomEngineClient.run_node(agent, turn_opts)
+  end
+
+  test "a skill prompt is rendered from the resolved body and bindings" do
+    observer = self()
+
+    capture = fn conn ->
+      {:ok, raw_body, conn} = Plug.Conn.read_body(conn)
+      send(observer, {:payload, Jason.decode!(raw_body)})
+
+      conn
+      |> Plug.Conn.put_resp_content_type("application/json")
+      |> Plug.Conn.send_resp(200, Jason.encode!(%{"threadId" => "t", "outcome" => %{"kind" => "ok"}, "eventCount" => 1}))
+    end
+
+    agent = agent_node({:skill, "inspect", %{"repo" => "symphony"}})
+
+    turn_opts = %{
+      run_id: "r",
+      attempt: 1,
+      cwd: "/w",
+      room_server_url: "http://room.test",
+      req_options: [plug: capture],
+      # The skill body is injected here, so no running Catalog is needed.
+      skill_resolver: fn "inspect" -> {:ok, "inspect the ${repo} repo"} end
+    }
+
+    assert {:ok, _output, "t"} = RoomEngineClient.run_node(agent, turn_opts)
+    assert_receive {:payload, sent}
+    assert sent["prompt"] == "inspect the symphony repo"
+  end
+
+  test "appends the run's trigger context as a trailing block on the agent prompt" do
+    test_pid = self()
+
+    plug = fn conn ->
+      {:ok, raw, conn} = Plug.Conn.read_body(conn)
+      send(test_pid, {:payload, Jason.decode!(raw)})
+
+      conn
+      |> Plug.Conn.put_resp_content_type("application/json")
+      |> Plug.Conn.send_resp(200, Jason.encode!(%{"threadId" => "t", "outcome" => %{"kind" => "ok"}, "eventCount" => 0}))
+    end
+
+    node = agent_node({:inline, "digest the window"})
+
+    trigger = %{
+      kind: :cron,
+      scheduled_for: "2026-06-03T07:00:00Z",
+      fired_at: "2026-06-03T07:00:07Z",
+      input: %{lookback_hours: 5}
+    }
+
+    run_opts = %{
+      run_id: "r",
+      attempt: 1,
+      cwd: "/w",
+      trigger: trigger,
+      room_server_url: "http://room.test",
+      req_options: [plug: plug]
+    }
+
+    assert {:ok, _, _} = RoomEngineClient.run_node(node, run_opts)
+    assert_receive {:payload, payload}
+
+    prompt = payload["prompt"]
+    assert String.starts_with?(prompt, "digest the window")
+    # `=~ ""` is true for every string, so pin that text follows the base prompt instead.
+    assert String.length(prompt) > String.length("digest the window")
+    # The block carries the verbatim trigger envelope the skill reads.
+    assert prompt =~ "\"scheduled_for\": \"2026-06-03T07:00:00Z\""
+    assert prompt =~ "\"lookback_hours\": 5"
+  end
+
+  test "omits the trigger block for an operator-started run with no trigger" do
+    test_pid = self()
+
+    plug = fn conn ->
+      {:ok, raw, conn} = Plug.Conn.read_body(conn)
+      send(test_pid, {:payload, Jason.decode!(raw)})
+
+      conn
+      |> Plug.Conn.put_resp_content_type("application/json")
+      |> Plug.Conn.send_resp(200, Jason.encode!(%{"threadId" => "t", "outcome" => %{"kind" => "ok"}, "eventCount" => 0}))
+    end
+
+    node = agent_node({:inline, "do work"})
+    run_opts = %{run_id: "r", attempt: 1, cwd: "/w", trigger: nil, room_server_url: "http://room.test", req_options: [plug: plug]}
+
+    assert {:ok, _, _} = RoomEngineClient.run_node(node, run_opts)
+    assert_receive {:payload, payload}
+    # Exact equality proves nothing was appended; `refute prompt =~ ""` could never pass.
+    assert payload["prompt"] == "do work"
+  end
+
+  test "a skill that names an unbound input fails loudly rather than half-rendering" do
+    agent = agent_node({:skill, "inspect", %{}})
+
+    turn_opts = %{
+      run_id: "r",
+      attempt: 1,
+      cwd: "/w",
+      room_server_url: "http://room.test",
+      skill_resolver: fn "inspect" -> {:ok, "needs ${missing}"} end
+    }
+
+    assert RoomEngineClient.run_node(agent, turn_opts) == {:error, {:unbound_placeholder, "missing"}, nil}
+  end
+
+  test "a missing cwd fails loudly before any request" do
+    agent = agent_node({:inline, "do work"})
+    assert RoomEngineClient.run_node(agent, %{run_id: "r", attempt: 1}) == {:error, :missing_cwd, nil}
+  end
+
+  test "an agent node with no envelope is a wiring error" do
+    bare = Map.replace!(agent_node({:inline, "x"}), :envelope, nil)
+    assert RoomEngineClient.run_node(bare, %{run_id: "r", attempt: 1, cwd: "/w"}) == {:error, {:missing_envelope, "n0"}, nil}
+  end
+
+  test "a non-agent node never reaches the engine host" do
+    exec_node = Node.new(id: "e0", ast_origin: {:exec, "build"}, kind: :exec, inputs: %{})
+    assert RoomEngineClient.run_node(exec_node, %{run_id: "r", cwd: "/w"}) == {:error, {:not_an_agent_node, :exec, "e0"}, nil}
+  end
+
+  test "status/1 is conservatively unknown on the synchronous path" do
+    # A thread id and nil both report :unknown.
+    for thread_id <- ["any-thread", nil], do: assert(RoomEngineClient.status(thread_id) == :unknown)
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/runtime/runtime_registry_test.exs b/packages/symphony/elixir/test/symphony_elixir/runtime/runtime_registry_test.exs
new file mode 100644
index 000000000..273f95d3c
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/runtime/runtime_registry_test.exs
@@ -0,0 +1,79 @@
+defmodule SymphonyElixir.Runtime.RuntimeRegistryTest do
+  use ExUnit.Case, async: false
+
+  alias SymphonyElixir.Runtime.RuntimeRegistry
+
+  # Every test runs against its own supervised registry process.
+  setup do
+    start_supervised!(RuntimeRegistry)
+    :ok
+  end
+
+  # Registration payload for worker `id`. The pid defaults to the calling
+  # test process; a field in `overrides` replaces the default.
+  defp worker(id, overrides \\ %{}) do
+    defaults = %{worker_id: id, pid: self(), address: "100.0.0.1", labels: ["default"], capacity: 4}
+    Map.merge(defaults, overrides)
+  end
+
+  test "register makes a worker discoverable by get/list/select" do
+    :ok = "w1" |> worker() |> RuntimeRegistry.register()
+
+    assert {:ok, %{worker_id: "w1", address: "100.0.0.1", labels: ["default"]}} = RuntimeRegistry.get("w1")
+    assert [%{worker_id: "w1"}] = RuntimeRegistry.list()
+    assert {:ok, %{worker_id: "w1"}} = RuntimeRegistry.select()
+  end
+
+  test "get is :error for an unknown worker" do
+    assert RuntimeRegistry.get("nope") == :error
+  end
+
+  test "select filters by label and returns :no_worker when none match" do
+    :ok = "w1" |> worker(%{labels: ["us-west"]}) |> RuntimeRegistry.register()
+    :ok = "w2" |> worker(%{labels: ["hari"]}) |> RuntimeRegistry.register()
+
+    assert {:ok, %{worker_id: "w2"}} = RuntimeRegistry.select("hari")
+    assert RuntimeRegistry.select("nonexistent") == {:error, :no_worker}
+  end
+
+  test "select returns :no_worker when the registry is empty" do
+    assert RuntimeRegistry.select() == {:error, :no_worker}
+  end
+
+  test "re-registering the same id replaces the prior entry" do
+    :ok = "w1" |> worker(%{address: "100.0.0.1"}) |> RuntimeRegistry.register()
+    :ok = "w1" |> worker(%{address: "100.0.0.9"}) |> RuntimeRegistry.register()
+
+    assert {:ok, %{address: "100.0.0.9"}} = RuntimeRegistry.get("w1")
+    assert [_only] = RuntimeRegistry.list()
+  end
+
+  test "unregister drops a worker" do
+    :ok = RuntimeRegistry.register(worker("w1"))
+    :ok = RuntimeRegistry.unregister("w1")
+    assert RuntimeRegistry.get("w1") == :error
+  end
+
+  test "a worker whose channel process dies is dropped automatically" do
+    test_pid = self()
+    channel = spawn(fn -> receive do: (:stop -> send(test_pid, :stopped)) end)
+    :ok = RuntimeRegistry.register(worker("w1", %{pid: channel}))
+    assert match?({:ok, _}, RuntimeRegistry.get("w1"))
+
+    Process.exit(channel, :kill)
+    assert eventually(fn -> RuntimeRegistry.get("w1") == :error end)
+  end
+
+  # Polls `fun` every 10ms: true as soon as it returns a truthy value, false
+  # once `retries` further attempts are used up.
+  defp eventually(fun, retries \\ 50) do
+    if fun.(), do: true, else: retry(fun, retries)
+  end
+
+  defp retry(_fun, 0), do: false
+
+  defp retry(fun, retries) do
+    Process.sleep(10)
+    eventually(fun, retries - 1)
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/runtime/supervisor_test.exs b/packages/symphony/elixir/test/symphony_elixir/runtime/supervisor_test.exs
new file mode 100644
index 000000000..673b5ca57
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/runtime/supervisor_test.exs
@@ -0,0 +1,100 @@
+defmodule SymphonyElixir.Runtime.SupervisorTest do
+  use ExUnit.Case, async: false
+
+  @moduletag capture_log: true
+
+  alias SymphonyElixir.Engine.Envelope
+  alias SymphonyElixir.IR.{Node, RunGraph, Store}
+  alias SymphonyElixir.Runtime
+
+  defmodule FakeEngine do
+    @behaviour SymphonyElixir.Runtime.EngineClient
+
+    @impl true
+    def run_node(%Node{id: node_id}, _run_opts), do: {:ok, %{ran: node_id}, "thread-#{node_id}"}
+
+    @impl true
+    def status(_thread), do: :unknown
+  end
+
+  setup do
+    start_supervised!({Registry, keys: :unique, name: SymphonyElixir.Runtime.Registry})
+    start_supervised!({Task.Supervisor, name: SymphonyElixir.TaskSupervisor})
+    start_supervised!(SymphonyElixir.Runtime.Supervisor)
+
+    store_dir = Path.join(System.tmp_dir!(), "rt_sup_#{System.unique_integer([:positive])}")
+    File.mkdir_p!(store_dir)
+    on_exit(fn -> File.rm_rf(store_dir) end)
+    [store_opts: [dir: store_dir]]
+  end
+
+  # Agent nodes so turns route through FakeEngine; exec nodes run locally.
+  defp agent_node(id, overrides \\ []) do
+    defaults = [
+      id: id,
+      ast_origin: {:agent, id},
+      kind: :agent,
+      envelope: %Envelope{engine: :codex, model: "m"},
+      prompt_ref: {:inline, "go"},
+      inputs: %{}
+    ]
+
+    Node.new(defaults ++ overrides)
+  end
+
+  defp one_node_graph(run_id) do
+    base = RunGraph.put_nodes(RunGraph.new(run_id, "hash", nil), [agent_node("n0")])
+    Map.put(base, :status, :running)
+  end
+
+  test "start_run schedules a graph under supervision and it runs to terminal", %{store_opts: store_opts} do
+    run_opts = [engine: FakeEngine, store_opts: store_opts]
+    assert {:ok, run_pid} = Runtime.Supervisor.start_run(one_node_graph("run_sup_1"), run_opts)
+
+    monitor = Process.monitor(run_pid)
+    assert_receive {:DOWN, ^monitor, :process, _, _}, 2_000
+
+    {:ok, settled} = Store.load("run_sup_1", store_opts)
+    assert settled.status == :succeeded
+    assert settled.nodes["n0"].state == :succeeded
+  end
+
+  test "resume_pending restarts a persisted non-terminal run with recovery", %{store_opts: store_opts} do
+    # An orphaned run: run and node both persisted :running, as if the BEAM died mid-flight.
+    orphan = agent_node("n0", state: :running)
+    seeded = RunGraph.new("run_resume", "hash", nil) |> RunGraph.put_nodes([orphan]) |> Map.put(:status, :running)
+    :ok = Store.persist(seeded, store_opts)
+
+    Runtime.Supervisor.resume_pending(engine: FakeEngine, store_opts: store_opts)
+
+    # FakeEngine answers :unknown for every thread and the node has no opt-in
+    # retry, so recovery should strand it and the run should settle, not hang.
+    settled = wait_for_terminal("run_resume", store_opts)
+    assert settled.status in [:failed, :succeeded]
+    refute settled.nodes["n0"].state == :running
+  end
+
+  test "resume_pending skips terminal runs", %{store_opts: store_opts} do
+    finished = agent_node("n0", state: :succeeded)
+    done = RunGraph.new("run_done", "hash", nil) |> RunGraph.put_nodes([finished]) |> Map.put(:status, :succeeded)
+    :ok = Store.persist(done, store_opts)
+
+    Runtime.Supervisor.resume_pending(engine: FakeEngine, store_opts: store_opts)
+
+    # No child was started for the already-terminal run.
+    assert %{active: 0} = DynamicSupervisor.count_children(SymphonyElixir.Runtime.Supervisor)
+  end
+
+  # Reloads the run every 25ms until it is terminal; flunks once `attempts` hits zero.
+  defp wait_for_terminal(run_id, store_opts, attempts \\ 40) do
+    {:ok, graph} = Store.load(run_id, store_opts)
+    terminal? = graph.status in [:succeeded, :failed, :cancelled]
+    if not terminal? and attempts == 0, do: flunk("run #{run_id} never reached terminal: #{graph.status}")
+    if terminal? do
+      graph
+    else
+      Process.sleep(25)
+      wait_for_terminal(run_id, store_opts, attempts - 1)
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/runtime/trigger_test.exs b/packages/symphony/elixir/test/symphony_elixir/runtime/trigger_test.exs
new file mode 100644
index 000000000..ce944409a
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/runtime/trigger_test.exs
@@ -0,0 +1,47 @@
+defmodule SymphonyElixir.Runtime.TriggerTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.Runtime.Trigger
+
+  describe "matches?/2" do
+    test "cron matches on the declared schedule" do
+      nine_am = %{kind: :cron, schedule: "0 9 * * *", timezone: "UTC", input: %{}}
+      assert Trigger.matches?(nine_am, %{kind: :cron, schedule: "0 9 * * *"})
+      refute Trigger.matches?(nine_am, %{kind: :cron, schedule: "@daily"})
+    end
+
+    test "linear matches when the declared label is on the event" do
+      triage = %{kind: :linear, label: "[sym] triage"}
+      assert Trigger.matches?(triage, %{kind: :linear, labels: ["a", "[sym] triage"]})
+      refute Trigger.matches?(triage, %{kind: :linear, labels: ["a", "b"]})
+      refute Trigger.matches?(triage, %{kind: :linear, labels: []})
+    end
+
+    test "github matches on repo and label together" do
+      ship_it = %{kind: :github_pr_label, repo: "acme/app", label: "ship"}
+      assert Trigger.matches?(ship_it, %{kind: :github_pr_label, repo: "acme/app", label: "ship"})
+      refute Trigger.matches?(ship_it, %{kind: :github_pr_label, repo: "acme/other", label: "ship"})
+      refute Trigger.matches?(ship_it, %{kind: :github_pr_label, repo: "acme/app", label: "hold"})
+    end
+
+    test "slack matches the declared channel against name or resolved id" do
+      by_name = %{kind: :slack_huddle_completed, channel: "#general"}
+      assert Trigger.matches?(by_name, %{kind: :slack_huddle_completed, channel: "#general"})
+      assert Trigger.matches?(by_name, %{kind: :slack_huddle_completed, channel: "x", channel_id: "#general"})
+      refute Trigger.matches?(by_name, %{kind: :slack_huddle_completed, channel: "#random"})
+
+      by_id = %{kind: :slack_app_mention, channel: "C123"}
+      assert Trigger.matches?(by_id, %{kind: :slack_app_mention, channel_id: "C123"})
+      refute Trigger.matches?(by_id, %{kind: :slack_app_mention, channel_id: "C999"})
+    end
+
+    test "manual always matches its kind" do
+      assert Trigger.matches?(%{kind: :manual}, %{input: %{}, kind: :manual})
+    end
+
+    test "a nil or mismatched declared trigger never matches" do
+      refute Trigger.matches?(nil, %{kind: :manual})
+      refute Trigger.matches?(%{schedule: "x", kind: :cron}, %{kind: :cron})
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/runtime_test.exs b/packages/symphony/elixir/test/symphony_elixir/runtime_test.exs
new file mode 100644
index 000000000..bc8e51a3e
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/runtime_test.exs
@@ -0,0 +1,808 @@
+defmodule SymphonyElixir.RuntimeTest do
+  use ExUnit.Case, async: false
+
+  # The #90 crash tests deliberately kill executor tasks, which logs the
+  # crash and the deadlock-guard error. Capture it so a passing run stays
+  # quiet; a real regression still surfaces through the assertions.
+  @moduletag capture_log: true
+
+  alias SymphonyElixir.Engine.Envelope
+  alias SymphonyElixir.IR.{Materializer, Node, RunGraph, Store}
+  alias SymphonyElixir.Runtime
+
+  # A fake EngineClient driven by a per-test ETS table mapping a node id
+  # to an instruction. The table name is fixed but rows are cleared in
+  # setup, so `async: false` keeps tests from racing each other.
+  defmodule FakeEngine do
+    @behaviour SymphonyElixir.Runtime.EngineClient
+
+    @table :runtime_test_fake
+
+    # Creates the named table on first use, then clears every row it holds.
+    def setup do
+      if :ets.whereis(@table) == :undefined do
+        :ets.new(@table, [:named_table, :public, :set])
+      end
+
+      :ets.delete_all_objects(@table)
+      :ok
+    end
+
+    # `instruction` is one of:
+    #   {:ok, output}        -> succeed with output
+    #   {:error, reason}     -> fail
+    #   {:ok, output, tid}   -> succeed and report thread id
+    #   :crash               -> raise, so the task dies without :node_done
+    #   {:sleep_then, instr} -> sleep so the test can observe :running first
+    def program(node_id, instruction), do: :ets.insert(@table, {node_id, instruction})
+
+    # Sets what `status/1` answers for `thread_id` (the default is :unknown).
+    def set_status(thread_id, status), do: :ets.insert(@table, {{:status, thread_id}, status})
+
+    # The run_opts a node's turn was invoked with, so a test can assert the
+    # runtime threaded the resolved working directory in. Nil if never run.
+    def opts_for(node_id), do: fetch({:opts, node_id}, nil)
+
+    # Records the opts of the turn, then plays the node's programmed
+    # instruction; an unprogrammed node succeeds with `%{default: id}`.
+    @impl true
+    def run_node(%Node{id: id}, opts) do
+      :ets.insert(@table, {{:opts, id}, opts})
+      execute(id, fetch(id, nil))
+    end
+
+    @impl true
+    def status(thread_id), do: fetch({:status, thread_id}, :unknown)
+
+    defp execute(id, instruction) do
+      case instruction do
+        {:ok, output} -> {:ok, output, nil}
+        {:ok, output, tid} -> {:ok, output, tid}
+        {:error, reason} -> {:error, reason, nil}
+        :crash -> raise "fake engine crash for #{id}"
+        {:sleep_then, next} -> sleep_then(id, next)
+        nil -> {:ok, %{default: id}, nil}
+      end
+    end
+
+    # Sleeps, stores `next` as the node's instruction, then plays it directly.
+    # It must not re-enter run_node/2: doing so with empty opts would
+    # overwrite the real run_opts recorded for `opts_for/1`.
+    defp sleep_then(id, next) do
+      Process.sleep(50)
+      :ets.insert(@table, {id, next})
+      execute(id, next)
+    end
+
+    defp fetch(key, default) do
+      case :ets.lookup(@table, key) do
+        [{_, value}] -> value
+        [] -> default
+      end
+    end
+  end
+
+  # Placement stub with canned answers: a stub base URL for acquire/3 and
+  # resolved/1, and a fixed checkout path for workspace_cwd/2. Lets a test
+  # check the cwd reaches an agent turn without a real room-server.
+  defmodule CwdPlacement do
+    def acquire(_run, _where, _opts), do: {:ok, "http://stub.test"}
+    def resolved(_run), do: {:ok, %{base_url: "http://stub.test", location: :host}}
+    def workspace_cwd(_run, _opts), do: {:ok, "/checkout/run/example"}
+    def release(_run), do: :ok
+  end
+
+  # Same canned answers as CwdPlacement, but acquire/3 also sends
+  # `{:acquire_opts, opts}` to the pid under `:test_pid`, when one is given.
+  defmodule RecordingPlacement do
+    def acquire(_run, _where, opts) do
+      observer = Keyword.get(opts, :test_pid)
+      if observer, do: send(observer, {:acquire_opts, opts})
+      {:ok, "http://stub.test"}
+    end
+
+    def resolved(_run), do: {:ok, %{base_url: "http://stub.test", location: :host}}
+    def workspace_cwd(_run, _opts), do: {:ok, "/checkout/run/example"}
+    def release(_run), do: :ok
+  end
+
+  setup do
+    FakeEngine.setup()
+    start_supervised!({Registry, keys: :unique, name: SymphonyElixir.Runtime.Registry})
+    start_supervised!({Task.Supervisor, name: SymphonyElixir.TaskSupervisor})
+
+    store_dir = Path.join(System.tmp_dir!(), "runtime_test_#{System.unique_integer([:positive])}")
+    File.mkdir_p!(store_dir)
+    on_exit(fn -> File.rm_rf(store_dir) end)
+    [dir: store_dir]
+  end
+
+  # Subrun tests start a nested run through Runtime.Ingress, which looks the
+  # child workflow up in WorkflowCatalog's ETS table and starts it under
+  # Runtime.Supervisor. Both are set up here on demand, so tests that do not
+  # use subruns never pay for (or depend on) them.
+  defp ensure_subrun_substrate do
+    table = :symphony_workflows
+
+    # Create the table if it is absent; otherwise empty it.
+    case :ets.whereis(table) do
+      :undefined -> :ets.new(table, [:named_table, :public, read_concurrency: true])
+      _existing -> :ets.delete_all_objects(table)
+    end
+
+    if is_nil(Process.whereis(SymphonyElixir.Runtime.Supervisor)) do
+      start_supervised!(SymphonyElixir.Runtime.Supervisor)
+    end
+
+    :ok
+  end
+
+  defp put_workflow(name, source) do
+    {:ok, parsed} = SymphonyElixir.DSL.Parser.parse(source)
+    fields = %{ast: parsed, source: source, trigger: parsed.trigger, hash: :crypto.hash(:sha256, source)}
+    :ets.insert(:symphony_workflows, {name, Map.put(fields, :name, parsed.name || name)})
+  end
+
+  # Builds a test node. It defaults to an agent node with a codex envelope so
+  # each attempt routes through the injected FakeEngine; exec nodes run
+  # locally and would bypass it. `opts` may override `:kind`, `:envelope`
+  # and `:inputs`; `:state` and `:attempts` are forwarded to `Node.new/1`
+  # only when the caller supplies them.
+  defp node(id, opts) do
+    kind = Keyword.get(opts, :kind, :agent)
+    envelope = Keyword.get(opts, :envelope, %Envelope{engine: :codex, model: "m"})
+    inputs = Keyword.get(opts, :inputs, %{})
+    passthrough = Keyword.take(opts, [:state, :attempts])
+
+    fields = [id: id, ast_origin: {:t, id}, kind: kind, envelope: envelope, inputs: inputs]
+    Node.new(fields ++ passthrough)
+  end
+
+  defp graph(run_id, nodes), do: RunGraph.put_nodes(RunGraph.new(run_id, "h", {:ast, []}), nodes)
+
+  # Parses `.sym` source and materializes it into a real RunGraph, so the
+  # runtime drives the AST through `Materializer.expand_dynamic/1` on each
+  # success. Gate tests need this: the hand-built `graph/2` carries only a
+  # placeholder `{:ast, []}` that re-expands to nothing, whereas this graph
+  # has an actual `when`/`every` construct to resolve.
+  defp materialized(run_id, source) do
+    {:ok, parsed} = SymphonyElixir.DSL.Parser.parse(source)
+    {:ok, run_graph} = Materializer.materialize(run_id, "h", parsed)
+    run_graph
+  end
+
+  defp opts(store_dir), do: [engine: FakeEngine, store_opts: [dir: store_dir]]
+
+  # A run counts as settled once its GenServer has stopped (succeeded or
+  # cancelled) or while it idles alive on a terminal :failed status, which
+  # WS-6 keeps reachable for the operator surface. Either case returns :ok.
+  defp wait_for_exit(pid) do
+    monitor = Process.monitor(pid)
+
+    receive do
+      {:DOWN, ^monitor, :process, ^pid, _reason} -> :ok
+    after
+      0 -> wait_for_settled(pid, monitor)
+    end
+  end
+
+  # Waits up to 20ms for the monitor's :DOWN message. On timeout the run still
+  # counts as settled when settled_failed?/1 says so (the monitor is then
+  # dropped and flushed); otherwise the wait repeats until `attempts` reaches
+  # zero, at which point the test flunks.
+  defp wait_for_settled(pid, ref, attempts \\ 100) do
+    outcome =
+      receive do
+        {:DOWN, ^ref, :process, ^pid, _} -> :down
+      after
+        20 -> :timeout
+      end
+
+    settled? = outcome == :down or settled_failed?(pid)
+    if outcome == :timeout and settled?, do: Process.demonitor(ref, [:flush])
+    if not settled? and attempts == 0, do: flunk("runtime did not settle in time")
+
+    if settled?, do: :ok, else: wait_for_settled(pid, ref, attempts - 1)
+  end
+
+  defp settled_failed?(pid) do
+    Process.alive?(pid) and :failed == Runtime.graph(pid).status
+  catch
+    :exit, _reason -> true
+  end
+
+  test "runs a linear two-node graph to success", %{dir: dir} do
+    # `b` declares an input that references node `a`.
+    upstream = node("a", state: :pending)
+    downstream = node("b", state: :pending, inputs: %{"x" => {:node, "a", []}})
+    run_graph = graph("run-linear", [upstream, downstream])
+
+    FakeEngine.program("a", {:ok, %{v: 1}})
+    FakeEngine.program("b", {:ok, %{v: 2}})
+
+    {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+    wait_for_exit(runtime)
+
+    {:ok, settled} = Store.load("run-linear", dir: dir)
+    assert settled.status == :succeeded
+    for id <- ["a", "b"] do
+      assert settled.nodes[id].state == :succeeded
+    end
+  end
+
+  test "threads the resolved placement cwd into an agent turn", %{dir: dir} do
+    # The `{:host, _}` location is what makes the runtime acquire a placement;
+    # CwdPlacement then answers a fixed checkout path for workspace_cwd/2.
+    placed = %Envelope{engine: :codex, model: "m", location: {:host, "box"}}
+    run_graph = graph("run-cwd", [node("a", state: :pending, envelope: placed)])
+    FakeEngine.program("a", {:ok, %{v: 1}})
+
+    start_opts = [engine: FakeEngine, placement: CwdPlacement, store_opts: [dir: dir]]
+    {:ok, runtime} = Runtime.start_link(run_graph, start_opts)
+    wait_for_exit(runtime)
+
+    assert FakeEngine.opts_for("a")[:cwd] == "/checkout/run/example"
+  end
+
+  test "mints a GitHub App token and threads it into placement acquire", %{dir: dir} do
+    # With a GitHub App configured, the runtime must pass a freshly minted
+    # installation token as `:bot_token` so the workspace clone auth and the
+    # room-server GITHUB_TOKEN/GH_TOKEN author agent PRs under the App's bot
+    # identity rather than the static host token (ENG-2012,
+    # indexable-inc/symphony#242).
+    original = SymphonyElixir.Config.get()
+    with_app = %{original | github_app_id: "123", github_app_private_key_pem: "PEM"}
+
+    :ets.insert(:symphony_config, {:snapshot, with_app})
+    on_exit(fn -> :ets.insert(:symphony_config, {:snapshot, original}) end)
+
+    # Seed the installation-token cache so `GithubApp.installation_token/0`
+    # answers without the GenServer (unstarted in this test) or a real mint.
+    if :ets.whereis(:symphony_github_app_token) == :undefined do
+      :ets.new(:symphony_github_app_token, [:named_table, :public, read_concurrency: true])
+    end
+
+    expires_at = DateTime.add(DateTime.utc_now(), 3600, :second)
+    seeded = %{token: "app-token", expires_at: expires_at, installation_id: 1}
+    :ets.insert(:symphony_github_app_token, {:current, seeded})
+
+    # The seeded table is owned by this test process when GithubApp is not
+    # supervised (it vanishes on exit); only drop the entry if a real,
+    # longer-lived table is present so the seed cannot leak into other tests.
+    on_exit(fn ->
+      if :ets.whereis(:symphony_github_app_token) != :undefined do
+        :ets.delete(:symphony_github_app_token, :current)
+      end
+    end)
+
+    envelope = %Envelope{engine: :codex, model: "m", location: {:host, "box"}}
+    run_graph = graph("run-bot-token", [node("a", state: :pending, envelope: envelope)])
+
+    FakeEngine.program("a", {:ok, %{v: 1}})
+
+    # RecordingPlacement sends the opts it is given to `:test_pid`; this test
+    # supplies that pid through `placement_opts`, nested under `store_opts`.
+    start_opts = [
+      engine: FakeEngine,
+      placement: RecordingPlacement,
+      store_opts: [dir: dir, placement_opts: [test_pid: self()]]
+    ]
+
+    {:ok, runtime} = Runtime.start_link(run_graph, start_opts)
+    wait_for_exit(runtime)
+
+    # assert_received does not wait, so the placement's message must already
+    # be in this mailbox by the time the run has settled.
+    assert_received {:acquire_opts, acquire_opts}
+    assert Keyword.get(acquire_opts, :bot_token) == "app-token"
+  end
+
+  test "threads a successful turn's cost onto the recorded attempt", %{dir: dir} do
+    reported = %{usd: 0.0123, tokens_in: 1200, tokens_out: 340, cache_read: 800, cache_creation: 64}
+    turn_output = %{thread_id: "thread_abc", event_count: 4, cost: reported}
+    FakeEngine.program("a", {:ok, turn_output, "thread_abc"})
+
+    run_graph = graph("run-cost", [node("a", state: :pending)])
+    {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+    wait_for_exit(runtime)
+
+    {:ok, settled} = Store.load("run-cost", dir: dir)
+    assert settled.status == :succeeded
+    [recorded] = settled.nodes["a"].attempts
+    assert recorded.state == :succeeded
+    assert recorded.cost == reported
+  end
+
+  test "runs parallel-ready siblings concurrently", %{dir: dir} do
+    # Neither node declares inputs, so neither waits on the other.
+    siblings = for id <- ["a", "b"], do: node(id, state: :pending)
+    run_graph = graph("run-parallel", siblings)
+
+    for id <- ["a", "b"] do
+      FakeEngine.program(id, {:ok, :ok})
+    end
+
+    {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+    wait_for_exit(runtime)
+
+    # Only the terminal status is checked; overlap itself is not observed.
+    {:ok, settled} = Store.load("run-parallel", dir: dir)
+    assert settled.status == :succeeded
+  end
+
+  test "a node failure propagates upstream_failed and the run fails", %{dir: dir} do
+    failing = node("a", state: :pending)
+    dependent = node("b", state: :pending, inputs: %{"x" => {:node, "a", []}})
+    run_graph = graph("run-fail", [failing, dependent])
+
+    # Only `a` is programmed, and it fails; `b` declares an input on `a`, so
+    # it is expected to be marked :upstream_failed rather than run.
+    FakeEngine.program("a", {:error, :boom})
+
+    {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+    wait_for_exit(runtime)
+
+    {:ok, settled} = Store.load("run-fail", dir: dir)
+    assert settled.status == :failed
+    assert settled.nodes["a"].state == :failed
+    assert settled.nodes["b"].state == :upstream_failed
+  end
+
+  describe "#90: executor task dies without :node_done" do
+    test "a crashing task strands the node and the run resolves (no opt-in retry)", %{dir: dir} do
+      FakeEngine.program("a", :crash)
+      run_graph = graph("run-crash", [node("a", state: :pending)])
+
+      {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+      wait_for_exit(runtime)
+
+      # The node cannot run again without operator action, yet the run must
+      # not hang: with no ready work left, the deadlock guard resolves it.
+      {:ok, settled} = Store.load("run-crash", dir: dir)
+      stranded = settled.nodes["a"]
+      assert stranded.state == :stranded
+      assert settled.status == :failed
+      assert [%{state: :stranded}] = stranded.attempts
+    end
+
+    test "an opted-in node with no side effect auto-retries after a crash", %{dir: dir} do
+      retryable = node("a", state: :pending, inputs: %{"__retry__" => {:literal, true}})
+      run_graph = graph("run-retry", [retryable])
+
+      # The first attempt crashes. A helper process swaps the instruction for
+      # a success 80ms later, so a retry that happens after the swap recovers.
+      FakeEngine.program("a", :crash)
+      owner = self()
+
+      spawn(fn ->
+        Process.sleep(80)
+        FakeEngine.program("a", {:ok, :recovered})
+        send(owner, :reprogrammed)
+      end)
+
+      {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+      wait_for_exit(runtime)
+
+      # Both outcomes are accepted, presumably because a retry may beat the swap.
+      {:ok, settled} = Store.load("run-retry", dir: dir)
+      assert settled.nodes["a"].state in [:succeeded, :stranded]
+    end
+  end
+
+  describe "#90: deadlock guard" do
+    test "a graph with no ready nodes and no tasks fails instead of hanging", %{dir: dir} do
+      # `a` waits on node id "ghost", which is not part of this graph, so its
+      # input can never be satisfied and no node ever becomes ready.
+      blocked = node("a", state: :pending, inputs: %{"x" => {:node, "ghost", []}})
+      run_graph = graph("run-deadlock", [blocked])
+
+      {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+      wait_for_exit(runtime)
+
+      # The guard is expected to resolve the run to :failed rather than
+      # leave it idle forever.
+      {:ok, settled} = Store.load("run-deadlock", dir: dir)
+      assert settled.status == :failed
+    end
+  end
+
+  describe "#90: BEAM restart reconciliation" do
+    test "a persisted :running node makes progress after a simulated restart", %{dir: dir} do
+      # Persist a graph as if the BEAM died mid-turn: `a` is :running with an
+      # attempt that opened no thread (nil thread id), and `b` waits on it.
+      orphaned = SymphonyElixir.IR.Attempt.start(1, :codex, nil)
+
+      persisted =
+        graph("run-restart", [
+          node("a", state: :running, attempts: [orphaned], inputs: %{"__retry__" => {:literal, true}}),
+          node("b", state: :pending, inputs: %{"x" => {:node, "a", []}})
+        ])
+
+      :ok = Store.persist(persisted, dir: dir)
+
+      # On restart the engine cannot account for the thread (there is no
+      # thread id), so reconcile auto-retries `a` (opted in, no side effect).
+      # The rerun then succeeds and unblocks `b`.
+      FakeEngine.program("a", {:ok, :ok})
+      FakeEngine.program("b", {:ok, :ok})
+
+      {:ok, reloaded} = Store.load("run-restart", dir: dir)
+      {:ok, runtime} = Runtime.start_link(reloaded, [recover: true] ++ opts(dir))
+      wait_for_exit(runtime)
+
+      {:ok, settled} = Store.load("run-restart", dir: dir)
+      assert settled.nodes["a"].state == :succeeded
+      assert settled.nodes["b"].state == :succeeded
+      assert settled.status == :succeeded
+    end
+
+    test "a persisted :running node with an opened thread strands on restart", %{dir: dir} do
+      opened = SymphonyElixir.IR.Attempt.start(1, :codex, "thread-x")
+      persisted = graph("run-restart-strand", [node("a", state: :running, attempts: [opened])])
+      :ok = Store.persist(persisted, dir: dir)
+
+      # FakeEngine reports :unknown, so the thread cannot be accounted for; a
+      # recorded thread id means a side effect may have happened, so strand.
+      {:ok, reloaded} = Store.load("run-restart-strand", dir: dir)
+      {:ok, runtime} = Runtime.start_link(reloaded, [recover: true] ++ opts(dir))
+      wait_for_exit(runtime)
+
+      {:ok, settled} = Store.load("run-restart-strand", dir: dir)
+      assert settled.nodes["a"].state == :stranded
+      assert settled.status == :failed
+    end
+  end
+
+  describe "PubSub: live transitions broadcast" do
+    alias SymphonyElixir.Runtime.Events
+
+    # The app PubSub (`SymphonyElixir.PubSub`) is started once in
+    # `test_helper.exs`, so subscribing from the test process is enough to
+    # receive the runtime's broadcasts; no extra process is booted here.
+
+    test "a subscriber receives an event for each persisted transition", %{dir: dir} do
+      upstream = node("a", state: :pending)
+      downstream = node("b", state: :pending, inputs: %{"x" => {:node, "a", []}})
+      run_graph = graph("run-pubsub", [upstream, downstream])
+
+      FakeEngine.program("a", {:ok, %{v: 1}})
+      FakeEngine.program("b", {:ok, %{v: 2}})
+
+      # Both subscriptions are taken before the run starts, so the mailbox
+      # holds everything broadcast from the first transition onwards.
+      :ok = Events.subscribe_run("run-pubsub")
+      :ok = Events.subscribe_index()
+
+      {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+      wait_for_exit(runtime)
+
+      announced = drain_events("run-pubsub")
+
+      # Each persisted transition announces, so a two-node run that marks
+      # each node running then succeeded fans out several events, not one.
+      assert length(announced) > 1
+
+      # The run resolves succeeded, so the last announced summary carries
+      # the terminal status and both nodes counted as succeeded.
+      final_summary = List.last(announced)
+      assert final_summary["status"] == "succeeded"
+      assert final_summary["states"] == %{"succeeded" => 2}
+
+      # An intermediate transition is observable: at least one summary shows
+      # a node already succeeded while the run had not yet finished, proving
+      # the page would update before the run completes.
+      assert Enum.any?(announced, fn summary -> summary["states"]["succeeded"] == 1 end)
+
+      # The per-run and index topics carry the same message, so the
+      # subscriber sees each transition twice (once per topic). Both shapes
+      # are the `IR.View.summary/1` map keyed on this run.
+      assert Enum.all?(announced, &match?(%{"run_id" => "run-pubsub"}, &1))
+    end
+
+    # Collects, in arrival order, every `{:ir_run_event, run_id, summary}`
+    # already in the mailbox for one run, stopping after 50ms of silence. The
+    # duplicate index + per-run deliveries are drained along with the rest.
+    defp drain_events(run_id, collected \\ []) do
+      receive do
+        {:ir_run_event, ^run_id, summary} -> drain_events(run_id, [summary | collected])
+      after
+        50 -> Enum.reverse(collected)
+      end
+    end
+  end
+
+  describe "subrun: nested child runs" do
+    # A child workflow with a single agent node. Its node id is
+    # content-derived, so the tests do not program the FakeEngine for it;
+    # the fake's default branch succeeds any unprogrammed node, which is
+    # enough to drive the child to a :succeeded terminal status.
+    @child_sym ~s|workflow "child" on manual { c <- agent { engine: codex, model: "m", prompt: inline "do" } }|
+
+    test "a subrun starts a child run and its terminal output flows to the parent", %{dir: dir} do
+      ensure_subrun_substrate()
+      put_workflow("child", @child_sym)
+
+      subrun =
+        Node.new(
+          id: "s",
+          ast_origin: {:t, "s"},
+          kind: :subrun,
+          inputs: %{"source" => {:literal, "child.sym"}},
+          state: :pending
+        )
+
+      run_graph = graph("run-subrun-ok", [subrun])
+
+      {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+      wait_for_exit(runtime)
+
+      {:ok, settled} = Store.load("run-subrun-ok", dir: dir)
+      assert settled.status == :succeeded
+      assert settled.nodes["s"].state == :succeeded
+
+      # The subrun node's output names the child run and carries its status,
+      # so a downstream node could read the child result through its inputs.
+      result = settled.nodes["s"].output
+      assert result.kind == :subrun
+      assert result.status == :succeeded
+      assert is_binary(result.run_id)
+
+      # The attempt records the subrun executor, not a sham engine.
+      [recorded] = settled.nodes["s"].attempts
+      assert recorded.engine == :subrun
+      assert recorded.state == :succeeded
+
+      # The child run was persisted under its own id in the shared store.
+      assert {:ok, child_run} = Store.load(result.run_id, dir: dir)
+      assert child_run.status == :succeeded
+    end
+
+    test "a self-referential subrun is rejected as a cycle without spawning a child", %{dir: dir} do
+      ensure_subrun_substrate()
+      put_workflow("child", @child_sym)
+
+      subrun =
+        Node.new(
+          id: "s",
+          ast_origin: {:t, "s"},
+          kind: :subrun,
+          inputs: %{"source" => {:literal, "child.sym"}},
+          state: :pending
+        )
+
+      run_graph = graph("run-subrun-cycle", [subrun])
+
+      # The parent is itself a "child" run already (its name is on the
+      # ancestor chain), so a subrun back to "child" closes a cycle.
+      cyclic_opts = opts(dir) ++ [subrun_ancestors: ["child"]]
+      {:ok, runtime} = Runtime.start_link(run_graph, cyclic_opts)
+      wait_for_exit(runtime)
+
+      {:ok, settled} = Store.load("run-subrun-cycle", dir: dir)
+      assert settled.status == :failed
+      assert settled.nodes["s"].state == :failed
+      assert {:error, {:subrun_cycle, "child", ["child"]}} = settled.nodes["s"].output
+    end
+
+    test "a subrun over the depth ceiling is rejected", %{dir: dir} do
+      ensure_subrun_substrate()
+      put_workflow("child", @child_sym)
+
+      subrun =
+        Node.new(
+          id: "s",
+          ast_origin: {:t, "s"},
+          kind: :subrun,
+          inputs: %{"source" => {:literal, "child.sym"}},
+          state: :pending
+        )
+
+      run_graph = graph("run-subrun-depth", [subrun])
+
+      # Start already at the ceiling so the child (depth + 1) trips the cap.
+      ceiling = SymphonyElixir.Config.get().subrun_max_depth
+      deep_opts = opts(dir) ++ [subrun_depth: ceiling]
+      {:ok, runtime} = Runtime.start_link(run_graph, deep_opts)
+      wait_for_exit(runtime)
+
+      {:ok, settled} = Store.load("run-subrun-depth", dir: dir)
+      assert settled.status == :failed
+      assert settled.nodes["s"].state == :failed
+      assert {:error, {:subrun_depth_exceeded, _depth, ^ceiling}} = settled.nodes["s"].output
+    end
+  end
+
+  describe "when/every gate execution (Phase 7)" do
+    # Fixture: a gating agent followed by a body agent guarded by
+    # `when ${a.changed}`. Interpreter ids are content-derived, so the gating
+    # agent is `agent-0`, the gate placeholder is `when-1`, and the body agent
+    # emitted by the firing pass is `agent-2`. A supervised run has to finish
+    # `agent-0`, re-expand on its output, then schedule (or skip) `agent-2`.
+    @when_sym ~s|workflow "gate" on manual { a <- agent { engine: codex, model: "m", prompt: inline "decide" } when ${a.changed} { b <- agent { engine: codex, model: "m", prompt: inline "act" } } }|
+
+    test "a when gate that resolves true runs the gated body under a supervised run", %{dir: dir} do
+      run_graph = materialized("run-when-true", @when_sym)
+
+      # Only the gating agent is programmed; the body agent relies on the
+      # fake's default success. Atom keys are fine in the programmed output
+      # because the interpreter's field read digs string or atom keys.
+      FakeEngine.program("agent-0", {:ok, %{changed: true}})
+
+      {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+      wait_for_exit(runtime)
+
+      {:ok, persisted} = Store.load("run-when-true", dir: dir)
+      assert persisted.status == :succeeded
+      assert persisted.nodes["agent-0"].state == :succeeded
+      # Once agent-0 succeeded, the gated body materialized and ran to success.
+      assert persisted.nodes["agent-2"].state == :succeeded
+      # Retiring the resolved placeholder is what keeps the run from deadlocking.
+      assert persisted.nodes["when-1"].state == :skipped
+    end
+
+    test "a when gate that resolves false skips the body and the run still succeeds", %{dir: dir} do
+      run_graph = materialized("run-when-false", @when_sym)
+
+      FakeEngine.program("agent-0", {:ok, %{changed: false}})
+
+      {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+      wait_for_exit(runtime)
+
+      {:ok, persisted} = Store.load("run-when-false", dir: dir)
+      assert persisted.status == :succeeded
+      assert persisted.nodes["agent-0"].state == :succeeded
+      # A falsy gate produces no child node, so the body never shows up.
+      refute is_map_key(persisted.nodes, "agent-2")
+      # Retiring the placeholder to :skipped is the load-bearing pair with the
+      # deadlock guard: a gate that never fired must not stall the run.
+      assert persisted.nodes["when-1"].state == :skipped
+    end
+
+    # `every n of c { ... }` is an interpreter gate keyed on the persisted
+    # expansion log rather than on a wall-clock schedule. Within a single run
+    # it is evaluated once, at materialize time (tick 1): `every 1` fires its
+    # body and `every 2+` skips it. The skip case materializes zero nodes and
+    # has to resolve as a no-op success instead of tripping the deadlock guard.
+    @every_one_sym ~s|workflow "tick" on manual { every 1 of gc { b <- agent { engine: codex, model: "m", prompt: inline "act" } } }|
+    @every_two_sym ~s|workflow "tick" on manual { every 2 of gc { b <- agent { engine: codex, model: "m", prompt: inline "act" } } }|
+
+    test "every 1 fires its body on the first tick of a supervised run", %{dir: dir} do
+      run_graph = materialized("run-every-fire", @every_one_sym)
+
+      # Firing happens at materialize (tick 1), so the body agent exists from
+      # the start and there is no placeholder left to resolve.
+      assert is_map_key(run_graph.nodes, "agent-1")
+
+      {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+      wait_for_exit(runtime)
+
+      {:ok, persisted} = Store.load("run-every-fire", dir: dir)
+      assert persisted.status == :succeeded
+      assert persisted.nodes["agent-1"].state == :succeeded
+    end
+
+    test "every 2 skips on the first tick and the no-op run succeeds without deadlock", %{dir: dir} do
+      run_graph = materialized("run-every-skip", @every_two_sym)
+
+      # Tick 1 does not fire the gate, so nothing materializes. A run without
+      # any schedulable work counts as a no-op success, not as a deadlock.
+      assert run_graph.nodes == %{}
+
+      {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+      wait_for_exit(runtime)
+
+      {:ok, persisted} = Store.load("run-every-skip", dir: dir)
+      assert persisted.status == :succeeded
+      assert persisted.nodes == %{}
+    end
+  end
+
+  describe "map fan-out execution (Phase 8)" do
+    # Fixture: a seed agent whose output is a list, then a
+    # `map ${seed.repos} as repo` body that fans out one child agent per
+    # element. Interpreter ids are content-derived: the seed is `agent-0`, the
+    # unresolved fan-out is the `map-1` placeholder, and each child id starts
+    # with `agent-2-` and is keyed on the element index. A supervised run has
+    # to finish `agent-0`, re-expand on its list output, then run every child.
+    @map_sym ~s|workflow "fan" on manual { seed <- agent { engine: codex, model: "m", prompt: inline "list" } map ${seed.repos} as repo { child <- agent { engine: codex, model: "m", prompt: inline "audit ${repo}" } } }|
+
+    test "a map over a dependency's list fans out one child per element and collects every output", %{dir: dir} do
+      run_graph = materialized("run-map-fanout", @map_sym)
+
+      # Until the seed succeeds the body is one placeholder and no agent work.
+      assert run_graph.nodes["map-1"].kind == :map_fanout
+      assert Enum.all?(Map.values(run_graph.nodes), &(&1.kind != :agent or &1.id == "agent-0"))
+
+      # The seed reports three repos. No child is programmed, so each one
+      # takes the fake's default success and the run can drive all three to
+      # :succeeded without per-child setup.
+      FakeEngine.program("agent-0", {:ok, %{repos: ["alpha", "beta", "gamma"]}})
+
+      {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+      wait_for_exit(runtime)
+
+      {:ok, persisted} = Store.load("run-map-fanout", dir: dir)
+      assert persisted.status == :succeeded
+      assert persisted.nodes["agent-0"].state == :succeeded
+
+      # Expect one succeeded child per element. Child ids are content-derived
+      # fan-out keys, so select children by origin and assert on the count
+      # instead of spelling each digest; a stable-id change then cannot break this.
+      children = persisted.nodes |> Map.values() |> Enum.filter(&match?(%Node{ast_origin: "agent-2"}, &1))
+      assert length(children) == 3
+      assert Enum.all?(children, &(&1.state == :succeeded))
+
+      # Each child's output is collected back into the graph (here the fake's
+      # default `%{default: id}`), so a downstream node could read any of them.
+      assert Enum.all?(children, &(&1.output == %{default: &1.id}))
+
+      # Retiring the resolved placeholder to :skipped is the load-bearing pair
+      # with the deadlock guard: a fanned-out placeholder left :pending would
+      # stall the run.
+      assert persisted.nodes["map-1"].state == :skipped
+    end
+
+    # Mapping over an empty list emits no children at all. The placeholder is
+    # retired to :skipped and only the succeeded seed remains, so the run ends
+    # as a no-op success instead of tripping the deadlock guard.
+    test "a map over an empty list emits no children and the run still succeeds", %{dir: dir} do
+      run_graph = materialized("run-map-empty", @map_sym)
+
+      FakeEngine.program("agent-0", {:ok, %{repos: []}})
+
+      {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+      wait_for_exit(runtime)
+
+      {:ok, persisted} = Store.load("run-map-empty", dir: dir)
+      assert persisted.status == :succeeded
+      assert persisted.nodes["agent-0"].state == :succeeded
+      # An empty fan-out must not emit any child node.
+      assert Enum.all?(Map.values(persisted.nodes), &(&1.ast_origin != "agent-2"))
+      # With the placeholder retired, the empty fan-out cannot stall the run.
+      assert persisted.nodes["map-1"].state == :skipped
+    end
+  end
+
+  describe "run visibility at creation" do
+    test "a freshly started run is present in the store before any node finishes", %{dir: dir} do
+      run_id = "run-visible-at-creation"
+
+      # A single slow agent keeps the run in flight while the store is read.
+      run_graph = materialized(run_id, ~s|workflow "vis" on manual { a <- agent { engine: codex, model: "m", prompt: inline "x" } }|)
+      # run_attempt is called on the first scheduling pass; the sleep leaves
+      # time to load from the store before the fake engine returns.
+      FakeEngine.program("agent-0", {:sleep_then, {:ok, :done}})
+
+      {:ok, _runtime} = Runtime.start_link(run_graph, opts(dir))
+
+      # Read the store as soon as start_link returns. init/1 persists before
+      # the first scheduling pass, so the run must already be on disk even
+      # while a slow placement acquire (or, in this test, a sleeping fake
+      # engine) is still in flight.
+      assert {:ok, snapshot} = Store.load(run_id, dir: dir)
+      assert snapshot.status == :running
+      assert map_size(snapshot.nodes) == 1
+    end
+  end
+
+  describe "operator hooks" do
+    test "cancel stops the run and marks non-terminal nodes cancelled", %{dir: dir} do
+      # `a` takes its "x" input from `slow`, so it cannot start before `slow`
+      # has produced an output.
+      dependent = node("a", state: :pending, inputs: %{"x" => {:node, "slow", []}})
+      slow = node("slow", state: :pending)
+      run_graph = graph("run-cancel", [dependent, slow])
+
+      # A sleeping `slow` keeps the run in flight at the moment of the cancel.
+      FakeEngine.program("slow", {:sleep_then, {:ok, :late}})
+
+      {:ok, runtime} = Runtime.start_link(run_graph, opts(dir))
+      :ok = Runtime.cancel(runtime)
+      wait_for_exit(runtime)
+
+      {:ok, persisted} = Store.load("run-cancel", dir: dir)
+      assert persisted.status == :cancelled
+      assert persisted.nodes["a"].state == :cancelled
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/skill_test.exs b/packages/symphony/elixir/test/symphony_elixir/skill_test.exs
new file mode 100644
index 000000000..c5537ca40
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/skill_test.exs
@@ -0,0 +1,117 @@
+defmodule SymphonyElixir.SkillTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixir.Skill
+
+  # Minimal valid YAML frontmatter shared by all fixture skills.
+  @frontmatter """
+  ---
+  codex_model: gpt-5-codex
+  reasoning_effort: medium
+  sandbox: workspace-write
+  approval_policy: never
+  tools: []
+  ---
+  """
+
+  # Creates a unique temp skill directory with a `_partials` subdirectory,
+  # registers its removal on test exit, and returns both paths as a tuple.
+  defp setup_skill_dir do
+    skill_root = Path.join(System.tmp_dir!(), "skill_test_#{System.unique_integer([:positive])}")
+    partials_root = Path.join(skill_root, "_partials")
+    Enum.each([skill_root, partials_root], &File.mkdir_p!/1)
+    on_exit(fn -> File.rm_rf!(skill_root) end)
+
+    {skill_root, partials_root}
+  end
+
+  # Writes `<name>.md` into `dir` as the shared frontmatter followed by
+  # `body`, and returns the path of the written file.
+  defp write_skill!(dir, name, body) do
+    dir |> Path.join("#{name}.md") |> tap(&File.write!(&1, @frontmatter <> body))
+  end
+
+  defp write_partial!(partials_dir, name, body) do
+    partials_dir |> Path.join("#{name}.md") |> File.write!(body)
+  end
+
+  describe "expand_partials: self-referential partial" do
+    # Regression guard for the prod outage described in the plan. A partial
+    # file may document its own token name in a prose header (e.g. "any skill
+    # that references `{{partial:graphite_sop}}` gets this content inlined").
+    # Under the old single-pass implementation that left a residual token in
+    # the catalog body. The fixpoint + seen-set must drop the self-reference
+    # so the stored body is token-free.
+    test "a partial whose body contains its own token loads cleanly" do
+      {skill_dir, partials} = setup_skill_dir()
+
+      write_partial!(partials, "policy", """
+      This file is referenced via `{{partial:policy}}`.
+      Actual policy content here.
+      """)
+
+      skill_path = write_skill!(skill_dir, "my_skill", "Use this:\n{{partial:policy}}\nDone.\n")
+
+      assert {:ok, loaded} = Skill.load(skill_path)
+      refute loaded.body =~ "{{partial:"
+      assert loaded.body =~ "Actual policy content here."
+    end
+  end
+
+  describe "expand_partials: nested partials" do
+    # Partial A references partial B. Expanding A on the first pass of the
+    # fixpoint loop introduces {{partial:b}} into the body, and the second pass
+    # expands B. The final body must hold B's text and no residual tokens.
+    test "partial A inlining partial B both expand into the final body" do
+      {skill_dir, partials} = setup_skill_dir()
+
+      write_partial!(partials, "a", "Content from A.\n{{partial:b}}\n")
+      write_partial!(partials, "b", "Content from B.")
+
+      skill_path = write_skill!(skill_dir, "nested_skill", "Start.\n{{partial:a}}\nEnd.\n")
+
+      assert {:ok, loaded} = Skill.load(skill_path)
+      refute loaded.body =~ "{{partial:"
+      assert loaded.body =~ "Content from A."
+      assert loaded.body =~ "Content from B."
+    end
+  end
+
+  describe "expand_partials: missing partial" do
+    # A token whose partial file is genuinely absent stays a hard load error.
+    # The seen-set logic must not mask it: only names that were already seen
+    # are dropped, while an unseen name with no file on disk is an error.
+    test "a reference to a nonexistent partial returns a missing_partial error" do
+      {skill_dir, _partials} = setup_skill_dir()
+
+      skill_path = write_skill!(skill_dir, "broken_skill", "{{partial:does_not_exist}}\n")
+
+      assert {:error, {:missing_partial, "does_not_exist", _reason}} = Skill.load(skill_path)
+    end
+  end
+
+  describe "expand_partials: repeated include" do
+    # A partial is a named shared contract, so a skill body that references
+    # the same partial twice gets its content inlined only once. That keeps
+    # the catalog body deterministic. It is the "inline each named partial at
+    # most once" half of the fixpoint behavior; the other half drops
+    # self-reference tokens.
+    test "the same partial referenced twice is inlined once" do
+      {skill_dir, partials} = setup_skill_dir()
+
+      write_partial!(partials, "contract", "SHARED-CONTRACT-TEXT")
+
+      repeated_body = "First:\n{{partial:contract}}\nSecond:\n{{partial:contract}}\n"
+      skill_path = write_skill!(skill_dir, "repeat_skill", repeated_body)
+
+      assert {:ok, loaded} = Skill.load(skill_path)
+      refute loaded.body =~ "{{partial:"
+
+      # N occurrences of the marker split the body into N + 1 pieces, so the
+      # piece count minus one is how often the partial text was inlined.
+      pieces = String.split(loaded.body, "SHARED-CONTRACT-TEXT")
+      occurrences = length(pieces) - 1
+      assert occurrences == 1
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir/workflow_catalog_test.exs b/packages/symphony/elixir/test/symphony_elixir/workflow_catalog_test.exs
new file mode 100644
index 000000000..9cb05c536
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir/workflow_catalog_test.exs
@@ -0,0 +1,109 @@
+defmodule SymphonyElixir.WorkflowCatalogTest do
+  use ExUnit.Case, async: false
+
+  @moduletag capture_log: true
+
+  alias SymphonyElixir.WorkflowCatalog
+
+  setup do
+    workflows_dir = Path.join(System.tmp_dir!(), "wf_catalog_#{System.unique_integer([:positive])}")
+    File.mkdir_p!(workflows_dir)
+    on_exit(fn -> File.rm_rf(workflows_dir) end)
+    # With a long poll interval the only scans are the boot scan and the ones
+    # a test triggers explicitly, which keeps the assertions deterministic.
+    start_supervised!({WorkflowCatalog, [workflows_dir: workflows_dir, poll_ms: 60_000]})
+    %{dir: workflows_dir}
+  end
+
+  defp write_sym!(dir, name, body) do
+    dir |> Path.join("#{name}.sym") |> File.write!(body)
+  end
+
+  test "parses .sym files and indexes them by name and trigger", %{dir: dir} do
+    write_sym!(dir, "implement", ~s|workflow "implement" on linear label "[sym] implement" { a <- agent { engine: codex, model: "m", prompt: skill "implement" {} } }|)
+    write_sym!(dir, "nightly", ~s|workflow "nightly" on cron "0 9 * * *" tz "UTC" { gc <- exec "./gc.sh" }|)
+
+    WorkflowCatalog.scan(dir)
+
+    assert {:ok, entry} = WorkflowCatalog.workflow("implement")
+    assert entry.name == "implement"
+    assert entry.trigger == %{kind: :linear, label: "[sym] implement"}
+    assert is_binary(entry.hash)
+
+    assert Enum.sort(Enum.map(WorkflowCatalog.workflows(), & &1.name)) == ["implement", "nightly"]
+    assert [%{name: "implement"}] = WorkflowCatalog.for_trigger_kind(:linear)
+    assert [%{name: "nightly"}] = WorkflowCatalog.for_trigger_kind(:cron)
+  end
+
+  test "hot-reloads changed bytes and drops deleted files", %{dir: dir} do
+    write_sym!(dir, "w", ~s|workflow "w" on manual { a <- exec "./x.sh" }|)
+    WorkflowCatalog.scan(dir)
+    assert {:ok, %{hash: original_hash}} = WorkflowCatalog.workflow("w")
+
+    write_sym!(dir, "w", ~s|workflow "w" on cron "* * * * *" { a <- exec "./x.sh" }|)
+    WorkflowCatalog.scan(dir)
+    assert {:ok, updated} = WorkflowCatalog.workflow("w")
+    refute updated.hash == original_hash
+    assert updated.trigger.kind == :cron
+
+    dir |> Path.join("w.sym") |> File.rm!()
+    WorkflowCatalog.scan(dir)
+    assert {:error, :not_found} = WorkflowCatalog.workflow("w")
+  end
+
+  test "a parse error keeps the last good version in place", %{dir: dir} do
+    write_sym!(dir, "w", ~s|workflow "w" on manual { a <- exec "./x.sh" }|)
+    WorkflowCatalog.scan(dir)
+    assert {:ok, last_good} = WorkflowCatalog.workflow("w")
+
+    write_sym!(dir, "w", ~s|workflow "w" on manual { this is not valid |)
+    WorkflowCatalog.scan(dir)
+    # Rejecting the broken bytes leaves the earlier parse as the published one.
+    assert WorkflowCatalog.workflow("w") === {:ok, last_good}
+  end
+
+  test "a parse error is recorded with a located, file-stamped diagnostic", %{dir: dir} do
+    write_sym!(dir, "w", ~s|workflow "w" on manual { a <- exec "./x.sh" }|)
+    write_sym!(dir, "broken", "workflow \"broken\" {\n  oops\n}\n")
+    WorkflowCatalog.scan(dir)
+
+    # Only the good file is published; the broken one is missing from the
+    # published set and shows up in the error set instead.
+    assert {:ok, _} = WorkflowCatalog.workflow("w")
+    assert {:error, :not_found} = WorkflowCatalog.workflow("broken")
+
+    assert {:ok, diagnostic} = WorkflowCatalog.error("broken")
+    assert diagnostic.name == "broken"
+    assert diagnostic.file == "broken.sym"
+    assert is_integer(diagnostic.line) and diagnostic.line >= 1
+    assert is_integer(diagnostic.column) and diagnostic.column >= 1
+    assert is_binary(diagnostic.message)
+
+    # No error is recorded for the healthy file, so `errors/0` lists the
+    # broken one and nothing else.
+    assert {:error, :not_found} = WorkflowCatalog.error("w")
+    assert Enum.map(WorkflowCatalog.errors(), & &1.name) == ["broken"]
+  end
+
+  test "a recorded error clears when the file parses again", %{dir: dir} do
+    write_sym!(dir, "w", "workflow \"w\" {\n  oops\n}\n")
+    WorkflowCatalog.scan(dir)
+    assert {:ok, _} = WorkflowCatalog.error("w")
+
+    write_sym!(dir, "w", ~s|workflow "w" on manual { a <- exec "./x.sh" }|)
+    WorkflowCatalog.scan(dir)
+    assert {:error, :not_found} = WorkflowCatalog.error("w")
+    assert {:ok, _} = WorkflowCatalog.workflow("w")
+  end
+
+  test "deleting a broken file retires its recorded error", %{dir: dir} do
+    write_sym!(dir, "broken", "workflow \"broken\" {\n  oops\n}\n")
+    WorkflowCatalog.scan(dir)
+    assert {:ok, _} = WorkflowCatalog.error("broken")
+
+    dir |> Path.join("broken.sym") |> File.rm!()
+    WorkflowCatalog.scan(dir)
+    assert {:error, :not_found} = WorkflowCatalog.error("broken")
+    assert WorkflowCatalog.errors() == []
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir_web/components/ir_graph_test.exs b/packages/symphony/elixir/test/symphony_elixir_web/components/ir_graph_test.exs
new file mode 100644
index 000000000..c0dd27424
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir_web/components/ir_graph_test.exs
@@ -0,0 +1,393 @@
+defmodule SymphonyElixirWeb.Components.IRGraphTest do
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixirWeb.Components.IRGraph
+
+  # Test fixture: a minimal node map in the shape IR.View.render_node/1
+  # produces. Opts may override :kind, :state, :deps and :label (the label
+  # defaults to the id); the remaining fields are fixed to nil or [].
+  # It is called mk_node so it does not conflict with Kernel.node/1.
+  defp mk_node(id, opts \\ []) do
+    defaults = [kind: "exec", state: "pending", deps: [], label: id]
+    resolved = Keyword.merge(defaults, opts)
+
+    %{
+      "id" => id,
+      "kind" => Keyword.fetch!(resolved, :kind),
+      "state" => Keyword.fetch!(resolved, :state),
+      "deps" => Keyword.fetch!(resolved, :deps),
+      "label" => Keyword.fetch!(resolved, :label),
+      "envelope" => nil,
+      "attempts" => [],
+      "output" => nil,
+      "updated_at" => nil
+    }
+  end
+
+  # Test fixture: an agent node for `engine`, built on top of mk_node/2. The
+  # label defaults to the :skill opt (or the id when that is absent), and the
+  # envelope holds "engine" plus whichever of :model, :effort, :permissions
+  # and :location were passed with a non-nil value.
+  defp mk_agent(id, engine, opts \\ []) do
+    skill = Keyword.get(opts, :skill, id)
+    node_opts = Keyword.merge([kind: "agent", label: skill], opts)
+
+    envelope =
+      [:model, :effort, :permissions, :location]
+      |> Enum.reduce(%{"engine" => engine}, fn key, acc ->
+        maybe_put(acc, Atom.to_string(key), Keyword.get(opts, key))
+      end)
+
+    id
+    |> mk_node(node_opts)
+    |> Map.put("envelope", envelope)
+  end
+
+  # Puts `value` under `key`, except that a nil value leaves `map` unchanged.
+  defp maybe_put(map, key, value), do: if(is_nil(value), do: map, else: Map.put(map, key, value))
+
+  describe "layout/1 layer assignment" do
+    test "a root node with no deps is in layer 0" do
+      layout = IRGraph.layout([mk_node("a")])
+      [root] = Enum.filter(layout.nodes, &(&1.state_class != "gtrigger"))
+      assert root.id == "a"
+      # pad_x (30) is where every layer 0 node is positioned.
+      assert root.x == 30
+    end
+
+    test "a -> b places a in layer 0 and b in layer 1" do
+      layout = IRGraph.layout([mk_node("a"), mk_node("b", deps: ["a"])])
+      placed = Map.new(layout.nodes, fn n -> {n.id, n} end)
+
+      assert placed["b"].x > placed["a"].x
+    end
+
+    test "a -> b, a -> c, b -> d places roots in layer 0 and d in the last layer" do
+      graph_nodes = [
+        mk_node("a"),
+        mk_node("b", deps: ["a"]),
+        mk_node("c", deps: ["a"]),
+        mk_node("d", deps: ["b"])
+      ]
+
+      layout = IRGraph.layout(graph_nodes)
+      x_by_id = Map.new(layout.nodes, fn placed -> {placed.id, placed.x} end)
+
+      # Layer 0: a is a root.
+      root_x = Map.fetch!(x_by_id, "a")
+      # Layer 1: b and c both depend on a.
+      b_x = Map.fetch!(x_by_id, "b")
+      c_x = Map.fetch!(x_by_id, "c")
+      # Layer 2: d depends on b.
+      leaf_x = Map.fetch!(x_by_id, "d")
+
+      assert root_x < b_x
+      assert b_x == c_x
+      assert leaf_x > b_x
+    end
+
+    test "emits one edge per dep" do
+      graph_nodes = [
+        mk_node("a"),
+        mk_node("b", deps: ["a"]),
+        mk_node("c", deps: ["a"]),
+        mk_node("d", deps: ["b"])
+      ]
+
+      layout = IRGraph.layout(graph_nodes)
+      # Three deps (a->b, a->c, b->d), so exactly three edges.
+      assert [_, _, _] = layout.edges
+    end
+
+    test "parallel independent roots all land in layer 0" do
+      roots = [mk_node("x"), mk_node("y"), mk_node("z")]
+      layout = IRGraph.layout(roots)
+      placed = Map.new(layout.nodes, fn n -> {n.id, n} end)
+
+      # Every root shares the same x.
+      assert placed["x"].x == placed["y"].x
+      assert placed["y"].x == placed["z"].x
+    end
+
+    test "an empty list with no trigger returns a minimal viewbox and no nodes or edges" do
+      layout = IRGraph.layout([])
+      assert String.contains?(layout.viewbox, "0 0")
+      assert layout.nodes == []
+      assert layout.edges == []
+    end
+
+    test "an empty list with a trigger returns a single trigger node" do
+      layout = IRGraph.layout([], "manual")
+      assert [trigger] = layout.nodes
+      assert trigger.state_class == "gtrigger"
+      assert trigger.label == "manual"
+      assert layout.edges == []
+    end
+
+    test "dangling dep edges (dep not in graph) are silently skipped" do
+      orphan = mk_node("b", deps: ["ghost"])
+      layout = IRGraph.layout([orphan])
+      # "ghost" is not in the graph, so b has no known deps and is a root.
+      assert [_only] = layout.nodes
+      assert layout.edges == []
+    end
+  end
+
+  describe "layout/1 state classes" do
+    test "succeeded state produces succeeded class" do
+      input = mk_node("a", state: "succeeded")
+      %{nodes: [rendered]} = IRGraph.layout([input])
+      assert rendered.state_class == "succeeded"
+    end
+
+    test "running state produces running class" do
+      input = mk_node("a", state: "running")
+      %{nodes: [rendered]} = IRGraph.layout([input])
+      assert rendered.state_class == "running"
+    end
+
+    test "gate kind gets gate prefix in state class" do
+      input = mk_node("g", kind: "gate", state: "pending")
+      %{nodes: [rendered]} = IRGraph.layout([input])
+      assert rendered.state_class == "gate pending"
+    end
+
+    test "unknown state falls back to pending class" do
+      input = mk_node("a", state: "upstream_failed")
+      %{nodes: [rendered]} = IRGraph.layout([input])
+      assert rendered.state_class == "pending"
+    end
+  end
+
+  describe "layout/1 labels" do
+    test "node label comes from the label field" do
+      input = mk_node("agent-0", label: "my_skill")
+      %{nodes: [rendered]} = IRGraph.layout([input])
+      assert rendered.label == "my_skill"
+    end
+
+    test "node id is exposed separately from label" do
+      input = mk_node("agent-0", label: "my_skill")
+      %{nodes: [rendered]} = IRGraph.layout([input])
+      assert rendered.id == "agent-0"
+    end
+
+    test "node without label field falls back to the id" do
+      unlabeled = Map.delete(mk_node("fallback-id"), "label")
+      layout = IRGraph.layout([unlabeled])
+      [rendered] = layout.nodes
+      assert rendered.label == "fallback-id"
+    end
+  end
+
+  describe "layout/1 detail lines for agent nodes" do
+    test "agent node with full envelope produces engine/model, effort, permissions, location lines" do
+      envelope_opts = [
+        model: "gpt-5.5",
+        effort: "high",
+        permissions: "danger_full_access",
+        location: "ixvm",
+        skill: "my_skill"
+      ]
+
+      agent = mk_agent("agent-0", "codex", envelope_opts)
+      %{nodes: [rendered]} = IRGraph.layout([agent])
+      assert "codex gpt-5.5" in rendered.detail_lines
+      assert "high" in rendered.detail_lines
+      assert "danger_full_access" in rendered.detail_lines
+      assert "ixvm" in rendered.detail_lines
+    end
+
+    test "agent node without model shows engine only in first detail line" do
+      agent = mk_agent("agent-0", "codex", skill: "s")
+      %{nodes: [rendered]} = IRGraph.layout([agent])
+      lines = rendered.detail_lines
+      assert "codex" in lines
+    end
+
+    test "exec node detail shows exec kind" do
+      exec = mk_node("e", kind: "exec", label: "./run.sh")
+      %{nodes: [rendered]} = IRGraph.layout([exec])
+      assert rendered.detail_lines == ["exec"]
+    end
+
+    test "gate node detail shows gate" do
+      gate = mk_node("g", kind: "gate")
+      %{nodes: [rendered]} = IRGraph.layout([gate])
+      assert rendered.detail_lines == ["gate"]
+    end
+
+    test "agent location annotates the fallback when effective placement differs" do
+      agent = mk_agent("agent-0", "codex", location: "ixvm", skill: "s")
+      placement = %{"declared" => "ixvm", "effective" => "host"}
+      layout = IRGraph.layout([agent], "manual", placement)
+      rendered = Enum.find(layout.nodes, fn n -> n.id == "agent-0" end)
+      assert "ixvm (fallback host)" in rendered.detail_lines
+      refute "ixvm" in rendered.detail_lines
+    end
+
+    test "agent location shows no fallback when effective matches the declared type" do
+      agent = mk_agent("agent-0", "codex", location: "host:hil-compute-2", skill: "s")
+      placement = %{"declared" => "host:hil-compute-2", "effective" => "host"}
+      layout = IRGraph.layout([agent], "manual", placement)
+      rendered = Enum.find(layout.nodes, fn n -> n.id == "agent-0" end)
+      assert "host:hil-compute-2" in rendered.detail_lines
+    end
+  end
+
+  describe "layout/1 trigger node" do
+    test "trigger produces a gtrigger node in the output" do
+      cron = "cron 30 * * * *"
+      layout = IRGraph.layout([mk_node("a")], cron)
+      triggers = Enum.filter(layout.nodes, fn n -> n.state_class == "gtrigger" end)
+      assert [trigger] = triggers
+      assert trigger.label == cron
+    end
+
+    test "trigger node is positioned to the left of root real nodes" do
+      roots = [mk_node("a")]
+      layout = IRGraph.layout(roots, "manual")
+      placed = Map.new(layout.nodes, fn n -> {n.id, n} end)
+      assert placed["a"].x > placed["__trigger__"].x
+    end
+
+    test "trigger produces edges to each root node" do
+      roots = [mk_node("a"), mk_node("b")]
+      layout = IRGraph.layout(roots, "manual")
+      # Neither a nor b declares a dep, so both are roots: 2 trigger edges.
+      assert [_, _] = layout.edges
+    end
+
+    test "trigger does not add extra edges to non-root nodes" do
+      # Only a is a root because b depends on it, so the trigger contributes
+      # one edge (to a) and the a->b dep contributes the other: 2 in total.
+      chain = [mk_node("a"), mk_node("b", deps: ["a"])]
+      layout = IRGraph.layout(chain, "cron 0 * * * *")
+      assert [_, _] = layout.edges
+    end
+
+    test "layout without trigger has no gtrigger nodes" do
+      chain = [mk_node("a"), mk_node("b", deps: ["a"])]
+      layout = IRGraph.layout(chain)
+      # No trigger argument was passed, so no node may carry the trigger class.
+      refute Enum.any?(layout.nodes, &(&1.state_class == "gtrigger"))
+    end
+  end
+
+  describe "layout box sizing" do
+    test "node width grows to fit a long label so it does not spill" do
+      trigger_label = "cron 0 0,5,10,15,20 * * *"
+      layout = IRGraph.layout([mk_node("a")], trigger_label)
+      # Regression for the graph-spillage bug: the box has to be wide enough
+      # for the long trigger label plus padding so the text stays in the rect.
+      assert layout.node_w >= String.length(trigger_label) * 7 + 20
+    end
+
+    test "node height grows to fit the full envelope block" do
+      envelope_opts = [
+        model: "gpt-5.5",
+        effort: "high",
+        permissions: "danger_full_access",
+        location: "ixvm",
+        skill: "idiomatic"
+      ]
+
+      layout = IRGraph.layout([mk_agent("agent-0", "codex", envelope_opts)])
+      # The label, the id and the four envelope detail lines must all fit
+      # inside the box.
+      assert layout.node_h >= 44 + 3 * 13 + 6
+    end
+  end
+
+  describe "layout/1 single-node no-stretch" do
+    test "single node layout natural_width is bounded (not stretched to fill)" do
+      layout = IRGraph.layout([mk_node("a")])
+      # A single-node layout is expected to be two pad_x margins plus one node
+      # width, far below a typical screen width. Staying under 400 shows the
+      # layout was not stretched to fill its card.
+      assert layout.natural_width < 400
+    end
+
+    test "single node with trigger natural_width is bounded" do
+      layout = IRGraph.layout([mk_node("a")], "manual")
+      assert layout.natural_width < 600
+    end
+
+    test "viewBox width equals natural_width for single-node layout" do
+      layout = IRGraph.layout([mk_node("a")])
+      # The viewBox reads "0 0 <width> ...", so the third field is the width.
+      ["0", "0", width_field | _] = String.split(layout.viewbox, " ")
+      {viewbox_width, _} = Integer.parse(width_field)
+      assert viewbox_width == layout.natural_width
+    end
+  end
+
+  describe "layout/1 multi-node trigger -> route -> skill" do
+    test "three-layer trigger-route-skill graph lays out left-to-right" do
+      # route has no deps (a root); skill depends on route.
+      agents = [
+        mk_agent("route-0", "codex", skill: "route", deps: []),
+        mk_agent("skill-0", "codex", skill: "idiomatic", deps: ["route-0"])
+      ]
+
+      layout = IRGraph.layout(agents, "cron 30 * * * *")
+      placed = Map.new(layout.nodes, fn n -> {n.id, n} end)
+
+      # The order trigger -> route-0 -> skill-0 must be strictly left-to-right.
+      assert placed["__trigger__"].x < placed["route-0"].x
+      assert placed["route-0"].x < placed["skill-0"].x
+    end
+
+    test "three-layer graph has trigger edge plus dep edge (2 total)" do
+      agents = [
+        mk_agent("route-0", "codex", skill: "route"),
+        mk_agent("skill-0", "codex", skill: "idiomatic", deps: ["route-0"])
+      ]
+
+      layout = IRGraph.layout(agents, "cron 30 * * * *")
+      # The two edges are trigger->route-0 and route-0->skill-0.
+      assert [_, _] = layout.edges
+    end
+  end
+
+  describe "layout/1 edge path format" do
+    test "each edge has a non-empty d attribute starting with M" do
+      layout = IRGraph.layout([mk_node("a"), mk_node("b", deps: ["a"])])
+      # a -> b is the only edge; its path data has to begin with "M".
+      assert [%{d: "M" <> _}] = layout.edges
+    end
+  end
+
+  describe "single cron-triggered agent with full envelope" do
+    test "layout contains trigger label, skill name, engine+model, effort, permissions, location" do
+      envelope_opts = [
+        model: "gpt-5.5",
+        effort: "high",
+        permissions: "danger_full_access",
+        location: "ixvm",
+        skill: "idiomatic"
+      ]
+
+      layout = IRGraph.layout([mk_agent("agent-0", "codex", envelope_opts)], "cron 30 * * * *")
+      placed = Map.new(layout.nodes, fn n -> {n.id, n} end)
+
+      # The cron expression is the trigger node's label.
+      assert placed["__trigger__"].label == "cron 30 * * * *"
+
+      # The skill name is the agent node's primary label.
+      agent = placed["agent-0"]
+      assert agent.label == "idiomatic"
+
+      # The node id stays available separately from that label.
+      assert agent.id == "agent-0"
+
+      # Detail lines cover engine+model, effort, permissions and location.
+      assert "codex gpt-5.5" in agent.detail_lines
+      assert "high" in agent.detail_lines
+      assert "danger_full_access" in agent.detail_lines
+      assert "ixvm" in agent.detail_lines
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir_web/ir_run_controller_test.exs b/packages/symphony/elixir/test/symphony_elixir_web/ir_run_controller_test.exs
new file mode 100644
index 000000000..8eac8b670
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir_web/ir_run_controller_test.exs
@@ -0,0 +1,234 @@
+defmodule SymphonyElixirWeb.IRRunControllerTest do
+  use ExUnit.Case, async: false
+  import Plug.Test
+  import Plug.Conn
+
+  alias SymphonyElixir.DSL.{Parser, Schema}
+  alias SymphonyElixir.Engine.Envelope
+  alias SymphonyElixir.IR.{Node, RunGraph, Store}
+
+  @opts SymphonyElixirWeb.Endpoint.init([])
+
+  # The controller reads the IR store at its default dir
+  # (Config.get().runs_dir/ir). Clean it between tests so listings are
+  # deterministic.
+  setup do
+    # The Runtime.Registry must exist for operator routes to resolve a run
+    # name; a run that is not registered then yields the :noproc the
+    # controller translates to 409. Start it if the Application is not up.
+    unless Process.whereis(SymphonyElixir.Runtime.Registry) do
+      start_supervised!({Registry, keys: :unique, name: SymphonyElixir.Runtime.Registry})
+    end
+
+    # The create route resolves a workflow through WorkflowCatalog's ETS table
+    # and starts it under Runtime.Supervisor; bring both up (auto_start: false).
+    ensure_workflow_catalog_table()
+
+    unless Process.whereis(SymphonyElixir.TaskSupervisor) do
+      start_supervised!({Task.Supervisor, name: SymphonyElixir.TaskSupervisor})
+    end
+
+    unless Process.whereis(SymphonyElixir.Runtime.Supervisor) do
+      start_supervised!(SymphonyElixir.Runtime.Supervisor)
+    end
+
+    # Bang variant: a failed wipe fails the test instead of leaving stale runs listed.
+    dir = Path.join(SymphonyElixir.Config.get().runs_dir, "ir")
+    File.rm_rf!(dir)
+    File.mkdir_p!(dir)
+    :ok
+  end
+
+  # The catalog table is created by the WorkflowCatalog GenServer at boot,
+  # which test_helper does not start. Create it here so put_workflow/1 and
+  # the create route can read it, and reset its rows each test.
+  defp ensure_workflow_catalog_table do
+    name = :symphony_workflows
+
+    # First use creates the named table; later calls only empty it.
+    case :ets.whereis(name) do
+      :undefined -> :ets.new(name, [:named_table, :public, read_concurrency: true])
+      _existing -> :ets.delete_all_objects(name)
+    end
+  end
+
+  defp put_workflow(name, source) do
+    {:ok, ast} = Parser.parse(source)
+    entry = %{name: ast.name || name, ast: ast, trigger: ast.trigger, source: source, hash: :crypto.hash(:sha256, source)}
+    :ets.insert(:symphony_workflows, {name, entry})
+  end
+
+  defp persist_run(run_id, status) do
+    node = %{Node.new(id: "a", ast_origin: {:exec, "a"}, kind: :exec, inputs: %{}) | state: :succeeded, output: %{"v" => 1}}
+    graph = RunGraph.new(run_id, "hash", nil) |> RunGraph.put_nodes([node]) |> Map.put(:status, status)
+    :ok = Store.persist(graph)
+  end
+
+  defp get(path) do
+    SymphonyElixirWeb.Endpoint.call(conn(:get, path), @opts)
+  end
+
+  defp post(path) do
+    SymphonyElixirWeb.Endpoint.call(put_req_header(conn(:post, path), "content-type", "application/json"), @opts)
+  end
+
+  defp post(path, body) do
+    # Encode the body as JSON and label the request accordingly.
+    payload = Jason.encode!(body)
+    request = put_req_header(conn(:post, path, payload), "content-type", "application/json")
+    SymphonyElixirWeb.Endpoint.call(request, @opts)
+  end
+
+  test "GET /api/v1/ir/schema returns the runtime enum vocabulary" do
+    conn = get("/api/v1/ir/schema")
+    assert conn.status == 200
+    body = Jason.decode!(conn.resp_body)
+
+    # The endpoint serves the runtime's accessors verbatim (atoms render as
+    # strings), so the form's option lists match what a turn accepts. Assert
+    # against the accessor, not a second hardcoded list, so the test is not
+    # itself a place the vocabulary can drift.
+    assert body["engines"] == strings(Envelope.engines())
+    assert body["permissions"] == strings(Envelope.permission_levels())
+    assert "agent" in body["node_kinds"]
+    assert "manual" in body["trigger_kinds"]
+  end
+
+  test "schema enums, struct-accepted values, and the API payload do not drift" do
+    # ENG-1825's "the UI cannot drift from the runtime" pillar. Three sources
+    # must name the same vocabulary or the form offers options a turn rejects:
+    #   1. the accessors the schema map reads (what the form renders),
+    #   2. the values Envelope.from_map/1 actually accepts (what a turn takes),
+    #   3. the JSON the /schema endpoint serves (what ships over the wire).
+    # A value added to one but not another turns this red. The generated
+    # @type unions keep Dialyzer in agreement with leg 2 at compile time;
+    # this test covers the runtime legs Dialyzer cannot see.
+    schema = Schema.to_map()
+    api = "/api/v1/ir/schema" |> get() |> Map.fetch!(:resp_body) |> Jason.decode!()
+
+    # engines: each accepted with an engine-agreeing model; an off-list value rejected.
+    assert schema.engines == Envelope.engines()
+    assert api["engines"] == strings(Envelope.engines())
+
+    for engine <- Envelope.engines() do
+      assert {:ok, %{engine: ^engine}} =
+               Envelope.from_map(%{"engine" => engine, "model" => model_for(engine)})
+    end
+
+    assert {:error, {:invalid_engine, _}} =
+             Envelope.from_map(%{"engine" => :nonsense, "model" => "m"})
+
+    # efforts
+    assert schema.efforts == Envelope.efforts()
+    assert api["efforts"] == strings(Envelope.efforts())
+
+    for effort <- Envelope.efforts() do
+      assert {:ok, %{effort: ^effort}} =
+               Envelope.from_map(%{"engine" => :codex, "model" => "m", "effort" => effort})
+    end
+
+    assert {:error, {:invalid_effort, _}} =
+             Envelope.from_map(%{"engine" => :codex, "model" => "m", "effort" => :nope})
+
+    # permissions
+    assert schema.permissions == Envelope.permission_levels()
+    assert api["permissions"] == strings(Envelope.permission_levels())
+
+    for perm <- Envelope.permission_levels() do
+      assert {:ok, %{permissions: ^perm}} =
+               Envelope.from_map(%{"engine" => :codex, "model" => "m", "permissions" => perm})
+    end
+
+    assert {:error, {:invalid_permissions, _}} =
+             Envelope.from_map(%{"engine" => :codex, "model" => "m", "permissions" => :nope})
+
+    # locations: the bare placement tags the form offers (payload-carriers
+    # supply their payload separately, so only the tag list is the shared axis).
+    assert schema.locations == Envelope.locations()
+    assert api["locations"] == strings(Envelope.locations())
+  end
+
+  defp strings(atoms), do: Enum.map(atoms, &Atom.to_string/1)
+
+  # check_engine_model_agree rejects a Claude model under :codex and a
+  # non-Claude model under :claude, so each engine needs an agreeing model.
+  defp model_for(:codex), do: "gpt-5.3-codex"
+  defp model_for(:claude), do: "claude-opus-4-8"
+
+  test "GET /api/v1/ir/runs lists persisted run summaries" do
+    persist_run("run_a", :succeeded)
+    persist_run("run_b", :failed)
+
+    conn = get("/api/v1/ir/runs")
+    assert conn.status == 200
+    body = Jason.decode!(conn.resp_body)
+    ids = Enum.map(body["runs"], & &1["run_id"])
+    assert ids == ["run_a", "run_b"]
+    assert Enum.find(body["runs"], &(&1["run_id"] == "run_b"))["status"] == "failed"
+  end
+
+  test "GET /api/v1/ir/runs/:id returns the full detail" do
+    persist_run("run_detail", :succeeded)
+
+    conn = get("/api/v1/ir/runs/run_detail")
+    assert conn.status == 200
+    body = Jason.decode!(conn.resp_body)
+    assert body["run_id"] == "run_detail"
+    assert [node] = body["nodes"]
+    assert node["id"] == "a"
+    assert node["output"] == %{"v" => 1}
+  end
+
+  test "GET an unknown run returns 404" do
+    conn = get("/api/v1/ir/runs/nope")
+    assert conn.status == 404
+    assert Jason.decode!(conn.resp_body) == %{"error" => "run not found"}
+  end
+
+  test "POST /api/v1/ir/runs starts a run from a workflow name" do
+    put_workflow("demo", ~s|workflow "demo" on manual { a <- agent { engine: codex, model: "m", prompt: inline "go" } }|)
+
+    conn = post("/api/v1/ir/runs", %{"workflow" => "demo"})
+    assert conn.status == 201
+    body = Jason.decode!(conn.resp_body)
+    assert is_binary(body["run_id"])
+    assert String.starts_with?(body["run_id"], "demo-")
+
+    # The run is materialized and persisted, so it is visible on the index.
+    run_id = body["run_id"]
+
+    assert eventually(fn ->
+             match?({:ok, _}, Store.load(run_id))
+           end)
+  end
+
+  test "POST /api/v1/ir/runs for an unknown workflow returns 404" do
+    conn = post("/api/v1/ir/runs", %{"workflow" => "nope"})
+    assert conn.status == 404
+    assert Jason.decode!(conn.resp_body)["error"] =~ "workflow_not_found"
+  end
+
+  test "POST /api/v1/ir/runs without a workflow field returns 422" do
+    conn = post("/api/v1/ir/runs", %{})
+    assert conn.status == 422
+    assert Jason.decode!(conn.resp_body)["error"] =~ "workflow"
+  end
+
+  defp eventually(fun, attempts \\ 50) do
+    if fun.() do
+      true
+    else
+      attempts != 0 and Process.sleep(20) == :ok and eventually(fun, attempts - 1)
+    end
+  end
+
+  test "operator action on a run with no live process returns 409" do
+    # A persisted run with no live Runtime GenServer: cancel cannot reach a
+    # process, so the controller returns 409 rather than a 500.
+    persist_run("run_dead", :failed)
+
+    conn = post("/api/v1/ir/runs/run_dead/cancel")
+    assert conn.status == 409
+    assert Jason.decode!(conn.resp_body)["error"] =~ "no live process"
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir_web/ir_runs_live_test.exs b/packages/symphony/elixir/test/symphony_elixir_web/ir_runs_live_test.exs
new file mode 100644
index 000000000..41e0768da
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir_web/ir_runs_live_test.exs
@@ -0,0 +1,294 @@
+defmodule SymphonyElixirWeb.IRRunsLiveTest do
+  @moduledoc """
+  Phase 5 tests: the :show LiveView renders the graph SVG, the summary dl,
+  and action buttons that drive Runtime operator calls.
+  """
+
+  use ExUnit.Case, async: false
+
+  import Phoenix.ConnTest
+  import Phoenix.LiveViewTest
+
+  @endpoint SymphonyElixirWeb.Endpoint
+
+  alias SymphonyElixir.Engine.Envelope
+  alias SymphonyElixir.IR.{Node, RunGraph, Store}
+  alias SymphonyElixir.Runtime
+
+  # A fake EngineClient programmed per node id through a named ETS table (the
+  # suite is async: false). A node programmed with :block sleeps 30 s, far
+  # longer than any poll here, so its run stays :running during operator actions.
+  defmodule FakeEngine do
+    @behaviour SymphonyElixir.Runtime.EngineClient
+
+    @table :ir_runs_live_fake
+
+    def setup do
+      # Create the named table on first use, then always start from empty.
+      table_missing? = :ets.whereis(@table) == :undefined
+      if table_missing?, do: :ets.new(@table, [:named_table, :public, :set])
+
+      :ets.delete_all_objects(@table)
+      :ok
+    end
+
+    def program(node_id, instruction), do: :ets.insert(@table, {node_id, instruction})
+
+    @impl true
+    def run_node(%Node{id: id}, _opts) do
+      case :ets.lookup(@table, id) do
+        [] ->
+          {:ok, %{default: id}, nil}
+
+        [{^id, {:ok, out}}] ->
+          {:ok, out, nil}
+
+        [{^id, {:error, reason}}] ->
+          {:error, reason, nil}
+
+        [{^id, :block}] ->
+          # Hold the node open with a long sleep; the task is expected to be
+          # killed when the runtime stops, well before the sleep elapses.
+          Process.sleep(30_000)
+          {:ok, %{}, nil}
+      end
+    end
+
+    @impl true
+    def status(_thread_id), do: :unknown
+  end
+
+  setup do
+    FakeEngine.setup()
+
+    unless Process.whereis(SymphonyElixir.Runtime.Registry) do
+      start_supervised!({Registry, keys: :unique, name: SymphonyElixir.Runtime.Registry})
+    end
+
+    unless Process.whereis(SymphonyElixir.TaskSupervisor) do
+      start_supervised!({Task.Supervisor, name: SymphonyElixir.TaskSupervisor})
+    end
+
+    # The LiveView mount calls WorkflowCatalog.workflows/0 and errors/0,
+    # which read two ETS tables. Create both if not present, mirroring the
+    # pattern used in IRRunControllerTest.
+    for table <- [:symphony_workflows, :symphony_workflow_errors] do
+      if :ets.whereis(table) == :undefined do
+        :ets.new(table, [:named_table, :public, read_concurrency: true])
+      else
+        :ets.delete_all_objects(table)
+      end
+    end
+
+    :ok
+  end
+
+  defp agent_node(id, opts \\ []) do
+    Node.new(
+      id: id,
+      ast_origin: {:agent, id},
+      kind: :agent,
+      envelope: %Envelope{engine: :codex, model: "gpt-5.3-codex"},
+      inputs: Keyword.get(opts, :inputs, %{}),
+      state: :pending
+    )
+  end
+
+  defp persist_graph(graph, store_opts \\ []) do
+    :ok = Store.persist(graph, store_opts)
+  end
+
+  defp build_graph(run_id, nodes) do
+    # Build the graph, attach the nodes, and force the status to :running.
+    populated = RunGraph.put_nodes(RunGraph.new(run_id, "hash", nil), nodes)
+    Map.put(populated, :status, :running)
+  end
+
+  test "show page renders the summary dl with trigger and placement" do
+    run_id = "live-show-#{System.unique_integer([:positive])}"
+
+    graph =
+      build_graph(run_id, [agent_node("a"), agent_node("b", inputs: %{"x" => {:node, "a", []}})])
+      |> Map.put(:trigger, %{kind: :manual})
+
+    persist_graph(graph)
+
+    {:ok, view, html} = live(build_conn(), "/ir/" <> run_id)
+
+    # The summary dl should be present.
+    assert html =~ " {:node, "inspect", []}})])
+    persist_graph(graph)
+
+    {:ok, _view, html} = live(build_conn(), "/ir/" <> run_id)
+
+    # The SVG graph component must be present.
+    assert html =~ " run_id)
+    assert html =~ "cancel run"
+  end
+
+  test "show page does not render cancel button for a succeeded run" do
+    run_id = "live-no-cancel-#{System.unique_integer([:positive])}"
+
+    graph =
+      build_graph(run_id, [agent_node("a")])
+      |> Map.put(:status, :succeeded)
+
+    persist_graph(graph)
+
+    {:ok, _view, html} = live(build_conn(), "/ir/" <> run_id)
+    refute html =~ "cancel run"
+  end
+
+  test "show page renders retry_failed and rerun buttons for a failed run" do
+    run_id = "live-failed-btns-#{System.unique_integer([:positive])}"
+
+    node = %{agent_node("a") | state: :failed}
+
+    graph =
+      build_graph(run_id, [node])
+      |> Map.put(:status, :failed)
+
+    persist_graph(graph)
+
+    {:ok, _view, html} = live(build_conn(), "/ir/" <> run_id)
+    assert html =~ "retry failed"
+    assert html =~ "rerun"
+  end
+
+  test "cancel button calls Runtime.cancel and run transitions to cancelled" do
+    run_id = "live-cancel-action-#{System.unique_integer([:positive])}"
+
+    # Use the default store dir so the Runtime, the LiveView, and the
+    # assertion all read/write the same location. Cleanup is best-effort
+    # (File.rm, not rm!) so an early failure is not masked by a missing file.
+    default_ir_dir = SymphonyElixir.IR.Store.dir()
+    File.mkdir_p!(default_ir_dir)
+    on_exit(fn -> File.rm(Path.join(default_ir_dir, run_id <> ".json")) end)
+
+    # Build a graph with a blocking node so the run stays :running while we cancel.
+    graph = build_graph(run_id, [agent_node("slow")])
+    FakeEngine.program("slow", :block)
+
+    # Start a real runtime using the default store so cancel has a live
+    # process to reach and the store transition is visible.
+    {:ok, _pid} = Runtime.start_link(graph, engine: FakeEngine)
+
+    # Wait briefly for the runtime to persist the initial graph, then load
+    # the LiveView and click cancel.
+    assert eventually(fn ->
+             match?({:ok, _}, Store.load(run_id))
+           end),
+           "run was not persisted by the runtime in time"
+
+    {:ok, view, _html} = live(build_conn(), "/ir/" <> run_id)
+
+    # Click cancel.
+    render_click(view, "cancel")
+
+    # The Runtime should now be cancelled. Poll the store until it reflects it.
+    assert eventually(fn ->
+             case Store.load(run_id) do
+               {:ok, g} -> g.status == :cancelled
+               _ -> false
+             end
+           end),
+           "run #{run_id} did not become cancelled"
+  end
+
+  test "show page renders not-found message for an unknown run" do
+    {:ok, _view, html} = live(build_conn(), "/ir/nonexistent-run-xyz")
+    assert html =~ "run not found"
+  end
+
+  test "index paginates the runs table at 50 rows per page" do
+    # Persist 51 runs into the default store the LiveView reads. They are
+    # created now, so the latest-first sort floats all of them above any
+    # leftover runs: page 1 is exactly the per-page cap and a 51st run spills
+    # onto page 2.
+    default_ir_dir = Store.dir()
+    File.mkdir_p!(default_ir_dir)
+    prefix = "live-page-#{System.unique_integer([:positive])}-"
+
+    run_ids = for i <- 1..51, do: prefix <> String.pad_leading(Integer.to_string(i), 3, "0")
+
+    # Register cleanup before persisting so a failure mid-loop leaves no files behind.
+    on_exit(fn ->
+      for run_id <- run_ids, do: File.rm(Path.join(default_ir_dir, run_id <> ".json"))
+    end)
+
+    for run_id <- run_ids do
+      persist_graph(build_graph(run_id, [agent_node("a")]))
+    end
+
+    {:ok, _view, html} = live(build_conn(), "/")
+    # The pager renders and offers a second page once the cap is exceeded.
+    assert html =~ ~s(class="pager")
+    assert html =~ "page=2"
+    # Page 1 shows exactly the per-page cap, never the full 51.
+    assert count_run_rows(html) == 50
+
+    {:ok, _view2, html2} = live(build_conn(), "/ir?page=2")
+    # Page 2 carries the spillover and stays under the cap.
+    rows2 = count_run_rows(html2)
+    assert rows2 >= 1
+    assert rows2 <= 50
+  end
+
+  # Each runs-table row links to its run at `/ir/`; the pager links use
+  # `?page=N` on the bare path, so counting the row-link prefix counts only
+  # rendered run rows.
+  defp count_run_rows(html) do
+    html |> String.split(~s(href="/ir/)) |> tl() |> length()
+  end
+
+  test "placement_label renders fallback notation when declared != effective" do
+    run_id = "live-placement-#{System.unique_integer([:positive])}"
+
+    graph =
+      build_graph(run_id, [agent_node("a")])
+      |> Map.put(:placement, %{declared: :ixvm, effective: :host})
+
+    persist_graph(graph)
+
+    {:ok, _view, html} = live(build_conn(), "/ir/" <> run_id)
+    assert html =~ "ixvm"
+    assert html =~ "fallback"
+    assert html =~ "host"
+  end
+
+  defp eventually(fun, attempts \\ 50) do
+    if fun.() do
+      true
+    else
+      attempts != 0 and Process.sleep(20) == :ok and eventually(fun, attempts - 1)
+    end
+  end
+end
diff --git a/packages/symphony/elixir/test/symphony_elixir_web/markdown_test.exs b/packages/symphony/elixir/test/symphony_elixir_web/markdown_test.exs
new file mode 100644
index 000000000..9c46d0aa4
--- /dev/null
+++ b/packages/symphony/elixir/test/symphony_elixir_web/markdown_test.exs
@@ -0,0 +1,64 @@
+defmodule SymphonyElixirWeb.MarkdownTest do
+  @moduledoc """
+  The dashboard lowers skill bodies and codex transcript text from
+  markdown to sanitized HTML. These guard the render-and-sanitize
+  contract: structural markdown becomes HTML, blank input stays empty,
+  and script injection is scrubbed before it reaches a `{:safe, _}`.
+  """
+
+  use ExUnit.Case, async: true
+
+  alias SymphonyElixirWeb.Markdown
+
+  defp render(source) do
+    {:safe, iodata} = Markdown.to_html(source)
+    IO.iodata_to_binary(iodata)
+  end
+
+  test "renders headings, emphasis, lists, and inline code" do
+    html =
+      render("""
+      # Sub tickets
+
+      Split **the work** into `tasks`:
+
+      - first
+      - second
+      """)
+
+    assert html =~ "

" + assert html =~ "Sub tickets" + assert html =~ "the work" + assert html =~ ~r{]*>tasks} + assert html =~ "
  • first
  • " + end + + test "renders fenced code blocks" do + html = + render(""" + ``` + mix deps.get + ``` + """) + + assert html =~ "
    "
    +    assert html =~ "mix deps.get"
    +  end
    +
    +  test "blank and nil input render as empty safe html" do
    +    assert Markdown.to_html(nil) == {:safe, ""}
    +    assert Markdown.to_html("") == {:safe, ""}
    +    assert Markdown.to_html("   \n  ") == {:safe, ""}
    +  end
    +
    +  test "neutralizes raw html so transcript text cannot inject" do
    +    html = render("hello  world")
    +
    +    # Earmark escapes raw html by default and the sanitizer is a second
    +    # line of defense, so no executable script element survives.
    +    refute html =~ " RepositoryCatalog.all()
    +      |> Map.new(fn repo ->
    +        {repo.name, init_repo!(Path.join(source_root, repo.name), repo.default_branch)}
    +      end)
    +
    +    config = %Config{
    +      primary_repo: Map.fetch!(local_repos, "primary-app"),
    +      repo_root: source_root,
    +      repositories_file: repositories_file
    +    }
    +
    +    on_exit(fn -> File.rm_rf!(tmp_root) end)
    +
    +    %{config: config, local_repos: local_repos, workspaces_dir: workspaces_dir}
    +  end
    +
    +  test "creates primary workspace with writable sibling repos", %{
    +    config: config,
    +    local_repos: local_repos,
    +    workspaces_dir: workspaces_dir
    +  } do
    +    run_root = Path.join(workspaces_dir, "run-1")
    +    assert {:ok, workspace} = RepoCloner.clone_all(config, run_root, "run-1")
    +
    +    assert workspace == Path.join([workspaces_dir, "run-1", "primary-app"])
    +    assert File.exists?(Path.join(workspace, "README.md"))
    +
    +    docs_repo = Path.join([workspaces_dir, "run-1", "docs"])
    +    assert File.exists?(Path.join(docs_repo, "README.md"))
    +    assert {"symphony/run-1\n", 0} = System.cmd("git", ["-C", docs_repo, "branch", "--show-current"])
    +
    +    assert {alternate, 0} = System.cmd("git", ["-C", docs_repo, "rev-parse", "--git-path", "objects/info/alternates"])
    +    alternate_path = Path.expand(String.trim(alternate), docs_repo)
    +    assert File.read!(alternate_path) =~ Path.join(Map.fetch!(local_repos, "docs"), ".git/objects")
    +  end
    +
    +  test "primary repo declares main as default", %{config: config} do
    +    assert %{default_branch: "main", primary?: true} =
    +             Enum.find(RepositoryCatalog.all(config), & &1.primary?)
    +  end
    +
    +  defp init_repo!(path, branch) do
    +    File.mkdir_p!(path)
    +    File.write!(Path.join(path, "README.md"), "# #{Path.basename(path)}\n")
    +
    +    git!(path, ["init", "--initial-branch=#{branch}"])
    +    git!(path, ["config", "user.name", "Symphony Test"])
    +    git!(path, ["config", "user.email", "symphony-test@example.com"])
    +    git!(path, ["add", "README.md"])
    +    git!(path, ["commit", "-m", "init"])
    +
    +    path
    +  end
    +
    +  defp git!(path, args) do
    +    case System.cmd("git", ["-C", path] ++ args, stderr_to_stdout: true) do
    +      {_output, 0} -> :ok
    +      {output, status} -> flunk("git #{Enum.join(args, " ")} failed with #{status}: #{output}")
    +    end
    +  end
    +end
    diff --git a/packages/symphony/package.nix b/packages/symphony/package.nix
    new file mode 100644
    index 000000000..708ac9750
    --- /dev/null
    +++ b/packages/symphony/package.nix
    @@ -0,0 +1,11 @@
    +# Registry metadata. The launcher is a flake output (`nix run .#symphony`,
+# `index.packages.<system>.symphony`, the attr ix's symphony host modules
    +# consume) and deliberately not an overlay: nothing inside an image
    +# evaluation needs `pkgs.symphony`, and the room-server the symphony-codex
    +# image embeds is a separate package (`pkgs.symphony-room-server`, still
    +# provided by the pinned `symphony` flake input).
    +{
    +  id = "symphony";
    +  packageSet = true;
    +  flake = true;
    +}
    diff --git a/packages/symphony/workflows/example/repositories.yaml b/packages/symphony/workflows/example/repositories.yaml
    new file mode 100644
    index 000000000..b060cd22f
    --- /dev/null
    +++ b/packages/symphony/workflows/example/repositories.yaml
    @@ -0,0 +1,5 @@
    +repositories:
    +  - name: example
    +    owner_repo: example/example
    +    default_branch: main
    +    primary: true
    diff --git a/packages/symphony/workflows/example/skills/inspect.md b/packages/symphony/workflows/example/skills/inspect.md
    new file mode 100644
    index 000000000..4fb274741
    --- /dev/null
    +++ b/packages/symphony/workflows/example/skills/inspect.md
    @@ -0,0 +1,14 @@
    +---
    +codex_model: gpt-5.3-codex
    +reasoning_effort: medium
    +sandbox: workspace-write
    +approval_policy: never
    +tools: []
    +---
    +
    +You are running inside a sample Symphony workflow.
    +
    +Read the input and inspect the checked-out workspace. Do not push branches,
    +open pull requests, mutate external services, or assume any Indexable-specific
    +infrastructure exists. Return a concise summary of what you found and what a
    +human could do next.
    diff --git a/packages/symphony/workflows/example/workflows/inspect.sym b/packages/symphony/workflows/example/workflows/inspect.sym
    new file mode 100644
    index 000000000..7262fdef3
    --- /dev/null
    +++ b/packages/symphony/workflows/example/workflows/inspect.sym
    @@ -0,0 +1,10 @@
    +# A minimal manual-trigger workflow: inspect the workspace and report.
    +workflow "inspect" on manual {
    +  inspect <- agent {
    +    engine: codex
    +    model: "gpt-5.3-codex"
    +    effort: medium
    +    permissions: workspace_write
    +    prompt: skill "inspect"
    +  }
    +}
    diff --git a/tests/default.nix b/tests/default.nix
    index 38a91810b..0ff72c47b 100644
    --- a/tests/default.nix
    +++ b/tests/default.nix
    @@ -542,6 +542,31 @@ let
           packageNames = map lib.getName config.environment.systemPackages;
         };
     
    +  # The symphony control-plane module (modules/services/symphony) evaluated
    +  # standalone, the way ix's host modules consume it. `package` only needs a
    +  # /bin path shape at eval, so hello stands in for the launcher.
    +  symphonyService =
    +    let
    +      config = evalConfig [
    +        {
    +          ix.image = {
    +            name = "test/symphony-module";
    +            tag = "test";
    +          };
    +          services.symphony = {
    +            enable = true;
    +            package = pkgs.hello;
    +            primaryRepo = "/srv/checkouts/index";
    +            environmentFile = "/run/secrets/symphony.env";
    +          };
    +        }
    +      ];
    +    in
    +    {
    +      inherit config;
    +      unit = config.systemd.services.symphony;
    +    };
    +
       pythonAppClosureProbe = ix.writePythonApplication pkgs {
         name = "python-app-closure-probe";
         src = pkgs.writeText "python-app-closure-probe.py" ''
    @@ -2987,6 +3012,34 @@ let
           }
         ];
     
    +    # The control-plane runtime module that moved in-tree with
    +    # packages/symphony. These pin the env contract ix's hil deployment and
    +    # the worker module read off the unit, so a refactor that renames an
    +    # option or drops the EnvironmentFile pass-through fails here instead of
    +    # on a host switch.
    +    symphony = [
    +      {
    +        assertion = symphonyService.unit.environment.SYMPHONY_WORKFLOW_PACK == "example";
    +        message = "symphony module should default to the bundled example workflow pack";
    +      }
    +      {
    +        assertion = symphonyService.unit.environment.SYMPHONY_PRIMARY_REPO == "/srv/checkouts/index";
    +        message = "symphony module should export the primary repo checkout to the runtime";
    +      }
    +      {
    +        assertion = lib.hasSuffix "/bin/symphony" symphonyService.unit.serviceConfig.ExecStart;
    +        message = "symphony module should exec /bin/symphony from the configured package";
    +      }
    +      {
    +        assertion = symphonyService.unit.serviceConfig.EnvironmentFile == "/run/secrets/symphony.env";
    +        message = "symphony module should pass the secrets EnvironmentFile through to systemd";
    +      }
    +      {
    +        assertion = !(symphonyService.unit.environment ? SYMPHONY_HOST_USER);
    +        message = "symphony module should keep host-placement env unset until hostRuntime.enable";
    +      }
    +    ];
    +
         minecraft = [
           {
             assertion = minecraft.config.ix.image.tag == defaultMinecraftVersion;