From 53edd7153fa4cf96bae33e37f7ba1c82d082735b Mon Sep 17 00:00:00 2001
From: Dmitry Baev <baev@users.noreply.github.com>
Date: Wed, 10 Jun 2026 15:20:34 +0100
Subject: [PATCH 1/5] improve allure agent mode

---
 AGENTS.md                                     |   16 +-
 README.md                                     |    9 +-
 docs/agent_enrichment_loop.md                 |  181 ---
 docs/allure-agent-mode.md                     |  392 ------
 docs/allure-test-agent.md                     |  230 +++
 packages/cli/README.md                        |   17 +-
 packages/cli/src/commands/agent-run.ts        |  216 +++
 packages/cli/src/commands/agent.ts            |  646 +++++----
 packages/cli/src/commands/run.ts              |   20 -
 packages/cli/src/index.ts                     |   22 +-
 packages/cli/src/utils/index.ts               |    2 -
 packages/cli/test/commands/agent.test.ts      |  443 +++++-
 .../cli/test/commands/agentLatest.test.ts     |   22 +-
 packages/cli/test/commands/agentQuery.test.ts |  199 +++
 .../cli/test/commands/agentSelect.test.ts     |   77 +-
 .../cli/test/commands/run.integration.test.ts | 1039 ++++++++------
 packages/cli/test/commands/run.test.ts        |   40 -
 packages/plugin-agent/README.md               |  164 +--
 packages/plugin-agent/src/capabilities.ts     |  178 +++
 packages/plugin-agent/src/errors.ts           |   21 +
 packages/plugin-agent/src/guidance.ts         |  258 +++-
 packages/plugin-agent/src/harness.ts          |  148 +-
 packages/plugin-agent/src/index.ts            |   17 +-
 .../plugin-agent/src/inline-expectations.ts   |  295 ++++
 packages/plugin-agent/src/invalid-output.ts   |  259 ++++
 packages/plugin-agent/src/model.ts            |   49 +
 packages/plugin-agent/src/paths.ts            |   14 +
 packages/plugin-agent/src/plugin.ts           | 1240 ++++++++++++++---
 packages/plugin-agent/src/query.ts            |  252 ++++
 .../src/selection.ts}                         |   27 +-
 .../src/state.ts}                             |    0
 .../plugin-agent/test/capabilities.test.ts    |   61 +
 packages/plugin-agent/test/evidence.ts        |   24 +
 packages/plugin-agent/test/guidance.test.ts   |   70 +
 packages/plugin-agent/test/harness.test.ts    |  480 ++++++-
 packages/plugin-agent/test/index.test.ts      | 1192 ++++++++++++++--
 .../test/inline-expectations.test.ts          |  215 +++
 .../plugin-agent/test/invalid-output.test.ts  |   94 ++
 packages/plugin-agent/test/query.test.ts      |  322 +++++
 .../test/selection.test.ts}                   |   29 +-
 packages/plugin-agent/test/skills.test.ts     |  137 --
 .../test/state.test.ts}                       |   17 +-
 .../SKILL.md                                  |   92 --
 .../agents/openai.yaml                        |    7 -
 .../references/expectations-example.yaml      |   17 -
 skills/allure-agent-mode-setup/SKILL.md       |   42 -
 .../agents/openai.yaml                        |    7 -
 .../references/project-guide-template.md      |  174 ---
 .../references/root-agents-snippet.md         |   10 -
 49 files changed, 7021 insertions(+), 2462 deletions(-)
 delete mode 100644 docs/agent_enrichment_loop.md
 delete mode 100644 docs/allure-agent-mode.md
 create mode 100644 docs/allure-test-agent.md
 create mode 100644 packages/cli/src/commands/agent-run.ts
 create mode 100644 packages/cli/test/commands/agentQuery.test.ts
 create mode 100644 packages/plugin-agent/src/capabilities.ts
 create mode 100644 packages/plugin-agent/src/errors.ts
 create mode 100644 packages/plugin-agent/src/inline-expectations.ts
 create mode 100644 packages/plugin-agent/src/invalid-output.ts
 create mode 100644 packages/plugin-agent/src/paths.ts
 create mode 100644 packages/plugin-agent/src/query.ts
 rename packages/{cli/src/utils/agent-select.ts => plugin-agent/src/selection.ts} (89%)
 rename packages/{cli/src/utils/agent-state.ts => plugin-agent/src/state.ts} (100%)
 create mode 100644 packages/plugin-agent/test/capabilities.test.ts
 create mode 100644 packages/plugin-agent/test/evidence.ts
 create mode 100644 packages/plugin-agent/test/guidance.test.ts
 create mode 100644 packages/plugin-agent/test/inline-expectations.test.ts
 create mode 100644 packages/plugin-agent/test/invalid-output.test.ts
 create mode 100644 packages/plugin-agent/test/query.test.ts
 rename packages/{cli/test/utils/agent-select.test.ts => plugin-agent/test/selection.test.ts} (75%)
 delete mode 100644 packages/plugin-agent/test/skills.test.ts
 rename packages/{cli/test/utils/agent-state.test.ts => plugin-agent/test/state.test.ts} (88%)
 delete mode 100644 skills/allure-agent-mode-feature-delivery/SKILL.md
 delete mode 100644 skills/allure-agent-mode-feature-delivery/agents/openai.yaml
 delete mode 100644 skills/allure-agent-mode-feature-delivery/references/expectations-example.yaml
 delete mode 100644 skills/allure-agent-mode-setup/SKILL.md
 delete mode 100644 skills/allure-agent-mode-setup/agents/openai.yaml
 delete mode 100644 skills/allure-agent-mode-setup/references/project-guide-template.md
 delete mode 100644 skills/allure-agent-mode-setup/references/root-agents-snippet.md

diff --git a/AGENTS.md b/AGENTS.md
index 9d6627171da..bba339fb9e0 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -1,14 +1,12 @@
 # Project Guide
 
-Use [Allure Agent Mode](docs/allure-agent-mode.md) for all test-related work in this repository.
+Use [Allure Test Agent](docs/allure-test-agent.md) for all test-related work in this repository.
 
-- Read `docs/allure-agent-mode.md` before designing, writing, reviewing, validating, debugging, or enriching tests.
-- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.
-- Use `allure agent` for smoke checks too, even when the change is small or mechanical.
-- After changing a package in this repository, treat package builds as part of validation and run the changed package build command before finalizing (for example, `yarn workspace <package-name> build`), because type-level regressions can break builds.
-- **Workspace build order:** dependency edges come from each package's `dependencies` (including `workspace:*` links to other packages). The root `yarn build` script runs `yarn workspaces foreach -Avvpt run build`, where `-t`/`--topological` schedules a workspace only after its regular `dependencies` have built successfully (parallelism `-p` is within those constraints). For a single touched package, `yarn workspace <package-name> build` is enough; when several packages change or downstream breakage is possible, prefer a full **`yarn build`** at the repo root so Yarn applies the same topological order as CI.
-- **Lint, format, and type-aware lint:** before finalizing code changes, align with the `lint` job in `.github/workflows/build.yml`: run **`yarn build`**, then **`yarn lint`**, **`yarn format:check`**, and **`yarn lint:type`**. The root script `yarn verify` runs `format:check`, `lint`, and `lint:type` together but does not run `yarn build`; use it only when you already built and need the same static checks in one command.
+- Read `docs/allure-test-agent.md` before designing, writing, reviewing, validating, debugging, or enriching tests. Use the `$allure-test-agent` skill as the durable behavior guide when it is installed; this project file contains local commands and conventions.
+- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through the local agent test service when available, or through `allure agent` otherwise. Agent-mode execution preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.
+- Use agent-mode execution for smoke checks too, even when the change is small or mechanical.
 - Only skip agent mode when it is impossible or when you are debugging agent mode itself.
-- If agent-mode output is missing or incomplete, debug that first rather than silently falling back to console-only review; use the checklist in `docs/allure-agent-mode.md` (**Agent mode failures and unavailable runs**).
+- If agent-mode output is missing or incomplete, debug that first rather than silently falling back to console-only review.
 - Use Allure agent-mode when adding tests for features or fixes so expectations, evidence quality, and scope review are part of the loop.
-- Keep any non-testing project guidance here short; the detailed Allure workflow belongs in the linked guide.
+- Do not present ignored, excluded, swallowed, or non-gating tests as a passing validation signal.
+- After changing a package, run the changed package build command before finalizing. For several packages or possible downstream breakage, prefer the root `yarn build` so Yarn applies the same topological order as CI.
diff --git a/README.md b/README.md
index 41eb2ca64d3..dd47023f37b 100644
--- a/README.md
+++ b/README.md
@@ -74,7 +74,13 @@ For example:
 npx allure agent -- npm test
 ```
 
-`allure agent` runs with an agent-only profile by default. It creates a fresh output directory automatically, can load an expectations file with `--expectations`, and ignores configured presentation or export plugins such as Awesome or TestOps unless you explicitly fall back to the lower-level `ALLURE_AGENT_*` plus `allure run` flow.
+`allure agent` runs with an agent-only profile by default. It creates a fresh output directory automatically and accepts compact inline expectations such as `--goal`, `--expect-tests`, `--expect-test`, `--expect-label`, and `--expect-step-containing`. It can also load an expectations file with `--expectations`, and it ignores configured presentation or export plugins such as Awesome or TestOps for that run.
+
+Agents and setup tools can inspect the local structured capability contract without scraping help text:
+
+```bash
+npx allure agent capabilities --json
+```
 
 ### Generating Reports Manually
 
@@ -122,6 +128,7 @@ The Allure CLI includes several helpful global options. Use `--help` to explore
 
 ```bash
 npx allure run --help
+npx allure agent capabilities --json
 npx allure agent --help
 npx allure watch --help
 ```
diff --git a/docs/agent_enrichment_loop.md b/docs/agent_enrichment_loop.md
deleted file mode 100644
index 240f396bae0..00000000000
--- a/docs/agent_enrichment_loop.md
+++ /dev/null
@@ -1,181 +0,0 @@
-# Allure Agent Enrichment Loop
-
-Canonical downstream guidance now lives in two product-facing places:
-
-- generated `AGENTS.md` in every agent-mode output directory
-- the published `@allurereport/plugin-agent` README
-- project `docs/allure-agent-mode.md` when a repository adopts the skills-based setup flow
-
-This document remains a maintainer companion for developing the plugin and harness
-inside this repository.
-
-## Goal
-
-The Allure agent plugin is intentionally read-only. It records what happened in the
-test run, but it does not mutate tests or invent evidence.
-
-The enrichment loop sits above that output:
-
-1. Generate `ALLURE_AGENT_EXPECTATIONS` as a fresh per-run YAML or JSON file.
-2. Run tests with `allure agent`, or use the lower-level `ALLURE_AGENT_*` plus `allure run` fallback when you need direct environment control.
-3. Review `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`.
-4. Enrich only the targeted tests with real runtime metadata.
-5. Rerun the same scope and accept the change only when scope matches and the
-   resulting evidence is strong enough to review.
-
-The harness API exported by `@allurereport/plugin-agent` implements the machine
-part of this loop:
-
-- `buildAgentExpectations(...)` creates the JSON payload to write to
-  `ALLURE_AGENT_EXPECTATIONS`.
-- `loadAgentOutput(...)` reads the manifest contract from an agent output directory.
-- `planAgentEnrichmentReview(...)` maps existing `check_name` values to concrete
-  enrichment actions and produces an acceptance decision.
-- `reviewAgentOutput(...)` is the convenience wrapper that loads and reviews in one call.
-
-## Acceptance Policy
-
-The harness stays advisory for raw execution, but it is strict for enrichment review:
-
-- reject when scope drifts from expectations
-- reject when high-confidence noop-style evidence remains
-- iterate when evidence is still too weak
-- accept only when scope matches, expectations are present, and no blocking evidence gaps remain
-
-Current blocking signals:
-
-- scope drift:
-  - `missing-expected-test`
-  - `missing-expected-prefix`
-  - `missing-expected-environment`
-  - `unexpected-environment`
-  - `forbidden-selector-match`
-  - `unexpected-test`
-- evidence still missing:
-  - `failed-without-useful-steps`
-  - `failed-without-attachments`
-  - `nontrivial-run-with-empty-trace`
-  - `retries-without-new-evidence`
-  - `passed-without-observable-evidence`
-  - `metadata-mismatch`
-  - `history-id-collision`
-- anti-dummy:
-  - `noop-dominated-steps` at or above the configured confidence threshold
-
-## Remediation Mapping
-
-The harness reuses the existing `check_name` values instead of inventing a second
-diagnosis channel.
-
-| `check_name` | Action category | Expected remediation |
-| --- | --- | --- |
-| `failed-without-useful-steps` | `add-meaningful-steps` | Add setup, action, and assertion steps around real behavior |
-| `nontrivial-run-with-empty-trace` | `add-meaningful-steps` | Make the execution path observable with real runtime state |
-| `passed-without-observable-evidence` | `add-meaningful-steps` | Show what the passing path actually verified |
-| `failed-without-attachments` | `add-test-attachments` | Add real payloads, responses, screenshots, DOM snapshots, diffs, or logs |
-| `global-only-artifacts` | `add-test-attachments` | Move evidence closer to the relevant test or step |
-| `metadata-mismatch` | `repair-test-metadata` | Add only the minimal labels or parameters needed for scope review |
-| `retries-without-new-evidence` | `add-retry-diagnostics` | Add per-attempt evidence so retries show what changed |
-| `noop-dominated-steps` | `collapse-low-signal-trace` | Remove noop wrappers and replace bulk event spam with compact evidence |
-| `step-spam` | `collapse-low-signal-trace` | Reduce event spam and prefer one focused attachment when appropriate |
-
-## Metadata Baseline
-
-Keep metadata intentionally small:
-
-- require a feature or task label when the run is scoped to a feature or task
-- add severity only when it matters for review or quality-gate policy
-- keep owner, layer, epic, story, and similar taxonomy optional unless the repo already uses them
-- do not add labels that are not used by scope checks, review, or downstream policy
-
-## Runtime Enrichment Examples
-
-Canonical JS/Vitest patterns already live in:
-
-- `packages/sandbox/test/bulk.spec.ts`
-- `packages/sandbox/test/legacy.spec.ts`
-
-Use those APIs to add real evidence, not placeholders:
-
-```ts
-import { attachment, label, step } from "allure-js-commons";
-import { expect, it } from "vitest";
-
-it("creates an order", async () => {
-  await label("feature", "orders");
-  await label("severity", "critical");
-
-  const request = await step("prepare order payload", async () => {
-    const payload = { sku: "book-123", quantity: 1 };
-
-    await attachment("request.json", JSON.stringify(payload, null, 2), "application/json");
-    return payload;
-  });
-
-  const response = await step("submit order", async () => {
-    const result = await createOrder(request);
-
-    await attachment("response.json", JSON.stringify(result, null, 2), "application/json");
-    return result;
-  });
-
-  await step("assert order was created", () => {
-    expect(response.status).toBe(201);
-    expect(response.body.id).toBeDefined();
-  });
-});
-```
-
-## Anti-Dummy Rules
-
-Valid enrichment:
-
-- every step corresponds to a real action, state transition, or check
-- every attachment captures real runtime data from that execution
-- metadata exists because the review loop uses it
-
-Rejected enrichment:
-
-```ts
-await step("success", () => {});
-await attachment("result.txt", "test passed", "text/plain");
-await label("feature", "placeholder");
-```
-
-Why it is rejected:
-
-- the step records no real behavior
-- the attachment is generic text, not runtime evidence
-- the label is meaningless unless it is used by scope or policy
-
-## Minimal Harness Example
-
-```ts
-import { buildAgentExpectations, reviewAgentOutput } from "@allurereport/plugin-agent";
-import { writeFile } from "node:fs/promises";
-
-const expectations = buildAgentExpectations({
-  goal: "Validate feature A",
-  taskId: "feature-a",
-  target: {
-    environments: ["default"],
-    fullNamePrefixes: ["feature A"],
-    labelValues: { feature: "feature-a" },
-  },
-  forbidden: {
-    fullNamePrefixes: ["feature B"],
-    labelValues: { feature: ["feature-b", "legacy-feature"] },
-  },
-  notes: ["Only feature A tests should run."],
-});
-
-await writeFile("./out/agent-expected.json", JSON.stringify(expectations, null, 2));
-
-const review = await reviewAgentOutput("./out/agent-report");
-
-if (review.status !== "accept") {
-  for (const item of review.plan) {
-    console.log(item.checkName, item.category, item.remediationHint);
-  }
-}
-```
diff --git a/docs/allure-agent-mode.md b/docs/allure-agent-mode.md
deleted file mode 100644
index 548cf53e4cf..00000000000
--- a/docs/allure-agent-mode.md
+++ /dev/null
@@ -1,392 +0,0 @@
-# Allure Agent Mode
-
-## Purpose
-
-Use Allure agent-mode to review what the tests actually did, not just whether the command exited successfully.
-
-Use it when:
-
-- adding or updating tests for a feature or bug
-- reviewing existing test suites, auditing coverage, or triaging failing suites
-- validating that intended tests ran and unrelated scope did not drift in
-- improving weak or low-signal runtime evidence
-- preparing richer agent-mode reviews, quality gates, and future loop adoption
-
-## Review Principle
-
-Runtime first, source second.
-
-- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.
-- Use `ALLURE_AGENT_*` with `allure run` only as the lower-level fallback when you need direct environment control.
-- If the agent-mode output is missing or incomplete, debug that first and treat any console-only conclusion as provisional.
-
-## Verification Standard
-
-- Use `allure agent` for smoke checks too, even when the change is small or mechanical.
-- Only skip agent mode when it is impossible or when you are debugging agent mode itself.
-- After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly.
-- After changing a package in this repository, run that package build command before finalizing (for example, `yarn workspace <package-name> build`).
-- Monorepo build order, full-repo builds, and lint/format/type-aware lint expectations before finalizing live in `AGENTS.md` (keep agent-mode docs focused on runtime evidence loops).
-
-## Agent mode failures and unavailable runs
-
-Use this when `allure agent` errors, produces no usable output directory, exits non-zero before manifests exist, or cannot be run in the current environment.
-
-1. **Keep conclusions honest:** do not upgrade a plain test-runner log to a “full” review outcome. If agent artifacts are missing, any pass/fail or scope claim stays **provisional** until agent mode succeeds for the intended command.
-2. **Confirm the invocation:** use the repo’s normal wrapper (here: `yarn allure agent -- …`) so the same subcommand runs with agent-mode instrumentation. Compare argv and cwd with a known-good run.
-3. **Locate output:** if you did not pass `--output`, run `allure agent latest` or `allure agent state-dir` and inspect the resolved directory. Prefer a fresh explicit path via `--output` or `ALLURE_AGENT_OUTPUT` when debugging path or permission issues.
-4. **Expectations and env:** ensure `ALLURE_AGENT_EXPECTATIONS` points at the file you intended (typos silently change behavior). When isolating bugs, set unique `ALLURE_AGENT_OUTPUT` / expectations paths per run (see [Per-Run Artifacts](#per-run-artifacts)).
-5. **Partial artifacts:** if `index.md` or under `manifest/` is missing but the process exited zero, treat the run as **incomplete** and investigate before signing off. If the runner shows failures that never appear in `manifest/tests.jsonl`, check `artifacts/global/stderr.txt` and other logs under `artifacts/global/` (see [When Console Errors Are Not Represented As Test Results](#when-console-errors-are-not-represented-as-test-results)).
-6. **CLI or environment blocked:** when agent mode truly cannot run (broken install, policy-blocked sandbox, missing binary), say so explicitly in your summary: what you ran instead, which artifacts are absent, and what to rerun with `allure agent` once unblocked. Do not silently default to “tests passed” narratives from console-only runs.
-7. **Escalation:** repeated failures after the steps above are a **tooling** problem—collect command line, exit code, first/last log chunks, and Allure CLI version; fix or report that before relying on any substitute workflow.
-
-Skipping agent mode remains limited to the cases already stated in this guide (impossible here, or you are debugging agent mode itself).
-
-## Repository Status
-
-This repository already has a working Allure 3 setup.
-
-- Root report configuration lives in `allurerc.mjs`.
-- Most package test suites emit results with `allure-vitest/reporter` into `./out/allure-results`.
-- The normal feature-delivery path here is to run a targeted workspace test command under `yarn allure agent -- ...`.
-- You usually do not need to bootstrap Allure from scratch in this repo; focus on expectations, evidence quality, and scope control.
-
-## Helpful Commands
-
-- `allure agent latest` prints the latest agent output directory for the current project cwd. Use it when a prior run omitted `--output` and you want to reopen the most recent agent-mode artifacts.
-- `allure agent state-dir` prints the state directory for the current project cwd. Use it when you need to inspect where `latest` pointers are stored or debug sandbox behavior.
-- `allure agent select --latest` or `allure agent select --from <output-dir>` prints the review-targeted test plan from a prior agent run. Add `--preset failed` or exact `--label name=value` / `--environment <id>` filters when you need a narrower rerun plan.
-- `allure agent --rerun-latest -- <command>` or `allure agent --rerun-from <output-dir> -- <command>` reruns only the selected tests through the framework-agnostic Allure testplan flow. The default rerun preset is `review`.
-
-## Advanced Reruns
-
-- `--rerun-preset review|failed|unsuccessful|all` changes how the rerun seed set is chosen. Use `review` for the default agent-targeted loop, `failed` for classic failure reruns, `unsuccessful` for any non-passed tests, and `all` when you want the whole previously observed set.
-- `--rerun-environment <id>` narrows the rerun selection to one or more environment ids from the previous agent output. Repeat the flag for multiple environments.
-- `--rerun-label name=value` narrows the rerun selection to tests whose prior results carried exact matching labels. Repeat the flag for multiple label filters.
-- `ALLURE_AGENT_STATE_DIR` overrides the default project-scoped state directory used by `allure agent latest`, `allure agent state-dir`, and `--rerun-latest`. Use it when you need a deterministic shared location in CI or a constrained sandbox.
-
-## Core Loops
-
-### Test Review Loop
-
-1. Identify the exact review scope.
-2. Create a fresh expectations file for this run in a temp directory.
-3. Run only that scope with `allure agent`.
-4. Read `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`.
-5. Read per-test markdown only for tests that failed, drifted, or have findings.
-6. Only after runtime review, inspect source code for root cause or coverage gaps.
-7. If evidence is weak or partial, enrich the tests and rerun.
-8. When iterating on the same scope, prefer `allure agent --rerun-latest -- <command>` or `allure agent --rerun-from <output-dir> -- <command>` so the rerun stays focused on the review-targeted tests.
-
-### Feature Delivery Loop
-
-1. Understand the feature or issue and the intended test scope.
-2. Create a fresh expectations file for this run in a temp directory.
-3. Write or update the tests.
-4. Run the target scope with `allure agent`.
-5. Review `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, `manifest/findings.jsonl`, and the relevant per-test markdown files.
-6. Fix scope drift, weak evidence, or bad test design.
-7. Rerun with a new temp output directory and a new expectations file until the run is acceptable.
-
-### Metadata Enrichment Loop
-
-Use this when the run is functionally correct but too weak to review:
-
-1. Identify missing or low-signal findings in agent output.
-2. Add real steps, attachments, or minimal metadata only where they improve review quality.
-3. Rerun the same intended scope.
-4. Reject the run if noop-style or placeholder evidence remains.
-
-### Small Test Change Workflow
-
-Use this when the code change is mostly mechanical, such as typing cleanup, mock refactors, or helper extraction:
-
-1. Create a fresh expectations file and temp output directory for the touched scope.
-2. Run the touched scope with `allure agent`, even if the goal is only a smoke check after a small or mechanical change.
-3. Review `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`.
-4. Only then make a final statement about regression safety or test correctness.
-
-### Coverage Review Workflow
-
-Use this for command matrices, package audits, or business-logic coverage reviews:
-
-1. Split the audit into scoped groups.
-2. Give each group its own expectations file and temp output directory.
-3. Run each group with `allure agent`.
-4. Review runtime artifacts first, then inspect source code only after the run explains what actually executed.
-5. Mark the review incomplete until each scoped group either matched expectations or was explicitly documented as a broad package-health audit.
-
-## Per-Run Artifacts
-
-Each run must use fresh temp paths so parallel runs stay isolated. `allure agent` creates a fresh temp output directory automatically when you omit `--output`, but this guide still uses explicit temp paths when you need deterministic file locations.
-
-- `ALLURE_AGENT_OUTPUT` should point to a unique temp directory per run.
-- `ALLURE_AGENT_EXPECTATIONS` should point to a unique expectations file per run.
-- Do not reuse output or expectations paths across parallel runs.
-
-YAML is the preferred format for expectations files in v1, though JSON also works.
-
-Example:
-
-Primary pattern:
-
-```bash
-TMP_DIR="$(mktemp -d)"
-EXPECTATIONS="$TMP_DIR/expectations.yaml"
-cat >"$EXPECTATIONS" <<'YAML'
-goal: Validate feature A
-task_id: feature-a
-expected:
-  environments:
-    - default
-  full_name_prefixes:
-    - feature A
-  label_values:
-    feature: feature-a
-notes:
-  - Only feature A tests should run.
-YAML
-
-npx allure agent \
-  --output "$TMP_DIR/agent-output" \
-  --expectations "$EXPECTATIONS" \
-  -- npm test
-```
-
-Lower-level fallback:
-
-```bash
-ALLURE_AGENT_OUTPUT="$TMP_DIR/agent-output" \
-ALLURE_AGENT_EXPECTATIONS="$EXPECTATIONS" \
-npx allure run -- npm test
-```
-
-Repository-oriented examples:
-
-Review an entire package:
-
-```bash
-TMP_DIR="$(mktemp -d)"
-EXPECTATIONS="$TMP_DIR/expectations.yaml"
-cat >"$EXPECTATIONS" <<'YAML'
-goal: Review CLI package tests
-task_id: cli-package-review
-expected:
-  label_values:
-    module: cli
-notes:
-  - Review runtime evidence before source inspection.
-YAML
-
-yarn allure agent \
-  --output "$TMP_DIR/agent-output" \
-  --expectations "$EXPECTATIONS" \
-  -- yarn workspace allure test
-```
-
-Compact coverage-review pattern:
-
-```bash
-TMP_DIR="$(mktemp -d)"
-EXPECTATIONS="$TMP_DIR/expectations.yaml"
-
-yarn allure agent \
-  --output "$TMP_DIR/agent-output" \
-  --expectations "$EXPECTATIONS" \
-  -- yarn workspace <workspace> test <scope>
-```
-
-Package review expectations example:
-
-```yaml
-goal: Review package tests
-task_id: package-review
-expected:
-  label_values:
-    module: my-module
-notes:
-  - Review runtime evidence before source inspection.
-```
-
-Review a single spec:
-
-```bash
-TMP_DIR="$(mktemp -d)"
-EXPECTATIONS="$TMP_DIR/expectations.yaml"
-cat >"$EXPECTATIONS" <<'YAML'
-goal: Review CLI run integration coverage
-task_id: cli-run-integration-review
-expected:
-  label_values:
-    package: test.commands.run.integration.test.ts
-notes:
-  - Review runtime evidence before source inspection.
-YAML
-
-yarn allure agent \
-  --output "$TMP_DIR/agent-output" \
-  --expectations "$EXPECTATIONS" \
-  -- yarn workspace allure test test/commands/run.integration.test.ts
-```
-
-Single-spec expectations example:
-
-```yaml
-goal: Review one spec
-task_id: single-spec-review
-expected:
-  label_values:
-    package: test.commands.run.integration.test.ts
-notes:
-  - Review runtime evidence before source inspection.
-```
-
-```bash
-TMP_DIR="$(mktemp -d)"
-EXPECTATIONS="$TMP_DIR/expectations.yaml"
-cat >"$EXPECTATIONS" <<'YAML'
-goal: Validate plugin-agent behavior
-task_id: plugin-agent
-expected:
-  label_values:
-    package: test.index.test.ts
-notes:
-  - Only plugin-agent tests should run.
-YAML
-
-yarn allure agent \
-  --output "$TMP_DIR/agent-output" \
-  --expectations "$EXPECTATIONS" \
-  -- yarn workspace @allurereport/plugin-agent test
-```
-
-```bash
-TMP_DIR="$(mktemp -d)"
-EXPECTATIONS="$TMP_DIR/expectations.yaml"
-cat >"$EXPECTATIONS" <<'YAML'
-goal: Validate CLI run integration coverage
-task_id: cli-run-integration
-expected:
-  label_values:
-    package: test.commands.run.integration.test.ts
-YAML
-
-yarn allure agent \
-  --output "$TMP_DIR/agent-output" \
-  --expectations "$EXPECTATIONS" \
-  -- yarn workspace allure test test/commands/run.integration.test.ts
-```
-
-## Reviewing Agent Output
-
-Read in this order:
-
-1. `index.md`
-2. `manifest/run.json`
-3. `manifest/tests.jsonl`
-4. `manifest/findings.jsonl`
-5. the relevant `tests/<environment>/<slug>.md`
-6. copied attachments under `.assets/` and process logs under `artifacts/global/`
-
-Questions to answer:
-
-- Did only the intended tests run?
-- Did the test prove the intended behavior?
-- Is the runtime evidence strong enough to understand the result?
-- Are there smells like noop steps, step spam, or generic attachments?
-
-## When Console Errors Are Not Represented As Test Results
-
-- Suite-load, import, or setup failures may appear only in `artifacts/global/stderr.txt` or global errors.
-- If `manifest/tests.jsonl` does not account for all visible failures from the test runner, inspect global stderr before concluding the run is fully modeled.
-- Treat that state as a partial runtime review, not as a clean or complete result set.
-- If runner-visible failures are present outside logical test files, final conclusions must stay provisional until the missing modeling is understood.
-
-## Test Design Best Practices
-
-- Prefer a small setup/action/assertion story over event-by-event noise.
-- Write tests that prove the intended behavior precisely and avoid unrelated actions.
-- Use helper-boundary instrumentation when several call sites need the same evidence.
-- Keep metadata minimal and purposeful.
-- Add labels only when they help scope review, debugging, or downstream policy.
-
-Good helper-boundary example:
-
-- instrument `runCommand` once instead of wrapping every `runCommand(...)` call site in identical steps
-
-## Evidence Rules
-
-### Steps
-
-Valid steps:
-
-- real setup actions
-- real user or API actions
-- real state transitions
-- real assertions and checks
-
-Invalid steps:
-
-- empty wrapper steps
-- steps named only `success`, `done`, or similar generic outcomes
-- steps that repeat logs without clarifying behavior
-
-### Attachments
-
-Valid attachments:
-
-- request and response payloads
-- logs tied to the failing or verifying point
-- screenshots, DOM snapshots, diffs, traces
-- compact summaries derived from actual runtime data
-
-Invalid attachments:
-
-- static placeholder text like `test passed`
-- generic “success” notes with no runtime evidence
-- artifacts not tied to the current execution
-
-## Metadata Rules
-
-- Add feature or task labels when the run is scoped by feature or task.
-- Add severity only when it matters for review or quality-gate policy.
-- Keep owner, epic, story, layer, and similar taxonomy optional unless the project already uses them.
-- Do not add metadata that no expectation, review step, or policy consumes.
-
-## Acceptance Rules
-
-Accept the run only when:
-
-- scope matches expectations
-- evidence is strong enough to explain what happened
-- retries include per-attempt diagnostics when needed
-- no high-confidence noop or placeholder findings remain
-
-Iterate again when:
-
-- expected tests are missing
-- unrelated tests or environments appeared
-- steps are empty or uninformative
-- attachments are missing or low-signal
-- metadata drift makes scope review ambiguous
-
-### Review Completeness
-
-A test review is not complete unless:
-
-- the relevant scope was run with agent mode, unless that is impossible
-- expectations were created for the intended scope, unless this is a broad package-health audit
-- agent artifacts were reviewed before final conclusions
-- missing or partial runtime modeling was called out explicitly
-- console-only conclusions are treated as provisional when agent output is absent or incomplete
-- agent-mode tooling failures were handled using [Agent mode failures and unavailable runs](#agent-mode-failures-and-unavailable-runs) (or agent mode was skipped only per the exceptions above)
-
-## Future Loops
-
-These are planned, but not part of the first stable core:
-
-- flaky detection and fix loop
-- known-issue and mute loop
-- quality-gate installation and adoption loop
-
-When these loops are added, they should build on the same evidence rules used here rather than bypassing them.
diff --git a/docs/allure-test-agent.md b/docs/allure-test-agent.md
new file mode 100644
index 00000000000..c6ec2ef6265
--- /dev/null
+++ b/docs/allure-test-agent.md
@@ -0,0 +1,230 @@
+# Allure Test Agent
+
+Use Allure agent mode to design, review, validate, debug, and enrich tests in this project.
+
+This file is project-specific guidance. Durable test-design, expectation, and evidence rules live in the `allure-test-agent` skill. If the skill is available, use it together with this file. If the skill is unavailable, follow this file as the local fallback and keep conclusions conservative.
+
+## Review Principle
+
+Runtime first, source second.
+
+- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through the local agent test service when available, or through `allure agent` otherwise.
+- Use agent-mode execution for smoke checks too, even when the change is small or mechanical.
+- Only skip agent mode when it is impossible or when debugging agent mode itself.
+- If agent-mode output is missing or incomplete, debug that first and treat console-only conclusions as provisional.
+
+## Local Capability Snapshot
+
+Refresh this section when Allure, test runners, CI, or project wrappers change. Confirm local support with the project wrapper, `allure --version`, and `allure agent --help` before using optional commands.
+
+Do not store the exact Allure version here. Version output is a runtime fact; this file stores the wrapper, last snapshot marker, and how to refresh capabilities.
+
+- Allure wrapper: `yarn allure`
+- Capability snapshot last checked: `2026-06-10`
+- Refresh capabilities with: `yarn allure --version`, `yarn allure agent capabilities --json`, and `yarn allure agent --help`
+- Agent execution: supported with `yarn allure agent -- <command>`
+- Output option: `--output <dir>` or `-o <dir>`; omitted output uses a fresh temporary directory
+- Expectation controls: `--goal`, `--task-id`, `--expect-tests`, `--expect-test`, `--expect-prefix`, `--expect-label`, `--expect-env`, `--forbid-label`, `--expect-step-containing`, `--expect-steps`, `--expect-attachments`, `--expect-attachment`, and advanced `--expectations <yaml|json>`
+- Latest/state directory recovery: `yarn allure agent latest`; `yarn allure agent state-dir`; `ALLURE_AGENT_STATE_DIR=<dir>` override
+- Selection/rerun support: `yarn allure agent select --latest|--from <dir>` and `yarn allure agent --rerun-latest|--rerun-from <dir> -- <command>`
+- Discovery/configuration commands: unsupported by this local CLI
+- Local agent test service: unsupported or unknown; use `yarn allure agent` directly
+
+## Local Agent Test Service
+
+Use the local agent test service when the project provides one and the task is query-heavy, stateful, or iterative. Use `allure agent` directly when service mode is unavailable or unnecessary.
+
+- Service status: unsupported or unknown
+- Start or connect command: unknown
+- Capability/status endpoint: unknown
+- Supported intents: use direct CLI runs, query, select, and rerun commands
+- Supported profiles and selectors: direct runner selectors plus agent expectation flags
+- Query support: `yarn allure agent query --latest summary|tests|findings|test` or `--from <output-dir>`
+- Realtime and cancellation support: unknown for service mode
+- Service logs or diagnostics: unknown
+- Fallback when unavailable: `yarn allure agent -- <command>`
+
+## Local Test Surfaces
+
+- Test frameworks and runners: Yarn workspaces; Vitest for most packages; Playwright for `@allurereport/e2e` and `@allurereport/static-server`
+- Test roots: package-local tests under `packages/*/test`, Playwright tests in `packages/e2e` and `packages/static-server`, plus package-specific config files
+- Allure result paths: most Vitest packages write `./out/allure-results`; `packages/sandbox` writes `./allure-results`; Playwright packages write `./out/allure-results`
+- Known selector support: Vitest file/name selectors, Playwright file/project selectors, workspace package selection through `yarn workspace <name>`
+- Known environments or services needed for tests: Playwright browser dependencies for e2e/static-server; CI runs OS matrix environments
+
+## Allure Integrations
+
+Document only integrations detected or explicitly configured in this project.
+
+- Existing Allure adapters/integrations: `allure-vitest`, `allure-playwright`, Allure CLI `run`, `agent`, `generate`, and report plugins
+- Runner config files: root `allurerc.mjs`; package `vitest.config.ts`; `packages/e2e/playwright.config.ts`; `packages/static-server/playwright.config.ts`
+- Allure results directories: package `out/allure-results`, sandbox `allure-results`, CI dumps `allure-results-<os>.zip`
+- Supported integration configuration targets: discovered package runner configs
+- Validation command for integration setup: focused package command through `yarn allure agent -- yarn workspace <name> test`
+- Known unsupported or skipped integrations: local agent service, discovery/configuration commands
+- Integration-specific quirks: many package tests clean `./out`; CI uses `yarn allure run --config=./allurerc.gate.mjs --environment=<os> --dump=allure-results-<os> -- yarn test`
+
+## Project Test-Design Conventions
+
+Fill only conventions that exist in this project. Durable test-design rules stay in the `allure-test-agent` skill.
+
+- Accepted test layers: unit/package tests with Vitest; browser/e2e tests with Playwright; CLI integration tests in `packages/cli`
+- Preferred assertion style: framework matchers and focused assertions from existing package tests
+- Parameterized test style: use existing Vitest/Playwright conventions in the touched package
+- Smoke coverage conventions: use focused package or file-level runs for small changes; root `yarn test` is broad package health
+- Mocking and integration-test preference: follow the touched package's existing test style
+- Suppression/quarantine policy: unknown; do not present skipped or non-gating tests as proof
+
+## Run Profiles
+
+Document only profiles that exist in this project. If a profile is inferred rather than confirmed, mark it as inferred.
+
+| Profile | Command or service intent | Expected use | Confidence limits |
+| --- | --- | --- | --- |
+| smoke | `yarn allure agent -- yarn workspace <name> test <file-or-pattern>` when the runner supports narrowing | Quick signal for a touched package or test file | Does not prove downstream package behavior |
+| affected | `yarn allure agent -- yarn workspace <name> test` plus a build of the changed package | Package-level validation after local edits | Mapping may miss indirect workspace impact |
+| feature/component | `yarn allure agent --goal <text> --expect-* -- yarn workspace <name> test <selector>` | Focused validation for one behavior or component | Depends on runner selector precision |
+| full | `yarn allure agent -- yarn test` | Broad workspace test signal | Cost may be high and process-tree tests may be environment-sensitive |
+| e2e | `yarn allure agent -- yarn workspace @allurereport/e2e test` or the equivalent Playwright run for `@allurereport/static-server` | Browser workflow validation | Requires installed Playwright browsers/dependencies |
+
+## Execution Signal And CI Trust
+
+Do not present ignored, excluded, swallowed, advisory, or non-gating test execution as proof that behavior is safe.
+
+- Default local test command: `yarn test`
+- Default local command exclusions: root `yarn test` excludes `packages/sandbox`
+- CI test jobs: `.github/workflows/build.yml` job `test` runs across OS matrix
+- CI gating status: branch protection unknown; workflow test job appears intended as a primary validation signal
+- Known ignored, skipped, muted, quarantined, or disabled tests: package-specific and runtime-dependent; inspect run output before claiming proof
+- Test artifacts retained by CI: `allure-results-<os>.zip` dumps are uploaded and later used for report generation
+
+If CI or local execution is non-gating, excludes important tests, or swallows failures, call that out before using the run as proof.
+
+## Local Expectation Controls
+
+Before each validation run, decide whether expectations reduce a real risk for the intended conclusion. When they do, use the smallest fresh inline options supported by local `allure agent --help`.
+
+- Supported expectation mechanism: inline CLI options and advanced YAML/JSON file mode
+- Exact test/file/suite/label/profile support: exact logical full name with `--expect-test`; full-name prefix with `--expect-prefix`; label with `--expect-label name=value`; environment with `--expect-env`
+- Excluded-scope controls: `--forbid-label name=value`
+- Evidence expectation controls: `--expect-step-containing <text>`, `--expect-steps <count>`, `--expect-attachments <count>`, `--expect-attachment <name|name=value|content-type=value>`
+- Check/assertion step-name controls: use `--expect-step-containing <text>` when the project records checks as test-scoped Allure steps
+- Broad-audit fallback: run the narrowest practical command, then inspect `manifest/tests.jsonl` and `manifest/findings.jsonl` before claiming scope
+
+Prefer inline options. Use `--expectations <file>` only as advanced mode when the contract is too large, generated, or policy-controlled.
+
+When expectations are justified, they should state only the parts that matter for this run:
+
+- what claim or validation depth the run is meant to support
+- what should run
+- what should not run
+- which profile, environment, variant, or parameter set is intended
+- what important checks or evidence should be visible through supported reporting or documented step-name conventions
+- why this scope is enough
+- what the run cannot prove
+
+If local expectation support is unavailable or weak, run the narrowest practical command, review observed scope from manifests, and state that expectation checking was limited.
+
+Treat the run goal as a claim boundary for review, not as proof. If the goal is wrong or stale, keep the runtime evidence and report what the observed run actually supports.
+
+## Core Loops
+
+### Test Review Loop
+
+1. Identify the exact review scope and validation depth.
+2. Create the smallest meaningful expectations using local supported controls when they protect the review conclusion.
+3. Run only that scope through the local agent test service or `allure agent`.
+4. Print the run's `index.md` path.
+5. Review `index.md`, `manifest/run.json`, `manifest/test-events.jsonl`, `manifest/tests.jsonl`, `manifest/findings.jsonl`, and relevant per-test markdown.
+6. Inspect source code only after runtime evidence explains what executed.
+7. Call out weak scope, weak evidence, execution-signal limits, or partial runtime modeling.
+
+### Test Authoring Loop
+
+1. Understand the feature, issue, expected behavior, and risk.
+2. Read the `allure-test-agent` skill's test-design guidance when available.
+3. Create the smallest meaningful expectations for the intended scope when they reduce a real validation risk.
+4. Write or update focused tests without weakening useful coverage.
+5. Run the intended scope through agent mode.
+6. Review scope, checks, evidence, and execution signal before claiming validation.
+7. Enrich tests when evidence is weak, then rerun with fresh temp output.
+
+### Evidence And Metadata Enrichment Loop
+
+Use this when tests pass but are hard to review:
+
+1. Identify weak evidence, missing checks, missing setup state, missing artifacts, or noisy metadata.
+2. Prefer framework integrations and helper-boundary instrumentation over wrapping every line.
+3. Add useful steps, attachments, parameters, descriptions, labels, or links using project conventions.
+4. Redact sensitive values while preserving useful artifact shape.
+5. Rerun the same intended scope and report evidence changes.
+
+### Coverage Review Loop
+
+1. Split broad audits into scoped groups when practical.
+2. Give each group a unique temp output directory and use expectations only when the group has a known scope or supports a validation conclusion.
+3. Run each group through agent mode.
+4. Separate observed runtime coverage from inferred source-code coverage.
+5. Mark the review incomplete until every scoped group has been validated through matched expectations, has had its observed scope reviewed, or has been documented as a broad package-health audit.
+
+## Runtime Artifact Review
+
+After each agent-mode run:
+
+- print the run's `index.md` path
+- read `manifest/run.json`
+- read `manifest/test-events.jsonl`
+- read `manifest/tests.jsonl`
+- read `manifest/findings.jsonl`
+- read relevant per-test markdown before inspecting source
+- inspect global stderr/log artifacts when runner-visible failures are not represented as logical tests
+
+## Output, State, And Reruns
+
+Do not create persistent output or expectation paths. Use unique temp paths for every run.
+
+- Agent output policy: omit `--output` to get a fresh temp directory, or pass an explicit unique temp dir; do not reuse output directories across runs
+- Latest output recovery: `yarn allure agent latest`
+- State directory override: `ALLURE_AGENT_STATE_DIR=<dir>`
+- Rerun from latest/prior output: `yarn allure agent --rerun-latest -- <command>` or `yarn allure agent --rerun-from <output-dir> -- <command>`
+- Selection/test plan support: `yarn allure agent select --latest` or `--from <output-dir>` with `--preset review|failed|unsuccessful|all`
+- Parallel-run rule: output paths and expectation state must not be shared
+- CI artifact retention: CI uploads Allure result dumps, not agent output directories unless a job is changed to do so
+
+## Project Metadata Conventions
+
+Fill only conventions that exist in this project.
+
+- Feature/story/component/service labels: existing tests commonly use Allure `epic`, `feature`, `story`, and `label`; package configs often set `module=<package>`
+- Owner/team metadata: unknown
+- Severity or priority metadata: use only when already present or meaningful for review/policy
+- Issue, bug, requirement, or known-defect links: unknown
+- Suite/package/module taxonomy: package-level `module` labels from Vitest config are common
+- Parameter naming and dynamic-history exclusions: follow existing package examples
+- Metadata to avoid: decorative labels or unused taxonomy that does not help selection, triage, or review
+
+## Project Evidence Conventions
+
+Fill only conventions that exist in this project.
+
+- Test descriptions: follow existing package style
+- Attachments: command output, manifests, text/JSON artifacts, screenshots/traces where relevant
+- Step naming: use specific action/check names rather than generic wrappers
+- Check/assertion step naming: use meaningful text that can be matched with `--expect-step-containing` when review requires visible checks
+- Assertion/check visibility: prefer real test-scoped steps and useful attachments around behavior, not placeholder evidence
+- Fixture/setup evidence: include only when it explains the behavior or failure
+- Sensitive data redaction: redact secrets and tokens while preserving useful artifact shape
+
+## Acceptance Rules
+
+Accept a run only when:
+
+- observed scope matches the intended scope, or drift is explained
+- coverage remains meaningful for the stated conclusion
+- important checks are visible through supported reporting or documented step-name conventions, or source review covers the gap
+- evidence is strong enough to explain what happened
+- execution-signal limits are explicit
+- no high-confidence placeholder or noop evidence findings remain
+- partial runtime modeling is called out
+
+Console-only conclusions are provisional when agent output is absent or incomplete.
diff --git a/packages/cli/README.md b/packages/cli/README.md
index d9938daa6a8..952b9227b6b 100644
--- a/packages/cli/README.md
+++ b/packages/cli/README.md
@@ -74,7 +74,21 @@ For example:
 npx allure agent -- npm test
 ```
 
-`allure agent` runs with an agent-only profile by default. It creates a fresh output directory automatically, can load an expectations file with `--expectations`, and ignores configured presentation or export plugins such as Awesome or TestOps unless you explicitly fall back to the lower-level `ALLURE_AGENT_*` plus `allure run` flow.
+`allure agent` runs with an agent-only profile by default. It creates a fresh output directory automatically, accepts compact inline expectations such as `--goal`, `--expect-tests`, `--expect-test`, `--expect-label`, and `--expect-step-containing`, and can still load an expectations file with `--expectations` when needed. Configured presentation or export plugins such as Awesome or TestOps are ignored for that run.
+
+Agents and setup tools can inspect the local structured capability contract without scraping help text:
+
+```bash
+npx allure agent capabilities --json
+```
+
+After a run, agents can query the output directory without manually reading every manifest:
+
+```bash
+npx allure agent query --latest summary
+npx allure agent query --latest tests --status failed
+npx allure agent query --from ./agent-output findings --severity high
+```
 
 ### Generating Reports Manually
 
@@ -118,6 +132,7 @@ The Allure CLI includes several helpful global options. Use `--help` to explore
 
 ```bash
 npx allure run --help
+npx allure agent capabilities --json
 npx allure agent --help
 npx allure watch --help
 ```
diff --git a/packages/cli/src/commands/agent-run.ts b/packages/cli/src/commands/agent-run.ts
new file mode 100644
index 00000000000..ff8a19dfdb1
--- /dev/null
+++ b/packages/cli/src/commands/agent-run.ts
@@ -0,0 +1,216 @@
+import * as console from "node:console";
+import { mkdtemp, realpath, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join, resolve } from "node:path";
+import process, { exit } from "node:process";
+
+import { AllureReport, isFileNotFoundError, readConfig } from "@allurereport/core";
+import {
+  createAgentTestPlanContext,
+  AgentUsageError,
+  formatAgentOutputLinks,
+  isPathInside,
+  normalizeAgentRerunPreset,
+  parseAgentLabelFilters,
+  resolveAgentStateDir,
+  writeLatestAgentState,
+  type AgentExpectationsInput,
+} from "@allurereport/plugin-agent";
+
+import { normalizeCommandEnvironmentOptions, resolveCommandEnvironment } from "../utils/environment.js";
+import { createChildAllureCliEnvironment, getActiveAllureCliCommand } from "../utils/execution-context.js";
+import { executeAllureRun, executeNestedAllureCommand } from "./commons/run.js";
+
+export const formatAgentCommand = (args: string[]) => args.join(" "); // Space-joins argv into one display string; arguments are not shell-quoted.
+
+export const printAgentOutputLinks = (outputDir: string) => { // Logs every line returned by formatAgentOutputLinks(outputDir).
+  for (const line of formatAgentOutputLinks(outputDir)) {
+    console.log(line);
+  }
+};
+
+export const persistLatestAgentState = async (value: Parameters<typeof writeLatestAgentState>[0]) => { // Best-effort wrapper around writeLatestAgentState.
+  try {
+    await writeLatestAgentState(value);
+  } catch (error) { // A failed write is reported on stderr and not rethrown, so the caller keeps going.
+    console.error(
+      `Could not update latest agent output in ${resolveAgentStateDir(value.cwd)}: ${(error as Error).message}`,
+    );
+  }
+};
+
+export type ExecuteAgentModeParams = { // Input accepted by executeAgentMode.
+  configPath?: string; // Passed to readConfig as the config path.
+  cwd?: string; // Working directory; process.cwd() is used when omitted.
+  output?: string; // Output directory, resolved against cwd; a fresh temp directory is created when omitted.
+  expectations?: string; // Expectations file path, resolved against cwd; must not be inside the output directory.
+  inlineExpectations?: AgentExpectationsInput; // Forwarded to the agent plugin options as `expectations`.
+  environment?: string;
+  environmentName?: string;
+  silent?: boolean; // Forwarded to the run helpers.
+  rerunFrom?: string; // Rerun source: a prior output directory.
+  rerunLatest?: boolean; // Rerun source: the latest recorded output.
+  rerunPreset?: string; // Rerun filter; only valid together with a rerun source.
+  rerunEnvironments?: string[]; // Rerun filter; only valid together with a rerun source.
+  rerunLabels?: string[]; // Rerun filter; only valid together with a rerun source.
+  args: string[]; // args[0] is the command to run, the rest are its arguments.
+};
+
+export const executeAgentMode = async (params: ExecuteAgentModeParams) => { // Runs `args` as a child command in agent mode, records latest-run state, then exits with the run's exit code.
+  const {
+    configPath,
+    cwd: configuredCwd,
+    output,
+    expectations,
+    inlineExpectations,
+    environment,
+    environmentName,
+    silent,
+    rerunFrom,
+    rerunLatest,
+    rerunPreset,
+    rerunEnvironments,
+    rerunLabels,
+    args,
+  } = params;
+  const command = args[0];
+  const commandArgs = args.slice(1);
+  const cwd = await realpath(configuredCwd ?? process.cwd()); // Falls back to the process cwd; realpath resolves symlinks.
+  const commandString = formatAgentCommand(args);
+  const hasRerunSource = !!rerunFrom || !!rerunLatest;
+  const hasRerunFilters = !!rerunPreset || !!rerunEnvironments?.length || !!rerunLabels?.length;
+
+  if (!hasRerunSource && hasRerunFilters) { // Rerun filters without a rerun source are a usage error.
+    throw new AgentUsageError("Use rerun filters only together with --rerun-from <path> or --rerun-latest");
+  }
+
+  const rerunContext = await createAgentTestPlanContext({ // May be falsy; every later use is guarded.
+    cwd,
+    from: rerunFrom,
+    latest: rerunLatest,
+    preset: normalizeAgentRerunPreset(rerunPreset),
+    environments: rerunEnvironments?.length ? rerunEnvironments : undefined,
+    labelFilters: parseAgentLabelFilters(rerunLabels),
+  });
+  const childEnvironmentVariables = {
+    ...createChildAllureCliEnvironment("agent"),
+    ...(rerunContext ? { ALLURE_TESTPLAN_PATH: rerunContext.testPlanPath } : {}), // Test plan path is set only for reruns.
+  };
+
+  try {
+    if (getActiveAllureCliCommand()) { // NOTE(review): presumably means we already run under another Allure CLI command; this branch only delegates and builds no report — confirm.
+      console.log(commandString);
+
+      const exitCode = await executeNestedAllureCommand({
+        command,
+        commandArgs,
+        cwd,
+        ...(rerunContext ? { environmentVariables: { ALLURE_TESTPLAN_PATH: rerunContext.testPlanPath } } : {}), // Only the test plan path is forwarded here, not childEnvironmentVariables.
+        silent,
+      });
+
+      exit(exitCode ?? -1); // -1 when the nested command reports no exit code.
+      return; // NOTE(review): presumably reachable only when exit() is stubbed (e.g. in tests) — confirm.
+    }
+
+    const outputDir = output ? resolve(cwd, output) : await mkdtemp(join(tmpdir(), "allure-agent-")); // Explicit output is resolved against cwd; otherwise a new temp directory is created.
+    const expectationsPath = expectations ? resolve(cwd, expectations) : undefined;
+    const environmentOptions = {
+      environment,
+      environmentName,
+    };
+
+    normalizeCommandEnvironmentOptions(environmentOptions);
+
+    if (expectationsPath && isPathInside(outputDir, expectationsPath)) { // The output directory is removed below, which would delete an expectations file stored inside it.
+      throw new AgentUsageError(
+        `--expectations path ${JSON.stringify(expectationsPath)} must not be inside the agent output directory ${JSON.stringify(outputDir)}`,
+      );
+    }
+
+    const config = await readConfig(cwd, configPath, {
+      output: outputDir,
+      plugins: {
+        agent: {
+          options: {
+            outputDir,
+            command: commandString,
+            ...(expectationsPath ? { expectationsPath } : {}),
+            ...(inlineExpectations ? { expectations: inlineExpectations } : {}),
+          },
+        },
+      },
+    });
+    const resolvedEnvironment = resolveCommandEnvironment(config, environmentOptions);
+
+    try {
+      await rm(outputDir, { recursive: true }); // Start from a clean output directory; this also removes a temp directory created just above.
+    } catch (error) {
+      if (!isFileNotFoundError(error)) { // A missing directory is fine; other failures are logged and the run continues.
+        console.error("could not clean output directory", error);
+      }
+    }
+
+    const startedAt = new Date().toISOString();
+
+    await persistLatestAgentState({ // Recorded as "running" before the command starts.
+      cwd,
+      outputDir,
+      expectationsPath,
+      command: commandString,
+      startedAt,
+      status: "running",
+    });
+
+    printAgentOutputLinks(outputDir);
+    if (expectationsPath) {
+      console.log(`agent expectations: ${expectationsPath}`);
+    } else if (inlineExpectations) {
+      console.log("agent expectations: CLI options");
+    }
+    console.log(commandString);
+
+    const allureReport = new AllureReport({
+      ...config,
+      output: outputDir,
+      environment: resolvedEnvironment?.id,
+      open: false, // This and the next four fields override whatever the loaded config provided.
+      port: undefined,
+      qualityGate: undefined,
+      allureService: undefined,
+      realTime: false,
+      plugins: config.plugins,
+    });
+    const knownIssues = await allureReport.store.allKnownIssues();
+
+    const { globalExitCode } = await executeAllureRun({
+      allureReport,
+      knownIssues,
+      cwd,
+      command,
+      commandArgs,
+      environmentVariables: childEnvironmentVariables,
+      environment: resolvedEnvironment?.id,
+      withQualityGate: false,
+      logs: "pipe",
+      silent,
+      ignoreLogs: false,
+      logProcessExit: false,
+    });
+
+    await persistLatestAgentState({ // Same record, now marked "finished" with the exit code.
+      cwd,
+      outputDir,
+      expectationsPath,
+      command: commandString,
+      startedAt,
+      finishedAt: new Date().toISOString(),
+      status: "finished",
+      exitCode: globalExitCode.actual ?? globalExitCode.original, // Prefer the actual code, fall back to the original one.
+    });
+
+    exit(globalExitCode.actual ?? globalExitCode.original); // NOTE(review): node:process exit() ends the process at once, so the finally cleanup below presumably runs only on thrown errors or when exit is stubbed — confirm.
+  } finally {
+    await rerunContext?.cleanup();
+  }
+};
diff --git a/packages/cli/src/commands/agent.ts b/packages/cli/src/commands/agent.ts
index 8f63d6c05fe..b5b85d57106 100644
--- a/packages/cli/src/commands/agent.ts
+++ b/packages/cli/src/commands/agent.ts
@@ -1,61 +1,54 @@
 import * as console from "node:console";
-import { mkdir, mkdtemp, realpath, rm, writeFile } from "node:fs/promises";
+import { mkdir, mkdtemp, realpath, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
-import { dirname, join, relative, resolve } from "node:path";
+import { dirname, join, resolve } from "node:path";
 import process, { exit } from "node:process";
 
-import { AllureReport, isFileNotFoundError, readConfig } from "@allurereport/core";
-import { Command, Option, UsageError } from "clipanion";
-
 import {
-  createAgentTestPlanContext,
+  AGENT_FINDING_CATEGORIES,
+  AGENT_FINDING_SEVERITIES,
+  AGENT_TASK_MAP_HELP,
+  AGENT_TEST_STATUSES,
+  AgentExpectationUsageError,
+  buildAgentInlineExpectations,
+  buildAgentQueryPayload,
+  createAgentCapabilities,
+  formatAgentOutputLinks,
+  isAgentExpectationUsageError,
+  isAgentTaskMapHelpRequest,
+  isAgentUsageError,
+  loadAgentOutput,
+  normalizeAgentQueryLimit,
+  normalizeAgentQueryView,
   normalizeAgentRerunPreset,
+  normalizeRepeatedEnumValues,
+  normalizeRepeatedStringValues,
   parseAgentLabelFilters,
+  readLatestAgentState,
   resolveAgentSelectionOutputDir,
+  resolveAgentStateDir,
   selectAgentTestPlan,
-} from "../utils/agent-select.js";
-import { readLatestAgentState, resolveAgentStateDir, writeLatestAgentState } from "../utils/agent-state.js";
-import {
-  environmentNameOption,
-  environmentOption,
-  normalizeCommandEnvironmentOptions,
-  resolveCommandEnvironment,
-} from "../utils/environment.js";
-import { createChildAllureCliEnvironment, getActiveAllureCliCommand } from "../utils/execution-context.js";
-import { executeAllureRun, executeNestedAllureCommand } from "./commons/run.js";
-
-const withProcessEnv = async <T>(overrides: Record<string, string | undefined>, fn: () => Promise<T>): Promise<T> => {
-  const previousValues = new Map<string, string | undefined>();
-
-  for (const [key, value] of Object.entries(overrides)) {
-    previousValues.set(key, process.env[key]);
-
-    if (value === undefined) {
-      delete process.env[key];
-      continue;
-    }
+  validateAgentExpectationsFile,
+  writeLatestAgentState,
+  writeInvalidAgentExpectationOutput,
+  type AgentExpectationsInput,
+} from "@allurereport/plugin-agent";
+import { Command, Option, UsageError } from "clipanion";
 
-    process.env[key] = value;
-  }
+export { AGENT_TASK_MAP_HELP, createAgentCapabilities, isAgentTaskMapHelpRequest };
 
-  try {
-    return await fn();
-  } finally {
-    for (const [key, value] of previousValues) {
-      if (value === undefined) {
-        delete process.env[key];
-        continue;
-      }
+const readOptionalString = (value: unknown): string | undefined => (typeof value === "string" ? value : undefined); // Returns value only when it is a string, otherwise undefined.
 
-      process.env[key] = value;
-    }
-  }
-};
+const readOptionalBoolean = (value: unknown): boolean => value === true; // True only for the literal boolean true; every other value yields false.
+
+const readOptionalStringArray = (value: unknown): string[] | undefined => (Array.isArray(value) ? value : undefined); // Returns value when it is an array; element types are not checked.
 
-const isPathInside = (parentPath: string, candidatePath: string) => {
-  const rel = relative(parentPath, candidatePath);
+const formatAgentCommand = (args: string[]) => args.join(" "); // Space-joins argv into one display string; arguments are not shell-quoted.
 
-  return rel === "" || (!rel.startsWith("..") && rel !== "." && !rel.startsWith("../"));
+const printAgentOutputLinks = (outputDir: string) => { // Logs every line returned by formatAgentOutputLinks(outputDir).
+  for (const line of formatAgentOutputLinks(outputDir)) {
+    console.log(line);
+  }
 };
 
 const persistLatestAgentState = async (value: Parameters<typeof writeLatestAgentState>[0]) => {
@@ -68,11 +61,47 @@ const persistLatestAgentState = async (value: Parameters<typeof writeLatestAgent
   }
 };
 
-const readOptionalString = (value: unknown): string | undefined => (typeof value === "string" ? value : undefined);
+const agentEnvironmentOption = () => // Factory for the --environment / --env string option.
+  Option.String("--environment,--env", {
+    description:
+      "Force specific environment ID to all tests in the run. Given environment has higher priority than the one defined in the config file (default: empty string)",
+  });
 
-const readOptionalBoolean = (value: unknown): boolean => value === true;
+const agentEnvironmentNameOption = () => // Factory for the --environment-name string option.
+  Option.String("--environment-name", {
+    description:
+      "Force specific environment display name to all tests in the run. Has lower priority than --environment and higher than the config value (default: empty string)",
+  });
 
-const readOptionalStringArray = (value: unknown): string[] | undefined => (Array.isArray(value) ? value : undefined);
+const throwCliUsageError = (error: unknown): never => { // Always throws; never returns normally.
+  if (isAgentUsageError(error)) { // Agent usage errors are rewrapped as clipanion UsageError, keeping only the message.
+    throw new UsageError((error as Error).message);
+  }
+
+  throw error; // Anything else is rethrown unchanged.
+};
+
+export class AgentCapabilitiesCommand extends Command { // `agent capabilities`: prints the object from createAgentCapabilities() as JSON.
+  static paths = [["agent", "capabilities"]];
+
+  static usage = Command.Usage({
+    description: "Print structured Allure agent capability information",
+    details:
+      "This command prints the locally supported agent-mode commands, expectation controls, output files, rerun support, and known unsupported capability families as JSON.",
+    examples: [
+      ["agent capabilities", "Print agent capabilities as JSON"],
+      ["agent capabilities --json", "Print agent capabilities as JSON explicitly"],
+    ],
+  });
+
+  json = Option.Boolean("--json", true, { // NOTE(review): this value is never read in execute(); output is JSON regardless of the flag — confirm that is intended.
+    description: "Print capabilities as JSON (default: true)",
+  });
+
+  async execute() {
+    console.log(JSON.stringify(createAgentCapabilities(), null, 2)); // Pretty-printed with two-space indentation.
+  }
+}
 
 export class AgentCommand extends Command {
   static paths = [["agent"]];
@@ -105,9 +134,58 @@ export class AgentCommand extends Command {
     description: "The path to a YAML or JSON expectations file",
   });
 
-  environment = environmentOption();
+  goal = Option.Array("--goal", {
+    description: "The review goal to record in inline agent expectations",
+  });
+
+  taskId = Option.Array("--task-id", {
+    description: "The task or feature id to record in inline agent expectations",
+  });
+
+  expectTests = Option.Array("--expect-tests", {
+    description: "The expected number of visible logical tests in the intended scope",
+  });
+
+  expectLabels = Option.Array("--expect-label", {
+    description: "Expected label selector in name=value form. Repeat the option for multiple selectors",
+  });
+
+  expectEnvironments = Option.Array("--expect-env", {
+    description: "Expected environment id. Repeat the option for multiple environments",
+  });
+
+  expectFullNames = Option.Array("--expect-test", {
+    description: "Expected full test name. Repeat the option for multiple tests",
+  });
+
+  expectPrefixes = Option.Array("--expect-prefix", {
+    description: "Expected full-name prefix. Repeat the option for multiple prefixes",
+  });
+
+  forbidLabels = Option.Array("--forbid-label", {
+    description: "Forbidden label selector in name=value form. Repeat the option for multiple selectors",
+  });
+
+  expectStepContains = Option.Array("--expect-step-containing", {
+    description: "Require a test-scoped step name containing this text per evidence-target logical test",
+  });
 
-  environmentName = environmentNameOption();
+  expectSteps = Option.Array("--expect-steps", {
+    description: "Require at least this many meaningful steps per expected logical test",
+  });
+
+  expectAttachments = Option.Array("--expect-attachments", {
+    description: "Require at least this many non-missing attachments per expected logical test",
+  });
+
+  expectAttachmentFilters = Option.Array("--expect-attachment", {
+    description:
+      "Require a matching non-missing attachment per expected logical test. Use a file name or name=value/content-type=value",
+  });
+
+  environment = agentEnvironmentOption();
+
+  environmentName = agentEnvironmentNameOption();
 
   silent = Option.Boolean("--silent", {
     description: "Don't pipe the process output logs to console (default: false)",
@@ -137,22 +215,91 @@ export class AgentCommand extends Command {
 
   async execute() {
     const args = this.commandToRun.filter((arg) => arg !== "--") as string[] | undefined;
+    const configPath = readOptionalString(this.config);
+    const configuredCwd = readOptionalString(this.cwd);
+    const output = readOptionalString(this.output);
+    const expectations = readOptionalString(this.expectations);
+
+    if (!args || !args.length) {
+      throw new UsageError("expecting command to be specified after --, e.g. allure agent -- npm run test");
+    }
+
+    try {
+      const inlineExpectations = buildAgentInlineExpectations({
+        goal: this.goal,
+        taskId: this.taskId,
+        expectTests: this.expectTests,
+        expectLabels: readOptionalStringArray(this.expectLabels),
+        expectEnvironments: readOptionalStringArray(this.expectEnvironments),
+        expectFullNames: readOptionalStringArray(this.expectFullNames),
+        expectPrefixes: readOptionalStringArray(this.expectPrefixes),
+        forbidLabels: readOptionalStringArray(this.forbidLabels),
+        expectStepContains: readOptionalStringArray(this.expectStepContains),
+        expectSteps: this.expectSteps,
+        expectAttachments: this.expectAttachments,
+        expectAttachmentFilters: readOptionalStringArray(this.expectAttachmentFilters),
+      });
+
+      if (expectations && inlineExpectations) {
+        throw new AgentExpectationUsageError(
+          "Use either --expectations <file> or inline expectation flags, not both",
+          "--expectations",
+        );
+      }
+
+      await validateAgentExpectationsFile({
+        cwd: await realpath(configuredCwd ?? process.cwd()),
+        output,
+        expectations,
+      });
+
+      const { executeAgentMode } = await import("./agent-run.js");
+
+      await executeAgentMode({
+        configPath,
+        cwd: configuredCwd,
+        output,
+        expectations,
+        inlineExpectations: inlineExpectations as AgentExpectationsInput | undefined,
+        environment: readOptionalString(this.environment),
+        environmentName: readOptionalString(this.environmentName),
+        silent: readOptionalBoolean(this.silent),
+        rerunFrom: readOptionalString(this.rerunFrom),
+        rerunLatest: readOptionalBoolean(this.rerunLatest),
+        rerunPreset: readOptionalString(this.rerunPreset),
+        rerunEnvironments: readOptionalStringArray(this.rerunEnvironments),
+        rerunLabels: readOptionalStringArray(this.rerunLabels),
+        args,
+      });
+    } catch (error) {
+      if (!isAgentExpectationUsageError(error)) {
+        throwCliUsageError(error);
+      }
 
-    await executeAgentMode({
-      configPath: readOptionalString(this.config),
-      cwd: readOptionalString(this.cwd),
-      output: readOptionalString(this.output),
-      expectations: readOptionalString(this.expectations),
-      environment: readOptionalString(this.environment),
-      environmentName: readOptionalString(this.environmentName),
-      silent: readOptionalBoolean(this.silent),
-      rerunFrom: readOptionalString(this.rerunFrom),
-      rerunLatest: readOptionalBoolean(this.rerunLatest),
-      rerunPreset: readOptionalString(this.rerunPreset),
-      rerunEnvironments: readOptionalStringArray(this.rerunEnvironments),
-      rerunLabels: readOptionalStringArray(this.rerunLabels),
-      args,
-    });
+      const expectationError = error as AgentExpectationUsageError;
+      const cwd = await realpath(configuredCwd ?? process.cwd());
+      const outputDir = output ? resolve(cwd, output) : await mkdtemp(join(tmpdir(), "allure-agent-"));
+      const commandString = formatAgentCommand(args);
+      const { generatedAt } = await writeInvalidAgentExpectationOutput({
+        outputDir,
+        command: commandString,
+        error: expectationError,
+      });
+
+      await persistLatestAgentState({
+        cwd,
+        outputDir,
+        command: commandString,
+        startedAt: generatedAt,
+        finishedAt: generatedAt,
+        status: "finished",
+        exitCode: 1,
+      });
+
+      printAgentOutputLinks(outputDir);
+      console.error(expectationError.message);
+      exit(1);
+    }
   }
 }
 
@@ -160,11 +307,15 @@ export class AgentLatestCommand extends Command {
   static paths = [["agent", "latest"]];
 
   static usage = Command.Usage({
-    description: "Print the latest Allure agent output directory for the current project",
-    details: "This command prints the latest agent output directory recorded for the resolved project cwd.",
+    description: "Print the latest Allure agent output directory and index path for the current project",
+    details:
+      "This command prints the latest agent output directory and index.md path recorded for the resolved project cwd.",
     examples: [
-      ["agent latest", "Print the latest agent output directory for the current project"],
-      ["agent latest --cwd ./packages/cli", "Print the latest agent output directory for a specific project cwd"],
+      ["agent latest", "Print the latest agent output directory and index path for the current project"],
+      [
+        "agent latest --cwd ./packages/cli",
+        "Print the latest agent output directory and index path for a specific project cwd",
+      ],
     ],
   });
 
@@ -190,7 +341,7 @@ export class AgentLatestCommand extends Command {
       return;
     }
 
-    console.log(latestState.outputDir);
+    printAgentOutputLinks(latestState.outputDir);
   }
 }
 
@@ -218,269 +369,202 @@ export class AgentStateDirCommand extends Command {
   }
 }
 
-export class AgentSelectCommand extends Command {
-  static paths = [["agent", "select"]];
+export class AgentQueryCommand extends Command {
+  static paths = [["agent", "query"]];
 
   static usage = Command.Usage({
-    description: "Select tests from an existing agent output and emit a test plan",
+    description: "Query an existing Allure agent output directory as focused JSON",
     details:
-      "This command resolves a set of tests from a prior agent run and prints or writes a testplan.json payload.",
+      "This command reads a prior agent output directory and prints focused JSON for a run summary, test list, findings list, or one test. Use --latest to query the latest recorded output for the project, or --from to query a specific output directory.",
     examples: [
-      ["agent select --from ./out/agent-output", "Print a test plan for the default review-targeted tests"],
-      ["agent select --latest --preset failed", "Print a test plan for failed tests from the latest project run"],
-      ["agent select --from ./out/agent-output --output ./testplan.json", "Write the selected test plan to a file"],
+      ["agent query --latest summary", "Print a summary for the latest agent output"],
+      ["agent query --from ./out/agent-output tests --status failed", "List failed tests from a prior output"],
+      [
+        "agent query --from ./out/agent-output findings --severity high",
+        "List high-severity findings from a prior output",
+      ],
+      [
+        'agent query --latest test --test "suite should pass" --include-markdown',
+        "Print one test summary with its per-test markdown",
+      ],
     ],
   });
 
+  view = Option.String({
+    required: false,
+    name: "Query view: summary, tests, findings, or test (default: summary)",
+  });
+
   cwd = Option.String("--cwd", {
     description:
       "The project directory used to resolve --latest and relative paths (default: current working directory)",
   });
 
   from = Option.String("--from", {
-    description: "The prior agent output directory to select tests from",
+    description: "The prior agent output directory to query",
   });
 
   latest = Option.Boolean("--latest", {
     description: "Use the latest recorded agent output for the current project cwd",
   });
 
-  preset = Option.String("--preset", {
-    description: "The selection preset: review, failed, unsuccessful, or all (default: review)",
+  statuses = Option.Array("--status", {
+    description: "Filter tests by status: failed, broken, unknown, skipped, or passed. Repeat for multiple statuses",
   });
 
   environments = Option.Array("--environment", {
-    description: "Filter selected tests by environment id. Repeat the option for multiple environments",
+    description: "Filter tests by environment id. Repeat the option for multiple environments",
   });
 
   labels = Option.Array("--label", {
-    description: "Filter selected tests by exact label name=value. Repeat the option for multiple filters",
+    description: "Filter tests by exact label name=value. Repeat the option for multiple filters",
   });
 
-  output = Option.String("--output,-o", {
-    description: "Write the resulting test plan to this file instead of printing it to stdout",
+  severities = Option.Array("--severity", {
+    description: "Filter findings by severity: high, warning, or info. Repeat for multiple severities",
   });
 
-  async execute() {
-    const cwd = await realpath(readOptionalString(this.cwd) ?? process.cwd());
-    const environments = readOptionalStringArray(this.environments);
-    const labels = readOptionalStringArray(this.labels);
-    const outputDir = await resolveAgentSelectionOutputDir({
-      cwd,
-      from: readOptionalString(this.from),
-      latest: readOptionalBoolean(this.latest),
-    });
-    const selection = await selectAgentTestPlan({
-      outputDir,
-      preset: normalizeAgentRerunPreset(readOptionalString(this.preset)),
-      environments: environments?.length ? environments : undefined,
-      labelFilters: parseAgentLabelFilters(labels),
-    });
-
-    if (!selection.testPlan.tests.length) {
-      console.error(`No tests matched selection in ${selection.outputDir}`);
-      exit(1);
-      return;
-    }
+  categories = Option.Array("--category", {
+    description: "Filter findings by category. Repeat the option for multiple categories",
+  });
 
-    const serialized = `${JSON.stringify(selection.testPlan, null, 2)}\n`;
+  checks = Option.Array("--check", {
+    description: "Filter findings by check name. Repeat the option for multiple checks",
+  });
 
-    const output = readOptionalString(this.output);
+  test = Option.String("--test", {
+    description: "Filter to one test by full name, test result id, history id, or markdown path",
+  });
 
-    if (!output) {
-      console.log(serialized.trimEnd());
-      return;
-    }
+  limit = Option.String("--limit", {
+    description: "Limit returned tests or findings to this non-negative count",
+  });
 
-    const outputPath = resolve(cwd, output);
+  includeMarkdown = Option.Boolean("--include-markdown", {
+    description: "Include the per-test markdown content for the test view",
+  });
 
-    await mkdir(dirname(outputPath), { recursive: true });
-    await writeFile(outputPath, serialized, "utf-8");
-    console.log(outputPath);
-  }
-}
+  async execute() {
+    try {
+      const cwd = await realpath(readOptionalString(this.cwd) ?? process.cwd());
+      const view = normalizeAgentQueryView(readOptionalString(this.view));
+      const outputDir = await resolveAgentSelectionOutputDir({
+        cwd,
+        from: readOptionalString(this.from),
+        latest: readOptionalBoolean(this.latest),
+      });
+      const output = await loadAgentOutput(outputDir);
+      const payload = await buildAgentQueryPayload(output, view, {
+        environments: normalizeRepeatedStringValues(readOptionalStringArray(this.environments)),
+        labelFilters: parseAgentLabelFilters(readOptionalStringArray(this.labels)),
+        statuses: normalizeRepeatedEnumValues(readOptionalStringArray(this.statuses), AGENT_TEST_STATUSES, "--status"),
+        severities: normalizeRepeatedEnumValues(
+          readOptionalStringArray(this.severities),
+          AGENT_FINDING_SEVERITIES,
+          "--severity",
+        ),
+        categories: normalizeRepeatedEnumValues(
+          readOptionalStringArray(this.categories),
+          AGENT_FINDING_CATEGORIES,
+          "--category",
+        ),
+        checks: normalizeRepeatedStringValues(readOptionalStringArray(this.checks)),
+        test: readOptionalString(this.test),
+        limit: normalizeAgentQueryLimit(readOptionalString(this.limit)),
+        includeMarkdown: readOptionalBoolean(this.includeMarkdown),
+      });
 
-export const executeAgentMode = async (params: {
-  configPath?: string;
-  cwd?: string;
-  output?: string;
-  expectations?: string;
-  environment?: string;
-  environmentName?: string;
-  silent?: boolean;
-  rerunFrom?: string;
-  rerunLatest?: boolean;
-  rerunPreset?: string;
-  rerunEnvironments?: string[];
-  rerunLabels?: string[];
-  args?: string[];
-}) => {
-  const {
-    configPath,
-    cwd: configuredCwd,
-    output,
-    expectations,
-    environment,
-    environmentName,
-    silent,
-    rerunFrom,
-    rerunLatest,
-    rerunPreset,
-    rerunEnvironments,
-    rerunLabels,
-    args,
-  } = params;
-
-  if (!args || !args.length) {
-    throw new UsageError("expecting command to be specified after --, e.g. allure agent -- npm run test");
+      console.log(JSON.stringify(payload, null, 2));
+    } catch (error) {
+      throwCliUsageError(error);
+    }
   }
+}
 
-  const command = args[0];
-  const commandArgs = args.slice(1);
-  const cwd = await realpath(configuredCwd ?? process.cwd());
-  const commandString = `${command} ${commandArgs.join(" ")}`;
-  const hasRerunSource = !!rerunFrom || !!rerunLatest;
-  const hasRerunFilters = !!rerunPreset || !!rerunEnvironments?.length || !!rerunLabels?.length;
-
-  if (!hasRerunSource && hasRerunFilters) {
-    throw new UsageError("Use rerun filters only together with --rerun-from <path> or --rerun-latest");
-  }
+export class AgentSelectCommand extends Command {
+  static paths = [["agent", "select"]];
 
-  const rerunContext = await createAgentTestPlanContext({
-    cwd,
-    from: rerunFrom,
-    latest: rerunLatest,
-    preset: normalizeAgentRerunPreset(rerunPreset),
-    environments: rerunEnvironments?.length ? rerunEnvironments : undefined,
-    labelFilters: parseAgentLabelFilters(rerunLabels),
+  static usage = Command.Usage({
+    description: "Select tests from an existing agent output and emit a test plan",
+    details:
+      "This command resolves a set of tests from a prior agent run and prints or writes a testplan.json payload. When --output is used, stdout contains the written test plan path, source output directory, preset, and selected test count.",
+    examples: [
+      ["agent select --from ./out/agent-output", "Print a test plan for the default review-targeted tests"],
+      ["agent select --latest --preset failed", "Print a test plan for failed tests from the latest project run"],
+      ["agent select --from ./out/agent-output --output ./testplan.json", "Write the selected test plan to a file"],
+    ],
   });
-  const childEnvironmentVariables = {
-    ...createChildAllureCliEnvironment("agent"),
-    ...(rerunContext ? { ALLURE_TESTPLAN_PATH: rerunContext.testPlanPath } : {}),
-  };
 
-  try {
-    if (getActiveAllureCliCommand()) {
-      console.log(commandString);
+  cwd = Option.String("--cwd", {
+    description:
+      "The project directory used to resolve --latest and relative paths (default: current working directory)",
+  });
 
-      const exitCode = await executeNestedAllureCommand({
-        command,
-        commandArgs,
-        cwd,
-        ...(rerunContext ? { environmentVariables: { ALLURE_TESTPLAN_PATH: rerunContext.testPlanPath } } : {}),
-        silent,
-      });
+  from = Option.String("--from", {
+    description: "The prior agent output directory to select tests from",
+  });
 
-      exit(exitCode ?? -1);
-      return;
-    }
+  latest = Option.Boolean("--latest", {
+    description: "Use the latest recorded agent output for the current project cwd",
+  });
 
-    const outputDir = output ? resolve(cwd, output) : await mkdtemp(join(tmpdir(), "allure-agent-"));
-    const expectationsPath = expectations ? resolve(cwd, expectations) : undefined;
-    const environmentOptions = {
-      environment,
-      environmentName,
-    };
+  preset = Option.String("--preset", {
+    description: "The selection preset: review, failed, unsuccessful, or all (default: review)",
+  });
 
-    normalizeCommandEnvironmentOptions(environmentOptions);
+  environments = Option.Array("--environment", {
+    description: "Filter selected tests by environment id. Repeat the option for multiple environments",
+  });
 
-    if (expectationsPath && isPathInside(outputDir, expectationsPath)) {
-      throw new UsageError(
-        `--expectations path ${JSON.stringify(expectationsPath)} must not be inside the agent output directory ${JSON.stringify(outputDir)}`,
-      );
-    }
+  labels = Option.Array("--label", {
+    description: "Filter selected tests by exact label name=value. Repeat the option for multiple filters",
+  });
 
-    const config = await readConfig(cwd, configPath, {
-      output: outputDir,
-      plugins: {
-        agent: {
-          options: {
-            outputDir,
-          },
-        },
-      },
-    });
-    const resolvedEnvironment = resolveCommandEnvironment(config, environmentOptions);
+  output = Option.String("--output,-o", {
+    description: "Write the resulting test plan to this file instead of printing it to stdout",
+  });
 
+  async execute() {
     try {
-      await rm(outputDir, { recursive: true });
-    } catch (error) {
-      if (!isFileNotFoundError(error)) {
-        console.error("could not clean output directory", error);
+      const cwd = await realpath(readOptionalString(this.cwd) ?? process.cwd());
+      const environments = readOptionalStringArray(this.environments);
+      const labels = readOptionalStringArray(this.labels);
+      const outputDir = await resolveAgentSelectionOutputDir({
+        cwd,
+        from: readOptionalString(this.from),
+        latest: readOptionalBoolean(this.latest),
+      });
+      const selection = await selectAgentTestPlan({
+        outputDir,
+        preset: normalizeAgentRerunPreset(readOptionalString(this.preset)),
+        environments: environments?.length ? environments : undefined,
+        labelFilters: parseAgentLabelFilters(labels),
+      });
+
+      if (!selection.testPlan.tests.length) {
+        console.error(`No tests matched selection in ${selection.outputDir}`);
+        exit(1);
+        return;
       }
-    }
 
-    const startedAt = new Date().toISOString();
+      const serialized = `${JSON.stringify(selection.testPlan, null, 2)}\n`;
+      const output = readOptionalString(this.output);
 
-    await persistLatestAgentState({
-      cwd,
-      outputDir,
-      expectationsPath,
-      command: commandString,
-      startedAt,
-      status: "running",
-    });
+      if (!output) {
+        console.log(serialized.trimEnd());
+        return;
+      }
 
-    console.log(`agent output: ${outputDir}`);
-    if (expectationsPath) {
-      console.log(`agent expectations: ${expectationsPath}`);
-    }
-    console.log(commandString);
-
-    const allureReport = new AllureReport({
-      ...config,
-      output: outputDir,
-      environment: resolvedEnvironment?.id,
-      open: false,
-      port: undefined,
-      qualityGate: undefined,
-      allureService: undefined,
-      realTime: false,
-      plugins: config.plugins,
-    });
-    const knownIssues = await allureReport.store.allKnownIssues();
-
-    const { globalExitCode } = await withProcessEnv(
-      {
-        ALLURE_AGENT_OUTPUT: outputDir,
-        ALLURE_AGENT_EXPECTATIONS: expectationsPath,
-        ALLURE_AGENT_COMMAND: commandString,
-        ALLURE_AGENT_PROJECT_ROOT: cwd,
-        ALLURE_AGENT_NAME: undefined,
-        ALLURE_AGENT_LOOP_ID: undefined,
-        ALLURE_AGENT_TASK_ID: undefined,
-        ALLURE_AGENT_CONVERSATION_ID: undefined,
-      },
-      async () =>
-        await executeAllureRun({
-          allureReport,
-          knownIssues,
-          cwd,
-          command,
-          commandArgs,
-          environmentVariables: childEnvironmentVariables,
-          environment: resolvedEnvironment?.id,
-          withQualityGate: false,
-          logs: "pipe",
-          silent,
-          ignoreLogs: false,
-          logProcessExit: false,
-        }),
-    );
+      const outputPath = resolve(cwd, output);
 
-    await persistLatestAgentState({
-      cwd,
-      outputDir,
-      expectationsPath,
-      command: commandString,
-      startedAt,
-      finishedAt: new Date().toISOString(),
-      status: "finished",
-      exitCode: globalExitCode.actual ?? globalExitCode.original,
-    });
-
-    exit(globalExitCode.actual ?? globalExitCode.original);
-  } finally {
-    await rerunContext?.cleanup();
+      await mkdir(dirname(outputPath), { recursive: true });
+      await writeFile(outputPath, serialized, "utf-8");
+      console.log(`agent testplan: ${outputPath}`);
+      console.log(`agent selection source: ${selection.outputDir}`);
+      console.log(`agent selection preset: ${selection.preset}`);
+      console.log(`agent selection tests: ${selection.selectedTests.length}`);
+    } catch (error) {
+      throwCliUsageError(error);
+    }
   }
-};
+}
diff --git a/packages/cli/src/commands/run.ts b/packages/cli/src/commands/run.ts
index 3aff1f40389..571ff73e646 100644
--- a/packages/cli/src/commands/run.ts
+++ b/packages/cli/src/commands/run.ts
@@ -1,6 +1,5 @@
 import * as console from "node:console";
 import { realpath, rm } from "node:fs/promises";
-import { resolve } from "node:path";
 import process, { exit } from "node:process";
 
 import { AllureReport, isFileNotFoundError, readConfig } from "@allurereport/core";
@@ -16,7 +15,6 @@ import {
   resolveCommandEnvironment,
 } from "../utils/environment.js";
 import { createChildAllureCliEnvironment, getActiveAllureCliCommand } from "../utils/execution-context.js";
-import { executeAgentMode } from "./agent.js";
 import { executeAllureRun, executeNestedAllureCommand } from "./commons/run.js";
 
 export class RunCommand extends Command {
@@ -105,24 +103,6 @@ export class RunCommand extends Command {
       throw new UsageError("expecting command to be specified after --, e.g. allure run -- npm run test");
     }
 
-    const legacyAgentOutput = process.env.ALLURE_AGENT_OUTPUT;
-
-    if (legacyAgentOutput) {
-      await executeAgentMode({
-        configPath: this.config,
-        cwd: this.cwd,
-        output: resolve(process.cwd(), legacyAgentOutput),
-        expectations: process.env.ALLURE_AGENT_EXPECTATIONS
-          ? resolve(process.cwd(), process.env.ALLURE_AGENT_EXPECTATIONS)
-          : undefined,
-        environment: this.environment,
-        environmentName: this.environmentName,
-        silent: this.silent,
-        args,
-      });
-      return;
-    }
-
     const before = new Date().getTime();
 
     process.on("exit", (exitCode) => {
diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts
index 031b3ec44f0..bf0cc3ef344 100644
--- a/packages/cli/src/index.ts
+++ b/packages/cli/src/index.ts
@@ -1,11 +1,14 @@
 import { readFileSync } from "node:fs";
-import { argv } from "node:process";
+import process, { argv } from "node:process";
 
 import { Builtins, Cli } from "clipanion";
 
 import {
   AgentCommand,
+  AGENT_TASK_MAP_HELP,
+  AgentCapabilitiesCommand,
   AgentLatestCommand,
+  AgentQueryCommand,
   AgentSelectCommand,
   AgentStateDirCommand,
   Allure2Command,
@@ -27,6 +30,7 @@ import {
   SlackCommand,
   TestPlanCommand,
   WatchCommand,
+  isAgentTaskMapHelpRequest,
 } from "./commands/index.js";
 
 const [node, app, ...args] = argv;
@@ -43,7 +47,9 @@ const cli = new Cli({
 
 cli.register(AwesomeCommand);
 cli.register(Allure2Command);
+cli.register(AgentCapabilitiesCommand);
 cli.register(AgentLatestCommand);
+cli.register(AgentQueryCommand);
 cli.register(AgentSelectCommand);
 cli.register(AgentStateDirCommand);
 cli.register(AgentCommand);
@@ -66,7 +72,19 @@ cli.register(ResultsPackCommand);
 cli.register(ResultsUnpackCommand);
 cli.register(Builtins.HelpCommand);
 cli.register(Builtins.VersionCommand);
-cli.runExit(args);
+void cli
+  .run(args)
+  .then((exitCode) => {
+    if (exitCode === 0 && isAgentTaskMapHelpRequest(args)) {
+      process.stdout.write(`\n${AGENT_TASK_MAP_HELP}`);
+    }
+
+    process.exitCode = exitCode;
+  })
+  .catch((error: unknown) => {
+    console.error(error);
+    process.exitCode = 1;
+  });
 
 export { type Config as AllureConfig, defineConfig } from "@allurereport/plugin-api";
 export { defaultChartsConfig } from "@allurereport/charts-api";
diff --git a/packages/cli/src/utils/index.ts b/packages/cli/src/utils/index.ts
index a13db4c6f55..1513f39e825 100644
--- a/packages/cli/src/utils/index.ts
+++ b/packages/cli/src/utils/index.ts
@@ -2,6 +2,4 @@ export * from "./process.js";
 export * from "./terminal.js";
 export * from "./logs.js";
 export * from "./execution-context.js";
-export * from "./agent-state.js";
-export * from "./agent-select.js";
 export * from "./fileSystem.js";
diff --git a/packages/cli/test/commands/agent.test.ts b/packages/cli/test/commands/agent.test.ts
index 9c32fb8bbe6..6de69f72c5c 100644
--- a/packages/cli/test/commands/agent.test.ts
+++ b/packages/cli/test/commands/agent.test.ts
@@ -1,14 +1,29 @@
 import { resolve } from "node:path";
 
 import { readConfig } from "@allurereport/core";
+import {
+  AgentExpectationUsageError,
+  AgentUsageError,
+  buildAgentInlineExpectations,
+  createAgentTestPlanContext,
+  validateAgentExpectationsFile,
+  writeInvalidAgentExpectationOutput,
+  writeLatestAgentState,
+} from "@allurereport/plugin-agent";
 import { epic, feature, label, story } from "allure-js-commons";
 import { run, UsageError } from "clipanion";
 import { type Mock, beforeEach, describe, expect, it, vi } from "vitest";
 
-import { AgentCommand } from "../../src/commands/agent.js";
+import {
+  AgentCommand,
+  AgentCapabilitiesCommand,
+  AgentLatestCommand,
+  AgentQueryCommand,
+  AgentSelectCommand,
+  AgentStateDirCommand,
+  createAgentCapabilities,
+} from "../../src/commands/agent.js";
 import { executeAllureRun, executeNestedAllureCommand } from "../../src/commands/commons/run.js";
-import { createAgentTestPlanContext } from "../../src/utils/agent-select.js";
-import { writeLatestAgentState } from "../../src/utils/agent-state.js";
 import { ALLURE_CLI_ACTIVE_COMMAND_ENV } from "../../src/utils/execution-context.js";
 
 const { exitMock } = vi.hoisted(() => {
@@ -29,8 +44,11 @@ vi.mock("node:process", async (importOriginal) => ({
 vi.mock("node:fs/promises", async (importOriginal) => ({
   ...(await importOriginal()),
   realpath: vi.fn().mockResolvedValue("/cwd"),
+  readFile: vi.fn().mockResolvedValue("goal: valid file expectations\n"),
   mkdtemp: vi.fn().mockResolvedValue("/tmp/allure-agent-123"),
   rm: vi.fn().mockResolvedValue(undefined),
+  mkdir: vi.fn().mockResolvedValue(undefined),
+  writeFile: vi.fn().mockResolvedValue(undefined),
 }));
 vi.mock("@allurereport/core", async () => {
   const { AllureReportMock } = await import("../utils.js");
@@ -51,27 +69,42 @@ vi.mock("../../src/commands/commons/run.js", () => ({
   }),
   executeNestedAllureCommand: vi.fn().mockResolvedValue(0),
 }));
-vi.mock("../../src/utils/agent-state.js", () => ({
-  resolveAgentStateDir: vi.fn().mockReturnValue("/tmp/allure-agent-state-0f0810f05e3f7d8f"),
-  writeLatestAgentState: vi.fn().mockResolvedValue(undefined),
-  readLatestAgentState: vi.fn().mockResolvedValue(undefined),
-}));
-vi.mock("../../src/utils/agent-select.js", () => ({
-  normalizeAgentRerunPreset: vi.fn((value?: string) => value ?? "review"),
-  parseAgentLabelFilters: vi.fn((values?: string[]) =>
-    (values ?? []).map((value) => {
-      const [name, filterValue] = value.split("=");
-
-      return {
-        name,
-        value: filterValue,
-      };
+vi.mock("@allurereport/plugin-agent", async (importOriginal) => {
+  const actual = await importOriginal<typeof import("@allurereport/plugin-agent")>();
+
+  return {
+    ...actual,
+    resolveAgentStateDir: vi.fn().mockReturnValue("/tmp/allure-agent-state-0f0810f05e3f7d8f"),
+    writeLatestAgentState: vi.fn().mockResolvedValue(undefined),
+    readLatestAgentState: vi.fn().mockResolvedValue(undefined),
+    normalizeAgentRerunPreset: vi.fn((value?: string) => value ?? "review"),
+    parseAgentLabelFilters: vi.fn((values?: string[]) =>
+      (values ?? []).map((value) => {
+        const [name, filterValue] = value.split("=");
+
+        return {
+          name,
+          value: filterValue,
+        };
+      }),
+    ),
+    resolveAgentSelectionOutputDir: vi.fn(),
+    selectAgentTestPlan: vi.fn(),
+    createAgentTestPlanContext: vi.fn().mockResolvedValue(undefined),
+    buildAgentInlineExpectations: vi.fn((options: Record<string, unknown>) =>
+      Object.values(options).some((value) =>
+        Array.isArray(value) ? value.length > 0 : typeof value === "string" && value.length > 0,
+      )
+        ? { goal: "mock inline expectations" }
+        : undefined,
+    ),
+    validateAgentExpectationsFile: vi.fn().mockResolvedValue(undefined),
+    writeInvalidAgentExpectationOutput: vi.fn().mockResolvedValue({
+      outputDir: "/tmp/allure-agent-123",
+      generatedAt: "2026-06-10T16:00:00.000Z",
     }),
-  ),
-  resolveAgentSelectionOutputDir: vi.fn(),
-  selectAgentTestPlan: vi.fn(),
-  createAgentTestPlanContext: vi.fn().mockResolvedValue(undefined),
-}));
+  };
+});
 
 beforeEach(async () => {
   await epic("coverage");
@@ -83,6 +116,37 @@ beforeEach(async () => {
 
   const { AllureReportMock } = await import("../utils.js");
 
+  (executeAllureRun as Mock).mockReset();
+  (executeNestedAllureCommand as Mock).mockReset();
+  (writeLatestAgentState as Mock).mockReset();
+  (createAgentTestPlanContext as Mock).mockReset();
+  (buildAgentInlineExpectations as Mock).mockReset();
+  (validateAgentExpectationsFile as Mock).mockReset();
+  (writeInvalidAgentExpectationOutput as Mock).mockReset();
+  (readConfig as Mock).mockReset();
+
+  (executeAllureRun as Mock).mockResolvedValue({
+    globalExitCode: {
+      original: 0,
+      actual: undefined,
+    },
+    testProcessResult: null,
+  });
+  (executeNestedAllureCommand as Mock).mockResolvedValue(0);
+  (writeLatestAgentState as Mock).mockResolvedValue(undefined);
+  (createAgentTestPlanContext as Mock).mockResolvedValue(undefined);
+  (buildAgentInlineExpectations as Mock).mockImplementation((options: Record<string, unknown>) =>
+    Object.values(options).some((value) =>
+      Array.isArray(value) ? value.length > 0 : typeof value === "string" && value.length > 0,
+    )
+      ? { goal: "mock inline expectations" }
+      : undefined,
+  );
+  (validateAgentExpectationsFile as Mock).mockResolvedValue(undefined);
+  (writeInvalidAgentExpectationOutput as Mock).mockResolvedValue({
+    outputDir: "/tmp/allure-agent-123",
+    generatedAt: "2026-06-10T16:00:00.000Z",
+  });
   AllureReportMock.prototype.store = {
     allKnownIssues: vi.fn().mockResolvedValue([]),
   };
@@ -109,6 +173,123 @@ beforeEach(async () => {
 });
 
 describe("agent command", () => {
+  const stripAnsi = (value: string) => value.replace(new RegExp(`${String.fromCharCode(27)}\\[[0-9;]*m`, "g"), "");
+  // Runs the full agent command set with `args` and returns everything written to stdout, color codes stripped.
+  const captureAgentHelp = async (args: string[]) => {
+    const write = vi.fn();
+    // Only `write` is stubbed, so the sink is force-cast to NodeJS.WritableStream.
+    const exitCode = await run(
+      { binaryName: "allure" },
+      [
+        AgentCapabilitiesCommand,
+        AgentLatestCommand,
+        AgentQueryCommand,
+        AgentSelectCommand,
+        AgentStateDirCommand,
+        AgentCommand,
+      ],
+      args,
+      { stdout: { write } as unknown as NodeJS.WritableStream },
+    );
+
+    expect(exitCode).toBe(0);
+
+    const rawOutput = write.mock.calls.map(([chunk]) => String(chunk)).join("");
+
+    return stripAnsi(rawOutput);
+  };
+
+  it.each([
+    {
+      command: "agent",
+      args: ["agent", "--help"],
+      expected: [
+        "Multiple commands match your selection:",
+        "allure agent capabilities",
+        "allure agent latest",
+        "allure agent query",
+        "allure agent select",
+        "allure agent state-dir",
+        "allure agent [--config",
+        "--expect-tests #0",
+        "--expect-label #0",
+        "--expect-test #0",
+        "--expect-step-containing #0",
+        "--rerun-latest",
+        "Run again with -h=<index>",
+      ],
+    },
+    {
+      command: "agent capabilities",
+      args: ["agent", "capabilities", "--help"],
+      expected: ["Print structured Allure agent capability information", "$ allure agent capabilities", "--json"],
+    },
+    {
+      command: "agent query",
+      args: ["agent", "query", "--help"],
+      expected: [
+        "Query an existing Allure agent output directory as focused JSON",
+        "$ allure agent query",
+        "--latest",
+        "--from #0",
+        "--status #0",
+        "--severity #0",
+        "--include-markdown",
+      ],
+    },
+    {
+      command: "agent select",
+      args: ["agent", "select", "--help"],
+      expected: [
+        "Select tests from an existing agent output and emit a test plan",
+        "$ allure agent select",
+        "--latest",
+        "--preset #0",
+        "--environment #0",
+        "--label #0",
+        "--output,-o #0",
+      ],
+    },
+    {
+      command: "agent latest",
+      args: ["agent", "latest", "--help"],
+      expected: [
+        "Print the latest Allure agent output directory and index path for the current project",
+        "$ allure agent latest",
+        "--cwd #0",
+      ],
+    },
+    {
+      command: "agent state-dir",
+      args: ["agent", "state-dir", "--help"],
+      expected: [
+        "Print the Allure agent state directory for the current project",
+        "$ allure agent state-dir",
+        "--cwd #0",
+      ],
+    },
+  ])("should expose $command help for local capability detection", async ({ args, expected }) => {
+    const helpText = await captureAgentHelp(args);
+
+    for (const fragment of expected) {
+      expect(helpText).toContain(fragment);
+    }
+  });
+
+  it("should print structured agent capabilities as JSON", async () => {
+    const consoleModule = await import("node:console");
+    const logMock = consoleModule.log as Mock;
+
+    const exitCode = await run(AgentCapabilitiesCommand, ["agent", "capabilities", "--json"]);
+
+    expect(exitCode).toBe(0);
+    expect(logMock).toHaveBeenCalledTimes(1);
+
+    const payload = JSON.parse(logMock.mock.calls[0][0]) as ReturnType<typeof createAgentCapabilities>;
+
+    expect(payload).toEqual(createAgentCapabilities());
+  });
+
   it("should fail with usage error when command to run is missing", async () => {
     const command = new AgentCommand();
 
@@ -117,12 +298,13 @@ describe("agent command", () => {
     await expect(command.execute()).rejects.toBeInstanceOf(UsageError);
   });
 
-  it("should reject expectations files placed inside the output directory", async () => {
+  it("should translate plugin-agent expectation file validation failures to usage errors", async () => {
     const command = new AgentCommand();
 
     command.output = "./custom-output";
     command.expectations = "./custom-output/expected.yaml";
     command.commandToRun = ["--", "npm", "test"];
+    (validateAgentExpectationsFile as Mock).mockRejectedValueOnce(new AgentUsageError("invalid expectation path"));
 
     await expect(command.execute()).rejects.toBeInstanceOf(UsageError);
 
@@ -143,6 +325,7 @@ describe("agent command", () => {
         agent: {
           options: {
             outputDir: "/tmp/allure-agent-123",
+            command: "npm test",
           },
         },
       },
@@ -176,7 +359,8 @@ describe("agent command", () => {
       }),
     );
     expect(logMock).toHaveBeenNthCalledWith(1, "agent output: /tmp/allure-agent-123");
-    expect(logMock).toHaveBeenNthCalledWith(2, "npm test");
+    expect(logMock).toHaveBeenNthCalledWith(2, "agent index: /tmp/allure-agent-123/index.md");
+    expect(logMock).toHaveBeenNthCalledWith(3, "npm test");
     expect(logMock.mock.invocationCallOrder[0]).toBeLessThan((executeAllureRun as Mock).mock.invocationCallOrder[0]);
     expect(writeLatestAgentState).toHaveBeenNthCalledWith(
       1,
@@ -246,14 +430,170 @@ describe("agent command", () => {
         agent: {
           options: {
             outputDir: resolvedOutput,
+            command: "npm test",
+            expectationsPath: resolvedExpectations,
           },
         },
       },
     });
     expect(consoleModule.log).toHaveBeenCalledWith(`agent output: ${resolvedOutput}`);
+    expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${resolvedOutput}/index.md`);
     expect(consoleModule.log).toHaveBeenCalledWith(`agent expectations: ${resolvedExpectations}`);
   });
 
+  it("should pass inline expectation options to plugin-agent and readConfig", async () => {
+    const consoleModule = await import("node:console");
+
+    await run(AgentCommand, [
+      "agent",
+      "--goal",
+      "Review agent visibility",
+      "--task-id",
+      "agent-inline",
+      "--expect-tests",
+      "2",
+      "--expect-label",
+      "module=plugin-agent",
+      "--expect-env",
+      "node",
+      "--expect-test",
+      "suite should pass",
+      "--expect-prefix",
+      "suite",
+      "--forbid-label",
+      "layer=e2e",
+      "--expect-step-containing",
+      "assert expected behavior",
+      "--expect-steps",
+      "1",
+      "--expect-attachments",
+      "1",
+      "--expect-attachment",
+      "trace.zip",
+      "--expect-attachment",
+      "content-type=application/json",
+      "--",
+      "npm",
+      "test",
+    ]);
+
+    expect(buildAgentInlineExpectations).toHaveBeenCalledWith({
+      goal: ["Review agent visibility"],
+      taskId: ["agent-inline"],
+      expectTests: ["2"],
+      expectLabels: ["module=plugin-agent"],
+      expectEnvironments: ["node"],
+      expectFullNames: ["suite should pass"],
+      expectPrefixes: ["suite"],
+      forbidLabels: ["layer=e2e"],
+      expectStepContains: ["assert expected behavior"],
+      expectSteps: ["1"],
+      expectAttachments: ["1"],
+      expectAttachmentFilters: ["trace.zip", "content-type=application/json"],
+    });
+    expect(readConfig).toHaveBeenCalledWith(
+      "/cwd",
+      undefined,
+      expect.objectContaining({
+        plugins: {
+          agent: {
+            options: expect.objectContaining({
+              expectations: { goal: "mock inline expectations" },
+            }),
+          },
+        },
+      }),
+    );
+    expect(consoleModule.log).toHaveBeenCalledWith("agent expectations: CLI options");
+    expect(exitMock).toHaveBeenCalledWith(0);
+  });
+
+  it("should reject mixing an expectations file with inline expectation flags", async () => {
+    const consoleModule = await import("node:console");
+    const command = new AgentCommand();
+
+    command.expectations = "./expected.yaml";
+    command.goal = ["Review"];
+    command.commandToRun = ["--", "npm", "test"];
+
+    await command.execute();
+
+    expect(writeInvalidAgentExpectationOutput).toHaveBeenCalledWith({
+      outputDir: "/tmp/allure-agent-123",
+      command: "npm test",
+      error: expect.any(AgentExpectationUsageError),
+    });
+    expect(consoleModule.error).toHaveBeenCalledWith("Use either --expectations <file> or inline expectation flags, not both");
+    expect(executeAllureRun).not.toHaveBeenCalled();
+    expect(exitMock).toHaveBeenCalledWith(1);
+  });
+
+  it("should write invalid agent output when plugin-agent inline expectation parsing fails", async () => {
+    const consoleModule = await import("node:console");
+    const command = new AgentCommand();
+    const outputDir = resolve("/cwd", "./agent-invalid");
+    const error = new AgentExpectationUsageError(
+      'Invalid --expect-label "module". Expected the form name=value, for example module=cli',
+      "--expect-label",
+    );
+
+    (buildAgentInlineExpectations as Mock).mockImplementationOnce(() => {
+      throw error;
+    });
+
+    command.output = "./agent-invalid";
+    command.expectLabels = ["module"];
+    command.commandToRun = ["--", "npm", "test"];
+
+    await command.execute();
+
+    expect(writeInvalidAgentExpectationOutput).toHaveBeenCalledWith({
+      outputDir,
+      command: "npm test",
+      error,
+    });
+    expect(readConfig).not.toHaveBeenCalled();
+    expect(executeAllureRun).not.toHaveBeenCalled();
+    expect(consoleModule.log).toHaveBeenCalledWith(`agent output: ${outputDir}`);
+    expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${outputDir}/index.md`);
+    expect(consoleModule.error).toHaveBeenCalledWith(
+      'Invalid --expect-label "module". Expected the form name=value, for example module=cli',
+    );
+    expect(exitMock).toHaveBeenCalledWith(1);
+  });
+
+  it("should write invalid agent output when plugin-agent expectation file validation fails", async () => {
+    const consoleModule = await import("node:console");
+    const command = new AgentCommand();
+    const outputDir = resolve("/cwd", "./agent-invalid-file");
+    const error = new AgentExpectationUsageError(
+      "Could not load expectations from /cwd/expected.yaml: Expected a YAML or JSON object",
+      "--expectations",
+    );
+
+    (validateAgentExpectationsFile as Mock).mockRejectedValueOnce(error);
+
+    command.output = "./agent-invalid-file";
+    command.expectations = "./expected.yaml";
+    command.commandToRun = ["--", "npm", "test"];
+
+    await command.execute();
+
+    expect(writeInvalidAgentExpectationOutput).toHaveBeenCalledWith({
+      outputDir,
+      command: "npm test",
+      error,
+    });
+    expect(readConfig).not.toHaveBeenCalled();
+    expect(executeAllureRun).not.toHaveBeenCalled();
+    expect(consoleModule.log).toHaveBeenCalledWith(`agent output: ${outputDir}`);
+    expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${outputDir}/index.md`);
+    expect(consoleModule.error).toHaveBeenCalledWith(
+      "Could not load expectations from /cwd/expected.yaml: Expected a YAML or JSON object",
+    );
+    expect(exitMock).toHaveBeenCalledWith(1);
+  });
+
   it("should pass ALLURE_TESTPLAN_PATH to the child process when rerun-from is enabled", async () => {
     const cleanupMock = vi.fn().mockResolvedValue(undefined);
 
@@ -338,36 +678,10 @@ describe("agent command", () => {
     delete process.env[ALLURE_CLI_ACTIVE_COMMAND_ENV];
   });
 
-  it("should sandbox ALLURE_AGENT_* variables during execution and restore them afterwards", async () => {
+  it("should pass agent metadata to the plugin through options", async () => {
     const resolvedOutput = resolve("/cwd", "./custom-output");
     const resolvedExpectations = resolve("/cwd", "./expected.yaml");
 
-    process.env.ALLURE_AGENT_OUTPUT = "ambient-output";
-    process.env.ALLURE_AGENT_EXPECTATIONS = "ambient-expected";
-    process.env.ALLURE_AGENT_NAME = "ambient-name";
-    process.env.ALLURE_AGENT_LOOP_ID = "ambient-loop";
-    process.env.ALLURE_AGENT_TASK_ID = "ambient-task";
-    process.env.ALLURE_AGENT_CONVERSATION_ID = "ambient-conversation";
-
-    (executeAllureRun as Mock).mockImplementationOnce(async () => {
-      expect(process.env.ALLURE_AGENT_OUTPUT).toBe(resolvedOutput);
-      expect(process.env.ALLURE_AGENT_EXPECTATIONS).toBe(resolvedExpectations);
-      expect(process.env.ALLURE_AGENT_COMMAND).toBe("npm test");
-      expect(process.env.ALLURE_AGENT_PROJECT_ROOT).toBe("/cwd");
-      expect(process.env.ALLURE_AGENT_NAME).toBeUndefined();
-      expect(process.env.ALLURE_AGENT_LOOP_ID).toBeUndefined();
-      expect(process.env.ALLURE_AGENT_TASK_ID).toBeUndefined();
-      expect(process.env.ALLURE_AGENT_CONVERSATION_ID).toBeUndefined();
-
-      return {
-        globalExitCode: {
-          original: 0,
-          actual: undefined,
-        },
-        testProcessResult: null,
-      };
-    });
-
     await run(AgentCommand, [
       "agent",
       "--output",
@@ -379,18 +693,17 @@ describe("agent command", () => {
       "test",
     ]);
 
-    expect(process.env.ALLURE_AGENT_OUTPUT).toBe("ambient-output");
-    expect(process.env.ALLURE_AGENT_EXPECTATIONS).toBe("ambient-expected");
-    expect(process.env.ALLURE_AGENT_NAME).toBe("ambient-name");
-    expect(process.env.ALLURE_AGENT_LOOP_ID).toBe("ambient-loop");
-    expect(process.env.ALLURE_AGENT_TASK_ID).toBe("ambient-task");
-    expect(process.env.ALLURE_AGENT_CONVERSATION_ID).toBe("ambient-conversation");
-
-    delete process.env.ALLURE_AGENT_OUTPUT;
-    delete process.env.ALLURE_AGENT_EXPECTATIONS;
-    delete process.env.ALLURE_AGENT_NAME;
-    delete process.env.ALLURE_AGENT_LOOP_ID;
-    delete process.env.ALLURE_AGENT_TASK_ID;
-    delete process.env.ALLURE_AGENT_CONVERSATION_ID;
+    expect(readConfig).toHaveBeenCalledWith("/cwd", undefined, {
+      output: resolvedOutput,
+      plugins: {
+        agent: {
+          options: {
+            outputDir: resolvedOutput,
+            command: "npm test",
+            expectationsPath: resolvedExpectations,
+          },
+        },
+      },
+    });
   });
 });
diff --git a/packages/cli/test/commands/agentLatest.test.ts b/packages/cli/test/commands/agentLatest.test.ts
index d0145e2f8fb..dff18eac62a 100644
--- a/packages/cli/test/commands/agentLatest.test.ts
+++ b/packages/cli/test/commands/agentLatest.test.ts
@@ -1,9 +1,9 @@
+import { readLatestAgentState, resolveAgentStateDir } from "@allurereport/plugin-agent";
 import { epic, feature, label, story } from "allure-js-commons";
 import { run } from "clipanion";
 import { type Mock, beforeEach, describe, expect, it, vi } from "vitest";
 
 import { AgentLatestCommand, AgentStateDirCommand } from "../../src/commands/agent.js";
-import { readLatestAgentState, resolveAgentStateDir } from "../../src/utils/agent-state.js";
 
 vi.mock("node:console", async (importOriginal) => ({
   ...(await importOriginal()),
@@ -18,11 +18,16 @@ vi.mock("node:fs/promises", async (importOriginal) => ({
   ...(await importOriginal()),
   realpath: vi.fn().mockResolvedValue("/cwd"),
 }));
-vi.mock("../../src/utils/agent-state.js", () => ({
-  readLatestAgentState: vi.fn(),
-  resolveAgentStateDir: vi.fn(),
-  writeLatestAgentState: vi.fn(),
-}));
+vi.mock("@allurereport/plugin-agent", async (importOriginal) => {
+  const actual = await importOriginal<typeof import("@allurereport/plugin-agent")>();
+
+  return {
+    ...actual,
+    readLatestAgentState: vi.fn(),
+    resolveAgentStateDir: vi.fn(),
+    writeLatestAgentState: vi.fn(),
+  };
+});
 
 beforeEach(async () => {
   await epic("coverage");
@@ -33,7 +38,7 @@ beforeEach(async () => {
 });
 
 describe("agent latest command", () => {
-  it("should print the latest output directory for the resolved project cwd", async () => {
+  it("should print the latest output directory and index path for the resolved project cwd", async () => {
     const consoleModule = await import("node:console");
 
     (readLatestAgentState as Mock).mockResolvedValueOnce({
@@ -48,7 +53,8 @@ describe("agent latest command", () => {
     await run(AgentLatestCommand, ["agent", "latest"]);
 
     expect(readLatestAgentState).toHaveBeenCalledWith("/cwd");
-    expect(consoleModule.log).toHaveBeenCalledWith("/tmp/allure-agent-123");
+    expect(consoleModule.log).toHaveBeenNthCalledWith(1, "agent output: /tmp/allure-agent-123");
+    expect(consoleModule.log).toHaveBeenNthCalledWith(2, "agent index: /tmp/allure-agent-123/index.md");
   });
 
   it("should exit with code 1 when no latest output exists for the project", async () => {
diff --git a/packages/cli/test/commands/agentQuery.test.ts b/packages/cli/test/commands/agentQuery.test.ts
new file mode 100644
index 00000000000..f6a53254090
--- /dev/null
+++ b/packages/cli/test/commands/agentQuery.test.ts
@@ -0,0 +1,199 @@
+import {
+  AgentUsageError,
+  buildAgentQueryPayload,
+  loadAgentOutput,
+  resolveAgentSelectionOutputDir,
+  type AgentOutputBundle,
+} from "@allurereport/plugin-agent";
+import { epic, feature, label, story } from "allure-js-commons";
+import { run } from "clipanion";
+import { type Mock, beforeEach, describe, expect, it, vi } from "vitest";
+
+import { AgentQueryCommand } from "../../src/commands/agent.js";
+
+vi.mock("node:console", async (importOriginal) => ({
+  ...(await importOriginal()),
+  log: vi.fn(),
+  error: vi.fn(),
+}));
+vi.mock("node:fs/promises", async (importOriginal) => ({
+  ...(await importOriginal()),
+  realpath: vi.fn().mockResolvedValue("/cwd"),
+}));
+vi.mock("@allurereport/plugin-agent", async (importOriginal) => {
+  const actual = await importOriginal<typeof import("@allurereport/plugin-agent")>();
+
+  return {
+    ...actual,
+    buildAgentQueryPayload: vi.fn(),
+    loadAgentOutput: vi.fn(),
+    resolveAgentSelectionOutputDir: vi.fn(),
+  };
+});
+
+const agentOutput = {
+  outputDir: "/tmp/agent-output",
+} as AgentOutputBundle;
+
+// Asserts the mocked console.log was called exactly once and returns its first argument parsed as JSON.
+const readLoggedJson = async <T>() => {
+  const logMock = (await import("node:console")).log as Mock;
+  expect(logMock).toHaveBeenCalledTimes(1);
+  const [[serialized]] = logMock.mock.calls;
+
+  return JSON.parse(serialized) as T;
+};
+
+beforeEach(async () => {
+  await epic("coverage");
+  await feature("agent-mode");
+  await story("agentQuery");
+  await label("coverage", "agent-mode");
+  vi.clearAllMocks(); // drop recorded calls so readLoggedJson's exactly-once assertion is per test
+  (resolveAgentSelectionOutputDir as Mock).mockResolvedValue("/tmp/agent-output");
+  (loadAgentOutput as Mock).mockResolvedValue(agentOutput);
+  (buildAgentQueryPayload as Mock).mockResolvedValue({
+    schema: "allure-agent-query/v1",
+    view: "summary",
+    output_dir: "/tmp/agent-output",
+  }); // default summary payload; individual tests override it with mockResolvedValueOnce/mockRejectedValueOnce
+});
+
+describe("agent query command", () => {
+  it("should resolve the latest output and print the plugin-agent summary payload", async () => {
+    await run(AgentQueryCommand, ["agent", "query", "--latest", "summary"]);
+
+    expect(resolveAgentSelectionOutputDir).toHaveBeenCalledWith({
+      cwd: "/cwd",
+      from: undefined,
+      latest: true,
+    });
+    expect(loadAgentOutput).toHaveBeenCalledWith("/tmp/agent-output");
+    expect(buildAgentQueryPayload).toHaveBeenCalledWith(agentOutput, "summary", {
+      environments: undefined,
+      labelFilters: [],
+      statuses: undefined,
+      severities: undefined,
+      categories: undefined,
+      checks: undefined,
+      test: undefined,
+      limit: undefined,
+      includeMarkdown: false,
+    });
+
+    await expect(readLoggedJson()).resolves.toEqual({
+      schema: "allure-agent-query/v1",
+      view: "summary",
+      output_dir: "/tmp/agent-output",
+    });
+  });
+
+  it("should pass test query filters to plugin-agent", async () => {
+    (buildAgentQueryPayload as Mock).mockResolvedValueOnce({
+      schema: "allure-agent-query/v1",
+      view: "tests",
+      output_dir: "/tmp/agent-output",
+      tests: [],
+    });
+
+    await run(AgentQueryCommand, [
+      "agent",
+      "query",
+      "tests",
+      "--from",
+      "./agent-output",
+      "--status",
+      "failed",
+      "--label",
+      "module=cli",
+      "--limit",
+      "1",
+    ]);
+
+    expect(resolveAgentSelectionOutputDir).toHaveBeenCalledWith({
+      cwd: "/cwd",
+      from: "./agent-output",
+      latest: false,
+    });
+    expect(buildAgentQueryPayload).toHaveBeenCalledWith(agentOutput, "tests", {
+      environments: undefined,
+      labelFilters: [{ name: "module", value: "cli" }],
+      statuses: ["failed"],
+      severities: undefined,
+      categories: undefined,
+      checks: undefined,
+      test: undefined,
+      limit: 1,
+      includeMarkdown: false,
+    });
+
+    await expect(readLoggedJson()).resolves.toEqual(
+      expect.objectContaining({
+        view: "tests",
+      }),
+    );
+  });
+
+  it("should pass finding query filters to plugin-agent", async () => {
+    await run(AgentQueryCommand, [
+      "agent",
+      "query",
+      "findings",
+      "--from",
+      "./agent-output",
+      "--severity",
+      "high",
+      "--category",
+      "scope",
+      "--check",
+      "expected-label-missing",
+      "--test",
+      "suite should fail",
+    ]);
+
+    expect(buildAgentQueryPayload).toHaveBeenCalledWith(agentOutput, "findings", {
+      environments: undefined,
+      labelFilters: [],
+      statuses: undefined,
+      severities: ["high"],
+      categories: ["scope"],
+      checks: ["expected-label-missing"],
+      test: "suite should fail",
+      limit: undefined,
+      includeMarkdown: false,
+    });
+  });
+
+  it("should pass one-test markdown requests to plugin-agent", async () => {
+    await run(AgentQueryCommand, [
+      "agent",
+      "query",
+      "test",
+      "--from",
+      "./agent-output",
+      "--test",
+      "suite should fail",
+      "--include-markdown",
+    ]);
+
+    expect(buildAgentQueryPayload).toHaveBeenCalledWith(agentOutput, "test", {
+      environments: undefined,
+      labelFilters: [],
+      statuses: undefined,
+      severities: undefined,
+      categories: undefined,
+      checks: undefined,
+      test: "suite should fail",
+      limit: undefined,
+      includeMarkdown: true,
+    });
+  });
+
+  it("should translate plugin-agent query usage errors to CLI failures", async () => {
+    (buildAgentQueryPayload as Mock).mockRejectedValueOnce(new AgentUsageError("No tests matched query"));
+
+    const exitCode = await run(AgentQueryCommand, ["agent", "query", "test", "--from", "./agent-output"]);
+
+    expect(exitCode).toBe(1);
+  });
+});
diff --git a/packages/cli/test/commands/agentSelect.test.ts b/packages/cli/test/commands/agentSelect.test.ts
index 53f6604ec5d..50556a905bf 100644
--- a/packages/cli/test/commands/agentSelect.test.ts
+++ b/packages/cli/test/commands/agentSelect.test.ts
@@ -1,9 +1,9 @@
+import { resolveAgentSelectionOutputDir, selectAgentTestPlan } from "@allurereport/plugin-agent";
 import { epic, feature, label, story } from "allure-js-commons";
 import { run, UsageError } from "clipanion";
 import { type Mock, beforeEach, describe, expect, it, vi } from "vitest";
 
 import { AgentSelectCommand } from "../../src/commands/agent.js";
-import { resolveAgentSelectionOutputDir, selectAgentTestPlan } from "../../src/utils/agent-select.js";
 
 vi.mock("node:console", async (importOriginal) => ({
   ...(await importOriginal()),
@@ -20,22 +20,27 @@ vi.mock("node:fs/promises", async (importOriginal) => ({
   mkdir: vi.fn().mockResolvedValue(undefined),
   writeFile: vi.fn().mockResolvedValue(undefined),
 }));
-vi.mock("../../src/utils/agent-select.js", () => ({
-  normalizeAgentRerunPreset: vi.fn((value?: string) => value ?? "review"),
-  parseAgentLabelFilters: vi.fn((values?: string[]) =>
-    (values ?? []).map((value) => {
-      const [name, filterValue] = value.split("=");
-
-      return {
-        name,
-        value: filterValue,
-      };
-    }),
-  ),
-  resolveAgentSelectionOutputDir: vi.fn(),
-  selectAgentTestPlan: vi.fn(),
-  createAgentTestPlanContext: vi.fn(),
-}));
+vi.mock("@allurereport/plugin-agent", async (importOriginal) => {
+  const actual = await importOriginal<typeof import("@allurereport/plugin-agent")>();
+
+  return {
+    ...actual,
+    normalizeAgentRerunPreset: vi.fn((value?: string) => value ?? "review"),
+    parseAgentLabelFilters: vi.fn((values?: string[]) =>
+      (values ?? []).map((value) => {
+        const [name, filterValue] = value.split("=");
+
+        return {
+          name,
+          value: filterValue,
+        };
+      }),
+    ),
+    resolveAgentSelectionOutputDir: vi.fn(),
+    selectAgentTestPlan: vi.fn(),
+    createAgentTestPlanContext: vi.fn(),
+  };
+});
 
 beforeEach(async () => {
   await epic("coverage");
@@ -81,4 +86,42 @@ describe("agent select command", () => {
       `{\n  "version": "1.0",\n  "tests": [\n    {\n      "selector": "suite feature A"\n    }\n  ]\n}`,
     );
   });
+
+  it("should write the selected test plan and print selection summary when output is provided", async () => {
+    const consoleModule = await import("node:console");
+    const fsModule = await import("node:fs/promises");
+
+    (resolveAgentSelectionOutputDir as Mock).mockResolvedValueOnce("/tmp/agent-output");
+    (selectAgentTestPlan as Mock).mockResolvedValueOnce({
+      outputDir: "/tmp/agent-output",
+      preset: "failed",
+      selectedTests: [{ full_name: "suite feature A" }, { full_name: "suite feature B" }],
+      testPlan: {
+        version: "1.0",
+        tests: [{ selector: "suite feature A" }, { selector: "suite feature B" }],
+      },
+    });
+
+    await run(AgentSelectCommand, [
+      "agent",
+      "select",
+      "--from",
+      "./agent-output",
+      "--preset",
+      "failed",
+      "--output",
+      "./testplan.json",
+    ]);
+
+    expect(fsModule.mkdir).toHaveBeenCalledWith("/cwd", { recursive: true });
+    expect(fsModule.writeFile).toHaveBeenCalledWith(
+      "/cwd/testplan.json",
+      `{\n  "version": "1.0",\n  "tests": [\n    {\n      "selector": "suite feature A"\n    },\n    {\n      "selector": "suite feature B"\n    }\n  ]\n}\n`,
+      "utf-8",
+    );
+    expect(consoleModule.log).toHaveBeenNthCalledWith(1, "agent testplan: /cwd/testplan.json");
+    expect(consoleModule.log).toHaveBeenNthCalledWith(2, "agent selection source: /tmp/agent-output");
+    expect(consoleModule.log).toHaveBeenNthCalledWith(3, "agent selection preset: failed");
+    expect(consoleModule.log).toHaveBeenNthCalledWith(4, "agent selection tests: 2");
+  });
 });
diff --git a/packages/cli/test/commands/run.integration.test.ts b/packages/cli/test/commands/run.integration.test.ts
index a93edb3a334..cc884d49ede 100644
--- a/packages/cli/test/commands/run.integration.test.ts
+++ b/packages/cli/test/commands/run.integration.test.ts
@@ -7,7 +7,7 @@ import process from "node:process";
 import { fileURLToPath } from "node:url";
 import { promisify } from "node:util";
 
-import { epic, feature, label, story } from "allure-js-commons";
+import { attachment, epic, feature, label, step, story } from "allure-js-commons";
 import { afterAll, beforeAll, beforeEach, describe, expect, it } from "vitest";
 
 const execFileAsync = promisify(execFile);
@@ -43,6 +43,14 @@ const pathExists = async (filePath: string) => {
   }
 };
 
+// Attaches stdout as text/plain (placeholder "<empty>" when it is empty) and stderr only when it is non-empty.
+const attachCommandOutput = async (name: string, { stdout, stderr }: { stdout: string; stderr: string }) => {
+  await attachment(`${name} stdout`, stdout || "<empty>", "text/plain");
+  if (stderr) {
+    await attachment(`${name} stderr`, stderr, "text/plain");
+  }
+};
+
 const writeJson = async (filePath: string, value: unknown) => {
   await writeFile(filePath, `${JSON.stringify(value, null, 2)}\n`, "utf-8");
 };
@@ -123,39 +131,144 @@ describe("run command integration", () => {
     await rm(tempDir, { recursive: true, force: true });
   });
 
-  it("writes the full agent directory contract in the built CLI path", async () => {
-    const fixtureDir = join(tempDir, "built-run");
+  it("prints the agent task map from built CLI help", async () => {
+    let stdout = "";
+    let stderr = "";
+
+    await step("run built agent help", async () => {
+      const helpResult = await runCommand(process.execPath, [cliPath, "agent", "--help"]);
+
+      stdout = helpResult.stdout;
+      stderr = helpResult.stderr;
+      await attachCommandOutput("agent help", helpResult);
+    });
+
+    await step("verify agent task map help", async () => {
+      expect(stderr).toBe("");
+      expect(stdout).toContain("Multiple commands match your selection:");
+      expect(stdout).toContain("Agent task map:");
+      expect(stdout).toContain("allure --version");
+      expect(stdout).toContain("allure agent --help");
+      expect(stdout).toContain("allure agent capabilities");
+      expect(stdout).toContain("allure agent --goal ... -- <command>");
+      expect(stdout).toContain("allure agent latest");
+      expect(stdout).toContain("allure agent state-dir");
+      expect(stdout).toContain("allure agent select --latest");
+      expect(stdout).toContain("allure agent select --from <output-dir>");
+      expect(stdout).toContain("allure agent --rerun-latest -- <command>");
+      expect(stdout).toContain("allure agent --rerun-from <output-dir> -- <command>");
+      expect(stdout).toContain("ALLURE_AGENT_STATE_DIR=<dir>");
+    });
+  }, 240_000);
+
+  it("prints structured agent capabilities from the built CLI", async () => {
+    let stdout = "";
+    let stderr = "";
+
+    await step("run built agent capabilities command", async () => {
+      const result = await runYarnCommand(["allure", "agent", "capabilities", "--json"]);
+
+      stdout = result.stdout;
+      stderr = result.stderr;
+      await attachCommandOutput("agent capabilities", result);
+    });
+
+    await step("verify built agent capabilities output", async () => {
+      const capabilities = JSON.parse(stdout) as {
+        schema: string;
+        commands: {
+          run: {
+            supported: boolean;
+            options: string[];
+          };
+          latest: {
+            output: string[];
+          };
+          select: {
+            supported: boolean;
+            presets: string[];
+            output: string[];
+          };
+          query: {
+            supported: boolean;
+          };
+        };
+        expectations: {
+          inline: {
+            expected: {
+              fullNames: boolean;
+            };
+            forbidden: {
+              labels: boolean;
+              fullNames: boolean;
+            };
+            evidence: {
+              attachmentFilters: string[];
+            };
+          };
+        };
+        output: {
+          files: string[];
+        };
+        unsupported: {
+          discovery: boolean;
+          localAgentService: boolean;
+        };
+      };
+
+      expect(stderr).toBe("");
+      expect(capabilities.schema).toBe("allure-agent-capabilities/v1");
+      expect(capabilities.commands.run.supported).toBe(true);
+      expect(capabilities.commands.run.options).toContain("--expect-test");
+      expect(capabilities.commands.latest.output).toEqual(["agent output: <dir>", "agent index: <dir>/index.md"]);
+      expect(capabilities.commands.select.supported).toBe(true);
+      expect(capabilities.commands.select.output).toEqual([
+        "stdout-testplan-json",
+        "file-testplan-json",
+        "file-summary",
+      ]);
+      expect(capabilities.commands.select.presets).toEqual(["review", "failed", "unsuccessful", "all"]);
+      expect(capabilities.commands.query.supported).toBe(true);
+      expect(capabilities.expectations.inline.expected.fullNames).toBe(true);
+      expect(capabilities.expectations.inline.forbidden.labels).toBe(true);
+      expect(capabilities.expectations.inline.forbidden.fullNames).toBe(false);
+      expect(capabilities.expectations.inline.evidence.attachmentFilters).toEqual(["name", "content-type"]);
+      expect(capabilities.output.files).toContain("manifest/run.json");
+      expect(capabilities.unsupported.discovery).toBe(true);
+      expect(capabilities.unsupported).not.toHaveProperty("query");
+      expect(capabilities.unsupported.localAgentService).toBe(true);
+    });
+  }, 240_000);
+
+  it("runs the built agent command with an agent-only profile", async () => {
+    const fixtureDir = join(tempDir, "built-agent");
+    const homeDir = join(fixtureDir, "home");
     const outputDir = join(fixtureDir, "agent-output");
     const reportDir = join(fixtureDir, "report");
     const expectationsPath = join(fixtureDir, "expected.yaml");
     const configPath = join(fixtureDir, "allurerc.mjs");
     const emitResultsPath = join(fixtureDir, "emit-results.mjs");
-    const projectGuidePath = join(fixtureDir, "docs", "allure-agent-mode.md");
-    const expectationsSource = `goal: Validate built CLI agent output
-task_id: cli-integration
+    const expectationsSource = `goal: Validate built CLI agent command
+task_id: cli-agent-integration
 expected:
   environments:
     - default
 notes:
-  - The legacy run invocation should delegate to the agent command contract.
-`;
-    const projectGuideSource = `# Fixture Agent Guide
-
-- This guide belongs to the fixture cwd used by the legacy run compatibility test.
+  - The agent command should ignore configured report and export plugins.
 `;
     const configSource = `
 export default {
-  name: "CLI Integration Report",
+  name: "CLI Agent Report",
   output: ${JSON.stringify(reportDir)},
   plugins: {
     awesome: {
       options: {
-        reportName: "CLI Integration Report"
+        reportName: "CLI Agent Report"
       }
     },
     dashboard: {
       options: {
-        reportName: "CLI Integration Dashboard"
+        reportName: "CLI Agent Dashboard"
       }
     },
     testops: {
@@ -177,111 +290,141 @@ await cp(fixture, join(outDir, \`\${randomUUID()}-result.json\`));
 console.log("emitted simple result");
 `.trimStart();
 
-    await mkdir(join(fixtureDir, "docs"), { recursive: true });
-    await writeFile(expectationsPath, expectationsSource, "utf-8");
-    await writeFile(configPath, configSource, "utf-8");
-    await writeFile(emitResultsPath, emitResultsSource, "utf-8");
-    await writeFile(projectGuidePath, projectGuideSource, "utf-8");
+    let expectedStateDir = "";
+    let stdout = "";
+    let stderr = "";
+    let latestStdout = "";
+    let latestStderr = "";
+    let stateDirStdout = "";
+    let stateDirStderr = "";
+
+    await step("prepare built agent fixture", async () => {
+      await mkdir(fixtureDir, { recursive: true });
+      const resolvedFixtureDir = await realpath(fixtureDir);
+      expectedStateDir = join(
+        tmpdir(),
+        `allure-agent-state-${createHash("sha256").update(resolvedFixtureDir).digest("hex").slice(0, 16)}`,
+      );
+      await writeFile(expectationsPath, expectationsSource, "utf-8");
+      await writeFile(configPath, configSource, "utf-8");
+      await writeFile(emitResultsPath, emitResultsSource, "utf-8");
+      await attachment(
+        "fixture paths",
+        JSON.stringify({ fixtureDir, outputDir, expectationsPath, expectedStateDir }, null, 2),
+        "application/json",
+      );
+    });
 
-    const { stdout, stderr } = await runCommand(
-      process.execPath,
-      [cliPath, "run", "--config", configPath, "--cwd", fixtureDir, "--", "node", emitResultsPath, simpleResultFixture],
-      {
+    await step("run built agent command and state commands", async () => {
+      const runResult = await runCommand(
+        process.execPath,
+        [
+          cliPath,
+          "agent",
+          "--config",
+          configPath,
+          "--cwd",
+          fixtureDir,
+          "--output",
+          outputDir,
+          "--expectations",
+          expectationsPath,
+          "--",
+          "node",
+          emitResultsPath,
+          simpleResultFixture,
+        ],
+        {
+          env: {
+            ...process.env,
+            HOME: homeDir,
+          },
+        },
+      );
+      stdout = runResult.stdout;
+      stderr = runResult.stderr;
+      await attachCommandOutput("agent command", runResult);
+
+      const latestResult = await runCommand(process.execPath, [cliPath, "agent", "latest", "--cwd", fixtureDir], {
         env: {
           ...process.env,
-          ALLURE_AGENT_OUTPUT: outputDir,
-          ALLURE_AGENT_EXPECTATIONS: expectationsPath,
+          HOME: homeDir,
         },
-      },
-    );
-
-    await expect(stat(join(outputDir, "index.md"))).resolves.toBeTruthy();
-    await expect(stat(join(outputDir, "AGENTS.md"))).resolves.toBeTruthy();
-    await expect(stat(join(outputDir, "manifest", "run.json"))).resolves.toBeTruthy();
-    await expect(stat(join(outputDir, "manifest", "tests.jsonl"))).resolves.toBeTruthy();
-    await expect(stat(join(outputDir, "manifest", "findings.jsonl"))).resolves.toBeTruthy();
-
-    const runManifest = JSON.parse(await readFile(join(outputDir, "manifest", "run.json"), "utf-8")) as {
-      command: string | null;
-      expectations_present: boolean;
-      paths: {
-        expected_manifest: string | null;
-        project_guide: string | null;
+      });
+      latestStdout = latestResult.stdout;
+      latestStderr = latestResult.stderr;
+      await attachCommandOutput("agent latest", latestResult);
+
+      const stateDirResult = await runCommand(process.execPath, [cliPath, "agent", "state-dir", "--cwd", fixtureDir], {
+        env: {
+          ...process.env,
+          HOME: homeDir,
+        },
+      });
+      stateDirStdout = stateDirResult.stdout;
+      stateDirStderr = stateDirResult.stderr;
+      await attachCommandOutput("agent state-dir", stateDirResult);
+    });
+
+    await step("verify built agent output contract", async () => {
+      await expect(stat(join(outputDir, "index.md"))).resolves.toBeTruthy();
+      await expect(stat(join(outputDir, "AGENTS.md"))).resolves.toBeTruthy();
+      await expect(stat(join(outputDir, "manifest", "run.json"))).resolves.toBeTruthy();
+      await expect(stat(join(outputDir, "manifest", "tests.jsonl"))).resolves.toBeTruthy();
+      await expect(stat(join(outputDir, "manifest", "findings.jsonl"))).resolves.toBeTruthy();
+
+      const runManifest = JSON.parse(await readFile(join(outputDir, "manifest", "run.json"), "utf-8")) as {
+        command: string | null;
+        expectations_present: boolean;
+        paths: {
+          expected_manifest: string | null;
+        };
       };
-    };
-    const indexContent = await readFile(join(outputDir, "index.md"), "utf-8");
-    const findingsContent = await readFile(join(outputDir, "manifest", "findings.jsonl"), "utf-8");
-    const expectedCopy = await readFile(join(outputDir, "manifest", "expected.json"), "utf-8");
-    const agentsGuide = await readFile(join(outputDir, "AGENTS.md"), "utf-8");
-    const copiedProjectGuide = await readFile(join(outputDir, "project", "docs", "allure-agent-mode.md"), "utf-8");
-
-    expect(runManifest.command).toBe(`node ${emitResultsPath} ${simpleResultFixture}`);
-    expect(runManifest.expectations_present).toBe(true);
-    expect(runManifest.paths.expected_manifest).toBe("manifest/expected.json");
-    expect(runManifest.paths.project_guide).toBe("project/docs/allure-agent-mode.md");
-    expect(expectedCopy).toContain('"task_id": "cli-integration"');
-    expect(agentsGuide).toContain("[project guidance](project/docs/allure-agent-mode.md)");
-    expect(copiedProjectGuide).toContain("# Fixture Agent Guide");
-    expect(indexContent).toContain("# CLI Integration Report");
-    expect(indexContent).toContain("## Expected Scope");
-    expect(indexContent).toContain("## Advisory Check Summary");
-    expect(indexContent).toContain("## Passed");
-    expect(findingsContent).toBe("");
-    expect(await pathExists(join(outputDir, "awesome"))).toBe(false);
-    expect(await pathExists(join(outputDir, "dashboard"))).toBe(false);
-    expect(stdout).toContain(`agent output: ${outputDir}`);
-    expect(stdout).toContain(`agent expectations: ${expectationsPath}`);
-    expect(stdout).toContain(`node ${emitResultsPath} ${simpleResultFixture}`);
-    expect(stdout).toContain("emitted simple result");
-    expect(stdout).not.toContain("process finished with code");
-    expect(stdout).not.toContain("exit code ");
-    expect(stdout).not.toContain("[DEP0190]");
-    expect(stdout).not.toContain("NO_COLOR");
-    expect(stderr).not.toContain("[DEP0190]");
-    expect(stderr).not.toContain("NO_COLOR");
-    expect(stderr).not.toContain("Allure TestOps");
+      const agentsGuide = await readFile(join(outputDir, "AGENTS.md"), "utf-8");
+      const findingsContent = await readFile(join(outputDir, "manifest", "findings.jsonl"), "utf-8");
+
+      expect(runManifest.command).toBe(`node ${emitResultsPath} ${simpleResultFixture}`);
+      expect(runManifest.expectations_present).toBe(true);
+      expect(runManifest.paths.expected_manifest).toBe("manifest/expected.json");
+      expect(agentsGuide).toContain("## Command Task Map");
+      expect(agentsGuide).toContain("manifest/run.json");
+      expect(await pathExists(join(outputDir, "project"))).toBe(false);
+      expect(findingsContent).toBe("");
+      expect(await pathExists(join(outputDir, "awesome"))).toBe(false);
+      expect(await pathExists(join(outputDir, "dashboard"))).toBe(false);
+      expect(stdout).toContain(`node ${emitResultsPath} ${simpleResultFixture}`);
+      expect(stdout).toContain(`agent output: ${outputDir}`);
+      expect(stdout).toContain(`agent index: ${join(outputDir, "index.md")}`);
+      expect(stdout).toContain(`agent expectations: ${expectationsPath}`);
+      expect(stdout).toContain("emitted simple result");
+      expect(stdout).not.toContain("process finished with code");
+      expect(stdout).not.toContain("exit code ");
+      expect(stdout).not.toContain("[DEP0190]");
+      expect(stdout).not.toContain("NO_COLOR");
+      expect(stderr).not.toContain("[DEP0190]");
+      expect(stderr).not.toContain("NO_COLOR");
+      expect(stderr).not.toContain("Allure TestOps");
+      expect(latestStdout).toContain(`agent output: ${outputDir}`);
+      expect(latestStdout).toContain(`agent index: ${join(outputDir, "index.md")}`);
+      expect(latestStderr).toBe("");
+      expect(stateDirStdout.trim()).toBe(expectedStateDir);
+      expect(stateDirStderr).toBe("");
+    });
   }, 240_000);
 
-  it("runs the built agent command with an agent-only profile", async () => {
-    const fixtureDir = join(tempDir, "built-agent");
+  it("runs agent mode with --expect-test to require a newly added test", async () => {
+    const fixtureDir = join(tempDir, "agent-expect-test");
     const homeDir = join(fixtureDir, "home");
     const outputDir = join(fixtureDir, "agent-output");
     const reportDir = join(fixtureDir, "report");
-    const expectationsPath = join(fixtureDir, "expected.yaml");
     const configPath = join(fixtureDir, "allurerc.mjs");
-    const emitResultsPath = join(fixtureDir, "emit-results.mjs");
-    const projectGuidePath = join(fixtureDir, "docs", "allure-agent-mode.md");
-    const expectationsSource = `goal: Validate built CLI agent command
-task_id: cli-agent-integration
-expected:
-  environments:
-    - default
-notes:
-  - The agent command should ignore configured report and export plugins.
-`;
-    const projectGuideSource = `# Fixture Agent Guide
-
-- This guide belongs to the fixture cwd used by the built agent integration test.
-`;
+    const emitResultsPath = join(fixtureDir, "emit-new-test-result.mjs");
+    const resultFixturePath = join(fixtureDir, "new-test-result.json");
+    const expectedFullName = "agent flow reports the newly added test";
     const configSource = `
 export default {
-  name: "CLI Agent Report",
-  output: ${JSON.stringify(reportDir)},
-  plugins: {
-    awesome: {
-      options: {
-        reportName: "CLI Agent Report"
-      }
-    },
-    dashboard: {
-      options: {
-        reportName: "CLI Agent Dashboard"
-      }
-    },
-    testops: {
-      options: {}
-    }
-  }
+  name: "CLI Agent Expect Test Report",
+  output: ${JSON.stringify(reportDir)}
 };
 `.trimStart();
     const emitResultsSource = `
@@ -294,108 +437,106 @@ const outDir = join(process.cwd(), "allure-results");
 
 await mkdir(outDir, { recursive: true });
 await cp(fixture, join(outDir, \`\${randomUUID()}-result.json\`));
-console.log("emitted simple result");
+console.log("emitted newly added test result");
 `.trimStart();
 
-    await mkdir(join(fixtureDir, "docs"), { recursive: true });
-    const resolvedFixtureDir = await realpath(fixtureDir);
-    const expectedStateDir = join(
-      tmpdir(),
-      `allure-agent-state-${createHash("sha256").update(resolvedFixtureDir).digest("hex").slice(0, 16)}`,
-    );
-    await writeFile(expectationsPath, expectationsSource, "utf-8");
-    await writeFile(configPath, configSource, "utf-8");
-    await writeFile(emitResultsPath, emitResultsSource, "utf-8");
-    await writeFile(projectGuidePath, projectGuideSource, "utf-8");
-
-    const { stdout, stderr } = await runCommand(
-      process.execPath,
-      [
-        cliPath,
-        "agent",
-        "--config",
-        configPath,
-        "--cwd",
-        fixtureDir,
-        "--output",
-        outputDir,
-        "--expectations",
-        expectationsPath,
-        "--",
-        "node",
-        emitResultsPath,
-        simpleResultFixture,
-      ],
-      {
-        env: {
-          ...process.env,
-          HOME: homeDir,
-        },
-      },
-    );
-    const { stdout: latestStdout, stderr: latestStderr } = await runCommand(
-      process.execPath,
-      [cliPath, "agent", "latest", "--cwd", fixtureDir],
-      {
-        env: {
-          ...process.env,
-          HOME: homeDir,
-        },
-      },
-    );
-    const { stdout: stateDirStdout, stderr: stateDirStderr } = await runCommand(
-      process.execPath,
-      [cliPath, "agent", "state-dir", "--cwd", fixtureDir],
-      {
-        env: {
-          ...process.env,
-          HOME: homeDir,
+    let stdout = "";
+    let stderr = "";
+
+    await step("prepare new test fixture", async () => {
+      await mkdir(fixtureDir, { recursive: true });
+      const baseResult = JSON.parse(await readFile(simpleResultFixture, "utf-8")) as Record<string, unknown>;
+      const expectedResult = {
+        ...baseResult,
+        uuid: "agent-expect-test-uuid",
+        historyId: "agent-expect-test-history",
+        name: "reports the newly added test",
+        fullName: expectedFullName,
+        status: "passed",
+        labels: [
+          { name: "suite", value: "agent flow" },
+          { name: "feature", value: "expect-test" },
+        ],
+      };
+
+      await writeFile(configPath, configSource, "utf-8");
+      await writeFile(emitResultsPath, emitResultsSource, "utf-8");
+      await writeJson(resultFixturePath, expectedResult);
+      await attachment(
+        "expect-test fixture",
+        JSON.stringify({ fixtureDir, outputDir, expectedFullName }, null, 2),
+        "application/json",
+      );
+    });
+
+    await step("run built agent command with expected full test name", async () => {
+      const runResult = await runCommand(
+        process.execPath,
+        [
+          cliPath,
+          "agent",
+          "--config",
+          configPath,
+          "--cwd",
+          fixtureDir,
+          "--output",
+          outputDir,
+          "--goal",
+          "Validate newly added test is reported",
+          "--expect-tests",
+          "1",
+          "--expect-test",
+          expectedFullName,
+          "--",
+          "node",
+          emitResultsPath,
+          resultFixturePath,
+        ],
+        {
+          env: {
+            ...process.env,
+            HOME: homeDir,
+          },
         },
-      },
-    );
-
-    await expect(stat(join(outputDir, "index.md"))).resolves.toBeTruthy();
-    await expect(stat(join(outputDir, "AGENTS.md"))).resolves.toBeTruthy();
-    await expect(stat(join(outputDir, "manifest", "run.json"))).resolves.toBeTruthy();
-    await expect(stat(join(outputDir, "manifest", "tests.jsonl"))).resolves.toBeTruthy();
-    await expect(stat(join(outputDir, "manifest", "findings.jsonl"))).resolves.toBeTruthy();
-
-    const runManifest = JSON.parse(await readFile(join(outputDir, "manifest", "run.json"), "utf-8")) as {
-      command: string | null;
-      expectations_present: boolean;
-      paths: {
-        expected_manifest: string | null;
-        project_guide: string | null;
+      );
+
+      stdout = runResult.stdout;
+      stderr = runResult.stderr;
+      await attachCommandOutput("agent expect-test", runResult);
+    });
+
+    await step("verify expect-test output", async () => {
+      const expectedManifest = JSON.parse(await readFile(join(outputDir, "manifest", "expected.json"), "utf-8")) as {
+        expected: {
+          full_names?: string[];
+          test_count?: number;
+        };
       };
-    };
-    const agentsGuide = await readFile(join(outputDir, "AGENTS.md"), "utf-8");
-    const copiedProjectGuide = await readFile(join(outputDir, "project", "docs", "allure-agent-mode.md"), "utf-8");
-    const findingsContent = await readFile(join(outputDir, "manifest", "findings.jsonl"), "utf-8");
-
-    expect(runManifest.command).toBe(`node ${emitResultsPath} ${simpleResultFixture}`);
-    expect(runManifest.expectations_present).toBe(true);
-    expect(runManifest.paths.expected_manifest).toBe("manifest/expected.json");
-    expect(runManifest.paths.project_guide).toBe("project/docs/allure-agent-mode.md");
-    expect(agentsGuide).toContain("[project guidance](project/docs/allure-agent-mode.md)");
-    expect(copiedProjectGuide).toContain("# Fixture Agent Guide");
-    expect(findingsContent).toBe("");
-    expect(await pathExists(join(outputDir, "awesome"))).toBe(false);
-    expect(await pathExists(join(outputDir, "dashboard"))).toBe(false);
-    expect(stdout).toContain(`node ${emitResultsPath} ${simpleResultFixture}`);
-    expect(stdout).toContain(`agent output: ${outputDir}`);
-    expect(stdout).toContain(`agent expectations: ${expectationsPath}`);
-    expect(stdout).toContain("emitted simple result");
-    expect(stdout).not.toContain("process finished with code");
-    expect(stdout).not.toContain("exit code ");
-    expect(stdout).not.toContain("[DEP0190]");
-    expect(stdout).not.toContain("NO_COLOR");
-    expect(stderr).not.toContain("[DEP0190]");
-    expect(stderr).not.toContain("NO_COLOR");
-    expect(stderr).not.toContain("Allure TestOps");
-    expect(latestStdout.trim()).toBe(outputDir);
-    expect(latestStderr).toBe("");
-    expect(stateDirStdout.trim()).toBe(expectedStateDir);
-    expect(stateDirStderr).toBe("");
+      const runManifest = JSON.parse(await readFile(join(outputDir, "manifest", "run.json"), "utf-8")) as {
+        expectations_present: boolean;
+      };
+      const tests = (await readFile(join(outputDir, "manifest", "tests.jsonl"), "utf-8"))
+        .trim()
+        .split("\n")
+        .filter(Boolean)
+        .map((line) => JSON.parse(line) as { full_name: string });
+      const findingsContent = await readFile(join(outputDir, "manifest", "findings.jsonl"), "utf-8");
+      const indexMarkdown = await readFile(join(outputDir, "index.md"), "utf-8");
+
+      expect(runManifest.expectations_present).toBe(true);
+      expect(expectedManifest.expected.test_count).toBe(1);
+      expect(expectedManifest.expected.full_names).toEqual([expectedFullName]);
+      expect(tests).toEqual([
+        expect.objectContaining({
+          full_name: expectedFullName,
+        }),
+      ]);
+      expect(findingsContent).toBe("");
+      expect(indexMarkdown).toContain(expectedFullName);
+      expect(stdout).toContain("agent expectations: CLI options");
+      expect(stdout).toContain("emitted newly added test result");
+      expect(stderr).toBe("");
+    });
   }, 240_000);
 
   it("supports agent select and rerun-from with the default review preset", async () => {
@@ -407,6 +548,7 @@ console.log("emitted simple result");
     const configPath = join(fixtureDir, "allurerc.mjs");
     const emitResultsPath = join(fixtureDir, "emit-plan-results.mjs");
     const fixturesManifestPath = join(fixtureDir, "fixtures.json");
+    const selectedTestPlanPath = join(fixtureDir, "selected-testplan.json");
     const featureAFixturePath = join(fixtureDir, "feature-a-result.json");
     const featureBFixturePath = join(fixtureDir, "feature-b-result.json");
     const previousManifestDir = join(previousOutputDir, "manifest");
@@ -447,79 +589,65 @@ for (const fixture of fixtures) {
 console.log(\`selected selectors: \${Array.from(selectors).join(",")}\`);
 `.trimStart();
 
-    await mkdir(previousManifestDir, { recursive: true });
-    await writeFile(configPath, configSource, "utf-8");
-    await writeFile(emitResultsPath, emitResultsSource, "utf-8");
-
-    const baseResult = JSON.parse(await readFile(simpleResultFixture, "utf-8")) as Record<string, unknown>;
-    const featureAResult = {
-      ...baseResult,
-      uuid: "feature-a-uuid",
-      historyId: "feature-a-history",
-      name: "feature A",
-      fullName: "suite feature A",
-      status: "passed",
-      labels: [
-        { name: "suite", value: "suite" },
-        { name: "feature", value: "checkout" },
-        { name: "priority", value: "high" },
-      ],
-    };
-    const featureBResult = {
-      ...baseResult,
-      uuid: "feature-b-uuid",
-      historyId: "feature-b-history",
-      name: "feature B",
-      fullName: "suite feature B",
-      status: "passed",
-      labels: [
-        { name: "suite", value: "suite" },
-        { name: "feature", value: "payments" },
-        { name: "priority", value: "low" },
-      ],
-    };
+    await step("prepare previous agent output and rerun fixtures", async () => {
+      await mkdir(previousManifestDir, { recursive: true });
+      await writeFile(configPath, configSource, "utf-8");
+      await writeFile(emitResultsPath, emitResultsSource, "utf-8");
+
+      const baseResult = JSON.parse(await readFile(simpleResultFixture, "utf-8")) as Record<string, unknown>;
+      const featureAResult = {
+        ...baseResult,
+        uuid: "feature-a-uuid",
+        historyId: "feature-a-history",
+        name: "feature A",
+        fullName: "suite feature A",
+        status: "passed",
+        labels: [
+          { name: "suite", value: "suite" },
+          { name: "feature", value: "checkout" },
+          { name: "priority", value: "high" },
+        ],
+      };
+      const featureBResult = {
+        ...baseResult,
+        uuid: "feature-b-uuid",
+        historyId: "feature-b-history",
+        name: "feature B",
+        fullName: "suite feature B",
+        status: "passed",
+        labels: [
+          { name: "suite", value: "suite" },
+          { name: "feature", value: "payments" },
+          { name: "priority", value: "low" },
+        ],
+      };
 
-    await writeJson(featureAFixturePath, featureAResult);
-    await writeJson(featureBFixturePath, featureBResult);
-    await writeJson(fixturesManifestPath, [
-      {
-        selector: "suite feature A",
-        file: featureAFixturePath,
-      },
-      {
-        selector: "suite feature B",
-        file: featureBFixturePath,
-      },
-    ]);
-
-    await writeJson(join(previousManifestDir, "run.json"), {
-      schema_version: "allure-agent-output/v1",
-      report_uuid: "previous-report",
-      generated_at: "2026-04-15T18:00:00.000Z",
-      command: "node prior-run",
-      actual_exit_code: 0,
-      original_exit_code: 0,
-      exit_code: {
-        original: 0,
-        actual: 0,
-      },
-      summary: {
-        stats: {
-          total: 2,
-          failed: 1,
-          broken: 0,
-          skipped: 0,
-          unknown: 0,
-          passed: 1,
+      await writeJson(featureAFixturePath, featureAResult);
+      await writeJson(featureBFixturePath, featureBResult);
+      await writeJson(fixturesManifestPath, [
+        {
+          selector: "suite feature A",
+          file: featureAFixturePath,
         },
-        duration_ms: {
-          total: 10,
-          average: 5,
-          max: 5,
+        {
+          selector: "suite feature B",
+          file: featureBFixturePath,
         },
-        environments: [
-          {
-            environmentId: "default",
+      ]);
+
+      await writeJson(join(previousManifestDir, "run.json"), {
+        schema_version: "allure-agent-output/v1",
+        report_uuid: "previous-report",
+        generated_at: "2026-04-15T18:00:00.000Z",
+        command: "node prior-run",
+        actual_exit_code: 0,
+        original_exit_code: 0,
+        exit_code: {
+          original: 0,
+          actual: 0,
+        },
+        summary: {
+          stats: {
             total: 2,
             failed: 1,
             broken: 0,
@@ -527,165 +655,230 @@ console.log(\`selected selectors: \${Array.from(selectors).join(",")}\`);
             unknown: 0,
             passed: 1,
           },
-        ],
-      },
-      paths: {
-        index_md: "index.md",
-        agents_md: "AGENTS.md",
-        tests_manifest: "manifest/tests.jsonl",
-        findings_manifest: "manifest/findings.jsonl",
-        expected_manifest: null,
-        project_guide: null,
-        process_logs: {
-          stdout: null,
-          stderr: null,
-        },
-      },
-      expectations_present: false,
-      check_summary: {
-        total: 1,
-        countsBySeverity: {
-          high: 1,
-          warning: 0,
-          info: 0,
+          duration_ms: {
+            total: 10,
+            average: 5,
+            max: 5,
+          },
+          environments: [
+            {
+              environmentId: "default",
+              total: 2,
+              failed: 1,
+              broken: 0,
+              skipped: 0,
+              unknown: 0,
+              passed: 1,
+            },
+          ],
         },
-        countsByCategory: {
-          bootstrap: 0,
-          scope: 0,
-          metadata: 0,
-          evidence: 1,
-          smells: 0,
+        paths: {
+          index_md: "index.md",
+          agents_md: "AGENTS.md",
+          tests_manifest: "manifest/tests.jsonl",
+          findings_manifest: "manifest/findings.jsonl",
+          expected_manifest: null,
+          process_logs: {
+            stdout: null,
+            stderr: null,
+          },
         },
-      },
-      agent_context: {
-        agent_name: null,
-        loop_id: null,
-        task_id: null,
-        conversation_id: null,
-      },
-    });
-    await writeJsonl(join(previousManifestDir, "tests.jsonl"), [
-      {
-        environment_id: "default",
-        history_id: "feature-a-history",
-        test_result_id: "feature-a-tr",
-        full_name: "suite feature A",
-        package: "suite",
-        labels: [
-          { name: "feature", value: "checkout" },
-          { name: "priority", value: "high" },
-        ],
-        status: "failed",
-        duration_ms: 5,
-        retries: 0,
-        flaky: false,
-        scope_match: "match",
-        finding_counts: {
+        expectations_present: false,
+        check_summary: {
           total: 1,
-          high: 1,
-          warning: 0,
-          info: 0,
+          countsBySeverity: {
+            high: 1,
+            warning: 0,
+            info: 0,
+          },
+          countsByCategory: {
+            bootstrap: 0,
+            scope: 0,
+            metadata: 0,
+            evidence: 1,
+            smells: 0,
+          },
         },
-        markdown_path: "tests/default/feature-a.md",
-        assets_dir: "tests/default/feature-a.assets",
-      },
-      {
-        environment_id: "default",
-        history_id: "feature-b-history",
-        test_result_id: "feature-b-tr",
-        full_name: "suite feature B",
-        package: "suite",
-        labels: [
-          { name: "feature", value: "payments" },
-          { name: "priority", value: "low" },
-        ],
-        status: "passed",
-        duration_ms: 5,
-        retries: 0,
-        flaky: false,
-        scope_match: "match",
-        finding_counts: {
-          total: 0,
-          high: 0,
-          warning: 0,
-          info: 0,
+        agent_context: {
+          agent_name: null,
+          loop_id: null,
+          task_id: null,
+          conversation_id: null,
         },
-        markdown_path: "tests/default/feature-b.md",
-        assets_dir: "tests/default/feature-b.assets",
-      },
-    ]);
-    await writeJsonl(join(previousManifestDir, "findings.jsonl"), [
-      {
-        finding_id: "finding-feature-a",
-        subject: "tests/default/feature-a.md",
-        severity: "high",
-        category: "evidence",
-        check_name: "failed-without-useful-steps",
-        message: "Feature A needs focused rerun coverage",
-        explanation: "Feature A should be the only review-targeted rerun candidate.",
-        evidence_paths: [],
-        remediation_hint: "Rerun only feature A.",
-      },
-    ]);
-
-    const { stdout: selectStdout, stderr: selectStderr } = await runCommand(
-      process.execPath,
-      [cliPath, "agent", "select", "--from", previousOutputDir],
-      {
-        env: {
-          ...process.env,
-          HOME: homeDir,
+      });
+      await writeJsonl(join(previousManifestDir, "tests.jsonl"), [
+        {
+          environment_id: "default",
+          history_id: "feature-a-history",
+          test_result_id: "feature-a-tr",
+          full_name: "suite feature A",
+          package: "suite",
+          labels: [
+            { name: "feature", value: "checkout" },
+            { name: "priority", value: "high" },
+          ],
+          status: "failed",
+          duration_ms: 5,
+          retries: 0,
+          flaky: false,
+          scope_match: "match",
+          finding_counts: {
+            total: 1,
+            high: 1,
+            warning: 0,
+            info: 0,
+          },
+          markdown_path: "tests/default/feature-a.md",
+          assets_dir: "tests/default/feature-a.assets",
         },
-      },
-    );
-    const { stdout, stderr } = await runCommand(
-      process.execPath,
-      [
-        cliPath,
-        "agent",
-        "--config",
-        configPath,
-        "--cwd",
-        fixtureDir,
-        "--output",
-        outputDir,
-        "--rerun-from",
-        previousOutputDir,
-        "--",
-        "node",
-        emitResultsPath,
-        fixturesManifestPath,
-      ],
-      {
-        env: {
-          ...process.env,
-          HOME: homeDir,
+        {
+          environment_id: "default",
+          history_id: "feature-b-history",
+          test_result_id: "feature-b-tr",
+          full_name: "suite feature B",
+          package: "suite",
+          labels: [
+            { name: "feature", value: "payments" },
+            { name: "priority", value: "low" },
+          ],
+          status: "passed",
+          duration_ms: 5,
+          retries: 0,
+          flaky: false,
+          scope_match: "match",
+          finding_counts: {
+            total: 0,
+            high: 0,
+            warning: 0,
+            info: 0,
+          },
+          markdown_path: "tests/default/feature-b.md",
+          assets_dir: "tests/default/feature-b.assets",
+        },
+      ]);
+      await writeJsonl(join(previousManifestDir, "findings.jsonl"), [
+        {
+          finding_id: "finding-feature-a",
+          subject: "tests/default/feature-a.md",
+          severity: "high",
+          category: "evidence",
+          check_name: "failed-without-useful-steps",
+          message: "Feature A needs focused rerun coverage",
+          explanation: "Feature A should be the only review-targeted rerun candidate.",
+          evidence_paths: [],
+          remediation_hint: "Rerun only feature A.",
         },
-      },
-    );
+      ]);
+      await attachment(
+        "previous run summary",
+        JSON.stringify({ previousOutputDir, selected: "suite feature A", skipped: "suite feature B" }, null, 2),
+        "application/json",
+      );
+    });
 
-    expect(JSON.parse(selectStdout)).toEqual({
-      version: "1.0",
-      tests: [
+    let selectStdout = "";
+    let selectStderr = "";
+    let selectFileStdout = "";
+    let selectFileStderr = "";
+    let stdout = "";
+    let stderr = "";
+
+    await step("select tests and rerun built agent command", async () => {
+      const selectResult = await runCommand(
+        process.execPath,
+        [cliPath, "agent", "select", "--from", previousOutputDir],
         {
-          selector: "suite feature A",
+          env: {
+            ...process.env,
+            HOME: homeDir,
+          },
+        },
+      );
+      selectStdout = selectResult.stdout;
+      selectStderr = selectResult.stderr;
+      await attachCommandOutput("agent select", selectResult);
+
+      const selectFileResult = await runCommand(
+        process.execPath,
+        [cliPath, "agent", "select", "--from", previousOutputDir, "--output", selectedTestPlanPath],
+        {
+          env: {
+            ...process.env,
+            HOME: homeDir,
+          },
+        },
+      );
+      selectFileStdout = selectFileResult.stdout;
+      selectFileStderr = selectFileResult.stderr;
+      await attachCommandOutput("agent select output file", selectFileResult);
+
+      const runResult = await runCommand(
+        process.execPath,
+        [
+          cliPath,
+          "agent",
+          "--config",
+          configPath,
+          "--cwd",
+          fixtureDir,
+          "--output",
+          outputDir,
+          "--rerun-from",
+          previousOutputDir,
+          "--",
+          "node",
+          emitResultsPath,
+          fixturesManifestPath,
+        ],
+        {
+          env: {
+            ...process.env,
+            HOME: homeDir,
+          },
         },
-      ],
+      );
+      stdout = runResult.stdout;
+      stderr = runResult.stderr;
+      await attachCommandOutput("agent rerun-from", runResult);
+    });
+
+    await step("verify selected rerun output", async () => {
+      expect(JSON.parse(selectStdout)).toEqual({
+        version: "1.0",
+        tests: [
+          {
+            selector: "suite feature A",
+          },
+        ],
+      });
+      expect(selectStderr).toBe("");
+      expect(JSON.parse(await readFile(selectedTestPlanPath, "utf-8"))).toEqual({
+        version: "1.0",
+        tests: [
+          {
+            selector: "suite feature A",
+          },
+        ],
+      });
+      expect(selectFileStdout).toContain(`agent testplan: ${selectedTestPlanPath}`);
+      expect(selectFileStdout).toContain(`agent selection source: ${previousOutputDir}`);
+      expect(selectFileStdout).toContain("agent selection preset: review");
+      expect(selectFileStdout).toContain("agent selection tests: 1");
+      expect(selectFileStderr).toBe("");
+      expect(stdout).toContain("selected selectors: suite feature A");
+      expect(stderr).toBe("");
+
+      const selectedTests = (await readFile(join(outputDir, "manifest", "tests.jsonl"), "utf-8"))
+        .trim()
+        .split("\n")
+        .filter(Boolean)
+        .map((line) => JSON.parse(line) as { full_name: string });
+
+      expect(selectedTests).toEqual([
+        expect.objectContaining({
+          full_name: "suite feature A",
+        }),
+      ]);
     });
-    expect(selectStderr).toBe("");
-    expect(stdout).toContain("selected selectors: suite feature A");
-    expect(stderr).toBe("");
-
-    const selectedTests = (await readFile(join(outputDir, "manifest", "tests.jsonl"), "utf-8"))
-      .trim()
-      .split("\n")
-      .filter(Boolean)
-      .map((line) => JSON.parse(line) as { full_name: string });
-
-    expect(selectedTests).toEqual([
-      expect.objectContaining({
-        full_name: "suite feature A",
-      }),
-    ]);
   }, 240_000);
 });
diff --git a/packages/cli/test/commands/run.test.ts b/packages/cli/test/commands/run.test.ts
index f10cbeba0ca..07e594ad11b 100644
--- a/packages/cli/test/commands/run.test.ts
+++ b/packages/cli/test/commands/run.test.ts
@@ -1,12 +1,9 @@
-import { resolve } from "node:path";
-
 import { readConfig } from "@allurereport/core";
 import AwesomePlugin from "@allurereport/plugin-awesome";
 import { epic, feature, label, story } from "allure-js-commons";
 import { run, UsageError } from "clipanion";
 import { type Mock, beforeEach, describe, expect, it, vi } from "vitest";
 
-import { executeAgentMode } from "../../src/commands/agent.js";
 import { RunCommand } from "../../src/commands/run.js";
 import { ALLURE_CLI_ACTIVE_COMMAND_ENV } from "../../src/utils/execution-context.js";
 
@@ -90,10 +87,6 @@ vi.mock("@allurereport/static-server", async (importOriginal) => ({
   ...(await importOriginal()),
   serve: vi.fn(),
 }));
-vi.mock("../../src/commands/agent.js", () => ({
-  executeAgentMode: vi.fn().mockResolvedValue(undefined),
-}));
-
 beforeEach(async () => {
   await epic("coverage");
   await feature("cli-run");
@@ -101,8 +94,6 @@ beforeEach(async () => {
   await label("coverage", "cli-run");
   vi.clearAllMocks();
   delete process.env[ALLURE_CLI_ACTIVE_COMMAND_ENV];
-  delete process.env.ALLURE_AGENT_OUTPUT;
-  delete process.env.ALLURE_AGENT_EXPECTATIONS;
 
   const { AllureReportMock } = await import("../utils.js");
 
@@ -235,35 +226,4 @@ describe("run command", () => {
 
     delete process.env[ALLURE_CLI_ACTIVE_COMMAND_ENV];
   });
-
-  it("should delegate legacy env-based agent mode to the agent command", async () => {
-    await epic("coverage");
-    await feature("agent-mode");
-    await story("run");
-    await label("coverage", "agent-mode");
-    const { AllureReportMock } = await import("../utils.js");
-    const { runProcess } = await import("../../src/utils/index.js");
-    const consoleModule = await import("node:console");
-
-    process.env.ALLURE_AGENT_OUTPUT = "./legacy-agent-output";
-    process.env.ALLURE_AGENT_EXPECTATIONS = "./legacy-expected.yaml";
-
-    await run(RunCommand, ["run", "--cwd", "./fixture", "--silent", "--", "npm", "test"]);
-
-    expect(executeAgentMode).toHaveBeenCalledWith({
-      configPath: undefined,
-      cwd: "./fixture",
-      output: resolve(process.cwd(), "./legacy-agent-output"),
-      expectations: resolve(process.cwd(), "./legacy-expected.yaml"),
-      environment: undefined,
-      environmentName: undefined,
-      silent: true,
-      args: ["npm", "test"],
-    });
-    expect(readConfig).not.toHaveBeenCalled();
-    expect(AllureReportMock).not.toHaveBeenCalled();
-    expect(runProcess).not.toHaveBeenCalled();
-    expect(consoleModule.log).not.toHaveBeenCalled();
-    expect(exitMock).not.toHaveBeenCalled();
-  });
 });
diff --git a/packages/plugin-agent/README.md b/packages/plugin-agent/README.md
index d2727073d6b..4bb44365307 100644
--- a/packages/plugin-agent/README.md
+++ b/packages/plugin-agent/README.md
@@ -27,14 +27,13 @@ When enabled, the plugin writes:
 - `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` for machine-readable review
 - copied run logs and other artifacts under `artifacts/`
 - `AGENTS.md` with guidance for consuming the directory
-- `manifest/expected.json` when `ALLURE_AGENT_EXPECTATIONS` is provided
-- `project/docs/allure-agent-mode.md` when the project has a guide at `docs/allure-agent-mode.md`
+- `manifest/expected.json` when inline flags, `--expectations <file>`, or plugin options provide expectations
 
 If no output directory is configured, the plugin does nothing.
 
 The plugin stays read-only by design. A separate harness layer can consume the
 generated manifests, plan enrichment work, and decide whether a rerun is ready to
-accept. See [the enrichment loop guide](../../docs/agent_enrichment_loop.md).
+accept.
 
 ## Verification Standard
 
@@ -42,19 +41,34 @@ accept. See [the enrichment loop guide](../../docs/agent_enrichment_loop.md).
 - Use `allure agent` for smoke checks too, even when the change is small or mechanical.
 - Only skip agent mode when it is impossible or when you are debugging agent mode itself.
 
-## Skills-First Workflow
+## CLI Capability Workflow
 
-The downstream workflow is intended to be skills-first:
+The installed CLI help is the local contract for agent mode. When an agent needs
+to choose supported commands or flags, detect the local CLI surface first:
 
-1. install the Allure skills bundle
-2. run the setup skill in a project
-3. let the setup skill create or update root `AGENTS.md`
-4. let the setup skill create `docs/allure-agent-mode.md`
-5. use Allure agent-mode in future test work through the project guide plus per-run manifests
+```shell
+allure --version
+allure agent capabilities --json
+allure agent --help
+allure agent query --help
+allure agent select --help
+allure agent latest --help
+allure agent state-dir --help
+```
 
-Every generated run includes an `AGENTS.md` playbook. When the project has
-`docs/allure-agent-mode.md`, the run output also copies that guide and tells agents
-to read it first.
+`allure agent capabilities --json` is the structured local contract for agents.
+`allure agent --help` includes the human-readable command task map. Each
+agent-mode command names the loop it supports, the problem signal that calls for
+it, and the task the agent should perform with it. For example,
+`allure agent latest` belongs to output recovery, `allure agent state-dir`
+belongs to tooling diagnosis, `allure agent query` belongs to output inspection,
+`allure agent select` belongs to rerun planning, and `--rerun-*` belongs to
+focused retry loops.
+
+Every generated run includes an `AGENTS.md` playbook with the same stable
+artifact-reading order, command task map, workflow guidance, and remediation
+rules. Reusable skills and common knowledge files should not hard-code
+version-specific flags; they should ask the local CLI when support is unclear.
 
 ## Install
 
@@ -90,30 +104,28 @@ The preferred CLI entrypoint is:
 npx allure agent -- npm test
 ```
 
-You can provide an explicit expectations file and output directory when you need deterministic paths:
+You can provide compact inline expectations for the common review path:
 
 ```shell
 npx allure agent \
-  --output ./out/agent-report \
-  --expectations ./out/agent-expected.yaml \
-  -- npm test
+  --goal "Review feature A" \
+  --expect-tests 3 \
+  --expect-label feature=feature-a \
+  --expect-step-containing "validate feature A" \
+  --expect-steps 1 \
+  -- npm test
 ```
 
-That command uses an agent-only profile by default, so configured presentation and export plugins such as Awesome, Dashboard, or TestOps are ignored for that run.
-
-You can also enable the plugin through lower-level environment variables when you need direct env control:
+Use an explicit expectations file and output directory when inline flags become awkward or you need deterministic paths:
 
 ```shell
-ALLURE_AGENT_OUTPUT=./out/agent-report npx allure run -- npm test
+npx allure agent \
+  --output ./out/agent-report \
+  --expectations ./out/agent-expected.yaml \
+  -- npm test
 ```
 
-To compare the run against an intended scope, provide an expectations file:
-
-```shell
-ALLURE_AGENT_OUTPUT=./out/agent-report \
-ALLURE_AGENT_EXPECTATIONS=./out/agent-expected.yaml \
-npx allure run -- npm test
-```
+That command uses an agent-only profile by default, so configured presentation and export plugins such as Awesome, Dashboard, or TestOps are ignored for that run.
 
 ## Options
 
@@ -121,19 +133,14 @@ The plugin accepts the following options:
 
 | Option | Description | Type | Default |
 |--------|-------------|------|---------|
-| `outputDir` | Directory where the markdown report will be written. Relative paths are resolved from the `allure` process working directory | `string` | `ALLURE_AGENT_OUTPUT` |
-
-## Environment Variables
-
-| Variable | Description |
-|----------|-------------|
-| `ALLURE_AGENT_OUTPUT` | Directory where the agent output should be written when `outputDir` is not set |
-| `ALLURE_AGENT_EXPECTATIONS` | Optional path to a YAML or JSON file describing expected and forbidden test scope |
-| `ALLURE_AGENT_COMMAND` | The executed command string recorded in `manifest/run.json` and `index.md` |
-| `ALLURE_AGENT_NAME` | Optional agent identifier recorded in `manifest/run.json` |
-| `ALLURE_AGENT_LOOP_ID` | Optional loop identifier recorded in `manifest/run.json` |
-| `ALLURE_AGENT_TASK_ID` | Optional task identifier recorded in `manifest/run.json` |
-| `ALLURE_AGENT_CONVERSATION_ID` | Optional conversation identifier recorded in `manifest/run.json` |
+| `outputDir` | Directory where the agent output (markdown report, manifests, artifacts, and `AGENTS.md`) will be written. Relative paths are resolved from the `allure` process working directory | `string` | none |
+| `expectationsPath` | Path to a YAML or JSON file describing expected and forbidden test scope | `string` | none |
+| `expectations` | Inline expectations object. Use either `expectationsPath` or `expectations`, not both | `AgentExpectationsInput` | none |
+| `command` | Executed command string recorded in `manifest/run.json` and `index.md` | `string` | none |
+| `agentName` | Optional agent identifier recorded in `manifest/run.json` | `string` | none |
+| `loopId` | Optional loop identifier recorded in `manifest/run.json` | `string` | none |
+| `taskId` | Optional task identifier recorded in `manifest/run.json` | `string` | expectations task id |
+| `conversationId` | Optional conversation identifier recorded in `manifest/run.json` | `string` | none |
 
 ## Manifest Contract
 
@@ -148,8 +155,7 @@ The plugin emits a hybrid output:
   - `manifest/test-events.jsonl`
   - `manifest/tests.jsonl`
   - `manifest/findings.jsonl`
-  - `manifest/expected.json` when an expectations file is provided
-  - `project/docs/allure-agent-mode.md` when the project guide is available
+  - `manifest/expected.json` when expectations are provided
 
 `index.md` is the landing page for the run. It includes run identity, expected scope,
 advisory check summary, process logs, and grouped test links.
@@ -162,10 +168,20 @@ Each test markdown file includes:
 - retry history
 - advisory findings and rerun guidance when evidence is weak
 
+## Expectations
+
+The preferred `allure agent` workflow uses inline flags:
+
+- `--goal <text>` records the review intent.
+- `--expect-tests <count>` checks visible logical test count.
+- `--expect-label name=value`, `--expect-env <id>`, `--expect-test "<fullName>"`, and `--expect-prefix <prefix>` define expected scope. For a newly added test, use `--expect-test "<fullName>"` so a missing reported test becomes an explicit finding.
+- `--expect-step-containing <text>`, `--expect-steps <count>`, `--expect-attachments <count>`, and `--expect-attachment <name|name=value|content-type=value>` define evidence expectations per evidence-target logical test.
+
+The plugin normalizes inline expectations into `manifest/expected.json`.
+
 ## Expectations File
 
-When `ALLURE_AGENT_EXPECTATIONS` is set, the plugin accepts YAML or JSON, normalizes
-it into `manifest/expected.json`, and compares the run against it.
+When `--expectations <file>` or the plugin `expectationsPath` option is set, the plugin accepts YAML or JSON, normalizes it into `manifest/expected.json`, and compares the run against it.
 
 Expected top-level fields:
 
@@ -173,6 +189,7 @@ Expected top-level fields:
 goal: Validate feature A
 task_id: feature-a
 expected:
+  test_count: 3
   environments:
     - default
   full_names:
@@ -197,23 +214,27 @@ notes:
 Selectors are advisory. The plugin does not fail the run; it records findings in
 markdown and `manifest/findings.jsonl`.
 
-## Review Loop
+## Agent Workflow Pattern
 
-The intended usage pattern is:
+Use the smallest workflow that matches the task. For the common change-validation path:
 
-1. Run tests with `allure agent -- <command>`.
+1. Run tests with `allure agent --goal <text> --expect-test "<fullName>" --expect-label name=value --expect-step-containing <text> -- <command>`.
 2. Watch `manifest/run.json` and `manifest/test-events.jsonl` while the run is active.
 3. Review `index.md` plus the manifest files.
 4. If evidence is weak, add steps, attachments, labels, or parameters.
-5. Rerun the same scope with the same expectations file.
+5. Rerun the same scope with the same expectations.
 6. Accept the run or iterate based on advisory findings.
 
+When a prior agent run already captured failed tests, prefer
+`allure agent --rerun-latest --rerun-preset failed -- <command>` or
+`allure agent --rerun-from <output-dir> --rerun-preset failed -- <command>`
+instead of spending context reconstructing runner-specific test names.
+
 For small mechanical test changes, use a scoped agent-mode run for the smoke check
 too. Plain runner commands should be reserved for cases where agent mode is
 impossible or when you are debugging agent mode itself.
 
-For grouped coverage reviews, prefer one temp output directory and one expectations
-file per scope instead of trying to review a whole command matrix from a single run.
+For grouped coverage reviews, prefer one scoped expectation set per group instead of trying to review a whole command matrix from a single run.
 
 ## Test Enrichment Best Practices
 
@@ -246,25 +267,12 @@ When agent output does not fully model runner-visible failures:
 - treat the review as partial when suite-load, import, or setup failures are visible outside logical test files
 - keep console-only conclusions provisional until the missing modeling is understood
 
-## Project Guide
-
-Projects using the skills flow should keep a short root `AGENTS.md` and a deeper
-`docs/allure-agent-mode.md`.
-
-`AGENTS.md` should route all test-related work to the deeper guide. The deeper guide
-should explain:
-
-- the feature-delivery loop
-- the metadata-enrichment loop
-- per-run temp expectations and output paths
-- meaningful evidence rules
-- minimal metadata rules
-- future loops like flaky, known-issue, mute, and quality gates
-
 ## Copyable Agent Instructions
 
 The generated `AGENTS.md` already contains this guidance for each run. If you want
-the same policy in a project-level skill or agent prompt, you can start with:
+the same stable policy in a reusable skill or agent prompt, keep version-specific
+CLI details out of that reusable body and make agents inspect local CLI help when
+support is unclear:
 
 ```md
 ## Allure Agent Mode Instructions
@@ -273,15 +281,13 @@ the same policy in a project-level skill or agent prompt, you can start with:
 - Use `allure agent` for smoke checks too, even when the change is small or mechanical.
 - Only skip agent mode when it is impossible or when you are debugging agent mode itself.
 - After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly.
-- Use `allure agent latest` to reopen the newest run when `--output` was omitted.
-- Use `allure agent state-dir` to inspect where the current project stores its latest-agent state.
-- Use `allure agent select --latest` or `allure agent select --from <output-dir>` to inspect the review-targeted test plan before rerunning.
-- Use `allure agent --rerun-latest -- <command>` or `allure agent --rerun-from <output-dir> -- <command>` to rerun only the selected tests.
-- Use `--rerun-preset review|failed|unsuccessful|all`, repeated `--rerun-environment <id>`, and repeated `--rerun-label name=value` when you need a narrower rerun selection from the previous output.
-- Use `ALLURE_AGENT_STATE_DIR` when you need to override where the current project stores latest-agent state for `latest`, `state-dir`, or `--rerun-latest`.
-- Use `ALLURE_AGENT_*` with `allure run` only as the lower-level fallback when you need direct environment control.
-- Generate or refresh `ALLURE_AGENT_EXPECTATIONS` before each targeted rerun.
-- Run tests with `ALLURE_AGENT_OUTPUT` and review `manifest/run.json`, `manifest/test-events.jsonl`, `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`.
+- Use `allure --version`, `allure agent capabilities --json`, and `allure agent --help` before choosing flags when the local CLI surface is unknown.
+- Use `allure agent latest` to print the newest output directory and `index.md` path when `--output` was omitted.
+- Use `allure agent latest`, `state-dir`, `query`, `select`, and `--rerun-*` according to their loop/task/problem mapping instead of treating them as interchangeable helper commands.
+- Use `allure agent query --latest summary|tests|findings|test` or `allure agent query --from <output-dir> ...` to inspect prior output as focused JSON before manually opening raw manifests.
+- Use `allure agent select --from <output-dir> --output <file>` when you want the CLI to write the test plan and print a short summary with the file path, source output, preset, and selected count.
+- When rerunning previous failures, use `allure agent --rerun-latest --rerun-preset failed -- <command>` or `allure agent --rerun-from <output-dir> --rerun-preset failed -- <command>` instead of manually rebuilding runner-specific test names.
+- Run tests with `allure agent` and review `manifest/run.json`, `manifest/test-events.jsonl`, `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`.
 - Enrich only the intended tests. Add real steps for real setup, actions, and assertions.
 - Attach only real runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces.
 - Keep metadata minimal. Add labels or severity only when scope review, debugging, or quality policy uses them.
@@ -303,7 +309,7 @@ import {
 ```
 
 - `buildAgentExpectations(...)` converts a goal plus target/forbidden selectors into
-  the JSON shape expected by `ALLURE_AGENT_EXPECTATIONS`.
+  the expectations shape accepted by inline flags, expectations files, and the plugin expectations option.
 - `loadAgentOutput(...)` reads `manifest/run.json`, `manifest/tests.jsonl`, and
   `manifest/findings.jsonl`.
 - `planAgentEnrichmentReview(...)` maps `check_name` values to enrichment actions
@@ -325,5 +331,5 @@ The enrichment loop should add only real runtime evidence:
 Avoid dummy enrichment such as empty wrapper steps, placeholder `"passed"` text
 attachments, or labels that are never used downstream.
 
-For a fuller policy, remediation mapping, and JS/Vitest examples based on the
-existing sandbox tests, see [the enrichment loop guide](../../docs/agent_enrichment_loop.md).
+For remediation mapping and JS/Vitest examples based on the existing sandbox
+tests, inspect the package tests and generated run `AGENTS.md` guidance.
diff --git a/packages/plugin-agent/src/capabilities.ts b/packages/plugin-agent/src/capabilities.ts
new file mode 100644
index 00000000000..ff822c9af2e
--- /dev/null
+++ b/packages/plugin-agent/src/capabilities.ts
@@ -0,0 +1,178 @@
+export const AGENT_CAPABILITIES_SCHEMA = "allure-agent-capabilities/v1";
+
+export const createAgentCapabilities = () =>
+  ({
+    schema: AGENT_CAPABILITIES_SCHEMA,
+    commands: {
+      help: {
+        supported: true,
+        usage: "allure --version; allure agent --help; allure agent capabilities",
+        output: ["human", "json"],
+      },
+      run: {
+        supported: true,
+        usage: "allure agent [options] -- <command>",
+        options: [
+          "--config",
+          "--cwd",
+          "--output",
+          "--expectations",
+          "--goal",
+          "--task-id",
+          "--expect-tests",
+          "--expect-label",
+          "--expect-env",
+          "--expect-test",
+          "--expect-prefix",
+          "--expect-step-containing",
+          "--forbid-label",
+          "--expect-steps",
+          "--expect-attachments",
+          "--expect-attachment",
+          "--environment",
+          "--environment-name",
+          "--silent",
+          "--rerun-from",
+          "--rerun-latest",
+          "--rerun-preset",
+          "--rerun-environment",
+          "--rerun-label",
+        ],
+      },
+      latest: {
+        supported: true,
+        usage: "allure agent latest [--cwd <dir>]",
+        output: ["agent output: <dir>", "agent index: <dir>/index.md"],
+      },
+      stateDir: {
+        supported: true,
+        usage: "allure agent state-dir [--cwd <dir>]",
+        environmentVariable: "ALLURE_AGENT_STATE_DIR",
+      },
+      select: {
+        supported: true,
+        usage: "allure agent select (--latest | --from <output-dir>) [options]",
+        presets: ["review", "failed", "unsuccessful", "all"],
+        filters: ["environment", "label"],
+        output: ["stdout-testplan-json", "file-testplan-json", "file-summary"],
+      },
+      query: {
+        supported: true,
+        usage: "allure agent query (--latest | --from <output-dir>) [summary|tests|findings|test] [options]",
+        views: ["summary", "tests", "findings", "test"],
+        filters: ["status", "environment", "label", "severity", "category", "check", "test"],
+        output: ["json"],
+      },
+      rerun: {
+        supported: true,
+        usage: "allure agent (--rerun-latest | --rerun-from <output-dir>) [filters] -- <command>",
+        presets: ["review", "failed", "unsuccessful", "all"],
+        filters: ["environment", "label"],
+        transport: "ALLURE_TESTPLAN_PATH",
+      },
+    },
+    expectations: {
+      inline: {
+        supported: true,
+        goal: true,
+        taskId: true,
+        expected: {
+          testCount: true,
+          labels: true,
+          environments: true,
+          fullNames: true,
+          fullNamePrefixes: true,
+        },
+        forbidden: {
+          labels: true,
+          environments: false,
+          fullNames: false,
+          fullNamePrefixes: false,
+        },
+        evidence: {
+          stepNameContains: true,
+          minSteps: true,
+          minAttachments: true,
+          attachmentFilters: ["name", "content-type"],
+        },
+      },
+      file: {
+        supported: true,
+        formats: ["yaml", "json"],
+      },
+    },
+    output: {
+      automaticTempDirectory: true,
+      explicitOutputOption: "--output <dir>",
+      schema: "allure-agent-output/v1",
+      files: [
+        "index.md",
+        "AGENTS.md",
+        "manifest/run.json",
+        "manifest/test-events.jsonl",
+        "manifest/tests.jsonl",
+        "manifest/findings.jsonl",
+        "manifest/expected.json",
+        "tests/<environment>/<slug>.md",
+        "artifacts/global/",
+      ],
+    },
+    unsupported: {
+      discovery: true,
+      configureIntegration: true,
+      executionSignal: true,
+      compare: true,
+      flaky: true,
+      duplicates: true,
+      stale: true,
+      suppressions: true,
+      observe: true,
+      interrupt: true,
+      localAgentService: true,
+      expectationControls: ["--expect-evidence"],
+    },
+  }) as const;
+
+export const AGENT_TASK_MAP_HELP = `Agent task map:
+  allure --version
+  allure agent --help
+  allure agent capabilities
+      Setup and capability detection. Use when the local CLI surface is unknown,
+      generated guidance may be stale, or an agent needs supported flags without
+      guessing.
+
+  allure agent --goal ... -- <command>
+      Run a test command with runtime evidence, scope expectations, and
+      agent-readable artifacts for review, debugging, smoke checks, or validation.
+
+  allure agent latest
+      Recover the newest agent output directory and index.md when --output was
+      omitted or a follow-up task needs the previous run.
+
+  allure agent state-dir
+      Show where project-scoped latest-run pointers are stored. Useful when
+      latest cannot find a run or CI/sandbox state looks wrong.
+
+  allure agent select --latest
+  allure agent select --from <output-dir>
+      Inspect/filter prior results and write an Allure test plan before rerun.
+
+  allure agent query --latest summary
+  allure agent query --from <output-dir> tests
+  allure agent query --from <output-dir> findings
+      Inspect prior agent output as focused JSON without manually loading raw
+      manifests. Use for summaries, filtered test lists, findings, or one test.
+
+  allure agent --rerun-latest -- <command>
+  allure agent --rerun-from <output-dir> -- <command>
+      Rerun the failed, unsuccessful, or selected tests from prior agent output
+      through Allure test plan support.
+
+Environment:
+  ALLURE_AGENT_STATE_DIR=<dir>
+      Override the project-scoped state directory. Useful in CI, sandboxes, or
+      multi-job setups that need a deterministic shared state location.
+`;
+
+export const isAgentTaskMapHelpRequest = (args: string[]) =>
+  args.length === 2 && args[0] === "agent" && (args[1] === "--help" || args[1] === "-h");
diff --git a/packages/plugin-agent/src/errors.ts b/packages/plugin-agent/src/errors.ts
new file mode 100644
index 00000000000..d4f38e9b576
--- /dev/null
+++ b/packages/plugin-agent/src/errors.ts
@@ -0,0 +1,21 @@
+export class AgentUsageError extends Error {
+  constructor(message: string) {
+    super(message);
+    this.name = "AgentUsageError";
+  }
+}
+
+export class AgentExpectationUsageError extends AgentUsageError {
+  readonly sourceOption?: string;
+
+  constructor(message: string, sourceOption?: string) {
+    super(message);
+    this.name = "AgentExpectationUsageError";
+    this.sourceOption = sourceOption;
+  }
+}
+
+export const isAgentUsageError = (error: unknown): error is AgentUsageError => error instanceof AgentUsageError;
+
+export const isAgentExpectationUsageError = (error: unknown): error is AgentExpectationUsageError =>
+  error instanceof AgentExpectationUsageError;
diff --git a/packages/plugin-agent/src/guidance.ts b/packages/plugin-agent/src/guidance.ts
index 0a83960dbf7..d6fe062e255 100644
--- a/packages/plugin-agent/src/guidance.ts
+++ b/packages/plugin-agent/src/guidance.ts
@@ -15,12 +15,28 @@ export type EnrichmentActionDefinition = {
 };
 
 export const ENRICHMENT_ACTIONS_BY_CHECK_NAME: Record<string, EnrichmentActionDefinition> = {
-  "invalid-expectations-file": {
+  "expectations-invalid": {
     category: "bootstrap-allure",
-    title: "Repair the expectations file",
-    guidance: "Regenerate a valid YAML or JSON expectations file before the next enrichment iteration.",
+    title: "Repair the expectations input",
+    guidance: "Regenerate valid inline expectations or a valid YAML/JSON expectations file before the next iteration.",
   },
-  "no-visible-tests": {
+  "expectations-empty": {
+    category: "narrow-test-scope",
+    title: "Add recognized expectation controls",
+    guidance: "Rerun with supported M1 expectation controls or omit expectations for an intentionally broad review.",
+  },
+  "expectations-unsupported-control": {
+    category: "review-manually",
+    title: "Use supported expectation controls",
+    guidance: "Replace unsupported controls with supported M1 flags or report weaker checking explicitly.",
+  },
+  "expectations-weak-goal": {
+    category: "review-manually",
+    title: "Use a more specific goal next time",
+    guidance:
+      "Base conclusions on observed evidence and rerun with a specific goal when expectation precision matters.",
+  },
+  "no-tests-observed": {
     category: "bootstrap-allure",
     title: "Restore Allure result generation",
     guidance: "Make sure the test command emits Allure results before rerunning the enrichment loop.",
@@ -42,22 +58,27 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME: Record<string, EnrichmentActionDe
     guidance:
       "Compare run statistics with the logical test files and document any skipped or non-passed results that were not rendered.",
   },
-  "missing-expected-test": {
+  "expected-test-missing": {
     category: "narrow-test-scope",
     title: "Bring the intended test back into scope",
     guidance: "Regenerate expectations and rerun only the planned tests or selectors.",
   },
-  "missing-expected-prefix": {
+  "expected-count-mismatch": {
+    category: "narrow-test-scope",
+    title: "Restore the expected visible test count",
+    guidance: "Check the command, selectors, and agent modeling before accepting the run.",
+  },
+  "expected-prefix-missing": {
     category: "narrow-test-scope",
     title: "Restore the intended name-prefix scope",
     guidance: "Check the selector and rerun only the feature slice that should have matched it.",
   },
-  "missing-expected-environment": {
+  "expected-environment-missing": {
     category: "narrow-test-scope",
     title: "Rerun the intended environment",
     guidance: "Constrain the rerun to the expected environment before accepting the result.",
   },
-  "missing-expected-label-selector": {
+  "expected-label-missing": {
     category: "repair-test-metadata",
     title: "Add the minimal missing scope label",
     guidance: "Only add the labels required by the expectations selector; do not inflate metadata.",
@@ -67,6 +88,11 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME: Record<string, EnrichmentActionDe
     title: "Remove unrelated environments from the rerun",
     guidance: "Tighten the rerun selector so unrelated environments do not appear in agent output.",
   },
+  "forbidden-label-observed": {
+    category: "narrow-test-scope",
+    title: "Stop forbidden labeled tests from running",
+    guidance: "Reject the run, narrow the rerun scope, and keep the forbidden label expectation.",
+  },
   "forbidden-selector-match": {
     category: "narrow-test-scope",
     title: "Stop forbidden tests from running",
@@ -92,6 +118,26 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME: Record<string, EnrichmentActionDe
     title: "Add meaningful setup, action, and assertion steps",
     guidance: "Wrap only real actions, state transitions, and checks in Allure steps before rerunning.",
   },
+  "expected-step-containing-missing": {
+    category: "add-meaningful-steps",
+    title: "Add or correct the expected step text",
+    guidance: "Expose the expected runtime check as a test-scoped Allure step, or correct the expectation wording.",
+  },
+  "insufficient-expected-steps": {
+    category: "add-meaningful-steps",
+    title: "Add the expected meaningful steps",
+    guidance: "Expose real setup, action, state transition, and assertion steps without adding filler.",
+  },
+  "insufficient-expected-attachments": {
+    category: "add-test-attachments",
+    title: "Add the expected runtime attachments",
+    guidance: "Attach focused runtime evidence such as payloads, logs, screenshots, diffs, or traces.",
+  },
+  "missing-expected-attachment": {
+    category: "add-test-attachments",
+    title: "Add the required attachment",
+    guidance: "Attach the requested runtime artifact near the relevant action or assertion.",
+  },
   "failed-without-attachments": {
     category: "add-test-attachments",
     title: "Attach focused runtime evidence near the failure",
@@ -132,18 +178,131 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME: Record<string, EnrichmentActionDe
   },
 };
 
-export const AGENT_ENRICHMENT_WORKFLOW = [
-  "Generate or refresh `ALLURE_AGENT_EXPECTATIONS` before each targeted enrichment iteration.",
-  "Run tests with `allure agent --output <dir> --expectations <file> -- <command>`.",
-  "After each test run, print the `index.md` path from that output directory so collaborators can open the run overview quickly.",
-  "Use `allure agent latest` to recover the newest output directory when a prior run omitted `--output`.",
-  "Use `allure agent state-dir` to inspect where the current project stores its latest-agent state.",
-  "Use `ALLURE_AGENT_STATE_DIR` when you need to override where the current project stores latest-agent state for `latest`, `state-dir`, or `--rerun-latest`.",
-  "Use `allure agent select --latest` or `allure agent select --from <output-dir>` to inspect the review-targeted test plan before rerunning.",
-  "Use `allure agent --rerun-latest -- <command>` or `allure agent --rerun-from <output-dir> -- <command>` to rerun only the selected tests through Allure testplan support. Add `--rerun-preset`, repeated `--rerun-environment`, or repeated `--rerun-label name=value` filters when you need a narrower rerun slice.",
-  "Inspect `manifest/run.json`, tail `manifest/test-events.jsonl`, then review `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` before editing tests.",
-  "Enrich only the intended tests, rerun the same scope, and compare the rerun against `manifest/expected.json` when present.",
-  "Accept the rerun only when scope is clean, evidence is strong enough to review, and no high-confidence dummy findings remain.",
+export const AGENT_WORKFLOWS_MARKDOWN = `Use the smallest workflow that matches the task. Each workflow has the same shape: when to use it, which agent-mode commands help, and what must be true before you call the task done.
+
+### Validate A Change
+
+Use when code or tests changed and you need a user-facing safety conclusion. For small mechanical changes, use this same workflow with narrower expectations rather than a separate shortcut.
+
+Commands:
+
+- \`allure agent --goal <text> --expect-* -- <command>\`
+
+Done when:
+
+- the expected scope ran and no forbidden scope appeared
+- \`index.md\`, \`manifest/run.json\`, \`manifest/tests.jsonl\`, and \`manifest/findings.jsonl\` were reviewed
+- the \`index.md\` path was reported
+- the changed package build and required static checks passed when this repository guide requires them
+
+### Add Or Update Tests
+
+Use when creating or changing tests for a feature, fix, or behavior gap.
+
+Commands:
+
+- \`allure agent --goal <text> --expect-tests <count> --expect-test "<fullName>" --expect-label name=value --expect-step-containing <text> -- <command>\`
+
+Done when:
+
+- the tests prove the intended behavior rather than only touching the code path
+- scope expectations match the intended feature, issue, or package slice
+- each expected test has enough steps or attachments for a reviewer to understand what happened
+- weak evidence, scope drift, and unexpected-test findings are fixed or explicitly accepted as out of scope
+
+### Review Existing Coverage
+
+Use when auditing a package, command matrix, feature area, or business behavior without necessarily changing tests first.
+
+Commands:
+
+- one scoped \`allure agent --goal <text> --expect-* -- <command>\` run per review group
+
+Done when:
+
+- the audit is split into reviewable groups, or it is explicitly documented as a broad package-health run
+- each group has expectations that describe the intended scope
+- runtime artifacts are reviewed before source-only coverage conclusions
+- uncovered behavior is recorded as follow-up test work instead of being hidden in a broad pass/fail summary
+
+### Triage Failures
+
+Use when tests failed, broke, or runner output does not match agent artifacts.
+
+Commands:
+
+- \`allure agent latest\`
+- \`allure agent --rerun-latest --rerun-preset failed -- <command>\`
+- \`allure agent --rerun-from <output-dir> --rerun-preset failed -- <command>\`
+
+Done when:
+
+- failing, broken, or unmodeled runner-visible failures are represented in agent artifacts, or partial modeling is called out explicitly
+- \`artifacts/global/stderr.txt\` and global errors were checked when failures are missing from \`manifest/tests.jsonl\`
+- reruns use prior agent output instead of hand-built runner test names whenever the runner can consume the generated test plan
+
+### Rerun A Prior Scope
+
+Use when prior agent output already identifies failed, unsuccessful, or review-targeted tests and the next run should stay focused.
+
+Commands:
+
+- \`allure agent select --latest [--preset review|failed|unsuccessful|all]\`
+- \`allure agent select --from <output-dir> [--environment <id>] [--label name=value]\`
+- \`allure agent --rerun-latest -- <command>\`
+- \`allure agent --rerun-from <output-dir> -- <command>\`
+
+Done when:
+
+- the rerun scope comes from Allure testplan support
+- \`--rerun-preset\`, \`--rerun-environment\`, or \`--rerun-label\` filters explain any narrowed selection
+- manual test names are used only as a fallback when testplan support is unavailable
+- the rerun output is reviewed before making a new conclusion
+
+### Improve Evidence Quality
+
+Use when tests pass or fail but the runtime story is too weak to review.
+
+Commands:
+
+- \`allure agent --expect-step-containing <text> --expect-steps <count> --expect-attachments <count> -- <command>\`
+- \`allure agent --expect-attachment <name|name=value|content-type=value> -- <command>\`
+
+Done when:
+
+- steps describe real setup, actions, state transitions, or assertions
+- attachments contain runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces
+- placeholder steps, generic \`"passed"\` attachments, and other dummy evidence are removed
+- the same intended scope was rerun and no high-confidence evidence findings remain
+
+### Recover Or Diagnose Agent Mode
+
+Use when agent output is missing, the latest run cannot be found, local CLI support is unclear, or state behaves differently in CI or a sandbox.
+
+Commands:
+
+- \`allure --version\`
+- \`allure agent capabilities --json\`
+- \`allure agent --help\`
+- \`allure agent latest\`
+- \`allure agent state-dir\`
+- \`ALLURE_AGENT_STATE_DIR=<dir>\`
+
+Done when:
+
+- supported local commands and flags are known from capabilities or help output
+- the output directory, \`index.md\` path, or state directory is identified, or the reason it is unavailable is documented
+- console-only conclusions stay provisional until agent-mode artifacts are available`;
+
+export const AGENT_COMMAND_TASK_MAP = [
+  "`allure --version`, `allure agent capabilities --json`, and `allure agent --help`: setup and capability-detection loop. Use when the local CLI surface is unknown, generated guidance may be stale, or you need to choose supported flags without guessing.",
+  "`allure agent --goal ... -- <command>`: test review, feature delivery, smoke-check, and coverage loops. Use when a test command needs runtime evidence, scope expectations, and user-facing conclusions based on agent artifacts rather than console output alone.",
+  "`allure agent latest`: output recovery loop. Use when a previous run omitted `--output`, you need the newest output directory and `index.md` path, or a follow-up task needs prior output before selecting or rerunning tests.",
+  "`allure agent state-dir`: tooling diagnosis loop. Use when `latest` cannot find a run, CI or sandbox state looks wrong, or you need to explain where project-scoped latest pointers are stored.",
+  "`allure agent query --latest summary|tests|findings|test` / `allure agent query --from <output-dir> ...`: output inspection loop. Use when you need a focused JSON summary, filtered tests, filtered findings, or one test from prior agent output without manually loading raw manifests first.",
+  "`allure agent select --latest` / `allure agent select --from <output-dir>`: rerun-planning loop. Use when you need to inspect, filter, or write the Allure test plan from prior results before executing another run. Without `--output`, stdout is raw testplan JSON; with `--output`, stdout summarizes the file path, source output, preset, and selected count.",
+  "`allure agent --rerun-latest` / `allure agent --rerun-from <output-dir>`: focused retry loop. Use when prior output already identifies failed, unsuccessful, or review-targeted tests and you should rerun that slice through Allure testplan support instead of rebuilding runner-specific test names.",
+  "`ALLURE_AGENT_STATE_DIR=<dir>`: CI and sandbox state-control loop. Use when multiple jobs need a deterministic state location, the default temp state is not shared, or the default state directory is not writable.",
 ] as const;
 
 export const AGENT_VERIFICATION_RULES = [
@@ -154,18 +313,6 @@ export const AGENT_VERIFICATION_RULES = [
   "After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly.",
 ] as const;
 
-export const AGENT_SMALL_TEST_CHANGE_WORKFLOW = [
-  "Create a fresh temp `ALLURE_AGENT_OUTPUT` and `ALLURE_AGENT_EXPECTATIONS` for the touched scope before closing the task.",
-  "Run the touched scope with `allure agent`, even if the goal is only a smoke check after a mechanical change such as typing cleanup, mock refactors, or helper extraction.",
-  "Review `manifest/run.json`, `manifest/test-events.jsonl`, `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` before making any final claim.",
-] as const;
-
-export const AGENT_COVERAGE_REVIEW_WORKFLOW = [
-  "Split package or business-logic audits into scoped groups and give each group its own temp output directory and expectations file.",
-  "Review agent-mode artifacts first for each group, then inspect source code only after the runtime evidence shows what actually ran.",
-  "Treat grouped coverage review as incomplete until each scoped run has matching expectations or an explicit note that the audit is intentionally broad.",
-] as const;
-
 export const AGENT_TEST_ENRICHMENT_BEST_PRACTICES = [
   "Steps must wrap real actions, state transitions, or assertions. Prefer a small setup/action/assertion narrative over event-by-event step spam.",
   "Attachments must capture real runtime evidence from that execution: payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces.",
@@ -205,14 +352,17 @@ export const AGENT_INSTRUCTIONS_TEMPLATE = `## Allure Agent Mode Instructions
 - Use \`allure agent\` for smoke checks too, even when the change is small or mechanical.
 - Only skip agent mode when it is impossible or when you are debugging agent mode itself.
 - After each agent-mode test run, print the \`index.md\` path from that run's output directory so users can open the run overview quickly.
-- Use \`ALLURE_AGENT_*\` with \`allure run\` only as the lower-level fallback when you need direct environment control.
-- Use \`allure agent latest\` to reopen the newest run when \`--output\` was omitted.
+- Use \`allure agent latest\` to print the newest output directory and \`index.md\` path when \`--output\` was omitted.
+- Use \`allure agent capabilities --json\` when you need structured supported-command, expectation, output, rerun, and unsupported-feature data without scraping help text.
 - Use \`allure agent state-dir\` to inspect where the current project stores its latest-agent state.
-- Use \`allure agent select --latest\` or \`allure agent select --from <output-dir>\` to inspect the review-targeted test plan before rerunning.
+- Use \`allure agent latest\`, \`state-dir\`, \`query\`, \`select\`, and \`--rerun-*\` according to their loop/task/problem mapping instead of treating them as interchangeable helper commands.
+- Use \`allure agent query --latest summary|tests|findings|test\` or \`allure agent query --from <output-dir> ...\` to inspect prior output as focused JSON before manually opening raw manifests.
+- Use \`allure agent select --latest\` or \`allure agent select --from <output-dir>\` to inspect the review-targeted test plan before rerunning; add \`--output <file>\` when you want the CLI to write the plan and print a short selection summary.
 - Use \`allure agent --rerun-latest -- <command>\` or \`allure agent --rerun-from <output-dir> -- <command>\` to rerun only the selected tests.
+- When rerunning previous failures, use \`allure agent --rerun-latest --rerun-preset failed -- <command>\` or \`allure agent --rerun-from <output-dir> --rerun-preset failed -- <command>\` instead of manually rebuilding runner-specific test names.
 - Use \`--rerun-preset review|failed|unsuccessful|all\`, repeated \`--rerun-environment <id>\`, and repeated \`--rerun-label name=value\` when you need a narrower rerun selection from the previous output.
 - Use \`ALLURE_AGENT_STATE_DIR\` when you need to override where the current project stores latest-agent state for \`latest\`, \`state-dir\`, or \`--rerun-latest\`.
-- Generate or refresh \`ALLURE_AGENT_EXPECTATIONS\` before each targeted rerun.
+- Prefer inline \`allure agent\` expectation flags such as \`--goal\`, \`--expect-tests\`, \`--expect-test\`, \`--expect-label\`, and \`--expect-step-containing\`; use \`--expectations <file>\` only when flags become awkward.
 - Run tests with \`allure agent\` and review \`manifest/run.json\`, \`manifest/test-events.jsonl\`, \`index.md\`, \`manifest/tests.jsonl\`, and \`manifest/findings.jsonl\`.
 - Enrich only the intended tests. Add real steps for real setup, actions, and assertions.
 - Attach only real runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces.
@@ -222,32 +372,21 @@ export const AGENT_INSTRUCTIONS_TEMPLATE = `## Allure Agent Mode Instructions
 
 const renderBullets = (items: readonly string[]) => items.map((item) => `- ${item}`).join("\n");
 
-const renderNumbered = (items: readonly string[]) => items.map((item, index) => `${index + 1}. ${item}`).join("\n");
-
 const renderRemediationGuide = () =>
   Object.entries(ENRICHMENT_ACTIONS_BY_CHECK_NAME)
     .map(([checkName, action]) => `- \`${checkName}\`: ${action.title}. ${action.guidance}`)
     .join("\n");
 
-export const renderAgentsGuide = (projectGuidePath?: string) =>
+export const renderAgentsGuide = () =>
   `# AGENTS Guide
 
 ## Reading Order
 
-${
-  projectGuidePath
-    ? `1. Read [project guidance](${projectGuidePath}) first for repo-specific testing conventions and loop expectations.
-2. Read \`manifest/run.json\` for the current phase, counts, and modeling summary.
-3. Tail \`manifest/test-events.jsonl\` for the newest structured updates while the run is active.
-4. Open \`index.md\` for run-level status, scope summary, and the highest-priority findings.
-5. Open the relevant file under \`tests/<environment>/<historyId-or-trId>.md\` for evidence review.
-6. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr.`
-    : `1. Read \`manifest/run.json\` for the current phase, counts, and modeling summary.
+1. Read \`manifest/run.json\` for the current phase, counts, and modeling summary.
 2. Tail \`manifest/test-events.jsonl\` for the newest structured updates while the run is active.
 3. Open \`index.md\` for run-level status, scope summary, and the highest-priority findings.
 4. Open the relevant file under \`tests/<environment>/<historyId-or-trId>.md\` for evidence review.
-5. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr.`
-}
+5. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr.
 
 ## Directory Contract
 
@@ -256,28 +395,23 @@ ${
 - \`manifest/test-events.jsonl\` is the append-only live event stream for machine consumers during the run.
 - \`manifest/tests.jsonl\` contains one logical test summary per line.
 - \`manifest/findings.jsonl\` contains one advisory finding per line.
-- \`manifest/expected.json\` is copied from \`ALLURE_AGENT_EXPECTATIONS\` when provided.
-- \`project/docs/allure-agent-mode.md\` is copied from the project when available so each run keeps the guide used for that execution.
+- \`manifest/expected.json\` contains normalized expectations from inline flags or \`--expectations <file>\` when provided.
 - \`tests/<environment>/<slug>.md\` contains one logical test per file.
 - Retries from the same run are nested inside the same logical test file.
 - \`tests/<environment>/<slug>.assets/\` contains copied attachments for that logical test.
 - \`artifacts/global/\` contains copied global artifacts for the whole run.
 
-## Enrichment Loop Workflow
+## Command Task Map
 
-${renderNumbered(AGENT_ENRICHMENT_WORKFLOW)}
+${renderBullets(AGENT_COMMAND_TASK_MAP)}
 
-## Verification Standard
+## Agent Workflows
 
-${renderBullets(AGENT_VERIFICATION_RULES)}
+${AGENT_WORKFLOWS_MARKDOWN}
 
-## Small Test Change Workflow
-
-${renderNumbered(AGENT_SMALL_TEST_CHANGE_WORKFLOW)}
-
-## Coverage Review Workflow
+## Verification Standard
 
-${renderNumbered(AGENT_COVERAGE_REVIEW_WORKFLOW)}
+${renderBullets(AGENT_VERIFICATION_RULES)}
 
 ## Test Enrichment Best Practices
 
diff --git a/packages/plugin-agent/src/harness.ts b/packages/plugin-agent/src/harness.ts
index ccef9cfcb16..333fefce79a 100644
--- a/packages/plugin-agent/src/harness.ts
+++ b/packages/plugin-agent/src/harness.ts
@@ -10,6 +10,15 @@ export type AgentFindingCategory = "bootstrap" | "scope" | "metadata" | "evidenc
 export type AgentScopeMatch = "match" | "unexpected" | "forbidden" | "unknown";
 export type AgentAcceptanceStatus = "accept" | "iterate" | "reject";
 export type AgentAcceptanceImpact = "advisory" | "iterate" | "reject";
+export type AgentExpectationResultStatus =
+  | "matched"
+  | "failed"
+  | "partial"
+  | "degraded"
+  | "unsupported"
+  | "unavailable"
+  | "not_requested";
+export type AgentExpectationResultImpact = "accept" | "reject" | "iterate" | "advisory";
 export type AgentEnrichmentActionCategory = EnrichmentActionCategory;
 
 export type AgentExpectationSelector = {
@@ -17,6 +26,18 @@ export type AgentExpectationSelector = {
   full_names?: string[];
   full_name_prefixes?: string[];
   label_values?: Record<string, string | string[]>;
+  test_count?: number;
+};
+
+export type AgentEvidenceExpectations = {
+  required?: boolean;
+  min_steps?: number;
+  min_attachments?: number;
+  step_name_contains?: string[];
+  attachments?: Array<{
+    name?: string;
+    content_type?: string;
+  }>;
 };
 
 export type AgentExpectations = {
@@ -24,6 +45,7 @@ export type AgentExpectations = {
   task_id?: string;
   expected?: AgentExpectationSelector;
   forbidden?: AgentExpectationSelector;
+  evidence?: AgentEvidenceExpectations;
   notes?: string[];
 };
 
@@ -107,7 +129,6 @@ export type AgentRunManifest = {
     findings_manifest: string;
     test_events_manifest?: string;
     expected_manifest: string | null;
-    project_guide: string | null;
     process_logs: {
       stdout: string | null;
       stderr: string | null;
@@ -158,6 +179,28 @@ export type AgentRunManifest = {
     };
   };
   expectations_present: boolean;
+  expectations: AgentExpectations | null;
+  expectation_result: {
+    schema_version: "allure-agent-expectation-result/v1";
+    status: AgentExpectationResultStatus;
+    impact: AgentExpectationResultImpact;
+    source: {
+      kind: "inline" | "file" | "none";
+      path: string | null;
+    };
+    recognized_control_count: number;
+    unsupported_controls: string[];
+    degraded_controls: string[];
+    summary: {
+      expected_tests: number;
+      observed_tests: number;
+      missing_expected: number;
+      forbidden_observed: number;
+      unexpected_observed: number;
+      evidence_mismatches: number;
+    };
+    finding_ids: string[];
+  };
   check_summary: {
     total: number;
     countsBySeverity: Record<AgentFindingSeverity, number>;
@@ -195,17 +238,48 @@ export type AgentTestManifestLine = {
 };
 
 export type AgentFindingManifestLine = {
+  schema_version?: "allure-agent-finding/v2";
+  check_id?: string;
+  instance_id?: string;
   finding_id: string;
-  subject: string;
+  subject:
+    | string
+    | {
+        type: "run" | "test" | "environment" | "attachment" | "global";
+        id?: string;
+        path?: string;
+        full_name?: string;
+        environment?: string;
+      };
+  subject_ref?: string;
+  subject_type?: "run" | "test";
   severity: AgentFindingSeverity;
+  impact?: AgentAcceptanceImpact;
   category: AgentFindingCategory;
   check_name: string;
+  title?: string;
   message: string;
   explanation: string;
   evidence_paths: string[];
   remediation_hint: string;
   expected_reference?: string;
   confidence?: number;
+  expected?: Record<string, unknown>;
+  observed?: Record<string, unknown>;
+  evidence?: {
+    paths?: string[];
+  };
+  action?: string;
+  legacy?: {
+    finding_id: string;
+    subject: string;
+    subject_type?: "run" | "test";
+    check_name: string;
+    explanation?: string;
+    evidence_paths?: string[];
+    remediation_hint: string;
+    expected_reference?: string;
+  };
 };
 
 export type AgentOutputBundle = {
@@ -284,21 +358,28 @@ export const AGENT_ENRICHMENT_ACTIONS: Record<string, AgentEnrichmentAction> = O
 ) as Record<string, AgentEnrichmentAction>;
 
 export const SCOPE_REJECTING_CHECKS = [
-  "missing-expected-test",
-  "missing-expected-prefix",
-  "missing-expected-environment",
+  "expected-test-missing",
+  "expected-count-mismatch",
+  "expected-prefix-missing",
+  "expected-label-missing",
+  "expected-environment-missing",
+  "no-tests-observed",
   "unexpected-environment",
-  "forbidden-selector-match",
+  "forbidden-label-observed",
   "unexpected-test",
 ] as const;
 
 export const ITERATION_REQUIRED_CHECKS = [
-  "invalid-expectations-file",
-  "no-visible-tests",
+  "expectations-invalid",
+  "expectations-empty",
+  "expectations-unsupported-control",
   "runner-failures-outside-logical-results",
-  "missing-expected-label-selector",
   "metadata-mismatch",
   "history-id-collision",
+  "expected-step-containing-missing",
+  "insufficient-expected-steps",
+  "insufficient-expected-attachments",
+  "missing-expected-attachment",
   "failed-without-useful-steps",
   "failed-without-attachments",
   "nontrivial-run-with-empty-trace",
@@ -322,6 +403,28 @@ const IMPACT_ORDER: Record<AgentAcceptanceImpact, number> = {
 
 const uniqueValues = (values: string[]) => Array.from(new Set(values));
 
+const checkNameForFinding = ({ check_id, check_name }: AgentFindingManifestLine) => check_id ?? check_name;
+
+// Resolves a finding's subject reference: a truthy `subject_ref`, then a string subject, then object fields.
+const subjectRefForFinding = (finding: AgentFindingManifestLine) => {
+  const { subject, subject_ref: explicitRef } = finding;
+
+  if (explicitRef) {
+    return explicitRef;
+  }
+
+  // Structured subjects fall back from `path` to `id` to the bare subject `type`.
+  return typeof subject === "string" ? subject : (subject.path ?? subject.id ?? subject.type);
+};
+
+// Classifies a finding as run- or test-level: explicit `subject_type` wins, then a structured subject's own
+// `type` ("run"/"test", even when it also carries a path or id), then the resolved reference equal to "run".
+const subjectTypeForFinding = (finding: AgentFindingManifestLine): "run" | "test" => {
+  const declared = typeof finding.subject === "object" ? finding.subject.type : undefined;
+  const isRunLevel = declared === "run" || (declared !== "test" && subjectRefForFinding(finding) === "run");
+  return finding.subject_type ?? (isRunLevel ? "run" : "test");
+};
+
 const normalizeStringArray = (value?: string | string[]) => {
   if (typeof value === "string") {
     return value.length ? [value] : [];
@@ -426,18 +529,24 @@ const impactForFinding = (
   finding: AgentFindingManifestLine,
   antiDummyConfidenceThreshold: number,
 ): AgentAcceptanceImpact => {
-  if (SCOPE_REJECTING_CHECKS.includes(finding.check_name as (typeof SCOPE_REJECTING_CHECKS)[number])) {
+  if (finding.impact === "reject" || finding.impact === "iterate" || finding.impact === "advisory") {
+    return finding.impact;
+  }
+
+  const checkName = checkNameForFinding(finding);
+
+  if (SCOPE_REJECTING_CHECKS.includes(checkName as (typeof SCOPE_REJECTING_CHECKS)[number])) {
     return "reject";
   }
 
   if (
-    ANTI_DUMMY_CHECKS.includes(finding.check_name as (typeof ANTI_DUMMY_CHECKS)[number]) &&
+    ANTI_DUMMY_CHECKS.includes(checkName as (typeof ANTI_DUMMY_CHECKS)[number]) &&
     (finding.confidence ?? 0) >= antiDummyConfidenceThreshold
   ) {
     return "reject";
   }
 
-  if (ITERATION_REQUIRED_CHECKS.includes(finding.check_name as (typeof ITERATION_REQUIRED_CHECKS)[number])) {
+  if (ITERATION_REQUIRED_CHECKS.includes(checkName as (typeof ITERATION_REQUIRED_CHECKS)[number])) {
     return "iterate";
   }
 
@@ -463,7 +572,7 @@ export const buildAgentExpectations = (input: AgentHarnessRequest): AgentExpecta
 };
 
 export const mapFindingToEnrichmentAction = (finding: AgentFindingManifestLine | string): AgentEnrichmentAction => {
-  const checkName = typeof finding === "string" ? finding : finding.check_name;
+  const checkName = typeof finding === "string" ? finding : checkNameForFinding(finding);
   const mapped = AGENT_ENRICHMENT_ACTIONS[checkName];
 
   return mapped ?? { ...FALLBACK_ACTION, checkName };
@@ -498,17 +607,18 @@ export const planAgentEnrichmentReview = (
   const plan = sortPlan(
     output.findings.map((finding) => {
       const action = mapFindingToEnrichmentAction(finding);
-      const matchedTest = testsByPath.get(finding.subject);
+      const subject = subjectRefForFinding(finding);
+      const matchedTest = testsByPath.get(subject);
 
       return {
         ...action,
-        subject: finding.subject,
-        subjectType: finding.subject === "run" ? "run" : "test",
+        subject,
+        subjectType: subjectTypeForFinding(finding),
         severity: finding.severity,
         message: finding.message,
         explanation: finding.explanation,
-        remediationHint: finding.remediation_hint,
-        evidencePaths: finding.evidence_paths,
+        remediationHint: finding.action ?? finding.remediation_hint,
+        evidencePaths: finding.evidence?.paths ?? finding.evidence_paths,
         expectedReference: finding.expected_reference,
         confidence: finding.confidence,
         acceptanceImpact: impactForFinding(finding, antiDummyConfidenceThreshold),
@@ -525,7 +635,7 @@ export const planAgentEnrichmentReview = (
 
   if (!output.run.expectations_present) {
     notes.push(
-      "Generate ALLURE_AGENT_EXPECTATIONS before the next enrichment iteration so scope checks are comparable.",
+      "Declare inline expectations or provide an expectations file before the next enrichment iteration so scope checks are comparable.",
     );
   }
 
diff --git a/packages/plugin-agent/src/index.ts b/packages/plugin-agent/src/index.ts
index 57de8d402c7..b7ae6d8553b 100644
--- a/packages/plugin-agent/src/index.ts
+++ b/packages/plugin-agent/src/index.ts
@@ -1,3 +1,18 @@
-export { type AgentPluginOptions } from "./model.js";
+export {
+  type AgentAttachmentExpectationInput,
+  type AgentEvidenceExpectationInput,
+  type AgentExpectationSelectorInput,
+  type AgentExpectationsInput,
+  type AgentPluginOptions,
+  parseAgentExpectations,
+} from "./model.js";
+export * from "./capabilities.js";
+export * from "./errors.js";
 export * from "./harness.js";
+export * from "./inline-expectations.js";
+export * from "./invalid-output.js";
+export * from "./paths.js";
+export * from "./query.js";
+export * from "./selection.js";
+export * from "./state.js";
 export { AgentPlugin as default } from "./plugin.js";
diff --git a/packages/plugin-agent/src/inline-expectations.ts b/packages/plugin-agent/src/inline-expectations.ts
new file mode 100644
index 00000000000..cf27ad8b7f0
--- /dev/null
+++ b/packages/plugin-agent/src/inline-expectations.ts
@@ -0,0 +1,295 @@
+import { readFile } from "node:fs/promises";
+import { resolve } from "node:path";
+
+import { AgentExpectationUsageError, AgentUsageError } from "./errors.js";
+import type { AgentAttachmentExpectationInput, AgentExpectationsInput } from "./model.js";
+import { parseAgentExpectations } from "./model.js";
+import { isPathInside } from "./paths.js";
+
+type SingleStringOptionValue = string | string[] | undefined;
+
+export type AgentInlineExpectationOptions = {
+  goal?: SingleStringOptionValue; // --goal, at most one occurrence
+  taskId?: SingleStringOptionValue; // --task-id, at most one occurrence
+  expectTests?: SingleStringOptionValue; // --expect-tests, non-negative integer
+  expectLabels?: string[]; // --expect-label name=value, repeatable
+  expectEnvironments?: string[]; // --expect-env, repeatable
+  expectFullNames?: string[]; // --expect-test, repeatable
+  expectPrefixes?: string[]; // --expect-prefix, repeatable
+  forbidLabels?: string[]; // --forbid-label name=value, repeatable
+  expectStepContains?: string[]; // --expect-step-containing, repeatable
+  expectSteps?: SingleStringOptionValue; // --expect-steps, positive integer
+  expectAttachments?: SingleStringOptionValue; // --expect-attachments, positive integer
+  expectAttachmentFilters?: string[]; // --expect-attachment file name or key=value filter, repeatable
+};
+
+const readNonNegativeInteger = (value: string | undefined, optionName: string): number | undefined => {
+  // An omitted option is not an error; the caller decides what absence means.
+  if (value === undefined) {
+    return undefined;
+  }
+
+  // Digits only: rejects signs, decimals, exponents and whitespace that Number() alone would accept.
+  const looksNumeric = /^\d+$/.test(value);
+  const parsed = looksNumeric ? Number(value) : Number.NaN;
+
+  // The safe-integer check also rejects digit strings too large to be represented exactly.
+  if (!Number.isSafeInteger(parsed)) {
+    throw new AgentExpectationUsageError(`${optionName} must be a non-negative integer`, optionName);
+  }
+
+  return parsed;
+};
+
+const readPositiveInteger = (value: string | undefined, optionName: string): number | undefined => {
+  // An omitted option stays undefined so callers can tell "not set" from a real number.
+  if (value === undefined) {
+    return undefined;
+  }
+
+  // The leading [1-9] rules out zero and zero-padded input as well as signs and decimals.
+  const wellFormed = /^[1-9]\d*$/.test(value);
+  const parsed = wellFormed ? Number(value) : Number.NaN;
+
+  // Digit strings beyond the safe-integer range fail here with the same message.
+  if (!Number.isSafeInteger(parsed)) {
+    throw new AgentExpectationUsageError(`${optionName} must be a positive integer`, optionName);
+  }
+
+  return parsed;
+};
+
+const readSingleStringOption = (value: SingleStringOptionValue, optionName: string): string | undefined => {
+  // A repeated flag arrives as an array, a single flag as a plain string, an absent flag as undefined.
+  const occurrences = typeof value === "string" ? [value] : Array.isArray(value) ? value : [];
+
+  if (occurrences.length === 0) {
+    return undefined;
+  }
+
+  if (occurrences.length > 1) {
+    throw new AgentExpectationUsageError(`Duplicate ${optionName} is not allowed`, optionName);
+  }
+
+  const trimmed = occurrences[0].trim();
+  if (trimmed.length === 0) {
+    throw new AgentExpectationUsageError(`${optionName} value must be non-empty`, optionName);
+  }
+
+  return trimmed;
+};
+
+/**
+ * Splits a `name=value` command-line argument into its trimmed parts.
+ *
+ * @param value raw argument text
+ * @param optionName flag name used in the error message and recorded on the thrown error
+ * @param example description of the accepted form, appended to the error message
+ * @throws AgentExpectationUsageError unless the argument is exactly one non-empty name and one non-empty value
+ */
+const parseNameValue = (value: string, optionName: string, example: string) => {
+  // Exactly one "=" is accepted, so neither the name nor the value may itself contain "=".
+  const segments = value.split("=");
+  const [rawName, rawValue] = segments.length === 2 ? segments : ["", ""];
+  const name = rawName.trim();
+  const filterValue = rawValue.trim();
+
+  // A wrong segment count and a blank half share one message that echoes the raw input.
+  if (name.length === 0 || filterValue.length === 0) {
+    throw new AgentExpectationUsageError(
+      `Invalid ${optionName} ${JSON.stringify(value)}. Expected ${example}`,
+      optionName,
+    );
+  }
+
+  return { name, value: filterValue };
+};
+
+const addLabelValues = (target: Record<string, string[]>, values: string[] | undefined, optionName: string) => {
+  for (const rawValue of values ?? []) {
+    const { name, value } = parseNameValue(rawValue, optionName, "the form name=value, for example module=cli");
+    // Own properties only: a label named "constructor" or "toString" must not resolve to Object.prototype.
+    const current = Object.prototype.hasOwnProperty.call(target, name) ? target[name] : [];
+    if (!current.includes(value)) {
+      current.push(value); // repeated name=value pairs collapse into one entry
+    }
+    // defineProperty adds an own key even for "__proto__", where assignment would swap the prototype instead.
+    Object.defineProperty(target, name, { value: current, enumerable: true, writable: true, configurable: true });
+  }
+};
+
+const normalizeStringValues = (values: string[] | undefined, optionName: string) => {
+  // Options that were never passed normalize to an empty list rather than undefined.
+  const provided = values ?? [];
+
+  // map() visits entries in order, so the first blank entry is the one that raises.
+  return provided.map((entry) => {
+    const trimmed = entry.trim();
+
+    if (trimmed.length === 0) {
+      throw new AgentExpectationUsageError(`${optionName} value must be non-empty`, optionName);
+    }
+
+    return trimmed;
+  });
+};
+
+/**
+ * Builds an expectations object from inline CLI options, or returns `undefined` when no option was given.
+ * Malformed, duplicated or contradictory options throw AgentExpectationUsageError naming the offending flag.
+ */
+export const buildAgentInlineExpectations = (
+  options: AgentInlineExpectationOptions,
+): AgentExpectationsInput | undefined => {
+  const expectedLabels: Record<string, string[]> = {};
+  const forbiddenLabels: Record<string, string[]> = {};
+  const expected: NonNullable<AgentExpectationsInput["expected"]> = {};
+  const forbidden: NonNullable<AgentExpectationsInput["forbidden"]> = {};
+  const evidence: NonNullable<AgentExpectationsInput["evidence"]> = {};
+  const attachmentFilters: AgentAttachmentExpectationInput[] = [];
+  // Label, count and text options are parsed (and validated) before any result object is populated.
+  addLabelValues(expectedLabels, options.expectLabels, "--expect-label");
+  addLabelValues(forbiddenLabels, options.forbidLabels, "--forbid-label");
+
+  const expectTests = readNonNegativeInteger(readSingleStringOption(options.expectTests, "--expect-tests"), "--expect-tests");
+  const expectSteps = readPositiveInteger(readSingleStringOption(options.expectSteps, "--expect-steps"), "--expect-steps");
+  const expectAttachments = readPositiveInteger(
+    readSingleStringOption(options.expectAttachments, "--expect-attachments"),
+    "--expect-attachments",
+  );
+  const expectedEnvironments = normalizeStringValues(options.expectEnvironments, "--expect-env");
+  const expectedFullNames = normalizeStringValues(options.expectFullNames, "--expect-test");
+  const expectedPrefixes = normalizeStringValues(options.expectPrefixes, "--expect-prefix");
+  const expectedStepContains = normalizeStringValues(options.expectStepContains, "--expect-step-containing");
+  // --expect-attachment takes a bare file name or a key=value filter; keys ignore case and treat "_" as "-".
+  for (const rawValue of options.expectAttachmentFilters ?? []) {
+    const parsed = rawValue.includes("=")
+      ? parseNameValue(
+          rawValue,
+          "--expect-attachment",
+          "a file name or a filter such as name=trace.zip or content-type=application/json",
+        )
+      : { name: "name", value: rawValue.trim() };
+    const normalizedName = parsed.name.toLowerCase().replace(/_/g, "-");
+
+    if (!parsed.value) {
+      throw new AgentExpectationUsageError(
+        "Invalid --expect-attachment value. Expected a non-empty file name or filter such as name=trace.zip",
+        "--expect-attachment",
+      );
+    }
+
+    if (normalizedName === "name") {
+      attachmentFilters.push({ name: parsed.value });
+    } else if (normalizedName === "content-type" || normalizedName === "type") {
+      attachmentFilters.push({ content_type: parsed.value });
+    } else {
+      throw new AgentExpectationUsageError(
+        `Invalid --expect-attachment key ${JSON.stringify(parsed.name)}. Expected name or content-type`,
+        "--expect-attachment",
+      );
+    }
+  }
+  // Copy only the controls that were actually supplied, so absent options leave no keys behind.
+  if (expectTests !== undefined) {
+    expected.test_count = expectTests;
+  }
+
+  if (expectedEnvironments.length) {
+    expected.environments = expectedEnvironments;
+  }
+
+  if (expectedFullNames.length) {
+    expected.full_names = expectedFullNames;
+  }
+
+  if (expectedPrefixes.length) {
+    expected.full_name_prefixes = expectedPrefixes;
+  }
+
+  if (Object.keys(expectedLabels).length) {
+    expected.label_values = expectedLabels;
+  }
+
+  if (Object.keys(forbiddenLabels).length) {
+    forbidden.label_values = forbiddenLabels;
+  }
+
+  if (expectSteps !== undefined) {
+    evidence.min_steps = expectSteps;
+  }
+
+  if (expectAttachments !== undefined) {
+    evidence.min_attachments = expectAttachments;
+  }
+
+  if (expectedStepContains.length) {
+    evidence.step_name_contains = expectedStepContains;
+  }
+
+  if (attachmentFilters.length) {
+    evidence.attachments = attachmentFilters;
+  }
+  // Expecting zero tests contradicts every control that needs at least one test to be observed.
+  if (
+    expected.test_count === 0 &&
+    (expected.environments?.length ||
+      expected.full_names?.length ||
+      expected.full_name_prefixes?.length ||
+      Object.keys(expected.label_values ?? {}).length ||
+      evidence.step_name_contains?.length ||
+      evidence.min_steps !== undefined ||
+      evidence.min_attachments !== undefined ||
+      evidence.attachments?.length)
+  ) {
+    throw new AgentExpectationUsageError(
+      "--expect-tests 0 cannot be combined with positive scope or evidence expectations",
+      "--expect-tests",
+    );
+  }
+
+  const goal = readSingleStringOption(options.goal, "--goal");
+  const taskId = readSingleStringOption(options.taskId, "--task-id");
+  const inlineExpectations: AgentExpectationsInput = {
+    ...(goal ? { goal } : {}),
+    ...(taskId ? { task_id: taskId } : {}),
+    ...(Object.keys(expected).length ? { expected } : {}),
+    ...(Object.keys(forbidden).length ? { forbidden } : {}),
+    ...(Object.keys(evidence).length ? { evidence } : {}),
+  };
+
+  return Object.keys(inlineExpectations).length ? inlineExpectations : undefined;
+};
+
+export const validateAgentExpectationsFile = async (params: {
+  cwd: string;
+  output?: string;
+  expectations?: string;
+}) => {
+  const { cwd, output, expectations } = params;
+  // Nothing to validate when no expectations file was requested.
+  if (!expectations) {
+    return;
+  }
+
+  const expectationsPath = resolve(cwd, expectations);
+  // Both paths are resolved against cwd before the containment check.
+  if (output) {
+    const outputDir = resolve(cwd, output);
+
+    if (isPathInside(outputDir, expectationsPath)) {
+      throw new AgentUsageError(
+        `--expectations path ${JSON.stringify(expectationsPath)} must not be inside the agent output directory ${JSON.stringify(outputDir)}`,
+      );
+    }
+  }
+  // The parsed result is discarded: this only proves the file can be read and parsed; failures are rethrown.
+  try {
+    parseAgentExpectations(await readFile(expectationsPath, "utf-8"));
+  } catch (error) {
+    throw new AgentExpectationUsageError(
+      `Could not load expectations from ${expectationsPath}: ${(error as Error).message}`,
+      "--expectations",
+    );
+  }
+};
diff --git a/packages/plugin-agent/src/invalid-output.ts b/packages/plugin-agent/src/invalid-output.ts
new file mode 100644
index 00000000000..db7092dd1d2
--- /dev/null
+++ b/packages/plugin-agent/src/invalid-output.ts
@@ -0,0 +1,259 @@
+import { mkdir, rm, writeFile } from "node:fs/promises";
+import { dirname, join } from "node:path";
+
+import { AgentExpectationUsageError } from "./errors.js";
+
+const isFileNotFoundError = (error: unknown): error is NodeJS.ErrnoException =>
+  typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT"; // ENOENT only
+
+/**
+ * Returns a status counter with every bucket set to zero.
+ *
+ * Each call creates a new object rather than returning a shared constant.
+ */
+const emptyAgentStats = () => {
+  return { total: 0, failed: 0, broken: 0, skipped: 0, unknown: 0, passed: 0 };
+};
+
+// Pretty-prints value as JSON (2-space indent, trailing newline) and hands it to writeText.
+const writeJson = async (path: string, value: unknown) => {
+  await writeText(path, `${JSON.stringify(value, null, 2)}\n`);
+};
+
+const writeText = async (path: string, value: string) => {
+  await mkdir(dirname(path), { recursive: true }); // create missing parent directories first
+  await writeFile(path, value, "utf-8"); // replaces any existing file at path
+};
+
+const writeJsonl = async (path: string, values: unknown[]) => {
+  await writeText(path, values.map((value) => `${JSON.stringify(value) ?? ""}\n`).join("")); // one JSON value per line
+};
+
+export const createInvalidExpectationFinding = (params: { message: string; sourceOption?: string }) => {
+  const action = "Fix expectation syntax before using the run as validation.";
+  // The same details appear under the v2 field names, inside `legacy`, and as flat snake_case fields.
+  return {
+    schema_version: "allure-agent-finding/v2",
+    check_id: "expectations-invalid",
+    instance_id: "F0001",
+    severity: "high",
+    impact: "reject",
+    confidence: 1,
+    category: "bootstrap",
+    title: "Expectation input is invalid",
+    message: params.message,
+    subject: {
+      type: "run",
+    },
+    expected: params.sourceOption ? { option: params.sourceOption } : { expectations: "valid M1 expectation input" },
+    observed: {
+      error: params.message,
+      execution_skipped: true,
+    },
+    evidence: {
+      paths: ["manifest/run.json"],
+    },
+    action,
+    source: params.sourceOption
+      ? {
+          kind: "inline-option",
+          option: params.sourceOption,
+        }
+      : undefined,
+    legacy: {
+      finding_id: "F0001",
+      check_name: "expectations-invalid",
+      remediation_hint: action,
+    },
+    finding_id: "F0001",
+    subject_ref: "run",
+    subject_type: "run",
+    check_name: "expectations-invalid",
+    explanation: "The agent expectation controls could not be parsed, so the test command was not executed.",
+    evidence_paths: ["manifest/run.json"],
+    remediation_hint: action,
+    expected_reference: params.sourceOption,
+  };
+};
+
+export const writeInvalidAgentExpectationOutput = async (params: {
+  outputDir: string;
+  command: string;
+  error: AgentExpectationUsageError;
+}) => {
+  const { outputDir, command, error } = params;
+  const generatedAt = new Date().toISOString();
+  const finding = createInvalidExpectationFinding({
+    message: error.message,
+    sourceOption: error.sourceOption,
+  });
+  const stats = emptyAgentStats();
+  // Start from a clean directory: a missing directory is ignored, any other removal failure is logged only.
+  try {
+    await rm(outputDir, { recursive: true });
+  } catch (rmError) {
+    if (!isFileNotFoundError(rmError)) {
+      console.error("could not clean output directory", rmError);
+    }
+  }
+  // Run manifest for a command that never ran: zero counts, null exit codes and a single finding.
+  const runManifest = {
+    schema_version: "allure-agent-output/v1",
+    report_uuid: null,
+    generated_at: generatedAt,
+    phase: "done",
+    command,
+    actual_exit_code: null,
+    original_exit_code: null,
+    exit_code: null,
+    summary: {
+      stats,
+      modeled_stats: stats,
+      unmodeled_from_stats: stats,
+      compact: {
+        visible_results: 0,
+        logical_tests: 0,
+        unmodeled_visible_results: 0,
+        runner_failures_outside_logical_tests: 0,
+        completeness: "complete",
+        findings: 1,
+      },
+      duration_ms: {
+        total: 0,
+        average: 0,
+        max: 0,
+      },
+      environments: [],
+    },
+    modeling: {
+      completeness: "complete",
+      reasons: ["test command was skipped because agent expectations were invalid"],
+      modeledStats: stats,
+      unmodeledFromStats: stats,
+      runnerFailures: {
+        total: 0,
+        globalErrors: 0,
+        stderrActionable: 0,
+        samples: [],
+      },
+      stderr: {
+        actionableCount: 0,
+        actionableSamples: [],
+        noisyWarningCount: 0,
+        noisyWarningSamples: [],
+      },
+      compact: {
+        visible_results: 0,
+        logical_tests: 0,
+        unmodeled_visible_results: 0,
+        runner_failures_outside_logical_tests: 0,
+        completeness: "complete",
+      },
+    },
+    paths: {
+      index_md: "index.md",
+      agents_md: "AGENTS.md",
+      tests_manifest: "manifest/tests.jsonl",
+      findings_manifest: "manifest/findings.jsonl",
+      test_events_manifest: "manifest/test-events.jsonl",
+      expected_manifest: null,
+      process_logs: {
+        stdout: null,
+        stderr: null,
+      },
+    },
+    expectations_present: false,
+    expectations: null,
+    expectation_result: {
+      schema_version: "allure-agent-expectation-result/v1",
+      status: "unavailable",
+      impact: "reject",
+      source: {
+        kind: "none",
+        path: null,
+      },
+      recognized_control_count: 0,
+      unsupported_controls: [],
+      degraded_controls: [],
+      summary: {
+        expected_tests: 0,
+        observed_tests: 0,
+        missing_expected: 0,
+        forbidden_observed: 0,
+        unexpected_observed: 0,
+        evidence_mismatches: 0,
+      },
+      finding_ids: ["F0001"],
+    },
+    check_summary: {
+      total: 1,
+      countsBySeverity: {
+        high: 1,
+        warning: 0,
+        info: 0,
+      },
+      countsByCategory: {
+        bootstrap: 1,
+        scope: 0,
+        metadata: 0,
+        evidence: 0,
+        smells: 0,
+      },
+    },
+    agent_context: {
+      agent_name: null,
+      loop_id: null,
+      task_id: null,
+      conversation_id: null,
+    },
+  };
+  const index = [
+    "# Allure Agent Run",
+    "",
+    "- Phase: done",
+    `- Command: ${command || "(not executed)"}`,
+    "- Exit code: not available",
+    "",
+    "## Expectation Result",
+    "",
+    "- Status: unavailable",
+    "- Impact: reject",
+    "- Recognized controls: 0",
+    "- Summary: expectation input was invalid; test execution was skipped",
+    "",
+    "## Findings",
+    "",
+    `- [HIGH][reject][bootstrap] ${finding.title}`,
+    `  Expected: ${error.sourceOption ?? "valid M1 expectation input"}`,
+    `  Observed: ${error.message}`,
+    `  Action: ${finding.action}`,
+    "",
+    "## Machine-Readable Artifacts",
+    "",
+    "- Run Manifest: [manifest/run.json](manifest/run.json)",
+    "- Findings Manifest: [manifest/findings.jsonl](manifest/findings.jsonl)",
+    "",
+  ].join("\n");
+
+  await Promise.all([
+    writeJson(join(outputDir, "manifest", "run.json"), runManifest),
+    writeJsonl(join(outputDir, "manifest", "findings.jsonl"), [finding]),
+    writeJsonl(join(outputDir, "manifest", "tests.jsonl"), []), // written empty: no tests were observed
+    writeJsonl(join(outputDir, "manifest", "test-events.jsonl"), []), // written empty: no events were recorded
+    writeText(join(outputDir, "index.md"), index),
+    writeText(
+      join(outputDir, "AGENTS.md"),
+      [
+        "# Allure Agent Output",
+        "",
+        "Read `index.md`, `manifest/run.json`, and `manifest/findings.jsonl` before using this run.",
+        "",
+      ].join("\n"),
+    ),
+  ]);
+
+  return {
+    outputDir,
+    generatedAt,
+  };
+};
diff --git a/packages/plugin-agent/src/model.ts b/packages/plugin-agent/src/model.ts
index 24374ceda4a..47a32c33e04 100644
--- a/packages/plugin-agent/src/model.ts
+++ b/packages/plugin-agent/src/model.ts
@@ -1,3 +1,52 @@
+import { parse } from "yaml";
+
+export type AgentExpectationSelectorInput = {
+  environments?: string[];
+  full_names?: string[];
+  full_name_prefixes?: string[];
+  label_values?: Record<string, string | string[]>;
+  test_count?: number;
+};
+
+export type AgentAttachmentExpectationInput = {
+  name?: string;
+  content_type?: string;
+};
+
+export type AgentEvidenceExpectationInput = {
+  required?: boolean;
+  min_steps?: number;
+  min_attachments?: number;
+  step_name_contains?: string[];
+  attachments?: AgentAttachmentExpectationInput[];
+};
+
+export type AgentExpectationsInput = {
+  goal?: string;
+  task_id?: string;
+  expected?: AgentExpectationSelectorInput;
+  forbidden?: AgentExpectationSelectorInput;
+  evidence?: AgentEvidenceExpectationInput;
+  notes?: string | string[];
+};
+
 export type AgentPluginOptions = {
   outputDir?: string;
+  expectationsPath?: string;
+  expectations?: AgentExpectationsInput;
+  command?: string;
+  agentName?: string;
+  loopId?: string;
+  taskId?: string;
+  conversationId?: string;
+};
+
+export const parseAgentExpectations = (rawContent: string): AgentExpectationsInput => {
+  const parsed = parse(rawContent) as AgentExpectationsInput;
+  // The cast is unchecked: only the top-level shape is verified below, individual fields are not.
+  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
+    throw new Error("Expected a YAML or JSON object");
+  }
+
+  return parsed;
 };
diff --git a/packages/plugin-agent/src/paths.ts b/packages/plugin-agent/src/paths.ts
new file mode 100644
index 00000000000..d181c06e490
--- /dev/null
+++ b/packages/plugin-agent/src/paths.ts
@@ -0,0 +1,14 @@
+import { isAbsolute, join, relative, sep } from "node:path";
+
+export const isPathInside = (parentPath: string, candidatePath: string) => {
+  const rel = relative(parentPath, candidatePath);
+  // Inside = same path or a descendant; "..", "../x" and absolute results (other Windows drive) are outside.
+  return rel === "" || (rel !== ".." && !rel.startsWith(`..${sep}`) && !isAbsolute(rel));
+};
+
+export const resolveAgentIndexPath = (outputDir: string) => join(outputDir, "index.md");
+
+export const formatAgentOutputLinks = (outputDir: string) => {
+  const indexPath = resolveAgentIndexPath(outputDir); // index.md inside outputDir
+  return [`agent output: ${outputDir}`, `agent index: ${indexPath}`];
+};
diff --git a/packages/plugin-agent/src/plugin.ts b/packages/plugin-agent/src/plugin.ts
index 588fbcdd277..fd84c668ba5 100644
--- a/packages/plugin-agent/src/plugin.ts
+++ b/packages/plugin-agent/src/plugin.ts
@@ -1,6 +1,6 @@
 import { appendFile, mkdir, readFile, rename, rm, writeFile } from "node:fs/promises";
 import { basename, dirname, extname, join, relative, resolve } from "node:path";
-import process, { env } from "node:process";
+import process from "node:process";
 
 import {
   type AttachmentLink,
@@ -24,21 +24,18 @@ import type {
   RealtimeSubscriber,
   ResultFile,
 } from "@allurereport/plugin-api";
-import { parse } from "yaml";
 
 import { renderAgentsGuide } from "./guidance.js";
-import type { AgentPluginOptions } from "./model.js";
-
-const AGENT_OUTPUT_ENV = "ALLURE_AGENT_OUTPUT";
-const AGENT_EXPECTATIONS_ENV = "ALLURE_AGENT_EXPECTATIONS";
-const AGENT_COMMAND_ENV = "ALLURE_AGENT_COMMAND";
-const AGENT_PROJECT_ROOT_ENV = "ALLURE_AGENT_PROJECT_ROOT";
-const AGENT_NAME_ENV = "ALLURE_AGENT_NAME";
-const AGENT_LOOP_ID_ENV = "ALLURE_AGENT_LOOP_ID";
-const AGENT_TASK_ID_ENV = "ALLURE_AGENT_TASK_ID";
-const AGENT_CONVERSATION_ID_ENV = "ALLURE_AGENT_CONVERSATION_ID";
+import type {
+  AgentEvidenceExpectationInput,
+  AgentExpectationSelectorInput,
+  AgentExpectationsInput,
+  AgentPluginOptions,
+} from "./model.js";
+import { parseAgentExpectations } from "./model.js";
+
 const AGENT_SCHEMA_VERSION = "allure-agent-output/v1";
-const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest", "project"] as const;
+const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest"] as const;
 const STATUS_ORDER: Record<TestStatus, number> = {
   failed: 0,
   broken: 1,
@@ -86,6 +83,7 @@ const STACK_TRACE_LINE_PATTERN = /^\s*(at\s+|file:|node:internal|Caused by:\s*$|
 
 type FindingSeverity = "info" | "warning" | "high";
 type FindingCategory = "bootstrap" | "scope" | "metadata" | "evidence" | "smells";
+type FindingImpact = "reject" | "iterate" | "advisory";
 type ScopeMatch = "match" | "unexpected" | "forbidden" | "unknown";
 type ModelingCompleteness = "complete" | "partial";
 type RunnerIssueKind = "import" | "suite-load" | "setup" | "global-error";
@@ -191,10 +189,15 @@ type AgentRuntimeState = {
   store: AllureStore;
   generatedAt: string;
   command?: string;
+  agentContext: {
+    agentName?: string;
+    loopId?: string;
+    taskId?: string;
+    conversationId?: string;
+  };
   createFinding: ReturnType<typeof createFindingFactory>;
   expectations?: LoadedExpectations;
   expectationLoadFindings: AgentFinding[];
-  projectGuide?: LoadedProjectGuide;
   unsubscribers: Array<() => void>;
   queue: Promise<void>;
   lastError?: Error;
@@ -233,29 +236,23 @@ type AgentFinding = {
   subject: string;
   subjectType: "run" | "test";
   severity: FindingSeverity;
+  impact?: FindingImpact;
   category: FindingCategory;
   checkName: string;
+  title?: string;
   message: string;
   explanation: string;
   evidencePaths: string[];
   remediationHint: string;
   expectedReference?: string;
   confidence?: number;
-};
-
-type ExpectationSelectorInput = {
-  environments?: string[];
-  full_names?: string[];
-  full_name_prefixes?: string[];
-  label_values?: Record<string, string | string[]>;
-};
-
-type ExpectationsInput = {
-  goal?: string;
-  task_id?: string;
-  expected?: ExpectationSelectorInput;
-  forbidden?: ExpectationSelectorInput;
-  notes?: string | string[];
+  expected?: Record<string, unknown>;
+  observed?: Record<string, unknown>;
+  action?: string;
+  source?: Record<string, unknown>;
+  limits?: string;
+  affected?: Record<string, unknown>;
+  moreCount?: number;
 };
 
 type NormalizedExpectationSelectors = {
@@ -263,22 +260,32 @@ type NormalizedExpectationSelectors = {
   fullNames: string[];
   fullNamePrefixes: string[];
   labelValues: Record<string, string[]>;
+  testCount?: number;
+};
+
+type NormalizedAttachmentExpectation = {
+  name?: string;
+  contentType?: string;
+};
+
+type NormalizedEvidenceExpectations = {
+  minSteps?: number;
+  minAttachments?: number;
+  stepNameContains: string[];
+  attachments: NormalizedAttachmentExpectation[];
 };
 
 type LoadedExpectations = {
-  sourcePath: string;
+  sourcePath?: string;
+  sourceKind: "file" | "inline";
   relativePath: string;
-  raw: ExpectationsInput;
+  raw: AgentExpectationsInput;
   goal?: string;
   taskId?: string;
   notes: string[];
   expected: NormalizedExpectationSelectors;
   forbidden: NormalizedExpectationSelectors;
-};
-
-type LoadedProjectGuide = {
-  sourcePath: string;
-  relativePath: string;
+  evidence: NormalizedEvidenceExpectations;
 };
 
 type ScopeEvaluation = {
@@ -393,11 +400,18 @@ const normalizeLabelValues = (value: unknown) => {
   );
 };
 
-const normalizeSelectors = (input?: ExpectationSelectorInput): NormalizedExpectationSelectors => ({
+// Both helpers pass through integers in range and map everything else (non-numbers, fractions, NaN) to undefined.
+const normalizeNonNegativeInteger = (value: unknown) =>
+  typeof value === "number" && Number.isInteger(value) && value >= 0 ? value : undefined;
+const normalizePositiveInteger = (value: unknown) =>
+  typeof value === "number" && Number.isInteger(value) && value > 0 ? value : undefined;
+
+const normalizeSelectors = (input?: AgentExpectationSelectorInput): NormalizedExpectationSelectors => ({
   environments: normalizeStringArray(input?.environments),
   fullNames: normalizeStringArray(input?.full_names),
   fullNamePrefixes: normalizeStringArray(input?.full_name_prefixes),
   labelValues: normalizeLabelValues(input?.label_values),
+  testCount: normalizeNonNegativeInteger(input?.test_count),
 });
 
 const hasSelector = (selectors: NormalizedExpectationSelectors) =>
@@ -406,6 +420,25 @@ const hasSelector = (selectors: NormalizedExpectationSelectors) =>
   selectors.fullNamePrefixes.length > 0 ||
   Object.keys(selectors.labelValues).length > 0;
 
+const normalizeEvidenceExpectations = (input?: AgentEvidenceExpectationInput): NormalizedEvidenceExpectations => ({
+  minSteps: normalizePositiveInteger(input?.min_steps),
+  minAttachments: normalizePositiveInteger(input?.min_attachments),
+  stepNameContains: normalizeStringArray(input?.step_name_contains),
+  attachments: (Array.isArray(input?.attachments) ? input.attachments : []).flatMap((attachment) => {
+    if (!attachment || typeof attachment !== "object") {
+      return [];
+    }
+    // Keep a field only when it is a non-empty string; a filter with neither field is dropped entirely.
+    const name = typeof attachment.name === "string" && attachment.name.length > 0 ? attachment.name : undefined;
+    const contentType =
+      typeof attachment.content_type === "string" && attachment.content_type.length > 0
+        ? attachment.content_type
+        : undefined;
+
+    return name || contentType ? [{ ...(name ? { name } : {}), ...(contentType ? { contentType } : {}) }] : [];
+  }),
+});
+
 const normalizeNotes = (value: string | string[] | undefined) => {
   if (typeof value === "string") {
     return value.length > 0 ? [value] : [];
@@ -591,6 +624,36 @@ const mergeStepSummaries = (items: StepTreeSummary[]) =>
     },
   );
 
+// Flattens a step tree in pre-order.
+// Every returned record carries the step name plus the chain of names from the outermost step down to it.
+// Nodes rejected by isStep are skipped along with their children.
+const collectStepNames = (steps: TestStepResult[], path: string[] = []): Array<{ name: string; path: string[] }> =>
+  steps.flatMap((node) => {
+    if (!isStep(node)) {
+      return [];
+    }
+
+    const ownPath = [...path, node.name];
+    const self = { name: node.name, path: ownPath };
+
+    // Leaf steps contribute only themselves; otherwise descendants follow directly after their parent.
+    if (node.steps.length === 0) {
+      return [self];
+    }
+
+    return [self, ...collectStepNames(node.steps, ownPath)];
+  });
+
+const testStepContainsText = (entry: TestEntry, expectedText: string) => {
+  const expected = normalizeStepText(expectedText);
+  // An empty needle would match every step name via includes(); it is reported as no match instead.
+  if (!expected) {
+    return false;
+  }
+  // NOTE(review): only attempts[0] is inspected and it is assumed to exist — confirm against how TestEntry is built.
+  return collectStepNames(entry.attempts[0].tr.steps).some(({ name }) => normalizeStepText(name).includes(expected));
+};
+
 const buildAttemptSignature = (attempt: AttemptRecord) =>
   JSON.stringify({
     status: attempt.tr.status,
@@ -699,6 +762,8 @@ const summarizeStatusCounts = (counts: StatusCounts) =>
 
 const normalizeLogLine = (value: string) => value.replace(/\s+/g, " ").trim();
 
+const normalizeStepText = (value: string) => value.replace(/\s+/g, " ").trim().toLocaleLowerCase();
+
 const normalizeWarningLine = (value: string) =>
   normalizeLogLine(value).replace(/^\(node:\d+\)\s+Warning:\s*/i, "Warning: ");
 
@@ -1213,12 +1278,16 @@ const renderModelingSummary = (modeling: ModelingSummary) => {
 };
 
 const renderSelectorSummary = (title: string, selectors: NormalizedExpectationSelectors) => {
-  if (!hasSelector(selectors)) {
+  if (!hasSelector(selectors) && selectors.testCount === undefined) {
     return `- ${title}: None`;
   }
 
   const parts: string[] = [];
 
+  if (selectors.testCount !== undefined) {
+    parts.push(`test count: ${selectors.testCount}`);
+  }
+
   if (selectors.environments.length) {
     parts.push(`environments: ${selectors.environments.join(", ")}`);
   }
@@ -1242,6 +1311,39 @@ const renderSelectorSummary = (title: string, selectors: NormalizedExpectationSe
   return `- ${title}: ${parts.join(" | ")}`;
 };
 
+/**
+ * Renders the evidence expectations as one markdown bullet.
+ * Controls that are unset are left out; when none is set the bullet reads "None".
+ */
+const renderEvidenceExpectationSummary = (evidence: NormalizedEvidenceExpectations) => {
+  const { minSteps, minAttachments, stepNameContains, attachments } = evidence;
+
+  // One attachment filter becomes "name=…, content-type=…", leaving out whichever field is unset.
+  const describeAttachment = ({ name, contentType }: NormalizedAttachmentExpectation) => {
+    const fields: string[] = [];
+
+    if (name) {
+      fields.push(`name=${name}`);
+    }
+
+    if (contentType) {
+      fields.push(`content-type=${contentType}`);
+    }
+
+    return fields.join(", ");
+  };
+
+  // Segments keep a fixed order: step minimum, attachment minimum, step text, attachment filters.
+  const segments = [
+    minSteps !== undefined ? `meaningful steps per test: >= ${minSteps}` : undefined,
+    minAttachments !== undefined ? `attachments per test: >= ${minAttachments}` : undefined,
+    stepNameContains.length ? `step contains: ${stepNameContains.join("; ")}` : undefined,
+    attachments.length ? `attachments: ${attachments.map(describeAttachment).join("; ")}` : undefined,
+  ].filter((segment): segment is string => segment !== undefined);
+
+  return `- Evidence expectations: ${segments.length ? segments.join(" | ") : "None"}`;
+};
+
 const buildCheckSummary = (findings: AgentFinding[]) => {
   const countsBySeverity = {
     high: 0,
@@ -1268,6 +1370,281 @@ const buildCheckSummary = (findings: AgentFinding[]) => {
   };
 };
 
+// Check ids for expected selectors that were not observed; their findings are tallied as
+// `missing_expected` in the expectation result summary.
+const MISSING_EXPECTED_CHECK_IDS = new Set<string>([
+  "expected-test-missing",
+  "expected-prefix-missing",
+  "expected-label-missing",
+  "expected-environment-missing",
+] as const);
+
+// Check ids for unmet evidence expectations (steps, attachments); their findings are tallied as
+// `evidence_mismatches` in the expectation result summary.
+const EVIDENCE_MISMATCH_CHECK_IDS = new Set<string>([
+  "expected-step-containing-missing",
+  "insufficient-expected-steps",
+  "insufficient-expected-attachments",
+  "missing-expected-attachment",
+] as const);
+
+// Every check id treated as an expectation finding. The two groups above are spliced in rather
+// than repeated, so the three sets cannot drift apart.
+const EXPECTATION_CHECK_IDS = new Set<string>([
+  "expectations-invalid",
+  "expectations-empty",
+  "expectations-unsupported-control",
+  "expectations-weak-goal",
+  ...MISSING_EXPECTED_CHECK_IDS,
+  "expected-count-mismatch",
+  ...EVIDENCE_MISMATCH_CHECK_IDS,
+  "forbidden-label-observed",
+  "no-tests-observed",
+]);
+
+// Total number of values across all label names, e.g. { a: ["x", "y"], b: ["z"] } gives 3.
+const countLabelValues = (labelValues: Record<string, string[]>) => Object.values(labelValues).flat().length;
+
+/**
+ * Counts every control recognized in the loaded expectations.
+ *
+ * `goal` and `taskId` each count once when set; the remaining selectors and evidence
+ * controls are the same ones summed by `runtimeMatchingControlCount`.
+ *
+ * @param expectations Loaded expectations, or undefined when none were loaded.
+ * @returns Number of recognized controls; 0 when `expectations` is undefined.
+ */
+const recognizedControlCount = (expectations?: LoadedExpectations) => {
+  if (!expectations) {
+    return 0;
+  }
+
+  const { goal, taskId } = expectations;
+  const intentControls = (goal ? 1 : 0) + (taskId ? 1 : 0);
+
+  // Delegate so the list of runtime-matched controls is maintained in one place only.
+  return intentControls + runtimeMatchingControlCount(expectations);
+};
+
+// Counts the controls matched against observed results; `goal` and `taskId` are deliberately not included.
+const runtimeMatchingControlCount = (expectations?: LoadedExpectations) => {
+  if (!expectations) {
+    return 0;
+  }
+
+  const { expected, forbidden, evidence } = expectations;
+  // Optional scalar controls count once when set; 0 is a valid value, hence the undefined check.
+  const scalars = [expected.testCount, evidence.minSteps, evidence.minAttachments];
+  const scalarCount = scalars.filter((value) => value !== undefined).length;
+
+  // List and label controls count once per entry.
+  const selectorCount = expected.environments.length + expected.fullNames.length + expected.fullNamePrefixes.length;
+  const labelCount = countLabelValues(expected.labelValues) + countLabelValues(forbidden.labelValues);
+  const evidenceCount = evidence.stepNameContains.length + evidence.attachments.length;
+
+  return scalarCount + selectorCount + labelCount + evidenceCount;
+};
+
+/**
+ * Converts loaded (normalized, camelCase) expectations into the snake_case expectations model.
+ *
+ * Unset scalars, empty lists, and empty label maps are omitted, and the `expected`, `forbidden`,
+ * and `evidence` sections are dropped entirely when they end up without keys. Lists and label
+ * maps are passed through by reference, not copied.
+ *
+ * @param expectations Loaded expectations to convert.
+ * @returns Plain object containing only the populated controls; an empty object when
+ *   nothing is set.
+ */
+const toExpectationModel = (expectations: LoadedExpectations) => {
+  const { goal, taskId, notes } = expectations;
+  const { testCount, environments, fullNames, fullNamePrefixes, labelValues } = expectations.expected;
+  const { labelValues: forbiddenLabelValues } = expectations.forbidden;
+  const { minSteps, minAttachments, stepNameContains, attachments } = expectations.evidence;
+
+  // Numeric controls are compared with `undefined` (not truthiness) so an explicit 0 is kept.
+  const expected: AgentExpectationSelectorInput = {
+    ...(testCount !== undefined ? { test_count: testCount } : {}),
+    ...(environments.length ? { environments } : {}),
+    ...(fullNames.length ? { full_names: fullNames } : {}),
+    ...(fullNamePrefixes.length ? { full_name_prefixes: fullNamePrefixes } : {}),
+    ...(Object.keys(labelValues).length ? { label_values: labelValues } : {}),
+  };
+
+  // Label values are the only forbidden selector written to the model.
+  const forbidden: AgentExpectationSelectorInput = {
+    ...(Object.keys(forbiddenLabelValues).length ? { label_values: forbiddenLabelValues } : {}),
+  };
+
+  // Evidence controls follow the same omit-when-unset rule.
+  const evidence: AgentEvidenceExpectationInput = {
+    ...(minSteps !== undefined ? { min_steps: minSteps } : {}),
+    ...(minAttachments !== undefined ? { min_attachments: minAttachments } : {}),
+    ...(stepNameContains.length ? { step_name_contains: stepNameContains } : {}),
+    ...(attachments.length
+      ? {
+          // Each attachment keeps only the constraints that are set.
+          attachments: attachments.map(({ name, contentType }) => ({
+            ...(name ? { name } : {}),
+            ...(contentType ? { content_type: contentType } : {}),
+          })),
+        }
+      : {}),
+  };
+
+  // A section with no keys is left out so the model lists only what was requested.
+  return {
+    ...(goal ? { goal } : {}),
+    ...(taskId ? { task_id: taskId } : {}),
+    ...(Object.keys(expected).length ? { expected } : {}),
+    ...(Object.keys(forbidden).length ? { forbidden } : {}),
+    ...(Object.keys(evidence).length ? { evidence } : {}),
+    ...(notes.length ? { notes } : {}),
+  };
+};
+
+/**
+ * Resolves the impact of a finding.
+ *
+ * An explicit `finding.impact` always wins. Otherwise the impact is derived from the check name,
+ * and checks that are not listed fall back on severity: "high" iterates, anything else is advisory.
+ */
+const defaultImpactForFinding = (finding: AgentFinding): FindingImpact => {
+  if (finding.impact) {
+    return finding.impact;
+  }
+
+  switch (finding.checkName) {
+    case "expected-test-missing":
+    case "expected-prefix-missing":
+    case "expected-label-missing":
+    case "expected-environment-missing":
+    case "forbidden-label-observed":
+    case "no-tests-observed":
+      return "reject";
+
+    case "noop-dominated-steps":
+      // Rejects only at confidence >= 0.75 (a missing confidence counts as 0); below that the
+      // severity fallback after the switch decides.
+      if ((finding.confidence ?? 0) >= 0.75) {
+        return "reject";
+      }
+      break;
+
+    case "expectations-invalid":
+    case "expectations-empty":
+    case "expectations-unsupported-control":
+    case "expected-count-mismatch":
+    case "expected-step-containing-missing":
+    case "insufficient-expected-steps":
+    case "insufficient-expected-attachments":
+    case "missing-expected-attachment":
+    case "runner-failures-outside-logical-results":
+    case "metadata-mismatch":
+    case "history-id-collision":
+    case "failed-without-useful-steps":
+    case "failed-without-attachments":
+    case "nontrivial-run-with-empty-trace":
+    case "retries-without-new-evidence":
+    case "passed-without-observable-evidence":
+      return "iterate";
+    default:
+      break;
+  }
+
+  return finding.severity === "high" ? "iterate" : "advisory";
+};
+
+// Returns the most severe default impact among `findings` ("reject" over "iterate"), else `fallback`.
+const strongestImpact = (findings: AgentFinding[], fallback: FindingImpact): FindingImpact => {
+  const impacts = new Set(findings.map((finding) => defaultImpactForFinding(finding)));
+
+  if (impacts.has("reject")) {
+    return "reject";
+  }
+
+  // Any impact other than "reject" or "iterate" never outranks the caller-provided fallback.
+  return impacts.has("iterate") ? "iterate" : fallback;
+};
+
+// Builds the `allure-agent-expectation-result/v1` summary. Status and impact come from the first rule
+// that applies: invalid expectations, empty or unsupported controls, no tests observed, no
+// runtime-matched controls, a blocking reject, a blocking iterate, partial modeling, else matched.
+const buildExpectationResult = (params: {
+  expectations?: LoadedExpectations;
+  findings: AgentFinding[];
+  observedTestCount: number;
+  modelingSummary: ModelingSummary;
+}) => {
+  const { expectations, findings, observedTestCount, modelingSummary } = params;
+  const expectationFindings = findings.filter(({ checkName }) => EXPECTATION_CHECK_IDS.has(checkName));
+  const withCheckName = (name: string) => expectationFindings.filter(({ checkName }) => checkName === name);
+  const countIn = (ids: Set<string>) => expectationFindings.filter(({ checkName }) => ids.has(checkName)).length;
+  const invalidFindings = withCheckName("expectations-invalid");
+  const unsupportedFindings = withCheckName("expectations-unsupported-control");
+  const unusableFindings = [...withCheckName("expectations-empty"), ...unsupportedFindings];
+  // A weak goal is the only expectation finding that never drives the status.
+  const blockingImpacts = new Set(
+    expectationFindings
+      .filter(({ checkName }) => checkName !== "expectations-weak-goal")
+      .map((finding) => defaultImpactForFinding(finding)),
+  );
+  const sawNoTests = expectationFindings.some(({ checkName }) => checkName === "no-tests-observed");
+  let status: "matched" | "failed" | "partial" | "degraded" | "unsupported" | "unavailable" | "not_requested";
+  let impact: "accept" | "reject" | "iterate" | "advisory";
+
+  if (invalidFindings.length) {
+    status = "unavailable";
+    // With a "reject" fallback this resolves to "reject" or "iterate", never "advisory".
+    impact = strongestImpact(invalidFindings, "reject");
+  } else if (unusableFindings.length) {
+    status = "unsupported";
+    impact = strongestImpact(unusableFindings, "iterate");
+  } else if (sawNoTests) {
+    status = "failed";
+    impact = "reject";
+  } else if (runtimeMatchingControlCount(expectations) === 0) {
+    status = "not_requested";
+    impact = "advisory";
+  } else if (blockingImpacts.has("reject")) {
+    status = "failed";
+    impact = "reject";
+  } else if (blockingImpacts.has("iterate")) {
+    status = "failed";
+    impact = "iterate";
+  } else if (modelingSummary.completeness === "partial") {
+    status = "partial";
+    impact = "iterate";
+  } else {
+    status = "matched";
+    impact = "accept";
+  }
+
+  return {
+    schema_version: "allure-agent-expectation-result/v1",
+    status,
+    impact,
+    source: expectations
+      ? {
+          kind: expectations.sourceKind,
+          path: expectations.sourceKind === "file" ? (expectations.sourcePath ?? null) : null,
+        }
+      : { kind: "none", path: null },
+    recognized_control_count: recognizedControlCount(expectations),
+    unsupported_controls: unsupportedFindings.map((finding) => finding.expectedReference ?? finding.message),
+    degraded_controls: [] as string[],
+    summary: {
+      expected_tests: expectations?.expected.testCount ?? expectations?.expected.fullNames.length ?? 0,
+      observed_tests: observedTestCount,
+      missing_expected: countIn(MISSING_EXPECTED_CHECK_IDS),
+      forbidden_observed: withCheckName("forbidden-label-observed").length,
+      unexpected_observed: 0,
+      evidence_mismatches: countIn(EVIDENCE_MISMATCH_CHECK_IDS),
+    },
+    finding_ids: expectationFindings.map((finding) => finding.findingId),
+  };
+};
+
 const sortFindings = (findings: AgentFinding[]) =>
   [...findings].sort((left, right) => {
     const bySeverity = FINDING_SEVERITY_ORDER[left.severity] - FINDING_SEVERITY_ORDER[right.severity];
@@ -1302,6 +1679,35 @@ const renderFindingEvidenceLinks = (params: { finding: AgentFinding; currentFile
     .join("\n");
 };
 
+// Formats a finding's `expected` / `observed` payload as one line of text; undefined means "nothing to show".
+const formatFindingStructuredValue = (value: unknown): string | undefined => {
+  if (value === undefined || value === null) {
+    return undefined;
+  }
+
+  if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") {
+    return String(value);
+  }
+
+  if (Array.isArray(value)) {
+    const items = value.map((item) => formatFindingStructuredValue(item)).filter(Boolean);
+    // An array with no printable items yields undefined, not "", so callers' `??` fallbacks still apply.
+    return items.length ? items.join(", ") : undefined;
+  }
+
+  if (typeof value === "object") {
+    const parts = Object.entries(value as Record<string, unknown>).flatMap(([key, item]) => {
+      const formatted = formatFindingStructuredValue(item);
+
+      return formatted ? [`${key}: ${formatted}`] : [];
+    });
+
+    return parts.length ? parts.join("; ") : undefined;
+  }
+
+  return undefined;
+};
+
 const renderFindingsSection = (params: {
   title: string;
   findings: AgentFinding[];
@@ -1317,32 +1723,34 @@ const renderFindingsSection = (params: {
   const lines: string[] = [`## ${title}`, ""];
 
   for (const finding of sortFindings(findings)) {
+    const impact = defaultImpactForFinding(finding);
+    const expected =
+      formatFindingStructuredValue(finding.expected) ??
+      (finding.expectedReference ? `reference: ${finding.expectedReference}` : undefined);
+    const observed = formatFindingStructuredValue(finding.observed) ?? finding.explanation;
+    const evidenceLinks = renderFindingEvidenceLinks({
+      finding,
+      currentFilePath,
+      outputDir,
+    });
+
     lines.push(
-      `### [${finding.severity.toUpperCase()}] ${escapeInlineMarkdown(finding.category)} / ${escapeInlineMarkdown(finding.checkName)}`,
+      `- [${finding.severity.toUpperCase()}][${impact}][${escapeInlineMarkdown(finding.category)}] ${escapeInlineMarkdown(finding.title ?? finding.message)}`,
     );
-    lines.push("");
-    lines.push(`- Message: ${escapeInlineMarkdown(finding.message)}`);
-    lines.push(`- Explanation: ${escapeInlineMarkdown(finding.explanation)}`);
-    lines.push(`- Remediation: ${escapeInlineMarkdown(finding.remediationHint)}`);
 
-    if (finding.expectedReference) {
-      lines.push(`- Expected Reference: ${escapeInlineMarkdown(finding.expectedReference)}`);
+    if (expected) {
+      lines.push(`  Expected: ${escapeInlineMarkdown(expected)}`);
     }
 
-    if (finding.confidence !== undefined) {
-      lines.push(`- Confidence: ${finding.confidence}`);
+    if (observed) {
+      lines.push(`  Observed: ${escapeInlineMarkdown(observed)}`);
     }
 
-    lines.push("- Evidence:");
-    lines.push("");
-    lines.push(
-      renderFindingEvidenceLinks({
-        finding,
-        currentFilePath,
-        outputDir,
-      }),
-    );
-    lines.push("");
+    lines.push(`  Action: ${escapeInlineMarkdown(finding.action ?? finding.remediationHint)}`);
+
+    if (evidenceLinks !== "None") {
+      lines.push(`  Evidence: ${escapeInlineMarkdown(finding.evidencePaths.join(", "))}`);
+    }
   }
 
   return lines.join("\n").trimEnd();
@@ -1361,6 +1769,32 @@ const renderExpectationSection = (entry: TestEntry) => {
   return lines.join("\n");
 };
 
+// Renders the "## Expectation Result" markdown section from the computed expectation result.
+const renderExpectationResultSection = (params: {
+  expectations?: LoadedExpectations;
+  findings: AgentFinding[];
+  observedTestCount: number;
+  modelingSummary: ModelingSummary;
+}) => {
+  const { status, impact, source, summary, recognized_control_count: recognized } = buildExpectationResult(params);
+  const sourceSuffix = source.path ? ` (${source.path})` : "";
+  return [
+    "## Expectation Result",
+    "",
+    `- Status: ${status}`,
+    `- Impact: ${impact}`,
+    `- Recognized Controls: ${recognized}`,
+    `- Source: ${source.kind}${sourceSuffix}`,
+    `- Expected Tests: ${summary.expected_tests}`,
+    `- Observed Tests: ${summary.observed_tests}`,
+    `- Missing Expected: ${summary.missing_expected}`,
+    `- Forbidden Observed: ${summary.forbidden_observed}`,
+    `- Evidence Mismatches: ${summary.evidence_mismatches}`,
+    `- Run Manifest: [manifest/run.json](manifest/run.json)`,
+    `- Findings Manifest: [manifest/findings.jsonl](manifest/findings.jsonl)`,
+  ].join("\n");
+};
+
 const renderRerunGuidance = (findings: AgentFinding[]) => {
   const relevant = findings.filter(
     ({ category }) => category === "evidence" || category === "smells" || category === "metadata",
@@ -1385,9 +1819,7 @@ const renderRerunGuidance = (findings: AgentFinding[]) => {
     lines.push("- Replace repetitive event-style steps with a compact text attachment when the signal is mostly logs.");
   }
 
-  lines.push(
-    "- Rerun only the relevant tests with the same expectations file so the next review is scoped and comparable.",
-  );
+  lines.push("- Rerun only the relevant tests with the same expectations so the next review is scoped and comparable.");
 
   return lines.join("\n");
 };
@@ -1588,16 +2020,29 @@ const renderIndex = (params: {
     lines.push(`- Goal: ${escapeInlineMarkdown(expectations.goal ?? "unknown")}`);
     lines.push(`- Feature / Task: ${escapeInlineMarkdown(expectations.taskId ?? "unknown")}`);
     lines.push(
-      `- Expectations Source: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)})`,
+      expectations.sourceKind === "inline"
+        ? `- Expectations Source: CLI options (normalized: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)}))`
+        : `- Expectations Source: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)})`,
     );
     lines.push(renderSelectorSummary("Expected selectors", expectations.expected));
     lines.push(renderSelectorSummary("Forbidden selectors", expectations.forbidden));
+    lines.push(renderEvidenceExpectationSummary(expectations.evidence));
 
     if (expectations.notes.length) {
       lines.push(`- Notes: ${expectations.notes.map((note) => escapeInlineMarkdown(note)).join(" | ")}`);
     }
   }
 
+  lines.push("");
+  lines.push(
+    renderExpectationResultSection({
+      expectations,
+      findings,
+      observedTestCount: tests.length,
+      modelingSummary,
+    }),
+  );
+
   lines.push("");
   lines.push("## Advisory Check Summary");
   lines.push("");
@@ -1884,11 +2329,7 @@ const readMaterializedArtifactText = async (outputDir: string, artifact?: Materi
   }
 };
 
-const resolveOutputDir = (options: AgentPluginOptions) => {
-  const outputDir = options.outputDir ?? env[AGENT_OUTPUT_ENV];
-
-  return outputDir ? resolve(outputDir) : undefined;
-};
+const resolveOutputDir = (options: AgentPluginOptions) => (options.outputDir ? resolve(options.outputDir) : undefined);
 
 const cleanupManagedEntries = async (outputDir: string) => {
   await Promise.all(
@@ -1939,18 +2380,116 @@ const createFindingFactory = () => {
   };
 };
 
-const parseExpectations = (rawContent: string) => {
-  const parsed = parse(rawContent) as ExpectationsInput;
-
+const assertExpectationsObject = (parsed: AgentExpectationsInput) => {
   if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) {
-    throw new Error("Expected a YAML or JSON object");
+    throw new Error("Expected an expectations object");
   }
+};
+
+const writeExpectedManifest = async (outputDir: string, parsed: AgentExpectationsInput) => {
+  const relativePath = normalizeMarkdownPath("manifest/expected.json");
 
-  return parsed;
+  await mkdir(join(outputDir, "manifest"), { recursive: true });
+  await writeFile(join(outputDir, relativePath), `${JSON.stringify(parsed, null, 2)}\n`, "utf-8");
+
+  return relativePath;
 };
 
-const loadExpectations = async (outputDir: string, createFinding: ReturnType<typeof createFindingFactory>) => {
-  const configuredPath = env[AGENT_EXPECTATIONS_ENV];
+// Builds the LoadedExpectations record: source details are copied, `raw` keeps the parsed input as-is.
+const toLoadedExpectations = (params: {
+  parsed: AgentExpectationsInput;
+  relativePath: string;
+  sourceKind: "file" | "inline";
+  sourcePath?: string;
+}) => {
+  const input = params.parsed;
+  return {
+    sourcePath: params.sourcePath,
+    sourceKind: params.sourceKind,
+    relativePath: params.relativePath,
+    raw: input,
+    goal: input.goal,
+    taskId: input.task_id,
+    notes: normalizeNotes(input.notes),
+    expected: normalizeSelectors(input.expected),
+    forbidden: normalizeSelectors(input.forbidden),
+    evidence: normalizeEvidenceExpectations(input.evidence),
+  } satisfies LoadedExpectations;
+};
+
+const loadExpectations = async (
+  outputDir: string,
+  createFinding: ReturnType<typeof createFindingFactory>,
+  options: AgentPluginOptions,
+) => {
+  const configuredPath = options.expectationsPath;
+  const inlineExpectations = options.expectations;
+
+  if (!configuredPath && !inlineExpectations) {
+    return {
+      expectations: undefined,
+      findings: [] as AgentFinding[],
+    };
+  }
+
+  if (configuredPath && inlineExpectations) {
+    return {
+      expectations: undefined,
+      findings: [
+        createFinding({
+          subject: "run",
+          subjectType: "run",
+          severity: "high",
+          category: "bootstrap",
+          impact: "reject",
+          checkName: "expectations-invalid",
+          title: "Expectation input is invalid",
+          message: "Both file and inline agent expectations were provided.",
+          explanation: "Set either expectationsPath or expectations in the agent plugin options, not both.",
+          evidencePaths: [],
+          remediationHint: "Rerun with one expectations source so scope checks are unambiguous.",
+          expectedReference: undefined,
+        }),
+      ],
+    };
+  }
+
+  if (inlineExpectations) {
+    try {
+      assertExpectationsObject(inlineExpectations);
+
+      const relativePath = await writeExpectedManifest(outputDir, inlineExpectations);
+
+      return {
+        expectations: toLoadedExpectations({
+          parsed: inlineExpectations,
+          relativePath,
+          sourceKind: "inline",
+        }),
+        findings: [] as AgentFinding[],
+      };
+    } catch (error) {
+      return {
+        expectations: undefined,
+        findings: [
+          createFinding({
+            subject: "run",
+            subjectType: "run",
+            severity: "high",
+            category: "bootstrap",
+            impact: "reject",
+            checkName: "expectations-invalid",
+            title: "Expectation input is invalid",
+            message: "Could not load inline agent expectations",
+            explanation: `The inline expectations option could not be normalized: ${(error as Error).message}`,
+            evidencePaths: [],
+            remediationHint: "Provide a valid expectations object before rerunning.",
+            expectedReference: undefined,
+          }),
+        ],
+      };
+    }
+  }
 
   if (!configuredPath) {
     return {
@@ -1963,24 +2502,16 @@ const loadExpectations = async (outputDir: string, createFinding: ReturnType<typ
 
   try {
     const rawContent = await readFile(expectationsPath, "utf-8");
-    const parsed = parseExpectations(rawContent);
-
-    const relativePath = normalizeMarkdownPath("manifest/expected.json");
-
-    await mkdir(join(outputDir, "manifest"), { recursive: true });
-    await writeFile(join(outputDir, relativePath), `${JSON.stringify(parsed, null, 2)}\n`, "utf-8");
+    const parsed = parseAgentExpectations(rawContent);
+    const relativePath = await writeExpectedManifest(outputDir, parsed);
 
     return {
-      expectations: {
-        sourcePath: expectationsPath,
+      expectations: toLoadedExpectations({
+        parsed,
         relativePath,
-        raw: parsed,
-        goal: parsed.goal,
-        taskId: parsed.task_id,
-        notes: normalizeNotes(parsed.notes),
-        expected: normalizeSelectors(parsed.expected),
-        forbidden: normalizeSelectors(parsed.forbidden),
-      } satisfies LoadedExpectations,
+        sourceKind: "file",
+        sourcePath: expectationsPath,
+      }),
       findings: [] as AgentFinding[],
     };
   } catch (error) {
@@ -1992,11 +2523,13 @@ const loadExpectations = async (outputDir: string, createFinding: ReturnType<typ
           subjectType: "run",
           severity: "high",
           category: "bootstrap",
-          checkName: "invalid-expectations-file",
-          message: `Could not load ALLURE_AGENT_EXPECTATIONS from ${expectationsPath}`,
+          impact: "reject",
+          checkName: "expectations-invalid",
+          title: "Expectation input is invalid",
+          message: `Could not load expectations from ${expectationsPath}`,
           explanation: `The expectations file could not be parsed as YAML or JSON: ${(error as Error).message}`,
           evidencePaths: [],
-          remediationHint: "Provide a readable YAML or JSON file in ALLURE_AGENT_EXPECTATIONS before rerunning.",
+          remediationHint: "Provide a readable YAML or JSON expectations file before rerunning.",
           expectedReference: undefined,
         }),
       ],
@@ -2004,30 +2537,6 @@ const loadExpectations = async (outputDir: string, createFinding: ReturnType<typ
   }
 };
 
-const loadProjectGuide = async (outputDir: string): Promise<LoadedProjectGuide | undefined> => {
-  const projectRoot = resolve(env[AGENT_PROJECT_ROOT_ENV] ?? process.cwd());
-  const sourcePath = join(projectRoot, "docs", "allure-agent-mode.md");
-
-  try {
-    const content = await readFile(sourcePath, "utf-8");
-    const relativePath = normalizeMarkdownPath(join("project", "docs", "allure-agent-mode.md"));
-
-    await mkdir(join(outputDir, "project", "docs"), { recursive: true });
-    await writeFile(join(outputDir, relativePath), content, "utf-8");
-
-    return {
-      sourcePath,
-      relativePath,
-    };
-  } catch (error) {
-    if ((error as NodeJS.ErrnoException).code === "ENOENT") {
-      return undefined;
-    }
-
-    throw error;
-  }
-};
-
 const computeScopeEvaluation = (params: {
   tr: TestResult;
   environmentId: string;
@@ -2120,6 +2629,43 @@ const collectTestEvidencePaths = (entry: TestEntry) => {
   return uniqueValues(paths);
 };
 
+// When the expected selectors are empty every entry is a target; otherwise only the entries
+// whose scope evaluation is "match" are.
+const getExpectationTargetEntries = (entries: TestEntry[], expectations: LoadedExpectations) => {
+  const scoped = hasSelector(expectations.expected);
+
+  return scoped ? entries.filter(({ scope }) => scope.scopeMatch === "match") : entries;
+};
+
+// Merges `stepSummary` and `fixtureStepSummary` of the entry's first attempt (`attempts[0]`).
+const currentAttemptStepSummary = ({ attempts: [current] }: TestEntry) =>
+  mergeStepSummaries([current.stepSummary, current.fixtureStepSummary]);
+
+// Artifacts of the entry that are not flagged as missing.
+const nonMissingArtifacts = ({ allArtifacts }: TestEntry) => allArtifacts.filter(({ missing }) => !missing);
+
+// Renders the constraints that are set on an attachment expectation as "name=..., content-type=...".
+const formatAttachmentExpectation = ({ name, contentType }: NormalizedAttachmentExpectation) => {
+  const constraints = [name ? `name=${name}` : undefined, contentType ? `content-type=${contentType}` : undefined];
+  return constraints.filter(Boolean).join(", ");
+};
+
+/**
+ * Checks one artifact against one attachment expectation.
+ *
+ * A missing artifact never matches. Otherwise `displayName` and `contentType` must be strictly
+ * equal to the expectation's `name` and `contentType`, but only for the constraints that are set,
+ * so an expectation without constraints matches any non-missing artifact.
+ */
+const matchesAttachmentExpectation = (artifact: MaterializedArtifact, expectation: NormalizedAttachmentExpectation) => {
+  const { name, contentType } = expectation;
+  // An unset (empty) constraint is treated as satisfied.
+  const nameMatches = !name || artifact.displayName === name;
+  const contentTypeMatches = !contentType || artifact.contentType === contentType;
+
+  return !artifact.missing && nameMatches && contentTypeMatches;
+};
+
 const buildRunAndTestFindings = (params: {
   entries: TestEntry[];
   expectations?: LoadedExpectations;
@@ -2132,19 +2678,24 @@ const buildRunAndTestFindings = (params: {
   const stdoutArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
   const stderrArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
 
-  if (entries.length === 0) {
+  if (entries.length === 0 && expectations?.expected.testCount !== 0) {
     runFindings.push(
       createFinding({
         subject: "run",
         subjectType: "run",
         severity: "high",
+        impact: "reject",
         category: "bootstrap",
-        checkName: "no-visible-tests",
+        checkName: "no-tests-observed",
+        title: "No logical tests were observed",
         message: "No visible test results were found in the run.",
         explanation: "The agent output was generated, but there were no visible logical test results to review.",
-        evidencePaths: [],
-        remediationHint:
-          "Verify that Allure results are being generated and that the test command actually executed the intended tests.",
+        evidencePaths: ["manifest/run.json", "manifest/tests.jsonl"],
+        remediationHint: "Fix command, adapter, discovery, or modeling before calling the run passing validation.",
+        expected: { test_count: expectations?.expected.testCount ?? "one or more logical tests" },
+        observed: { test_count: 0 },
+        action: "Do not call the run passing validation. Fix command, adapter, discovery, or modeling.",
+        confidence: 1,
       }),
     );
   }
@@ -2162,7 +2713,7 @@ const buildRunAndTestFindings = (params: {
           "Global process logs help agents debug bootstrap failures and compare the recorded results with console output.",
         evidencePaths: [],
         remediationHint:
-          "Run tests through `allure agent -- <command>` without `--silent` when you need bootstrap diagnostics, or use `ALLURE_AGENT_*` with `allure run` for lower-level control.",
+          "Run tests through `allure agent -- <command>` without `--silent` when you need bootstrap diagnostics.",
         confidence: 0.9,
       }),
     );
@@ -2216,6 +2767,90 @@ const buildRunAndTestFindings = (params: {
 
   if (expectations) {
     const allFullNames = entries.map(({ tr }) => tr.fullName ?? tr.name);
+    const targetEntries = getExpectationTargetEntries(entries, expectations);
+    const hasRuntimeControls = runtimeMatchingControlCount(expectations) > 0;
+    const genericGoal = expectations.goal ? normalizeStepText(expectations.goal).replace(/[^\p{L}\p{N}\s]/gu, "") : "";
+
+    if (recognizedControlCount(expectations) === 0) {
+      runFindings.push(
+        createFinding({
+          subject: "run",
+          subjectType: "run",
+          severity: "high",
+          impact: "iterate",
+          category: "scope",
+          checkName: "expectations-empty",
+          title: "Expectation source did not contain recognized controls",
+          message: "Expectation source was provided but no recognized M1 controls were parsed.",
+          explanation: "The run can still be reviewed, but expectation precision was not requested.",
+          evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
+          remediationHint: "Do not claim expectation precision. Fix expectation input or rerun without expectations.",
+          observed: { recognized_control_count: 0 },
+          action: "Do not claim expectation precision. Fix expectation input or rerun without expectations.",
+          confidence: 1,
+        }),
+      );
+    }
+
+    if (
+      (hasRuntimeControls && !expectations.goal) ||
+      ["run tests", "validate", "make sure it passes", "check", "test"].includes(genericGoal)
+    ) {
+      runFindings.push(
+        createFinding({
+          subject: "run",
+          subjectType: "run",
+          severity: "info",
+          impact: "advisory",
+          category: "scope",
+          checkName: "expectations-weak-goal",
+          title: "Run goal is missing or too generic",
+          message: expectations.goal
+            ? `The run goal is too generic: ${expectations.goal}`
+            : "Runtime expectations were provided without a goal.",
+          explanation: "The goal is intent metadata and does not change the runtime evidence.",
+          evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
+          remediationHint:
+            "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.",
+          expected: { goal: "specific validation claim" },
+          observed: { goal: expectations.goal ?? null },
+          action:
+            "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.",
+          confidence: 0.9,
+        }),
+      );
+    }
+
+    if (expectations.expected.testCount !== undefined && entries.length !== expectations.expected.testCount) {
+      const severity: FindingSeverity =
+        expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? "high" : "warning";
+      const impact: FindingImpact =
+        expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? "reject" : "iterate";
+
+      runFindings.push(
+        createFinding({
+          subject: "run",
+          subjectType: "run",
+          severity,
+          impact,
+          category: "scope",
+          checkName: "expected-count-mismatch",
+          title: "Observed logical test count did not match",
+          message: `Expected ${expectations.expected.testCount} visible logical tests, got ${entries.length}.`,
+          explanation: "The expected count is evaluated against all visible logical tests after agent-mode modeling.",
+          evidencePaths: expectations.relativePath
+            ? [expectations.relativePath, "manifest/tests.jsonl"]
+            : ["manifest/tests.jsonl"],
+          remediationHint:
+            "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.",
+          expectedReference: "expected.test_count",
+          expected: { test_count: expectations.expected.testCount },
+          observed: { test_count: entries.length },
+          action: "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.",
+          confidence: 1,
+        }),
+      );
+    }
 
     expectations.expected.fullNames.forEach((fullName, index) => {
       if (!allFullNames.includes(fullName)) {
@@ -2224,14 +2859,23 @@ const buildRunAndTestFindings = (params: {
             subject: "run",
             subjectType: "run",
             severity: "high",
+            impact: "reject",
             category: "scope",
-            checkName: "missing-expected-test",
-            message: `Expected test did not run: ${fullName}`,
-            explanation:
-              "The expectations file explicitly listed this test, but it did not appear in the agentic output.",
-            evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
-            remediationHint: "Check the test selection, environment, and feature branch scope before rerunning.",
+            checkName: "expected-test-missing",
+            title: "Expected test was not observed",
+            message: "The expected test did not appear in the observed logical results.",
+            explanation: `Expected test did not run: ${fullName}`,
+            evidencePaths: expectations.relativePath
+              ? [expectations.relativePath, "manifest/tests.jsonl"]
+              : ["manifest/tests.jsonl"],
+            remediationHint:
+              "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.",
             expectedReference: `expected.full_names[${index}]`,
+            expected: { full_names: [fullName] },
+            observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) },
+            action:
+              "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.",
+            confidence: 1,
           }),
         );
       }
@@ -2243,15 +2887,22 @@ const buildRunAndTestFindings = (params: {
           createFinding({
             subject: "run",
             subjectType: "run",
-            severity: "warning",
+            severity: "high",
+            impact: "reject",
             category: "scope",
-            checkName: "missing-expected-prefix",
-            message: `No executed test matched the expected prefix: ${prefix}`,
-            explanation: "The expectations file asked for tests within this name prefix, but none were recorded.",
-            evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
-            remediationHint:
-              "Check the expected selector or adjust the executed test target so the intended scope is covered.",
+            checkName: "expected-prefix-missing",
+            title: "Expected test prefix was not observed",
+            message: `No observed test full name started with the expected prefix: ${prefix}`,
+            explanation: "The expectations asked for tests within this name prefix, but none were recorded.",
+            evidencePaths: expectations.relativePath
+              ? [expectations.relativePath, "manifest/tests.jsonl"]
+              : ["manifest/tests.jsonl"],
+            remediationHint: "Treat the run as wrong scope or missing coverage.",
             expectedReference: `expected.full_name_prefixes[${index}]`,
+            expected: { full_name_prefixes: [prefix] },
+            observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) },
+            action: "Treat the run as wrong scope or missing coverage.",
+            confidence: 1,
           }),
         );
       }
@@ -2263,15 +2914,22 @@ const buildRunAndTestFindings = (params: {
           createFinding({
             subject: "run",
             subjectType: "run",
-            severity: "warning",
+            severity: "high",
+            impact: "reject",
             category: "scope",
-            checkName: "missing-expected-environment",
+            checkName: "expected-environment-missing",
+            title: "Expected environment was not observed",
             message: `Expected environment did not appear in the run: ${environment}`,
-            explanation:
-              "The expectations file scoped the run to this environment, but no logical test result matched it.",
-            evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
-            remediationHint: "Check the environment selector or rerun the intended environment explicitly.",
+            explanation: "The expectations scoped the run to this environment, but no logical test result matched it.",
+            evidencePaths: expectations.relativePath
+              ? [expectations.relativePath, "manifest/tests.jsonl"]
+              : ["manifest/tests.jsonl"],
+            remediationHint: "Rerun in the intended environment before making environment-specific claims.",
             expectedReference: `expected.environments[${index}]`,
+            expected: { environments: [environment] },
+            observed: { environments: actualEnvironments },
+            action: "Rerun in the intended environment before making environment-specific claims.",
+            confidence: 1,
           }),
         );
       }
@@ -2285,16 +2943,23 @@ const buildRunAndTestFindings = (params: {
           createFinding({
             subject: "run",
             subjectType: "run",
-            severity: "warning",
+            severity: "high",
+            impact: "reject",
             category: "scope",
-            checkName: "missing-expected-label-selector",
-            message: `No executed test matched ${formatLabelRequirement(labelName, values)}`,
+            checkName: "expected-label-missing",
+            title: "Expected label was not observed",
+            message: `No observed test had ${formatLabelRequirement(labelName, values)}`,
             explanation:
-              "The expectations file defined a label selector for the intended scope, but no logical test result satisfied it.",
-            evidencePaths: expectations.relativePath ? [expectations.relativePath] : [],
-            remediationHint:
-              "Add the expected label metadata to the intended tests or adjust the expectations selector.",
+              "The expectations defined a label selector for the intended scope, but no logical test result satisfied it.",
+            evidencePaths: expectations.relativePath
+              ? [expectations.relativePath, "manifest/tests.jsonl"]
+              : ["manifest/tests.jsonl"],
+            remediationHint: "Fix metadata, selector, or run the correct labeled scope.",
             expectedReference: `expected.label_values/${escapeJsonPointerSegment(labelName)}`,
+            expected: { label_values: { [labelName]: values } },
+            observed: { test_count: entries.length },
+            action: "Fix metadata, selector, or run the correct labeled scope.",
+            confidence: 1,
           }),
         );
       }
@@ -2322,6 +2987,10 @@ const buildRunAndTestFindings = (params: {
     }
   }
 
+  const evidenceTargetKeys = expectations
+    ? new Set(getExpectationTargetEntries(entries, expectations).map((entry) => entry.key))
+    : new Set<string>();
+
   for (const entry of entries) {
     const currentAttempt = entry.attempts[0];
     const attemptSignatures = uniqueValues(entry.attempts.map(buildAttemptSignature));
@@ -2329,26 +2998,51 @@ const buildRunAndTestFindings = (params: {
     const allStepSummary = mergeStepSummaries(
       entry.attempts.map((attempt) => mergeStepSummaries([attempt.stepSummary, attempt.fixtureStepSummary])),
     );
+    const expectedEvidenceApplies = expectations ? evidenceTargetKeys.has(entry.key) : false;
+    const expectedEvidence = expectations?.evidence;
+    const currentStepSummary = currentAttemptStepSummary(entry);
+    const currentMeaningfulSteps = currentStepSummary.meaningfulSteps;
+    const currentAttachments = nonMissingArtifacts(entry);
     const hasUsefulSteps =
       currentAttempt.stepSummary.meaningfulSteps + currentAttempt.fixtureStepSummary.meaningfulSteps > 0;
     const hasAnyAttachments = entry.allArtifacts.some((artifact) => !artifact.missing);
     const noopRatio = allStepSummary.totalSteps > 0 ? allStepSummary.noopSteps / allStepSummary.totalSteps : 0;
 
     if (entry.scope.scopeMatch === "forbidden") {
+      const forbiddenLabelReference = entry.scope.expectedReferences.find((reference) =>
+        reference.startsWith("forbidden.label_values"),
+      );
+      const checkName = forbiddenLabelReference ? "forbidden-label-observed" : "forbidden-selector-match";
+
       entry.findings.push(
         createFinding({
           subject: entry.key,
           subjectType: "test",
           severity: "high",
+          impact: "reject",
           category: "scope",
-          checkName: "forbidden-selector-match",
-          message: "This test matched a forbidden selector from the expectations file.",
-          explanation: "The logical test belongs to a scope that the expectations file explicitly marked as forbidden.",
+          checkName,
+          title: forbiddenLabelReference ? "Forbidden label was observed" : "Forbidden selector was observed",
+          message: forbiddenLabelReference
+            ? "This test has a label value that was explicitly forbidden."
+            : "This test matched a forbidden selector from the expectations.",
+          explanation: "The logical test belongs to a scope that the expectations explicitly marked as forbidden.",
           evidencePaths: expectations?.relativePath
             ? [entry.relativePath, expectations.relativePath]
             : [entry.relativePath],
-          remediationHint: "Tighten the test selection or update the expectations file before accepting the run.",
-          expectedReference: entry.scope.expectedReferences[0],
+          remediationHint: forbiddenLabelReference
+            ? "Treat as scope drift. Split or correct the run before using it as focused validation."
+            : "Tighten the test selection or update the expectations before accepting the run.",
+          expectedReference: forbiddenLabelReference ?? entry.scope.expectedReferences[0],
+          expected: forbiddenLabelReference ? { forbidden_label: forbiddenLabelReference } : { forbidden: true },
+          observed: {
+            full_name: entry.tr.fullName ?? entry.tr.name,
+            labels: toLabelEntries(entry.tr.labels),
+          },
+          action: forbiddenLabelReference
+            ? "Treat as scope drift. Split or correct the run before using it as focused validation."
+            : "Tighten the test selection or update the expectations before accepting the run.",
+          confidence: 1,
         }),
       );
     } else if (entry.scope.scopeMatch === "unexpected") {
@@ -2361,12 +3055,12 @@ const buildRunAndTestFindings = (params: {
           checkName: "unexpected-test",
           message: "This test ran outside the expected scope.",
           explanation:
-            "The expectations file defined positive scope selectors, but this logical test did not match any of them.",
+            "The expectations defined positive scope selectors, but this logical test did not match any of them.",
           evidencePaths: expectations?.relativePath
             ? [entry.relativePath, expectations.relativePath]
             : [entry.relativePath],
           remediationHint:
-            "Rerun only the intended tests or broaden the expectations file if this test is part of the plan.",
+            "Rerun only the intended tests or broaden the expectations if this test is part of the plan.",
         }),
       );
     }
@@ -2411,6 +3105,142 @@ const buildRunAndTestFindings = (params: {
       );
     }
 
+    expectedEvidence?.stepNameContains.forEach((expectedText, index) => {
+      if (!expectedEvidenceApplies || testStepContainsText(entry, expectedText)) {
+        return;
+      }
+
+      entry.findings.push(
+        createFinding({
+          subject: entry.key,
+          subjectType: "test",
+          severity: "warning",
+          impact: "iterate",
+          category: "evidence",
+          checkName: "expected-step-containing-missing",
+          title: "Expected step text was not observed",
+          message: `Expected a test-scoped step containing ${JSON.stringify(expectedText)}.`,
+          explanation: `The current attempt has ${currentStepSummary.totalSteps} test-scoped steps, but none contained the expected text. Global runner output is not considered test-scoped step evidence.`,
+          evidencePaths: expectations?.relativePath
+            ? [entry.relativePath, expectations.relativePath]
+            : [entry.relativePath],
+          remediationHint:
+            "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.",
+          expectedReference: `evidence.step_name_contains[${index}]`,
+          expected: { step_name_contains: [expectedText] },
+          observed: { steps: currentStepSummary.totalSteps, matched: false },
+          action:
+            "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.",
+          confidence: 0.9,
+        }),
+      );
+    });
+
+    if (
+      expectedEvidenceApplies &&
+      expectedEvidence?.minSteps !== undefined &&
+      currentMeaningfulSteps < expectedEvidence.minSteps
+    ) {
+      entry.findings.push(
+        createFinding({
+          subject: entry.key,
+          subjectType: "test",
+          severity: "warning",
+          impact: "iterate",
+          category: "evidence",
+          checkName: "insufficient-expected-steps",
+          title: "Expected step count was not met",
+          message: `Expected at least ${expectedEvidence.minSteps} meaningful steps, got ${currentMeaningfulSteps}.`,
+          explanation:
+            "Meaningful steps have parameters, nested actions, attachments, messages, traces, or error context.",
+          evidencePaths: expectations?.relativePath
+            ? [entry.relativePath, expectations.relativePath]
+            : [entry.relativePath],
+          remediationHint: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.",
+          expectedReference: "evidence.min_steps",
+          expected: { min_steps: expectedEvidence.minSteps },
+          observed: { meaningful_steps: currentMeaningfulSteps },
+          action: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.",
+          confidence: 0.9,
+        }),
+      );
+    }
+
+    if (
+      expectedEvidenceApplies &&
+      expectedEvidence?.minAttachments !== undefined &&
+      currentAttachments.length < expectedEvidence.minAttachments
+    ) {
+      entry.findings.push(
+        createFinding({
+          subject: entry.key,
+          subjectType: "test",
+          severity: "warning",
+          impact: "iterate",
+          category: "evidence",
+          checkName: "insufficient-expected-attachments",
+          title: "Expected attachment count was not met",
+          message: `Expected at least ${expectedEvidence.minAttachments} non-missing attachments, got ${currentAttachments.length}.`,
+          explanation: "Only materialized test-scoped or step-scoped attachments count toward this expectation.",
+          evidencePaths: expectations?.relativePath
+            ? [entry.relativePath, expectations.relativePath]
+            : [entry.relativePath],
+          remediationHint: "Attach real runtime artifacts only when they are needed for debugging or review.",
+          expectedReference: "evidence.min_attachments",
+          expected: { min_attachments: expectedEvidence.minAttachments },
+          observed: { attachments: currentAttachments.length },
+          action: "Attach real runtime artifacts only when they are needed for debugging or review.",
+          confidence: 0.9,
+        }),
+      );
+    }
+
+    expectedEvidence?.attachments.forEach((attachmentExpectation, index) => {
+      if (!expectedEvidenceApplies) {
+        return;
+      }
+
+      if (currentAttachments.some((artifact) => matchesAttachmentExpectation(artifact, attachmentExpectation))) {
+        return;
+      }
+
+      entry.findings.push(
+        createFinding({
+          subject: entry.key,
+          subjectType: "test",
+          severity: "warning",
+          impact: "iterate",
+          category: "evidence",
+          checkName: "missing-expected-attachment",
+          title: "Expected attachment was not observed",
+          message: `Expected attachment not found: ${formatAttachmentExpectation(attachmentExpectation)}`,
+          explanation:
+            "The expectations require every expected logical test to include a non-missing attachment matching this filter.",
+          evidencePaths: expectations?.relativePath
+            ? [entry.relativePath, expectations.relativePath]
+            : [entry.relativePath],
+          remediationHint:
+            "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.",
+          expectedReference: `evidence.attachments[${index}]`,
+          expected: {
+            attachment: {
+              ...(attachmentExpectation.name ? { name: attachmentExpectation.name } : {}),
+              ...(attachmentExpectation.contentType ? { content_type: attachmentExpectation.contentType } : {}),
+            },
+          },
+          observed: {
+            attachments: currentAttachments.map((attachment) => ({
+              name: attachment.displayName,
+              content_type: attachment.contentType ?? null,
+            })),
+          },
+          action:
+            "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.",
+          confidence: 0.95,
+        }),
+      );
+    });
+
     if (isFailedLikeStatus(currentAttempt.tr.status) && !hasUsefulSteps) {
       entry.findings.push(
         createFinding({
@@ -2790,17 +3620,23 @@ const appendJsonlLine = async (path: string, item: unknown) => {
 const toRunManifest = (params: {
   context: PluginContext;
   command?: string;
+  agentContext: AgentRuntimeState["agentContext"];
   generatedAt: string;
   phase: RunPhase;
   expectations?: LoadedExpectations;
-  projectGuide?: LoadedProjectGuide;
   snapshot: AgentSnapshot;
 }) => {
-  const { context, command, generatedAt, phase, expectations, projectGuide, snapshot } = params;
+  const { context, command, agentContext, generatedAt, phase, expectations, snapshot } = params;
   const stdoutArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt");
   const stderrArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt");
   const originalExitCode = snapshot.globalExitCode?.original ?? null;
   const actualExitCode = snapshot.globalExitCode?.actual ?? snapshot.globalExitCode?.original ?? null;
+  const expectationResult = buildExpectationResult({
+    expectations,
+    findings: snapshot.combinedAllFindings,
+    observedTestCount: snapshot.entries.length,
+    modelingSummary: snapshot.modelingSummary,
+  });
 
   return {
     schema_version: AGENT_SCHEMA_VERSION,
@@ -2835,26 +3671,27 @@ const toRunManifest = (params: {
       findings_manifest: "manifest/findings.jsonl",
       test_events_manifest: "manifest/test-events.jsonl",
       expected_manifest: expectations?.relativePath ?? null,
-      project_guide: projectGuide?.relativePath ?? null,
       process_logs: {
         stdout: stdoutArtifact?.relativePath ?? null,
         stderr: stderrArtifact?.relativePath ?? null,
       },
     },
     expectations_present: Boolean(expectations),
+    expectations: expectations ? toExpectationModel(expectations) : null,
+    expectation_result: expectationResult,
     check_summary: buildCheckSummary(snapshot.combinedAllFindings),
     agent_context: {
-      agent_name: env[AGENT_NAME_ENV] ?? null,
-      loop_id: env[AGENT_LOOP_ID_ENV] ?? null,
-      task_id: env[AGENT_TASK_ID_ENV] ?? expectations?.taskId ?? null,
-      conversation_id: env[AGENT_CONVERSATION_ID_ENV] ?? null,
+      agent_name: agentContext.agentName ?? null,
+      loop_id: agentContext.loopId ?? null,
+      task_id: agentContext.taskId ?? expectations?.taskId ?? null,
+      conversation_id: agentContext.conversationId ?? null,
     },
   };
 };
 
 const writeSnapshotFiles = async (params: { runtime: AgentRuntimeState; snapshot: AgentSnapshot; phase: RunPhase }) => {
   const { runtime, snapshot, phase } = params;
-  const { outputDir, context, command, generatedAt, expectations, projectGuide } = runtime;
+  const { outputDir, context, command, generatedAt, expectations } = runtime;
   const nextTestPaths = new Set(snapshot.entries.map((entry) => entry.filePath));
   const nextAssetDirs = new Set(snapshot.entries.map((entry) => join(outputDir, entry.relativeAssetDir)));
 
@@ -2890,10 +3727,10 @@ const writeSnapshotFiles = async (params: { runtime: AgentRuntimeState; snapshot
       toRunManifest({
         context,
         command,
+        agentContext: runtime.agentContext,
         generatedAt,
         phase,
         expectations,
-        projectGuide,
         snapshot,
       }),
     ),
@@ -2922,7 +3759,7 @@ const writeSnapshotFiles = async (params: { runtime: AgentRuntimeState; snapshot
         findings: snapshot.combinedAllFindings,
       }),
     ),
-    writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide(projectGuide?.relativePath)),
+    writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide()),
   ]);
 };
 
@@ -2968,7 +3805,7 @@ const createBootstrapSnapshot = (): AgentSnapshot => ({
 });
 
 const writeBootstrapFiles = async (runtime: AgentRuntimeState) => {
-  await writeTextAtomic(join(runtime.outputDir, "AGENTS.md"), renderAgentsGuide(runtime.projectGuide?.relativePath));
+  await writeTextAtomic(join(runtime.outputDir, "AGENTS.md"), renderAgentsGuide());
   await initializeJsonlStream(join(runtime.outputDir, "manifest", "test-events.jsonl"));
   await writeSnapshotFiles({
     runtime,
@@ -2995,20 +3832,57 @@ const toTestsManifestLine = (entry: TestEntry) => ({
   assets_dir: entry.relativeAssetDir,
 });
 
-const toFindingManifestLine = (finding: AgentFinding) => ({
-  finding_id: finding.findingId,
-  subject: finding.subject,
-  severity: finding.severity,
-  category: finding.category,
-  check_name: finding.checkName,
-  message: finding.message,
-  explanation: finding.explanation,
-  evidence_paths: finding.evidencePaths,
-  remediation_hint: finding.remediationHint,
-  expected_reference: finding.expectedReference,
-  confidence: finding.confidence,
+const toFindingSubject = ({ subjectType, subject }: AgentFinding) => ({
+  type: subjectType,
+  ...(subjectType !== "test" ? {} : { id: subject, path: subject }), // only test subjects carry id/path
+});
 
+const toFindingManifestLine = (finding: AgentFinding) => { // one findings-manifest line per finding
+  const impact = defaultImpactForFinding(finding);
+  const confidence = finding.confidence ?? 1; // findings without an explicit confidence report 1
+
+  return {
+    schema_version: "allure-agent-finding/v2",
+    check_id: finding.checkName,
+    instance_id: finding.findingId,
+    severity: finding.severity,
+    impact,
+    confidence,
+    category: finding.category,
+    title: finding.title ?? finding.message, // message doubles as the title when none is set
+    message: finding.message,
+    subject: toFindingSubject(finding), // structured subject; the raw string is kept in subject_ref below
+    expected: finding.expected ?? (finding.expectedReference ? { reference: finding.expectedReference } : {}),
+    observed: finding.observed ?? { detail: finding.explanation },
+    evidence: {
+      paths: finding.evidencePaths,
+    },
+    action: finding.action ?? finding.remediationHint, // remediationHint is the fallback
+    ...(finding.source ? { source: finding.source } : {}), // optional keys are omitted, not nulled
+    ...(finding.limits ? { limits: finding.limits } : {}),
+    ...(finding.affected ? { affected: finding.affected } : {}),
+    ...(finding.moreCount !== undefined ? { more_count: finding.moreCount } : {}),
+    legacy: { // field names of the previous manifest line shape, grouped
+      finding_id: finding.findingId,
+      subject: finding.subject,
+      subject_type: finding.subjectType,
+      check_name: finding.checkName,
+      explanation: finding.explanation,
+      evidence_paths: finding.evidencePaths,
+      remediation_hint: finding.remediationHint,
+      expected_reference: finding.expectedReference,
+    },
+    finding_id: finding.findingId, // the same previous-shape fields are also repeated at the top level
+    subject_ref: finding.subject, // string subject; top-level `subject` is an object in this shape
+    subject_type: finding.subjectType,
+    check_name: finding.checkName,
+    explanation: finding.explanation,
+    evidence_paths: finding.evidencePaths,
+    remediation_hint: finding.remediationHint,
+    expected_reference: finding.expectedReference,
+  };
+};
+
 const queueRuntimeTask = (runtime: AgentRuntimeState, task: () => Promise<void>) => {
   runtime.queue = runtime.queue
     .catch(() => undefined)
@@ -3171,18 +4045,22 @@ const createRuntimeState = async (params: {
 
   const generatedAt = new Date().toISOString();
   const createFinding = createFindingFactory();
-  const expectationLoadResult = await loadExpectations(outputDir, createFinding);
-  const projectGuide = await loadProjectGuide(outputDir);
+  const expectationLoadResult = await loadExpectations(outputDir, createFinding, options);
   const runtime: AgentRuntimeState = {
     outputDir,
     context,
     store,
     generatedAt,
-    command: env[AGENT_COMMAND_ENV],
+    command: options.command,
+    agentContext: {
+      agentName: options.agentName,
+      loopId: options.loopId,
+      taskId: options.taskId,
+      conversationId: options.conversationId,
+    },
     createFinding,
     expectations: expectationLoadResult.expectations,
     expectationLoadFindings: expectationLoadResult.findings,
-    projectGuide,
     unsubscribers: [],
     queue: Promise.resolve(),
     seenLogicalKeys: new Set<string>(),
diff --git a/packages/plugin-agent/src/query.ts b/packages/plugin-agent/src/query.ts
new file mode 100644
index 00000000000..92f8e816444
--- /dev/null
+++ b/packages/plugin-agent/src/query.ts
@@ -0,0 +1,252 @@
+import { readFile } from "node:fs/promises";
+import { join } from "node:path";
+
+import type { TestLabel, TestStatus } from "@allurereport/core-api";
+
+import { AgentUsageError } from "./errors.js";
+import type { AgentFindingCategory, AgentFindingSeverity, AgentOutputBundle, AgentTestManifestLine } from "./harness.js";
+import type { AgentLabelFilter } from "./selection.js";
+
+export const AGENT_QUERY_SCHEMA = "allure-agent-query/v1"; // emitted as `schema` in every query payload
+export const AGENT_QUERY_VIEWS = ["summary", "tests", "findings", "test"] as const; // accepted view names
+export const AGENT_TEST_STATUSES: TestStatus[] = ["failed", "broken", "unknown", "skipped", "passed"];
+export const AGENT_FINDING_SEVERITIES: AgentFindingSeverity[] = ["high", "warning", "info"];
+export const AGENT_FINDING_CATEGORIES: AgentFindingCategory[] = ["bootstrap", "scope", "metadata", "evidence", "smells"];
+
+export type AgentQueryView = (typeof AGENT_QUERY_VIEWS)[number]; // union of the AGENT_QUERY_VIEWS literals
+
+export type AgentQueryFilters = { // an unset or empty filter matches everything
+  environments?: string[]; // compared against a test's environment_id
+  labelFilters: AgentLabelFilter[]; // every filter must match a label by name and value
+  statuses?: TestStatus[];
+  severities?: AgentFindingSeverity[];
+  categories?: AgentFindingCategory[];
+  checks?: string[]; // compared against a finding's check_id, falling back to check_name
+  test?: string; // exact full name, test result id, history id, or markdown path
+  limit?: number; // caps the items returned by the "tests" and "findings" views
+  includeMarkdown?: boolean; // "test" view: inline the markdown file content
+};
+
+// Resolves the requested view name; a missing or empty value selects "summary".
+export const normalizeAgentQueryView = (value?: string): AgentQueryView => {
+  if (!value) {
+    return "summary";
+  }
+  const candidate = value.trim().toLowerCase();
+  const known = AGENT_QUERY_VIEWS.find((view) => view === candidate);
+
+  if (known) {
+    return known;
+  }
+  throw new AgentUsageError(
+    `Invalid query view ${JSON.stringify(value)}. Expected one of: ${AGENT_QUERY_VIEWS.join(", ")}`,
+  );
+};
+
+const normalizeOptionalStringValues = (values: string[] | undefined) =>
+  (values ?? []).map((raw) => raw.trim()).filter((trimmed) => trimmed.length > 0);
+
+// Lowercases repeated option values and validates each against `allowed`; no usable values yields undefined.
+export const normalizeRepeatedEnumValues = <T extends string>(
+  values: string[] | undefined,
+  allowed: readonly T[],
+  optionName: string,
+): T[] | undefined => {
+  const lowered = normalizeOptionalStringValues(values).map((entry) => entry.toLowerCase());
+  if (lowered.length === 0) {
+    return undefined;
+  }
+
+  // Only the first offending value is reported.
+  for (const entry of lowered) {
+    if (!allowed.includes(entry as T)) {
+      throw new AgentUsageError(
+        `Invalid ${optionName} value ${JSON.stringify(entry)}. Expected one of: ${allowed.join(", ")}`,
+      );
+    }
+  }
+  return lowered as T[];
+};
+
+// Returns the cleaned values, or undefined when nothing is left after cleaning.
+export const normalizeRepeatedStringValues = (values: string[] | undefined): string[] | undefined => {
+  const cleaned = normalizeOptionalStringValues(values);
+  return cleaned.length > 0 ? cleaned : undefined;
+};
+
+// Parses --limit: undefined passes through; otherwise the value must be decimal digits only and a safe integer.
+export const normalizeAgentQueryLimit = (value?: string): number | undefined => {
+  if (value === undefined) {
+    return undefined;
+  }
+
+  // Non-digit input maps to NaN so that both rejection cases (bad characters,
+  // unsafe magnitude) share the single throw below.
+  const digitsOnly = /^\d+$/.test(value);
+  const limit = digitsOnly ? Number(value) : Number.NaN;
+
+  if (!Number.isSafeInteger(limit)) {
+    throw new AgentUsageError("--limit must be a non-negative integer");
+  }
+
+  return limit;
+};
+
+const matchesLabelFilters = (labels: TestLabel[], filters: AgentLabelFilter[]) =>
+  filters.every(({ name, value }) => labels.some((label) => label.name === name && label.value === value));
+
+// A test is addressable by full name, test result id, history id, or markdown path (exact match).
+const matchesAgentTestIdentifier = (test: AgentTestManifestLine, identifier: string) =>
+  [test.full_name, test.test_result_id, test.history_id, test.markdown_path].some(
+    (candidate) => candidate === identifier,
+  );
+
+// Resolves the subject reference of a finding: prefers `subject_ref`, then a string `subject`,
+// then the structured subject's path, id, or type (first one that is not nullish).
+const agentFindingSubjectRef = (finding: AgentOutputBundle["findings"][number]) => {
+  const { subject_ref: explicitRef, subject } = finding;
+
+  if (explicitRef) {
+    return explicitRef;
+  }
+
+  return typeof subject === "string" ? subject : (subject.path ?? subject.id ?? subject.type);
+};
+
+const agentFindingCheckName = (finding: AgentOutputBundle["findings"][number]) =>
+  finding.check_id ?? finding.check_name; // `check_name` is used only when `check_id` is nullish
+
+const filterAgentQueryTests = (tests: AgentTestManifestLine[], filters: AgentQueryFilters) =>
+  tests // each stage passes everything through when its filter is unset or empty
+    .filter((item) => !filters.statuses?.length || filters.statuses.includes(item.status))
+    .filter((item) => !filters.environments?.length || filters.environments.includes(item.environment_id))
+    .filter((item) => !filters.labelFilters.length || matchesLabelFilters(item.labels, filters.labelFilters))
+    .filter((item) => !filters.test || matchesAgentTestIdentifier(item, filters.test));
+
+const hasAgentQueryTestFilters = ({ statuses, environments, labelFilters, test }: AgentQueryFilters) =>
+  [statuses?.length, environments?.length, labelFilters.length, test].some(Boolean);
+
+// When test-level filters are active, only findings attached to a matching test's markdown path survive.
+const filterAgentQueryFindings = (output: AgentOutputBundle, filters: AgentQueryFilters) => {
+  const scopedPaths = hasAgentQueryTestFilters(filters)
+    ? new Set(filterAgentQueryTests(output.tests, filters).map(({ markdown_path }) => markdown_path))
+    : undefined;
+  return output.findings
+    .filter((item) => !scopedPaths || scopedPaths.has(agentFindingSubjectRef(item)))
+    .filter((item) => !filters.severities?.length || filters.severities.includes(item.severity))
+    .filter((item) => !filters.categories?.length || filters.categories.includes(item.category))
+    .filter((item) => !filters.checks?.length || filters.checks.includes(agentFindingCheckName(item)));
+};
+
+const applyAgentQueryLimit = <T>(items: T[], limit: number | undefined): T[] =>
+  limit !== undefined ? items.slice(0, limit) : items; // no limit: the same array is returned uncopied
+
+const resolveAgentOutputPath = (output: AgentOutputBundle, relativePath: string | null | undefined) =>
+  !relativePath ? null : join(output.outputDir, relativePath); // null, undefined, and "" all yield null
+
+const buildAgentQuerySummaryPayload = (output: AgentOutputBundle) => ({ // payload of the "summary" view
+  schema: AGENT_QUERY_SCHEMA,
+  view: "summary",
+  output_dir: output.outputDir,
+  index_md: resolveAgentOutputPath(output, output.run.paths.index_md), // also repeated under `paths` below
+  run: {
+    schema_version: output.run.schema_version,
+    generated_at: output.run.generated_at,
+    phase: output.run.phase ?? null, // null when the run manifest carries no phase
+    command: output.run.command,
+    exit_code: output.run.exit_code,
+    expectations_present: output.run.expectations_present,
+    expectation_result: output.run.expectation_result,
+    agent_context: output.run.agent_context,
+  },
+  summary: output.run.summary,
+  modeling: output.run.modeling ?? null,
+  check_summary: output.run.check_summary,
+  paths: { // manifest-relative paths joined onto output.outputDir; missing entries become null
+    index_md: resolveAgentOutputPath(output, output.run.paths.index_md),
+    agents_md: resolveAgentOutputPath(output, output.run.paths.agents_md),
+    tests_manifest: resolveAgentOutputPath(output, output.run.paths.tests_manifest),
+    findings_manifest: resolveAgentOutputPath(output, output.run.paths.findings_manifest),
+    test_events_manifest: resolveAgentOutputPath(output, output.run.paths.test_events_manifest),
+    expected_manifest: resolveAgentOutputPath(output, output.run.paths.expected_manifest),
+    process_logs: {
+      stdout: resolveAgentOutputPath(output, output.run.paths.process_logs.stdout),
+      stderr: resolveAgentOutputPath(output, output.run.paths.process_logs.stderr),
+    },
+  },
+  ...(output.expected ? { expected: output.expected } : {}), // key omitted entirely when absent
+});
+
+// Builds the "tests" view; `total_matches` is the count before the limit, `returned` the count after it.
+const buildAgentQueryTestsPayload = (output: AgentOutputBundle, filters: AgentQueryFilters) => {
+  const candidates = filterAgentQueryTests(output.tests, filters);
+  const page = applyAgentQueryLimit(candidates, filters.limit);
+  return {
+    schema: AGENT_QUERY_SCHEMA,
+    view: "tests",
+    output_dir: output.outputDir,
+    total_matches: candidates.length,
+    returned: page.length,
+    tests: page,
+  };
+};
+
+// Builds the "findings" view; `total_matches` is the count before the limit, `returned` the count after it.
+const buildAgentQueryFindingsPayload = (output: AgentOutputBundle, filters: AgentQueryFilters) => {
+  const candidates = filterAgentQueryFindings(output, filters);
+  const page = applyAgentQueryLimit(candidates, filters.limit);
+  return {
+    schema: AGENT_QUERY_SCHEMA,
+    view: "findings",
+    output_dir: output.outputDir,
+    total_matches: candidates.length,
+    returned: page.length,
+    findings: page,
+  };
+};
+
+// Builds the "test" view for exactly one matching test; zero or several matches raise AgentUsageError.
+const buildAgentQueryTestPayload = async (output: AgentOutputBundle, filters: AgentQueryFilters) => {
+  const candidates = filterAgentQueryTests(output.tests, filters);
+  if (candidates.length === 0) {
+    throw new AgentUsageError(`No tests matched query in ${output.outputDir}`);
+  }
+  if (candidates.length > 1) {
+    throw new AgentUsageError(`Query matched ${candidates.length} tests in ${output.outputDir}. Use --test <full-name-or-id>.`);
+  }
+
+  const [test] = candidates;
+  const markdownPath = resolveAgentOutputPath(output, test.markdown_path);
+  const findings = output.findings.filter((item) => agentFindingSubjectRef(item) === test.markdown_path);
+  // The markdown file is read only on request, and only when the test has a resolvable path.
+  const markdown = filters.includeMarkdown && markdownPath ? { markdown: await readFile(markdownPath, "utf-8") } : {};
+  return {
+    schema: AGENT_QUERY_SCHEMA,
+    view: "test",
+    output_dir: output.outputDir,
+    markdown_path: markdownPath,
+    test,
+    findings,
+    ...markdown,
+  };
+};
+
+export const buildAgentQueryPayload = async ( // dispatches to the payload builder of the requested view
+  output: AgentOutputBundle,
+  view: AgentQueryView,
+  filters: AgentQueryFilters,
+) => {
+  switch (view) { // one case per AGENT_QUERY_VIEWS entry; there is no default branch
+    case "summary":
+      return buildAgentQuerySummaryPayload(output); // the summary view takes no filters
+
+    case "tests":
+      return buildAgentQueryTestsPayload(output, filters);
+
+    case "findings":
+      return buildAgentQueryFindingsPayload(output, filters);
+
+    case "test":
+      return buildAgentQueryTestPayload(output, filters); // async builder; may read the markdown file
+  }
+};
diff --git a/packages/cli/src/utils/agent-select.ts b/packages/plugin-agent/src/selection.ts
similarity index 89%
rename from packages/cli/src/utils/agent-select.ts
rename to packages/plugin-agent/src/selection.ts
index 32437ca1e77..16d93d8ecba 100644
--- a/packages/cli/src/utils/agent-select.ts
+++ b/packages/plugin-agent/src/selection.ts
@@ -3,15 +3,10 @@ import { tmpdir } from "node:os";
 import { join, resolve } from "node:path";
 
 import type { TestLabel, TestPlan, TestPlanTest } from "@allurereport/core-api";
-import {
-  loadAgentOutput,
-  planAgentEnrichmentReview,
-  type AgentOutputBundle,
-  type AgentTestManifestLine,
-} from "@allurereport/plugin-agent";
-import { UsageError } from "clipanion";
 
-import { readLatestAgentState } from "./agent-state.js";
+import { AgentUsageError } from "./errors.js";
+import { loadAgentOutput, planAgentEnrichmentReview, type AgentOutputBundle, type AgentTestManifestLine } from "./harness.js";
+import { readLatestAgentState } from "./state.js";
 
 export type AgentRerunPreset = "review" | "failed" | "unsuccessful" | "all";
 
@@ -35,7 +30,7 @@ export type AgentTestPlanContext = {
   cleanup: () => Promise<void>;
 };
 
-const AGENT_RERUN_PRESETS: AgentRerunPreset[] = ["review", "failed", "unsuccessful", "all"];
+export const AGENT_RERUN_PRESETS: AgentRerunPreset[] = ["review", "failed", "unsuccessful", "all"];
 
 const ALLURE_ID_LABEL = "ALLURE_ID";
 
@@ -108,7 +103,7 @@ export const normalizeAgentRerunPreset = (value?: string): AgentRerunPreset => {
   const normalized = value.trim().toLowerCase();
 
   if (!isAgentRerunPreset(normalized)) {
-    throw new UsageError(
+    throw new AgentUsageError(
       `Invalid rerun preset ${JSON.stringify(value)}. Expected one of: ${AGENT_RERUN_PRESETS.join(", ")}`,
     );
   }
@@ -121,7 +116,7 @@ export const parseAgentLabelFilters = (values?: string[]): AgentLabelFilter[] =>
     const separatorIndex = value.indexOf("=");
 
     if (separatorIndex <= 0 || separatorIndex === value.length - 1) {
-      throw new UsageError(
+      throw new AgentUsageError(
         `Invalid label filter ${JSON.stringify(value)}. Expected the form name=value, for example feature=checkout`,
       );
     }
@@ -130,7 +125,7 @@ export const parseAgentLabelFilters = (values?: string[]): AgentLabelFilter[] =>
     const filterValue = value.slice(separatorIndex + 1).trim();
 
     if (!name || !filterValue) {
-      throw new UsageError(
+      throw new AgentUsageError(
         `Invalid label filter ${JSON.stringify(value)}. Expected the form name=value, for example feature=checkout`,
       );
     }
@@ -149,11 +144,11 @@ export const resolveAgentSelectionOutputDir = async (params: {
   const { cwd, from, latest } = params;
 
   if (from && latest) {
-    throw new UsageError("Use either --from or --latest, not both");
+    throw new AgentUsageError("Use either --from or --latest, not both");
   }
 
   if (!from && !latest) {
-    throw new UsageError("Expected either --from <path> or --latest");
+    throw new AgentUsageError("Expected either --from <path> or --latest");
   }
 
   if (from) {
@@ -163,7 +158,7 @@ export const resolveAgentSelectionOutputDir = async (params: {
   const latestState = await readLatestAgentState(cwd);
 
   if (!latestState) {
-    throw new UsageError(`No latest agent output found for ${cwd}`);
+    throw new AgentUsageError(`No latest agent output found for ${cwd}`);
   }
 
   return latestState.outputDir;
@@ -216,7 +211,7 @@ export const createAgentTestPlanContext = async (params: {
   });
 
   if (!selection.testPlan.tests.length) {
-    throw new UsageError(
+    throw new AgentUsageError(
       `No tests matched rerun selection in ${selection.outputDir}. Adjust the preset or filters before rerunning.`,
     );
   }
diff --git a/packages/cli/src/utils/agent-state.ts b/packages/plugin-agent/src/state.ts
similarity index 100%
rename from packages/cli/src/utils/agent-state.ts
rename to packages/plugin-agent/src/state.ts
diff --git a/packages/plugin-agent/test/capabilities.test.ts b/packages/plugin-agent/test/capabilities.test.ts
new file mode 100644
index 00000000000..cfae6ef8a62
--- /dev/null
+++ b/packages/plugin-agent/test/capabilities.test.ts
@@ -0,0 +1,61 @@
+import { epic, feature, label, story } from "allure-js-commons";
+import { beforeEach, describe, expect, it } from "vitest";
+
+import { AGENT_TASK_MAP_HELP, createAgentCapabilities, isAgentTaskMapHelpRequest } from "../src/capabilities.js";
+import { attachJsonEvidence, expectTextToContainAll } from "./evidence.js";
+
+beforeEach(async () => {
+  await epic("coverage");
+  await feature("agent-mode");
+  await story("agent-capabilities");
+  await label("coverage", "agent-mode");
+});
+
+describe("agent capabilities", () => {
+  it("should describe the supported local agent command surface", async () => {
+    const payload = createAgentCapabilities();
+
+    await attachJsonEvidence("agent capabilities payload", payload);
+    expect(payload.schema).toBe("allure-agent-capabilities/v1");
+    expect(payload.commands.run.supported).toBe(true);
+    expect(payload.commands.run.options).toContain("--expect-test");
+    expect(payload.commands.latest.output).toEqual(["agent output: <dir>", "agent index: <dir>/index.md"]);
+    expect(payload.commands.select.output).toEqual(["stdout-testplan-json", "file-testplan-json", "file-summary"]);
+    expect(payload.commands.select.presets).toEqual(["review", "failed", "unsuccessful", "all"]);
+    expect(payload.commands.query.supported).toBe(true);
+    expect(payload.commands.query.views).toEqual(["summary", "tests", "findings", "test"]);
+    expect(payload.commands.query.filters).toContain("status");
+    expect(payload.expectations.inline.expected.fullNames).toBe(true);
+    expect(payload.expectations.inline.forbidden.labels).toBe(true);
+    expect(payload.expectations.inline.forbidden.fullNames).toBe(false);
+    expect(payload.expectations.inline.evidence.stepNameContains).toBe(true);
+    expect(payload.expectations.inline.evidence.attachmentFilters).toEqual(["name", "content-type"]);
+    expect(payload.commands.run.options).not.toContain("--expect-evidence");
+    expect(payload.output.files).toContain("manifest/run.json");
+    expect(payload.unsupported.discovery).toBe(true);
+    expect(payload.unsupported).not.toHaveProperty("query");
+    expect(payload.unsupported.localAgentService).toBe(true);
+  });
+
+  it("should define the task-map help request and help content", async () => {
+    const helpRequestCases = [
+      { args: ["agent", "--help"], expected: true },
+      { args: ["agent", "-h"], expected: true },
+      { args: ["agent", "-h=3"], expected: false },
+      { args: ["agent", "latest", "--help"], expected: false },
+    ];
+
+    await attachJsonEvidence("task map help request cases", helpRequestCases);
+    expect(helpRequestCases.map(({ args }) => isAgentTaskMapHelpRequest(args))).toEqual(
+      helpRequestCases.map(({ expected }) => expected),
+    );
+    await expectTextToContainAll("agent task map help", AGENT_TASK_MAP_HELP, [
+      "Agent task map:",
+      "allure agent capabilities",
+      "allure agent --goal ... -- <command>",
+      "allure agent query --from <output-dir> tests",
+      "allure agent select --from <output-dir>",
+      "ALLURE_AGENT_STATE_DIR=<dir>",
+    ]);
+  });
+});
diff --git a/packages/plugin-agent/test/evidence.ts b/packages/plugin-agent/test/evidence.ts
new file mode 100644
index 00000000000..88bc0b35dca
--- /dev/null
+++ b/packages/plugin-agent/test/evidence.ts
@@ -0,0 +1,24 @@
+import { attachment, step } from "allure-js-commons";
+import { expect } from "vitest";
+
+// Attaches `value` as an Allure attachment holding pretty-printed (2-space indented) JSON.
+export const attachJsonEvidence = async (name: string, value: unknown) => {
+  await attachment(name, JSON.stringify(value, null, 2), "application/json");
+};
+
+// Attaches raw text as an Allure attachment; the content type defaults to "text/plain".
+export const attachTextEvidence = async (name: string, value: string, contentType: string = "text/plain") => {
+  await attachment(name, value, contentType);
+};
+
+// Verifies inside an Allure step that `content` includes every entry of `expectedText`.
+export const expectTextToContainAll = async (artifactName: string, content: string, expectedText: string[]) => {
+  await step(`verify ${artifactName} required text`, async () => {
+    const missing = expectedText.filter((snippet) => !content.includes(snippet));
+    const report = { checked: expectedText, missing };
+
+    // Attach before asserting so the checked/missing lists are recorded even when the assertion fails.
+    await attachJsonEvidence(`${artifactName} required text`, report);
+    expect(missing).toEqual([]);
+  });
+};
diff --git a/packages/plugin-agent/test/guidance.test.ts b/packages/plugin-agent/test/guidance.test.ts
new file mode 100644
index 00000000000..ff719b87b04
--- /dev/null
+++ b/packages/plugin-agent/test/guidance.test.ts
@@ -0,0 +1,70 @@
+import { readFile } from "node:fs/promises";
+import { dirname, join, resolve } from "node:path";
+import { fileURLToPath } from "node:url";
+
+import { story } from "allure-js-commons";
+import { beforeEach, describe, expect, it } from "vitest";
+
+import { renderAgentsGuide } from "../src/guidance.js";
+import { expectTextToContainAll } from "./evidence.js";
+
+beforeEach(async () => {
+  await story("guidance");
+});
+
+const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), "../../..");
+
+describe("allure agent-mode guidance", () => {
+  it("should keep stable guidance in the package README and generated run playbook", async () => {
+    const readme = await readFile(join(repoRoot, "packages", "plugin-agent", "README.md"), "utf-8");
+    const agentsGuide = renderAgentsGuide();
+
+    await expectTextToContainAll("generated AGENTS guide", agentsGuide, [
+      "## Reading Order",
+      "## Command Task Map",
+      "## Agent Workflows",
+      "Use the smallest workflow that matches the task.",
+      "### Validate A Change",
+      "### Add Or Update Tests",
+      "### Review Existing Coverage",
+      "### Triage Failures",
+      "### Rerun A Prior Scope",
+      "### Improve Evidence Quality",
+      "### Recover Or Diagnose Agent Mode",
+      "allure agent --goal <text> --expect-tests <count> --expect-test",
+      "allure agent latest",
+      "allure agent state-dir",
+      "allure agent query --latest summary|tests|findings|test",
+      "allure agent select --latest",
+      "allure agent --rerun-latest",
+      "--preset review|failed|unsuccessful|all",
+      "--environment <id>",
+      "--label name=value",
+      "--rerun-environment",
+      "--rerun-label",
+      "ALLURE_AGENT_STATE_DIR",
+      "manifest/run.json",
+      "manifest/test-events.jsonl",
+    ]);
+
+    await expectTextToContainAll("plugin-agent README", readme, [
+      "## Verification Standard",
+      "## CLI Capability Workflow",
+      "allure --version",
+      "allure agent capabilities --json",
+      "allure agent --help",
+      "allure agent query --help",
+      "allure agent select --help",
+      "allure agent latest --help",
+      "allure agent state-dir --help",
+      "`allure agent capabilities --json` is the structured local contract for agents.",
+      "`allure agent --help` includes the human-readable command task map",
+      '--expect-test "<fullName>"',
+      "instead of spending context reconstructing runner-specific test names",
+      "instead of manually rebuilding runner-specific test names",
+      "For small mechanical test changes, use a scoped agent-mode run for the smoke check",
+      "treat the review as partial",
+      "Use `allure --version`, `allure agent capabilities --json`, and `allure agent --help` before choosing flags",
+    ]);
+  });
+});
diff --git a/packages/plugin-agent/test/harness.test.ts b/packages/plugin-agent/test/harness.test.ts
index bdc5647513e..7d5d6b0d354 100644
--- a/packages/plugin-agent/test/harness.test.ts
+++ b/packages/plugin-agent/test/harness.test.ts
@@ -1,8 +1,8 @@
-import { mkdtemp, rm, writeFile } from "node:fs/promises";
+import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
 import { tmpdir } from "node:os";
 import { join } from "node:path";
 
-import type { AttachmentLink, TestResult } from "@allurereport/core-api";
+import type { AttachmentLink, DefaultTestStepResult, TestResult } from "@allurereport/core-api";
 import type { AllureStore, PluginContext } from "@allurereport/plugin-api";
 import { BufferResultFile } from "@allurereport/reader-api";
 import { story } from "allure-js-commons";
@@ -10,6 +10,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 
 import AgentPlugin, {
   type AgentFindingManifestLine,
+  type AgentExpectationsInput,
   type AgentOutputBundle,
   AGENT_ENRICHMENT_ACTIONS,
   buildAgentExpectations,
@@ -17,11 +18,11 @@ import AgentPlugin, {
   planAgentEnrichmentReview,
   reviewAgentOutput,
 } from "../src/index.js";
+import { attachJsonEvidence } from "./evidence.js";
 
 beforeEach(async () => {
   await story("harness");
 });
-const AGENT_ENV_VARS = ["ALLURE_AGENT_EXPECTATIONS", "ALLURE_AGENT_COMMAND", "ALLURE_AGENT_PROJECT_ROOT"] as const;
 
 const createContext = (reportName: string = "Harness Report"): PluginContext =>
   ({
@@ -64,6 +65,16 @@ const createAttachment = (overrides: Partial<AttachmentLink> = {}): AttachmentLi
     ...overrides,
   }) as AttachmentLink;
 
+const createStep = (overrides: Partial<DefaultTestStepResult> = {}): DefaultTestStepResult => ({
+  name: "assert expected behavior",
+  parameters: [],
+  status: "passed",
+  steps: [],
+  type: "step",
+  message: "checked",
+  ...overrides,
+});
+
 const createStore = (overrides: Partial<AllureStore> = {}): AllureStore =>
   ({
     allTestResults: vi.fn().mockResolvedValue([]),
@@ -80,6 +91,28 @@ const createStore = (overrides: Partial<AllureStore> = {}): AllureStore =>
     ...overrides,
   }) as AllureStore;
 
+// Parses the JSON file at `path` and attaches the parsed value as evidence before returning it.
+const readJson = async <T>(path: string): Promise<T> => {
+  const parsed = JSON.parse(await readFile(path, "utf-8")) as T;
+
+  await attachJsonEvidence(`parsed ${path}`, parsed);
+  return parsed;
+};
+
+// Reads a JSON Lines file, parses every non-blank line, and attaches the parsed rows as evidence.
+const readJsonl = async <T>(path: string): Promise<T[]> => {
+  const rows: T[] = [];
+  for (const rawLine of (await readFile(path, "utf-8")).split("\n")) {
+    const line = rawLine.trim();
+    if (line) {
+      rows.push(JSON.parse(line) as T);
+    }
+  }
+
+  await attachJsonEvidence(`parsed ${path}`, rows);
+  return rows;
+};
+
 const createFinding = (overrides: Partial<AgentFindingManifestLine> = {}): AgentFindingManifestLine => ({
   finding_id: "finding-1",
   subject: "run",
@@ -158,7 +191,6 @@ const createOutputBundle = (overrides: Partial<AgentOutputBundle> = {}): AgentOu
       tests_manifest: "manifest/tests.jsonl",
       findings_manifest: "manifest/findings.jsonl",
       expected_manifest: "manifest/expected.json",
-      project_guide: null,
       process_logs: {
         stdout: "artifacts/global/stdout.txt",
         stderr: null,
@@ -204,6 +236,30 @@ const createOutputBundle = (overrides: Partial<AgentOutputBundle> = {}): AgentOu
       },
     },
     expectations_present: true,
+    expectations: {
+      goal: "Verify harness fixture",
+    },
+    expectation_result: {
+      schema_version: "allure-agent-expectation-result/v1",
+      status: "matched",
+      impact: "accept",
+      source: {
+        kind: "inline",
+        path: null,
+      },
+      recognized_control_count: 1,
+      unsupported_controls: [],
+      degraded_controls: [],
+      summary: {
+        expected_tests: 0,
+        observed_tests: 1,
+        missing_expected: 0,
+        forbidden_observed: 0,
+        unexpected_observed: 0,
+        evidence_mismatches: 0,
+      },
+      finding_ids: [],
+    },
     check_summary: {
       total: 0,
       countsBySeverity: {
@@ -267,18 +323,83 @@ describe("agent enrichment harness", () => {
 
   beforeEach(async () => {
     tempDir = await mkdtemp(join(tmpdir(), "plugin-agent-harness-"));
-    AGENT_ENV_VARS.forEach((name) => {
-      delete process.env[name];
-    });
   });
 
   afterEach(async () => {
-    AGENT_ENV_VARS.forEach((name) => {
-      delete process.env[name];
-    });
     await rm(tempDir, { recursive: true, force: true });
   });
 
+  type ExpectationHarnessRun = {
+    expectations: AgentExpectationsInput;
+    tests?: TestResult[];
+    environmentByTestId?: Record<string, string>;
+    attachmentsByTestId?: Record<string, AttachmentLink[]>;
+    contentByAttachmentId?: Record<string, BufferResultFile>;
+  };
+
+  const runExpectationHarness = async (name: string, params: ExpectationHarnessRun) => {
+    const outputDir = join(tempDir, name);
+    const tests = params.tests ?? [createTestResult()];
+    const stats = tests.reduce<Record<string, number>>(
+      (acc, test) => {
+        acc.total += 1;
+        acc[test.status] = (acc[test.status] ?? 0) + 1;
+
+        return acc;
+      },
+      {
+        total: 0,
+      },
+    );
+
+    await new AgentPlugin({
+      outputDir,
+      expectations: params.expectations,
+      command: "yarn test expectation-harness",
+    }).done(
+      createContext(),
+      createStore({
+        allTestResults: vi.fn().mockResolvedValue(tests),
+        testsStatistic: vi.fn().mockResolvedValue(stats),
+        environmentIdByTrId: vi.fn().mockImplementation(async (id: string) => {
+          return params.environmentByTestId?.[id] ?? "default";
+        }),
+        attachmentsByTrId: vi.fn().mockImplementation(async (id: string) => {
+          return params.attachmentsByTestId?.[id] ?? [];
+        }),
+        attachmentContentById: vi.fn().mockImplementation(async (id: string) => {
+          return params.contentByAttachmentId?.[id];
+        }),
+      }),
+    );
+
+    return {
+      outputDir,
+      run: await readJson<AgentOutputBundle["run"]>(join(outputDir, "manifest", "run.json")),
+      findings: await readJsonl<AgentFindingManifestLine>(join(outputDir, "manifest", "findings.jsonl")),
+    };
+  };
+
+  const expectNoExpectationFinding = (findings: AgentFindingManifestLine[], checkName: string) => {
+    expect(findings).not.toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({
+          check_name: checkName,
+        }),
+      ]),
+    );
+  };
+
+  const expectExpectationFinding = (findings: AgentFindingManifestLine[], checkName: string) => {
+    expect(findings).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({
+          check_name: checkName,
+        }),
+      ]),
+    );
+  };
+
   it("should build expectations from a harness request", () => {
     expect(
       buildAgentExpectations({
@@ -321,22 +442,311 @@ describe("agent enrichment harness", () => {
     });
   });
 
-  it("should map enrichment findings to the intended remediation categories", () => {
-    expect(AGENT_ENRICHMENT_ACTIONS["failed-without-useful-steps"].category).toBe("add-meaningful-steps");
-    expect(mapFindingToEnrichmentAction("nontrivial-run-with-empty-trace").category).toBe("add-meaningful-steps");
-    expect(mapFindingToEnrichmentAction("passed-without-observable-evidence").category).toBe("add-meaningful-steps");
-    expect(mapFindingToEnrichmentAction("failed-without-attachments").category).toBe("add-test-attachments");
-    expect(mapFindingToEnrichmentAction("global-only-artifacts").category).toBe("add-test-attachments");
-    expect(mapFindingToEnrichmentAction("runner-failures-outside-logical-results").category).toBe("bootstrap-allure");
-    expect(mapFindingToEnrichmentAction("unmodeled-visible-results").category).toBe("review-manually");
-    expect(mapFindingToEnrichmentAction("metadata-mismatch").category).toBe("repair-test-metadata");
-    expect(mapFindingToEnrichmentAction("retries-without-new-evidence").category).toBe("add-retry-diagnostics");
-    expect(mapFindingToEnrichmentAction("noop-dominated-steps").category).toBe("collapse-low-signal-trace");
-    expect(mapFindingToEnrichmentAction("step-spam").category).toBe("collapse-low-signal-trace");
-    expect(mapFindingToEnrichmentAction("unexpected-test").category).toBe("narrow-test-scope");
+  it.each([
+    {
+      name: "expected test count",
+      checkName: "expected-count-mismatch",
+      matched: {
+        expectations: {
+          expected: {
+            test_count: 1,
+          },
+        },
+      },
+      unmet: {
+        expectations: {
+          expected: {
+            test_count: 2,
+          },
+        },
+      },
+    },
+    {
+      name: "expected test full name",
+      checkName: "expected-test-missing",
+      matched: {
+        expectations: {
+          expected: {
+            full_names: ["suite should pass"],
+          },
+        },
+      },
+      unmet: {
+        expectations: {
+          expected: {
+            full_names: ["suite should be visible"],
+          },
+        },
+      },
+    },
+    {
+      name: "expected test full-name prefix",
+      checkName: "expected-prefix-missing",
+      matched: {
+        expectations: {
+          expected: {
+            full_name_prefixes: ["suite should"],
+          },
+        },
+      },
+      unmet: {
+        expectations: {
+          expected: {
+            full_name_prefixes: ["api should"],
+          },
+        },
+      },
+    },
+    {
+      name: "expected environment",
+      checkName: "expected-environment-missing",
+      matched: {
+        expectations: {
+          expected: {
+            environments: ["default"],
+          },
+        },
+      },
+      unmet: {
+        expectations: {
+          expected: {
+            environments: ["web"],
+          },
+        },
+      },
+    },
+    {
+      name: "expected label value",
+      checkName: "expected-label-missing",
+      matched: {
+        expectations: {
+          expected: {
+            label_values: {
+              module: "cli",
+            },
+          },
+        },
+        tests: [
+          createTestResult({
+            labels: [{ name: "module", value: "cli" }],
+          }),
+        ],
+      },
+      unmet: {
+        expectations: {
+          expected: {
+            label_values: {
+              module: "cli",
+            },
+          },
+        },
+      },
+    },
+    {
+      name: "forbidden label value",
+      checkName: "forbidden-label-observed",
+      matched: {
+        expectations: {
+          forbidden: {
+            label_values: {
+              layer: "e2e",
+            },
+          },
+        },
+      },
+      unmet: {
+        expectations: {
+          forbidden: {
+            label_values: {
+              layer: "e2e",
+            },
+          },
+        },
+        tests: [
+          createTestResult({
+            labels: [{ name: "layer", value: "e2e" }],
+          }),
+        ],
+      },
+    },
+    {
+      name: "expected step text",
+      checkName: "expected-step-containing-missing",
+      matched: {
+        expectations: {
+          evidence: {
+            step_name_contains: ["assert expected behavior"],
+          },
+        },
+        tests: [
+          createTestResult({
+            steps: [createStep()],
+          }),
+        ],
+      },
+      unmet: {
+        expectations: {
+          evidence: {
+            step_name_contains: ["assert expected behavior"],
+          },
+        },
+      },
+    },
+    {
+      name: "expected meaningful step count",
+      checkName: "insufficient-expected-steps",
+      matched: {
+        expectations: {
+          evidence: {
+            min_steps: 1,
+          },
+        },
+        tests: [
+          createTestResult({
+            steps: [createStep()],
+          }),
+        ],
+      },
+      unmet: {
+        expectations: {
+          evidence: {
+            min_steps: 1,
+          },
+        },
+      },
+    },
+    {
+      name: "expected attachment count",
+      checkName: "insufficient-expected-attachments",
+      matched: {
+        expectations: {
+          evidence: {
+            min_attachments: 1,
+          },
+        },
+        attachmentsByTestId: {
+          "tr-1": [createAttachment()],
+        },
+        contentByAttachmentId: {
+          "attachment-1": new BufferResultFile(Buffer.from("artifact", "utf-8"), "artifact.txt"),
+        },
+      },
+      unmet: {
+        expectations: {
+          evidence: {
+            min_attachments: 1,
+          },
+        },
+      },
+    },
+    {
+      name: "expected attachment name",
+      checkName: "missing-expected-attachment",
+      matched: {
+        expectations: {
+          evidence: {
+            attachments: [{ name: "artifact.txt" }],
+          },
+        },
+        attachmentsByTestId: {
+          "tr-1": [createAttachment()],
+        },
+        contentByAttachmentId: {
+          "attachment-1": new BufferResultFile(Buffer.from("artifact", "utf-8"), "artifact.txt"),
+        },
+      },
+      unmet: {
+        expectations: {
+          evidence: {
+            attachments: [{ name: "missing.txt" }],
+          },
+        },
+        attachmentsByTestId: {
+          "tr-1": [createAttachment()],
+        },
+        contentByAttachmentId: {
+          "attachment-1": new BufferResultFile(Buffer.from("artifact", "utf-8"), "artifact.txt"),
+        },
+      },
+    },
+    {
+      name: "expected attachment content type",
+      checkName: "missing-expected-attachment",
+      matched: {
+        expectations: {
+          evidence: {
+            attachments: [{ content_type: "text/plain" }],
+          },
+        },
+        attachmentsByTestId: {
+          "tr-1": [createAttachment()],
+        },
+        contentByAttachmentId: {
+          "attachment-1": new BufferResultFile(Buffer.from("artifact", "utf-8"), "artifact.txt"),
+        },
+      },
+      unmet: {
+        expectations: {
+          evidence: {
+            attachments: [{ content_type: "application/json" }],
+          },
+        },
+        attachmentsByTestId: {
+          "tr-1": [createAttachment()],
+        },
+        contentByAttachmentId: {
+          "attachment-1": new BufferResultFile(Buffer.from("artifact", "utf-8"), "artifact.txt"),
+        },
+      },
+    },
+  ] satisfies Array<{
+    name: string;
+    checkName: string;
+    matched: ExpectationHarnessRun;
+    unmet: ExpectationHarnessRun;
+  }>)("should report $checkName only when $name is unmet", async ({ name, checkName, matched, unmet }) => {
+    const matchedOutput = await runExpectationHarness(`${name.replace(/[^a-z0-9]+/gi, "-")}-matched`, matched);
+    const unmetOutput = await runExpectationHarness(`${name.replace(/[^a-z0-9]+/gi, "-")}-unmet`, unmet);
+
+    expectNoExpectationFinding(matchedOutput.findings, checkName);
+    expectExpectationFinding(unmetOutput.findings, checkName);
+  });
+
+  it("should map enrichment findings to the intended remediation categories", async () => {
+    const mappedActions = {
+      "failed-without-useful-steps": AGENT_ENRICHMENT_ACTIONS["failed-without-useful-steps"].category,
+      "nontrivial-run-with-empty-trace": mapFindingToEnrichmentAction("nontrivial-run-with-empty-trace").category,
+      "passed-without-observable-evidence": mapFindingToEnrichmentAction("passed-without-observable-evidence").category,
+      "failed-without-attachments": mapFindingToEnrichmentAction("failed-without-attachments").category,
+      "global-only-artifacts": mapFindingToEnrichmentAction("global-only-artifacts").category,
+      "runner-failures-outside-logical-results": mapFindingToEnrichmentAction("runner-failures-outside-logical-results")
+        .category,
+      "unmodeled-visible-results": mapFindingToEnrichmentAction("unmodeled-visible-results").category,
+      "metadata-mismatch": mapFindingToEnrichmentAction("metadata-mismatch").category,
+      "retries-without-new-evidence": mapFindingToEnrichmentAction("retries-without-new-evidence").category,
+      "noop-dominated-steps": mapFindingToEnrichmentAction("noop-dominated-steps").category,
+      "step-spam": mapFindingToEnrichmentAction("step-spam").category,
+      "unexpected-test": mapFindingToEnrichmentAction("unexpected-test").category,
+    };
+
+    await attachJsonEvidence("enrichment action category map", mappedActions);
+    expect(mappedActions).toEqual({
+      "failed-without-useful-steps": "add-meaningful-steps",
+      "nontrivial-run-with-empty-trace": "add-meaningful-steps",
+      "passed-without-observable-evidence": "add-meaningful-steps",
+      "failed-without-attachments": "add-test-attachments",
+      "global-only-artifacts": "add-test-attachments",
+      "runner-failures-outside-logical-results": "bootstrap-allure",
+      "unmodeled-visible-results": "review-manually",
+      "metadata-mismatch": "repair-test-metadata",
+      "retries-without-new-evidence": "add-retry-diagnostics",
+      "noop-dominated-steps": "collapse-low-signal-trace",
+      "step-spam": "collapse-low-signal-trace",
+      "unexpected-test": "narrow-test-scope",
+    });
   });
 
-  it("should reject high-confidence noop-style evidence", () => {
+  it("should reject high-confidence noop-style evidence", async () => {
     const review = planAgentEnrichmentReview(
       createOutputBundle({
         findings: [
@@ -352,6 +762,7 @@ describe("agent enrichment harness", () => {
       }),
     );
 
+    await attachJsonEvidence("noop-style evidence review decision", review);
     expect(review.status).toBe("reject");
     expect(review.rejecting).toEqual(
       expect.arrayContaining([
@@ -403,10 +814,7 @@ describe("agent enrichment harness", () => {
       "utf-8",
     );
 
-    process.env.ALLURE_AGENT_EXPECTATIONS = expectationsPath;
-    process.env.ALLURE_AGENT_COMMAND = "yarn test clean-run";
-
-    await new AgentPlugin({ outputDir }).done(
+    await new AgentPlugin({ outputDir, expectationsPath, command: "yarn test clean-run" }).done(
       createContext(),
       createStore({
         allTestResults: vi.fn().mockResolvedValue([testResult]),
@@ -424,6 +832,7 @@ describe("agent enrichment harness", () => {
 
     const review = await reviewAgentOutput(outputDir);
 
+    await attachJsonEvidence("clean scoped run review decision", review);
     expect(review.status).toBe("accept");
     expect(review.plan).toEqual([]);
     expect(review.rerun.useExistingExpectations).toBe(true);
@@ -479,9 +888,7 @@ describe("agent enrichment harness", () => {
       "utf-8",
     );
 
-    process.env.ALLURE_AGENT_EXPECTATIONS = expectationsPath;
-
-    await new AgentPlugin({ outputDir }).done(
+    await new AgentPlugin({ outputDir, expectationsPath }).done(
       createContext(),
       createStore({
         allTestResults: vi.fn().mockResolvedValue([matching, forbidden]),
@@ -496,7 +903,7 @@ describe("agent enrichment harness", () => {
     expect(review.rejecting).toEqual(
       expect.arrayContaining([
         expect.objectContaining({
-          checkName: "forbidden-selector-match",
+          checkName: "forbidden-label-observed",
           category: "narrow-test-scope",
         }),
       ]),
@@ -539,9 +946,7 @@ describe("agent enrichment harness", () => {
       "utf-8",
     );
 
-    process.env.ALLURE_AGENT_EXPECTATIONS = expectationsPath;
-
-    await new AgentPlugin({ outputDir }).done(
+    await new AgentPlugin({ outputDir, expectationsPath }).done(
       createContext(),
       createStore({
         allTestResults: vi.fn().mockResolvedValue([testResult]),
@@ -551,6 +956,7 @@ describe("agent enrichment harness", () => {
 
     const review = await reviewAgentOutput(outputDir);
 
+    await attachJsonEvidence("low-signal failure review decision", review);
     expect(review.status).toBe("iterate");
     expect(review.iterate).toEqual(
       expect.arrayContaining([
@@ -644,9 +1050,7 @@ describe("agent enrichment harness", () => {
       "utf-8",
     );
 
-    process.env.ALLURE_AGENT_EXPECTATIONS = expectationsPath;
-
-    await new AgentPlugin({ outputDir }).done(
+    await new AgentPlugin({ outputDir, expectationsPath }).done(
       createContext(),
       createStore({
         allTestResults: vi.fn().mockResolvedValue([current]),
diff --git a/packages/plugin-agent/test/index.test.ts b/packages/plugin-agent/test/index.test.ts
index e0f32106d1e..45ba11badbe 100644
--- a/packages/plugin-agent/test/index.test.ts
+++ b/packages/plugin-agent/test/index.test.ts
@@ -13,25 +13,16 @@ import type {
   ResultFile,
 } from "@allurereport/plugin-api";
 import { BufferResultFile } from "@allurereport/reader-api";
-import { story } from "allure-js-commons";
+import { attachment, step, story } from "allure-js-commons";
 import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
 
+import type { AgentExpectationsInput } from "../src/index.js";
 import { AgentPlugin } from "../src/plugin.js";
+import { attachJsonEvidence, attachTextEvidence } from "./evidence.js";
 
 beforeEach(async () => {
   await story("index");
 });
-const AGENT_ENV_VARS = [
-  "ALLURE_AGENT_OUTPUT",
-  "ALLURE_AGENT_EXPECTATIONS",
-  "ALLURE_AGENT_COMMAND",
-  "ALLURE_AGENT_PROJECT_ROOT",
-  "ALLURE_AGENT_NAME",
-  "ALLURE_AGENT_LOOP_ID",
-  "ALLURE_AGENT_TASK_ID",
-  "ALLURE_AGENT_CONVERSATION_ID",
-] as const;
-
 const createContext = (reportName: string = "Agent Report"): PluginContext =>
   ({
     reportName,
@@ -165,29 +156,146 @@ const createRealtimeSubscriber = () => {
   };
 };
 
-const readJson = async <T>(path: string): Promise<T> => JSON.parse(await readFile(path, "utf-8")) as T;
+const readText = async (path: string, contentType: string = "text/plain"): Promise<string> => {
+  const content = await readFile(path, "utf-8");
+
+  await attachTextEvidence(`agent artifact ${path}`, content, contentType);
+
+  return content;
+};
+
+const readJson = async <T>(path: string): Promise<T> => {
+  const value = JSON.parse(await readFile(path, "utf-8")) as T;
+
+  await attachJsonEvidence(`parsed ${path}`, value);
 
-const readJsonl = async <T>(path: string): Promise<T[]> =>
-  (await readFile(path, "utf-8"))
+  return value;
+};
+
+const readJsonl = async <T>(path: string): Promise<T[]> => {
+  const values = (await readFile(path, "utf-8"))
     .trim()
     .split("\n")
     .filter(Boolean)
     .map((line) => JSON.parse(line) as T);
 
+  await attachJsonEvidence(`parsed ${path}`, values);
+
+  return values;
+};
+
+type TestFindingLine = {
+  schema_version?: string;
+  check_id?: string;
+  instance_id?: string;
+  check_name: string;
+  severity: "info" | "warning" | "high";
+  impact?: "reject" | "iterate" | "advisory";
+  subject: unknown;
+  subject_ref?: string;
+};
+
+type AttachmentContentFixture = {
+  content: string;
+  fileName: string;
+};
+
+const createMeaningfulStep = (name: string = "assert expected behavior"): TestStepResult =>
+  ({
+    type: "step",
+    name,
+    parameters: [
+      {
+        name: "state",
+        value: "verified",
+      },
+    ],
+    status: "passed",
+    steps: [],
+  }) as TestStepResult;
+
+const createStoreWithGlobalLogs = (
+  overrides: Partial<AllureStore> = {},
+  attachmentContents: Record<string, AttachmentContentFixture> = {},
+): AllureStore => {
+  const stdout = createAttachment({
+    id: "global-stdout",
+    name: "stdout.txt",
+    originalFileName: "stdout.txt",
+  });
+  const contents = new Map<string, AttachmentContentFixture>([
+    [
+      stdout.id,
+      {
+        content: "stdout",
+        fileName: "stdout.txt",
+      },
+    ],
+    ...Object.entries(attachmentContents),
+  ]);
+
+  return createStore({
+    ...overrides,
+    allGlobalAttachments: overrides.allGlobalAttachments ?? vi.fn().mockResolvedValue([stdout]),
+    attachmentContentById:
+      overrides.attachmentContentById ??
+      vi.fn().mockImplementation(async (id: string) => {
+        const fixture = contents.get(id);
+
+        return fixture ? new BufferResultFile(Buffer.from(fixture.content, "utf-8"), fixture.fileName) : undefined;
+      }),
+  });
+};
+
+const expectationOutputName = (field: string, suffix: string) => `${field.replace(/\./g, "-")}-${suffix}`;
+
 describe("AgentPlugin", () => {
   let tempDir: string;
 
+  const runInlineExpectationCase = async (params: {
+    outputName: string;
+    expectations: AgentExpectationsInput;
+    testResult?: TestResult;
+    environmentId?: string;
+    attachments?: AttachmentLink[];
+    attachmentContents?: Record<string, AttachmentContentFixture>;
+  }) => {
+    const outputDir = join(tempDir, params.outputName);
+    const testResult =
+      params.testResult ??
+      createTestResult({
+        id: "tr-expectation",
+        historyId: "expectation-history",
+        fullName: "suite expected behavior",
+      });
+
+    await new AgentPlugin({
+      outputDir,
+      expectations: { goal: "Verify expectation case", ...params.expectations },
+    }).done(
+      createContext(),
+      createStoreWithGlobalLogs(
+        {
+          allTestResults: vi.fn().mockResolvedValue([testResult]),
+          testsStatistic: vi.fn().mockResolvedValue({ total: 1, passed: 1 }),
+          environmentIdByTrId: vi.fn().mockResolvedValue(params.environmentId ?? "default"),
+          attachmentsByTrId: vi.fn().mockResolvedValue(params.attachments ?? []),
+        },
+        params.attachmentContents,
+      ),
+    );
+
+    return {
+      outputDir,
+      findings: await readJsonl<TestFindingLine>(join(outputDir, "manifest", "findings.jsonl")),
+    };
+  };
+
   beforeEach(async () => {
     tempDir = await mkdtemp(join(tmpdir(), "plugin-agent-"));
-    AGENT_ENV_VARS.forEach((name) => {
-      delete process.env[name];
-    });
   });
 
   afterEach(async () => {
-    AGENT_ENV_VARS.forEach((name) => {
-      delete process.env[name];
-    });
     await rm(tempDir, { recursive: true, force: true });
   });
 
@@ -212,9 +320,9 @@ describe("AgentPlugin", () => {
         test_events_manifest: string;
       };
     }>(join(outputDir, "manifest", "run.json"));
-    const guide = await readFile(join(outputDir, "AGENTS.md"), "utf-8");
-    const indexContent = await readFile(join(outputDir, "index.md"), "utf-8");
-    const testEvents = await readFile(join(outputDir, "manifest", "test-events.jsonl"), "utf-8");
+    const guide = await readText(join(outputDir, "AGENTS.md"), "text/markdown");
+    const indexContent = await readText(join(outputDir, "index.md"), "text/markdown");
+    const testEvents = await readText(join(outputDir, "manifest", "test-events.jsonl"), "application/x-jsonlines");
 
     expect(runManifest.phase).toBe("running");
     expect(runManifest.paths.test_events_manifest).toBe("manifest/test-events.jsonl");
@@ -250,7 +358,7 @@ describe("AgentPlugin", () => {
     const runningManifest = await readJson<{
       phase: "running" | "done";
     }>(join(outputDir, "manifest", "run.json"));
-    const testContent = await readFile(join(outputDir, "tests", "default", "live-history.md"), "utf-8");
+    const testContent = await readText(join(outputDir, "tests", "default", "live-history.md"), "text/markdown");
     const eventLines = await readJsonl<{
       event_type: string;
       markdown_path?: string;
@@ -284,21 +392,17 @@ describe("AgentPlugin", () => {
     expect(finalEvents.at(-1)).toEqual(expect.objectContaining({ event_type: "run_finished" }));
   });
 
-  it("should prefer option outputDir over ALLURE_AGENT_OUTPUT", async () => {
+  it("should write output only when outputDir is configured", async () => {
     const optionDir = join(tempDir, "option-output");
-    const envDir = join(tempDir, "env-output");
     const store = createStore({
       allTestResults: vi.fn().mockResolvedValue([createTestResult()]),
       testsStatistic: vi.fn().mockResolvedValue({ total: 1, passed: 1 }),
     });
 
-    process.env.ALLURE_AGENT_OUTPUT = envDir;
-
     await new AgentPlugin({ outputDir: optionDir }).done(createContext(), store);
 
     await expect(stat(join(optionDir, "index.md"))).resolves.toBeTruthy();
     await expect(stat(join(optionDir, "AGENTS.md"))).resolves.toBeTruthy();
-    await expect(stat(join(envDir, "index.md"))).rejects.toThrow();
   });
 
   it("should clean only managed entries before writing", async () => {
@@ -312,9 +416,9 @@ describe("AgentPlugin", () => {
 
     await new AgentPlugin({ outputDir }).done(createContext(), createStore());
 
-    expect(await readFile(join(outputDir, "notes.txt"), "utf-8")).toBe("keep me");
-    expect(await readFile(join(outputDir, "index.md"), "utf-8")).toContain("# Agent Report");
-    expect(await readFile(join(outputDir, "AGENTS.md"), "utf-8")).toContain("# AGENTS Guide");
+    expect(await readText(join(outputDir, "notes.txt"))).toBe("keep me");
+    expect(await readText(join(outputDir, "index.md"), "text/markdown")).toContain("# Agent Report");
+    expect(await readText(join(outputDir, "AGENTS.md"), "text/markdown")).toContain("# AGENTS Guide");
   });
 
   it("should use historyId-based file names and fall back to the test result id", async () => {
@@ -358,8 +462,8 @@ describe("AgentPlugin", () => {
 
     await new AgentPlugin({ outputDir }).done(createContext(), store);
 
-    const indexContent = await readFile(join(outputDir, "index.md"), "utf-8");
-    const testContent = await readFile(join(outputDir, "tests", "default", "history.id_1.md"), "utf-8");
+    const indexContent = await readText(join(outputDir, "index.md"), "text/markdown");
+    const testContent = await readText(join(outputDir, "tests", "default", "history.id_1.md"), "text/markdown");
 
     expect(indexContent).toContain("test/index.test.ts#AgentPlugin should keep markdown readable (v1)");
     expect(testContent).toContain("Name: should keep markdown readable (v1)");
@@ -411,7 +515,7 @@ describe("AgentPlugin", () => {
 
     await new AgentPlugin({ outputDir }).done(createContext(), store);
 
-    const primaryContent = await readFile(join(outputDir, "tests", "default", "shared-history.md"), "utf-8");
+    const primaryContent = await readText(join(outputDir, "tests", "default", "shared-history.md"), "text/markdown");
 
     expect(primaryContent).toContain("## Retry 1");
     expect(primaryContent).toContain("retry failure");
@@ -460,7 +564,7 @@ describe("AgentPlugin", () => {
 
     await new AgentPlugin({ outputDir }).done(createContext("My Report"), store);
 
-    const indexContent = await readFile(join(outputDir, "index.md"), "utf-8");
+    const indexContent = await readText(join(outputDir, "index.md"), "text/markdown");
 
     expect(indexContent).toContain("# My Report");
     expect(indexContent).toContain("## Process Logs");
@@ -470,73 +574,109 @@ describe("AgentPlugin", () => {
     expect(indexContent).toContain("stdout.txt");
     expect(indexContent).toContain("stderr.txt");
     expect(indexContent).toContain("Too many failures");
-    expect(await readFile(join(outputDir, "artifacts", "global", "stdout.txt"), "utf-8")).toBe("stdout content");
-    expect(await readFile(join(outputDir, "artifacts", "global", "stderr.txt"), "utf-8")).toBe("stderr content");
-    expect(await readFile(join(outputDir, "AGENTS.md"), "utf-8")).toContain("## Reading Order");
+    expect(await readText(join(outputDir, "artifacts", "global", "stdout.txt"))).toBe("stdout content");
+    expect(await readText(join(outputDir, "artifacts", "global", "stderr.txt"))).toBe("stderr content");
+    expect(await readText(join(outputDir, "AGENTS.md"), "text/markdown")).toContain("## Reading Order");
   });
 
-  it("should copy project guidance and reference it from AGENTS.md and run manifest", async () => {
-    const outputDir = join(tempDir, "project-guide");
-    const projectRoot = join(tempDir, "project-root");
-    const guidePath = join(projectRoot, "docs", "allure-agent-mode.md");
+  it("should generate standalone AGENTS guidance", async () => {
+    const outputDir = join(tempDir, "standalone-agents-guide");
     const store = createStore({
       allTestResults: vi.fn().mockResolvedValue([createTestResult()]),
       testsStatistic: vi.fn().mockResolvedValue({ total: 1, passed: 1 }),
     });
 
-    await mkdir(join(projectRoot, "docs"), { recursive: true });
-    await writeFile(guidePath, "# Project Allure Guide\n\nUse agent mode here.\n", "utf-8");
-    process.env.ALLURE_AGENT_PROJECT_ROOT = projectRoot;
-
     await new AgentPlugin({ outputDir }).done(createContext(), store);
 
-    const guideCopy = await readFile(join(outputDir, "project", "docs", "allure-agent-mode.md"), "utf-8");
-    const agentsGuide = await readFile(join(outputDir, "AGENTS.md"), "utf-8");
+    const agentsGuide = await readText(join(outputDir, "AGENTS.md"), "text/markdown");
     const runManifest = await readJson<{
-      paths: {
-        project_guide: string | null;
-      };
+      paths: Record<string, unknown>;
     }>(join(outputDir, "manifest", "run.json"));
 
-    expect(guideCopy).toContain("Project Allure Guide");
-    expect(agentsGuide).toContain("[project guidance](project/docs/allure-agent-mode.md)");
-    expect(runManifest.paths.project_guide).toBe("project/docs/allure-agent-mode.md");
+    expect(agentsGuide).toContain("## Reading Order");
+    expect(agentsGuide).toContain("## Command Task Map");
+    expect(runManifest.paths).toEqual(expect.objectContaining({ index_md: "index.md", agents_md: "AGENTS.md" }));
   });
 
   it("should include downstream enrichment best practices in AGENTS.md", async () => {
     const outputDir = join(tempDir, "agents-guide");
 
-    await new AgentPlugin({ outputDir }).done(createContext(), createStore());
+    const guide = await step("render AGENTS guidance", async () => {
+      await new AgentPlugin({ outputDir }).done(createContext(), createStore());
 
-    const guide = await readFile(join(outputDir, "AGENTS.md"), "utf-8");
+      return await readText(join(outputDir, "AGENTS.md"), "text/markdown");
+    });
 
-    expect(guide).toContain("## Enrichment Loop Workflow");
-    expect(guide).toContain("## Verification Standard");
-    expect(guide).toContain("manifest/test-events.jsonl");
-    expect(guide).toContain("allure agent latest");
-    expect(guide).toContain("allure agent state-dir");
-    expect(guide).toContain("allure agent select --latest");
-    expect(guide).toContain("allure agent --rerun-latest");
-    expect(guide).toContain("--rerun-preset");
-    expect(guide).toContain("--rerun-environment");
-    expect(guide).toContain("--rerun-label");
-    expect(guide).toContain("ALLURE_AGENT_STATE_DIR");
-    expect(guide).toContain("print the `index.md` path");
-    expect(guide).toContain(
-      "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.",
-    );
-    expect(guide).toContain("Use `allure agent` for smoke checks too, even when the change is small or mechanical.");
-    expect(guide).toContain("Only skip agent mode when it is impossible or when you are debugging agent mode itself.");
-    expect(guide).toContain("## Small Test Change Workflow");
-    expect(guide).toContain("## Coverage Review Workflow");
-    expect(guide).toContain("## Test Enrichment Best Practices");
-    expect(guide).toContain("## Anti-Dummy Policy");
-    expect(guide).toContain("## Acceptance Checklist");
-    expect(guide).toContain("## Review Completeness");
-    expect(guide).toContain("## Partial Runtime Review");
-    expect(guide).toContain("teach `runCommand` to emit a step");
-    expect(guide).toContain("`failed-without-useful-steps`");
-    expect(guide).toContain("`noop-dominated-steps`");
+    await step("verify generated workflow guidance", async () => {
+      await attachment(
+        "verified AGENTS guidance sections",
+        JSON.stringify(
+          {
+            sections: [
+              "Agent Workflows",
+              "Command Task Map",
+              "Verification Standard",
+              "Test Enrichment Best Practices",
+            ],
+            command: 'allure agent --goal <text> --expect-tests <count> --expect-test "<fullName>"',
+          },
+          null,
+          2,
+        ),
+        "application/json",
+      );
+      expect(guide).toContain("## Agent Workflows");
+      expect(guide).toContain("Use the smallest workflow that matches the task.");
+      expect(guide).toContain("### Validate A Change");
+      expect(guide).toContain("### Add Or Update Tests");
+      expect(guide).toContain("### Review Existing Coverage");
+      expect(guide).toContain("### Triage Failures");
+      expect(guide).toContain("### Rerun A Prior Scope");
+      expect(guide).toContain("### Improve Evidence Quality");
+      expect(guide).toContain("### Recover Or Diagnose Agent Mode");
+      expect(guide).toContain("Use when code or tests changed and you need a user-facing safety conclusion.");
+      expect(guide).toContain("Commands:");
+      expect(guide).toContain("Done when:");
+      expect(guide).toContain("## Verification Standard");
+      expect(guide).toContain("manifest/test-events.jsonl");
+      expect(guide).toContain("allure agent latest");
+      expect(guide).toContain("allure agent state-dir");
+      expect(guide).toContain("allure agent select --latest");
+      expect(guide).toContain("allure agent --rerun-latest");
+      expect(guide).toContain("## Command Task Map");
+      expect(guide).toContain("setup and capability-detection loop");
+      expect(guide).toContain("output recovery loop");
+      expect(guide).toContain("tooling diagnosis loop");
+      expect(guide).toContain("rerun-planning loop");
+      expect(guide).toContain("focused retry loop");
+      expect(guide).toContain("state-control loop");
+      expect(guide).toContain("--rerun-preset");
+      expect(guide).toContain("instead of rebuilding runner-specific test names");
+      expect(guide).toContain("allure agent --rerun-latest --rerun-preset failed -- <command>");
+      expect(guide).toContain("--rerun-environment");
+      expect(guide).toContain("--rerun-label");
+      expect(guide).toContain("ALLURE_AGENT_STATE_DIR");
+      expect(guide).toContain('allure agent --goal <text> --expect-tests <count> --expect-test "<fullName>"');
+      expect(guide).toContain("print the `index.md` path");
+      expect(guide).toContain(
+        "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.",
+      );
+      expect(guide).toContain("Use `allure agent` for smoke checks too, even when the change is small or mechanical.");
+      expect(guide).toContain(
+        "Only skip agent mode when it is impossible or when you are debugging agent mode itself.",
+      );
+      expect(guide).toContain(
+        "For small mechanical changes, use this same workflow with narrower expectations rather than a separate shortcut.",
+      );
+      expect(guide).toContain("## Test Enrichment Best Practices");
+      expect(guide).toContain("## Anti-Dummy Policy");
+      expect(guide).toContain("## Acceptance Checklist");
+      expect(guide).toContain("## Review Completeness");
+      expect(guide).toContain("## Partial Runtime Review");
+      expect(guide).toContain("teach `runCommand` to emit a step");
+      expect(guide).toContain("`failed-without-useful-steps`");
+      expect(guide).toContain("`noop-dominated-steps`");
+    });
   });
 
   it("should render fixtures, copy attachments, and keep missing attachments visible", async () => {
@@ -622,7 +762,7 @@ describe("AgentPlugin", () => {
 
     await new AgentPlugin({ outputDir }).done(createContext(), store);
 
-    const content = await readFile(join(outputDir, "tests", "default", "artifact-history.md"), "utf-8");
+    const content = await readText(join(outputDir, "tests", "default", "artifact-history.md"), "text/markdown");
 
     expect(content).toContain("### Before Fixture: setup");
     expect(content).toContain("### Steps");
@@ -630,9 +770,9 @@ describe("AgentPlugin", () => {
     expect(content).toContain("screenshot.png");
     expect(content).toContain("fixture.log");
     expect(
-      await readFile(join(outputDir, "tests", "default", "artifact-history.assets", "screenshot.png"), "utf-8"),
+      await readText(join(outputDir, "tests", "default", "artifact-history.assets", "screenshot.png")),
     ).toBe("png-bytes");
-    expect(await readFile(join(outputDir, "tests", "default", "artifact-history.assets", "fixture.log"), "utf-8")).toBe(
+    expect(await readText(join(outputDir, "tests", "default", "artifact-history.assets", "fixture.log"))).toBe(
       "fixture log",
     );
   });
@@ -694,13 +834,14 @@ notes:
       "utf-8",
     );
 
-    process.env.ALLURE_AGENT_EXPECTATIONS = expectationsPath;
-    process.env.ALLURE_AGENT_COMMAND = "yarn test feature-a";
-    process.env.ALLURE_AGENT_NAME = "codex";
-    process.env.ALLURE_AGENT_LOOP_ID = "loop-1";
-    process.env.ALLURE_AGENT_CONVERSATION_ID = "conversation-1";
-
-    await new AgentPlugin({ outputDir }).done(createContext(), store);
+    await new AgentPlugin({
+      outputDir,
+      expectationsPath,
+      command: "yarn test feature-a",
+      agentName: "codex",
+      loopId: "loop-1",
+      conversationId: "conversation-1",
+    }).done(createContext(), store);
 
     const runManifest = await readJson<{
       command: string;
@@ -713,7 +854,6 @@ notes:
       };
       paths: {
         expected_manifest: string;
-        project_guide: string | null;
       };
       check_summary: {
         total: number;
@@ -726,15 +866,15 @@ notes:
     const findingsManifest = await readJsonl<{
       check_name: string;
       severity: "info" | "warning" | "high";
-      subject: string;
+      subject?: unknown;
+      subject_ref?: string;
     }>(join(outputDir, "manifest", "findings.jsonl"));
-    const indexContent = await readFile(join(outputDir, "index.md"), "utf-8");
-    const forbiddenContent = await readFile(join(outputDir, "tests", "api", "feature-b-history.md"), "utf-8");
+    const indexContent = await readText(join(outputDir, "index.md"), "text/markdown");
+    const forbiddenContent = await readText(join(outputDir, "tests", "api", "feature-b-history.md"), "text/markdown");
 
     expect(runManifest.command).toBe("yarn test feature-a");
     expect(runManifest.expectations_present).toBe(true);
     expect(runManifest.paths.expected_manifest).toBe("manifest/expected.json");
-    expect(runManifest.paths.project_guide).toBeNull();
     expect(runManifest.agent_context).toEqual({
       agent_name: "codex",
       loop_id: "loop-1",
@@ -756,14 +896,14 @@ notes:
     expect(findingsManifest).toEqual(
       expect.arrayContaining([
         expect.objectContaining({
-          check_name: "forbidden-selector-match",
+          check_name: "forbidden-label-observed",
           severity: "high",
-          subject: "tests/api/feature-b-history.md",
+          subject_ref: "tests/api/feature-b-history.md",
         }),
         expect.objectContaining({
           check_name: "unexpected-environment",
           severity: "warning",
-          subject: "run",
+          subject_ref: "run",
         }),
       ]),
     );
@@ -776,7 +916,773 @@ notes:
     expect(forbiddenContent).toContain("## Expectation Comparison");
     expect(forbiddenContent).toContain("Scope Match: forbidden");
     expect(forbiddenContent).toContain("## Quality Findings");
-    expect(await readFile(join(outputDir, "manifest", "expected.json"), "utf-8")).toContain('"task_id": "feature-a"');
+    expect(await readText(join(outputDir, "manifest", "expected.json"), "application/json")).toContain(
+      '"task_id": "feature-a"',
+    );
+  });
+
+  it("should load inline expectations and report count and evidence gaps", async () => {
+    const outputDir = join(tempDir, "inline-expectations");
+    const matching = createTestResult({
+      id: "tr-inline",
+      historyId: "inline-history",
+      name: "inline should be visible",
+      fullName: "inline should be visible",
+      labels: [
+        {
+          name: "feature",
+          value: "inline",
+        },
+      ],
+    });
+    const store = createStore({
+      allTestResults: vi.fn().mockResolvedValue([matching]),
+      testsStatistic: vi.fn().mockResolvedValue({ total: 1, passed: 1 }),
+    });
+
+    const expectations = {
+      goal: "Review inline expectations",
+      expected: {
+        test_count: 2,
+        label_values: {
+          feature: "inline",
+        },
+      },
+      evidence: {
+        min_steps: 1,
+        min_attachments: 1,
+        step_name_contains: ["assert expected behavior"],
+        attachments: [
+          {
+            name: "evidence.json",
+          },
+        ],
+      },
+    };
+
+    await new AgentPlugin({ outputDir, expectations }).done(createContext(), store);
+
+    const expectedManifest = await readJson<{
+      expected: {
+        test_count: number;
+      };
+      evidence: {
+        step_name_contains: string[];
+      };
+    }>(join(outputDir, "manifest", "expected.json"));
+    const findingsManifest = await readJsonl<{
+      check_name: string;
+      severity: "info" | "warning" | "high";
+      subject?: unknown;
+      subject_ref?: string;
+    }>(join(outputDir, "manifest", "findings.jsonl"));
+    const runManifest = await readJson<{
+      expectations: {
+        evidence: {
+          step_name_contains: string[];
+        };
+      };
+      expectation_result: {
+        status: string;
+        impact: string;
+        recognized_control_count: number;
+        summary: {
+          expected_tests: number;
+          observed_tests: number;
+          evidence_mismatches: number;
+        };
+      };
+    }>(join(outputDir, "manifest", "run.json"));
+    const indexContent = await readText(join(outputDir, "index.md"), "text/markdown");
+
+    expect(expectedManifest.expected.test_count).toBe(2);
+    expect(expectedManifest.evidence.step_name_contains).toEqual(["assert expected behavior"]);
+    expect(runManifest.expectations.evidence.step_name_contains).toEqual(["assert expected behavior"]);
+    expect(runManifest.expectation_result.status).toBe("failed");
+    expect(runManifest.expectation_result.impact).toBe("iterate");
+    expect(runManifest.expectation_result.recognized_control_count).toBe(7);
+    expect(runManifest.expectation_result.summary).toEqual(
+      expect.objectContaining({
+        expected_tests: 2,
+        observed_tests: 1,
+        evidence_mismatches: 4,
+      }),
+    );
+    expect(indexContent).toContain("Expectations Source: CLI options");
+    expect(indexContent).toContain("## Expectation Result");
+    expect(indexContent).toContain("Status: failed");
+    expect(indexContent).toContain("test count: 2");
+    expect(indexContent).toContain("step contains: assert expected behavior");
+    expect(findingsManifest).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({
+          schema_version: "allure-agent-finding/v2",
+          check_id: "expected-count-mismatch",
+          check_name: "expected-count-mismatch",
+          severity: "warning",
+          impact: "iterate",
+          subject_ref: "run",
+        }),
+        expect.objectContaining({
+          check_name: "expected-step-containing-missing",
+          severity: "warning",
+          subject_ref: "tests/default/inline-history.md",
+        }),
+        expect.objectContaining({
+          check_name: "insufficient-expected-steps",
+          severity: "warning",
+          subject_ref: "tests/default/inline-history.md",
+        }),
+        expect.objectContaining({
+          check_name: "insufficient-expected-attachments",
+          severity: "warning",
+          subject_ref: "tests/default/inline-history.md",
+        }),
+        expect.objectContaining({
+          check_name: "missing-expected-attachment",
+          severity: "warning",
+          subject_ref: "tests/default/inline-history.md",
+        }),
+      ]),
+    );
+  });
+
+  it("should mark metadata-only expectations as not requested", async () => {
+    const outputDir = join(tempDir, "metadata-only-expectations");
+    const matching = createTestResult({
+      id: "tr-metadata-only",
+      historyId: "metadata-only-history",
+      name: "metadata-only test",
+      fullName: "metadata-only test",
+    });
+    const store = createStore({
+      allTestResults: vi.fn().mockResolvedValue([matching]),
+      testsStatistic: vi.fn().mockResolvedValue({ total: 1, passed: 1 }),
+    });
+
+    await new AgentPlugin({
+      outputDir,
+      expectations: {
+        goal: "record review context",
+        task_id: "TASK-1",
+      },
+    }).done(createContext(), store);
+
+    const runManifest = await readJson<{
+      expectation_result: {
+        status: string;
+        impact: string;
+        recognized_control_count: number;
+        summary: {
+          observed_tests: number;
+        };
+      };
+    }>(join(outputDir, "manifest", "run.json"));
+
+    expect(runManifest.expectation_result).toEqual(
+      expect.objectContaining({
+        status: "not_requested",
+        impact: "advisory",
+        recognized_control_count: 2,
+      }),
+    );
+    expect(runManifest.expectation_result.summary.observed_tests).toBe(1);
+  });
+
+  it("should render every parsed inline expectation config field", async () => {
+    const traceAttachment = createAttachment({
+      id: "trace-json",
+      name: "trace.json",
+      originalFileName: "trace.json",
+      ext: ".json",
+      contentType: "application/json",
+    });
+    const { outputDir, findings } = await runInlineExpectationCase({
+      outputName: "parsed-inline-config-fields",
+      environmentId: "web",
+      testResult: createTestResult({
+        id: "tr-parsed-config",
+        historyId: "parsed-config-history",
+        fullName: "suite expected behavior",
+        labels: [
+          {
+            name: "feature",
+            value: "scope",
+          },
+        ],
+        steps: [createMeaningfulStep()],
+      }),
+      attachments: [traceAttachment],
+      attachmentContents: {
+        "trace-json": {
+          content: "{}",
+          fileName: "trace.json",
+        },
+      },
+      expectations: {
+        goal: "Review parsed inline config fields",
+        task_id: "agent-inline-fields",
+        expected: {
+          test_count: 1,
+          environments: ["web"],
+          full_names: ["suite expected behavior"],
+          full_name_prefixes: ["suite expected"],
+          label_values: {
+            feature: "scope",
+          },
+        },
+        forbidden: {
+          environments: ["api"],
+          full_names: ["suite forbidden behavior"],
+          full_name_prefixes: ["suite forbidden"],
+          label_values: {
+            feature: ["forbidden"],
+          },
+        },
+        evidence: {
+          min_steps: 1,
+          min_attachments: 1,
+          step_name_contains: ["assert expected behavior"],
+          attachments: [
+            {
+              name: "trace.json",
+            },
+            {
+              content_type: "application/json",
+            },
+          ],
+        },
+        notes: "Keep every field visible to reviewers",
+      },
+    });
+
+    const indexContent = await readText(join(outputDir, "index.md"), "text/markdown");
+
+    expect(findings).toEqual([]);
+    expect(indexContent).toContain("Goal: Review parsed inline config fields");
+    expect(indexContent).toContain("Feature / Task: agent-inline-fields");
+    expect(indexContent).toContain(
+      "Expected selectors: test count: 1 | environments: web | full names: suite expected behavior | prefixes: suite expected | labels: feature in [scope]",
+    );
+    expect(indexContent).toContain(
+      "Forbidden selectors: environments: api | full names: suite forbidden behavior | prefixes: suite forbidden | labels: feature in [forbidden]",
+    );
+    expect(indexContent).toContain(
+      "Evidence expectations: meaningful steps per test: >= 1 | attachments per test: >= 1 | step contains: assert expected behavior | attachments: name=trace.json; content-type=application/json",
+    );
+    expect(indexContent).toContain("Notes: Keep every field visible to reviewers");
+  });
+
+  it.each([ // One row per "expected.*" selector; testResult/environmentId are undefined when a row omits them.
+    {
+      field: "expected.test_count",
+      expectations: {
+        expected: {
+          test_count: 1,
+        },
+      },
+    },
+    {
+      field: "expected.environments",
+      environmentId: "web",
+      expectations: {
+        expected: {
+          environments: ["web"],
+        },
+      },
+    },
+    {
+      field: "expected.full_names",
+      expectations: {
+        expected: {
+          full_names: ["suite expected behavior"],
+        },
+      },
+    },
+    {
+      field: "expected.full_name_prefixes",
+      expectations: {
+        expected: {
+          full_name_prefixes: ["suite expected"],
+        },
+      },
+    },
+    {
+      field: "expected.label_values",
+      testResult: createTestResult({
+        id: "tr-expected-label-pass",
+        historyId: "expected-label-pass-history",
+        fullName: "suite expected behavior",
+        labels: [
+          {
+            name: "feature",
+            value: "scope",
+          },
+        ],
+      }),
+      expectations: {
+        expected: {
+          label_values: {
+            feature: "scope",
+          },
+        },
+      },
+    },
+  ])("should report no findings when $field is met", async ({ field, expectations, testResult, environmentId }) => {
+    const { findings } = await runInlineExpectationCase({
+      outputName: expectationOutputName(field, "met"),
+      expectations,
+      testResult,
+      environmentId,
+    });
+    // Strict equality with []: a satisfied selector must not produce any finding at all.
+    expect(findings).toEqual([]);
+  });
+
+  it.each([ // Each row pairs an unmet "expected.*" selector with the check_name that must appear in the findings.
+    {
+      field: "expected.test_count",
+      checkName: "expected-count-mismatch",
+      expectations: {
+        expected: {
+          test_count: 2,
+        },
+      },
+    },
+    {
+      field: "expected.environments",
+      checkName: "expected-environment-missing",
+      environmentId: "api",
+      expectations: {
+        expected: {
+          environments: ["web"],
+        },
+      },
+    },
+    {
+      field: "expected.full_names",
+      checkName: "expected-test-missing",
+      expectations: {
+        expected: {
+          full_names: ["suite missing behavior"],
+        },
+      },
+    },
+    {
+      field: "expected.full_name_prefixes",
+      checkName: "expected-prefix-missing",
+      expectations: {
+        expected: {
+          full_name_prefixes: ["suite missing"],
+        },
+      },
+    },
+    {
+      field: "expected.label_values",
+      checkName: "expected-label-missing",
+      testResult: createTestResult({
+        id: "tr-expected-label-fail",
+        historyId: "expected-label-fail-history",
+        fullName: "suite expected behavior",
+        labels: [
+          {
+            name: "feature",
+            value: "other",
+          },
+        ],
+      }),
+      expectations: {
+        expected: {
+          label_values: {
+            feature: "scope",
+          },
+        },
+      },
+    },
+  ])(
+    "should report $checkName when $field is not met",
+    async ({ field, checkName, expectations, testResult, environmentId }) => {
+      const { findings } = await runInlineExpectationCase({
+        outputName: expectationOutputName(field, "missing"),
+        expectations,
+        testResult,
+        environmentId,
+      });
+      // Partial match on purpose: only the named check must be present, other findings are tolerated.
+      expect(findings).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            check_name: checkName,
+          }),
+        ]),
+      );
+    },
+  );
+
+  it.each([ // One row per "forbidden.*" selector, each configured with a value the row's own inputs do not use.
+    {
+      field: "forbidden.environments",
+      environmentId: "web",
+      expectations: {
+        forbidden: {
+          environments: ["api"],
+        },
+      },
+    },
+    {
+      field: "forbidden.full_names",
+      expectations: {
+        forbidden: {
+          full_names: ["suite forbidden behavior"],
+        },
+      },
+    },
+    {
+      field: "forbidden.full_name_prefixes",
+      expectations: {
+        forbidden: {
+          full_name_prefixes: ["suite forbidden"],
+        },
+      },
+    },
+    {
+      field: "forbidden.label_values",
+      testResult: createTestResult({
+        id: "tr-forbidden-label-pass",
+        historyId: "forbidden-label-pass-history",
+        fullName: "suite expected behavior",
+        labels: [
+          {
+            name: "feature",
+            value: "scope",
+          },
+        ],
+      }),
+      expectations: {
+        forbidden: {
+          label_values: {
+            feature: "forbidden",
+          },
+        },
+      },
+    },
+  ])(
+    "should report no findings when $field is not matched",
+    async ({ field, expectations, testResult, environmentId }) => {
+      const { findings } = await runInlineExpectationCase({
+        outputName: expectationOutputName(field, "allowed"),
+        expectations,
+        testResult,
+        environmentId,
+      });
+      // Strict equality with []: an unmatched forbidden selector must stay completely silent.
+      expect(findings).toEqual([]);
+    },
+  );
+
+  it.each([ // Environment, full-name and prefix rows expect "forbidden-selector-match"; the label row expects "forbidden-label-observed".
+    {
+      field: "forbidden.environments",
+      checkName: "forbidden-selector-match",
+      environmentId: "api",
+      expectations: {
+        forbidden: {
+          environments: ["api"],
+        },
+      },
+    },
+    {
+      field: "forbidden.full_names",
+      checkName: "forbidden-selector-match",
+      expectations: {
+        forbidden: {
+          full_names: ["suite expected behavior"],
+        },
+      },
+    },
+    {
+      field: "forbidden.full_name_prefixes",
+      checkName: "forbidden-selector-match",
+      expectations: {
+        forbidden: {
+          full_name_prefixes: ["suite expected"],
+        },
+      },
+    },
+    {
+      field: "forbidden.label_values",
+      checkName: "forbidden-label-observed",
+      testResult: createTestResult({
+        id: "tr-forbidden-label-fail",
+        historyId: "forbidden-label-fail-history",
+        fullName: "suite expected behavior",
+        labels: [
+          {
+            name: "feature",
+            value: "forbidden",
+          },
+        ],
+      }),
+      expectations: {
+        forbidden: {
+          label_values: {
+            feature: "forbidden",
+          },
+        },
+      },
+    },
+  ])(
+    "should report $checkName when $field is matched",
+    async ({ field, checkName, expectations, testResult, environmentId }) => {
+      const { findings } = await runInlineExpectationCase({
+        outputName: expectationOutputName(field, "forbidden"),
+        expectations,
+        testResult,
+        environmentId,
+      });
+      // Partial match on purpose: only the named check must be present, other findings are tolerated.
+      expect(findings).toEqual(
+        expect.arrayContaining([
+          expect.objectContaining({
+            check_name: checkName,
+          }),
+        ]),
+      );
+    },
+  );
+
+  it.each([ // One row per "evidence.*" rule; each row supplies the step or attachment input that matches its rule.
+    {
+      field: "evidence.step_name_contains",
+      expectations: {
+        evidence: {
+          step_name_contains: ["expected behavior"],
+        },
+      },
+      testResult: createTestResult({
+        id: "tr-evidence-step-text-pass",
+        historyId: "evidence-step-text-pass-history",
+        fullName: "suite expected behavior",
+        steps: [createMeaningfulStep()],
+      }),
+    },
+    {
+      field: "evidence.min_steps",
+      expectations: {
+        evidence: {
+          min_steps: 1,
+        },
+      },
+      testResult: createTestResult({
+        id: "tr-evidence-steps-pass",
+        historyId: "evidence-steps-pass-history",
+        fullName: "suite expected behavior",
+        steps: [createMeaningfulStep()],
+      }),
+    },
+    {
+      field: "evidence.min_attachments",
+      expectations: {
+        evidence: {
+          min_attachments: 1,
+        },
+      },
+      attachments: [
+        createAttachment({
+          id: "evidence-attachment-pass",
+          name: "evidence.txt",
+          originalFileName: "evidence.txt",
+        }),
+      ],
+      attachmentContents: {
+        "evidence-attachment-pass": {
+          content: "evidence",
+          fileName: "evidence.txt",
+        },
+      },
+    },
+    {
+      field: "evidence.attachments.name",
+      expectations: {
+        evidence: {
+          attachments: [
+            {
+              name: "evidence.txt",
+            },
+          ],
+        },
+      },
+      attachments: [
+        createAttachment({
+          id: "evidence-name-pass",
+          name: "evidence.txt",
+          originalFileName: "evidence.txt",
+        }),
+      ],
+      attachmentContents: {
+        "evidence-name-pass": {
+          content: "evidence",
+          fileName: "evidence.txt",
+        },
+      },
+    },
+    {
+      field: "evidence.attachments.content_type",
+      expectations: {
+        evidence: {
+          attachments: [
+            {
+              content_type: "application/json",
+            },
+          ],
+        },
+      },
+      attachments: [
+        createAttachment({
+          id: "evidence-type-pass",
+          name: "evidence.json",
+          originalFileName: "evidence.json",
+          ext: ".json",
+          contentType: "application/json",
+        }),
+      ],
+      attachmentContents: {
+        "evidence-type-pass": {
+          content: "{}",
+          fileName: "evidence.json",
+        },
+      },
+    },
+  ])(
+    "should report no findings when $field is met",
+    async ({ field, expectations, testResult, attachments, attachmentContents }) => {
+      const { findings } = await runInlineExpectationCase({
+        outputName: expectationOutputName(field, "met"),
+        expectations,
+        testResult,
+        attachments,
+        attachmentContents,
+      });
+      // Strict equality with []: satisfied evidence rules must not produce any finding.
+      expect(findings).toEqual([]);
+    },
+  );
+
+  it("should match expected step text in nested test-scoped steps", async () => {
+    const nestedStep = {
+      ...createMeaningfulStep("parent action"),
+      steps: [createMeaningfulStep("Validate order total includes discount")], // the expected text appears only in this child step
+    } as TestStepResult;
+    const { findings } = await runInlineExpectationCase({
+      outputName: "evidence-step-name-nested-met",
+      expectations: {
+        evidence: {
+          step_name_contains: ["order total includes discount"],
+        },
+      },
+      testResult: createTestResult({
+        id: "tr-nested-step",
+        historyId: "nested-step-history",
+        fullName: "suite expected behavior",
+        steps: [nestedStep],
+      }),
+    });
+    // The top-level step is "parent action", so an empty result means the child step satisfied the rule.
+    expect(findings).toEqual([]);
+  });
+
+  it("should not satisfy expected step text from global output only", async () => {
+    const { findings } = await runInlineExpectationCase({
+      outputName: "evidence-step-name-global-output-missing",
+      expectations: {
+        evidence: {
+          step_name_contains: ["global-only marker"],
+        },
+      },
+      attachmentContents: {
+        "global-stdout": {
+          content: "global-only marker", // the marker text is supplied only as content of the "global-stdout" entry
+          fileName: "stdout.txt",
+        },
+      },
+    });
+    // Partial match on purpose: the missing-step check must be reported, other findings are tolerated.
+    expect(findings).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({
+          check_name: "expected-step-containing-missing",
+        }),
+      ]),
+    );
+  });
+
+  it.each([ // Rows pass no testResult or attachments to the helper, only the evidence rule and the expected check_name.
+    {
+      field: "evidence.step_name_contains",
+      checkName: "expected-step-containing-missing",
+      expectations: {
+        evidence: {
+          step_name_contains: ["expected behavior"],
+        },
+      },
+    },
+    {
+      field: "evidence.min_steps",
+      checkName: "insufficient-expected-steps",
+      expectations: {
+        evidence: {
+          min_steps: 1,
+        },
+      },
+    },
+    {
+      field: "evidence.min_attachments",
+      checkName: "insufficient-expected-attachments",
+      expectations: {
+        evidence: {
+          min_attachments: 1,
+        },
+      },
+    },
+    {
+      field: "evidence.attachments.name",
+      checkName: "missing-expected-attachment",
+      expectations: {
+        evidence: {
+          attachments: [
+            {
+              name: "evidence.txt",
+            },
+          ],
+        },
+      },
+    },
+    {
+      field: "evidence.attachments.content_type",
+      checkName: "missing-expected-attachment",
+      expectations: {
+        evidence: {
+          attachments: [
+            {
+              content_type: "application/json",
+            },
+          ],
+        },
+      },
+    },
+  ])("should report $checkName when $field is not met", async ({ field, checkName, expectations }) => {
+    const { findings } = await runInlineExpectationCase({
+      outputName: expectationOutputName(field, "missing"),
+      expectations,
+    });
+    // Partial match on purpose: only the named check must be present, other findings are tolerated.
+    expect(findings).toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({
+          check_name: checkName,
+        }),
+      ]),
+    );
   });
 
   it("should emit bootstrap findings when no visible tests are present", async () => {
@@ -791,12 +1697,12 @@ notes:
       check_name: string;
       severity: "info" | "warning" | "high";
     }>(join(outputDir, "manifest", "findings.jsonl"));
-    const indexContent = await readFile(join(outputDir, "index.md"), "utf-8");
+    const indexContent = await readText(join(outputDir, "index.md"), "text/markdown");
 
     expect(findingsManifest).toEqual(
       expect.arrayContaining([
         expect.objectContaining({
-          check_name: "no-visible-tests",
+          check_name: "no-tests-observed",
           severity: "high",
         }),
         expect.objectContaining({
@@ -808,6 +1714,42 @@ notes:
     expect(indexContent).toContain("No visible test results were found in the run.");
   });
 
+  it("should accept zero observed logical tests when --expect-tests 0 was requested", async () => {
+    const outputDir = join(tempDir, "expect-zero-tests");
+    const store = createStore({
+      testsStatistic: vi.fn().mockResolvedValue({ total: 0 }),
+    });
+    // NOTE(review): only testsStatistic is stubbed here; the rest of the store comes from createStore (defined outside this hunk).
+    await new AgentPlugin({
+      outputDir,
+      expectations: {
+        goal: "Verify no logical tests are selected",
+        expected: {
+          test_count: 0,
+        },
+      },
+    }).done(createContext(), store);
+    // Read back the manifests under outputDir/manifest and assert on them.
+    const runManifest = await readJson<{
+      expectation_result: {
+        status: string;
+        impact: string;
+      };
+    }>(join(outputDir, "manifest", "run.json"));
+    const findingsManifest = await readJsonl<{
+      check_name: string;
+    }>(join(outputDir, "manifest", "findings.jsonl"));
+
+    expect(runManifest.expectation_result).toEqual(expect.objectContaining({ status: "matched", impact: "accept" }));
+    expect(findingsManifest).not.toEqual(
+      expect.arrayContaining([
+        expect.objectContaining({
+          check_name: "no-tests-observed",
+        }),
+      ]),
+    );
+  });
+
   it("should surface partial runtime modeling and high-signal stderr summaries", async () => {
     const outputDir = join(tempDir, "partial-runtime");
     const stderrAttachment = createAttachment({
@@ -891,7 +1833,7 @@ notes:
       check_name: string;
       severity: "info" | "warning" | "high";
     }>(join(outputDir, "manifest", "findings.jsonl"));
-    const indexContent = await readFile(join(outputDir, "index.md"), "utf-8");
+    const indexContent = await readText(join(outputDir, "index.md"), "text/markdown");
 
     expect(runManifest.actual_exit_code).toBe(1);
     expect(runManifest.original_exit_code).toBe(1);
@@ -983,7 +1925,7 @@ notes:
         };
       };
     }>(join(outputDir, "manifest", "run.json"));
-    const indexContent = await readFile(join(outputDir, "index.md"), "utf-8");
+    const indexContent = await readText(join(outputDir, "index.md"), "text/markdown");
 
     expect(runManifest.modeling.stderr.actionableSamples).toEqual(
       expect.arrayContaining([expect.stringContaining('unable to find utility "xcresulttool"')]),
@@ -1047,10 +1989,7 @@ notes:
       "utf-8",
     );
 
-    process.env.ALLURE_AGENT_EXPECTATIONS = expectationsPath;
-    process.env.ALLURE_AGENT_COMMAND = "yarn test clean-run";
-
-    await new AgentPlugin({ outputDir }).done(createContext(), store);
+    await new AgentPlugin({ outputDir, expectationsPath, command: "yarn test clean-run" }).done(createContext(), store);
 
     const runManifest = await readJson<{
       actual_exit_code: number | null;
@@ -1071,7 +2010,7 @@ notes:
       };
     }>(join(outputDir, "manifest", "run.json"));
     const findingsManifest = await readJsonl(join(outputDir, "manifest", "findings.jsonl"));
-    const indexContent = await readFile(join(outputDir, "index.md"), "utf-8");
+    const indexContent = await readText(join(outputDir, "index.md"), "text/markdown");
 
     expect(runManifest.check_summary.total).toBe(0);
     expect(runManifest.actual_exit_code).toBeNull();
@@ -1113,25 +2052,26 @@ notes:
 
     await new AgentPlugin({ outputDir }).done(createContext(), store);
 
-    const testContent = await readFile(join(outputDir, "tests", "default", "low-signal-history.md"), "utf-8");
+    const testContent = await readText(join(outputDir, "tests", "default", "low-signal-history.md"), "text/markdown");
     const findingsManifest = await readJsonl<{
       check_name: string;
-      subject: string;
+      subject?: unknown;
+      subject_ref?: string;
     }>(join(outputDir, "manifest", "findings.jsonl"));
 
-    expect(testContent).toContain("failed-without-useful-steps");
-    expect(testContent).toContain("failed-without-attachments");
-    expect(testContent).toContain("nontrivial-run-with-empty-trace");
+    expect(testContent).toContain("A failed or broken test has no useful runtime steps.");
+    expect(testContent).toContain("A failed or broken test has no test-scoped attachments.");
+    expect(testContent).toContain("A nontrivial test run recorded no steps or fixture activity.");
     expect(testContent).toContain("## Rerun Guidance");
     expect(findingsManifest).toEqual(
       expect.arrayContaining([
         expect.objectContaining({
           check_name: "failed-without-useful-steps",
-          subject: "tests/default/low-signal-history.md",
+          subject_ref: "tests/default/low-signal-history.md",
         }),
         expect.objectContaining({
           check_name: "failed-without-attachments",
-          subject: "tests/default/low-signal-history.md",
+          subject_ref: "tests/default/low-signal-history.md",
         }),
       ]),
     );
@@ -1172,21 +2112,25 @@ notes:
 
     await new AgentPlugin({ outputDir }).done(createContext(), store);
 
-    const testContent = await readFile(join(outputDir, "tests", "default", "retry-evidence-history.md"), "utf-8");
+    const testContent = await readText(
+      join(outputDir, "tests", "default", "retry-evidence-history.md"),
+      "text/markdown",
+    );
     const findingsManifest = await readJsonl<{
       check_name: string;
       severity: "info" | "warning" | "high";
-      subject: string;
+      subject?: unknown;
+      subject_ref?: string;
     }>(join(outputDir, "manifest", "findings.jsonl"));
 
-    expect(testContent).toContain("retries-without-new-evidence");
+    expect(testContent).toContain("Retries did not add any new observable evidence.");
     expect(testContent).toContain("## Retry 1");
     expect(findingsManifest).toEqual(
       expect.arrayContaining([
         expect.objectContaining({
           check_name: "retries-without-new-evidence",
           severity: "info",
-          subject: "tests/default/retry-evidence-history.md",
+          subject_ref: "tests/default/retry-evidence-history.md",
         }),
       ]),
     );
diff --git a/packages/plugin-agent/test/inline-expectations.test.ts b/packages/plugin-agent/test/inline-expectations.test.ts
new file mode 100644
index 00000000000..6a5919b12bc
--- /dev/null
+++ b/packages/plugin-agent/test/inline-expectations.test.ts
@@ -0,0 +1,215 @@
+import { mkdtemp, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+
+import { epic, feature, label, story } from "allure-js-commons";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+
+import { AgentExpectationUsageError, AgentUsageError } from "../src/errors.js";
+import { buildAgentInlineExpectations, validateAgentExpectationsFile } from "../src/inline-expectations.js";
+
+let tempDir: string | undefined;
+
+const makeTempDir = async () => {
+  tempDir = await mkdtemp(join(tmpdir(), "allure-agent-expectations-test-"));
+  // The path is also kept in the module-level tempDir so the afterEach hook can remove it.
+  return tempDir;
+};
+
+beforeEach(async () => { // Calls the allure-js-commons epic/feature/story/label helpers before every test in this file.
+  await epic("coverage");
+  await feature("agent-mode");
+  await story("inline-expectations");
+  await label("coverage", "agent-mode");
+});
+
+afterEach(async () => { // Removes the scratch directory only when a test created one through makeTempDir.
+  if (tempDir) {
+    await rm(tempDir, { recursive: true, force: true });
+    tempDir = undefined; // reset so a later test without a temp dir skips the cleanup
+  }
+});
+
+describe("inline agent expectations", () => {
+  it.each([ // Each row feeds one option (named by the row's `option` label) to buildAgentInlineExpectations and states the exact result.
+    {
+      option: "--goal",
+      input: { goal: "Review agent visibility" },
+      expected: { goal: "Review agent visibility" },
+    },
+    {
+      option: "--task-id",
+      input: { taskId: "agent-inline" },
+      expected: { task_id: "agent-inline" },
+    },
+    {
+      option: "--expect-tests",
+      input: { expectTests: "2" },
+      expected: { expected: { test_count: 2 } },
+    },
+    {
+      option: "--expect-label",
+      input: { expectLabels: ["module=plugin-agent", "module=cli"] },
+      expected: { expected: { label_values: { module: ["plugin-agent", "cli"] } } },
+    },
+    {
+      option: "--expect-env",
+      input: { expectEnvironments: ["node"] },
+      expected: { expected: { environments: ["node"] } },
+    },
+    {
+      option: "--expect-test",
+      input: { expectFullNames: ["suite should pass"] },
+      expected: { expected: { full_names: ["suite should pass"] } },
+    },
+    {
+      option: "--expect-prefix",
+      input: { expectPrefixes: ["suite"] },
+      expected: { expected: { full_name_prefixes: ["suite"] } },
+    },
+    {
+      option: "--forbid-label",
+      input: { forbidLabels: ["layer=e2e"] },
+      expected: { forbidden: { label_values: { layer: ["e2e"] } } },
+    },
+    {
+      option: "--expect-step-containing",
+      input: { expectStepContains: ["assert expected behavior"] },
+      expected: { evidence: { step_name_contains: ["assert expected behavior"] } },
+    },
+    {
+      option: "--expect-steps",
+      input: { expectSteps: "1" },
+      expected: { evidence: { min_steps: 1 } },
+    },
+    {
+      option: "--expect-attachments",
+      input: { expectAttachments: "1" },
+      expected: { evidence: { min_attachments: 1 } },
+    },
+    {
+      option: "--expect-attachment name",
+      input: { expectAttachmentFilters: ["trace.zip"] },
+      expected: { evidence: { attachments: [{ name: "trace.zip" }] } },
+    },
+    {
+      option: "--expect-attachment name=...",
+      input: { expectAttachmentFilters: ["name=trace.zip"] },
+      expected: { evidence: { attachments: [{ name: "trace.zip" }] } },
+    },
+    {
+      option: "--expect-attachment content-type=...",
+      input: { expectAttachmentFilters: ["content-type=application/json"] },
+      expected: { evidence: { attachments: [{ content_type: "application/json" }] } },
+    },
+    {
+      option: "--expect-attachment type=...",
+      input: { expectAttachmentFilters: ["type=image/png"] },
+      expected: { evidence: { attachments: [{ content_type: "image/png" }] } },
+    },
+  ])("should parse $option", ({ input, expected }) => {
+    expect(buildAgentInlineExpectations(input)).toEqual(expected);
+  });
+
+  it("should parse combined inline expectations", () => { // Supplies all option kinds in one call and pins the complete merged result.
+    expect(
+      buildAgentInlineExpectations({
+        goal: "Review agent visibility",
+        taskId: "agent-inline",
+        expectTests: "2",
+        expectLabels: ["module=plugin-agent"],
+        expectEnvironments: ["node"],
+        expectFullNames: ["suite should pass"],
+        expectPrefixes: ["suite"],
+        forbidLabels: ["layer=e2e"],
+        expectStepContains: ["assert expected behavior"],
+        expectSteps: "1",
+        expectAttachments: "1",
+        expectAttachmentFilters: ["trace.zip", "content-type=application/json"],
+      }),
+    ).toEqual({
+      goal: "Review agent visibility",
+      task_id: "agent-inline",
+      expected: {
+        test_count: 2,
+        environments: ["node"],
+        full_names: ["suite should pass"],
+        full_name_prefixes: ["suite"],
+        label_values: {
+          module: ["plugin-agent"],
+        },
+      },
+      forbidden: {
+        label_values: {
+          layer: ["e2e"],
+        },
+      },
+      evidence: {
+        min_steps: 1,
+        min_attachments: 1,
+        step_name_contains: ["assert expected behavior"],
+        attachments: [{ name: "trace.zip" }, { content_type: "application/json" }],
+      },
+    });
+  });
+
+  it.each([ // Malformed values: negative, fractional, blank or non-numeric counts, zero steps/attachments, labels without "=", unknown or blank attachment filters.
+    { option: "--expect-tests", input: { expectTests: "-1" } },
+    { option: "--expect-tests non-integer", input: { expectTests: "1.5" } },
+    { option: "--expect-tests empty", input: { expectTests: "   " } },
+    { option: "--expect-steps", input: { expectSteps: "1.5" } },
+    { option: "--expect-steps zero", input: { expectSteps: "0" } },
+    { option: "--expect-attachments", input: { expectAttachments: "many" } },
+    { option: "--expect-attachments zero", input: { expectAttachments: "0" } },
+    { option: "--expect-label", input: { expectLabels: ["module"] } },
+    { option: "--expect-label colon", input: { expectLabels: ["module:cli"] } },
+    { option: "--forbid-label", input: { forbidLabels: ["layer"] } },
+    { option: "--expect-attachment", input: { expectAttachmentFilters: ["extension=zip"] } },
+    { option: "--expect-attachment empty", input: { expectAttachmentFilters: ["   "] } },
+  ])("should reject invalid $option", ({ input }) => {
+    expect(() => buildAgentInlineExpectations(input)).toThrow(AgentExpectationUsageError);
+  });
+
+  it.each([ // Each row passes a two-element array for an option the test title calls single-value.
+    { option: "--goal", input: { goal: ["Review one", "Review two"] } },
+    { option: "--task-id", input: { taskId: ["TASK-1", "TASK-2"] } },
+    { option: "--expect-tests", input: { expectTests: ["1", "2"] } },
+    { option: "--expect-steps", input: { expectSteps: ["1", "2"] } },
+    { option: "--expect-attachments", input: { expectAttachments: ["1", "2"] } },
+  ])("should reject duplicate single-value option $option", ({ input }) => {
+    expect(() => buildAgentInlineExpectations(input)).toThrow(AgentExpectationUsageError);
+  });
+
+  it("should reject zero test count combined with positive scope", () => { // "0" tests plus an expected full name must throw.
+    expect(() =>
+      buildAgentInlineExpectations({
+        expectTests: "0",
+        expectFullNames: ["suite should pass"],
+      }),
+    ).toThrow(AgentExpectationUsageError);
+  });
+
+  it("should validate expectation files and reject invalid file input", async () => {
+    const cwd = await makeTempDir();
+    // Accepted case: a YAML file with a single goal key resolves to undefined.
+    await writeFile(join(cwd, "expected.yaml"), "goal: valid file expectations\n", "utf-8");
+    await expect(validateAgentExpectationsFile({ cwd, expectations: "expected.yaml" })).resolves.toBeUndefined();
+    // Rejected case: the file content is an empty array ("[]") instead of a mapping.
+    await writeFile(join(cwd, "invalid.yaml"), "[]", "utf-8");
+    await expect(validateAgentExpectationsFile({ cwd, expectations: "invalid.yaml" })).rejects.toBeInstanceOf(
+      AgentExpectationUsageError,
+    );
+  });
+
+  it("should reject expectation files placed inside the output directory", async () => {
+    const cwd = await makeTempDir();
+    // No file is written by this test: the expectations path is under the "agent-output" output path.
+    await expect(
+      validateAgentExpectationsFile({
+        cwd,
+        output: "agent-output",
+        expectations: "agent-output/expected.yaml",
+      }),
+    ).rejects.toBeInstanceOf(AgentUsageError);
+  });
+});
diff --git a/packages/plugin-agent/test/invalid-output.test.ts b/packages/plugin-agent/test/invalid-output.test.ts
new file mode 100644
index 00000000000..7c916b8ab78
--- /dev/null
+++ b/packages/plugin-agent/test/invalid-output.test.ts
@@ -0,0 +1,94 @@
+import { mkdtemp, readFile, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+
+import { epic, feature, label, story } from "allure-js-commons";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+
+import { AgentExpectationUsageError } from "../src/errors.js";
+import { writeInvalidAgentExpectationOutput } from "../src/invalid-output.js";
+import { attachJsonEvidence, attachTextEvidence } from "./evidence.js";
+
+let tempDir: string | undefined;
+
+beforeEach(async () => { // Calls the allure-js-commons tagging helpers, then creates a fresh scratch directory for the test.
+  await epic("coverage");
+  await feature("agent-mode");
+  await story("invalid-agent-output");
+  await label("coverage", "agent-mode");
+  tempDir = await mkdtemp(join(tmpdir(), "allure-agent-invalid-output-test-"));
+});
+
+afterEach(async () => { // Deletes the scratch directory created in beforeEach, if one was created.
+  if (tempDir) {
+    await rm(tempDir, { recursive: true, force: true });
+    tempDir = undefined; // reset so the guard above reflects the next test's own state
+  }
+});
+
+describe("invalid expectation output", () => {
+  it("should write minimal agent artifacts when expectation input is invalid", async () => {
+    const outputDir = join(tempDir!, "agent-output");
+    // The error is built with the message and the offending option name ("--expect-label").
+    const result = await writeInvalidAgentExpectationOutput({
+      outputDir,
+      command: "npm test",
+      error: new AgentExpectationUsageError(
+        'Invalid --expect-label "module". Expected the form name=value, for example module=cli',
+        "--expect-label",
+      ),
+    });
+    // Read back every artifact the test asserts on; findings.jsonl is parsed as a single JSON value after trimming.
+    const runManifest = JSON.parse(await readFile(join(outputDir, "manifest", "run.json"), "utf-8"));
+    const finding = JSON.parse((await readFile(join(outputDir, "manifest", "findings.jsonl"), "utf-8")).trim());
+    const tests = await readFile(join(outputDir, "manifest", "tests.jsonl"), "utf-8");
+    const events = await readFile(join(outputDir, "manifest", "test-events.jsonl"), "utf-8");
+    const index = await readFile(join(outputDir, "index.md"), "utf-8");
+    // The artifacts are handed to the ./evidence.js helpers before any assertion runs.
+    await attachJsonEvidence("invalid expectation run manifest", runManifest);
+    await attachJsonEvidence("invalid expectation finding", finding);
+    await attachTextEvidence("invalid expectation empty tests manifest", tests);
+    await attachTextEvidence("invalid expectation empty events manifest", events);
+    await attachTextEvidence("invalid expectation index", index, "text/markdown");
+    // tests.jsonl and test-events.jsonl must be empty strings; index.md must report "Status: unavailable".
+    expect(result.outputDir).toBe(outputDir);
+    expect(result.generatedAt).toEqual(expect.any(String));
+    expect(tests).toBe("");
+    expect(events).toBe("");
+    expect(index).toContain("Status: unavailable");
+    expect(runManifest).toEqual(
+      expect.objectContaining({
+        schema_version: "allure-agent-output/v1",
+        phase: "done",
+        command: "npm test",
+        expectations_present: false,
+        expectations: null,
+        expectation_result: expect.objectContaining({
+          status: "unavailable",
+          impact: "reject",
+          finding_ids: ["F0001"],
+        }),
+      }),
+    );
+    expect(finding).toEqual(
+      expect.objectContaining({
+        schema_version: "allure-agent-finding/v2",
+        check_id: "expectations-invalid",
+        instance_id: "F0001",
+        severity: "high",
+        impact: "reject",
+        source: {
+          kind: "inline-option",
+          option: "--expect-label",
+        },
+        subject: {
+          type: "run",
+        },
+        observed: expect.objectContaining({
+          execution_skipped: true,
+        }),
+        check_name: "expectations-invalid",
+      }),
+    );
+  });
+});
diff --git a/packages/plugin-agent/test/query.test.ts b/packages/plugin-agent/test/query.test.ts
new file mode 100644
index 00000000000..efc5e0d4d04
--- /dev/null
+++ b/packages/plugin-agent/test/query.test.ts
@@ -0,0 +1,322 @@
+import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+
+import { epic, feature, label, story } from "allure-js-commons";
+import { afterEach, beforeEach, describe, expect, it } from "vitest";
+
+import { AgentUsageError } from "../src/errors.js";
+import type { AgentOutputBundle } from "../src/harness.js";
+import {
+  AGENT_TEST_STATUSES,
+  buildAgentQueryPayload,
+  normalizeAgentQueryLimit,
+  normalizeRepeatedEnumValues,
+} from "../src/query.js";
+import { attachJsonEvidence } from "./evidence.js";
+
+let tempDir: string | undefined;
+// Fixture factory: builds an AgentOutputBundle with one failed and one passed test plus two findings for outputDir.
+const createAgentOutput = (outputDir: string): AgentOutputBundle => ({
+  outputDir,
+  run: {
+    schema_version: "allure-agent-output/v1",
+    report_uuid: "report-uuid",
+    generated_at: "2026-06-02T12:00:00.000Z",
+    phase: "done",
+    command: "npm test",
+    actual_exit_code: 1,
+    original_exit_code: 1,
+    exit_code: {
+      original: 1,
+      actual: null, // NOTE(review): actual_exit_code above is 1 but this is null; confirm intended
+    },
+    summary: {
+      stats: {
+        total: 2,
+        failed: 1,
+        broken: 0,
+        skipped: 0,
+        unknown: 0,
+        passed: 1,
+      },
+      duration_ms: {
+        total: 30,
+        average: 15,
+        max: 20,
+      },
+      environments: [
+        {
+          environmentId: "default",
+          total: 2,
+          failed: 1,
+          broken: 0,
+          skipped: 0,
+          unknown: 0,
+          passed: 1,
+        },
+      ],
+    },
+    paths: {
+      index_md: "index.md", // paths in this object are relative, not prefixed with outputDir
+      agents_md: "AGENTS.md",
+      tests_manifest: "manifest/tests.jsonl",
+      findings_manifest: "manifest/findings.jsonl",
+      test_events_manifest: "manifest/test-events.jsonl",
+      expected_manifest: "manifest/expected.json",
+      process_logs: {
+        stdout: "artifacts/global/stdout.txt",
+        stderr: "artifacts/global/stderr.txt",
+      },
+    },
+    expectations_present: true,
+    expectations: {
+      goal: "Check query",
+    },
+    expectation_result: {
+      schema_version: "allure-agent-expectation-result/v1",
+      status: "failed",
+      impact: "reject",
+      source: {
+        kind: "inline",
+        path: null,
+      },
+      recognized_control_count: 2,
+      unsupported_controls: [],
+      degraded_controls: [],
+      summary: {
+        expected_tests: 0,
+        observed_tests: 2,
+        missing_expected: 1,
+        forbidden_observed: 0,
+        unexpected_observed: 0,
+        evidence_mismatches: 0,
+      },
+      finding_ids: ["finding-1"], // same id as the first entry in findings below
+    },
+    check_summary: {
+      total: 2,
+      countsBySeverity: {
+        high: 1,
+        warning: 1,
+        info: 0,
+      },
+      countsByCategory: {
+        bootstrap: 0,
+        scope: 1,
+        metadata: 0,
+        evidence: 1,
+        smells: 0,
+      },
+    },
+    agent_context: {
+      agent_name: null,
+      loop_id: null,
+      task_id: "agent-query",
+      conversation_id: null,
+    },
+  },
+  tests: [
+    {
+      environment_id: "default",
+      history_id: "history-1",
+      test_result_id: "tr-1",
+      full_name: "suite should fail",
+      package: "pkg-a",
+      labels: [{ name: "module", value: "cli" }],
+      status: "failed",
+      duration_ms: 20,
+      retries: 0,
+      flaky: false,
+      scope_match: "match",
+      finding_counts: {
+        total: 1,
+        high: 1,
+        warning: 0,
+        info: 0,
+      },
+      markdown_path: "tests/default/suite-should-fail.md", // beforeEach writes this file under the temp dir
+      assets_dir: "artifacts/tests/default/suite-should-fail",
+    },
+    {
+      environment_id: "default",
+      history_id: "history-2",
+      test_result_id: "tr-2",
+      full_name: "suite should pass",
+      package: "pkg-b",
+      labels: [{ name: "module", value: "ui" }],
+      status: "passed",
+      duration_ms: 10,
+      retries: 0,
+      flaky: false,
+      scope_match: "match",
+      finding_counts: {
+        total: 0,
+        high: 0,
+        warning: 0,
+        info: 0,
+      },
+      markdown_path: "tests/default/suite-should-pass.md", // beforeEach does not create a file for this path
+      assets_dir: "artifacts/tests/default/suite-should-pass",
+    },
+  ],
+  findings: [
+    {
+      schema_version: "allure-agent-finding/v2",
+      check_id: "expected-label-missing",
+      instance_id: "finding-1",
+      finding_id: "finding-1",
+      subject: {
+        type: "test",
+        id: "tests/default/suite-should-fail.md",
+        path: "tests/default/suite-should-fail.md",
+      },
+      subject_ref: "tests/default/suite-should-fail.md",
+      subject_type: "test",
+      severity: "high",
+      impact: "reject",
+      category: "scope",
+      check_name: "expected-label-missing",
+      message: "Expected label module=api was not found.",
+      explanation: "The observed labels did not satisfy the expectation.",
+      evidence_paths: ["tests/default/suite-should-fail.md"],
+      remediation_hint: "Run the intended test or update metadata.",
+    },
+    {
+      finding_id: "finding-2", // NOTE(review): unlike finding-1, no schema_version/check_id and subject is a string
+      subject: "run",
+      severity: "warning",
+      category: "evidence",
+      check_name: "missing-evidence",
+      message: "Evidence is weak.",
+      explanation: "The run did not contain meaningful evidence.",
+      evidence_paths: ["index.md"],
+      remediation_hint: "Add steps or attachments.",
+    },
+  ],
+  expected: {
+    goal: "Check query", // same value as run.expectations.goal above
+  },
+});
+
+beforeEach(async () => {
+  await epic("coverage");
+  await feature("agent-mode");
+  await story("agent-query");
+  await label("coverage", "agent-mode"); // the four Allure metadata calls above run before every test in this file
+  tempDir = await mkdtemp(join(tmpdir(), "allure-agent-query-test-")); // new directory for each test
+  await mkdir(join(tempDir, "tests/default"), { recursive: true });
+  await writeFile(join(tempDir, "tests/default/suite-should-fail.md"), "# Test Markdown\n\nRuntime evidence.", {
+    encoding: "utf-8",
+    flag: "w",
+  }); // matches the failed fixture test's markdown_path; no file is written for the passed test
+});
+
+afterEach(async () => {
+  if (tempDir) {
+    await rm(tempDir, { recursive: true, force: true }); // force: a directory that is already gone is not an error
+    tempDir = undefined; // reset after removal; beforeEach assigns a new directory for the next test
+  }
+});
+
+describe("agent query payloads", () => {
+  it("should build a summary payload", async () => {
+    const payload = await buildAgentQueryPayload(createAgentOutput(tempDir!), "summary", {
+      labelFilters: [],
+    });
+    // Fixture paths are relative; the summary view is expected to return them joined onto the output directory.
+    await attachJsonEvidence("summary query payload", payload);
+    expect(payload).toEqual(
+      expect.objectContaining({
+        schema: "allure-agent-query/v1",
+        view: "summary",
+        output_dir: tempDir,
+        index_md: join(tempDir!, "index.md"),
+        run: expect.objectContaining({
+          command: "npm test",
+          expectations_present: true,
+          expectation_result: expect.objectContaining({ status: "failed", impact: "reject" }),
+        }),
+        paths: expect.objectContaining({
+          tests_manifest: join(tempDir!, "manifest/tests.jsonl"),
+        }),
+        expected: {
+          goal: "Check query",
+        },
+      }),
+    );
+  });
+
+  it("should build filtered test payloads", async () => {
+    const payload = await buildAgentQueryPayload(createAgentOutput(tempDir!), "tests", {
+      labelFilters: [{ name: "module", value: "cli" }],
+      statuses: ["failed"],
+      limit: 1,
+    });
+    // Of the two fixture tests only the failed one carries module=cli, so exactly one match is expected.
+    await attachJsonEvidence("filtered tests query payload", payload);
+    expect(payload).toEqual(
+      expect.objectContaining({
+        view: "tests",
+        total_matches: 1,
+        returned: 1,
+        tests: [expect.objectContaining({ full_name: "suite should fail", status: "failed" })],
+      }),
+    );
+  });
+
+  it("should build filtered finding payloads", async () => {
+    const payload = await buildAgentQueryPayload(createAgentOutput(tempDir!), "findings", {
+      labelFilters: [],
+      severities: ["high"],
+      categories: ["scope"],
+      checks: ["expected-label-missing"],
+      test: "suite should fail",
+    });
+    // Of the two fixture findings only finding-1 is high/scope and points at the failing test's markdown file.
+    await attachJsonEvidence("filtered findings query payload", payload);
+    expect(payload).toEqual(
+      expect.objectContaining({
+        view: "findings",
+        total_matches: 1,
+        findings: [expect.objectContaining({ finding_id: "finding-1" })],
+      }),
+    );
+  });
+
+  it("should build one-test payloads with markdown when requested", async () => {
+    const payload = await buildAgentQueryPayload(createAgentOutput(tempDir!), "test", {
+      labelFilters: [],
+      test: "suite should fail",
+      includeMarkdown: true,
+    });
+    // The markdown asserted below is the file beforeEach writes; markdown_path is expected under the output directory.
+    await attachJsonEvidence("single test query payload", payload);
+    expect(payload).toEqual(
+      expect.objectContaining({
+        view: "test",
+        markdown_path: join(tempDir!, "tests/default/suite-should-fail.md"),
+        test: expect.objectContaining({ full_name: "suite should fail" }),
+        findings: [expect.objectContaining({ finding_id: "finding-1" })],
+        markdown: expect.stringContaining("Runtime evidence."),
+      }),
+    );
+  });
+
+  it("should reject ambiguous single-test queries and unsupported enum values", async () => {
+    await attachJsonEvidence("invalid query option cases", [
+      { view: "test", reason: "missing exact test selector" },
+      { option: "--status", value: "flaky", reason: "unsupported status" },
+      { option: "--limit", value: "1.5", reason: "limit must be an integer" },
+    ]); // NOTE(review): the title says "ambiguous", but the case exercised below is a missing test selector
+    // The single-test view is queried without a `test` selector and is expected to reject with AgentUsageError.
+    await expect(
+      buildAgentQueryPayload(createAgentOutput(tempDir!), "test", {
+        labelFilters: [],
+      }),
+    ).rejects.toBeInstanceOf(AgentUsageError);
+    // The two normalizers are called synchronously, so their failures are asserted with toThrow, not rejects.
+    expect(() => normalizeRepeatedEnumValues(["flaky"], AGENT_TEST_STATUSES, "--status")).toThrow(AgentUsageError);
+    expect(() => normalizeAgentQueryLimit("1.5")).toThrow(AgentUsageError);
+  });
+});
diff --git a/packages/cli/test/utils/agent-select.test.ts b/packages/plugin-agent/test/selection.test.ts
similarity index 75%
rename from packages/cli/test/utils/agent-select.test.ts
rename to packages/plugin-agent/test/selection.test.ts
index b261e2ab480..13adee68b39 100644
--- a/packages/cli/test/utils/agent-select.test.ts
+++ b/packages/plugin-agent/test/selection.test.ts
@@ -6,12 +6,13 @@ import {
   parseAgentLabelFilters,
   resolveAgentSelectionOutputDir,
   selectAgentTestPlan,
-} from "../../src/utils/agent-select.js";
+} from "../src/selection.js";
+import { attachJsonEvidence } from "./evidence.js";
 
-vi.mock("../../src/utils/agent-state.js", () => ({
+vi.mock("../src/state.js", () => ({
   readLatestAgentState: vi.fn(),
 }));
-vi.mock("@allurereport/plugin-agent", () => ({
+vi.mock("../src/harness.js", () => ({
   loadAgentOutput: vi.fn(),
   planAgentEnrichmentReview: vi.fn(),
 }));
@@ -26,7 +27,7 @@ beforeEach(async () => {
 
 describe("agent-select utils", () => {
   it("should select review-targeted tests and apply environment and label filters", async () => {
-    const { loadAgentOutput, planAgentEnrichmentReview } = await import("@allurereport/plugin-agent");
+    const { loadAgentOutput, planAgentEnrichmentReview } = await import("../src/harness.js");
 
     (loadAgentOutput as Mock).mockResolvedValueOnce({
       outputDir: "/tmp/agent-output",
@@ -65,6 +66,7 @@ describe("agent-select utils", () => {
       labelFilters: [{ name: "feature", value: "checkout" }],
     });
 
+    await attachJsonEvidence("selected agent test plan", selection);
     expect(selection.outputDir).toBe("/tmp/agent-output");
     expect(selection.preset).toBe("review");
     expect(selection.selectedTests).toHaveLength(1);
@@ -76,17 +78,24 @@ describe("agent-select utils", () => {
   });
 
   it("should resolve latest output directories and parse supported filters", async () => {
-    const { readLatestAgentState } = await import("../../src/utils/agent-state.js");
+    const { readLatestAgentState } = await import("../src/state.js");
 
     (readLatestAgentState as Mock).mockResolvedValueOnce({
       outputDir: "/tmp/latest-agent-output",
     });
 
-    await expect(resolveAgentSelectionOutputDir({ cwd: "/cwd", latest: true })).resolves.toBe(
-      "/tmp/latest-agent-output",
-    );
-    expect(normalizeAgentRerunPreset("failed")).toBe("failed");
-    expect(parseAgentLabelFilters(["feature=checkout", "priority=high"])).toEqual([
+    const resolvedOutputDir = await resolveAgentSelectionOutputDir({ cwd: "/cwd", latest: true });
+    const preset = normalizeAgentRerunPreset("failed");
+    const labelFilters = parseAgentLabelFilters(["feature=checkout", "priority=high"]);
+
+    await attachJsonEvidence("latest selection resolution", {
+      resolvedOutputDir,
+      preset,
+      labelFilters,
+    });
+    expect(resolvedOutputDir).toBe("/tmp/latest-agent-output");
+    expect(preset).toBe("failed");
+    expect(labelFilters).toEqual([
       { name: "feature", value: "checkout" },
       { name: "priority", value: "high" },
     ]);
diff --git a/packages/plugin-agent/test/skills.test.ts b/packages/plugin-agent/test/skills.test.ts
deleted file mode 100644
index 0d58e1eab18..00000000000
--- a/packages/plugin-agent/test/skills.test.ts
+++ /dev/null
@@ -1,137 +0,0 @@
-import { readFile } from "node:fs/promises";
-import { dirname, join, resolve } from "node:path";
-import { fileURLToPath } from "node:url";
-
-import { story } from "allure-js-commons";
-import { beforeEach, describe, expect, it } from "vitest";
-
-beforeEach(async () => {
-  await story("skills");
-});
-const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), "../../..");
-
-describe("allure agent-mode skills bundle", () => {
-  it("should include the setup and feature-delivery skills with UI metadata", async () => {
-    const setupSkill = await readFile(join(repoRoot, "skills", "allure-agent-mode-setup", "SKILL.md"), "utf-8");
-    const setupUi = await readFile(
-      join(repoRoot, "skills", "allure-agent-mode-setup", "agents", "openai.yaml"),
-      "utf-8",
-    );
-    const featureSkill = await readFile(
-      join(repoRoot, "skills", "allure-agent-mode-feature-delivery", "SKILL.md"),
-      "utf-8",
-    );
-    const featureUi = await readFile(
-      join(repoRoot, "skills", "allure-agent-mode-feature-delivery", "agents", "openai.yaml"),
-      "utf-8",
-    );
-
-    expect(setupSkill).toContain("name: allure-agent-mode-setup");
-    expect(setupSkill).toContain("docs/allure-agent-mode.md");
-    expect(setupSkill).toContain("allure agent latest");
-    expect(setupSkill).toContain("allure agent state-dir");
-    expect(setupSkill).toContain("allure agent select --latest");
-    expect(setupSkill).toContain("allure agent --rerun-latest");
-    expect(setupSkill).toContain(
-      "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`.",
-    );
-    expect(setupUi).toContain('display_name: "Allure Agent Setup"');
-    expect(featureSkill).toContain("name: allure-agent-mode-feature-delivery");
-    expect(featureSkill).toContain("ALLURE_AGENT_OUTPUT");
-    expect(featureSkill).toContain("reviewing existing tests");
-    expect(featureSkill).toContain("auditing coverage");
-    expect(featureSkill).toContain("triaging failing suites");
-    expect(featureSkill).toContain(
-      "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`.",
-    );
-    expect(featureSkill).toContain(
-      "Use `allure agent` for smoke checks too, even when the change is small or mechanical.",
-    );
-    expect(featureSkill).toContain(
-      "Only skip agent mode when it is impossible or when you are debugging agent mode itself.",
-    );
-    expect(featureSkill).toContain("### Small Test Change Workflow");
-    expect(featureSkill).toContain("### Coverage Review Workflow");
-    expect(featureUi).toContain('display_name: "Allure Feature Delivery"');
-  });
-
-  it("should include the project guide and AGENTS router templates", async () => {
-    const projectGuide = await readFile(join(repoRoot, "docs", "allure-agent-mode.md"), "utf-8");
-    const rootAgents = await readFile(join(repoRoot, "AGENTS.md"), "utf-8");
-    const templateGuide = await readFile(
-      join(repoRoot, "skills", "allure-agent-mode-setup", "references", "project-guide-template.md"),
-      "utf-8",
-    );
-    const agentsSnippet = await readFile(
-      join(repoRoot, "skills", "allure-agent-mode-setup", "references", "root-agents-snippet.md"),
-      "utf-8",
-    );
-    const readme = await readFile(join(repoRoot, "packages", "plugin-agent", "README.md"), "utf-8");
-
-    expect(projectGuide).toContain("## Core Loops");
-    expect(projectGuide).toContain("### Test Review Loop");
-    expect(projectGuide).toContain("Runtime first, source second.");
-    expect(projectGuide).toContain("## Verification Standard");
-    expect(projectGuide).toContain("## Helpful Commands");
-    expect(projectGuide).toContain("allure agent latest");
-    expect(projectGuide).toContain("allure agent state-dir");
-    expect(projectGuide).toContain("allure agent select --latest");
-    expect(projectGuide).toContain("allure agent --rerun-latest");
-    expect(projectGuide).toContain("--rerun-preset review|failed|unsuccessful|all");
-    expect(projectGuide).toContain("--rerun-environment <id>");
-    expect(projectGuide).toContain("--rerun-label name=value");
-    expect(projectGuide).toContain("ALLURE_AGENT_STATE_DIR");
-    expect(projectGuide).toContain("print the `index.md` path");
-    expect(projectGuide).toContain(
-      "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`.",
-    );
-    expect(projectGuide).toContain(
-      "Use `allure agent` for smoke checks too, even when the change is small or mechanical.",
-    );
-    expect(projectGuide).toContain(
-      "Only skip agent mode when it is impossible or when you are debugging agent mode itself.",
-    );
-    expect(projectGuide).toContain("### Small Test Change Workflow");
-    expect(projectGuide).toContain("### Coverage Review Workflow");
-    expect(projectGuide).toContain("## Acceptance Rules");
-    expect(projectGuide).toContain("When Console Errors Are Not Represented As Test Results");
-    expect(projectGuide).toContain("yarn allure agent --");
-    expect(projectGuide).toContain("test/commands/run.integration.test.ts");
-    expect(rootAgents).toContain("docs/allure-agent-mode.md");
-    expect(rootAgents).toContain(
-      "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`.",
-    );
-    expect(rootAgents).toContain("Use `allure agent` for smoke checks too");
-    expect(rootAgents).toContain("reasoning, review, coverage analysis, debugging, or any user-facing conclusion");
-    expect(rootAgents).toContain("console-only review");
-    expect(templateGuide).toContain("ALLURE_AGENT_EXPECTATIONS");
-    expect(templateGuide).toContain("## Verification Standard");
-    expect(templateGuide).toContain("## Helpful Commands");
-    expect(templateGuide).toContain("allure agent latest");
-    expect(templateGuide).toContain("allure agent state-dir");
-    expect(templateGuide).toContain("allure agent select --latest");
-    expect(templateGuide).toContain("allure agent --rerun-latest");
-    expect(templateGuide).toContain("--rerun-preset review|failed|unsuccessful|all");
-    expect(templateGuide).toContain("--rerun-environment <id>");
-    expect(templateGuide).toContain("--rerun-label name=value");
-    expect(templateGuide).toContain("ALLURE_AGENT_STATE_DIR");
-    expect(templateGuide).toContain("print the `index.md` path");
-    expect(templateGuide).toContain("### Test Review Loop");
-    expect(templateGuide).toContain("### Small Test Change Workflow");
-    expect(templateGuide).toContain("### Coverage Review Workflow");
-    expect(templateGuide).toContain("Runtime first, source second.");
-    expect(templateGuide).toContain("partial runtime review");
-    expect(agentsSnippet).toContain("Use [Allure Agent Mode](docs/allure-agent-mode.md)");
-    expect(agentsSnippet).toContain(
-      "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`.",
-    );
-    expect(agentsSnippet).toContain("Use `allure agent` for smoke checks too");
-    expect(agentsSnippet).toContain("reasoning, review, coverage analysis, debugging, or any user-facing conclusion");
-    expect(readme).toContain("## Verification Standard");
-    expect(readme).toContain("For small mechanical test changes, use a scoped agent-mode run for the smoke check");
-    expect(readme).toContain(
-      "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`.",
-    );
-    expect(readme).toContain("treat the review as partial");
-  });
-});
diff --git a/packages/cli/test/utils/agent-state.test.ts b/packages/plugin-agent/test/state.test.ts
similarity index 88%
rename from packages/cli/test/utils/agent-state.test.ts
rename to packages/plugin-agent/test/state.test.ts
index ebc1615240a..b8f1a7af2e0 100644
--- a/packages/cli/test/utils/agent-state.test.ts
+++ b/packages/plugin-agent/test/state.test.ts
@@ -9,7 +9,8 @@ import {
   readLatestAgentState,
   resolveAgentStateDir,
   writeLatestAgentState,
-} from "../../src/utils/agent-state.js";
+} from "../src/state.js";
+import { attachJsonEvidence } from "./evidence.js";
 
 vi.mock("node:os", async (importOriginal) => ({
   ...(await importOriginal()),
@@ -40,13 +41,21 @@ describe("agent-state utils", () => {
     const normalizedCwd = resolve(cwd);
     const projectHash = createHash("sha256").update(normalizedCwd).digest("hex").slice(0, 16);
     const statePath = join("/tmp", `allure-agent-state-${projectHash}`, "latest.json");
-
-    await writeLatestAgentState({
+    const latestState = {
       cwd,
       outputDir: "/tmp/allure-agent-123",
       command: "npm test",
       startedAt: "2026-04-15T18:00:00.000Z",
-      status: "running",
+      status: "running" as const,
+    };
+
+    await writeLatestAgentState(latestState);
+
+    await attachJsonEvidence("latest state write contract", {
+      normalizedCwd,
+      projectHash,
+      statePath,
+      latestState,
     });
 
     expect(fsModule.mkdir).toHaveBeenCalledWith(dirname(statePath), { recursive: true });
diff --git a/skills/allure-agent-mode-feature-delivery/SKILL.md b/skills/allure-agent-mode-feature-delivery/SKILL.md
deleted file mode 100644
index 825051650e5..00000000000
--- a/skills/allure-agent-mode-feature-delivery/SKILL.md
+++ /dev/null
@@ -1,92 +0,0 @@
----
-name: allure-agent-mode-feature-delivery
-description: Use Allure agent-mode to design or review test scope, create per-run expectations, run targeted tests, review runtime evidence, and iterate until the test loop is acceptable.
----
-
-# Allure Agent Mode Feature Delivery
-
-Use this skill for feature or bug work that changes tests, for reviewing existing tests, auditing coverage, triaging failing suites, investigating weak evidence, or debugging flaky and environment-sensitive failures.
-
-- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.
-- Use `allure agent` for smoke checks too, even when the change is small or mechanical.
-- Only skip agent mode when it is impossible or when you are debugging agent mode itself.
-
-## Read First
-
-If the project has `docs/allure-agent-mode.md`, read it before writing or reviewing tests.
-
-If it does not, use the guidance in this skill and suggest running the setup skill later.
-
-## Workflow
-
-1. Understand the feature, issue, or review goal and decide the intended test scope.
-2. Create a fresh expectations file for this run in a temp directory.
-3. Write or update the tests, or keep the current tests unchanged if the task is review-only.
-4. Run only the intended scope with `allure agent` before relying on raw console output.
-5. Review `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, `manifest/findings.jsonl`, and the relevant test markdown files before inspecting source code.
-6. If evidence is weak, enrich the tests with real steps, attachments, or minimal metadata.
-7. Rerun with a new temp output directory and a new expectations file.
-8. Accept only when scope matches, evidence is good enough to review, and any partial runtime modeling has been called out explicitly.
-
-## Review Variants
-
-### Small Test Change Workflow
-
-1. Create a fresh expectations file and temp output directory for the touched scope.
-2. Run the touched scope with `allure agent`, even if the goal is only a smoke check after a mechanical change such as typing cleanup, mock refactors, or helper extraction.
-3. Review `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`.
-4. Only then make a final statement about regression safety or test correctness.
-
-### Coverage Review Workflow
-
-1. Split command or package audits into scoped groups.
-2. Give each group its own expectations file and temp output directory.
-3. Run each group with `allure agent`.
-4. Review runtime artifacts first, then inspect source code only after the run explains what actually executed.
-5. Mark the review incomplete until each scoped group either matched expectations or was explicitly documented as a broad package-health audit.
-
-Compact coverage-review pattern:
-
-```bash
-TMP_DIR="$(mktemp -d)"
-EXPECTATIONS="$TMP_DIR/expectations.yaml"
-
-npx allure agent \
-  --output "$TMP_DIR/agent-output" \
-  --expectations "$EXPECTATIONS" \
-  -- npm test -- <scope>
-```
-
-Coverage-review expectations example:
-
-```yaml
-goal: Review package tests
-task_id: package-review
-expected:
-  label_values:
-    module: my-module
-notes:
-  - Review runtime evidence before source inspection.
-```
-
-## Requirements
-
-- Every run must use a unique temp `ALLURE_AGENT_OUTPUT`.
-- Every run must use a unique temp `ALLURE_AGENT_EXPECTATIONS`.
-- Parallel runs must never share those paths.
-- Prefer YAML expectations in v1.
-- Broad package-health audits may omit expectations, but the review must call out that scope checks are weaker.
-- Metadata enrichment is part of this loop, not a separate workflow.
-
-## Guardrails
-
-- Runtime first, source second.
-- Steps must represent real behavior, not filler.
-- Attachments must come from the current execution.
-- Keep metadata minimal and only add labels that help review or policy.
-- Prefer helper-boundary instrumentation over repetitive caller wrapping.
-- If runner-visible failures are not represented as logical tests, inspect global stderr and treat the run as a partial runtime review.
-
-## Reference Files
-
-- Expectations example: `references/expectations-example.yaml`
diff --git a/skills/allure-agent-mode-feature-delivery/agents/openai.yaml b/skills/allure-agent-mode-feature-delivery/agents/openai.yaml
deleted file mode 100644
index 3e6bdd92571..00000000000
--- a/skills/allure-agent-mode-feature-delivery/agents/openai.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-interface:
-  display_name: "Allure Feature Delivery"
-  short_description: "Use agent-mode for feature test loops"
-  default_prompt: "Use $allure-agent-mode-feature-delivery to write or validate tests for this feature with Allure agent-mode."
-
-policy:
-  allow_implicit_invocation: true
diff --git a/skills/allure-agent-mode-feature-delivery/references/expectations-example.yaml b/skills/allure-agent-mode-feature-delivery/references/expectations-example.yaml
deleted file mode 100644
index 2d2ecd53671..00000000000
--- a/skills/allure-agent-mode-feature-delivery/references/expectations-example.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-goal: Validate feature A
-task_id: feature-a
-expected:
-  environments:
-    - default
-  full_name_prefixes:
-    - feature A
-  label_values:
-    feature: feature-a
-forbidden:
-  full_name_prefixes:
-    - feature B
-  label_values:
-    feature:
-      - feature-b
-notes:
-  - Only feature A tests should run.
diff --git a/skills/allure-agent-mode-setup/SKILL.md b/skills/allure-agent-mode-setup/SKILL.md
deleted file mode 100644
index 348af6bb021..00000000000
--- a/skills/allure-agent-mode-setup/SKILL.md
+++ /dev/null
@@ -1,42 +0,0 @@
----
-name: allure-agent-mode-setup
-description: Set up Allure agent-mode guidance in a project by checking whether Allure results already exist, adding a short root AGENTS.md router, and creating docs/allure-agent-mode.md for downstream test authoring and review work.
----
-
-# Allure Agent Mode Setup
-
-Use this skill when a project wants to adopt Allure agent-mode for test work and test reviews.
-
-- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.
-- Use `allure agent` for smoke checks too, even when the change is small or mechanical.
-- Only skip agent mode when it is impossible or when you are debugging agent mode itself.
-
-## Goal
-
-Leave the project with:
-
-- a root `AGENTS.md` that routes test work to `docs/allure-agent-mode.md`
-- a project `docs/allure-agent-mode.md` guide
-- enough Allure bootstrap guidance for the agent to continue, even if the project is not fully configured yet
-
-## Workflow
-
-1. Check whether the project already emits Allure results or already has Allure configuration.
-2. If Allure is missing, add or suggest the smallest viable install/config path for the project. Treat this as best-effort bootstrap, not the main acceptance path.
-3. Create or update root `AGENTS.md` so test-related work points to `docs/allure-agent-mode.md`.
-4. Create `docs/allure-agent-mode.md` from the bundled template and adapt only the parts that must be project-specific.
-5. Keep the helper-command descriptions short and practical. Include `allure agent latest`, `allure agent state-dir`, `allure agent select --latest` / `--from <output-dir>`, and `allure agent --rerun-latest` / `--rerun-from <output-dir>` as small Helpful Commands entries so agents can recover the latest output directory, inspect where state is stored, inspect the review-targeted test plan, and rerun the same focused scope. Add one compact Advanced Reruns section that documents `--rerun-preset`, `--rerun-environment`, `--rerun-label`, and `ALLURE_AGENT_STATE_DIR` without turning the guide into a full CLI reference. Keep the verification section explicit that agents should print the run's `index.md` path after test execution.
-6. Keep changes minimal and additive. Preserve unrelated project guidance in `AGENTS.md`.
-
-## Files To Use
-
-- Project guide template: `references/project-guide-template.md`
-- Root router snippet: `references/root-agents-snippet.md`
-
-## Guardrails
-
-- Keep `AGENTS.md` short. It should route, not duplicate the whole guide.
-- Keep helper-command notes short. Prefer one-line descriptions over a growing command catalog.
-- Do not invent project-specific metadata conventions unless the repo already uses them.
-- Do not create persistent output or expectations paths in the project guide. Those are per-run temp artifacts.
-- If the project already has better Allure instructions, merge carefully instead of overwriting them.
diff --git a/skills/allure-agent-mode-setup/agents/openai.yaml b/skills/allure-agent-mode-setup/agents/openai.yaml
deleted file mode 100644
index b20dbb721eb..00000000000
--- a/skills/allure-agent-mode-setup/agents/openai.yaml
+++ /dev/null
@@ -1,7 +0,0 @@
-interface:
-  display_name: "Allure Agent Setup"
-  short_description: "Set up Allure agent-mode project guidance"
-  default_prompt: "Use $allure-agent-mode-setup to add Allure agent-mode guidance to this project."
-
-policy:
-  allow_implicit_invocation: true
diff --git a/skills/allure-agent-mode-setup/references/project-guide-template.md b/skills/allure-agent-mode-setup/references/project-guide-template.md
deleted file mode 100644
index 3468e221f47..00000000000
--- a/skills/allure-agent-mode-setup/references/project-guide-template.md
+++ /dev/null
@@ -1,174 +0,0 @@
-# Allure Agent Mode
-
-Use Allure agent-mode to design, review, validate, debug, and enrich tests in this project.
-
-## Review Principle
-
-Runtime first, source second.
-
-- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.
-- Use `ALLURE_AGENT_*` with `allure run` only as the lower-level fallback when you need direct environment control.
-- If the agent-mode output is missing or incomplete, debug that first and treat console-only conclusions as provisional.
-
-## Verification Standard
-
-- Use `allure agent` for smoke checks too, even when the change is small or mechanical.
-- Only skip agent mode when it is impossible or when you are debugging agent mode itself.
-- After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly.
-
-## Helpful Commands
-
-- `allure agent latest` prints the latest agent output directory for the current project cwd. Use it when a prior run omitted `--output` and you want to reopen the most recent agent-mode artifacts.
-- `allure agent state-dir` prints the state directory for the current project cwd. Use it when you need to inspect where `latest` pointers are stored or debug sandbox behavior.
-- `allure agent select --latest` or `allure agent select --from <output-dir>` prints the review-targeted test plan from a prior agent run. Add `--preset failed` or exact `--label name=value` / `--environment <id>` filters when you need a narrower rerun plan.
-- `allure agent --rerun-latest -- <command>` or `allure agent --rerun-from <output-dir> -- <command>` reruns only the selected tests through the framework-agnostic Allure testplan flow. The default rerun preset is `review`.
-
-## Advanced Reruns
-
-- `--rerun-preset review|failed|unsuccessful|all` changes how the rerun seed set is chosen. Use `review` for the default agent-targeted loop, `failed` for classic failure reruns, `unsuccessful` for any non-passed tests, and `all` when you want the whole previously observed set.
-- `--rerun-environment <id>` narrows the rerun selection to one or more environment ids from the previous agent output. Repeat the flag for multiple environments.
-- `--rerun-label name=value` narrows the rerun selection to tests whose prior results carried exact matching labels. Repeat the flag for multiple label filters.
-- `ALLURE_AGENT_STATE_DIR` overrides the default project-scoped state directory used by `allure agent latest`, `allure agent state-dir`, and `--rerun-latest`. Use it when you need a deterministic shared location in CI or a constrained sandbox.
-
-## Core Loops
-
-### Test Review Loop
-
-1. Identify the exact review scope.
-2. Create a fresh expectations file for this run in a temp directory.
-3. Run only that scope with `allure agent`.
-4. Read `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`.
-5. Read per-test markdown only for tests that failed, drifted, or have findings.
-6. Only after runtime review, inspect source code for root cause or coverage gaps.
-7. If evidence is weak or partial, enrich the tests and rerun.
-8. When iterating on the same scope, prefer `allure agent --rerun-latest -- <command>` or `allure agent --rerun-from <output-dir> -- <command>` so the rerun stays focused on the review-targeted tests.
-
-### Feature Delivery Loop
-
-1. Understand the feature or issue.
-2. Create a fresh expectations file for this run in a temp directory.
-3. Write or update the tests.
-4. Run the target scope with `allure agent`.
-5. Review `index.md`, manifests, and per-test markdown.
-6. Enrich tests when evidence is weak.
-7. Rerun until scope and evidence are acceptable.
-
-### Metadata Enrichment Loop
-
-Use this when the run is functionally correct but too weak to review:
-
-1. Identify missing or low-signal findings.
-2. Add real steps, attachments, or minimal metadata.
-3. Rerun the same intended scope.
-4. Reject noop-style or placeholder evidence.
-
-### Small Test Change Workflow
-
-1. Create a fresh expectations file and temp output directory for the touched scope.
-2. Run the touched scope with `allure agent`, even if the goal is only a smoke check after a mechanical change such as typing cleanup, mock refactors, or helper extraction.
-3. Review `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`.
-4. Only then make a final statement about regression safety or test correctness.
-
-### Coverage Review Workflow
-
-1. Split command or package audits into scoped groups.
-2. Give each group its own expectations file and temp output directory.
-3. Run each group with `allure agent`.
-4. Review runtime artifacts first, then inspect source code only after the run explains what actually executed.
-5. Mark the review incomplete until each scoped group either matched expectations or was explicitly documented as a broad package-health audit.
-
-## Per-Run Artifacts
-
-- `ALLURE_AGENT_OUTPUT` must use a unique temp directory per run.
-- `ALLURE_AGENT_EXPECTATIONS` must use a unique temp file per run.
-- Do not reuse those paths across parallel runs.
-
-YAML is preferred for expectations in v1.
-
-Review-oriented expectations example:
-
-```yaml
-goal: Review module tests
-task_id: module-review
-expected:
-  label_values:
-    module: my-module
-notes:
-  - Review runtime evidence before source inspection.
-```
-
-Broad package-health audits may omit expectations, but the resulting scope review is weaker and should be called out explicitly.
-
-Compact coverage-review pattern:
-
-```bash
-TMP_DIR="$(mktemp -d)"
-EXPECTATIONS="$TMP_DIR/expectations.yaml"
-
-npx allure agent \
-  --output "$TMP_DIR/agent-output" \
-  --expectations "$EXPECTATIONS" \
-  -- npm test -- <scope>
-```
-
-Single-spec expectations example:
-
-```yaml
-goal: Review one spec
-task_id: single-spec-review
-expected:
-  label_values:
-    package: path/to/spec.test.ts
-notes:
-  - Review runtime evidence before source inspection.
-```
-
-## Evidence Rules
-
-- Steps must wrap real setup, actions, state transitions, or assertions.
-- Attachments must contain real runtime evidence from that execution.
-- Metadata should stay minimal and purposeful.
-- Prefer helper-boundary instrumentation over repetitive caller wrapping.
-
-Good example:
-
-- instrument `runCommand` once instead of wrapping every `runCommand(...)` caller
-
-Rejected examples:
-
-- empty wrapper steps
-- static `test passed` attachments
-- labels that no review or policy step uses
-
-## When Console Errors Are Not Represented As Test Results
-
-- Suite-load, import, or setup failures may appear only in `artifacts/global/stderr.txt` or global errors.
-- If `manifest/tests.jsonl` does not account for all visible failures from the test runner, inspect global stderr before concluding the run is fully modeled.
-- Treat that state as a partial runtime review, not as a clean or complete result set.
-- If runner-visible failures are present outside logical test files, final conclusions must stay provisional until the missing modeling is understood.
-
-## Acceptance Rules
-
-Accept a run only when:
-
-- scope matches expectations
-- evidence is strong enough to explain what happened
-- no high-confidence noop or placeholder findings remain
-
-### Review Completeness
-
-A test review is not complete unless:
-
-- the relevant scope was run with agent mode, unless that is impossible
-- expectations were created for the intended scope, unless this is a broad package-health audit
-- agent artifacts were reviewed before final conclusions
-- missing or partial runtime modeling was called out explicitly
-- console-only conclusions are treated as provisional when agent output is absent or incomplete
-
-## Future Loops
-
-Planned separately:
-
-- flaky detection/fix
-- known-issue and mute handling
-- quality-gate adoption
diff --git a/skills/allure-agent-mode-setup/references/root-agents-snippet.md b/skills/allure-agent-mode-setup/references/root-agents-snippet.md
deleted file mode 100644
index 672519fd932..00000000000
--- a/skills/allure-agent-mode-setup/references/root-agents-snippet.md
+++ /dev/null
@@ -1,10 +0,0 @@
-# Project Guide
-
-Use [Allure Agent Mode](docs/allure-agent-mode.md) for all test-related work in this repository.
-
-- Read `docs/allure-agent-mode.md` before designing, writing, reviewing, validating, debugging, or enriching tests.
-- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.
-- Use `allure agent` for smoke checks too, even when the change is small or mechanical.
-- Only skip agent mode when it is impossible or when you are debugging agent mode itself.
-- If agent-mode output is missing or incomplete, debug that first rather than silently falling back to console-only review.
-- Use Allure agent-mode when adding tests for features or fixes so expectations, evidence quality, and scope review are part of the loop.

From 6f6ba9b03c4a491abfd56d3a2786c98035e1bdbf Mon Sep 17 00:00:00 2001
From: Dmitry Baev <baev@users.noreply.github.com>
Date: Wed, 10 Jun 2026 19:23:37 +0100
Subject: [PATCH 2/5] fix lint

---
 packages/plugin-agent/package.json          | 4 ++--
 packages/plugin-agent/src/plugin.ts         | 1 -
 packages/plugin-agent/test/guidance.test.ts | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/packages/plugin-agent/package.json b/packages/plugin-agent/package.json
index f86939a50f1..23abda7ae78 100644
--- a/packages/plugin-agent/package.json
+++ b/packages/plugin-agent/package.json
@@ -27,8 +27,8 @@
     "build": "run clean && tsc --project ./tsconfig.json",
     "clean": "rimraf ./dist",
     "test": "rimraf ./out && vitest run",
-    "lint": "oxlint --import-plugin src test features stories",
-    "lint:fix": "oxlint --import-plugin --fix src test features stories"
+    "lint": "yarn run -T oxlint --import-plugin src test features stories",
+    "lint:fix": "yarn run -T oxlint --import-plugin --fix src test features stories"
   },
   "dependencies": {
     "@allurereport/core-api": "workspace:*",
diff --git a/packages/plugin-agent/src/plugin.ts b/packages/plugin-agent/src/plugin.ts
index fd84c668ba5..39991b8061f 100644
--- a/packages/plugin-agent/src/plugin.ts
+++ b/packages/plugin-agent/src/plugin.ts
@@ -2767,7 +2767,6 @@ const buildRunAndTestFindings = (params: {
 
   if (expectations) {
     const allFullNames = entries.map(({ tr }) => tr.fullName ?? tr.name);
-    const targetEntries = getExpectationTargetEntries(entries, expectations);
     const hasRuntimeControls = runtimeMatchingControlCount(expectations) > 0;
     const genericGoal = expectations.goal ? normalizeStepText(expectations.goal).replace(/[^\p{L}\p{N}\s]/gu, "") : "";
 
diff --git a/packages/plugin-agent/test/guidance.test.ts b/packages/plugin-agent/test/guidance.test.ts
index ff719b87b04..6415952ca25 100644
--- a/packages/plugin-agent/test/guidance.test.ts
+++ b/packages/plugin-agent/test/guidance.test.ts
@@ -3,7 +3,7 @@ import { dirname, join, resolve } from "node:path";
 import { fileURLToPath } from "node:url";
 
 import { story } from "allure-js-commons";
-import { beforeEach, describe, expect, it } from "vitest";
+import { beforeEach, describe, it } from "vitest";
 
 import { renderAgentsGuide } from "../src/guidance.js";
 import { expectTextToContainAll } from "./evidence.js";

From f0fb5a912df70721a644e14ffb01edee0df05469 Mon Sep 17 00:00:00 2001
From: Dmitry Baev <baev@users.noreply.github.com>
Date: Wed, 10 Jun 2026 20:07:50 +0100
Subject: [PATCH 3/5] fix format

---
 packages/cli/test/commands/agent.test.ts      |  4 +++-
 .../plugin-agent/src/inline-expectations.ts   | 10 ++++++++--
 packages/plugin-agent/src/query.ts            | 19 ++++++++++++++++---
 packages/plugin-agent/src/selection.ts        |  7 ++++++-
 packages/plugin-agent/test/index.test.ts      |  6 +++---
 5 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/packages/cli/test/commands/agent.test.ts b/packages/cli/test/commands/agent.test.ts
index 6de69f72c5c..5727749a23b 100644
--- a/packages/cli/test/commands/agent.test.ts
+++ b/packages/cli/test/commands/agent.test.ts
@@ -523,7 +523,9 @@ describe("agent command", () => {
       command: "npm test",
       error: expect.any(AgentExpectationUsageError),
     });
-    expect(consoleModule.error).toHaveBeenCalledWith("Use either --expectations <file> or inline expectation flags, not both");
+    expect(consoleModule.error).toHaveBeenCalledWith(
+      "Use either --expectations <file> or inline expectation flags, not both",
+    );
     expect(executeAllureRun).not.toHaveBeenCalled();
     expect(exitMock).toHaveBeenCalledWith(1);
   });
diff --git a/packages/plugin-agent/src/inline-expectations.ts b/packages/plugin-agent/src/inline-expectations.ts
index cf27ad8b7f0..d2e9de571e2 100644
--- a/packages/plugin-agent/src/inline-expectations.ts
+++ b/packages/plugin-agent/src/inline-expectations.ts
@@ -147,8 +147,14 @@ export const buildAgentInlineExpectations = (
   addLabelValues(expectedLabels, options.expectLabels, "--expect-label");
   addLabelValues(forbiddenLabels, options.forbidLabels, "--forbid-label");
 
-  const expectTests = readNonNegativeInteger(readSingleStringOption(options.expectTests, "--expect-tests"), "--expect-tests");
-  const expectSteps = readPositiveInteger(readSingleStringOption(options.expectSteps, "--expect-steps"), "--expect-steps");
+  const expectTests = readNonNegativeInteger(
+    readSingleStringOption(options.expectTests, "--expect-tests"),
+    "--expect-tests",
+  );
+  const expectSteps = readPositiveInteger(
+    readSingleStringOption(options.expectSteps, "--expect-steps"),
+    "--expect-steps",
+  );
   const expectAttachments = readPositiveInteger(
     readSingleStringOption(options.expectAttachments, "--expect-attachments"),
     "--expect-attachments",
diff --git a/packages/plugin-agent/src/query.ts b/packages/plugin-agent/src/query.ts
index 92f8e816444..396a776207a 100644
--- a/packages/plugin-agent/src/query.ts
+++ b/packages/plugin-agent/src/query.ts
@@ -4,14 +4,25 @@ import { join } from "node:path";
 import type { TestLabel, TestStatus } from "@allurereport/core-api";
 
 import { AgentUsageError } from "./errors.js";
-import type { AgentFindingCategory, AgentFindingSeverity, AgentOutputBundle, AgentTestManifestLine } from "./harness.js";
+import type {
+  AgentFindingCategory,
+  AgentFindingSeverity,
+  AgentOutputBundle,
+  AgentTestManifestLine,
+} from "./harness.js";
 import type { AgentLabelFilter } from "./selection.js";
 
 export const AGENT_QUERY_SCHEMA = "allure-agent-query/v1";
 export const AGENT_QUERY_VIEWS = ["summary", "tests", "findings", "test"] as const;
 export const AGENT_TEST_STATUSES: TestStatus[] = ["failed", "broken", "unknown", "skipped", "passed"];
 export const AGENT_FINDING_SEVERITIES: AgentFindingSeverity[] = ["high", "warning", "info"];
-export const AGENT_FINDING_CATEGORIES: AgentFindingCategory[] = ["bootstrap", "scope", "metadata", "evidence", "smells"];
+export const AGENT_FINDING_CATEGORIES: AgentFindingCategory[] = [
+  "bootstrap",
+  "scope",
+  "metadata",
+  "evidence",
+  "smells",
+];
 
 export type AgentQueryView = (typeof AGENT_QUERY_VIEWS)[number];
 
@@ -213,7 +224,9 @@ const buildAgentQueryTestPayload = async (output: AgentOutputBundle, filters: Ag
   }
 
   if (matched.length > 1) {
-    throw new AgentUsageError(`Query matched ${matched.length} tests in ${output.outputDir}. Use --test <full-name-or-id>.`);
+    throw new AgentUsageError(
+      `Query matched ${matched.length} tests in ${output.outputDir}. Use --test <full-name-or-id>.`,
+    );
   }
 
   const test = matched[0];
diff --git a/packages/plugin-agent/src/selection.ts b/packages/plugin-agent/src/selection.ts
index 16d93d8ecba..4e74be0d421 100644
--- a/packages/plugin-agent/src/selection.ts
+++ b/packages/plugin-agent/src/selection.ts
@@ -5,7 +5,12 @@ import { join, resolve } from "node:path";
 import type { TestLabel, TestPlan, TestPlanTest } from "@allurereport/core-api";
 
 import { AgentUsageError } from "./errors.js";
-import { loadAgentOutput, planAgentEnrichmentReview, type AgentOutputBundle, type AgentTestManifestLine } from "./harness.js";
+import {
+  loadAgentOutput,
+  planAgentEnrichmentReview,
+  type AgentOutputBundle,
+  type AgentTestManifestLine,
+} from "./harness.js";
 import { readLatestAgentState } from "./state.js";
 
 export type AgentRerunPreset = "review" | "failed" | "unsuccessful" | "all";
diff --git a/packages/plugin-agent/test/index.test.ts b/packages/plugin-agent/test/index.test.ts
index 45ba11badbe..8a654985d40 100644
--- a/packages/plugin-agent/test/index.test.ts
+++ b/packages/plugin-agent/test/index.test.ts
@@ -769,9 +769,9 @@ describe("AgentPlugin", () => {
     expect(content).toContain("missing attachment");
     expect(content).toContain("screenshot.png");
     expect(content).toContain("fixture.log");
-    expect(
-      await readText(join(outputDir, "tests", "default", "artifact-history.assets", "screenshot.png")),
-    ).toBe("png-bytes");
+    expect(await readText(join(outputDir, "tests", "default", "artifact-history.assets", "screenshot.png"))).toBe(
+      "png-bytes",
+    );
     expect(await readText(join(outputDir, "tests", "default", "artifact-history.assets", "fixture.log"))).toBe(
       "fixture log",
     );

From 22d5f3243bdb4d369d9d006646ad05d85f9755ed Mon Sep 17 00:00:00 2001
From: Dmitry Baev <baev@users.noreply.github.com>
Date: Thu, 11 Jun 2026 07:57:00 +0100
Subject: [PATCH 4/5] fix win tests

---
 packages/cli/test/commands/agent.test.ts | 10 ++++----
 packages/plugin-agent/test/paths.test.ts | 30 ++++++++++++++++++++++++
 2 files changed, 35 insertions(+), 5 deletions(-)
 create mode 100644 packages/plugin-agent/test/paths.test.ts

diff --git a/packages/cli/test/commands/agent.test.ts b/packages/cli/test/commands/agent.test.ts
index 5727749a23b..49cdc66eddc 100644
--- a/packages/cli/test/commands/agent.test.ts
+++ b/packages/cli/test/commands/agent.test.ts
@@ -1,4 +1,4 @@
-import { resolve } from "node:path";
+import { join, resolve } from "node:path";
 
 import { readConfig } from "@allurereport/core";
 import {
@@ -359,7 +359,7 @@ describe("agent command", () => {
       }),
     );
     expect(logMock).toHaveBeenNthCalledWith(1, "agent output: /tmp/allure-agent-123");
-    expect(logMock).toHaveBeenNthCalledWith(2, "agent index: /tmp/allure-agent-123/index.md");
+    expect(logMock).toHaveBeenNthCalledWith(2, `agent index: ${join("/tmp/allure-agent-123", "index.md")}`);
     expect(logMock).toHaveBeenNthCalledWith(3, "npm test");
     expect(logMock.mock.invocationCallOrder[0]).toBeLessThan((executeAllureRun as Mock).mock.invocationCallOrder[0]);
     expect(writeLatestAgentState).toHaveBeenNthCalledWith(
@@ -437,7 +437,7 @@ describe("agent command", () => {
       },
     });
     expect(consoleModule.log).toHaveBeenCalledWith(`agent output: ${resolvedOutput}`);
-    expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${resolvedOutput}/index.md`);
+    expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${join(resolvedOutput, "index.md")}`);
     expect(consoleModule.log).toHaveBeenCalledWith(`agent expectations: ${resolvedExpectations}`);
   });
 
@@ -557,7 +557,7 @@ describe("agent command", () => {
     expect(readConfig).not.toHaveBeenCalled();
     expect(executeAllureRun).not.toHaveBeenCalled();
     expect(consoleModule.log).toHaveBeenCalledWith(`agent output: ${outputDir}`);
-    expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${outputDir}/index.md`);
+    expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${join(outputDir, "index.md")}`);
     expect(consoleModule.error).toHaveBeenCalledWith(
       'Invalid --expect-label "module". Expected the form name=value, for example module=cli',
     );
@@ -589,7 +589,7 @@ describe("agent command", () => {
     expect(readConfig).not.toHaveBeenCalled();
     expect(executeAllureRun).not.toHaveBeenCalled();
     expect(consoleModule.log).toHaveBeenCalledWith(`agent output: ${outputDir}`);
-    expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${outputDir}/index.md`);
+    expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${join(outputDir, "index.md")}`);
     expect(consoleModule.error).toHaveBeenCalledWith(
       "Could not load expectations from /cwd/expected.yaml: Expected a YAML or JSON object",
     );
diff --git a/packages/plugin-agent/test/paths.test.ts b/packages/plugin-agent/test/paths.test.ts
new file mode 100644
index 00000000000..202407fed7f
--- /dev/null
+++ b/packages/plugin-agent/test/paths.test.ts
@@ -0,0 +1,30 @@
+import { join } from "node:path";
+
+import { epic, feature, label, story } from "allure-js-commons";
+import { beforeEach, describe, expect, it } from "vitest";
+
+import { formatAgentOutputLinks, resolveAgentIndexPath } from "../src/paths.js";
+
+beforeEach(async () => {
+  await epic("coverage");
+  await feature("agent-mode");
+  await story("agent-output-paths");
+  await label("coverage", "agent-mode");
+});
+
+describe("agent output path helpers", () => {
+  it("should resolve the agent index path using native path joining", () => {
+    const outputDir = join("tmp", "allure-agent-123");
+
+    expect(resolveAgentIndexPath(outputDir)).toBe(join(outputDir, "index.md"));
+  });
+
+  it("should format the output directory and index path links together", () => {
+    const outputDir = join("tmp", "allure-agent-123");
+
+    expect(formatAgentOutputLinks(outputDir)).toEqual([
+      `agent output: ${outputDir}`,
+      `agent index: ${join(outputDir, "index.md")}`,
+    ]);
+  });
+});

From 3db36a263c6aaa7c1d8f9aff828c7773a7f2b63d Mon Sep 17 00:00:00 2001
From: Dmitry Baev <baev@users.noreply.github.com>
Date: Thu, 11 Jun 2026 10:20:10 +0100
Subject: [PATCH 5/5] fix more tests

---
 packages/cli/test/commands/agentLatest.test.ts | 17 +++++++++++++----
 packages/cli/test/commands/agentSelect.test.ts | 17 +++++++++++++----
 2 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/packages/cli/test/commands/agentLatest.test.ts b/packages/cli/test/commands/agentLatest.test.ts
index dff18eac62a..9aa3910c82f 100644
--- a/packages/cli/test/commands/agentLatest.test.ts
+++ b/packages/cli/test/commands/agentLatest.test.ts
@@ -1,5 +1,7 @@
+import { join } from "node:path";
+
 import { readLatestAgentState, resolveAgentStateDir } from "@allurereport/plugin-agent";
-import { epic, feature, label, story } from "allure-js-commons";
+import { attachment, epic, feature, label, story } from "allure-js-commons";
 import { run } from "clipanion";
 import { type Mock, beforeEach, describe, expect, it, vi } from "vitest";
 
@@ -40,11 +42,13 @@ beforeEach(async () => {
 describe("agent latest command", () => {
   it("should print the latest output directory and index path for the resolved project cwd", async () => {
     const consoleModule = await import("node:console");
+    const outputDir = "/tmp/allure-agent-123";
+    const indexPath = join(outputDir, "index.md");
 
     (readLatestAgentState as Mock).mockResolvedValueOnce({
       schema: "allure-agent-latest/v1",
       cwd: "/cwd",
-      outputDir: "/tmp/allure-agent-123",
+      outputDir,
       command: "npm test",
       startedAt: "2026-04-15T18:00:00.000Z",
       status: "finished",
@@ -52,9 +56,14 @@ describe("agent latest command", () => {
 
     await run(AgentLatestCommand, ["agent", "latest"]);
 
+    await attachment(
+      "latest output path contract",
+      JSON.stringify({ outputDir, indexPath }, null, 2),
+      "application/json",
+    );
     expect(readLatestAgentState).toHaveBeenCalledWith("/cwd");
-    expect(consoleModule.log).toHaveBeenNthCalledWith(1, "agent output: /tmp/allure-agent-123");
-    expect(consoleModule.log).toHaveBeenNthCalledWith(2, "agent index: /tmp/allure-agent-123/index.md");
+    expect(consoleModule.log).toHaveBeenNthCalledWith(1, `agent output: ${outputDir}`);
+    expect(consoleModule.log).toHaveBeenNthCalledWith(2, `agent index: ${indexPath}`);
   });
 
   it("should exit with code 1 when no latest output exists for the project", async () => {
diff --git a/packages/cli/test/commands/agentSelect.test.ts b/packages/cli/test/commands/agentSelect.test.ts
index 50556a905bf..736e2e21cbb 100644
--- a/packages/cli/test/commands/agentSelect.test.ts
+++ b/packages/cli/test/commands/agentSelect.test.ts
@@ -1,5 +1,7 @@
+import { dirname, resolve } from "node:path";
+
 import { resolveAgentSelectionOutputDir, selectAgentTestPlan } from "@allurereport/plugin-agent";
-import { epic, feature, label, story } from "allure-js-commons";
+import { attachment, epic, feature, label, story } from "allure-js-commons";
 import { run, UsageError } from "clipanion";
 import { type Mock, beforeEach, describe, expect, it, vi } from "vitest";
 
@@ -90,6 +92,8 @@ describe("agent select command", () => {
   it("should write the selected test plan and print selection summary when output is provided", async () => {
     const consoleModule = await import("node:console");
     const fsModule = await import("node:fs/promises");
+    const outputPath = resolve("/cwd", "./testplan.json");
+    const outputDir = dirname(outputPath);
 
     (resolveAgentSelectionOutputDir as Mock).mockResolvedValueOnce("/tmp/agent-output");
     (selectAgentTestPlan as Mock).mockResolvedValueOnce({
@@ -113,13 +117,18 @@ describe("agent select command", () => {
       "./testplan.json",
     ]);
 
-    expect(fsModule.mkdir).toHaveBeenCalledWith("/cwd", { recursive: true });
+    await attachment(
+      "selected test plan output path contract",
+      JSON.stringify({ outputPath, outputDir }, null, 2),
+      "application/json",
+    );
+    expect(fsModule.mkdir).toHaveBeenCalledWith(outputDir, { recursive: true });
     expect(fsModule.writeFile).toHaveBeenCalledWith(
-      "/cwd/testplan.json",
+      outputPath,
       `{\n  "version": "1.0",\n  "tests": [\n    {\n      "selector": "suite feature A"\n    },\n    {\n      "selector": "suite feature B"\n    }\n  ]\n}\n`,
       "utf-8",
     );
-    expect(consoleModule.log).toHaveBeenNthCalledWith(1, "agent testplan: /cwd/testplan.json");
+    expect(consoleModule.log).toHaveBeenNthCalledWith(1, `agent testplan: ${outputPath}`);
     expect(consoleModule.log).toHaveBeenNthCalledWith(2, "agent selection source: /tmp/agent-output");
     expect(consoleModule.log).toHaveBeenNthCalledWith(3, "agent selection preset: failed");
     expect(consoleModule.log).toHaveBeenNthCalledWith(4, "agent selection tests: 2");