From 53edd7153fa4cf96bae33e37f7ba1c82d082735b Mon Sep 17 00:00:00 2001 From: Dmitry Baev Date: Wed, 10 Jun 2026 15:20:34 +0100 Subject: [PATCH 1/5] improve allure agent mode --- AGENTS.md | 16 +- README.md | 9 +- docs/agent_enrichment_loop.md | 181 --- docs/allure-agent-mode.md | 392 ------ docs/allure-test-agent.md | 230 +++ packages/cli/README.md | 17 +- packages/cli/src/commands/agent-run.ts | 216 +++ packages/cli/src/commands/agent.ts | 646 +++++---- packages/cli/src/commands/run.ts | 20 - packages/cli/src/index.ts | 22 +- packages/cli/src/utils/index.ts | 2 - packages/cli/test/commands/agent.test.ts | 443 +++++- .../cli/test/commands/agentLatest.test.ts | 22 +- packages/cli/test/commands/agentQuery.test.ts | 199 +++ .../cli/test/commands/agentSelect.test.ts | 77 +- .../cli/test/commands/run.integration.test.ts | 1039 ++++++++------ packages/cli/test/commands/run.test.ts | 40 - packages/plugin-agent/README.md | 164 +-- packages/plugin-agent/src/capabilities.ts | 178 +++ packages/plugin-agent/src/errors.ts | 21 + packages/plugin-agent/src/guidance.ts | 258 +++- packages/plugin-agent/src/harness.ts | 148 +- packages/plugin-agent/src/index.ts | 17 +- .../plugin-agent/src/inline-expectations.ts | 295 ++++ packages/plugin-agent/src/invalid-output.ts | 259 ++++ packages/plugin-agent/src/model.ts | 49 + packages/plugin-agent/src/paths.ts | 14 + packages/plugin-agent/src/plugin.ts | 1240 ++++++++++++++--- packages/plugin-agent/src/query.ts | 252 ++++ .../src/selection.ts} | 27 +- .../src/state.ts} | 0 .../plugin-agent/test/capabilities.test.ts | 61 + packages/plugin-agent/test/evidence.ts | 24 + packages/plugin-agent/test/guidance.test.ts | 70 + packages/plugin-agent/test/harness.test.ts | 480 ++++++- packages/plugin-agent/test/index.test.ts | 1192 ++++++++++++++-- .../test/inline-expectations.test.ts | 215 +++ .../plugin-agent/test/invalid-output.test.ts | 94 ++ packages/plugin-agent/test/query.test.ts | 322 +++++ .../test/selection.test.ts} | 29 +- 
packages/plugin-agent/test/skills.test.ts | 137 -- .../test/state.test.ts} | 17 +- .../SKILL.md | 92 -- .../agents/openai.yaml | 7 - .../references/expectations-example.yaml | 17 - skills/allure-agent-mode-setup/SKILL.md | 42 - .../agents/openai.yaml | 7 - .../references/project-guide-template.md | 174 --- .../references/root-agents-snippet.md | 10 - 49 files changed, 7021 insertions(+), 2462 deletions(-) delete mode 100644 docs/agent_enrichment_loop.md delete mode 100644 docs/allure-agent-mode.md create mode 100644 docs/allure-test-agent.md create mode 100644 packages/cli/src/commands/agent-run.ts create mode 100644 packages/cli/test/commands/agentQuery.test.ts create mode 100644 packages/plugin-agent/src/capabilities.ts create mode 100644 packages/plugin-agent/src/errors.ts create mode 100644 packages/plugin-agent/src/inline-expectations.ts create mode 100644 packages/plugin-agent/src/invalid-output.ts create mode 100644 packages/plugin-agent/src/paths.ts create mode 100644 packages/plugin-agent/src/query.ts rename packages/{cli/src/utils/agent-select.ts => plugin-agent/src/selection.ts} (89%) rename packages/{cli/src/utils/agent-state.ts => plugin-agent/src/state.ts} (100%) create mode 100644 packages/plugin-agent/test/capabilities.test.ts create mode 100644 packages/plugin-agent/test/evidence.ts create mode 100644 packages/plugin-agent/test/guidance.test.ts create mode 100644 packages/plugin-agent/test/inline-expectations.test.ts create mode 100644 packages/plugin-agent/test/invalid-output.test.ts create mode 100644 packages/plugin-agent/test/query.test.ts rename packages/{cli/test/utils/agent-select.test.ts => plugin-agent/test/selection.test.ts} (75%) delete mode 100644 packages/plugin-agent/test/skills.test.ts rename packages/{cli/test/utils/agent-state.test.ts => plugin-agent/test/state.test.ts} (88%) delete mode 100644 skills/allure-agent-mode-feature-delivery/SKILL.md delete mode 100644 skills/allure-agent-mode-feature-delivery/agents/openai.yaml delete 
mode 100644 skills/allure-agent-mode-feature-delivery/references/expectations-example.yaml delete mode 100644 skills/allure-agent-mode-setup/SKILL.md delete mode 100644 skills/allure-agent-mode-setup/agents/openai.yaml delete mode 100644 skills/allure-agent-mode-setup/references/project-guide-template.md delete mode 100644 skills/allure-agent-mode-setup/references/root-agents-snippet.md diff --git a/AGENTS.md b/AGENTS.md index 9d6627171da..bba339fb9e0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -1,14 +1,12 @@ # Project Guide -Use [Allure Agent Mode](docs/allure-agent-mode.md) for all test-related work in this repository. +Use [Allure Test Agent](docs/allure-test-agent.md) for all test-related work in this repository. -- Read `docs/allure-agent-mode.md` before designing, writing, reviewing, validating, debugging, or enriching tests. -- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config. -- Use `allure agent` for smoke checks too, even when the change is small or mechanical. -- After changing a package in this repository, treat package builds as part of validation and run the changed package build command before finalizing (for example, `yarn workspace build`), because type-level regressions can break builds. -- **Workspace build order:** dependency edges come from each package's `dependencies` (including `workspace:*` links to other packages). The root `yarn build` script runs `yarn workspaces foreach -Avvpt run build`, where `-t`/`--topological` schedules a workspace only after its regular `dependencies` have built successfully (parallelism `-p` is within those constraints). 
For a single touched package, `yarn workspace build` is enough; when several packages change or downstream breakage is possible, prefer a full **`yarn build`** at the repo root so Yarn applies the same topological order as CI. -- **Lint, format, and type-aware lint:** before finalizing code changes, align with the `lint` job in `.github/workflows/build.yml`: run **`yarn build`**, then **`yarn lint`**, **`yarn format:check`**, and **`yarn lint:type`**. The root script `yarn verify` runs `format:check`, `lint`, and `lint:type` together but does not run `yarn build`; use it only when you already built and need the same static checks in one command. +- Read `docs/allure-test-agent.md` before designing, writing, reviewing, validating, debugging, or enriching tests. Use the `$allure-test-agent` skill as the durable behavior guide when it is installed; this project file contains local commands and conventions. +- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through the local agent test service when available, or through `allure agent` otherwise. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config. +- Use agent-mode execution for smoke checks too, even when the change is small or mechanical. - Only skip agent mode when it is impossible or when you are debugging agent mode itself. -- If agent-mode output is missing or incomplete, debug that first rather than silently falling back to console-only review; use the checklist in `docs/allure-agent-mode.md` (**Agent mode failures and unavailable runs**). +- If agent-mode output is missing or incomplete, debug that first rather than silently falling back to console-only review. - Use Allure agent-mode when adding tests for features or fixes so expectations, evidence quality, and scope review are part of the loop. 
-- Keep any non-testing project guidance here short; the detailed Allure workflow belongs in the linked guide. +- Do not present ignored, excluded, swallowed, or non-gating tests as a passing validation signal. +- After changing a package, run the changed package build command before finalizing. For several packages or possible downstream breakage, prefer the root `yarn build` so Yarn applies the same topological order as CI. diff --git a/README.md b/README.md index 41eb2ca64d3..dd47023f37b 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,13 @@ For example: npx allure agent -- npm test ``` -`allure agent` runs with an agent-only profile by default. It creates a fresh output directory automatically, can load an expectations file with `--expectations`, and ignores configured presentation or export plugins such as Awesome or TestOps unless you explicitly fall back to the lower-level `ALLURE_AGENT_*` plus `allure run` flow. +`allure agent` runs with an agent-only profile by default. It creates a fresh output directory automatically, accepts compact inline expectations such as `--goal`, `--expect-tests`, `--expect-test`, `--expect-label`, and `--expect-step-containing`, can load an expectations file with `--expectations`, and ignores configured presentation or export plugins such as Awesome or TestOps for that run. + +Agents and setup tools can inspect the local structured capability contract without scraping help text: + +```bash +npx allure agent capabilities --json +``` ### Generating Reports Manually @@ -122,6 +128,7 @@ The Allure CLI includes several helpful global options. 
Use `--help` to explore ```bash npx allure run --help +npx allure agent capabilities --json npx allure agent --help npx allure watch --help ``` diff --git a/docs/agent_enrichment_loop.md b/docs/agent_enrichment_loop.md deleted file mode 100644 index 240f396bae0..00000000000 --- a/docs/agent_enrichment_loop.md +++ /dev/null @@ -1,181 +0,0 @@ -# Allure Agent Enrichment Loop - -Canonical downstream guidance now lives in two product-facing places: - -- generated `AGENTS.md` in every agent-mode output directory -- the published `@allurereport/plugin-agent` README -- project `docs/allure-agent-mode.md` when a repository adopts the skills-based setup flow - -This document remains a maintainer companion for developing the plugin and harness -inside this repository. - -## Goal - -The Allure agent plugin is intentionally read-only. It records what happened in the -test run, but it does not mutate tests or invent evidence. - -The enrichment loop sits above that output: - -1. Generate `ALLURE_AGENT_EXPECTATIONS` as a fresh per-run YAML or JSON file. -2. Run tests with `allure agent`, or use the lower-level `ALLURE_AGENT_*` plus `allure run` fallback when you need direct environment control. -3. Review `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`. -4. Enrich only the targeted tests with real runtime metadata. -5. Rerun the same scope and accept the change only when scope matches and the - resulting evidence is strong enough to review. - -The harness API exported by `@allurereport/plugin-agent` implements the machine -part of this loop: - -- `buildAgentExpectations(...)` creates the JSON payload to write to - `ALLURE_AGENT_EXPECTATIONS`. -- `loadAgentOutput(...)` reads the manifest contract from an agent output directory. -- `planAgentEnrichmentReview(...)` maps existing `check_name` values to concrete - enrichment actions and produces an acceptance decision. -- `reviewAgentOutput(...)` is the convenience wrapper that loads and reviews in one call. 
- -## Acceptance Policy - -The harness stays advisory for raw execution, but it is strict for enrichment review: - -- reject when scope drifts from expectations -- reject when high-confidence noop-style evidence remains -- iterate when evidence is still too weak -- accept only when scope matches, expectations are present, and no blocking evidence gaps remain - -Current blocking signals: - -- scope drift: - - `missing-expected-test` - - `missing-expected-prefix` - - `missing-expected-environment` - - `unexpected-environment` - - `forbidden-selector-match` - - `unexpected-test` -- evidence still missing: - - `failed-without-useful-steps` - - `failed-without-attachments` - - `nontrivial-run-with-empty-trace` - - `retries-without-new-evidence` - - `passed-without-observable-evidence` - - `metadata-mismatch` - - `history-id-collision` -- anti-dummy: - - `noop-dominated-steps` at or above the configured confidence threshold - -## Remediation Mapping - -The harness reuses the existing `check_name` values instead of inventing a second -diagnosis channel. 
- -| `check_name` | Action category | Expected remediation | -| --- | --- | --- | -| `failed-without-useful-steps` | `add-meaningful-steps` | Add setup, action, and assertion steps around real behavior | -| `nontrivial-run-with-empty-trace` | `add-meaningful-steps` | Make the execution path observable with real runtime state | -| `passed-without-observable-evidence` | `add-meaningful-steps` | Show what the passing path actually verified | -| `failed-without-attachments` | `add-test-attachments` | Add real payloads, responses, screenshots, DOM snapshots, diffs, or logs | -| `global-only-artifacts` | `add-test-attachments` | Move evidence closer to the relevant test or step | -| `metadata-mismatch` | `repair-test-metadata` | Add only the minimal labels or parameters needed for scope review | -| `retries-without-new-evidence` | `add-retry-diagnostics` | Add per-attempt evidence so retries show what changed | -| `noop-dominated-steps` | `collapse-low-signal-trace` | Remove noop wrappers and replace bulk event spam with compact evidence | -| `step-spam` | `collapse-low-signal-trace` | Reduce event spam and prefer one focused attachment when appropriate | - -## Metadata Baseline - -Keep metadata intentionally small: - -- require a feature or task label when the run is scoped to a feature or task -- add severity only when it matters for review or quality-gate policy -- keep owner, layer, epic, story, and similar taxonomy optional unless the repo already uses them -- do not add labels that are not used by scope checks, review, or downstream policy - -## Runtime Enrichment Examples - -Canonical JS/Vitest patterns already live in: - -- `packages/sandbox/test/bulk.spec.ts` -- `packages/sandbox/test/legacy.spec.ts` - -Use those APIs to add real evidence, not placeholders: - -```ts -import { attachment, label, step } from "allure-js-commons"; -import { expect, it } from "vitest"; - -it("creates an order", async () => { - await label("feature", "orders"); - await 
label("severity", "critical"); - - const request = await step("prepare order payload", async () => { - const payload = { sku: "book-123", quantity: 1 }; - - await attachment("request.json", JSON.stringify(payload, null, 2), "application/json"); - return payload; - }); - - const response = await step("submit order", async () => { - const result = await createOrder(request); - - await attachment("response.json", JSON.stringify(result, null, 2), "application/json"); - return result; - }); - - await step("assert order was created", () => { - expect(response.status).toBe(201); - expect(response.body.id).toBeDefined(); - }); -}); -``` - -## Anti-Dummy Rules - -Valid enrichment: - -- every step corresponds to a real action, state transition, or check -- every attachment captures real runtime data from that execution -- metadata exists because the review loop uses it - -Rejected enrichment: - -```ts -await step("success", () => {}); -await attachment("result.txt", "test passed", "text/plain"); -await label("feature", "placeholder"); -``` - -Why it is rejected: - -- the step records no real behavior -- the attachment is generic text, not runtime evidence -- the label is meaningless unless it is used by scope or policy - -## Minimal Harness Example - -```ts -import { buildAgentExpectations, reviewAgentOutput } from "@allurereport/plugin-agent"; -import { writeFile } from "node:fs/promises"; - -const expectations = buildAgentExpectations({ - goal: "Validate feature A", - taskId: "feature-a", - target: { - environments: ["default"], - fullNamePrefixes: ["feature A"], - labelValues: { feature: "feature-a" }, - }, - forbidden: { - fullNamePrefixes: ["feature B"], - labelValues: { feature: ["feature-b", "legacy-feature"] }, - }, - notes: ["Only feature A tests should run."], -}); - -await writeFile("./out/agent-expected.json", JSON.stringify(expectations, null, 2)); - -const review = await reviewAgentOutput("./out/agent-report"); - -if (review.status !== "accept") { - for (const 
item of review.plan) { - console.log(item.checkName, item.category, item.remediationHint); - } -} -``` diff --git a/docs/allure-agent-mode.md b/docs/allure-agent-mode.md deleted file mode 100644 index 548cf53e4cf..00000000000 --- a/docs/allure-agent-mode.md +++ /dev/null @@ -1,392 +0,0 @@ -# Allure Agent Mode - -## Purpose - -Use Allure agent-mode to review what the tests actually did, not just whether the command exited successfully. - -Use it when: - -- adding or updating tests for a feature or bug -- reviewing existing test suites, auditing coverage, or triaging failing suites -- validating that intended tests ran and unrelated scope did not drift in -- improving weak or low-signal runtime evidence -- preparing richer agent-mode reviews, quality gates, and future loop adoption - -## Review Principle - -Runtime first, source second. - -- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config. -- Use `ALLURE_AGENT_*` with `allure run` only as the lower-level fallback when you need direct environment control. -- If the agent-mode output is missing or incomplete, debug that first and treat any console-only conclusion as provisional. - -## Verification Standard - -- Use `allure agent` for smoke checks too, even when the change is small or mechanical. -- Only skip agent mode when it is impossible or when you are debugging agent mode itself. -- After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly. -- After changing a package in this repository, run that package build command before finalizing (for example, `yarn workspace build`). 
-- Monorepo build order, full-repo builds, and lint/format/type-aware lint expectations before finalizing live in `AGENTS.md` (keep agent-mode docs focused on runtime evidence loops). - -## Agent mode failures and unavailable runs - -Use this when `allure agent` errors, produces no usable output directory, exits non-zero before manifests exist, or cannot be run in the current environment. - -1. **Keep conclusions honest:** do not upgrade a plain test-runner log to a “full” review outcome. If agent artifacts are missing, any pass/fail or scope claim stays **provisional** until agent mode succeeds for the intended command. -2. **Confirm the invocation:** use the repo’s normal wrapper (here: `yarn allure agent -- …`) so the same subcommand runs with agent-mode instrumentation. Compare argv and cwd with a known-good run. -3. **Locate output:** if you did not pass `--output`, run `allure agent latest` or `allure agent state-dir` and inspect the resolved directory. Prefer a fresh explicit path via `--output` or `ALLURE_AGENT_OUTPUT` when debugging path or permission issues. -4. **Expectations and env:** ensure `ALLURE_AGENT_EXPECTATIONS` points at the file you intended (typos silently change behavior). When isolating bugs, set unique `ALLURE_AGENT_OUTPUT` / expectations paths per run (see [Per-Run Artifacts](#per-run-artifacts)). -5. **Partial artifacts:** if `index.md` or under `manifest/` is missing but the process exited zero, treat the run as **incomplete** and investigate before signing off. If the runner shows failures that never appear in `manifest/tests.jsonl`, check `artifacts/global/stderr.txt` and other logs under `artifacts/global/` (see [When Console Errors Are Not Represented As Test Results](#when-console-errors-are-not-represented-as-test-results)). -6. 
**CLI or environment blocked:** when agent mode truly cannot run (broken install, policy-blocked sandbox, missing binary), say so explicitly in your summary: what you ran instead, which artifacts are absent, and what to rerun with `allure agent` once unblocked. Do not silently default to “tests passed” narratives from console-only runs. -7. **Escalation:** repeated failures after the steps above are a **tooling** problem—collect command line, exit code, first/last log chunks, and Allure CLI version; fix or report that before relying on any substitute workflow. - -Skipping agent mode remains limited to the cases already stated in this guide (impossible here, or you are debugging agent mode itself). - -## Repository Status - -This repository already has a working Allure 3 setup. - -- Root report configuration lives in `allurerc.mjs`. -- Most package test suites emit results with `allure-vitest/reporter` into `./out/allure-results`. -- The normal feature-delivery path here is to run a targeted workspace test command under `yarn allure agent -- ...`. -- You usually do not need to bootstrap Allure from scratch in this repo; focus on expectations, evidence quality, and scope control. - -## Helpful Commands - -- `allure agent latest` prints the latest agent output directory for the current project cwd. Use it when a prior run omitted `--output` and you want to reopen the most recent agent-mode artifacts. -- `allure agent state-dir` prints the state directory for the current project cwd. Use it when you need to inspect where `latest` pointers are stored or debug sandbox behavior. -- `allure agent select --latest` or `allure agent select --from <output-dir>` prints the review-targeted test plan from a prior agent run. Add `--preset failed` or exact `--label name=value` / `--environment <id>` filters when you need a narrower rerun plan. -- `allure agent --rerun-latest -- <command>` or `allure agent --rerun-from <output-dir> -- <command>` reruns only the selected tests through the framework-agnostic Allure testplan flow. 
The default rerun preset is `review`. - -## Advanced Reruns - -- `--rerun-preset review|failed|unsuccessful|all` changes how the rerun seed set is chosen. Use `review` for the default agent-targeted loop, `failed` for classic failure reruns, `unsuccessful` for any non-passed tests, and `all` when you want the whole previously observed set. -- `--rerun-environment <id>` narrows the rerun selection to one or more environment ids from the previous agent output. Repeat the flag for multiple environments. -- `--rerun-label name=value` narrows the rerun selection to tests whose prior results carried exact matching labels. Repeat the flag for multiple label filters. -- `ALLURE_AGENT_STATE_DIR` overrides the default project-scoped state directory used by `allure agent latest`, `allure agent state-dir`, and `--rerun-latest`. Use it when you need a deterministic shared location in CI or a constrained sandbox. - -## Core Loops - -### Test Review Loop - -1. Identify the exact review scope. -2. Create a fresh expectations file for this run in a temp directory. -3. Run only that scope with `allure agent`. -4. Read `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`. -5. Read per-test markdown only for tests that failed, drifted, or have findings. -6. Only after runtime review, inspect source code for root cause or coverage gaps. -7. If evidence is weak or partial, enrich the tests and rerun. -8. When iterating on the same scope, prefer `allure agent --rerun-latest -- <command>` or `allure agent --rerun-from <output-dir> -- <command>` so the rerun stays focused on the review-targeted tests. - -### Feature Delivery Loop - -1. Understand the feature or issue and the intended test scope. -2. Create a fresh expectations file for this run in a temp directory. -3. Write or update the tests. -4. Run the target scope with `allure agent`. -5. Review `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, `manifest/findings.jsonl`, and the relevant per-test markdown files. -6. 
Fix scope drift, weak evidence, or bad test design. -7. Rerun with a new temp output directory and a new expectations file until the run is acceptable. - -### Metadata Enrichment Loop - -Use this when the run is functionally correct but too weak to review: - -1. Identify missing or low-signal findings in agent output. -2. Add real steps, attachments, or minimal metadata only where they improve review quality. -3. Rerun the same intended scope. -4. Reject the run if noop-style or placeholder evidence remains. - -### Small Test Change Workflow - -Use this when the code change is mostly mechanical, such as typing cleanup, mock refactors, or helper extraction: - -1. Create a fresh expectations file and temp output directory for the touched scope. -2. Run the touched scope with `allure agent`, even if the goal is only a smoke check after a small or mechanical change. -3. Review `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`. -4. Only then make a final statement about regression safety or test correctness. - -### Coverage Review Workflow - -Use this for command matrices, package audits, or business-logic coverage reviews: - -1. Split the audit into scoped groups. -2. Give each group its own expectations file and temp output directory. -3. Run each group with `allure agent`. -4. Review runtime artifacts first, then inspect source code only after the run explains what actually executed. -5. Mark the review incomplete until each scoped group either matched expectations or was explicitly documented as a broad package-health audit. - -## Per-Run Artifacts - -Each run must use fresh temp paths so parallel runs stay isolated. `allure agent` creates a fresh temp output directory automatically when you omit `--output`, but this guide still uses explicit temp paths when you need deterministic file locations. - -- `ALLURE_AGENT_OUTPUT` should point to a unique temp directory per run. 
-- `ALLURE_AGENT_EXPECTATIONS` should point to a unique expectations file per run. -- Do not reuse output or expectations paths across parallel runs. - -YAML is the preferred format for expectations files in v1, though JSON also works. - -Example: - -Primary pattern: - -```bash -TMP_DIR="$(mktemp -d)" -EXPECTATIONS="$TMP_DIR/expectations.yaml" -cat >"$EXPECTATIONS" <<'YAML' -goal: Validate feature A -task_id: feature-a -expected: - environments: - - default - full_name_prefixes: - - feature A - label_values: - feature: feature-a -notes: - - Only feature A tests should run. -YAML - -npx allure agent \ - --output "$TMP_DIR/agent-output" \ - --expectations "$EXPECTATIONS" \ - -- npm test -``` - -Lower-level fallback: - -```bash -ALLURE_AGENT_OUTPUT="$TMP_DIR/agent-output" \ -ALLURE_AGENT_EXPECTATIONS="$EXPECTATIONS" \ -npx allure run -- npm test -``` - -Repository-oriented examples: - -Review an entire package: - -```bash -TMP_DIR="$(mktemp -d)" -EXPECTATIONS="$TMP_DIR/expectations.yaml" -cat >"$EXPECTATIONS" <<'YAML' -goal: Review CLI package tests -task_id: cli-package-review -expected: - label_values: - module: cli -notes: - - Review runtime evidence before source inspection. -YAML - -yarn allure agent \ - --output "$TMP_DIR/agent-output" \ - --expectations "$EXPECTATIONS" \ - -- yarn workspace allure test -``` - -Compact coverage-review pattern: - -```bash -TMP_DIR="$(mktemp -d)" -EXPECTATIONS="$TMP_DIR/expectations.yaml" - -yarn allure agent \ - --output "$TMP_DIR/agent-output" \ - --expectations "$EXPECTATIONS" \ - -- yarn workspace test -``` - -Package review expectations example: - -```yaml -goal: Review package tests -task_id: package-review -expected: - label_values: - module: my-module -notes: - - Review runtime evidence before source inspection. 
-``` - -Review a single spec: - -```bash -TMP_DIR="$(mktemp -d)" -EXPECTATIONS="$TMP_DIR/expectations.yaml" -cat >"$EXPECTATIONS" <<'YAML' -goal: Review CLI run integration coverage -task_id: cli-run-integration-review -expected: - label_values: - package: test.commands.run.integration.test.ts -notes: - - Review runtime evidence before source inspection. -YAML - -yarn allure agent \ - --output "$TMP_DIR/agent-output" \ - --expectations "$EXPECTATIONS" \ - -- yarn workspace allure test test/commands/run.integration.test.ts -``` - -Single-spec expectations example: - -```yaml -goal: Review one spec -task_id: single-spec-review -expected: - label_values: - package: test.commands.run.integration.test.ts -notes: - - Review runtime evidence before source inspection. -``` - -```bash -TMP_DIR="$(mktemp -d)" -EXPECTATIONS="$TMP_DIR/expectations.yaml" -cat >"$EXPECTATIONS" <<'YAML' -goal: Validate plugin-agent behavior -task_id: plugin-agent -expected: - label_values: - package: test.index.test.ts -notes: - - Only plugin-agent tests should run. -YAML - -yarn allure agent \ - --output "$TMP_DIR/agent-output" \ - --expectations "$EXPECTATIONS" \ - -- yarn workspace @allurereport/plugin-agent test -``` - -```bash -TMP_DIR="$(mktemp -d)" -EXPECTATIONS="$TMP_DIR/expectations.yaml" -cat >"$EXPECTATIONS" <<'YAML' -goal: Validate CLI run integration coverage -task_id: cli-run-integration -expected: - label_values: - package: test.commands.run.integration.test.ts -YAML - -yarn allure agent \ - --output "$TMP_DIR/agent-output" \ - --expectations "$EXPECTATIONS" \ - -- yarn workspace allure test test/commands/run.integration.test.ts -``` - -## Reviewing Agent Output - -Read in this order: - -1. `index.md` -2. `manifest/run.json` -3. `manifest/tests.jsonl` -4. `manifest/findings.jsonl` -5. the relevant `tests//.md` -6. copied attachments under `.assets/` and process logs under `artifacts/global/` - -Questions to answer: - -- Did only the intended tests run? 
-- Did the test prove the intended behavior? -- Is the runtime evidence strong enough to understand the result? -- Are there smells like noop steps, step spam, or generic attachments? - -## When Console Errors Are Not Represented As Test Results - -- Suite-load, import, or setup failures may appear only in `artifacts/global/stderr.txt` or global errors. -- If `manifest/tests.jsonl` does not account for all visible failures from the test runner, inspect global stderr before concluding the run is fully modeled. -- Treat that state as a partial runtime review, not as a clean or complete result set. -- If runner-visible failures are present outside logical test files, final conclusions must stay provisional until the missing modeling is understood. - -## Test Design Best Practices - -- Prefer a small setup/action/assertion story over event-by-event noise. -- Write tests that prove the intended behavior precisely and avoid unrelated actions. -- Use helper-boundary instrumentation when several call sites need the same evidence. -- Keep metadata minimal and purposeful. -- Add labels only when they help scope review, debugging, or downstream policy. 
- -Good helper-boundary example: - -- instrument `runCommand` once instead of wrapping every `runCommand(...)` call site in identical steps - -## Evidence Rules - -### Steps - -Valid steps: - -- real setup actions -- real user or API actions -- real state transitions -- real assertions and checks - -Invalid steps: - -- empty wrapper steps -- steps named only `success`, `done`, or similar generic outcomes -- steps that repeat logs without clarifying behavior - -### Attachments - -Valid attachments: - -- request and response payloads -- logs tied to the failing or verifying point -- screenshots, DOM snapshots, diffs, traces -- compact summaries derived from actual runtime data - -Invalid attachments: - -- static placeholder text like `test passed` -- generic “success” notes with no runtime evidence -- artifacts not tied to the current execution - -## Metadata Rules - -- Add feature or task labels when the run is scoped by feature or task. -- Add severity only when it matters for review or quality-gate policy. -- Keep owner, epic, story, layer, and similar taxonomy optional unless the project already uses them. -- Do not add metadata that no expectation, review step, or policy consumes. 
- -## Acceptance Rules - -Accept the run only when: - -- scope matches expectations -- evidence is strong enough to explain what happened -- retries include per-attempt diagnostics when needed -- no high-confidence noop or placeholder findings remain - -Iterate again when: - -- expected tests are missing -- unrelated tests or environments appeared -- steps are empty or uninformative -- attachments are missing or low-signal -- metadata drift makes scope review ambiguous - -### Review Completeness - -A test review is not complete unless: - -- the relevant scope was run with agent mode, unless that is impossible -- expectations were created for the intended scope, unless this is a broad package-health audit -- agent artifacts were reviewed before final conclusions -- missing or partial runtime modeling was called out explicitly -- console-only conclusions are treated as provisional when agent output is absent or incomplete -- agent-mode tooling failures were handled using [Agent mode failures and unavailable runs](#agent-mode-failures-and-unavailable-runs) (or agent mode was skipped only per the exceptions above) - -## Future Loops - -These are planned, but not part of the first stable core: - -- flaky detection and fix loop -- known-issue and mute loop -- quality-gate installation and adoption loop - -When these loops are added, they should build on the same evidence rules used here rather than bypassing them. diff --git a/docs/allure-test-agent.md b/docs/allure-test-agent.md new file mode 100644 index 00000000000..c6ec2ef6265 --- /dev/null +++ b/docs/allure-test-agent.md @@ -0,0 +1,230 @@ +# Allure Test Agent + +Use Allure agent mode to design, review, validate, debug, and enrich tests in this project. + +This file is project-specific guidance. Durable test-design, expectation, and evidence rules live in the `allure-test-agent` skill. If the skill is available, use it together with this file. 
If the skill is unavailable, follow this file as the local fallback and keep conclusions conservative. + +## Review Principle + +Runtime first, source second. + +- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through the local agent test service when available, or through `allure agent` otherwise. +- Use agent-mode execution for smoke checks too, even when the change is small or mechanical. +- Only skip agent mode when it is impossible or when debugging agent mode itself. +- If agent-mode output is missing or incomplete, debug that first and treat console-only conclusions as provisional. + +## Local Capability Snapshot + +Refresh this section when Allure, test runners, CI, or project wrappers change. Confirm local support with the project wrapper, `allure --version`, and `allure agent --help` before using optional commands. + +Do not store the exact Allure version here. Version output is a runtime fact; this file stores the wrapper, last snapshot marker, and how to refresh capabilities. 
+ +- Allure wrapper: `yarn allure` +- Capability snapshot last checked: `2026-06-10` +- Refresh capabilities with: `yarn allure --version`, `yarn allure agent capabilities --json`, and `yarn allure agent --help` +- Agent execution: supported with `yarn allure agent -- <command>` +- Output option: `--output <dir>` or `-o <dir>`; omitted output uses a fresh temporary directory +- Expectation controls: `--goal`, `--task-id`, `--expect-tests`, `--expect-test`, `--expect-prefix`, `--expect-label`, `--expect-env`, `--forbid-label`, `--expect-step-containing`, `--expect-steps`, `--expect-attachments`, `--expect-attachment`, and advanced `--expectations <file>` +- Latest/state directory recovery: `yarn allure agent latest`; `yarn allure agent state-dir`; `ALLURE_AGENT_STATE_DIR=<dir>` override +- Selection/rerun support: `yarn allure agent select --latest|--from <output-dir>` and `yarn allure agent --rerun-latest|--rerun-from <output-dir> -- <command>` +- Discovery/configuration commands: unsupported by this local CLI +- Local agent test service: unsupported or unknown; use `yarn allure agent` directly + +## Local Agent Test Service + +Use the local agent test service when the project provides one and the task is query-heavy, stateful, or iterative. Use `allure agent` directly when service mode is unavailable or unnecessary.
+ +- Service status: unsupported or unknown +- Start or connect command: unknown +- Capability/status endpoint: unknown +- Supported intents: use direct CLI runs, query, select, and rerun commands +- Supported profiles and selectors: direct runner selectors plus agent expectation flags +- Query support: `yarn allure agent query --latest summary|tests|findings|test` or `--from <output-dir>` +- Realtime and cancellation support: unknown for service mode +- Service logs or diagnostics: unknown +- Fallback when unavailable: `yarn allure agent -- <command>` + +## Local Test Surfaces + +- Test frameworks and runners: Yarn workspaces; Vitest for most packages; Playwright for `@allurereport/e2e` and `@allurereport/static-server` +- Test roots: package-local tests under `packages/*/test`, Playwright tests in `packages/e2e` and `packages/static-server`, plus package-specific config files +- Allure result paths: most Vitest packages write `./out/allure-results`; `packages/sandbox` writes `./allure-results`; Playwright packages write `./out/allure-results` +- Known selector support: Vitest file/name selectors, Playwright file/project selectors, workspace package selection through `yarn workspace <package>` +- Known environments or services needed for tests: Playwright browser dependencies for e2e/static-server; CI runs OS matrix environments + +## Allure Integrations + +Document only integrations detected or explicitly configured in this project.
+ +- Existing Allure adapters/integrations: `allure-vitest`, `allure-playwright`, Allure CLI `run`, `agent`, `generate`, and report plugins +- Runner config files: root `allurerc.mjs`; package `vitest.config.ts`; `packages/e2e/playwright.config.ts`; `packages/static-server/playwright.config.ts` +- Allure results directories: package `out/allure-results`, sandbox `allure-results`, CI dumps `allure-results-.zip` +- Supported integration configuration targets: discovered package runner configs +- Validation command for integration setup: focused package command through `yarn allure agent -- yarn workspace test` +- Known unsupported or skipped integrations: local agent service, discovery/configuration commands +- Integration-specific quirks: many package tests clean `./out`; CI uses `yarn allure run --config=./allurerc.gate.mjs --environment= --dump=allure-results- -- yarn test` + +## Project Test-Design Conventions + +Fill only conventions that exist in this project. Durable test-design rules stay in the `allure-test-agent` skill. + +- Accepted test layers: unit/package tests with Vitest; browser/e2e tests with Playwright; CLI integration tests in `packages/cli` +- Preferred assertion style: framework matchers and focused assertions from existing package tests +- Parameterized test style: use existing Vitest/Playwright conventions in the touched package +- Smoke coverage conventions: use focused package or file-level runs for small changes; root `yarn test` is broad package health +- Mocking and integration-test preference: follow the touched package's existing test style +- Suppression/quarantine policy: unknown; do not present skipped or non-gating tests as proof + +## Run Profiles + +Document only profiles that exist in this project. If a profile is inferred rather than confirmed, mark it as inferred. 
+ +| Profile | Command or service intent | Expected use | Confidence limits | +| --- | --- | --- | --- | +| smoke | `yarn allure agent -- yarn workspace <package> test <selector>` when the runner supports narrowing | Quick signal for a touched package or test file | Does not prove downstream package behavior | +| affected | `yarn allure agent -- yarn workspace <package> test` plus changed package build | Package-level validation after local edits | Mapping may miss indirect workspace impact | +| feature/component | `yarn allure agent --goal <goal> --expect-* -- yarn workspace <package> test <selector>` | Focused validation for one behavior or component | Depends on runner selector precision | +| full | `yarn allure agent -- yarn test` | Broad workspace test signal | Cost may be high and process-tree tests may be environment-sensitive | +| e2e | `yarn allure agent -- yarn workspace @allurereport/e2e test` or static-server e2e command | Browser workflow validation | Requires installed Playwright browsers/dependencies | + +## Execution Signal And CI Trust + +Do not present ignored, excluded, swallowed, advisory, or non-gating test execution as proof that behavior is safe. + +- Default local test command: `yarn test` +- Default local command exclusions: root `yarn test` excludes `packages/sandbox` +- CI test jobs: `.github/workflows/build.yml` job `test` runs across OS matrix +- CI gating status: branch protection unknown; workflow test job appears intended as a primary validation signal +- Known ignored, skipped, muted, quarantined, or disabled tests: package-specific and runtime-dependent; inspect run output before claiming proof +- Test artifacts retained by CI: `allure-results-.zip` dumps are uploaded and later used for report generation + +If CI or local execution is non-gating, excludes important tests, or swallows failures, call that out before using the run as proof. + +## Local Expectation Controls + +Before each validation run, decide whether expectations reduce a real risk for the intended conclusion.
When they do, use the smallest fresh inline options supported by local `allure agent --help`. + +- Supported expectation mechanism: inline CLI options and advanced YAML/JSON file mode +- Exact test/file/suite/label/profile support: exact logical full name with `--expect-test`; full-name prefix with `--expect-prefix`; label with `--expect-label name=value`; environment with `--expect-env` +- Excluded-scope controls: `--forbid-label name=value` +- Evidence expectation controls: `--expect-step-containing <text>`, `--expect-steps <count>`, `--expect-attachments <count>`, `--expect-attachment <filter>` +- Check/assertion step-name controls: use `--expect-step-containing <text>` when the project records checks as test-scoped Allure steps +- Broad-audit fallback: run the narrowest practical command, then inspect `manifest/tests.jsonl` and `manifest/findings.jsonl` before claiming scope + +Prefer inline options. Use `--expectations <file>` only as advanced mode when the contract is too large, generated, or policy-controlled. + +When expectations are justified, they should state only the parts that matter for this run: + +- what claim or validation depth the run is meant to support +- what should run +- what should not run +- which profile, environment, variant, or parameter set is intended +- what important checks or evidence should be visible through supported reporting or documented step-name conventions +- why this scope is enough +- what the run cannot prove + +If local expectation support is unavailable or weak, run the narrowest practical command, review observed scope from manifests, and state that expectation checking was limited. + +Treat the run goal as a claim boundary for review, not as proof. If the goal is wrong or stale, keep the runtime evidence and report what the observed run actually supports. + +## Core Loops + +### Test Review Loop + +1. Identify the exact review scope and validation depth. +2.
Create the smallest meaningful expectations using local supported controls when they protect the review conclusion. +3. Run only that scope through the local agent test service or `allure agent`. +4. Print the run's `index.md` path. +5. Review `index.md`, `manifest/run.json`, `manifest/test-events.jsonl`, `manifest/tests.jsonl`, `manifest/findings.jsonl`, and relevant per-test markdown. +6. Inspect source code only after runtime evidence explains what executed. +7. Call out weak scope, weak evidence, execution-signal limits, or partial runtime modeling. + +### Test Authoring Loop + +1. Understand the feature, issue, expected behavior, and risk. +2. Read the `allure-test-agent` skill's test-design guidance when available. +3. Create the smallest meaningful expectations for the intended scope when they reduce a real validation risk. +4. Write or update focused tests without weakening useful coverage. +5. Run the intended scope through agent mode. +6. Review scope, checks, evidence, and execution signal before claiming validation. +7. Enrich tests when evidence is weak, then rerun with fresh temp output. + +### Evidence And Metadata Enrichment Loop + +Use this when tests pass but are hard to review: + +1. Identify weak evidence, missing checks, missing setup state, missing artifacts, or noisy metadata. +2. Prefer framework integrations and helper-boundary instrumentation over wrapping every line. +3. Add useful steps, attachments, parameters, descriptions, labels, or links using project conventions. +4. Redact sensitive values while preserving useful artifact shape. +5. Rerun the same intended scope and report evidence changes. + +### Coverage Review Loop + +1. Split broad audits into scoped groups when practical. +2. Give each group a unique temp output directory and use expectations only when the group has a known scope or supports a validation conclusion. +3. Run each group through agent mode. +4. 
Separate observed runtime coverage from inferred source-code coverage. +5. Mark review incomplete until every scoped group was validated through matched expectations, reviewed observed scope, or documented as a broad package-health audit. + +## Runtime Artifact Review + +After each agent-mode run: + +- print the run's `index.md` path +- read `manifest/run.json` +- read `manifest/test-events.jsonl` +- read `manifest/tests.jsonl` +- read `manifest/findings.jsonl` +- read relevant per-test markdown before inspecting source +- inspect global stderr/log artifacts when runner-visible failures are not represented as logical tests + +## Output, State, And Reruns + +Do not create persistent output or expectation paths. Use unique temp paths for every run. + +- Agent output policy: use omitted output for fresh temp output or an explicit unique temp dir; do not reuse output directories across runs +- Latest output recovery: `yarn allure agent latest` +- State directory override: `ALLURE_AGENT_STATE_DIR=<dir>` +- Rerun from latest/prior output: `yarn allure agent --rerun-latest -- <command>` or `yarn allure agent --rerun-from <output-dir> -- <command>` +- Selection/test plan support: `yarn allure agent select --latest` or `--from <output-dir>` with `--preset review|failed|unsuccessful|all` +- Parallel-run rule: output paths and expectation state must not be shared +- CI artifact retention: CI uploads Allure result dumps, not agent output directories unless a job is changed to do so + +## Project Metadata Conventions + +Fill only conventions that exist in this project.
+ +- Feature/story/component/service labels: existing tests commonly use Allure `epic`, `feature`, `story`, and `label`; package configs often set `module=` +- Owner/team metadata: unknown +- Severity or priority metadata: use only when already present or meaningful for review/policy +- Issue, bug, requirement, or known-defect links: unknown +- Suite/package/module taxonomy: package-level `module` labels from Vitest config are common +- Parameter naming and dynamic-history exclusions: follow existing package examples +- Metadata to avoid: decorative labels or unused taxonomy that does not help selection, triage, or review + +## Project Evidence Conventions + +Fill only conventions that exist in this project. + +- Test descriptions: follow existing package style +- Attachments: command output, manifests, text/JSON artifacts, screenshots/traces where relevant +- Step naming: use specific action/check names rather than generic wrappers +- Check/assertion step naming: use meaningful text that can be matched with `--expect-step-containing` when review requires visible checks +- Assertion/check visibility: prefer real test-scoped steps and useful attachments around behavior, not placeholder evidence +- Fixture/setup evidence: include only when it explains the behavior or failure +- Sensitive data redaction: redact secrets and tokens while preserving useful artifact shape + +## Acceptance Rules + +Accept a run only when: + +- observed scope matches the intended scope, or drift is explained +- coverage remains meaningful for the stated conclusion +- important checks are visible through supported reporting, documented step-name conventions, or source review covers the gap +- evidence is strong enough to explain what happened +- execution-signal limits are explicit +- no high-confidence placeholder or noop evidence findings remain +- partial runtime modeling is called out + +Console-only conclusions are provisional when agent output is absent or incomplete. 
diff --git a/packages/cli/README.md b/packages/cli/README.md index d9938daa6a8..952b9227b6b 100644 --- a/packages/cli/README.md +++ b/packages/cli/README.md @@ -74,7 +74,21 @@ For example: npx allure agent -- npm test ``` -`allure agent` runs with an agent-only profile by default. It creates a fresh output directory automatically, can load an expectations file with `--expectations`, and ignores configured presentation or export plugins such as Awesome or TestOps unless you explicitly fall back to the lower-level `ALLURE_AGENT_*` plus `allure run` flow. +`allure agent` runs with an agent-only profile by default. It creates a fresh output directory automatically, accepts compact inline expectations such as `--goal`, `--expect-tests`, `--expect-test`, `--expect-label`, and `--expect-step-containing`, and can still load an expectations file with `--expectations` when needed. Configured presentation or export plugins such as Awesome or TestOps are ignored for that run. + +Agents and setup tools can inspect the local structured capability contract without scraping help text: + +```bash +npx allure agent capabilities --json +``` + +After a run, agents can query the output directory without manually reading every manifest: + +```bash +npx allure agent query --latest summary +npx allure agent query --latest tests --status failed +npx allure agent query --from ./agent-output findings --severity high +``` ### Generating Reports Manually @@ -118,6 +132,7 @@ The Allure CLI includes several helpful global options. 
Use `--help` to explore ```bash npx allure run --help +npx allure agent capabilities --json npx allure agent --help npx allure watch --help ``` diff --git a/packages/cli/src/commands/agent-run.ts b/packages/cli/src/commands/agent-run.ts new file mode 100644 index 00000000000..ff8a19dfdb1 --- /dev/null +++ b/packages/cli/src/commands/agent-run.ts @@ -0,0 +1,216 @@ +import * as console from "node:console"; +import { mkdtemp, realpath, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join, resolve } from "node:path"; +import process, { exit } from "node:process"; + +import { AllureReport, isFileNotFoundError, readConfig } from "@allurereport/core"; +import { + createAgentTestPlanContext, + AgentUsageError, + formatAgentOutputLinks, + isPathInside, + normalizeAgentRerunPreset, + parseAgentLabelFilters, + resolveAgentStateDir, + writeLatestAgentState, + type AgentExpectationsInput, +} from "@allurereport/plugin-agent"; + +import { normalizeCommandEnvironmentOptions, resolveCommandEnvironment } from "../utils/environment.js"; +import { createChildAllureCliEnvironment, getActiveAllureCliCommand } from "../utils/execution-context.js"; +import { executeAllureRun, executeNestedAllureCommand } from "./commons/run.js"; + +export const formatAgentCommand = (args: string[]) => args.join(" "); + +export const printAgentOutputLinks = (outputDir: string) => { + for (const line of formatAgentOutputLinks(outputDir)) { + console.log(line); + } +}; + +export const persistLatestAgentState = async (value: Parameters[0]) => { + try { + await writeLatestAgentState(value); + } catch (error) { + console.error( + `Could not update latest agent output in ${resolveAgentStateDir(value.cwd)}: ${(error as Error).message}`, + ); + } +}; + +export type ExecuteAgentModeParams = { + configPath?: string; + cwd?: string; + output?: string; + expectations?: string; + inlineExpectations?: AgentExpectationsInput; + environment?: string; + environmentName?: string; + silent?: 
boolean; + rerunFrom?: string; + rerunLatest?: boolean; + rerunPreset?: string; + rerunEnvironments?: string[]; + rerunLabels?: string[]; + args: string[]; +}; + +export const executeAgentMode = async (params: ExecuteAgentModeParams) => { + const { + configPath, + cwd: configuredCwd, + output, + expectations, + inlineExpectations, + environment, + environmentName, + silent, + rerunFrom, + rerunLatest, + rerunPreset, + rerunEnvironments, + rerunLabels, + args, + } = params; + const command = args[0]; + const commandArgs = args.slice(1); + const cwd = await realpath(configuredCwd ?? process.cwd()); + const commandString = formatAgentCommand(args); + const hasRerunSource = !!rerunFrom || !!rerunLatest; + const hasRerunFilters = !!rerunPreset || !!rerunEnvironments?.length || !!rerunLabels?.length; + + if (!hasRerunSource && hasRerunFilters) { + throw new AgentUsageError("Use rerun filters only together with --rerun-from or --rerun-latest"); + } + + const rerunContext = await createAgentTestPlanContext({ + cwd, + from: rerunFrom, + latest: rerunLatest, + preset: normalizeAgentRerunPreset(rerunPreset), + environments: rerunEnvironments?.length ? rerunEnvironments : undefined, + labelFilters: parseAgentLabelFilters(rerunLabels), + }); + const childEnvironmentVariables = { + ...createChildAllureCliEnvironment("agent"), + ...(rerunContext ? { ALLURE_TESTPLAN_PATH: rerunContext.testPlanPath } : {}), + }; + + try { + if (getActiveAllureCliCommand()) { + console.log(commandString); + + const exitCode = await executeNestedAllureCommand({ + command, + commandArgs, + cwd, + ...(rerunContext ? { environmentVariables: { ALLURE_TESTPLAN_PATH: rerunContext.testPlanPath } } : {}), + silent, + }); + + exit(exitCode ?? -1); + return; + } + + const outputDir = output ? resolve(cwd, output) : await mkdtemp(join(tmpdir(), "allure-agent-")); + const expectationsPath = expectations ? 
resolve(cwd, expectations) : undefined; + const environmentOptions = { + environment, + environmentName, + }; + + normalizeCommandEnvironmentOptions(environmentOptions); + + if (expectationsPath && isPathInside(outputDir, expectationsPath)) { + throw new AgentUsageError( + `--expectations path ${JSON.stringify(expectationsPath)} must not be inside the agent output directory ${JSON.stringify(outputDir)}`, + ); + } + + const config = await readConfig(cwd, configPath, { + output: outputDir, + plugins: { + agent: { + options: { + outputDir, + command: commandString, + ...(expectationsPath ? { expectationsPath } : {}), + ...(inlineExpectations ? { expectations: inlineExpectations } : {}), + }, + }, + }, + }); + const resolvedEnvironment = resolveCommandEnvironment(config, environmentOptions); + + try { + await rm(outputDir, { recursive: true }); + } catch (error) { + if (!isFileNotFoundError(error)) { + console.error("could not clean output directory", error); + } + } + + const startedAt = new Date().toISOString(); + + await persistLatestAgentState({ + cwd, + outputDir, + expectationsPath, + command: commandString, + startedAt, + status: "running", + }); + + printAgentOutputLinks(outputDir); + if (expectationsPath) { + console.log(`agent expectations: ${expectationsPath}`); + } else if (inlineExpectations) { + console.log("agent expectations: CLI options"); + } + console.log(commandString); + + const allureReport = new AllureReport({ + ...config, + output: outputDir, + environment: resolvedEnvironment?.id, + open: false, + port: undefined, + qualityGate: undefined, + allureService: undefined, + realTime: false, + plugins: config.plugins, + }); + const knownIssues = await allureReport.store.allKnownIssues(); + + const { globalExitCode } = await executeAllureRun({ + allureReport, + knownIssues, + cwd, + command, + commandArgs, + environmentVariables: childEnvironmentVariables, + environment: resolvedEnvironment?.id, + withQualityGate: false, + logs: "pipe", + silent, + 
ignoreLogs: false, + logProcessExit: false, + }); + + await persistLatestAgentState({ + cwd, + outputDir, + expectationsPath, + command: commandString, + startedAt, + finishedAt: new Date().toISOString(), + status: "finished", + exitCode: globalExitCode.actual ?? globalExitCode.original, + }); + + exit(globalExitCode.actual ?? globalExitCode.original); + } finally { + await rerunContext?.cleanup(); + } +}; diff --git a/packages/cli/src/commands/agent.ts b/packages/cli/src/commands/agent.ts index 8f63d6c05fe..b5b85d57106 100644 --- a/packages/cli/src/commands/agent.ts +++ b/packages/cli/src/commands/agent.ts @@ -1,61 +1,54 @@ import * as console from "node:console"; -import { mkdir, mkdtemp, realpath, rm, writeFile } from "node:fs/promises"; +import { mkdir, mkdtemp, realpath, writeFile } from "node:fs/promises"; import { tmpdir } from "node:os"; -import { dirname, join, relative, resolve } from "node:path"; +import { dirname, join, resolve } from "node:path"; import process, { exit } from "node:process"; -import { AllureReport, isFileNotFoundError, readConfig } from "@allurereport/core"; -import { Command, Option, UsageError } from "clipanion"; - import { - createAgentTestPlanContext, + AGENT_FINDING_CATEGORIES, + AGENT_FINDING_SEVERITIES, + AGENT_TASK_MAP_HELP, + AGENT_TEST_STATUSES, + AgentExpectationUsageError, + buildAgentInlineExpectations, + buildAgentQueryPayload, + createAgentCapabilities, + formatAgentOutputLinks, + isAgentExpectationUsageError, + isAgentTaskMapHelpRequest, + isAgentUsageError, + loadAgentOutput, + normalizeAgentQueryLimit, + normalizeAgentQueryView, normalizeAgentRerunPreset, + normalizeRepeatedEnumValues, + normalizeRepeatedStringValues, parseAgentLabelFilters, + readLatestAgentState, resolveAgentSelectionOutputDir, + resolveAgentStateDir, selectAgentTestPlan, -} from "../utils/agent-select.js"; -import { readLatestAgentState, resolveAgentStateDir, writeLatestAgentState } from "../utils/agent-state.js"; -import { - environmentNameOption, 
- environmentOption, - normalizeCommandEnvironmentOptions, - resolveCommandEnvironment, -} from "../utils/environment.js"; -import { createChildAllureCliEnvironment, getActiveAllureCliCommand } from "../utils/execution-context.js"; -import { executeAllureRun, executeNestedAllureCommand } from "./commons/run.js"; - -const withProcessEnv = async (overrides: Record, fn: () => Promise): Promise => { - const previousValues = new Map(); - - for (const [key, value] of Object.entries(overrides)) { - previousValues.set(key, process.env[key]); - - if (value === undefined) { - delete process.env[key]; - continue; - } + validateAgentExpectationsFile, + writeLatestAgentState, + writeInvalidAgentExpectationOutput, + type AgentExpectationsInput, +} from "@allurereport/plugin-agent"; +import { Command, Option, UsageError } from "clipanion"; - process.env[key] = value; - } +export { AGENT_TASK_MAP_HELP, createAgentCapabilities, isAgentTaskMapHelpRequest }; - try { - return await fn(); - } finally { - for (const [key, value] of previousValues) { - if (value === undefined) { - delete process.env[key]; - continue; - } +const readOptionalString = (value: unknown): string | undefined => (typeof value === "string" ? value : undefined); - process.env[key] = value; - } - } -}; +const readOptionalBoolean = (value: unknown): boolean => value === true; + +const readOptionalStringArray = (value: unknown): string[] | undefined => (Array.isArray(value) ? value : undefined); -const isPathInside = (parentPath: string, candidatePath: string) => { - const rel = relative(parentPath, candidatePath); +const formatAgentCommand = (args: string[]) => args.join(" "); - return rel === "" || (!rel.startsWith("..") && rel !== "." 
&& !rel.startsWith("../")); +const printAgentOutputLinks = (outputDir: string) => { + for (const line of formatAgentOutputLinks(outputDir)) { + console.log(line); + } }; const persistLatestAgentState = async (value: Parameters[0]) => { @@ -68,11 +61,47 @@ const persistLatestAgentState = async (value: Parameters (typeof value === "string" ? value : undefined); +const agentEnvironmentOption = () => + Option.String("--environment,--env", { + description: + "Force specific environment ID to all tests in the run. Given environment has higher priority than the one defined in the config file (default: empty string)", + }); -const readOptionalBoolean = (value: unknown): boolean => value === true; +const agentEnvironmentNameOption = () => + Option.String("--environment-name", { + description: + "Force specific environment display name to all tests in the run. Has lower priority than --environment and higher than the config value (default: empty string)", + }); -const readOptionalStringArray = (value: unknown): string[] | undefined => (Array.isArray(value) ? 
value : undefined); +const throwCliUsageError = (error: unknown): never => { + if (isAgentUsageError(error)) { + throw new UsageError((error as Error).message); + } + + throw error; +}; + +export class AgentCapabilitiesCommand extends Command { + static paths = [["agent", "capabilities"]]; + + static usage = Command.Usage({ + description: "Print structured Allure agent capability information", + details: + "This command prints the locally supported agent-mode commands, expectation controls, output files, rerun support, and known unsupported capability families as JSON.", + examples: [ + ["agent capabilities", "Print agent capabilities as JSON"], + ["agent capabilities --json", "Print agent capabilities as JSON explicitly"], + ], + }); + + json = Option.Boolean("--json", true, { + description: "Print capabilities as JSON (default: true)", + }); + + async execute() { + console.log(JSON.stringify(createAgentCapabilities(), null, 2)); + } +} export class AgentCommand extends Command { static paths = [["agent"]]; @@ -105,9 +134,58 @@ export class AgentCommand extends Command { description: "The path to a YAML or JSON expectations file", }); - environment = environmentOption(); + goal = Option.Array("--goal", { + description: "The review goal to record in inline agent expectations", + }); + + taskId = Option.Array("--task-id", { + description: "The task or feature id to record in inline agent expectations", + }); + + expectTests = Option.Array("--expect-tests", { + description: "The expected number of visible logical tests in the intended scope", + }); + + expectLabels = Option.Array("--expect-label", { + description: "Expected label selector in name=value form. Repeat the option for multiple selectors", + }); + + expectEnvironments = Option.Array("--expect-env", { + description: "Expected environment id. Repeat the option for multiple environments", + }); + + expectFullNames = Option.Array("--expect-test", { + description: "Expected full test name. 
Repeat the option for multiple tests", + }); + + expectPrefixes = Option.Array("--expect-prefix", { + description: "Expected full-name prefix. Repeat the option for multiple prefixes", + }); + + forbidLabels = Option.Array("--forbid-label", { + description: "Forbidden label selector in name=value form. Repeat the option for multiple selectors", + }); + + expectStepContains = Option.Array("--expect-step-containing", { + description: "Require a test-scoped step name containing this text per evidence-target logical test", + }); - environmentName = environmentNameOption(); + expectSteps = Option.Array("--expect-steps", { + description: "Require at least this many meaningful steps per expected logical test", + }); + + expectAttachments = Option.Array("--expect-attachments", { + description: "Require at least this many non-missing attachments per expected logical test", + }); + + expectAttachmentFilters = Option.Array("--expect-attachment", { + description: + "Require a matching non-missing attachment per expected logical test. Use a file name or name=value/content-type=value", + }); + + environment = agentEnvironmentOption(); + + environmentName = agentEnvironmentNameOption(); silent = Option.Boolean("--silent", { description: "Don't pipe the process output logs to console (default: false)", @@ -137,22 +215,91 @@ export class AgentCommand extends Command { async execute() { const args = this.commandToRun.filter((arg) => arg !== "--") as string[] | undefined; + const configPath = readOptionalString(this.config); + const configuredCwd = readOptionalString(this.cwd); + const output = readOptionalString(this.output); + const expectations = readOptionalString(this.expectations); + + if (!args || !args.length) { + throw new UsageError("expecting command to be specified after --, e.g. 
allure agent -- npm run test"); + } + + try { + const inlineExpectations = buildAgentInlineExpectations({ + goal: this.goal, + taskId: this.taskId, + expectTests: this.expectTests, + expectLabels: readOptionalStringArray(this.expectLabels), + expectEnvironments: readOptionalStringArray(this.expectEnvironments), + expectFullNames: readOptionalStringArray(this.expectFullNames), + expectPrefixes: readOptionalStringArray(this.expectPrefixes), + forbidLabels: readOptionalStringArray(this.forbidLabels), + expectStepContains: readOptionalStringArray(this.expectStepContains), + expectSteps: this.expectSteps, + expectAttachments: this.expectAttachments, + expectAttachmentFilters: readOptionalStringArray(this.expectAttachmentFilters), + }); + + if (expectations && inlineExpectations) { + throw new AgentExpectationUsageError( + "Use either --expectations or inline expectation flags, not both", + "--expectations", + ); + } + + await validateAgentExpectationsFile({ + cwd: await realpath(configuredCwd ?? 
process.cwd()), + output, + expectations, + }); + + const { executeAgentMode } = await import("./agent-run.js"); + + await executeAgentMode({ + configPath, + cwd: configuredCwd, + output, + expectations, + inlineExpectations: inlineExpectations as AgentExpectationsInput | undefined, + environment: readOptionalString(this.environment), + environmentName: readOptionalString(this.environmentName), + silent: readOptionalBoolean(this.silent), + rerunFrom: readOptionalString(this.rerunFrom), + rerunLatest: readOptionalBoolean(this.rerunLatest), + rerunPreset: readOptionalString(this.rerunPreset), + rerunEnvironments: readOptionalStringArray(this.rerunEnvironments), + rerunLabels: readOptionalStringArray(this.rerunLabels), + args, + }); + } catch (error) { + if (!isAgentExpectationUsageError(error)) { + throwCliUsageError(error); + } - await executeAgentMode({ - configPath: readOptionalString(this.config), - cwd: readOptionalString(this.cwd), - output: readOptionalString(this.output), - expectations: readOptionalString(this.expectations), - environment: readOptionalString(this.environment), - environmentName: readOptionalString(this.environmentName), - silent: readOptionalBoolean(this.silent), - rerunFrom: readOptionalString(this.rerunFrom), - rerunLatest: readOptionalBoolean(this.rerunLatest), - rerunPreset: readOptionalString(this.rerunPreset), - rerunEnvironments: readOptionalStringArray(this.rerunEnvironments), - rerunLabels: readOptionalStringArray(this.rerunLabels), - args, - }); + const expectationError = error as AgentExpectationUsageError; + const cwd = await realpath(configuredCwd ?? process.cwd()); + const outputDir = output ? 
resolve(cwd, output) : await mkdtemp(join(tmpdir(), "allure-agent-")); + const commandString = formatAgentCommand(args); + const { generatedAt } = await writeInvalidAgentExpectationOutput({ + outputDir, + command: commandString, + error: expectationError, + }); + + await persistLatestAgentState({ + cwd, + outputDir, + command: commandString, + startedAt: generatedAt, + finishedAt: generatedAt, + status: "finished", + exitCode: 1, + }); + + printAgentOutputLinks(outputDir); + console.error(expectationError.message); + exit(1); + } } } @@ -160,11 +307,15 @@ export class AgentLatestCommand extends Command { static paths = [["agent", "latest"]]; static usage = Command.Usage({ - description: "Print the latest Allure agent output directory for the current project", - details: "This command prints the latest agent output directory recorded for the resolved project cwd.", + description: "Print the latest Allure agent output directory and index path for the current project", + details: + "This command prints the latest agent output directory and index.md path recorded for the resolved project cwd.", examples: [ - ["agent latest", "Print the latest agent output directory for the current project"], - ["agent latest --cwd ./packages/cli", "Print the latest agent output directory for a specific project cwd"], + ["agent latest", "Print the latest agent output directory and index path for the current project"], + [ + "agent latest --cwd ./packages/cli", + "Print the latest agent output directory and index path for a specific project cwd", + ], ], }); @@ -190,7 +341,7 @@ export class AgentLatestCommand extends Command { return; } - console.log(latestState.outputDir); + printAgentOutputLinks(latestState.outputDir); } } @@ -218,269 +369,202 @@ export class AgentStateDirCommand extends Command { } } -export class AgentSelectCommand extends Command { - static paths = [["agent", "select"]]; +export class AgentQueryCommand extends Command { + static paths = [["agent", "query"]]; static 
usage = Command.Usage({ - description: "Select tests from an existing agent output and emit a test plan", + description: "Query an existing Allure agent output directory as focused JSON", details: - "This command resolves a set of tests from a prior agent run and prints or writes a testplan.json payload.", + "This command reads a prior agent output directory and prints focused JSON for a run summary, test list, findings list, or one test. Use --latest to query the latest recorded output for the project, or --from to query a specific output directory.", examples: [ - ["agent select --from ./out/agent-output", "Print a test plan for the default review-targeted tests"], - ["agent select --latest --preset failed", "Print a test plan for failed tests from the latest project run"], - ["agent select --from ./out/agent-output --output ./testplan.json", "Write the selected test plan to a file"], + ["agent query --latest summary", "Print a summary for the latest agent output"], + ["agent query --from ./out/agent-output tests --status failed", "List failed tests from a prior output"], + [ + "agent query --from ./out/agent-output findings --severity high", + "List high-severity findings from a prior output", + ], + [ + 'agent query --latest test --test "suite should pass" --include-markdown', + "Print one test summary with its per-test markdown", + ], ], }); + view = Option.String({ + required: false, + name: "Query view: summary, tests, findings, or test (default: summary)", + }); + cwd = Option.String("--cwd", { description: "The project directory used to resolve --latest and relative paths (default: current working directory)", }); from = Option.String("--from", { - description: "The prior agent output directory to select tests from", + description: "The prior agent output directory to query", }); latest = Option.Boolean("--latest", { description: "Use the latest recorded agent output for the current project cwd", }); - preset = Option.String("--preset", { - description: 
"The selection preset: review, failed, unsuccessful, or all (default: review)", + statuses = Option.Array("--status", { + description: "Filter tests by status: failed, broken, unknown, skipped, or passed. Repeat for multiple statuses", }); environments = Option.Array("--environment", { - description: "Filter selected tests by environment id. Repeat the option for multiple environments", + description: "Filter tests by environment id. Repeat the option for multiple environments", }); labels = Option.Array("--label", { - description: "Filter selected tests by exact label name=value. Repeat the option for multiple filters", + description: "Filter tests by exact label name=value. Repeat the option for multiple filters", }); - output = Option.String("--output,-o", { - description: "Write the resulting test plan to this file instead of printing it to stdout", + severities = Option.Array("--severity", { + description: "Filter findings by severity: high, warning, or info. Repeat for multiple severities", }); - async execute() { - const cwd = await realpath(readOptionalString(this.cwd) ?? process.cwd()); - const environments = readOptionalStringArray(this.environments); - const labels = readOptionalStringArray(this.labels); - const outputDir = await resolveAgentSelectionOutputDir({ - cwd, - from: readOptionalString(this.from), - latest: readOptionalBoolean(this.latest), - }); - const selection = await selectAgentTestPlan({ - outputDir, - preset: normalizeAgentRerunPreset(readOptionalString(this.preset)), - environments: environments?.length ? environments : undefined, - labelFilters: parseAgentLabelFilters(labels), - }); - - if (!selection.testPlan.tests.length) { - console.error(`No tests matched selection in ${selection.outputDir}`); - exit(1); - return; - } + categories = Option.Array("--category", { + description: "Filter findings by category. 
Repeat the option for multiple categories", + }); - const serialized = `${JSON.stringify(selection.testPlan, null, 2)}\n`; + checks = Option.Array("--check", { + description: "Filter findings by check name. Repeat the option for multiple checks", + }); - const output = readOptionalString(this.output); + test = Option.String("--test", { + description: "Filter to one test by full name, test result id, history id, or markdown path", + }); - if (!output) { - console.log(serialized.trimEnd()); - return; - } + limit = Option.String("--limit", { + description: "Limit returned tests or findings to this non-negative count", + }); - const outputPath = resolve(cwd, output); + includeMarkdown = Option.Boolean("--include-markdown", { + description: "Include the per-test markdown content for the test view", + }); - await mkdir(dirname(outputPath), { recursive: true }); - await writeFile(outputPath, serialized, "utf-8"); - console.log(outputPath); - } -} + async execute() { + try { + const cwd = await realpath(readOptionalString(this.cwd) ?? 
process.cwd()); + const view = normalizeAgentQueryView(readOptionalString(this.view)); + const outputDir = await resolveAgentSelectionOutputDir({ + cwd, + from: readOptionalString(this.from), + latest: readOptionalBoolean(this.latest), + }); + const output = await loadAgentOutput(outputDir); + const payload = await buildAgentQueryPayload(output, view, { + environments: normalizeRepeatedStringValues(readOptionalStringArray(this.environments)), + labelFilters: parseAgentLabelFilters(readOptionalStringArray(this.labels)), + statuses: normalizeRepeatedEnumValues(readOptionalStringArray(this.statuses), AGENT_TEST_STATUSES, "--status"), + severities: normalizeRepeatedEnumValues( + readOptionalStringArray(this.severities), + AGENT_FINDING_SEVERITIES, + "--severity", + ), + categories: normalizeRepeatedEnumValues( + readOptionalStringArray(this.categories), + AGENT_FINDING_CATEGORIES, + "--category", + ), + checks: normalizeRepeatedStringValues(readOptionalStringArray(this.checks)), + test: readOptionalString(this.test), + limit: normalizeAgentQueryLimit(readOptionalString(this.limit)), + includeMarkdown: readOptionalBoolean(this.includeMarkdown), + }); -export const executeAgentMode = async (params: { - configPath?: string; - cwd?: string; - output?: string; - expectations?: string; - environment?: string; - environmentName?: string; - silent?: boolean; - rerunFrom?: string; - rerunLatest?: boolean; - rerunPreset?: string; - rerunEnvironments?: string[]; - rerunLabels?: string[]; - args?: string[]; -}) => { - const { - configPath, - cwd: configuredCwd, - output, - expectations, - environment, - environmentName, - silent, - rerunFrom, - rerunLatest, - rerunPreset, - rerunEnvironments, - rerunLabels, - args, - } = params; - - if (!args || !args.length) { - throw new UsageError("expecting command to be specified after --, e.g. 
allure agent -- npm run test"); + console.log(JSON.stringify(payload, null, 2)); + } catch (error) { + throwCliUsageError(error); + } } +} - const command = args[0]; - const commandArgs = args.slice(1); - const cwd = await realpath(configuredCwd ?? process.cwd()); - const commandString = `${command} ${commandArgs.join(" ")}`; - const hasRerunSource = !!rerunFrom || !!rerunLatest; - const hasRerunFilters = !!rerunPreset || !!rerunEnvironments?.length || !!rerunLabels?.length; - - if (!hasRerunSource && hasRerunFilters) { - throw new UsageError("Use rerun filters only together with --rerun-from or --rerun-latest"); - } +export class AgentSelectCommand extends Command { + static paths = [["agent", "select"]]; - const rerunContext = await createAgentTestPlanContext({ - cwd, - from: rerunFrom, - latest: rerunLatest, - preset: normalizeAgentRerunPreset(rerunPreset), - environments: rerunEnvironments?.length ? rerunEnvironments : undefined, - labelFilters: parseAgentLabelFilters(rerunLabels), + static usage = Command.Usage({ + description: "Select tests from an existing agent output and emit a test plan", + details: + "This command resolves a set of tests from a prior agent run and prints or writes a testplan.json payload. When --output is used, stdout contains the written test plan path, source output directory, preset, and selected test count.", + examples: [ + ["agent select --from ./out/agent-output", "Print a test plan for the default review-targeted tests"], + ["agent select --latest --preset failed", "Print a test plan for failed tests from the latest project run"], + ["agent select --from ./out/agent-output --output ./testplan.json", "Write the selected test plan to a file"], + ], }); - const childEnvironmentVariables = { - ...createChildAllureCliEnvironment("agent"), - ...(rerunContext ? 
{ ALLURE_TESTPLAN_PATH: rerunContext.testPlanPath } : {}), - }; - try { - if (getActiveAllureCliCommand()) { - console.log(commandString); + cwd = Option.String("--cwd", { + description: + "The project directory used to resolve --latest and relative paths (default: current working directory)", + }); - const exitCode = await executeNestedAllureCommand({ - command, - commandArgs, - cwd, - ...(rerunContext ? { environmentVariables: { ALLURE_TESTPLAN_PATH: rerunContext.testPlanPath } } : {}), - silent, - }); + from = Option.String("--from", { + description: "The prior agent output directory to select tests from", + }); - exit(exitCode ?? -1); - return; - } + latest = Option.Boolean("--latest", { + description: "Use the latest recorded agent output for the current project cwd", + }); - const outputDir = output ? resolve(cwd, output) : await mkdtemp(join(tmpdir(), "allure-agent-")); - const expectationsPath = expectations ? resolve(cwd, expectations) : undefined; - const environmentOptions = { - environment, - environmentName, - }; + preset = Option.String("--preset", { + description: "The selection preset: review, failed, unsuccessful, or all (default: review)", + }); - normalizeCommandEnvironmentOptions(environmentOptions); + environments = Option.Array("--environment", { + description: "Filter selected tests by environment id. Repeat the option for multiple environments", + }); - if (expectationsPath && isPathInside(outputDir, expectationsPath)) { - throw new UsageError( - `--expectations path ${JSON.stringify(expectationsPath)} must not be inside the agent output directory ${JSON.stringify(outputDir)}`, - ); - } + labels = Option.Array("--label", { + description: "Filter selected tests by exact label name=value. 
Repeat the option for multiple filters", + }); - const config = await readConfig(cwd, configPath, { - output: outputDir, - plugins: { - agent: { - options: { - outputDir, - }, - }, - }, - }); - const resolvedEnvironment = resolveCommandEnvironment(config, environmentOptions); + output = Option.String("--output,-o", { + description: "Write the resulting test plan to this file instead of printing it to stdout", + }); + async execute() { try { - await rm(outputDir, { recursive: true }); - } catch (error) { - if (!isFileNotFoundError(error)) { - console.error("could not clean output directory", error); + const cwd = await realpath(readOptionalString(this.cwd) ?? process.cwd()); + const environments = readOptionalStringArray(this.environments); + const labels = readOptionalStringArray(this.labels); + const outputDir = await resolveAgentSelectionOutputDir({ + cwd, + from: readOptionalString(this.from), + latest: readOptionalBoolean(this.latest), + }); + const selection = await selectAgentTestPlan({ + outputDir, + preset: normalizeAgentRerunPreset(readOptionalString(this.preset)), + environments: environments?.length ? 
environments : undefined, + labelFilters: parseAgentLabelFilters(labels), + }); + + if (!selection.testPlan.tests.length) { + console.error(`No tests matched selection in ${selection.outputDir}`); + exit(1); + return; } - } - const startedAt = new Date().toISOString(); + const serialized = `${JSON.stringify(selection.testPlan, null, 2)}\n`; + const output = readOptionalString(this.output); - await persistLatestAgentState({ - cwd, - outputDir, - expectationsPath, - command: commandString, - startedAt, - status: "running", - }); + if (!output) { + console.log(serialized.trimEnd()); + return; + } - console.log(`agent output: ${outputDir}`); - if (expectationsPath) { - console.log(`agent expectations: ${expectationsPath}`); - } - console.log(commandString); - - const allureReport = new AllureReport({ - ...config, - output: outputDir, - environment: resolvedEnvironment?.id, - open: false, - port: undefined, - qualityGate: undefined, - allureService: undefined, - realTime: false, - plugins: config.plugins, - }); - const knownIssues = await allureReport.store.allKnownIssues(); - - const { globalExitCode } = await withProcessEnv( - { - ALLURE_AGENT_OUTPUT: outputDir, - ALLURE_AGENT_EXPECTATIONS: expectationsPath, - ALLURE_AGENT_COMMAND: commandString, - ALLURE_AGENT_PROJECT_ROOT: cwd, - ALLURE_AGENT_NAME: undefined, - ALLURE_AGENT_LOOP_ID: undefined, - ALLURE_AGENT_TASK_ID: undefined, - ALLURE_AGENT_CONVERSATION_ID: undefined, - }, - async () => - await executeAllureRun({ - allureReport, - knownIssues, - cwd, - command, - commandArgs, - environmentVariables: childEnvironmentVariables, - environment: resolvedEnvironment?.id, - withQualityGate: false, - logs: "pipe", - silent, - ignoreLogs: false, - logProcessExit: false, - }), - ); + const outputPath = resolve(cwd, output); - await persistLatestAgentState({ - cwd, - outputDir, - expectationsPath, - command: commandString, - startedAt, - finishedAt: new Date().toISOString(), - status: "finished", - exitCode: 
globalExitCode.actual ?? globalExitCode.original, - }); - - exit(globalExitCode.actual ?? globalExitCode.original); - } finally { - await rerunContext?.cleanup(); + await mkdir(dirname(outputPath), { recursive: true }); + await writeFile(outputPath, serialized, "utf-8"); + console.log(`agent testplan: ${outputPath}`); + console.log(`agent selection source: ${selection.outputDir}`); + console.log(`agent selection preset: ${selection.preset}`); + console.log(`agent selection tests: ${selection.selectedTests.length}`); + } catch (error) { + throwCliUsageError(error); + } } -}; +} diff --git a/packages/cli/src/commands/run.ts b/packages/cli/src/commands/run.ts index 3aff1f40389..571ff73e646 100644 --- a/packages/cli/src/commands/run.ts +++ b/packages/cli/src/commands/run.ts @@ -1,6 +1,5 @@ import * as console from "node:console"; import { realpath, rm } from "node:fs/promises"; -import { resolve } from "node:path"; import process, { exit } from "node:process"; import { AllureReport, isFileNotFoundError, readConfig } from "@allurereport/core"; @@ -16,7 +15,6 @@ import { resolveCommandEnvironment, } from "../utils/environment.js"; import { createChildAllureCliEnvironment, getActiveAllureCliCommand } from "../utils/execution-context.js"; -import { executeAgentMode } from "./agent.js"; import { executeAllureRun, executeNestedAllureCommand } from "./commons/run.js"; export class RunCommand extends Command { @@ -105,24 +103,6 @@ export class RunCommand extends Command { throw new UsageError("expecting command to be specified after --, e.g. allure run -- npm run test"); } - const legacyAgentOutput = process.env.ALLURE_AGENT_OUTPUT; - - if (legacyAgentOutput) { - await executeAgentMode({ - configPath: this.config, - cwd: this.cwd, - output: resolve(process.cwd(), legacyAgentOutput), - expectations: process.env.ALLURE_AGENT_EXPECTATIONS - ? 
resolve(process.cwd(), process.env.ALLURE_AGENT_EXPECTATIONS) - : undefined, - environment: this.environment, - environmentName: this.environmentName, - silent: this.silent, - args, - }); - return; - } - const before = new Date().getTime(); process.on("exit", (exitCode) => { diff --git a/packages/cli/src/index.ts b/packages/cli/src/index.ts index 031b3ec44f0..bf0cc3ef344 100644 --- a/packages/cli/src/index.ts +++ b/packages/cli/src/index.ts @@ -1,11 +1,14 @@ import { readFileSync } from "node:fs"; -import { argv } from "node:process"; +import process, { argv } from "node:process"; import { Builtins, Cli } from "clipanion"; import { AgentCommand, + AGENT_TASK_MAP_HELP, + AgentCapabilitiesCommand, AgentLatestCommand, + AgentQueryCommand, AgentSelectCommand, AgentStateDirCommand, Allure2Command, @@ -27,6 +30,7 @@ import { SlackCommand, TestPlanCommand, WatchCommand, + isAgentTaskMapHelpRequest, } from "./commands/index.js"; const [node, app, ...args] = argv; @@ -43,7 +47,9 @@ const cli = new Cli({ cli.register(AwesomeCommand); cli.register(Allure2Command); +cli.register(AgentCapabilitiesCommand); cli.register(AgentLatestCommand); +cli.register(AgentQueryCommand); cli.register(AgentSelectCommand); cli.register(AgentStateDirCommand); cli.register(AgentCommand); @@ -66,7 +72,19 @@ cli.register(ResultsPackCommand); cli.register(ResultsUnpackCommand); cli.register(Builtins.HelpCommand); cli.register(Builtins.VersionCommand); -cli.runExit(args); +void cli + .run(args) + .then((exitCode) => { + if (exitCode === 0 && isAgentTaskMapHelpRequest(args)) { + process.stdout.write(`\n${AGENT_TASK_MAP_HELP}`); + } + + process.exitCode = exitCode; + }) + .catch((error: unknown) => { + console.error(error); + process.exitCode = 1; + }); export { type Config as AllureConfig, defineConfig } from "@allurereport/plugin-api"; export { defaultChartsConfig } from "@allurereport/charts-api"; diff --git a/packages/cli/src/utils/index.ts b/packages/cli/src/utils/index.ts index 
a13db4c6f55..1513f39e825 100644 --- a/packages/cli/src/utils/index.ts +++ b/packages/cli/src/utils/index.ts @@ -2,6 +2,4 @@ export * from "./process.js"; export * from "./terminal.js"; export * from "./logs.js"; export * from "./execution-context.js"; -export * from "./agent-state.js"; -export * from "./agent-select.js"; export * from "./fileSystem.js"; diff --git a/packages/cli/test/commands/agent.test.ts b/packages/cli/test/commands/agent.test.ts index 9c32fb8bbe6..6de69f72c5c 100644 --- a/packages/cli/test/commands/agent.test.ts +++ b/packages/cli/test/commands/agent.test.ts @@ -1,14 +1,29 @@ import { resolve } from "node:path"; import { readConfig } from "@allurereport/core"; +import { + AgentExpectationUsageError, + AgentUsageError, + buildAgentInlineExpectations, + createAgentTestPlanContext, + validateAgentExpectationsFile, + writeInvalidAgentExpectationOutput, + writeLatestAgentState, +} from "@allurereport/plugin-agent"; import { epic, feature, label, story } from "allure-js-commons"; import { run, UsageError } from "clipanion"; import { type Mock, beforeEach, describe, expect, it, vi } from "vitest"; -import { AgentCommand } from "../../src/commands/agent.js"; +import { + AgentCommand, + AgentCapabilitiesCommand, + AgentLatestCommand, + AgentQueryCommand, + AgentSelectCommand, + AgentStateDirCommand, + createAgentCapabilities, +} from "../../src/commands/agent.js"; import { executeAllureRun, executeNestedAllureCommand } from "../../src/commands/commons/run.js"; -import { createAgentTestPlanContext } from "../../src/utils/agent-select.js"; -import { writeLatestAgentState } from "../../src/utils/agent-state.js"; import { ALLURE_CLI_ACTIVE_COMMAND_ENV } from "../../src/utils/execution-context.js"; const { exitMock } = vi.hoisted(() => { @@ -29,8 +44,11 @@ vi.mock("node:process", async (importOriginal) => ({ vi.mock("node:fs/promises", async (importOriginal) => ({ ...(await importOriginal()), realpath: vi.fn().mockResolvedValue("/cwd"), + readFile: 
vi.fn().mockResolvedValue("goal: valid file expectations\n"), mkdtemp: vi.fn().mockResolvedValue("/tmp/allure-agent-123"), rm: vi.fn().mockResolvedValue(undefined), + mkdir: vi.fn().mockResolvedValue(undefined), + writeFile: vi.fn().mockResolvedValue(undefined), })); vi.mock("@allurereport/core", async () => { const { AllureReportMock } = await import("../utils.js"); @@ -51,27 +69,42 @@ vi.mock("../../src/commands/commons/run.js", () => ({ }), executeNestedAllureCommand: vi.fn().mockResolvedValue(0), })); -vi.mock("../../src/utils/agent-state.js", () => ({ - resolveAgentStateDir: vi.fn().mockReturnValue("/tmp/allure-agent-state-0f0810f05e3f7d8f"), - writeLatestAgentState: vi.fn().mockResolvedValue(undefined), - readLatestAgentState: vi.fn().mockResolvedValue(undefined), -})); -vi.mock("../../src/utils/agent-select.js", () => ({ - normalizeAgentRerunPreset: vi.fn((value?: string) => value ?? "review"), - parseAgentLabelFilters: vi.fn((values?: string[]) => - (values ?? []).map((value) => { - const [name, filterValue] = value.split("="); - - return { - name, - value: filterValue, - }; +vi.mock("@allurereport/plugin-agent", async (importOriginal) => { + const actual = await importOriginal(); + + return { + ...actual, + resolveAgentStateDir: vi.fn().mockReturnValue("/tmp/allure-agent-state-0f0810f05e3f7d8f"), + writeLatestAgentState: vi.fn().mockResolvedValue(undefined), + readLatestAgentState: vi.fn().mockResolvedValue(undefined), + normalizeAgentRerunPreset: vi.fn((value?: string) => value ?? "review"), + parseAgentLabelFilters: vi.fn((values?: string[]) => + (values ?? []).map((value) => { + const [name, filterValue] = value.split("="); + + return { + name, + value: filterValue, + }; + }), + ), + resolveAgentSelectionOutputDir: vi.fn(), + selectAgentTestPlan: vi.fn(), + createAgentTestPlanContext: vi.fn().mockResolvedValue(undefined), + buildAgentInlineExpectations: vi.fn((options: Record) => + Object.values(options).some((value) => + Array.isArray(value) ? 
value.length > 0 : typeof value === "string" && value.length > 0, + ) + ? { goal: "mock inline expectations" } + : undefined, + ), + validateAgentExpectationsFile: vi.fn().mockResolvedValue(undefined), + writeInvalidAgentExpectationOutput: vi.fn().mockResolvedValue({ + outputDir: "/tmp/allure-agent-123", + generatedAt: "2026-06-10T16:00:00.000Z", }), - ), - resolveAgentSelectionOutputDir: vi.fn(), - selectAgentTestPlan: vi.fn(), - createAgentTestPlanContext: vi.fn().mockResolvedValue(undefined), -})); + }; +}); beforeEach(async () => { await epic("coverage"); @@ -83,6 +116,37 @@ beforeEach(async () => { const { AllureReportMock } = await import("../utils.js"); + (executeAllureRun as Mock).mockReset(); + (executeNestedAllureCommand as Mock).mockReset(); + (writeLatestAgentState as Mock).mockReset(); + (createAgentTestPlanContext as Mock).mockReset(); + (buildAgentInlineExpectations as Mock).mockReset(); + (validateAgentExpectationsFile as Mock).mockReset(); + (writeInvalidAgentExpectationOutput as Mock).mockReset(); + (readConfig as Mock).mockReset(); + + (executeAllureRun as Mock).mockResolvedValue({ + globalExitCode: { + original: 0, + actual: undefined, + }, + testProcessResult: null, + }); + (executeNestedAllureCommand as Mock).mockResolvedValue(0); + (writeLatestAgentState as Mock).mockResolvedValue(undefined); + (createAgentTestPlanContext as Mock).mockResolvedValue(undefined); + (buildAgentInlineExpectations as Mock).mockImplementation((options: Record) => + Object.values(options).some((value) => + Array.isArray(value) ? value.length > 0 : typeof value === "string" && value.length > 0, + ) + ? 
{ goal: "mock inline expectations" } + : undefined, + ); + (validateAgentExpectationsFile as Mock).mockResolvedValue(undefined); + (writeInvalidAgentExpectationOutput as Mock).mockResolvedValue({ + outputDir: "/tmp/allure-agent-123", + generatedAt: "2026-06-10T16:00:00.000Z", + }); AllureReportMock.prototype.store = { allKnownIssues: vi.fn().mockResolvedValue([]), }; @@ -109,6 +173,123 @@ beforeEach(async () => { }); describe("agent command", () => { + const stripAnsi = (value: string) => value.replace(new RegExp(`${String.fromCharCode(27)}\\[[0-9;]*m`, "g"), ""); + + const captureAgentHelp = async (args: string[]) => { + const stdout = { write: vi.fn() }; + + const exitCode = await run( + { binaryName: "allure" }, + [ + AgentCapabilitiesCommand, + AgentLatestCommand, + AgentQueryCommand, + AgentSelectCommand, + AgentStateDirCommand, + AgentCommand, + ], + args, + { + stdout: stdout as unknown as NodeJS.WritableStream, + }, + ); + + expect(exitCode).toBe(0); + + return stripAnsi(stdout.write.mock.calls.map(([chunk]) => String(chunk)).join("")); + }; + + it.each([ + { + command: "agent", + args: ["agent", "--help"], + expected: [ + "Multiple commands match your selection:", + "allure agent capabilities", + "allure agent latest", + "allure agent query", + "allure agent select", + "allure agent state-dir", + "allure agent [--config", + "--expect-tests #0", + "--expect-label #0", + "--expect-test #0", + "--expect-step-containing #0", + "--rerun-latest", + "Run again with -h=", + ], + }, + { + command: "agent capabilities", + args: ["agent", "capabilities", "--help"], + expected: ["Print structured Allure agent capability information", "$ allure agent capabilities", "--json"], + }, + { + command: "agent query", + args: ["agent", "query", "--help"], + expected: [ + "Query an existing Allure agent output directory as focused JSON", + "$ allure agent query", + "--latest", + "--from #0", + "--status #0", + "--severity #0", + "--include-markdown", + ], + }, + { + command: 
"agent select", + args: ["agent", "select", "--help"], + expected: [ + "Select tests from an existing agent output and emit a test plan", + "$ allure agent select", + "--latest", + "--preset #0", + "--environment #0", + "--label #0", + "--output,-o #0", + ], + }, + { + command: "agent latest", + args: ["agent", "latest", "--help"], + expected: [ + "Print the latest Allure agent output directory and index path for the current project", + "$ allure agent latest", + "--cwd #0", + ], + }, + { + command: "agent state-dir", + args: ["agent", "state-dir", "--help"], + expected: [ + "Print the Allure agent state directory for the current project", + "$ allure agent state-dir", + "--cwd #0", + ], + }, + ])("should expose $command help for local capability detection", async ({ args, expected }) => { + const output = await captureAgentHelp(args); + + expected.forEach((line) => { + expect(output).toContain(line); + }); + }); + + it("should print structured agent capabilities as JSON", async () => { + const consoleModule = await import("node:console"); + const logMock = consoleModule.log as Mock; + + const exitCode = await run(AgentCapabilitiesCommand, ["agent", "capabilities", "--json"]); + + expect(exitCode).toBe(0); + expect(logMock).toHaveBeenCalledTimes(1); + + const payload = JSON.parse(logMock.mock.calls[0][0]) as ReturnType; + + expect(payload).toEqual(createAgentCapabilities()); + }); + it("should fail with usage error when command to run is missing", async () => { const command = new AgentCommand(); @@ -117,12 +298,13 @@ describe("agent command", () => { await expect(command.execute()).rejects.toBeInstanceOf(UsageError); }); - it("should reject expectations files placed inside the output directory", async () => { + it("should translate plugin-agent expectation file validation failures to usage errors", async () => { const command = new AgentCommand(); command.output = "./custom-output"; command.expectations = "./custom-output/expected.yaml"; command.commandToRun = 
["--", "npm", "test"]; + (validateAgentExpectationsFile as Mock).mockRejectedValueOnce(new AgentUsageError("invalid expectation path")); await expect(command.execute()).rejects.toBeInstanceOf(UsageError); @@ -143,6 +325,7 @@ describe("agent command", () => { agent: { options: { outputDir: "/tmp/allure-agent-123", + command: "npm test", }, }, }, @@ -176,7 +359,8 @@ describe("agent command", () => { }), ); expect(logMock).toHaveBeenNthCalledWith(1, "agent output: /tmp/allure-agent-123"); - expect(logMock).toHaveBeenNthCalledWith(2, "npm test"); + expect(logMock).toHaveBeenNthCalledWith(2, "agent index: /tmp/allure-agent-123/index.md"); + expect(logMock).toHaveBeenNthCalledWith(3, "npm test"); expect(logMock.mock.invocationCallOrder[0]).toBeLessThan((executeAllureRun as Mock).mock.invocationCallOrder[0]); expect(writeLatestAgentState).toHaveBeenNthCalledWith( 1, @@ -246,14 +430,170 @@ describe("agent command", () => { agent: { options: { outputDir: resolvedOutput, + command: "npm test", + expectationsPath: resolvedExpectations, }, }, }, }); expect(consoleModule.log).toHaveBeenCalledWith(`agent output: ${resolvedOutput}`); + expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${resolvedOutput}/index.md`); expect(consoleModule.log).toHaveBeenCalledWith(`agent expectations: ${resolvedExpectations}`); }); + it("should pass inline expectation options to plugin-agent and readConfig", async () => { + const consoleModule = await import("node:console"); + + await run(AgentCommand, [ + "agent", + "--goal", + "Review agent visibility", + "--task-id", + "agent-inline", + "--expect-tests", + "2", + "--expect-label", + "module=plugin-agent", + "--expect-env", + "node", + "--expect-test", + "suite should pass", + "--expect-prefix", + "suite", + "--forbid-label", + "layer=e2e", + "--expect-step-containing", + "assert expected behavior", + "--expect-steps", + "1", + "--expect-attachments", + "1", + "--expect-attachment", + "trace.zip", + "--expect-attachment", + 
"content-type=application/json", + "--", + "npm", + "test", + ]); + + expect(buildAgentInlineExpectations).toHaveBeenCalledWith({ + goal: ["Review agent visibility"], + taskId: ["agent-inline"], + expectTests: ["2"], + expectLabels: ["module=plugin-agent"], + expectEnvironments: ["node"], + expectFullNames: ["suite should pass"], + expectPrefixes: ["suite"], + forbidLabels: ["layer=e2e"], + expectStepContains: ["assert expected behavior"], + expectSteps: ["1"], + expectAttachments: ["1"], + expectAttachmentFilters: ["trace.zip", "content-type=application/json"], + }); + expect(readConfig).toHaveBeenCalledWith( + "/cwd", + undefined, + expect.objectContaining({ + plugins: { + agent: { + options: expect.objectContaining({ + expectations: { goal: "mock inline expectations" }, + }), + }, + }, + }), + ); + expect(consoleModule.log).toHaveBeenCalledWith("agent expectations: CLI options"); + expect(exitMock).toHaveBeenCalledWith(0); + }); + + it("should reject mixing an expectations file with inline expectation flags", async () => { + const consoleModule = await import("node:console"); + const command = new AgentCommand(); + + command.expectations = "./expected.yaml"; + command.goal = ["Review"]; + command.commandToRun = ["--", "npm", "test"]; + + await command.execute(); + + expect(writeInvalidAgentExpectationOutput).toHaveBeenCalledWith({ + outputDir: "/tmp/allure-agent-123", + command: "npm test", + error: expect.any(AgentExpectationUsageError), + }); + expect(consoleModule.error).toHaveBeenCalledWith("Use either --expectations or inline expectation flags, not both"); + expect(executeAllureRun).not.toHaveBeenCalled(); + expect(exitMock).toHaveBeenCalledWith(1); + }); + + it("should write invalid agent output when plugin-agent inline expectation parsing fails", async () => { + const consoleModule = await import("node:console"); + const command = new AgentCommand(); + const outputDir = resolve("/cwd", "./agent-invalid"); + const error = new AgentExpectationUsageError( + 
'Invalid --expect-label "module". Expected the form name=value, for example module=cli', + "--expect-label", + ); + + (buildAgentInlineExpectations as Mock).mockImplementationOnce(() => { + throw error; + }); + + command.output = "./agent-invalid"; + command.expectLabels = ["module"]; + command.commandToRun = ["--", "npm", "test"]; + + await command.execute(); + + expect(writeInvalidAgentExpectationOutput).toHaveBeenCalledWith({ + outputDir, + command: "npm test", + error, + }); + expect(readConfig).not.toHaveBeenCalled(); + expect(executeAllureRun).not.toHaveBeenCalled(); + expect(consoleModule.log).toHaveBeenCalledWith(`agent output: ${outputDir}`); + expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${outputDir}/index.md`); + expect(consoleModule.error).toHaveBeenCalledWith( + 'Invalid --expect-label "module". Expected the form name=value, for example module=cli', + ); + expect(exitMock).toHaveBeenCalledWith(1); + }); + + it("should write invalid agent output when plugin-agent expectation file validation fails", async () => { + const consoleModule = await import("node:console"); + const command = new AgentCommand(); + const outputDir = resolve("/cwd", "./agent-invalid-file"); + const error = new AgentExpectationUsageError( + "Could not load expectations from /cwd/expected.yaml: Expected a YAML or JSON object", + "--expectations", + ); + + (validateAgentExpectationsFile as Mock).mockRejectedValueOnce(error); + + command.output = "./agent-invalid-file"; + command.expectations = "./expected.yaml"; + command.commandToRun = ["--", "npm", "test"]; + + await command.execute(); + + expect(writeInvalidAgentExpectationOutput).toHaveBeenCalledWith({ + outputDir, + command: "npm test", + error, + }); + expect(readConfig).not.toHaveBeenCalled(); + expect(executeAllureRun).not.toHaveBeenCalled(); + expect(consoleModule.log).toHaveBeenCalledWith(`agent output: ${outputDir}`); + expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${outputDir}/index.md`); + 
expect(consoleModule.error).toHaveBeenCalledWith( + "Could not load expectations from /cwd/expected.yaml: Expected a YAML or JSON object", + ); + expect(exitMock).toHaveBeenCalledWith(1); + }); + it("should pass ALLURE_TESTPLAN_PATH to the child process when rerun-from is enabled", async () => { const cleanupMock = vi.fn().mockResolvedValue(undefined); @@ -338,36 +678,10 @@ describe("agent command", () => { delete process.env[ALLURE_CLI_ACTIVE_COMMAND_ENV]; }); - it("should sandbox ALLURE_AGENT_* variables during execution and restore them afterwards", async () => { + it("should pass agent metadata to the plugin through options", async () => { const resolvedOutput = resolve("/cwd", "./custom-output"); const resolvedExpectations = resolve("/cwd", "./expected.yaml"); - process.env.ALLURE_AGENT_OUTPUT = "ambient-output"; - process.env.ALLURE_AGENT_EXPECTATIONS = "ambient-expected"; - process.env.ALLURE_AGENT_NAME = "ambient-name"; - process.env.ALLURE_AGENT_LOOP_ID = "ambient-loop"; - process.env.ALLURE_AGENT_TASK_ID = "ambient-task"; - process.env.ALLURE_AGENT_CONVERSATION_ID = "ambient-conversation"; - - (executeAllureRun as Mock).mockImplementationOnce(async () => { - expect(process.env.ALLURE_AGENT_OUTPUT).toBe(resolvedOutput); - expect(process.env.ALLURE_AGENT_EXPECTATIONS).toBe(resolvedExpectations); - expect(process.env.ALLURE_AGENT_COMMAND).toBe("npm test"); - expect(process.env.ALLURE_AGENT_PROJECT_ROOT).toBe("/cwd"); - expect(process.env.ALLURE_AGENT_NAME).toBeUndefined(); - expect(process.env.ALLURE_AGENT_LOOP_ID).toBeUndefined(); - expect(process.env.ALLURE_AGENT_TASK_ID).toBeUndefined(); - expect(process.env.ALLURE_AGENT_CONVERSATION_ID).toBeUndefined(); - - return { - globalExitCode: { - original: 0, - actual: undefined, - }, - testProcessResult: null, - }; - }); - await run(AgentCommand, [ "agent", "--output", @@ -379,18 +693,17 @@ describe("agent command", () => { "test", ]); - expect(process.env.ALLURE_AGENT_OUTPUT).toBe("ambient-output"); - 
expect(process.env.ALLURE_AGENT_EXPECTATIONS).toBe("ambient-expected"); - expect(process.env.ALLURE_AGENT_NAME).toBe("ambient-name"); - expect(process.env.ALLURE_AGENT_LOOP_ID).toBe("ambient-loop"); - expect(process.env.ALLURE_AGENT_TASK_ID).toBe("ambient-task"); - expect(process.env.ALLURE_AGENT_CONVERSATION_ID).toBe("ambient-conversation"); - - delete process.env.ALLURE_AGENT_OUTPUT; - delete process.env.ALLURE_AGENT_EXPECTATIONS; - delete process.env.ALLURE_AGENT_NAME; - delete process.env.ALLURE_AGENT_LOOP_ID; - delete process.env.ALLURE_AGENT_TASK_ID; - delete process.env.ALLURE_AGENT_CONVERSATION_ID; + expect(readConfig).toHaveBeenCalledWith("/cwd", undefined, { + output: resolvedOutput, + plugins: { + agent: { + options: { + outputDir: resolvedOutput, + command: "npm test", + expectationsPath: resolvedExpectations, + }, + }, + }, + }); }); }); diff --git a/packages/cli/test/commands/agentLatest.test.ts b/packages/cli/test/commands/agentLatest.test.ts index d0145e2f8fb..dff18eac62a 100644 --- a/packages/cli/test/commands/agentLatest.test.ts +++ b/packages/cli/test/commands/agentLatest.test.ts @@ -1,9 +1,9 @@ +import { readLatestAgentState, resolveAgentStateDir } from "@allurereport/plugin-agent"; import { epic, feature, label, story } from "allure-js-commons"; import { run } from "clipanion"; import { type Mock, beforeEach, describe, expect, it, vi } from "vitest"; import { AgentLatestCommand, AgentStateDirCommand } from "../../src/commands/agent.js"; -import { readLatestAgentState, resolveAgentStateDir } from "../../src/utils/agent-state.js"; vi.mock("node:console", async (importOriginal) => ({ ...(await importOriginal()), @@ -18,11 +18,16 @@ vi.mock("node:fs/promises", async (importOriginal) => ({ ...(await importOriginal()), realpath: vi.fn().mockResolvedValue("/cwd"), })); -vi.mock("../../src/utils/agent-state.js", () => ({ - readLatestAgentState: vi.fn(), - resolveAgentStateDir: vi.fn(), - writeLatestAgentState: vi.fn(), -})); 
+vi.mock("@allurereport/plugin-agent", async (importOriginal) => { + const actual = await importOriginal(); + + return { + ...actual, + readLatestAgentState: vi.fn(), + resolveAgentStateDir: vi.fn(), + writeLatestAgentState: vi.fn(), + }; +}); beforeEach(async () => { await epic("coverage"); @@ -33,7 +38,7 @@ beforeEach(async () => { }); describe("agent latest command", () => { - it("should print the latest output directory for the resolved project cwd", async () => { + it("should print the latest output directory and index path for the resolved project cwd", async () => { const consoleModule = await import("node:console"); (readLatestAgentState as Mock).mockResolvedValueOnce({ @@ -48,7 +53,8 @@ describe("agent latest command", () => { await run(AgentLatestCommand, ["agent", "latest"]); expect(readLatestAgentState).toHaveBeenCalledWith("/cwd"); - expect(consoleModule.log).toHaveBeenCalledWith("/tmp/allure-agent-123"); + expect(consoleModule.log).toHaveBeenNthCalledWith(1, "agent output: /tmp/allure-agent-123"); + expect(consoleModule.log).toHaveBeenNthCalledWith(2, "agent index: /tmp/allure-agent-123/index.md"); }); it("should exit with code 1 when no latest output exists for the project", async () => { diff --git a/packages/cli/test/commands/agentQuery.test.ts b/packages/cli/test/commands/agentQuery.test.ts new file mode 100644 index 00000000000..f6a53254090 --- /dev/null +++ b/packages/cli/test/commands/agentQuery.test.ts @@ -0,0 +1,199 @@ +import { + AgentUsageError, + buildAgentQueryPayload, + loadAgentOutput, + resolveAgentSelectionOutputDir, + type AgentOutputBundle, +} from "@allurereport/plugin-agent"; +import { epic, feature, label, story } from "allure-js-commons"; +import { run } from "clipanion"; +import { type Mock, beforeEach, describe, expect, it, vi } from "vitest"; + +import { AgentQueryCommand } from "../../src/commands/agent.js"; + +vi.mock("node:console", async (importOriginal) => ({ + ...(await importOriginal()), + log: vi.fn(), + error: 
vi.fn(), +})); +vi.mock("node:fs/promises", async (importOriginal) => ({ + ...(await importOriginal()), + realpath: vi.fn().mockResolvedValue("/cwd"), +})); +vi.mock("@allurereport/plugin-agent", async (importOriginal) => { + const actual = await importOriginal(); + + return { + ...actual, + buildAgentQueryPayload: vi.fn(), + loadAgentOutput: vi.fn(), + resolveAgentSelectionOutputDir: vi.fn(), + }; +}); + +const agentOutput = { + outputDir: "/tmp/agent-output", +} as AgentOutputBundle; + +const readLoggedJson = async () => { + const consoleModule = await import("node:console"); + const logMock = consoleModule.log as Mock; + + expect(logMock).toHaveBeenCalledTimes(1); + + return JSON.parse(logMock.mock.calls[0][0]) as T; +}; + +beforeEach(async () => { + await epic("coverage"); + await feature("agent-mode"); + await story("agentQuery"); + await label("coverage", "agent-mode"); + vi.clearAllMocks(); + (resolveAgentSelectionOutputDir as Mock).mockResolvedValue("/tmp/agent-output"); + (loadAgentOutput as Mock).mockResolvedValue(agentOutput); + (buildAgentQueryPayload as Mock).mockResolvedValue({ + schema: "allure-agent-query/v1", + view: "summary", + output_dir: "/tmp/agent-output", + }); +}); + +describe("agent query command", () => { + it("should resolve the latest output and print the plugin-agent summary payload", async () => { + await run(AgentQueryCommand, ["agent", "query", "--latest", "summary"]); + + expect(resolveAgentSelectionOutputDir).toHaveBeenCalledWith({ + cwd: "/cwd", + from: undefined, + latest: true, + }); + expect(loadAgentOutput).toHaveBeenCalledWith("/tmp/agent-output"); + expect(buildAgentQueryPayload).toHaveBeenCalledWith(agentOutput, "summary", { + environments: undefined, + labelFilters: [], + statuses: undefined, + severities: undefined, + categories: undefined, + checks: undefined, + test: undefined, + limit: undefined, + includeMarkdown: false, + }); + + await expect(readLoggedJson()).resolves.toEqual({ + schema: "allure-agent-query/v1", + 
view: "summary", + output_dir: "/tmp/agent-output", + }); + }); + + it("should pass test query filters to plugin-agent", async () => { + (buildAgentQueryPayload as Mock).mockResolvedValueOnce({ + schema: "allure-agent-query/v1", + view: "tests", + output_dir: "/tmp/agent-output", + tests: [], + }); + + await run(AgentQueryCommand, [ + "agent", + "query", + "tests", + "--from", + "./agent-output", + "--status", + "failed", + "--label", + "module=cli", + "--limit", + "1", + ]); + + expect(resolveAgentSelectionOutputDir).toHaveBeenCalledWith({ + cwd: "/cwd", + from: "./agent-output", + latest: false, + }); + expect(buildAgentQueryPayload).toHaveBeenCalledWith(agentOutput, "tests", { + environments: undefined, + labelFilters: [{ name: "module", value: "cli" }], + statuses: ["failed"], + severities: undefined, + categories: undefined, + checks: undefined, + test: undefined, + limit: 1, + includeMarkdown: false, + }); + + await expect(readLoggedJson()).resolves.toEqual( + expect.objectContaining({ + view: "tests", + }), + ); + }); + + it("should pass finding query filters to plugin-agent", async () => { + await run(AgentQueryCommand, [ + "agent", + "query", + "findings", + "--from", + "./agent-output", + "--severity", + "high", + "--category", + "scope", + "--check", + "expected-label-missing", + "--test", + "suite should fail", + ]); + + expect(buildAgentQueryPayload).toHaveBeenCalledWith(agentOutput, "findings", { + environments: undefined, + labelFilters: [], + statuses: undefined, + severities: ["high"], + categories: ["scope"], + checks: ["expected-label-missing"], + test: "suite should fail", + limit: undefined, + includeMarkdown: false, + }); + }); + + it("should pass one-test markdown requests to plugin-agent", async () => { + await run(AgentQueryCommand, [ + "agent", + "query", + "test", + "--from", + "./agent-output", + "--test", + "suite should fail", + "--include-markdown", + ]); + + expect(buildAgentQueryPayload).toHaveBeenCalledWith(agentOutput, "test", { + 
environments: undefined, + labelFilters: [], + statuses: undefined, + severities: undefined, + categories: undefined, + checks: undefined, + test: "suite should fail", + limit: undefined, + includeMarkdown: true, + }); + }); + + it("should translate plugin-agent query usage errors to CLI failures", async () => { + (buildAgentQueryPayload as Mock).mockRejectedValueOnce(new AgentUsageError("No tests matched query")); + + const exitCode = await run(AgentQueryCommand, ["agent", "query", "test", "--from", "./agent-output"]); + + expect(exitCode).toBe(1); + }); +}); diff --git a/packages/cli/test/commands/agentSelect.test.ts b/packages/cli/test/commands/agentSelect.test.ts index 53f6604ec5d..50556a905bf 100644 --- a/packages/cli/test/commands/agentSelect.test.ts +++ b/packages/cli/test/commands/agentSelect.test.ts @@ -1,9 +1,9 @@ +import { resolveAgentSelectionOutputDir, selectAgentTestPlan } from "@allurereport/plugin-agent"; import { epic, feature, label, story } from "allure-js-commons"; import { run, UsageError } from "clipanion"; import { type Mock, beforeEach, describe, expect, it, vi } from "vitest"; import { AgentSelectCommand } from "../../src/commands/agent.js"; -import { resolveAgentSelectionOutputDir, selectAgentTestPlan } from "../../src/utils/agent-select.js"; vi.mock("node:console", async (importOriginal) => ({ ...(await importOriginal()), @@ -20,22 +20,27 @@ vi.mock("node:fs/promises", async (importOriginal) => ({ mkdir: vi.fn().mockResolvedValue(undefined), writeFile: vi.fn().mockResolvedValue(undefined), })); -vi.mock("../../src/utils/agent-select.js", () => ({ - normalizeAgentRerunPreset: vi.fn((value?: string) => value ?? "review"), - parseAgentLabelFilters: vi.fn((values?: string[]) => - (values ?? 
[]).map((value) => { - const [name, filterValue] = value.split("="); - - return { - name, - value: filterValue, - }; - }), - ), - resolveAgentSelectionOutputDir: vi.fn(), - selectAgentTestPlan: vi.fn(), - createAgentTestPlanContext: vi.fn(), -})); +vi.mock("@allurereport/plugin-agent", async (importOriginal) => { + const actual = await importOriginal(); + + return { + ...actual, + normalizeAgentRerunPreset: vi.fn((value?: string) => value ?? "review"), + parseAgentLabelFilters: vi.fn((values?: string[]) => + (values ?? []).map((value) => { + const [name, filterValue] = value.split("="); + + return { + name, + value: filterValue, + }; + }), + ), + resolveAgentSelectionOutputDir: vi.fn(), + selectAgentTestPlan: vi.fn(), + createAgentTestPlanContext: vi.fn(), + }; +}); beforeEach(async () => { await epic("coverage"); @@ -81,4 +86,42 @@ describe("agent select command", () => { `{\n "version": "1.0",\n "tests": [\n {\n "selector": "suite feature A"\n }\n ]\n}`, ); }); + + it("should write the selected test plan and print selection summary when output is provided", async () => { + const consoleModule = await import("node:console"); + const fsModule = await import("node:fs/promises"); + + (resolveAgentSelectionOutputDir as Mock).mockResolvedValueOnce("/tmp/agent-output"); + (selectAgentTestPlan as Mock).mockResolvedValueOnce({ + outputDir: "/tmp/agent-output", + preset: "failed", + selectedTests: [{ full_name: "suite feature A" }, { full_name: "suite feature B" }], + testPlan: { + version: "1.0", + tests: [{ selector: "suite feature A" }, { selector: "suite feature B" }], + }, + }); + + await run(AgentSelectCommand, [ + "agent", + "select", + "--from", + "./agent-output", + "--preset", + "failed", + "--output", + "./testplan.json", + ]); + + expect(fsModule.mkdir).toHaveBeenCalledWith("/cwd", { recursive: true }); + expect(fsModule.writeFile).toHaveBeenCalledWith( + "/cwd/testplan.json", + `{\n "version": "1.0",\n "tests": [\n {\n "selector": "suite feature A"\n },\n {\n 
"selector": "suite feature B"\n }\n ]\n}\n`, + "utf-8", + ); + expect(consoleModule.log).toHaveBeenNthCalledWith(1, "agent testplan: /cwd/testplan.json"); + expect(consoleModule.log).toHaveBeenNthCalledWith(2, "agent selection source: /tmp/agent-output"); + expect(consoleModule.log).toHaveBeenNthCalledWith(3, "agent selection preset: failed"); + expect(consoleModule.log).toHaveBeenNthCalledWith(4, "agent selection tests: 2"); + }); }); diff --git a/packages/cli/test/commands/run.integration.test.ts b/packages/cli/test/commands/run.integration.test.ts index a93edb3a334..cc884d49ede 100644 --- a/packages/cli/test/commands/run.integration.test.ts +++ b/packages/cli/test/commands/run.integration.test.ts @@ -7,7 +7,7 @@ import process from "node:process"; import { fileURLToPath } from "node:url"; import { promisify } from "node:util"; -import { epic, feature, label, story } from "allure-js-commons"; +import { attachment, epic, feature, label, step, story } from "allure-js-commons"; import { afterAll, beforeAll, beforeEach, describe, expect, it } from "vitest"; const execFileAsync = promisify(execFile); @@ -43,6 +43,14 @@ const pathExists = async (filePath: string) => { } }; +const attachCommandOutput = async (name: string, output: { stdout: string; stderr: string }) => { + await attachment(`${name} stdout`, output.stdout || "", "text/plain"); + + if (output.stderr) { + await attachment(`${name} stderr`, output.stderr, "text/plain"); + } +}; + const writeJson = async (filePath: string, value: unknown) => { await writeFile(filePath, `${JSON.stringify(value, null, 2)}\n`, "utf-8"); }; @@ -123,39 +131,144 @@ describe("run command integration", () => { await rm(tempDir, { recursive: true, force: true }); }); - it("writes the full agent directory contract in the built CLI path", async () => { - const fixtureDir = join(tempDir, "built-run"); + it("prints the agent task map from built CLI help", async () => { + let stdout = ""; + let stderr = ""; + + await step("run built agent 
help", async () => { + const helpResult = await runCommand(process.execPath, [cliPath, "agent", "--help"]); + + stdout = helpResult.stdout; + stderr = helpResult.stderr; + await attachCommandOutput("agent help", helpResult); + }); + + await step("verify agent task map help", async () => { + expect(stderr).toBe(""); + expect(stdout).toContain("Multiple commands match your selection:"); + expect(stdout).toContain("Agent task map:"); + expect(stdout).toContain("allure --version"); + expect(stdout).toContain("allure agent --help"); + expect(stdout).toContain("allure agent capabilities"); + expect(stdout).toContain("allure agent --goal ... -- "); + expect(stdout).toContain("allure agent latest"); + expect(stdout).toContain("allure agent state-dir"); + expect(stdout).toContain("allure agent select --latest"); + expect(stdout).toContain("allure agent select --from "); + expect(stdout).toContain("allure agent --rerun-latest -- "); + expect(stdout).toContain("allure agent --rerun-from -- "); + expect(stdout).toContain("ALLURE_AGENT_STATE_DIR="); + }); + }, 240_000); + + it("prints structured agent capabilities from the built CLI", async () => { + let stdout = ""; + let stderr = ""; + + await step("run built agent capabilities command", async () => { + const result = await runYarnCommand(["allure", "agent", "capabilities", "--json"]); + + stdout = result.stdout; + stderr = result.stderr; + await attachCommandOutput("agent capabilities", result); + }); + + await step("verify built agent capabilities output", async () => { + const capabilities = JSON.parse(stdout) as { + schema: string; + commands: { + run: { + supported: boolean; + options: string[]; + }; + latest: { + output: string[]; + }; + select: { + supported: boolean; + presets: string[]; + output: string[]; + }; + query: { + supported: boolean; + }; + }; + expectations: { + inline: { + expected: { + fullNames: boolean; + }; + forbidden: { + labels: boolean; + fullNames: boolean; + }; + evidence: { + attachmentFilters: 
string[]; + }; + }; + }; + output: { + files: string[]; + }; + unsupported: { + discovery: boolean; + localAgentService: boolean; + }; + }; + + expect(stderr).toBe(""); + expect(capabilities.schema).toBe("allure-agent-capabilities/v1"); + expect(capabilities.commands.run.supported).toBe(true); + expect(capabilities.commands.run.options).toContain("--expect-test"); + expect(capabilities.commands.latest.output).toEqual(["agent output: ", "agent index: /index.md"]); + expect(capabilities.commands.select.supported).toBe(true); + expect(capabilities.commands.select.output).toEqual([ + "stdout-testplan-json", + "file-testplan-json", + "file-summary", + ]); + expect(capabilities.commands.select.presets).toEqual(["review", "failed", "unsuccessful", "all"]); + expect(capabilities.commands.query.supported).toBe(true); + expect(capabilities.expectations.inline.expected.fullNames).toBe(true); + expect(capabilities.expectations.inline.forbidden.labels).toBe(true); + expect(capabilities.expectations.inline.forbidden.fullNames).toBe(false); + expect(capabilities.expectations.inline.evidence.attachmentFilters).toEqual(["name", "content-type"]); + expect(capabilities.output.files).toContain("manifest/run.json"); + expect(capabilities.unsupported.discovery).toBe(true); + expect(capabilities.unsupported).not.toHaveProperty("query"); + expect(capabilities.unsupported.localAgentService).toBe(true); + }); + }, 240_000); + + it("runs the built agent command with an agent-only profile", async () => { + const fixtureDir = join(tempDir, "built-agent"); + const homeDir = join(fixtureDir, "home"); const outputDir = join(fixtureDir, "agent-output"); const reportDir = join(fixtureDir, "report"); const expectationsPath = join(fixtureDir, "expected.yaml"); const configPath = join(fixtureDir, "allurerc.mjs"); const emitResultsPath = join(fixtureDir, "emit-results.mjs"); - const projectGuidePath = join(fixtureDir, "docs", "allure-agent-mode.md"); - const expectationsSource = `goal: Validate built 
CLI agent output -task_id: cli-integration + const expectationsSource = `goal: Validate built CLI agent command +task_id: cli-agent-integration expected: environments: - default notes: - - The legacy run invocation should delegate to the agent command contract. -`; - const projectGuideSource = `# Fixture Agent Guide - -- This guide belongs to the fixture cwd used by the legacy run compatibility test. + - The agent command should ignore configured report and export plugins. `; const configSource = ` export default { - name: "CLI Integration Report", + name: "CLI Agent Report", output: ${JSON.stringify(reportDir)}, plugins: { awesome: { options: { - reportName: "CLI Integration Report" + reportName: "CLI Agent Report" } }, dashboard: { options: { - reportName: "CLI Integration Dashboard" + reportName: "CLI Agent Dashboard" } }, testops: { @@ -177,111 +290,141 @@ await cp(fixture, join(outDir, \`\${randomUUID()}-result.json\`)); console.log("emitted simple result"); `.trimStart(); - await mkdir(join(fixtureDir, "docs"), { recursive: true }); - await writeFile(expectationsPath, expectationsSource, "utf-8"); - await writeFile(configPath, configSource, "utf-8"); - await writeFile(emitResultsPath, emitResultsSource, "utf-8"); - await writeFile(projectGuidePath, projectGuideSource, "utf-8"); + let expectedStateDir = ""; + let stdout = ""; + let stderr = ""; + let latestStdout = ""; + let latestStderr = ""; + let stateDirStdout = ""; + let stateDirStderr = ""; + + await step("prepare built agent fixture", async () => { + await mkdir(fixtureDir, { recursive: true }); + const resolvedFixtureDir = await realpath(fixtureDir); + expectedStateDir = join( + tmpdir(), + `allure-agent-state-${createHash("sha256").update(resolvedFixtureDir).digest("hex").slice(0, 16)}`, + ); + await writeFile(expectationsPath, expectationsSource, "utf-8"); + await writeFile(configPath, configSource, "utf-8"); + await writeFile(emitResultsPath, emitResultsSource, "utf-8"); + await attachment( + 
"fixture paths", + JSON.stringify({ fixtureDir, outputDir, expectationsPath, expectedStateDir }, null, 2), + "application/json", + ); + }); - const { stdout, stderr } = await runCommand( - process.execPath, - [cliPath, "run", "--config", configPath, "--cwd", fixtureDir, "--", "node", emitResultsPath, simpleResultFixture], - { + await step("run built agent command and state commands", async () => { + const runResult = await runCommand( + process.execPath, + [ + cliPath, + "agent", + "--config", + configPath, + "--cwd", + fixtureDir, + "--output", + outputDir, + "--expectations", + expectationsPath, + "--", + "node", + emitResultsPath, + simpleResultFixture, + ], + { + env: { + ...process.env, + HOME: homeDir, + }, + }, + ); + stdout = runResult.stdout; + stderr = runResult.stderr; + await attachCommandOutput("agent command", runResult); + + const latestResult = await runCommand(process.execPath, [cliPath, "agent", "latest", "--cwd", fixtureDir], { env: { ...process.env, - ALLURE_AGENT_OUTPUT: outputDir, - ALLURE_AGENT_EXPECTATIONS: expectationsPath, + HOME: homeDir, }, - }, - ); - - await expect(stat(join(outputDir, "index.md"))).resolves.toBeTruthy(); - await expect(stat(join(outputDir, "AGENTS.md"))).resolves.toBeTruthy(); - await expect(stat(join(outputDir, "manifest", "run.json"))).resolves.toBeTruthy(); - await expect(stat(join(outputDir, "manifest", "tests.jsonl"))).resolves.toBeTruthy(); - await expect(stat(join(outputDir, "manifest", "findings.jsonl"))).resolves.toBeTruthy(); - - const runManifest = JSON.parse(await readFile(join(outputDir, "manifest", "run.json"), "utf-8")) as { - command: string | null; - expectations_present: boolean; - paths: { - expected_manifest: string | null; - project_guide: string | null; + }); + latestStdout = latestResult.stdout; + latestStderr = latestResult.stderr; + await attachCommandOutput("agent latest", latestResult); + + const stateDirResult = await runCommand(process.execPath, [cliPath, "agent", "state-dir", "--cwd", 
fixtureDir], { + env: { + ...process.env, + HOME: homeDir, + }, + }); + stateDirStdout = stateDirResult.stdout; + stateDirStderr = stateDirResult.stderr; + await attachCommandOutput("agent state-dir", stateDirResult); + }); + + await step("verify built agent output contract", async () => { + await expect(stat(join(outputDir, "index.md"))).resolves.toBeTruthy(); + await expect(stat(join(outputDir, "AGENTS.md"))).resolves.toBeTruthy(); + await expect(stat(join(outputDir, "manifest", "run.json"))).resolves.toBeTruthy(); + await expect(stat(join(outputDir, "manifest", "tests.jsonl"))).resolves.toBeTruthy(); + await expect(stat(join(outputDir, "manifest", "findings.jsonl"))).resolves.toBeTruthy(); + + const runManifest = JSON.parse(await readFile(join(outputDir, "manifest", "run.json"), "utf-8")) as { + command: string | null; + expectations_present: boolean; + paths: { + expected_manifest: string | null; + }; }; - }; - const indexContent = await readFile(join(outputDir, "index.md"), "utf-8"); - const findingsContent = await readFile(join(outputDir, "manifest", "findings.jsonl"), "utf-8"); - const expectedCopy = await readFile(join(outputDir, "manifest", "expected.json"), "utf-8"); - const agentsGuide = await readFile(join(outputDir, "AGENTS.md"), "utf-8"); - const copiedProjectGuide = await readFile(join(outputDir, "project", "docs", "allure-agent-mode.md"), "utf-8"); - - expect(runManifest.command).toBe(`node ${emitResultsPath} ${simpleResultFixture}`); - expect(runManifest.expectations_present).toBe(true); - expect(runManifest.paths.expected_manifest).toBe("manifest/expected.json"); - expect(runManifest.paths.project_guide).toBe("project/docs/allure-agent-mode.md"); - expect(expectedCopy).toContain('"task_id": "cli-integration"'); - expect(agentsGuide).toContain("[project guidance](project/docs/allure-agent-mode.md)"); - expect(copiedProjectGuide).toContain("# Fixture Agent Guide"); - expect(indexContent).toContain("# CLI Integration Report"); - 
expect(indexContent).toContain("## Expected Scope"); - expect(indexContent).toContain("## Advisory Check Summary"); - expect(indexContent).toContain("## Passed"); - expect(findingsContent).toBe(""); - expect(await pathExists(join(outputDir, "awesome"))).toBe(false); - expect(await pathExists(join(outputDir, "dashboard"))).toBe(false); - expect(stdout).toContain(`agent output: ${outputDir}`); - expect(stdout).toContain(`agent expectations: ${expectationsPath}`); - expect(stdout).toContain(`node ${emitResultsPath} ${simpleResultFixture}`); - expect(stdout).toContain("emitted simple result"); - expect(stdout).not.toContain("process finished with code"); - expect(stdout).not.toContain("exit code "); - expect(stdout).not.toContain("[DEP0190]"); - expect(stdout).not.toContain("NO_COLOR"); - expect(stderr).not.toContain("[DEP0190]"); - expect(stderr).not.toContain("NO_COLOR"); - expect(stderr).not.toContain("Allure TestOps"); + const agentsGuide = await readFile(join(outputDir, "AGENTS.md"), "utf-8"); + const findingsContent = await readFile(join(outputDir, "manifest", "findings.jsonl"), "utf-8"); + + expect(runManifest.command).toBe(`node ${emitResultsPath} ${simpleResultFixture}`); + expect(runManifest.expectations_present).toBe(true); + expect(runManifest.paths.expected_manifest).toBe("manifest/expected.json"); + expect(agentsGuide).toContain("## Command Task Map"); + expect(agentsGuide).toContain("manifest/run.json"); + expect(await pathExists(join(outputDir, "project"))).toBe(false); + expect(findingsContent).toBe(""); + expect(await pathExists(join(outputDir, "awesome"))).toBe(false); + expect(await pathExists(join(outputDir, "dashboard"))).toBe(false); + expect(stdout).toContain(`node ${emitResultsPath} ${simpleResultFixture}`); + expect(stdout).toContain(`agent output: ${outputDir}`); + expect(stdout).toContain(`agent index: ${join(outputDir, "index.md")}`); + expect(stdout).toContain(`agent expectations: ${expectationsPath}`); + expect(stdout).toContain("emitted 
simple result"); + expect(stdout).not.toContain("process finished with code"); + expect(stdout).not.toContain("exit code "); + expect(stdout).not.toContain("[DEP0190]"); + expect(stdout).not.toContain("NO_COLOR"); + expect(stderr).not.toContain("[DEP0190]"); + expect(stderr).not.toContain("NO_COLOR"); + expect(stderr).not.toContain("Allure TestOps"); + expect(latestStdout).toContain(`agent output: ${outputDir}`); + expect(latestStdout).toContain(`agent index: ${join(outputDir, "index.md")}`); + expect(latestStderr).toBe(""); + expect(stateDirStdout.trim()).toBe(expectedStateDir); + expect(stateDirStderr).toBe(""); + }); }, 240_000); - it("runs the built agent command with an agent-only profile", async () => { - const fixtureDir = join(tempDir, "built-agent"); + it("runs agent mode with --expect-test to require a newly added test", async () => { + const fixtureDir = join(tempDir, "agent-expect-test"); const homeDir = join(fixtureDir, "home"); const outputDir = join(fixtureDir, "agent-output"); const reportDir = join(fixtureDir, "report"); - const expectationsPath = join(fixtureDir, "expected.yaml"); const configPath = join(fixtureDir, "allurerc.mjs"); - const emitResultsPath = join(fixtureDir, "emit-results.mjs"); - const projectGuidePath = join(fixtureDir, "docs", "allure-agent-mode.md"); - const expectationsSource = `goal: Validate built CLI agent command -task_id: cli-agent-integration -expected: - environments: - - default -notes: - - The agent command should ignore configured report and export plugins. -`; - const projectGuideSource = `# Fixture Agent Guide - -- This guide belongs to the fixture cwd used by the built agent integration test. 
-`; + const emitResultsPath = join(fixtureDir, "emit-new-test-result.mjs"); + const resultFixturePath = join(fixtureDir, "new-test-result.json"); + const expectedFullName = "agent flow reports the newly added test"; const configSource = ` export default { - name: "CLI Agent Report", - output: ${JSON.stringify(reportDir)}, - plugins: { - awesome: { - options: { - reportName: "CLI Agent Report" - } - }, - dashboard: { - options: { - reportName: "CLI Agent Dashboard" - } - }, - testops: { - options: {} - } - } + name: "CLI Agent Expect Test Report", + output: ${JSON.stringify(reportDir)} }; `.trimStart(); const emitResultsSource = ` @@ -294,108 +437,106 @@ const outDir = join(process.cwd(), "allure-results"); await mkdir(outDir, { recursive: true }); await cp(fixture, join(outDir, \`\${randomUUID()}-result.json\`)); -console.log("emitted simple result"); +console.log("emitted newly added test result"); `.trimStart(); - await mkdir(join(fixtureDir, "docs"), { recursive: true }); - const resolvedFixtureDir = await realpath(fixtureDir); - const expectedStateDir = join( - tmpdir(), - `allure-agent-state-${createHash("sha256").update(resolvedFixtureDir).digest("hex").slice(0, 16)}`, - ); - await writeFile(expectationsPath, expectationsSource, "utf-8"); - await writeFile(configPath, configSource, "utf-8"); - await writeFile(emitResultsPath, emitResultsSource, "utf-8"); - await writeFile(projectGuidePath, projectGuideSource, "utf-8"); - - const { stdout, stderr } = await runCommand( - process.execPath, - [ - cliPath, - "agent", - "--config", - configPath, - "--cwd", - fixtureDir, - "--output", - outputDir, - "--expectations", - expectationsPath, - "--", - "node", - emitResultsPath, - simpleResultFixture, - ], - { - env: { - ...process.env, - HOME: homeDir, - }, - }, - ); - const { stdout: latestStdout, stderr: latestStderr } = await runCommand( - process.execPath, - [cliPath, "agent", "latest", "--cwd", fixtureDir], - { - env: { - ...process.env, - HOME: homeDir, - }, - }, - 
); - const { stdout: stateDirStdout, stderr: stateDirStderr } = await runCommand( - process.execPath, - [cliPath, "agent", "state-dir", "--cwd", fixtureDir], - { - env: { - ...process.env, - HOME: homeDir, + let stdout = ""; + let stderr = ""; + + await step("prepare new test fixture", async () => { + await mkdir(fixtureDir, { recursive: true }); + const baseResult = JSON.parse(await readFile(simpleResultFixture, "utf-8")) as Record; + const expectedResult = { + ...baseResult, + uuid: "agent-expect-test-uuid", + historyId: "agent-expect-test-history", + name: "reports the newly added test", + fullName: expectedFullName, + status: "passed", + labels: [ + { name: "suite", value: "agent flow" }, + { name: "feature", value: "expect-test" }, + ], + }; + + await writeFile(configPath, configSource, "utf-8"); + await writeFile(emitResultsPath, emitResultsSource, "utf-8"); + await writeJson(resultFixturePath, expectedResult); + await attachment( + "expect-test fixture", + JSON.stringify({ fixtureDir, outputDir, expectedFullName }, null, 2), + "application/json", + ); + }); + + await step("run built agent command with expected full test name", async () => { + const runResult = await runCommand( + process.execPath, + [ + cliPath, + "agent", + "--config", + configPath, + "--cwd", + fixtureDir, + "--output", + outputDir, + "--goal", + "Validate newly added test is reported", + "--expect-tests", + "1", + "--expect-test", + expectedFullName, + "--", + "node", + emitResultsPath, + resultFixturePath, + ], + { + env: { + ...process.env, + HOME: homeDir, + }, }, - }, - ); - - await expect(stat(join(outputDir, "index.md"))).resolves.toBeTruthy(); - await expect(stat(join(outputDir, "AGENTS.md"))).resolves.toBeTruthy(); - await expect(stat(join(outputDir, "manifest", "run.json"))).resolves.toBeTruthy(); - await expect(stat(join(outputDir, "manifest", "tests.jsonl"))).resolves.toBeTruthy(); - await expect(stat(join(outputDir, "manifest", "findings.jsonl"))).resolves.toBeTruthy(); - - 
const runManifest = JSON.parse(await readFile(join(outputDir, "manifest", "run.json"), "utf-8")) as { - command: string | null; - expectations_present: boolean; - paths: { - expected_manifest: string | null; - project_guide: string | null; + ); + + stdout = runResult.stdout; + stderr = runResult.stderr; + await attachCommandOutput("agent expect-test", runResult); + }); + + await step("verify expect-test output", async () => { + const expectedManifest = JSON.parse(await readFile(join(outputDir, "manifest", "expected.json"), "utf-8")) as { + expected: { + full_names?: string[]; + test_count?: number; + }; }; - }; - const agentsGuide = await readFile(join(outputDir, "AGENTS.md"), "utf-8"); - const copiedProjectGuide = await readFile(join(outputDir, "project", "docs", "allure-agent-mode.md"), "utf-8"); - const findingsContent = await readFile(join(outputDir, "manifest", "findings.jsonl"), "utf-8"); - - expect(runManifest.command).toBe(`node ${emitResultsPath} ${simpleResultFixture}`); - expect(runManifest.expectations_present).toBe(true); - expect(runManifest.paths.expected_manifest).toBe("manifest/expected.json"); - expect(runManifest.paths.project_guide).toBe("project/docs/allure-agent-mode.md"); - expect(agentsGuide).toContain("[project guidance](project/docs/allure-agent-mode.md)"); - expect(copiedProjectGuide).toContain("# Fixture Agent Guide"); - expect(findingsContent).toBe(""); - expect(await pathExists(join(outputDir, "awesome"))).toBe(false); - expect(await pathExists(join(outputDir, "dashboard"))).toBe(false); - expect(stdout).toContain(`node ${emitResultsPath} ${simpleResultFixture}`); - expect(stdout).toContain(`agent output: ${outputDir}`); - expect(stdout).toContain(`agent expectations: ${expectationsPath}`); - expect(stdout).toContain("emitted simple result"); - expect(stdout).not.toContain("process finished with code"); - expect(stdout).not.toContain("exit code "); - expect(stdout).not.toContain("[DEP0190]"); - expect(stdout).not.toContain("NO_COLOR"); 
- expect(stderr).not.toContain("[DEP0190]"); - expect(stderr).not.toContain("NO_COLOR"); - expect(stderr).not.toContain("Allure TestOps"); - expect(latestStdout.trim()).toBe(outputDir); - expect(latestStderr).toBe(""); - expect(stateDirStdout.trim()).toBe(expectedStateDir); - expect(stateDirStderr).toBe(""); + const runManifest = JSON.parse(await readFile(join(outputDir, "manifest", "run.json"), "utf-8")) as { + expectations_present: boolean; + }; + const tests = (await readFile(join(outputDir, "manifest", "tests.jsonl"), "utf-8")) + .trim() + .split("\n") + .filter(Boolean) + .map((line) => JSON.parse(line) as { full_name: string }); + const findingsContent = await readFile(join(outputDir, "manifest", "findings.jsonl"), "utf-8"); + const indexMarkdown = await readFile(join(outputDir, "index.md"), "utf-8"); + + expect(runManifest.expectations_present).toBe(true); + expect(expectedManifest.expected.test_count).toBe(1); + expect(expectedManifest.expected.full_names).toEqual([expectedFullName]); + expect(tests).toEqual([ + expect.objectContaining({ + full_name: expectedFullName, + }), + ]); + expect(findingsContent).toBe(""); + expect(indexMarkdown).toContain(expectedFullName); + expect(stdout).toContain("agent expectations: CLI options"); + expect(stdout).toContain("emitted newly added test result"); + expect(stderr).toBe(""); + }); }, 240_000); it("supports agent select and rerun-from with the default review preset", async () => { @@ -407,6 +548,7 @@ console.log("emitted simple result"); const configPath = join(fixtureDir, "allurerc.mjs"); const emitResultsPath = join(fixtureDir, "emit-plan-results.mjs"); const fixturesManifestPath = join(fixtureDir, "fixtures.json"); + const selectedTestPlanPath = join(fixtureDir, "selected-testplan.json"); const featureAFixturePath = join(fixtureDir, "feature-a-result.json"); const featureBFixturePath = join(fixtureDir, "feature-b-result.json"); const previousManifestDir = join(previousOutputDir, "manifest"); @@ -447,79 +589,65 @@ 
for (const fixture of fixtures) { console.log(\`selected selectors: \${Array.from(selectors).join(",")}\`); `.trimStart(); - await mkdir(previousManifestDir, { recursive: true }); - await writeFile(configPath, configSource, "utf-8"); - await writeFile(emitResultsPath, emitResultsSource, "utf-8"); - - const baseResult = JSON.parse(await readFile(simpleResultFixture, "utf-8")) as Record; - const featureAResult = { - ...baseResult, - uuid: "feature-a-uuid", - historyId: "feature-a-history", - name: "feature A", - fullName: "suite feature A", - status: "passed", - labels: [ - { name: "suite", value: "suite" }, - { name: "feature", value: "checkout" }, - { name: "priority", value: "high" }, - ], - }; - const featureBResult = { - ...baseResult, - uuid: "feature-b-uuid", - historyId: "feature-b-history", - name: "feature B", - fullName: "suite feature B", - status: "passed", - labels: [ - { name: "suite", value: "suite" }, - { name: "feature", value: "payments" }, - { name: "priority", value: "low" }, - ], - }; + await step("prepare previous agent output and rerun fixtures", async () => { + await mkdir(previousManifestDir, { recursive: true }); + await writeFile(configPath, configSource, "utf-8"); + await writeFile(emitResultsPath, emitResultsSource, "utf-8"); + + const baseResult = JSON.parse(await readFile(simpleResultFixture, "utf-8")) as Record; + const featureAResult = { + ...baseResult, + uuid: "feature-a-uuid", + historyId: "feature-a-history", + name: "feature A", + fullName: "suite feature A", + status: "passed", + labels: [ + { name: "suite", value: "suite" }, + { name: "feature", value: "checkout" }, + { name: "priority", value: "high" }, + ], + }; + const featureBResult = { + ...baseResult, + uuid: "feature-b-uuid", + historyId: "feature-b-history", + name: "feature B", + fullName: "suite feature B", + status: "passed", + labels: [ + { name: "suite", value: "suite" }, + { name: "feature", value: "payments" }, + { name: "priority", value: "low" }, + ], + }; - 
await writeJson(featureAFixturePath, featureAResult); - await writeJson(featureBFixturePath, featureBResult); - await writeJson(fixturesManifestPath, [ - { - selector: "suite feature A", - file: featureAFixturePath, - }, - { - selector: "suite feature B", - file: featureBFixturePath, - }, - ]); - - await writeJson(join(previousManifestDir, "run.json"), { - schema_version: "allure-agent-output/v1", - report_uuid: "previous-report", - generated_at: "2026-04-15T18:00:00.000Z", - command: "node prior-run", - actual_exit_code: 0, - original_exit_code: 0, - exit_code: { - original: 0, - actual: 0, - }, - summary: { - stats: { - total: 2, - failed: 1, - broken: 0, - skipped: 0, - unknown: 0, - passed: 1, + await writeJson(featureAFixturePath, featureAResult); + await writeJson(featureBFixturePath, featureBResult); + await writeJson(fixturesManifestPath, [ + { + selector: "suite feature A", + file: featureAFixturePath, }, - duration_ms: { - total: 10, - average: 5, - max: 5, + { + selector: "suite feature B", + file: featureBFixturePath, }, - environments: [ - { - environmentId: "default", + ]); + + await writeJson(join(previousManifestDir, "run.json"), { + schema_version: "allure-agent-output/v1", + report_uuid: "previous-report", + generated_at: "2026-04-15T18:00:00.000Z", + command: "node prior-run", + actual_exit_code: 0, + original_exit_code: 0, + exit_code: { + original: 0, + actual: 0, + }, + summary: { + stats: { total: 2, failed: 1, broken: 0, @@ -527,165 +655,230 @@ console.log(\`selected selectors: \${Array.from(selectors).join(",")}\`); unknown: 0, passed: 1, }, - ], - }, - paths: { - index_md: "index.md", - agents_md: "AGENTS.md", - tests_manifest: "manifest/tests.jsonl", - findings_manifest: "manifest/findings.jsonl", - expected_manifest: null, - project_guide: null, - process_logs: { - stdout: null, - stderr: null, - }, - }, - expectations_present: false, - check_summary: { - total: 1, - countsBySeverity: { - high: 1, - warning: 0, - info: 0, + duration_ms: 
{ + total: 10, + average: 5, + max: 5, + }, + environments: [ + { + environmentId: "default", + total: 2, + failed: 1, + broken: 0, + skipped: 0, + unknown: 0, + passed: 1, + }, + ], }, - countsByCategory: { - bootstrap: 0, - scope: 0, - metadata: 0, - evidence: 1, - smells: 0, + paths: { + index_md: "index.md", + agents_md: "AGENTS.md", + tests_manifest: "manifest/tests.jsonl", + findings_manifest: "manifest/findings.jsonl", + expected_manifest: null, + process_logs: { + stdout: null, + stderr: null, + }, }, - }, - agent_context: { - agent_name: null, - loop_id: null, - task_id: null, - conversation_id: null, - }, - }); - await writeJsonl(join(previousManifestDir, "tests.jsonl"), [ - { - environment_id: "default", - history_id: "feature-a-history", - test_result_id: "feature-a-tr", - full_name: "suite feature A", - package: "suite", - labels: [ - { name: "feature", value: "checkout" }, - { name: "priority", value: "high" }, - ], - status: "failed", - duration_ms: 5, - retries: 0, - flaky: false, - scope_match: "match", - finding_counts: { + expectations_present: false, + check_summary: { total: 1, - high: 1, - warning: 0, - info: 0, + countsBySeverity: { + high: 1, + warning: 0, + info: 0, + }, + countsByCategory: { + bootstrap: 0, + scope: 0, + metadata: 0, + evidence: 1, + smells: 0, + }, }, - markdown_path: "tests/default/feature-a.md", - assets_dir: "tests/default/feature-a.assets", - }, - { - environment_id: "default", - history_id: "feature-b-history", - test_result_id: "feature-b-tr", - full_name: "suite feature B", - package: "suite", - labels: [ - { name: "feature", value: "payments" }, - { name: "priority", value: "low" }, - ], - status: "passed", - duration_ms: 5, - retries: 0, - flaky: false, - scope_match: "match", - finding_counts: { - total: 0, - high: 0, - warning: 0, - info: 0, + agent_context: { + agent_name: null, + loop_id: null, + task_id: null, + conversation_id: null, }, - markdown_path: "tests/default/feature-b.md", - assets_dir: 
"tests/default/feature-b.assets", - }, - ]); - await writeJsonl(join(previousManifestDir, "findings.jsonl"), [ - { - finding_id: "finding-feature-a", - subject: "tests/default/feature-a.md", - severity: "high", - category: "evidence", - check_name: "failed-without-useful-steps", - message: "Feature A needs focused rerun coverage", - explanation: "Feature A should be the only review-targeted rerun candidate.", - evidence_paths: [], - remediation_hint: "Rerun only feature A.", - }, - ]); - - const { stdout: selectStdout, stderr: selectStderr } = await runCommand( - process.execPath, - [cliPath, "agent", "select", "--from", previousOutputDir], - { - env: { - ...process.env, - HOME: homeDir, + }); + await writeJsonl(join(previousManifestDir, "tests.jsonl"), [ + { + environment_id: "default", + history_id: "feature-a-history", + test_result_id: "feature-a-tr", + full_name: "suite feature A", + package: "suite", + labels: [ + { name: "feature", value: "checkout" }, + { name: "priority", value: "high" }, + ], + status: "failed", + duration_ms: 5, + retries: 0, + flaky: false, + scope_match: "match", + finding_counts: { + total: 1, + high: 1, + warning: 0, + info: 0, + }, + markdown_path: "tests/default/feature-a.md", + assets_dir: "tests/default/feature-a.assets", }, - }, - ); - const { stdout, stderr } = await runCommand( - process.execPath, - [ - cliPath, - "agent", - "--config", - configPath, - "--cwd", - fixtureDir, - "--output", - outputDir, - "--rerun-from", - previousOutputDir, - "--", - "node", - emitResultsPath, - fixturesManifestPath, - ], - { - env: { - ...process.env, - HOME: homeDir, + { + environment_id: "default", + history_id: "feature-b-history", + test_result_id: "feature-b-tr", + full_name: "suite feature B", + package: "suite", + labels: [ + { name: "feature", value: "payments" }, + { name: "priority", value: "low" }, + ], + status: "passed", + duration_ms: 5, + retries: 0, + flaky: false, + scope_match: "match", + finding_counts: { + total: 0, + high: 
0, + warning: 0, + info: 0, + }, + markdown_path: "tests/default/feature-b.md", + assets_dir: "tests/default/feature-b.assets", + }, + ]); + await writeJsonl(join(previousManifestDir, "findings.jsonl"), [ + { + finding_id: "finding-feature-a", + subject: "tests/default/feature-a.md", + severity: "high", + category: "evidence", + check_name: "failed-without-useful-steps", + message: "Feature A needs focused rerun coverage", + explanation: "Feature A should be the only review-targeted rerun candidate.", + evidence_paths: [], + remediation_hint: "Rerun only feature A.", }, - }, - ); + ]); + await attachment( + "previous run summary", + JSON.stringify({ previousOutputDir, selected: "suite feature A", skipped: "suite feature B" }, null, 2), + "application/json", + ); + }); - expect(JSON.parse(selectStdout)).toEqual({ - version: "1.0", - tests: [ + let selectStdout = ""; + let selectStderr = ""; + let selectFileStdout = ""; + let selectFileStderr = ""; + let stdout = ""; + let stderr = ""; + + await step("select tests and rerun built agent command", async () => { + const selectResult = await runCommand( + process.execPath, + [cliPath, "agent", "select", "--from", previousOutputDir], { - selector: "suite feature A", + env: { + ...process.env, + HOME: homeDir, + }, + }, + ); + selectStdout = selectResult.stdout; + selectStderr = selectResult.stderr; + await attachCommandOutput("agent select", selectResult); + + const selectFileResult = await runCommand( + process.execPath, + [cliPath, "agent", "select", "--from", previousOutputDir, "--output", selectedTestPlanPath], + { + env: { + ...process.env, + HOME: homeDir, + }, + }, + ); + selectFileStdout = selectFileResult.stdout; + selectFileStderr = selectFileResult.stderr; + await attachCommandOutput("agent select output file", selectFileResult); + + const runResult = await runCommand( + process.execPath, + [ + cliPath, + "agent", + "--config", + configPath, + "--cwd", + fixtureDir, + "--output", + outputDir, + "--rerun-from", 
+ previousOutputDir, + "--", + "node", + emitResultsPath, + fixturesManifestPath, + ], + { + env: { + ...process.env, + HOME: homeDir, + }, }, - ], + ); + stdout = runResult.stdout; + stderr = runResult.stderr; + await attachCommandOutput("agent rerun-from", runResult); + }); + + await step("verify selected rerun output", async () => { + expect(JSON.parse(selectStdout)).toEqual({ + version: "1.0", + tests: [ + { + selector: "suite feature A", + }, + ], + }); + expect(selectStderr).toBe(""); + expect(JSON.parse(await readFile(selectedTestPlanPath, "utf-8"))).toEqual({ + version: "1.0", + tests: [ + { + selector: "suite feature A", + }, + ], + }); + expect(selectFileStdout).toContain(`agent testplan: ${selectedTestPlanPath}`); + expect(selectFileStdout).toContain(`agent selection source: ${previousOutputDir}`); + expect(selectFileStdout).toContain("agent selection preset: review"); + expect(selectFileStdout).toContain("agent selection tests: 1"); + expect(selectFileStderr).toBe(""); + expect(stdout).toContain("selected selectors: suite feature A"); + expect(stderr).toBe(""); + + const selectedTests = (await readFile(join(outputDir, "manifest", "tests.jsonl"), "utf-8")) + .trim() + .split("\n") + .filter(Boolean) + .map((line) => JSON.parse(line) as { full_name: string }); + + expect(selectedTests).toEqual([ + expect.objectContaining({ + full_name: "suite feature A", + }), + ]); }); - expect(selectStderr).toBe(""); - expect(stdout).toContain("selected selectors: suite feature A"); - expect(stderr).toBe(""); - - const selectedTests = (await readFile(join(outputDir, "manifest", "tests.jsonl"), "utf-8")) - .trim() - .split("\n") - .filter(Boolean) - .map((line) => JSON.parse(line) as { full_name: string }); - - expect(selectedTests).toEqual([ - expect.objectContaining({ - full_name: "suite feature A", - }), - ]); }, 240_000); }); diff --git a/packages/cli/test/commands/run.test.ts b/packages/cli/test/commands/run.test.ts index f10cbeba0ca..07e594ad11b 100644 --- 
a/packages/cli/test/commands/run.test.ts +++ b/packages/cli/test/commands/run.test.ts @@ -1,12 +1,9 @@ -import { resolve } from "node:path"; - import { readConfig } from "@allurereport/core"; import AwesomePlugin from "@allurereport/plugin-awesome"; import { epic, feature, label, story } from "allure-js-commons"; import { run, UsageError } from "clipanion"; import { type Mock, beforeEach, describe, expect, it, vi } from "vitest"; -import { executeAgentMode } from "../../src/commands/agent.js"; import { RunCommand } from "../../src/commands/run.js"; import { ALLURE_CLI_ACTIVE_COMMAND_ENV } from "../../src/utils/execution-context.js"; @@ -90,10 +87,6 @@ vi.mock("@allurereport/static-server", async (importOriginal) => ({ ...(await importOriginal()), serve: vi.fn(), })); -vi.mock("../../src/commands/agent.js", () => ({ - executeAgentMode: vi.fn().mockResolvedValue(undefined), -})); - beforeEach(async () => { await epic("coverage"); await feature("cli-run"); @@ -101,8 +94,6 @@ beforeEach(async () => { await label("coverage", "cli-run"); vi.clearAllMocks(); delete process.env[ALLURE_CLI_ACTIVE_COMMAND_ENV]; - delete process.env.ALLURE_AGENT_OUTPUT; - delete process.env.ALLURE_AGENT_EXPECTATIONS; const { AllureReportMock } = await import("../utils.js"); @@ -235,35 +226,4 @@ describe("run command", () => { delete process.env[ALLURE_CLI_ACTIVE_COMMAND_ENV]; }); - - it("should delegate legacy env-based agent mode to the agent command", async () => { - await epic("coverage"); - await feature("agent-mode"); - await story("run"); - await label("coverage", "agent-mode"); - const { AllureReportMock } = await import("../utils.js"); - const { runProcess } = await import("../../src/utils/index.js"); - const consoleModule = await import("node:console"); - - process.env.ALLURE_AGENT_OUTPUT = "./legacy-agent-output"; - process.env.ALLURE_AGENT_EXPECTATIONS = "./legacy-expected.yaml"; - - await run(RunCommand, ["run", "--cwd", "./fixture", "--silent", "--", "npm", "test"]); - - 
expect(executeAgentMode).toHaveBeenCalledWith({ - configPath: undefined, - cwd: "./fixture", - output: resolve(process.cwd(), "./legacy-agent-output"), - expectations: resolve(process.cwd(), "./legacy-expected.yaml"), - environment: undefined, - environmentName: undefined, - silent: true, - args: ["npm", "test"], - }); - expect(readConfig).not.toHaveBeenCalled(); - expect(AllureReportMock).not.toHaveBeenCalled(); - expect(runProcess).not.toHaveBeenCalled(); - expect(consoleModule.log).not.toHaveBeenCalled(); - expect(exitMock).not.toHaveBeenCalled(); - }); }); diff --git a/packages/plugin-agent/README.md b/packages/plugin-agent/README.md index d2727073d6b..4bb44365307 100644 --- a/packages/plugin-agent/README.md +++ b/packages/plugin-agent/README.md @@ -27,14 +27,13 @@ When enabled, the plugin writes: - `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` for machine-readable review - copied run logs and other artifacts under `artifacts/` - `AGENTS.md` with guidance for consuming the directory -- `manifest/expected.json` when `ALLURE_AGENT_EXPECTATIONS` is provided -- `project/docs/allure-agent-mode.md` when the project has a guide at `docs/allure-agent-mode.md` +- `manifest/expected.json` when inline flags, `--expectations `, or plugin options provide expectations If no output directory is configured, the plugin does nothing. The plugin stays read-only by design. A separate harness layer can consume the generated manifests, plan enrichment work, and decide whether a rerun is ready to -accept. See [the enrichment loop guide](../../docs/agent_enrichment_loop.md). +accept. ## Verification Standard @@ -42,19 +41,34 @@ accept. See [the enrichment loop guide](../../docs/agent_enrichment_loop.md). - Use `allure agent` for smoke checks too, even when the change is small or mechanical. - Only skip agent mode when it is impossible or when you are debugging agent mode itself. 
-## Skills-First Workflow +## CLI Capability Workflow -The downstream workflow is intended to be skills-first: +The installed CLI help is the local contract for agent mode. When an agent needs +to choose supported commands or flags, detect the local CLI surface first: -1. install the Allure skills bundle -2. run the setup skill in a project -3. let the setup skill create or update root `AGENTS.md` -4. let the setup skill create `docs/allure-agent-mode.md` -5. use Allure agent-mode in future test work through the project guide plus per-run manifests +```shell +allure --version +allure agent capabilities --json +allure agent --help +allure agent query --help +allure agent select --help +allure agent latest --help +allure agent state-dir --help +``` -Every generated run includes an `AGENTS.md` playbook. When the project has -`docs/allure-agent-mode.md`, the run output also copies that guide and tells agents -to read it first. +`allure agent capabilities --json` is the structured local contract for agents. +`allure agent --help` includes the human-readable command task map. Each +agent-mode command names the loop it supports, the problem signal that calls for +it, and the task the agent should perform with it. For example, `allure agent +latest` belongs to output recovery, `allure agent state-dir` belongs to tooling +diagnosis, `allure agent query` belongs to output inspection, +`allure agent select` belongs to rerun planning, and `--rerun-*` belongs to +focused retry loops. + +Every generated run includes an `AGENTS.md` playbook with the same stable +artifact-reading order, command task map, workflow guidance, and remediation +rules. Reusable skills and common knowledge files should not hard-code +version-specific flags; they should ask the local CLI when support is unclear. 
## Install @@ -90,30 +104,28 @@ The preferred CLI entrypoint is: npx allure agent -- npm test ``` -You can provide an explicit expectations file and output directory when you need deterministic paths: +You can provide compact inline expectations for the common review path: ```shell npx allure agent \ - --output ./out/agent-report \ - --expectations ./out/agent-expected.yaml \ - -- npm test + --goal "Review feature A" \ + --expect-tests 3 \ + --expect-label feature=feature-a \ + --expect-step-containing "validate feature A" \ + --expect-steps 1 \ + -- npm test ``` -That command uses an agent-only profile by default, so configured presentation and export plugins such as Awesome, Dashboard, or TestOps are ignored for that run. - -You can also enable the plugin through lower-level environment variables when you need direct env control: +Use an explicit expectations file and output directory when inline flags become awkward or you need deterministic paths: ```shell -ALLURE_AGENT_OUTPUT=./out/agent-report npx allure run -- npm test +npx allure agent \ + --output ./out/agent-report \ + --expectations ./out/agent-expected.yaml \ + -- npm test ``` -To compare the run against an intended scope, provide an expectations file: - -```shell -ALLURE_AGENT_OUTPUT=./out/agent-report \ -ALLURE_AGENT_EXPECTATIONS=./out/agent-expected.yaml \ -npx allure run -- npm test -``` +That command uses an agent-only profile by default, so configured presentation and export plugins such as Awesome, Dashboard, or TestOps are ignored for that run. ## Options @@ -121,19 +133,14 @@ The plugin accepts the following options: | Option | Description | Type | Default | |--------|-------------|------|---------| -| `outputDir` | Directory where the markdown report will be written. 
Relative paths are resolved from the `allure` process working directory | `string` | `ALLURE_AGENT_OUTPUT` | - -## Environment Variables - -| Variable | Description | -|----------|-------------| -| `ALLURE_AGENT_OUTPUT` | Directory where the agent output should be written when `outputDir` is not set | -| `ALLURE_AGENT_EXPECTATIONS` | Optional path to a YAML or JSON file describing expected and forbidden test scope | -| `ALLURE_AGENT_COMMAND` | The executed command string recorded in `manifest/run.json` and `index.md` | -| `ALLURE_AGENT_NAME` | Optional agent identifier recorded in `manifest/run.json` | -| `ALLURE_AGENT_LOOP_ID` | Optional loop identifier recorded in `manifest/run.json` | -| `ALLURE_AGENT_TASK_ID` | Optional task identifier recorded in `manifest/run.json` | -| `ALLURE_AGENT_CONVERSATION_ID` | Optional conversation identifier recorded in `manifest/run.json` | +| `outputDir` | Directory where the markdown report will be written. Relative paths are resolved from the `allure` process working directory | `string` | none | +| `expectationsPath` | Path to a YAML or JSON file describing expected and forbidden test scope | `string` | none | +| `expectations` | Inline expectations object. 
Use either `expectationsPath` or `expectations`, not both | `AgentExpectationsInput` | none | +| `command` | Executed command string recorded in `manifest/run.json` and `index.md` | `string` | none | +| `agentName` | Optional agent identifier recorded in `manifest/run.json` | `string` | none | +| `loopId` | Optional loop identifier recorded in `manifest/run.json` | `string` | none | +| `taskId` | Optional task identifier recorded in `manifest/run.json` | `string` | expectations task id | +| `conversationId` | Optional conversation identifier recorded in `manifest/run.json` | `string` | none | ## Manifest Contract @@ -148,8 +155,7 @@ The plugin emits a hybrid output: - `manifest/test-events.jsonl` - `manifest/tests.jsonl` - `manifest/findings.jsonl` - - `manifest/expected.json` when an expectations file is provided - - `project/docs/allure-agent-mode.md` when the project guide is available + - `manifest/expected.json` when expectations are provided `index.md` is the landing page for the run. It includes run identity, expected scope, advisory check summary, process logs, and grouped test links. @@ -162,10 +168,20 @@ Each test markdown file includes: - retry history - advisory findings and rerun guidance when evidence is weak +## Expectations + +The preferred `allure agent` workflow uses inline flags: + +- `--goal ` records the review intent. +- `--expect-tests ` checks visible logical test count. +- `--expect-label name=value`, `--expect-env `, `--expect-test ""`, and `--expect-prefix ` define expected scope. For a newly added test, use `--expect-test ""` so a missing reported test becomes an explicit finding. +- `--expect-step-containing `, `--expect-steps `, `--expect-attachments `, and `--expect-attachment ` define evidence expectations per evidence-target logical test. + +The plugin normalizes inline expectations into `manifest/expected.json`. 
+ ## Expectations File -When `ALLURE_AGENT_EXPECTATIONS` is set, the plugin accepts YAML or JSON, normalizes -it into `manifest/expected.json`, and compares the run against it. +When `--expectations ` or the plugin `expectationsPath` option is set, the plugin accepts YAML or JSON, normalizes it into `manifest/expected.json`, and compares the run against it. Expected top-level fields: @@ -173,6 +189,7 @@ Expected top-level fields: goal: Validate feature A task_id: feature-a expected: + test_count: 3 environments: - default full_names: @@ -197,23 +214,27 @@ notes: Selectors are advisory. The plugin does not fail the run; it records findings in markdown and `manifest/findings.jsonl`. -## Review Loop +## Agent Workflow Pattern -The intended usage pattern is: +Use the smallest workflow that matches the task. For the common change-validation path: -1. Run tests with `allure agent -- `. +1. Run tests with `allure agent --goal --expect-test "" --expect-label name=value --expect-step-containing -- `. 2. Watch `manifest/run.json` and `manifest/test-events.jsonl` while the run is active. 3. Review `index.md` plus the manifest files. 4. If evidence is weak, add steps, attachments, labels, or parameters. -5. Rerun the same scope with the same expectations file. +5. Rerun the same scope with the same expectations. 6. Accept the run or iterate based on advisory findings. +When a prior agent run already captured failed tests, prefer +`allure agent --rerun-latest --rerun-preset failed -- ` or +`allure agent --rerun-from --rerun-preset failed -- ` +instead of spending context reconstructing runner-specific test names. + For small mechanical test changes, use a scoped agent-mode run for the smoke check too. Plain runner commands should be reserved for cases where agent mode is impossible or when you are debugging agent mode itself. 
-For grouped coverage reviews, prefer one temp output directory and one expectations -file per scope instead of trying to review a whole command matrix from a single run. +For grouped coverage reviews, prefer one scoped expectation set per group instead of trying to review a whole command matrix from a single run. ## Test Enrichment Best Practices @@ -246,25 +267,12 @@ When agent output does not fully model runner-visible failures: - treat the review as partial when suite-load, import, or setup failures are visible outside logical test files - keep console-only conclusions provisional until the missing modeling is understood -## Project Guide - -Projects using the skills flow should keep a short root `AGENTS.md` and a deeper -`docs/allure-agent-mode.md`. - -`AGENTS.md` should route all test-related work to the deeper guide. The deeper guide -should explain: - -- the feature-delivery loop -- the metadata-enrichment loop -- per-run temp expectations and output paths -- meaningful evidence rules -- minimal metadata rules -- future loops like flaky, known-issue, mute, and quality gates - ## Copyable Agent Instructions The generated `AGENTS.md` already contains this guidance for each run. If you want -the same policy in a project-level skill or agent prompt, you can start with: +the same stable policy in a reusable skill or agent prompt, keep version-specific +CLI details out of that reusable body and make agents inspect local CLI help when +support is unclear: ```md ## Allure Agent Mode Instructions @@ -273,15 +281,13 @@ the same policy in a project-level skill or agent prompt, you can start with: - Use `allure agent` for smoke checks too, even when the change is small or mechanical. - Only skip agent mode when it is impossible or when you are debugging agent mode itself. - After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly. 
-- Use `allure agent latest` to reopen the newest run when `--output` was omitted. -- Use `allure agent state-dir` to inspect where the current project stores its latest-agent state. -- Use `allure agent select --latest` or `allure agent select --from ` to inspect the review-targeted test plan before rerunning. -- Use `allure agent --rerun-latest -- ` or `allure agent --rerun-from -- ` to rerun only the selected tests. -- Use `--rerun-preset review|failed|unsuccessful|all`, repeated `--rerun-environment `, and repeated `--rerun-label name=value` when you need a narrower rerun selection from the previous output. -- Use `ALLURE_AGENT_STATE_DIR` when you need to override where the current project stores latest-agent state for `latest`, `state-dir`, or `--rerun-latest`. -- Use `ALLURE_AGENT_*` with `allure run` only as the lower-level fallback when you need direct environment control. -- Generate or refresh `ALLURE_AGENT_EXPECTATIONS` before each targeted rerun. -- Run tests with `ALLURE_AGENT_OUTPUT` and review `manifest/run.json`, `manifest/test-events.jsonl`, `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`. +- Use `allure --version`, `allure agent capabilities --json`, and `allure agent --help` before choosing flags when the local CLI surface is unknown. +- Use `allure agent latest` to print the newest output directory and `index.md` path when `--output` was omitted. +- Use `allure agent latest`, `state-dir`, `query`, `select`, and `--rerun-*` according to their loop/task/problem mapping instead of treating them as interchangeable helper commands. +- Use `allure agent query --latest summary|tests|findings|test` or `allure agent query --from ...` to inspect prior output as focused JSON before manually opening raw manifests. +- Use `allure agent select --from --output ` when you want the CLI to write the test plan and print a short summary with the file path, source output, preset, and selected count. 
+- When rerunning previous failures, use `allure agent --rerun-latest --rerun-preset failed -- ` or `allure agent --rerun-from --rerun-preset failed -- ` instead of manually rebuilding runner-specific test names. +- Run tests with `allure agent` and review `manifest/run.json`, `manifest/test-events.jsonl`, `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`. - Enrich only the intended tests. Add real steps for real setup, actions, and assertions. - Attach only real runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces. - Keep metadata minimal. Add labels or severity only when scope review, debugging, or quality policy uses them. @@ -303,7 +309,7 @@ import { ``` - `buildAgentExpectations(...)` converts a goal plus target/forbidden selectors into - the JSON shape expected by `ALLURE_AGENT_EXPECTATIONS`. + the expectations shape accepted by inline flags, expectations files, and the plugin expectations option. - `loadAgentOutput(...)` reads `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`. - `planAgentEnrichmentReview(...)` maps `check_name` values to enrichment actions @@ -325,5 +331,5 @@ The enrichment loop should add only real runtime evidence: Avoid dummy enrichment such as empty wrapper steps, placeholder `"passed"` text attachments, or labels that are never used downstream. -For a fuller policy, remediation mapping, and JS/Vitest examples based on the -existing sandbox tests, see [the enrichment loop guide](../../docs/agent_enrichment_loop.md). +For remediation mapping and JS/Vitest examples based on the existing sandbox +tests, inspect the package tests and generated run `AGENTS.md` guidance. 
diff --git a/packages/plugin-agent/src/capabilities.ts b/packages/plugin-agent/src/capabilities.ts new file mode 100644 index 00000000000..ff822c9af2e --- /dev/null +++ b/packages/plugin-agent/src/capabilities.ts @@ -0,0 +1,178 @@ +export const AGENT_CAPABILITIES_SCHEMA = "allure-agent-capabilities/v1"; + +export const createAgentCapabilities = () => + ({ + schema: AGENT_CAPABILITIES_SCHEMA, + commands: { + help: { + supported: true, + usage: "allure --version; allure agent --help; allure agent capabilities", + output: ["human", "json"], + }, + run: { + supported: true, + usage: "allure agent [options] -- ", + options: [ + "--config", + "--cwd", + "--output", + "--expectations", + "--goal", + "--task-id", + "--expect-tests", + "--expect-label", + "--expect-env", + "--expect-test", + "--expect-prefix", + "--expect-step-containing", + "--forbid-label", + "--expect-steps", + "--expect-attachments", + "--expect-attachment", + "--environment", + "--environment-name", + "--silent", + "--rerun-from", + "--rerun-latest", + "--rerun-preset", + "--rerun-environment", + "--rerun-label", + ], + }, + latest: { + supported: true, + usage: "allure agent latest [--cwd ]", + output: ["agent output: ", "agent index: /index.md"], + }, + stateDir: { + supported: true, + usage: "allure agent state-dir [--cwd ]", + environmentVariable: "ALLURE_AGENT_STATE_DIR", + }, + select: { + supported: true, + usage: "allure agent select (--latest | --from ) [options]", + presets: ["review", "failed", "unsuccessful", "all"], + filters: ["environment", "label"], + output: ["stdout-testplan-json", "file-testplan-json", "file-summary"], + }, + query: { + supported: true, + usage: "allure agent query (--latest | --from ) [summary|tests|findings|test] [options]", + views: ["summary", "tests", "findings", "test"], + filters: ["status", "environment", "label", "severity", "category", "check", "test"], + output: ["json"], + }, + rerun: { + supported: true, + usage: "allure agent (--rerun-latest | 
--rerun-from ) [filters] -- ", + presets: ["review", "failed", "unsuccessful", "all"], + filters: ["environment", "label"], + transport: "ALLURE_TESTPLAN_PATH", + }, + }, + expectations: { + inline: { + supported: true, + goal: true, + taskId: true, + expected: { + testCount: true, + labels: true, + environments: true, + fullNames: true, + fullNamePrefixes: true, + }, + forbidden: { + labels: true, + environments: false, + fullNames: false, + fullNamePrefixes: false, + }, + evidence: { + stepNameContains: true, + minSteps: true, + minAttachments: true, + attachmentFilters: ["name", "content-type"], + }, + }, + file: { + supported: true, + formats: ["yaml", "json"], + }, + }, + output: { + automaticTempDirectory: true, + explicitOutputOption: "--output ", + schema: "allure-agent-output/v1", + files: [ + "index.md", + "AGENTS.md", + "manifest/run.json", + "manifest/test-events.jsonl", + "manifest/tests.jsonl", + "manifest/findings.jsonl", + "manifest/expected.json", + "tests//.md", + "artifacts/global/", + ], + }, + unsupported: { + discovery: true, + configureIntegration: true, + executionSignal: true, + compare: true, + flaky: true, + duplicates: true, + stale: true, + suppressions: true, + observe: true, + interrupt: true, + localAgentService: true, + expectationControls: ["--expect-evidence"], + }, + }) as const; + +export const AGENT_TASK_MAP_HELP = `Agent task map: + allure --version + allure agent --help + allure agent capabilities + Setup and capability detection. Use when the local CLI surface is unknown, + generated guidance may be stale, or an agent needs supported flags without + guessing. + + allure agent --goal ... -- + Run a test command with runtime evidence, scope expectations, and + agent-readable artifacts for review, debugging, smoke checks, or validation. + + allure agent latest + Recover the newest agent output directory and index.md when --output was + omitted or a follow-up task needs the previous run. 
+ + allure agent state-dir + Show where project-scoped latest-run pointers are stored. Useful when + latest cannot find a run or CI/sandbox state looks wrong. + + allure agent select --latest + allure agent select --from + Inspect/filter prior results and write an Allure test plan before rerun. + + allure agent query --latest summary + allure agent query --from tests + allure agent query --from findings + Inspect prior agent output as focused JSON without manually loading raw + manifests. Use for summaries, filtered test lists, findings, or one test. + + allure agent --rerun-latest -- + allure agent --rerun-from -- + Rerun the failed, unsuccessful, or selected tests from prior agent output + through Allure test plan support. + +Environment: + ALLURE_AGENT_STATE_DIR= + Override the project-scoped state directory. Useful in CI, sandboxes, or + multi-job setups that need a deterministic shared state location. +`; + +export const isAgentTaskMapHelpRequest = (args: string[]) => + args.length === 2 && args[0] === "agent" && (args[1] === "--help" || args[1] === "-h"); diff --git a/packages/plugin-agent/src/errors.ts b/packages/plugin-agent/src/errors.ts new file mode 100644 index 00000000000..d4f38e9b576 --- /dev/null +++ b/packages/plugin-agent/src/errors.ts @@ -0,0 +1,21 @@ +export class AgentUsageError extends Error { + constructor(message: string) { + super(message); + this.name = "AgentUsageError"; + } +} + +export class AgentExpectationUsageError extends AgentUsageError { + readonly sourceOption?: string; + + constructor(message: string, sourceOption?: string) { + super(message); + this.name = "AgentExpectationUsageError"; + this.sourceOption = sourceOption; + } +} + +export const isAgentUsageError = (error: unknown): error is AgentUsageError => error instanceof AgentUsageError; + +export const isAgentExpectationUsageError = (error: unknown): error is AgentExpectationUsageError => + error instanceof AgentExpectationUsageError; diff --git 
a/packages/plugin-agent/src/guidance.ts b/packages/plugin-agent/src/guidance.ts index 0a83960dbf7..d6fe062e255 100644 --- a/packages/plugin-agent/src/guidance.ts +++ b/packages/plugin-agent/src/guidance.ts @@ -15,12 +15,28 @@ export type EnrichmentActionDefinition = { }; export const ENRICHMENT_ACTIONS_BY_CHECK_NAME: Record = { - "invalid-expectations-file": { + "expectations-invalid": { category: "bootstrap-allure", - title: "Repair the expectations file", - guidance: "Regenerate a valid YAML or JSON expectations file before the next enrichment iteration.", + title: "Repair the expectations input", + guidance: "Regenerate valid inline expectations or a valid YAML/JSON expectations file before the next iteration.", }, - "no-visible-tests": { + "expectations-empty": { + category: "narrow-test-scope", + title: "Add recognized expectation controls", + guidance: "Rerun with supported M1 expectation controls or omit expectations for an intentionally broad review.", + }, + "expectations-unsupported-control": { + category: "review-manually", + title: "Use supported expectation controls", + guidance: "Replace unsupported controls with supported M1 flags or report weaker checking explicitly.", + }, + "expectations-weak-goal": { + category: "review-manually", + title: "Use a more specific goal next time", + guidance: + "Base conclusions on observed evidence and rerun with a specific goal when expectation precision matters.", + }, + "no-tests-observed": { category: "bootstrap-allure", title: "Restore Allure result generation", guidance: "Make sure the test command emits Allure results before rerunning the enrichment loop.", @@ -42,22 +58,27 @@ export const ENRICHMENT_ACTIONS_BY_CHECK_NAME: Record --expectations -- `.", - "After each test run, print the `index.md` path from that output directory so collaborators can open the run overview quickly.", - "Use `allure agent latest` to recover the newest output directory when a prior run omitted `--output`.", - "Use `allure agent 
state-dir` to inspect where the current project stores its latest-agent state.", - "Use `ALLURE_AGENT_STATE_DIR` when you need to override where the current project stores latest-agent state for `latest`, `state-dir`, or `--rerun-latest`.", - "Use `allure agent select --latest` or `allure agent select --from ` to inspect the review-targeted test plan before rerunning.", - "Use `allure agent --rerun-latest -- ` or `allure agent --rerun-from -- ` to rerun only the selected tests through Allure testplan support. Add `--rerun-preset`, repeated `--rerun-environment`, or repeated `--rerun-label name=value` filters when you need a narrower rerun slice.", - "Inspect `manifest/run.json`, tail `manifest/test-events.jsonl`, then review `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` before editing tests.", - "Enrich only the intended tests, rerun the same scope, and compare the rerun against `manifest/expected.json` when present.", - "Accept the rerun only when scope is clean, evidence is strong enough to review, and no high-confidence dummy findings remain.", +export const AGENT_WORKFLOWS_MARKDOWN = `Use the smallest workflow that matches the task. Each workflow has the same shape: when to use it, which agent-mode commands help, and what must be true before you call the task done. + +### Validate A Change + +Use when code or tests changed and you need a user-facing safety conclusion. For small mechanical changes, use this same workflow with narrower expectations rather than a separate shortcut. 
+ +Commands: + +- \`allure agent --goal --expect-* -- \` + +Done when: + +- the expected scope ran and no forbidden scope appeared +- \`index.md\`, \`manifest/run.json\`, \`manifest/tests.jsonl\`, and \`manifest/findings.jsonl\` were reviewed +- the \`index.md\` path was reported +- the changed package build and required static checks passed when this repository guide requires them + +### Add Or Update Tests + +Use when creating or changing tests for a feature, fix, or behavior gap. + +Commands: + +- \`allure agent --goal --expect-tests --expect-test "" --expect-label name=value --expect-step-containing -- \` + +Done when: + +- the tests prove the intended behavior rather than only touching the code path +- scope expectations match the intended feature, issue, or package slice +- each expected test has enough steps or attachments for a reviewer to understand what happened +- weak evidence, scope drift, and unexpected-test findings are fixed or explicitly accepted as out of scope + +### Review Existing Coverage + +Use when auditing a package, command matrix, feature area, or business behavior without necessarily changing tests first. + +Commands: + +- one scoped \`allure agent --goal --expect-* -- \` run per review group + +Done when: + +- the audit is split into reviewable groups, or it is explicitly documented as a broad package-health run +- each group has expectations that describe the intended scope +- runtime artifacts are reviewed before source-only coverage conclusions +- uncovered behavior is recorded as follow-up test work instead of being hidden in a broad pass/fail summary + +### Triage Failures + +Use when tests failed, broke, or runner output does not match agent artifacts. 
+ +Commands: + +- \`allure agent latest\` +- \`allure agent --rerun-latest --rerun-preset failed -- \` +- \`allure agent --rerun-from --rerun-preset failed -- \` + +Done when: + +- failing, broken, or unmodeled runner-visible failures are represented in agent artifacts, or partial modeling is called out explicitly +- \`artifacts/global/stderr.txt\` and global errors were checked when failures are missing from \`manifest/tests.jsonl\` +- reruns use prior agent output instead of hand-built runner test names whenever the runner can consume the generated test plan + +### Rerun A Prior Scope + +Use when prior agent output already identifies failed, unsuccessful, or review-targeted tests and the next run should stay focused. + +Commands: + +- \`allure agent select --latest [--preset review|failed|unsuccessful|all]\` +- \`allure agent select --from [--environment ] [--label name=value]\` +- \`allure agent --rerun-latest -- \` +- \`allure agent --rerun-from -- \` + +Done when: + +- the rerun scope comes from Allure testplan support +- \`--rerun-preset\`, \`--rerun-environment\`, or \`--rerun-label\` filters explain any narrowed selection +- manual test names are used only as a fallback when testplan support is unavailable +- the rerun output is reviewed before making a new conclusion + +### Improve Evidence Quality + +Use when tests pass or fail but the runtime story is too weak to review. 
+ +Commands: + +- \`allure agent --expect-step-containing --expect-steps --expect-attachments -- \` +- \`allure agent --expect-attachment -- \` + +Done when: + +- steps describe real setup, actions, state transitions, or assertions +- attachments contain runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces +- placeholder steps, generic \`"passed"\` attachments, and other dummy evidence are removed +- the same intended scope was rerun and no high-confidence evidence findings remain + +### Recover Or Diagnose Agent Mode + +Use when agent output is missing, the latest run cannot be found, local CLI support is unclear, or state behaves differently in CI or a sandbox. + +Commands: + +- \`allure --version\` +- \`allure agent capabilities --json\` +- \`allure agent --help\` +- \`allure agent latest\` +- \`allure agent state-dir\` +- \`ALLURE_AGENT_STATE_DIR=\` + +Done when: + +- supported local commands and flags are known from capabilities or help output +- the output directory, \`index.md\` path, or state directory is identified, or the reason it is unavailable is documented +- console-only conclusions stay provisional until agent-mode artifacts are available`; + +export const AGENT_COMMAND_TASK_MAP = [ + "`allure --version`, `allure agent capabilities --json`, and `allure agent --help`: setup and capability-detection loop. Use when the local CLI surface is unknown, generated guidance may be stale, or you need to choose supported flags without guessing.", + "`allure agent --goal ... -- `: test review, feature delivery, smoke-check, and coverage loops. Use when a test command needs runtime evidence, scope expectations, and user-facing conclusions based on agent artifacts rather than console output alone.", + "`allure agent latest`: output recovery loop. 
Use when a previous run omitted `--output`, you need the newest output directory and `index.md` path, or a follow-up task needs prior output before selecting or rerunning tests.", + "`allure agent state-dir`: tooling diagnosis loop. Use when `latest` cannot find a run, CI or sandbox state looks wrong, or you need to explain where project-scoped latest pointers are stored.", + "`allure agent query --latest summary|tests|findings|test` / `allure agent query --from ...`: output inspection loop. Use when you need a focused JSON summary, filtered tests, filtered findings, or one test from prior agent output without manually loading raw manifests first.", + "`allure agent select --latest` / `allure agent select --from `: rerun-planning loop. Use when you need to inspect, filter, or write the Allure test plan from prior results before executing another run. Without `--output`, stdout is raw testplan JSON; with `--output`, stdout summarizes the file path, source output, preset, and selected count.", + "`allure agent --rerun-latest` / `allure agent --rerun-from `: focused retry loop. Use when prior output already identifies failed, unsuccessful, or review-targeted tests and you should rerun that slice through Allure testplan support instead of rebuilding runner-specific test names.", + "`ALLURE_AGENT_STATE_DIR=`: CI and sandbox state-control loop. 
Use when multiple jobs need a deterministic state location, the default temp state is not shared, or the default state directory is not writable.", ] as const; export const AGENT_VERIFICATION_RULES = [ @@ -154,18 +313,6 @@ export const AGENT_VERIFICATION_RULES = [ "After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly.", ] as const; -export const AGENT_SMALL_TEST_CHANGE_WORKFLOW = [ - "Create a fresh temp `ALLURE_AGENT_OUTPUT` and `ALLURE_AGENT_EXPECTATIONS` for the touched scope before closing the task.", - "Run the touched scope with `allure agent`, even if the goal is only a smoke check after a mechanical change such as typing cleanup, mock refactors, or helper extraction.", - "Review `manifest/run.json`, `manifest/test-events.jsonl`, `index.md`, `manifest/tests.jsonl`, and `manifest/findings.jsonl` before making any final claim.", -] as const; - -export const AGENT_COVERAGE_REVIEW_WORKFLOW = [ - "Split package or business-logic audits into scoped groups and give each group its own temp output directory and expectations file.", - "Review agent-mode artifacts first for each group, then inspect source code only after the runtime evidence shows what actually ran.", - "Treat grouped coverage review as incomplete until each scoped run has matching expectations or an explicit note that the audit is intentionally broad.", -] as const; - export const AGENT_TEST_ENRICHMENT_BEST_PRACTICES = [ "Steps must wrap real actions, state transitions, or assertions. Prefer a small setup/action/assertion narrative over event-by-event step spam.", "Attachments must capture real runtime evidence from that execution: payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces.", @@ -205,14 +352,17 @@ export const AGENT_INSTRUCTIONS_TEMPLATE = `## Allure Agent Mode Instructions - Use \`allure agent\` for smoke checks too, even when the change is small or mechanical. 
- Only skip agent mode when it is impossible or when you are debugging agent mode itself. - After each agent-mode test run, print the \`index.md\` path from that run's output directory so users can open the run overview quickly. -- Use \`ALLURE_AGENT_*\` with \`allure run\` only as the lower-level fallback when you need direct environment control. -- Use \`allure agent latest\` to reopen the newest run when \`--output\` was omitted. +- Use \`allure agent latest\` to print the newest output directory and \`index.md\` path when \`--output\` was omitted. +- Use \`allure agent capabilities --json\` when you need structured supported-command, expectation, output, rerun, and unsupported-feature data without scraping help text. - Use \`allure agent state-dir\` to inspect where the current project stores its latest-agent state. -- Use \`allure agent select --latest\` or \`allure agent select --from \` to inspect the review-targeted test plan before rerunning. +- Use \`allure agent latest\`, \`state-dir\`, \`query\`, \`select\`, and \`--rerun-*\` according to their loop/task/problem mapping instead of treating them as interchangeable helper commands. +- Use \`allure agent query --latest summary|tests|findings|test\` or \`allure agent query --from ...\` to inspect prior output as focused JSON before manually opening raw manifests. +- Use \`allure agent select --latest\` or \`allure agent select --from \` to inspect the review-targeted test plan before rerunning; add \`--output \` when you want the CLI to write the plan and print a short selection summary. - Use \`allure agent --rerun-latest -- \` or \`allure agent --rerun-from -- \` to rerun only the selected tests. +- When rerunning previous failures, use \`allure agent --rerun-latest --rerun-preset failed -- \` or \`allure agent --rerun-from --rerun-preset failed -- \` instead of manually rebuilding runner-specific test names. 
- Use \`--rerun-preset review|failed|unsuccessful|all\`, repeated \`--rerun-environment \`, and repeated \`--rerun-label name=value\` when you need a narrower rerun selection from the previous output. - Use \`ALLURE_AGENT_STATE_DIR\` when you need to override where the current project stores latest-agent state for \`latest\`, \`state-dir\`, or \`--rerun-latest\`. -- Generate or refresh \`ALLURE_AGENT_EXPECTATIONS\` before each targeted rerun. +- Prefer inline \`allure agent\` expectation flags such as \`--goal\`, \`--expect-tests\`, \`--expect-test\`, \`--expect-label\`, and \`--expect-step-containing\`; use \`--expectations \` only when flags become awkward. - Run tests with \`allure agent\` and review \`manifest/run.json\`, \`manifest/test-events.jsonl\`, \`index.md\`, \`manifest/tests.jsonl\`, and \`manifest/findings.jsonl\`. - Enrich only the intended tests. Add real steps for real setup, actions, and assertions. - Attach only real runtime evidence such as payloads, responses, screenshots, DOM snapshots, diffs, logs, or traces. @@ -222,32 +372,21 @@ export const AGENT_INSTRUCTIONS_TEMPLATE = `## Allure Agent Mode Instructions const renderBullets = (items: readonly string[]) => items.map((item) => `- ${item}`).join("\n"); -const renderNumbered = (items: readonly string[]) => items.map((item, index) => `${index + 1}. ${item}`).join("\n"); - const renderRemediationGuide = () => Object.entries(ENRICHMENT_ACTIONS_BY_CHECK_NAME) .map(([checkName, action]) => `- \`${checkName}\`: ${action.title}. ${action.guidance}`) .join("\n"); -export const renderAgentsGuide = (projectGuidePath?: string) => +export const renderAgentsGuide = () => `# AGENTS Guide ## Reading Order -${ - projectGuidePath - ? `1. Read [project guidance](${projectGuidePath}) first for repo-specific testing conventions and loop expectations. -2. Read \`manifest/run.json\` for the current phase, counts, and modeling summary. -3. 
Tail \`manifest/test-events.jsonl\` for the newest structured updates while the run is active. -4. Open \`index.md\` for run-level status, scope summary, and the highest-priority findings. -5. Open the relevant file under \`tests//.md\` for evidence review. -6. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr.` - : `1. Read \`manifest/run.json\` for the current phase, counts, and modeling summary. +1. Read \`manifest/run.json\` for the current phase, counts, and modeling summary. 2. Tail \`manifest/test-events.jsonl\` for the newest structured updates while the run is active. 3. Open \`index.md\` for run-level status, scope summary, and the highest-priority findings. 4. Open the relevant file under \`tests//.md\` for evidence review. -5. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr.` -} +5. Follow links into \`.assets/\` for test-scoped artifacts and into \`artifacts/global/\` for process logs such as stdout and stderr. ## Directory Contract @@ -256,28 +395,23 @@ ${ - \`manifest/test-events.jsonl\` is the append-only live event stream for machine consumers during the run. - \`manifest/tests.jsonl\` contains one logical test summary per line. - \`manifest/findings.jsonl\` contains one advisory finding per line. -- \`manifest/expected.json\` is copied from \`ALLURE_AGENT_EXPECTATIONS\` when provided. -- \`project/docs/allure-agent-mode.md\` is copied from the project when available so each run keeps the guide used for that execution. +- \`manifest/expected.json\` contains normalized expectations from inline flags or \`--expectations \` when provided. - \`tests//.md\` contains one logical test per file. - Retries from the same run are nested inside the same logical test file. - \`tests//.assets/\` contains copied attachments for that logical test. 
- \`artifacts/global/\` contains copied global artifacts for the whole run. -## Enrichment Loop Workflow +## Command Task Map -${renderNumbered(AGENT_ENRICHMENT_WORKFLOW)} +${renderBullets(AGENT_COMMAND_TASK_MAP)} -## Verification Standard +## Agent Workflows -${renderBullets(AGENT_VERIFICATION_RULES)} +${AGENT_WORKFLOWS_MARKDOWN} -## Small Test Change Workflow - -${renderNumbered(AGENT_SMALL_TEST_CHANGE_WORKFLOW)} - -## Coverage Review Workflow +## Verification Standard -${renderNumbered(AGENT_COVERAGE_REVIEW_WORKFLOW)} +${renderBullets(AGENT_VERIFICATION_RULES)} ## Test Enrichment Best Practices diff --git a/packages/plugin-agent/src/harness.ts b/packages/plugin-agent/src/harness.ts index ccef9cfcb16..333fefce79a 100644 --- a/packages/plugin-agent/src/harness.ts +++ b/packages/plugin-agent/src/harness.ts @@ -10,6 +10,15 @@ export type AgentFindingCategory = "bootstrap" | "scope" | "metadata" | "evidenc export type AgentScopeMatch = "match" | "unexpected" | "forbidden" | "unknown"; export type AgentAcceptanceStatus = "accept" | "iterate" | "reject"; export type AgentAcceptanceImpact = "advisory" | "iterate" | "reject"; +export type AgentExpectationResultStatus = + | "matched" + | "failed" + | "partial" + | "degraded" + | "unsupported" + | "unavailable" + | "not_requested"; +export type AgentExpectationResultImpact = "accept" | "reject" | "iterate" | "advisory"; export type AgentEnrichmentActionCategory = EnrichmentActionCategory; export type AgentExpectationSelector = { @@ -17,6 +26,18 @@ export type AgentExpectationSelector = { full_names?: string[]; full_name_prefixes?: string[]; label_values?: Record; + test_count?: number; +}; + +export type AgentEvidenceExpectations = { + required?: boolean; + min_steps?: number; + min_attachments?: number; + step_name_contains?: string[]; + attachments?: Array<{ + name?: string; + content_type?: string; + }>; }; export type AgentExpectations = { @@ -24,6 +45,7 @@ export type AgentExpectations = { task_id?: string; expected?: 
AgentExpectationSelector; forbidden?: AgentExpectationSelector; + evidence?: AgentEvidenceExpectations; notes?: string[]; }; @@ -107,7 +129,6 @@ export type AgentRunManifest = { findings_manifest: string; test_events_manifest?: string; expected_manifest: string | null; - project_guide: string | null; process_logs: { stdout: string | null; stderr: string | null; @@ -158,6 +179,28 @@ export type AgentRunManifest = { }; }; expectations_present: boolean; + expectations: AgentExpectations | null; + expectation_result: { + schema_version: "allure-agent-expectation-result/v1"; + status: AgentExpectationResultStatus; + impact: AgentExpectationResultImpact; + source: { + kind: "inline" | "file" | "none"; + path: string | null; + }; + recognized_control_count: number; + unsupported_controls: string[]; + degraded_controls: string[]; + summary: { + expected_tests: number; + observed_tests: number; + missing_expected: number; + forbidden_observed: number; + unexpected_observed: number; + evidence_mismatches: number; + }; + finding_ids: string[]; + }; check_summary: { total: number; countsBySeverity: Record; @@ -195,17 +238,48 @@ export type AgentTestManifestLine = { }; export type AgentFindingManifestLine = { + schema_version?: "allure-agent-finding/v2"; + check_id?: string; + instance_id?: string; finding_id: string; - subject: string; + subject: + | string + | { + type: "run" | "test" | "environment" | "attachment" | "global"; + id?: string; + path?: string; + full_name?: string; + environment?: string; + }; + subject_ref?: string; + subject_type?: "run" | "test"; severity: AgentFindingSeverity; + impact?: AgentAcceptanceImpact; category: AgentFindingCategory; check_name: string; + title?: string; message: string; explanation: string; evidence_paths: string[]; remediation_hint: string; expected_reference?: string; confidence?: number; + expected?: Record; + observed?: Record; + evidence?: { + paths?: string[]; + }; + action?: string; + legacy?: { + finding_id: string; + 
subject: string; + subject_type?: "run" | "test"; + check_name: string; + explanation?: string; + evidence_paths?: string[]; + remediation_hint: string; + expected_reference?: string; + }; }; export type AgentOutputBundle = { @@ -284,21 +358,28 @@ export const AGENT_ENRICHMENT_ACTIONS: Record = O ) as Record; export const SCOPE_REJECTING_CHECKS = [ - "missing-expected-test", - "missing-expected-prefix", - "missing-expected-environment", + "expected-test-missing", + "expected-count-mismatch", + "expected-prefix-missing", + "expected-label-missing", + "expected-environment-missing", + "no-tests-observed", "unexpected-environment", - "forbidden-selector-match", + "forbidden-label-observed", "unexpected-test", ] as const; export const ITERATION_REQUIRED_CHECKS = [ - "invalid-expectations-file", - "no-visible-tests", + "expectations-invalid", + "expectations-empty", + "expectations-unsupported-control", "runner-failures-outside-logical-results", - "missing-expected-label-selector", "metadata-mismatch", "history-id-collision", + "expected-step-containing-missing", + "insufficient-expected-steps", + "insufficient-expected-attachments", + "missing-expected-attachment", "failed-without-useful-steps", "failed-without-attachments", "nontrivial-run-with-empty-trace", @@ -322,6 +403,28 @@ const IMPACT_ORDER: Record = { const uniqueValues = (values: string[]) => Array.from(new Set(values)); +const checkNameForFinding = (finding: AgentFindingManifestLine) => finding.check_id ?? finding.check_name; + +const subjectRefForFinding = (finding: AgentFindingManifestLine) => { + if (finding.subject_ref) { + return finding.subject_ref; + } + + if (typeof finding.subject === "string") { + return finding.subject; + } + + return finding.subject.path ?? finding.subject.id ?? finding.subject.type; +}; + +const subjectTypeForFinding = (finding: AgentFindingManifestLine): "run" | "test" => + finding.subject_type ?? + (typeof finding.subject === "object" && finding.subject.type === "test" + ? 
"test" + : subjectRefForFinding(finding) === "run" + ? "run" + : "test"); + const normalizeStringArray = (value?: string | string[]) => { if (typeof value === "string") { return value.length ? [value] : []; @@ -426,18 +529,24 @@ const impactForFinding = ( finding: AgentFindingManifestLine, antiDummyConfidenceThreshold: number, ): AgentAcceptanceImpact => { - if (SCOPE_REJECTING_CHECKS.includes(finding.check_name as (typeof SCOPE_REJECTING_CHECKS)[number])) { + if (finding.impact === "reject" || finding.impact === "iterate" || finding.impact === "advisory") { + return finding.impact; + } + + const checkName = checkNameForFinding(finding); + + if (SCOPE_REJECTING_CHECKS.includes(checkName as (typeof SCOPE_REJECTING_CHECKS)[number])) { return "reject"; } if ( - ANTI_DUMMY_CHECKS.includes(finding.check_name as (typeof ANTI_DUMMY_CHECKS)[number]) && + ANTI_DUMMY_CHECKS.includes(checkName as (typeof ANTI_DUMMY_CHECKS)[number]) && (finding.confidence ?? 0) >= antiDummyConfidenceThreshold ) { return "reject"; } - if (ITERATION_REQUIRED_CHECKS.includes(finding.check_name as (typeof ITERATION_REQUIRED_CHECKS)[number])) { + if (ITERATION_REQUIRED_CHECKS.includes(checkName as (typeof ITERATION_REQUIRED_CHECKS)[number])) { return "iterate"; } @@ -463,7 +572,7 @@ export const buildAgentExpectations = (input: AgentHarnessRequest): AgentExpecta }; export const mapFindingToEnrichmentAction = (finding: AgentFindingManifestLine | string): AgentEnrichmentAction => { - const checkName = typeof finding === "string" ? finding : finding.check_name; + const checkName = typeof finding === "string" ? finding : checkNameForFinding(finding); const mapped = AGENT_ENRICHMENT_ACTIONS[checkName]; return mapped ?? 
{ ...FALLBACK_ACTION, checkName }; @@ -498,17 +607,18 @@ export const planAgentEnrichmentReview = ( const plan = sortPlan( output.findings.map((finding) => { const action = mapFindingToEnrichmentAction(finding); - const matchedTest = testsByPath.get(finding.subject); + const subject = subjectRefForFinding(finding); + const matchedTest = testsByPath.get(subject); return { ...action, - subject: finding.subject, - subjectType: finding.subject === "run" ? "run" : "test", + subject, + subjectType: subjectTypeForFinding(finding), severity: finding.severity, message: finding.message, explanation: finding.explanation, - remediationHint: finding.remediation_hint, - evidencePaths: finding.evidence_paths, + remediationHint: finding.action ?? finding.remediation_hint, + evidencePaths: finding.evidence?.paths ?? finding.evidence_paths, expectedReference: finding.expected_reference, confidence: finding.confidence, acceptanceImpact: impactForFinding(finding, antiDummyConfidenceThreshold), @@ -525,7 +635,7 @@ export const planAgentEnrichmentReview = ( if (!output.run.expectations_present) { notes.push( - "Generate ALLURE_AGENT_EXPECTATIONS before the next enrichment iteration so scope checks are comparable.", + "Declare inline expectations or provide an expectations file before the next enrichment iteration so scope checks are comparable.", ); } diff --git a/packages/plugin-agent/src/index.ts b/packages/plugin-agent/src/index.ts index 57de8d402c7..b7ae6d8553b 100644 --- a/packages/plugin-agent/src/index.ts +++ b/packages/plugin-agent/src/index.ts @@ -1,3 +1,18 @@ -export { type AgentPluginOptions } from "./model.js"; +export { + type AgentAttachmentExpectationInput, + type AgentEvidenceExpectationInput, + type AgentExpectationSelectorInput, + type AgentExpectationsInput, + type AgentPluginOptions, + parseAgentExpectations, +} from "./model.js"; +export * from "./capabilities.js"; +export * from "./errors.js"; export * from "./harness.js"; +export * from "./inline-expectations.js"; 
+export * from "./invalid-output.js"; +export * from "./paths.js"; +export * from "./query.js"; +export * from "./selection.js"; +export * from "./state.js"; export { AgentPlugin as default } from "./plugin.js"; diff --git a/packages/plugin-agent/src/inline-expectations.ts b/packages/plugin-agent/src/inline-expectations.ts new file mode 100644 index 00000000000..cf27ad8b7f0 --- /dev/null +++ b/packages/plugin-agent/src/inline-expectations.ts @@ -0,0 +1,295 @@ +import { readFile } from "node:fs/promises"; +import { resolve } from "node:path"; + +import { AgentExpectationUsageError, AgentUsageError } from "./errors.js"; +import type { AgentAttachmentExpectationInput, AgentExpectationsInput } from "./model.js"; +import { parseAgentExpectations } from "./model.js"; +import { isPathInside } from "./paths.js"; + +type SingleStringOptionValue = string | string[] | undefined; + +export type AgentInlineExpectationOptions = { + goal?: SingleStringOptionValue; + taskId?: SingleStringOptionValue; + expectTests?: SingleStringOptionValue; + expectLabels?: string[]; + expectEnvironments?: string[]; + expectFullNames?: string[]; + expectPrefixes?: string[]; + forbidLabels?: string[]; + expectStepContains?: string[]; + expectSteps?: SingleStringOptionValue; + expectAttachments?: SingleStringOptionValue; + expectAttachmentFilters?: string[]; +}; + +const readNonNegativeInteger = (value: string | undefined, optionName: string): number | undefined => { + if (value === undefined) { + return undefined; + } + + if (!/^\d+$/.test(value)) { + throw new AgentExpectationUsageError(`${optionName} must be a non-negative integer`, optionName); + } + + const parsed = Number(value); + + if (!Number.isSafeInteger(parsed)) { + throw new AgentExpectationUsageError(`${optionName} must be a non-negative integer`, optionName); + } + + return parsed; +}; + +const readPositiveInteger = (value: string | undefined, optionName: string): number | undefined => { + if (value === undefined) { + return 
undefined; + } + + if (!/^[1-9]\d*$/.test(value)) { + throw new AgentExpectationUsageError(`${optionName} must be a positive integer`, optionName); + } + + const parsed = Number(value); + + if (!Number.isSafeInteger(parsed)) { + throw new AgentExpectationUsageError(`${optionName} must be a positive integer`, optionName); + } + + return parsed; +}; + +const readSingleStringOption = (value: SingleStringOptionValue, optionName: string): string | undefined => { + const values = Array.isArray(value) ? value : typeof value === "string" ? [value] : []; + + if (values.length > 1) { + throw new AgentExpectationUsageError(`Duplicate ${optionName} is not allowed`, optionName); + } + + if (!values.length) { + return undefined; + } + + const normalized = values[0].trim(); + + if (!normalized) { + throw new AgentExpectationUsageError(`${optionName} value must be non-empty`, optionName); + } + + return normalized; +}; + +const parseNameValue = (value: string, optionName: string, example: string) => { + const parts = value.split("="); + + if (parts.length !== 2) { + throw new AgentExpectationUsageError( + `Invalid ${optionName} ${JSON.stringify(value)}. Expected ${example}`, + optionName, + ); + } + + const name = parts[0].trim(); + const filterValue = parts[1].trim(); + + if (!name || !filterValue) { + throw new AgentExpectationUsageError( + `Invalid ${optionName} ${JSON.stringify(value)}. Expected ${example}`, + optionName, + ); + } + + return { + name, + value: filterValue, + }; +}; + +const addLabelValues = (target: Record, values: string[] | undefined, optionName: string) => { + for (const rawValue of values ?? []) { + const { name, value } = parseNameValue(rawValue, optionName, "the form name=value, for example module=cli"); + const current = target[name] ?? 
[]; + + if (!current.includes(value)) { + current.push(value); + } + + target[name] = current; + } +}; + +const normalizeStringValues = (values: string[] | undefined, optionName: string) => { + const normalized: string[] = []; + + for (const value of values ?? []) { + const trimmed = value.trim(); + + if (!trimmed) { + throw new AgentExpectationUsageError(`${optionName} value must be non-empty`, optionName); + } + + normalized.push(trimmed); + } + + return normalized; +}; + +export const buildAgentInlineExpectations = ( + options: AgentInlineExpectationOptions, +): AgentExpectationsInput | undefined => { + const expectedLabels: Record = {}; + const forbiddenLabels: Record = {}; + const expected: NonNullable = {}; + const forbidden: NonNullable = {}; + const evidence: NonNullable = {}; + const attachmentFilters: AgentAttachmentExpectationInput[] = []; + + addLabelValues(expectedLabels, options.expectLabels, "--expect-label"); + addLabelValues(forbiddenLabels, options.forbidLabels, "--forbid-label"); + + const expectTests = readNonNegativeInteger(readSingleStringOption(options.expectTests, "--expect-tests"), "--expect-tests"); + const expectSteps = readPositiveInteger(readSingleStringOption(options.expectSteps, "--expect-steps"), "--expect-steps"); + const expectAttachments = readPositiveInteger( + readSingleStringOption(options.expectAttachments, "--expect-attachments"), + "--expect-attachments", + ); + const expectedEnvironments = normalizeStringValues(options.expectEnvironments, "--expect-env"); + const expectedFullNames = normalizeStringValues(options.expectFullNames, "--expect-test"); + const expectedPrefixes = normalizeStringValues(options.expectPrefixes, "--expect-prefix"); + const expectedStepContains = normalizeStringValues(options.expectStepContains, "--expect-step-containing"); + + for (const rawValue of options.expectAttachmentFilters ?? []) { + const parsed = rawValue.includes("=") + ? 
parseNameValue( + rawValue, + "--expect-attachment", + "a file name or a filter such as name=trace.zip or content-type=application/json", + ) + : { name: "name", value: rawValue.trim() }; + const normalizedName = parsed.name.toLowerCase().replace(/_/g, "-"); + + if (!parsed.value) { + throw new AgentExpectationUsageError( + "Invalid --expect-attachment value. Expected a non-empty file name or filter such as name=trace.zip", + "--expect-attachment", + ); + } + + if (normalizedName === "name") { + attachmentFilters.push({ name: parsed.value }); + continue; + } + + if (normalizedName === "content-type" || normalizedName === "type") { + attachmentFilters.push({ content_type: parsed.value }); + continue; + } + + throw new AgentExpectationUsageError( + `Invalid --expect-attachment key ${JSON.stringify(parsed.name)}. Expected name or content-type`, + "--expect-attachment", + ); + } + + if (expectTests !== undefined) { + expected.test_count = expectTests; + } + + if (expectedEnvironments.length) { + expected.environments = expectedEnvironments; + } + + if (expectedFullNames.length) { + expected.full_names = expectedFullNames; + } + + if (expectedPrefixes.length) { + expected.full_name_prefixes = expectedPrefixes; + } + + if (Object.keys(expectedLabels).length) { + expected.label_values = expectedLabels; + } + + if (Object.keys(forbiddenLabels).length) { + forbidden.label_values = forbiddenLabels; + } + + if (expectSteps !== undefined) { + evidence.min_steps = expectSteps; + } + + if (expectAttachments !== undefined) { + evidence.min_attachments = expectAttachments; + } + + if (expectedStepContains.length) { + evidence.step_name_contains = expectedStepContains; + } + + if (attachmentFilters.length) { + evidence.attachments = attachmentFilters; + } + + if ( + expected.test_count === 0 && + (expected.environments?.length || + expected.full_names?.length || + expected.full_name_prefixes?.length || + Object.keys(expected.label_values ?? 
{}).length || + evidence.step_name_contains?.length || + evidence.min_steps !== undefined || + evidence.min_attachments !== undefined || + evidence.attachments?.length) + ) { + throw new AgentExpectationUsageError( + "--expect-tests 0 cannot be combined with positive scope or evidence expectations", + "--expect-tests", + ); + } + + const inlineExpectations: AgentExpectationsInput = { + ...(readSingleStringOption(options.goal, "--goal") ? { goal: readSingleStringOption(options.goal, "--goal") } : {}), + ...(readSingleStringOption(options.taskId, "--task-id") + ? { task_id: readSingleStringOption(options.taskId, "--task-id") } + : {}), + ...(Object.keys(expected).length ? { expected } : {}), + ...(Object.keys(forbidden).length ? { forbidden } : {}), + ...(Object.keys(evidence).length ? { evidence } : {}), + }; + + return Object.keys(inlineExpectations).length ? inlineExpectations : undefined; +}; + +export const validateAgentExpectationsFile = async (params: { + cwd: string; + output?: string; + expectations?: string; +}) => { + const { cwd, output, expectations } = params; + + if (!expectations) { + return; + } + + const expectationsPath = resolve(cwd, expectations); + + if (output) { + const outputDir = resolve(cwd, output); + + if (isPathInside(outputDir, expectationsPath)) { + throw new AgentUsageError( + `--expectations path ${JSON.stringify(expectationsPath)} must not be inside the agent output directory ${JSON.stringify(outputDir)}`, + ); + } + } + + try { + parseAgentExpectations(await readFile(expectationsPath, "utf-8")); + } catch (error) { + throw new AgentExpectationUsageError( + `Could not load expectations from ${expectationsPath}: ${(error as Error).message}`, + "--expectations", + ); + } +}; diff --git a/packages/plugin-agent/src/invalid-output.ts b/packages/plugin-agent/src/invalid-output.ts new file mode 100644 index 00000000000..db7092dd1d2 --- /dev/null +++ b/packages/plugin-agent/src/invalid-output.ts @@ -0,0 +1,259 @@ +import { mkdir, rm, 
writeFile } from "node:fs/promises"; +import { dirname, join } from "node:path"; + +import { AgentExpectationUsageError } from "./errors.js"; + +const isFileNotFoundError = (error: unknown): error is NodeJS.ErrnoException => + typeof error === "object" && error !== null && "code" in error && error.code === "ENOENT"; + +const emptyAgentStats = () => ({ + total: 0, + failed: 0, + broken: 0, + skipped: 0, + unknown: 0, + passed: 0, +}); + +const writeJson = async (path: string, value: unknown) => { + await mkdir(dirname(path), { recursive: true }); + await writeFile(path, `${JSON.stringify(value, null, 2)}\n`, "utf-8"); +}; + +const writeText = async (path: string, value: string) => { + await mkdir(dirname(path), { recursive: true }); + await writeFile(path, value, "utf-8"); +}; + +const writeJsonl = async (path: string, values: unknown[]) => { + await writeText(path, values.map((value) => JSON.stringify(value)).join("\n") + (values.length ? "\n" : "")); +}; + +export const createInvalidExpectationFinding = (params: { message: string; sourceOption?: string }) => { + const action = "Fix expectation syntax before using the run as validation."; + + return { + schema_version: "allure-agent-finding/v2", + check_id: "expectations-invalid", + instance_id: "F0001", + severity: "high", + impact: "reject", + confidence: 1, + category: "bootstrap", + title: "Expectation input is invalid", + message: params.message, + subject: { + type: "run", + }, + expected: params.sourceOption ? { option: params.sourceOption } : { expectations: "valid M1 expectation input" }, + observed: { + error: params.message, + execution_skipped: true, + }, + evidence: { + paths: ["manifest/run.json"], + }, + action, + source: params.sourceOption + ? 
{ + kind: "inline-option", + option: params.sourceOption, + } + : undefined, + legacy: { + finding_id: "F0001", + check_name: "expectations-invalid", + remediation_hint: action, + }, + finding_id: "F0001", + subject_ref: "run", + subject_type: "run", + check_name: "expectations-invalid", + explanation: "The agent expectation controls could not be parsed, so the test command was not executed.", + evidence_paths: ["manifest/run.json"], + remediation_hint: action, + expected_reference: params.sourceOption, + }; +}; + +export const writeInvalidAgentExpectationOutput = async (params: { + outputDir: string; + command: string; + error: AgentExpectationUsageError; +}) => { + const { outputDir, command, error } = params; + const generatedAt = new Date().toISOString(); + const finding = createInvalidExpectationFinding({ + message: error.message, + sourceOption: error.sourceOption, + }); + const stats = emptyAgentStats(); + + try { + await rm(outputDir, { recursive: true }); + } catch (rmError) { + if (!isFileNotFoundError(rmError)) { + console.error("could not clean output directory", rmError); + } + } + + const runManifest = { + schema_version: "allure-agent-output/v1", + report_uuid: null, + generated_at: generatedAt, + phase: "done", + command, + actual_exit_code: null, + original_exit_code: null, + exit_code: null, + summary: { + stats, + modeled_stats: stats, + unmodeled_from_stats: stats, + compact: { + visible_results: 0, + logical_tests: 0, + unmodeled_visible_results: 0, + runner_failures_outside_logical_tests: 0, + completeness: "complete", + findings: 1, + }, + duration_ms: { + total: 0, + average: 0, + max: 0, + }, + environments: [], + }, + modeling: { + completeness: "complete", + reasons: ["test command was skipped because agent expectations were invalid"], + modeledStats: stats, + unmodeledFromStats: stats, + runnerFailures: { + total: 0, + globalErrors: 0, + stderrActionable: 0, + samples: [], + }, + stderr: { + actionableCount: 0, + actionableSamples: [], + 
noisyWarningCount: 0, + noisyWarningSamples: [], + }, + compact: { + visible_results: 0, + logical_tests: 0, + unmodeled_visible_results: 0, + runner_failures_outside_logical_tests: 0, + completeness: "complete", + }, + }, + paths: { + index_md: "index.md", + agents_md: "AGENTS.md", + tests_manifest: "manifest/tests.jsonl", + findings_manifest: "manifest/findings.jsonl", + test_events_manifest: "manifest/test-events.jsonl", + expected_manifest: null, + process_logs: { + stdout: null, + stderr: null, + }, + }, + expectations_present: false, + expectations: null, + expectation_result: { + schema_version: "allure-agent-expectation-result/v1", + status: "unavailable", + impact: "reject", + source: { + kind: "none", + path: null, + }, + recognized_control_count: 0, + unsupported_controls: [], + degraded_controls: [], + summary: { + expected_tests: 0, + observed_tests: 0, + missing_expected: 0, + forbidden_observed: 0, + unexpected_observed: 0, + evidence_mismatches: 0, + }, + finding_ids: ["F0001"], + }, + check_summary: { + total: 1, + countsBySeverity: { + high: 1, + warning: 0, + info: 0, + }, + countsByCategory: { + bootstrap: 1, + scope: 0, + metadata: 0, + evidence: 0, + smells: 0, + }, + }, + agent_context: { + agent_name: null, + loop_id: null, + task_id: null, + conversation_id: null, + }, + }; + const index = [ + "# Allure Agent Run", + "", + "- Phase: done", + `- Command: ${command || "(not executed)"}`, + "- Exit code: not available", + "", + "## Expectation Result", + "", + "- Status: unavailable", + "- Impact: reject", + "- Recognized controls: 0", + "- Summary: expectation input was invalid; test execution was skipped", + "", + "## Findings", + "", + `- [HIGH][reject][bootstrap] ${finding.title}`, + ` Expected: ${error.sourceOption ?? 
"valid M1 expectation input"}`, + ` Observed: ${error.message}`, + ` Action: ${finding.action}`, + "", + "## Machine-Readable Artifacts", + "", + "- Run Manifest: [manifest/run.json](manifest/run.json)", + "- Findings Manifest: [manifest/findings.jsonl](manifest/findings.jsonl)", + "", + ].join("\n"); + + await Promise.all([ + writeJson(join(outputDir, "manifest", "run.json"), runManifest), + writeJsonl(join(outputDir, "manifest", "findings.jsonl"), [finding]), + writeJsonl(join(outputDir, "manifest", "tests.jsonl"), []), + writeJsonl(join(outputDir, "manifest", "test-events.jsonl"), []), + writeText(join(outputDir, "index.md"), index), + writeText( + join(outputDir, "AGENTS.md"), + [ + "# Allure Agent Output", + "", + "Read `index.md`, `manifest/run.json`, and `manifest/findings.jsonl` before using this run.", + "", + ].join("\n"), + ), + ]); + + return { + outputDir, + generatedAt, + }; +}; diff --git a/packages/plugin-agent/src/model.ts b/packages/plugin-agent/src/model.ts index 24374ceda4a..47a32c33e04 100644 --- a/packages/plugin-agent/src/model.ts +++ b/packages/plugin-agent/src/model.ts @@ -1,3 +1,52 @@ +import { parse } from "yaml"; + +export type AgentExpectationSelectorInput = { + environments?: string[]; + full_names?: string[]; + full_name_prefixes?: string[]; + label_values?: Record; + test_count?: number; +}; + +export type AgentAttachmentExpectationInput = { + name?: string; + content_type?: string; +}; + +export type AgentEvidenceExpectationInput = { + required?: boolean; + min_steps?: number; + min_attachments?: number; + step_name_contains?: string[]; + attachments?: AgentAttachmentExpectationInput[]; +}; + +export type AgentExpectationsInput = { + goal?: string; + task_id?: string; + expected?: AgentExpectationSelectorInput; + forbidden?: AgentExpectationSelectorInput; + evidence?: AgentEvidenceExpectationInput; + notes?: string | string[]; +}; + export type AgentPluginOptions = { outputDir?: string; + expectationsPath?: string; + expectations?: 
AgentExpectationsInput; + command?: string; + agentName?: string; + loopId?: string; + taskId?: string; + conversationId?: string; +}; + +export const parseAgentExpectations = (rawContent: string): AgentExpectationsInput => { + const parsed = parse(rawContent) as AgentExpectationsInput; + + if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { + throw new Error("Expected a YAML or JSON object"); + } + + return parsed; }; diff --git a/packages/plugin-agent/src/paths.ts b/packages/plugin-agent/src/paths.ts new file mode 100644 index 00000000000..d181c06e490 --- /dev/null +++ b/packages/plugin-agent/src/paths.ts @@ -0,0 +1,14 @@ +import { join, relative } from "node:path"; + +export const isPathInside = (parentPath: string, candidatePath: string) => { + const rel = relative(parentPath, candidatePath); + + return rel === "" || (!rel.startsWith("..") && rel !== "." && !rel.startsWith("../")); +}; + +export const resolveAgentIndexPath = (outputDir: string) => join(outputDir, "index.md"); + +export const formatAgentOutputLinks = (outputDir: string) => [ + `agent output: ${outputDir}`, + `agent index: ${resolveAgentIndexPath(outputDir)}`, +]; diff --git a/packages/plugin-agent/src/plugin.ts b/packages/plugin-agent/src/plugin.ts index 588fbcdd277..fd84c668ba5 100644 --- a/packages/plugin-agent/src/plugin.ts +++ b/packages/plugin-agent/src/plugin.ts @@ -1,6 +1,6 @@ import { appendFile, mkdir, readFile, rename, rm, writeFile } from "node:fs/promises"; import { basename, dirname, extname, join, relative, resolve } from "node:path"; -import process, { env } from "node:process"; +import process from "node:process"; import { type AttachmentLink, @@ -24,21 +24,18 @@ import type { RealtimeSubscriber, ResultFile, } from "@allurereport/plugin-api"; -import { parse } from "yaml"; import { renderAgentsGuide } from "./guidance.js"; -import type { AgentPluginOptions } from "./model.js"; - -const AGENT_OUTPUT_ENV = "ALLURE_AGENT_OUTPUT"; -const AGENT_EXPECTATIONS_ENV = 
"ALLURE_AGENT_EXPECTATIONS"; -const AGENT_COMMAND_ENV = "ALLURE_AGENT_COMMAND"; -const AGENT_PROJECT_ROOT_ENV = "ALLURE_AGENT_PROJECT_ROOT"; -const AGENT_NAME_ENV = "ALLURE_AGENT_NAME"; -const AGENT_LOOP_ID_ENV = "ALLURE_AGENT_LOOP_ID"; -const AGENT_TASK_ID_ENV = "ALLURE_AGENT_TASK_ID"; -const AGENT_CONVERSATION_ID_ENV = "ALLURE_AGENT_CONVERSATION_ID"; +import type { + AgentEvidenceExpectationInput, + AgentExpectationSelectorInput, + AgentExpectationsInput, + AgentPluginOptions, +} from "./model.js"; +import { parseAgentExpectations } from "./model.js"; + const AGENT_SCHEMA_VERSION = "allure-agent-output/v1"; -const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest", "project"] as const; +const MANAGED_ENTRIES = ["index.md", "AGENTS.md", "tests", "artifacts", "manifest"] as const; const STATUS_ORDER: Record = { failed: 0, broken: 1, @@ -86,6 +83,7 @@ const STACK_TRACE_LINE_PATTERN = /^\s*(at\s+|file:|node:internal|Caused by:\s*$| type FindingSeverity = "info" | "warning" | "high"; type FindingCategory = "bootstrap" | "scope" | "metadata" | "evidence" | "smells"; +type FindingImpact = "reject" | "iterate" | "advisory"; type ScopeMatch = "match" | "unexpected" | "forbidden" | "unknown"; type ModelingCompleteness = "complete" | "partial"; type RunnerIssueKind = "import" | "suite-load" | "setup" | "global-error"; @@ -191,10 +189,15 @@ type AgentRuntimeState = { store: AllureStore; generatedAt: string; command?: string; + agentContext: { + agentName?: string; + loopId?: string; + taskId?: string; + conversationId?: string; + }; createFinding: ReturnType; expectations?: LoadedExpectations; expectationLoadFindings: AgentFinding[]; - projectGuide?: LoadedProjectGuide; unsubscribers: Array<() => void>; queue: Promise; lastError?: Error; @@ -233,29 +236,23 @@ type AgentFinding = { subject: string; subjectType: "run" | "test"; severity: FindingSeverity; + impact?: FindingImpact; category: FindingCategory; checkName: string; + title?: string; message: 
string; explanation: string; evidencePaths: string[]; remediationHint: string; expectedReference?: string; confidence?: number; -}; - -type ExpectationSelectorInput = { - environments?: string[]; - full_names?: string[]; - full_name_prefixes?: string[]; - label_values?: Record; -}; - -type ExpectationsInput = { - goal?: string; - task_id?: string; - expected?: ExpectationSelectorInput; - forbidden?: ExpectationSelectorInput; - notes?: string | string[]; + expected?: Record; + observed?: Record; + action?: string; + source?: Record; + limits?: string; + affected?: Record; + moreCount?: number; }; type NormalizedExpectationSelectors = { @@ -263,22 +260,32 @@ type NormalizedExpectationSelectors = { fullNames: string[]; fullNamePrefixes: string[]; labelValues: Record; + testCount?: number; +}; + +type NormalizedAttachmentExpectation = { + name?: string; + contentType?: string; +}; + +type NormalizedEvidenceExpectations = { + minSteps?: number; + minAttachments?: number; + stepNameContains: string[]; + attachments: NormalizedAttachmentExpectation[]; }; type LoadedExpectations = { - sourcePath: string; + sourcePath?: string; + sourceKind: "file" | "inline"; relativePath: string; - raw: ExpectationsInput; + raw: AgentExpectationsInput; goal?: string; taskId?: string; notes: string[]; expected: NormalizedExpectationSelectors; forbidden: NormalizedExpectationSelectors; -}; - -type LoadedProjectGuide = { - sourcePath: string; - relativePath: string; + evidence: NormalizedEvidenceExpectations; }; type ScopeEvaluation = { @@ -393,11 +400,18 @@ const normalizeLabelValues = (value: unknown) => { ); }; -const normalizeSelectors = (input?: ExpectationSelectorInput): NormalizedExpectationSelectors => ({ +const normalizeNonNegativeInteger = (value: unknown) => + typeof value === "number" && Number.isInteger(value) && value >= 0 ? value : undefined; + +const normalizePositiveInteger = (value: unknown) => + typeof value === "number" && Number.isInteger(value) && value > 0 ? 
value : undefined; + +const normalizeSelectors = (input?: AgentExpectationSelectorInput): NormalizedExpectationSelectors => ({ environments: normalizeStringArray(input?.environments), fullNames: normalizeStringArray(input?.full_names), fullNamePrefixes: normalizeStringArray(input?.full_name_prefixes), labelValues: normalizeLabelValues(input?.label_values), + testCount: normalizeNonNegativeInteger(input?.test_count), }); const hasSelector = (selectors: NormalizedExpectationSelectors) => @@ -406,6 +420,25 @@ const hasSelector = (selectors: NormalizedExpectationSelectors) => selectors.fullNamePrefixes.length > 0 || Object.keys(selectors.labelValues).length > 0; +const normalizeEvidenceExpectations = (input?: AgentEvidenceExpectationInput): NormalizedEvidenceExpectations => ({ + minSteps: normalizePositiveInteger(input?.min_steps), + minAttachments: normalizePositiveInteger(input?.min_attachments), + stepNameContains: normalizeStringArray(input?.step_name_contains), + attachments: (Array.isArray(input?.attachments) ? input.attachments : []).flatMap((attachment) => { + if (!attachment || typeof attachment !== "object") { + return []; + } + + const name = typeof attachment.name === "string" && attachment.name.length > 0 ? attachment.name : undefined; + const contentType = + typeof attachment.content_type === "string" && attachment.content_type.length > 0 + ? attachment.content_type + : undefined; + + return name || contentType ? [{ ...(name ? { name } : {}), ...(contentType ? { contentType } : {}) }] : []; + }), +}); + const normalizeNotes = (value: string | string[] | undefined) => { if (typeof value === "string") { return value.length > 0 ? 
[value] : []; @@ -591,6 +624,36 @@ const mergeStepSummaries = (items: StepTreeSummary[]) => }, ); +const collectStepNames = (steps: TestStepResult[], path: string[] = []): Array<{ name: string; path: string[] }> => { + const names: Array<{ name: string; path: string[] }> = []; + + for (const node of steps) { + if (!isStep(node)) { + continue; + } + + const nextPath = [...path, node.name]; + + names.push({ name: node.name, path: nextPath }); + + if (node.steps.length) { + names.push(...collectStepNames(node.steps, nextPath)); + } + } + + return names; +}; + +const testStepContainsText = (entry: TestEntry, expectedText: string) => { + const expected = normalizeStepText(expectedText); + + if (!expected) { + return false; + } + + return collectStepNames(entry.attempts[0].tr.steps).some(({ name }) => normalizeStepText(name).includes(expected)); +}; + const buildAttemptSignature = (attempt: AttemptRecord) => JSON.stringify({ status: attempt.tr.status, @@ -699,6 +762,8 @@ const summarizeStatusCounts = (counts: StatusCounts) => const normalizeLogLine = (value: string) => value.replace(/\s+/g, " ").trim(); +const normalizeStepText = (value: string) => value.replace(/\s+/g, " ").trim().toLocaleLowerCase(); + const normalizeWarningLine = (value: string) => normalizeLogLine(value).replace(/^\(node:\d+\)\s+Warning:\s*/i, "Warning: "); @@ -1213,12 +1278,16 @@ const renderModelingSummary = (modeling: ModelingSummary) => { }; const renderSelectorSummary = (title: string, selectors: NormalizedExpectationSelectors) => { - if (!hasSelector(selectors)) { + if (!hasSelector(selectors) && selectors.testCount === undefined) { return `- ${title}: None`; } const parts: string[] = []; + if (selectors.testCount !== undefined) { + parts.push(`test count: ${selectors.testCount}`); + } + if (selectors.environments.length) { parts.push(`environments: ${selectors.environments.join(", ")}`); } @@ -1242,6 +1311,39 @@ const renderSelectorSummary = (title: string, selectors: NormalizedExpectationSe 
return `- ${title}: ${parts.join(" | ")}`; }; +const renderEvidenceExpectationSummary = (evidence: NormalizedEvidenceExpectations) => { + const parts: string[] = []; + + if (evidence.minSteps !== undefined) { + parts.push(`meaningful steps per test: >= ${evidence.minSteps}`); + } + + if (evidence.minAttachments !== undefined) { + parts.push(`attachments per test: >= ${evidence.minAttachments}`); + } + + if (evidence.stepNameContains.length) { + parts.push(`step contains: ${evidence.stepNameContains.join("; ")}`); + } + + if (evidence.attachments.length) { + parts.push( + `attachments: ${evidence.attachments + .map((attachment) => + [ + attachment.name ? `name=${attachment.name}` : undefined, + attachment.contentType ? `content-type=${attachment.contentType}` : undefined, + ] + .filter(Boolean) + .join(", "), + ) + .join("; ")}`, + ); + } + + return `- Evidence expectations: ${parts.length ? parts.join(" | ") : "None"}`; +}; + const buildCheckSummary = (findings: AgentFinding[]) => { const countsBySeverity = { high: 0, @@ -1268,6 +1370,281 @@ const buildCheckSummary = (findings: AgentFinding[]) => { }; }; +const EXPECTATION_CHECK_IDS = new Set([ + "expectations-invalid", + "expectations-empty", + "expectations-unsupported-control", + "expectations-weak-goal", + "expected-test-missing", + "expected-prefix-missing", + "expected-label-missing", + "expected-environment-missing", + "expected-count-mismatch", + "expected-step-containing-missing", + "insufficient-expected-steps", + "insufficient-expected-attachments", + "missing-expected-attachment", + "forbidden-label-observed", + "no-tests-observed", +] as const); + +const MISSING_EXPECTED_CHECK_IDS = new Set([ + "expected-test-missing", + "expected-prefix-missing", + "expected-label-missing", + "expected-environment-missing", +] as const); + +const EVIDENCE_MISMATCH_CHECK_IDS = new Set([ + "expected-step-containing-missing", + "insufficient-expected-steps", + "insufficient-expected-attachments", + 
"missing-expected-attachment", +] as const); + +const countLabelValues = (labelValues: Record) => + Object.values(labelValues).reduce((total, values) => total + values.length, 0); + +const recognizedControlCount = (expectations?: LoadedExpectations) => { + if (!expectations) { + return 0; + } + + return ( + (expectations.goal ? 1 : 0) + + (expectations.taskId ? 1 : 0) + + (expectations.expected.testCount !== undefined ? 1 : 0) + + expectations.expected.environments.length + + expectations.expected.fullNames.length + + expectations.expected.fullNamePrefixes.length + + countLabelValues(expectations.expected.labelValues) + + countLabelValues(expectations.forbidden.labelValues) + + (expectations.evidence.minSteps !== undefined ? 1 : 0) + + (expectations.evidence.minAttachments !== undefined ? 1 : 0) + + expectations.evidence.stepNameContains.length + + expectations.evidence.attachments.length + ); +}; + +const runtimeMatchingControlCount = (expectations?: LoadedExpectations) => { + if (!expectations) { + return 0; + } + + return ( + (expectations.expected.testCount !== undefined ? 1 : 0) + + expectations.expected.environments.length + + expectations.expected.fullNames.length + + expectations.expected.fullNamePrefixes.length + + countLabelValues(expectations.expected.labelValues) + + countLabelValues(expectations.forbidden.labelValues) + + (expectations.evidence.minSteps !== undefined ? 1 : 0) + + (expectations.evidence.minAttachments !== undefined ? 
1 : 0) + + expectations.evidence.stepNameContains.length + + expectations.evidence.attachments.length + ); +}; + +const toExpectationModel = (expectations: LoadedExpectations) => { + const expected: AgentExpectationSelectorInput = {}; + const forbidden: AgentExpectationSelectorInput = {}; + const evidence: AgentEvidenceExpectationInput = {}; + + if (expectations.expected.testCount !== undefined) { + expected.test_count = expectations.expected.testCount; + } + + if (expectations.expected.environments.length) { + expected.environments = expectations.expected.environments; + } + + if (expectations.expected.fullNames.length) { + expected.full_names = expectations.expected.fullNames; + } + + if (expectations.expected.fullNamePrefixes.length) { + expected.full_name_prefixes = expectations.expected.fullNamePrefixes; + } + + if (Object.keys(expectations.expected.labelValues).length) { + expected.label_values = expectations.expected.labelValues; + } + + if (Object.keys(expectations.forbidden.labelValues).length) { + forbidden.label_values = expectations.forbidden.labelValues; + } + + if (expectations.evidence.minSteps !== undefined) { + evidence.min_steps = expectations.evidence.minSteps; + } + + if (expectations.evidence.minAttachments !== undefined) { + evidence.min_attachments = expectations.evidence.minAttachments; + } + + if (expectations.evidence.stepNameContains.length) { + evidence.step_name_contains = expectations.evidence.stepNameContains; + } + + if (expectations.evidence.attachments.length) { + evidence.attachments = expectations.evidence.attachments.map((attachment) => ({ + ...(attachment.name ? { name: attachment.name } : {}), + ...(attachment.contentType ? { content_type: attachment.contentType } : {}), + })); + } + + return { + ...(expectations.goal ? { goal: expectations.goal } : {}), + ...(expectations.taskId ? { task_id: expectations.taskId } : {}), + ...(Object.keys(expected).length ? { expected } : {}), + ...(Object.keys(forbidden).length ? 
{ forbidden } : {}), + ...(Object.keys(evidence).length ? { evidence } : {}), + ...(expectations.notes.length ? { notes: expectations.notes } : {}), + }; +}; + +const defaultImpactForFinding = (finding: AgentFinding): FindingImpact => { + if (finding.impact) { + return finding.impact; + } + + if ( + [ + "expected-test-missing", + "expected-prefix-missing", + "expected-label-missing", + "expected-environment-missing", + "forbidden-label-observed", + "no-tests-observed", + ].includes(finding.checkName) + ) { + return "reject"; + } + + if (finding.checkName === "noop-dominated-steps" && (finding.confidence ?? 0) >= 0.75) { + return "reject"; + } + + if ( + [ + "expectations-invalid", + "expectations-empty", + "expectations-unsupported-control", + "expected-count-mismatch", + "expected-step-containing-missing", + "insufficient-expected-steps", + "insufficient-expected-attachments", + "missing-expected-attachment", + "runner-failures-outside-logical-results", + "metadata-mismatch", + "history-id-collision", + "failed-without-useful-steps", + "failed-without-attachments", + "nontrivial-run-with-empty-trace", + "retries-without-new-evidence", + "passed-without-observable-evidence", + ].includes(finding.checkName) + ) { + return "iterate"; + } + + if (finding.severity === "high") { + return "iterate"; + } + + return "advisory"; +}; + +const strongestImpact = (findings: AgentFinding[], fallback: FindingImpact): FindingImpact => { + if (findings.some((finding) => defaultImpactForFinding(finding) === "reject")) { + return "reject"; + } + + if (findings.some((finding) => defaultImpactForFinding(finding) === "iterate")) { + return "iterate"; + } + + return fallback; +}; + +const buildExpectationResult = (params: { + expectations?: LoadedExpectations; + findings: AgentFinding[]; + observedTestCount: number; + modelingSummary: ModelingSummary; +}) => { + const { expectations, findings, observedTestCount, modelingSummary } = params; + const expectationFindings = 
findings.filter((finding) => EXPECTATION_CHECK_IDS.has(finding.checkName)); + const recognized = recognizedControlCount(expectations); + const runtimeMatching = runtimeMatchingControlCount(expectations); + const invalidFindings = expectationFindings.filter((finding) => finding.checkName === "expectations-invalid"); + const emptyFindings = expectationFindings.filter((finding) => finding.checkName === "expectations-empty"); + const unsupportedFindings = expectationFindings.filter( + (finding) => finding.checkName === "expectations-unsupported-control", + ); + const blockingFindings = expectationFindings.filter((finding) => finding.checkName !== "expectations-weak-goal"); + const expectedTests = expectations?.expected.testCount ?? expectations?.expected.fullNames.length ?? 0; + let status: "matched" | "failed" | "partial" | "degraded" | "unsupported" | "unavailable" | "not_requested"; + let impact: "accept" | "reject" | "iterate" | "advisory"; + + if (invalidFindings.length) { + status = "unavailable"; + impact = + strongestImpact(invalidFindings, "reject") === "advisory" ? "reject" : strongestImpact(invalidFindings, "reject"); + } else if (emptyFindings.length || unsupportedFindings.length) { + status = "unsupported"; + impact = strongestImpact([...emptyFindings, ...unsupportedFindings], "iterate") === "reject" ? 
"reject" : "iterate"; + } else if (blockingFindings.some((finding) => finding.checkName === "no-tests-observed")) { + status = "failed"; + impact = "reject"; + } else if (runtimeMatching === 0) { + status = "not_requested"; + impact = "advisory"; + } else if (blockingFindings.some((finding) => defaultImpactForFinding(finding) === "reject")) { + status = "failed"; + impact = "reject"; + } else if (blockingFindings.some((finding) => defaultImpactForFinding(finding) === "iterate")) { + status = "failed"; + impact = "iterate"; + } else if (modelingSummary.completeness === "partial") { + status = "partial"; + impact = "iterate"; + } else { + status = "matched"; + impact = "accept"; + } + + return { + schema_version: "allure-agent-expectation-result/v1", + status, + impact, + source: expectations + ? { + kind: expectations.sourceKind, + path: expectations.sourceKind === "file" ? (expectations.sourcePath ?? null) : null, + } + : { + kind: "none", + path: null, + }, + recognized_control_count: recognized, + unsupported_controls: unsupportedFindings.map((finding) => finding.expectedReference ?? 
finding.message), + degraded_controls: [] as string[], + summary: { + expected_tests: expectedTests, + observed_tests: observedTestCount, + missing_expected: expectationFindings.filter((finding) => MISSING_EXPECTED_CHECK_IDS.has(finding.checkName)) + .length, + forbidden_observed: expectationFindings.filter((finding) => finding.checkName === "forbidden-label-observed") + .length, + unexpected_observed: 0, + evidence_mismatches: expectationFindings.filter((finding) => EVIDENCE_MISMATCH_CHECK_IDS.has(finding.checkName)) + .length, + }, + finding_ids: expectationFindings.map((finding) => finding.findingId), + }; +}; + const sortFindings = (findings: AgentFinding[]) => [...findings].sort((left, right) => { const bySeverity = FINDING_SEVERITY_ORDER[left.severity] - FINDING_SEVERITY_ORDER[right.severity]; @@ -1302,6 +1679,35 @@ const renderFindingEvidenceLinks = (params: { finding: AgentFinding; currentFile .join("\n"); }; +const formatFindingStructuredValue = (value: unknown): string | undefined => { + if (value === undefined || value === null) { + return undefined; + } + + if (typeof value === "string" || typeof value === "number" || typeof value === "boolean") { + return String(value); + } + + if (Array.isArray(value)) { + return value + .map((item) => formatFindingStructuredValue(item)) + .filter(Boolean) + .join(", "); + } + + if (typeof value === "object") { + const parts = Object.entries(value as Record).flatMap(([key, item]) => { + const formatted = formatFindingStructuredValue(item); + + return formatted ? [`${key}: ${formatted}`] : []; + }); + + return parts.length ? 
parts.join("; ") : undefined; + } + + return undefined; +}; + const renderFindingsSection = (params: { title: string; findings: AgentFinding[]; @@ -1317,32 +1723,34 @@ const renderFindingsSection = (params: { const lines: string[] = [`## ${title}`, ""]; for (const finding of sortFindings(findings)) { + const impact = defaultImpactForFinding(finding); + const expected = + formatFindingStructuredValue(finding.expected) ?? + (finding.expectedReference ? `reference: ${finding.expectedReference}` : undefined); + const observed = formatFindingStructuredValue(finding.observed) ?? finding.explanation; + const evidenceLinks = renderFindingEvidenceLinks({ + finding, + currentFilePath, + outputDir, + }); + lines.push( - `### [${finding.severity.toUpperCase()}] ${escapeInlineMarkdown(finding.category)} / ${escapeInlineMarkdown(finding.checkName)}`, + `- [${finding.severity.toUpperCase()}][${impact}][${escapeInlineMarkdown(finding.category)}] ${escapeInlineMarkdown(finding.title ?? finding.message)}`, ); - lines.push(""); - lines.push(`- Message: ${escapeInlineMarkdown(finding.message)}`); - lines.push(`- Explanation: ${escapeInlineMarkdown(finding.explanation)}`); - lines.push(`- Remediation: ${escapeInlineMarkdown(finding.remediationHint)}`); - if (finding.expectedReference) { - lines.push(`- Expected Reference: ${escapeInlineMarkdown(finding.expectedReference)}`); + if (expected) { + lines.push(` Expected: ${escapeInlineMarkdown(expected)}`); } - if (finding.confidence !== undefined) { - lines.push(`- Confidence: ${finding.confidence}`); + if (observed) { + lines.push(` Observed: ${escapeInlineMarkdown(observed)}`); } - lines.push("- Evidence:"); - lines.push(""); - lines.push( - renderFindingEvidenceLinks({ - finding, - currentFilePath, - outputDir, - }), - ); - lines.push(""); + lines.push(` Action: ${escapeInlineMarkdown(finding.action ?? 
finding.remediationHint)}`); + + if (evidenceLinks !== "None") { + lines.push(` Evidence: ${escapeInlineMarkdown(finding.evidencePaths.join(", "))}`); + } } return lines.join("\n").trimEnd(); @@ -1361,6 +1769,32 @@ const renderExpectationSection = (entry: TestEntry) => { return lines.join("\n"); }; +const renderExpectationResultSection = (params: { + expectations?: LoadedExpectations; + findings: AgentFinding[]; + observedTestCount: number; + modelingSummary: ModelingSummary; +}) => { + const result = buildExpectationResult(params); + const summary = result.summary; + + return [ + "## Expectation Result", + "", + `- Status: ${result.status}`, + `- Impact: ${result.impact}`, + `- Recognized Controls: ${result.recognized_control_count}`, + `- Source: ${result.source.kind}${result.source.path ? ` (${result.source.path})` : ""}`, + `- Expected Tests: ${summary.expected_tests}`, + `- Observed Tests: ${summary.observed_tests}`, + `- Missing Expected: ${summary.missing_expected}`, + `- Forbidden Observed: ${summary.forbidden_observed}`, + `- Evidence Mismatches: ${summary.evidence_mismatches}`, + `- Run Manifest: [manifest/run.json](manifest/run.json)`, + `- Findings Manifest: [manifest/findings.jsonl](manifest/findings.jsonl)`, + ].join("\n"); +}; + const renderRerunGuidance = (findings: AgentFinding[]) => { const relevant = findings.filter( ({ category }) => category === "evidence" || category === "smells" || category === "metadata", @@ -1385,9 +1819,7 @@ const renderRerunGuidance = (findings: AgentFinding[]) => { lines.push("- Replace repetitive event-style steps with a compact text attachment when the signal is mostly logs."); } - lines.push( - "- Rerun only the relevant tests with the same expectations file so the next review is scoped and comparable.", - ); + lines.push("- Rerun only the relevant tests with the same expectations so the next review is scoped and comparable."); return lines.join("\n"); }; @@ -1588,16 +2020,29 @@ const renderIndex = (params: { 
lines.push(`- Goal: ${escapeInlineMarkdown(expectations.goal ?? "unknown")}`); lines.push(`- Feature / Task: ${escapeInlineMarkdown(expectations.taskId ?? "unknown")}`); lines.push( - `- Expectations Source: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)})`, + expectations.sourceKind === "inline" + ? `- Expectations Source: CLI options (normalized: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)}))` + : `- Expectations Source: [${escapeInlineMarkdown(expectations.relativePath)}](${normalizeMarkdownPath(expectations.relativePath)})`, ); lines.push(renderSelectorSummary("Expected selectors", expectations.expected)); lines.push(renderSelectorSummary("Forbidden selectors", expectations.forbidden)); + lines.push(renderEvidenceExpectationSummary(expectations.evidence)); if (expectations.notes.length) { lines.push(`- Notes: ${expectations.notes.map((note) => escapeInlineMarkdown(note)).join(" | ")}`); } } + lines.push(""); + lines.push( + renderExpectationResultSection({ + expectations, + findings, + observedTestCount: tests.length, + modelingSummary, + }), + ); + lines.push(""); lines.push("## Advisory Check Summary"); lines.push(""); @@ -1884,11 +2329,7 @@ const readMaterializedArtifactText = async (outputDir: string, artifact?: Materi } }; -const resolveOutputDir = (options: AgentPluginOptions) => { - const outputDir = options.outputDir ?? env[AGENT_OUTPUT_ENV]; - - return outputDir ? resolve(outputDir) : undefined; -}; +const resolveOutputDir = (options: AgentPluginOptions) => (options.outputDir ? 
resolve(options.outputDir) : undefined); const cleanupManagedEntries = async (outputDir: string) => { await Promise.all( @@ -1939,18 +2380,116 @@ const createFindingFactory = () => { }; }; -const parseExpectations = (rawContent: string) => { - const parsed = parse(rawContent) as ExpectationsInput; - +const assertExpectationsObject = (parsed: AgentExpectationsInput) => { if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) { - throw new Error("Expected a YAML or JSON object"); + throw new Error("Expected an expectations object"); } +}; + +const writeExpectedManifest = async (outputDir: string, parsed: AgentExpectationsInput) => { + const relativePath = normalizeMarkdownPath("manifest/expected.json"); - return parsed; + await mkdir(join(outputDir, "manifest"), { recursive: true }); + await writeFile(join(outputDir, relativePath), `${JSON.stringify(parsed, null, 2)}\n`, "utf-8"); + + return relativePath; }; -const loadExpectations = async (outputDir: string, createFinding: ReturnType) => { - const configuredPath = env[AGENT_EXPECTATIONS_ENV]; +const toLoadedExpectations = (params: { + parsed: AgentExpectationsInput; + relativePath: string; + sourceKind: "file" | "inline"; + sourcePath?: string; +}) => { + const { parsed, relativePath, sourceKind, sourcePath } = params; + + return { + sourcePath, + sourceKind, + relativePath, + raw: parsed, + goal: parsed.goal, + taskId: parsed.task_id, + notes: normalizeNotes(parsed.notes), + expected: normalizeSelectors(parsed.expected), + forbidden: normalizeSelectors(parsed.forbidden), + evidence: normalizeEvidenceExpectations(parsed.evidence), + } satisfies LoadedExpectations; +}; + +const loadExpectations = async ( + outputDir: string, + createFinding: ReturnType, + options: AgentPluginOptions, +) => { + const configuredPath = options.expectationsPath; + const inlineExpectations = options.expectations; + + if (!configuredPath && !inlineExpectations) { + return { + expectations: undefined, + findings: [] as 
AgentFinding[], + }; + } + + if (configuredPath && inlineExpectations) { + return { + expectations: undefined, + findings: [ + createFinding({ + subject: "run", + subjectType: "run", + severity: "high", + category: "bootstrap", + impact: "reject", + checkName: "expectations-invalid", + title: "Expectation input is invalid", + message: "Both file and inline agent expectations were provided.", + explanation: "Set either expectationsPath or expectations in the agent plugin options, not both.", + evidencePaths: [], + remediationHint: "Rerun with one expectations source so scope checks are unambiguous.", + expectedReference: undefined, + }), + ], + }; + } + + if (inlineExpectations) { + try { + assertExpectationsObject(inlineExpectations); + + const relativePath = await writeExpectedManifest(outputDir, inlineExpectations); + + return { + expectations: toLoadedExpectations({ + parsed: inlineExpectations, + relativePath, + sourceKind: "inline", + }), + findings: [] as AgentFinding[], + }; + } catch (error) { + return { + expectations: undefined, + findings: [ + createFinding({ + subject: "run", + subjectType: "run", + severity: "high", + category: "bootstrap", + impact: "reject", + checkName: "expectations-invalid", + title: "Expectation input is invalid", + message: "Could not load inline agent expectations", + explanation: `The inline expectations option could not be normalized: ${(error as Error).message}`, + evidencePaths: [], + remediationHint: "Provide a valid expectations object before rerunning.", + expectedReference: undefined, + }), + ], + }; + } + } if (!configuredPath) { return { @@ -1963,24 +2502,16 @@ const loadExpectations = async (outputDir: string, createFinding: ReturnType => { - const projectRoot = resolve(env[AGENT_PROJECT_ROOT_ENV] ?? 
process.cwd()); - const sourcePath = join(projectRoot, "docs", "allure-agent-mode.md"); - - try { - const content = await readFile(sourcePath, "utf-8"); - const relativePath = normalizeMarkdownPath(join("project", "docs", "allure-agent-mode.md")); - - await mkdir(join(outputDir, "project", "docs"), { recursive: true }); - await writeFile(join(outputDir, relativePath), content, "utf-8"); - - return { - sourcePath, - relativePath, - }; - } catch (error) { - if ((error as NodeJS.ErrnoException).code === "ENOENT") { - return undefined; - } - - throw error; - } -}; - const computeScopeEvaluation = (params: { tr: TestResult; environmentId: string; @@ -2120,6 +2629,43 @@ const collectTestEvidencePaths = (entry: TestEntry) => { return uniqueValues(paths); }; +const getExpectationTargetEntries = (entries: TestEntry[], expectations: LoadedExpectations) => { + if (!hasSelector(expectations.expected)) { + return entries; + } + + return entries.filter((entry) => entry.scope.scopeMatch === "match"); +}; + +const currentAttemptStepSummary = (entry: TestEntry) => + mergeStepSummaries([entry.attempts[0].stepSummary, entry.attempts[0].fixtureStepSummary]); + +const nonMissingArtifacts = (entry: TestEntry) => entry.allArtifacts.filter((artifact) => !artifact.missing); + +const formatAttachmentExpectation = (expectation: NormalizedAttachmentExpectation) => + [ + expectation.name ? `name=${expectation.name}` : undefined, + expectation.contentType ? 
`content-type=${expectation.contentType}` : undefined, + ] + .filter(Boolean) + .join(", "); + +const matchesAttachmentExpectation = (artifact: MaterializedArtifact, expectation: NormalizedAttachmentExpectation) => { + if (artifact.missing) { + return false; + } + + if (expectation.name && artifact.displayName !== expectation.name) { + return false; + } + + if (expectation.contentType && artifact.contentType !== expectation.contentType) { + return false; + } + + return true; +}; + const buildRunAndTestFindings = (params: { entries: TestEntry[]; expectations?: LoadedExpectations; @@ -2132,19 +2678,24 @@ const buildRunAndTestFindings = (params: { const stdoutArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt"); const stderrArtifact = globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt"); - if (entries.length === 0) { + if (entries.length === 0 && expectations?.expected.testCount !== 0) { runFindings.push( createFinding({ subject: "run", subjectType: "run", severity: "high", + impact: "reject", category: "bootstrap", - checkName: "no-visible-tests", + checkName: "no-tests-observed", + title: "No logical tests were observed", message: "No visible test results were found in the run.", explanation: "The agent output was generated, but there were no visible logical test results to review.", - evidencePaths: [], - remediationHint: - "Verify that Allure results are being generated and that the test command actually executed the intended tests.", + evidencePaths: ["manifest/run.json", "manifest/tests.jsonl"], + remediationHint: "Fix command, adapter, discovery, or modeling before calling the run passing validation.", + expected: { test_count: expectations?.expected.testCount ?? "one or more logical tests" }, + observed: { test_count: 0 }, + action: "Do not call the run passing validation. 
Fix command, adapter, discovery, or modeling.", + confidence: 1, }), ); } @@ -2162,7 +2713,7 @@ const buildRunAndTestFindings = (params: { "Global process logs help agents debug bootstrap failures and compare the recorded results with console output.", evidencePaths: [], remediationHint: - "Run tests through `allure agent -- ` without `--silent` when you need bootstrap diagnostics, or use `ALLURE_AGENT_*` with `allure run` for lower-level control.", + "Run tests through `allure agent -- ` without `--silent` when you need bootstrap diagnostics.", confidence: 0.9, }), ); @@ -2216,6 +2767,90 @@ const buildRunAndTestFindings = (params: { if (expectations) { const allFullNames = entries.map(({ tr }) => tr.fullName ?? tr.name); + const targetEntries = getExpectationTargetEntries(entries, expectations); + const hasRuntimeControls = runtimeMatchingControlCount(expectations) > 0; + const genericGoal = expectations.goal ? normalizeStepText(expectations.goal).replace(/[^\p{L}\p{N}\s]/gu, "") : ""; + + if (recognizedControlCount(expectations) === 0) { + runFindings.push( + createFinding({ + subject: "run", + subjectType: "run", + severity: "high", + impact: "iterate", + category: "scope", + checkName: "expectations-empty", + title: "Expectation source did not contain recognized controls", + message: "Expectation source was provided but no recognized M1 controls were parsed.", + explanation: "The run can still be reviewed, but expectation precision was not requested.", + evidencePaths: expectations.relativePath ? [expectations.relativePath] : [], + remediationHint: "Do not claim expectation precision. Fix expectation input or rerun without expectations.", + observed: { recognized_control_count: 0 }, + action: "Do not claim expectation precision. 
Fix expectation input or rerun without expectations.", + confidence: 1, + }), + ); + } + + if ( + (hasRuntimeControls && !expectations.goal) || + ["run tests", "validate", "make sure it passes", "check", "test"].includes(genericGoal) + ) { + runFindings.push( + createFinding({ + subject: "run", + subjectType: "run", + severity: "info", + impact: "advisory", + category: "scope", + checkName: "expectations-weak-goal", + title: "Run goal is missing or too generic", + message: expectations.goal + ? `The run goal is too generic: ${expectations.goal}` + : "Runtime expectations were provided without a goal.", + explanation: "The goal is intent metadata and does not change the runtime evidence.", + evidencePaths: expectations.relativePath ? [expectations.relativePath] : [], + remediationHint: + "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.", + expected: { goal: "specific validation claim" }, + observed: { goal: expectations.goal ?? null }, + action: + "Use observed evidence for the actual conclusion. Do not discard the run only because the goal is weak.", + confidence: 0.9, + }), + ); + } + + if (expectations.expected.testCount !== undefined && entries.length !== expectations.expected.testCount) { + const severity: FindingSeverity = + expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? "high" : "warning"; + const impact: FindingImpact = + expectations.expected.testCount === 0 || expectations.expected.testCount === 1 ? 
"reject" : "iterate"; + + runFindings.push( + createFinding({ + subject: "run", + subjectType: "run", + severity, + impact, + category: "scope", + checkName: "expected-count-mismatch", + title: "Observed logical test count did not match", + message: `Expected ${expectations.expected.testCount} visible logical tests, got ${entries.length}.`, + explanation: "The expected count is evaluated against all visible logical tests after agent-mode modeling.", + evidencePaths: expectations.relativePath + ? [expectations.relativePath, "manifest/tests.jsonl"] + : ["manifest/tests.jsonl"], + remediationHint: + "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.", + expectedReference: "expected.test_count", + expected: { test_count: expectations.expected.testCount }, + observed: { test_count: entries.length }, + action: "Check selector, parameter expansion, retries, missing tests, or unexpected tests before concluding.", + confidence: 1, + }), + ); + } expectations.expected.fullNames.forEach((fullName, index) => { if (!allFullNames.includes(fullName)) { @@ -2224,14 +2859,23 @@ const buildRunAndTestFindings = (params: { subject: "run", subjectType: "run", severity: "high", + impact: "reject", category: "scope", - checkName: "missing-expected-test", - message: `Expected test did not run: ${fullName}`, - explanation: - "The expectations file explicitly listed this test, but it did not appear in the agentic output.", - evidencePaths: expectations.relativePath ? [expectations.relativePath] : [], - remediationHint: "Check the test selection, environment, and feature branch scope before rerunning.", + checkName: "expected-test-missing", + title: "Expected test was not observed", + message: "The expected test did not appear in the observed logical results.", + explanation: `Expected test did not run: ${fullName}`, + evidencePaths: expectations.relativePath + ? 
[expectations.relativePath, "manifest/tests.jsonl"] + : ["manifest/tests.jsonl"], + remediationHint: + "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.", expectedReference: `expected.full_names[${index}]`, + expected: { full_names: [fullName] }, + observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) }, + action: + "Do not claim the target behavior was validated. Fix selector, restore coverage, or rerun the intended test.", + confidence: 1, }), ); } @@ -2243,15 +2887,22 @@ const buildRunAndTestFindings = (params: { createFinding({ subject: "run", subjectType: "run", - severity: "warning", + severity: "high", + impact: "reject", category: "scope", - checkName: "missing-expected-prefix", - message: `No executed test matched the expected prefix: ${prefix}`, - explanation: "The expectations file asked for tests within this name prefix, but none were recorded.", - evidencePaths: expectations.relativePath ? [expectations.relativePath] : [], - remediationHint: - "Check the expected selector or adjust the executed test target so the intended scope is covered.", + checkName: "expected-prefix-missing", + title: "Expected test prefix was not observed", + message: `No observed test full name started with the expected prefix: ${prefix}`, + explanation: "The expectations asked for tests within this name prefix, but none were recorded.", + evidencePaths: expectations.relativePath + ? 
[expectations.relativePath, "manifest/tests.jsonl"] + : ["manifest/tests.jsonl"], + remediationHint: "Treat the run as wrong scope or missing coverage.", expectedReference: `expected.full_name_prefixes[${index}]`, + expected: { full_name_prefixes: [prefix] }, + observed: { test_count: entries.length, closest_full_names: allFullNames.slice(0, 3) }, + action: "Treat the run as wrong scope or missing coverage.", + confidence: 1, }), ); } @@ -2263,15 +2914,22 @@ const buildRunAndTestFindings = (params: { createFinding({ subject: "run", subjectType: "run", - severity: "warning", + severity: "high", + impact: "reject", category: "scope", - checkName: "missing-expected-environment", + checkName: "expected-environment-missing", + title: "Expected environment was not observed", message: `Expected environment did not appear in the run: ${environment}`, - explanation: - "The expectations file scoped the run to this environment, but no logical test result matched it.", - evidencePaths: expectations.relativePath ? [expectations.relativePath] : [], - remediationHint: "Check the environment selector or rerun the intended environment explicitly.", + explanation: "The expectations scoped the run to this environment, but no logical test result matched it.", + evidencePaths: expectations.relativePath + ? 
[expectations.relativePath, "manifest/tests.jsonl"] + : ["manifest/tests.jsonl"], + remediationHint: "Rerun in the intended environment before making environment-specific claims.", expectedReference: `expected.environments[${index}]`, + expected: { environments: [environment] }, + observed: { environments: actualEnvironments }, + action: "Rerun in the intended environment before making environment-specific claims.", + confidence: 1, }), ); } @@ -2285,16 +2943,23 @@ const buildRunAndTestFindings = (params: { createFinding({ subject: "run", subjectType: "run", - severity: "warning", + severity: "high", + impact: "reject", category: "scope", - checkName: "missing-expected-label-selector", - message: `No executed test matched ${formatLabelRequirement(labelName, values)}`, + checkName: "expected-label-missing", + title: "Expected label was not observed", + message: `No observed test had ${formatLabelRequirement(labelName, values)}`, explanation: - "The expectations file defined a label selector for the intended scope, but no logical test result satisfied it.", - evidencePaths: expectations.relativePath ? [expectations.relativePath] : [], - remediationHint: - "Add the expected label metadata to the intended tests or adjust the expectations selector.", + "The expectations defined a label selector for the intended scope, but no logical test result satisfied it.", + evidencePaths: expectations.relativePath + ? [expectations.relativePath, "manifest/tests.jsonl"] + : ["manifest/tests.jsonl"], + remediationHint: "Fix metadata, selector, or run the correct labeled scope.", expectedReference: `expected.label_values/${escapeJsonPointerSegment(labelName)}`, + expected: { label_values: { [labelName]: values } }, + observed: { test_count: entries.length }, + action: "Fix metadata, selector, or run the correct labeled scope.", + confidence: 1, }), ); } @@ -2322,6 +2987,10 @@ const buildRunAndTestFindings = (params: { } } + const evidenceTargetKeys = expectations + ? 
new Set(getExpectationTargetEntries(entries, expectations).map((entry) => entry.key)) + : new Set(); + for (const entry of entries) { const currentAttempt = entry.attempts[0]; const attemptSignatures = uniqueValues(entry.attempts.map(buildAttemptSignature)); @@ -2329,26 +2998,51 @@ const buildRunAndTestFindings = (params: { const allStepSummary = mergeStepSummaries( entry.attempts.map((attempt) => mergeStepSummaries([attempt.stepSummary, attempt.fixtureStepSummary])), ); + const expectedEvidenceApplies = expectations ? evidenceTargetKeys.has(entry.key) : false; + const expectedEvidence = expectations?.evidence; + const currentStepSummary = currentAttemptStepSummary(entry); + const currentMeaningfulSteps = currentStepSummary.meaningfulSteps; + const currentAttachments = nonMissingArtifacts(entry); const hasUsefulSteps = currentAttempt.stepSummary.meaningfulSteps + currentAttempt.fixtureStepSummary.meaningfulSteps > 0; const hasAnyAttachments = entry.allArtifacts.some((artifact) => !artifact.missing); const noopRatio = allStepSummary.totalSteps > 0 ? allStepSummary.noopSteps / allStepSummary.totalSteps : 0; if (entry.scope.scopeMatch === "forbidden") { + const forbiddenLabelReference = entry.scope.expectedReferences.find((reference) => + reference.startsWith("forbidden.label_values"), + ); + const checkName = forbiddenLabelReference ? "forbidden-label-observed" : "forbidden-selector-match"; + entry.findings.push( createFinding({ subject: entry.key, subjectType: "test", severity: "high", + impact: "reject", category: "scope", - checkName: "forbidden-selector-match", - message: "This test matched a forbidden selector from the expectations file.", - explanation: "The logical test belongs to a scope that the expectations file explicitly marked as forbidden.", + checkName, + title: forbiddenLabelReference ? "Forbidden label was observed" : "Forbidden selector was observed", + message: forbiddenLabelReference + ? "This test has a label value that was explicitly forbidden." 
+ : "This test matched a forbidden selector from the expectations.", + explanation: "The logical test belongs to a scope that the expectations explicitly marked as forbidden.", evidencePaths: expectations?.relativePath ? [entry.relativePath, expectations.relativePath] : [entry.relativePath], - remediationHint: "Tighten the test selection or update the expectations file before accepting the run.", - expectedReference: entry.scope.expectedReferences[0], + remediationHint: forbiddenLabelReference + ? "Treat as scope drift. Split or correct the run before using it as focused validation." + : "Tighten the test selection or update the expectations before accepting the run.", + expectedReference: forbiddenLabelReference ?? entry.scope.expectedReferences[0], + expected: forbiddenLabelReference ? { forbidden_label: forbiddenLabelReference } : { forbidden: true }, + observed: { + full_name: entry.tr.fullName ?? entry.tr.name, + labels: toLabelEntries(entry.tr.labels), + }, + action: forbiddenLabelReference + ? "Treat as scope drift. Split or correct the run before using it as focused validation." + : "Tighten the test selection or update the expectations before accepting the run.", + confidence: 1, }), ); } else if (entry.scope.scopeMatch === "unexpected") { @@ -2361,12 +3055,12 @@ const buildRunAndTestFindings = (params: { checkName: "unexpected-test", message: "This test ran outside the expected scope.", explanation: - "The expectations file defined positive scope selectors, but this logical test did not match any of them.", + "The expectations defined positive scope selectors, but this logical test did not match any of them.", evidencePaths: expectations?.relativePath ? 
[entry.relativePath, expectations.relativePath] : [entry.relativePath], remediationHint: - "Rerun only the intended tests or broaden the expectations file if this test is part of the plan.", + "Rerun only the intended tests or broaden the expectations if this test is part of the plan.", }), ); } @@ -2411,6 +3105,142 @@ const buildRunAndTestFindings = (params: { ); } + expectedEvidence?.stepNameContains.forEach((expectedText, index) => { + if (!expectedEvidenceApplies || testStepContainsText(entry, expectedText)) { + return; + } + + entry.findings.push( + createFinding({ + subject: entry.key, + subjectType: "test", + severity: "warning", + impact: "iterate", + category: "evidence", + checkName: "expected-step-containing-missing", + title: "Expected step text was not observed", + message: `Expected a test-scoped step containing ${JSON.stringify(expectedText)}.`, + explanation: `The current attempt has ${currentStepSummary.totalSteps} test-scoped steps, but none contained the expected text. Global runner output is not considered test-scoped step evidence.`, + evidencePaths: expectations?.relativePath + ? 
[entry.relativePath, expectations.relativePath] + : [entry.relativePath], + remediationHint: + "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.", + expectedReference: `evidence.step_name_contains[${index}]`, + expected: { step_name_contains: [expectedText] }, + observed: { steps: currentStepSummary.totalSteps, matched: false }, + action: + "Add or fix meaningful step evidence, or correct the expectation if the project uses different wording.", + confidence: 0.9, + }), + ); + }); + + if ( + expectedEvidenceApplies && + expectedEvidence?.minSteps !== undefined && + currentMeaningfulSteps < expectedEvidence.minSteps + ) { + entry.findings.push( + createFinding({ + subject: entry.key, + subjectType: "test", + severity: "warning", + impact: "iterate", + category: "evidence", + checkName: "insufficient-expected-steps", + title: "Expected step count was not met", + message: `Expected at least ${expectedEvidence.minSteps} meaningful steps, got ${currentMeaningfulSteps}.`, + explanation: + "Meaningful steps have parameters, nested actions, attachments, messages, traces, or error context.", + evidencePaths: expectations?.relativePath + ? 
[entry.relativePath, expectations.relativePath] + : [entry.relativePath], + remediationHint: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.", + expectedReference: "evidence.min_steps", + expected: { min_steps: expectedEvidence.minSteps }, + observed: { meaningful_steps: currentMeaningfulSteps }, + action: "Add meaningful step evidence only if the missing steps reflect real behavior, not filler.", + confidence: 0.9, + }), + ); + } + + if ( + expectedEvidenceApplies && + expectedEvidence?.minAttachments !== undefined && + currentAttachments.length < expectedEvidence.minAttachments + ) { + entry.findings.push( + createFinding({ + subject: entry.key, + subjectType: "test", + severity: "warning", + impact: "iterate", + category: "evidence", + checkName: "insufficient-expected-attachments", + title: "Expected attachment count was not met", + message: `Expected at least ${expectedEvidence.minAttachments} non-missing attachments, got ${currentAttachments.length}.`, + explanation: "Only materialized test-scoped or step-scoped attachments count toward this expectation.", + evidencePaths: expectations?.relativePath + ? 
[entry.relativePath, expectations.relativePath] + : [entry.relativePath], + remediationHint: "Attach real runtime artifacts only when they are needed for debugging or review.", + expectedReference: "evidence.min_attachments", + expected: { min_attachments: expectedEvidence.minAttachments }, + observed: { attachments: currentAttachments.length }, + action: "Attach real runtime artifacts only when they are needed for debugging or review.", + confidence: 0.9, + }), + ); + } + + expectedEvidence?.attachments.forEach((attachmentExpectation, index) => { + if (!expectedEvidenceApplies) { + return; + } + + if (currentAttachments.some((artifact) => matchesAttachmentExpectation(artifact, attachmentExpectation))) { + return; + } + + entry.findings.push( + createFinding({ + subject: entry.key, + subjectType: "test", + severity: "warning", + impact: "iterate", + category: "evidence", + checkName: "missing-expected-attachment", + title: "Expected attachment was not observed", + message: `Expected attachment not found: ${formatAttachmentExpectation(attachmentExpectation)}`, + explanation: + "The expectations require every expected logical test to include a non-missing attachment matching this filter.", + evidencePaths: expectations?.relativePath + ? [entry.relativePath, expectations.relativePath] + : [entry.relativePath], + remediationHint: + "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.", + expectedReference: `evidence.attachments[${index}]`, + expected: { + attachment: { + ...(attachmentExpectation.name ? { name: attachmentExpectation.name } : {}), + ...(attachmentExpectation.contentType ? { content_type: attachmentExpectation.contentType } : {}), + }, + }, + observed: { + attachments: currentAttachments.map((attachment) => ({ + name: attachment.displayName, + content_type: attachment.contentType ?? 
null, + })), + }, + action: + "Capture the artifact or correct the expectation if the project uses different attachment naming or content types.", + confidence: 0.95, + }), + ); + }); + if (isFailedLikeStatus(currentAttempt.tr.status) && !hasUsefulSteps) { entry.findings.push( createFinding({ @@ -2790,17 +3620,23 @@ const appendJsonlLine = async (path: string, item: unknown) => { const toRunManifest = (params: { context: PluginContext; command?: string; + agentContext: AgentRuntimeState["agentContext"]; generatedAt: string; phase: RunPhase; expectations?: LoadedExpectations; - projectGuide?: LoadedProjectGuide; snapshot: AgentSnapshot; }) => { - const { context, command, generatedAt, phase, expectations, projectGuide, snapshot } = params; + const { context, command, agentContext, generatedAt, phase, expectations, snapshot } = params; const stdoutArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stdout.txt"); const stderrArtifact = snapshot.globalArtifacts.find((artifact) => artifact.displayName === "stderr.txt"); const originalExitCode = snapshot.globalExitCode?.original ?? null; const actualExitCode = snapshot.globalExitCode?.actual ?? snapshot.globalExitCode?.original ?? null; + const expectationResult = buildExpectationResult({ + expectations, + findings: snapshot.combinedAllFindings, + observedTestCount: snapshot.entries.length, + modelingSummary: snapshot.modelingSummary, + }); return { schema_version: AGENT_SCHEMA_VERSION, @@ -2835,26 +3671,27 @@ const toRunManifest = (params: { findings_manifest: "manifest/findings.jsonl", test_events_manifest: "manifest/test-events.jsonl", expected_manifest: expectations?.relativePath ?? null, - project_guide: projectGuide?.relativePath ?? null, process_logs: { stdout: stdoutArtifact?.relativePath ?? null, stderr: stderrArtifact?.relativePath ?? null, }, }, expectations_present: Boolean(expectations), + expectations: expectations ? 
toExpectationModel(expectations) : null, + expectation_result: expectationResult, check_summary: buildCheckSummary(snapshot.combinedAllFindings), agent_context: { - agent_name: env[AGENT_NAME_ENV] ?? null, - loop_id: env[AGENT_LOOP_ID_ENV] ?? null, - task_id: env[AGENT_TASK_ID_ENV] ?? expectations?.taskId ?? null, - conversation_id: env[AGENT_CONVERSATION_ID_ENV] ?? null, + agent_name: agentContext.agentName ?? null, + loop_id: agentContext.loopId ?? null, + task_id: agentContext.taskId ?? expectations?.taskId ?? null, + conversation_id: agentContext.conversationId ?? null, }, }; }; const writeSnapshotFiles = async (params: { runtime: AgentRuntimeState; snapshot: AgentSnapshot; phase: RunPhase }) => { const { runtime, snapshot, phase } = params; - const { outputDir, context, command, generatedAt, expectations, projectGuide } = runtime; + const { outputDir, context, command, generatedAt, expectations } = runtime; const nextTestPaths = new Set(snapshot.entries.map((entry) => entry.filePath)); const nextAssetDirs = new Set(snapshot.entries.map((entry) => join(outputDir, entry.relativeAssetDir))); @@ -2890,10 +3727,10 @@ const writeSnapshotFiles = async (params: { runtime: AgentRuntimeState; snapshot toRunManifest({ context, command, + agentContext: runtime.agentContext, generatedAt, phase, expectations, - projectGuide, snapshot, }), ), @@ -2922,7 +3759,7 @@ const writeSnapshotFiles = async (params: { runtime: AgentRuntimeState; snapshot findings: snapshot.combinedAllFindings, }), ), - writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide(projectGuide?.relativePath)), + writeTextAtomic(join(outputDir, "AGENTS.md"), renderAgentsGuide()), ]); }; @@ -2968,7 +3805,7 @@ const createBootstrapSnapshot = (): AgentSnapshot => ({ }); const writeBootstrapFiles = async (runtime: AgentRuntimeState) => { - await writeTextAtomic(join(runtime.outputDir, "AGENTS.md"), renderAgentsGuide(runtime.projectGuide?.relativePath)); + await writeTextAtomic(join(runtime.outputDir, 
"AGENTS.md"), renderAgentsGuide()); await initializeJsonlStream(join(runtime.outputDir, "manifest", "test-events.jsonl")); await writeSnapshotFiles({ runtime, @@ -2995,20 +3832,57 @@ const toTestsManifestLine = (entry: TestEntry) => ({ assets_dir: entry.relativeAssetDir, }); -const toFindingManifestLine = (finding: AgentFinding) => ({ - finding_id: finding.findingId, - subject: finding.subject, - severity: finding.severity, - category: finding.category, - check_name: finding.checkName, - message: finding.message, - explanation: finding.explanation, - evidence_paths: finding.evidencePaths, - remediation_hint: finding.remediationHint, - expected_reference: finding.expectedReference, - confidence: finding.confidence, +const toFindingSubject = (finding: AgentFinding) => ({ + type: finding.subjectType, + ...(finding.subjectType === "test" ? { id: finding.subject, path: finding.subject } : {}), }); +const toFindingManifestLine = (finding: AgentFinding) => { + const impact = defaultImpactForFinding(finding); + const confidence = finding.confidence ?? 1; + + return { + schema_version: "allure-agent-finding/v2", + check_id: finding.checkName, + instance_id: finding.findingId, + severity: finding.severity, + impact, + confidence, + category: finding.category, + title: finding.title ?? finding.message, + message: finding.message, + subject: toFindingSubject(finding), + expected: finding.expected ?? (finding.expectedReference ? { reference: finding.expectedReference } : {}), + observed: finding.observed ?? { detail: finding.explanation }, + evidence: { + paths: finding.evidencePaths, + }, + action: finding.action ?? finding.remediationHint, + ...(finding.source ? { source: finding.source } : {}), + ...(finding.limits ? { limits: finding.limits } : {}), + ...(finding.affected ? { affected: finding.affected } : {}), + ...(finding.moreCount !== undefined ? 
{ more_count: finding.moreCount } : {}), + legacy: { + finding_id: finding.findingId, + subject: finding.subject, + subject_type: finding.subjectType, + check_name: finding.checkName, + explanation: finding.explanation, + evidence_paths: finding.evidencePaths, + remediation_hint: finding.remediationHint, + expected_reference: finding.expectedReference, + }, + finding_id: finding.findingId, + subject_ref: finding.subject, + subject_type: finding.subjectType, + check_name: finding.checkName, + explanation: finding.explanation, + evidence_paths: finding.evidencePaths, + remediation_hint: finding.remediationHint, + expected_reference: finding.expectedReference, + }; +}; + const queueRuntimeTask = (runtime: AgentRuntimeState, task: () => Promise) => { runtime.queue = runtime.queue .catch(() => undefined) @@ -3171,18 +4045,22 @@ const createRuntimeState = async (params: { const generatedAt = new Date().toISOString(); const createFinding = createFindingFactory(); - const expectationLoadResult = await loadExpectations(outputDir, createFinding); - const projectGuide = await loadProjectGuide(outputDir); + const expectationLoadResult = await loadExpectations(outputDir, createFinding, options); const runtime: AgentRuntimeState = { outputDir, context, store, generatedAt, - command: env[AGENT_COMMAND_ENV], + command: options.command, + agentContext: { + agentName: options.agentName, + loopId: options.loopId, + taskId: options.taskId, + conversationId: options.conversationId, + }, createFinding, expectations: expectationLoadResult.expectations, expectationLoadFindings: expectationLoadResult.findings, - projectGuide, unsubscribers: [], queue: Promise.resolve(), seenLogicalKeys: new Set(), diff --git a/packages/plugin-agent/src/query.ts b/packages/plugin-agent/src/query.ts new file mode 100644 index 00000000000..92f8e816444 --- /dev/null +++ b/packages/plugin-agent/src/query.ts @@ -0,0 +1,252 @@ +import { readFile } from "node:fs/promises"; +import { join } from "node:path"; + 
+import type { TestLabel, TestStatus } from "@allurereport/core-api"; + +import { AgentUsageError } from "./errors.js"; +import type { AgentFindingCategory, AgentFindingSeverity, AgentOutputBundle, AgentTestManifestLine } from "./harness.js"; +import type { AgentLabelFilter } from "./selection.js"; + +export const AGENT_QUERY_SCHEMA = "allure-agent-query/v1"; +export const AGENT_QUERY_VIEWS = ["summary", "tests", "findings", "test"] as const; +export const AGENT_TEST_STATUSES: TestStatus[] = ["failed", "broken", "unknown", "skipped", "passed"]; +export const AGENT_FINDING_SEVERITIES: AgentFindingSeverity[] = ["high", "warning", "info"]; +export const AGENT_FINDING_CATEGORIES: AgentFindingCategory[] = ["bootstrap", "scope", "metadata", "evidence", "smells"]; + +export type AgentQueryView = (typeof AGENT_QUERY_VIEWS)[number]; + +export type AgentQueryFilters = { + environments?: string[]; + labelFilters: AgentLabelFilter[]; + statuses?: TestStatus[]; + severities?: AgentFindingSeverity[]; + categories?: AgentFindingCategory[]; + checks?: string[]; + test?: string; + limit?: number; + includeMarkdown?: boolean; +}; + +export const normalizeAgentQueryView = (value?: string): AgentQueryView => { + if (!value) { + return "summary"; + } + + const normalized = value.trim().toLowerCase(); + + if (!AGENT_QUERY_VIEWS.includes(normalized as AgentQueryView)) { + throw new AgentUsageError( + `Invalid query view ${JSON.stringify(value)}. Expected one of: ${AGENT_QUERY_VIEWS.join(", ")}`, + ); + } + + return normalized as AgentQueryView; +}; + +const normalizeOptionalStringValues = (values: string[] | undefined) => + values?.map((value) => value.trim()).filter(Boolean) ?? 
[]; + +export const normalizeRepeatedEnumValues = ( + values: string[] | undefined, + allowed: readonly T[], + optionName: string, +): T[] | undefined => { + const normalized = normalizeOptionalStringValues(values).map((value) => value.toLowerCase()); + + if (!normalized.length) { + return undefined; + } + + const invalid = normalized.find((value) => !allowed.includes(value as T)); + + if (invalid) { + throw new AgentUsageError( + `Invalid ${optionName} value ${JSON.stringify(invalid)}. Expected one of: ${allowed.join(", ")}`, + ); + } + + return normalized as T[]; +}; + +export const normalizeRepeatedStringValues = (values: string[] | undefined): string[] | undefined => { + const normalized = normalizeOptionalStringValues(values); + + return normalized.length ? normalized : undefined; +}; + +export const normalizeAgentQueryLimit = (value?: string): number | undefined => { + if (value === undefined) { + return undefined; + } + + if (!/^\d+$/.test(value)) { + throw new AgentUsageError("--limit must be a non-negative integer"); + } + + const parsed = Number(value); + + if (!Number.isSafeInteger(parsed)) { + throw new AgentUsageError("--limit must be a non-negative integer"); + } + + return parsed; +}; + +const matchesLabelFilters = (labels: TestLabel[], filters: AgentLabelFilter[]) => + filters.every((filter) => labels.some((label) => label.name === filter.name && label.value === filter.value)); + +const matchesAgentTestIdentifier = (test: AgentTestManifestLine, identifier: string) => + test.full_name === identifier || + test.test_result_id === identifier || + test.history_id === identifier || + test.markdown_path === identifier; + +const agentFindingSubjectRef = (finding: AgentOutputBundle["findings"][number]) => { + if (finding.subject_ref) { + return finding.subject_ref; + } + + if (typeof finding.subject === "string") { + return finding.subject; + } + + return finding.subject.path ?? finding.subject.id ?? 
finding.subject.type; +}; + +const agentFindingCheckName = (finding: AgentOutputBundle["findings"][number]) => + finding.check_id ?? finding.check_name; + +const filterAgentQueryTests = (tests: AgentTestManifestLine[], filters: AgentQueryFilters) => + tests + .filter((test) => (filters.statuses?.length ? filters.statuses.includes(test.status) : true)) + .filter((test) => (filters.environments?.length ? filters.environments.includes(test.environment_id) : true)) + .filter((test) => (filters.labelFilters.length ? matchesLabelFilters(test.labels, filters.labelFilters) : true)) + .filter((test) => (filters.test ? matchesAgentTestIdentifier(test, filters.test) : true)); + +const hasAgentQueryTestFilters = (filters: AgentQueryFilters) => + Boolean(filters.statuses?.length || filters.environments?.length || filters.labelFilters.length || filters.test); + +const filterAgentQueryFindings = (output: AgentOutputBundle, filters: AgentQueryFilters) => { + const matchedSubjects = hasAgentQueryTestFilters(filters) + ? new Set(filterAgentQueryTests(output.tests, filters).map((test) => test.markdown_path)) + : undefined; + + return output.findings + .filter((finding) => (matchedSubjects ? matchedSubjects.has(agentFindingSubjectRef(finding)) : true)) + .filter((finding) => (filters.severities?.length ? filters.severities.includes(finding.severity) : true)) + .filter((finding) => (filters.categories?.length ? filters.categories.includes(finding.category) : true)) + .filter((finding) => (filters.checks?.length ? filters.checks.includes(agentFindingCheckName(finding)) : true)); +}; + +const applyAgentQueryLimit = (items: T[], limit: number | undefined): T[] => + limit === undefined ? items : items.slice(0, limit); + +const resolveAgentOutputPath = (output: AgentOutputBundle, relativePath: string | null | undefined) => + relativePath ? 
join(output.outputDir, relativePath) : null; + +const buildAgentQuerySummaryPayload = (output: AgentOutputBundle) => ({ + schema: AGENT_QUERY_SCHEMA, + view: "summary", + output_dir: output.outputDir, + index_md: resolveAgentOutputPath(output, output.run.paths.index_md), + run: { + schema_version: output.run.schema_version, + generated_at: output.run.generated_at, + phase: output.run.phase ?? null, + command: output.run.command, + exit_code: output.run.exit_code, + expectations_present: output.run.expectations_present, + expectation_result: output.run.expectation_result, + agent_context: output.run.agent_context, + }, + summary: output.run.summary, + modeling: output.run.modeling ?? null, + check_summary: output.run.check_summary, + paths: { + index_md: resolveAgentOutputPath(output, output.run.paths.index_md), + agents_md: resolveAgentOutputPath(output, output.run.paths.agents_md), + tests_manifest: resolveAgentOutputPath(output, output.run.paths.tests_manifest), + findings_manifest: resolveAgentOutputPath(output, output.run.paths.findings_manifest), + test_events_manifest: resolveAgentOutputPath(output, output.run.paths.test_events_manifest), + expected_manifest: resolveAgentOutputPath(output, output.run.paths.expected_manifest), + process_logs: { + stdout: resolveAgentOutputPath(output, output.run.paths.process_logs.stdout), + stderr: resolveAgentOutputPath(output, output.run.paths.process_logs.stderr), + }, + }, + ...(output.expected ? 
{ expected: output.expected } : {}), +}); + +const buildAgentQueryTestsPayload = (output: AgentOutputBundle, filters: AgentQueryFilters) => { + const matched = filterAgentQueryTests(output.tests, filters); + const returned = applyAgentQueryLimit(matched, filters.limit); + + return { + schema: AGENT_QUERY_SCHEMA, + view: "tests", + output_dir: output.outputDir, + total_matches: matched.length, + returned: returned.length, + tests: returned, + }; +}; + +const buildAgentQueryFindingsPayload = (output: AgentOutputBundle, filters: AgentQueryFilters) => { + const matched = filterAgentQueryFindings(output, filters); + const returned = applyAgentQueryLimit(matched, filters.limit); + + return { + schema: AGENT_QUERY_SCHEMA, + view: "findings", + output_dir: output.outputDir, + total_matches: matched.length, + returned: returned.length, + findings: returned, + }; +}; + +const buildAgentQueryTestPayload = async (output: AgentOutputBundle, filters: AgentQueryFilters) => { + const matched = filterAgentQueryTests(output.tests, filters); + + if (!matched.length) { + throw new AgentUsageError(`No tests matched query in ${output.outputDir}`); + } + + if (matched.length > 1) { + throw new AgentUsageError(`Query matched ${matched.length} tests in ${output.outputDir}. Use --test .`); + } + + const test = matched[0]; + const markdownPath = resolveAgentOutputPath(output, test.markdown_path); + const findings = output.findings.filter((finding) => agentFindingSubjectRef(finding) === test.markdown_path); + + return { + schema: AGENT_QUERY_SCHEMA, + view: "test", + output_dir: output.outputDir, + markdown_path: markdownPath, + test, + findings, + ...(filters.includeMarkdown && markdownPath ? 
{ markdown: await readFile(markdownPath, "utf-8") } : {}), + }; +}; + +export const buildAgentQueryPayload = async ( + output: AgentOutputBundle, + view: AgentQueryView, + filters: AgentQueryFilters, +) => { + switch (view) { + case "summary": + return buildAgentQuerySummaryPayload(output); + + case "tests": + return buildAgentQueryTestsPayload(output, filters); + + case "findings": + return buildAgentQueryFindingsPayload(output, filters); + + case "test": + return buildAgentQueryTestPayload(output, filters); + } +}; diff --git a/packages/cli/src/utils/agent-select.ts b/packages/plugin-agent/src/selection.ts similarity index 89% rename from packages/cli/src/utils/agent-select.ts rename to packages/plugin-agent/src/selection.ts index 32437ca1e77..16d93d8ecba 100644 --- a/packages/cli/src/utils/agent-select.ts +++ b/packages/plugin-agent/src/selection.ts @@ -3,15 +3,10 @@ import { tmpdir } from "node:os"; import { join, resolve } from "node:path"; import type { TestLabel, TestPlan, TestPlanTest } from "@allurereport/core-api"; -import { - loadAgentOutput, - planAgentEnrichmentReview, - type AgentOutputBundle, - type AgentTestManifestLine, -} from "@allurereport/plugin-agent"; -import { UsageError } from "clipanion"; -import { readLatestAgentState } from "./agent-state.js"; +import { AgentUsageError } from "./errors.js"; +import { loadAgentOutput, planAgentEnrichmentReview, type AgentOutputBundle, type AgentTestManifestLine } from "./harness.js"; +import { readLatestAgentState } from "./state.js"; export type AgentRerunPreset = "review" | "failed" | "unsuccessful" | "all"; @@ -35,7 +30,7 @@ export type AgentTestPlanContext = { cleanup: () => Promise; }; -const AGENT_RERUN_PRESETS: AgentRerunPreset[] = ["review", "failed", "unsuccessful", "all"]; +export const AGENT_RERUN_PRESETS: AgentRerunPreset[] = ["review", "failed", "unsuccessful", "all"]; const ALLURE_ID_LABEL = "ALLURE_ID"; @@ -108,7 +103,7 @@ export const normalizeAgentRerunPreset = (value?: string): 
AgentRerunPreset => { const normalized = value.trim().toLowerCase(); if (!isAgentRerunPreset(normalized)) { - throw new UsageError( + throw new AgentUsageError( `Invalid rerun preset ${JSON.stringify(value)}. Expected one of: ${AGENT_RERUN_PRESETS.join(", ")}`, ); } @@ -121,7 +116,7 @@ export const parseAgentLabelFilters = (values?: string[]): AgentLabelFilter[] => const separatorIndex = value.indexOf("="); if (separatorIndex <= 0 || separatorIndex === value.length - 1) { - throw new UsageError( + throw new AgentUsageError( `Invalid label filter ${JSON.stringify(value)}. Expected the form name=value, for example feature=checkout`, ); } @@ -130,7 +125,7 @@ export const parseAgentLabelFilters = (values?: string[]): AgentLabelFilter[] => const filterValue = value.slice(separatorIndex + 1).trim(); if (!name || !filterValue) { - throw new UsageError( + throw new AgentUsageError( `Invalid label filter ${JSON.stringify(value)}. Expected the form name=value, for example feature=checkout`, ); } @@ -149,11 +144,11 @@ export const resolveAgentSelectionOutputDir = async (params: { const { cwd, from, latest } = params; if (from && latest) { - throw new UsageError("Use either --from or --latest, not both"); + throw new AgentUsageError("Use either --from or --latest, not both"); } if (!from && !latest) { - throw new UsageError("Expected either --from or --latest"); + throw new AgentUsageError("Expected either --from or --latest"); } if (from) { @@ -163,7 +158,7 @@ export const resolveAgentSelectionOutputDir = async (params: { const latestState = await readLatestAgentState(cwd); if (!latestState) { - throw new UsageError(`No latest agent output found for ${cwd}`); + throw new AgentUsageError(`No latest agent output found for ${cwd}`); } return latestState.outputDir; @@ -216,7 +211,7 @@ export const createAgentTestPlanContext = async (params: { }); if (!selection.testPlan.tests.length) { - throw new UsageError( + throw new AgentUsageError( `No tests matched rerun selection in 
${selection.outputDir}. Adjust the preset or filters before rerunning.`, ); } diff --git a/packages/cli/src/utils/agent-state.ts b/packages/plugin-agent/src/state.ts similarity index 100% rename from packages/cli/src/utils/agent-state.ts rename to packages/plugin-agent/src/state.ts diff --git a/packages/plugin-agent/test/capabilities.test.ts b/packages/plugin-agent/test/capabilities.test.ts new file mode 100644 index 00000000000..cfae6ef8a62 --- /dev/null +++ b/packages/plugin-agent/test/capabilities.test.ts @@ -0,0 +1,61 @@ +import { epic, feature, label, story } from "allure-js-commons"; +import { beforeEach, describe, expect, it } from "vitest"; + +import { AGENT_TASK_MAP_HELP, createAgentCapabilities, isAgentTaskMapHelpRequest } from "../src/capabilities.js"; +import { attachJsonEvidence, expectTextToContainAll } from "./evidence.js"; + +beforeEach(async () => { + await epic("coverage"); + await feature("agent-mode"); + await story("agent-capabilities"); + await label("coverage", "agent-mode"); +}); + +describe("agent capabilities", () => { + it("should describe the supported local agent command surface", async () => { + const payload = createAgentCapabilities(); + + await attachJsonEvidence("agent capabilities payload", payload); + expect(payload.schema).toBe("allure-agent-capabilities/v1"); + expect(payload.commands.run.supported).toBe(true); + expect(payload.commands.run.options).toContain("--expect-test"); + expect(payload.commands.latest.output).toEqual(["agent output: ", "agent index: /index.md"]); + expect(payload.commands.select.output).toEqual(["stdout-testplan-json", "file-testplan-json", "file-summary"]); + expect(payload.commands.select.presets).toEqual(["review", "failed", "unsuccessful", "all"]); + expect(payload.commands.query.supported).toBe(true); + expect(payload.commands.query.views).toEqual(["summary", "tests", "findings", "test"]); + expect(payload.commands.query.filters).toContain("status"); + 
expect(payload.expectations.inline.expected.fullNames).toBe(true); + expect(payload.expectations.inline.forbidden.labels).toBe(true); + expect(payload.expectations.inline.forbidden.fullNames).toBe(false); + expect(payload.expectations.inline.evidence.stepNameContains).toBe(true); + expect(payload.expectations.inline.evidence.attachmentFilters).toEqual(["name", "content-type"]); + expect(payload.commands.run.options).not.toContain("--expect-evidence"); + expect(payload.output.files).toContain("manifest/run.json"); + expect(payload.unsupported.discovery).toBe(true); + expect(payload.unsupported).not.toHaveProperty("query"); + expect(payload.unsupported.localAgentService).toBe(true); + }); + + it("should define the task-map help request and help content", async () => { + const helpRequestCases = [ + { args: ["agent", "--help"], expected: true }, + { args: ["agent", "-h"], expected: true }, + { args: ["agent", "-h=3"], expected: false }, + { args: ["agent", "latest", "--help"], expected: false }, + ]; + + await attachJsonEvidence("task map help request cases", helpRequestCases); + expect(helpRequestCases.map(({ args }) => isAgentTaskMapHelpRequest(args))).toEqual( + helpRequestCases.map(({ expected }) => expected), + ); + await expectTextToContainAll("agent task map help", AGENT_TASK_MAP_HELP, [ + "Agent task map:", + "allure agent capabilities", + "allure agent --goal ... 
-- ", + "allure agent query --from tests", + "allure agent select --from ", + "ALLURE_AGENT_STATE_DIR=", + ]); + }); +}); diff --git a/packages/plugin-agent/test/evidence.ts b/packages/plugin-agent/test/evidence.ts new file mode 100644 index 00000000000..88bc0b35dca --- /dev/null +++ b/packages/plugin-agent/test/evidence.ts @@ -0,0 +1,24 @@ +import { attachment, step } from "allure-js-commons"; +import { expect } from "vitest"; + +const formatJson = (value: unknown) => JSON.stringify(value, null, 2); + +export const attachJsonEvidence = async (name: string, value: unknown) => { + await attachment(name, formatJson(value), "application/json"); +}; + +export const attachTextEvidence = async (name: string, value: string, contentType: string = "text/plain") => { + await attachment(name, value, contentType); +}; + +export const expectTextToContainAll = async (artifactName: string, content: string, expectedText: string[]) => { + await step(`verify ${artifactName} required text`, async () => { + const missing = expectedText.filter((expected) => !content.includes(expected)); + + await attachJsonEvidence(`${artifactName} required text`, { + checked: expectedText, + missing, + }); + expect(missing).toEqual([]); + }); +}; diff --git a/packages/plugin-agent/test/guidance.test.ts b/packages/plugin-agent/test/guidance.test.ts new file mode 100644 index 00000000000..ff719b87b04 --- /dev/null +++ b/packages/plugin-agent/test/guidance.test.ts @@ -0,0 +1,70 @@ +import { readFile } from "node:fs/promises"; +import { dirname, join, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; + +import { story } from "allure-js-commons"; +import { beforeEach, describe, expect, it } from "vitest"; + +import { renderAgentsGuide } from "../src/guidance.js"; +import { expectTextToContainAll } from "./evidence.js"; + +beforeEach(async () => { + await story("guidance"); +}); + +const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), "../../.."); + +describe("allure 
agent-mode guidance", () => { + it("should keep stable guidance in the package README and generated run playbook", async () => { + const readme = await readFile(join(repoRoot, "packages", "plugin-agent", "README.md"), "utf-8"); + const agentsGuide = renderAgentsGuide(); + + await expectTextToContainAll("generated AGENTS guide", agentsGuide, [ + "## Reading Order", + "## Command Task Map", + "## Agent Workflows", + "Use the smallest workflow that matches the task.", + "### Validate A Change", + "### Add Or Update Tests", + "### Review Existing Coverage", + "### Triage Failures", + "### Rerun A Prior Scope", + "### Improve Evidence Quality", + "### Recover Or Diagnose Agent Mode", + "allure agent --goal --expect-tests --expect-test", + "allure agent latest", + "allure agent state-dir", + "allure agent query --latest summary|tests|findings|test", + "allure agent select --latest", + "allure agent --rerun-latest", + "--preset review|failed|unsuccessful|all", + "--environment ", + "--label name=value", + "--rerun-environment", + "--rerun-label", + "ALLURE_AGENT_STATE_DIR", + "manifest/run.json", + "manifest/test-events.jsonl", + ]); + + await expectTextToContainAll("plugin-agent README", readme, [ + "## Verification Standard", + "## CLI Capability Workflow", + "allure --version", + "allure agent capabilities --json", + "allure agent --help", + "allure agent query --help", + "allure agent select --help", + "allure agent latest --help", + "allure agent state-dir --help", + "`allure agent capabilities --json` is the structured local contract for agents.", + "`allure agent --help` includes the human-readable command task map", + '--expect-test ""', + "instead of spending context reconstructing runner-specific test names", + "instead of manually rebuilding runner-specific test names", + "For small mechanical test changes, use a scoped agent-mode run for the smoke check", + "treat the review as partial", + "Use `allure --version`, `allure agent capabilities --json`, and 
`allure agent --help` before choosing flags", + ]); + }); +}); diff --git a/packages/plugin-agent/test/harness.test.ts b/packages/plugin-agent/test/harness.test.ts index bdc5647513e..7d5d6b0d354 100644 --- a/packages/plugin-agent/test/harness.test.ts +++ b/packages/plugin-agent/test/harness.test.ts @@ -1,8 +1,8 @@ -import { mkdtemp, rm, writeFile } from "node:fs/promises"; +import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises"; import { tmpdir } from "node:os"; import { join } from "node:path"; -import type { AttachmentLink, TestResult } from "@allurereport/core-api"; +import type { AttachmentLink, DefaultTestStepResult, TestResult } from "@allurereport/core-api"; import type { AllureStore, PluginContext } from "@allurereport/plugin-api"; import { BufferResultFile } from "@allurereport/reader-api"; import { story } from "allure-js-commons"; @@ -10,6 +10,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; import AgentPlugin, { type AgentFindingManifestLine, + type AgentExpectationsInput, type AgentOutputBundle, AGENT_ENRICHMENT_ACTIONS, buildAgentExpectations, @@ -17,11 +18,11 @@ import AgentPlugin, { planAgentEnrichmentReview, reviewAgentOutput, } from "../src/index.js"; +import { attachJsonEvidence } from "./evidence.js"; beforeEach(async () => { await story("harness"); }); -const AGENT_ENV_VARS = ["ALLURE_AGENT_EXPECTATIONS", "ALLURE_AGENT_COMMAND", "ALLURE_AGENT_PROJECT_ROOT"] as const; const createContext = (reportName: string = "Harness Report"): PluginContext => ({ @@ -64,6 +65,16 @@ const createAttachment = (overrides: Partial = {}): AttachmentLi ...overrides, }) as AttachmentLink; +const createStep = (overrides: Partial = {}): DefaultTestStepResult => ({ + name: "assert expected behavior", + parameters: [], + status: "passed", + steps: [], + type: "step", + message: "checked", + ...overrides, +}); + const createStore = (overrides: Partial = {}): AllureStore => ({ allTestResults: vi.fn().mockResolvedValue([]), @@ 
-80,6 +91,28 @@ const createStore = (overrides: Partial = {}): AllureStore => ...overrides, }) as AllureStore; +const readJson = async (path: string): Promise => { + const value = JSON.parse(await readFile(path, "utf-8")) as T; + + await attachJsonEvidence(`parsed ${path}`, value); + + return value; +}; + +const readJsonl = async (path: string): Promise => { + const content = await readFile(path, "utf-8"); + + const values = content + .split("\n") + .map((line) => line.trim()) + .filter(Boolean) + .map((line) => JSON.parse(line) as T); + + await attachJsonEvidence(`parsed ${path}`, values); + + return values; +}; + const createFinding = (overrides: Partial = {}): AgentFindingManifestLine => ({ finding_id: "finding-1", subject: "run", @@ -158,7 +191,6 @@ const createOutputBundle = (overrides: Partial = {}): AgentOu tests_manifest: "manifest/tests.jsonl", findings_manifest: "manifest/findings.jsonl", expected_manifest: "manifest/expected.json", - project_guide: null, process_logs: { stdout: "artifacts/global/stdout.txt", stderr: null, @@ -204,6 +236,30 @@ const createOutputBundle = (overrides: Partial = {}): AgentOu }, }, expectations_present: true, + expectations: { + goal: "Verify harness fixture", + }, + expectation_result: { + schema_version: "allure-agent-expectation-result/v1", + status: "matched", + impact: "accept", + source: { + kind: "inline", + path: null, + }, + recognized_control_count: 1, + unsupported_controls: [], + degraded_controls: [], + summary: { + expected_tests: 0, + observed_tests: 1, + missing_expected: 0, + forbidden_observed: 0, + unexpected_observed: 0, + evidence_mismatches: 0, + }, + finding_ids: [], + }, check_summary: { total: 0, countsBySeverity: { @@ -267,18 +323,83 @@ describe("agent enrichment harness", () => { beforeEach(async () => { tempDir = await mkdtemp(join(tmpdir(), "plugin-agent-harness-")); - AGENT_ENV_VARS.forEach((name) => { - delete process.env[name]; - }); }); afterEach(async () => { - AGENT_ENV_VARS.forEach((name) => 
{ - delete process.env[name]; - }); await rm(tempDir, { recursive: true, force: true }); }); + type ExpectationHarnessRun = { + expectations: AgentExpectationsInput; + tests?: TestResult[]; + environmentByTestId?: Record; + attachmentsByTestId?: Record; + contentByAttachmentId?: Record; + }; + + const runExpectationHarness = async (name: string, params: ExpectationHarnessRun) => { + const outputDir = join(tempDir, name); + const tests = params.tests ?? [createTestResult()]; + const stats = tests.reduce>( + (acc, test) => { + acc.total += 1; + acc[test.status] = (acc[test.status] ?? 0) + 1; + + return acc; + }, + { + total: 0, + }, + ); + + await new AgentPlugin({ + outputDir, + expectations: params.expectations, + command: "yarn test expectation-harness", + }).done( + createContext(), + createStore({ + allTestResults: vi.fn().mockResolvedValue(tests), + testsStatistic: vi.fn().mockResolvedValue(stats), + environmentIdByTrId: vi.fn().mockImplementation(async (id: string) => { + return params.environmentByTestId?.[id] ?? "default"; + }), + attachmentsByTrId: vi.fn().mockImplementation(async (id: string) => { + return params.attachmentsByTestId?.[id] ?? 
[]; + }), + attachmentContentById: vi.fn().mockImplementation(async (id: string) => { + return params.contentByAttachmentId?.[id]; + }), + }), + ); + + return { + outputDir, + run: await readJson(join(outputDir, "manifest", "run.json")), + findings: await readJsonl(join(outputDir, "manifest", "findings.jsonl")), + }; + }; + + const expectNoExpectationFinding = (findings: AgentFindingManifestLine[], checkName: string) => { + expect(findings).not.toEqual( + expect.arrayContaining([ + expect.objectContaining({ + check_name: checkName, + }), + ]), + ); + }; + + const expectExpectationFinding = (findings: AgentFindingManifestLine[], checkName: string) => { + expect(findings).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + check_name: checkName, + }), + ]), + ); + }; + it("should build expectations from a harness request", () => { expect( buildAgentExpectations({ @@ -321,22 +442,311 @@ describe("agent enrichment harness", () => { }); }); - it("should map enrichment findings to the intended remediation categories", () => { - expect(AGENT_ENRICHMENT_ACTIONS["failed-without-useful-steps"].category).toBe("add-meaningful-steps"); - expect(mapFindingToEnrichmentAction("nontrivial-run-with-empty-trace").category).toBe("add-meaningful-steps"); - expect(mapFindingToEnrichmentAction("passed-without-observable-evidence").category).toBe("add-meaningful-steps"); - expect(mapFindingToEnrichmentAction("failed-without-attachments").category).toBe("add-test-attachments"); - expect(mapFindingToEnrichmentAction("global-only-artifacts").category).toBe("add-test-attachments"); - expect(mapFindingToEnrichmentAction("runner-failures-outside-logical-results").category).toBe("bootstrap-allure"); - expect(mapFindingToEnrichmentAction("unmodeled-visible-results").category).toBe("review-manually"); - expect(mapFindingToEnrichmentAction("metadata-mismatch").category).toBe("repair-test-metadata"); - 
expect(mapFindingToEnrichmentAction("retries-without-new-evidence").category).toBe("add-retry-diagnostics"); - expect(mapFindingToEnrichmentAction("noop-dominated-steps").category).toBe("collapse-low-signal-trace"); - expect(mapFindingToEnrichmentAction("step-spam").category).toBe("collapse-low-signal-trace"); - expect(mapFindingToEnrichmentAction("unexpected-test").category).toBe("narrow-test-scope"); + it.each([ + { + name: "expected test count", + checkName: "expected-count-mismatch", + matched: { + expectations: { + expected: { + test_count: 1, + }, + }, + }, + unmet: { + expectations: { + expected: { + test_count: 2, + }, + }, + }, + }, + { + name: "expected test full name", + checkName: "expected-test-missing", + matched: { + expectations: { + expected: { + full_names: ["suite should pass"], + }, + }, + }, + unmet: { + expectations: { + expected: { + full_names: ["suite should be visible"], + }, + }, + }, + }, + { + name: "expected test full-name prefix", + checkName: "expected-prefix-missing", + matched: { + expectations: { + expected: { + full_name_prefixes: ["suite should"], + }, + }, + }, + unmet: { + expectations: { + expected: { + full_name_prefixes: ["api should"], + }, + }, + }, + }, + { + name: "expected environment", + checkName: "expected-environment-missing", + matched: { + expectations: { + expected: { + environments: ["default"], + }, + }, + }, + unmet: { + expectations: { + expected: { + environments: ["web"], + }, + }, + }, + }, + { + name: "expected label value", + checkName: "expected-label-missing", + matched: { + expectations: { + expected: { + label_values: { + module: "cli", + }, + }, + }, + tests: [ + createTestResult({ + labels: [{ name: "module", value: "cli" }], + }), + ], + }, + unmet: { + expectations: { + expected: { + label_values: { + module: "cli", + }, + }, + }, + }, + }, + { + name: "forbidden label value", + checkName: "forbidden-label-observed", + matched: { + expectations: { + forbidden: { + label_values: { + layer: "e2e", 
+ }, + }, + }, + }, + unmet: { + expectations: { + forbidden: { + label_values: { + layer: "e2e", + }, + }, + }, + tests: [ + createTestResult({ + labels: [{ name: "layer", value: "e2e" }], + }), + ], + }, + }, + { + name: "expected step text", + checkName: "expected-step-containing-missing", + matched: { + expectations: { + evidence: { + step_name_contains: ["assert expected behavior"], + }, + }, + tests: [ + createTestResult({ + steps: [createStep()], + }), + ], + }, + unmet: { + expectations: { + evidence: { + step_name_contains: ["assert expected behavior"], + }, + }, + }, + }, + { + name: "expected meaningful step count", + checkName: "insufficient-expected-steps", + matched: { + expectations: { + evidence: { + min_steps: 1, + }, + }, + tests: [ + createTestResult({ + steps: [createStep()], + }), + ], + }, + unmet: { + expectations: { + evidence: { + min_steps: 1, + }, + }, + }, + }, + { + name: "expected attachment count", + checkName: "insufficient-expected-attachments", + matched: { + expectations: { + evidence: { + min_attachments: 1, + }, + }, + attachmentsByTestId: { + "tr-1": [createAttachment()], + }, + contentByAttachmentId: { + "attachment-1": new BufferResultFile(Buffer.from("artifact", "utf-8"), "artifact.txt"), + }, + }, + unmet: { + expectations: { + evidence: { + min_attachments: 1, + }, + }, + }, + }, + { + name: "expected attachment name", + checkName: "missing-expected-attachment", + matched: { + expectations: { + evidence: { + attachments: [{ name: "artifact.txt" }], + }, + }, + attachmentsByTestId: { + "tr-1": [createAttachment()], + }, + contentByAttachmentId: { + "attachment-1": new BufferResultFile(Buffer.from("artifact", "utf-8"), "artifact.txt"), + }, + }, + unmet: { + expectations: { + evidence: { + attachments: [{ name: "missing.txt" }], + }, + }, + attachmentsByTestId: { + "tr-1": [createAttachment()], + }, + contentByAttachmentId: { + "attachment-1": new BufferResultFile(Buffer.from("artifact", "utf-8"), "artifact.txt"), + }, + }, 
+ }, + { + name: "expected attachment content type", + checkName: "missing-expected-attachment", + matched: { + expectations: { + evidence: { + attachments: [{ content_type: "text/plain" }], + }, + }, + attachmentsByTestId: { + "tr-1": [createAttachment()], + }, + contentByAttachmentId: { + "attachment-1": new BufferResultFile(Buffer.from("artifact", "utf-8"), "artifact.txt"), + }, + }, + unmet: { + expectations: { + evidence: { + attachments: [{ content_type: "application/json" }], + }, + }, + attachmentsByTestId: { + "tr-1": [createAttachment()], + }, + contentByAttachmentId: { + "attachment-1": new BufferResultFile(Buffer.from("artifact", "utf-8"), "artifact.txt"), + }, + }, + }, + ] satisfies Array<{ + name: string; + checkName: string; + matched: ExpectationHarnessRun; + unmet: ExpectationHarnessRun; + }>)("should report $checkName only when $name is unmet", async ({ name, checkName, matched, unmet }) => { + const matchedOutput = await runExpectationHarness(`${name.replace(/[^a-z0-9]+/gi, "-")}-matched`, matched); + const unmetOutput = await runExpectationHarness(`${name.replace(/[^a-z0-9]+/gi, "-")}-unmet`, unmet); + + expectNoExpectationFinding(matchedOutput.findings, checkName); + expectExpectationFinding(unmetOutput.findings, checkName); + }); + + it("should map enrichment findings to the intended remediation categories", async () => { + const mappedActions = { + "failed-without-useful-steps": AGENT_ENRICHMENT_ACTIONS["failed-without-useful-steps"].category, + "nontrivial-run-with-empty-trace": mapFindingToEnrichmentAction("nontrivial-run-with-empty-trace").category, + "passed-without-observable-evidence": mapFindingToEnrichmentAction("passed-without-observable-evidence").category, + "failed-without-attachments": mapFindingToEnrichmentAction("failed-without-attachments").category, + "global-only-artifacts": mapFindingToEnrichmentAction("global-only-artifacts").category, + "runner-failures-outside-logical-results": 
mapFindingToEnrichmentAction("runner-failures-outside-logical-results") + .category, + "unmodeled-visible-results": mapFindingToEnrichmentAction("unmodeled-visible-results").category, + "metadata-mismatch": mapFindingToEnrichmentAction("metadata-mismatch").category, + "retries-without-new-evidence": mapFindingToEnrichmentAction("retries-without-new-evidence").category, + "noop-dominated-steps": mapFindingToEnrichmentAction("noop-dominated-steps").category, + "step-spam": mapFindingToEnrichmentAction("step-spam").category, + "unexpected-test": mapFindingToEnrichmentAction("unexpected-test").category, + }; + + await attachJsonEvidence("enrichment action category map", mappedActions); + expect(mappedActions).toEqual({ + "failed-without-useful-steps": "add-meaningful-steps", + "nontrivial-run-with-empty-trace": "add-meaningful-steps", + "passed-without-observable-evidence": "add-meaningful-steps", + "failed-without-attachments": "add-test-attachments", + "global-only-artifacts": "add-test-attachments", + "runner-failures-outside-logical-results": "bootstrap-allure", + "unmodeled-visible-results": "review-manually", + "metadata-mismatch": "repair-test-metadata", + "retries-without-new-evidence": "add-retry-diagnostics", + "noop-dominated-steps": "collapse-low-signal-trace", + "step-spam": "collapse-low-signal-trace", + "unexpected-test": "narrow-test-scope", + }); }); - it("should reject high-confidence noop-style evidence", () => { + it("should reject high-confidence noop-style evidence", async () => { const review = planAgentEnrichmentReview( createOutputBundle({ findings: [ @@ -352,6 +762,7 @@ describe("agent enrichment harness", () => { }), ); + await attachJsonEvidence("noop-style evidence review decision", review); expect(review.status).toBe("reject"); expect(review.rejecting).toEqual( expect.arrayContaining([ @@ -403,10 +814,7 @@ describe("agent enrichment harness", () => { "utf-8", ); - process.env.ALLURE_AGENT_EXPECTATIONS = expectationsPath; - 
process.env.ALLURE_AGENT_COMMAND = "yarn test clean-run"; - - await new AgentPlugin({ outputDir }).done( + await new AgentPlugin({ outputDir, expectationsPath, command: "yarn test clean-run" }).done( createContext(), createStore({ allTestResults: vi.fn().mockResolvedValue([testResult]), @@ -424,6 +832,7 @@ describe("agent enrichment harness", () => { const review = await reviewAgentOutput(outputDir); + await attachJsonEvidence("clean scoped run review decision", review); expect(review.status).toBe("accept"); expect(review.plan).toEqual([]); expect(review.rerun.useExistingExpectations).toBe(true); @@ -479,9 +888,7 @@ describe("agent enrichment harness", () => { "utf-8", ); - process.env.ALLURE_AGENT_EXPECTATIONS = expectationsPath; - - await new AgentPlugin({ outputDir }).done( + await new AgentPlugin({ outputDir, expectationsPath }).done( createContext(), createStore({ allTestResults: vi.fn().mockResolvedValue([matching, forbidden]), @@ -496,7 +903,7 @@ describe("agent enrichment harness", () => { expect(review.rejecting).toEqual( expect.arrayContaining([ expect.objectContaining({ - checkName: "forbidden-selector-match", + checkName: "forbidden-label-observed", category: "narrow-test-scope", }), ]), @@ -539,9 +946,7 @@ describe("agent enrichment harness", () => { "utf-8", ); - process.env.ALLURE_AGENT_EXPECTATIONS = expectationsPath; - - await new AgentPlugin({ outputDir }).done( + await new AgentPlugin({ outputDir, expectationsPath }).done( createContext(), createStore({ allTestResults: vi.fn().mockResolvedValue([testResult]), @@ -551,6 +956,7 @@ describe("agent enrichment harness", () => { const review = await reviewAgentOutput(outputDir); + await attachJsonEvidence("low-signal failure review decision", review); expect(review.status).toBe("iterate"); expect(review.iterate).toEqual( expect.arrayContaining([ @@ -644,9 +1050,7 @@ describe("agent enrichment harness", () => { "utf-8", ); - process.env.ALLURE_AGENT_EXPECTATIONS = expectationsPath; - - await new 
AgentPlugin({ outputDir }).done( + await new AgentPlugin({ outputDir, expectationsPath }).done( createContext(), createStore({ allTestResults: vi.fn().mockResolvedValue([current]), diff --git a/packages/plugin-agent/test/index.test.ts b/packages/plugin-agent/test/index.test.ts index e0f32106d1e..45ba11badbe 100644 --- a/packages/plugin-agent/test/index.test.ts +++ b/packages/plugin-agent/test/index.test.ts @@ -13,25 +13,16 @@ import type { ResultFile, } from "@allurereport/plugin-api"; import { BufferResultFile } from "@allurereport/reader-api"; -import { story } from "allure-js-commons"; +import { attachment, step, story } from "allure-js-commons"; import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; +import type { AgentExpectationsInput } from "../src/index.js"; import { AgentPlugin } from "../src/plugin.js"; +import { attachJsonEvidence, attachTextEvidence } from "./evidence.js"; beforeEach(async () => { await story("index"); }); -const AGENT_ENV_VARS = [ - "ALLURE_AGENT_OUTPUT", - "ALLURE_AGENT_EXPECTATIONS", - "ALLURE_AGENT_COMMAND", - "ALLURE_AGENT_PROJECT_ROOT", - "ALLURE_AGENT_NAME", - "ALLURE_AGENT_LOOP_ID", - "ALLURE_AGENT_TASK_ID", - "ALLURE_AGENT_CONVERSATION_ID", -] as const; - const createContext = (reportName: string = "Agent Report"): PluginContext => ({ reportName, @@ -165,29 +156,146 @@ const createRealtimeSubscriber = () => { }; }; -const readJson = async (path: string): Promise => JSON.parse(await readFile(path, "utf-8")) as T; +const readText = async (path: string, contentType: string = "text/plain"): Promise => { + const content = await readFile(path, "utf-8"); + + await attachTextEvidence(`agent artifact ${path}`, content, contentType); + + return content; +}; + +const readJson = async (path: string): Promise => { + const value = JSON.parse(await readFile(path, "utf-8")) as T; + + await attachJsonEvidence(`parsed ${path}`, value); -const readJsonl = async (path: string): Promise => - (await readFile(path, "utf-8")) + 
return value; +}; + +const readJsonl = async (path: string): Promise => { + const values = (await readFile(path, "utf-8")) .trim() .split("\n") .filter(Boolean) .map((line) => JSON.parse(line) as T); + await attachJsonEvidence(`parsed ${path}`, values); + + return values; +}; + +type TestFindingLine = { + schema_version?: string; + check_id?: string; + instance_id?: string; + check_name: string; + severity: "info" | "warning" | "high"; + impact?: "reject" | "iterate" | "advisory"; + subject: unknown; + subject_ref?: string; +}; + +type AttachmentContentFixture = { + content: string; + fileName: string; +}; + +const createMeaningfulStep = (name: string = "assert expected behavior"): TestStepResult => + ({ + type: "step", + name, + parameters: [ + { + name: "state", + value: "verified", + }, + ], + status: "passed", + steps: [], + }) as TestStepResult; + +const createStoreWithGlobalLogs = ( + overrides: Partial = {}, + attachmentContents: Record = {}, +): AllureStore => { + const stdout = createAttachment({ + id: "global-stdout", + name: "stdout.txt", + originalFileName: "stdout.txt", + }); + const contents = new Map([ + [ + stdout.id, + { + content: "stdout", + fileName: "stdout.txt", + }, + ], + ...Object.entries(attachmentContents), + ]); + + return createStore({ + ...overrides, + allGlobalAttachments: overrides.allGlobalAttachments ?? vi.fn().mockResolvedValue([stdout]), + attachmentContentById: + overrides.attachmentContentById ?? + vi.fn().mockImplementation(async (id: string) => { + const fixture = contents.get(id); + + return fixture ? 
new BufferResultFile(Buffer.from(fixture.content, "utf-8"), fixture.fileName) : undefined; + }), + }); +}; + +const expectationOutputName = (field: string, suffix: string) => `${field.replace(/\./g, "-")}-${suffix}`; + describe("AgentPlugin", () => { let tempDir: string; + const runInlineExpectationCase = async (params: { + outputName: string; + expectations: AgentExpectationsInput; + testResult?: TestResult; + environmentId?: string; + attachments?: AttachmentLink[]; + attachmentContents?: Record; + }) => { + const outputDir = join(tempDir, params.outputName); + const testResult = + params.testResult ?? + createTestResult({ + id: "tr-expectation", + historyId: "expectation-history", + fullName: "suite expected behavior", + }); + + await new AgentPlugin({ + outputDir, + expectations: { goal: "Verify expectation case", ...params.expectations }, + }).done( + createContext(), + createStoreWithGlobalLogs( + { + allTestResults: vi.fn().mockResolvedValue([testResult]), + testsStatistic: vi.fn().mockResolvedValue({ total: 1, passed: 1 }), + environmentIdByTrId: vi.fn().mockResolvedValue(params.environmentId ?? "default"), + attachmentsByTrId: vi.fn().mockResolvedValue(params.attachments ?? 
[]), + }, + params.attachmentContents, + ), + ); + + return { + outputDir, + findings: await readJsonl(join(outputDir, "manifest", "findings.jsonl")), + }; + }; + beforeEach(async () => { tempDir = await mkdtemp(join(tmpdir(), "plugin-agent-")); - AGENT_ENV_VARS.forEach((name) => { - delete process.env[name]; - }); }); afterEach(async () => { - AGENT_ENV_VARS.forEach((name) => { - delete process.env[name]; - }); await rm(tempDir, { recursive: true, force: true }); }); @@ -212,9 +320,9 @@ describe("AgentPlugin", () => { test_events_manifest: string; }; }>(join(outputDir, "manifest", "run.json")); - const guide = await readFile(join(outputDir, "AGENTS.md"), "utf-8"); - const indexContent = await readFile(join(outputDir, "index.md"), "utf-8"); - const testEvents = await readFile(join(outputDir, "manifest", "test-events.jsonl"), "utf-8"); + const guide = await readText(join(outputDir, "AGENTS.md"), "text/markdown"); + const indexContent = await readText(join(outputDir, "index.md"), "text/markdown"); + const testEvents = await readText(join(outputDir, "manifest", "test-events.jsonl"), "application/x-jsonlines"); expect(runManifest.phase).toBe("running"); expect(runManifest.paths.test_events_manifest).toBe("manifest/test-events.jsonl"); @@ -250,7 +358,7 @@ describe("AgentPlugin", () => { const runningManifest = await readJson<{ phase: "running" | "done"; }>(join(outputDir, "manifest", "run.json")); - const testContent = await readFile(join(outputDir, "tests", "default", "live-history.md"), "utf-8"); + const testContent = await readText(join(outputDir, "tests", "default", "live-history.md"), "text/markdown"); const eventLines = await readJsonl<{ event_type: string; markdown_path?: string; @@ -284,21 +392,17 @@ describe("AgentPlugin", () => { expect(finalEvents.at(-1)).toEqual(expect.objectContaining({ event_type: "run_finished" })); }); - it("should prefer option outputDir over ALLURE_AGENT_OUTPUT", async () => { + it("should write output only when outputDir is 
configured", async () => { const optionDir = join(tempDir, "option-output"); - const envDir = join(tempDir, "env-output"); const store = createStore({ allTestResults: vi.fn().mockResolvedValue([createTestResult()]), testsStatistic: vi.fn().mockResolvedValue({ total: 1, passed: 1 }), }); - process.env.ALLURE_AGENT_OUTPUT = envDir; - await new AgentPlugin({ outputDir: optionDir }).done(createContext(), store); await expect(stat(join(optionDir, "index.md"))).resolves.toBeTruthy(); await expect(stat(join(optionDir, "AGENTS.md"))).resolves.toBeTruthy(); - await expect(stat(join(envDir, "index.md"))).rejects.toThrow(); }); it("should clean only managed entries before writing", async () => { @@ -312,9 +416,9 @@ describe("AgentPlugin", () => { await new AgentPlugin({ outputDir }).done(createContext(), createStore()); - expect(await readFile(join(outputDir, "notes.txt"), "utf-8")).toBe("keep me"); - expect(await readFile(join(outputDir, "index.md"), "utf-8")).toContain("# Agent Report"); - expect(await readFile(join(outputDir, "AGENTS.md"), "utf-8")).toContain("# AGENTS Guide"); + expect(await readText(join(outputDir, "notes.txt"))).toBe("keep me"); + expect(await readText(join(outputDir, "index.md"), "text/markdown")).toContain("# Agent Report"); + expect(await readText(join(outputDir, "AGENTS.md"), "text/markdown")).toContain("# AGENTS Guide"); }); it("should use historyId-based file names and fall back to the test result id", async () => { @@ -358,8 +462,8 @@ describe("AgentPlugin", () => { await new AgentPlugin({ outputDir }).done(createContext(), store); - const indexContent = await readFile(join(outputDir, "index.md"), "utf-8"); - const testContent = await readFile(join(outputDir, "tests", "default", "history.id_1.md"), "utf-8"); + const indexContent = await readText(join(outputDir, "index.md"), "text/markdown"); + const testContent = await readText(join(outputDir, "tests", "default", "history.id_1.md"), "text/markdown"); 
expect(indexContent).toContain("test/index.test.ts#AgentPlugin should keep markdown readable (v1)"); expect(testContent).toContain("Name: should keep markdown readable (v1)"); @@ -411,7 +515,7 @@ describe("AgentPlugin", () => { await new AgentPlugin({ outputDir }).done(createContext(), store); - const primaryContent = await readFile(join(outputDir, "tests", "default", "shared-history.md"), "utf-8"); + const primaryContent = await readText(join(outputDir, "tests", "default", "shared-history.md"), "text/markdown"); expect(primaryContent).toContain("## Retry 1"); expect(primaryContent).toContain("retry failure"); @@ -460,7 +564,7 @@ describe("AgentPlugin", () => { await new AgentPlugin({ outputDir }).done(createContext("My Report"), store); - const indexContent = await readFile(join(outputDir, "index.md"), "utf-8"); + const indexContent = await readText(join(outputDir, "index.md"), "text/markdown"); expect(indexContent).toContain("# My Report"); expect(indexContent).toContain("## Process Logs"); @@ -470,73 +574,109 @@ describe("AgentPlugin", () => { expect(indexContent).toContain("stdout.txt"); expect(indexContent).toContain("stderr.txt"); expect(indexContent).toContain("Too many failures"); - expect(await readFile(join(outputDir, "artifacts", "global", "stdout.txt"), "utf-8")).toBe("stdout content"); - expect(await readFile(join(outputDir, "artifacts", "global", "stderr.txt"), "utf-8")).toBe("stderr content"); - expect(await readFile(join(outputDir, "AGENTS.md"), "utf-8")).toContain("## Reading Order"); + expect(await readText(join(outputDir, "artifacts", "global", "stdout.txt"))).toBe("stdout content"); + expect(await readText(join(outputDir, "artifacts", "global", "stderr.txt"))).toBe("stderr content"); + expect(await readText(join(outputDir, "AGENTS.md"), "text/markdown")).toContain("## Reading Order"); }); - it("should copy project guidance and reference it from AGENTS.md and run manifest", async () => { - const outputDir = join(tempDir, "project-guide"); - const 
projectRoot = join(tempDir, "project-root"); - const guidePath = join(projectRoot, "docs", "allure-agent-mode.md"); + it("should generate standalone AGENTS guidance", async () => { + const outputDir = join(tempDir, "standalone-agents-guide"); const store = createStore({ allTestResults: vi.fn().mockResolvedValue([createTestResult()]), testsStatistic: vi.fn().mockResolvedValue({ total: 1, passed: 1 }), }); - await mkdir(join(projectRoot, "docs"), { recursive: true }); - await writeFile(guidePath, "# Project Allure Guide\n\nUse agent mode here.\n", "utf-8"); - process.env.ALLURE_AGENT_PROJECT_ROOT = projectRoot; - await new AgentPlugin({ outputDir }).done(createContext(), store); - const guideCopy = await readFile(join(outputDir, "project", "docs", "allure-agent-mode.md"), "utf-8"); - const agentsGuide = await readFile(join(outputDir, "AGENTS.md"), "utf-8"); + const agentsGuide = await readText(join(outputDir, "AGENTS.md"), "text/markdown"); const runManifest = await readJson<{ - paths: { - project_guide: string | null; - }; + paths: Record; }>(join(outputDir, "manifest", "run.json")); - expect(guideCopy).toContain("Project Allure Guide"); - expect(agentsGuide).toContain("[project guidance](project/docs/allure-agent-mode.md)"); - expect(runManifest.paths.project_guide).toBe("project/docs/allure-agent-mode.md"); + expect(agentsGuide).toContain("## Reading Order"); + expect(agentsGuide).toContain("## Command Task Map"); + expect(runManifest.paths).toEqual(expect.objectContaining({ index_md: "index.md", agents_md: "AGENTS.md" })); }); it("should include downstream enrichment best practices in AGENTS.md", async () => { const outputDir = join(tempDir, "agents-guide"); - await new AgentPlugin({ outputDir }).done(createContext(), createStore()); + const guide = await step("render AGENTS guidance", async () => { + await new AgentPlugin({ outputDir }).done(createContext(), createStore()); - const guide = await readFile(join(outputDir, "AGENTS.md"), "utf-8"); + return await 
readText(join(outputDir, "AGENTS.md"), "text/markdown"); + }); - expect(guide).toContain("## Enrichment Loop Workflow"); - expect(guide).toContain("## Verification Standard"); - expect(guide).toContain("manifest/test-events.jsonl"); - expect(guide).toContain("allure agent latest"); - expect(guide).toContain("allure agent state-dir"); - expect(guide).toContain("allure agent select --latest"); - expect(guide).toContain("allure agent --rerun-latest"); - expect(guide).toContain("--rerun-preset"); - expect(guide).toContain("--rerun-environment"); - expect(guide).toContain("--rerun-label"); - expect(guide).toContain("ALLURE_AGENT_STATE_DIR"); - expect(guide).toContain("print the `index.md` path"); - expect(guide).toContain( - "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.", - ); - expect(guide).toContain("Use `allure agent` for smoke checks too, even when the change is small or mechanical."); - expect(guide).toContain("Only skip agent mode when it is impossible or when you are debugging agent mode itself."); - expect(guide).toContain("## Small Test Change Workflow"); - expect(guide).toContain("## Coverage Review Workflow"); - expect(guide).toContain("## Test Enrichment Best Practices"); - expect(guide).toContain("## Anti-Dummy Policy"); - expect(guide).toContain("## Acceptance Checklist"); - expect(guide).toContain("## Review Completeness"); - expect(guide).toContain("## Partial Runtime Review"); - expect(guide).toContain("teach `runCommand` to emit a step"); - expect(guide).toContain("`failed-without-useful-steps`"); - expect(guide).toContain("`noop-dominated-steps`"); + await step("verify generated workflow guidance", async () => { + await attachment( + "verified AGENTS guidance 
sections", + JSON.stringify( + { + sections: [ + "Agent Workflows", + "Command Task Map", + "Verification Standard", + "Test Enrichment Best Practices", + ], + command: 'allure agent --goal --expect-tests --expect-test ""', + }, + null, + 2, + ), + "application/json", + ); + expect(guide).toContain("## Agent Workflows"); + expect(guide).toContain("Use the smallest workflow that matches the task."); + expect(guide).toContain("### Validate A Change"); + expect(guide).toContain("### Add Or Update Tests"); + expect(guide).toContain("### Review Existing Coverage"); + expect(guide).toContain("### Triage Failures"); + expect(guide).toContain("### Rerun A Prior Scope"); + expect(guide).toContain("### Improve Evidence Quality"); + expect(guide).toContain("### Recover Or Diagnose Agent Mode"); + expect(guide).toContain("Use when code or tests changed and you need a user-facing safety conclusion."); + expect(guide).toContain("Commands:"); + expect(guide).toContain("Done when:"); + expect(guide).toContain("## Verification Standard"); + expect(guide).toContain("manifest/test-events.jsonl"); + expect(guide).toContain("allure agent latest"); + expect(guide).toContain("allure agent state-dir"); + expect(guide).toContain("allure agent select --latest"); + expect(guide).toContain("allure agent --rerun-latest"); + expect(guide).toContain("## Command Task Map"); + expect(guide).toContain("setup and capability-detection loop"); + expect(guide).toContain("output recovery loop"); + expect(guide).toContain("tooling diagnosis loop"); + expect(guide).toContain("rerun-planning loop"); + expect(guide).toContain("focused retry loop"); + expect(guide).toContain("state-control loop"); + expect(guide).toContain("--rerun-preset"); + expect(guide).toContain("instead of rebuilding runner-specific test names"); + expect(guide).toContain("allure agent --rerun-latest --rerun-preset failed -- "); + expect(guide).toContain("--rerun-environment"); + expect(guide).toContain("--rerun-label"); + 
expect(guide).toContain("ALLURE_AGENT_STATE_DIR"); + expect(guide).toContain('allure agent --goal --expect-tests --expect-test ""'); + expect(guide).toContain("print the `index.md` path"); + expect(guide).toContain( + "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config.", + ); + expect(guide).toContain("Use `allure agent` for smoke checks too, even when the change is small or mechanical."); + expect(guide).toContain( + "Only skip agent mode when it is impossible or when you are debugging agent mode itself.", + ); + expect(guide).toContain( + "For small mechanical changes, use this same workflow with narrower expectations rather than a separate shortcut.", + ); + expect(guide).toContain("## Test Enrichment Best Practices"); + expect(guide).toContain("## Anti-Dummy Policy"); + expect(guide).toContain("## Acceptance Checklist"); + expect(guide).toContain("## Review Completeness"); + expect(guide).toContain("## Partial Runtime Review"); + expect(guide).toContain("teach `runCommand` to emit a step"); + expect(guide).toContain("`failed-without-useful-steps`"); + expect(guide).toContain("`noop-dominated-steps`"); + }); }); it("should render fixtures, copy attachments, and keep missing attachments visible", async () => { @@ -622,7 +762,7 @@ describe("AgentPlugin", () => { await new AgentPlugin({ outputDir }).done(createContext(), store); - const content = await readFile(join(outputDir, "tests", "default", "artifact-history.md"), "utf-8"); + const content = await readText(join(outputDir, "tests", "default", "artifact-history.md"), "text/markdown"); expect(content).toContain("### Before Fixture: setup"); expect(content).toContain("### Steps"); @@ -630,9 +770,9 @@ describe("AgentPlugin", () => { 
expect(content).toContain("screenshot.png"); expect(content).toContain("fixture.log"); expect( - await readFile(join(outputDir, "tests", "default", "artifact-history.assets", "screenshot.png"), "utf-8"), + await readText(join(outputDir, "tests", "default", "artifact-history.assets", "screenshot.png")), ).toBe("png-bytes"); - expect(await readFile(join(outputDir, "tests", "default", "artifact-history.assets", "fixture.log"), "utf-8")).toBe( + expect(await readText(join(outputDir, "tests", "default", "artifact-history.assets", "fixture.log"))).toBe( "fixture log", ); }); @@ -694,13 +834,14 @@ notes: "utf-8", ); - process.env.ALLURE_AGENT_EXPECTATIONS = expectationsPath; - process.env.ALLURE_AGENT_COMMAND = "yarn test feature-a"; - process.env.ALLURE_AGENT_NAME = "codex"; - process.env.ALLURE_AGENT_LOOP_ID = "loop-1"; - process.env.ALLURE_AGENT_CONVERSATION_ID = "conversation-1"; - - await new AgentPlugin({ outputDir }).done(createContext(), store); + await new AgentPlugin({ + outputDir, + expectationsPath, + command: "yarn test feature-a", + agentName: "codex", + loopId: "loop-1", + conversationId: "conversation-1", + }).done(createContext(), store); const runManifest = await readJson<{ command: string; @@ -713,7 +854,6 @@ notes: }; paths: { expected_manifest: string; - project_guide: string | null; }; check_summary: { total: number; @@ -726,15 +866,15 @@ notes: const findingsManifest = await readJsonl<{ check_name: string; severity: "info" | "warning" | "high"; - subject: string; + subject?: unknown; + subject_ref?: string; }>(join(outputDir, "manifest", "findings.jsonl")); - const indexContent = await readFile(join(outputDir, "index.md"), "utf-8"); - const forbiddenContent = await readFile(join(outputDir, "tests", "api", "feature-b-history.md"), "utf-8"); + const indexContent = await readText(join(outputDir, "index.md"), "text/markdown"); + const forbiddenContent = await readText(join(outputDir, "tests", "api", "feature-b-history.md"), "text/markdown"); 
expect(runManifest.command).toBe("yarn test feature-a"); expect(runManifest.expectations_present).toBe(true); expect(runManifest.paths.expected_manifest).toBe("manifest/expected.json"); - expect(runManifest.paths.project_guide).toBeNull(); expect(runManifest.agent_context).toEqual({ agent_name: "codex", loop_id: "loop-1", @@ -756,14 +896,14 @@ notes: expect(findingsManifest).toEqual( expect.arrayContaining([ expect.objectContaining({ - check_name: "forbidden-selector-match", + check_name: "forbidden-label-observed", severity: "high", - subject: "tests/api/feature-b-history.md", + subject_ref: "tests/api/feature-b-history.md", }), expect.objectContaining({ check_name: "unexpected-environment", severity: "warning", - subject: "run", + subject_ref: "run", }), ]), ); @@ -776,7 +916,773 @@ notes: expect(forbiddenContent).toContain("## Expectation Comparison"); expect(forbiddenContent).toContain("Scope Match: forbidden"); expect(forbiddenContent).toContain("## Quality Findings"); - expect(await readFile(join(outputDir, "manifest", "expected.json"), "utf-8")).toContain('"task_id": "feature-a"'); + expect(await readText(join(outputDir, "manifest", "expected.json"), "application/json")).toContain( + '"task_id": "feature-a"', + ); + }); + + it("should load inline expectations and report count and evidence gaps", async () => { + const outputDir = join(tempDir, "inline-expectations"); + const matching = createTestResult({ + id: "tr-inline", + historyId: "inline-history", + name: "inline should be visible", + fullName: "inline should be visible", + labels: [ + { + name: "feature", + value: "inline", + }, + ], + }); + const store = createStore({ + allTestResults: vi.fn().mockResolvedValue([matching]), + testsStatistic: vi.fn().mockResolvedValue({ total: 1, passed: 1 }), + }); + + const expectations = { + goal: "Review inline expectations", + expected: { + test_count: 2, + label_values: { + feature: "inline", + }, + }, + evidence: { + min_steps: 1, + min_attachments: 1, + 
step_name_contains: ["assert expected behavior"], + attachments: [ + { + name: "evidence.json", + }, + ], + }, + }; + + await new AgentPlugin({ outputDir, expectations }).done(createContext(), store); + + const expectedManifest = await readJson<{ + expected: { + test_count: number; + }; + evidence: { + step_name_contains: string[]; + }; + }>(join(outputDir, "manifest", "expected.json")); + const findingsManifest = await readJsonl<{ + check_name: string; + severity: "info" | "warning" | "high"; + subject?: unknown; + subject_ref?: string; + }>(join(outputDir, "manifest", "findings.jsonl")); + const runManifest = await readJson<{ + expectations: { + evidence: { + step_name_contains: string[]; + }; + }; + expectation_result: { + status: string; + impact: string; + recognized_control_count: number; + summary: { + expected_tests: number; + observed_tests: number; + evidence_mismatches: number; + }; + }; + }>(join(outputDir, "manifest", "run.json")); + const indexContent = await readText(join(outputDir, "index.md"), "text/markdown"); + + expect(expectedManifest.expected.test_count).toBe(2); + expect(expectedManifest.evidence.step_name_contains).toEqual(["assert expected behavior"]); + expect(runManifest.expectations.evidence.step_name_contains).toEqual(["assert expected behavior"]); + expect(runManifest.expectation_result.status).toBe("failed"); + expect(runManifest.expectation_result.impact).toBe("iterate"); + expect(runManifest.expectation_result.recognized_control_count).toBe(7); + expect(runManifest.expectation_result.summary).toEqual( + expect.objectContaining({ + expected_tests: 2, + observed_tests: 1, + evidence_mismatches: 4, + }), + ); + expect(indexContent).toContain("Expectations Source: CLI options"); + expect(indexContent).toContain("## Expectation Result"); + expect(indexContent).toContain("Status: failed"); + expect(indexContent).toContain("test count: 2"); + expect(indexContent).toContain("step contains: assert expected behavior"); + 
expect(findingsManifest).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + schema_version: "allure-agent-finding/v2", + check_id: "expected-count-mismatch", + check_name: "expected-count-mismatch", + severity: "warning", + impact: "iterate", + subject_ref: "run", + }), + expect.objectContaining({ + check_name: "expected-step-containing-missing", + severity: "warning", + subject_ref: "tests/default/inline-history.md", + }), + expect.objectContaining({ + check_name: "insufficient-expected-steps", + severity: "warning", + subject_ref: "tests/default/inline-history.md", + }), + expect.objectContaining({ + check_name: "insufficient-expected-attachments", + severity: "warning", + subject_ref: "tests/default/inline-history.md", + }), + expect.objectContaining({ + check_name: "missing-expected-attachment", + severity: "warning", + subject_ref: "tests/default/inline-history.md", + }), + ]), + ); + }); + + it("should mark metadata-only expectations as not requested", async () => { + const outputDir = join(tempDir, "metadata-only-expectations"); + const matching = createTestResult({ + id: "tr-metadata-only", + historyId: "metadata-only-history", + name: "metadata-only test", + fullName: "metadata-only test", + }); + const store = createStore({ + allTestResults: vi.fn().mockResolvedValue([matching]), + testsStatistic: vi.fn().mockResolvedValue({ total: 1, passed: 1 }), + }); + + await new AgentPlugin({ + outputDir, + expectations: { + goal: "record review context", + task_id: "TASK-1", + }, + }).done(createContext(), store); + + const runManifest = await readJson<{ + expectation_result: { + status: string; + impact: string; + recognized_control_count: number; + summary: { + observed_tests: number; + }; + }; + }>(join(outputDir, "manifest", "run.json")); + + expect(runManifest.expectation_result).toEqual( + expect.objectContaining({ + status: "not_requested", + impact: "advisory", + recognized_control_count: 2, + }), + ); + 
expect(runManifest.expectation_result.summary.observed_tests).toBe(1); + }); + + it("should render every parsed inline expectation config field", async () => { + const traceAttachment = createAttachment({ + id: "trace-json", + name: "trace.json", + originalFileName: "trace.json", + ext: ".json", + contentType: "application/json", + }); + const { outputDir, findings } = await runInlineExpectationCase({ + outputName: "parsed-inline-config-fields", + environmentId: "web", + testResult: createTestResult({ + id: "tr-parsed-config", + historyId: "parsed-config-history", + fullName: "suite expected behavior", + labels: [ + { + name: "feature", + value: "scope", + }, + ], + steps: [createMeaningfulStep()], + }), + attachments: [traceAttachment], + attachmentContents: { + "trace-json": { + content: "{}", + fileName: "trace.json", + }, + }, + expectations: { + goal: "Review parsed inline config fields", + task_id: "agent-inline-fields", + expected: { + test_count: 1, + environments: ["web"], + full_names: ["suite expected behavior"], + full_name_prefixes: ["suite expected"], + label_values: { + feature: "scope", + }, + }, + forbidden: { + environments: ["api"], + full_names: ["suite forbidden behavior"], + full_name_prefixes: ["suite forbidden"], + label_values: { + feature: ["forbidden"], + }, + }, + evidence: { + min_steps: 1, + min_attachments: 1, + step_name_contains: ["assert expected behavior"], + attachments: [ + { + name: "trace.json", + }, + { + content_type: "application/json", + }, + ], + }, + notes: "Keep every field visible to reviewers", + }, + }); + + const indexContent = await readText(join(outputDir, "index.md"), "text/markdown"); + + expect(findings).toEqual([]); + expect(indexContent).toContain("Goal: Review parsed inline config fields"); + expect(indexContent).toContain("Feature / Task: agent-inline-fields"); + expect(indexContent).toContain( + "Expected selectors: test count: 1 | environments: web | full names: suite expected behavior | prefixes: suite 
expected | labels: feature in [scope]", + ); + expect(indexContent).toContain( + "Forbidden selectors: environments: api | full names: suite forbidden behavior | prefixes: suite forbidden | labels: feature in [forbidden]", + ); + expect(indexContent).toContain( + "Evidence expectations: meaningful steps per test: >= 1 | attachments per test: >= 1 | step contains: assert expected behavior | attachments: name=trace.json; content-type=application/json", + ); + expect(indexContent).toContain("Notes: Keep every field visible to reviewers"); + }); + + it.each([ + { + field: "expected.test_count", + expectations: { + expected: { + test_count: 1, + }, + }, + }, + { + field: "expected.environments", + environmentId: "web", + expectations: { + expected: { + environments: ["web"], + }, + }, + }, + { + field: "expected.full_names", + expectations: { + expected: { + full_names: ["suite expected behavior"], + }, + }, + }, + { + field: "expected.full_name_prefixes", + expectations: { + expected: { + full_name_prefixes: ["suite expected"], + }, + }, + }, + { + field: "expected.label_values", + testResult: createTestResult({ + id: "tr-expected-label-pass", + historyId: "expected-label-pass-history", + fullName: "suite expected behavior", + labels: [ + { + name: "feature", + value: "scope", + }, + ], + }), + expectations: { + expected: { + label_values: { + feature: "scope", + }, + }, + }, + }, + ])("should report no findings when $field is met", async ({ field, expectations, testResult, environmentId }) => { + const { findings } = await runInlineExpectationCase({ + outputName: expectationOutputName(field, "met"), + expectations, + testResult, + environmentId, + }); + + expect(findings).toEqual([]); + }); + + it.each([ + { + field: "expected.test_count", + checkName: "expected-count-mismatch", + expectations: { + expected: { + test_count: 2, + }, + }, + }, + { + field: "expected.environments", + checkName: "expected-environment-missing", + environmentId: "api", + expectations: { + 
expected: { + environments: ["web"], + }, + }, + }, + { + field: "expected.full_names", + checkName: "expected-test-missing", + expectations: { + expected: { + full_names: ["suite missing behavior"], + }, + }, + }, + { + field: "expected.full_name_prefixes", + checkName: "expected-prefix-missing", + expectations: { + expected: { + full_name_prefixes: ["suite missing"], + }, + }, + }, + { + field: "expected.label_values", + checkName: "expected-label-missing", + testResult: createTestResult({ + id: "tr-expected-label-fail", + historyId: "expected-label-fail-history", + fullName: "suite expected behavior", + labels: [ + { + name: "feature", + value: "other", + }, + ], + }), + expectations: { + expected: { + label_values: { + feature: "scope", + }, + }, + }, + }, + ])( + "should report $checkName when $field is not met", + async ({ field, checkName, expectations, testResult, environmentId }) => { + const { findings } = await runInlineExpectationCase({ + outputName: expectationOutputName(field, "missing"), + expectations, + testResult, + environmentId, + }); + + expect(findings).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + check_name: checkName, + }), + ]), + ); + }, + ); + + it.each([ + { + field: "forbidden.environments", + environmentId: "web", + expectations: { + forbidden: { + environments: ["api"], + }, + }, + }, + { + field: "forbidden.full_names", + expectations: { + forbidden: { + full_names: ["suite forbidden behavior"], + }, + }, + }, + { + field: "forbidden.full_name_prefixes", + expectations: { + forbidden: { + full_name_prefixes: ["suite forbidden"], + }, + }, + }, + { + field: "forbidden.label_values", + testResult: createTestResult({ + id: "tr-forbidden-label-pass", + historyId: "forbidden-label-pass-history", + fullName: "suite expected behavior", + labels: [ + { + name: "feature", + value: "scope", + }, + ], + }), + expectations: { + forbidden: { + label_values: { + feature: "forbidden", + }, + }, + }, + }, + ])( + "should report 
no findings when $field is not matched", + async ({ field, expectations, testResult, environmentId }) => { + const { findings } = await runInlineExpectationCase({ + outputName: expectationOutputName(field, "allowed"), + expectations, + testResult, + environmentId, + }); + + expect(findings).toEqual([]); + }, + ); + + it.each([ + { + field: "forbidden.environments", + checkName: "forbidden-selector-match", + environmentId: "api", + expectations: { + forbidden: { + environments: ["api"], + }, + }, + }, + { + field: "forbidden.full_names", + checkName: "forbidden-selector-match", + expectations: { + forbidden: { + full_names: ["suite expected behavior"], + }, + }, + }, + { + field: "forbidden.full_name_prefixes", + checkName: "forbidden-selector-match", + expectations: { + forbidden: { + full_name_prefixes: ["suite expected"], + }, + }, + }, + { + field: "forbidden.label_values", + checkName: "forbidden-label-observed", + testResult: createTestResult({ + id: "tr-forbidden-label-fail", + historyId: "forbidden-label-fail-history", + fullName: "suite expected behavior", + labels: [ + { + name: "feature", + value: "forbidden", + }, + ], + }), + expectations: { + forbidden: { + label_values: { + feature: "forbidden", + }, + }, + }, + }, + ])( + "should report $checkName when $field is matched", + async ({ field, checkName, expectations, testResult, environmentId }) => { + const { findings } = await runInlineExpectationCase({ + outputName: expectationOutputName(field, "forbidden"), + expectations, + testResult, + environmentId, + }); + + expect(findings).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + check_name: checkName, + }), + ]), + ); + }, + ); + + it.each([ + { + field: "evidence.step_name_contains", + expectations: { + evidence: { + step_name_contains: ["expected behavior"], + }, + }, + testResult: createTestResult({ + id: "tr-evidence-step-text-pass", + historyId: "evidence-step-text-pass-history", + fullName: "suite expected behavior", + steps: 
[createMeaningfulStep()], + }), + }, + { + field: "evidence.min_steps", + expectations: { + evidence: { + min_steps: 1, + }, + }, + testResult: createTestResult({ + id: "tr-evidence-steps-pass", + historyId: "evidence-steps-pass-history", + fullName: "suite expected behavior", + steps: [createMeaningfulStep()], + }), + }, + { + field: "evidence.min_attachments", + expectations: { + evidence: { + min_attachments: 1, + }, + }, + attachments: [ + createAttachment({ + id: "evidence-attachment-pass", + name: "evidence.txt", + originalFileName: "evidence.txt", + }), + ], + attachmentContents: { + "evidence-attachment-pass": { + content: "evidence", + fileName: "evidence.txt", + }, + }, + }, + { + field: "evidence.attachments.name", + expectations: { + evidence: { + attachments: [ + { + name: "evidence.txt", + }, + ], + }, + }, + attachments: [ + createAttachment({ + id: "evidence-name-pass", + name: "evidence.txt", + originalFileName: "evidence.txt", + }), + ], + attachmentContents: { + "evidence-name-pass": { + content: "evidence", + fileName: "evidence.txt", + }, + }, + }, + { + field: "evidence.attachments.content_type", + expectations: { + evidence: { + attachments: [ + { + content_type: "application/json", + }, + ], + }, + }, + attachments: [ + createAttachment({ + id: "evidence-type-pass", + name: "evidence.json", + originalFileName: "evidence.json", + ext: ".json", + contentType: "application/json", + }), + ], + attachmentContents: { + "evidence-type-pass": { + content: "{}", + fileName: "evidence.json", + }, + }, + }, + ])( + "should report no findings when $field is met", + async ({ field, expectations, testResult, attachments, attachmentContents }) => { + const { findings } = await runInlineExpectationCase({ + outputName: expectationOutputName(field, "met"), + expectations, + testResult, + attachments, + attachmentContents, + }); + + expect(findings).toEqual([]); + }, + ); + + it("should match expected step text in nested test-scoped steps", async () => { + 
const nestedStep = { + ...createMeaningfulStep("parent action"), + steps: [createMeaningfulStep("Validate order total includes discount")], + } as TestStepResult; + const { findings } = await runInlineExpectationCase({ + outputName: "evidence-step-name-nested-met", + expectations: { + evidence: { + step_name_contains: ["order total includes discount"], + }, + }, + testResult: createTestResult({ + id: "tr-nested-step", + historyId: "nested-step-history", + fullName: "suite expected behavior", + steps: [nestedStep], + }), + }); + + expect(findings).toEqual([]); + }); + + it("should not satisfy expected step text from global output only", async () => { + const { findings } = await runInlineExpectationCase({ + outputName: "evidence-step-name-global-output-missing", + expectations: { + evidence: { + step_name_contains: ["global-only marker"], + }, + }, + attachmentContents: { + "global-stdout": { + content: "global-only marker", + fileName: "stdout.txt", + }, + }, + }); + + expect(findings).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + check_name: "expected-step-containing-missing", + }), + ]), + ); + }); + + it.each([ + { + field: "evidence.step_name_contains", + checkName: "expected-step-containing-missing", + expectations: { + evidence: { + step_name_contains: ["expected behavior"], + }, + }, + }, + { + field: "evidence.min_steps", + checkName: "insufficient-expected-steps", + expectations: { + evidence: { + min_steps: 1, + }, + }, + }, + { + field: "evidence.min_attachments", + checkName: "insufficient-expected-attachments", + expectations: { + evidence: { + min_attachments: 1, + }, + }, + }, + { + field: "evidence.attachments.name", + checkName: "missing-expected-attachment", + expectations: { + evidence: { + attachments: [ + { + name: "evidence.txt", + }, + ], + }, + }, + }, + { + field: "evidence.attachments.content_type", + checkName: "missing-expected-attachment", + expectations: { + evidence: { + attachments: [ + { + content_type: 
"application/json", + }, + ], + }, + }, + }, + ])("should report $checkName when $field is not met", async ({ field, checkName, expectations }) => { + const { findings } = await runInlineExpectationCase({ + outputName: expectationOutputName(field, "missing"), + expectations, + }); + + expect(findings).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + check_name: checkName, + }), + ]), + ); }); it("should emit bootstrap findings when no visible tests are present", async () => { @@ -791,12 +1697,12 @@ notes: check_name: string; severity: "info" | "warning" | "high"; }>(join(outputDir, "manifest", "findings.jsonl")); - const indexContent = await readFile(join(outputDir, "index.md"), "utf-8"); + const indexContent = await readText(join(outputDir, "index.md"), "text/markdown"); expect(findingsManifest).toEqual( expect.arrayContaining([ expect.objectContaining({ - check_name: "no-visible-tests", + check_name: "no-tests-observed", severity: "high", }), expect.objectContaining({ @@ -808,6 +1714,42 @@ notes: expect(indexContent).toContain("No visible test results were found in the run."); }); + it("should accept zero observed logical tests when --expect-tests 0 was requested", async () => { + const outputDir = join(tempDir, "expect-zero-tests"); + const store = createStore({ + testsStatistic: vi.fn().mockResolvedValue({ total: 0 }), + }); + + await new AgentPlugin({ + outputDir, + expectations: { + goal: "Verify no logical tests are selected", + expected: { + test_count: 0, + }, + }, + }).done(createContext(), store); + + const runManifest = await readJson<{ + expectation_result: { + status: string; + impact: string; + }; + }>(join(outputDir, "manifest", "run.json")); + const findingsManifest = await readJsonl<{ + check_name: string; + }>(join(outputDir, "manifest", "findings.jsonl")); + + expect(runManifest.expectation_result).toEqual(expect.objectContaining({ status: "matched", impact: "accept" })); + expect(findingsManifest).not.toEqual( + 
expect.arrayContaining([ + expect.objectContaining({ + check_name: "no-tests-observed", + }), + ]), + ); + }); + it("should surface partial runtime modeling and high-signal stderr summaries", async () => { const outputDir = join(tempDir, "partial-runtime"); const stderrAttachment = createAttachment({ @@ -891,7 +1833,7 @@ notes: check_name: string; severity: "info" | "warning" | "high"; }>(join(outputDir, "manifest", "findings.jsonl")); - const indexContent = await readFile(join(outputDir, "index.md"), "utf-8"); + const indexContent = await readText(join(outputDir, "index.md"), "text/markdown"); expect(runManifest.actual_exit_code).toBe(1); expect(runManifest.original_exit_code).toBe(1); @@ -983,7 +1925,7 @@ notes: }; }; }>(join(outputDir, "manifest", "run.json")); - const indexContent = await readFile(join(outputDir, "index.md"), "utf-8"); + const indexContent = await readText(join(outputDir, "index.md"), "text/markdown"); expect(runManifest.modeling.stderr.actionableSamples).toEqual( expect.arrayContaining([expect.stringContaining('unable to find utility "xcresulttool"')]), @@ -1047,10 +1989,7 @@ notes: "utf-8", ); - process.env.ALLURE_AGENT_EXPECTATIONS = expectationsPath; - process.env.ALLURE_AGENT_COMMAND = "yarn test clean-run"; - - await new AgentPlugin({ outputDir }).done(createContext(), store); + await new AgentPlugin({ outputDir, expectationsPath, command: "yarn test clean-run" }).done(createContext(), store); const runManifest = await readJson<{ actual_exit_code: number | null; @@ -1071,7 +2010,7 @@ notes: }; }>(join(outputDir, "manifest", "run.json")); const findingsManifest = await readJsonl(join(outputDir, "manifest", "findings.jsonl")); - const indexContent = await readFile(join(outputDir, "index.md"), "utf-8"); + const indexContent = await readText(join(outputDir, "index.md"), "text/markdown"); expect(runManifest.check_summary.total).toBe(0); expect(runManifest.actual_exit_code).toBeNull(); @@ -1113,25 +2052,26 @@ notes: await new AgentPlugin({ 
outputDir }).done(createContext(), store); - const testContent = await readFile(join(outputDir, "tests", "default", "low-signal-history.md"), "utf-8"); + const testContent = await readText(join(outputDir, "tests", "default", "low-signal-history.md"), "text/markdown"); const findingsManifest = await readJsonl<{ check_name: string; - subject: string; + subject?: unknown; + subject_ref?: string; }>(join(outputDir, "manifest", "findings.jsonl")); - expect(testContent).toContain("failed-without-useful-steps"); - expect(testContent).toContain("failed-without-attachments"); - expect(testContent).toContain("nontrivial-run-with-empty-trace"); + expect(testContent).toContain("A failed or broken test has no useful runtime steps."); + expect(testContent).toContain("A failed or broken test has no test-scoped attachments."); + expect(testContent).toContain("A nontrivial test run recorded no steps or fixture activity."); expect(testContent).toContain("## Rerun Guidance"); expect(findingsManifest).toEqual( expect.arrayContaining([ expect.objectContaining({ check_name: "failed-without-useful-steps", - subject: "tests/default/low-signal-history.md", + subject_ref: "tests/default/low-signal-history.md", }), expect.objectContaining({ check_name: "failed-without-attachments", - subject: "tests/default/low-signal-history.md", + subject_ref: "tests/default/low-signal-history.md", }), ]), ); @@ -1172,21 +2112,25 @@ notes: await new AgentPlugin({ outputDir }).done(createContext(), store); - const testContent = await readFile(join(outputDir, "tests", "default", "retry-evidence-history.md"), "utf-8"); + const testContent = await readText( + join(outputDir, "tests", "default", "retry-evidence-history.md"), + "text/markdown", + ); const findingsManifest = await readJsonl<{ check_name: string; severity: "info" | "warning" | "high"; - subject: string; + subject?: unknown; + subject_ref?: string; }>(join(outputDir, "manifest", "findings.jsonl")); - 
expect(testContent).toContain("retries-without-new-evidence"); + expect(testContent).toContain("Retries did not add any new observable evidence."); expect(testContent).toContain("## Retry 1"); expect(findingsManifest).toEqual( expect.arrayContaining([ expect.objectContaining({ check_name: "retries-without-new-evidence", severity: "info", - subject: "tests/default/retry-evidence-history.md", + subject_ref: "tests/default/retry-evidence-history.md", }), ]), ); diff --git a/packages/plugin-agent/test/inline-expectations.test.ts b/packages/plugin-agent/test/inline-expectations.test.ts new file mode 100644 index 00000000000..6a5919b12bc --- /dev/null +++ b/packages/plugin-agent/test/inline-expectations.test.ts @@ -0,0 +1,215 @@ +import { mkdtemp, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { epic, feature, label, story } from "allure-js-commons"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; + +import { AgentExpectationUsageError, AgentUsageError } from "../src/errors.js"; +import { buildAgentInlineExpectations, validateAgentExpectationsFile } from "../src/inline-expectations.js"; + +let tempDir: string | undefined; + +const makeTempDir = async () => { + tempDir = await mkdtemp(join(tmpdir(), "allure-agent-expectations-test-")); + + return tempDir; +}; + +beforeEach(async () => { + await epic("coverage"); + await feature("agent-mode"); + await story("inline-expectations"); + await label("coverage", "agent-mode"); +}); + +afterEach(async () => { + if (tempDir) { + await rm(tempDir, { recursive: true, force: true }); + tempDir = undefined; + } +}); + +describe("inline agent expectations", () => { + it.each([ + { + option: "--goal", + input: { goal: "Review agent visibility" }, + expected: { goal: "Review agent visibility" }, + }, + { + option: "--task-id", + input: { taskId: "agent-inline" }, + expected: { task_id: "agent-inline" }, + }, + { + option: "--expect-tests", + 
input: { expectTests: "2" }, + expected: { expected: { test_count: 2 } }, + }, + { + option: "--expect-label", + input: { expectLabels: ["module=plugin-agent", "module=cli"] }, + expected: { expected: { label_values: { module: ["plugin-agent", "cli"] } } }, + }, + { + option: "--expect-env", + input: { expectEnvironments: ["node"] }, + expected: { expected: { environments: ["node"] } }, + }, + { + option: "--expect-test", + input: { expectFullNames: ["suite should pass"] }, + expected: { expected: { full_names: ["suite should pass"] } }, + }, + { + option: "--expect-prefix", + input: { expectPrefixes: ["suite"] }, + expected: { expected: { full_name_prefixes: ["suite"] } }, + }, + { + option: "--forbid-label", + input: { forbidLabels: ["layer=e2e"] }, + expected: { forbidden: { label_values: { layer: ["e2e"] } } }, + }, + { + option: "--expect-step-containing", + input: { expectStepContains: ["assert expected behavior"] }, + expected: { evidence: { step_name_contains: ["assert expected behavior"] } }, + }, + { + option: "--expect-steps", + input: { expectSteps: "1" }, + expected: { evidence: { min_steps: 1 } }, + }, + { + option: "--expect-attachments", + input: { expectAttachments: "1" }, + expected: { evidence: { min_attachments: 1 } }, + }, + { + option: "--expect-attachment name", + input: { expectAttachmentFilters: ["trace.zip"] }, + expected: { evidence: { attachments: [{ name: "trace.zip" }] } }, + }, + { + option: "--expect-attachment name=...", + input: { expectAttachmentFilters: ["name=trace.zip"] }, + expected: { evidence: { attachments: [{ name: "trace.zip" }] } }, + }, + { + option: "--expect-attachment content-type=...", + input: { expectAttachmentFilters: ["content-type=application/json"] }, + expected: { evidence: { attachments: [{ content_type: "application/json" }] } }, + }, + { + option: "--expect-attachment type=...", + input: { expectAttachmentFilters: ["type=image/png"] }, + expected: { evidence: { attachments: [{ content_type: "image/png" }] 
} }, + }, + ])("should parse $option", ({ input, expected }) => { + expect(buildAgentInlineExpectations(input)).toEqual(expected); + }); + + it("should parse combined inline expectations", () => { + expect( + buildAgentInlineExpectations({ + goal: "Review agent visibility", + taskId: "agent-inline", + expectTests: "2", + expectLabels: ["module=plugin-agent"], + expectEnvironments: ["node"], + expectFullNames: ["suite should pass"], + expectPrefixes: ["suite"], + forbidLabels: ["layer=e2e"], + expectStepContains: ["assert expected behavior"], + expectSteps: "1", + expectAttachments: "1", + expectAttachmentFilters: ["trace.zip", "content-type=application/json"], + }), + ).toEqual({ + goal: "Review agent visibility", + task_id: "agent-inline", + expected: { + test_count: 2, + environments: ["node"], + full_names: ["suite should pass"], + full_name_prefixes: ["suite"], + label_values: { + module: ["plugin-agent"], + }, + }, + forbidden: { + label_values: { + layer: ["e2e"], + }, + }, + evidence: { + min_steps: 1, + min_attachments: 1, + step_name_contains: ["assert expected behavior"], + attachments: [{ name: "trace.zip" }, { content_type: "application/json" }], + }, + }); + }); + + it.each([ + { option: "--expect-tests", input: { expectTests: "-1" } }, + { option: "--expect-tests non-integer", input: { expectTests: "1.5" } }, + { option: "--expect-tests empty", input: { expectTests: " " } }, + { option: "--expect-steps", input: { expectSteps: "1.5" } }, + { option: "--expect-steps zero", input: { expectSteps: "0" } }, + { option: "--expect-attachments", input: { expectAttachments: "many" } }, + { option: "--expect-attachments zero", input: { expectAttachments: "0" } }, + { option: "--expect-label", input: { expectLabels: ["module"] } }, + { option: "--expect-label colon", input: { expectLabels: ["module:cli"] } }, + { option: "--forbid-label", input: { forbidLabels: ["layer"] } }, + { option: "--expect-attachment", input: { expectAttachmentFilters: ["extension=zip"] } 
}, + { option: "--expect-attachment empty", input: { expectAttachmentFilters: [" "] } }, + ])("should reject invalid $option", ({ input }) => { + expect(() => buildAgentInlineExpectations(input)).toThrow(AgentExpectationUsageError); + }); + + it.each([ + { option: "--goal", input: { goal: ["Review one", "Review two"] } }, + { option: "--task-id", input: { taskId: ["TASK-1", "TASK-2"] } }, + { option: "--expect-tests", input: { expectTests: ["1", "2"] } }, + { option: "--expect-steps", input: { expectSteps: ["1", "2"] } }, + { option: "--expect-attachments", input: { expectAttachments: ["1", "2"] } }, + ])("should reject duplicate single-value option $option", ({ input }) => { + expect(() => buildAgentInlineExpectations(input)).toThrow(AgentExpectationUsageError); + }); + + it("should reject zero test count combined with positive scope", () => { + expect(() => + buildAgentInlineExpectations({ + expectTests: "0", + expectFullNames: ["suite should pass"], + }), + ).toThrow(AgentExpectationUsageError); + }); + + it("should validate expectation files and reject invalid file input", async () => { + const cwd = await makeTempDir(); + + await writeFile(join(cwd, "expected.yaml"), "goal: valid file expectations\n", "utf-8"); + await expect(validateAgentExpectationsFile({ cwd, expectations: "expected.yaml" })).resolves.toBeUndefined(); + + await writeFile(join(cwd, "invalid.yaml"), "[]", "utf-8"); + await expect(validateAgentExpectationsFile({ cwd, expectations: "invalid.yaml" })).rejects.toBeInstanceOf( + AgentExpectationUsageError, + ); + }); + + it("should reject expectation files placed inside the output directory", async () => { + const cwd = await makeTempDir(); + + await expect( + validateAgentExpectationsFile({ + cwd, + output: "agent-output", + expectations: "agent-output/expected.yaml", + }), + ).rejects.toBeInstanceOf(AgentUsageError); + }); +}); diff --git a/packages/plugin-agent/test/invalid-output.test.ts b/packages/plugin-agent/test/invalid-output.test.ts new 
file mode 100644 index 00000000000..7c916b8ab78 --- /dev/null +++ b/packages/plugin-agent/test/invalid-output.test.ts @@ -0,0 +1,94 @@ +import { mkdtemp, readFile, rm } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { epic, feature, label, story } from "allure-js-commons"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; + +import { AgentExpectationUsageError } from "../src/errors.js"; +import { writeInvalidAgentExpectationOutput } from "../src/invalid-output.js"; +import { attachJsonEvidence, attachTextEvidence } from "./evidence.js"; + +let tempDir: string | undefined; + +beforeEach(async () => { + await epic("coverage"); + await feature("agent-mode"); + await story("invalid-agent-output"); + await label("coverage", "agent-mode"); + tempDir = await mkdtemp(join(tmpdir(), "allure-agent-invalid-output-test-")); +}); + +afterEach(async () => { + if (tempDir) { + await rm(tempDir, { recursive: true, force: true }); + tempDir = undefined; + } +}); + +describe("invalid expectation output", () => { + it("should write minimal agent artifacts when expectation input is invalid", async () => { + const outputDir = join(tempDir!, "agent-output"); + + const result = await writeInvalidAgentExpectationOutput({ + outputDir, + command: "npm test", + error: new AgentExpectationUsageError( + 'Invalid --expect-label "module". 
Expected the form name=value, for example module=cli', + "--expect-label", + ), + }); + + const runManifest = JSON.parse(await readFile(join(outputDir, "manifest", "run.json"), "utf-8")); + const finding = JSON.parse((await readFile(join(outputDir, "manifest", "findings.jsonl"), "utf-8")).trim()); + const tests = await readFile(join(outputDir, "manifest", "tests.jsonl"), "utf-8"); + const events = await readFile(join(outputDir, "manifest", "test-events.jsonl"), "utf-8"); + const index = await readFile(join(outputDir, "index.md"), "utf-8"); + + await attachJsonEvidence("invalid expectation run manifest", runManifest); + await attachJsonEvidence("invalid expectation finding", finding); + await attachTextEvidence("invalid expectation empty tests manifest", tests); + await attachTextEvidence("invalid expectation empty events manifest", events); + await attachTextEvidence("invalid expectation index", index, "text/markdown"); + + expect(result.outputDir).toBe(outputDir); + expect(result.generatedAt).toEqual(expect.any(String)); + expect(tests).toBe(""); + expect(events).toBe(""); + expect(index).toContain("Status: unavailable"); + expect(runManifest).toEqual( + expect.objectContaining({ + schema_version: "allure-agent-output/v1", + phase: "done", + command: "npm test", + expectations_present: false, + expectations: null, + expectation_result: expect.objectContaining({ + status: "unavailable", + impact: "reject", + finding_ids: ["F0001"], + }), + }), + ); + expect(finding).toEqual( + expect.objectContaining({ + schema_version: "allure-agent-finding/v2", + check_id: "expectations-invalid", + instance_id: "F0001", + severity: "high", + impact: "reject", + source: { + kind: "inline-option", + option: "--expect-label", + }, + subject: { + type: "run", + }, + observed: expect.objectContaining({ + execution_skipped: true, + }), + check_name: "expectations-invalid", + }), + ); + }); +}); diff --git a/packages/plugin-agent/test/query.test.ts 
b/packages/plugin-agent/test/query.test.ts new file mode 100644 index 00000000000..efc5e0d4d04 --- /dev/null +++ b/packages/plugin-agent/test/query.test.ts @@ -0,0 +1,322 @@ +import { mkdir, mkdtemp, rm, writeFile } from "node:fs/promises"; +import { tmpdir } from "node:os"; +import { join } from "node:path"; + +import { epic, feature, label, story } from "allure-js-commons"; +import { afterEach, beforeEach, describe, expect, it } from "vitest"; + +import { AgentUsageError } from "../src/errors.js"; +import type { AgentOutputBundle } from "../src/harness.js"; +import { + AGENT_TEST_STATUSES, + buildAgentQueryPayload, + normalizeAgentQueryLimit, + normalizeRepeatedEnumValues, +} from "../src/query.js"; +import { attachJsonEvidence } from "./evidence.js"; + +let tempDir: string | undefined; + +const createAgentOutput = (outputDir: string): AgentOutputBundle => ({ + outputDir, + run: { + schema_version: "allure-agent-output/v1", + report_uuid: "report-uuid", + generated_at: "2026-06-02T12:00:00.000Z", + phase: "done", + command: "npm test", + actual_exit_code: 1, + original_exit_code: 1, + exit_code: { + original: 1, + actual: null, + }, + summary: { + stats: { + total: 2, + failed: 1, + broken: 0, + skipped: 0, + unknown: 0, + passed: 1, + }, + duration_ms: { + total: 30, + average: 15, + max: 20, + }, + environments: [ + { + environmentId: "default", + total: 2, + failed: 1, + broken: 0, + skipped: 0, + unknown: 0, + passed: 1, + }, + ], + }, + paths: { + index_md: "index.md", + agents_md: "AGENTS.md", + tests_manifest: "manifest/tests.jsonl", + findings_manifest: "manifest/findings.jsonl", + test_events_manifest: "manifest/test-events.jsonl", + expected_manifest: "manifest/expected.json", + process_logs: { + stdout: "artifacts/global/stdout.txt", + stderr: "artifacts/global/stderr.txt", + }, + }, + expectations_present: true, + expectations: { + goal: "Check query", + }, + expectation_result: { + schema_version: "allure-agent-expectation-result/v1", + status: 
"failed", + impact: "reject", + source: { + kind: "inline", + path: null, + }, + recognized_control_count: 2, + unsupported_controls: [], + degraded_controls: [], + summary: { + expected_tests: 0, + observed_tests: 2, + missing_expected: 1, + forbidden_observed: 0, + unexpected_observed: 0, + evidence_mismatches: 0, + }, + finding_ids: ["finding-1"], + }, + check_summary: { + total: 2, + countsBySeverity: { + high: 1, + warning: 1, + info: 0, + }, + countsByCategory: { + bootstrap: 0, + scope: 1, + metadata: 0, + evidence: 1, + smells: 0, + }, + }, + agent_context: { + agent_name: null, + loop_id: null, + task_id: "agent-query", + conversation_id: null, + }, + }, + tests: [ + { + environment_id: "default", + history_id: "history-1", + test_result_id: "tr-1", + full_name: "suite should fail", + package: "pkg-a", + labels: [{ name: "module", value: "cli" }], + status: "failed", + duration_ms: 20, + retries: 0, + flaky: false, + scope_match: "match", + finding_counts: { + total: 1, + high: 1, + warning: 0, + info: 0, + }, + markdown_path: "tests/default/suite-should-fail.md", + assets_dir: "artifacts/tests/default/suite-should-fail", + }, + { + environment_id: "default", + history_id: "history-2", + test_result_id: "tr-2", + full_name: "suite should pass", + package: "pkg-b", + labels: [{ name: "module", value: "ui" }], + status: "passed", + duration_ms: 10, + retries: 0, + flaky: false, + scope_match: "match", + finding_counts: { + total: 0, + high: 0, + warning: 0, + info: 0, + }, + markdown_path: "tests/default/suite-should-pass.md", + assets_dir: "artifacts/tests/default/suite-should-pass", + }, + ], + findings: [ + { + schema_version: "allure-agent-finding/v2", + check_id: "expected-label-missing", + instance_id: "finding-1", + finding_id: "finding-1", + subject: { + type: "test", + id: "tests/default/suite-should-fail.md", + path: "tests/default/suite-should-fail.md", + }, + subject_ref: "tests/default/suite-should-fail.md", + subject_type: "test", + severity: 
"high", + impact: "reject", + category: "scope", + check_name: "expected-label-missing", + message: "Expected label module=api was not found.", + explanation: "The observed labels did not satisfy the expectation.", + evidence_paths: ["tests/default/suite-should-fail.md"], + remediation_hint: "Run the intended test or update metadata.", + }, + { + finding_id: "finding-2", + subject: "run", + severity: "warning", + category: "evidence", + check_name: "missing-evidence", + message: "Evidence is weak.", + explanation: "The run did not contain meaningful evidence.", + evidence_paths: ["index.md"], + remediation_hint: "Add steps or attachments.", + }, + ], + expected: { + goal: "Check query", + }, +}); + +beforeEach(async () => { + await epic("coverage"); + await feature("agent-mode"); + await story("agent-query"); + await label("coverage", "agent-mode"); + tempDir = await mkdtemp(join(tmpdir(), "allure-agent-query-test-")); + await mkdir(join(tempDir, "tests/default"), { recursive: true }); + await writeFile(join(tempDir, "tests/default/suite-should-fail.md"), "# Test Markdown\n\nRuntime evidence.", { + encoding: "utf-8", + flag: "w", + }); +}); + +afterEach(async () => { + if (tempDir) { + await rm(tempDir, { recursive: true, force: true }); + tempDir = undefined; + } +}); + +describe("agent query payloads", () => { + it("should build a summary payload", async () => { + const payload = await buildAgentQueryPayload(createAgentOutput(tempDir!), "summary", { + labelFilters: [], + }); + + await attachJsonEvidence("summary query payload", payload); + expect(payload).toEqual( + expect.objectContaining({ + schema: "allure-agent-query/v1", + view: "summary", + output_dir: tempDir, + index_md: join(tempDir!, "index.md"), + run: expect.objectContaining({ + command: "npm test", + expectations_present: true, + expectation_result: expect.objectContaining({ status: "failed", impact: "reject" }), + }), + paths: expect.objectContaining({ + tests_manifest: join(tempDir!, 
"manifest/tests.jsonl"), + }), + expected: { + goal: "Check query", + }, + }), + ); + }); + + it("should build filtered test payloads", async () => { + const payload = await buildAgentQueryPayload(createAgentOutput(tempDir!), "tests", { + labelFilters: [{ name: "module", value: "cli" }], + statuses: ["failed"], + limit: 1, + }); + + await attachJsonEvidence("filtered tests query payload", payload); + expect(payload).toEqual( + expect.objectContaining({ + view: "tests", + total_matches: 1, + returned: 1, + tests: [expect.objectContaining({ full_name: "suite should fail", status: "failed" })], + }), + ); + }); + + it("should build filtered finding payloads", async () => { + const payload = await buildAgentQueryPayload(createAgentOutput(tempDir!), "findings", { + labelFilters: [], + severities: ["high"], + categories: ["scope"], + checks: ["expected-label-missing"], + test: "suite should fail", + }); + + await attachJsonEvidence("filtered findings query payload", payload); + expect(payload).toEqual( + expect.objectContaining({ + view: "findings", + total_matches: 1, + findings: [expect.objectContaining({ finding_id: "finding-1" })], + }), + ); + }); + + it("should build one-test payloads with markdown when requested", async () => { + const payload = await buildAgentQueryPayload(createAgentOutput(tempDir!), "test", { + labelFilters: [], + test: "suite should fail", + includeMarkdown: true, + }); + + await attachJsonEvidence("single test query payload", payload); + expect(payload).toEqual( + expect.objectContaining({ + view: "test", + markdown_path: join(tempDir!, "tests/default/suite-should-fail.md"), + test: expect.objectContaining({ full_name: "suite should fail" }), + findings: [expect.objectContaining({ finding_id: "finding-1" })], + markdown: expect.stringContaining("Runtime evidence."), + }), + ); + }); + + it("should reject ambiguous single-test queries and unsupported enum values", async () => { + await attachJsonEvidence("invalid query option cases", [ + { 
view: "test", reason: "missing exact test selector" }, + { option: "--status", value: "flaky", reason: "unsupported status" }, + { option: "--limit", value: "1.5", reason: "limit must be an integer" }, + ]); + + await expect( + buildAgentQueryPayload(createAgentOutput(tempDir!), "test", { + labelFilters: [], + }), + ).rejects.toBeInstanceOf(AgentUsageError); + + expect(() => normalizeRepeatedEnumValues(["flaky"], AGENT_TEST_STATUSES, "--status")).toThrow(AgentUsageError); + expect(() => normalizeAgentQueryLimit("1.5")).toThrow(AgentUsageError); + }); +}); diff --git a/packages/cli/test/utils/agent-select.test.ts b/packages/plugin-agent/test/selection.test.ts similarity index 75% rename from packages/cli/test/utils/agent-select.test.ts rename to packages/plugin-agent/test/selection.test.ts index b261e2ab480..13adee68b39 100644 --- a/packages/cli/test/utils/agent-select.test.ts +++ b/packages/plugin-agent/test/selection.test.ts @@ -6,12 +6,13 @@ import { parseAgentLabelFilters, resolveAgentSelectionOutputDir, selectAgentTestPlan, -} from "../../src/utils/agent-select.js"; +} from "../src/selection.js"; +import { attachJsonEvidence } from "./evidence.js"; -vi.mock("../../src/utils/agent-state.js", () => ({ +vi.mock("../src/state.js", () => ({ readLatestAgentState: vi.fn(), })); -vi.mock("@allurereport/plugin-agent", () => ({ +vi.mock("../src/harness.js", () => ({ loadAgentOutput: vi.fn(), planAgentEnrichmentReview: vi.fn(), })); @@ -26,7 +27,7 @@ beforeEach(async () => { describe("agent-select utils", () => { it("should select review-targeted tests and apply environment and label filters", async () => { - const { loadAgentOutput, planAgentEnrichmentReview } = await import("@allurereport/plugin-agent"); + const { loadAgentOutput, planAgentEnrichmentReview } = await import("../src/harness.js"); (loadAgentOutput as Mock).mockResolvedValueOnce({ outputDir: "/tmp/agent-output", @@ -65,6 +66,7 @@ describe("agent-select utils", () => { labelFilters: [{ name: "feature", 
value: "checkout" }], }); + await attachJsonEvidence("selected agent test plan", selection); expect(selection.outputDir).toBe("/tmp/agent-output"); expect(selection.preset).toBe("review"); expect(selection.selectedTests).toHaveLength(1); @@ -76,17 +78,24 @@ describe("agent-select utils", () => { }); it("should resolve latest output directories and parse supported filters", async () => { - const { readLatestAgentState } = await import("../../src/utils/agent-state.js"); + const { readLatestAgentState } = await import("../src/state.js"); (readLatestAgentState as Mock).mockResolvedValueOnce({ outputDir: "/tmp/latest-agent-output", }); - await expect(resolveAgentSelectionOutputDir({ cwd: "/cwd", latest: true })).resolves.toBe( - "/tmp/latest-agent-output", - ); - expect(normalizeAgentRerunPreset("failed")).toBe("failed"); - expect(parseAgentLabelFilters(["feature=checkout", "priority=high"])).toEqual([ + const resolvedOutputDir = await resolveAgentSelectionOutputDir({ cwd: "/cwd", latest: true }); + const preset = normalizeAgentRerunPreset("failed"); + const labelFilters = parseAgentLabelFilters(["feature=checkout", "priority=high"]); + + await attachJsonEvidence("latest selection resolution", { + resolvedOutputDir, + preset, + labelFilters, + }); + expect(resolvedOutputDir).toBe("/tmp/latest-agent-output"); + expect(preset).toBe("failed"); + expect(labelFilters).toEqual([ { name: "feature", value: "checkout" }, { name: "priority", value: "high" }, ]); diff --git a/packages/plugin-agent/test/skills.test.ts b/packages/plugin-agent/test/skills.test.ts deleted file mode 100644 index 0d58e1eab18..00000000000 --- a/packages/plugin-agent/test/skills.test.ts +++ /dev/null @@ -1,137 +0,0 @@ -import { readFile } from "node:fs/promises"; -import { dirname, join, resolve } from "node:path"; -import { fileURLToPath } from "node:url"; - -import { story } from "allure-js-commons"; -import { beforeEach, describe, expect, it } from "vitest"; - -beforeEach(async () => { - await 
story("skills"); -}); -const repoRoot = resolve(dirname(fileURLToPath(import.meta.url)), "../../.."); - -describe("allure agent-mode skills bundle", () => { - it("should include the setup and feature-delivery skills with UI metadata", async () => { - const setupSkill = await readFile(join(repoRoot, "skills", "allure-agent-mode-setup", "SKILL.md"), "utf-8"); - const setupUi = await readFile( - join(repoRoot, "skills", "allure-agent-mode-setup", "agents", "openai.yaml"), - "utf-8", - ); - const featureSkill = await readFile( - join(repoRoot, "skills", "allure-agent-mode-feature-delivery", "SKILL.md"), - "utf-8", - ); - const featureUi = await readFile( - join(repoRoot, "skills", "allure-agent-mode-feature-delivery", "agents", "openai.yaml"), - "utf-8", - ); - - expect(setupSkill).toContain("name: allure-agent-mode-setup"); - expect(setupSkill).toContain("docs/allure-agent-mode.md"); - expect(setupSkill).toContain("allure agent latest"); - expect(setupSkill).toContain("allure agent state-dir"); - expect(setupSkill).toContain("allure agent select --latest"); - expect(setupSkill).toContain("allure agent --rerun-latest"); - expect(setupSkill).toContain( - "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`.", - ); - expect(setupUi).toContain('display_name: "Allure Agent Setup"'); - expect(featureSkill).toContain("name: allure-agent-mode-feature-delivery"); - expect(featureSkill).toContain("ALLURE_AGENT_OUTPUT"); - expect(featureSkill).toContain("reviewing existing tests"); - expect(featureSkill).toContain("auditing coverage"); - expect(featureSkill).toContain("triaging failing suites"); - expect(featureSkill).toContain( - "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`.", - ); - 
expect(featureSkill).toContain( - "Use `allure agent` for smoke checks too, even when the change is small or mechanical.", - ); - expect(featureSkill).toContain( - "Only skip agent mode when it is impossible or when you are debugging agent mode itself.", - ); - expect(featureSkill).toContain("### Small Test Change Workflow"); - expect(featureSkill).toContain("### Coverage Review Workflow"); - expect(featureUi).toContain('display_name: "Allure Feature Delivery"'); - }); - - it("should include the project guide and AGENTS router templates", async () => { - const projectGuide = await readFile(join(repoRoot, "docs", "allure-agent-mode.md"), "utf-8"); - const rootAgents = await readFile(join(repoRoot, "AGENTS.md"), "utf-8"); - const templateGuide = await readFile( - join(repoRoot, "skills", "allure-agent-mode-setup", "references", "project-guide-template.md"), - "utf-8", - ); - const agentsSnippet = await readFile( - join(repoRoot, "skills", "allure-agent-mode-setup", "references", "root-agents-snippet.md"), - "utf-8", - ); - const readme = await readFile(join(repoRoot, "packages", "plugin-agent", "README.md"), "utf-8"); - - expect(projectGuide).toContain("## Core Loops"); - expect(projectGuide).toContain("### Test Review Loop"); - expect(projectGuide).toContain("Runtime first, source second."); - expect(projectGuide).toContain("## Verification Standard"); - expect(projectGuide).toContain("## Helpful Commands"); - expect(projectGuide).toContain("allure agent latest"); - expect(projectGuide).toContain("allure agent state-dir"); - expect(projectGuide).toContain("allure agent select --latest"); - expect(projectGuide).toContain("allure agent --rerun-latest"); - expect(projectGuide).toContain("--rerun-preset review|failed|unsuccessful|all"); - expect(projectGuide).toContain("--rerun-environment "); - expect(projectGuide).toContain("--rerun-label name=value"); - expect(projectGuide).toContain("ALLURE_AGENT_STATE_DIR"); - expect(projectGuide).toContain("print the `index.md` 
path"); - expect(projectGuide).toContain( - "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`.", - ); - expect(projectGuide).toContain( - "Use `allure agent` for smoke checks too, even when the change is small or mechanical.", - ); - expect(projectGuide).toContain( - "Only skip agent mode when it is impossible or when you are debugging agent mode itself.", - ); - expect(projectGuide).toContain("### Small Test Change Workflow"); - expect(projectGuide).toContain("### Coverage Review Workflow"); - expect(projectGuide).toContain("## Acceptance Rules"); - expect(projectGuide).toContain("When Console Errors Are Not Represented As Test Results"); - expect(projectGuide).toContain("yarn allure agent --"); - expect(projectGuide).toContain("test/commands/run.integration.test.ts"); - expect(rootAgents).toContain("docs/allure-agent-mode.md"); - expect(rootAgents).toContain( - "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`.", - ); - expect(rootAgents).toContain("Use `allure agent` for smoke checks too"); - expect(rootAgents).toContain("reasoning, review, coverage analysis, debugging, or any user-facing conclusion"); - expect(rootAgents).toContain("console-only review"); - expect(templateGuide).toContain("ALLURE_AGENT_EXPECTATIONS"); - expect(templateGuide).toContain("## Verification Standard"); - expect(templateGuide).toContain("## Helpful Commands"); - expect(templateGuide).toContain("allure agent latest"); - expect(templateGuide).toContain("allure agent state-dir"); - expect(templateGuide).toContain("allure agent select --latest"); - expect(templateGuide).toContain("allure agent --rerun-latest"); - expect(templateGuide).toContain("--rerun-preset review|failed|unsuccessful|all"); - 
expect(templateGuide).toContain("--rerun-environment "); - expect(templateGuide).toContain("--rerun-label name=value"); - expect(templateGuide).toContain("ALLURE_AGENT_STATE_DIR"); - expect(templateGuide).toContain("print the `index.md` path"); - expect(templateGuide).toContain("### Test Review Loop"); - expect(templateGuide).toContain("### Small Test Change Workflow"); - expect(templateGuide).toContain("### Coverage Review Workflow"); - expect(templateGuide).toContain("Runtime first, source second."); - expect(templateGuide).toContain("partial runtime review"); - expect(agentsSnippet).toContain("Use [Allure Agent Mode](docs/allure-agent-mode.md)"); - expect(agentsSnippet).toContain( - "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`.", - ); - expect(agentsSnippet).toContain("Use `allure agent` for smoke checks too"); - expect(agentsSnippet).toContain("reasoning, review, coverage analysis, debugging, or any user-facing conclusion"); - expect(readme).toContain("## Verification Standard"); - expect(readme).toContain("For small mechanical test changes, use a scoped agent-mode run for the smoke check"); - expect(readme).toContain( - "If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`.", - ); - expect(readme).toContain("treat the review as partial"); - }); -}); diff --git a/packages/cli/test/utils/agent-state.test.ts b/packages/plugin-agent/test/state.test.ts similarity index 88% rename from packages/cli/test/utils/agent-state.test.ts rename to packages/plugin-agent/test/state.test.ts index ebc1615240a..b8f1a7af2e0 100644 --- a/packages/cli/test/utils/agent-state.test.ts +++ b/packages/plugin-agent/test/state.test.ts @@ -9,7 +9,8 @@ import { readLatestAgentState, resolveAgentStateDir, 
writeLatestAgentState, -} from "../../src/utils/agent-state.js"; +} from "../src/state.js"; +import { attachJsonEvidence } from "./evidence.js"; vi.mock("node:os", async (importOriginal) => ({ ...(await importOriginal()), @@ -40,13 +41,21 @@ describe("agent-state utils", () => { const normalizedCwd = resolve(cwd); const projectHash = createHash("sha256").update(normalizedCwd).digest("hex").slice(0, 16); const statePath = join("/tmp", `allure-agent-state-${projectHash}`, "latest.json"); - - await writeLatestAgentState({ + const latestState = { cwd, outputDir: "/tmp/allure-agent-123", command: "npm test", startedAt: "2026-04-15T18:00:00.000Z", - status: "running", + status: "running" as const, + }; + + await writeLatestAgentState(latestState); + + await attachJsonEvidence("latest state write contract", { + normalizedCwd, + projectHash, + statePath, + latestState, }); expect(fsModule.mkdir).toHaveBeenCalledWith(dirname(statePath), { recursive: true }); diff --git a/skills/allure-agent-mode-feature-delivery/SKILL.md b/skills/allure-agent-mode-feature-delivery/SKILL.md deleted file mode 100644 index 825051650e5..00000000000 --- a/skills/allure-agent-mode-feature-delivery/SKILL.md +++ /dev/null @@ -1,92 +0,0 @@ ---- -name: allure-agent-mode-feature-delivery -description: Use Allure agent-mode to design or review test scope, create per-run expectations, run targeted tests, review runtime evidence, and iterate until the test loop is acceptable. ---- - -# Allure Agent Mode Feature Delivery - -Use this skill for feature or bug work that changes tests, for reviewing existing tests, auditing coverage, triaging failing suites, investigating weak evidence, or debugging flaky and environment-sensitive failures. - -- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. 
It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config. -- Use `allure agent` for smoke checks too, even when the change is small or mechanical. -- Only skip agent mode when it is impossible or when you are debugging agent mode itself. - -## Read First - -If the project has `docs/allure-agent-mode.md`, read it before writing or reviewing tests. - -If it does not, use the guidance in this skill and suggest running the setup skill later. - -## Workflow - -1. Understand the feature, issue, or review goal and decide the intended test scope. -2. Create a fresh expectations file for this run in a temp directory. -3. Write or update the tests, or keep the current tests unchanged if the task is review-only. -4. Run only the intended scope with `allure agent` before relying on raw console output. -5. Review `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, `manifest/findings.jsonl`, and the relevant test markdown files before inspecting source code. -6. If evidence is weak, enrich the tests with real steps, attachments, or minimal metadata. -7. Rerun with a new temp output directory and a new expectations file. -8. Accept only when scope matches, evidence is good enough to review, and any partial runtime modeling has been called out explicitly. - -## Review Variants - -### Small Test Change Workflow - -1. Create a fresh expectations file and temp output directory for the touched scope. -2. Run the touched scope with `allure agent`, even if the goal is only a smoke check after a mechanical change such as typing cleanup, mock refactors, or helper extraction. -3. Review `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`. -4. Only then make a final statement about regression safety or test correctness. - -### Coverage Review Workflow - -1. Split command or package audits into scoped groups. -2. 
Give each group its own expectations file and temp output directory. -3. Run each group with `allure agent`. -4. Review runtime artifacts first, then inspect source code only after the run explains what actually executed. -5. Mark the review incomplete until each scoped group either matched expectations or was explicitly documented as a broad package-health audit. - -Compact coverage-review pattern: - -```bash -TMP_DIR="$(mktemp -d)" -EXPECTATIONS="$TMP_DIR/expectations.yaml" - -npx allure agent \ - --output "$TMP_DIR/agent-output" \ - --expectations "$EXPECTATIONS" \ - -- npm test -- -``` - -Coverage-review expectations example: - -```yaml -goal: Review package tests -task_id: package-review -expected: - label_values: - module: my-module -notes: - - Review runtime evidence before source inspection. -``` - -## Requirements - -- Every run must use a unique temp `ALLURE_AGENT_OUTPUT`. -- Every run must use a unique temp `ALLURE_AGENT_EXPECTATIONS`. -- Parallel runs must never share those paths. -- Prefer YAML expectations in v1. -- Broad package-health audits may omit expectations, but the review must call out that scope checks are weaker. -- Metadata enrichment is part of this loop, not a separate workflow. - -## Guardrails - -- Runtime first, source second. -- Steps must represent real behavior, not filler. -- Attachments must come from the current execution. -- Keep metadata minimal and only add labels that help review or policy. -- Prefer helper-boundary instrumentation over repetitive caller wrapping. -- If runner-visible failures are not represented as logical tests, inspect global stderr and treat the run as a partial runtime review. 
- -## Reference Files - -- Expectations example: `references/expectations-example.yaml` diff --git a/skills/allure-agent-mode-feature-delivery/agents/openai.yaml b/skills/allure-agent-mode-feature-delivery/agents/openai.yaml deleted file mode 100644 index 3e6bdd92571..00000000000 --- a/skills/allure-agent-mode-feature-delivery/agents/openai.yaml +++ /dev/null @@ -1,7 +0,0 @@ -interface: - display_name: "Allure Feature Delivery" - short_description: "Use agent-mode for feature test loops" - default_prompt: "Use $allure-agent-mode-feature-delivery to write or validate tests for this feature with Allure agent-mode." - -policy: - allow_implicit_invocation: true diff --git a/skills/allure-agent-mode-feature-delivery/references/expectations-example.yaml b/skills/allure-agent-mode-feature-delivery/references/expectations-example.yaml deleted file mode 100644 index 2d2ecd53671..00000000000 --- a/skills/allure-agent-mode-feature-delivery/references/expectations-example.yaml +++ /dev/null @@ -1,17 +0,0 @@ -goal: Validate feature A -task_id: feature-a -expected: - environments: - - default - full_name_prefixes: - - feature A - label_values: - feature: feature-a -forbidden: - full_name_prefixes: - - feature B - label_values: - feature: - - feature-b -notes: - - Only feature A tests should run. diff --git a/skills/allure-agent-mode-setup/SKILL.md b/skills/allure-agent-mode-setup/SKILL.md deleted file mode 100644 index 348af6bb021..00000000000 --- a/skills/allure-agent-mode-setup/SKILL.md +++ /dev/null @@ -1,42 +0,0 @@ ---- -name: allure-agent-mode-setup -description: Set up Allure agent-mode guidance in a project by checking whether Allure results already exist, adding a short root AGENTS.md router, and creating docs/allure-agent-mode.md for downstream test authoring and review work. ---- - -# Allure Agent Mode Setup - -Use this skill when a project wants to adopt Allure agent-mode for test work and test reviews. 
- -- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config. -- Use `allure agent` for smoke checks too, even when the change is small or mechanical. -- Only skip agent mode when it is impossible or when you are debugging agent mode itself. - -## Goal - -Leave the project with: - -- a root `AGENTS.md` that routes test work to `docs/allure-agent-mode.md` -- a project `docs/allure-agent-mode.md` guide -- enough Allure bootstrap guidance for the agent to continue, even if the project is not fully configured yet - -## Workflow - -1. Check whether the project already emits Allure results or already has Allure configuration. -2. If Allure is missing, add or suggest the smallest viable install/config path for the project. Treat this as best-effort bootstrap, not the main acceptance path. -3. Create or update root `AGENTS.md` so test-related work points to `docs/allure-agent-mode.md`. -4. Create `docs/allure-agent-mode.md` from the bundled template and adapt only the parts that must be project-specific. -5. Keep the helper-command descriptions short and practical. Include `allure agent latest`, `allure agent state-dir`, `allure agent select --latest` / `--from `, and `allure agent --rerun-latest` / `--rerun-from ` as small Helpful Commands entries so agents can recover the latest output directory, inspect where state is stored, inspect the review-targeted test plan, and rerun the same focused scope. Add one compact Advanced Reruns section that documents `--rerun-preset`, `--rerun-environment`, `--rerun-label`, and `ALLURE_AGENT_STATE_DIR` without turning the guide into a full CLI reference. 
Keep the verification section explicit that agents should print the run's `index.md` path after test execution. -6. Keep changes minimal and additive. Preserve unrelated project guidance in `AGENTS.md`. - -## Files To Use - -- Project guide template: `references/project-guide-template.md` -- Root router snippet: `references/root-agents-snippet.md` - -## Guardrails - -- Keep `AGENTS.md` short. It should route, not duplicate the whole guide. -- Keep helper-command notes short. Prefer one-line descriptions over a growing command catalog. -- Do not invent project-specific metadata conventions unless the repo already uses them. -- Do not create persistent output or expectations paths in the project guide. Those are per-run temp artifacts. -- If the project already has better Allure instructions, merge carefully instead of overwriting them. diff --git a/skills/allure-agent-mode-setup/agents/openai.yaml b/skills/allure-agent-mode-setup/agents/openai.yaml deleted file mode 100644 index b20dbb721eb..00000000000 --- a/skills/allure-agent-mode-setup/agents/openai.yaml +++ /dev/null @@ -1,7 +0,0 @@ -interface: - display_name: "Allure Agent Setup" - short_description: "Set up Allure agent-mode project guidance" - default_prompt: "Use $allure-agent-mode-setup to add Allure agent-mode guidance to this project." - -policy: - allow_implicit_invocation: true diff --git a/skills/allure-agent-mode-setup/references/project-guide-template.md b/skills/allure-agent-mode-setup/references/project-guide-template.md deleted file mode 100644 index 3468e221f47..00000000000 --- a/skills/allure-agent-mode-setup/references/project-guide-template.md +++ /dev/null @@ -1,174 +0,0 @@ -# Allure Agent Mode - -Use Allure agent-mode to design, review, validate, debug, and enrich tests in this project. - -## Review Principle - -Runtime first, source second. 
- -- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config. -- Use `ALLURE_AGENT_*` with `allure run` only as the lower-level fallback when you need direct environment control. -- If the agent-mode output is missing or incomplete, debug that first and treat console-only conclusions as provisional. - -## Verification Standard - -- Use `allure agent` for smoke checks too, even when the change is small or mechanical. -- Only skip agent mode when it is impossible or when you are debugging agent mode itself. -- After each agent-mode test run, print the `index.md` path from that run's output directory so users can open the run overview quickly. - -## Helpful Commands - -- `allure agent latest` prints the latest agent output directory for the current project cwd. Use it when a prior run omitted `--output` and you want to reopen the most recent agent-mode artifacts. -- `allure agent state-dir` prints the state directory for the current project cwd. Use it when you need to inspect where `latest` pointers are stored or debug sandbox behavior. -- `allure agent select --latest` or `allure agent select --from ` prints the review-targeted test plan from a prior agent run. Add `--preset failed` or exact `--label name=value` / `--environment ` filters when you need a narrower rerun plan. -- `allure agent --rerun-latest -- ` or `allure agent --rerun-from -- ` reruns only the selected tests through the framework-agnostic Allure testplan flow. The default rerun preset is `review`. - -## Advanced Reruns - -- `--rerun-preset review|failed|unsuccessful|all` changes how the rerun seed set is chosen. 
Use `review` for the default agent-targeted loop, `failed` for classic failure reruns, `unsuccessful` for any non-passed tests, and `all` when you want the whole previously observed set. -- `--rerun-environment ` narrows the rerun selection to one or more environment ids from the previous agent output. Repeat the flag for multiple environments. -- `--rerun-label name=value` narrows the rerun selection to tests whose prior results carried exact matching labels. Repeat the flag for multiple label filters. -- `ALLURE_AGENT_STATE_DIR` overrides the default project-scoped state directory used by `allure agent latest`, `allure agent state-dir`, and `--rerun-latest`. Use it when you need a deterministic shared location in CI or a constrained sandbox. - -## Core Loops - -### Test Review Loop - -1. Identify the exact review scope. -2. Create a fresh expectations file for this run in a temp directory. -3. Run only that scope with `allure agent`. -4. Read `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`. -5. Read per-test markdown only for tests that failed, drifted, or have findings. -6. Only after runtime review, inspect source code for root cause or coverage gaps. -7. If evidence is weak or partial, enrich the tests and rerun. -8. When iterating on the same scope, prefer `allure agent --rerun-latest -- ` or `allure agent --rerun-from -- ` so the rerun stays focused on the review-targeted tests. - -### Feature Delivery Loop - -1. Understand the feature or issue. -2. Create a fresh expectations file for this run in a temp directory. -3. Write or update the tests. -4. Run the target scope with `allure agent`. -5. Review `index.md`, manifests, and per-test markdown. -6. Enrich tests when evidence is weak. -7. Rerun until scope and evidence are acceptable. - -### Metadata Enrichment Loop - -Use this when the run is functionally correct but too weak to review: - -1. Identify missing or low-signal findings. -2. 
Add real steps, attachments, or minimal metadata. -3. Rerun the same intended scope. -4. Reject noop-style or placeholder evidence. - -### Small Test Change Workflow - -1. Create a fresh expectations file and temp output directory for the touched scope. -2. Run the touched scope with `allure agent`, even if the goal is only a smoke check after a mechanical change such as typing cleanup, mock refactors, or helper extraction. -3. Review `index.md`, `manifest/run.json`, `manifest/tests.jsonl`, and `manifest/findings.jsonl`. -4. Only then make a final statement about regression safety or test correctness. - -### Coverage Review Workflow - -1. Split command or package audits into scoped groups. -2. Give each group its own expectations file and temp output directory. -3. Run each group with `allure agent`. -4. Review runtime artifacts first, then inspect source code only after the run explains what actually executed. -5. Mark the review incomplete until each scoped group either matched expectations or was explicitly documented as a broad package-health audit. - -## Per-Run Artifacts - -- `ALLURE_AGENT_OUTPUT` must use a unique temp directory per run. -- `ALLURE_AGENT_EXPECTATIONS` must use a unique temp file per run. -- Do not reuse those paths across parallel runs. - -YAML is preferred for expectations in v1. - -Review-oriented expectations example: - -```yaml -goal: Review module tests -task_id: module-review -expected: - label_values: - module: my-module -notes: - - Review runtime evidence before source inspection. -``` - -Broad package-health audits may omit expectations, but the resulting scope review is weaker and should be called out explicitly. 
- -Compact coverage-review pattern: - -```bash -TMP_DIR="$(mktemp -d)" -EXPECTATIONS="$TMP_DIR/expectations.yaml" - -npx allure agent \ - --output "$TMP_DIR/agent-output" \ - --expectations "$EXPECTATIONS" \ - -- npm test -- -``` - -Single-spec expectations example: - -```yaml -goal: Review one spec -task_id: single-spec-review -expected: - label_values: - package: path/to/spec.test.ts -notes: - - Review runtime evidence before source inspection. -``` - -## Evidence Rules - -- Steps must wrap real setup, actions, state transitions, or assertions. -- Attachments must contain real runtime evidence from that execution. -- Metadata should stay minimal and purposeful. -- Prefer helper-boundary instrumentation over repetitive caller wrapping. - -Good example: - -- instrument `runCommand` once instead of wrapping every `runCommand(...)` caller - -Rejected examples: - -- empty wrapper steps -- static `test passed` attachments -- labels that no review or policy step uses - -## When Console Errors Are Not Represented As Test Results - -- Suite-load, import, or setup failures may appear only in `artifacts/global/stderr.txt` or global errors. -- If `manifest/tests.jsonl` does not account for all visible failures from the test runner, inspect global stderr before concluding the run is fully modeled. -- Treat that state as a partial runtime review, not as a clean or complete result set. -- If runner-visible failures are present outside logical test files, final conclusions must stay provisional until the missing modeling is understood. 
- -## Acceptance Rules - -Accept a run only when: - -- scope matches expectations -- evidence is strong enough to explain what happened -- no high-confidence noop or placeholder findings remain - -### Review Completeness - -A test review is not complete unless: - -- the relevant scope was run with agent mode, unless that is impossible -- expectations were created for the intended scope, unless this is a broad package-health audit -- agent artifacts were reviewed before final conclusions -- missing or partial runtime modeling was called out explicitly -- console-only conclusions are treated as provisional when agent output is absent or incomplete - -## Future Loops - -Planned separately: - -- flaky detection/fix -- known-issue and mute handling -- quality-gate adoption diff --git a/skills/allure-agent-mode-setup/references/root-agents-snippet.md b/skills/allure-agent-mode-setup/references/root-agents-snippet.md deleted file mode 100644 index 672519fd932..00000000000 --- a/skills/allure-agent-mode-setup/references/root-agents-snippet.md +++ /dev/null @@ -1,10 +0,0 @@ -# Project Guide - -Use [Allure Agent Mode](docs/allure-agent-mode.md) for all test-related work in this repository. - -- Read `docs/allure-agent-mode.md` before designing, writing, reviewing, validating, debugging, or enriching tests. -- If a command executes tests and its result will be used for smoke checking, reasoning, review, coverage analysis, debugging, or any user-facing conclusion, run it through `allure agent`. It preserves the original console logs and adds agent-mode artifacts without inheriting the normal report or export plugins from the project config. -- Use `allure agent` for smoke checks too, even when the change is small or mechanical. -- Only skip agent mode when it is impossible or when you are debugging agent mode itself. -- If agent-mode output is missing or incomplete, debug that first rather than silently falling back to console-only review. 
-- Use Allure agent-mode when adding tests for features or fixes so expectations, evidence quality, and scope review are part of the loop. From 6f6ba9b03c4a491abfd56d3a2786c98035e1bdbf Mon Sep 17 00:00:00 2001 From: Dmitry Baev Date: Wed, 10 Jun 2026 19:23:37 +0100 Subject: [PATCH 2/5] fix lint --- packages/plugin-agent/package.json | 4 ++-- packages/plugin-agent/src/plugin.ts | 1 - packages/plugin-agent/test/guidance.test.ts | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/packages/plugin-agent/package.json b/packages/plugin-agent/package.json index f86939a50f1..23abda7ae78 100644 --- a/packages/plugin-agent/package.json +++ b/packages/plugin-agent/package.json @@ -27,8 +27,8 @@ "build": "run clean && tsc --project ./tsconfig.json", "clean": "rimraf ./dist", "test": "rimraf ./out && vitest run", - "lint": "oxlint --import-plugin src test features stories", - "lint:fix": "oxlint --import-plugin --fix src test features stories" + "lint": "yarn run -T oxlint --import-plugin src test features stories", + "lint:fix": "yarn run -T oxlint --import-plugin --fix src test features stories" }, "dependencies": { "@allurereport/core-api": "workspace:*", diff --git a/packages/plugin-agent/src/plugin.ts b/packages/plugin-agent/src/plugin.ts index fd84c668ba5..39991b8061f 100644 --- a/packages/plugin-agent/src/plugin.ts +++ b/packages/plugin-agent/src/plugin.ts @@ -2767,7 +2767,6 @@ const buildRunAndTestFindings = (params: { if (expectations) { const allFullNames = entries.map(({ tr }) => tr.fullName ?? tr.name); - const targetEntries = getExpectationTargetEntries(entries, expectations); const hasRuntimeControls = runtimeMatchingControlCount(expectations) > 0; const genericGoal = expectations.goal ? 
normalizeStepText(expectations.goal).replace(/[^\p{L}\p{N}\s]/gu, "") : ""; diff --git a/packages/plugin-agent/test/guidance.test.ts b/packages/plugin-agent/test/guidance.test.ts index ff719b87b04..6415952ca25 100644 --- a/packages/plugin-agent/test/guidance.test.ts +++ b/packages/plugin-agent/test/guidance.test.ts @@ -3,7 +3,7 @@ import { dirname, join, resolve } from "node:path"; import { fileURLToPath } from "node:url"; import { story } from "allure-js-commons"; -import { beforeEach, describe, expect, it } from "vitest"; +import { beforeEach, describe, it } from "vitest"; import { renderAgentsGuide } from "../src/guidance.js"; import { expectTextToContainAll } from "./evidence.js"; From f0fb5a912df70721a644e14ffb01edee0df05469 Mon Sep 17 00:00:00 2001 From: Dmitry Baev Date: Wed, 10 Jun 2026 20:07:50 +0100 Subject: [PATCH 3/5] fix format --- packages/cli/test/commands/agent.test.ts | 4 +++- .../plugin-agent/src/inline-expectations.ts | 10 ++++++++-- packages/plugin-agent/src/query.ts | 19 ++++++++++++++++--- packages/plugin-agent/src/selection.ts | 7 ++++++- packages/plugin-agent/test/index.test.ts | 6 +++--- 5 files changed, 36 insertions(+), 10 deletions(-) diff --git a/packages/cli/test/commands/agent.test.ts b/packages/cli/test/commands/agent.test.ts index 6de69f72c5c..5727749a23b 100644 --- a/packages/cli/test/commands/agent.test.ts +++ b/packages/cli/test/commands/agent.test.ts @@ -523,7 +523,9 @@ describe("agent command", () => { command: "npm test", error: expect.any(AgentExpectationUsageError), }); - expect(consoleModule.error).toHaveBeenCalledWith("Use either --expectations or inline expectation flags, not both"); + expect(consoleModule.error).toHaveBeenCalledWith( + "Use either --expectations or inline expectation flags, not both", + ); expect(executeAllureRun).not.toHaveBeenCalled(); expect(exitMock).toHaveBeenCalledWith(1); }); diff --git a/packages/plugin-agent/src/inline-expectations.ts b/packages/plugin-agent/src/inline-expectations.ts index 
cf27ad8b7f0..d2e9de571e2 100644 --- a/packages/plugin-agent/src/inline-expectations.ts +++ b/packages/plugin-agent/src/inline-expectations.ts @@ -147,8 +147,14 @@ export const buildAgentInlineExpectations = ( addLabelValues(expectedLabels, options.expectLabels, "--expect-label"); addLabelValues(forbiddenLabels, options.forbidLabels, "--forbid-label"); - const expectTests = readNonNegativeInteger(readSingleStringOption(options.expectTests, "--expect-tests"), "--expect-tests"); - const expectSteps = readPositiveInteger(readSingleStringOption(options.expectSteps, "--expect-steps"), "--expect-steps"); + const expectTests = readNonNegativeInteger( + readSingleStringOption(options.expectTests, "--expect-tests"), + "--expect-tests", + ); + const expectSteps = readPositiveInteger( + readSingleStringOption(options.expectSteps, "--expect-steps"), + "--expect-steps", + ); const expectAttachments = readPositiveInteger( readSingleStringOption(options.expectAttachments, "--expect-attachments"), "--expect-attachments", diff --git a/packages/plugin-agent/src/query.ts b/packages/plugin-agent/src/query.ts index 92f8e816444..396a776207a 100644 --- a/packages/plugin-agent/src/query.ts +++ b/packages/plugin-agent/src/query.ts @@ -4,14 +4,25 @@ import { join } from "node:path"; import type { TestLabel, TestStatus } from "@allurereport/core-api"; import { AgentUsageError } from "./errors.js"; -import type { AgentFindingCategory, AgentFindingSeverity, AgentOutputBundle, AgentTestManifestLine } from "./harness.js"; +import type { + AgentFindingCategory, + AgentFindingSeverity, + AgentOutputBundle, + AgentTestManifestLine, +} from "./harness.js"; import type { AgentLabelFilter } from "./selection.js"; export const AGENT_QUERY_SCHEMA = "allure-agent-query/v1"; export const AGENT_QUERY_VIEWS = ["summary", "tests", "findings", "test"] as const; export const AGENT_TEST_STATUSES: TestStatus[] = ["failed", "broken", "unknown", "skipped", "passed"]; export const AGENT_FINDING_SEVERITIES: 
AgentFindingSeverity[] = ["high", "warning", "info"]; -export const AGENT_FINDING_CATEGORIES: AgentFindingCategory[] = ["bootstrap", "scope", "metadata", "evidence", "smells"]; +export const AGENT_FINDING_CATEGORIES: AgentFindingCategory[] = [ + "bootstrap", + "scope", + "metadata", + "evidence", + "smells", +]; export type AgentQueryView = (typeof AGENT_QUERY_VIEWS)[number]; @@ -213,7 +224,9 @@ const buildAgentQueryTestPayload = async (output: AgentOutputBundle, filters: Ag } if (matched.length > 1) { - throw new AgentUsageError(`Query matched ${matched.length} tests in ${output.outputDir}. Use --test .`); + throw new AgentUsageError( + `Query matched ${matched.length} tests in ${output.outputDir}. Use --test .`, + ); } const test = matched[0]; diff --git a/packages/plugin-agent/src/selection.ts b/packages/plugin-agent/src/selection.ts index 16d93d8ecba..4e74be0d421 100644 --- a/packages/plugin-agent/src/selection.ts +++ b/packages/plugin-agent/src/selection.ts @@ -5,7 +5,12 @@ import { join, resolve } from "node:path"; import type { TestLabel, TestPlan, TestPlanTest } from "@allurereport/core-api"; import { AgentUsageError } from "./errors.js"; -import { loadAgentOutput, planAgentEnrichmentReview, type AgentOutputBundle, type AgentTestManifestLine } from "./harness.js"; +import { + loadAgentOutput, + planAgentEnrichmentReview, + type AgentOutputBundle, + type AgentTestManifestLine, +} from "./harness.js"; import { readLatestAgentState } from "./state.js"; export type AgentRerunPreset = "review" | "failed" | "unsuccessful" | "all"; diff --git a/packages/plugin-agent/test/index.test.ts b/packages/plugin-agent/test/index.test.ts index 45ba11badbe..8a654985d40 100644 --- a/packages/plugin-agent/test/index.test.ts +++ b/packages/plugin-agent/test/index.test.ts @@ -769,9 +769,9 @@ describe("AgentPlugin", () => { expect(content).toContain("missing attachment"); expect(content).toContain("screenshot.png"); expect(content).toContain("fixture.log"); - expect( - await 
readText(join(outputDir, "tests", "default", "artifact-history.assets", "screenshot.png")), - ).toBe("png-bytes"); + expect(await readText(join(outputDir, "tests", "default", "artifact-history.assets", "screenshot.png"))).toBe( + "png-bytes", + ); expect(await readText(join(outputDir, "tests", "default", "artifact-history.assets", "fixture.log"))).toBe( "fixture log", ); From 22d5f3243bdb4d369d9d006646ad05d85f9755ed Mon Sep 17 00:00:00 2001 From: Dmitry Baev Date: Thu, 11 Jun 2026 07:57:00 +0100 Subject: [PATCH 4/5] fix win tests --- packages/cli/test/commands/agent.test.ts | 10 ++++---- packages/plugin-agent/test/paths.test.ts | 30 ++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 5 deletions(-) create mode 100644 packages/plugin-agent/test/paths.test.ts diff --git a/packages/cli/test/commands/agent.test.ts b/packages/cli/test/commands/agent.test.ts index 5727749a23b..49cdc66eddc 100644 --- a/packages/cli/test/commands/agent.test.ts +++ b/packages/cli/test/commands/agent.test.ts @@ -1,4 +1,4 @@ -import { resolve } from "node:path"; +import { join, resolve } from "node:path"; import { readConfig } from "@allurereport/core"; import { @@ -359,7 +359,7 @@ describe("agent command", () => { }), ); expect(logMock).toHaveBeenNthCalledWith(1, "agent output: /tmp/allure-agent-123"); - expect(logMock).toHaveBeenNthCalledWith(2, "agent index: /tmp/allure-agent-123/index.md"); + expect(logMock).toHaveBeenNthCalledWith(2, `agent index: ${join("/tmp/allure-agent-123", "index.md")}`); expect(logMock).toHaveBeenNthCalledWith(3, "npm test"); expect(logMock.mock.invocationCallOrder[0]).toBeLessThan((executeAllureRun as Mock).mock.invocationCallOrder[0]); expect(writeLatestAgentState).toHaveBeenNthCalledWith( @@ -437,7 +437,7 @@ describe("agent command", () => { }, }); expect(consoleModule.log).toHaveBeenCalledWith(`agent output: ${resolvedOutput}`); - expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${resolvedOutput}/index.md`); + 
expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${join(resolvedOutput, "index.md")}`); expect(consoleModule.log).toHaveBeenCalledWith(`agent expectations: ${resolvedExpectations}`); }); @@ -557,7 +557,7 @@ describe("agent command", () => { expect(readConfig).not.toHaveBeenCalled(); expect(executeAllureRun).not.toHaveBeenCalled(); expect(consoleModule.log).toHaveBeenCalledWith(`agent output: ${outputDir}`); - expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${outputDir}/index.md`); + expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${join(outputDir, "index.md")}`); expect(consoleModule.error).toHaveBeenCalledWith( 'Invalid --expect-label "module". Expected the form name=value, for example module=cli', ); @@ -589,7 +589,7 @@ describe("agent command", () => { expect(readConfig).not.toHaveBeenCalled(); expect(executeAllureRun).not.toHaveBeenCalled(); expect(consoleModule.log).toHaveBeenCalledWith(`agent output: ${outputDir}`); - expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${outputDir}/index.md`); + expect(consoleModule.log).toHaveBeenCalledWith(`agent index: ${join(outputDir, "index.md")}`); expect(consoleModule.error).toHaveBeenCalledWith( "Could not load expectations from /cwd/expected.yaml: Expected a YAML or JSON object", ); diff --git a/packages/plugin-agent/test/paths.test.ts b/packages/plugin-agent/test/paths.test.ts new file mode 100644 index 00000000000..202407fed7f --- /dev/null +++ b/packages/plugin-agent/test/paths.test.ts @@ -0,0 +1,30 @@ +import { join } from "node:path"; + +import { epic, feature, label, story } from "allure-js-commons"; +import { beforeEach, describe, expect, it } from "vitest"; + +import { formatAgentOutputLinks, resolveAgentIndexPath } from "../src/paths.js"; + +beforeEach(async () => { + await epic("coverage"); + await feature("agent-mode"); + await story("agent-output-paths"); + await label("coverage", "agent-mode"); +}); + +describe("agent output path helpers", () => { + 
it("should resolve the agent index path using native path joining", () => { + const outputDir = join("tmp", "allure-agent-123"); + + expect(resolveAgentIndexPath(outputDir)).toBe(join(outputDir, "index.md")); + }); + + it("should format the output directory and index path links together", () => { + const outputDir = join("tmp", "allure-agent-123"); + + expect(formatAgentOutputLinks(outputDir)).toEqual([ + `agent output: ${outputDir}`, + `agent index: ${join(outputDir, "index.md")}`, + ]); + }); +}); From 3db36a263c6aaa7c1d8f9aff828c7773a7f2b63d Mon Sep 17 00:00:00 2001 From: Dmitry Baev Date: Thu, 11 Jun 2026 10:20:10 +0100 Subject: [PATCH 5/5] fix more tests --- packages/cli/test/commands/agentLatest.test.ts | 17 +++++++++++++---- packages/cli/test/commands/agentSelect.test.ts | 17 +++++++++++++---- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/packages/cli/test/commands/agentLatest.test.ts b/packages/cli/test/commands/agentLatest.test.ts index dff18eac62a..9aa3910c82f 100644 --- a/packages/cli/test/commands/agentLatest.test.ts +++ b/packages/cli/test/commands/agentLatest.test.ts @@ -1,5 +1,7 @@ +import { join } from "node:path"; + import { readLatestAgentState, resolveAgentStateDir } from "@allurereport/plugin-agent"; -import { epic, feature, label, story } from "allure-js-commons"; +import { attachment, epic, feature, label, story } from "allure-js-commons"; import { run } from "clipanion"; import { type Mock, beforeEach, describe, expect, it, vi } from "vitest"; @@ -40,11 +42,13 @@ beforeEach(async () => { describe("agent latest command", () => { it("should print the latest output directory and index path for the resolved project cwd", async () => { const consoleModule = await import("node:console"); + const outputDir = "/tmp/allure-agent-123"; + const indexPath = join(outputDir, "index.md"); (readLatestAgentState as Mock).mockResolvedValueOnce({ schema: "allure-agent-latest/v1", cwd: "/cwd", - outputDir: "/tmp/allure-agent-123", + outputDir, 
command: "npm test", startedAt: "2026-04-15T18:00:00.000Z", status: "finished", @@ -52,9 +56,14 @@ describe("agent latest command", () => { await run(AgentLatestCommand, ["agent", "latest"]); + await attachment( + "latest output path contract", + JSON.stringify({ outputDir, indexPath }, null, 2), + "application/json", + ); expect(readLatestAgentState).toHaveBeenCalledWith("/cwd"); - expect(consoleModule.log).toHaveBeenNthCalledWith(1, "agent output: /tmp/allure-agent-123"); - expect(consoleModule.log).toHaveBeenNthCalledWith(2, "agent index: /tmp/allure-agent-123/index.md"); + expect(consoleModule.log).toHaveBeenNthCalledWith(1, `agent output: ${outputDir}`); + expect(consoleModule.log).toHaveBeenNthCalledWith(2, `agent index: ${indexPath}`); }); it("should exit with code 1 when no latest output exists for the project", async () => { diff --git a/packages/cli/test/commands/agentSelect.test.ts b/packages/cli/test/commands/agentSelect.test.ts index 50556a905bf..736e2e21cbb 100644 --- a/packages/cli/test/commands/agentSelect.test.ts +++ b/packages/cli/test/commands/agentSelect.test.ts @@ -1,5 +1,7 @@ +import { dirname, resolve } from "node:path"; + import { resolveAgentSelectionOutputDir, selectAgentTestPlan } from "@allurereport/plugin-agent"; -import { epic, feature, label, story } from "allure-js-commons"; +import { attachment, epic, feature, label, story } from "allure-js-commons"; import { run, UsageError } from "clipanion"; import { type Mock, beforeEach, describe, expect, it, vi } from "vitest"; @@ -90,6 +92,8 @@ describe("agent select command", () => { it("should write the selected test plan and print selection summary when output is provided", async () => { const consoleModule = await import("node:console"); const fsModule = await import("node:fs/promises"); + const outputPath = resolve("/cwd", "./testplan.json"); + const outputDir = dirname(outputPath); (resolveAgentSelectionOutputDir as Mock).mockResolvedValueOnce("/tmp/agent-output"); (selectAgentTestPlan 
as Mock).mockResolvedValueOnce({ @@ -113,13 +117,18 @@ describe("agent select command", () => { "./testplan.json", ]); - expect(fsModule.mkdir).toHaveBeenCalledWith("/cwd", { recursive: true }); + await attachment( + "selected test plan output path contract", + JSON.stringify({ outputPath, outputDir }, null, 2), + "application/json", + ); + expect(fsModule.mkdir).toHaveBeenCalledWith(outputDir, { recursive: true }); expect(fsModule.writeFile).toHaveBeenCalledWith( - "/cwd/testplan.json", + outputPath, `{\n "version": "1.0",\n "tests": [\n {\n "selector": "suite feature A"\n },\n {\n "selector": "suite feature B"\n }\n ]\n}\n`, "utf-8", ); - expect(consoleModule.log).toHaveBeenNthCalledWith(1, "agent testplan: /cwd/testplan.json"); + expect(consoleModule.log).toHaveBeenNthCalledWith(1, `agent testplan: ${outputPath}`); expect(consoleModule.log).toHaveBeenNthCalledWith(2, "agent selection source: /tmp/agent-output"); expect(consoleModule.log).toHaveBeenNthCalledWith(3, "agent selection preset: failed"); expect(consoleModule.log).toHaveBeenNthCalledWith(4, "agent selection tests: 2");