diff --git a/internal/cli/run.go b/internal/cli/run.go index 93b9d2125..37a4dbe12 100644 --- a/internal/cli/run.go +++ b/internal/cli/run.go @@ -780,7 +780,27 @@ func runAgent(ctx context.Context, agentName, fullsendDir, outputBase, targetRep printer.StepDone(fmt.Sprintf("Agent-input files copied (%.1fs)", time.Since(inputStart).Seconds())) } - // 8c. Host-side scan (Path A): scan the target repo's context files + // 8c. Make the target repo read-only if the harness opts in. + // Runs after all repo-directory writes (8a, 8a-2) are complete. + // Excludes .git/ so git operations (index.lock, etc.) still work. + if h.ReadonlyRepo { + // -prune skips .git traversal entirely; -not -type l prevents symlink traversal. + // chown ensures the sandbox user (run_as_user) owns .git/ after root-owned extraction. + chmodCmd := fmt.Sprintf( + "find %s -path '*/.git' -prune -o -not -type l -exec chmod a-w {} + && chown -R sandbox:sandbox %s/.git && chmod -R u+w %s/.git", + remoteRepositoryDir, remoteRepositoryDir, remoteRepositoryDir, + ) + if _, stderr, exitCode, err := sandbox.Exec(sandboxName, chmodCmd, 30*time.Second); err != nil { + printer.StepFail("Could not make repo read-only: " + err.Error()) + return fmt.Errorf("Read-only repo enforcement failed: %w", err) + } else if exitCode != 0 { + printer.StepFail("Could not make repo read-only (exit " + fmt.Sprintf("%d", exitCode) + "): " + stderr) + return fmt.Errorf("read-only repo enforcement failed: exit code %d", exitCode) + } + printer.StepDone("Target repo set to read-only") + } + + // 8d. Host-side scan (Path A): scan the target repo's context files // (CLAUDE.md, AGENTS.md, SKILL.md, etc.) before the agent processes them. // The target branch may contain attacker-controlled files from a PR. if h.SecurityEnabled() { diff --git a/internal/harness/harness.go b/internal/harness/harness.go index 62af46f3d..06f9ca0ce 100644 --- a/internal/harness/harness.go +++ b/internal/harness/harness.go @@ -257,6 +257,7 @@ type Harness struct { RunnerEnv map[string]string `yaml:"runner_env,omitempty"` Env *EnvConfig `yaml:"env,omitempty"` TimeoutMinutes int `yaml:"timeout_minutes,omitempty"` + ReadonlyRepo bool `yaml:"readonly_repo,omitempty"` SandboxTimeoutSeconds int `yaml:"sandbox_timeout_seconds,omitempty"` Security *SecurityConfig `yaml:"security,omitempty"` AllowedRemoteResources []string `yaml:"allowed_remote_resources,omitempty"` diff --git a/internal/harness/harness_test.go b/internal/harness/harness_test.go index 236db54c5..692f44cd8 100644 --- a/internal/harness/harness_test.go +++ b/internal/harness/harness_test.go @@ -1503,6 +1503,35 @@ role: test assert.Nil(t, h.MaxRuntimeFetches) } +func TestLoad_ReadonlyRepoField(t *testing.T) { + content := ` +agent: agents/review.md +readonly_repo: true +role: review +` + dir := t.TempDir() + path := filepath.Join(dir, "test.yaml") + require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) + + h, err := Load(path) + require.NoError(t, err) + assert.True(t, h.ReadonlyRepo) +} + +func TestLoad_ReadonlyRepoFieldOmitted(t *testing.T) { + content := ` +agent: agents/code.md +role: code +` + dir := t.TempDir() + path := filepath.Join(dir, "test.yaml") + require.NoError(t, os.WriteFile(path, []byte(content), 0o644)) + + h, err := Load(path) + require.NoError(t, err) + assert.False(t, h.ReadonlyRepo) +} + // --- ValidForgePlatform tests --- func TestValidForgePlatform(t *testing.T) { diff --git a/internal/scaffold/fullsend-repo/agents/review.md b/internal/scaffold/fullsend-repo/agents/review.md index c30e683e0..8354b4c24 100644 --- a/internal/scaffold/fullsend-repo/agents/review.md +++ b/internal/scaffold/fullsend-repo/agents/review.md @@ -76,31 +76,15 @@ findings equally. If filtering removes all findings from a You **either**: - When invoked for local/pre-push review, evaluate sequentially via the -`code-review` skill. + `code-review` skill. **or** - Otherwise orchestrate code reviews by dispatching specialized -sub-agents in parallel across six review dimensions: - -1. **Correctness** — logic errors, edge cases, nil handling, API - contracts, test adequacy, test integrity (opus) -2. **Security** — RBAC, authentication, data exposure, privilege - escalation, injection defense, content sandboxing (opus) -3. **Intent & coherence** — whether the change matches authorized work, - is appropriately scoped, and fits the project's architectural - direction (sonnet) -4. **Style/conventions** — naming, error handling idioms, API shape, - code organization (sonnet) -5. **Documentation currency** — whether the PR's code changes have - made in-repo documentation stale, incomplete, or misleading (sonnet) -6. **Cross-repo contracts** — whether the change breaks APIs, schemas, - or interfaces other repos depend on (sonnet, conditional) - -Sub-agent definitions live in `skills/pr-review/sub-agents/`. Each -sub-agent is a markdown file with frontmatter specifying its `model` -pin. The `pr-review` skill (orchestrator) handles triage, dispatch, -and synthesis. + sub-agents in parallel across six review dimensions + + The `pr-review` skill (orchestrator) handles triage, dispatch, + and synthesis. ## Skill routing @@ -111,9 +95,7 @@ This agent has three skills. Select based on invocation context: dispatches specialized sub-agents in parallel, collects and synthesizes their findings, runs PR-specific checks (protected paths, scope authorization, PR body injection defense), and - produces a structured review result. Sub-agent definitions live in - `skills/pr-review/sub-agents/`. Each sub-agent is dispatched with - `model` from its frontmatter and `subagent_type: Explore`. + produces a structured review result. - **`code-review`** — the prompt is about a local branch diff with no PR, or another skill is delegating code evaluation. This skill evaluates the diff and source files directly across the original diff --git a/internal/scaffold/fullsend-repo/harness/review.yaml b/internal/scaffold/fullsend-repo/harness/review.yaml index 7a029c2da..ba51ecdd5 100644 --- a/internal/scaffold/fullsend-repo/harness/review.yaml +++ b/internal/scaffold/fullsend-repo/harness/review.yaml @@ -4,6 +4,7 @@ doc: docs/agents/review.md model: opus image: ghcr.io/fullsend-ai/fullsend-code:latest policy: policies/review.yaml +readonly_repo: true role: review slug: fullsend-ai-review diff --git a/internal/scaffold/fullsend-repo/policies/review.yaml b/internal/scaffold/fullsend-repo/policies/review.yaml index 00f10b630..3871b6d8b 100644 --- a/internal/scaffold/fullsend-repo/policies/review.yaml +++ b/internal/scaffold/fullsend-repo/policies/review.yaml @@ -8,7 +8,7 @@ version: 1 # handles mutations on the runner with a separate write-scoped token. filesystem_policy: - include_workdir: true + include_workdir: false read_only: [/usr, /lib, /proc, /dev/urandom, /app, /etc, /var/log] read_write: [/sandbox, /tmp, /dev/null] landlock: diff --git a/internal/scaffold/fullsend-repo/skills/pr-review/SKILL.md b/internal/scaffold/fullsend-repo/skills/pr-review/SKILL.md index 7d3226834..32fd54790 100644 --- a/internal/scaffold/fullsend-repo/skills/pr-review/SKILL.md +++ b/internal/scaffold/fullsend-repo/skills/pr-review/SKILL.md @@ -33,23 +33,18 @@ post-script to post. In interactive mode, it posts directly via ## Sub-agent roster -Sub-agent definitions live in `sub-agents/` relative to this file. -Each is a markdown file with frontmatter specifying `name`, `model`, -and `description`. - -| Sub-agent | Model | Dispatch | Dimension | -|------------------------|--------|------------|--------------------------------------------------------------------------------| -| `correctness` | opus | parallel | Logic errors, edge cases, nil handling, API contracts, test adequacy/integrity | -| `security` | opus | parallel | Auth, data exposure, privilege escalation, injection defense, content security | -| `intent-coherence` | sonnet | parallel | Authorization, scope, tier matching, architectural fit, design coherence | -| `style-conventions` | sonnet | parallel | Naming, error handling idioms, API shape, code organization | -| `docs-currency` | sonnet | parallel | Documentation staleness (follows docs-review skill inline) | -| `cross-repo-contracts` | sonnet | parallel | API contract breakage affecting other repos (conditional) | -| `challenger` | opus | sequential | Adversarial challenge of findings, false-positive removal, deduplication | - -The Model column reflects each sub-agent's current frontmatter. Any -value accepted by the Agent tool's `model` parameter is valid in -sub-agent frontmatter. +Sub-agent discovery: The sub-agents' definitions are in `sub-agents/` +relative to this file. + +| Sub-agent | Dispatch | Dimensions | +|------------------------|------------|-------------------------------------------------------------------------------------------------------------------------| +| `correctness` | parallel | Logic errors, edge cases, nil handling, API contracts, test adequacy/integrity | +| `security` | parallel | Security vulnerabilities, auth/access control, data exposure, injection defense, privilege escalation, content security | +| `intent-coherence` | parallel | Architectural coherence & fit, design coherence, intent alignment, PR scope, scope authorization, tier matching | +| `style-conventions` | parallel | Repo-specific naming, error-handling idioms, API shape, code organization | +| `docs-currency` | parallel | Documentation staleness (follows docs-review skill inline) | +| `cross-repo-contracts` | parallel | API contract breakage affecting other repos (conditional) | +| `challenger` | sequential | Adversarial challenge of findings, false-positive removal, deduplication | ## Findings vs inline comments @@ -255,7 +250,8 @@ dimensions are relevant: #### 3c. Select sub-agents Based on the domain classification, select sub-agents for dispatch. -All selected sub-agents run in parallel. +All selected sub-agents run in parallel (with the exception of the +challenger, which runs by itself after all other sub-agents have finished). **Dispatch sub-agents based on the classification — typically 3-6.** The orchestrator should auto-select which sub-agents are relevant for @@ -311,9 +307,7 @@ For each selected sub-agent, assemble a context package containing: For each selected sub-agent: -1. Read the sub-agent definition from `sub-agents/{name}.md` -2. Extract the `model` from frontmatter -3. Compose the spawn prompt from three parts: +1. Compose the spawn prompt from: **Part 1 — Sub-agent definition:** the full markdown body of the sub-agent file (everything after the frontmatter) @@ -359,12 +353,8 @@ For each selected sub-agent: REVIEW_SUB_AGENT_TRUE ``` -4. Spawn via Agent tool with: - - `model`: from the sub-agent frontmatter (any value accepted by - the Agent tool's `model` parameter) - - `subagent_type`: `Explore` (read-only — sub-agents do not write) - - `run_in_background`: `true` - - `prompt`: composed from parts 1–5 +2. Spawn the subagents with their `prompt` argument composed from parts + 1–5 above **All sub-agents MUST be dispatched simultaneously** — include all Agent calls in a single message so they run concurrently. This is the @@ -459,8 +449,7 @@ fresh context. The challenger has not seen the orchestrator's synthesis — it receives only the raw findings and the diff, preserving context isolation. -1. Read `sub-agents/challenger.md` for the sub-agent definition -2. Compose the spawn prompt from: +1. Compose the spawn prompt from: **Part 1 — Sub-agent definition:** the full markdown body of the challenger sub-agent file (everything after the frontmatter) @@ -494,10 +483,8 @@ isolation. REVIEW_SUB_AGENT_TRUE ``` -3. Spawn via Agent tool with: - - `model`: from the challenger sub-agent frontmatter (`opus`) - - `subagent_type`: `Explore` (read-only) - - `prompt`: composed from parts 1–4 +2. Spawn the subagents with their `prompt` argument composed from parts + 1–4 above **Prompt size guard:** If the combined context package (findings JSON + diff + file list + PR metadata) exceeds 80 000 tokens, @@ -510,7 +497,7 @@ isolation. needs their findings as input), so it is dispatched sequentially, not in the parallel batch from step 4. -4. Consume the challenger's output. The challenger returns a **different +3. Consume the challenger's output. The challenger returns a **different format** from dimension sub-agents: an object with `adjudicated_findings` and `removed_findings` arrays (not a flat finding array). Parse accordingly: @@ -522,15 +509,15 @@ isolation. part of the standard finding schema. - If `adjudicated_findings` is empty but the pre-challenger finding set was non-empty, treat this as a challenger failure (fall back - per step 5 below). A legitimate challenger pass that removes all - findings is unlikely — an empty result more likely indicates a - parsing error or context truncation. + per the immediate next step below). A legitimate challenger pass + that removes all findings is unlikely — an empty result more likely + indicates a parsing error or context truncation. - Otherwise, replace the merged finding set with the challenger's `adjudicated_findings`. - Log any `removed_findings` for transparency but do not include them in the final review. -5. If the challenger sub-agent fails (timeout, error, empty +4. If the challenger sub-agent fails (timeout, error, empty response), fall back to using the pre-challenger merged finding set from steps 6a–6c. Record an **info**-level finding: diff --git a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/challenger.md b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/challenger.md index 7375dba4e..b82a4bf70 100644 --- a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/challenger.md +++ b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/challenger.md @@ -1,7 +1,12 @@ --- -name: review-challenger -description: Adversarially challenges review findings, removes false positives, deduplicates across dimensions, and produces an adjudicated finding list. +name: challenger +description: >- + Adversarially challenges review findings, removes false positives, + deduplicates across dimensions, and produces an adjudicated finding list. model: opus +tools: Read, Grep, Glob +permissionMode: dontAsk +background: false --- # Challenger diff --git a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/correctness.md b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/correctness.md index f8658f7d8..8fc422c29 100644 --- a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/correctness.md +++ b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/correctness.md @@ -1,7 +1,12 @@ --- -name: review-correctness -description: Evaluates logic correctness, edge cases, test adequacy, and test integrity. +name: correctness +description: >- + Evaluates logic correctness, edge cases, nil handling, API contracts, + test adequacy/integrity. model: opus +tools: Read, Grep, Glob +permissionMode: dontAsk +background: true --- # Correctness diff --git a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/cross-repo-contracts.md b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/cross-repo-contracts.md index 463b4c0c1..ee15860a6 100644 --- a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/cross-repo-contracts.md +++ b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/cross-repo-contracts.md @@ -1,7 +1,11 @@ --- -name: review-cross-repo-contracts -description: Evaluates backward compatibility of exported interfaces and API contracts. +name: cross-repo-contracts +description: >- + Evaluates potential API contract breakage affecting other repos. model: claude-sonnet-4-6@default +tools: Read, Grep, Glob +permissionMode: dontAsk +background: true --- # Cross-Repo Contracts diff --git a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/docs-currency.md b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/docs-currency.md index 71f47995e..16c2fa112 100644 --- a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/docs-currency.md +++ b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/docs-currency.md @@ -1,7 +1,11 @@ --- -name: review-docs-currency -description: Evaluates documentation staleness against code changes. +name: docs-currency +description: >- + Evaluates documentation staleness against code changes. model: claude-sonnet-4-6@default +tools: Read, Grep, Glob +permissionMode: dontAsk +background: true --- # Docs Currency diff --git a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/intent-coherence.md b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/intent-coherence.md index 1e8f789d2..222db30b5 100644 --- a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/intent-coherence.md +++ b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/intent-coherence.md @@ -1,7 +1,12 @@ --- -name: review-intent-coherence -description: Evaluates intent alignment, scope authorization, and architectural coherence. +name: intent-coherence +description: >- + Evaluates architectural coherence & fit, design coherence, + intent alignment, PR scope, scope authorization, and tier matching model: claude-sonnet-4-6@default +tools: Read, Grep, Glob +permissionMode: dontAsk +background: true --- # Intent & Coherence diff --git a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md index 9191126ec..b60365058 100644 --- a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md +++ b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/security.md @@ -1,7 +1,12 @@ --- -name: review-security -description: Evaluates security vulnerabilities, auth/access control, data exposure, and injection defense. +name: security +description: >- + Evaluates security vulnerabilities, auth/access control, data exposure, + injection defense, privilege escalation, and content security. model: opus +tools: Read, Grep, Glob +permissionMode: dontAsk +background: true --- # Security diff --git a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/style-conventions.md b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/style-conventions.md index 1dcd0ecd2..7ef08ef79 100644 --- a/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/style-conventions.md +++ b/internal/scaffold/fullsend-repo/skills/pr-review/sub-agents/style-conventions.md @@ -1,7 +1,12 @@ --- -name: review-style-conventions -description: Evaluates repo-specific naming, error-handling idioms, API shape, and code organization. +name: style-conventions +description: >- + Evaluates repo-specific naming, error-handling idioms, API shape, + and code organization. model: claude-sonnet-4-6@default +tools: Read, Grep, Glob +permissionMode: dontAsk +background: true --- # Style & Conventions