Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion internal/cli/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -780,7 +780,27 @@ func runAgent(ctx context.Context, agentName, fullsendDir, outputBase, targetRep
printer.StepDone(fmt.Sprintf("Agent-input files copied (%.1fs)", time.Since(inputStart).Seconds()))
}

// 8c. Host-side scan (Path A): scan the target repo's context files
// 8c. Make the target repo read-only if the harness opts in.
// Runs after all repo-directory writes (8a, 8a-2) are complete.
Comment thread
ben-alkov marked this conversation as resolved.
// Excludes .git/ so git operations (index.lock, etc.) still work.
if h.ReadonlyRepo {
// -prune skips .git traversal entirely; -not -type l prevents symlink traversal.
// chown ensures the sandbox user (run_as_user) owns .git/ after root-owned extraction.
chmodCmd := fmt.Sprintf(
Comment thread
ben-alkov marked this conversation as resolved.
"find %s -path '*/.git' -prune -o -not -type l -exec chmod a-w {} + && chown -R sandbox:sandbox %s/.git && chmod -R u+w %s/.git",
Comment thread
ben-alkov marked this conversation as resolved.
remoteRepositoryDir, remoteRepositoryDir, remoteRepositoryDir,
)
if _, stderr, exitCode, err := sandbox.Exec(sandboxName, chmodCmd, 30*time.Second); err != nil {
printer.StepFail("Could not make repo read-only: " + err.Error())
return fmt.Errorf("Read-only repo enforcement failed: %w", err)
Comment thread
ben-alkov marked this conversation as resolved.
} else if exitCode != 0 {
printer.StepFail("Could not make repo read-only (exit " + fmt.Sprintf("%d", exitCode) + "): " + stderr)
return fmt.Errorf("read-only repo enforcement failed: exit code %d", exitCode)
}
printer.StepDone("Target repo set to read-only")
}

// 8d. Host-side scan (Path A): scan the target repo's context files
// (CLAUDE.md, AGENTS.md, SKILL.md, etc.) before the agent processes them.
// The target branch may contain attacker-controlled files from a PR.
if h.SecurityEnabled() {
Expand Down
1 change: 1 addition & 0 deletions internal/harness/harness.go
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ type Harness struct {
RunnerEnv map[string]string `yaml:"runner_env,omitempty"`
Env *EnvConfig `yaml:"env,omitempty"`
TimeoutMinutes int `yaml:"timeout_minutes,omitempty"`
ReadonlyRepo bool `yaml:"readonly_repo,omitempty"`
Comment thread
ben-alkov marked this conversation as resolved.
Comment thread
ben-alkov marked this conversation as resolved.
Comment thread
ben-alkov marked this conversation as resolved.
Comment thread
ben-alkov marked this conversation as resolved.
Comment thread
ben-alkov marked this conversation as resolved.
SandboxTimeoutSeconds int `yaml:"sandbox_timeout_seconds,omitempty"`
Security *SecurityConfig `yaml:"security,omitempty"`
AllowedRemoteResources []string `yaml:"allowed_remote_resources,omitempty"`
Expand Down
29 changes: 29 additions & 0 deletions internal/harness/harness_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1503,6 +1503,35 @@ role: test
assert.Nil(t, h.MaxRuntimeFetches)
}

func TestLoad_ReadonlyRepoField(t *testing.T) {
content := `
agent: agents/review.md
readonly_repo: true
Comment thread
ben-alkov marked this conversation as resolved.
role: review
`
dir := t.TempDir()
path := filepath.Join(dir, "test.yaml")
require.NoError(t, os.WriteFile(path, []byte(content), 0o644))

h, err := Load(path)
require.NoError(t, err)
assert.True(t, h.ReadonlyRepo)
}

func TestLoad_ReadonlyRepoFieldOmitted(t *testing.T) {
content := `
agent: agents/code.md
role: code
`
dir := t.TempDir()
path := filepath.Join(dir, "test.yaml")
require.NoError(t, os.WriteFile(path, []byte(content), 0o644))

h, err := Load(path)
require.NoError(t, err)
assert.False(t, h.ReadonlyRepo)
}

// --- ValidForgePlatform tests ---

func TestValidForgePlatform(t *testing.T) {
Expand Down
30 changes: 6 additions & 24 deletions internal/scaffold/fullsend-repo/agents/review.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,31 +76,15 @@ findings equally. If filtering removes all findings from a
You **either**:

- When invoked for local/pre-push review, evaluate sequentially via the
`code-review` skill.
`code-review` skill.

**or**

- Otherwise orchestrate code reviews by dispatching specialized
sub-agents in parallel across six review dimensions:

1. **Correctness** — logic errors, edge cases, nil handling, API
contracts, test adequacy, test integrity (opus)
2. **Security** — RBAC, authentication, data exposure, privilege
escalation, injection defense, content sandboxing (opus)
3. **Intent & coherence** — whether the change matches authorized work,
is appropriately scoped, and fits the project's architectural
direction (sonnet)
4. **Style/conventions** — naming, error handling idioms, API shape,
code organization (sonnet)
5. **Documentation currency** — whether the PR's code changes have
made in-repo documentation stale, incomplete, or misleading (sonnet)
6. **Cross-repo contracts** — whether the change breaks APIs, schemas,
or interfaces other repos depend on (sonnet, conditional)

Sub-agent definitions live in `skills/pr-review/sub-agents/`. Each
sub-agent is a markdown file with frontmatter specifying its `model`
pin. The `pr-review` skill (orchestrator) handles triage, dispatch,
and synthesis.
sub-agents in parallel across six review dimensions

The `pr-review` skill (orchestrator) handles triage, dispatch,
and synthesis.

## Skill routing

Expand All @@ -111,9 +95,7 @@ This agent has three skills. Select based on invocation context:
dispatches specialized sub-agents in parallel, collects and
synthesizes their findings, runs PR-specific checks (protected
paths, scope authorization, PR body injection defense), and
produces a structured review result. Sub-agent definitions live in
`skills/pr-review/sub-agents/`. Each sub-agent is dispatched with
`model` from its frontmatter and `subagent_type: Explore`.
produces a structured review result.
- **`code-review`** — the prompt is about a local branch diff with
no PR, or another skill is delegating code evaluation. This skill
evaluates the diff and source files directly across the original
Expand Down
1 change: 1 addition & 0 deletions internal/scaffold/fullsend-repo/harness/review.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ doc: docs/agents/review.md
model: opus
image: ghcr.io/fullsend-ai/fullsend-code:latest
policy: policies/review.yaml
readonly_repo: true
Comment thread
ben-alkov marked this conversation as resolved.

role: review
slug: fullsend-ai-review
Expand Down
2 changes: 1 addition & 1 deletion internal/scaffold/fullsend-repo/policies/review.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ version: 1
# handles mutations on the runner with a separate write-scoped token.

filesystem_policy:
include_workdir: true
include_workdir: false
Comment thread
ben-alkov marked this conversation as resolved.
Comment thread
ben-alkov marked this conversation as resolved.
Comment thread
ben-alkov marked this conversation as resolved.
Comment thread
ben-alkov marked this conversation as resolved.
Comment thread
ben-alkov marked this conversation as resolved.
Comment thread
ben-alkov marked this conversation as resolved.
read_only: [/usr, /lib, /proc, /dev/urandom, /app, /etc, /var/log]
read_write: [/sandbox, /tmp, /dev/null]
landlock:
Expand Down
63 changes: 25 additions & 38 deletions internal/scaffold/fullsend-repo/skills/pr-review/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,18 @@ post-script to post. In interactive mode, it posts directly via

## Sub-agent roster

Sub-agent definitions live in `sub-agents/` relative to this file.
Each is a markdown file with frontmatter specifying `name`, `model`,
and `description`.

| Sub-agent | Model | Dispatch | Dimension |
|------------------------|--------|------------|--------------------------------------------------------------------------------|
| `correctness` | opus | parallel | Logic errors, edge cases, nil handling, API contracts, test adequacy/integrity |
| `security` | opus | parallel | Auth, data exposure, privilege escalation, injection defense, content security |
| `intent-coherence` | sonnet | parallel | Authorization, scope, tier matching, architectural fit, design coherence |
| `style-conventions` | sonnet | parallel | Naming, error handling idioms, API shape, code organization |
| `docs-currency` | sonnet | parallel | Documentation staleness (follows docs-review skill inline) |
| `cross-repo-contracts` | sonnet | parallel | API contract breakage affecting other repos (conditional) |
| `challenger` | opus | sequential | Adversarial challenge of findings, false-positive removal, deduplication |

The Model column reflects each sub-agent's current frontmatter. Any
value accepted by the Agent tool's `model` parameter is valid in
sub-agent frontmatter.
Sub-agent discovery: The sub-agents' definitions are in `sub-agents/`
relative to this file.

| Sub-agent | Dispatch | Dimensions |
|------------------------|------------|-------------------------------------------------------------------------------------------------------------------------|
| `correctness` | parallel | Logic errors, edge cases, nil handling, API contracts, test adequacy/integrity |
| `security` | parallel | Security vulnerabilities, auth/access control, data exposure, injection defense, privilege escalation, content security |
| `intent-coherence` | parallel | Architectural coherence & fit, design coherence, intent alignment, PR scope, scope authorization, tier matching |
| `style-conventions` | parallel | Repo-specific naming, error-handling idioms, API shape, code organization |
| `docs-currency` | parallel | Documentation staleness (follows docs-review skill inline) |
| `cross-repo-contracts` | parallel | API contract breakage affecting other repos (conditional) |
| `challenger` | sequential | Adversarial challenge of findings, false-positive removal, deduplication |

## Findings vs inline comments

Expand Down Expand Up @@ -255,7 +250,8 @@ dimensions are relevant:
#### 3c. Select sub-agents

Based on the domain classification, select sub-agents for dispatch.
All selected sub-agents run in parallel.
All selected sub-agents run in parallel (with the exception of the
challenger, which runs by itself after all other sub-agents have finished).

**Dispatch sub-agents based on the classification — typically 3-6.**
The orchestrator should auto-select which sub-agents are relevant for
Expand Down Expand Up @@ -311,9 +307,7 @@ For each selected sub-agent, assemble a context package containing:

For each selected sub-agent:

1. Read the sub-agent definition from `sub-agents/{name}.md`
2. Extract the `model` from frontmatter
3. Compose the spawn prompt from three parts:
1. Compose the spawn prompt from:

**Part 1 — Sub-agent definition:** the full markdown body of the
sub-agent file (everything after the frontmatter)
Expand Down Expand Up @@ -359,12 +353,8 @@ For each selected sub-agent:
REVIEW_SUB_AGENT_TRUE
```

4. Spawn via Agent tool with:
- `model`: from the sub-agent frontmatter (any value accepted by
the Agent tool's `model` parameter)
- `subagent_type`: `Explore` (read-only — sub-agents do not write)
- `run_in_background`: `true`
- `prompt`: composed from parts 1–5
2. Spawn the subagents with their `prompt` argument composed from parts
1–5 above
Comment thread
ben-alkov marked this conversation as resolved.

**All sub-agents MUST be dispatched simultaneously** — include all
Agent calls in a single message so they run concurrently. This is the
Expand Down Expand Up @@ -459,8 +449,7 @@ fresh context. The challenger has not seen the orchestrator's synthesis
— it receives only the raw findings and the diff, preserving context
isolation.

1. Read `sub-agents/challenger.md` for the sub-agent definition
2. Compose the spawn prompt from:
1. Compose the spawn prompt from:

**Part 1 — Sub-agent definition:** the full markdown body of the
challenger sub-agent file (everything after the frontmatter)
Expand Down Expand Up @@ -494,10 +483,8 @@ isolation.
REVIEW_SUB_AGENT_TRUE
```

3. Spawn via Agent tool with:
- `model`: from the challenger sub-agent frontmatter (`opus`)
- `subagent_type`: `Explore` (read-only)
- `prompt`: composed from parts 1–4
2. Spawn the subagents with their `prompt` argument composed from parts
1–4 above

**Prompt size guard:** If the combined context package (findings
JSON + diff + file list + PR metadata) exceeds 80 000 tokens,
Expand All @@ -510,7 +497,7 @@ isolation.
needs their findings as input), so it is dispatched sequentially,
not in the parallel batch from step 4.

4. Consume the challenger's output. The challenger returns a **different
3. Consume the challenger's output. The challenger returns a **different
format** from dimension sub-agents: an object with
`adjudicated_findings` and `removed_findings` arrays (not a flat
finding array). Parse accordingly:
Expand All @@ -522,15 +509,15 @@ isolation.
part of the standard finding schema.
- If `adjudicated_findings` is empty but the pre-challenger finding
set was non-empty, treat this as a challenger failure (fall back
per step 5 below). A legitimate challenger pass that removes all
findings is unlikely — an empty result more likely indicates a
parsing error or context truncation.
per the immediate next step below). A legitimate challenger pass
that removes all findings is unlikely — an empty result more likely
indicates a parsing error or context truncation.
- Otherwise, replace the merged finding set with the challenger's
`adjudicated_findings`.
- Log any `removed_findings` for transparency but do not include
them in the final review.

5. If the challenger sub-agent fails (timeout, error, empty
4. If the challenger sub-agent fails (timeout, error, empty
response), fall back to using the pre-challenger merged finding
set from steps 6a–6c. Record an **info**-level finding:

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
---
name: review-challenger
description: Adversarially challenges review findings, removes false positives, deduplicates across dimensions, and produces an adjudicated finding list.
name: challenger
description: >-
Adversarially challenges review findings, removes false positives,
deduplicates across dimensions, and produces an adjudicated finding list.
model: opus
tools: Read, Grep, Glob
permissionMode: dontAsk
background: false
---

# Challenger
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
---
name: review-correctness
description: Evaluates logic correctness, edge cases, test adequacy, and test integrity.
name: correctness
description: >-
Evaluates logic correctness, edge cases, nil handling, API contracts,
test adequacy/integrity.
model: opus
tools: Read, Grep, Glob
permissionMode: dontAsk
background: true
---

# Correctness
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
---
name: review-cross-repo-contracts
description: Evaluates backward compatibility of exported interfaces and API contracts.
name: cross-repo-contracts
description: >-
Evaluates potential API contract breakage affecting other repos.
model: claude-sonnet-4-6@default
tools: Read, Grep, Glob
permissionMode: dontAsk
background: true
---

# Cross-Repo Contracts
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
---
name: review-docs-currency
description: Evaluates documentation staleness against code changes.
name: docs-currency
description: >-
Evaluates documentation staleness against code changes.
model: claude-sonnet-4-6@default
tools: Read, Grep, Glob
permissionMode: dontAsk
background: true
---

# Docs Currency
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
---
name: review-intent-coherence
description: Evaluates intent alignment, scope authorization, and architectural coherence.
name: intent-coherence
description: >-
Evaluates architectural coherence & fit, design coherence,
intent alignment, PR scope, scope authorization, and tier matching
model: claude-sonnet-4-6@default
tools: Read, Grep, Glob
permissionMode: dontAsk
background: true
---

# Intent & Coherence
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
---
name: review-security
description: Evaluates security vulnerabilities, auth/access control, data exposure, and injection defense.
name: security
description: >-
Evaluates security vulnerabilities, auth/access control, data exposure,
injection defense, privilege escalation, and content security.
model: opus
tools: Read, Grep, Glob
permissionMode: dontAsk
background: true
---

# Security
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
---
name: review-style-conventions
description: Evaluates repo-specific naming, error-handling idioms, API shape, and code organization.
name: style-conventions
description: >-
Evaluates repo-specific naming, error-handling idioms, API shape,
and code organization.
model: claude-sonnet-4-6@default
tools: Read, Grep, Glob
permissionMode: dontAsk
background: true
---

# Style & Conventions
Expand Down
Loading