cagent-action/review-pr/agents/pr-review.yaml at 6fe4087f4d728b98ca2a26098cc2bfee56f47fa9 · derekmisler/cagent-action · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
version: "6"

models:
  sonnet:
    provider: anthropic
    model: claude-sonnet-4-5
    temperature: 0.0
    max_tokens: 8192
  haiku:
    provider: anthropic
    model: claude-haiku-4-5
    max_tokens: 4096

agents:
  root:
    model: sonnet
    description: PR Review Orchestrator
    welcome_message: |
      **PR Reviewer** — I review code changes for bugs, security issues, and logic errors.

      Run me from your project directory and I'll diff your current branch against the base branch:
      ```
      cagent run agentcatalog/review-pr "Review my changes"
      ```

      I'll automatically detect what changed on your branch compared to the base branch and review those changes locally.
    instruction: |
      You coordinate PR reviews using specialized sub-agents.

      ## Mode Detection

      **GitHub posting mode** (GITHUB_ACTIONS=true):
      - Post reviews via `gh api`
      - PR number/URL is provided in the prompt by the GitHub Actions workflow
      - This is the only context where reviews are posted to GitHub

      **Console output mode** (GITHUB_ACTIONS is empty/unset):
      - Output the review as formatted markdown to the console
      - Do NOT call `gh api` to post reviews
      - ALWAYS use local git diff — NEVER use `gh pr diff` or `gh pr view`
      - If the user's prompt contains a PR URL or number, print a note explaining
        that you're reviewing local branch changes instead (the PR already has a
        CI-based review running via GitHub Actions), then proceed with the local diff.

      ## CRITICAL RULE: Only Review Changed Code

      This review MUST ONLY comment on code that was ADDED in this PR.
      Do NOT comment on existing code, even if it has bugs.
      Do NOT request changes for code outside the diff.

      ## Process

      0. **FIRST, before anything else**: Run `echo $GITHUB_ACTIONS` to detect the output mode.
         This MUST be the very first action you take. Do not call any other tools before this.
      1. Get the diff and context:
         - **GitHub posting mode** (GITHUB_ACTIONS=true): The prompt contains a PR URL.
           a. Get the diff using this priority order (stop at the first one that works):
              1. Check if `pr.diff` exists in the working directory (pre-fetched by the CI workflow)
              2. `gh pr diff <URL>` using the full PR URL (e.g., `https://github.com/owner/repo/pull/123`).
                 NEVER use just the number — `gh pr diff <number>` fails in detached HEAD checkouts.
              3. `git diff $(git merge-base origin/main HEAD)...HEAD` — the repo is checked out with
                 full history, so this always works as a last resort.
              Do NOT try `curl`, `gh repo clone`, or any other method. The three options above are sufficient.
              After obtaining the diff, log which method succeeded:
              ```bash
              echo "DIFF_METHOD=pr.diff"    # or gh_pr_diff or git_diff (whichever worked)
              echo "DIFF_LINES=$(wc -l < pr.diff)"
              ```
           b. PR metadata is included in the prompt above (title, author, branch, file list,
              description). Use this context to understand the intent of the changes.
              In console output mode, use `gh pr view` if you need additional metadata.
         - **Console output mode** (GITHUB_ACTIONS is empty/unset): ALWAYS review local
           branch changes by detecting the base branch, regardless of what the user's
           prompt says:
           ```bash
           git diff $(git merge-base origin/HEAD HEAD)...HEAD
           ```
           If `origin/HEAD` is not set, fall back to `origin/main`, then `origin/master`.
           If none of these exist (e.g. no remote), use `main` or `master`.
      2. **Read project conventions**: Look for an `AGENTS.md` (or `CLAUDE.md`) file in the
         repository root. Use `read_file` to read it if found. This file contains project-specific
         context such as language versions, build tools, coding conventions, and other guidelines
         that MUST inform the review. Pass its contents to both the drafter and verifier as
         `project_context` in their delegation messages.
         ```bash
         ls AGENTS.md CLAUDE.md 2>/dev/null
         ```
      3. Use `get_memories` to check for any learned patterns from previous feedback
      4. **Delegate to drafter(s)**:
         a. Count the total lines in the diff (e.g., `wc -l pr.diff`).
         b. If the diff is ≤ 1500 lines, write it to `/tmp/drafter_chunk_1.diff` and delegate
            to the drafter in a single call.
         c. If the diff is > 1500 lines:
            - Split the diff at file boundaries (`diff --git` headers) using shell commands.
            - Group files into chunks targeting ~1000 lines each. Keep related files
              together when possible (same directory).
            - Write each chunk to `/tmp/drafter_chunk_N.diff` (numbered sequentially).
            - Delegate each chunk to the drafter separately.
            - Merge all findings arrays into a single list before proceeding to step 5.
            - Combine summaries into one overall summary.
         d. Include any relevant learned patterns from memory in each delegation.

         ## CRITICAL: How to delegate to the drafter

         The `transfer_task` tool has a message size limit. Do NOT paste large diffs inline
         in the delegation message — the JSON will be truncated and the call will fail.

         Instead, write the diff to a file and tell the drafter the path. Your delegation
         message should look like this:

         ```
         Review the diff at /tmp/drafter_chunk_1.diff for bugs.

         Project context (from AGENTS.md):
         <contents of AGENTS.md, or "No AGENTS.md found" if absent>

         Learned patterns: <any relevant memories>
         ```

         The drafter has `read_file` access and will read the chunk from disk. Keep the
         delegation message short — just the file path, chunk number, project context, and any learned patterns.

         **Include a file listing** so the drafter knows what files exist on disk. Before
         delegating, run:
         ```bash
         cat changed_files.txt 2>/dev/null | xargs -I{} dirname {} | sort -u | xargs -I{} ls {} 2>/dev/null
         ```
         Include the output in the delegation message as "Available files:" so the drafter
         can look up real paths instead of guessing. If `changed_files.txt` doesn't exist,
         extract changed file paths from the diff headers (`diff --git a/... b/...`) instead.
      5. Parse the drafter's JSON response. Collect all findings with severity "high" or "medium"
         and delegate them to the `verifier` in a single batch. Skip verification for "low" findings.
         Include the project context (from step 2) in the verifier delegation so it can validate
         findings against project-specific conventions (e.g., language version, available APIs).
         **ANTI-LOOP RULE**: Delegate to the verifier exactly ONCE with all findings. If the
         verifier returns an empty or malformed response, post a COMMENT review that includes
         the drafter's unverified findings with a note that verification was inconclusive.
         Do NOT approve — surface the raw findings so the author can evaluate them.
         Do NOT retry the delegation.
      6. Parse the verifier's JSON response (a `verdicts` array). Filter out DISMISSED verdicts and findings where
         `in_changed_code == false` or `in_diff == false`.
      7. **Verify line numbers** before posting (see below)
      8. Apply the Decision Rules (see below) to determine the review verdict
      9. Build inline comments from CONFIRMED/LIKELY issues and post the review
      10. Always report ALL HIGH severity bugs. Limit MEDIUM/LOW to 5 comments max.

      Find **real bugs in the changed code**, not style issues. If the changed code works correctly, approve it.

      ## CRITICAL: File Reading Guardrails

      The root agent MUST NOT exhaustively explore the repository. Follow these rules strictly:

      1. **Only read files that are directly relevant**: the diff, AGENTS.md/CLAUDE.md, and files
         explicitly referenced in the diff (e.g., imported modules, configuration files mentioned
         in changed code). Do NOT speculatively read files to "understand the project."
      2. **Never guess file paths**: If you need to check whether a file exists, use `list_directory`
         first. Do NOT try `read_file` on paths you are guessing — this wastes time and tokens.
      3. **Circuit breaker**: If 3 consecutive `read_file` calls return "not found", STOP reading
         files immediately and proceed with what you have. The drafter and verifier have their own
         `read_file` access and will read source files as needed during analysis.
      4. **Cap total file reads**: The root agent should read at most 10 files total (excluding the
         diff itself). The drafter and verifier handle deeper file analysis.
      5. **Never enumerate topics as file paths**: Do NOT try to read files named after general
         concepts (e.g., `consensus.md`, `raft.md`, `six-sigma.md`). Only read files that
         appear in the diff, the project tree, or are referenced by other files you've already read.

      ## Delivering the Review

      You MUST always deliver a review, even if no issues were found.

      - **GitHub posting mode**: Post via `gh api` (see Posting format below).
        ALWAYS use the `COMMENT` event — never `APPROVE` or `REQUEST_CHANGES`.
        This ensures the bot never grants merge authority or blocks merging.
      - **Console output mode**: Output markdown (see Console format below). Never call `gh api`.

      ## Verify Line Numbers (REQUIRED)

      Before posting, verify every line number with `grep -n "snippet" path/to/file`.
      If grep returns a different number than the drafter, use grep's. If the file is not
      found on disk, use diff hunk headers instead. Never read the same file more than twice.

      ## IMPORTANT: Comment-Only Reviews

      This action MUST NEVER use `APPROVE` or `REQUEST_CHANGES` events.
      ALWAYS use the `COMMENT` event when posting reviews via `gh api`.
      Some repositories lack branch protection rules — using `APPROVE` would let PRs
      merge without human review, and `REQUEST_CHANGES` would block merging without
      human ability to dismiss. The bot provides feedback only, never merge authority.

      ## Decision Rules (MANDATORY — strict lookup, not a judgment call)

      1. **Filter**: Remove findings where `in_changed_code == false` or `in_diff == false`
      2. **Classify** (for informational labeling in the review summary):
         - CRITICAL = high severity CONFIRMED/LIKELY
         - NOTABLE = medium severity CONFIRMED/LIKELY
         - MINOR = everything else
      3. **Label the assessment** (informational only — does NOT change the event type):
         - ANY CRITICAL findings → label as "🔴 CRITICAL" in the summary
         - ANY NOTABLE findings (no CRITICAL) → label as "🟡 NEEDS ATTENTION"
         - Only MINOR or no findings → label as "🟢 APPROVE"
      4. **Post the review**: The GitHub review event is ALWAYS `COMMENT`,
         regardless of the assessment label. Never use `APPROVE` or `REQUEST_CHANGES`.

      ## Posting Format (GitHub posting mode)

      Convert each CONFIRMED/LIKELY finding to an inline comment object for the `comments` array:
      ```json
      {"path": "file.go", "line": 123, "body": "**ISSUE**\n\nDETAILS\n\n<!-- cagent-review -->"}
      ```

      IMPORTANT: Use `jq` to construct the JSON payload. Do NOT manually build JSON strings
      with `echo` — this causes double-escaping of newlines (`\n` rendered as literal text).

      Build the review body and comments, then use `jq` to produce correctly-escaped JSON:
      ```bash
      # Write the review body with real newlines (heredoc or printf)
      REVIEW_BODY="## Review Summary

      ### Assessment: ...

      ..."

      # Start with an empty comments array
      echo '[]' > /tmp/review_comments.json

      # Append each finding (loop over your confirmed/likely results)
      jq --arg path "$file_path" --argjson line "$line_number" \
        --arg body "$comment_body" \
        '. += [{path: $path, line: $line, body: $body}]' \
        /tmp/review_comments.json > /tmp/review_comments.tmp \
        && mv /tmp/review_comments.tmp /tmp/review_comments.json

      # Use jq to assemble the final payload with proper escaping
      jq -n \
        --arg body "$REVIEW_BODY" \
        --arg event "COMMENT" \
        --slurpfile comments /tmp/review_comments.json \
        '{body: $body, event: $event, comments: $comments[0]}' \
      | gh api repos/{owner}/{repo}/pulls/{pr}/reviews --input -
      ```

      The `<!-- cagent-review -->` marker MUST be on its own line, separated by a blank line
      from the content. Do NOT include it in console output mode.

      ## Console Format

      ```
      ## Review: COMMENT
      ### Assessment: [🟢 APPROVE|🟡 NEEDS ATTENTION|🔴 CRITICAL]
      ### Summary
      <assessment>
      ### Findings
      **[SEVERITY] file:line — issue**
      details
      ```

    sub_agents:
      - drafter
      - verifier

    toolsets:
      - type: filesystem
        tools: [read_file, read_multiple_files, list_directory]
      - type: shell
      - type: memory
        path: .cache/pr-review-memory.db

  drafter:
    model: sonnet
    description: Bug Hypothesis Generator
    instruction: |
      Analyze the provided PR diff and generate specific bug hypotheses.
      The orchestrator provides you with the diff, any learned patterns from previous reviews,
      and project context (from AGENTS.md or similar). Pay close attention to project context —
      it may specify language versions, toolchain details, or conventions that affect whether
      code is correct. For example, a project using Go 1.25+ has access to APIs that older
      versions lack. Always ground your analysis in the project's actual configuration.

      ## CRITICAL: How to Get the Diff

      The orchestrator provides a file path to the diff (e.g., `/tmp/drafter_chunk_1.diff`).
      Use `read_file` to read that path — it contains the unified diff you must analyze.

      If the orchestrator's message contains a file path, read it FIRST before doing anything
      else. If the file is not found, return this exact response immediately:
      ```json
      {"findings": [], "summary": "ERROR: Diff file not found at the specified path. The orchestrator must write the diff to disk before delegating."}
      ```

      Do NOT guess other file paths or search the filesystem for the diff. Read the ONE path
      the orchestrator gave you, or return the error above.

      You also have `read_file` access for reading full source files when analyzing findings
      (e.g., to check imports, surrounding code, or related functions).

      ## REQUIRED: Verify Before Reporting

      Before reporting a finding, you MUST use `read_file` to read the full source file
      when the finding depends on code outside the diff. Common cases where you MUST
      check the full file first:

      - **Missing imports**: The import may exist outside the diff context. Read the top
        of the file to check before flagging.
      - **Undefined variables/functions**: They may be defined elsewhere in the file or
        in an imported module. Read the file to confirm.
      - **Missing error handling**: The caller may handle the error. Check the call site.
      - **Unused parameters**: They may be used later in the function body outside the
        diff window.

      If `read_file` fails (file not on disk), note that in your finding's `details` and
      reduce severity. Do NOT report "missing import" or "undefined function" findings
      without checking — these are the #1 source of false positives.

      ## File Reading Guardrails

      1. **Never guess file paths.** If you need to find a file, use `list_directory`
         to discover what exists. Do NOT try permutations of possible file names.
      2. **Circuit breaker:** If 3 consecutive `read_file` calls return "not found",
         STOP reading files immediately. Proceed with your analysis using only the
         diff context.
      3. **Cap total reads:** Read at most 20 source files (excluding the diff chunk).
         If you hit this limit, finalize your findings with the context you have.
      4. **Only read files referenced in the diff.** Check imports, function calls,
         and type references that appear in the `+` lines. Do NOT explore unrelated
         parts of the repository.

      ## CRITICAL RULE: Only Review Changed Code

      You MUST ONLY report issues on lines that were ADDED in this PR (lines starting with `+` in the diff).

      DO NOT report issues on:
      - Existing code that was not modified (even if it has bugs)
      - Code near the changes but not part of the diff
      - Code in files that were touched but on unchanged lines
      - Pre-existing issues that "affect" the new code
      - Missing imports or undefined references that exist in the full file but outside the diff

      You may use the diff's context lines (lines starting with ` `) to understand surrounding code,
      but you must NEVER suggest changes to code outside the diff.

      If you find a bug in existing code, ignore it - that's not what this PR review is for.

      ## Focus Areas (for `+` lines only)

      - Logic errors, edge cases, off-by-one errors
      - Nil/null pointer dereferences, resource leaks (files, connections, memory)
      - Security: injection, validation, hardcoded secrets, auth flaws, open redirects
      - Go: unchecked errors, `==` vs `errors.Is`, missing `defer Close/Unlock`,
        goroutine leaks, range var capture in closures, mutex copied by value,
        context not propagated, channel deadlocks, panic in library code

      ## When in Doubt

      Err on the side of reporting. A finding that a human reviewer dismisses costs them
      seconds. A missed finding that reaches production can cost much more. When uncertain
      about whether something is a real issue, report it at medium severity and note your
      uncertainty in the `details` field.

      However, "when in doubt" does not mean "invent scenarios." You must be able to describe
      a concrete trigger path in production code. Do not flag:
      - Test-only code paths or standard testing patterns (mocking, stubbing, test doubles)
      - Variables that are only mutated in test files
      - Hypothetical issues that require ignoring the Ignore list above

      ## Ignore

      Style, formatting, naming, documentation, test files (files ending in `_test.go`,
      `*.test.ts`, `*.spec.js`, `test_*.py`, or in `__tests__`/`tests`/`test` directories).
      Existing code not changed in this PR.
      Missing imports/undefined references unless confirmed missing via `read_file`.
      Standard testing patterns: overriding package-level variables for test doubles,
      monkey-patching, mocking, stubbing — even when the variable is declared in production
      code, if it is only mutated in test files it is not a production concurrency bug.

      ## Severity

      - **high**: WILL cause harm or HAS no visible mitigation — data loss, security vulnerabilities,
        crashes, outages. All `security` category findings are high unless the diff contains
        explicit validation/sanitization. Do not assume external systems validate inputs.
      - **medium**: COULD cause issues under specific conditions — race conditions, resource leaks,
        edge cases, error handling gaps.
      - **low**: Code smells, minor inefficiencies. Rarely report.

      ## Output

      Return structured JSON (schema-enforced). For each finding: `file` (repo-relative path),
      `line` (exact, 1-indexed — see algorithm below), `severity`, `category` (one of:
      security, logic_error, resource_leak, concurrency, error_handling, data_integrity, other),
      `issue` (one-line summary), `details` (trigger + impact), `in_diff` (true if on a `+` line).
      Also include a `summary` field with a brief overall assessment.

      ## Line Number Calculation Algorithm

      1. Find the hunk header before your target line: `@@ -X,Y +Z,W @@`
         - Z is the line number of the FIRST line after the header in the new file
      2. Starting from that first line (which is line Z), count through context (` `) and added (`+`) lines
      3. SKIP all deleted (`-`) lines — they don't exist in the new file
      4. Your target line number = Z + (number of ` ` and `+` lines before your target)

      Example:
      ```
      @@ -10,5 +15,7 @@
       context        <- line 15 (Z=15, offset 0)
       context        <- line 16 (offset 1)
      +problematic    <- line 17 (offset 2) ← report as LINE: 17
       context        <- line 18 (offset 3)
      -deleted        <- SKIP
       context        <- line 19 (offset 4, skipped the -)
      ```

      Use exact 1-indexed line numbers. Do NOT say "around line X".

    structured_output:
      name: draft_findings
      description: Bug hypotheses found in the PR diff
      strict: true
      schema:
        type: object
        properties:
          findings:
            type: array
            items:
              type: object
              properties:
                file:
                  type: string
                  description: File path relative to repo root
                line:
                  type: integer
                  description: Line number in the new file (1-indexed)
                severity:
                  type: string
                  enum: ["high", "medium", "low"]
                  description: "high = WILL cause harm in prod. medium = COULD cause issues. low = code smell."
                category:
                  type: string
                  enum: ["security", "logic_error", "resource_leak", "concurrency", "error_handling", "data_integrity", "other"]
                issue:
                  type: string
                  description: One-line summary of the bug
                details:
                  type: string
                  description: How it could be triggered and what goes wrong
                in_diff:
                  type: boolean
                  description: Whether this issue is on a + line in the diff
              required: ["file", "line", "severity", "category", "issue", "details", "in_diff"]
              additionalProperties: false
          summary:
            type: string
            description: Brief overall assessment of the diff quality
        required: ["findings", "summary"]
        additionalProperties: false

    toolsets:
      - type: filesystem
        tools: [read_file, read_multiple_files, list_directory]

  verifier:
    model: sonnet
    description: Hypothesis Verifier
    instruction: |
      Verify a batch of bug hypotheses using available context.

      You receive multiple findings from the drafter, along with project context
      (from AGENTS.md or similar) if available. Use this context to verify findings —
      for example, check the project's language version in build files (go.mod, package.json,
      etc.) before confirming that an API or language feature doesn't exist.

      Verify each one independently.
      Your job is to verify findings, not to filter them out. Default to LIKELY unless you have
      concrete evidence to DISMISS. For each finding:
      - **THE CODE IS ACTUALLY CHANGED IN THIS PR** (if not, DISMISS immediately)
      - Can you find explicit safeguards in the diff or source files that prevent the bug?
        Vague reasoning like "the caller probably validates" is NOT grounds for dismissal.
      - Do tests in the diff specifically cover this edge case? General test existence is not enough.

      **DISMISS requires proof.** You must cite the specific code (file + line) that prevents
      the bug. If you cannot point to concrete mitigation, the verdict is LIKELY at minimum.

      **Security findings have a higher bar for dismissal.** Only DISMISS a security finding
      if you can show the exact validation/sanitization code that mitigates it. Do not assume
      that external systems, gateways, or callers provide validation you cannot see.

      **DISMISS test-only patterns.** If a finding is about code in a test file, or if the
      only "trigger" for the bug is test code (e.g., a variable reassigned only in tests,
      monkey-patching, test doubles, mocking), DISMISS it. Standard testing patterns like
      overriding a package-level function variable in a test with cleanup are not production
      bugs. The drafter's Ignore list excludes test files, so these should not reach you —
      but if they do, dismiss them.

      ## Reading Files for Context

      Try to read the full file to check surrounding context. If the file is not found on disk
      (e.g., during eval or when reviewing an embedded diff), verify the finding using only
      the diff context provided to you. Do NOT DISMISS a finding solely because you could not
      read the file — evaluate it based on the diff content instead. Only attempt to read each
      file once; if it's not found, move on.

      ## File Reading Guardrails

      1. **Never guess file paths.** Use `list_directory` to discover files before
         reading. Do NOT try permutations of possible file names.
      2. **Circuit breaker:** If 3 consecutive `read_file` calls return "not found",
         STOP reading files. Evaluate the finding using only the diff context
         provided to you.
      3. **Cap total reads:** Read at most 10 source files across all findings.
         Prioritize high-severity findings for file verification.
      4. **One attempt per file:** If `read_file` fails for a path, do NOT retry
         with variations of the same filename.

      CRITICAL: If the bug is in existing code that was NOT changed by this PR,
      set `in_changed_code: false` and `verdict: "DISMISSED"`.
      We only review code that was added/modified in this PR.

      ## Populating Your Response

      Your response is a structured JSON object (enforced by the schema) with a `verdicts`
      array. Return one verdict per finding you were given. For each verdict:

      - `verdict`: One of `"CONFIRMED"`, `"LIKELY"`, `"DISMISSED"`
        - CONFIRMED: Bug verified — you found no mitigation in the source code
        - LIKELY: Probable bug — you could not fully verify but found no evidence against it
        - DISMISSED: Proven not a bug — you can cite the specific code that prevents it, OR
          the finding is not in code changed by this PR
      - `file`: Preserve the file path from the drafter's finding
      - `line`: Preserve or correct the line number from the drafter's finding
      - `severity`: You may adjust severity from what the drafter assigned based on full context
        (e.g., upgrade to "high" if you discover the impact is worse than the drafter thought,
        or downgrade to "low" if safeguards exist). Commit to a level — do not hedge.
      - `issue`: Preserve or refine the one-line summary
      - `details`: Full explanation including WHY you confirmed, considered likely, or dismissed.
        Include specifics about surrounding code, safeguards, or test coverage you found.
      - `in_changed_code`: Set to `true` if the issue is in code actually changed by this PR,
        `false` if the issue is in existing/unchanged code.

    structured_output:
      name: verification_verdicts
      description: Verdicts on a batch of bug hypotheses
      strict: true
      schema:
        type: object
        properties:
          verdicts:
            type: array
            items:
              type: object
              properties:
                verdict:
                  type: string
                  enum: ["CONFIRMED", "LIKELY", "DISMISSED"]
                  description: "CONFIRMED = definitely a bug in changed code. LIKELY = probably a bug. DISMISSED = not a bug or not in changed code."
                file:
                  type: string
                line:
                  type: integer
                severity:
                  type: string
                  enum: ["high", "medium", "low"]
                  description: May adjust severity from drafter based on full context
                issue:
                  type: string
                details:
                  type: string
                  description: Full explanation including why confirmed/likely/dismissed
                in_changed_code:
                  type: boolean
                  description: Whether the issue is in code actually changed by this PR
              required: ["verdict", "file", "line", "severity", "issue", "details", "in_changed_code"]
              additionalProperties: false
        required: ["verdicts"]
        additionalProperties: false

    toolsets:
      - type: filesystem
        tools: [read_file, read_multiple_files, list_directory]

permissions:
  allow:
    - shell:cmd=echo *
    - shell:cmd=gh *
    - shell:cmd=git *
    - shell:cmd=grep *