forked from docker/cagent-action
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathlarge-diff-chunking-1.json
More file actions
26 lines (26 loc) · 5.32 KB
/
large-diff-chunking-1.json
File metadata and controls
26 lines (26 loc) · 5.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
{
"id": "b2c3d4e5-large-chunk-test-001",
"title": "Large diff chunking - agent delegates pre-split chunks (run 1)",
"evals": {
"setup": "apk add --no-cache github-cli && mkdir -p /tmp && python3 -c \"\nimport json, os\n\n# Generate a large diff (>1500 lines) across multiple files\nfiles = {\n    'pkg/api/handlers.go': 400,\n    'pkg/api/middleware.go': 350,\n    'pkg/storage/db.go': 400,\n    'pkg/config/loader.go': 200,\n    'internal/worker/processor.go': 300\n}\n\ndiff_lines = []\nfor filepath, num_lines in files.items():\n    diff_lines.append(f'diff --git a/{filepath} b/{filepath}')\n    diff_lines.append(f'index abc1234..def5678 100644')\n    diff_lines.append(f'--- a/{filepath}')\n    diff_lines.append(f'+++ b/{filepath}')\n    diff_lines.append(f'@@ -1,5 +1,{num_lines} @@')\n    diff_lines.append(' package ' + filepath.split('/')[-2])\n    diff_lines.append(' ')\n    diff_lines.append(' import (')\n    diff_lines.append('+\\t\\\"database/sql\\\"')\n    diff_lines.append('+\\t\\\"fmt\\\"')\n    diff_lines.append('+\\t\\\"net/http\\\"')\n    diff_lines.append(' )')\n    diff_lines.append(' ')\n\n    if 'db.go' in filepath:\n        # Include a real SQL injection bug\n        for i in range(num_lines - 10):\n            if i == 50:\n                diff_lines.append('+// GetUser fetches a user by ID from the database.')\n                diff_lines.append('+func GetUser(db *sql.DB, userID string) (*User, error) {')\n                diff_lines.append('+\\tquery := fmt.Sprintf(\\\"SELECT id, name, email FROM users WHERE id = \\'%s\\'\\\" , userID)')\n                diff_lines.append('+\\trow := db.QueryRow(query)')\n                diff_lines.append('+\\tvar u User')\n                diff_lines.append('+\\tif err := row.Scan(&u.ID, &u.Name, &u.Email); err != nil {')\n                diff_lines.append('+\\t\\treturn nil, fmt.Errorf(\\\"scan user: %w\\\", err)')\n                diff_lines.append('+\\t}')\n                diff_lines.append('+\\treturn &u, nil')\n                diff_lines.append('+}')\n            else:\n                diff_lines.append(f'+// line {i} of {filepath}')\n    else:\n        for i in range(num_lines - 10):\n            diff_lines.append(f'+// line {i} of {filepath}')\n\nfull_diff = '\\n'.join(diff_lines) + '\\n'\nwith open('pr.diff', 'w') as f:\n    f.write(full_diff)\n\nprint(f'Total diff lines: {len(diff_lines)}')\n\n# Split into chunks at file boundaries targeting ~1000 lines\nchunks = []\ncurrent_chunk = []\ncurrent_lines = 0\nfor line in diff_lines:\n    if line.startswith('diff --git') and current_lines > 1000:\n        chunks.append('\\n'.join(current_chunk) + '\\n')\n        current_chunk = []\n        current_lines = 0\n    current_chunk.append(line)\n    current_lines += 1\nif current_chunk:\n    chunks.append('\\n'.join(current_chunk) + '\\n')\n\nfor i, chunk in enumerate(chunks, 1):\n    with open(f'/tmp/drafter_chunk_{i}.diff', 'w') as f:\n        f.write(chunk)\n    print(f'Chunk {i}: {chunk.count(chr(10))} lines')\n\n# Create supporting files\nwith open('changed_files.txt', 'w') as f:\n    for fp in files:\n        f.write(fp + '\\n')\n\nwith open('pr_metadata.json', 'w') as f:\n    json.dump({\n        'title': 'Add API handlers, storage layer, config loader, and worker',\n        'author': {'login': 'testuser'},\n        'body': 'Large feature PR adding multiple packages.',\n        'baseRefName': 'main',\n        'headRefName': 'feature/big-feature'\n    }, f)\n\nprint('Setup complete')\n\"",
"relevance": [
"The agent ran 'echo $GITHUB_ACTIONS' before performing the review to detect the output mode",
"The agent output the review to the console as formatted markdown instead of posting via gh api",
"The agent detected and used pre-split chunk files at /tmp/drafter_chunk_*.diff for delegation rather than splitting the diff itself",
"The drafter response is valid JSON containing a 'findings' array and a 'summary' field",
"At least one finding flags the SQL injection vulnerability in pkg/storage/db.go where user input is interpolated into a SQL query via fmt.Sprintf",
"The SQL injection finding has severity 'high' because unsanitized user input in SQL queries is a critical security vulnerability"
]
},
"messages": [
{
"message": {
"agentName": "",
"message": {
"role": "user",
"content": "Review the following PR.\n\n## PR Information\n- **Title**: Add API handlers, storage layer, config loader, and worker\n- **Author**: testuser\n- **Branch**: feature/big-feature \u2192 main\n- **Files Changed**: 5\n\n## PR Description\nLarge feature PR adding multiple packages: API handlers, middleware, database storage layer, config loader, and background worker processor.\n\n## Changed Files\n\npkg/api/handlers.go\npkg/api/middleware.go\npkg/storage/db.go\npkg/config/loader.go\ninternal/worker/processor.go\n\n---\n\n## Instructions\n\nExecute the review pipeline:\n\n1. **Gather**: Read the pre-fetched `pr.diff` file. If missing, run `gh pr diff` (use the full URL, not just the number)\n2. **Draft**: Delegate to `drafter` agent to generate bug hypotheses\n3. **Verify**: For each hypothesis, delegate to `verifier` agent\n4. **Post**: Aggregate findings and post review via `gh api`\n\nOnly report CONFIRMED and LIKELY findings. Always post as COMMENT (never APPROVE or REQUEST_CHANGES)."
}
}
}
]
}