Skip to content

Commit dcd7bfd

Browse files
committed
dev: setup smithers workflow
1 parent 02e5da1 commit dcd7bfd

20 files changed

Lines changed: 795 additions & 430 deletions

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,6 @@ fixtures/runner_crate/target
5151
.snfoundry_cache
5252

5353
python/starklings_results/
54+
55+
56+
smithers.db*

.smithers/agents.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import { ClaudeCodeAgent, CodexAgent } from "smithers-orchestrator";
2+
import { REPO_ROOT } from "./config";
3+
4+
// Base system prompt shared by the planner, implementer and reviewer agents in this file
// (each passes it as `systemPrompt`). It states the project stack and requires that the
// agent's final response end with a fenced JSON object.
// NOTE(review): the bare `N+` lines inside the literal look like diff-gutter residue from
// the captured page rather than prompt text — confirm against the real file.
const SYSTEM_PROMPT_BASE = `You are working on Cairo Coder, a RAG-based Cairo code generation service.
5+
Stack: Python/FastAPI backend, DSPy for LLM orchestration, PostgreSQL/pgvector.
6+
Package manager: uv (NEVER pip/poetry).
7+
Read CLAUDE.md for full project conventions before making any changes.
8+
9+
CRITICAL OUTPUT REQUIREMENT:
10+
When you have completed your work, you MUST end your response with a JSON object
11+
wrapped in a code fence. The JSON format is specified in your task prompt.`;
12+
13+
// Claude Code agent configured with the "opus" model, the shared base system prompt,
// REPO_ROOT as its working directory, and the "default" permission mode.
// NOTE(review): presumably drives the planning step (prompts/plan.mdx) — confirm against
// the workflow definition, which is not visible here.
export const planner = new ClaudeCodeAgent({
14+
model: "opus",
15+
systemPrompt: SYSTEM_PROMPT_BASE,
16+
cwd: REPO_ROOT,
17+
permissionMode: "default",
18+
});
19+
20+
// Codex agent configured with the "gpt-5.3-codex" model, the shared base system prompt,
// and REPO_ROOT as its working directory.
// NOTE(review): `dangerouslyBypassApprovalsAndSandbox: true` — going by the option name,
// this agent runs without approval prompts or sandboxing; confirm it is only ever run in
// a trusted/isolated checkout.
export const implementer = new CodexAgent({
21+
model: "gpt-5.3-codex",
22+
dangerouslyBypassApprovalsAndSandbox: true,
23+
systemPrompt: SYSTEM_PROMPT_BASE,
24+
cwd: REPO_ROOT,
25+
});
26+
27+
// Claude Code agent with the same options as `planner` ("opus" model, shared base system
// prompt, REPO_ROOT working directory, "default" permission mode).
// NOTE(review): presumably drives the review / final-review steps — confirm against the
// workflow definition, which is not visible here.
export const reviewer = new ClaudeCodeAgent({
28+
model: "opus",
29+
systemPrompt: SYSTEM_PROMPT_BASE,
30+
cwd: REPO_ROOT,
31+
permissionMode: "default",
32+
});

.smithers/config.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
// Pass limit constant. NOTE(review): presumably the upper bound on plan/implement/review
// iterations — its consumer is not visible here; confirm against the workflow definition.
export const MAX_PASSES = 5;
2+
3+
// Current working directory of the process, captured once at module load. Used as the
// `cwd` of every agent in agents.ts.
// NOTE(review): this equals the repository root only when the workflow is launched from
// the repository root — confirm how the workflow is invoked.
export const REPO_ROOT = process.cwd();
4+
5+
// Shell commands for the test and lint checks, frozen with `as const` so the values keep
// their literal string types.
// The same two command strings are also written out verbatim in prompts/review.mdx and
// prompts/final-review.mdx; keep them in sync when changing either place.
export const VERIFICATION_COMMANDS = {
6+
test: "cd python && uv run pytest -v",
7+
lint: "trunk check --fix",
8+
} as const;

.smithers/preload.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
import { mdxPlugin } from "smithers-orchestrator/mdx-plugin";
2+
// Invoked for its side effect at module load.
// NOTE(review): presumably registers a loader so the `.mdx` prompt files can be imported —
// confirm against the smithers-orchestrator documentation.
mdxPlugin();

.smithers/prompts/final-review.mdx

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# Final Review — STRICT GATE — Pass {props.pass}
2+
3+
You are the final quality gate. REFUSE to approve unless ALL criteria are met.
4+
5+
## Completed Tasks
6+
7+
{props.completedTasks || "None"}
8+
9+
## Latest Review
10+
11+
{props.latestReview ?? "No review yet"}
12+
13+
## Verification
14+
15+
Run ALL checks:
16+
17+
```bash
18+
cd python && uv run pytest -v
19+
trunk check --fix
20+
```
21+
22+
## Criteria — ALL must pass
23+
24+
- [ ] ALL tests pass (`uv run pytest` exits 0)
25+
- [ ] ALL lint/type checks pass (`trunk check` exits 0)
26+
- [ ] Implementation matches SPEC.md requirements
27+
- [ ] No dead code left behind (old McpGenerationProgram removed if replaced)
28+
- [ ] New code has unit tests
29+
- [ ] DSPy patterns followed correctly
30+
- [ ] No hardcoded paths or credentials
31+
32+
Set `readyToMoveOn: true` ONLY if you genuinely cannot find ANYTHING to improve.
33+
34+
If `readyToMoveOn: false`, explain exactly what must be fixed — this feeds into the next pass's implement step.
35+
36+
## REQUIRED OUTPUT
37+
38+
{props.schema}

.smithers/prompts/plan.mdx

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
# Plan — Pass {props.pass}
2+
3+
You are a senior Python/DSPy architect. Read the spec and examine the codebase to plan the next atomic unit of work.
4+
5+
## Context
6+
7+
- **Spec file:** {props.specPath}
8+
- **Completed tasks:** {props.completedTasks || "None yet"}
9+
- **Codebase areas:** `python/src/cairo_coder/dspy/`, `python/src/cairo_coder/core/`, `python/tests/`
10+
11+
{props.previousFeedback ? `## Feedback from Previous Pass\n${props.previousFeedback}` : ""}
12+
13+
## Instructions
14+
15+
1. Read CLAUDE.md for project conventions
16+
2. Read the spec at {props.specPath}
17+
3. Examine the codebase — especially the files mentioned in the spec
18+
4. Identify the NEXT smallest atomic unit of work (one signature, one module, one test file). Size it sensibly — neither too small nor too large — so that it forms one logical change for reviewers.
19+
5. Research what exists and what needs to change
20+
6. Write a detailed implementation prompt
21+
22+
If the SPEC contains _CRITICAL_, _REORIENTATION_, or _STEERING_ sections, follow them and prioritize the tasks they describe. These sections are how we steer the work in the right direction.
23+
24+
## Constraints
25+
26+
- Pick ONE atomic unit, not the entire feature
27+
- The implementation prompt must be specific enough for another agent to execute without ambiguity
28+
- List exact file paths for files to create/modify
29+
30+
## REQUIRED OUTPUT
31+
32+
{props.schema}

.smithers/prompts/review.mdx

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Review — Pass {props.pass}
2+
3+
You are a senior code reviewer for Cairo Coder, a Python/FastAPI/DSPy project.
4+
5+
## What Changed
6+
7+
**Task:** {props.taskName}
8+
**Summary:** {props.summary}
9+
**Files changed:** {JSON.stringify(props.filesChanged ?? [])}
10+
**Test output:** {props.testOutput ?? "No test output provided"}
11+
12+
## Instructions
13+
14+
1. Read CLAUDE.md for project conventions
15+
2. Read ALL changed files listed above — do not review blindly
16+
3. Run verification checks in order:
17+
18+
```bash
19+
cd python && uv run pytest -v
20+
trunk check --fix
21+
```
22+
23+
4. Check for:
24+
- Correct DSPy patterns (Signature → Module → factory function)
25+
- Type hints on all functions
26+
- No hardcoded paths
27+
- No duplicated fixtures (all shared fixtures in `tests/conftest.py`)
28+
- Async/await for I/O operations
29+
- Proper structlog usage
30+
31+
## Decision
32+
33+
Set `lgtm: true` ONLY if:
34+
35+
- ALL tests pass
36+
- ALL lint/type checks pass
37+
- Code follows CLAUDE.md conventions
38+
- No security issues (OWASP top 10)
39+
40+
If ANY check fails, set `lgtm: false` and list every specific issue.
41+
42+
## REQUIRED OUTPUT
43+
44+
{props.schema}

.smithers/schemas.ts

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import { z } from "zod";
2+
3+
// Zod schema for a plan result: the chosen task, research notes, a prompt for the
// implementer, and the lists of files to create and modify. `nextSmallestUnit` is
// required but may be null.
// NOTE(review): presumably validates the JSON the planner agent returns for
// prompts/plan.mdx — confirm against the workflow definition.
export const PlanSchema = z.object({
4+
taskName: z.string().describe("Name of the next task to implement"),
5+
research: z
6+
.string()
7+
.describe("What was discovered by examining the codebase"),
8+
implementationPrompt: z
9+
.string()
10+
.describe("Detailed prompt for the implementer"),
11+
filesToCreate: z.array(z.string()).describe("Files that need to be created"),
12+
filesToModify: z.array(z.string()).describe("Files that need to be modified"),
13+
nextSmallestUnit: z
14+
.string()
15+
.nullable()
16+
.describe("Next smallest atomic unit after this task"),
17+
});
18+
19+
// Zod schema for an implementation result: a summary, the files touched, pytest output,
// a commit message, and a nullable pointer to the next unit of work.
// `summary`, `filesChanged` and `testOutput` share their names with props read by
// prompts/review.mdx. NOTE(review): presumably these values are forwarded to the review
// step — confirm against the workflow definition.
export const ImplementSchema = z.object({
20+
summary: z.string().describe("What was implemented"),
21+
filesChanged: z
22+
.array(z.string())
23+
.describe("Files that were created or modified"),
24+
testOutput: z.string().describe("Output from running pytest"),
25+
commitMessage: z.string().describe("Git commit message for this change"),
26+
nextSmallestUnit: z
27+
.string()
28+
.nullable()
29+
.describe("Next smallest atomic unit to implement"),
30+
});
31+
32+
// Zod schema for a review result: an `lgtm` verdict, a free-text summary, and a list of
// specific issues. The `lgtm` field name matches the decision flag that
// prompts/review.mdx asks the reviewer to set.
export const ReviewSchema = z.object({
33+
lgtm: z.boolean().describe("true ONLY if ALL checks pass"),
34+
review: z.string().describe("Summary of the review findings"),
35+
issues: z.array(z.string()).describe("Specific issues found"),
36+
});
37+
38+
// Zod schema for a fix result: what was fixed and which files were modified.
// NOTE(review): no fix prompt is visible in this view — confirm where this schema is used.
export const FixSchema = z.object({
39+
summary: z.string().describe("What was fixed"),
40+
filesChanged: z.array(z.string()).describe("Files that were modified"),
41+
});
42+
43+
// Zod schema for the final-review gate result. `readyToMoveOn` matches the flag that
// prompts/final-review.mdx asks the reviewer to set; `reasoning` is described as feeding
// back into the next pass.
// `qualityScore` accepts any number from 1 to 10 inclusive; there is no `.int()`
// constraint, so fractional scores validate.
export const FinalReviewSchema = z.object({
44+
readyToMoveOn: z.boolean().describe("true ONLY if all criteria met"),
45+
reasoning: z
46+
.string()
47+
.describe("Why ready or not ready — feeds back into next pass"),
48+
qualityScore: z.number().min(1).max(10).describe("Overall quality score"),
49+
});
50+
51+
// Zod schema for run-level bookkeeping: an iteration count, the names of completed tasks,
// and a summary string. Unlike the other schemas in this file, its fields carry no
// `.describe()` text.
// NOTE(review): `totalIterations` accepts any number (no integer or non-negative
// constraint) — confirm whether that is intended.
export const PassTrackerSchema = z.object({
52+
totalIterations: z.number(),
53+
tasksCompleted: z.array(z.string()),
54+
summary: z.string(),
55+
});

.smithers/workflow.db

-172 KB
Binary file not shown.

.smithers/workflow.db-shm

32 KB
Binary file not shown.

0 commit comments

Comments
 (0)