Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
576 changes: 576 additions & 0 deletions .claude/cache/agents/scout/output-20260216_114928.md

Large diffs are not rendered by default.

827 changes: 827 additions & 0 deletions .claude/cache/agents/scout/output-20260216_115215.md

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
root
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,6 @@ fixtures/runner_crate/target
.snfoundry_cache

python/starklings_results/


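smithers.db*
# Workflow state database and its -shm/-wal sidecar files; local runtime state, not source.
.smithers/workflow.db*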
Empty file added .smithers/.gitkeep
Empty file.
32 changes: 32 additions & 0 deletions .smithers/agents.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { ClaudeCodeAgent, CodexAgent } from "smithers-orchestrator";
import { REPO_ROOT } from "./config";

/**
 * System prompt shared by every agent defined in this file.
 *
 * It gives the project context (stack, package manager, where the conventions
 * live) and ends with the output contract: the response must finish with a
 * fenced JSON object whose format comes from the per-task prompt.
 *
 * Written one array element per prompt line; the elements are joined with "\n".
 */
const SYSTEM_PROMPT_BASE = [
  "You are working on Cairo Coder, a RAG-based Cairo code generation service.",
  "Stack: Python/FastAPI backend, DSPy for LLM orchestration, PostgreSQL/pgvector.",
  "Package manager: uv (NEVER pip/poetry).",
  "Read CLAUDE.md for full project conventions before making any changes.",
  "",
  "CRITICAL OUTPUT REQUIREMENT:",
  "When you have completed your work, you MUST end your response with a JSON object",
  "wrapped in a code fence. The JSON format is specified in your task prompt.",
].join("\n");

/**
 * Claude Code agent exported as `planner`.
 *
 * Uses the "opus" model with the shared system prompt, runs with the
 * repository root as its working directory, and keeps the default
 * permission mode.
 *
 * NOTE(review): the options are identical to those of `reviewer`; the two
 * differ only in how the workflow uses them — confirm that is intentional.
 */
export const planner = new ClaudeCodeAgent({
  cwd: REPO_ROOT,
  model: "opus",
  permissionMode: "default",
  systemPrompt: SYSTEM_PROMPT_BASE,
});

/**
 * Codex agent exported as `implementer`.
 *
 * Uses the "gpt-5.3-codex" model with the shared system prompt and the
 * repository root as its working directory.
 *
 * NOTE(review): `dangerouslyBypassApprovalsAndSandbox` is enabled. Going by
 * the option name, this agent runs without approval prompts or sandboxing —
 * confirm it is only ever launched in an isolated, disposable checkout.
 */
export const implementer = new CodexAgent({
  cwd: REPO_ROOT,
  model: "gpt-5.3-codex",
  systemPrompt: SYSTEM_PROMPT_BASE,
  dangerouslyBypassApprovalsAndSandbox: true,
});

/**
 * Claude Code agent exported as `reviewer`.
 *
 * Uses the "opus" model with the shared system prompt, runs with the
 * repository root as its working directory, and keeps the default
 * permission mode.
 */
export const reviewer = new ClaudeCodeAgent({
  cwd: REPO_ROOT,
  model: "opus",
  permissionMode: "default",
  systemPrompt: SYSTEM_PROMPT_BASE,
});
8 changes: 8 additions & 0 deletions .smithers/config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
/**
 * Directory the process was started from, captured once at module load.
 * The agent definitions use it as their `cwd`, so the workflow is expected
 * to be launched from the repository root.
 */
export const REPO_ROOT = process.cwd();

/**
 * Shell commands used to verify a change: the Python test suite (run from
 * the `python/` directory through `uv`) and the trunk linter with auto-fix.
 * Declared `as const` so each command keeps its literal string type.
 */
export const VERIFICATION_COMMANDS = {
  test: "cd python && uv run pytest -v",
  lint: "trunk check --fix",
} as const;

/**
 * Pass limit for the workflow.
 * NOTE(review): presumably the maximum number of plan/implement/review
 * passes — the consumer is not visible from this file; confirm.
 */
export const MAX_PASSES = 5;
2 changes: 2 additions & 0 deletions .smithers/preload.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
import { mdxPlugin } from "smithers-orchestrator/mdx-plugin";
// Invoke the MDX plugin as a side effect of loading this preload module.
// NOTE(review): presumably this registers a loader so the `.mdx` prompt files
// can be imported — confirm against the smithers-orchestrator documentation.
mdxPlugin();
38 changes: 38 additions & 0 deletions .smithers/prompts/final-review.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Final Review — STRICT GATE — Pass {props.pass}

You are the final quality gate. REFUSE to approve unless ALL criteria are met.

## Completed Tasks

{props.completedTasks || "None"}

## Latest Review

{props.latestReview ?? "No review yet"}

## Verification

Run ALL checks:

```bash
cd python && uv run pytest -v
trunk check --fix
```

## Criteria — ALL must pass

- [ ] ALL tests pass (`uv run pytest` exits 0)
- [ ] ALL lint/type checks pass (`trunk check` exits 0)
- [ ] Implementation matches SPEC.md requirements
- [ ] No dead code left behind (old McpGenerationProgram removed if replaced)
- [ ] New code has unit tests
- [ ] DSPy patterns followed correctly
- [ ] No hardcoded paths or credentials

Set `readyToMoveOn: true` ONLY if you genuinely cannot find ANYTHING to improve.

If `readyToMoveOn: false`, explain exactly what must be fixed — this feeds into the next pass's implement step.

## REQUIRED OUTPUT

{props.schema}
41 changes: 41 additions & 0 deletions .smithers/prompts/fix.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Fix Review Issues — Pass {props.pass}

You are a senior Python engineer fixing code review issues in Cairo Coder.

## Task

### {props.taskName}

## Issues to Fix

{
  /* Numbered list of review issues. The length is checked explicitly because an
     empty array maps and joins to "", which `??` would not replace, leaving the
     section blank instead of showing the fallback text. */
  props.issues?.length
    ? props.issues.map((issue, i) => `${i + 1}. ${issue}`).join("\n")
    : "No issues listed"
}

{props.reviewFeedback ? `## Reviewer Notes\n${props.reviewFeedback}` : ""}

## Instructions

1. Read CLAUDE.md for project conventions
2. Fix each issue listed above
3. Do NOT introduce new features — only fix the reported issues

## Verification

After fixing, run:

```bash
cd python && uv run pytest -v
trunk check --fix
```

## GIT COMMIT RULES

- Make atomic commits — one fix per commit
- Format: `🐛 fix(scope): what was fixed`
- Examples:
- `🐛 fix(dspy): add missing type hint on SkillGenerationProgram.forward()`
- `🐛 fix(pipeline): correct skill field name in MCP branch`
- `git add` the specific files changed, then `git commit`

## REQUIRED OUTPUT

{props.schema}
54 changes: 54 additions & 0 deletions .smithers/prompts/implement.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Implement — Pass {props.pass}

You are a senior Python engineer implementing a change in Cairo Coder.

## Task

### {props.taskName}

{props.implementationPrompt}

{
  /* Optional recap of the previous implementation step. `nextSmallestUnit` is
     nullable in ImplementSchema, so fall back to readable text instead of
     interpolating the literal string "null" into the prompt. */
  props.previousWork
    ? `## Previous Work\nPrevious implementation did: ${props.previousWork.summary}\nNext smallest unit: ${props.previousWork.nextSmallestUnit ?? "None specified"}`
    : ""
}

{props.reviewFixes ? `## Review Fixes Applied\n${props.reviewFixes}` : ""}

## Files

- **Create:** {JSON.stringify(props.filesToCreate ?? [])}
- **Modify:** {JSON.stringify(props.filesToModify ?? [])}

## Instructions

1. Read CLAUDE.md for project conventions
2. Read the existing files before modifying them
3. Implement the SMALLEST atomic unit described above
4. Follow DSPy patterns: Signatures → Modules → Programs
5. Use type hints everywhere (enforced by mypy)
6. Use `structlog` for logging: `get_logger(__name__)`

## Verification

After implementing, run:

```bash
cd python && uv run pytest -v
trunk check --fix
```

## GIT COMMIT RULES

- Make atomic commits — one logical change per commit
- Commit EACH smallest unit of work separately, do NOT batch
- Use emoji prefixes: 🐛 fix, ♻️ refactor, 🧪 test, ⚡ perf, ✨ feat
- Format: `EMOJI type(scope): description`
- Examples:
- `✨ feat(dspy): add SkillGeneration signature`
- `🧪 test(dspy): add unit tests for SkillGenerationProgram`
- `♻️ refactor(pipeline): replace McpGenerationProgram with SkillGenerationProgram`
- `git add` the specific files changed, then `git commit` with the emoji message

## REQUIRED OUTPUT

{props.schema}
32 changes: 32 additions & 0 deletions .smithers/prompts/plan.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Plan — Pass {props.pass}

You are a senior Python/DSPy architect. Read the spec and examine the codebase to plan the next atomic unit of work.

## Context

- **Spec file:** {props.specPath}
- **Completed tasks:** {props.completedTasks || "None yet"}
- **Codebase areas:** `python/src/cairo_coder/dspy/`, `python/src/cairo_coder/core/`, `python/tests/`

{props.previousFeedback ? `## Feedback from Previous Pass\n${props.previousFeedback}` : ""}

## Instructions

1. Read CLAUDE.md for project conventions
2. Read the spec at {props.specPath}
3. Examine the codebase — especially the files mentioned in the spec
4. Identify the NEXT smallest atomic unit of work (one signature, one module, one test file). Size it sensibly — neither trivially small nor too large — so that each unit forms a logical, self-contained change for reviewers.
5. Research what exists and what needs to change
6. Write a detailed implementation prompt

If the spec contains _CRITICAL_, _REORIENTATION_, or _STEERING_ sections, follow them and prioritize the tasks they describe. These sections are how we steer the work in the right direction.

## Constraints

- Pick ONE atomic unit, not the entire feature
- The implementation prompt must be specific enough for another agent to execute without ambiguity
- List exact file paths for files to create/modify

## REQUIRED OUTPUT

{props.schema}
44 changes: 44 additions & 0 deletions .smithers/prompts/review.mdx
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# Review — Pass {props.pass}

You are a senior code reviewer for Cairo Coder, a Python/FastAPI/DSPy project.

## What Changed

**Task:** {props.taskName}
**Summary:** {props.summary}
**Files changed:** {JSON.stringify(props.filesChanged ?? [])}
**Test output:** {props.testOutput ?? "No test output provided"}

## Instructions

1. Read CLAUDE.md for project conventions
2. Read ALL changed files listed above — do not review blindly
3. Run verification checks in order:

```bash
cd python && uv run pytest -v
trunk check --fix
```

4. Check for:
- Correct DSPy patterns (Signature → Module → factory function)
- Type hints on all functions
- No hardcoded paths
- No duplicated fixtures (all shared fixtures in `tests/conftest.py`)
- Async/await for I/O operations
- Proper structlog usage

## Decision

Set `lgtm: true` ONLY if:

- ALL tests pass
- ALL lint/type checks pass
- Code follows CLAUDE.md conventions
- No security issues (OWASP top 10)

If ANY check fails, set `lgtm: false` and list every specific issue.

## REQUIRED OUTPUT

{props.schema}
55 changes: 55 additions & 0 deletions .smithers/schemas.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import { z } from "zod";

/**
 * Zod schema for a plan result: the task to do next, what was learned from
 * the codebase, the prompt handed to the implementer, and the files involved.
 *
 * `nextSmallestUnit` must be present but may be `null`.
 */
export const PlanSchema = z.object({
  taskName: z.string().describe("Name of the next task to implement"),
  research: z.string().describe("What was discovered by examining the codebase"),
  implementationPrompt: z.string().describe("Detailed prompt for the implementer"),
  filesToCreate: z.string().array().describe("Files that need to be created"),
  filesToModify: z.string().array().describe("Files that need to be modified"),
  nextSmallestUnit: z
    .nullable(z.string())
    .describe("Next smallest atomic unit after this task"),
});

/**
 * Zod schema for an implementation result: a summary of the change, the files
 * touched, the pytest output, and the commit message that was used.
 *
 * `nextSmallestUnit` must be present but may be `null`.
 */
export const ImplementSchema = z.object({
  summary: z.string().describe("What was implemented"),
  filesChanged: z.string().array().describe("Files that were created or modified"),
  testOutput: z.string().describe("Output from running pytest"),
  commitMessage: z.string().describe("Git commit message for this change"),
  nextSmallestUnit: z
    .nullable(z.string())
    .describe("Next smallest atomic unit to implement"),
});

/**
 * Zod schema for a review result: the approval flag, a prose summary of the
 * findings, and the list of specific issues.
 */
export const ReviewSchema = z.object({
  lgtm: z.boolean().describe("true ONLY if ALL checks pass"),
  review: z.string().describe("Summary of the review findings"),
  issues: z.string().array().describe("Specific issues found"),
});

/**
 * Zod schema for a fix result: what was fixed and which files were modified.
 */
export const FixSchema = z.object({
  summary: z.string().describe("What was fixed"),
  filesChanged: z.string().array().describe("Files that were modified"),
});

/**
 * Zod schema for the final review gate: the go/no-go flag, the reasoning
 * behind it, and an overall quality score constrained to 1–10 inclusive.
 */
export const FinalReviewSchema = z.object({
  readyToMoveOn: z.boolean().describe("true ONLY if all criteria met"),
  reasoning: z
    .string()
    .describe("Why ready or not ready — feeds back into next pass"),
  // gte/lte are the inclusive bounds, equivalent to min/max on a number schema.
  qualityScore: z.number().gte(1).lte(10).describe("Overall quality score"),
});

/**
 * Zod schema for pass-tracking data: an iteration count, the list of
 * completed task names, and a summary string.
 *
 * Unlike the other schemas in this file, its fields carry no `.describe()`
 * text.
 */
export const PassTrackerSchema = z.object({
  totalIterations: z.number(),
  tasksCompleted: z.string().array(),
  summary: z.string(),
});
Binary file added .smithers/workflow.db
Binary file not shown.
Binary file added .smithers/workflow.db-shm
Binary file not shown.
Binary file added .smithers/workflow.db-wal
Binary file not shown.
Loading