Skip to content

Commit e4874d1

Browse files
committed
chore: dirty-worktree checkpoint (dspy copro optimizer + benchmarks retrieval barrel)
1 parent 5d6c422 commit e4874d1

3 files changed

Lines changed: 212 additions & 1 deletion

File tree

packages/benchmarks/lib/src/index.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,9 @@ export {
2020
resolveTier,
2121
} from "./model-tiers.ts";
2222
export * from "./local-llama-cpp.ts";
23+
export {
24+
type RetrievalStageName,
25+
type RetrievalTierDefaults,
26+
RETRIEVAL_DEFAULTS_BY_TIER,
27+
resolveRetrievalDefaults,
28+
} from "./retrieval-defaults.ts";

packages/elizaos/templates-manifest.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"version": "1.0.0",
3-
"generatedAt": "2026-05-11T17:08:56.816Z",
3+
"generatedAt": "2026-05-11T17:10:59.482Z",
44
"repoUrl": "https://github.com/elizaos/eliza",
55
"templates": [
66
{
Lines changed: 205 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,205 @@
1+
/**
2+
* DSPy-style COPRO (Coordinate ascent over Prompt instructions) optimizer.
3+
*
4+
* Loop:
5+
* for round in 1..depth:
6+
* 1. Propose N instruction variants via the teacher LM.
7+
* 2. Score each variant on a held-out subset of the dataset.
8+
* 3. Adopt the highest-scoring variant as the new baseline for the next
8+
* round, but only if it beats the best score seen so far (greedy coordinate ascent).
10+
*
11+
* Returns the best (instructions, demonstrations=[]) pair observed. The
12+
* caller can then re-train demonstrations on top of the winning instructions
13+
* via BootstrapFewshot, which is what MIPRO does internally.
14+
*/
15+
16+
import { Predict } from "../predict.js";
17+
import type { Example } from "../examples.js";
18+
import type { LanguageModelAdapter } from "../lm-adapter.js";
19+
import { Signature } from "../signature.js";
20+
import type {
21+
DspyOptimizerInput,
22+
DspyOptimizerResult,
23+
Metric,
24+
OptimizerLineageEntry,
25+
} from "./types.js";
26+
27+
/** Optional tuning knobs accepted by runDspyCopro; every field has a default. */
export interface DspyCoproOptions {
28+
/** Instruction variants proposed per round. Defaults to 6; values below 1 are raised to 1. */
29+
variants?: number;
30+
/** Rounds of coordinate ascent. Defaults to 3; values below 1 are raised to 1. */
31+
depth?: number;
32+
/** Number of examples sampled once, without replacement, and reused to score every variant. Defaults to all examples. */
33+
evalSubset?: number;
34+
/** Temperature passed to the teacher LM when it proposes instructions. Defaults to 0.8. */
35+
teacherTemperature?: number;
36+
/** Random source used only for evalSubset sampling; supply a seeded function for reproducible runs. Defaults to Math.random. */
37+
rng?: () => number;
38+
}
39+
40+
/** System prompt sent to the teacher LM on every instruction-proposal call. */
const PROPOSAL_SYSTEM = `You are rewriting the INSTRUCTIONS field of a task-prompt signature. The signature declares input fields, output fields, and a natural-language INSTRUCTIONS body. Your job is to produce a new INSTRUCTIONS body that makes a downstream language model perform the task more reliably.
41+
42+
Hard constraints:
43+
- Preserve the semantic contract — the new instructions must still describe the same task and reference the same output fields.
44+
- Keep the rewrite concise (no preamble, no markdown headers, no role-play framing).
45+
- Output ONLY the rewritten instructions body — no fences, no commentary.`;
46+
47+
/**
 * Greedy instruction search: scores the signature's current instructions,
 * then for each round asks the teacher for several rewrites, scores each one,
 * and moves to the best rewrite only when it strictly beats the best score so
 * far.
 *
 * @param input - Signature, dataset, metric, student LM, and optional teacher
 *   LM (the student LM proposes when no teacher is given).
 * @param options - Search-size and sampling knobs; see DspyCoproOptions.
 * @returns The winning instructions (also baked into a new Signature), an
 *   empty demonstrations list, the best and baseline scores, and one lineage
 *   entry per scored or skipped variant plus the round-0 baseline entry.
 */
export async function runDspyCopro(
  input: DspyOptimizerInput,
  options: DspyCoproOptions = {},
): Promise<DspyOptimizerResult> {
  const variantCount = Math.max(1, options.variants ?? 6);
  const roundCount = Math.max(1, options.depth ?? 3);
  const proposer = input.teacher ?? input.lm;
  const proposalTemperature = options.teacherTemperature ?? 0.8;
  const random = options.rng ?? Math.random;

  // The evaluation set is fixed once so every variant is scored on the same examples.
  let evalSet = input.dataset;
  if (typeof options.evalSubset === "number") {
    evalSet = subsample(input.dataset, options.evalSubset, random);
  }

  const evaluate = (text: string): Promise<number> =>
    scoreInstructions(text, input.signature, evalSet, input.lm, input.metric);

  const seedInstructions = input.signature.spec.instructions;
  const baselineScore = await evaluate(seedInstructions);
  const lineage: OptimizerLineageEntry[] = [
    { round: 0, variant: 0, score: baselineScore, notes: "baseline" },
  ];

  let champion = { instructions: seedInstructions, score: baselineScore };
  let workingInstructions = seedInstructions;

  for (let round = 1; round <= roundCount; round += 1) {
    // A round's winner has to beat the overall best, not merely its siblings.
    let roundWinner = workingInstructions;
    let roundTopScore = champion.score;

    for (let variant = 1; variant <= variantCount; variant += 1) {
      const proposal = await proposeInstructions(
        proposer,
        input.signature,
        workingInstructions,
        proposalTemperature,
      );

      if (proposal.trim().length > 0) {
        const score = await evaluate(proposal);
        lineage.push({ round, variant, score });
        if (score > roundTopScore) {
          roundTopScore = score;
          roundWinner = proposal;
        }
      } else {
        // Blank proposals are recorded but never sent for scoring.
        lineage.push({
          round,
          variant,
          score: 0,
          notes: "empty proposal — skipped",
        });
      }
    }

    if (roundTopScore > champion.score) {
      champion = { instructions: roundWinner, score: roundTopScore };
    }
    workingInstructions = roundWinner;
  }

  const optimizedSignature = new Signature({
    ...input.signature.spec,
    instructions: champion.instructions,
  });

  return {
    optimizer: "dspy-copro",
    signature: optimizedSignature,
    instructions: champion.instructions,
    demonstrations: [],
    score: champion.score,
    baselineScore,
    lineage,
  };
}
134+
135+
/**
 * Asks the teacher LM for one rewritten instructions body.
 *
 * The user message lists the signature name, each input and output field
 * (name, type, description), and the current instructions text. The reply is
 * trimmed, and a leading or trailing triple-backtick fence is removed.
 *
 * @param teacher - Adapter whose `generate` produces the rewrite.
 * @param signature - Signature whose spec is summarised in the prompt.
 * @param current - Instructions text the teacher is asked to improve.
 * @param temperature - Sampling temperature forwarded to the teacher.
 * @returns The cleaned rewrite; may be empty if the teacher returned nothing useful.
 */
async function proposeInstructions(
  teacher: LanguageModelAdapter,
  signature: import("../signature.js").Signature,
  current: string,
  temperature: number,
): Promise<string> {
  const { name, inputs, outputs } = signature.spec;

  const promptLines: string[] = [`signature name: ${name}`, "input fields:"];
  for (const field of inputs) {
    promptLines.push(`- ${field.name} (${field.type}): ${field.description}`);
  }
  promptLines.push("output fields:");
  for (const field of outputs) {
    promptLines.push(`- ${field.name} (${field.type}): ${field.description}`);
  }
  promptLines.push("", "Current INSTRUCTIONS body:", current);

  const { text } = await teacher.generate({
    system: PROPOSAL_SYSTEM,
    messages: [{ role: "user", content: promptLines.join("\n") }],
    temperature,
    maxTokens: 1024,
  });

  // Models sometimes wrap the answer in a code fence despite being told not to.
  const withoutOpeningFence = text.trim().replace(/^```[a-z0-9_-]*\s*/i, "");
  return withoutOpeningFence.replace(/\s*```$/i, "").trim();
}
167+
168+
/**
 * Scores one instructions body by running it over every example, one at a
 * time, and averaging the metric.
 *
 * @param instructions - Text passed to Predict as `instructionsOverride`.
 * @param signature - Signature handed to Predict unchanged.
 * @param dataset - Examples to evaluate; an empty dataset scores 0.
 * @param lm - Adapter used to run the predictions.
 * @param metric - Called with the predicted output and `example.outputs`.
 * @returns The sum of finite metric values divided by `dataset.length`, so
 *   failed or non-finite examples count as 0 rather than being excluded.
 */
async function scoreInstructions(
  instructions: string,
  signature: import("../signature.js").Signature,
  dataset: Example[],
  lm: LanguageModelAdapter,
  metric: Metric,
): Promise<number> {
  if (dataset.length === 0) return 0;
  const predict = new Predict({
    signature,
    lm,
    instructionsOverride: instructions,
  });
  let total = 0;
  for (const example of dataset) {
    try {
      const { output } = await predict.forward(example.inputs);
      const value = metric(output, example.outputs);
      // One NaN or infinite value would make the whole average non-finite,
      // and every later `score > best` comparison against NaN is false.
      // Such an example is counted as 0 instead.
      if (Number.isFinite(value)) total += value;
    } catch {
      // Anything thrown by forward() or the metric leaves this example at 0
      // while it still counts in the denominator, so failures lower the score.
    }
  }
  return total / dataset.length;
}
192+
193+
/**
 * Picks `count` distinct elements of `items` using a partial Fisher-Yates
 * shuffle on a copy, so the input array is never mutated.
 *
 * Exactly one `rng()` call is made per picked element, which guarantees
 * termination for any rng (constant, returning 1.0, or returning NaN) and for
 * arrays that contain `undefined` entries.
 *
 * @param items - Source array; left untouched.
 * @param count - Number of elements wanted. Values at or above `items.length`
 *   return a full copy in original order; zero, negative, or NaN return `[]`;
 *   fractional values are rounded up.
 * @param rng - Random source expected to return values in [0, 1).
 * @returns A new array of the selected elements.
 */
function subsample<T>(items: T[], count: number, rng: () => number): T[] {
  const total = items.length;
  if (count >= total) return [...items];

  const take = Math.ceil(count);
  // `!(take > 0)` also catches NaN, which every comparison rejects.
  if (!(take > 0)) return [];

  const pool = [...items];
  for (let i = 0; i < take; i += 1) {
    const remaining = total - i;
    const raw = Math.floor(rng() * remaining);
    // Clamp so an out-of-contract rng value cannot index past the array.
    const offset = Number.isFinite(raw)
      ? Math.min(Math.max(raw, 0), remaining - 1)
      : 0;
    const j = i + offset;
    // Both indices are in bounds by construction, so the assertions are safe.
    const picked = pool[j] as T;
    pool[j] = pool[i] as T;
    pool[i] = picked;
  }
  return pool.slice(0, take);
}

0 commit comments

Comments
 (0)