/**
 * DSPy-style COPRO (Coordinate ascent over Prompt instructions) optimizer.
 *
 * Loop:
 *   for round in 1..depth:
 *     1. Propose N instruction variants via the teacher LM.
 *     2. Score each variant on a held-out subset of the dataset.
 *     3. If the highest-scoring variant beats the best score seen so far,
 *        keep it as the new baseline for the next round (greedy coordinate
 *        ascent); otherwise the previous baseline carries over.
 *
 * Returns the best (instructions, demonstrations=[]) pair observed. The
 * caller can then re-train demonstrations on top of the winning instructions
 * via BootstrapFewshot, which is what MIPRO does internally.
 */
| 15 | + |
| 16 | +import { Predict } from "../predict.js"; |
| 17 | +import type { Example } from "../examples.js"; |
| 18 | +import type { LanguageModelAdapter } from "../lm-adapter.js"; |
| 19 | +import { Signature } from "../signature.js"; |
| 20 | +import type { |
| 21 | + DspyOptimizerInput, |
| 22 | + DspyOptimizerResult, |
| 23 | + Metric, |
| 24 | + OptimizerLineageEntry, |
| 25 | +} from "./types.js"; |
| 26 | + |
/** Tuning knobs for `runDspyCopro`. Every field is optional. */
export interface DspyCoproOptions {
  /** Instruction variants proposed per round. Defaults to 6; values below 1 are raised to 1. */
  variants?: number;
  /** Rounds of coordinate ascent. Defaults to 3; values below 1 are raised to 1. */
  depth?: number;
  /** Number of examples each variant is scored on. Defaults to all examples. */
  evalSubset?: number;
  /** Teacher temperature for instruction proposal. Defaults to 0.8. */
  teacherTemperature?: number;
  /**
   * Random source used only when sampling `evalSubset`; expected to return
   * values in [0, 1). Defaults to `Math.random`, which is NOT reproducible —
   * pass a seeded generator when the sampled subset must be deterministic.
   */
  rng?: () => number;
}
| 39 | + |
/**
 * System prompt sent to the teacher LM when requesting a rewritten
 * INSTRUCTIONS body. The text is consumed verbatim by the model, so it must
 * contain nothing but the prompt itself (one intro paragraph, a blank line,
 * then the constraint list).
 */
const PROPOSAL_SYSTEM = `You are rewriting the INSTRUCTIONS field of a task-prompt signature. The signature declares input fields, output fields, and a natural-language INSTRUCTIONS body. Your job is to produce a new INSTRUCTIONS body that makes a downstream language model perform the task more reliably.

Hard constraints:
- Preserve the semantic contract — the new instructions must still describe the same task and reference the same output fields.
- Keep the rewrite concise (no preamble, no markdown headers, no role-play framing).
- Output ONLY the rewritten instructions body — no fences, no commentary.`;
| 46 | + |
/**
 * Runs greedy coordinate ascent over the signature's instructions.
 *
 * The baseline instructions are scored first (lineage round 0). Then, for
 * each of `depth` rounds, `variants` rewrites are requested from the teacher
 * (`input.teacher`, falling back to `input.lm`), each seeded with the best
 * instructions known at the start of that round. Every non-empty rewrite is
 * scored with `input.lm` and `input.metric` on the evaluation set, and a
 * rewrite replaces the incumbent only when its score is strictly higher.
 *
 * @param input   Signature, dataset, metric and language model(s) to optimize with.
 * @param options Optional tuning knobs; see {@link DspyCoproOptions}.
 * @returns The winning instructions (also baked into a new `Signature`), an
 *          empty demonstrations list, the best and baseline scores, and one
 *          lineage entry per baseline/proposal.
 */
export async function runDspyCopro(
  input: DspyOptimizerInput,
  options: DspyCoproOptions = {},
): Promise<DspyOptimizerResult> {
  const variants = coercePositiveInt(options.variants, 6);
  const depth = coercePositiveInt(options.depth, 3);
  const teacher = input.teacher ?? input.lm;
  const teacherTemperature = options.teacherTemperature ?? 0.8;
  const rng = options.rng ?? Math.random;
  const lineage: OptimizerLineageEntry[] = [];

  // The evaluation set is drawn once so every candidate is scored on the
  // same examples and scores stay comparable across rounds.
  const heldOut =
    typeof options.evalSubset === "number"
      ? subsample(input.dataset, options.evalSubset, rng)
      : input.dataset;

  const evaluate = (instructions: string): Promise<number> =>
    scoreInstructions(
      instructions,
      input.signature,
      heldOut,
      input.lm,
      input.metric,
    );

  const baselineInstructions = input.signature.spec.instructions;
  const baselineScore = await evaluate(baselineInstructions);
  lineage.push({
    round: 0,
    variant: 0,
    score: baselineScore,
    notes: "baseline",
  });

  let bestInstructions = baselineInstructions;
  let bestScore = baselineScore;

  for (let round = 1; round <= depth; round += 1) {
    // All proposals within a round are rewrites of the same seed, even if an
    // earlier proposal in this round has already improved on it.
    const seedInstructions = bestInstructions;
    for (let variant = 1; variant <= variants; variant += 1) {
      const candidate = await proposeInstructions(
        teacher,
        input.signature,
        seedInstructions,
        teacherTemperature,
      );
      if (candidate.trim().length === 0) {
        // Nothing to evaluate; record the miss without spending scoring calls.
        lineage.push({
          round,
          variant,
          score: 0,
          notes: "empty proposal — skipped",
        });
        continue;
      }
      const score = await evaluate(candidate);
      lineage.push({ round, variant, score });
      // Strict comparison: ties keep the incumbent instructions.
      if (score > bestScore) {
        bestScore = score;
        bestInstructions = candidate;
      }
    }
  }

  return {
    optimizer: "dspy-copro",
    signature: new Signature({
      ...input.signature.spec,
      instructions: bestInstructions,
    }),
    instructions: bestInstructions,
    demonstrations: [],
    score: bestScore,
    baselineScore,
    lineage,
  };
}

/**
 * Normalizes a user-supplied loop count: missing or non-finite values (NaN,
 * ±Infinity) fall back to `fallback`; anything else is floored and raised to
 * at least 1.
 */
function coercePositiveInt(value: number | undefined, fallback: number): number {
  if (value === undefined || !Number.isFinite(value)) return fallback;
  return Math.max(1, Math.floor(value));
}
| 134 | + |
/**
 * Asks the teacher LM for one rewritten INSTRUCTIONS body.
 *
 * The user message lists the signature name, every input and output field
 * (name, type, description) and the current instructions; `PROPOSAL_SYSTEM`
 * is sent as the system prompt.
 *
 * @param teacher     Model that generates the rewrite.
 * @param signature   Signature whose fields are summarized for the teacher.
 * @param current     Instructions the teacher should improve on.
 * @param temperature Sampling temperature forwarded to the teacher.
 * @returns The rewrite with surrounding whitespace and any wrapping code
 *          fence removed; may be an empty string.
 */
async function proposeInstructions(
  teacher: LanguageModelAdapter,
  signature: Signature,
  current: string,
  temperature: number,
): Promise<string> {
  const describeField = (f: {
    name: unknown;
    type: unknown;
    description: unknown;
  }): string => `- ${f.name} (${f.type}): ${f.description}`;

  const ioSummary = [
    `signature name: ${signature.spec.name}`,
    "input fields:",
    ...signature.spec.inputs.map(describeField),
    "output fields:",
    ...signature.spec.outputs.map(describeField),
    "",
    "Current INSTRUCTIONS body:",
    current,
  ].join("\n");

  const result = await teacher.generate({
    system: PROPOSAL_SYSTEM,
    messages: [{ role: "user", content: ioSummary }],
    temperature,
    maxTokens: 1024,
  });
  return stripCodeFence(result.text);
}

/**
 * Removes a leading and/or trailing ``` fence from a model reply.
 *
 * A language tag after the opening fence is dropped only when it is followed
 * by a line break (or the end of the text). That way a single-line reply
 * such as "```Do X.```" keeps its first word instead of having "Do" mistaken
 * for a language tag.
 */
function stripCodeFence(raw: string): string {
  return raw
    .trim()
    .replace(/^```(?:[a-z0-9_-]*[ \t]*(?:\r?\n|$))?\s*/i, "")
    .replace(/\s*```$/, "")
    .trim();
}
| 167 | + |
/**
 * Computes the mean metric value of `instructions` over `dataset`.
 *
 * A `Predict` is built with the given instructions as an override, then each
 * example is run sequentially and its output is compared with the expected
 * outputs via `metric`.
 *
 * @param instructions Instructions body under evaluation.
 * @param signature    Signature the predictor is built from.
 * @param dataset      Examples to evaluate on.
 * @param lm           Model used for prediction.
 * @param metric       Scores a predicted output against the expected output.
 * @returns The sum of per-example scores divided by `dataset.length`, or 0
 *          for an empty dataset. Examples that throw, or whose metric value
 *          is not a finite number, contribute 0.
 */
async function scoreInstructions(
  instructions: string,
  signature: Signature,
  dataset: Example[],
  lm: LanguageModelAdapter,
  metric: Metric,
): Promise<number> {
  if (dataset.length === 0) return 0;
  const predict = new Predict({
    signature,
    lm,
    instructionsOverride: instructions,
  });
  let total = 0;
  for (const example of dataset) {
    try {
      const { output } = await predict.forward(example.inputs);
      const value = metric(output, example.outputs);
      // A NaN/Infinity metric value would poison the mean and make every
      // later score comparison false, so it is counted as a zero instead.
      if (Number.isFinite(value)) total += value;
    } catch {
      // Deliberate: any throw from forward() or the metric (e.g. an
      // unparseable model reply) scores this example as 0. The example still
      // counts in the denominator, so failures lower the mean.
    }
  }
  return total / dataset.length;
}
| 192 | + |
/**
 * Picks `count` distinct elements of `items` at random without mutating the
 * input.
 *
 * Uses a partial Fisher–Yates shuffle on a copy, so it consumes exactly one
 * `rng()` draw per selected element and always terminates — even when `rng`
 * is constant, returns out-of-range values, or `items` contains `undefined`.
 *
 * @param items Source elements.
 * @param count Number of elements wanted. Fractional values are rounded up;
 *              zero, negative or NaN yields an empty array; anything at or
 *              above `items.length` yields a copy of `items` in order.
 * @param rng   Random source expected to return values in [0, 1).
 * @returns A new array holding the selected elements in selection order.
 */
function subsample<T>(items: T[], count: number, rng: () => number): T[] {
  if (count >= items.length) return [...items];
  const wanted = Math.ceil(count);
  // Written as a negated comparison so NaN is rejected as well.
  if (!(wanted > 0)) return [];

  const pool = [...items];
  for (let i = 0; i < wanted; i += 1) {
    const span = pool.length - i;
    const raw = Math.floor(rng() * span);
    // Clamp so a misbehaving rng (NaN, negative, >= 1) cannot index outside
    // the not-yet-selected tail of the pool.
    const offset = Number.isNaN(raw) ? 0 : Math.min(span - 1, Math.max(0, raw));
    const j = i + offset;
    // Both indices are within bounds by construction; the assertions only
    // satisfy `noUncheckedIndexedAccess`.
    const picked = pool[j] as T;
    pool[j] = pool[i] as T;
    pool[i] = picked;
  }
  return pool.slice(0, wanted);
}
0 commit comments