|
1 | 1 | import { describe, expect, test } from "bun:test"; |
2 | | -import type { ExtensionAPI } from "@mariozechner/pi-coding-agent"; |
| 2 | +import { |
| 3 | + fauxAssistantMessage, |
| 4 | + type Model, |
| 5 | + registerFauxProvider, |
| 6 | +} from "@mariozechner/pi-ai"; |
| 7 | +import type { |
| 8 | + ExtensionAPI, |
| 9 | + ExtensionContext, |
| 10 | +} from "@mariozechner/pi-coding-agent"; |
3 | 11 | import webMinimalExtension from "./extensions/web-minimal.ts"; |
4 | 12 | import { |
5 | 13 | buildDistillationPrompt, |
@@ -38,6 +46,20 @@ function firstText(result: unknown): string { |
38 | 46 | return content?.[0]?.text ?? ""; |
39 | 47 | } |
40 | 48 |
|
| 49 | +function fauxContext(model: Model<string>): ExtensionContext { // Test helper: wraps the given model in a minimal stand-in ExtensionContext.
| 50 | + return {
| 51 | + model,
| 52 | + modelRegistry: {
| 53 | + find() { // Every lookup resolves to the same supplied model.
| 54 | + return model;
| 55 | + },
| 56 | + async getApiKeyAndHeaders() { // Always reports success with a placeholder key and no extra headers.
| 57 | + return { ok: true, apiKey: "test-key", headers: {} };
| 58 | + },
| 59 | + },
| 60 | + } as unknown as ExtensionContext; // Only model and modelRegistry are populated, so the partial object is cast through unknown.
| 61 | +}
| 62 | + |
41 | 63 | describe("pi-web-minimal extension", () => { |
42 | 64 | test("registers only the minimal retrieval tools", () => { |
43 | 65 | expect(registeredTools().map((tool) => tool.name)).toEqual([ |
@@ -90,6 +112,38 @@ describe("pi-web-minimal extension", () => { |
90 | 112 | "maxCharacters", |
91 | 113 | ); |
92 | 114 | }); |
| 115 | + |
| 116 | + test("fetch results use a compact one-line raw retrieval notice", async () => { // Serves a local HTML fixture, runs the fetch_content tool against it, and checks the format of the trailing notice.
| 117 | + const server = Bun.serve({
| 118 | + port: 0, // NOTE(review): presumably asks Bun for any free port; the bound port is read back from server.port below - confirm.
| 119 | + fetch() {
| 120 | + return new Response(
| 121 | + `<html><body><article><h1>Fixture</h1><p>${"Fixture content for compact notice. ".repeat(12)}</p></article></body></html>`,
| 122 | + { headers: { "content-type": "text/html" } },
| 123 | + );
| 124 | + },
| 125 | + });
| 126 | + try { // try/finally so the fixture server is stopped even when an assertion fails.
| 127 | + const tool = registeredTools().find(
| 128 | + (candidate) => candidate.name === "fetch_content",
| 129 | + );
| 130 | + expect(tool).toBeDefined();
| 131 | + const result = await tool?.execute(
| 132 | + "call",
| 133 | + { url: `http://127.0.0.1:${server.port}/` },
| 134 | + undefined,
| 135 | + undefined,
| 136 | + {} as never, // Empty placeholder for the fifth execute argument.
| 137 | + );
| 138 | + const text = firstText(result);
| 139 | + expect(text).toMatch( // The $-anchored pattern requires the raw: line to be the very end of the text.
| 140 | + /---\nraw: get_search_content\(\{ responseId: "[a-z0-9]+", urlIndex: 0 \}\)$/,
| 141 | + );
| 142 | + expect(text).not.toContain("Use get_search_content"); // The longer instruction-style wording must be absent.
| 143 | + } finally {
| 144 | + await server.stop();
| 145 | + }
| 146 | + });
93 | 147 | }); |
94 | 148 |
|
95 | 149 | describe("distillation", () => { |
@@ -178,15 +232,117 @@ describe("distillation", () => { |
178 | 232 | expect(result.details.fallbackReason).toContain("model"); |
179 | 233 | }); |
180 | 234 |
|
181 | | - test("citation validation rejects uncited substantive lines", () => {
| 235 | + test("citation validation rejects uncited substantive answer lines", () => { // The Answer line below has no [S#] marker; S1 appears only in the Sources list, so validation is expected to fail.
182 | 236 | const result = validateDistilledOutput(
183 | | - "## Answer\nReact returns state and setter.\n## Key evidence\n- useState returns a pair [S1]",
| 237 | + "## Answer\nReact returns state and setter.\n## Sources\n- [S1] React docs",
184 | 238 | 1,
185 | 239 | );
186 | 240 |

187 | 241 | expect(result.ok).toBe(false);
188 | 242 | expect(result.error).toContain("missing source reference");
189 | 243 | });
| 244 | + |
| 245 | + test("model distillation uses answer and sources without extra sections", async () => { // A well-formed faux reply should yield distilled output containing only the Answer and Sources sections.
| 246 | + const faux = registerFauxProvider(); // Registered per test and removed again in the finally block below.
| 247 | + try {
| 248 | + faux.setResponses([ // Single queued reply: one cited answer bullet plus a Sources list.
| 249 | + fauxAssistantMessage(
| 250 | + "## Answer\n- Alpha is supported by the source. [S1]\n\n## Sources\n- [S1] Alpha docs — https://alpha.test",
| 251 | + ),
| 252 | + ]);
| 253 | +
| 254 | + const result = await distillRetrieval({
| 255 | + ctx: fauxContext(faux.getModel()),
| 256 | + toolName: "web_search",
| 257 | + task: "alpha",
| 258 | + sources: [
| 259 | + {
| 260 | + title: "Alpha docs",
| 261 | + url: "https://alpha.test",
| 262 | + content: "Alpha is supported. ".repeat(120),
| 263 | + },
| 264 | + ],
| 265 | + });
| 266 | +
| 267 | + expect(result.text).toContain("## Answer");
| 268 | + expect(result.text).toContain("## Sources");
| 269 | + expect(result.text).not.toContain("## Key evidence"); // This heading and the next one must not show up in the result.
| 270 | + expect(result.text).not.toContain("## Next actions");
| 271 | + expect(result.details.mode).toBe("distilled");
| 272 | + expect(result.details.truncated).toBe(false);
| 273 | + } finally {
| 274 | + faux.unregister();
| 275 | + }
| 276 | + });
| 277 | + |
| 278 | + test("model distillation retries over-budget output instead of truncating it", async () => { // An overly long first reply should trigger a second model call rather than truncated output.
| 279 | + const faux = registerFauxProvider();
| 280 | + try {
| 281 | + const longAnswer = `## Answer\n- ${"too long ".repeat(300)}[S1]\n\n## Sources\n- [S1] Alpha docs — https://alpha.test`;
| 282 | + faux.setResponses([ // First reply repeats filler 300 times (presumably over the output budget - confirm); the second reply is short.
| 283 | + fauxAssistantMessage(longAnswer),
| 284 | + fauxAssistantMessage(
| 285 | + "## Answer\n- Alpha is supported. [S1]\n\n## Sources\n- [S1] Alpha docs — https://alpha.test",
| 286 | + ),
| 287 | + ]);
| 288 | +
| 289 | + const result = await distillRetrieval({
| 290 | + ctx: fauxContext(faux.getModel()),
| 291 | + toolName: "web_search",
| 292 | + task: "alpha",
| 293 | + sources: [
| 294 | + {
| 295 | + title: "Alpha docs",
| 296 | + url: "https://alpha.test",
| 297 | + content: "Alpha is supported. ".repeat(120),
| 298 | + },
| 299 | + ],
| 300 | + });
| 301 | +
| 302 | + expect(faux.state.callCount).toBe(2); // Two calls expected: the first attempt plus one retry.
| 303 | + expect(result.text).toContain("Alpha is supported. [S1]"); // The returned text comes from the second, short reply.
| 304 | + expect(result.text).not.toContain("[Content truncated]");
| 305 | + expect(result.details.mode).toBe("distilled");
| 306 | + expect(result.details.retryCount).toBe(1);
| 307 | + expect(result.details.truncated).toBe(false);
| 308 | + } finally {
| 309 | + faux.unregister();
| 310 | + }
| 311 | + });
| 312 | + |
| 313 | + test("model distillation retries length-stopped output even when short", async () => { // A reply cut off with a length stop reason should be retried even though its text is short.
| 314 | + const faux = registerFauxProvider();
| 315 | + try {
| 316 | + faux.setResponses([
| 317 | + fauxAssistantMessage("## Answer\n- Alpha is partially", {
| 318 | + stopReason: "length", // Marks the first reply as stopped by a length limit.
| 319 | + }),
| 320 | + fauxAssistantMessage(
| 321 | + "## Answer\n- Alpha is supported. [S1]\n\n## Sources\n- [S1] Alpha docs — https://alpha.test",
| 322 | + ),
| 323 | + ]);
| 324 | +
| 325 | + const result = await distillRetrieval({
| 326 | + ctx: fauxContext(faux.getModel()),
| 327 | + toolName: "web_search",
| 328 | + task: "alpha",
| 329 | + sources: [
| 330 | + {
| 331 | + title: "Alpha docs",
| 332 | + url: "https://alpha.test",
| 333 | + content: "Alpha is supported. ".repeat(120),
| 334 | + },
| 335 | + ],
| 336 | + });
| 337 | +
| 338 | + expect(faux.state.callCount).toBe(2); // Two calls expected: the length-stopped attempt plus one retry.
| 339 | + expect(result.text).toContain("Alpha is supported. [S1]");
| 340 | + expect(result.details.stopReason).toBe("stop"); // NOTE(review): the second reply sets no stopReason, so stop is presumably the faux default - confirm.
| 341 | + expect(result.details.retryCount).toBe(1);
| 342 | + } finally {
| 343 | + faux.unregister();
| 344 | + }
| 345 | + });
190 | 346 | }); |
191 | 347 |
|
192 | 348 | describe("search option helpers", () => { |
|
0 commit comments