Commit 6c1086f

feat: add reasoning support across all providers (#81)
## Summary

- Add reasoning/thinking output to every provider that previously lacked it:
  - OpenAI Chat Completions: `reasoning_content` field on the message + streaming deltas
  - Gemini: `thought: true` text parts before answer parts
  - Bedrock InvokeModel: Anthropic-style `thinking` content blocks
  - Bedrock Converse: `reasoningContent` blocks in Converse format
  - Ollama: `reasoning_content` on both `/api/chat` and `/api/generate` (streaming + non-streaming)
- The existing `TextResponse.reasoning` fixture field now produces correct provider-native reasoning output on all endpoints.
- No fixture format changes, no router changes, no new types.

## Test plan

- [ ] 22 new tests in `reasoning-all-providers.test.ts` covering all 5 providers
- [ ] Each provider tested for streaming + non-streaming, with reasoning present and absent
- [ ] Unit tests for the `buildBedrockStreamTextEvents` reasoning event structure
- [ ] Full test suite passes (2048 tests, 0 failures)
- [ ] Lint and format clean

🤖 Generated with [Claude Code](https://claude.com/claude-code)
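For illustration (not part of the diff): a minimal sketch of how a text fixture's existing `reasoning` field surfaces on the OpenAI Chat Completions endpoint. The fixture values are invented; the response shape follows `buildTextCompletion` in `src/helpers.ts`.

```ts
// Hypothetical fixture: only `content` and `reasoning` matter here.
const fixture = {
  content: "The answer is 42.",
  reasoning: "Recall the question; 42 is the canonical answer.",
};

// Non-streaming Chat Completions message the mock now returns for it —
// reasoning is attached as `reasoning_content` alongside the usual fields.
const expectedMessage = {
  role: "assistant",
  content: fixture.content,
  refusal: null,
  reasoning_content: fixture.reasoning,
};
```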
2 parents e1bcaaf + a482f2d commit 6c1086f

8 files changed

Lines changed: 974 additions & 30 deletions

src/__tests__/reasoning-all-providers.test.ts

Lines changed: 786 additions & 0 deletions
Large diffs are not rendered by default.

src/bedrock-converse.ts

Lines changed: 12 additions & 4 deletions
@@ -156,12 +156,20 @@ export function converseToCompletionRequest(
 
 // ─── Response builders ──────────────────────────────────────────────────────
 
-function buildConverseTextResponse(content: string): object {
+function buildConverseTextResponse(content: string, reasoning?: string): object {
+  const contentBlocks: object[] = [];
+  if (reasoning) {
+    contentBlocks.push({
+      reasoningContent: { reasoningText: { text: reasoning } },
+    });
+  }
+  contentBlocks.push({ text: content });
+
   return {
     output: {
       message: {
         role: "assistant",
-        content: [{ text: content }],
+        content: contentBlocks,
       },
     },
     stopReason: "end_turn",

@@ -368,7 +376,7 @@ export async function handleConverse(
       body: completionReq,
       response: { status: 200, fixture },
     });
-    const body = buildConverseTextResponse(response.content);
+    const body = buildConverseTextResponse(response.content, response.reasoning);
     res.writeHead(200, { "Content-Type": "application/json" });
     res.end(JSON.stringify(body));
     return;

@@ -578,7 +586,7 @@ export async function handleConverseStream(
       body: completionReq,
       response: { status: 200, fixture },
     });
-    const events = buildBedrockStreamTextEvents(response.content, chunkSize);
+    const events = buildBedrockStreamTextEvents(response.content, chunkSize, response.reasoning);
     const interruption = createInterruptionSignal(fixture);
     const completed = await writeEventStream(res, events, {
       latency,

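For reference, a sketch of the Converse response body that `buildConverseTextResponse` now produces when the fixture carries `reasoning` (text values invented):

```ts
// Reasoning rides in a `reasoningContent` block ahead of the text block.
const converseBody = {
  output: {
    message: {
      role: "assistant",
      content: [
        { reasoningContent: { reasoningText: { text: "model's reasoning…" } } },
        { text: "final answer" },
      ],
    },
  },
  stopReason: "end_turn",
};
```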
src/bedrock.ts

Lines changed: 46 additions & 7 deletions
@@ -198,12 +198,18 @@ export function bedrockToCompletionRequest(
 
 // ─── Response builders ──────────────────────────────────────────────────────
 
-function buildBedrockTextResponse(content: string, model: string): object {
+function buildBedrockTextResponse(content: string, model: string, reasoning?: string): object {
+  const contentBlocks: object[] = [];
+  if (reasoning) {
+    contentBlocks.push({ type: "thinking", thinking: reasoning });
+  }
+  contentBlocks.push({ type: "text", text: content });
+
   return {
     id: generateMessageId(),
     type: "message",
     role: "assistant",
-    content: [{ type: "text", text: content }],
+    content: contentBlocks,
     model,
     stop_reason: "end_turn",
     stop_sequence: null,

@@ -422,7 +428,11 @@ export async function handleBedrock(
       body: completionReq,
       response: { status: 200, fixture },
     });
-    const body = buildBedrockTextResponse(response.content, completionReq.model);
+    const body = buildBedrockTextResponse(
+      response.content,
+      completionReq.model,
+      response.reasoning,
+    );
     res.writeHead(200, { "Content-Type": "application/json" });
     res.end(JSON.stringify(body));
     return;

@@ -468,6 +478,7 @@ export async function handleBedrock(
 export function buildBedrockStreamTextEvents(
   content: string,
   chunkSize: number,
+  reasoning?: string,
 ): Array<{ eventType: string; payload: object }> {
   const events: Array<{ eventType: string; payload: object }> = [];
 

@@ -476,25 +487,53 @@ export function buildBedrockStreamTextEvents(
     payload: { role: "assistant" },
   });
 
+  // Thinking block (emitted before text when reasoning is present)
+  if (reasoning) {
+    const blockIndex = 0;
+    events.push({
+      eventType: "contentBlockStart",
+      payload: { contentBlockIndex: blockIndex, start: { type: "thinking" } },
+    });
+
+    for (let i = 0; i < reasoning.length; i += chunkSize) {
+      const slice = reasoning.slice(i, i + chunkSize);
+      events.push({
+        eventType: "contentBlockDelta",
+        payload: {
+          contentBlockIndex: blockIndex,
+          delta: { type: "thinking_delta", thinking: slice },
+        },
+      });
+    }
+
+    events.push({
+      eventType: "contentBlockStop",
+      payload: { contentBlockIndex: blockIndex },
+    });
+  }
+
+  // Text block
+  const textBlockIndex = reasoning ? 1 : 0;
+
   events.push({
     eventType: "contentBlockStart",
-    payload: { contentBlockIndex: 0, start: {} },
+    payload: { contentBlockIndex: textBlockIndex, start: {} },
   });
 
   for (let i = 0; i < content.length; i += chunkSize) {
     const slice = content.slice(i, i + chunkSize);
     events.push({
       eventType: "contentBlockDelta",
       payload: {
-        contentBlockIndex: 0,
+        contentBlockIndex: textBlockIndex,
         delta: { type: "text_delta", text: slice },
       },
     });
   }
 
   events.push({
     eventType: "contentBlockStop",
-    payload: { contentBlockIndex: 0 },
+    payload: { contentBlockIndex: textBlockIndex },
   });
 
   events.push({

@@ -738,7 +777,7 @@ export async function handleBedrockStream(
       body: completionReq,
       response: { status: 200, fixture },
     });
-    const events = buildBedrockStreamTextEvents(response.content, chunkSize);
+    const events = buildBedrockStreamTextEvents(response.content, chunkSize, response.reasoning);
     const interruption = createInterruptionSignal(fixture);
     const completed = await writeEventStream(res, events, {
       latency,

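A sketch of the event ordering `buildBedrockStreamTextEvents` emits once `reasoning` is supplied. The text and chunk size are invented, the import path is assumed, and the surrounding message-level events are elided:

```ts
import { buildBedrockStreamTextEvents } from "./bedrock.js"; // assumed import path

const events = buildBedrockStreamTextEvents("Hi there", 8, "a short thought");
// Expected eventType sequence (per the diff above):
//   contentBlockStart  index 0, start: { type: "thinking" }
//   contentBlockDelta  index 0, delta: { type: "thinking_delta", thinking: … }  (one per slice)
//   contentBlockStop   index 0
//   contentBlockStart  index 1, start: {}
//   contentBlockDelta  index 1, delta: { type: "text_delta", text: … }          (one per slice)
//   contentBlockStop   index 1
// Without reasoning, the text block keeps index 0, so existing fixtures are unaffected.
```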
src/gemini.ts

Lines changed: 33 additions & 7 deletions
@@ -36,6 +36,7 @@ import { proxyAndRecord } from "./recorder.js";
 
 interface GeminiPart {
   text?: string;
+  thought?: boolean;
   functionCall?: { name: string; args: Record<string, unknown>; id?: string };
   functionResponse?: { name: string; response: unknown };
 }

@@ -187,10 +188,29 @@ interface GeminiResponseChunk {
   };
 }
 
-function buildGeminiTextStreamChunks(content: string, chunkSize: number): GeminiResponseChunk[] {
+function buildGeminiTextStreamChunks(
+  content: string,
+  chunkSize: number,
+  reasoning?: string,
+): GeminiResponseChunk[] {
   const chunks: GeminiResponseChunk[] = [];
 
-  // Content chunks
+  // Reasoning chunks (thought: true)
+  if (reasoning) {
+    for (let i = 0; i < reasoning.length; i += chunkSize) {
+      const slice = reasoning.slice(i, i + chunkSize);
+      chunks.push({
+        candidates: [
+          {
+            content: { role: "model", parts: [{ text: slice, thought: true }] },
+            index: 0,
+          },
+        ],
+      });
+    }
+  }
+
+  // Content chunks (original logic unchanged)
   for (let i = 0; i < content.length; i += chunkSize) {
     const slice = content.slice(i, i + chunkSize);
     const isLast = i + chunkSize >= content.length;

@@ -215,7 +235,7 @@ function buildGeminiTextStreamChunks(content: string, chunkSize: number): Gemini
     chunks.push(chunk);
   }
 
-  // Handle empty content
+  // Handle empty content (original logic unchanged)
   if (content.length === 0) {
     chunks.push({
       candidates: [

@@ -276,11 +296,17 @@ function buildGeminiToolCallStreamChunks(
 
 // Non-streaming response builders
 
-function buildGeminiTextResponse(content: string): GeminiResponseChunk {
+function buildGeminiTextResponse(content: string, reasoning?: string): GeminiResponseChunk {
+  const parts: GeminiPart[] = [];
+  if (reasoning) {
+    parts.push({ text: reasoning, thought: true });
+  }
+  parts.push({ text: content });
+
   return {
     candidates: [
       {
-        content: { role: "model", parts: [{ text: content }] },
+        content: { role: "model", parts },
         finishReason: "STOP",
         index: 0,
       },

@@ -533,11 +559,11 @@ export async function handleGemini(
       response: { status: 200, fixture },
     });
     if (!streaming) {
-      const body = buildGeminiTextResponse(response.content);
+      const body = buildGeminiTextResponse(response.content, response.reasoning);
       res.writeHead(200, { "Content-Type": "application/json" });
       res.end(JSON.stringify(body));
     } else {
-      const chunks = buildGeminiTextStreamChunks(response.content, chunkSize);
+      const chunks = buildGeminiTextStreamChunks(response.content, chunkSize, response.reasoning);
       const interruption = createInterruptionSignal(fixture);
       const completed = await writeGeminiSSEStream(res, chunks, {
         latency,

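For reference, a sketch of the non-streaming Gemini body with reasoning present: a `thought: true` part precedes the answer part (text values invented):

```ts
const geminiBody = {
  candidates: [
    {
      content: {
        role: "model",
        parts: [
          { text: "model's reasoning…", thought: true }, // reasoning part
          { text: "final answer" },                      // answer part
        ],
      },
      finishReason: "STOP",
      index: 0,
    },
  ],
};
```

In streaming mode the same split applies: the `thought: true` chunks are emitted first, then the ordinary content chunks.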
src/helpers.ts

Lines changed: 31 additions & 3 deletions
@@ -62,7 +62,12 @@ export function isEmbeddingResponse(r: FixtureResponse): r is EmbeddingResponse
   return "embedding" in r && Array.isArray((r as EmbeddingResponse).embedding);
 }
 
-export function buildTextChunks(content: string, model: string, chunkSize: number): SSEChunk[] {
+export function buildTextChunks(
+  content: string,
+  model: string,
+  chunkSize: number,
+  reasoning?: string,
+): SSEChunk[] {
   const id = generateId();
   const created = Math.floor(Date.now() / 1000);
   const chunks: SSEChunk[] = [];

@@ -76,6 +81,20 @@ export function buildTextChunks(content: string, model: string, chunkSize: numbe
     choices: [{ index: 0, delta: { role: "assistant", content: "" }, finish_reason: null }],
   });
 
+  // Reasoning chunks (emitted before content chunks)
+  if (reasoning) {
+    for (let i = 0; i < reasoning.length; i += chunkSize) {
+      const slice = reasoning.slice(i, i + chunkSize);
+      chunks.push({
+        id,
+        object: "chat.completion.chunk",
+        created,
+        model,
+        choices: [{ index: 0, delta: { reasoning_content: slice }, finish_reason: null }],
+      });
+    }
+  }
+
   // Content chunks
   for (let i = 0; i < content.length; i += chunkSize) {
     const slice = content.slice(i, i + chunkSize);

@@ -183,7 +202,11 @@ export function buildToolCallChunks(
 
 // Non-streaming response builders
 
-export function buildTextCompletion(content: string, model: string): ChatCompletion {
+export function buildTextCompletion(
+  content: string,
+  model: string,
+  reasoning?: string,
+): ChatCompletion {
   return {
     id: generateId(),
     object: "chat.completion",

@@ -192,7 +215,12 @@ export function buildTextCompletion(content: string, model: string): ChatComplet
     choices: [
       {
         index: 0,
-        message: { role: "assistant", content, refusal: null },
+        message: {
+          role: "assistant",
+          content,
+          refusal: null,
+          ...(reasoning ? { reasoning_content: reasoning } : {}),
+        },
         finish_reason: "stop",
       },
     ],

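For reference, a sketch of the delta sequence `buildTextChunks` yields when reasoning is present (ids, timestamps, and the terminal chunk omitted; text values invented):

```ts
// Only the `delta` objects, in order:
const deltas = [
  { role: "assistant", content: "" },   // initial role chunk
  { reasoning_content: "a short " },    // reasoning slices come first…
  { reasoning_content: "thought" },
  { content: "final " },                // …then the usual content slices
  { content: "answer" },
];
```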