Skip to content

Commit 9c16658

Browse files
committed
(fix) responses-api support in dreamer/normalizer layers
1 parent cb6ba43 commit 9c16658

10 files changed

Lines changed: 321 additions & 44 deletions

File tree

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,9 @@ Thumbs.db
3434
# VS Code extension build artifacts
3535
*.vsix
3636

37+
# Buildinfo files
38+
*.tsbuildinfo
39+
3740
plans/
3841
plans/DREAMGRAPH EVENTS AND HOOKS.txt
3942
plans/DG - THE DREAMGRAPH CLI.txt

extensions/vscode/src/architect-llm.ts

Lines changed: 14 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,12 @@ import * as vscode from "vscode";
99
import {
1010
buildOpenAIResponsesRequest,
1111
extractOpenAIResponsesRawItems,
12-
extractOpenAIResponsesText,
1312
extractOpenAIResponsesToolCalls,
13+
normalizeOpenAIResponsesResult,
1414
toOpenAIResponsesContent,
1515
translateRawToOpenAIResponses,
1616
usesOpenAIResponsesApi,
17+
type OpenAIResponsesData,
1718
} from "./openai-responses-adapter";
1819
import {
1920
applySharedRequestCompaction,
@@ -791,17 +792,6 @@ export class ArchitectLlm implements vscode.Disposable {
791792
return projection ? projection.legacyContent : content;
792793
}
793794

794-
private _extractOpenAIResponsesText(data: {
795-
output_text?: string;
796-
output?: Array<Record<string, unknown>>;
797-
}): string {
798-
return extractOpenAIResponsesText(data);
799-
}
800-
801-
private _extractOpenAIResponsesToolCalls(data: { output?: Array<Record<string, unknown>> }): ToolUseRequest[] {
802-
return extractOpenAIResponsesToolCalls(data);
803-
}
804-
805795
private async _callOpenAIResponses(
806796
config: ArchitectConfig,
807797
messages: ArchitectMessage[],
@@ -821,16 +811,13 @@ export class ArchitectLlm implements vscode.Disposable {
821811

822812
if (!res.ok) throw new Error(`OpenAI Responses API error (${res.status}): ${await res.text()}`);
823813

824-
const data = (await res.json()) as {
825-
output_text?: string;
826-
output?: Array<Record<string, unknown>>;
827-
usage?: { input_tokens?: number; output_tokens?: number };
828-
};
814+
const data = (await res.json()) as OpenAIResponsesData;
815+
const result = normalizeOpenAIResponsesResult(data);
829816

830817
return {
831-
content: this._maybeProjectStructuredContent(config, this._extractOpenAIResponsesText(data)),
832-
promptTokens: data.usage?.input_tokens ?? 0,
833-
completionTokens: data.usage?.output_tokens ?? 0,
818+
content: this._maybeProjectStructuredContent(config, result.text),
819+
promptTokens: result.usage?.input_tokens ?? 0,
820+
completionTokens: result.usage?.output_tokens ?? 0,
834821
durationMs: Date.now() - start,
835822
};
836823
}
@@ -856,28 +843,19 @@ export class ArchitectLlm implements vscode.Disposable {
856843

857844
if (!res.ok) throw new Error(`OpenAI Responses API error (${res.status}): ${await res.text()}`);
858845

859-
const data = (await res.json()) as {
860-
output_text?: string;
861-
output?: Array<Record<string, unknown>>;
862-
usage?: { input_tokens?: number; output_tokens?: number };
863-
status?: string;
864-
incomplete_details?: { reason?: string };
865-
};
866-
const toolCalls = this._extractOpenAIResponsesToolCalls(data);
846+
const data = (await res.json()) as OpenAIResponsesData;
847+
const result = normalizeOpenAIResponsesResult(data);
848+
const toolCalls = extractOpenAIResponsesToolCalls(data);
867849

868850
return {
869-
content: this._maybeProjectStructuredContent(config, this._extractOpenAIResponsesText(data)),
870-
promptTokens: data.usage?.input_tokens ?? 0,
871-
completionTokens: data.usage?.output_tokens ?? 0,
851+
content: this._maybeProjectStructuredContent(config, result.text),
852+
promptTokens: result.usage?.input_tokens ?? 0,
853+
completionTokens: result.usage?.output_tokens ?? 0,
872854
durationMs: Date.now() - start,
873855
toolCalls,
874856
// Verbatim output[] items (incl. reasoning) for stateless replay.
875857
providerRawAssistant: extractOpenAIResponsesRawItems(data),
876-
stopReason: toolCalls.length > 0
877-
? "tool_use"
878-
: data.incomplete_details?.reason === "max_output_tokens"
879-
? "max_tokens"
880-
: data.status ?? "end_turn",
858+
stopReason: result.finishReason ?? "end_turn",
881859
};
882860
}
883861

extensions/vscode/src/openai-responses-adapter.ts

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,9 +30,24 @@ export interface OpenAIResponsesOptions {
3030
structuredOutput?: boolean;
3131
}
3232

33+
/**
 * Token counters carried in the `usage` field of a Responses payload.
 * Every counter is optional, so readers must supply their own default.
 */
export interface OpenAIResponsesUsage {
  input_tokens?: number;
  output_tokens?: number;
  total_tokens?: number;
}
38+
3339
/**
 * The subset of a Responses API response body that this adapter reads.
 * All fields are optional; nothing here is validated at runtime.
 */
export interface OpenAIResponsesData {
  /** Top-level text field of the payload, when present. */
  output_text?: string;
  /** Untyped output items (messages, function calls, ...). */
  output?: Array<Record<string, unknown>>;
  /** Token counters reported with the response. */
  usage?: OpenAIResponsesUsage;
  /** Response status string, e.g. "completed". */
  status?: string;
  /** Set when the response was cut short; `reason` says why (e.g. "max_output_tokens"). */
  incomplete_details?: { reason?: string };
}
46+
47+
/**
 * Provider-neutral view of a Responses payload, as produced by
 * `normalizeOpenAIResponsesResult`.
 */
export interface OpenAIResponsesResult {
  /** Assistant text extracted from the payload. */
  text: string;
  /** "tool_use", "max_tokens", or the payload's own `status` (possibly undefined). */
  finishReason?: string;
  /** Usage counters passed through unchanged from the payload. */
  usage?: OpenAIResponsesUsage;
}
3752

3853
type ArchitectMessageContent = ArchitectMessage["content"];
@@ -312,6 +327,22 @@ export function extractOpenAIResponsesText(data: OpenAIResponsesData): string {
312327
return out;
313328
}
314329

330+
/**
 * Reduces a raw Responses payload to text, finish reason and usage.
 *
 * Finish reason precedence:
 *   1. "tool_use"   - at least one tool call was extracted from the payload;
 *   2. "max_tokens" - `incomplete_details.reason` is "max_output_tokens";
 *   3. otherwise the payload's `status`, which may be undefined.
 *
 * @param data Parsed Responses API body.
 * @returns The extracted text, the derived finish reason, and `data.usage` as-is.
 */
export function normalizeOpenAIResponsesResult(data: OpenAIResponsesData): OpenAIResponsesResult {
  const text = extractOpenAIResponsesText(data);
  // Only the presence of tool calls matters here; callers that need the
  // calls themselves extract them separately.
  const toolCalls = extractOpenAIResponsesToolCalls(data);
  const finishReason = toolCalls.length > 0
    ? "tool_use"
    : data.incomplete_details?.reason === "max_output_tokens"
      ? "max_tokens"
      : data.status;

  return {
    text,
    finishReason,
    usage: data.usage,
  };
}
345+
315346
export function extractOpenAIResponsesToolCalls(data: OpenAIResponsesData): ToolUseRequest[] {
316347
const toolCalls: ToolUseRequest[] = [];
317348

extensions/vscode/src/test/openai-responses-adapter.test.ts

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import {
44
buildOpenAIResponsesRequest,
55
extractOpenAIResponsesText,
66
extractOpenAIResponsesToolCalls,
7+
normalizeOpenAIResponsesResult,
78
translateRawToOpenAIResponses,
89
usesOpenAIResponsesApi,
910
} from '../openai-responses-adapter';
@@ -166,6 +167,40 @@ test('keeps streamed sub-blocks within a single message tightly joined', () => {
166167
assert.equal(text, 'Step 1: reading file.');
167168
});
168169

170+
// A completed message-only payload keeps its status as the finish reason and
// passes the usage counters through untouched.
test('normalizes Responses output to the provider boundary contract', () => {
  const result = normalizeOpenAIResponsesResult({
    output: [
      {
        type: 'message',
        content: [{ type: 'output_text', text: 'Done.' }],
      },
    ],
    status: 'completed',
    usage: { input_tokens: 10, output_tokens: 4, total_tokens: 14 },
  });

  assert.deepEqual(result, {
    text: 'Done.',
    finishReason: 'completed',
    usage: { input_tokens: 10, output_tokens: 4, total_tokens: 14 },
  });
});
188+
189+
// A function_call item takes precedence over the "completed" status, so the
// finish reason becomes "tool_use"; with no message item the text is empty.
test('normalizes Responses tool calls as tool_use finish reason', () => {
  const result = normalizeOpenAIResponsesResult({
    output: [
      { type: 'function_call', call_id: 'call_valid', name: 'read_source_code', arguments: '{"filePath":"src/a.ts"}' },
    ],
    status: 'completed',
  });

  assert.deepEqual(result, {
    text: '',
    finishReason: 'tool_use',
    usage: undefined,
  });
});
203+
169204
test('extracts function calls and hardens malformed or unknown items', () => {
170205
const calls = extractOpenAIResponsesToolCalls({
171206
output: [

src/architect/native-tool-loop.ts

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import type { IncomingMessage } from "node:http";
22
import type { ArchitectLlmConfig, LlmMessage, LlmProvider } from "../cognitive/llm.js";
3+
import { getModelCapabilities } from "../cognitive/llm.js";
34
import { mcpCallTool, mcpListTools, type McpCallResult } from "../cli/utils/mcp-call.js";
45
import { logger } from "../utils/logger.js";
56

@@ -330,11 +331,12 @@ async function callOpenAiCompatibleWithTools(
330331
messages: NeutralMessage[],
331332
tools: ArchitectToolDefinition[],
332333
): Promise<ToolLoopResponse> {
333-
const useNewTokenParam = /^(o[1-9]|gpt-[4-9]\.[1-9]|gpt-5)/.test(config.model);
334+
const capabilities = getModelCapabilities(config.provider, config.model);
335+
const useNewTokenParam = /^(o[1-9]|gpt-[4-9]\.[1-9]|gpt-5)/i.test(config.model);
334336
const body: Record<string, unknown> = {
335337
model: config.model,
336338
messages: toOpenAiMessages(messages),
337-
temperature: config.temperature,
339+
...(capabilities.supportsTemperature ? { temperature: config.temperature } : {}),
338340
...(useNewTokenParam
339341
? { max_completion_tokens: config.maxTokens }
340342
: { max_tokens: config.maxTokens }),

src/cognitive/llm.ts

Lines changed: 111 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,13 +66,35 @@ export interface LlmMessage {
6666
content: string;
6767
}
6868

69-
export interface LlmResponse {
69+
/**
 * Token counters attached to an LLM result. Each counter is optional, so a
 * provider may fill in only the ones it reports.
 */
export interface TokenUsage {
  inputTokens?: number;
  outputTokens?: number;
  totalTokens?: number;
}
74+
75+
/** Provider-neutral core of a completion: the text plus optional metadata. */
export interface LlmResult {
  /** Generated text; providers in this module fall back to "" when none is returned. */
  text: string;
  /** Provider-reported reason the generation stopped, if any. */
  finishReason?: string;
  /** Token counters, when the provider reports them. */
  usage?: TokenUsage;
}
80+
81+
/**
 * Full completion response: the neutral `LlmResult` fields plus the older
 * `tokensUsed` / `stopReason` fields.
 *
 * OpenAiCompatibleProvider fills `tokensUsed` from `usage.outputTokens` and
 * sets `stopReason` and `finishReason` to the same value.
 */
export interface LlmResponse extends LlmResult {
  /** Model identifier that served the request. */
  model: string;
  tokensUsed?: number;
  stopReason?: string;
}
7586

87+
/** Wire API family reported in `ModelCapabilities.api`. */
export type LlmModelApi =
  | "chat-completions"
  | "responses"
  | "anthropic-messages"
  | "ollama-chat"
  | "mcp-sampling"
  | "none";
88+
89+
/**
 * Feature flags for one provider/model pair, as returned by
 * `getModelCapabilities`.
 */
export interface ModelCapabilities {
  /** Model name after trimming surrounding whitespace. */
  model: string;
  /** API family associated with the model. */
  api: LlmModelApi;
  /** When false, request builders leave `temperature` out of the request body. */
  supportsTemperature: boolean;
  supportsReasoningEffort: boolean;
  supportsStructuredOutputs: boolean;
  supportsJsonSchema: boolean;
}
97+
7698
/**
7799
* Options for LLM completion requests.
78100
*
@@ -256,6 +278,85 @@ class OllamaProvider implements LlmProvider {
256278
*/
257279
const _jsonSchemaUnsupported = new Set<string>();
258280

281+
/**
 * Capability overrides for models served by the "openai" provider.
 *
 * Entries are tried top to bottom and the first matching pattern wins, so the
 * specific `gpt-5.5` rule has to stay ahead of the broader
 * `gpt-<major>.<minor>` rule, which would otherwise match it as well.
 */
const OPENAI_MODEL_CAPABILITIES: Array<{ pattern: RegExp; capabilities: Omit<ModelCapabilities, "model"> }> = [
  {
    pattern: /^gpt-5\.5(?:\b|[-_])/i,
    capabilities: {
      api: "responses",
      supportsTemperature: false,
      supportsReasoningEffort: true,
      supportsStructuredOutputs: true,
      supportsJsonSchema: true,
    },
  },
  {
    pattern: /^gpt-[4-9]\.[1-9]/i,
    capabilities: {
      api: "chat-completions",
      supportsTemperature: true,
      supportsReasoningEffort: false,
      supportsStructuredOutputs: true,
      supportsJsonSchema: true,
    },
  },
  {
    pattern: /^(o[1-9]|gpt-5(?:\b|[-_]))/i,
    capabilities: {
      api: "chat-completions",
      supportsTemperature: false,
      supportsReasoningEffort: true,
      supportsStructuredOutputs: true,
      supportsJsonSchema: true,
    },
  },
];

/**
 * Resolves the capability flags for a provider/model pair.
 *
 * The provider is matched case-insensitively and the model name is matched
 * against `OPENAI_MODEL_CAPABILITIES` (first match wins) when the provider is
 * "openai". Surrounding whitespace is ignored on both inputs.
 *
 * @param provider Provider identifier ("openai", "anthropic", "ollama",
 *   "lmstudio", "sampling", or anything else).
 * @param model Model name; returned trimmed in the result's `model` field.
 * @returns Capability flags. Unrecognised providers get `api: "none"` with
 *   every feature flag off; "sampling" gets `api: "mcp-sampling"` with every
 *   flag off.
 */
export function getModelCapabilities(provider: LlmProviderType | string, model: string): ModelCapabilities {
  // Trim the provider as well as the model so stray whitespace cannot push a
  // known provider into the all-unsupported fallback at the bottom.
  const normalizedProvider = provider.trim().toLowerCase();
  const normalizedModel = model.trim();

  switch (normalizedProvider) {
    case "openai": {
      const match = OPENAI_MODEL_CAPABILITIES.find((entry) => entry.pattern.test(normalizedModel));
      return {
        model: normalizedModel,
        // Models not listed in the table get the plain chat-completions defaults.
        ...(match?.capabilities ?? {
          api: "chat-completions" as const,
          supportsTemperature: true,
          supportsReasoningEffort: false,
          supportsStructuredOutputs: true,
          supportsJsonSchema: true,
        }),
      };
    }
    case "anthropic":
      return {
        model: normalizedModel,
        api: "anthropic-messages",
        supportsTemperature: true,
        supportsReasoningEffort: false,
        supportsStructuredOutputs: false,
        supportsJsonSchema: false,
      };
    case "ollama":
    case "lmstudio":
      return {
        model: normalizedModel,
        api: normalizedProvider === "ollama" ? "ollama-chat" : "chat-completions",
        supportsTemperature: true,
        supportsReasoningEffort: false,
        supportsStructuredOutputs: false,
        supportsJsonSchema: false,
      };
    default:
      return {
        model: normalizedModel,
        api: normalizedProvider === "sampling" ? "mcp-sampling" : "none",
        supportsTemperature: false,
        supportsReasoningEffort: false,
        supportsStructuredOutputs: false,
        supportsJsonSchema: false,
      };
  }
}
359+
259360
/** Heuristic: error body indicates the strict json_schema form is unsupported. */
260361
function _isJsonSchemaUnsupportedError(status: number, body: string): boolean {
261362
if (status < 400 || status >= 500) return false;
@@ -303,10 +404,11 @@ class OpenAiCompatibleProvider implements LlmProvider {
303404
const temp = options?.temperature ?? this.defaultTemperature;
304405
const maxTokens = options?.maxTokens ?? this.defaultMaxTokens;
305406
const model = options?.model ?? this.model;
407+
const capabilities = getModelCapabilities(this.name, model);
306408

307409
// Newer OpenAI models (o1/o3/o4-mini, gpt-4.1, gpt-5.4-nano, etc.) require
308410
// "max_completion_tokens" instead of the legacy "max_tokens" parameter.
309-
const useNewTokenParam = /^(o[1-9]|gpt-[4-9]\.[1-9]|gpt-5)/.test(model);
411+
const useNewTokenParam = /^(o[1-9]|gpt-[4-9]\.[1-9]|gpt-5)/i.test(model);
310412

311413
const downgradeKey = `${this.name}:${model}`;
312414
const knownUnsupported = _jsonSchemaUnsupported.has(downgradeKey);
@@ -315,7 +417,7 @@ class OpenAiCompatibleProvider implements LlmProvider {
315417
const body: Record<string, unknown> = {
316418
model,
317419
messages: messages.map(m => ({ role: m.role, content: m.content })),
318-
temperature: temp,
420+
...(capabilities.supportsTemperature ? { temperature: temp } : {}),
319421
...(useNewTokenParam
320422
? { max_completion_tokens: maxTokens }
321423
: { max_tokens: maxTokens }),
@@ -385,11 +487,16 @@ class OpenAiCompatibleProvider implements LlmProvider {
385487
};
386488

387489
const choice = data.choices?.[0];
490+
const usage: TokenUsage | undefined = data.usage?.completion_tokens === undefined
491+
? undefined
492+
: { outputTokens: data.usage.completion_tokens };
388493
return {
389494
text: choice?.message?.content ?? "",
390495
model: data.model ?? this.model,
391-
tokensUsed: data.usage?.completion_tokens,
496+
tokensUsed: usage?.outputTokens,
392497
stopReason: choice?.finish_reason,
498+
finishReason: choice?.finish_reason,
499+
usage,
393500
};
394501
}
395502
}

0 commit comments

Comments
 (0)