@@ -66,13 +66,35 @@ export interface LlmMessage {
6666 content : string ;
6767}
6868
69- export interface LlmResponse {
/**
 * Token accounting reported for a single completion.
 *
 * Every field is optional because a provider may report only part of the
 * breakdown (or nothing at all).
 */
export interface TokenUsage {
  /** Tokens consumed by the request/prompt side, when reported. */
  inputTokens?: number;
  /** Tokens produced in the completion, when reported. */
  outputTokens?: number;
  /** Combined token count, when reported. */
  totalTokens?: number;
}
74+
/**
 * Provider-agnostic outcome of a completion: the generated text plus
 * optional metadata about why generation ended and how many tokens it used.
 */
export interface LlmResult {
  /** Generated text. */
  text: string;
  /** Reason generation ended, as reported by the provider, if any. */
  finishReason?: string;
  /** Token accounting for the call, if the provider reported it. */
  usage?: TokenUsage;
}
80+
/**
 * Completion response returned by a provider: everything in `LlmResult`
 * plus the model name and the older flat metadata fields.
 */
export interface LlmResponse extends LlmResult {
  /** Name of the model associated with the response. */
  model: string;
  /**
   * Flat token count. The OpenAI-compatible provider in this file fills it
   * with the same value as `usage.outputTokens`.
   */
  tokensUsed?: number;
  /**
   * Flat stop reason. The OpenAI-compatible provider in this file fills it
   * with the same value as `finishReason`.
   */
  stopReason?: string;
}
7586
/** Identifier of the wire API used to reach a model (`"none"` when there is no API). */
export type LlmModelApi =
  | "chat-completions"
  | "responses"
  | "anthropic-messages"
  | "ollama-chat"
  | "mcp-sampling"
  | "none";
88+
/**
 * Feature flags describing which request options may be sent to a given
 * model, and through which API it is reached.
 */
export interface ModelCapabilities {
  /** Model name the capabilities apply to. */
  model: string;
  /** Wire API used for this model. */
  api: LlmModelApi;
  /** Whether a `temperature` value may be included in the request. */
  supportsTemperature: boolean;
  /** Whether the model accepts a reasoning-effort setting. */
  supportsReasoningEffort: boolean;
  /** Whether the model supports structured outputs. */
  supportsStructuredOutputs: boolean;
  /** Whether the model supports JSON-schema constrained output. */
  supportsJsonSchema: boolean;
}
97+
7698/**
7799 * Options for LLM completion requests.
78100 *
@@ -256,6 +278,85 @@ class OllamaProvider implements LlmProvider {
256278 */
257279const _jsonSchemaUnsupported = new Set < string > ( ) ;
258280
/** Structured-output flags shared by every entry in the OpenAI capability table. */
const OPENAI_STRUCTURED_OUTPUT_FLAGS = {
  supportsStructuredOutputs: true,
  supportsJsonSchema: true,
} as const;

/**
 * Ordered capability table for OpenAI model names.
 *
 * Lookups take the FIRST entry whose pattern matches, so order matters: the
 * `gpt-5.5` entry must precede the generic `gpt-N.M` entry, which in turn
 * must precede the `o*` / bare `gpt-5` entry.
 */
const OPENAI_MODEL_CAPABILITIES: Array<{ pattern: RegExp; capabilities: Omit<ModelCapabilities, "model"> }> = [
  // gpt-5.5 and suffixed variants: Responses API, reasoning effort, no temperature.
  {
    pattern: /^gpt-5\.5(?:\b|[-_])/i,
    capabilities: {
      api: "responses",
      supportsTemperature: false,
      supportsReasoningEffort: true,
      ...OPENAI_STRUCTURED_OUTPUT_FLAGS,
    },
  },
  // Dotted releases gpt-4.1 through gpt-9.9 (anything not caught above).
  {
    pattern: /^gpt-[4-9]\.[1-9]/i,
    capabilities: {
      api: "chat-completions",
      supportsTemperature: true,
      supportsReasoningEffort: false,
      ...OPENAI_STRUCTURED_OUTPUT_FLAGS,
    },
  },
  // o-series and bare gpt-5 variants: reasoning effort, no temperature.
  {
    pattern: /^(o[1-9]|gpt-5(?:\b|[-_]))/i,
    capabilities: {
      api: "chat-completions",
      supportsTemperature: false,
      supportsReasoningEffort: true,
      ...OPENAI_STRUCTURED_OUTPUT_FLAGS,
    },
  },
];
313+
/**
 * Resolve the wire API and feature flags for a provider/model pair.
 *
 * Provider matching is case-insensitive and ignores surrounding whitespace;
 * the model name is trimmed but otherwise returned as given.
 *
 * - `openai`: the first matching entry of `OPENAI_MODEL_CAPABILITIES`; a
 *   model matching no entry falls back to chat-completions with temperature
 *   and structured-output support and no reasoning-effort support.
 * - `anthropic`: `anthropic-messages`, temperature only.
 * - `ollama` / `lmstudio`: `ollama-chat` / `chat-completions`, temperature only.
 * - `sampling`: `mcp-sampling`, every flag off.
 * - anything else: `none`, every flag off.
 *
 * @param provider - Provider identifier such as `"openai"` or `"ollama"`.
 * @param model - Model name; leading/trailing whitespace is stripped.
 * @returns Capabilities for the model, with `model` set to the trimmed name.
 */
export function getModelCapabilities(provider: LlmProviderType | string, model: string): ModelCapabilities {
  // Normalize the provider the same way as the model: without the trim, a
  // padded value such as " openai " would silently fall through to "none".
  const normalizedProvider = provider.trim().toLowerCase();
  const normalizedModel = model.trim();

  switch (normalizedProvider) {
    case "openai": {
      // First match wins; the table is ordered from most to least specific.
      const match = OPENAI_MODEL_CAPABILITIES.find((entry) => entry.pattern.test(normalizedModel));
      return {
        model: normalizedModel,
        ...(match?.capabilities ?? {
          api: "chat-completions" as const,
          supportsTemperature: true,
          supportsReasoningEffort: false,
          supportsStructuredOutputs: true,
          supportsJsonSchema: true,
        }),
      };
    }
    case "anthropic":
      return {
        model: normalizedModel,
        api: "anthropic-messages",
        supportsTemperature: true,
        supportsReasoningEffort: false,
        supportsStructuredOutputs: false,
        supportsJsonSchema: false,
      };
    case "ollama":
    case "lmstudio":
      return {
        model: normalizedModel,
        api: normalizedProvider === "ollama" ? "ollama-chat" : "chat-completions",
        supportsTemperature: true,
        supportsReasoningEffort: false,
        supportsStructuredOutputs: false,
        supportsJsonSchema: false,
      };
    default:
      // Unknown providers get no API; only "sampling" maps to MCP sampling.
      return {
        model: normalizedModel,
        api: normalizedProvider === "sampling" ? "mcp-sampling" : "none",
        supportsTemperature: false,
        supportsReasoningEffort: false,
        supportsStructuredOutputs: false,
        supportsJsonSchema: false,
      };
  }
}
359+
259360/** Heuristic: error body indicates the strict json_schema form is unsupported. */
260361function _isJsonSchemaUnsupportedError ( status : number , body : string ) : boolean {
261362 if ( status < 400 || status >= 500 ) return false ;
@@ -303,10 +404,11 @@ class OpenAiCompatibleProvider implements LlmProvider {
303404 const temp = options ?. temperature ?? this . defaultTemperature ;
304405 const maxTokens = options ?. maxTokens ?? this . defaultMaxTokens ;
305406 const model = options ?. model ?? this . model ;
407+ const capabilities = getModelCapabilities ( this . name , model ) ;
306408
307409 // Newer OpenAI models (o1/o3/o4-mini, gpt-4.1, gpt-5.4-nano, etc.) require
308410 // "max_completion_tokens" instead of the legacy "max_tokens" parameter.
309- const useNewTokenParam = / ^ ( o [ 1 - 9 ] | g p t - [ 4 - 9 ] \. [ 1 - 9 ] | g p t - 5 ) / . test ( model ) ;
411+ const useNewTokenParam = / ^ ( o [ 1 - 9 ] | g p t - [ 4 - 9 ] \. [ 1 - 9 ] | g p t - 5 ) / i . test ( model ) ;
310412
311413 const downgradeKey = `${ this . name } :${ model } ` ;
312414 const knownUnsupported = _jsonSchemaUnsupported . has ( downgradeKey ) ;
@@ -315,7 +417,7 @@ class OpenAiCompatibleProvider implements LlmProvider {
315417 const body : Record < string , unknown > = {
316418 model,
317419 messages : messages . map ( m => ( { role : m . role , content : m . content } ) ) ,
318- temperature : temp ,
420+ ... ( capabilities . supportsTemperature ? { temperature : temp } : { } ) ,
319421 ...( useNewTokenParam
320422 ? { max_completion_tokens : maxTokens }
321423 : { max_tokens : maxTokens } ) ,
@@ -385,11 +487,16 @@ class OpenAiCompatibleProvider implements LlmProvider {
385487 } ;
386488
387489 const choice = data . choices ?. [ 0 ] ;
490+ const usage : TokenUsage | undefined = data . usage ?. completion_tokens === undefined
491+ ? undefined
492+ : { outputTokens : data . usage . completion_tokens } ;
388493 return {
389494 text : choice ?. message ?. content ?? "" ,
390495 model : data . model ?? this . model ,
391- tokensUsed : data . usage ?. completion_tokens ,
496+ tokensUsed : usage ?. outputTokens ,
392497 stopReason : choice ?. finish_reason ,
498+ finishReason : choice ?. finish_reason ,
499+ usage,
393500 } ;
394501 }
395502}
0 commit comments