Skip to content

Commit 5a9fed4

Browse files
committed
Add support for GPT-5.1 model and reasoning controls
closes #1966
1 parent 5cad75e commit 5a9fed4

File tree

7 files changed

+57

-8
lines changed

src/backend/src/modules/puterai/OpenAiCompletionService/OpenAICompletionService.mjs

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -109,15 +109,18 @@ export class OpenAICompletionService {
109109
return this.#defaultModel;
110110
}
111111

112-
async complete({ messages, stream, model, tools, max_tokens, temperature }) {
112+
async complete({ messages, stream, model, tools, max_tokens, temperature, reasoning, text, reasoning_effort, verbosity }) {
113113
return await this.#complete(messages, {
114114
model: model,
115115
tools,
116116
moderation: true,
117117
stream,
118118
max_tokens,
119119
temperature,
120-
120+
reasoning,
121+
text,
122+
reasoning_effort,
123+
verbosity,
121124
});
122125
}
123126

@@ -166,6 +169,7 @@ export class OpenAICompletionService {
166169
async #complete(messages, {
167170
stream, moderation, model, tools,
168171
temperature, max_tokens,
172+
reasoning, text, reasoning_effort, verbosity,
169173
}) {
170174
// Validate messages
171175
if ( ! Array.isArray(messages) ) {
@@ -252,7 +256,11 @@ export class OpenAICompletionService {
252256
// that's missing. We normalise it here so the token count code works.
253257
messages = await OpenAIUtil.process_input_messages(messages);
254258

255-
const completion = await this.#openAi.chat.completions.create({
259+
const requestedReasoningEffort = reasoning_effort ?? reasoning?.effort;
260+
const requestedVerbosity = verbosity ?? text?.verbosity;
261+
const supportsReasoningControls = typeof model === 'string' && model.startsWith('gpt-5');
262+
263+
const completionParams = {
256264
user: user_private_uid,
257265
messages: messages,
258266
model: model,
@@ -263,7 +271,18 @@ export class OpenAICompletionService {
263271
...(stream ? {
264272
stream_options: { include_usage: true },
265273
} : {}),
266-
});
274+
};
275+
276+
if ( supportsReasoningControls ) {
277+
if ( requestedReasoningEffort ) {
278+
completionParams.reasoning_effort = requestedReasoningEffort;
279+
}
280+
if ( requestedVerbosity ) {
281+
completionParams.verbosity = requestedVerbosity;
282+
}
283+
}
284+
285+
const completion = await this.#openAi.chat.completions.create(completionParams);
267286
// TODO DS: simplify this logic for all the ai services, each service should handle its cost calculation in the service
268287
// for now I'm overloading this usage calculator to handle the future promise resolution...
269288
return OpenAIUtil.handle_completion_output({

src/backend/src/modules/puterai/OpenAiCompletionService/models.mjs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,16 @@
11
// TODO DS: centralize somewhere
22

33
export const OPEN_AI_MODELS = [
4+
{
5+
id: 'gpt-5.1',
6+
cost: {
7+
currency: 'usd-cents',
8+
tokens: 1_000_000,
9+
input: 125,
10+
output: 1000,
11+
},
12+
max_tokens: 128000,
13+
},
414
{
515
id: 'gpt-5-2025-08-07',
616
aliases: ['gpt-5'],
@@ -163,4 +173,4 @@ export const OPEN_AI_MODELS = [
163173
output: 15000,
164174
},
165175
},
166-
];
176+
];

src/backend/src/services/MeteringService/costMaps/openAiCostMap.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,9 @@
2020

2121
export const OPENAI_COST_MAP = {
2222
// GPT-5 models
23+
'openai:gpt-5.1:prompt_tokens': 125,
24+
'openai:gpt-5.1:cached_tokens': 13,
25+
'openai:gpt-5.1:completion_tokens': 1000,
2326
'openai:gpt-5-2025-08-07:prompt_tokens': 125,
2427
'openai:gpt-5-2025-08-07:cached_tokens': 13,
2528
'openai:gpt-5-2025-08-07:completion_tokens': 1000,

src/backend/src/services/MeteringService/costMaps/openrouterCostMap.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,10 @@ export const OPENROUTER_COST_MAP = {
130130
'openrouter:openai/gpt-5-chat:prompt': 125,
131131
'openrouter:openai/gpt-5-chat:completion': 1000,
132132
'openrouter:openai/gpt-5-chat:input_cache_read': 12,
133+
'openrouter:openai/gpt-5.1:prompt': 125,
134+
'openrouter:openai/gpt-5.1:completion': 1000,
135+
'openrouter:openai/gpt-5.1:web_search': 1000000,
136+
'openrouter:openai/gpt-5.1:input_cache_read': 12,
133137
'openrouter:openai/gpt-5:prompt': 125,
134138
'openrouter:openai/gpt-5:completion': 1000,
135139
'openrouter:openai/gpt-5:web_search': 1000000,

src/puter-js/index.d.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,16 @@ interface ChatOptions {
7878
stream?: boolean;
7979
max_tokens?: number;
8080
temperature?: number;
81+
reasoning?: {
82+
effort?: 'none' | 'low' | 'medium' | 'high' | 'minimal';
83+
[key: string]: unknown;
84+
};
85+
reasoning_effort?: 'none' | 'low' | 'medium' | 'high' | 'minimal';
86+
text?: {
87+
verbosity?: 'low' | 'medium' | 'high';
88+
[key: string]: unknown;
89+
};
90+
verbosity?: 'low' | 'medium' | 'high';
8191
tools?: ToolDefinition[];
8292
}
8393

src/puter-js/src/modules/AI.js

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -809,7 +809,7 @@ class AI{
809809
}
810810

811811
// Additional parameters to pass from userParams to requestParams
812-
const PARAMS_TO_PASS = ['tools', 'response'];
812+
const PARAMS_TO_PASS = ['tools', 'response', 'reasoning', 'reasoning_effort', 'text', 'verbosity'];
813813
for ( const name of PARAMS_TO_PASS ) {
814814
if ( userParams[name] ) {
815815
requestParams[name] = userParams[name];

src/puter-js/test/ai.test.js

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ const TEST_MODELS = [
77
"openrouter:anthropic/claude-sonnet-4",
88
"google/gemini-2.5-pro",
99
"deepseek-chat",
10+
"gpt-5.1",
1011
"gpt-5-nano",
1112
"openai/gpt-5-nano",
1213
"claude-sonnet-4-latest",
@@ -48,7 +49,9 @@ const testChatWithParametersCore = async function(model) {
4849
const result = await puter.ai.chat("What is 2+2?", {
4950
model: model,
5051
temperature: 0.7,
51-
max_tokens: 50
52+
max_tokens: 50,
53+
reasoning: { effort: 'low' },
54+
text: { verbosity: 'low' },
5255
});
5356

5457
// Check basic result structure
@@ -212,4 +215,4 @@ const generateAllTests = function() {
212215
};
213216

214217
// Export the generated tests
215-
window.aiTests = generateAllTests();
218+
window.aiTests = generateAllTests();

0 commit comments

Comments (0)