Integrate grok-4-fast-non-reasoning for xai provider (#5028)

juliettech13 · web-flow · commit 4f73e5f0e5e3 · 2025-10-08T18:17:12.000-04:00
* integrate grok-4-fast-non-reasoning for xai provider

* update the grok-4-fast-non-reasoning model description
diff --git a/packages/__tests__/cost/__snapshots__/registrySnapshots.test.ts.snap b/packages/__tests__/cost/__snapshots__/registrySnapshots.test.ts.snap
@@ -3128,14 +3128,38 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = `
         "*",
       ],
     },
+    "grok-4-fast-non-reasoning:xai": {
+      "context": 2000000,
+      "crossRegion": false,
+      "maxTokens": 2000000,
+      "modelId": "grok-4-fast-non-reasoning",
+      "parameters": [
+        "logprobs",
+        "max_tokens",
+        "response_format",
+        "seed",
+        "structured_outputs",
+        "temperature",
+        "tool_choice",
+        "tools",
+        "top_logprobs",
+        "top_p",
+      ],
+      "provider": "xai",
+      "ptbEnabled": true,
+      "regions": [
+        "*",
+      ],
+    },
     "grok-4-fast-reasoning:xai": {
       "context": 2000000,
       "crossRegion": false,
-      "maxTokens": 30000,
+      "maxTokens": 2000000,
       "modelId": "grok-4-fast",
       "parameters": [
         "logprobs",
         "max_tokens",
+        "reasoning",
         "response_format",
         "seed",
         "structured_outputs",
@@ -3483,6 +3507,7 @@ exports[`Registry Snapshots model coverage snapshot 1`] = `
     "xai",
     "xai",
     "xai",
+    "xai",
   ],
   "zai/glm-4": [
     "novita",
@@ -4673,6 +4698,12 @@ exports[`Registry Snapshots verify registry state 1`] = `
         "xai",
       ],
     },
+    {
+      "model": "grok-4-fast-non-reasoning",
+      "providers": [
+        "xai",
+      ],
+    },
     {
       "model": "grok-4-fast-reasoning",
       "providers": [
@@ -4897,7 +4928,7 @@ exports[`Registry Snapshots verify registry state 1`] = `
       "provider": "vertex",
     },
     {
-      "modelCount": 5,
+      "modelCount": 6,
       "provider": "xai",
     },
   ],
@@ -4935,6 +4966,7 @@ exports[`Registry Snapshots verify registry state 1`] = `
     "grok-3",
     "grok-3-mini",
     "grok-4",
+    "grok-4-fast-non-reasoning",
     "grok-4-fast-reasoning",
     "grok-code-fast-1",
     "kimi-k2",
@@ -4970,9 +5002,9 @@ exports[`Registry Snapshots verify registry state 1`] = `
     "claude-3.5-haiku:openrouter:*",
   ],
   "totalArchivedConfigs": 0,
-  "totalEndpoints": 138,
-  "totalModelProviderConfigs": 138,
-  "totalModelsWithPtb": 58,
+  "totalEndpoints": 139,
+  "totalModelProviderConfigs": 139,
+  "totalModelsWithPtb": 59,
   "totalProviders": 13,
 }
 `;
diff --git a/packages/cost/models/authors/xai/endpoints.ts b/packages/cost/models/authors/xai/endpoints.ts
@@ -120,7 +120,7 @@ export const endpoints = {
       },
     ],
     contextLength: 2_000_000,
-    maxCompletionTokens: 30_000,
+    maxCompletionTokens: 2_000_000,
     supportedParameters: [
       "tools",
       "tool_choice",
@@ -131,7 +131,51 @@ export const endpoints = {
       "top_p",
       "seed",
       "logprobs",
-      "top_logprobs"
+      "top_logprobs",
+      "reasoning",
+    ],
+    ptbEnabled: true,
+    endpointConfigs: {
+      "*": {},
+    },
+  },
+  "grok-4-fast-non-reasoning:xai": {
+    providerModelId: "grok-4-fast-non-reasoning",
+    provider: "xai",
+    author: "xai",
+    pricing: [
+      {
+        threshold: 0,
+        input: 0.0000002, // $0.20 per 1M tokens (up to 128K context)
+        output: 0.0000005, // $0.50 per 1M tokens (up to 128K context)
+        web_search: 0.025, // $25.00 per 1K sources
+        cacheMultipliers: {
+          cachedInput: 0.25, // $0.05 / $0.20 = 0.25
+        },
+      },
+      {
+        threshold: 128000, // Above 128K context window
+        input: 0.0000004, // $0.40 per 1M tokens (over 128K context)
+        output: 0.000001, // $1.00 per 1M tokens (over 128K context)
+        web_search: 0.025, // $25.00 per 1K sources
+        cacheMultipliers: {
+          cachedInput: 0.125, // $0.05 / $0.40 = 0.125
+        },
+      },
+    ],
+    contextLength: 2_000_000,
+    maxCompletionTokens: 2_000_000,
+    supportedParameters: [
+      "tools",
+      "tool_choice",
+      "structured_outputs",
+      "response_format",
+      "max_tokens",
+      "temperature",
+      "top_p",
+      "seed",
+      "logprobs",
+      "top_logprobs",
     ],
     ptbEnabled: true,
     endpointConfigs: {
diff --git a/packages/cost/models/authors/xai/models.ts b/packages/cost/models/authors/xai/models.ts
@@ -28,12 +28,23 @@ export const models = {
     author: "xai",
     description:
       "Grok 4 Fast is xAI's latest advancement in cost-efficient reasoning models. Built on xAI’s learnings from Grok 4, Grok 4 Fast delivers frontier-level performance across Enterprise and Consumer domains—with exceptional token efficiency. This model pushes the boundaries for smaller and faster AI, making high-quality reasoning accessible to more users and developers. Grok 4 Fast features state-of-the-art (SOTA) cost-efficiency, cutting-edge web and X search capabilities, a 2M token context window, and a unified architecture that blends reasoning and non-reasoning modes in one model.",
-    contextLength: 2_000_000,
-    maxOutputTokens: 30_000,
+    contextLength: 2000000,
+    maxOutputTokens: 2000000,
     created: "2025-09-01T00:00:00.000Z",
     modality: { inputs: ["text", "image"], outputs: ["text"] },
     tokenizer: "Grok",
   },
+  "grok-4-fast-non-reasoning": {
+    name: "xAI Grok 4 Fast Non-Reasoning",
+    author: "xai",
+    description:
+      "Grok 4 Fast is xAI's latest advancement in cost-efficient reasoning models. Built on xAI’s learnings from Grok 4, Grok 4 Fast delivers frontier-level performance across Enterprise and Consumer domains—with exceptional token efficiency. This model pushes the boundaries for smaller and faster AI, making high-quality reasoning accessible to more users and developers. Grok 4 Fast features state-of-the-art (SOTA) cost-efficiency, cutting-edge web and X search capabilities, a 2M token context window. This non-reasoning model specifically excludes reasoning capabilities.",
+    contextLength: 2000000,
+    maxOutputTokens: 2000000,
+    created: "2025-09-19T00:00:00.000Z",
+    modality: { inputs: ["text", "image"], outputs: ["text"] }, // same inputs as the grok-4-fast-reasoning entry
+    tokenizer: "Grok",
+  },
   "grok-3": {
     name: "xAI: Grok 3",
     author: "xai",
diff --git a/worker/test/ai-gateway/registry-xai.spec.ts b/worker/test/ai-gateway/registry-xai.spec.ts