diff --git a/packages/__tests__/cost/__snapshots__/registrySnapshots.test.ts.snap b/packages/__tests__/cost/__snapshots__/registrySnapshots.test.ts.snap index a55441b574..0ad950ace1 100644 --- a/packages/__tests__/cost/__snapshots__/registrySnapshots.test.ts.snap +++ b/packages/__tests__/cost/__snapshots__/registrySnapshots.test.ts.snap @@ -3128,14 +3128,38 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = ` "*", ], }, + "grok-4-fast-non-reasoning:xai": { + "context": 2000000, + "crossRegion": false, + "maxTokens": 2000000, + "modelId": "grok-4-fast-non-reasoning", + "parameters": [ + "logprobs", + "max_tokens", + "response_format", + "seed", + "structured_outputs", + "temperature", + "tool_choice", + "tools", + "top_logprobs", + "top_p", + ], + "provider": "xai", + "ptbEnabled": true, + "regions": [ + "*", + ], + }, "grok-4-fast-reasoning:xai": { "context": 2000000, "crossRegion": false, - "maxTokens": 30000, + "maxTokens": 2000000, "modelId": "grok-4-fast", "parameters": [ "logprobs", "max_tokens", + "reasoning", "response_format", "seed", "structured_outputs", @@ -3483,6 +3507,7 @@ exports[`Registry Snapshots model coverage snapshot 1`] = ` "xai", "xai", "xai", + "xai", ], "zai/glm-4": [ "novita", @@ -4673,6 +4698,12 @@ exports[`Registry Snapshots verify registry state 1`] = ` "xai", ], }, + { + "model": "grok-4-fast-non-reasoning", + "providers": [ + "xai", + ], + }, { "model": "grok-4-fast-reasoning", "providers": [ @@ -4897,7 +4928,7 @@ exports[`Registry Snapshots verify registry state 1`] = ` "provider": "vertex", }, { - "modelCount": 5, + "modelCount": 6, "provider": "xai", }, ], @@ -4935,6 +4966,7 @@ exports[`Registry Snapshots verify registry state 1`] = ` "grok-3", "grok-3-mini", "grok-4", + "grok-4-fast-non-reasoning", "grok-4-fast-reasoning", "grok-code-fast-1", "kimi-k2", @@ -4970,9 +5002,9 @@ exports[`Registry Snapshots verify registry state 1`] = ` "claude-3.5-haiku:openrouter:*", ], "totalArchivedConfigs": 0, - "totalEndpoints": 
138, - "totalModelProviderConfigs": 138, - "totalModelsWithPtb": 58, + "totalEndpoints": 139, + "totalModelProviderConfigs": 139, + "totalModelsWithPtb": 59, "totalProviders": 13, } `; diff --git a/packages/cost/models/authors/xai/endpoints.ts b/packages/cost/models/authors/xai/endpoints.ts index b9a832203a..4a343729ec 100644 --- a/packages/cost/models/authors/xai/endpoints.ts +++ b/packages/cost/models/authors/xai/endpoints.ts @@ -120,7 +120,7 @@ export const endpoints = { }, ], contextLength: 2_000_000, - maxCompletionTokens: 30_000, + maxCompletionTokens: 2_000_000, supportedParameters: [ "tools", "tool_choice", @@ -131,7 +131,51 @@ export const endpoints = { "top_p", "seed", "logprobs", - "top_logprobs" + "top_logprobs", + "reasoning", + ], + ptbEnabled: true, + endpointConfigs: { + "*": {}, + }, + }, + "grok-4-fast-non-reasoning:xai": { + providerModelId: "grok-4-fast-non-reasoning", + provider: "xai", + author: "xai", + pricing: [ + { + threshold: 0, + input: 0.0000002, // $0.20 per 1M tokens (up to 128K context) + output: 0.0000005, // $0.50 per 1M tokens (up to 128K context) + web_search: 0.025, // $25.00 per 1K sources + cacheMultipliers: { + cachedInput: 0.25, // $0.05 / $0.20 = 0.25 + }, + }, + { + threshold: 128000, // Above 128K context window + input: 0.0000004, // $0.40 per 1M tokens (over 128K context) + output: 0.000001, // $1.00 per 1M tokens (over 128K context) + web_search: 0.025, // $25.00 per 1K sources + cacheMultipliers: { + cachedInput: 0.125, // $0.05 / $0.40 = 0.125 + }, + }, + ], + contextLength: 2_000_000, + maxCompletionTokens: 2_000_000, + supportedParameters: [ + "tools", + "tool_choice", + "structured_outputs", + "response_format", + "max_tokens", + "temperature", + "top_p", + "seed", + "logprobs", + "top_logprobs", ], ptbEnabled: true, endpointConfigs: { diff --git a/packages/cost/models/authors/xai/models.ts b/packages/cost/models/authors/xai/models.ts index f7024b988f..3d2a4530b5 100644 --- 
a/packages/cost/models/authors/xai/models.ts +++ b/packages/cost/models/authors/xai/models.ts @@ -28,12 +28,23 @@ export const models = { author: "xai", description: "Grok 4 Fast is xAI's latest advancement in cost-efficient reasoning models. Built on xAI’s learnings from Grok 4, Grok 4 Fast delivers frontier-level performance across Enterprise and Consumer domains—with exceptional token efficiency. This model pushes the boundaries for smaller and faster AI, making high-quality reasoning accessible to more users and developers. Grok 4 Fast features state-of-the-art (SOTA) cost-efficiency, cutting-edge web and X search capabilities, a 2M token context window, and a unified architecture that blends reasoning and non-reasoning modes in one model.", - contextLength: 2_000_000, - maxOutputTokens: 30_000, + contextLength: 2000000, + maxOutputTokens: 2000000, created: "2025-09-01T00:00:00.000Z", modality: { inputs: ["text", "image"], outputs: ["text"] }, tokenizer: "Grok", }, + "grok-4-fast-non-reasoning": { + name: "xAI Grok 4 Fast Non-Reasoning", + author: "xai", + description: + "Grok 4 Fast is xAI's latest advancement in cost-efficient reasoning models. Built on xAI’s learnings from Grok 4, Grok 4 Fast delivers frontier-level performance across Enterprise and Consumer domains—with exceptional token efficiency. This model pushes the boundaries for smaller and faster AI, making high-quality reasoning accessible to more users and developers. Grok 4 Fast features state-of-the-art (SOTA) cost-efficiency, cutting-edge web and X search capabilities, a 2M token context window. 
This non-reasoning model specifically excludes reasoning capabilities.", + contextLength: 2000000, + maxOutputTokens: 2000000, + created: "2025-09-19T00:00:00.000Z", + modality: { inputs: ["text", "image"], outputs: ["text"] }, + tokenizer: "Grok", + }, "grok-3": { name: "xAI: Grok 3", author: "xai", diff --git a/worker/test/ai-gateway/registry-xai.spec.ts index 8efeb56a63..9132b3b5c1 100644 --- a/worker/test/ai-gateway/registry-xai.spec.ts +++ b/worker/test/ai-gateway/registry-xai.spec.ts @@ -281,6 +281,204 @@ describe("xAI Registry Tests", () => { })); }); + describe("grok-4-fast-non-reasoning", () => { + it("should handle xai provider", () => + runGatewayTest({ + model: "grok-4-fast-non-reasoning/xai", + expected: { + providers: [ + { + url: "https://api.x.ai/v1/chat/completions", + response: "success", + model: "grok-4-fast-non-reasoning", + expects: xaiAuthExpectations, + }, + ], + finalStatus: 200, + }, + })); + + it("should auto-select xai provider when none specified", () => + runGatewayTest({ + model: "grok-4-fast-non-reasoning", + expected: { + providers: [ + { + url: "https://api.x.ai/v1/chat/completions", + response: "success", + model: "grok-4-fast-non-reasoning", + expects: xaiAuthExpectations, + }, + ], + finalStatus: 200, + }, + })); + + it("should handle tool calls with xai provider", () => + runGatewayTest({ + model: "grok-4-fast-non-reasoning/xai", + request: { + body: { + messages: [{ role: "user", content: "What's the weather?" 
}], + tools: [ + { + type: "function", + function: { + name: "get_weather", + description: "Get current weather", + parameters: { + type: "object", + properties: { + location: { type: "string" } + }, + required: ["location"] + } + } + } + ], + tool_choice: "auto", + temperature: 0.7, + max_tokens: 1000 + } + }, + expected: { + providers: [ + { + url: "https://api.x.ai/v1/chat/completions", + response: "success", + model: "grok-4-fast-non-reasoning", + expects: { + ...xaiAuthExpectations, + bodyContains: [ + "tools", + "tool_choice", + "get_weather", + "temperature", + "max_tokens" + ] + } + } + ], + finalStatus: 200 + } + })); + + it("should handle response format with xai provider", () => + runGatewayTest({ + model: "grok-4-fast-non-reasoning/xai", + request: { + body: { + messages: [{ role: "user", content: "Generate JSON data" }], + response_format: { type: "json_object" }, + temperature: 0.1, + top_p: 0.9 + } + }, + expected: { + providers: [ + { + url: "https://api.x.ai/v1/chat/completions", + response: "success", + model: "grok-4-fast-non-reasoning", + expects: { + ...xaiAuthExpectations, + bodyContains: [ + "response_format", + "json_object", + "temperature", + "top_p" + ] + } + } + ], + finalStatus: 200 + } + })); + + it("should handle all supported parameters", () => + runGatewayTest({ + model: "grok-4-fast-non-reasoning/xai", + request: { + body: { + messages: [ + { role: "user", content: "Test comprehensive parameters" } + ], + max_tokens: 1000, + temperature: 0.8, + top_p: 0.95, + seed: 12345, + logprobs: true, + top_logprobs: 5 + } + }, + expected: { + providers: [ + { + url: "https://api.x.ai/v1/chat/completions", + response: "success", + model: "grok-4-fast-non-reasoning", + expects: { + ...xaiAuthExpectations, + bodyContains: [ + "max_tokens", + "temperature", + "top_p", + "seed", + "logprobs", + "top_logprobs" + ] + } + } + ], + finalStatus: 200 + } + })); + + it("should handle structured outputs", () => + runGatewayTest({ + model: 
"grok-4-fast-non-reasoning/xai", + request: { + body: { + messages: [ + { role: "user", content: "Generate structured data" } + ], + response_format: { + type: "json_schema", + json_schema: { + name: "user_data", + schema: { + type: "object", + properties: { + name: { type: "string" }, + age: { type: "number" } + }, + required: ["name", "age"] + } + } + } + } + }, + expected: { + providers: [ + { + url: "https://api.x.ai/v1/chat/completions", + response: "success", + model: "grok-4-fast-non-reasoning", + expects: { + ...xaiAuthExpectations, + bodyContains: [ + "response_format", + "json_schema", + "user_data" + ] + } + } + ], + finalStatus: 200 + } + })); + }); + describe("grok-3", () => { it("should handle xai provider", () => runGatewayTest({ @@ -497,4 +695,102 @@ describe("xAI Registry Tests", () => { } })); }); + + describe("Error scenarios - grok-4-fast-non-reasoning", () => { + it("should handle xAI provider failure for grok-4-fast-non-reasoning", () => + runGatewayTest({ + model: "grok-4-fast-non-reasoning/xai", + expected: { + providers: [ + { + url: "https://api.x.ai/v1/chat/completions", + response: "failure", + statusCode: 500, + errorMessage: "xAI service unavailable" + } + ], + finalStatus: 500 + } + })); + + it("should handle rate limiting from xAI for grok-4-fast-non-reasoning", () => + runGatewayTest({ + model: "grok-4-fast-non-reasoning/xai", + expected: { + providers: [ + { + url: "https://api.x.ai/v1/chat/completions", + response: "failure", + statusCode: 429, + errorMessage: "Rate limit exceeded" + } + ], + finalStatus: 429 + } + })); + + it("should handle authentication failure for grok-4-fast-non-reasoning", () => + runGatewayTest({ + model: "grok-4-fast-non-reasoning/xai", + expected: { + providers: [ + { + url: "https://api.x.ai/v1/chat/completions", + response: "failure", + statusCode: 401, + errorMessage: "Invalid API key" + } + ], + finalStatus: 401 + } + })); + + it("should handle model not found error for grok-4-fast-non-reasoning", () 
=> + runGatewayTest({ + model: "grok-4-fast-non-reasoning/xai", + expected: { + providers: [ + { + url: "https://api.x.ai/v1/chat/completions", + response: "failure", + statusCode: 404, + errorMessage: "Model not found" + } + ], + finalStatus: 500 + } + })); + + it("should handle timeout for grok-4-fast-non-reasoning", () => + runGatewayTest({ + model: "grok-4-fast-non-reasoning/xai", + expected: { + providers: [ + { + url: "https://api.x.ai/v1/chat/completions", + response: "failure", + statusCode: 408, + errorMessage: "Request timeout" + } + ], + finalStatus: 500 + } + })); + + it("should handle bad request error for grok-4-fast-non-reasoning", () => + runGatewayTest({ + model: "grok-4-fast-non-reasoning/xai", + expected: { + providers: [ + { + url: "https://api.x.ai/v1/chat/completions", + response: "failure", + statusCode: 400, + errorMessage: "Bad request" + } + ], + finalStatus: 500 + } + })); + }); });