Skip to content

Commit d61b5ce

Browse files
authored
integrate nebius llama 3.1-8b-turbo (#5023)
* integrate nebius llama 3.1-8b-fast
* remove unneeded usage processor
1 parent cbe299a commit d61b5ce

File tree

10 files changed

+993
-5
lines changed

10 files changed

+993
-5
lines changed

packages/__tests__/cost/__snapshots__/registrySnapshots.test.ts.snap

Lines changed: 45 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1459,6 +1459,35 @@ exports[`Registry Snapshots endpoint configurations snapshot 1`] = `
14591459
"*",
14601460
],
14611461
},
1462+
"llama-3.1-8b-instruct-turbo:nebius": {
1463+
"context": 128000,
1464+
"crossRegion": false,
1465+
"maxTokens": 8192,
1466+
"modelId": "meta-llama/Meta-Llama-3.1-8B-Instruct-fast",
1467+
"parameters": [
1468+
"frequency_penalty",
1469+
"functions",
1470+
"logit_bias",
1471+
"logprobs",
1472+
"max_tokens",
1473+
"presence_penalty",
1474+
"response_format",
1475+
"seed",
1476+
"stop",
1477+
"structured_outputs",
1478+
"temperature",
1479+
"tool_choice",
1480+
"tools",
1481+
"top_k",
1482+
"top_logprobs",
1483+
"top_p",
1484+
],
1485+
"provider": "nebius",
1486+
"ptbEnabled": true,
1487+
"regions": [
1488+
"*",
1489+
],
1490+
},
14621491
"llama-3.1-8b-instruct:deepinfra": {
14631492
"context": 131072,
14641493
"crossRegion": false,
@@ -3366,6 +3395,7 @@ exports[`Registry Snapshots model coverage snapshot 1`] = `
33663395
"groq",
33673396
"groq",
33683397
"groq",
3398+
"nebius",
33693399
"novita",
33703400
"novita",
33713401
"novita",
@@ -4107,6 +4137,13 @@ exports[`Registry Snapshots pricing snapshot 1`] = `
41074137
"web_search": 0,
41084138
},
41094139
],
4140+
"nebius": [
4141+
{
4142+
"input": 3e-8,
4143+
"output": 9e-8,
4144+
"threshold": 0,
4145+
},
4146+
],
41104147
"novita": [
41114148
{
41124149
"input": 1.3e-7,
@@ -4688,6 +4725,7 @@ exports[`Registry Snapshots verify registry state 1`] = `
46884725
"model": "llama-3.1-8b-instruct-turbo",
46894726
"providers": [
46904727
"deepinfra",
4728+
"nebius",
46914729
],
46924730
},
46934731
{
@@ -4838,6 +4876,10 @@ exports[`Registry Snapshots verify registry state 1`] = `
48384876
"modelCount": 14,
48394877
"provider": "groq",
48404878
},
4879+
{
4880+
"modelCount": 1,
4881+
"provider": "nebius",
4882+
},
48414883
{
48424884
"modelCount": 11,
48434885
"provider": "novita",
@@ -4928,9 +4970,9 @@ exports[`Registry Snapshots verify registry state 1`] = `
49284970
"claude-3.5-haiku:openrouter:*",
49294971
],
49304972
"totalArchivedConfigs": 0,
4931-
"totalEndpoints": 137,
4932-
"totalModelProviderConfigs": 137,
4973+
"totalEndpoints": 138,
4974+
"totalModelProviderConfigs": 138,
49334975
"totalModelsWithPtb": 58,
4934-
"totalProviders": 12,
4976+
"totalProviders": 13,
49354977
}
49364978
`;

packages/cost/models/authors/meta/llama/endpoints.ts

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ export const endpoints = {
101101
"logit_bias",
102102
"functions",
103103
"tools",
104-
"tool_choice"
104+
"tool_choice",
105105
],
106106
ptbEnabled: true,
107107
endpointConfigs: {
@@ -529,6 +529,43 @@ export const endpoints = {
529529
"*": {},
530530
},
531531
},
532+
"llama-3.1-8b-instruct-turbo:nebius": {
533+
providerModelId: "meta-llama/Meta-Llama-3.1-8B-Instruct-fast",
534+
provider: "nebius",
535+
author: "meta-llama",
536+
pricing: [
537+
{
538+
threshold: 0,
539+
input: 0.00000003, // $0.03/1M tokens
540+
output: 0.00000009, // $0.09/1M tokens
541+
},
542+
],
543+
quantization: "fp8",
544+
contextLength: 128_000,
545+
maxCompletionTokens: 8_192,
546+
supportedParameters: [
547+
"structured_outputs",
548+
"response_format",
549+
"max_tokens",
550+
"temperature",
551+
"top_p",
552+
"stop",
553+
"frequency_penalty",
554+
"presence_penalty",
555+
"seed",
556+
"top_k",
557+
"logit_bias",
558+
"logprobs",
559+
"top_logprobs",
560+
"functions",
561+
"tool_choice",
562+
"tools",
563+
],
564+
ptbEnabled: true,
565+
endpointConfigs: {
566+
"*": {},
567+
},
568+
},
532569
"llama-3.1-8b-instruct:novita": {
533570
providerModelId: "meta-llama/llama-3.1-8b-instruct",
534571
provider: "novita",

packages/cost/models/provider-helpers.ts

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,8 @@ export function heliconeProviderToModelProviderName(
5151
return "deepinfra";
5252
case "NOVITA":
5353
return "novita";
54+
case "NEBIUS":
55+
return "nebius";
5456
// new registry does not have
5557
case "LOCAL":
5658
case "HELICONE":
@@ -66,7 +68,6 @@ export function heliconeProviderToModelProviderName(
6668
case "QSTASH":
6769
case "FIRECRAWL":
6870
case "AVIAN":
69-
case "NEBIUS":
7071
case "OPENPIPE":
7172
case "CHUTES":
7273
case "LLAMA":

packages/cost/models/providers/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import { GoogleProvider } from "./google";
1212
import { DeepInfraProvider } from "./deepinfra";
1313
import { OpenRouterProvider } from "./openrouter";
1414
import { NovitaProvider } from "./novita";
15+
import { NebiusProvider } from "./nebius";
1516

1617
// Create singleton instances (stateless, so safe to share)
1718
export const providers = {
@@ -29,6 +30,7 @@ export const providers = {
2930
"google-ai-studio": new GoogleProvider(),
3031
openrouter: new OpenRouterProvider(),
3132
novita: new NovitaProvider(),
33+
nebius: new NebiusProvider()
3234
} as const;
3335

3436
export type ModelProviderName = keyof typeof providers;
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import { BaseProvider } from "./base";
2+
import type { Endpoint, RequestParams } from "../types";
3+
4+
/**
 * Provider definition for Nebius.
 *
 * Declares the static metadata for the provider (display name, API base URL,
 * auth kind, and reference pages) and builds the request URL.
 * NOTE(review): how `BaseProvider` consumes these fields is not visible from
 * this file — confirm against the base class.
 */
export class NebiusProvider extends BaseProvider {
  readonly displayName = "Nebius";
  // Keeps its trailing slash: `buildUrl` appends a relative path directly.
  readonly baseUrl = "https://api.studio.nebius.com/v1/";
  readonly auth = "api-key" as const;
  readonly pricingPages = [
    "https://nebius.com/prices-ai-studio",
    "https://nebius.com/prices",
  ];
  readonly modelPages = [
    "https://studio.nebius.com/",
  ];

  /**
   * Returns the chat-completions URL under `baseUrl`.
   *
   * @param endpoint - Not read by this implementation.
   * @param requestParams - Not read by this implementation.
   * @returns `baseUrl` followed by `chat/completions`.
   */
  buildUrl(endpoint: Endpoint, requestParams: RequestParams): string {
    const chatCompletionsPath = "chat/completions";
    return this.baseUrl + chatCompletionsPath;
  }
}

packages/cost/usage/getUsageProcessor.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ export function getUsageProcessor(
1717
case "openai":
1818
case "azure":
1919
case "deepinfra":
20+
case "nebius":
2021
case "novita":
2122
return new OpenAIUsageProcessor();
2223
case "anthropic":

web/data/providers.ts

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -224,6 +224,16 @@ export const providers: Provider[] = [
224224
apiKeyPlaceholder: "...",
225225
relevanceScore: 10,
226226
},
227+
{
228+
id: "nebius",
229+
name: "Nebius",
230+
logoUrl: "/assets/home/providers/nebius.webp",
231+
description: "Configure your Nebius API keys",
232+
docsUrl: "https://docs.helicone.ai/getting-started/integration-methods",
233+
apiKeyLabel: "Nebius API Key",
234+
apiKeyPlaceholder: "...",
235+
relevanceScore: 5,
236+
},
227237
];
228238

229239
// Mock recently used providers - in a real app, this would come from user data
2.38 KB
Loading

0 commit comments

Comments
 (0)