Skip to content

Commit ee4af94

Browse files
authored
Merge pull request #62 from donvito/feature/baseten-provider
Feature/baseten provider
2 parents 2197015 + 070a6ac commit ee4af94

File tree

7 files changed

+139
-3
lines changed

7 files changed

+139
-3
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ More to come...check swagger docs for updated endpoints.
5050
| [Vercel AI Gateway](https://vercel.com/ai-gateway) | Open source and private models | Available |
5151
| [LlamaCpp](https://github.com/ggml-org/llama.cpp) | Local models via llama.cpp server (self-hosted) | Available |
5252
| [Google Gemini](https://ai.google.dev/) | Gemini models via OpenAI-compatible interface | Available |
53+
| [Baseten](https://baseten.co/) | Cloud-hosted ML models with OpenAI-compatible API | Available |
5354

5455

5556
## Run the project
@@ -169,6 +170,10 @@ LMSTUDIO_BASE_URL=http://localhost:1234
169170
170171
# OpenRouter Configuration
171172
OPENROUTER_API_KEY=your-openrouter-api-key
173+
174+
# Baseten Configuration
175+
BASETEN_API_KEY=your-baseten-api-key
176+
BASETEN_BASE_URL=https://inference.baseten.co/v1
172177
```
173178

174179
### Google Gemini Setup

src/config/models.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,12 @@
7979
"models": [
8080
{ "name": "default", "capabilities": ["summarize", "pdf-summarizer", "rewrite", "compose", "planning", "keywords", "sentiment", "askText", "emailReply", "translate", "outline"], "notes": "Default model used is what you configued when running llama-server" }
8181
]
82+
},
83+
"baseten": {
84+
"enabled": true,
85+
"models": [
86+
{ "name": "openai/gpt-oss-120b", "capabilities": ["summarize", "pdf-summarizer", "pdf-translate", "rewrite", "compose", "planning", "keywords", "sentiment", "askText", "emailReply", "translate", "meetingNotes", "outline"], "notes": "Baseten hosted OpenAI GPT OSS 120B model with comprehensive capabilities." }
87+
]
8288
}
8389
}
8490
}

src/config/services.ts

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,14 @@ export interface GoogleConfig extends ServiceConfig {
5656
model: string;
5757
}
5858

59+
/**
 * Configuration for the Baseten provider (OpenAI-compatible hosted inference).
 * Extends the shared ServiceConfig with Baseten-specific connection settings.
 */
export interface BasetenConfig extends ServiceConfig {
  // API key sent as `Api-Key <key>` in the Authorization header.
  apiKey: string;
  // Base URL of the OpenAI-compatible endpoint (default: https://inference.baseten.co/v1).
  baseURL: string;
  // Default model slug for plain-text and streaming generation.
  model: string;
  // Model slug used for structured (schema-validated) chat generation.
  chatModel: string;
  // Request timeout in milliseconds — NOTE(review): not obviously consumed by the provider code; confirm it is wired through.
  timeout?: number;
}
66+
5967
// OpenAI Configuration
6068
export const openaiConfig: OpenAIConfig = {
6169
name: 'OpenAI',
@@ -137,8 +145,20 @@ export const googleConfig: GoogleConfig = {
137145
model: process.env.GEMINI_MODEL || 'gemini-2.5-flash-lite',
138146
};
139147

148+
// Baseten Configuration
149+
export const basetenConfig: BasetenConfig = {
150+
name: 'Baseten',
151+
enabled: !!process.env.BASETEN_API_KEY,
152+
priority: 9,
153+
apiKey: process.env.BASETEN_API_KEY || '',
154+
baseURL: process.env.BASETEN_BASE_URL || 'https://inference.baseten.co/v1',
155+
model: process.env.BASETEN_MODEL || 'default',
156+
chatModel: process.env.BASETEN_CHAT_MODEL || process.env.BASETEN_MODEL || 'openai/gpt-oss-120b',
157+
timeout: parseInt(process.env.BASETEN_TIMEOUT || '30000'),
158+
};
159+
140160
// Available services
141-
export const availableServices = [openaiConfig, anthropicConfig, ollamaConfig, openrouterConfig, lmstudioConfig, aigatewayConfig, llamacppConfig, googleConfig];
161+
export const availableServices = [openaiConfig, anthropicConfig, ollamaConfig, openrouterConfig, lmstudioConfig, aigatewayConfig, llamacppConfig, googleConfig, basetenConfig];
142162

143163
// Get the primary service (highest priority enabled service)
144164
export function getPrimaryService(): ServiceConfig | null {

src/services/baseten.ts

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import { z } from 'zod';
2+
import { createOpenAICompatible } from '@ai-sdk/openai-compatible';
3+
import { generateText, streamText, generateObject } from 'ai';
4+
import type { AIProvider } from './interfaces';
5+
import { basetenConfig } from '../config/services';
6+
7+
const normalizedBase = (basetenConfig.baseURL || 'https://inference.baseten.co/v1').replace(/\/$/, '');
8+
const BASETEN_BASE_URL = normalizedBase;
9+
10+
const baseten = createOpenAICompatible({
11+
name: 'baseten',
12+
baseURL: `${BASETEN_BASE_URL}`,
13+
headers: {
14+
'Authorization': `Api-Key ${basetenConfig.apiKey}`,
15+
},
16+
});
17+
18+
class BasetenProvider implements AIProvider {
19+
name = 'baseten' as const;
20+
21+
async generateChatStructuredResponse(
22+
prompt: string,
23+
schema: z.ZodType,
24+
model: string = basetenConfig.chatModel,
25+
temperature: number = 0
26+
): Promise<any> {
27+
try {
28+
const modelToUse = model || basetenConfig.chatModel;
29+
30+
const result = await generateObject({
31+
model: baseten(modelToUse),
32+
schema,
33+
prompt,
34+
temperature,
35+
});
36+
37+
return {
38+
object: result.object,
39+
finishReason: result.finishReason,
40+
usage: {
41+
promptTokens: result.usage?.promptTokens || 0,
42+
completionTokens: result.usage?.completionTokens || 0,
43+
totalTokens: result.usage?.totalTokens || 0,
44+
},
45+
warnings: result.warnings,
46+
};
47+
} catch (error) {
48+
throw new Error(`Baseten structured response error: ${error}`);
49+
}
50+
}
51+
52+
53+
async generateChatTextResponse(
54+
prompt: string,
55+
model?: string,
56+
temperature: number = 0
57+
): Promise<any> {
58+
try {
59+
const modelToUse = baseten(model || basetenConfig.model);
60+
61+
const result = await generateText({
62+
model: modelToUse,
63+
prompt,
64+
temperature,
65+
});
66+
67+
return result;
68+
} catch (error) {
69+
console.error('Baseten text response error: ', error);
70+
throw new Error(`Baseten text response error: ${error}`);
71+
}
72+
}
73+
74+
async generateChatTextStreamResponse(
75+
prompt: string,
76+
model?: string,
77+
temperature: number = 0
78+
): Promise<any> {
79+
try {
80+
const modelToUse = baseten(model || basetenConfig.model);
81+
82+
const result = await streamText({
83+
model: modelToUse,
84+
prompt,
85+
temperature,
86+
});
87+
88+
return result;
89+
} catch (error) {
90+
console.error('Baseten streaming response error: ', error);
91+
throw new Error(`Baseten streaming response error: ${error}`);
92+
}
93+
}
94+
95+
async getAvailableModels(): Promise<string[]> {
96+
return ['openai/gpt-oss-120b'];
97+
}
98+
}
99+
100+
const provider = new BasetenProvider();
101+
102+
export default provider;
103+
export { BASETEN_BASE_URL };

src/services/interfaces.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import { z } from 'zod';
22

3-
export type ProviderName = 'openai' | 'anthropic' | 'ollama' | 'openrouter' | 'lmstudio' | 'aigateway' | 'llamacpp' | 'google';
3+
// Union of all provider identifiers recognized by the service registry.
export type ProviderName = 'openai' | 'anthropic' | 'ollama' | 'openrouter' | 'lmstudio' | 'aigateway' | 'llamacpp' | 'google' | 'baseten';
44

55
export interface AIProvider {
66
name: ProviderName;

src/services/providers.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
import { z } from 'zod';
22

3-
export const providersSupported = z.enum(['ollama', 'openai', 'anthropic', 'openrouter', 'lmstudio', 'aigateway', 'llamacpp', 'google']);
3+
// Zod enum of provider identifiers accepted at the API boundary — keep in sync with the ProviderName type in interfaces.ts.
export const providersSupported = z.enum(['ollama', 'openai', 'anthropic', 'openrouter', 'lmstudio', 'aigateway', 'llamacpp', 'google', 'baseten']);

src/services/registry.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import lmstudioProvider from './lmstudio';
88
import aigatewayProvider from './aigateway';
99
import llamacppProvider from './llamacpp';
1010
import geminiProvider from './google';
11+
import basetenProvider from './baseten';
1112

1213
export class ServiceRegistry {
1314
private providers = new Map<ProviderName, AIProvider>();
@@ -44,6 +45,7 @@ serviceRegistry.register(lmstudioProvider);
4445
serviceRegistry.register(aigatewayProvider);
4546
serviceRegistry.register(llamacppProvider);
4647
serviceRegistry.register(geminiProvider);
48+
serviceRegistry.register(basetenProvider);
4749

4850
// Helper for tests to replace the registry content
4951
export function replaceRegistryForTests(registry: ServiceRegistry) {

0 commit comments

Comments
 (0)