Skip to content

Commit 221993b

Browse files
committed
fix: open router cache token metering
1 parent bb90595 commit 221993b

File tree

6 files changed

+308
-338
lines changed

6 files changed

+308
-338
lines changed

src/backend/src/modules/puterai/ClaudeService.js

Lines changed: 20 additions & 74 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ const Messages = require('./lib/Messages');
2525
const FSNodeParam = require('../../api/filesystem/FSNodeParam');
2626
const { LLRead } = require('../../filesystem/ll_operations/ll_read');
2727
const { Context } = require('../../util/context');
28+
const mime = require('mime-types');
2829

2930
/**
3031
* ClaudeService class extends BaseService to provide integration with Anthropic's Claude AI models.
@@ -34,10 +35,6 @@ const { Context } = require('../../util/context');
3435
* @extends BaseService
3536
*/
3637
class ClaudeService extends BaseService {
37-
static MODULES = {
38-
Anthropic: require('@anthropic-ai/sdk'),
39-
};
40-
4138
/**
4239
* @type {import('@anthropic-ai/sdk').Anthropic}
4340
*/
@@ -53,7 +50,7 @@ class ClaudeService extends BaseService {
5350
/** @type {import('../../services/MeteringService/MeteringService').MeteringService} */
5451
#meteringService;
5552

56-
async _init() {
53+
async _init () {
5754
this.anthropic = new Anthropic({
5855
apiKey: this.config.apiKey,
5956
// 10 minutes is the default; we need to override the timeout to
@@ -75,7 +72,7 @@ class ClaudeService extends BaseService {
7572
* Returns the default model identifier for Claude API interactions
7673
* @returns {string} The default model ID 'claude-3-5-sonnet-latest'
7774
*/
78-
get_default_model() {
75+
get_default_model () {
7976
return 'claude-3-5-sonnet-latest';
8077
}
8178

@@ -87,7 +84,7 @@ class ClaudeService extends BaseService {
8784
*
8885
* @returns Promise<Array<Object>> Array of model details
8986
*/
90-
async models() {
87+
async models () {
9188
return this.models_();
9289
},
9390

@@ -97,7 +94,7 @@ class ClaudeService extends BaseService {
9794
* @description Retrieves all available model IDs and their aliases,
9895
* flattening them into a single array of strings that can be used for model selection
9996
*/
100-
async list() {
97+
async list () {
10198
const models = this.models_();
10299
const model_names = [];
103100
for ( const model of models ) {
@@ -118,10 +115,10 @@ class ClaudeService extends BaseService {
118115
* @returns {Object} Returns either a TypedValue with streaming response or a completion object
119116
* @this {ClaudeService}
120117
*/
121-
async complete({ messages, stream, model, tools, max_tokens, temperature }) {
118+
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
122119
tools = FunctionCalling.make_claude_tools(tools);
123120
// console.log("here are the messages: ", messages)
124-
121+
125122
let system_prompts;
126123
// unsure why system_prompts is an array but it always seems to only have exactly one element,
127124
// and the real array of system_prompts seems to be the [0].content -- NS
@@ -140,32 +137,32 @@ class ClaudeService extends BaseService {
140137
}
141138

142139
messages = messages.map(message => {
143-
if (message.cache_control) {
140+
if ( message.cache_control ) {
144141
message.content[0].cache_control = message.cache_control;
145142
}
146143
delete message.cache_control;
147-
return message
148-
})
144+
return message;
145+
});
149146

150147
const sdk_params = {
151148
model: model ?? this.get_default_model(),
152149
max_tokens: Math.floor(max_tokens) ||
153150
((
154151
model === 'claude-3-5-sonnet-20241022'
155152
|| model === 'claude-3-5-sonnet-20240620'
156-
) ? 8192 : this.models_().filter(e=>e.name === model)[0].max_tokens || 4096), //required
153+
) ? 8192 : this.models_().filter(e => e.name === model)[0].max_tokens || 4096), //required
157154
temperature: temperature || 0, // required
158155
...( (system_prompts && system_prompts[0]?.content) ? {
159-
system: system_prompts[0]?.content
156+
system: system_prompts[0]?.content,
160157
} : {}),
161158
tool_choice: {
162-
type: "auto",
163-
disable_parallel_tool_use: true
159+
type: 'auto',
160+
disable_parallel_tool_use: true,
164161
},
165162
messages,
166163
...(tools ? { tools } : {}),
167164
};
168-
console.log(sdk_params.max_tokens)
165+
console.log(sdk_params.max_tokens);
169166

170167
// console.log('\x1B[26;1m ===== SDK PARAMETERS', require('util').inspect(sdk_params, undefined, Infinity));
171168

@@ -201,8 +198,6 @@ class ClaudeService extends BaseService {
201198
fsNode: task.node,
202199
});
203200

204-
const require = this.require;
205-
const mime = require('mime-types');
206201
const mimeType = mime.contentType(await task.node.get('name'));
207202

208203
beta_mode = true;
@@ -228,12 +223,6 @@ class ClaudeService extends BaseService {
228223
return 'container_upload';
229224
})();
230225

231-
// {
232-
// 'application/pdf': 'document',
233-
// 'text/plain': 'document',
234-
// 'image/': 'image'
235-
// }[mimeType];
236-
237226
delete task.contentPart.puter_path,
238227
task.contentPart.type = contentBlockTypeForFileBasedOnMime;
239228
task.contentPart.source = {
@@ -251,7 +240,7 @@ class ClaudeService extends BaseService {
251240
try {
252241
await this.anthropic.beta.files.delete(task.file_id,
253242
{ betas: ['files-api-2025-04-14'] });
254-
} catch (e) {
243+
} catch (e) {
255244
this.errors.report('claude:file-delete-task', {
256245
source: e,
257246
trace: true,
@@ -328,26 +317,7 @@ class ClaudeService extends BaseService {
328317
}
329318
chatStream.end();
330319

331-
this.billForUsage(actor, model || this.get_default_model(), usageSum);
332-
333-
// Log token usage statistics
334-
const totalTokens = usageSum.input_tokens + usageSum.output_tokens;
335-
const cachedTokens = usageSum.ephemeral_5m_input_tokens + usageSum.ephemeral_1h_input_tokens;
336-
const cacheHits = usageSum.cache_read_input_tokens;
337-
const uncachedTokens = usageSum.input_tokens - cacheHits - cachedTokens;
338-
339-
// console.log(`
340-
// ╔══════════════════════════════════════════════════════════════╗
341-
// ║ 🎯 Token Usage Statistics 🎯 ║
342-
// ╠══════════════════════════════════════════════════════════════╣
343-
// ║ 📊 Total Tokens Used: ${String(totalTokens).padStart(10)} 📊 ║
344-
// ║ 💾 Cached Tokens: ${String(cachedTokens).padStart(10)} 💾 ║
345-
// ║ ✅ Cache Hits: ${String(cacheHits).padStart(10)} ✅ ║
346-
// ║ 🔄 Uncached Tokens: ${String(uncachedTokens).padStart(10)} 🔄 ║
347-
// ║ 📥 Input Tokens: ${String(usageSum.input_tokens).padStart(10)} 📥 ║
348-
// ║ 📤 Output Tokens: ${String(usageSum.output_tokens).padStart(10)} 📤 ║
349-
// ╚══════════════════════════════════════════════════════════════╝
350-
// `);
320+
this.#meteringService.utilRecordUsageObject(usageSum, actor, `claude:${this.models_().find(m => [m.id, ...(m.aliases || [])].includes(model || this.get_default_model())).id}`);
351321
};
352322

353323
return {
@@ -361,26 +331,7 @@ class ClaudeService extends BaseService {
361331
await cleanup_files();
362332

363333
const usage = this.usageFormatterUtil(msg.usage);
364-
this.billForUsage(actor, model || this.get_default_model(), usage);
365-
366-
// Log token usage statistics
367-
const totalTokens = usage.input_tokens + usage.output_tokens;
368-
const cachedTokens = usage.ephemeral_5m_input_tokens + usage.ephemeral_1h_input_tokens;
369-
const cacheHits = usage.cache_read_input_tokens;
370-
const uncachedTokens = usage.input_tokens - cacheHits - cachedTokens;
371-
372-
// console.log(`
373-
// ╔══════════════════════════════════════════════════════════════╗
374-
// ║ 🎯 Token Usage Statistics 🎯 ║
375-
// ╠══════════════════════════════════════════════════════════════╣
376-
// ║ 📊 Total Tokens Used: ${String(totalTokens).padStart(10)} 📊 ║
377-
// ║ 💾 Cached Tokens: ${String(cachedTokens).padStart(10)} 💾 ║
378-
// ║ ✅ Cache Hits: ${String(cacheHits).padStart(10)} ✅ ║
379-
// ║ 🔄 Uncached Tokens: ${String(uncachedTokens).padStart(10)} 🔄 ║
380-
// ║ 📥 Input Tokens: ${String(usage.input_tokens).padStart(10)} 📥 ║
381-
// ║ 📤 Output Tokens: ${String(usage.output_tokens).padStart(10)} 📤 ║
382-
// ╚══════════════════════════════════════════════════════════════╝
383-
// `);
334+
this.#meteringService.utilRecordUsageObject(usage, actor, `claude:${this.models_().find(m => [m.id, ...(m.aliases || [])].includes(model || this.get_default_model())).id}`);
384335

385336
// TODO DS: cleanup old usage tracking
386337
return {
@@ -394,7 +345,7 @@ class ClaudeService extends BaseService {
394345

395346
// TODO DS: get this inside the class as a private method once the methods aren't exported directly
396347
/** @type {(usage: import("@anthropic-ai/sdk/resources/messages.js").Usage | import("@anthropic-ai/sdk/resources/beta/messages/messages.js").BetaUsage) => {}}) */
397-
usageFormatterUtil(usage) {
348+
usageFormatterUtil (usage) {
398349
return {
399350
input_tokens: usage?.input_tokens || 0,
400351
ephemeral_5m_input_tokens: usage?.cache_creation?.ephemeral_5m_input_tokens || usage.cache_creation_input_tokens || 0, // this is because their API is a bit inconsistent
@@ -404,11 +355,6 @@ class ClaudeService extends BaseService {
404355
};
405356
};
406357

407-
// TODO DS: get this inside the class as a private method once the methods aren't exported directly
408-
billForUsage(actor, model, usage) {
409-
this.#meteringService.utilRecordUsageObject(usage, actor, `claude:${this.models_().find(m => [m.id, ...(m.aliases || [])].includes(model)).id}`);
410-
};
411-
412358
/**
413359
* Retrieves available Claude AI models and their specifications
414360
* @returns Array of model objects containing:
@@ -421,7 +367,7 @@ class ClaudeService extends BaseService {
421367
* - max_output: Maximum output tokens
422368
* - training_cutoff: Training data cutoff date
423369
*/
424-
models_() {
370+
models_ () {
425371
return [
426372
{
427373
id: 'claude-haiku-4-5-20251001',

src/backend/src/modules/puterai/OllamaService.js

Lines changed: 27 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
*/
1919

2020
// METADATA // {"ai-commented":{"service":"claude"}}
21-
const APIError = require('../../api/APIError');
2221
const BaseService = require('../../services/BaseService');
2322
const OpenAIUtil = require('./lib/OpenAIUtil');
2423
const { Context } = require('../../util/context');
@@ -42,7 +41,7 @@ class OllamaService extends BaseService {
4241
* Gets the system prompt used for AI interactions
4342
* @returns {string} The base system prompt that identifies the AI as running on Puter
4443
*/
45-
adapt_model(model) {
44+
adapt_model (model) {
4645
return model;
4746
}
4847

@@ -51,14 +50,14 @@ class OllamaService extends BaseService {
5150
* @private
5251
* @returns {Promise<void>} Resolves when initialization is complete
5352
*/
54-
async _init() {
53+
async _init () {
5554
// Ollama typically runs on HTTP, not HTTPS
5655
this.api_base_url = this.config?.api_base_url || 'http://localhost:11434';
5756

5857
// OpenAI SDK is used to interact with the Ollama API
5958
this.openai = new this.modules.openai.OpenAI({
60-
apiKey: "ollama", // Ollama doesn't use an API key, it uses the "ollama" string
61-
baseURL: this.api_base_url + '/v1',
59+
apiKey: 'ollama', // Ollama doesn't use an API key, it uses the "ollama" string
60+
baseURL: `${this.api_base_url }/v1`,
6261
});
6362
this.kvkey = this.modules.uuidv4();
6463

@@ -74,7 +73,7 @@ class OllamaService extends BaseService {
7473
* Returns the default model identifier for the Ollama service
7574
* @returns {string} The default model ID 'gpt-oss:20b'
7675
*/
77-
get_default_model() {
76+
get_default_model () {
7877
return 'gpt-oss:20b';
7978
}
8079

@@ -86,7 +85,7 @@ class OllamaService extends BaseService {
8685
*
8786
* @returns Promise<Array<Object>> Array of model details
8887
*/
89-
async models() {
88+
async models () {
9089
return await this.models_();
9190
},
9291
/**
@@ -95,7 +94,7 @@ class OllamaService extends BaseService {
9594
* @description Retrieves all available model IDs and their aliases,
9695
* flattening them into a single array of strings that can be used for model selection
9796
*/
98-
async list() {
97+
async list () {
9998
const models = await this.models_();
10099
const model_names = [];
101100
for ( const model of models ) {
@@ -108,7 +107,7 @@ class OllamaService extends BaseService {
108107
* AI Chat completion method.
109108
* See AIChatService for more details.
110109
*/
111-
async complete({ messages, stream, model, tools, max_tokens, temperature }) {
110+
async complete ({ messages, stream, model, tools, max_tokens, temperature }) {
112111
model = this.adapt_model(model);
113112

114113
if ( model.startsWith('ollama:') ) {
@@ -128,19 +127,23 @@ class OllamaService extends BaseService {
128127
...(stream ? {
129128
stream_options: { include_usage: true },
130129
} : {}),
131-
}
130+
};
132131

133132
const completion = await this.openai.chat.completions.create(sdk_params);
134133

135-
const modelDetails = (await this.models_()).find(m => m.id === 'ollama:' + model);
134+
const modelDetails = (await this.models_()).find(m => m.id === `ollama:${model}`);
136135
return OpenAIUtil.handle_completion_output({
137136
usage_calculator: ({ usage }) => {
138-
// custom open router logic because its free
137+
139138
const trackedUsage = {
140-
prompt: 0,
141-
completion: 0,
142-
input_cache_read: 0,
139+
prompt: (usage.prompt_tokens ?? 0 ) - (usage.prompt_tokens_details?.cached_tokens ?? 0),
140+
completion: usage.completion_tokens ?? 0,
141+
input_cache_read: usage.prompt_tokens_details?.cached_tokens ?? 0,
143142
};
143+
const costOverwrites = Object.fromEntries(Object.keys(trackedUsage).map((k) => {
144+
return [k, 0]; // override to 0 since local is free
145+
}));
146+
this.meteringService.utilRecordUsageObject(trackedUsage, actor, modelDetails.id, { costOverwrites });
144147
const legacyCostCalculator = OpenAIUtil.create_usage_calculator({
145148
model_details: modelDetails,
146149
});
@@ -162,31 +165,31 @@ class OllamaService extends BaseService {
162165
* - cost: Pricing information object with currency and rates
163166
* @private
164167
*/
165-
async models_(rawPriceKeys = false) {
168+
async models_ (_rawPriceKeys = false) {
166169
const axios = this.require('axios');
167170

168171
let models = this.modules.kv.get(`${this.kvkey}:models`);
169-
if ( !models ) {
172+
if ( ! models ) {
170173
try {
171174
const resp = await axios.request({
172175
method: 'GET',
173-
url: this.api_base_url + '/api/tags',
176+
url: `${this.api_base_url }/api/tags`,
174177
});
175178
models = resp.data.models || [];
176179
if ( models.length > 0 ) {
177180
this.modules.kv.set(`${this.kvkey}:models`, models);
178181
}
179-
} catch (error) {
182+
} catch ( error ) {
180183
this.log.error('Failed to fetch models from Ollama:', error.message);
181184
// Return empty array if Ollama is not available
182185
return [];
183186
}
184187
}
185-
188+
186189
if ( !models || models.length === 0 ) {
187190
return [];
188191
}
189-
192+
190193
const coerced_models = [];
191194
for ( const model of models ) {
192195
// Ollama API returns models with 'name' property, not 'model'
@@ -196,8 +199,8 @@ class OllamaService extends BaseService {
196199
output: 0,
197200
};
198201
coerced_models.push({
199-
id: 'ollama:' + modelName,
200-
name: modelName + ' (Ollama)',
202+
id: `ollama:${ modelName}`,
203+
name: `${modelName } (Ollama)`,
201204
max_tokens: model.size || model.max_context || 8192,
202205
cost: {
203206
currency: 'usd-cents',
@@ -206,7 +209,7 @@ class OllamaService extends BaseService {
206209
},
207210
});
208211
}
209-
console.log("coerced_models", coerced_models);
212+
console.log('coerced_models', coerced_models);
210213
return coerced_models;
211214
}
212215
}

0 commit comments

Comments (0)