Skip to content

Commit 04ab83d

Browse files
authored
feat: 1hr cache creation cost and pricing mode (#1017)
1 parent 5319149 commit 04ab83d

36 files changed

Lines changed: 133 additions & 115 deletions

.github/scripts/autogen/types.ts

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ export interface components {
1717
Cost: {
1818
cache_creation_input_audio_token_cost?: number;
1919
cache_creation_input_token_cost?: number;
20+
cache_creation_input_token_cost_per_hour?: number;
2021
cache_read_input_audio_token_cost?: number;
2122
cache_read_input_token_cost?: number;
22-
cache_storage_cost_per_token_per_hour?: number;
2323
input_cost_per_annotated_page?: number;
2424
input_cost_per_audio_token?: number;
2525
input_cost_per_character?: number;
@@ -136,6 +136,14 @@ export interface components {
136136
ModelParamKey: "json_schema" | "max_completion_tokens" | "max_tokens" | "min_tokens" | "n" | "parallel_tool_calls" | "reasoning" | "reasoning_effort" | "response_format" | "seed" | "stop" | "stream" | "temperature" | "thinking" | "tool_choice" | "top_k" | "top_p" | "verbosity";
137137
/** @enum {string} */
138138
ModelParamType: "array-of-strings" | "boolean" | "json" | "number" | "string";
139+
/**
140+
* @description How the model prices long context tokens
141+
* marginal: only the tokens beyond the long context threshold are priced under long context pricing
142+
* cumulative: all input tokens are priced under long context pricing
143+
* @default marginal
144+
* @enum {string}
145+
*/
146+
PricingMode: "marginal" | "cumulative";
139147
PricingTier: {
140148
cost_per_token: number;
141149
from: number;
@@ -155,6 +163,7 @@ export interface components {
155163
cache_write?: components["schemas"]["PricingTier"][];
156164
input?: components["schemas"]["PricingTier"][];
157165
output?: components["schemas"]["PricingTier"][];
166+
pricing_mode?: components["schemas"]["PricingMode"];
158167
};
159168
/**
160169
* @description Vertex region identifiers
@@ -188,6 +197,7 @@ export type ModelConfig = components['schemas']['ModelConfig'];
188197
export type ModelParam = components['schemas']['ModelParam'];
189198
export type ModelParamKey = components['schemas']['ModelParamKey'];
190199
export type ModelParamType = components['schemas']['ModelParamType'];
200+
export type PricingMode = components['schemas']['PricingMode'];
191201
export type PricingTier = components['schemas']['PricingTier'];
192202
export type Provisioning = components['schemas']['Provisioning'];
193203
export type Status = components['schemas']['Status'];

.github/test/model.cue

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,11 +104,11 @@ package model
104104
"global"
105105

106106
#Cost: {
107-
cache_creation_input_audio_token_cost?: number & >= 0
108-
cache_creation_input_token_cost?: number & >= 0
109-
cache_read_input_audio_token_cost?: number & >= 0
110-
cache_read_input_token_cost?: number & >= 0
111-
cache_storage_cost_per_token_per_hour?: number & >= 0
107+
cache_creation_input_audio_token_cost?: number & >= 0
108+
cache_creation_input_token_cost?: number & >= 0
109+
cache_creation_input_token_cost_per_hour?: number & >= 0
110+
cache_read_input_audio_token_cost?: number & >= 0
111+
cache_read_input_token_cost?: number & >= 0
112112
input_cost_per_annotated_page?: number & >= 0
113113
input_cost_per_audio_token?: number & >= 0
114114
input_cost_per_character?: number & >= 0
@@ -291,6 +291,13 @@ package model
291291
from: int & >= 0
292292
}
293293

294+
// How the model prices long context tokens
295+
// marginal: only the tokens beyond the long context threshold are priced under long context pricing
296+
// cumulative: all input tokens are priced under long context pricing
297+
#PricingMode:
298+
*"marginal" | "cumulative"
299+
// defaults to "marginal"
300+
294301
// How the model is made available to callers
295302
#Provisioning:
296303
"serverless" | // Managed API, pay-per-token/request
@@ -308,4 +315,5 @@ package model
308315
cache_write?: [...#PricingTier]
309316
input?: [...#PricingTier]
310317
output?: [...#PricingTier]
318+
pricing_mode?: #PricingMode
311319
}

providers/deepinfra/google/gemini-2.5-pro.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
costs:
2-
- cache_read_input_token_cost: 1.25e-7
3-
cache_storage_cost_per_token_per_hour: 0.0000045
2+
- cache_creation_input_token_cost_per_hour: 0.0000045
3+
cache_read_input_token_cost: 1.25e-7
44
input_cost_per_token: 0.00000125
55
input_cost_per_token_batches: 6.25e-7
66
output_cost_per_token: 0.00001

providers/google-gemini/deep-research-max-preview-04-2026.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
costs:
2-
- cache_read_input_token_cost: 2e-7
3-
cache_storage_cost_per_token_per_hour: 0.0000045
2+
- cache_creation_input_token_cost_per_hour: 0.0000045
3+
cache_read_input_token_cost: 2e-7
44
input_cost_per_token: 0.000002
55
input_cost_per_token_batches: 0.000001
66
output_cost_per_token: 0.000012

providers/google-gemini/deep-research-preview-04-2026.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
costs:
2-
- cache_read_input_token_cost: 2e-7
3-
cache_storage_cost_per_token_per_hour: 0.0000045
2+
- cache_creation_input_token_cost_per_hour: 0.0000045
3+
cache_read_input_token_cost: 2e-7
44
input_cost_per_token: 0.000002
55
input_cost_per_token_batches: 0.000001
66
output_cost_per_token: 0.000012

providers/google-gemini/deep-research-pro-preview-12-2025.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
costs:
2-
- cache_read_input_token_cost: 2e-7
3-
cache_storage_cost_per_token_per_hour: 0.0000045
2+
- cache_creation_input_token_cost_per_hour: 0.0000045
3+
cache_read_input_token_cost: 2e-7
44
input_cost_per_token: 0.000002
55
input_cost_per_token_batches: 0.000001
66
output_cost_per_token: 0.000012

providers/google-gemini/gemini-2.0-flash-001.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
costs:
2-
- cache_read_input_audio_token_cost: 1.75e-7
2+
- cache_creation_input_token_cost_per_hour: 0.000001
3+
cache_read_input_audio_token_cost: 1.75e-7
34
cache_read_input_token_cost: 2.5e-8
4-
cache_storage_cost_per_token_per_hour: 0.000001
55
input_cost_per_audio_token: 7e-7
66
input_cost_per_token: 1e-7
77
input_cost_per_token_batches: 5e-8

providers/google-gemini/gemini-2.0-flash.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
costs:
2-
- cache_read_input_audio_token_cost: 1.75e-7
2+
- cache_creation_input_token_cost_per_hour: 0.000001
3+
cache_read_input_audio_token_cost: 1.75e-7
34
cache_read_input_token_cost: 2.5e-8
4-
cache_storage_cost_per_token_per_hour: 0.000001
55
input_cost_per_audio_token: 7e-7
66
input_cost_per_token: 1e-7
77
input_cost_per_token_batches: 5e-8

providers/google-gemini/gemini-2.5-flash-image.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
costs:
2-
- cache_read_input_token_cost: 3e-8
3-
cache_storage_cost_per_token_per_hour: 0.000001
2+
- cache_creation_input_token_cost_per_hour: 0.000001
3+
cache_read_input_token_cost: 3e-8
44
input_cost_per_token: 3e-7
55
input_cost_per_token_batches: 1.5e-7
66
output_cost_per_image_1k: 0.039

providers/google-gemini/gemini-2.5-flash-lite-preview-09-2025.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
costs:
2-
- cache_read_input_audio_token_cost: 3e-8
2+
- cache_creation_input_token_cost_per_hour: 0.000001
3+
cache_read_input_audio_token_cost: 3e-8
34
cache_read_input_token_cost: 1e-8
4-
cache_storage_cost_per_token_per_hour: 0.000001
55
input_cost_per_audio_token: 3e-7
66
input_cost_per_token: 1e-7
77
input_cost_per_token_batches: 5e-8

0 commit comments

Comments
 (0)