feat: 1hr cache creation cost and pricing mode (#1017)

harshiv-26 · web-flow · commit 04ab83d6cd94 · 2026-05-13T16:23:16.000+05:30
diff --git a/.github/scripts/autogen/types.ts b/.github/scripts/autogen/types.ts
@@ -17,9 +17,9 @@ export interface components {
         Cost: {
             cache_creation_input_audio_token_cost?: number;
             cache_creation_input_token_cost?: number;
+            cache_creation_input_token_cost_per_hour?: number;
             cache_read_input_audio_token_cost?: number;
             cache_read_input_token_cost?: number;
-            cache_storage_cost_per_token_per_hour?: number;
             input_cost_per_annotated_page?: number;
             input_cost_per_audio_token?: number;
             input_cost_per_character?: number;
@@ -136,6 +136,14 @@ export interface components {
         ModelParamKey: "json_schema" | "max_completion_tokens" | "max_tokens" | "min_tokens" | "n" | "parallel_tool_calls" | "reasoning" | "reasoning_effort" | "response_format" | "seed" | "stop" | "stream" | "temperature" | "thinking" | "tool_choice" | "top_k" | "top_p" | "verbosity";
         /** @enum {string} */
         ModelParamType: "array-of-strings" | "boolean" | "json" | "number" | "string";
+        /**
+         * @description How the model prices long-context tokens
+         *     marginal: only the tokens beyond the long-context threshold are priced at the long-context rate
+         *     cumulative: all input tokens are priced at the long-context rate
+         * @default marginal
+         * @enum {string}
+         */
+        PricingMode: "marginal" | "cumulative";
         PricingTier: {
             cost_per_token: number;
             from: number;
@@ -155,6 +163,7 @@ export interface components {
             cache_write?: components["schemas"]["PricingTier"][];
             input?: components["schemas"]["PricingTier"][];
             output?: components["schemas"]["PricingTier"][];
+            pricing_mode?: components["schemas"]["PricingMode"];
         };
         /**
          * @description Vertex region identifiers
@@ -188,6 +197,7 @@ export type ModelConfig = components['schemas']['ModelConfig'];
 export type ModelParam = components['schemas']['ModelParam'];
 export type ModelParamKey = components['schemas']['ModelParamKey'];
 export type ModelParamType = components['schemas']['ModelParamType'];
+export type PricingMode = components['schemas']['PricingMode'];
 export type PricingTier = components['schemas']['PricingTier'];
 export type Provisioning = components['schemas']['Provisioning'];
 export type Status = components['schemas']['Status'];
diff --git a/.github/test/model.cue b/.github/test/model.cue
@@ -104,11 +104,11 @@ package model
 	"global"
 
 #Cost: {
-	cache_creation_input_audio_token_cost?: number & >= 0
-	cache_creation_input_token_cost?:       number & >= 0
-	cache_read_input_audio_token_cost?:     number & >= 0
-	cache_read_input_token_cost?:           number & >= 0
-	cache_storage_cost_per_token_per_hour?: number & >= 0
+	cache_creation_input_audio_token_cost?:    number & >= 0
+	cache_creation_input_token_cost?:          number & >= 0
+	cache_creation_input_token_cost_per_hour?: number & >= 0
+	cache_read_input_audio_token_cost?:        number & >= 0
+	cache_read_input_token_cost?:              number & >= 0
 	input_cost_per_annotated_page?:         number & >= 0
 	input_cost_per_audio_token?:            number & >= 0
 	input_cost_per_character?:              number & >= 0
@@ -291,6 +291,13 @@ package model
 	from:           int & >= 0
 }
 
+// How the model prices long-context tokens
+// marginal: only the tokens beyond the long-context threshold are priced at the long-context rate
+// cumulative: all input tokens are priced at the long-context rate
+#PricingMode:
+	*"marginal" | "cumulative"
+	// defaults to "marginal"
+
 // How the model is made available to callers
 #Provisioning:
 	"serverless" |   // Managed API, pay-per-token/request
@@ -308,4 +315,5 @@ package model
 	cache_write?: [...#PricingTier]
 	input?:       [...#PricingTier]
 	output?:      [...#PricingTier]
+	pricing_mode?: #PricingMode
 }
diff --git a/providers/deepinfra/google/gemini-2.5-pro.yaml b/providers/deepinfra/google/gemini-2.5-pro.yaml
@@ -1,6 +1,6 @@
 costs:
-    - cache_read_input_token_cost: 1.25e-7
-      cache_storage_cost_per_token_per_hour: 0.0000045
+    - cache_creation_input_token_cost_per_hour: 0.0000045
+      cache_read_input_token_cost: 1.25e-7
       input_cost_per_token: 0.00000125
       input_cost_per_token_batches: 6.25e-7
       output_cost_per_token: 0.00001
diff --git a/providers/google-gemini/deep-research-max-preview-04-2026.yaml b/providers/google-gemini/deep-research-max-preview-04-2026.yaml
@@ -1,6 +1,6 @@
 costs:
-    - cache_read_input_token_cost: 2e-7
-      cache_storage_cost_per_token_per_hour: 0.0000045
+    - cache_creation_input_token_cost_per_hour: 0.0000045
+      cache_read_input_token_cost: 2e-7
       input_cost_per_token: 0.000002
       input_cost_per_token_batches: 0.000001
       output_cost_per_token: 0.000012
diff --git a/providers/google-gemini/deep-research-preview-04-2026.yaml b/providers/google-gemini/deep-research-preview-04-2026.yaml
@@ -1,6 +1,6 @@
 costs:
-    - cache_read_input_token_cost: 2e-7
-      cache_storage_cost_per_token_per_hour: 0.0000045
+    - cache_creation_input_token_cost_per_hour: 0.0000045
+      cache_read_input_token_cost: 2e-7
       input_cost_per_token: 0.000002
       input_cost_per_token_batches: 0.000001
       output_cost_per_token: 0.000012
diff --git a/providers/google-gemini/deep-research-pro-preview-12-2025.yaml b/providers/google-gemini/deep-research-pro-preview-12-2025.yaml
@@ -1,6 +1,6 @@
 costs:
-    - cache_read_input_token_cost: 2e-7
-      cache_storage_cost_per_token_per_hour: 0.0000045
+    - cache_creation_input_token_cost_per_hour: 0.0000045
+      cache_read_input_token_cost: 2e-7
       input_cost_per_token: 0.000002
       input_cost_per_token_batches: 0.000001
       output_cost_per_token: 0.000012
diff --git a/providers/google-gemini/gemini-2.0-flash-001.yaml b/providers/google-gemini/gemini-2.0-flash-001.yaml
@@ -1,7 +1,7 @@
 costs:
-    - cache_read_input_audio_token_cost: 1.75e-7
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_audio_token_cost: 1.75e-7
       cache_read_input_token_cost: 2.5e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
       input_cost_per_audio_token: 7e-7
       input_cost_per_token: 1e-7
       input_cost_per_token_batches: 5e-8
diff --git a/providers/google-gemini/gemini-2.0-flash.yaml b/providers/google-gemini/gemini-2.0-flash.yaml
@@ -1,7 +1,7 @@
 costs:
-    - cache_read_input_audio_token_cost: 1.75e-7
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_audio_token_cost: 1.75e-7
       cache_read_input_token_cost: 2.5e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
       input_cost_per_audio_token: 7e-7
       input_cost_per_token: 1e-7
       input_cost_per_token_batches: 5e-8
diff --git a/providers/google-gemini/gemini-2.5-flash-image.yaml b/providers/google-gemini/gemini-2.5-flash-image.yaml
@@ -1,6 +1,6 @@
 costs:
-    - cache_read_input_token_cost: 3e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_token_cost: 3e-8
       input_cost_per_token: 3e-7
       input_cost_per_token_batches: 1.5e-7
       output_cost_per_image_1k: 0.039
diff --git a/providers/google-gemini/gemini-2.5-flash-lite-preview-09-2025.yaml b/providers/google-gemini/gemini-2.5-flash-lite-preview-09-2025.yaml
@@ -1,7 +1,7 @@
 costs:
-    - cache_read_input_audio_token_cost: 3e-8
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_audio_token_cost: 3e-8
       cache_read_input_token_cost: 1e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
       input_cost_per_audio_token: 3e-7
       input_cost_per_token: 1e-7
       input_cost_per_token_batches: 5e-8
diff --git a/providers/google-gemini/gemini-2.5-flash-lite.yaml b/providers/google-gemini/gemini-2.5-flash-lite.yaml
@@ -1,7 +1,7 @@
 costs:
-    - cache_read_input_audio_token_cost: 3e-8
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_audio_token_cost: 3e-8
       cache_read_input_token_cost: 1e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
       input_cost_per_audio_token: 3e-7
       input_cost_per_token: 1e-7
       input_cost_per_token_batches: 5e-8
diff --git a/providers/google-gemini/gemini-2.5-flash-preview-09-2025.yaml b/providers/google-gemini/gemini-2.5-flash-preview-09-2025.yaml
@@ -1,7 +1,7 @@
 costs:
-    - cache_read_input_audio_token_cost: 1e-7
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_audio_token_cost: 1e-7
       cache_read_input_token_cost: 3e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
       input_cost_per_audio_token: 0.000001
       input_cost_per_token: 3e-7
       input_cost_per_token_batches: 1.5e-7
diff --git a/providers/google-gemini/gemini-2.5-flash.yaml b/providers/google-gemini/gemini-2.5-flash.yaml
@@ -1,7 +1,7 @@
 costs:
-    - cache_read_input_audio_token_cost: 1e-7
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_audio_token_cost: 1e-7
       cache_read_input_token_cost: 3e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
       input_cost_per_audio_token: 0.000001
       input_cost_per_token: 3e-7
       input_cost_per_token_batches: 1.5e-7
diff --git a/providers/google-gemini/gemini-2.5-pro.yaml b/providers/google-gemini/gemini-2.5-pro.yaml
@@ -1,6 +1,6 @@
 costs:
-    - cache_read_input_token_cost: 1.25e-7
-      cache_storage_cost_per_token_per_hour: 0.0000045
+    - cache_creation_input_token_cost_per_hour: 0.0000045
+      cache_read_input_token_cost: 1.25e-7
       input_cost_per_token: 0.00000125
       input_cost_per_token_batches: 6.25e-7
       output_cost_per_token: 0.00001
diff --git a/providers/google-gemini/gemini-3-flash-preview.yaml b/providers/google-gemini/gemini-3-flash-preview.yaml
@@ -1,7 +1,7 @@
 costs:
-    - cache_read_input_audio_token_cost: 1e-7
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_audio_token_cost: 1e-7
       cache_read_input_token_cost: 5e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
       input_cost_per_audio_token: 0.000001
       input_cost_per_token: 5e-7
       input_cost_per_token_batches: 2.5e-7
diff --git a/providers/google-gemini/gemini-3-pro-preview.yaml b/providers/google-gemini/gemini-3-pro-preview.yaml
@@ -1,6 +1,6 @@
 costs:
-    - cache_read_input_token_cost: 2e-7
-      cache_storage_cost_per_token_per_hour: 0.0000045
+    - cache_creation_input_token_cost_per_hour: 0.0000045
+      cache_read_input_token_cost: 2e-7
       input_cost_per_token: 0.000002
       input_cost_per_token_batches: 0.000001
       output_cost_per_token: 0.000012
diff --git a/providers/google-gemini/gemini-3.1-flash-lite-preview.yaml b/providers/google-gemini/gemini-3.1-flash-lite-preview.yaml
@@ -1,7 +1,7 @@
 costs:
-    - cache_read_input_audio_token_cost: 5e-8
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_audio_token_cost: 5e-8
       cache_read_input_token_cost: 2.5e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
       input_cost_per_audio_token: 5e-7
       input_cost_per_token: 2.5e-7
       input_cost_per_token_batches: 1.25e-7
diff --git a/providers/google-gemini/gemini-3.1-flash-lite.yaml b/providers/google-gemini/gemini-3.1-flash-lite.yaml
@@ -1,7 +1,7 @@
 costs:
-    - cache_read_input_audio_token_cost: 5e-8
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_audio_token_cost: 5e-8
       cache_read_input_token_cost: 2.5e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
       input_cost_per_audio_token: 5e-7
       input_cost_per_token: 2.5e-7
       input_cost_per_token_batches: 1.25e-7
diff --git a/providers/google-gemini/gemini-3.1-pro-preview-customtools.yaml b/providers/google-gemini/gemini-3.1-pro-preview-customtools.yaml
@@ -1,6 +1,6 @@
 costs:
-    - cache_read_input_token_cost: 2e-7
-      cache_storage_cost_per_token_per_hour: 0.0000045
+    - cache_creation_input_token_cost_per_hour: 0.0000045
+      cache_read_input_token_cost: 2e-7
       input_cost_per_token: 0.000002
       input_cost_per_token_batches: 0.000001
       output_cost_per_token: 0.000012
diff --git a/providers/google-gemini/gemini-3.1-pro-preview.yaml b/providers/google-gemini/gemini-3.1-pro-preview.yaml
@@ -1,6 +1,6 @@
 costs:
-    - cache_read_input_token_cost: 2e-7
-      cache_storage_cost_per_token_per_hour: 0.0000045
+    - cache_creation_input_token_cost_per_hour: 0.0000045
+      cache_read_input_token_cost: 2e-7
       input_cost_per_token: 0.000002
       input_cost_per_token_batches: 0.000001
       output_cost_per_token: 0.000012
diff --git a/providers/google-gemini/gemini-flash-latest.yaml b/providers/google-gemini/gemini-flash-latest.yaml
@@ -1,7 +1,7 @@
 costs:
-    - cache_read_input_audio_token_cost: 1e-7
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_audio_token_cost: 1e-7
       cache_read_input_token_cost: 5e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
       input_cost_per_audio_token: 0.000001
       input_cost_per_token: 5e-7
       input_cost_per_token_batches: 2.5e-7
diff --git a/providers/google-gemini/gemini-flash-lite-latest.yaml b/providers/google-gemini/gemini-flash-lite-latest.yaml
@@ -1,7 +1,7 @@
 costs:
-    - cache_read_input_audio_token_cost: 3e-8
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_audio_token_cost: 3e-8
       cache_read_input_token_cost: 1e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
       input_cost_per_audio_token: 3e-7
       input_cost_per_token: 1e-7
       input_cost_per_token_batches: 5e-8
diff --git a/providers/google-gemini/gemini-pro-latest.yaml b/providers/google-gemini/gemini-pro-latest.yaml
@@ -1,6 +1,6 @@
 costs:
-    - cache_read_input_token_cost: 2e-7
-      cache_storage_cost_per_token_per_hour: 0.0000045
+    - cache_creation_input_token_cost_per_hour: 0.0000045
+      cache_read_input_token_cost: 2e-7
       input_cost_per_token: 0.000002
       input_cost_per_token_batches: 0.000001
       output_cost_per_token: 0.000012
diff --git a/providers/google-gemini/gemma-3-27b-it.yaml b/providers/google-gemini/gemma-3-27b-it.yaml
@@ -1,7 +1,7 @@
 costs:
     - cache_creation_input_token_cost: 0
+      cache_creation_input_token_cost_per_hour: 0
       cache_read_input_token_cost: 0
-      cache_storage_cost_per_token_per_hour: 0
       input_cost_per_character: 0
       input_cost_per_image: 0
       input_cost_per_token: 0
diff --git a/providers/google-gemini/gemma-3n-e2b-it.yaml b/providers/google-gemini/gemma-3n-e2b-it.yaml
@@ -1,6 +1,6 @@
 costs:
-    - cache_read_input_token_cost: 0
-      cache_storage_cost_per_token_per_hour: 0
+    - cache_creation_input_token_cost_per_hour: 0
+      cache_read_input_token_cost: 0
       input_cost_per_token: 0
       output_cost_per_token: 0
       region: "*"
diff --git a/providers/google-vertex/gemini-2.5-flash-lite-preview-09-2025.yaml b/providers/google-vertex/gemini-2.5-flash-lite-preview-09-2025.yaml
@@ -1,7 +1,7 @@
 costs:
-    - cache_read_input_audio_token_cost: 3e-8
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_audio_token_cost: 3e-8
       cache_read_input_token_cost: 1e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
       input_cost_per_audio_token: 3e-7
       input_cost_per_token: 1e-7
       input_cost_per_token_batches: 5e-8
diff --git a/providers/google-vertex/gemini-2.5-flash-preview-09-2025.yaml b/providers/google-vertex/gemini-2.5-flash-preview-09-2025.yaml
@@ -1,6 +1,6 @@
 costs:
-    - cache_read_input_token_cost: 3e-8
-      cache_storage_cost_per_token_per_hour: 0.000001
+    - cache_creation_input_token_cost_per_hour: 0.000001
+      cache_read_input_token_cost: 3e-8
       input_cost_per_audio_token: 0.000001
       input_cost_per_token: 3e-7
       output_cost_per_token: 0.0000025
diff --git a/providers/google-vertex/gemini-2.5-flash.yaml b/providers/google-vertex/gemini-2.5-flash.yaml
diff --git a/providers/google-vertex/google/gemini-2.5-flash-lite-preview-09-2025.yaml b/providers/google-vertex/google/gemini-2.5-flash-lite-preview-09-2025.yaml
diff --git a/providers/google-vertex/google/gemini-2.5-flash-preview-09-2025.yaml b/providers/google-vertex/google/gemini-2.5-flash-preview-09-2025.yaml
diff --git a/providers/openrouter/gemini-2.5-flash-lite.yaml b/providers/openrouter/gemini-2.5-flash-lite.yaml
diff --git a/providers/openrouter/google/gemini-2.0-flash-001.yaml b/providers/openrouter/google/gemini-2.0-flash-001.yaml
diff --git a/providers/openrouter/google/gemini-2.5-flash-lite-preview-09-2025.yaml b/providers/openrouter/google/gemini-2.5-flash-lite-preview-09-2025.yaml
diff --git a/providers/openrouter/google/gemini-2.5-pro.yaml b/providers/openrouter/google/gemini-2.5-pro.yaml
diff --git a/providers/openrouter/google/gemini-3.1-pro-preview-customtools.yaml b/providers/openrouter/google/gemini-3.1-pro-preview-customtools.yaml
diff --git a/providers/openrouter/google/gemini-3.1-pro-preview.yaml b/providers/openrouter/google/gemini-3.1-pro-preview.yaml