Add missing Google Vertex models (#61)

harshiv-26 · web-flow · commit e806db517921 · 2026-02-16T18:24:39.000+05:30
* Add missing Google Vertex models

* Meta Llama models: prefix model IDs with `meta/` to match the meta/ folder
diff --git a/providers/google-vertex/deepseek-ai/deepseek-ocr-maas.yaml b/providers/google-vertex/deepseek-ai/deepseek-ocr-maas.yaml
@@ -0,0 +1,6 @@
+model: deepseek-ai/deepseek-ocr-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 3.e-7
+    output_cost_per_token: 0.0000012
+mode: chat
diff --git a/providers/google-vertex/deepseek-ai/deepseek-r1-0528-maas.yaml b/providers/google-vertex/deepseek-ai/deepseek-r1-0528-maas.yaml
@@ -0,0 +1,8 @@
+model: deepseek-ai/deepseek-r1-0528-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 0.00000135
+    output_cost_per_token: 0.0000054
+    input_cost_per_token_batches: 6.75e-7
+    output_cost_per_token_batches: 0.0000027
+mode: chat
diff --git a/providers/google-vertex/deepseek-ai/deepseek-v3.1-maas.yaml b/providers/google-vertex/deepseek-ai/deepseek-v3.1-maas.yaml
@@ -0,0 +1,9 @@
+model: deepseek-ai/deepseek-v3.1-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 6.e-7
+    output_cost_per_token: 0.0000017
+    cache_read_input_token_cost: 6.e-8
+    input_cost_per_token_batches: 3.e-7
+    output_cost_per_token_batches: 8.5e-7
+mode: chat
diff --git a/providers/google-vertex/deepseek-ai/deepseek-v3.2-maas.yaml b/providers/google-vertex/deepseek-ai/deepseek-v3.2-maas.yaml
@@ -0,0 +1,9 @@
+model: deepseek-ai/deepseek-v3.2-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 5.6e-7
+    output_cost_per_token: 0.00000168
+    cache_read_input_token_cost: 5.6e-8
+    input_cost_per_token_batches: 2.8e-7
+    output_cost_per_token_batches: 8.4e-7
+mode: chat
diff --git a/providers/google-vertex/meta/llama-3.1-405b-instruct-maas.yaml b/providers/google-vertex/meta/llama-3.1-405b-instruct-maas.yaml
@@ -1,4 +1,4 @@
-model: llama-3.1-405b-instruct-maas
+model: meta/llama-3.1-405b-instruct-maas
 costs:
   - region: '*'
     input_cost_per_token: 0.000005
diff --git a/providers/google-vertex/meta/llama-3.1-70b-instruct-maas.yaml b/providers/google-vertex/meta/llama-3.1-70b-instruct-maas.yaml
@@ -1,4 +1,4 @@
-model: llama-3.1-70b-instruct-maas
+model: meta/llama-3.1-70b-instruct-maas
 costs:
   - region: '*'
     input_cost_per_token: 0
diff --git a/providers/google-vertex/meta/llama-3.1-8b-instruct-maas.yaml b/providers/google-vertex/meta/llama-3.1-8b-instruct-maas.yaml
@@ -1,4 +1,4 @@
-model: llama-3.1-8b-instruct-maas
+model: meta/llama-3.1-8b-instruct-maas
 costs:
   - region: '*'
     input_cost_per_token: 0
diff --git a/providers/google-vertex/meta/llama-3.3-70b-instruct-maas.yaml b/providers/google-vertex/meta/llama-3.3-70b-instruct-maas.yaml
@@ -0,0 +1,8 @@
+model: meta/llama-3.3-70b-instruct-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 7.2e-7
+    output_cost_per_token: 7.2e-7
+    input_cost_per_token_batches: 3.6e-7
+    output_cost_per_token_batches: 3.6e-7
+mode: chat
diff --git a/providers/google-vertex/meta/llama-4-maverick-17b-128e-instruct-maas.yaml b/providers/google-vertex/meta/llama-4-maverick-17b-128e-instruct-maas.yaml
@@ -1,8 +1,10 @@
-model: llama-4-maverick-17b-128e-instruct-maas
+model: meta/llama-4-maverick-17b-128e-instruct-maas
 costs:
   - region: '*'
     input_cost_per_token: 3.5e-7
     output_cost_per_token: 0.00000115
+    input_cost_per_token_batches: 1.75e-7
+    output_cost_per_token_batches: 5.75e-7
 limits:
   max_tokens: 1000000
   max_input_tokens: 1000000
diff --git a/providers/google-vertex/meta/llama-4-scout-17b-16e-instruct-maas.yaml b/providers/google-vertex/meta/llama-4-scout-17b-16e-instruct-maas.yaml
@@ -1,8 +1,10 @@
-model: llama-4-scout-17b-16e-instruct-maas
+model: meta/llama-4-scout-17b-16e-instruct-maas
 costs:
   - region: '*'
     input_cost_per_token: 2.5e-7
     output_cost_per_token: 7.e-7
+    input_cost_per_token_batches: 1.25e-7
+    output_cost_per_token_batches: 3.5e-7
 limits:
   max_tokens: 10000000
   max_input_tokens: 10000000
diff --git a/providers/google-vertex/minimaxai/minimax-m2-maas.yaml b/providers/google-vertex/minimaxai/minimax-m2-maas.yaml
@@ -0,0 +1,7 @@
+model: minimaxai/minimax-m2-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 3.e-7
+    output_cost_per_token: 0.0000012
+    cache_read_input_token_cost: 3.e-8
+mode: chat
diff --git a/providers/google-vertex/mistralai/mistral-ocr-2505.yaml b/providers/google-vertex/mistralai/mistral-ocr-2505.yaml
@@ -0,0 +1,6 @@
+model: mistralai/mistral-ocr-2505
+costs:
+  - region: '*'
+    input_cost_per_token: 5.e-10
+    output_cost_per_token: 5.e-10
+mode: chat
diff --git a/providers/google-vertex/mistralai/mistral-small-2503.yaml b/providers/google-vertex/mistralai/mistral-small-2503.yaml
@@ -0,0 +1,6 @@
+model: mistralai/mistral-small-2503
+costs:
+  - region: '*'
+    input_cost_per_token: 1.e-7
+    output_cost_per_token: 3.e-7
+mode: chat
diff --git a/providers/google-vertex/moonshotai/kimi-k2-thinking-maas.yaml b/providers/google-vertex/moonshotai/kimi-k2-thinking-maas.yaml
@@ -0,0 +1,7 @@
+model: moonshotai/kimi-k2-thinking-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 6.e-7
+    output_cost_per_token: 0.0000025
+    cache_read_input_token_cost: 6.e-8
+mode: chat
diff --git a/providers/google-vertex/openai/gpt-oss-120b-maas.yaml b/providers/google-vertex/openai/gpt-oss-120b-maas.yaml
@@ -0,0 +1,8 @@
+model: openai/gpt-oss-120b-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 9.e-8
+    output_cost_per_token: 3.6e-7
+    input_cost_per_token_batches: 4.5e-8
+    output_cost_per_token_batches: 1.8e-7
+mode: chat
diff --git a/providers/google-vertex/openai/gpt-oss-20b-maas.yaml b/providers/google-vertex/openai/gpt-oss-20b-maas.yaml
@@ -0,0 +1,9 @@
+model: openai/gpt-oss-20b-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 7.e-8
+    output_cost_per_token: 2.5e-7
+    cache_read_input_token_cost: 7.e-9
+    input_cost_per_token_batches: 3.5e-8
+    output_cost_per_token_batches: 1.25e-7
+mode: chat
diff --git a/providers/google-vertex/qwen/qwen3-235b-a22b-instruct-2507-maas.yaml b/providers/google-vertex/qwen/qwen3-235b-a22b-instruct-2507-maas.yaml
@@ -0,0 +1,8 @@
+model: qwen/qwen3-235b-a22b-instruct-2507-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 2.2e-7
+    output_cost_per_token: 8.8e-7
+    input_cost_per_token_batches: 1.1e-7
+    output_cost_per_token_batches: 4.4e-7
+mode: chat
diff --git a/providers/google-vertex/qwen/qwen3-coder-480b-a35b-instruct-maas.yaml b/providers/google-vertex/qwen/qwen3-coder-480b-a35b-instruct-maas.yaml
@@ -0,0 +1,9 @@
+model: qwen/qwen3-coder-480b-a35b-instruct-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 2.2e-7
+    output_cost_per_token: 0.0000018
+    cache_read_input_token_cost: 2.2e-8
+    input_cost_per_token_batches: 1.1e-7
+    output_cost_per_token_batches: 9.e-7
+mode: chat
diff --git a/providers/google-vertex/qwen/qwen3-next-80b-a3b-instruct-maas.yaml b/providers/google-vertex/qwen/qwen3-next-80b-a3b-instruct-maas.yaml
@@ -0,0 +1,6 @@
+model: qwen/qwen3-next-80b-a3b-instruct-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 1.5e-7
+    output_cost_per_token: 0.0000012
+mode: chat
diff --git a/providers/google-vertex/qwen/qwen3-next-80b-a3b-thinking-maas.yaml b/providers/google-vertex/qwen/qwen3-next-80b-a3b-thinking-maas.yaml
@@ -0,0 +1,6 @@
+model: qwen/qwen3-next-80b-a3b-thinking-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 1.5e-7
+    output_cost_per_token: 0.0000012
+mode: chat
diff --git a/providers/google-vertex/zai-org/glm-4.7-maas.yaml b/providers/google-vertex/zai-org/glm-4.7-maas.yaml
@@ -0,0 +1,6 @@
+model: zai-org/glm-4.7-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 6.e-7
+    output_cost_per_token: 0.0000022
+mode: chat
diff --git a/providers/google-vertex/zai-org/glm-5-maas.yaml b/providers/google-vertex/zai-org/glm-5-maas.yaml
@@ -0,0 +1,7 @@
+model: zai-org/glm-5-maas
+costs:
+  - region: '*'
+    input_cost_per_token: 0.000001
+    output_cost_per_token: 0.0000032
+    cache_read_input_token_cost: 1.e-7
+mode: chat

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-model: llama-3.1-405b-instruct-maas`
	`1`	`+model: meta/llama-3.1-405b-instruct-maas`
`2`	`2`	`costs:`
`3`	`3`	`  - region: '*'`
`4`	`4`	`    input_cost_per_token: 0.000005`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-model: llama-3.1-70b-instruct-maas`
	`1`	`+model: meta/llama-3.1-70b-instruct-maas`
`2`	`2`	`costs:`
`3`	`3`	`  - region: '*'`
`4`	`4`	`    input_cost_per_token: 0`
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-model: llama-3.1-8b-instruct-maas`
	`1`	`+model: meta/llama-3.1-8b-instruct-maas`
`2`	`2`	`costs:`
`3`	`3`	`  - region: '*'`
`4`	`4`	`    input_cost_per_token: 0`