Skip to content

Commit 004bdf4

Browse files
feat(gcp): add Vertex AI pricing collector
Adds a Vertex AI collector to the GCP provider, enabled via `-gcp.services VERTEX`. Exports three metric types:
- cloudcost_gcp_vertex_token_input_usd_per_1k_tokens {model, region}
- cloudcost_gcp_vertex_token_output_usd_per_1k_tokens {model, region}
- cloudcost_gcp_vertex_instance_total_usd_per_hour {machine_type, use_case, region, price_tier}

Token metrics cover any model family billed through Vertex AI (Gemini, Claude, etc.). SKUs with no regional breakdown fall back to region="global". Compute metrics cover custom training and online prediction nodes with on_demand and spot price tiers. Pricing is fetched from the GCP Billing API at startup and refreshed every 24 hours. Other Vertex AI products (embeddings, reranking, AutoML, Vector Search) are not yet collected. SKU description regex patterns must be verified against the live GCP Billing API before production use.
1 parent 1fad376 commit 004bdf4

5 files changed

Lines changed: 34 additions & 20 deletions

File tree

docs/metrics/gcp/vertex.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,4 +33,4 @@ Metrics exported for the GCP Vertex AI service.
3333

3434
## Notes
3535

36-
Pricing data is fetched from the GCP Billing API at startup and refreshed every 24 hours. SKU descriptions are matched using regular expressions; unknown SKUs are skipped. Verify SKU description patterns against the live Billing API when adding new Gemini models or machine types.
36+
Pricing data is fetched from the GCP Billing API at startup and refreshed every 24 hours. SKU descriptions are matched using regular expressions; unknown SKUs are skipped. Verify SKU description patterns against the live Billing API when adding new models or machine types.

pkg/google/vertex/pricing_map.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,13 @@ import (
1515
const vertexAIServiceName = "Vertex AI"
1616

1717
var (
18-
// tokenInputRegex matches Vertex AI Gemini input token SKU descriptions.
19-
// Example: "Gemini 1.5 Flash Input tokens"
18+
// tokenInputRegex matches Vertex AI input token SKU descriptions for any model family.
19+
// Examples: "Gemini 1.5 Flash Input tokens", "Claude 3.5 Sonnet Input tokens"
2020
// NOTE: Exact SKU description strings must be verified against the live GCP Billing API.
21-
tokenInputRegex = regexp.MustCompile(`(?i)^Gemini\s+(.+?)\s+Input\s+tokens?$`)
22-
// tokenOutputRegex matches Vertex AI Gemini output token SKU descriptions.
23-
// Example: "Gemini 1.5 Flash Output tokens"
24-
tokenOutputRegex = regexp.MustCompile(`(?i)^Gemini\s+(.+?)\s+Output\s+tokens?$`)
21+
tokenInputRegex = regexp.MustCompile(`(?i)^(.+?)\s+Input\s+tokens?$`)
22+
// tokenOutputRegex matches Vertex AI output token SKU descriptions for any model family.
23+
// Examples: "Gemini 1.5 Flash Output tokens", "Claude 3.5 Sonnet Output tokens"
24+
tokenOutputRegex = regexp.MustCompile(`(?i)^(.+?)\s+Output\s+tokens?$`)
2525
// computeRegex matches custom training/prediction compute SKU descriptions.
2626
// Example: "Custom Training n1-standard-4 running in us-central1"
2727
// Example: "Spot Custom Prediction n1-highmem-8 running in europe-west1"

pkg/google/vertex/pricing_map_test.go

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ func TestParseSkus_TokenInputSKU(t *testing.T) {
1818

1919
snap := pm.Snapshot()
2020
require.NotNil(t, snap.tokens["us-central1"])
21-
require.NotNil(t, snap.tokens["us-central1"]["1.5-flash"])
22-
assert.InDelta(t, 0.00125, snap.tokens["us-central1"]["1.5-flash"].InputPer1kTokens, 1e-9)
21+
require.NotNil(t, snap.tokens["us-central1"]["gemini-1.5-flash"])
22+
assert.InDelta(t, 0.00125, snap.tokens["us-central1"]["gemini-1.5-flash"].InputPer1kTokens, 1e-9)
2323
}
2424

2525
func TestParseSkus_TokenOutputSKU(t *testing.T) {
@@ -30,8 +30,22 @@ func TestParseSkus_TokenOutputSKU(t *testing.T) {
3030
require.NoError(t, err)
3131

3232
snap := pm.Snapshot()
33-
require.NotNil(t, snap.tokens["us-central1"]["1.5-flash"])
34-
assert.InDelta(t, 0.005, snap.tokens["us-central1"]["1.5-flash"].OutputPer1kTokens, 1e-9)
33+
require.NotNil(t, snap.tokens["us-central1"]["gemini-1.5-flash"])
34+
assert.InDelta(t, 0.005, snap.tokens["us-central1"]["gemini-1.5-flash"].OutputPer1kTokens, 1e-9)
35+
}
36+
37+
func TestParseSkus_ClaudeTokenSKU(t *testing.T) {
38+
pm := &PricingMap{}
39+
err := pm.ParseSkus([]*billingpb.Sku{
40+
newTokenSKU("Claude 3.5 Sonnet Input tokens", "global", "k{char}", 0, 3000000),
41+
newTokenSKU("Claude 3.5 Sonnet Output tokens", "global", "k{char}", 0, 15000000),
42+
})
43+
require.NoError(t, err)
44+
45+
snap := pm.Snapshot()
46+
require.NotNil(t, snap.tokens["global"]["claude-3.5-sonnet"])
47+
assert.InDelta(t, 0.003, snap.tokens["global"]["claude-3.5-sonnet"].InputPer1kTokens, 1e-9)
48+
assert.InDelta(t, 0.015, snap.tokens["global"]["claude-3.5-sonnet"].OutputPer1kTokens, 1e-9)
3549
}
3650

3751
func TestParseSkus_TokenSKUNormalizesPerUnitPrice(t *testing.T) {
@@ -43,7 +57,7 @@ func TestParseSkus_TokenSKUNormalizesPerUnitPrice(t *testing.T) {
4357
require.NoError(t, err)
4458

4559
snap := pm.Snapshot()
46-
assert.InDelta(t, 0.00125, snap.tokens["us-central1"]["1.0-pro"].InputPer1kTokens, 1e-9)
60+
assert.InDelta(t, 0.00125, snap.tokens["us-central1"]["gemini-1.0-pro"].InputPer1kTokens, 1e-9)
4761
}
4862

4963
func TestParseSkus_ComputeOnDemand(t *testing.T) {
@@ -116,8 +130,8 @@ func TestParseSkus_GlobalFallbackForTokenSKUWithNoRegion(t *testing.T) {
116130

117131
snap := pm.Snapshot()
118132
require.NotNil(t, snap.tokens["global"])
119-
require.NotNil(t, snap.tokens["global"]["1.5-flash"])
120-
assert.InDelta(t, 0.00125, snap.tokens["global"]["1.5-flash"].InputPer1kTokens, 1e-9)
133+
require.NotNil(t, snap.tokens["global"]["gemini-1.5-flash"])
134+
assert.InDelta(t, 0.00125, snap.tokens["global"]["gemini-1.5-flash"].InputPer1kTokens, 1e-9)
121135
}
122136

123137
func TestParseSkus_MultipleRegions(t *testing.T) {
@@ -141,8 +155,8 @@ func TestParseSkus_MultipleRegions(t *testing.T) {
141155
require.NoError(t, err)
142156

143157
snap := pm.Snapshot()
144-
assert.NotNil(t, snap.tokens["us-central1"]["1.5-pro"])
145-
assert.NotNil(t, snap.tokens["europe-west1"]["1.5-pro"])
158+
assert.NotNil(t, snap.tokens["us-central1"]["gemini-1.5-pro"])
159+
assert.NotNil(t, snap.tokens["europe-west1"]["gemini-1.5-pro"])
146160
}
147161

148162
func newTokenSKU(description, region, usageUnit string, units int64, nanos int32) *billingpb.Sku {

pkg/google/vertex/vertex.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,12 @@ const (
2222
var (
2323
vertexTokenInputDesc = utils.GenerateDesc(
2424
cloudcostexporter.MetricPrefix, subsystem, utils.TokenInputCostSuffix,
25-
"Vertex AI Gemini input token cost in USD per 1k tokens.",
25+
"Vertex AI input token cost in USD per 1k tokens.",
2626
[]string{"model", "region"},
2727
)
2828
vertexTokenOutputDesc = utils.GenerateDesc(
2929
cloudcostexporter.MetricPrefix, subsystem, utils.TokenOutputCostSuffix,
30-
"Vertex AI Gemini output token cost in USD per 1k tokens.",
30+
"Vertex AI output token cost in USD per 1k tokens.",
3131
[]string{"model", "region"},
3232
)
3333
vertexComputeCostDesc = utils.GenerateDesc(

pkg/google/vertex/vertex_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,13 +50,13 @@ func TestCollect_EmitsTokenMetrics(t *testing.T) {
5050

5151
inputMetric := metricByName(results, "cloudcost_gcp_vertex_token_input_usd_per_1k_tokens")
5252
require.NotNil(t, inputMetric)
53-
assert.Equal(t, "1.5-flash", inputMetric.Labels["model"])
53+
assert.Equal(t, "gemini-1.5-flash", inputMetric.Labels["model"])
5454
assert.Equal(t, "us-central1", inputMetric.Labels["region"])
5555
assert.InDelta(t, 0.00125, inputMetric.Value, 1e-9)
5656

5757
outputMetric := metricByName(results, "cloudcost_gcp_vertex_token_output_usd_per_1k_tokens")
5858
require.NotNil(t, outputMetric)
59-
assert.Equal(t, "1.5-flash", outputMetric.Labels["model"])
59+
assert.Equal(t, "gemini-1.5-flash", outputMetric.Labels["model"])
6060
assert.Equal(t, "us-central1", outputMetric.Labels["region"])
6161
assert.InDelta(t, 0.005, outputMetric.Value, 1e-9)
6262
}

0 commit comments

Comments
 (0)