feat(gcp): add Vertex AI pricing collector

stephan-rayner · stephan-rayner · commit 004bdf489bcf · 2026-04-24T15:48:06.000-06:00
Adds a Vertex AI collector to the GCP provider, enabled via
`-gcp.services VERTEX`.

Exports three metric types:
- cloudcost_gcp_vertex_token_input_usd_per_1k_tokens  {model, region}
- cloudcost_gcp_vertex_token_output_usd_per_1k_tokens {model, region}
- cloudcost_gcp_vertex_instance_total_usd_per_hour    {machine_type, use_case, region, price_tier}

Token metrics cover any model family billed through Vertex AI (Gemini,
Claude, etc.). SKUs with no regional breakdown fall back to
region="global". Compute metrics cover custom training and online
prediction nodes with on_demand and spot price tiers.

Pricing is fetched from the GCP Billing API at startup and refreshed
every 24 hours. Other Vertex AI products (embeddings, reranking,
AutoML, Vector Search) are not yet collected.

SKU description regex patterns must be verified against the live GCP
Billing API before production use.
diff --git a/docs/metrics/gcp/vertex.md b/docs/metrics/gcp/vertex.md
@@ -33,4 +33,4 @@ Metrics exported for the GCP Vertex AI service.
 
 ## Notes
 
-Pricing data is fetched from the GCP Billing API at startup and refreshed every 24 hours. SKU descriptions are matched using regular expressions; unknown SKUs are skipped. Verify SKU description patterns against the live Billing API when adding new Gemini models or machine types.
+Pricing data is fetched from the GCP Billing API at startup and refreshed every 24 hours. SKU descriptions are matched using regular expressions; unknown SKUs are skipped. Verify SKU description patterns against the live Billing API when adding new models or machine types.
diff --git a/pkg/google/vertex/pricing_map.go b/pkg/google/vertex/pricing_map.go
@@ -15,13 +15,13 @@ import (
 const vertexAIServiceName = "Vertex AI"
 
 var (
-	// tokenInputRegex matches Vertex AI Gemini input token SKU descriptions.
-	// Example: "Gemini 1.5 Flash Input tokens"
+	// tokenInputRegex matches Vertex AI input token SKU descriptions for any model family.
+	// Examples: "Gemini 1.5 Flash Input tokens", "Claude 3.5 Sonnet Input tokens"
 	// NOTE: Exact SKU description strings must be verified against the live GCP Billing API.
-	tokenInputRegex = regexp.MustCompile(`(?i)^Gemini\s+(.+?)\s+Input\s+tokens?$`)
-	// tokenOutputRegex matches Vertex AI Gemini output token SKU descriptions.
-	// Example: "Gemini 1.5 Flash Output tokens"
-	tokenOutputRegex = regexp.MustCompile(`(?i)^Gemini\s+(.+?)\s+Output\s+tokens?$`)
+	tokenInputRegex = regexp.MustCompile(`(?i)^(.+?)\s+Input\s+tokens?$`)
+	// tokenOutputRegex matches Vertex AI output token SKU descriptions for any model family.
+	// Examples: "Gemini 1.5 Flash Output tokens", "Claude 3.5 Sonnet Output tokens"
+	tokenOutputRegex = regexp.MustCompile(`(?i)^(.+?)\s+Output\s+tokens?$`)
 	// computeRegex matches custom training/prediction compute SKU descriptions.
 	// Example: "Custom Training n1-standard-4 running in us-central1"
 	// Example: "Spot Custom Prediction n1-highmem-8 running in europe-west1"
diff --git a/pkg/google/vertex/pricing_map_test.go b/pkg/google/vertex/pricing_map_test.go
@@ -18,8 +18,8 @@ func TestParseSkus_TokenInputSKU(t *testing.T) {
 
 	snap := pm.Snapshot()
 	require.NotNil(t, snap.tokens["us-central1"])
-	require.NotNil(t, snap.tokens["us-central1"]["1.5-flash"])
-	assert.InDelta(t, 0.00125, snap.tokens["us-central1"]["1.5-flash"].InputPer1kTokens, 1e-9)
+	require.NotNil(t, snap.tokens["us-central1"]["gemini-1.5-flash"])
+	assert.InDelta(t, 0.00125, snap.tokens["us-central1"]["gemini-1.5-flash"].InputPer1kTokens, 1e-9)
 }
 
 func TestParseSkus_TokenOutputSKU(t *testing.T) {
@@ -30,8 +30,22 @@ func TestParseSkus_TokenOutputSKU(t *testing.T) {
 	require.NoError(t, err)
 
 	snap := pm.Snapshot()
-	require.NotNil(t, snap.tokens["us-central1"]["1.5-flash"])
-	assert.InDelta(t, 0.005, snap.tokens["us-central1"]["1.5-flash"].OutputPer1kTokens, 1e-9)
+	require.NotNil(t, snap.tokens["us-central1"]["gemini-1.5-flash"])
+	assert.InDelta(t, 0.005, snap.tokens["us-central1"]["gemini-1.5-flash"].OutputPer1kTokens, 1e-9)
+}
+
+func TestParseSkus_ClaudeTokenSKU(t *testing.T) {
+	pm := &PricingMap{}
+	err := pm.ParseSkus([]*billingpb.Sku{
+		newTokenSKU("Claude 3.5 Sonnet Input tokens", "global", "k{char}", 0, 3000000),
+		newTokenSKU("Claude 3.5 Sonnet Output tokens", "global", "k{char}", 0, 15000000),
+	})
+	require.NoError(t, err)
+
+	snap := pm.Snapshot()
+	require.NotNil(t, snap.tokens["global"]["claude-3.5-sonnet"])
+	assert.InDelta(t, 0.003, snap.tokens["global"]["claude-3.5-sonnet"].InputPer1kTokens, 1e-9)
+	assert.InDelta(t, 0.015, snap.tokens["global"]["claude-3.5-sonnet"].OutputPer1kTokens, 1e-9)
 }
 
 func TestParseSkus_TokenSKUNormalizesPerUnitPrice(t *testing.T) {
@@ -43,7 +57,7 @@ func TestParseSkus_TokenSKUNormalizesPerUnitPrice(t *testing.T) {
 	require.NoError(t, err)
 
 	snap := pm.Snapshot()
-	assert.InDelta(t, 0.00125, snap.tokens["us-central1"]["1.0-pro"].InputPer1kTokens, 1e-9)
+	assert.InDelta(t, 0.00125, snap.tokens["us-central1"]["gemini-1.0-pro"].InputPer1kTokens, 1e-9)
 }
 
 func TestParseSkus_ComputeOnDemand(t *testing.T) {
@@ -116,8 +130,8 @@ func TestParseSkus_GlobalFallbackForTokenSKUWithNoRegion(t *testing.T) {
 
 	snap := pm.Snapshot()
 	require.NotNil(t, snap.tokens["global"])
-	require.NotNil(t, snap.tokens["global"]["1.5-flash"])
-	assert.InDelta(t, 0.00125, snap.tokens["global"]["1.5-flash"].InputPer1kTokens, 1e-9)
+	require.NotNil(t, snap.tokens["global"]["gemini-1.5-flash"])
+	assert.InDelta(t, 0.00125, snap.tokens["global"]["gemini-1.5-flash"].InputPer1kTokens, 1e-9)
 }
 
 func TestParseSkus_MultipleRegions(t *testing.T) {
@@ -141,8 +155,8 @@ func TestParseSkus_MultipleRegions(t *testing.T) {
 	require.NoError(t, err)
 
 	snap := pm.Snapshot()
-	assert.NotNil(t, snap.tokens["us-central1"]["1.5-pro"])
-	assert.NotNil(t, snap.tokens["europe-west1"]["1.5-pro"])
+	assert.NotNil(t, snap.tokens["us-central1"]["gemini-1.5-pro"])
+	assert.NotNil(t, snap.tokens["europe-west1"]["gemini-1.5-pro"])
 }
 
 func newTokenSKU(description, region, usageUnit string, units int64, nanos int32) *billingpb.Sku {
diff --git a/pkg/google/vertex/vertex.go b/pkg/google/vertex/vertex.go
@@ -22,12 +22,12 @@ const (
 var (
 	vertexTokenInputDesc = utils.GenerateDesc(
 		cloudcostexporter.MetricPrefix, subsystem, utils.TokenInputCostSuffix,
-		"Vertex AI Gemini input token cost in USD per 1k tokens.",
+		"Vertex AI input token cost in USD per 1k tokens.",
 		[]string{"model", "region"},
 	)
 	vertexTokenOutputDesc = utils.GenerateDesc(
 		cloudcostexporter.MetricPrefix, subsystem, utils.TokenOutputCostSuffix,
-		"Vertex AI Gemini output token cost in USD per 1k tokens.",
+		"Vertex AI output token cost in USD per 1k tokens.",
 		[]string{"model", "region"},
 	)
 	vertexComputeCostDesc = utils.GenerateDesc(
diff --git a/pkg/google/vertex/vertex_test.go b/pkg/google/vertex/vertex_test.go
@@ -50,13 +50,13 @@ func TestCollect_EmitsTokenMetrics(t *testing.T) {
 
 	inputMetric := metricByName(results, "cloudcost_gcp_vertex_token_input_usd_per_1k_tokens")
 	require.NotNil(t, inputMetric)
-	assert.Equal(t, "1.5-flash", inputMetric.Labels["model"])
+	assert.Equal(t, "gemini-1.5-flash", inputMetric.Labels["model"])
 	assert.Equal(t, "us-central1", inputMetric.Labels["region"])
 	assert.InDelta(t, 0.00125, inputMetric.Value, 1e-9)
 
 	outputMetric := metricByName(results, "cloudcost_gcp_vertex_token_output_usd_per_1k_tokens")
 	require.NotNil(t, outputMetric)
-	assert.Equal(t, "1.5-flash", outputMetric.Labels["model"])
+	assert.Equal(t, "gemini-1.5-flash", outputMetric.Labels["model"])
 	assert.Equal(t, "us-central1", outputMetric.Labels["region"])
 	assert.InDelta(t, 0.005, outputMetric.Value, 1e-9)
 }

Original file line number	Diff line number	Diff line change
`@@ -33,4 +33,4 @@ Metrics exported for the GCP Vertex AI service.`
`33`	`33`
`34`	`34`	`## Notes`
`35`	`35`
`36`		`-Pricing data is fetched from the GCP Billing API at startup and refreshed every 24 hours. SKU descriptions are matched using regular expressions; unknown SKUs are skipped. Verify SKU description patterns against the live Billing API when adding new Gemini models or machine types.`
	`36`	`+Pricing data is fetched from the GCP Billing API at startup and refreshed every 24 hours. SKU descriptions are matched using regular expressions; unknown SKUs are skipped. Verify SKU description patterns against the live Billing API when adding new models or machine types.`