Skip to content

Commit e339355

Browse files
committed
Cache Azure blob cost query on scrape interval
1 parent 3d89be8 commit e339355

File tree

3 files changed

+84
-6
lines changed

3 files changed

+84
-6
lines changed

docs/metrics/azure/blob.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
Pass `blob` in `--azure.services` to enable this collector. Matching is case-insensitive.
44

5-
The collector defines a storage cost `GaugeVec` that the Azure provider includes in its `Describe` and `Collect` fan-out (same gatherer pattern as `azure_aks`). `Collect` calls `StorageCostQuerier.QueryBlobStorage` with a **30-day** lookback (`defaultQueryLookback` in `pkg/azure/blob/cost_query.go`) and sets the gauge from each row. `Config.CostQuerier` supplies the querier; when it is nil the collector uses a no-op querier (no rows). The parent Azure collector forwards `StorageGauge.Collect(ch)` so blob cost metrics share one registration path with the rest of the Azure exporter. Scrape instrumentation publishes `cloudcost_exporter_collector_*` with label `collector="azure_blob"`.
5+
The collector defines a storage cost `GaugeVec` that the Azure provider includes in its `Describe` and `Collect` fan-out (same gatherer pattern as `azure_aks`). `Collect` calls `StorageCostQuerier.QueryBlobStorage` on the first scrape and afterwards only once `ScrapeInterval` has elapsed since the last successful query (the same billing refresh cadence as `pkg/aws/s3`); a failed query returns an error for that scrape and is retried on the next one. Each query uses a **30-day** lookback (`defaultQueryLookback` in `pkg/azure/blob/cost_query.go`). The rows from the most recent successful query are cached and applied to the gauge on every scrape. `Config.CostQuerier` supplies the querier; when it is nil the collector uses a no-op querier (no rows). The parent Azure collector forwards `StorageGauge.Collect(ch)` so blob cost metrics share one registration path with the rest of the Azure exporter. Scrape instrumentation publishes `cloudcost_exporter_collector_*` with label `collector="azure_blob"`.
66

77
## Cost metrics
88

pkg/azure/blob/blob.go

Lines changed: 23 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package blob
33
import (
44
"context"
55
"log/slog"
6+
"sync"
67
"time"
78

89
"github.com/grafana/cloudcost-exporter/pkg/provider"
@@ -49,6 +50,10 @@ type Collector struct {
4950
querier StorageCostQuerier
5051
subscriptionID string
5152
scrapeInterval time.Duration
53+
54+
mu sync.Mutex
55+
cachedRows []StorageCostRow
56+
nextRefresh time.Time // QueryBlobStorage when time.Now is on or after this (S3 billing refresh pattern).
5257
}
5358

5459
// Config holds settings for the blob collector.
@@ -76,21 +81,34 @@ func New(cfg *Config) (*Collector, error) {
7681
querier: q,
7782
subscriptionID: cfg.SubscriptionId,
7883
scrapeInterval: interval,
84+
// First Collect runs a query immediately (same idea as pkg/aws/s3 nextScrape).
85+
nextRefresh: time.Now().Add(-interval),
7986
}, nil
8087
}
8188

8289
// Collect refreshes the cached cost rows when the scrape interval has elapsed, applies them to the storage vec, then forwards metrics on ch for the parent gatherer.
8390
func (c *Collector) Collect(ctx context.Context, ch chan<- prometheus.Metric) error {
8491
c.logger.LogAttrs(ctx, slog.LevelInfo, "collecting metrics")
85-
rows, err := c.querier.QueryBlobStorage(ctx, c.subscriptionID, defaultQueryLookback)
86-
if err != nil {
87-
return err
92+
c.mu.Lock()
93+
defer c.mu.Unlock()
94+
now := time.Now()
95+
if !now.Before(c.nextRefresh) {
96+
rows, err := c.querier.QueryBlobStorage(ctx, c.subscriptionID, defaultQueryLookback)
97+
if err != nil {
98+
return err
99+
}
100+
c.cachedRows = rows
101+
c.nextRefresh = now.Add(c.scrapeInterval)
88102
}
103+
c.applyRowsToGauge(c.cachedRows)
104+
c.metrics.StorageGauge.Collect(ch)
105+
return nil
106+
}
107+
108+
func (c *Collector) applyRowsToGauge(rows []StorageCostRow) {
89109
for _, row := range rows {
90110
c.metrics.StorageGauge.WithLabelValues(row.Region, row.Class).Set(row.Rate)
91111
}
92-
c.metrics.StorageGauge.Collect(ch)
93-
return nil
94112
}
95113

96114
// Describe satisfies provider.Collector.

pkg/azure/blob/blob_test.go

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"log/slog"
77
"os"
88
"strings"
9+
"sync"
910
"testing"
1011
"time"
1112

@@ -32,6 +33,39 @@ func (s stubCostQuerier) QueryBlobStorage(context.Context, string, time.Duration
3233
return s.rows, s.err
3334
}
3435

36+
type countingCostQuerier struct {
37+
mu sync.Mutex
38+
n int
39+
rows []StorageCostRow
40+
err error
41+
}
42+
43+
func (c *countingCostQuerier) QueryBlobStorage(context.Context, string, time.Duration) ([]StorageCostRow, error) {
44+
c.mu.Lock()
45+
defer c.mu.Unlock()
46+
c.n++
47+
return c.rows, c.err
48+
}
49+
50+
func (c *countingCostQuerier) calls() int {
51+
c.mu.Lock()
52+
defer c.mu.Unlock()
53+
return c.n
54+
}
55+
56+
func newCollectorWithCountingQuerier(t *testing.T, rows []StorageCostRow, querierErr error) (*Collector, *countingCostQuerier) {
57+
t.Helper()
58+
q := &countingCostQuerier{rows: rows, err: querierErr}
59+
c, err := New(&Config{
60+
Logger: testLogger,
61+
SubscriptionId: "sub",
62+
ScrapeInterval: time.Hour,
63+
CostQuerier: q,
64+
})
65+
require.NoError(t, err)
66+
return c, q
67+
}
68+
3569
func TestCollector_Collect_queryError(t *testing.T) {
3670
c, err := New(&Config{
3771
Logger: testLogger,
@@ -42,6 +76,32 @@ func TestCollector_Collect_queryError(t *testing.T) {
4276
assert.Error(t, c.Collect(t.Context(), testCollectSink()))
4377
}
4478

79+
func TestCollector_Collect_costQueryRefresh(t *testing.T) {
80+
sampleRows := []StorageCostRow{{Region: "eastus", Class: "Hot", Rate: 0.001}}
81+
82+
t.Run("skips_until_interval", func(t *testing.T) {
83+
c, q := newCollectorWithCountingQuerier(t, sampleRows, nil)
84+
require.NoError(t, c.Collect(t.Context(), testCollectSink()))
85+
require.NoError(t, c.Collect(t.Context(), testCollectSink()))
86+
assert.Equal(t, 1, q.calls(), "second scrape within interval should not call querier")
87+
})
88+
89+
t.Run("refetches_when_next_refresh_elapsed", func(t *testing.T) {
90+
c, q := newCollectorWithCountingQuerier(t, sampleRows, nil)
91+
require.NoError(t, c.Collect(t.Context(), testCollectSink()))
92+
c.nextRefresh = time.Now().Add(-time.Second)
93+
require.NoError(t, c.Collect(t.Context(), testCollectSink()))
94+
assert.Equal(t, 2, q.calls())
95+
})
96+
97+
t.Run("retries_after_error", func(t *testing.T) {
98+
c, q := newCollectorWithCountingQuerier(t, nil, errors.New("query failed"))
99+
assert.Error(t, c.Collect(t.Context(), testCollectSink()))
100+
assert.Error(t, c.Collect(t.Context(), testCollectSink()))
101+
assert.Equal(t, 2, q.calls(), "errors do not advance nextRefresh; querier should run again")
102+
})
103+
}
104+
45105
func TestCollector_Collect_setsGaugeFromQuerier(t *testing.T) {
46106
c, err := New(&Config{
47107
Logger: testLogger,

0 commit comments

Comments
 (0)