Skip to content

Commit a4c4629

Browse files
committed
blockbuilder: set up per-tenant tsdb metrics
Signed-off-by: Vladimir Varankin <vladimir.varankin@grafana.com>
1 parent 8ecdd16 commit a4c4629

File tree

4 files changed

+70
-31
lines changed

4 files changed

+70
-31
lines changed

pkg/blockbuilder/blockbuilder.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"github.com/grafana/mimir/pkg/blockbuilder/schedulerpb"
2929
"github.com/grafana/mimir/pkg/storage/bucket"
3030
"github.com/grafana/mimir/pkg/storage/ingest"
31+
mimir_tsdb "github.com/grafana/mimir/pkg/storage/tsdb"
3132
"github.com/grafana/mimir/pkg/storage/tsdb/block"
3233
"github.com/grafana/mimir/pkg/util"
3334
"github.com/grafana/mimir/pkg/util/spanlogger"
@@ -50,6 +51,7 @@ type BlockBuilder struct {
5051

5152
blockBuilderMetrics blockBuilderMetrics
5253
tsdbBuilderMetrics tsdbBuilderMetrics
54+
tsdbMetrics *mimir_tsdb.TSDBMetrics
5355
readerMetrics *ingest.ReaderMetrics
5456
readerMetricsSource swappableReaderMetricsSource
5557
kpromMetrics *kprom.Metrics
@@ -88,7 +90,8 @@ func newWithSchedulerClient(
8890
register: reg,
8991
limits: limits,
9092
blockBuilderMetrics: newBlockBuilderMetrics(reg),
91-
tsdbBuilderMetrics: newTSDBBBuilderMetrics(reg),
93+
tsdbBuilderMetrics: newTSDBBuilderMetrics(reg),
94+
tsdbMetrics: mimir_tsdb.NewTSDBMetrics(prometheus.WrapRegistererWithPrefix("cortex_blockbuilder_", reg), logger),
9295
readerMetrics: readerMetrics,
9396
readerMetricsSource: readerMetricsSource,
9497
kpromMetrics: kpm,
@@ -241,7 +244,7 @@ func (b *BlockBuilder) consumeJob(ctx context.Context, key schedulerpb.JobKey, s
241244

242245
logger := log.With(sp, "partition", spec.Partition, "job_id", key.Id, "job_epoch", key.Epoch)
243246

244-
builder := NewTSDBBuilder(logger, b.cfg.DataDir, spec.Partition, b.cfg.BlocksStorage, b.limits, b.tsdbBuilderMetrics, b.cfg.ApplyMaxGlobalSeriesPerUserBelow)
247+
builder := NewTSDBBuilder(logger, b.cfg.DataDir, spec.Partition, b.cfg.BlocksStorage, b.limits, b.tsdbBuilderMetrics, b.tsdbMetrics, b.cfg.ApplyMaxGlobalSeriesPerUserBelow)
245248
defer runutil.CloseWithErrCapture(&err, builder, "closing tsdb builder")
246249

247250
// TODO: the block-builder can skip unmarshaling of exemplars because TSDB doesn't persist them into blocks; find a way to let PusherConsumer know about it

pkg/blockbuilder/metrics.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ type tsdbBuilderMetrics struct {
5050
lastSuccessfulCompactAndUploadTime *prometheus.GaugeVec
5151
}
5252

53-
func newTSDBBBuilderMetrics(reg prometheus.Registerer) tsdbBuilderMetrics {
53+
func newTSDBBuilderMetrics(reg prometheus.Registerer) tsdbBuilderMetrics {
5454
var m tsdbBuilderMetrics
5555

5656
m.processSamplesDiscarded = promauto.With(reg).NewCounterVec(prometheus.CounterOpts{

pkg/blockbuilder/tsdb.go

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
"github.com/grafana/dskit/multierror"
1818
dskittenant "github.com/grafana/dskit/tenant"
1919
"github.com/oklog/ulid/v2"
20+
"github.com/prometheus/client_golang/prometheus"
2021
"github.com/prometheus/prometheus/model/histogram"
2122
"github.com/prometheus/prometheus/model/labels"
2223
"github.com/prometheus/prometheus/storage"
@@ -35,10 +36,12 @@ import (
3536
type TSDBBuilder struct {
3637
dataDir string
3738

38-
logger log.Logger
39-
limits *validation.Overrides
40-
blocksStorageCfg mimir_tsdb.BlocksStorageConfig
41-
metrics tsdbBuilderMetrics
39+
logger log.Logger
40+
limits *validation.Overrides
41+
blocksStorageCfg mimir_tsdb.BlocksStorageConfig
42+
tsdbBuilderMetrics tsdbBuilderMetrics
43+
tsdbMetrics *mimir_tsdb.TSDBMetrics
44+
4245
applyMaxGlobalSeriesPerUserBelow int // inclusive
4346

4447
partitionID int32
@@ -65,13 +68,23 @@ type tsdbTenant struct {
6568
tenantID string
6669
}
6770

68-
func NewTSDBBuilder(logger log.Logger, dataDir string, partitionID int32, blocksStorageCfg mimir_tsdb.BlocksStorageConfig, limits *validation.Overrides, metrics tsdbBuilderMetrics, applyMaxGlobalSeriesPerUserBelow int) *TSDBBuilder {
71+
func NewTSDBBuilder(
72+
logger log.Logger,
73+
dataDir string,
74+
partitionID int32,
75+
blocksStorageCfg mimir_tsdb.BlocksStorageConfig,
76+
limits *validation.Overrides,
77+
tsdbBuilderMetrics tsdbBuilderMetrics,
78+
tsdbMetrics *mimir_tsdb.TSDBMetrics,
79+
applyMaxGlobalSeriesPerUserBelow int,
80+
) *TSDBBuilder {
6981
return &TSDBBuilder{
7082
dataDir: dataDir,
7183
logger: logger,
7284
limits: limits,
7385
blocksStorageCfg: blocksStorageCfg,
74-
metrics: metrics,
86+
tsdbBuilderMetrics: tsdbBuilderMetrics,
87+
tsdbMetrics: tsdbMetrics,
7588
applyMaxGlobalSeriesPerUserBelow: applyMaxGlobalSeriesPerUserBelow,
7689
partitionID: partitionID,
7790
tsdbs: make(map[tsdbTenant]*userTSDB),
@@ -252,7 +265,7 @@ func (b *TSDBBuilder) PushToStorageAndReleaseRequest(ctx context.Context, req *m
252265

253266
if discardedSamples > 0 {
254267
partitionStr := fmt.Sprintf("%d", tenant.partitionID)
255-
b.metrics.processSamplesDiscarded.WithLabelValues(partitionStr).Add(float64(discardedSamples))
268+
b.tsdbBuilderMetrics.processSamplesDiscarded.WithLabelValues(partitionStr).Add(float64(discardedSamples))
256269
}
257270

258271
return app.Commit()
@@ -292,6 +305,8 @@ func (b *TSDBBuilder) getOrCreateTSDB(tenant tsdbTenant) (*userTSDB, error) {
292305
}
293306

294307
func (b *TSDBBuilder) newTSDB(tenant tsdbTenant) (*userTSDB, error) {
308+
tsdbPromReg := prometheus.NewRegistry()
309+
295310
udir := filepath.Join(b.dataDir, strconv.Itoa(int(tenant.partitionID)), tenant.tenantID)
296311
// Remove any previous TSDB dir. We don't need it.
297312
if err := os.RemoveAll(udir); err != nil {
@@ -318,7 +333,7 @@ func (b *TSDBBuilder) newTSDB(tenant tsdbTenant) (*userTSDB, error) {
318333
udb.maxGlobalSeries = userLimit
319334
}
320335

321-
db, err := tsdb.Open(udir, util_log.SlogFromGoKit(userLogger), nil, &tsdb.Options{
336+
db, err := tsdb.Open(udir, util_log.SlogFromGoKit(userLogger), tsdbPromReg, &tsdb.Options{
322337
RetentionDuration: 0,
323338
MinBlockDuration: 2 * time.Hour.Milliseconds(),
324339
MaxBlockDuration: 2 * time.Hour.Milliseconds(),
@@ -334,18 +349,20 @@ func (b *TSDBBuilder) newTSDB(tenant tsdbTenant) (*userTSDB, error) {
334349
OutOfOrderCapMax: int64(b.blocksStorageCfg.TSDB.OutOfOrderCapacityMax),
335350
SecondaryHashFunction: nil, // TODO(codesome): May be needed when applying limits. Used to determine the series owned by an ingester
336351
SeriesLifecycleCallback: udb,
337-
HeadPostingsForMatchersCacheMetrics: tsdb.NewPostingsForMatchersCacheMetrics(nil),
338-
BlockPostingsForMatchersCacheMetrics: tsdb.NewPostingsForMatchersCacheMetrics(nil),
352+
HeadPostingsForMatchersCacheMetrics: tsdb.NewPostingsForMatchersCacheMetrics(nil), // No need for these metrics; no one queries tsdb through block-builder
353+
BlockPostingsForMatchersCacheMetrics: tsdb.NewPostingsForMatchersCacheMetrics(nil), // No need for these metrics; no one queries tsdb through block-builder
339354
PostingsClonerFactory: tsdb.DefaultPostingsClonerFactory{},
340355
}, nil)
341356
if err != nil {
342357
return nil, err
343358
}
344359

345-
db.DisableCompactions()
360+
db.DisableCompactions() // we compact on our own schedule
346361

347362
udb.DB = db
348363

364+
b.tsdbMetrics.SetRegistryForTenant(userID, tsdbPromReg)
365+
349366
return udb, nil
350367
}
351368

@@ -377,6 +394,12 @@ func (b *TSDBBuilder) CompactAndUpload(ctx context.Context, uploadBlocks blockUp
377394
}
378395
merr.Add(os.RemoveAll(db.Dir()))
379396
}
397+
398+
// Remove all registered per-tenant TSDB metrics. Their local DBs are wiped out from the block-builder no matter what.
399+
for tenant := range b.tsdbs {
400+
b.tsdbMetrics.RemoveRegistryForTenant(tenant.tenantID)
401+
}
402+
380403
// Clear the map so that it can be released from the memory. Not setting to nil in case we want to reuse the TSDBBuilder.
381404
clear(b.tsdbs)
382405
b.tsdbsMu.Unlock()
@@ -403,11 +426,11 @@ func (b *TSDBBuilder) CompactAndUpload(ctx context.Context, uploadBlocks blockUp
403426
}
404427
partitionStr := strconv.Itoa(int(tenant.partitionID))
405428
if err != nil {
406-
b.metrics.compactAndUploadFailed.WithLabelValues(partitionStr).Inc()
429+
b.tsdbBuilderMetrics.compactAndUploadFailed.WithLabelValues(partitionStr).Inc()
407430
return
408431
}
409-
b.metrics.compactAndUploadDuration.WithLabelValues(partitionStr).Observe(time.Since(t).Seconds())
410-
b.metrics.lastSuccessfulCompactAndUploadTime.WithLabelValues(partitionStr).SetToCurrentTime()
432+
b.tsdbBuilderMetrics.compactAndUploadDuration.WithLabelValues(partitionStr).Observe(time.Since(t).Seconds())
433+
b.tsdbBuilderMetrics.lastSuccessfulCompactAndUploadTime.WithLabelValues(partitionStr).SetToCurrentTime()
411434
}(time.Now())
412435

413436
if err := db.compactEverything(ctx); err != nil {
@@ -451,10 +474,12 @@ func (b *TSDBBuilder) Close() error {
451474
defer b.tsdbsMu.Unlock()
452475

453476
var merr multierror.MultiError
454-
for _, db := range b.tsdbs {
477+
for tenant, db := range b.tsdbs {
455478
dbDir := db.Dir()
456479
merr.Add(db.Close())
457480
merr.Add(os.RemoveAll(dbDir))
481+
482+
b.tsdbMetrics.RemoveRegistryForTenant(tenant.tenantID)
458483
}
459484

460485
// Clear the map so that it can be released from the memory. Not setting to nil in case

pkg/blockbuilder/tsdb_test.go

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -224,9 +224,11 @@ func TestTSDBBuilder(t *testing.T) {
224224
userID: tc.limits,
225225
}
226226
overrides := validation.NewOverrides(defaultLimitsTestConfig(), validation.NewMockTenantLimits(limits))
227-
metrics := newTSDBBBuilderMetrics(prometheus.NewPedanticRegistry())
227+
logger := log.NewNopLogger()
228+
tsdbBuilderMetrics := newTSDBBuilderMetrics(prometheus.NewPedanticRegistry())
229+
tsdbMetrics := mimir_tsdb.NewTSDBMetrics(prometheus.NewPedanticRegistry(), logger)
228230

229-
builder := NewTSDBBuilder(log.NewNopLogger(), t.TempDir(), partitionID, mimir_tsdb.BlocksStorageConfig{}, overrides, metrics, 0)
231+
builder := NewTSDBBuilder(logger, t.TempDir(), partitionID, mimir_tsdb.BlocksStorageConfig{}, overrides, tsdbBuilderMetrics, tsdbMetrics, 0)
230232

231233
ctx := user.InjectOrgID(ctx, userID)
232234

@@ -307,8 +309,10 @@ func TestTSDBBuilder_CompactAndUpload_fail(t *testing.T) {
307309
userID := "user1"
308310

309311
overrides := validation.NewOverrides(defaultLimitsTestConfig(), nil)
310-
metrics := newTSDBBBuilderMetrics(prometheus.NewPedanticRegistry())
311-
builder := NewTSDBBuilder(log.NewNopLogger(), t.TempDir(), partitionID, mimir_tsdb.BlocksStorageConfig{}, overrides, metrics, 0)
312+
logger := log.NewNopLogger()
313+
tsdbBuilderMetrics := newTSDBBuilderMetrics(prometheus.NewPedanticRegistry())
314+
tsdbMetrics := mimir_tsdb.NewTSDBMetrics(prometheus.NewPedanticRegistry(), logger)
315+
builder := NewTSDBBuilder(logger, t.TempDir(), partitionID, mimir_tsdb.BlocksStorageConfig{}, overrides, tsdbBuilderMetrics, tsdbMetrics, 0)
312316
t.Cleanup(func() {
313317
require.NoError(t, builder.Close())
314318
})
@@ -403,8 +407,10 @@ func TestProcessingEmptyRequest(t *testing.T) {
403407
userID := "1"
404408

405409
overrides := validation.NewOverrides(defaultLimitsTestConfig(), nil)
406-
metrics := newTSDBBBuilderMetrics(prometheus.NewPedanticRegistry())
407-
builder := NewTSDBBuilder(log.NewNopLogger(), t.TempDir(), partitionID, mimir_tsdb.BlocksStorageConfig{}, overrides, metrics, 0)
410+
logger := log.NewNopLogger()
411+
tsdbBuilderMetrics := newTSDBBuilderMetrics(prometheus.NewPedanticRegistry())
412+
tsdbMetrics := mimir_tsdb.NewTSDBMetrics(prometheus.NewPedanticRegistry(), logger)
413+
builder := NewTSDBBuilder(logger, t.TempDir(), partitionID, mimir_tsdb.BlocksStorageConfig{}, overrides, tsdbBuilderMetrics, tsdbMetrics, 0)
408414

409415
ctx := user.InjectOrgID(t.Context(), userID)
410416

@@ -451,8 +457,10 @@ func TestTSDBBuilderLimits(t *testing.T) {
451457
}
452458
overrides := validation.NewOverrides(defaultLimitsTestConfig(), validation.NewMockTenantLimits(limits))
453459

454-
metrics := newTSDBBBuilderMetrics(prometheus.NewPedanticRegistry())
455-
builder := NewTSDBBuilder(log.NewNopLogger(), t.TempDir(), partitionID, mimir_tsdb.BlocksStorageConfig{}, overrides, metrics, applyGlobalSeriesLimitUnder)
460+
logger := log.NewNopLogger()
461+
tsdbBuilderMetrics := newTSDBBuilderMetrics(prometheus.NewPedanticRegistry())
462+
tsdbMetrics := mimir_tsdb.NewTSDBMetrics(prometheus.NewPedanticRegistry(), logger)
463+
builder := NewTSDBBuilder(logger, t.TempDir(), partitionID, mimir_tsdb.BlocksStorageConfig{}, overrides, tsdbBuilderMetrics, tsdbMetrics, applyGlobalSeriesLimitUnder)
456464
t.Cleanup(func() {
457465
require.NoError(t, builder.Close())
458466
})
@@ -516,8 +524,10 @@ func TestTSDBBuilderNativeHistogramEnabledError(t *testing.T) {
516524
}
517525
overrides := validation.NewOverrides(defaultLimitsTestConfig(), validation.NewMockTenantLimits(limits))
518526

519-
metrics := newTSDBBBuilderMetrics(prometheus.NewPedanticRegistry())
520-
builder := NewTSDBBuilder(log.NewNopLogger(), t.TempDir(), partitionID, mimir_tsdb.BlocksStorageConfig{}, overrides, metrics, 0)
527+
logger := log.NewNopLogger()
528+
tsdbBuilderMetrics := newTSDBBuilderMetrics(prometheus.NewPedanticRegistry())
529+
tsdbMetrics := mimir_tsdb.NewTSDBMetrics(prometheus.NewPedanticRegistry(), logger)
530+
builder := NewTSDBBuilder(logger, t.TempDir(), partitionID, mimir_tsdb.BlocksStorageConfig{}, overrides, tsdbBuilderMetrics, tsdbMetrics, 0)
521531
t.Cleanup(func() {
522532
require.NoError(t, builder.Close())
523533
})
@@ -814,9 +824,10 @@ func TestBuilderCreatedTimestamp(t *testing.T) {
814824
}
815825

816826
registry := prometheus.NewPedanticRegistry()
817-
metrics := newTSDBBBuilderMetrics(registry)
818-
logger := log.NewLogfmtLogger(log.NewSyncWriter(os.Stdout))
819-
builder := NewTSDBBuilder(logger, t.TempDir(), partitionID, mimir_tsdb.BlocksStorageConfig{}, overrides, metrics, 0)
827+
logger := log.NewNopLogger()
828+
tsdbBuilderMetrics := newTSDBBuilderMetrics(registry)
829+
tsdbMetrics := mimir_tsdb.NewTSDBMetrics(registry, logger)
830+
builder := NewTSDBBuilder(logger, t.TempDir(), partitionID, mimir_tsdb.BlocksStorageConfig{}, overrides, tsdbBuilderMetrics, tsdbMetrics, 0)
820831
t.Cleanup(func() {
821832
require.NoError(t, builder.Close())
822833
})

0 commit comments

Comments
 (0)