Skip to content

Commit 22b5f87

Browse files
authored
compactor: verify that TSDB uploads use index format v2 (#13815)
Verify that the TSDB block index file uses v2 for the file format, not v1. The v1 format was only used for Prometheus 2.0 and 2.1 as far as I can tell. It hasn't been the default format for new blocks since Prometheus 2.2 and has never been the default for Mimir. Part of #13808
1 parent c990735 commit 22b5f87

File tree

3 files changed

+96
-0
lines changed

3 files changed

+96
-0
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
* [CHANGE] Limits: removed the experimental `cost_attribution_labels` configuration option. Use `cost_attribution_labels_structured` instead. #13286
2828
* [CHANGE] Ingester: Renamed `cortex_ingest_storage_writer_buffered_produce_bytes` metric to `cortex_ingest_storage_writer_buffered_produce_bytes_distribution` (Prometheus summary), and added `cortex_ingest_storage_writer_buffered_produce_bytes` metric that exports the buffer size as a Prometheus Gauge. #13414
2929
* [CHANGE] Querier and query-frontend: Removed support for per-step stats when MQE is enabled. #13582
30+
* [CHANGE] Compactor: Require that uploaded TSDB blocks use v2 of the index file format. #13815
3031
* [CHANGE] Query-frontend: Removed support for calculating 'cache-adjusted samples processed' query statistic. The `-query-frontend.cache-samples-processed-stats` CLI flag has been deprecated and will be removed in a future release. Setting it now has no effect. #13582
3132
* [CHANGE] Querier: Renamed experimental flag `-querier.prefer-availability-zone` to `-querier.prefer-availability-zones` and changed it to accept a comma-separated list of availability zones. All zones in the list are given equal priority when querying ingesters and store-gateways. #13756 #13758
3233
* [CHANGE] Ingester: Stabilize experimental flag `-ingest-storage.write-logs-fsync-before-kafka-commit-concurrency` to fsync write logs before the offset is committed to Kafka. Remove `-ingest-storage.write-logs-fsync-before-kafka-commit-enabled` since this is always enabled now. #13591

pkg/storage/tsdb/block/index.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ import (
3333
util_log "github.com/grafana/mimir/pkg/util/log"
3434
)
3535

36+
var (
37+
SupportedIndexFormats = []int{index.FormatV2}
38+
)
39+
3640
// VerifyBlock does a full run over a block index and chunk data and verifies that they fulfill the order invariants.
3741
func VerifyBlock(ctx context.Context, logger log.Logger, blockDir string, minTime, maxTime int64, checkChunks bool) error {
3842
stats, err := GatherBlockHealthStats(ctx, logger, blockDir, minTime, maxTime, checkChunks)
@@ -44,6 +48,9 @@ func VerifyBlock(ctx context.Context, logger log.Logger, blockDir string, minTim
4448
}
4549

4650
type HealthStats struct {
51+
// IndexFormat is the format version used by the TSDB index file.
52+
IndexFormat int
53+
4754
// TotalSeries represents total number of series in block.
4855
TotalSeries int64
4956
// OutOfOrderSeries represents number of series that have out of order chunks.
@@ -68,6 +75,17 @@ type HealthStats struct {
6875
OutOfOrderLabels int
6976
}
7077

78+
// UnsupportedIndexFormat returns an error if the stats indicate this TSDB block uses
79+
// an index format that isn't supported by the read path (store-gateways use a custom
80+
// implementation of TSDB index parsing code, to avoid mmap, which only supports v2).
81+
func (i HealthStats) UnsupportedIndexFormat() error {
82+
if !slices.Contains(SupportedIndexFormats, i.IndexFormat) {
83+
return fmt.Errorf("index uses format %d which is not supported (%v are supported)", i.IndexFormat, SupportedIndexFormats)
84+
}
85+
86+
return nil
87+
}
88+
7189
// OutOfOrderLabelsErr returns an error if the HealthStats object indicates
7290
// postings with out of order labels. This is corrected by Prometheus Issue
7391
// #5372 and affects Prometheus versions 2.8.0 and below.
@@ -129,6 +147,10 @@ func (i HealthStats) AnyErr() error {
129147
errMsg = append(errMsg, err.Error())
130148
}
131149

150+
if err := i.UnsupportedIndexFormat(); err != nil {
151+
errMsg = append(errMsg, err.Error())
152+
}
153+
132154
if err := i.Issue347OutsideChunksErr(); err != nil {
133155
errMsg = append(errMsg, err.Error())
134156
}
@@ -162,6 +184,8 @@ func GatherBlockHealthStats(ctx context.Context, logger log.Logger, blockDir str
162184
}
163185
defer runutil.CloseWithErrCapture(&err, r, "gather index issue file reader")
164186

187+
stats.IndexFormat = r.Version()
188+
165189
n, v := index.AllPostingsKey()
166190
p, err := r.Postings(ctx, n, v)
167191
if err != nil {

pkg/storage/tsdb/block/index_test.go

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,77 @@ import (
2020
"github.com/stretchr/testify/require"
2121
)
2222

23+
func TestHealthStats_AnyErr(t *testing.T) {
24+
testCases := []struct {
25+
name string
26+
stats HealthStats
27+
expectSuccess bool
28+
}{
29+
{
30+
name: "critical error",
31+
stats: HealthStats{
32+
IndexFormat: index.FormatV2,
33+
OutsideChunks: 1,
34+
CompleteOutsideChunks: 1,
35+
},
36+
expectSuccess: false,
37+
},
38+
{
39+
name: "invalid index version",
40+
stats: HealthStats{
41+
IndexFormat: index.FormatV1,
42+
},
43+
expectSuccess: false,
44+
},
45+
{
46+
name: "issue 347 outside chunks",
47+
stats: HealthStats{
48+
IndexFormat: index.FormatV2,
49+
OutsideChunks: 1,
50+
Issue347OutsideChunks: 1,
51+
},
52+
expectSuccess: false,
53+
},
54+
{
55+
name: "out of order labels",
56+
stats: HealthStats{
57+
IndexFormat: index.FormatV2,
58+
OutOfOrderLabels: 1,
59+
},
60+
expectSuccess: false,
61+
},
62+
63+
{
64+
name: "out of order chunks",
65+
stats: HealthStats{
66+
IndexFormat: index.FormatV2,
67+
TotalSeries: 100,
68+
OutOfOrderSeries: 50,
69+
OutOfOrderChunks: 10,
70+
},
71+
expectSuccess: false,
72+
},
73+
{
74+
name: "success",
75+
stats: HealthStats{
76+
IndexFormat: index.FormatV2,
77+
},
78+
expectSuccess: true,
79+
},
80+
}
81+
82+
for _, tc := range testCases {
83+
t.Run(tc.name, func(t *testing.T) {
84+
err := tc.stats.AnyErr()
85+
if tc.expectSuccess {
86+
require.NoError(t, err)
87+
} else {
88+
require.Error(t, err)
89+
}
90+
})
91+
}
92+
}
93+
2394
func TestRewrite(t *testing.T) {
2495
const excludeTime int64 = 600
2596

0 commit comments

Comments
 (0)