|
| 1 | +// Copyright 2025 The etcd Authors |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | + |
| 15 | +package e2e |
| 16 | + |
| 17 | +import ( |
| 18 | + "context" |
| 19 | + "fmt" |
| 20 | + "net/url" |
| 21 | + "testing" |
| 22 | + "time" |
| 23 | + |
| 24 | + clientv3 "go.etcd.io/etcd/client/v3" |
| 25 | + "go.etcd.io/etcd/pkg/v3/stringutil" |
| 26 | + "go.etcd.io/etcd/tests/v3/framework/e2e" |
| 27 | + |
| 28 | + "github.com/stretchr/testify/require" |
| 29 | +) |
| 30 | + |
| 31 | +// TestReproduce19406 reproduces the issue: https://github.com/etcd-io/etcd/issues/19406 |
| 32 | +func TestReproduce19406(t *testing.T) { |
| 33 | + e2e.BeforeTest(t) |
| 34 | + |
| 35 | + compactionSleepInterval := 100 * time.Millisecond |
| 36 | + ctx := context.TODO() |
| 37 | + |
| 38 | + clus, cerr := e2e.NewEtcdProcessCluster(ctx, t, |
| 39 | + e2e.WithClusterSize(1), |
| 40 | + e2e.WithGoFailEnabled(true), |
| 41 | + e2e.WithCompactionBatchLimit(1), |
| 42 | + e2e.WithCompactionSleepInterval(compactionSleepInterval), |
| 43 | + ) |
| 44 | + require.NoError(t, cerr) |
| 45 | + t.Cleanup(func() { require.NoError(t, clus.Stop()) }) |
| 46 | + |
| 47 | + // Produce some data |
| 48 | + cli := newClient(t, clus.EndpointsGRPC(), e2e.ClientConfig{}) |
| 49 | + valueSize := 10 |
| 50 | + var latestRevision int64 |
| 51 | + |
| 52 | + produceKeyNum := 20 |
| 53 | + for i := 0; i <= produceKeyNum; i++ { |
| 54 | + resp, err := cli.Put(ctx, fmt.Sprintf("%d", i), stringutil.RandString(uint(valueSize))) |
| 55 | + require.NoError(t, err) |
| 56 | + latestRevision = resp.Header.Revision |
| 57 | + } |
| 58 | + |
| 59 | + // Sleep for PerCompactionInterationInterval to simulate a single iteration of compaction lasting at least this duration. |
| 60 | + PerCompactionInterationInterval := compactionSleepInterval |
| 61 | + require.NoError(t, clus.Procs[0].Failpoints().SetupHTTP(ctx, "compactAfterAcquiredBatchTxLock", |
| 62 | + fmt.Sprintf(`sleep("%s")`, PerCompactionInterationInterval))) |
| 63 | + |
| 64 | + // start compaction |
| 65 | + t.Log("start compaction...") |
| 66 | + _, err := cli.Compact(ctx, latestRevision, clientv3.WithCompactPhysical()) |
| 67 | + require.NoError(t, err) |
| 68 | + t.Log("finished compaction...") |
| 69 | + |
| 70 | + // Validate that total compaction sleep interval |
| 71 | + // Compaction runs in batches. During each batch, it acquires a lock, releases it at the end, |
| 72 | + // and then waits for a compactionSleepInterval before starting the next batch. This pause |
| 73 | + // allows PUT requests to be processed. |
| 74 | + // Therefore, the total compaction sleep interval larger or equal to |
| 75 | + // (compaction iteration number - 1) * compactionSleepInterval |
| 76 | + httpEndpoint := clus.EndpointsHTTP()[0] |
| 77 | + totalKeys := produceKeyNum + 1 |
| 78 | + pauseDuration, totalDuration := getEtcdCompactionMetrics(t, httpEndpoint) |
| 79 | + require.NoError(t, err) |
| 80 | + actualSleepInterval := time.Duration(totalDuration-pauseDuration) * time.Millisecond |
| 81 | + expectSleepInterval := compactionSleepInterval * time.Duration(totalKeys) |
| 82 | + t.Logf("db_compaction_pause_duration: %.2f db_compaction_total_duration: %.2f, totalKeys: %d", |
| 83 | + pauseDuration, totalDuration, totalKeys) |
| 84 | + require.GreaterOrEqualf(t, actualSleepInterval, expectSleepInterval, |
| 85 | + "expect total compact sleep interval larger than (%v) but got (%v)", |
| 86 | + expectSleepInterval, actualSleepInterval) |
| 87 | +} |
| 88 | + |
| 89 | +func getEtcdCompactionMetrics(t *testing.T, httpEndpoint string) (pauseDuration, totalDuration float64) { |
| 90 | + metricsURL, err := url.JoinPath(httpEndpoint, "metrics") |
| 91 | + require.NoError(t, err) |
| 92 | + |
| 93 | + // Fetch metrics from the endpoint |
| 94 | + metricFamilies, err := e2e.GetMetrics(metricsURL) |
| 95 | + require.NoError(t, err) |
| 96 | + |
| 97 | + // Extract sum from histogram metric |
| 98 | + getHistogramSum := func(name string) float64 { |
| 99 | + mf, ok := metricFamilies[name] |
| 100 | + require.Truef(t, ok, "metric %q not found", name) |
| 101 | + require.NotEmptyf(t, mf.Metric, "metric %q has no data", name) |
| 102 | + |
| 103 | + hist := mf.Metric[0].GetHistogram() |
| 104 | + require.NotEmptyf(t, hist, "metric %q is not a histogram", name) |
| 105 | + |
| 106 | + return hist.GetSampleSum() |
| 107 | + } |
| 108 | + |
| 109 | + pauseDuration = getHistogramSum("etcd_debugging_mvcc_db_compaction_pause_duration_milliseconds") |
| 110 | + totalDuration = getHistogramSum("etcd_debugging_mvcc_db_compaction_total_duration_milliseconds") |
| 111 | + |
| 112 | + return pauseDuration, totalDuration |
| 113 | +} |
0 commit comments