Skip to content

Commit 604be3e

Browse files
fix(scyllaclient_test): skip TestClientActiveRepairsIntegration for tablets
When SM uses the tablet repair API, Scylla only registers a request to repair a tablet table. Later on, Scylla creates repair jobs for the given tablets according to its own internal scheduler. Because of that, we have no guarantee that Scylla will start any repair jobs right after SM calls the tablet repair API. For example, they might be delayed due to a tablet migration/merge/split. The API for checking active repairs and killing repairs works on the repair job level, not on the tablet repair API request level; the task manager API should be used for handling those requests. Fixes #4334
1 parent eaacce4 commit 604be3e

File tree

2 files changed

+19
-42
lines changed

2 files changed

+19
-42
lines changed

Diff for: pkg/scyllaclient/client_scylla_integration_test.go

+19 -27
Original file line numberDiff line numberDiff line change
@@ -169,32 +169,23 @@ func TestClientActiveRepairsIntegration(t *testing.T) {
169169
if err != nil {
170170
t.Fatal(err)
171171
}
172-
ni, err := client.AnyNodeInfo(context.Background())
173-
if err != nil {
174-
t.Fatal(err)
175-
}
176-
tabletAPI, err := ni.SupportsTabletRepair()
177-
if err != nil {
178-
t.Fatal(err)
179-
}
180172

181173
Print("Given: cluster with table to repair")
182174
const ks = "test_active_repairs_ks"
183175
s := db.CreateSessionAndDropAllKeyspaces(t, client)
184-
db.WriteData(t, s, ks, 1)
176+
db.WriteData(t, s, ks, 2)
185177

186178
rd := scyllaclient.NewRingDescriber(context.Background(), client)
187-
asyncRepair := func(ctx context.Context, ks, tab, master string) {
188-
if _, err := client.RawRepair(ctx, ks, tab, master); err != nil {
189-
t.Error(err)
190-
}
191-
}
192-
if rd.IsTabletKeyspace(ks) && tabletAPI {
193-
asyncRepair = func(ctx context.Context, ks, tab, master string) {
194-
if _, err := client.TabletRepair(ctx, ks, tab, master, nil, nil); err != nil {
195-
t.Error(err)
196-
}
197-
}
179+
if rd.IsTabletKeyspace(ks) {
180+
// When SM uses tablet repair API, Scylla registers a request to repair
181+
// a tablet table. Later on, Scylla creates repair jobs for given tablets
182+
// according to its own internal scheduler. Because of that, we have no guarantee
183+
// that Scylla will start any repair jobs right after SM used tablet repair API.
184+
// For example, they might be delayed due to a table migration/merge/split.
185+
// The API for checking active repairs and killing repairs works on the repair job
186+
// level, not the tablet repair API request level. Task manager API should be used
187+
// for handling those requests.
188+
t.Skip("Checking active repairs and killing repairs is flaky with tablets")
198189
}
199190

200191
Print("When: cluster is idle")
@@ -207,24 +198,25 @@ func TestClientActiveRepairsIntegration(t *testing.T) {
207198
t.Fatal(active)
208199
}
209200

201+
Print("When: repair is running")
202+
go ExecOnHost(ManagedClusterHost(), "nodetool repair -pr")
210203
defer func() {
211-
// Make sure that repairs don't spill to other tests
212204
if err := client.KillAllRepairs(context.Background(), ManagedClusterHosts()...); err != nil {
213205
t.Fatal(err)
214206
}
215207
}()
216208

217-
Print("When: repairs are running")
218-
Print("Then: repairs are reported as active")
219-
WaitCond(t, func() bool {
220-
// Multiple repair requests in order to reduce flakiness
221-
asyncRepair(context.Background(), ks, db.BigTableName, ManagedClusterHost())
209+
Print("Then: active repairs reports")
210+
check := func() bool {
222211
active, err = client.ActiveRepairs(context.Background(), ManagedClusterHosts())
223212
if err != nil {
224213
t.Fatal(err)
225214
}
226215
return len(active) > 0
227-
}, 500*time.Millisecond, 4*time.Second)
216+
}
217+
if !check() {
218+
WaitCond(t, check, 500*time.Millisecond, 4*time.Second)
219+
}
228220
}
229221

230222
func TestClientSnapshotIntegration(t *testing.T) {

Diff for: pkg/scyllaclient/export_test.go

-15
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ package scyllaclient
55
import (
66
"context"
77
"time"
8-
9-
"github.com/scylladb/scylla-manager/v3/swagger/gen/scylla/v1/client/operations"
108
)
119

1210
func NoRetry(ctx context.Context) context.Context {
@@ -40,16 +38,3 @@ func (p *CachedProvider) SetValidity(d time.Duration) {
4038
func (c *Client) Hosts(ctx context.Context) ([]string, error) {
4139
return c.hosts(ctx)
4240
}
43-
44-
func (c *Client) RawRepair(ctx context.Context, ks, tab, master string) (int32, error) {
45-
p := operations.StorageServiceRepairAsyncByKeyspacePostParams{
46-
Context: forceHost(ctx, master),
47-
Keyspace: ks,
48-
ColumnFamilies: &tab,
49-
}
50-
resp, err := c.scyllaOps.StorageServiceRepairAsyncByKeyspacePost(&p)
51-
if err != nil {
52-
return 0, err
53-
}
54-
return resp.GetPayload(), nil
55-
}

0 commit comments

Comments
 (0)