Skip to content

Commit b6e6d39

Browse files
authored
feat(1-1-restore): set tombstone_gc mode to 'repair' for views (#4377)
This includes views into alterTGC stage as view is a normal table under the hood. * refactor: get views info from DESC SCHEMA instead of driver output This changes the way information about views is obtained - it's more reliable to use DESC SCHEMA output, then information from the gocql driver. * refactor: remove addIfNotExists function * fix: do not recreate views if restoreTables failed This changes the behavior in case of restoreTables return error, previously we would attempt to recreateViews, but without paying attention to the fact that base tables can have some (partial) data. Refs: #4261
1 parent fe594c3 commit b6e6d39

File tree

9 files changed

+253
-222
lines changed

9 files changed

+253
-222
lines changed

pkg/service/one2onerestore/model.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,11 @@ type node struct {
4141

4242
// Host contains basic information about Scylla node.
4343
type Host struct {
44-
ID string
45-
DC string
46-
Addr string
47-
ShardCount int
44+
ID string
45+
DC string
46+
Addr string
47+
ShardCount int
48+
SafeDescribeMethod scyllaclient.SafeDescribeMethod
4849
}
4950

5051
// ViewType either Materialized View or Secondary Index.

pkg/service/one2onerestore/progress.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,7 @@ import (
2222
// to show progress can return some information at this point.
2323
// Sets initial values for download_remaining_bytes and view_build_status metrics.
2424
func (w *worker) initProgressAndMetrics(ctx context.Context, workload []hostWorkload) error {
25-
tablesToRestore := getTablesToRestore(workload)
26-
views, err := w.getViews(ctx, tablesToRestore)
25+
views, err := w.getViews(ctx, workload)
2726
if err != nil {
2827
return errors.Wrap(err, "get views")
2928
}

pkg/service/one2onerestore/service.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@ func (s *Service) newWorker(ctx context.Context, clusterID, taskID, runID uuid.U
127127

128128
client: client,
129129
clusterSession: clusterSession,
130+
sessionFunc: s.clusterSession,
130131

131132
logger: s.logger,
132133
metrics: s.metrics,

pkg/service/one2onerestore/service_integration_test.go

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ func TestOne2OneRestoreServiceIntegration(t *testing.T) {
3333
WriteData(t, clusterSession, ksName, 10)
3434
mvName := "testmv"
3535
CreateMaterializedView(t, clusterSession, ksName, BigTableName, mvName)
36+
siName := "testsi"
37+
siTableName := siName + "_index"
38+
CreateSecondaryIndex(t, clusterSession, ksName, BigTableName, siName)
3639

3740
srcCnt := rowCount(t, clusterSession, ksName, BigTableName)
3841
if srcCnt == 0 {
@@ -42,6 +45,10 @@ func TestOne2OneRestoreServiceIntegration(t *testing.T) {
4245
if srcCntMV == 0 {
4346
t.Fatalf("Unexpected row count in materialized view: 0")
4447
}
48+
srcCntSI := rowCount(t, clusterSession, ksName, siTableName)
49+
if srcCntSI == 0 {
50+
t.Fatalf("Unexpected row count in secondary index: 0")
51+
}
4552

4653
Print("Run backup")
4754
loc := []backupspec.Location{testLocation("1-1-restore", "")}
@@ -75,16 +82,30 @@ func TestOne2OneRestoreServiceIntegration(t *testing.T) {
7582
if srcCntMV != dstCntMV {
7683
t.Fatalf("Expected row count in materialized view %d, but got %d", srcCntMV, dstCntMV)
7784
}
85+
dstCntSI := rowCount(t, clusterSession, ksName, siTableName)
86+
if srcCntSI != dstCntSI {
87+
t.Fatalf("Expected row count in secondary index %d, but got %d", srcCntSI, dstCntSI)
88+
}
7889

7990
// Ensure table's tombstone_gc mode is set to 'repair'
80-
w, _ := newTestWorker(t, ManagedClusterHosts())
81-
mode, err := w.getTableTombstoneGCMode(ksName, BigTableName)
82-
if err != nil {
83-
t.Fatalf("Get table tombstone_gc mode: %v", err)
84-
}
85-
if mode != modeRepair {
86-
t.Fatalf("Expected repair mode, but got %s", string(mode))
87-
}
91+
validateTombstoneGCMode(t, []testTable{
92+
{
93+
ks: ksName,
94+
name: BigTableName,
95+
},
96+
{
97+
ks: ksName,
98+
name: mvName,
99+
isView: true,
100+
},
101+
// It turns out, there is no way to create INDEX with tombstone_gc mode repair,
102+
// so for now I'm skipping this check.
103+
//{
104+
// ks: ksName,
105+
// name: siTableName,
106+
// isView: true,
107+
//},
108+
})
88109

89110
Print("Validate progress")
90111
pr, err := h.restoreSvc.GetProgress(context.Background(), h.clusterID, h.taskID, h.runID, h.props)
@@ -94,6 +115,25 @@ func TestOne2OneRestoreServiceIntegration(t *testing.T) {
94115
validateGetProgress(t, pr)
95116
}
96117

118+
type testTable struct {
119+
ks, name string
120+
isView bool
121+
}
122+
123+
func validateTombstoneGCMode(t *testing.T, tables []testTable) {
124+
t.Helper()
125+
w, _ := newTestWorker(t, ManagedClusterHosts())
126+
for _, table := range tables {
127+
mode, err := w.getTombstoneGCMode(table.ks, table.name, table.isView)
128+
if err != nil {
129+
t.Fatalf("Get table tombstone_gc mode: %v", err)
130+
}
131+
if mode != modeRepair {
132+
t.Fatalf("Expected repair mode, but got %s, table: %s.%s", string(mode), table.ks, table.name)
133+
}
134+
}
135+
}
136+
97137
func truncateAllTablesInKeyspace(tb testing.TB, session gocqlx.Session, ks string) {
98138
tb.Helper()
99139

pkg/service/one2onerestore/worker.go

Lines changed: 40 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,20 +16,21 @@ import (
1616
"github.com/scylladb/scylla-manager/backupspec"
1717
"github.com/scylladb/scylla-manager/v3/pkg/metrics"
1818
"github.com/scylladb/scylla-manager/v3/pkg/scyllaclient"
19+
"github.com/scylladb/scylla-manager/v3/pkg/service/cluster"
1920
"github.com/scylladb/scylla-manager/v3/pkg/util/inexlist/ksfilter"
2021
"github.com/scylladb/scylla-manager/v3/pkg/util/parallel"
2122
"github.com/scylladb/scylla-manager/v3/pkg/util/query"
2223
"github.com/scylladb/scylla-manager/v3/pkg/util/retry"
2324
"github.com/scylladb/scylla-manager/v3/pkg/util/uuid"
2425
"github.com/scylladb/scylla-manager/v3/pkg/util/version"
25-
"go.uber.org/multierr"
2626
)
2727

2828
type worker struct {
2929
managerSession gocqlx.Session
3030

3131
client *scyllaclient.Client
3232
clusterSession gocqlx.Session
33+
sessionFunc cluster.SessionFunc // is needed to create cql session to single host
3334

3435
logger log.Logger
3536
metrics metrics.One2OneRestoreMetrics
@@ -69,7 +70,7 @@ func (w *worker) parseTarget(ctx context.Context, properties json.RawMessage) (T
6970
}
7071

7172
// restore is an actual 1-1-restore stages.
72-
func (w *worker) restore(ctx context.Context, workload []hostWorkload, target Target) (err error) {
73+
func (w *worker) restore(ctx context.Context, workload []hostWorkload, target Target) error {
7374
defer func() {
7475
if err := w.setAutoCompaction(context.Background(), workload, true); err != nil {
7576
w.logger.Error(ctx, "Can't enable auto compaction", "err", err)
@@ -99,16 +100,16 @@ func (w *worker) restore(ctx context.Context, workload []hostWorkload, target Ta
99100
if err != nil {
100101
return errors.Wrap(err, "drop views")
101102
}
102-
defer func() {
103-
if rErr := w.reCreateViews(ctx, views); rErr != nil {
104-
err = multierr.Combine(
105-
err,
106-
errors.Wrap(rErr, "recreate views"),
107-
)
108-
}
109-
}()
110103

111-
return w.restoreTables(ctx, workload, target.Keyspace)
104+
if err := w.restoreTables(ctx, workload, target.Keyspace); err != nil {
105+
return errors.Wrap(err, "restore tables")
106+
}
107+
108+
if err := w.reCreateViews(ctx, views); err != nil {
109+
return errors.Wrap(err, "recreate views")
110+
}
111+
112+
return nil
112113
}
113114

114115
// getAllSnapshotManifestsAndTargetHosts gets backup(source) cluster node represented by manifests and target cluster nodes.
@@ -200,6 +201,16 @@ func (w *worker) prepareHostWorkload(ctx context.Context, manifests []*backupspe
200201
return errors.Wrap(err, "read manifest content")
201202
}
202203

204+
nodeInfo, err := w.client.NodeInfo(ctx, h.Addr)
205+
if err != nil {
206+
return errors.Wrapf(err, "get node %s info", h.Addr)
207+
}
208+
method, err := nodeInfo.SupportsSafeDescribeSchemaWithInternals()
209+
if err != nil {
210+
return errors.Wrapf(err, "node %s safe describe method", h.Addr)
211+
}
212+
hw.host.SafeDescribeMethod = method
213+
203214
result[i] = hw
204215

205216
return nil
@@ -237,6 +248,24 @@ func (w *worker) pinAgentCPU(ctx context.Context, workload []hostWorkload, pin b
237248
})
238249
}
239250

251+
func (w *worker) singleHostCQLSession(ctx context.Context, clusterID uuid.UUID, host string) (gocqlx.Session, error) {
252+
session, err := w.sessionFunc(ctx, clusterID, cluster.SingleHostSessionConfigOption(host))
253+
if err != nil {
254+
return gocqlx.Session{}, errors.Wrap(err, "create cql session")
255+
}
256+
return session, nil
257+
}
258+
259+
func (w *worker) raftReadBarrier(ctx context.Context, session gocqlx.Session, host Host) error {
260+
switch host.SafeDescribeMethod {
261+
case scyllaclient.SafeDescribeMethodReadBarrierAPI:
262+
return w.client.RaftReadBarrier(ctx, host.Addr, "")
263+
case scyllaclient.SafeDescribeMethodReadBarrierCQL:
264+
return query.RaftReadBarrier(session)
265+
}
266+
return errors.Errorf("unsupported method: %s", host.SafeDescribeMethod)
267+
}
268+
240269
// alterSchemaRetryWrapper is useful when executing many statements altering schema,
241270
// as it might take more time for Scylla to process them one after another.
242271
// This wrapper exits on: success, context cancel, op returned non-timeout error or after maxTotalTime has passed.

pkg/service/one2onerestore/worker_tgc.go

Lines changed: 57 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -25,37 +25,68 @@ const (
2525

2626
// setTombstoneGCModeRepair sets tombstone gc mode to repair to avoid data resurrection issues during restore.
2727
func (w *worker) setTombstoneGCModeRepair(ctx context.Context, workload []hostWorkload) error {
28+
type alterTGCTarget struct {
29+
keyspace string
30+
name string
31+
isView bool
32+
}
33+
var targets []alterTGCTarget
2834
for table := range getTablesToRestore(workload) {
29-
mode, err := w.getTableTombstoneGCMode(table.keyspace, table.table)
35+
targets = append(targets, alterTGCTarget{
36+
keyspace: table.keyspace,
37+
name: table.table,
38+
isView: false,
39+
})
40+
}
41+
views, err := w.getViews(ctx, workload)
42+
if err != nil {
43+
return errors.Wrap(err, "get views")
44+
}
45+
for _, view := range views {
46+
targets = append(targets, alterTGCTarget{
47+
keyspace: view.Keyspace,
48+
name: view.View,
49+
isView: true,
50+
})
51+
}
52+
53+
for _, target := range targets {
54+
mode, err := w.getTombstoneGCMode(target.keyspace, target.name, target.isView)
3055
if err != nil {
31-
return errors.Wrap(err, "get tombstone_gc mode")
56+
return errors.Wrapf(err, "get tombstone_gc mode: %s.%s", target.keyspace, target.name)
3257
}
3358
// No need to change tombstone gc mode.
3459
if mode == modeDisabled || mode == modeImmediate || mode == modeRepair {
35-
w.logger.Info(ctx, "Skipping set tombstone_gc mode", "table", table, "mode", mode)
60+
w.logger.Info(ctx, "Skipping set tombstone_gc mode", "name", target.keyspace+"."+target.name, "mode", mode)
3661
continue
3762
}
38-
if err := w.setTableTombstoneGCMode(ctx, table.keyspace, table.table, modeRepair); err != nil {
39-
return errors.Wrap(err, "set tombstone_gc mode repair")
63+
if err := w.setTombstoneGCMode(ctx, target.keyspace, target.name, target.isView, modeRepair); err != nil {
64+
return errors.Wrapf(err, "set tombstone_gc mode repair: %s.%s", target.keyspace, target.name)
4065
}
4166
}
4267

4368
return nil
4469
}
4570

46-
// getTableTombstoneGCMode returns table's tombstone_gc mode.
47-
func (w *worker) getTableTombstoneGCMode(keyspace, table string) (tombstoneGCMode, error) {
71+
// getTombstoneGCMode returns table's tombstone_gc mode.
72+
func (w *worker) getTombstoneGCMode(keyspace, name string, isView bool) (tombstoneGCMode, error) {
73+
systemSchemaTable := "system_schema.tables"
74+
columnName := "table_name"
75+
if isView {
76+
systemSchemaTable = "system_schema.views"
77+
columnName = "view_name"
78+
}
4879
var ext map[string]string
49-
q := qb.Select("system_schema.tables").
80+
q := qb.Select(systemSchemaTable).
5081
Columns("extensions").
51-
Where(qb.Eq("keyspace_name"), qb.Eq("table_name")).
82+
Where(qb.Eq("keyspace_name"), qb.Eq(columnName)).
5283
Query(w.clusterSession).
53-
Bind(keyspace, table)
84+
Bind(keyspace, name)
5485

5586
defer q.Release()
5687
err := q.Scan(&ext)
5788
if err != nil {
58-
return "", err
89+
return "", errors.Wrap(err, "scan")
5990
}
6091

6192
// Timeout (just using gc_grace_seconds) is the default mode
@@ -73,21 +104,22 @@ func (w *worker) getTableTombstoneGCMode(keyspace, table string) (tombstoneGCMod
73104
return "", errors.Errorf("unrecognized tombstone_gc mode: %s", mode)
74105
}
75106

76-
// setTableTombstoneGCMode alters 'tombstone_gc' mode.
77-
func (w *worker) setTableTombstoneGCMode(ctx context.Context, keyspace, table string, mode tombstoneGCMode) error {
78-
w.logger.Info(ctx, "Alter table's tombstone_gc mode",
79-
"keyspace", keyspace,
80-
"table", table,
81-
)
107+
// setTombstoneGCMode alters 'tombstone_gc' mode.
108+
func (w *worker) setTombstoneGCMode(ctx context.Context, keyspace, name string, isView bool, mode tombstoneGCMode) error {
109+
logger := w.logger.With("keyspace", keyspace, "name", name, "is_view", isView)
110+
111+
logger.Info(ctx, "Alter tombstone_gc mode")
82112

83113
op := func() error {
84-
return w.clusterSession.ExecStmt(alterTableTombstoneGCStmt(keyspace, table, mode))
114+
stmt := alterTableTombstoneGCStmt(keyspace, name, mode)
115+
if isView {
116+
stmt = alterViewTombstoneGCStmt(keyspace, name, mode)
117+
}
118+
return w.clusterSession.ExecStmt(stmt)
85119
}
86120

87121
notify := func(err error, wait time.Duration) {
88-
w.logger.Info(ctx, "Altering table's tombstone_gc mode failed",
89-
"keyspace", keyspace,
90-
"table", table,
122+
logger.Info(ctx, "Altering tombstone_gc mode failed",
91123
"error", err,
92124
"wait", wait,
93125
)
@@ -99,3 +131,7 @@ func (w *worker) setTableTombstoneGCMode(ctx context.Context, keyspace, table st
99131
func alterTableTombstoneGCStmt(keyspace, table string, mode tombstoneGCMode) string {
100132
return fmt.Sprintf(`ALTER TABLE %q.%q WITH tombstone_gc = {'mode': '%s'}`, keyspace, table, mode)
101133
}
134+
135+
func alterViewTombstoneGCStmt(keyspace, view string, mode tombstoneGCMode) string {
136+
return fmt.Sprintf(`ALTER MATERIALIZED VIEW %q.%q WITH tombstone_gc = {'mode': '%s'}`, keyspace, view, mode)
137+
}

pkg/service/one2onerestore/worker_validate_integration_test.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,16 @@ import (
1515
"testing"
1616

1717
"github.com/scylladb/go-log"
18+
"github.com/scylladb/gocqlx/v2"
1819
"github.com/scylladb/scylla-manager/backupspec"
1920
"github.com/scylladb/scylla-manager/v3/pkg/metrics"
2021
"github.com/scylladb/scylla-manager/v3/pkg/scyllaclient"
22+
"github.com/scylladb/scylla-manager/v3/pkg/service/cluster"
2123
"github.com/scylladb/scylla-manager/v3/pkg/testutils"
2224
"github.com/scylladb/scylla-manager/v3/pkg/testutils/db"
2325
"github.com/scylladb/scylla-manager/v3/pkg/testutils/testconfig"
2426
"github.com/scylladb/scylla-manager/v3/pkg/util/httpx"
27+
"github.com/scylladb/scylla-manager/v3/pkg/util/uuid"
2528
)
2629

2730
func TestWorkerValidateClustersIntegration(t *testing.T) {
@@ -254,8 +257,11 @@ func newTestWorker(t *testing.T, hosts []string) (*worker, *testutils.HackableRo
254257
managerSession: managerSession,
255258
client: sc,
256259
clusterSession: clusterSession,
257-
logger: log.NopLogger,
258-
metrics: metrics.NewOne2OneRestoreMetrics(),
260+
sessionFunc: func(ctx context.Context, clusterID uuid.UUID, _ ...cluster.SessionConfigOption) (gocqlx.Session, error) {
261+
return db.CreateSession(t, sc), nil
262+
},
263+
logger: log.NopLogger,
264+
metrics: metrics.NewOne2OneRestoreMetrics(),
259265
}
260266
return w, hrt
261267
}

0 commit comments

Comments
 (0)