Commit af23793
feat(restore): adds --dc-mapping flag to restore command (#4213)
This adds support for the `--dc-mapping` flag in the restore command. It specifies the mapping between DCs from the backup and DCs in the restored (target) cluster.

Only one use case is supported: 1-1 DC mapping. This means that squeezing DCs (restoring dc1 and dc2 into dc3) or extending DCs (restoring dc1 into dc1 and dc2) is not supported when `--dc-mapping` is provided. The syntax is:

source_dc1=target_dc1,source_dc2=target_dc2

where an equals sign (=) separates a source DC name from its target DC name and a comma (,) separates multiple mappings.

If `--dc-mapping` is not provided, the current behavior is preserved - each node with access to a DC can download its data. It is also allowed to provide only a subset of DCs, leaving some source or target DCs (or both) out of the mapping. Only works with tables restoration (--restore-tables=true).

Fixes: #3829
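To make the 1-1 constraint concrete, here is a minimal Go sketch of the rule described above. It is an illustration only, not the validation code added by this commit, and the function name is made up:

package restoreexample

import "fmt"

// validateOneToOne is a hypothetical helper illustrating the 1-1 rule:
// every source DC maps to exactly one target DC and no two sources share
// a target, so squeezing (dc1,dc2 -> dc3) is rejected. Extending
// (dc1 -> dc1,dc2) cannot even be expressed, because map keys are unique.
func validateOneToOne(dcMapping map[string]string) error {
	targetToSource := map[string]string{}
	for src, dst := range dcMapping {
		if prev, ok := targetToSource[dst]; ok {
			return fmt.Errorf("source DCs %q and %q both map to target DC %q: squeezing DCs is not supported", prev, src, dst)
		}
		targetToSource[dst] = src
	}
	return nil
}

For example, a mapping of {"dc1": "dc3", "dc2": "dc3"} would be rejected by such a check, while {"dc1": "dc3", "dc2": "dc4"} would pass.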

17 files changed: +839 −172 lines changed


docs/source/sctool/partials/sctool_restore.yaml

Lines changed: 4 additions & 0 deletions
@@ -26,6 +26,9 @@ options:
   usage: |
     Task schedule as a cron `expression`.
     It supports the extended syntax including @monthly, @weekly, @daily, @midnight, @hourly, @every X[h|m|s].
+- name: dc-mapping
+  default_value: '[]'
+  usage: "Specifies mapping between DCs from the backup and DCs in the restored(target) cluster.\n\nThe Syntax is \"source_dc1=target_dc1,source_dc2=target_dc2\" where multiple mappings are separated by comma (,)\nand source and target DCs are separated by equal (=).\n\nExample: \"dc1=dc3,dc2=dc4\" - data from dc1 should be restored to dc3 and data from dc2 should be restored to dc4.\n\nOnly works with tables restoration (--restore-tables=true). \nNote: Only DCs that are provided in mappings will be restored.\n"
 - name: dry-run
   default_value: "false"
   usage: |
@@ -90,6 +93,7 @@ options:
     The `<dc>` parameter is optional. It allows you to specify the datacenter whose nodes will be used to restore the data
     from this location in a multi-dc setting, it must match Scylla nodes datacenter.
     By default, all live nodes are used to restore data from specified locations.
+    If `--dc-mapping` is used, then `<dc>` parameter will be ignored.
 
     Note that specifying datacenters closest to backup locations might reduce download time of restored data.
     The supported storage '<provider>'s are 'azure', 'gcs', 's3'.

docs/source/sctool/partials/sctool_restore_update.yaml

Lines changed: 4 additions & 0 deletions
@@ -24,6 +24,9 @@ options:
   usage: |
     Task schedule as a cron `expression`.
     It supports the extended syntax including @monthly, @weekly, @daily, @midnight, @hourly, @every X[h|m|s].
+- name: dc-mapping
+  default_value: '[]'
+  usage: "Specifies mapping between DCs from the backup and DCs in the restored(target) cluster.\n\nThe Syntax is \"source_dc1=target_dc1,source_dc2=target_dc2\" where multiple mappings are separated by comma (,)\nand source and target DCs are separated by equal (=).\n\nExample: \"dc1=dc3,dc2=dc4\" - data from dc1 should be restored to dc3 and data from dc2 should be restored to dc4.\n\nOnly works with tables restoration (--restore-tables=true). \nNote: Only DCs that are provided in mappings will be restored.\n"
 - name: dry-run
   default_value: "false"
   usage: |
@@ -88,6 +91,7 @@ options:
     The `<dc>` parameter is optional. It allows you to specify the datacenter whose nodes will be used to restore the data
     from this location in a multi-dc setting, it must match Scylla nodes datacenter.
     By default, all live nodes are used to restore data from specified locations.
+    If `--dc-mapping` is used, then `<dc>` parameter will be ignored.
 
     Note that specifying datacenters closest to backup locations might reduce download time of restored data.
     The supported storage '<provider>'s are 'azure', 'gcs', 's3'.

pkg/command/restore/cmd.go

Lines changed: 9 additions & 0 deletions
@@ -37,6 +37,7 @@ type command struct {
 	restoreTables bool
 	dryRun        bool
 	showTables    bool
+	dcMapping     map[string]string
 }
 
 func NewCommand(client *managerclient.Client) *cobra.Command {
@@ -90,6 +91,7 @@ func (cmd *command) init() {
 	w.Unwrap().BoolVar(&cmd.restoreTables, "restore-tables", false, "")
 	w.Unwrap().BoolVar(&cmd.dryRun, "dry-run", false, "")
 	w.Unwrap().BoolVar(&cmd.showTables, "show-tables", false, "")
+	w.Unwrap().StringToStringVar(&cmd.dcMapping, "dc-mapping", nil, "")
 }
 
 func (cmd *command) run(args []string) error {
@@ -182,6 +184,13 @@ func (cmd *command) run(args []string) error {
 		props["restore_tables"] = cmd.restoreTables
 		ok = true
 	}
+	if cmd.Flag("dc-mapping").Changed {
+		if cmd.Update() {
+			return wrapper("dc-mapping")
+		}
+		props["dc_mapping"] = cmd.dcMapping
+		ok = true
+	}
 
 	if cmd.dryRun {
 		res, err := cmd.client.GetRestoreTarget(cmd.Context(), cmd.cluster, task)
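For reference, the flag value registered above with StringToStringVar is parsed by pflag, which turns "dc1=dc3,dc2=dc4" into a map[string]string. A small standalone sketch of that parsing, using pflag directly rather than the manager's w.Unwrap() wrapper:

package main

import (
	"fmt"

	flag "github.com/spf13/pflag"
)

func main() {
	// Register a map-valued flag the same way cmd.init wires --dc-mapping.
	var dcMapping map[string]string
	fs := flag.NewFlagSet("restore", flag.ContinueOnError)
	fs.StringToStringVar(&dcMapping, "dc-mapping", nil, "source DC to target DC mapping")

	// "=" separates source from target, "," separates mappings.
	if err := fs.Parse([]string{"--dc-mapping", "dc1=dc3,dc2=dc4"}); err != nil {
		panic(err)
	}
	fmt.Println(dcMapping) // map[dc1:dc3 dc2:dc4]
}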

pkg/command/restore/res.yaml

Lines changed: 12 additions & 0 deletions
@@ -15,6 +15,7 @@ location: |
   The `<dc>` parameter is optional. It allows you to specify the datacenter whose nodes will be used to restore the data
   from this location in a multi-dc setting, it must match Scylla nodes datacenter.
   By default, all live nodes are used to restore data from specified locations.
+  If `--dc-mapping` is used, then `<dc>` parameter will be ignored.
 
   Note that specifying datacenters closest to backup locations might reduce download time of restored data.
   The supported storage '<provider>'s are 'azure', 'gcs', 's3'.
@@ -72,3 +73,14 @@ dry-run: |
 
 show-tables: |
   Prints table names together with keyspace, used in combination with --dry-run.
+
+dc-mapping: |
+  Specifies mapping between DCs from the backup and DCs in the restored(target) cluster.
+
+  The Syntax is "source_dc1=target_dc1,source_dc2=target_dc2" where multiple mappings are separated by comma (,)
+  and source and target DCs are separated by equal (=).
+
+  Example: "dc1=dc3,dc2=dc4" - data from dc1 should be restored to dc3 and data from dc2 should be restored to dc4.
+
+  Only works with tables restoration (--restore-tables=true).
+  Note: Only DCs that are provided in mappings will be restored.

pkg/service/restore/batch.go

Lines changed: 19 additions & 15 deletions
@@ -57,7 +57,7 @@ type batchDispatcher struct {
 	hostShardCnt map[string]uint
 }
 
-func newBatchDispatcher(workload Workload, batchSize int, hostShardCnt map[string]uint, locationHosts map[backupspec.Location][]string) *batchDispatcher {
+func newBatchDispatcher(workload Workload, batchSize int, hostShardCnt map[string]uint, locationInfo []LocationInfo) *batchDispatcher {
 	sortWorkload(workload)
 	var shards uint
 	for _, sh := range hostShardCnt {
@@ -70,7 +70,7 @@ func newBatchDispatcher(workload Workload, batchSize int, hostShardCnt map[strin
 		mu:                    sync.Mutex{},
 		wait:                  make(chan struct{}),
 		workload:              workload,
-		workloadProgress:      newWorkloadProgress(workload, locationHosts),
+		workloadProgress:      newWorkloadProgress(workload, locationInfo),
 		batchSize:             batchSize,
 		expectedShardWorkload: workload.TotalSize / int64(shards),
 		hostShardCnt:          hostShardCnt,
@@ -106,32 +106,36 @@ type remoteSSTableDirProgress struct {
 	RemainingSSTables []RemoteSSTable
 }
 
-func newWorkloadProgress(workload Workload, locationHosts map[backupspec.Location][]string) workloadProgress {
+func newWorkloadProgress(workload Workload, locationInfo []LocationInfo) workloadProgress {
 	dcBytes := make(map[string]int64)
-	locationDC := make(map[string][]string)
 	p := make([]remoteSSTableDirProgress, len(workload.RemoteDir))
 	for i, rdw := range workload.RemoteDir {
 		dcBytes[rdw.DC] += rdw.Size
-		locationDC[rdw.Location.StringWithoutDC()] = append(locationDC[rdw.Location.StringWithoutDC()], rdw.DC)
 		p[i] = remoteSSTableDirProgress{
 			RemainingSize:     rdw.Size,
 			RemainingSSTables: rdw.SSTables,
 		}
 	}
-	hostDCAccess := make(map[string][]string)
-	for loc, hosts := range locationHosts {
-		for _, h := range hosts {
-			hostDCAccess[h] = append(hostDCAccess[h], locationDC[loc.StringWithoutDC()]...)
-		}
-	}
 	return workloadProgress{
 		dcBytesToBeRestored: dcBytes,
 		hostFailedDC:        make(map[string][]string),
-		hostDCAccess:        hostDCAccess,
+		hostDCAccess:        getHostDCAccess(locationInfo),
 		remoteDir:           p,
 	}
 }
 
+func getHostDCAccess(locationInfo []LocationInfo) map[string][]string {
+	hostDCAccess := map[string][]string{}
+	for _, l := range locationInfo {
+		for dc, hosts := range l.DCHosts {
+			for _, h := range hosts {
+				hostDCAccess[h] = append(hostDCAccess[h], dc)
+			}
+		}
+	}
+	return hostDCAccess
+}
+
 // Checks if given host finished restoring all that it could.
 func (wp workloadProgress) isDone(host string) bool {
 	failed := wp.hostFailedDC[host]
@@ -201,8 +205,8 @@ func (bd *batchDispatcher) ValidateAllDispatched() error {
 	for i, rdp := range bd.workloadProgress.remoteDir {
 		if rdp.RemainingSize != 0 || len(rdp.RemainingSSTables) != 0 {
 			rdw := bd.workload.RemoteDir[i]
-			return errors.Errorf("failed to restore sstables from location %s table %s.%s (%d bytes). See logs for more info",
-				rdw.Location, rdw.Keyspace, rdw.Table, rdw.Size)
+			return errors.Errorf("failed to restore sstables from location %s dc %s table %s.%s (%d bytes). See logs for more info",
+				rdw.Location, rdw.DC, rdw.Keyspace, rdw.Table, rdw.Size)
 		}
 	}
 	for dc, bytes := range bd.workloadProgress.dcBytesToBeRestored {
@@ -257,7 +261,7 @@ func (bd *batchDispatcher) dispatchBatch(host string) (batch, bool) {
 		if slices.Contains(bd.workloadProgress.hostFailedDC[host], rdw.DC) {
 			continue
 		}
-		// Sip dir from location without access
+		// Skip dir from location without access
 		if !slices.Contains(bd.workloadProgress.hostDCAccess[host], rdw.DC) {
 			continue
 		}
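The core of this change is the new getHostDCAccess helper above: instead of deriving DC access from which hosts can reach a location, each LocationInfo now carries an explicit DC-to-hosts map that is inverted into a host-to-DCs map. A standalone sketch of that inversion on plain maps, without the manager's types:

package main

import "fmt"

// invertDCHosts mirrors what getHostDCAccess does: it flips per-location
// DC -> hosts maps into a single host -> DCs map, so the dispatcher can
// ask "which DCs may this host restore?".
func invertDCHosts(perLocationDCHosts []map[string][]string) map[string][]string {
	hostDCAccess := map[string][]string{}
	for _, dcHosts := range perLocationDCHosts {
		for dc, hosts := range dcHosts {
			for _, h := range hosts {
				hostDCAccess[h] = append(hostDCAccess[h], dc)
			}
		}
	}
	return hostDCAccess
}

func main() {
	got := invertDCHosts([]map[string][]string{
		{"dc1": {"h1", "h2"}},
		{"dc2": {"h1", "h2"}},
	})
	fmt.Println(got) // h1 and h2 each get access to dc1 and dc2
}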

pkg/service/restore/batch_test.go

Lines changed: 122 additions & 5 deletions
@@ -5,6 +5,7 @@ package restore
 import (
 	"testing"
 
+	"github.com/google/go-cmp/cmp"
 	"github.com/scylladb/scylla-manager/backupspec"
 )
 
@@ -104,17 +105,29 @@ func TestBatchDispatcher(t *testing.T) {
 
 	workload := aggregateWorkload(rawWorkload)
 
-	locationHosts := map[backupspec.Location][]string{
-		l1: {"h1", "h2"},
-		l2: {"h3"},
-	}
 	hostToShard := map[string]uint{
 		"h1": 1,
 		"h2": 2,
 		"h3": 3,
 	}
 
-	bd := newBatchDispatcher(workload, 1, hostToShard, locationHosts)
+	locationInfo := []LocationInfo{
+		{
+			Location: l1,
+			DCHosts: map[string][]string{
+				"dc1": {"h1", "h2"},
+				"dc2": {"h1", "h2"},
+			},
+		},
+		{
+			Location: l2,
+			DCHosts: map[string][]string{
+				"dc3": {"h3"},
+			},
+		},
+	}
+
+	bd := newBatchDispatcher(workload, 1, hostToShard, locationInfo)
 
 	scenario := []struct {
 		host string
@@ -166,3 +179,107 @@ func TestBatchDispatcher(t *testing.T) {
 		t.Fatalf("Expected sstables to be batched: %s", err)
 	}
 }
+
+func TestGetHostDCAccess(t *testing.T) {
+	testCases := []struct {
+		name string
+
+		locationInfo []LocationInfo
+
+		expected map[string][]string
+	}{
+		{
+			name: "one location with one DC",
+			locationInfo: []LocationInfo{
+				{
+					DCHosts: map[string][]string{
+						"dc1": {"host1", "host2"},
+					},
+				},
+			},
+			expected: map[string][]string{
+				"host1": {"dc1"},
+				"host2": {"dc1"},
+			},
+		},
+		{
+			name: "one location with two DC's",
+			locationInfo: []LocationInfo{
+				{
+					DCHosts: map[string][]string{
+						"dc1": {"host1"},
+						"dc2": {"host2"},
+					},
+				},
+			},
+			expected: map[string][]string{
+				"host1": {"dc1"},
+				"host2": {"dc2"},
+			},
+		},
+		{
+			name: "one location with two DC's, more nodes",
+			locationInfo: []LocationInfo{
+				{
+					DCHosts: map[string][]string{
+						"dc1": {"host1", "host2"},
+						"dc2": {"host3", "host4"},
+					},
+				},
+			},
+			expected: map[string][]string{
+				"host1": {"dc1"},
+				"host2": {"dc1"},
+				"host3": {"dc2"},
+				"host4": {"dc2"},
+			},
+		},
+		{
+			name: "two locations with one DC each",
+			locationInfo: []LocationInfo{
+				{
+					DCHosts: map[string][]string{
+						"dc1": {"host1"},
+					},
+				},
+				{
+					DCHosts: map[string][]string{
+						"dc2": {"host2"},
+					},
+				},
+			},
+			expected: map[string][]string{
+				"host1": {"dc1"},
+				"host2": {"dc2"},
+			},
+		},
+		{
+			name: "two locations with one DC each, but hosts maps to all dcs",
+			locationInfo: []LocationInfo{
+				{
+					DCHosts: map[string][]string{
+						"dc1": {"host1", "host2"},
+					},
+				},
+				{
+					DCHosts: map[string][]string{
+						"dc2": {"host1", "host2"},
+					},
+				},
+			},
+			expected: map[string][]string{
+				"host1": {"dc1", "dc2"},
+				"host2": {"dc1", "dc2"},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			actual := getHostDCAccess(tc.locationInfo)
+			if diff := cmp.Diff(actual, tc.expected); diff != "" {
+				t.Fatalf("Actual != Expected: %s", diff)
+			}
+		})
+	}
+}

pkg/service/restore/index.go

Lines changed: 5 additions & 5 deletions
@@ -46,12 +46,12 @@ type SSTable struct {
 }
 
 // IndexWorkload returns sstables to be restored aggregated by location, table and remote sstable dir.
-func (w *tablesWorker) IndexWorkload(ctx context.Context, locations []backupspec.Location) (Workload, error) {
+func (w *tablesWorker) IndexWorkload(ctx context.Context, locations []LocationInfo) (Workload, error) {
 	var rawWorkload []RemoteDirWorkload
 	for _, l := range locations {
 		lw, err := w.indexLocationWorkload(ctx, l)
 		if err != nil {
-			return Workload{}, errors.Wrapf(err, "index workload in %s", l)
+			return Workload{}, errors.Wrapf(err, "index workload in %s", l.Location)
 		}
 		rawWorkload = append(rawWorkload, lw...)
 	}
@@ -60,7 +60,7 @@ func (w *tablesWorker) IndexWorkload(ctx context.Context, locations []backupspec
 	return workload, nil
 }
 
-func (w *tablesWorker) indexLocationWorkload(ctx context.Context, location backupspec.Location) ([]RemoteDirWorkload, error) {
+func (w *tablesWorker) indexLocationWorkload(ctx context.Context, location LocationInfo) ([]RemoteDirWorkload, error) {
 	rawWorkload, err := w.createRemoteDirWorkloads(ctx, location)
 	if err != nil {
 		return nil, errors.Wrap(err, "create remote dir workloads")
@@ -74,7 +74,7 @@ func (w *tablesWorker) indexLocationWorkload(ctx context.Context, location backu
 	return rawWorkload, nil
 }
 
-func (w *tablesWorker) createRemoteDirWorkloads(ctx context.Context, location backupspec.Location) ([]RemoteDirWorkload, error) {
+func (w *tablesWorker) createRemoteDirWorkloads(ctx context.Context, location LocationInfo) ([]RemoteDirWorkload, error) {
 	var rawWorkload []RemoteDirWorkload
 	err := w.forEachManifest(ctx, location, func(m backupspec.ManifestInfoWithContent) error {
 		return m.ForEachIndexIterWithError(nil, func(fm backupspec.FilesMeta) error {
@@ -87,7 +87,7 @@ func (w *tablesWorker) createRemoteDirWorkloads(ctx context.Context, location ba
 			return errors.Wrapf(err, "convert files meta to sstables")
 		}
 		sstDir := m.LocationSSTableVersionDir(fm.Keyspace, fm.Table, fm.Version)
-		remoteSSTables, err := w.adjustSSTablesWithRemote(ctx, w.randomHostFromLocation(location), sstDir, sstables)
+		remoteSSTables, err := w.adjustSSTablesWithRemote(ctx, location.AnyHost(), sstDir, sstables)
 		if err != nil {
 			return errors.Wrap(err, "fetch sstables sizes")
 		}
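The hunks above only show how LocationInfo is consumed; its definition lives in another file of this PR. Based solely on the fields and methods used here (Location, DCHosts, AnyHost), it presumably looks roughly like the sketch below - the exact shape and the AnyHost body are assumptions, not the PR's actual code:

package restoresketch

import "github.com/scylladb/scylla-manager/backupspec"

// LocationInfo (sketch) ties a backup location to the DCs stored in it and
// the target-cluster hosts allowed to restore each DC.
type LocationInfo struct {
	Location backupspec.Location
	// DCHosts maps a backup DC to the hosts that may restore data from it.
	DCHosts map[string][]string
}

// AnyHost returns some host with access to this location (assumed behavior).
func (l LocationInfo) AnyHost() string {
	for _, hosts := range l.DCHosts {
		if len(hosts) > 0 {
			return hosts[0]
		}
	}
	return ""
}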
