Skip to content

Commit 5759792

Browse files
VAveryanov8 karol-kokoszka
authored and committed
feat(1-1-restore): adds progress tracking of 1-1-restore (#4296)
This adds progress tracking of the various 1-1-restore stages. Each progress update is represented by a RunProgress model saved into the database. w.getProgress is used to aggregate the current task progress by keyspaces, tables, and hosts.
1 parent 0a41c64 commit 5759792

32 files changed

+1692
-50
lines changed

pkg/cmd/scylla-manager/server.go

+7-6
Original file line numberDiff line numberDiff line change
@@ -221,12 +221,13 @@ func (s *server) onClusterChange(ctx context.Context, c cluster.Change) error {
221221

222222
func (s *server) makeServers(ctx context.Context) error {
223223
services := restapi.Services{
224-
Cluster: s.clusterSvc,
225-
HealthCheck: s.healthSvc,
226-
Repair: s.repairSvc,
227-
Backup: s.backupSvc,
228-
Restore: s.restoreSvc,
229-
Scheduler: s.schedSvc,
224+
Cluster: s.clusterSvc,
225+
HealthCheck: s.healthSvc,
226+
Repair: s.repairSvc,
227+
Backup: s.backupSvc,
228+
Restore: s.restoreSvc,
229+
Scheduler: s.schedSvc,
230+
One2OneRestore: s.one2OneRestoreSvc,
230231
}
231232
h := restapi.New(services, s.logger.Named("http"))
232233

pkg/restapi/services.go

+13-6
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"github.com/scylladb/scylla-manager/v3/pkg/service/backup"
1111
"github.com/scylladb/scylla-manager/v3/pkg/service/cluster"
1212
"github.com/scylladb/scylla-manager/v3/pkg/service/healthcheck"
13+
"github.com/scylladb/scylla-manager/v3/pkg/service/one2onerestore"
1314
"github.com/scylladb/scylla-manager/v3/pkg/service/repair"
1415
"github.com/scylladb/scylla-manager/v3/pkg/service/restore"
1516
"github.com/scylladb/scylla-manager/v3/pkg/service/scheduler"
@@ -19,12 +20,13 @@ import (
1920

2021
// Services contains REST API services.
2122
type Services struct {
22-
Cluster ClusterService
23-
HealthCheck HealthCheckService
24-
Repair RepairService
25-
Backup BackupService
26-
Restore RestoreService
27-
Scheduler SchedService
23+
Cluster ClusterService
24+
HealthCheck HealthCheckService
25+
Repair RepairService
26+
Backup BackupService
27+
Restore RestoreService
28+
Scheduler SchedService
29+
One2OneRestore One2OneRestoreService
2830
}
2931

3032
// ClusterService service interface for the REST API handlers.
@@ -93,3 +95,8 @@ type SchedService interface {
9395
Suspend(ctx context.Context, clusterID uuid.UUID) error
9496
Resume(ctx context.Context, clusterID uuid.UUID, startTasks bool) error
9597
}
98+
99+
// One2OneRestoreService service interface for the 1-1-restore REST API handlers.
100+
type One2OneRestoreService interface {
101+
GetProgress(ctx context.Context, clusterID, taskID, runID uuid.UUID, properties json.RawMessage) (one2onerestore.Progress, error) // Called by taskRunProgress for One2OneRestoreTask runs; properties receives the task's raw Properties.
102+
}

pkg/restapi/task.go

+5
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
"github.com/pkg/errors"
1616
"github.com/scylladb/scylla-manager/v3/pkg/scyllaclient"
1717
"github.com/scylladb/scylla-manager/v3/pkg/service/backup"
18+
"github.com/scylladb/scylla-manager/v3/pkg/service/one2onerestore"
1819
"github.com/scylladb/scylla-manager/v3/pkg/service/repair"
1920
"github.com/scylladb/scylla-manager/v3/pkg/service/restore"
2021
"github.com/scylladb/scylla-manager/v3/pkg/service/scheduler"
@@ -469,6 +470,8 @@ func (h *taskHandler) taskRunProgress(w http.ResponseWriter, r *http.Request) {
469470
prog.Progress = restore.Progress{}
470471
case scheduler.ValidateBackupTask:
471472
prog.Progress = backup.ValidationHostProgress{}
473+
case scheduler.One2OneRestoreTask:
474+
prog.Progress = one2onerestore.Progress{}
472475
}
473476
render.Respond(w, r, prog)
474477
return
@@ -503,6 +506,8 @@ func (h *taskHandler) taskRunProgress(w http.ResponseWriter, r *http.Request) {
503506
pr, err = h.Restore.GetProgress(r.Context(), t.ClusterID, t.ID, prog.Run.ID)
504507
case scheduler.ValidateBackupTask:
505508
pr, err = h.Backup.GetValidationProgress(r.Context(), t.ClusterID, t.ID, prog.Run.ID)
509+
case scheduler.One2OneRestoreTask:
510+
pr, err = h.One2OneRestore.GetProgress(r.Context(), t.ClusterID, t.ID, prog.Run.ID, t.Properties)
506511
default:
507512
respondBadRequest(w, r, errors.Errorf("unsupported task type %s", t.Type))
508513
return

pkg/schema/table/table.go

+46-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

pkg/service/one2onerestore/helpers_integration_test.go

+13-7
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,11 @@ import (
2727
)
2828

2929
type testHelper struct {
30-
client *scyllaclient.Client
31-
clusterID uuid.UUID
32-
backupSvc *backup.Service
33-
restoreSvc Servicer
30+
client *scyllaclient.Client
31+
clusterID, taskID, runID uuid.UUID
32+
props []byte
33+
backupSvc *backup.Service
34+
restoreSvc Servicer
3435
}
3536

3637
func newTestHelper(t *testing.T, hosts []string) *testHelper {
@@ -40,13 +41,17 @@ func newTestHelper(t *testing.T, hosts []string) *testHelper {
4041
session := CreateScyllaManagerDBSession(t)
4142

4243
clusterID := uuid.NewTime()
44+
taskID := uuid.NewTime()
45+
runID := uuid.NewTime()
4346

4447
backupSvc := newBackupSvc(t, session, sc, clusterID)
4548
restoreSvc := newRestoreSvc(t, session, sc, clusterID, "", "")
4649

4750
return &testHelper{
4851
client: sc,
4952
clusterID: clusterID,
53+
taskID: taskID,
54+
runID: runID,
5055
backupSvc: backupSvc,
5156
restoreSvc: restoreSvc,
5257
}
@@ -86,15 +91,16 @@ func (h *testHelper) runRestore(t *testing.T, props map[string]any) {
8691
t.Helper()
8792
Printf("Run 1-1-restore with properties: %v", props)
8893
ctx := context.Background()
89-
taskID := uuid.NewTime()
90-
runID := uuid.NewTime()
94+
h.taskID = uuid.NewTime()
95+
h.runID = uuid.NewTime()
9196

9297
rawProps, err := json.Marshal(props)
9398
if err != nil {
9499
t.Fatal(errors.Wrap(err, "marshal properties"))
95100
}
101+
h.props = rawProps
96102

97-
err = h.restoreSvc.One2OneRestore(ctx, h.clusterID, taskID, runID, rawProps)
103+
err = h.restoreSvc.One2OneRestore(ctx, h.clusterID, h.taskID, h.runID, h.props)
98104
if err != nil {
99105
t.Fatal(errors.Wrap(err, "run 1-1-restore"))
100106
}

pkg/service/one2onerestore/model.go

+109-8
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@
33
package one2onerestore
44

55
import (
6+
"time"
7+
68
"github.com/pkg/errors"
79
"github.com/scylladb/go-set/strset"
10+
"github.com/scylladb/scylla-manager/v3/pkg/scyllaclient"
811
. "github.com/scylladb/scylla-manager/v3/pkg/service/backup/backupspec"
912
"github.com/scylladb/scylla-manager/v3/pkg/util/uuid"
1013
)
@@ -37,9 +40,10 @@ type node struct {
3740

3841
// Host contains basic information about Scylla node.
3942
type Host struct {
40-
ID string
41-
DC string
42-
Addr string
43+
ID string
44+
DC string
45+
Addr string
46+
ShardCount int
4347
}
4448

4549
// ViewType either Materialized View or Secondary Index.
@@ -53,11 +57,12 @@ const (
5357

5458
// View represents statement used for recreating restored (dropped) views.
5559
type View struct {
56-
Keyspace string `json:"keyspace" db:"keyspace_name"`
57-
View string `json:"view" db:"view_name"`
58-
Type ViewType `json:"type" db:"view_type"`
59-
BaseTable string `json:"base_table"`
60-
CreateStmt string `json:"create_stmt"`
60+
Keyspace string `json:"keyspace" db:"keyspace_name"`
61+
View string `json:"view" db:"view_name"`
62+
Type ViewType `json:"type" db:"view_type"`
63+
BaseTable string `json:"base_table"`
64+
CreateStmt string `json:"create_stmt,omitempty"`
65+
BuildStatus scyllaclient.ViewBuildStatus `json:"status"`
6166
}
6267

6368
// hostWorkload represents what data (manifest) from the backup should be handled
@@ -72,6 +77,10 @@ type hostWorkload struct {
7277

7378
type scyllaTable struct{ keyspace, table string }
7479

80+
func (st scyllaTable) String() string {
81+
return st.keyspace + "." + st.table // Renders the table as "<keyspace>.<table>".
82+
}
83+
7584
func getTablesToRestore(workload []hostWorkload) map[scyllaTable]struct{} {
7685
tablesToRestore := map[scyllaTable]struct{}{}
7786
for _, wl := range workload {
@@ -185,3 +194,95 @@ func checkHostMapping(hostMap map[string]struct{}, hostID string) error {
185194
}
186195
return errors.Errorf("host is already mapped: %s", hostID)
187196
}
197+
198+
// RunProgress describes progress of various 1-1-restore stages.
199+
type RunProgress struct {
200+
ClusterID uuid.UUID
201+
TaskID uuid.UUID
202+
RunID uuid.UUID
203+
204+
KeyspaceName string
205+
TableName string
206+
TableSize int64
207+
RemoteSSTableDir string `db:"remote_sstable_dir"` // The only field in this struct with an explicit db tag.
208+
TombstoneGC string // Plain string here; TableProgress exposes the same notion as tombstoneGCMode.
209+
210+
Host string // IP of the node to which SSTables are downloaded.
211+
ShardCnt int // Host shard count used for bandwidth per shard calculation.
212+
213+
VersionedProgress int64
214+
215+
StartedAt *time.Time
216+
CompletedAt *time.Time
217+
// RClone job info fields
218+
AgentJobID int64
219+
ScyllaTaskID string // reserved for future use
220+
221+
Downloaded int64 // NOTE(review): Downloaded/Skipped/Failed are presumably byte counts — confirm.
222+
Skipped int64
223+
Failed int64
224+
Error string
225+
226+
ViewName string
227+
ViewType ViewType // Same type as View.Type.
228+
ViewBuildStatus scyllaclient.ViewBuildStatus // Same type as View.BuildStatus.
229+
230+
Stage Stage // Restore stage value; see the Stage constants.
231+
}
232+
233+
// Stage specifies the restore stage.
234+
type Stage string
235+
236+
// Stage enumeration. Stage is a string type, so these literal values are what
// the Progress struct's `json:"stage"` field serializes to.
237+
const (
238+
StageDropViews Stage = "DROP_VIEWS"
239+
StageAlterTGC Stage = "ALTER_TGC" // NOTE(review): TGC presumably abbreviates tombstone_gc (cf. the TombstoneGC fields) — confirm.
240+
StageData Stage = "DATA"
241+
StageRecreateViews Stage = "RECREATE_VIEWS"
242+
StageDone Stage = "DONE"
243+
)
244+
245+
// Progress groups restore progress for all restored keyspaces.
246+
type Progress struct {
247+
progress // Embedded without a JSON tag: encoding/json emits its fields (size, restored, ...) inline at this level.
248+
249+
SnapshotTag string `json:"snapshot_tag"`
250+
Keyspaces []KeyspaceProgress `json:"keyspaces,omitempty"`
251+
Hosts []HostProgress `json:"hosts,omitempty"`
252+
Views []View `json:"views,omitempty"`
253+
Stage Stage `json:"stage"` // Always emitted (no omitempty), unlike the three slices above.
254+
}
255+
256+
// KeyspaceProgress groups restore progress for the tables belonging to this keyspace.
257+
type KeyspaceProgress struct {
258+
progress // Embedded without a JSON tag: its fields are emitted inline next to "keyspace".
259+
260+
Keyspace string `json:"keyspace"`
261+
Tables []TableProgress `json:"tables,omitempty"` // Omitted from JSON when empty.
262+
}
263+
264+
// TableProgress defines restore progress for the table.
265+
type TableProgress struct {
266+
progress // Embedded without a JSON tag: its fields are emitted inline next to "table".
267+
268+
Table string `json:"table"`
269+
TombstoneGC tombstoneGCMode `json:"tombstone_gc"`
270+
Error string `json:"error,omitempty"` // Omitted from JSON when empty.
271+
}
272+
273+
// HostProgress groups restore progress for the host.
274+
type HostProgress struct {
275+
Host string `json:"host"`
276+
ShardCnt int `json:"shard_cnt"` // NOTE(review): presumably mirrors RunProgress.ShardCnt (host shard count) — confirm.
277+
DownloadedBytes int64 `json:"downloaded_bytes"`
278+
DownloadDuration int64 `json:"download_duration"` // NOTE(review): time unit is not visible in this diff — confirm before consuming.
279+
}
280+
281+
type progress struct { // Shared counters embedded by Progress, KeyspaceProgress and TableProgress.
282+
Size int64 `json:"size"`
283+
Restored int64 `json:"restored"`
284+
Downloaded int64 `json:"downloaded"`
285+
Failed int64 `json:"failed"`
286+
StartedAt *time.Time `json:"started_at"` // No omitempty: a nil pointer is encoded as JSON null.
287+
CompletedAt *time.Time `json:"completed_at"` // No omitempty: a nil pointer is encoded as JSON null.
288+
}

0 commit comments

Comments
 (0)