From 3c951294f64b33c2684e101a23d7f808f30311c3 Mon Sep 17 00:00:00 2001 From: Vasil Averyanau Date: Mon, 12 May 2025 11:31:13 +0200 Subject: [PATCH 1/3] feat(1-1-restore): add unpin-agent-cpu flag This adds support of unpin-agent-cpu flag to 1-1-restore command. Unpinning agent CPU should help with agent download speed. Refs: #4375 --- pkg/command/one2onerestore/cmd.go | 7 +++++++ pkg/command/one2onerestore/res.yaml | 4 ++++ pkg/service/one2onerestore/model.go | 1 + pkg/service/one2onerestore/worker.go | 28 ++++++++++++++++++++++++++++ 4 files changed, 40 insertions(+) diff --git a/pkg/command/one2onerestore/cmd.go b/pkg/command/one2onerestore/cmd.go index 76ab933371..59c6c38838 100644 --- a/pkg/command/one2onerestore/cmd.go +++ b/pkg/command/one2onerestore/cmd.go @@ -32,6 +32,7 @@ type command struct { keyspace []string snapshotTag string nodesMapping nodesMapping + unpinAgentCPU bool dryRun bool } @@ -83,6 +84,7 @@ func (cmd *command) init() { // Common configuration for restore procedures w.Unwrap().BoolVar(&cmd.dryRun, "dry-run", false, "") + w.Unwrap().BoolVar(&cmd.unpinAgentCPU, "unpin-agent-cpu", false, "") } func (cmd *command) run(args []string) error { @@ -205,6 +207,11 @@ func flagsToTaskProperties(cmd *command, task *models.Task) (updated bool, err e flagName: "nodes-mapping", value: cmd.nodesMapping, }, + { + flagName: "unpin-agent-cpu", + value: cmd.unpinAgentCPU, + canBeUpdated: true, + }, } props := task.Properties.(map[string]interface{}) diff --git a/pkg/command/one2onerestore/res.yaml b/pkg/command/one2onerestore/res.yaml index b90cf57eba..d17c807173 100644 --- a/pkg/command/one2onerestore/res.yaml +++ b/pkg/command/one2onerestore/res.yaml @@ -28,3 +28,7 @@ nodes-mapping: dry-run: | Validates and displays restore information without actually running the restore. This allows you to display what will happen should the restore run with the parameters you set. + +unpin-agent-cpu: | + Defines if ScyllaDB Manager Agent should be unpinned from CPUs during restore. + This might significantly improve download speed at the cost of decreasing streaming speed. \ No newline at end of file diff --git a/pkg/service/one2onerestore/model.go b/pkg/service/one2onerestore/model.go index b8e1b7799f..0b1fee79c7 100644 --- a/pkg/service/one2onerestore/model.go +++ b/pkg/service/one2onerestore/model.go @@ -19,6 +19,7 @@ type Target struct { SourceClusterID uuid.UUID `json:"source_cluster_id"` SnapshotTag string `json:"snapshot_tag"` NodesMapping []nodeMapping `json:"nodes_mapping"` + UnpinAgentCPU bool `json:"unpin_agent_cpu"` } func defaultTarget() Target { diff --git a/pkg/service/one2onerestore/worker.go b/pkg/service/one2onerestore/worker.go index 97e695eba8..58ee937adf 100644 --- a/pkg/service/one2onerestore/worker.go +++ b/pkg/service/one2onerestore/worker.go @@ -79,6 +79,18 @@ func (w *worker) restore(ctx context.Context, workload []hostWorkload, target Ta return errors.Wrap(err, "disable auto compaction") } + // We always want to pin agent to CPUs outside the 1-1-restore. + defer func() { + if err := w.pinAgentCPU(context.Background(), workload, true); err != nil { + w.logger.Error(ctx, "Can't pin agent to CPU", "error", err) + } + }() + if target.UnpinAgentCPU { + if err := w.pinAgentCPU(ctx, workload, false); err != nil { + return errors.Wrap(err, "unpin agent from CPU") + } + } + if err := w.setTombstoneGCModeRepair(ctx, workload); err != nil { return errors.Wrap(err, "tombstone_gc mode") } @@ -209,6 +221,22 @@ func (w *worker) setAutoCompaction(ctx context.Context, workload []hostWorkload, return nil } +func (w *worker) pinAgentCPU(ctx context.Context, workload []hostWorkload, pin bool) error { + setPinFunc := w.client.PinCPU + if !pin { + setPinFunc = w.client.UnpinFromCPU + } + return parallel.Run(len(workload), len(workload), func(i int) error { + host := workload[i].host + return errors.Wrapf(setPinFunc(ctx, host.Addr), "set CPU pinning on %s", host.Addr) + }, func(i int, err error) { + w.logger.Error(ctx, "Failed to change agent CPU pinning", + "host", workload[i].host.Addr, + "pinned", pin, + "error", err) + }) +} + // alterSchemaRetryWrapper is useful when executing many statements altering schema, // as it might take more time for Scylla to process them one after another. // This wrapper exits on: success, context cancel, op returned non-timeout error or after maxTotalTime has passed. From fae86e33658aa8beebe45751a9c80d5d7762a930 Mon Sep 17 00:00:00 2001 From: Vasil Averyanau Date: Mon, 12 May 2025 11:32:47 +0200 Subject: [PATCH 2/3] chore(docs): run `make docs` --- docs/source/sctool/partials/sctool_restore_1-1-restore.yaml | 3 +++ .../sctool/partials/sctool_restore_1-1-restore_update.yaml | 3 +++ 2 files changed, 6 insertions(+) diff --git a/docs/source/sctool/partials/sctool_restore_1-1-restore.yaml b/docs/source/sctool/partials/sctool_restore_1-1-restore.yaml index 8005fcc592..4f97fede9d 100644 --- a/docs/source/sctool/partials/sctool_restore_1-1-restore.yaml +++ b/docs/source/sctool/partials/sctool_restore_1-1-restore.yaml @@ -117,6 +117,9 @@ options: usage: | Timezone of --cron and --window flag values. The default value is taken from this system, namely 'TZ' envvar or '/etc/localtime' file. + - name: unpin-agent-cpu + default_value: "false" + usage: "Defines if ScyllaDB Manager Agent should be unpinned from CPUs during restore.\nThis might significantly improve download speed at the cost of decreasing streaming speed. " - name: window default_value: '[]' usage: | diff --git a/docs/source/sctool/partials/sctool_restore_1-1-restore_update.yaml b/docs/source/sctool/partials/sctool_restore_1-1-restore_update.yaml index 23c166f1d5..72993ea2a3 100644 --- a/docs/source/sctool/partials/sctool_restore_1-1-restore_update.yaml +++ b/docs/source/sctool/partials/sctool_restore_1-1-restore_update.yaml @@ -118,6 +118,9 @@ options: usage: | Timezone of --cron and --window flag values. The default value is taken from this system, namely 'TZ' envvar or '/etc/localtime' file. + - name: unpin-agent-cpu + default_value: "false" + usage: "Defines if ScyllaDB Manager Agent should be unpinned from CPUs during restore.\nThis might significantly improve download speed at the cost of decreasing streaming speed. " - name: window default_value: '[]' usage: | From 788fc0f200f167cb221d9f27ca8a4dbd9788f284 Mon Sep 17 00:00:00 2001 From: Vasil Averyanau Date: Tue, 13 May 2025 15:00:33 +0200 Subject: [PATCH 3/3] fix: remove misleading sentence from `unpin-agent-cpu` flag description --- docs/source/sctool/partials/sctool_restore_1-1-restore.yaml | 3 ++- .../sctool/partials/sctool_restore_1-1-restore_update.yaml | 3 ++- pkg/command/one2onerestore/res.yaml | 1 - 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/source/sctool/partials/sctool_restore_1-1-restore.yaml b/docs/source/sctool/partials/sctool_restore_1-1-restore.yaml index 4f97fede9d..bb2da5f969 100644 --- a/docs/source/sctool/partials/sctool_restore_1-1-restore.yaml +++ b/docs/source/sctool/partials/sctool_restore_1-1-restore.yaml @@ -119,7 +119,8 @@ options: The default value is taken from this system, namely 'TZ' envvar or '/etc/localtime' file. - name: unpin-agent-cpu default_value: "false" - usage: "Defines if ScyllaDB Manager Agent should be unpinned from CPUs during restore.\nThis might significantly improve download speed at the cost of decreasing streaming speed. " + usage: | + Defines if ScyllaDB Manager Agent should be unpinned from CPUs during restore. - name: window default_value: '[]' usage: | diff --git a/docs/source/sctool/partials/sctool_restore_1-1-restore_update.yaml b/docs/source/sctool/partials/sctool_restore_1-1-restore_update.yaml index 72993ea2a3..7161c85467 100644 --- a/docs/source/sctool/partials/sctool_restore_1-1-restore_update.yaml +++ b/docs/source/sctool/partials/sctool_restore_1-1-restore_update.yaml @@ -120,7 +120,8 @@ options: The default value is taken from this system, namely 'TZ' envvar or '/etc/localtime' file. - name: unpin-agent-cpu default_value: "false" - usage: "Defines if ScyllaDB Manager Agent should be unpinned from CPUs during restore.\nThis might significantly improve download speed at the cost of decreasing streaming speed. " + usage: | + Defines if ScyllaDB Manager Agent should be unpinned from CPUs during restore. - name: window default_value: '[]' usage: | diff --git a/pkg/command/one2onerestore/res.yaml b/pkg/command/one2onerestore/res.yaml index d17c807173..ad82d272b2 100644 --- a/pkg/command/one2onerestore/res.yaml +++ b/pkg/command/one2onerestore/res.yaml @@ -31,4 +31,3 @@ dry-run: | unpin-agent-cpu: | Defines if ScyllaDB Manager Agent should be unpinned from CPUs during restore. - This might significantly improve download speed at the cost of decreasing streaming speed. \ No newline at end of file