Skip to content

feat(1-1-restore): add unpin-agent-cpu flag #4380

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions docs/source/sctool/partials/sctool_restore_1-1-restore.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,10 @@ options:
usage: |
Timezone of --cron and --window flag values.
The default value is taken from this system, namely 'TZ' envvar or '/etc/localtime' file.
- name: unpin-agent-cpu
default_value: "false"
usage: |
Defines if ScyllaDB Manager Agent should be unpinned from CPUs during restore.
- name: window
default_value: '[]'
usage: |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,10 @@ options:
usage: |
Timezone of --cron and --window flag values.
The default value is taken from this system, namely 'TZ' envvar or '/etc/localtime' file.
- name: unpin-agent-cpu
default_value: "false"
usage: |
Defines if ScyllaDB Manager Agent should be unpinned from CPUs during restore.
- name: window
default_value: '[]'
usage: |
Expand Down
7 changes: 7 additions & 0 deletions pkg/command/one2onerestore/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ type command struct {
keyspace []string
snapshotTag string
nodesMapping nodesMapping
unpinAgentCPU bool
dryRun bool
}

Expand Down Expand Up @@ -83,6 +84,7 @@ func (cmd *command) init() {

// Common configuration for restore procedures
w.Unwrap().BoolVar(&cmd.dryRun, "dry-run", false, "")
w.Unwrap().BoolVar(&cmd.unpinAgentCPU, "unpin-agent-cpu", false, "")
}

func (cmd *command) run(args []string) error {
Expand Down Expand Up @@ -205,6 +207,11 @@ func flagsToTaskProperties(cmd *command, task *models.Task) (updated bool, err e
flagName: "nodes-mapping",
value: cmd.nodesMapping,
},
{
flagName: "unpin-agent-cpu",
value: cmd.unpinAgentCPU,
canBeUpdated: true,
},
}

props := task.Properties.(map[string]interface{})
Expand Down
3 changes: 3 additions & 0 deletions pkg/command/one2onerestore/res.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,6 @@ nodes-mapping:
dry-run: |
Validates and displays restore information without actually running the restore.
This allows you to display what will happen should the restore run with the parameters you set.

unpin-agent-cpu: |
Defines if ScyllaDB Manager Agent should be unpinned from CPUs during restore.
1 change: 1 addition & 0 deletions pkg/service/one2onerestore/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ type Target struct {
SourceClusterID uuid.UUID `json:"source_cluster_id"`
SnapshotTag string `json:"snapshot_tag"`
NodesMapping []nodeMapping `json:"nodes_mapping"`
UnpinAgentCPU bool `json:"unpin_agent_cpu"`
}

func defaultTarget() Target {
Expand Down
28 changes: 28 additions & 0 deletions pkg/service/one2onerestore/worker.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,18 @@ func (w *worker) restore(ctx context.Context, workload []hostWorkload, target Ta
return errors.Wrap(err, "disable auto compaction")
}

// The agent must always be pinned to CPUs outside of the 1-1-restore, so re-pin it when this function exits.
defer func() {
if err := w.pinAgentCPU(context.Background(), workload, true); err != nil {
w.logger.Error(ctx, "Can't pin agent to CPU", "error", err)
}
}()
if target.UnpinAgentCPU {
if err := w.pinAgentCPU(ctx, workload, false); err != nil {
return errors.Wrap(err, "unpin agent from CPU")
}
}

if err := w.setTombstoneGCModeRepair(ctx, workload); err != nil {
return errors.Wrap(err, "tombstone_gc mode")
}
Expand Down Expand Up @@ -209,6 +221,22 @@ func (w *worker) setAutoCompaction(ctx context.Context, workload []hostWorkload,
return nil
}

// pinAgentCPU changes the agent CPU pinning on every host of the workload.
// With pin set to true it calls client.PinCPU for each host address,
// otherwise it calls client.UnpinFromCPU.
//
// Hosts are processed through parallel.Run with the limit equal to the number
// of hosts, and the value returned by parallel.Run is returned to the caller.
// The second callback handed to parallel.Run logs the affected host, the
// requested pinning state and the error (presumably invoked per failed host).
func (w *worker) pinAgentCPU(ctx context.Context, workload []hostWorkload, pin bool) error {
	// Pick the client call matching the requested state.
	apply := w.client.UnpinFromCPU
	if pin {
		apply = w.client.PinCPU
	}

	hostCnt := len(workload)

	perHost := func(i int) error {
		addr := workload[i].host.Addr
		err := apply(ctx, addr)
		return errors.Wrapf(err, "set CPU pinning on %s", addr)
	}

	onFailure := func(i int, err error) {
		w.logger.Error(ctx, "Failed to change agent CPU pinning",
			"host", workload[i].host.Addr,
			"pinned", pin,
			"error", err)
	}

	return parallel.Run(hostCnt, hostCnt, perHost, onFailure)
}

// alterSchemaRetryWrapper is useful when executing many statements altering schema,
// as it might take more time for Scylla to process them one after another.
// This wrapper exits on: success, context cancel, op returned non-timeout error or after maxTotalTime has passed.
Expand Down