Skip to content

Commit b9e35cf

Browse files
kaovilaiclaude
andcommitted
Add priority class support for Velero server and node-agent
- Add --server-priority-class-name and --node-agent-priority-class-name flags to velero install command - Configure data mover pods (PVB/PVR/DataUpload/DataDownload) to use priority class from node-agent-configmap - Update e2e tests to include PriorityClass label for testing - Move priority class design document to Implemented folder - Add changelog entry for velero-io#8883 🤖 Generated with [Claude Code](https://claude.ai/code) Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com> Add support for ConfigMap options in Velero server installation Signed-off-by: Tiger Kaovilai <tkaovila@redhat.com> Implement centralized ConfigMap watching for node-agent controllers This change introduces a centralized ConfigMap watching mechanism to eliminate redundant ConfigMap watchers across controllers. Previously, each controller (PodVolumeBackup, PodVolumeRestore, DataUpload, DataDownload) independently watched the same ConfigMap, leading to inefficiency. Key changes: - Add ConfigProvider interface for centralized configuration management - Implement nodeAgentConfigProvider with single ConfigMap watcher - Update all controllers to use ConfigProvider instead of direct watching - Add comprehensive unit tests for ConfigProvider implementation - Add enhanced MockConfigProvider for testing - Add E2E test for validating centralized watching behavior - Remove redundant ConfigMap watching code from controllers Benefits: - Single source of truth for ConfigMap watching - Reduced resource usage with one watcher instead of four - Consistent configuration updates across all controllers - Improved testability with centralized mocking - Better separation of concerns 🤖 Generated with [Claude Code](https://claude.ai/code) Fix ConfigProvider tests for empty ConfigMap name and informer behavior - Handle empty ConfigMap name gracefully in NewNodeAgentConfigProvider - Skip informer start when ConfigMap name is empty - Update test expectations to handle informer Add events on startup - Ensure tests pass with correct behavior for edge cases 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
1 parent ddb3e3d commit b9e35cf

41 files changed

Lines changed: 2038 additions & 160 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Implement PriorityClass Support

design/priority-class-name-support_design.md renamed to design/Implemented/priority-class-name-support_design.md

Lines changed: 147 additions & 40 deletions
Large diffs are not rendered by default.

pkg/cmd/cli/install/install.go

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ limitations under the License.
1717
package install
1818

1919
import (
20+
"context"
2021
"fmt"
2122
"os"
2223
"path/filepath"
@@ -26,6 +27,7 @@ import (
2627
"github.com/vmware-tanzu/velero/pkg/uploader"
2728

2829
"github.com/pkg/errors"
30+
"github.com/sirupsen/logrus"
2931
"github.com/spf13/cobra"
3032
"github.com/spf13/pflag"
3133
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
@@ -91,6 +93,8 @@ type Options struct {
9193
ItemBlockWorkerCount int
9294
NodeAgentDisableHostPath bool
9395
kubeletRootDir string
96+
ServerPriorityClassName string
97+
NodeAgentPriorityClassName string
9498
}
9599

96100
// BindFlags adds command line values to the options struct.
@@ -194,6 +198,18 @@ func (o *Options) BindFlags(flags *pflag.FlagSet) {
194198
o.ItemBlockWorkerCount,
195199
"Number of worker threads to process ItemBlocks. Default is one. Optional.",
196200
)
201+
flags.StringVar(
202+
&o.ServerPriorityClassName,
203+
"server-priority-class-name",
204+
o.ServerPriorityClassName,
205+
"Priority class name for the Velero server deployment. Optional.",
206+
)
207+
flags.StringVar(
208+
&o.NodeAgentPriorityClassName,
209+
"node-agent-priority-class-name",
210+
o.NodeAgentPriorityClassName,
211+
"Priority class name for the node agent daemonset. Optional.",
212+
)
197213
}
198214

199215
// NewInstallOptions instantiates a new, default InstallOptions struct.
@@ -301,6 +317,8 @@ func (o *Options) AsVeleroOptions() (*install.VeleroOptions, error) {
301317
ItemBlockWorkerCount: o.ItemBlockWorkerCount,
302318
KubeletRootDir: o.kubeletRootDir,
303319
NodeAgentDisableHostPath: o.NodeAgentDisableHostPath,
320+
ServerPriorityClassName: o.ServerPriorityClassName,
321+
NodeAgentPriorityClassName: o.NodeAgentPriorityClassName,
304322
}, nil
305323
}
306324

@@ -389,6 +407,25 @@ func (o *Options) Run(c *cobra.Command, f client.Factory) error {
389407
if err != nil {
390408
return err
391409
}
410+
411+
// Get Kubernetes client for priority class validation
412+
kubeClient, err := f.KubeClient()
413+
if err != nil {
414+
return err
415+
}
416+
417+
// Validate priority classes if specified
418+
logger := logrus.New()
419+
logger.SetOutput(os.Stdout)
420+
if o.ServerPriorityClassName != "" {
421+
_ = kubeutil.ValidatePriorityClass(context.Background(), kubeClient, o.ServerPriorityClassName, logger.WithField("component", "server"))
422+
// For install command, we continue even if validation fails (warnings are logged)
423+
}
424+
if o.NodeAgentPriorityClassName != "" {
425+
_ = kubeutil.ValidatePriorityClass(context.Background(), kubeClient, o.NodeAgentPriorityClassName, logger.WithField("component", "node-agent"))
426+
// For install command, we continue even if validation fails (warnings are logged)
427+
}
428+
392429
errorMsg := fmt.Sprintf("\n\nError installing Velero. Use `kubectl logs deploy/velero -n %s` to check the deploy logs", o.Namespace)
393430

394431
err = install.Install(dynamicFactory, kbClient, resources, os.Stdout)
Lines changed: 93 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,93 @@
1+
/*
2+
Copyright the Velero contributors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package install
18+
19+
import (
20+
"testing"
21+
22+
"github.com/spf13/pflag"
23+
"github.com/stretchr/testify/assert"
24+
"github.com/stretchr/testify/require"
25+
)
26+
27+
func TestPriorityClassNameFlag(t *testing.T) {
28+
// Test that the flag is properly defined
29+
o := NewInstallOptions()
30+
flags := pflag.NewFlagSet("test", pflag.ContinueOnError)
31+
o.BindFlags(flags)
32+
33+
// Verify the server priority class flag exists
34+
serverFlag := flags.Lookup("server-priority-class-name")
35+
assert.NotNil(t, serverFlag, "server-priority-class-name flag should exist")
36+
assert.Equal(t, "Priority class name for the Velero server deployment. Optional.", serverFlag.Usage)
37+
38+
// Verify the node agent priority class flag exists
39+
nodeAgentFlag := flags.Lookup("node-agent-priority-class-name")
40+
assert.NotNil(t, nodeAgentFlag, "node-agent-priority-class-name flag should exist")
41+
assert.Equal(t, "Priority class name for the node agent daemonset. Optional.", nodeAgentFlag.Usage)
42+
43+
// Test with values for both server and node agent
44+
testCases := []struct {
45+
name string
46+
serverPriorityClassName string
47+
nodeAgentPriorityClassName string
48+
expectedServerValue string
49+
expectedNodeAgentValue string
50+
}{
51+
{
52+
name: "with both priority class names",
53+
serverPriorityClassName: "high-priority",
54+
nodeAgentPriorityClassName: "medium-priority",
55+
expectedServerValue: "high-priority",
56+
expectedNodeAgentValue: "medium-priority",
57+
},
58+
{
59+
name: "with only server priority class name",
60+
serverPriorityClassName: "high-priority",
61+
nodeAgentPriorityClassName: "",
62+
expectedServerValue: "high-priority",
63+
expectedNodeAgentValue: "",
64+
},
65+
{
66+
name: "with only node agent priority class name",
67+
serverPriorityClassName: "",
68+
nodeAgentPriorityClassName: "medium-priority",
69+
expectedServerValue: "",
70+
expectedNodeAgentValue: "medium-priority",
71+
},
72+
{
73+
name: "without priority class names",
74+
serverPriorityClassName: "",
75+
nodeAgentPriorityClassName: "",
76+
expectedServerValue: "",
77+
expectedNodeAgentValue: "",
78+
},
79+
}
80+
81+
for _, tc := range testCases {
82+
t.Run(tc.name, func(t *testing.T) {
83+
o := NewInstallOptions()
84+
o.ServerPriorityClassName = tc.serverPriorityClassName
85+
o.NodeAgentPriorityClassName = tc.nodeAgentPriorityClassName
86+
87+
veleroOptions, err := o.AsVeleroOptions()
88+
require.NoError(t, err)
89+
assert.Equal(t, tc.expectedServerValue, veleroOptions.ServerPriorityClassName)
90+
assert.Equal(t, tc.expectedNodeAgentValue, veleroOptions.NodeAgentPriorityClassName)
91+
})
92+
}
93+
}

pkg/cmd/cli/nodeagent/server.go

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,25 @@ func (s *nodeAgentServer) run() {
280280

281281
s.logger.Info("Starting controllers")
282282

283+
// Read priority class from ConfigMap if available
284+
dataMovePriorityClass := ""
285+
if s.config.nodeAgentConfig != "" {
286+
ctx, cancel := context.WithTimeout(context.Background(), time.Second*30)
287+
defer cancel()
288+
priorityClass, err := kube.GetDataMoverPriorityClassName(ctx, s.namespace, s.kubeClient, s.config.nodeAgentConfig)
289+
if err != nil {
290+
s.logger.WithError(err).Warn("Failed to get priority class name from node-agent-configmap, using empty value")
291+
} else if priorityClass != "" {
292+
// Validate the priority class exists in the cluster
293+
if kube.ValidatePriorityClass(ctx, s.kubeClient, priorityClass, s.logger.WithField("component", "data-mover")) {
294+
dataMovePriorityClass = priorityClass
295+
s.logger.WithField("priorityClassName", priorityClass).Info("Using priority class for data mover pods")
296+
} else {
297+
s.logger.WithField("priorityClassName", priorityClass).Warn("Priority class not found in cluster, data mover pods will use default priority")
298+
}
299+
}
300+
}
301+
283302
var loadAffinity []*kube.LoadAffinity
284303
if s.dataPathConfigs != nil && len(s.dataPathConfigs.LoadAffinity) > 0 {
285304
loadAffinity = s.dataPathConfigs.LoadAffinity
@@ -311,12 +330,12 @@ func (s *nodeAgentServer) run() {
311330
}
312331
}
313332

314-
pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger)
333+
pvbReconciler := controller.NewPodVolumeBackupReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.metrics, s.logger, dataMovePriorityClass)
315334
if err := pvbReconciler.SetupWithManager(s.mgr); err != nil {
316335
s.logger.Fatal(err, "unable to create controller", "controller", constant.ControllerPodVolumeBackup)
317336
}
318337

319-
pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.logger)
338+
pvrReconciler := controller.NewPodVolumeRestoreReconciler(s.mgr.GetClient(), s.mgr, s.kubeClient, s.dataPathMgr, s.vgdpCounter, s.nodeName, s.config.dataMoverPrepareTimeout, s.config.resourceTimeout, podResources, s.logger, dataMovePriorityClass)
320339
if err := pvrReconciler.SetupWithManager(s.mgr); err != nil {
321340
s.logger.WithError(err).Fatal("Unable to create the pod volume restore controller")
322341
}
@@ -340,6 +359,7 @@ func (s *nodeAgentServer) run() {
340359
s.config.dataMoverPrepareTimeout,
341360
s.logger,
342361
s.metrics,
362+
dataMovePriorityClass,
343363
)
344364
if err := dataUploadReconciler.SetupWithManager(s.mgr); err != nil {
345365
s.logger.WithError(err).Fatal("Unable to create the data upload controller")
@@ -364,6 +384,7 @@ func (s *nodeAgentServer) run() {
364384
s.config.dataMoverPrepareTimeout,
365385
s.logger,
366386
s.metrics,
387+
dataMovePriorityClass,
367388
)
368389

369390
if err := dataDownloadReconciler.SetupWithManager(s.mgr); err != nil {

pkg/controller/data_download_controller.go

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ type DataDownloadReconciler struct {
7171
preparingTimeout time.Duration
7272
metrics *metrics.ServerMetrics
7373
cancelledDataDownload map[string]time.Time
74+
dataMovePriorityClass string
7475
}
7576

7677
func NewDataDownloadReconciler(
@@ -86,12 +87,19 @@ func NewDataDownloadReconciler(
8687
preparingTimeout time.Duration,
8788
logger logrus.FieldLogger,
8889
metrics *metrics.ServerMetrics,
90+
dataMovePriorityClass string,
8991
) *DataDownloadReconciler {
92+
log := logger.WithField("controller", "DataDownload")
93+
94+
if dataMovePriorityClass != "" {
95+
log.Infof("Data mover priority class set to %q", dataMovePriorityClass)
96+
}
97+
9098
return &DataDownloadReconciler{
9199
client: client,
92100
kubeClient: kubeClient,
93101
mgr: mgr,
94-
logger: logger.WithField("controller", "DataDownload"),
102+
logger: log,
95103
Clock: &clock.RealClock{},
96104
nodeName: nodeName,
97105
restoreExposer: exposer.NewGenericRestoreExposer(kubeClient, logger),
@@ -103,6 +111,7 @@ func NewDataDownloadReconciler(
103111
preparingTimeout: preparingTimeout,
104112
metrics: metrics,
105113
cancelledDataDownload: make(map[string]time.Time),
114+
dataMovePriorityClass: dataMovePriorityClass,
106115
}
107116
}
108117

@@ -599,7 +608,7 @@ func (r *DataDownloadReconciler) SetupWithManager(mgr ctrl.Manager) error {
599608
Predicates: []predicate.Predicate{gp},
600609
})
601610

602-
return ctrl.NewControllerManagedBy(mgr).
611+
controllerBuilder := ctrl.NewControllerManagedBy(mgr).
603612
For(&velerov2alpha1api.DataDownload{}).
604613
WatchesRawSource(s).
605614
Watches(&corev1api.Pod{}, kube.EnqueueRequestsFromMapUpdateFunc(r.findSnapshotRestoreForPod),
@@ -626,8 +635,9 @@ func (r *DataDownloadReconciler) SetupWithManager(mgr ctrl.Manager) error {
626635
GenericFunc: func(ge event.GenericEvent) bool {
627636
return false
628637
},
629-
})).
630-
Complete(r)
638+
}))
639+
640+
return controllerBuilder.Complete(r)
631641
}
632642

633643
func (r *DataDownloadReconciler) findSnapshotRestoreForPod(ctx context.Context, podObj client.Object) []reconcile.Request {
@@ -892,6 +902,7 @@ func (r *DataDownloadReconciler) setupExposeParam(dd *velerov2alpha1api.DataDown
892902
NodeOS: nodeOS,
893903
RestorePVCConfig: r.restorePVCConfig,
894904
LoadAffinity: affinity,
905+
PriorityClassName: r.dataMovePriorityClass,
895906
}, nil
896907
}
897908

pkg/controller/data_download_controller_test.go

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -130,19 +130,7 @@ func initDataDownloadReconcilerWithError(t *testing.T, objects []any, needError
130130

131131
dataPathMgr := datapath.NewManager(1)
132132

133-
return NewDataDownloadReconciler(
134-
&fakeClient,
135-
nil,
136-
fakeKubeClient,
137-
dataPathMgr,
138-
nil,
139-
nil,
140-
nodeagent.RestorePVC{},
141-
corev1api.ResourceRequirements{},
142-
"test-node",
143-
time.Minute*5,
144-
velerotest.NewLogger(),
145-
metrics.NewServerMetrics()), nil
133+
return NewDataDownloadReconciler(&fakeClient, nil, fakeKubeClient, dataPathMgr, nil, nil, nodeagent.RestorePVC{}, corev1api.ResourceRequirements{}, "test-node", time.Minute*5, velerotest.NewLogger(), metrics.NewServerMetrics(), ""), nil
146134
}
147135

148136
func TestDataDownloadReconcile(t *testing.T) {

0 commit comments

Comments
 (0)