Skip to content

Commit 8992271

Browse files
authored
Merge of #949
2 parents bf49f69 + 4658bdc commit 8992271

18 files changed

Lines changed: 453 additions & 43 deletions

api/csiaddons/v1alpha1/encryptionkeyrotationcronjob_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ type EncryptionKeyRotationJobTemplateSpec struct {
3636
// EncryptionKeyRotationCronJobSpec defines the desired state of EncryptionKeyRotationCronJob
3737
type EncryptionKeyRotationCronJobSpec struct {
3838
// The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
39+
// A deterministic, UID-based stagger offset is applied to spread
40+
// execution across the "cronjob-stagger-window" (default: 2 hours,
41+
// set to 0 to disable) configured in the csi-addons-config ConfigMap.
3942
// +kubebuilder:validation:Required
4043
// +kubebuilder:validation:Pattern:=.+
4144
Schedule string `json:"schedule"`

api/csiaddons/v1alpha1/reclaimspacecronjob_types.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ type ReclaimSpaceJobTemplateSpec struct {
3636
// ReclaimSpaceCronJobSpec defines the desired state of ReclaimSpaceCronJob
3737
type ReclaimSpaceCronJobSpec struct {
3838
// The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
39+
// A deterministic, UID-based stagger offset is applied to spread
40+
// execution across the "cronjob-stagger-window" (default: 2 hours,
41+
// set to 0 to disable) configured in the csi-addons-config ConfigMap.
3942
// +kubebuilder:validation:Required
4043
// +kubebuilder:validation:Pattern:=.+
4144
Schedule string `json:"schedule"`

cmd/manager/main.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ func main() {
107107
flag.BoolVar(&enableAuth, "enable-auth", true, "Enables TLS and adds bearer token to the headers (enabled by default)")
108108
flag.IntVar(&cfg.MaxGroupPVC, "max-group-pvc", cfg.MaxGroupPVC, "Maximum number of PVCs allowed in a volume group")
109109
flag.IntVar(&cfg.CSIAddonsNodeRetryDelay, util.CsiaddonsNodeRetryDelayKey, cfg.CSIAddonsNodeRetryDelay, "Duration, in seconds, the CSIAddonsNode reconciler must wait before retrying the connection to csi-addons sidecar")
110+
flag.IntVar(&cfg.CronJobStaggerWindow, util.CronJobStaggerWindowKey, cfg.CronJobStaggerWindow, "Duration, in hours, that the CronJobs for KeyRotation and ReclaimSpace are staggered within. Defaults to 2 hours, set as 0 to disable.")
110111
opts := zap.Options{
111112
Development: true,
112113
TimeEncoder: zapcore.ISO8601TimeEncoder,
@@ -234,8 +235,9 @@ func main() {
234235
os.Exit(1)
235236
}
236237
if err = (&controllers.ReclaimSpaceCronJobReconciler{
237-
Client: mgr.GetClient(),
238-
Scheme: mgr.GetScheme(),
238+
Client: mgr.GetClient(),
239+
Scheme: mgr.GetScheme(),
240+
StaggerWindow: cfg.CronJobStaggerWindow,
239241
}).SetupWithManager(mgr, ctrlOptions); err != nil {
240242
setupLog.Error(err, "unable to create controller", "controller", "ReclaimSpaceCronJob")
241243
os.Exit(1)
@@ -299,8 +301,9 @@ func main() {
299301
os.Exit(1)
300302
}
301303
if err = (&controllers.EncryptionKeyRotationCronJobReconciler{
302-
Client: mgr.GetClient(),
303-
Scheme: mgr.GetScheme(),
304+
Client: mgr.GetClient(),
305+
Scheme: mgr.GetScheme(),
306+
StaggerWindow: cfg.CronJobStaggerWindow,
304307
}).SetupWithManager(mgr, ctrlOptions); err != nil {
305308
setupLog.Error(err, "unable to create controller", "controller", "EncryptionKeyRotationCronJob")
306309
os.Exit(1)

config/crd/bases/csiaddons.openshift.io_encryptionkeyrotationcronjobs.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,11 @@ spec:
146146
- spec
147147
type: object
148148
schedule:
149-
description: The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
149+
description: |-
150+
The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
151+
A deterministic, UID-based stagger offset is applied to spread
152+
execution across the "cronjob-stagger-window" (default: 2 hours,
153+
set to 0 to disable) configured in the csi-addons-config ConfigMap.
150154
pattern: .+
151155
type: string
152156
startingDeadlineSeconds:

config/crd/bases/csiaddons.openshift.io_reclaimspacecronjobs.yaml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,11 @@ spec:
145145
- spec
146146
type: object
147147
schedule:
148-
description: The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
148+
description: |-
149+
The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
150+
A deterministic, UID-based stagger offset is applied to spread
151+
execution across the "cronjob-stagger-window" (default: 2 hours,
152+
set to 0 to disable) configured in the csi-addons-config ConfigMap.
149153
pattern: .+
150154
type: string
151155
startingDeadlineSeconds:

deploy/controller/crds.yaml

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,11 @@ spec:
304304
- spec
305305
type: object
306306
schedule:
307-
description: The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
307+
description: |-
308+
The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
309+
A deterministic, UID-based stagger offset is applied to spread
310+
execution across the "cronjob-stagger-window" (default: 2 hours,
311+
set to 0 to disable) configured in the csi-addons-config ConfigMap.
308312
pattern: .+
309313
type: string
310314
startingDeadlineSeconds:
@@ -1004,7 +1008,11 @@ spec:
10041008
- spec
10051009
type: object
10061010
schedule:
1007-
description: The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
1011+
description: |-
1012+
The schedule in Cron format, see https://en.wikipedia.org/wiki/Cron.
1013+
A deterministic, UID-based stagger offset is applied to spread
1014+
execution across the "cronjob-stagger-window" (default: 2 hours,
1015+
set to 0 to disable) configured in the csi-addons-config ConfigMap.
10081016
pattern: .+
10091017
type: string
10101018
startingDeadlineSeconds:

deploy/controller/csi-addons-config.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,4 @@ data:
1010
"max-concurrent-reconciles": "100"
1111
"max-group-pvcs": "100"
1212
"csi-addons-node-retry-delay": "5"
13+
"cronjob-stagger-window": "2"

docs/csi-addons-config.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@ CSI-Addons Operator can consume configuration from a ConfigMap named `csi-addons
44
in the same namespace as the operator. This enables configuration of the operator to persist across
55
upgrades. The ConfigMap can support the following configuration options:
66

7-
| Option | Default value | Description |
8-
| ----------------------------- | ------------- | --------------------------------------------------------------------------------------------------- |
9-
| `reclaim-space-timeout` | `"3m"` | Timeout for reclaimspace operation |
10-
| `max-concurrent-reconciles` | `"100"` | Maximum number of concurrent reconciles |
11-
| `max-group-pvcs` | `"100"` | Maximum number of PVCs allowed in a volume group |
12-
| `csi-addons-node-retry-delay` | `"5"` | Duration, in seconds, that csi-addons reconcile must wait before retrying connection to the sidecar |
13-
| `schedule-precedence` | `"pvc"` | The order in which the schedule annotation should be read |
7+
| Option | Default value | Description |
8+
| ----------------------------- | ------------- | ------------------------------------------------------------------------------------------------------------------------ |
9+
| `max-concurrent-reconciles` | `"100"` | Maximum number of concurrent reconciles |
10+
| `max-group-pvcs` | `"100"` | Maximum number of PVCs allowed in a volume group |
11+
| `csi-addons-node-retry-delay` | `"5"` | Duration, in seconds, that csi-addons reconcile must wait before retrying connection to the sidecar |
12+
| `schedule-precedence` | `"pvc"` | The order in which the schedule annotation should be read |
13+
| `cronjob-stagger-window` | `"2"` | Maximum stagger window, in hours, for key rotation and reclaim space CronJob schedules. Set to `0` to disable staggering |
1414

1515
[`csi-addons-config` ConfigMap](../deploy/controller/csi-addons-config.yaml) is provided as an example.
1616

internal/controller/csiaddons/encryptionkeyrotationcronjob_controller.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ import (
4040
// EncryptionKeyRotationCronJobReconciler reconciles an EncryptionKeyRotationCronJob object
4141
type EncryptionKeyRotationCronJobReconciler struct {
4242
client.Client
43-
Scheme *runtime.Scheme
43+
Scheme *runtime.Scheme
44+
StaggerWindow int
4445
}
4546

4647
//+kubebuilder:rbac:groups=csiaddons.openshift.io,resources=encryptionkeyrotationcronjobs,verbs=get;list;watch;create;update;patch;delete
@@ -123,7 +124,7 @@ func (r *EncryptionKeyRotationCronJobReconciler) Reconcile(ctx context.Context,
123124
return ctrl.Result{}, nil
124125
}
125126

126-
missedRun, nextRun, err := getNextScheduleForKeyRotation(krcJob, time.Now())
127+
missedRun, nextRun, err := getNextScheduleForKeyRotation(krcJob, time.Now(), r.StaggerWindow)
127128
if err != nil {
128129
logger.Error(err, "failed to get next schedule for jobs", "schedule", krcJob.Spec.Schedule)
129130

@@ -293,7 +294,8 @@ func (r *EncryptionKeyRotationCronJobReconciler) deleteOldEncryptionKeyRotationJ
293294
// An error is returned if more than 100 start times have been missed.
294295
func getNextScheduleForKeyRotation(
295296
krcJob *csiaddonsv1alpha1.EncryptionKeyRotationCronJob,
296-
now time.Time) (time.Time, time.Time, error) {
297+
now time.Time,
298+
staggerWindow int) (time.Time, time.Time, error) {
297299
sched, err := cron.ParseStandard(krcJob.Spec.Schedule)
298300
if err != nil {
299301
return time.Time{}, time.Time{}, fmt.Errorf("unparsable schedule %q: %v", krcJob.Spec.Schedule, err)
@@ -313,8 +315,11 @@ func getNextScheduleForKeyRotation(
313315
earliestTime = schedulingDeadline
314316
}
315317
}
318+
319+
rawNext := sched.Next(now)
320+
staggeredNext := utils.GetStaggeredNext(krcJob.UID, rawNext, sched, staggerWindow)
316321
if earliestTime.After(now) {
317-
return time.Time{}, sched.Next(now), nil
322+
return time.Time{}, staggeredNext, nil
318323
}
319324

320325
starts := 0
@@ -333,7 +338,7 @@ func getNextScheduleForKeyRotation(
333338
" delete and recreate encryptionkeyrotationjob")
334339
}
335340
}
336-
return lastMissed, sched.Next(now), nil
341+
return lastMissed, staggeredNext, nil
337342
}
338343

339344
// constructEncryptionKeyRotationJob forms an EncryptionKeyRotationJob for a given

internal/controller/csiaddons/reclaimspacecronjob_controller.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,8 @@ import (
4040
// ReclaimSpaceCronJobReconciler reconciles a ReclaimSpaceCronJob object
4141
type ReclaimSpaceCronJobReconciler struct {
4242
client.Client
43-
Scheme *runtime.Scheme
43+
Scheme *runtime.Scheme
44+
StaggerWindow int
4445
}
4546

4647
var (
@@ -129,7 +130,7 @@ func (r *ReclaimSpaceCronJobReconciler) Reconcile(ctx context.Context, req ctrl.
129130
}
130131

131132
// figure out the next times that we need to create jobs at (or anything we missed).
132-
missedRun, nextRun, err := getNextSchedule(rsCronJob, time.Now())
133+
missedRun, nextRun, err := getNextSchedule(rsCronJob, time.Now(), r.StaggerWindow)
133134
if err != nil {
134135
logger.Error(err, "Failed to Parse out CronJob schedule", "schedule", rsCronJob.Spec.Schedule)
135136
// invalid schedule, do not requeue.
@@ -356,7 +357,8 @@ func getScheduledTimeForRSJob(rsJob *csiaddonsv1alpha1.ReclaimSpaceJob) (*time.T
356357
// This function returns an error if there are more than 100 missed start times.
357358
func getNextSchedule(
358359
rsCronJob *csiaddonsv1alpha1.ReclaimSpaceCronJob,
359-
now time.Time) (time.Time, time.Time, error) {
360+
now time.Time,
361+
staggerWindow int) (time.Time, time.Time, error) {
360362
sched, err := cron.ParseStandard(rsCronJob.Spec.Schedule)
361363
if err != nil {
362364
return time.Time{}, time.Time{}, fmt.Errorf("unparsable schedule %q: %v", rsCronJob.Spec.Schedule, err)
@@ -376,8 +378,11 @@ func getNextSchedule(
376378
earliestTime = schedulingDeadline
377379
}
378380
}
381+
382+
rawNext := sched.Next(now)
383+
staggeredNext := utils.GetStaggeredNext(rsCronJob.UID, rawNext, sched, staggerWindow)
379384
if earliestTime.After(now) {
380-
return time.Time{}, sched.Next(now), nil
385+
return time.Time{}, staggeredNext, nil
381386
}
382387

383388
starts := 0
@@ -396,5 +401,5 @@ func getNextSchedule(
396401
" delete and recreate reclaimspacecronjob")
397402
}
398403
}
399-
return lastMissed, sched.Next(now), nil
404+
return lastMissed, staggeredNext, nil
400405
}

0 commit comments

Comments
 (0)