Skip to content
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
68 changes: 45 additions & 23 deletions api/v1alpha1/temporalworker_webhook.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ func (s *TemporalWorkerDeploymentSpec) Default(ctx context.Context) error {
s.SunsetStrategy.DeleteDelay = &v1.Duration{Duration: defaults.DeleteDelay}
}

if s.RollbackStrategy == nil {
s.RollbackStrategy = &RollbackStrategy{Strategy: RollbackAllAtOnce}
Comment on lines +60 to +61
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a change in behavior that should be called out in release notes.

Is there a way to disable rollbacks and keep the existing behavior of treating every version change as a new rollout? This should be documented as well.

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Now it's possible to disable the rollback by setting the max version age to 0. I've also updated the docs.

} else if s.RollbackStrategy.Strategy == "" {
s.RollbackStrategy.Strategy = RollbackAllAtOnce
}
return nil
}

Expand Down Expand Up @@ -94,6 +99,7 @@ func validateForUpdateOrCreate(old, new *TemporalWorkerDeployment) (admission.Wa
}

allErrs = append(allErrs, validateRolloutStrategy(new.Spec.RolloutStrategy)...)
allErrs = append(allErrs, validateRollbackStrategy(*new.Spec.RollbackStrategy)...)

if len(allErrs) > 0 {
return nil, newInvalidErr(new, allErrs)
Expand All @@ -106,29 +112,7 @@ func validateRolloutStrategy(s RolloutStrategy) []*field.Error {
var allErrs []*field.Error

if s.Strategy == UpdateProgressive {
rolloutSteps := s.Steps
if len(rolloutSteps) == 0 {
allErrs = append(allErrs,
field.Invalid(field.NewPath("spec.rollout.steps"), rolloutSteps, "steps are required for Progressive rollout"),
)
}
var lastRamp int
for i, s := range rolloutSteps {
// Check duration >= 30s
if s.PauseDuration.Duration < 30*time.Second {
allErrs = append(allErrs,
field.Invalid(field.NewPath(fmt.Sprintf("spec.rollout.steps[%d].pauseDuration", i)), s.PauseDuration.Duration.String(), "pause duration must be at least 30s"),
)
}

// Check ramp value greater than last
if s.RampPercentage <= lastRamp {
allErrs = append(allErrs,
field.Invalid(field.NewPath(fmt.Sprintf("spec.rollout.steps[%d].rampPercentage", i)), s.RampPercentage, "rampPercentage must increase between each step"),
)
}
lastRamp = s.RampPercentage
}
allErrs = append(allErrs, validateProgressiveStrategySteps("spec.rollout.steps", s.Steps)...)
}

// Validate gate input fields
Expand All @@ -155,6 +139,44 @@ func validateRolloutStrategy(s RolloutStrategy) []*field.Error {
return allErrs
}

func validateRollbackStrategy(s RollbackStrategy) []*field.Error {
var allErrs []*field.Error
if s.Strategy == RollbackProgressive {
allErrs = append(allErrs, validateProgressiveStrategySteps("spec.rollback.steps", s.Steps)...)
}
return allErrs
}

func validateProgressiveStrategySteps(specName string, steps []RolloutStep) []*field.Error {
var allErrs []*field.Error

if len(steps) == 0 {
allErrs = append(allErrs,
field.Invalid(field.NewPath(specName), steps, "steps are required for Progressive strategy"),
)
}

var lastRamp int
for i, step := range steps {
// Check duration >= 30s
if step.PauseDuration.Duration < 30*time.Second {
allErrs = append(allErrs,
field.Invalid(field.NewPath(fmt.Sprintf("%s[%d].pauseDuration", specName, i)), step.PauseDuration.Duration.String(), "pause duration must be at least 30s"),
)
}

// Check ramp value greater than last
if step.RampPercentage <= lastRamp {
allErrs = append(allErrs,
field.Invalid(field.NewPath(fmt.Sprintf("%s[%d].rampPercentage", specName, i)), step.RampPercentage, "rampPercentage must increase between each step"),
)
}
lastRamp = step.RampPercentage
}

return allErrs
}

func newInvalidErr(dep *TemporalWorkerDeployment, errs field.ErrorList) *apierrors.StatusError {
return apierrors.NewInvalid(dep.GroupVersionKind().GroupKind(), dep.GetName(), errs)
}
93 changes: 88 additions & 5 deletions api/v1alpha1/temporalworker_webhook_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func TestTemporalWorkerDeployment_ValidateCreate(t *testing.T) {
obj runtime.Object
errorMsg string
}{
"valid temporal worker deployment": {
"valid default temporal worker deployment": {
obj: testhelpers.MakeTWDWithName("valid-worker", ""),
},
"temporal worker deployment with name too long": {
Expand All @@ -39,15 +39,15 @@ func TestTemporalWorkerDeployment_ValidateCreate(t *testing.T) {
},
errorMsg: "expected a TemporalWorkerDeployment",
},
"missing rollout steps": {
"rollout strategy - invalid Progressive without steps": {
obj: testhelpers.ModifyObj(testhelpers.MakeTWDWithName("prog-rollout-missing-steps", ""), func(obj *temporaliov1alpha1.TemporalWorkerDeployment) *temporaliov1alpha1.TemporalWorkerDeployment {
obj.Spec.RolloutStrategy.Strategy = temporaliov1alpha1.UpdateProgressive
obj.Spec.RolloutStrategy.Steps = nil
return obj
}),
errorMsg: "spec.rollout.steps: Invalid value: null: steps are required for Progressive rollout",
errorMsg: "spec.rollout.steps: Invalid value: null: steps are required for Progressive strategy",
},
"ramp value for step <= previous step": {
"rollout strategy - invalid Progressive with non-increasing ramp": {
obj: testhelpers.ModifyObj(testhelpers.MakeTWDWithName("prog-rollout-decreasing-ramps", ""), func(obj *temporaliov1alpha1.TemporalWorkerDeployment) *temporaliov1alpha1.TemporalWorkerDeployment {
obj.Spec.RolloutStrategy.Strategy = temporaliov1alpha1.UpdateProgressive
obj.Spec.RolloutStrategy.Steps = []temporaliov1alpha1.RolloutStep{
Expand All @@ -62,7 +62,7 @@ func TestTemporalWorkerDeployment_ValidateCreate(t *testing.T) {
}),
errorMsg: "[spec.rollout.steps[2].rampPercentage: Invalid value: 9: rampPercentage must increase between each step, spec.rollout.steps[4].rampPercentage: Invalid value: 50: rampPercentage must increase between each step]",
},
"pause duration < 30s": {
"rollout strategy - invalid Progressive pause duration < 30s": {
obj: testhelpers.ModifyObj(testhelpers.MakeTWDWithName("prog-rollout-decreasing-ramps", ""), func(obj *temporaliov1alpha1.TemporalWorkerDeployment) *temporaliov1alpha1.TemporalWorkerDeployment {
obj.Spec.RolloutStrategy.Strategy = temporaliov1alpha1.UpdateProgressive
obj.Spec.RolloutStrategy.Steps = []temporaliov1alpha1.RolloutStep{
Expand All @@ -74,6 +74,52 @@ func TestTemporalWorkerDeployment_ValidateCreate(t *testing.T) {
}),
errorMsg: `spec.rollout.steps[1].pauseDuration: Invalid value: "10s": pause duration must be at least 30s`,
},
"rollback strategy - valid Progressive with steps": {
obj: testhelpers.ModifyObj(testhelpers.MakeTWDWithName("rollback-progressive", ""), func(obj *temporaliov1alpha1.TemporalWorkerDeployment) *temporaliov1alpha1.TemporalWorkerDeployment {
obj.Spec.RollbackStrategy = &temporaliov1alpha1.RollbackStrategy{
Strategy: temporaliov1alpha1.RollbackProgressive,
Steps: []temporaliov1alpha1.RolloutStep{
{50, metav1.Duration{Duration: 30 * time.Second}},
},
}
return obj
}),
},
"rollback strategy - invalid Progressive without steps": {
obj: testhelpers.ModifyObj(testhelpers.MakeTWDWithName("rollback-progressive-no-steps", ""), func(obj *temporaliov1alpha1.TemporalWorkerDeployment) *temporaliov1alpha1.TemporalWorkerDeployment {
obj.Spec.RollbackStrategy = &temporaliov1alpha1.RollbackStrategy{
Strategy: temporaliov1alpha1.RollbackProgressive,
Steps: nil,
}
return obj
}),
errorMsg: "steps are required for Progressive strategy",
},
"rollback strategy - invalid Progressive pause duration < 30s": {
obj: testhelpers.ModifyObj(testhelpers.MakeTWDWithName("rollback-progressive-invalid", ""), func(obj *temporaliov1alpha1.TemporalWorkerDeployment) *temporaliov1alpha1.TemporalWorkerDeployment {
obj.Spec.RollbackStrategy = &temporaliov1alpha1.RollbackStrategy{
Strategy: temporaliov1alpha1.RollbackProgressive,
Steps: []temporaliov1alpha1.RolloutStep{
{50, metav1.Duration{Duration: 10 * time.Second}},
},
}
return obj
}),
errorMsg: "pause duration must be at least 30s",
},
"rollback strategy - invalid Progressive with non-increasing ramp": {
obj: testhelpers.ModifyObj(testhelpers.MakeTWDWithName("rollback-progressive-decreasing", ""), func(obj *temporaliov1alpha1.TemporalWorkerDeployment) *temporaliov1alpha1.TemporalWorkerDeployment {
obj.Spec.RollbackStrategy = &temporaliov1alpha1.RollbackStrategy{
Strategy: temporaliov1alpha1.RollbackProgressive,
Steps: []temporaliov1alpha1.RolloutStep{
{50, metav1.Duration{Duration: time.Minute}},
{25, metav1.Duration{Duration: time.Minute}},
},
}
return obj
}),
errorMsg: "rampPercentage must increase between each step",
},
}

for name, tc := range tests {
Expand Down Expand Up @@ -168,6 +214,43 @@ func TestTemporalWorkerDeployment_Default(t *testing.T) {
assert.Equal(t, 24*time.Hour, obj.Spec.SunsetStrategy.DeleteDelay.Duration)
},
},
"rollback strategy initialized when nil": {
obj: testhelpers.ModifyObj(testhelpers.MakeTWDWithName("default-rollback-nil", ""), func(obj *temporaliov1alpha1.TemporalWorkerDeployment) *temporaliov1alpha1.TemporalWorkerDeployment {
obj.Spec.RollbackStrategy = nil
return obj
}),
expected: func(t *testing.T, obj *temporaliov1alpha1.TemporalWorkerDeployment) {
require.NotNil(t, obj.Spec.RollbackStrategy, "expected RollbackStrategy to be initialized by webhook")
assert.Equal(t, temporaliov1alpha1.RollbackAllAtOnce, obj.Spec.RollbackStrategy.Strategy, "expected RollbackStrategy.Strategy to default to AllAtOnce")
},
},
"rollback strategy defaults empty strategy field to AllAtOnce": {
obj: testhelpers.ModifyObj(testhelpers.MakeTWDWithName("default-rollback-empty", ""), func(obj *temporaliov1alpha1.TemporalWorkerDeployment) *temporaliov1alpha1.TemporalWorkerDeployment {
obj.Spec.RollbackStrategy = &temporaliov1alpha1.RollbackStrategy{
Strategy: "",
}
return obj
}),
expected: func(t *testing.T, obj *temporaliov1alpha1.TemporalWorkerDeployment) {
require.NotNil(t, obj.Spec.RollbackStrategy)
assert.Equal(t, temporaliov1alpha1.RollbackAllAtOnce, obj.Spec.RollbackStrategy.Strategy, "expected RollbackStrategy.Strategy to default to AllAtOnce")
},
},
"rollback strategy preserves explicit strategy": {
obj: testhelpers.ModifyObj(testhelpers.MakeTWDWithName("explicit-rollback-progressive", ""), func(obj *temporaliov1alpha1.TemporalWorkerDeployment) *temporaliov1alpha1.TemporalWorkerDeployment {
obj.Spec.RollbackStrategy = &temporaliov1alpha1.RollbackStrategy{
Strategy: temporaliov1alpha1.RollbackProgressive,
Steps: []temporaliov1alpha1.RolloutStep{
{50, metav1.Duration{Duration: 30 * time.Second}},
},
}
return obj
}),
expected: func(t *testing.T, obj *temporaliov1alpha1.TemporalWorkerDeployment) {
require.NotNil(t, obj.Spec.RollbackStrategy)
assert.Equal(t, temporaliov1alpha1.RollbackProgressive, obj.Spec.RollbackStrategy.Strategy, "expected RollbackStrategy.Strategy to remain Progressive")
},
},
}

for name, tc := range tests {
Expand Down
31 changes: 31 additions & 0 deletions api/v1alpha1/worker_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,10 @@ type TemporalWorkerDeploymentSpec struct {
// How to rollout new workflow executions to the target version.
RolloutStrategy RolloutStrategy `json:"rollout"`

// How to rollback to a previous version. If not specified, defaults to AllAtOnce strategy.
// +optional
RollbackStrategy *RollbackStrategy `json:"rollback,omitempty"`
Comment thread
eniko-dif marked this conversation as resolved.

// How to manage sunsetting drained versions.
SunsetStrategy SunsetStrategy `json:"sunset"`

Expand Down Expand Up @@ -355,6 +359,18 @@ const (
UpdateProgressive DefaultVersionUpdateStrategy = "Progressive"
)

// DefaultVersionRollbackStrategy describes how to cut over during rollback to a previous version.
// +kubebuilder:validation:Enum=AllAtOnce;Progressive
type DefaultVersionRollbackStrategy string
Comment thread
eniko-dif marked this conversation as resolved.
Outdated

const (
// RollbackAllAtOnce immediately switches 100% of traffic back to the previous version.
RollbackAllAtOnce DefaultVersionRollbackStrategy = "AllAtOnce"

// RollbackProgressive gradually ramps traffic back to the previous version.
RollbackProgressive DefaultVersionRollbackStrategy = "Progressive"
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think if we have a progressive rollout mode, we should also require that there be at least 1 step defined. It could be reasonable to assume that choosing the progressive rollout mode without also adding steps would reuse the existing steps from the rollout config. Without steps, progressive and all at once modes would behave the same way, right?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The validation is the same as for rollout, so there cannot be a no-step progressive strategy (done in the webhook). "It could be reasonable to assume that choosing the progressive rollout mode without also adding steps would reuse the existing steps from the rollout config." I would not assume this, it seems like a nasty side-effect, but if you think it's a good way forward, I can implement it.

)

type GateWorkflowConfig struct {
WorkflowType string `json:"workflowType"`
// Input is an arbitrary JSON object passed as the first parameter to the gate workflow.
Expand Down Expand Up @@ -396,6 +412,21 @@ type RolloutStrategy struct {
Steps []RolloutStep `json:"steps,omitempty" protobuf:"bytes,3,rep,name=steps"`
}

// RollbackStrategy defines strategy to apply when rolling back to a previous version.
// This is separate from RolloutStrategy because rollbacks have different requirements:
// - No gate workflow (already trusted version)
// - No manual mode (rollbacks should be automatic)
// - Default to AllAtOnce for fast recovery
type RollbackStrategy struct {
Comment thread
eniko-dif marked this conversation as resolved.
// Strategy for rollback. Valid values are "AllAtOnce" or "Progressive".
// Defaults to "AllAtOnce" for fast recovery.
Strategy DefaultVersionRollbackStrategy `json:"strategy"`

// Steps to execute progressive rollbacks. Only required when strategy is "Progressive".
// +optional
Steps []RolloutStep `json:"steps,omitempty"`
Comment thread
eniko-dif marked this conversation as resolved.
}

// SunsetStrategy defines strategy to apply when sunsetting k8s deployments of drained versions.
type SunsetStrategy struct {
// ScaledownDelay specifies how long to wait after a version is drained before scaling its Deployment to zero.
Expand Down
25 changes: 25 additions & 0 deletions api/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ require (
github.com/onsi/gomega v1.36.1
github.com/pborman/uuid v1.2.1
github.com/stretchr/testify v1.11.1
go.temporal.io/api v1.60.2
go.temporal.io/api v1.61.0
go.temporal.io/sdk v1.38.0
go.temporal.io/sdk/contrib/envconfig v0.1.0
go.temporal.io/server v1.30.1
Expand All @@ -21,6 +21,7 @@ require (
k8s.io/apimachinery v0.34.0
k8s.io/client-go v0.34.0
sigs.k8s.io/controller-runtime v0.21.0
sigs.k8s.io/yaml v1.6.0
)

require (
Expand Down Expand Up @@ -191,5 +192,4 @@ require (
sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect
sigs.k8s.io/randfill v1.0.0 // indirect
sigs.k8s.io/structured-merge-diff/v6 v6.3.0 // indirect
sigs.k8s.io/yaml v1.6.0 // indirect
)
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -396,8 +396,8 @@ go.opentelemetry.io/otel/trace v1.40.0 h1:WA4etStDttCSYuhwvEa8OP8I5EWu24lkOzp+ZY
go.opentelemetry.io/otel/trace v1.40.0/go.mod h1:zeAhriXecNGP/s2SEG3+Y8X9ujcJOTqQ5RgdEJcawiA=
go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4=
go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE=
go.temporal.io/api v1.60.2 h1:xqUqdPeOu8/HNWVPu51P6tVoBJ5kRh8nBI62xXi+IWg=
go.temporal.io/api v1.60.2/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM=
go.temporal.io/api v1.61.0 h1:gGIvUu1pRE9yVKqlirYd5FGDT5N/hvcZ0tlB4mRvVM4=
go.temporal.io/api v1.61.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM=
go.temporal.io/sdk v1.38.0 h1:4Bok5LEdED7YKpsSjIa3dDqram5VOq+ydBf4pyx0Wo4=
go.temporal.io/sdk v1.38.0/go.mod h1:a+R2Ej28ObvHoILbHaxMyind7M6D+W0L7edt5UJF4SE=
go.temporal.io/sdk/contrib/envconfig v0.1.0 h1:s+G/Ujph+Xl2jzLiiIm2T1vuijDkUL4Kse49dgDVGBE=
Expand Down
Loading
Loading