Skip to content

Commit 02c3913

Browse files
committed
fix(tests): handle autoscaler policy version conflicts with wait.Backoff retry
When a node template and the autoscaler policies are updated concurrently in the same `terraform apply`, the API rejects the policy update with: "node template has changed since the policies have been retrieved, refetch the policies and perform the update again".
1 parent dbdf668 commit 02c3913

File tree

3 files changed

+113
-40
lines changed

3 files changed

+113
-40
lines changed

castai/resource_autoscaler.go

Lines changed: 71 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"io"
99
"log"
1010
"net/http"
11+
"strings"
1112
"time"
1213

1314
jsonpatch "github.com/evanphx/json-patch"
@@ -17,6 +18,7 @@ import (
1718
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
1819
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/validation"
1920
"github.com/mitchellh/mapstructure"
21+
"k8s.io/apimachinery/pkg/util/wait"
2022

2123
"github.com/castai/terraform-provider-castai/castai/sdk"
2224
"github.com/castai/terraform-provider-castai/castai/types"
@@ -805,23 +807,73 @@ func updateAutoscalerPolicies(ctx context.Context, data *schema.ResourceData, me
805807
return nil
806808
}
807809

808-
policies, err := getChangedPolicies(ctx, data, meta, clusterId)
809-
if err != nil {
810-
return err
810+
// Define the update operation that will be executed with retry logic
811+
updatePolicies := func() error {
812+
policies, err := getChangedPolicies(ctx, data, meta, clusterId)
813+
if err != nil {
814+
return err
815+
}
816+
817+
if policies == nil {
818+
log.Printf("[DEBUG] changed policies json not calculated. Skipping autoscaler policies changes")
819+
return nil
820+
}
821+
822+
changedPoliciesJSON := string(policies)
823+
if changedPoliciesJSON == "" {
824+
log.Printf("[DEBUG] changed policies json not found. Skipping autoscaler policies changes")
825+
return nil
826+
}
827+
828+
return upsertPolicies(ctx, meta, clusterId, changedPoliciesJSON)
811829
}
812830

813-
if policies == nil {
814-
log.Printf("[DEBUG] changed policies json not calculated. Skipping autoscaler policies changes")
831+
// Try to update policies immediately without any delay.
832+
err := updatePolicies()
833+
if err == nil {
815834
return nil
816835
}
817836

818-
changedPoliciesJSON := string(policies)
819-
if changedPoliciesJSON == "" {
820-
log.Printf("[DEBUG] changed policies json not found. Skipping autoscaler policies changes")
821-
return nil
837+
// Check if error is retryable (node template version conflict)
838+
if !isNodeTemplateVersionConflict(err) {
839+
return err // Non-retryable error
840+
}
841+
842+
// Fall back to exponential backoff retry only if version conflict occurred.
843+
log.Printf("[INFO] Node template version conflict detected, will retry with exponential backoff: %v", err)
844+
845+
// Exponential backoff configuration
846+
backoff := wait.Backoff{
847+
Duration: 100 * time.Millisecond,
848+
Factor: 2.0,
849+
Jitter: 0.1,
850+
Steps: 5,
851+
Cap: 2 * time.Second,
822852
}
823853

824-
return upsertPolicies(ctx, meta, clusterId, changedPoliciesJSON)
854+
retryErr := wait.ExponentialBackoffWithContext(ctx, backoff, func(ctx context.Context) (done bool, err error) {
855+
err = updatePolicies()
856+
if err == nil {
857+
return true, nil // Success - stop retrying
858+
}
859+
860+
// Check if error is retryable
861+
if !isNodeTemplateVersionConflict(err) {
862+
return false, err // Non-retryable error - stop with error
863+
}
864+
865+
log.Printf("[DEBUG] Retry failed with version conflict: %v", err)
866+
return false, nil // Retryable error - continue retrying
867+
})
868+
869+
if retryErr != nil {
870+
if wait.Interrupted(retryErr) {
871+
return fmt.Errorf("timeout waiting for autoscaler policy update after version conflicts: %w", err)
872+
}
873+
return retryErr
874+
}
875+
876+
return nil
825877
}
826878

827879
func upsertPolicies(ctx context.Context, meta interface{}, clusterId string, changedPoliciesJSON string) error {
@@ -835,6 +887,15 @@ func upsertPolicies(ctx context.Context, meta interface{}, clusterId string, cha
835887
return nil
836888
}
837889

890+
// isNodeTemplateVersionConflict reports whether err is the API's node template
// version conflict, i.e. the rejection raised when the node template has changed
// since the policies were retrieved.
//
// Detection is a substring match on err.Error(): the message must contain either
// "template has changed" or "refetch the policies". A nil err returns false.
891+
func isNodeTemplateVersionConflict(err error) bool {
892+
if err == nil {
893+
return false
894+
}
895+
errMsg := err.Error()
896+
return strings.Contains(errMsg, "template has changed") || strings.Contains(errMsg, "refetch the policies")
897+
}
898+
838899
func readAutoscalerPolicies(ctx context.Context, data *schema.ResourceData, meta interface{}) error {
839900
log.Printf("[INFO] AUTOSCALER policies get call start")
840901
defer log.Printf("[INFO] AUTOSCALER policies get call end")

go.mod

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,8 @@ go 1.24.0
44

55
require (
66
github.com/cenkalti/backoff/v4 v4.1.3
7-
github.com/evanphx/json-patch v4.9.0+incompatible
7+
github.com/evanphx/json-patch v4.12.0+incompatible
88
github.com/golang/mock v1.5.0
9-
github.com/google/martian v2.1.0+incompatible
109
github.com/google/uuid v1.6.0
1110
github.com/gruntwork-io/terratest v0.40.18
1211
github.com/hashicorp/go-cty v1.5.0
@@ -22,9 +21,10 @@ require (
2221
github.com/mitchellh/mapstructure v1.5.0
2322
github.com/oapi-codegen/runtime v1.1.1
2423
github.com/samber/lo v1.49.1
25-
github.com/stretchr/testify v1.10.0
26-
golang.org/x/crypto v0.41.0
24+
github.com/stretchr/testify v1.11.1
25+
golang.org/x/crypto v0.44.0
2726
golang.org/x/exp v0.0.0-20230626212559-97b1e661b5df
27+
k8s.io/apimachinery v0.30.0
2828
)
2929

3030
require (
@@ -39,6 +39,7 @@ require (
3939
github.com/cloudflare/circl v1.6.1 // indirect
4040
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
4141
github.com/fatih/color v1.16.0 // indirect
42+
github.com/go-logr/logr v1.4.3 // indirect
4243
github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect
4344
github.com/golang/protobuf v1.5.4 // indirect
4445
github.com/google/go-cmp v0.7.0 // indirect
@@ -85,19 +86,22 @@ require (
8586
github.com/zclconf/go-cty v1.16.3 // indirect
8687
go.opencensus.io v0.23.0 // indirect
8788
golang.org/x/lint v0.0.0-20210508222113-6edffad5e616 // indirect
88-
golang.org/x/mod v0.26.0 // indirect
89-
golang.org/x/net v0.43.0 // indirect
89+
golang.org/x/mod v0.29.0 // indirect
90+
golang.org/x/net v0.47.0 // indirect
9091
golang.org/x/oauth2 v0.30.0 // indirect
91-
golang.org/x/sync v0.16.0 // indirect
92-
golang.org/x/sys v0.35.0 // indirect
93-
golang.org/x/text v0.28.0 // indirect
94-
golang.org/x/tools v0.35.0 // indirect
92+
golang.org/x/sync v0.18.0 // indirect
93+
golang.org/x/sys v0.38.0 // indirect
94+
golang.org/x/telemetry v0.0.0-20251008203120-078029d740a8 // indirect
95+
golang.org/x/text v0.31.0 // indirect
96+
golang.org/x/tools v0.38.0 // indirect
9597
google.golang.org/api v0.47.0 // indirect
9698
google.golang.org/appengine v1.6.8 // indirect
9799
google.golang.org/genproto v0.0.0-20210602131652-f16073e35f0c // indirect
98100
google.golang.org/grpc v1.75.1 // indirect
99101
google.golang.org/protobuf v1.36.9 // indirect
100102
gopkg.in/yaml.v3 v3.0.1 // indirect
103+
k8s.io/klog/v2 v2.130.1 // indirect
104+
k8s.io/utils v0.0.0-20251002143259-bc988d571ff4 // indirect
101105
)
102106

103107
exclude github.com/satori/go.uuid v1.2.0

go.sum

Lines changed: 28 additions & 20 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)