Skip to content

Commit 5ab9bf7

Browse files
Shivs11 and claude authored
fix: retry on conflict in test helper to fix flaky integration test (#244)
## Summary - `setHealthyDeploymentStatus` in the integration test helper does a `Get` followed by `Status().Update()`. Between these calls, the controller's reconcile loop can modify the same Deployment (e.g. rolling update for the custom-build test), bumping the `resourceVersion` and causing a conflict error that fails the test. - Wraps the Get+Update in `retry.RetryOnConflict` (standard k8s optimistic concurrency pattern) so the helper re-fetches and retries on conflict instead of fatally failing. - This fixes the flaky `manual-rollout-custom-build-expect-rolling-update` integration test that was consistently failing in CI but passing locally. ## Test plan - [x] Full integration test suite passes locally (34/34 tests) - [x] Unit tests unaffected 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 5f00a13 commit 5ab9bf7

1 file changed

Lines changed: 40 additions & 37 deletions

File tree

internal/tests/internal/deployment_controller.go

Lines changed: 40 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
corev1 "k8s.io/api/core/v1"
1515
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1616
"k8s.io/apimachinery/pkg/types"
17+
"k8s.io/client-go/util/retry"
1718
"sigs.k8s.io/controller-runtime/pkg/client"
1819
)
1920

@@ -103,45 +104,47 @@ func applyDeployment(t *testing.T, ctx context.Context, k8sClient client.Client,
103104
return stopFuncs
104105
}
105106

106-
// Set deployment status to `DeploymentAvailable` to simulate a healthy deployment
107-
// This is necessary because envtest doesn't actually start pods
107+
// Set deployment status to `DeploymentAvailable` to simulate a healthy deployment.
108+
// This is necessary because envtest doesn't actually start pods.
109+
// Uses retry-on-conflict because the controller may update the Deployment (e.g. rolling
110+
// update) between our Get and Status().Update(), bumping the resourceVersion.
108111
func setHealthyDeploymentStatus(t *testing.T, ctx context.Context, k8sClient client.Client, deployment appsv1.Deployment) {
109-
// Refetch to get the latest resourceVersion before updating status, since the
110-
// controller may have modified the Deployment (e.g. rolling update) while workers
111-
// were starting up.
112-
var fresh appsv1.Deployment
113-
if err := k8sClient.Get(ctx, types.NamespacedName{Name: deployment.Name, Namespace: deployment.Namespace}, &fresh); err != nil {
114-
t.Fatalf("failed to refetch deployment before status update: %v", err)
115-
}
116-
now := metav1.Now()
117-
fresh.Status = appsv1.DeploymentStatus{
118-
Replicas: *fresh.Spec.Replicas,
119-
UpdatedReplicas: *fresh.Spec.Replicas,
120-
ReadyReplicas: *fresh.Spec.Replicas,
121-
AvailableReplicas: *fresh.Spec.Replicas,
122-
UnavailableReplicas: 0,
123-
Conditions: []appsv1.DeploymentCondition{
124-
{
125-
Type: appsv1.DeploymentAvailable,
126-
Status: corev1.ConditionTrue,
127-
LastUpdateTime: now,
128-
LastTransitionTime: now,
129-
Reason: "MinimumReplicasAvailable",
130-
Message: "Deployment has minimum availability.",
131-
},
132-
{
133-
Type: appsv1.DeploymentProgressing,
134-
Status: corev1.ConditionTrue,
135-
LastUpdateTime: now,
136-
LastTransitionTime: now,
137-
Reason: "NewReplicaSetAvailable",
138-
Message: "ReplicaSet is available.",
112+
t.Helper()
113+
err := retry.RetryOnConflict(retry.DefaultRetry, func() error {
114+
var fresh appsv1.Deployment
115+
if err := k8sClient.Get(ctx, types.NamespacedName{Name: deployment.Name, Namespace: deployment.Namespace}, &fresh); err != nil {
116+
return err
117+
}
118+
now := metav1.Now()
119+
fresh.Status = appsv1.DeploymentStatus{
120+
Replicas: *fresh.Spec.Replicas,
121+
UpdatedReplicas: *fresh.Spec.Replicas,
122+
ReadyReplicas: *fresh.Spec.Replicas,
123+
AvailableReplicas: *fresh.Spec.Replicas,
124+
UnavailableReplicas: 0,
125+
Conditions: []appsv1.DeploymentCondition{
126+
{
127+
Type: appsv1.DeploymentAvailable,
128+
Status: corev1.ConditionTrue,
129+
LastUpdateTime: now,
130+
LastTransitionTime: now,
131+
Reason: "MinimumReplicasAvailable",
132+
Message: "Deployment has minimum availability.",
133+
},
134+
{
135+
Type: appsv1.DeploymentProgressing,
136+
Status: corev1.ConditionTrue,
137+
LastUpdateTime: now,
138+
LastTransitionTime: now,
139+
Reason: "NewReplicaSetAvailable",
140+
Message: "ReplicaSet is available.",
141+
},
139142
},
140-
},
141-
}
142-
t.Logf("started %d healthy workers, updating deployment status", *fresh.Spec.Replicas)
143-
if err := k8sClient.Status().Update(ctx, &fresh); err != nil {
144-
t.Fatalf("failed to update deployment status: %v", err)
143+
}
144+
return k8sClient.Status().Update(ctx, &fresh)
145+
})
146+
if err != nil {
147+
t.Fatalf("failed to update deployment status after retries: %v", err)
145148
}
146149
}
147150

0 commit comments

Comments (0)