Skip to content

Commit ca5df98

Browse files
authored
feat: Handle 409 status code returned by DELETE prepared_scale_down (#343)
* handle 409 error * add comment about callPrepareDownscaleAndReturnElapsedDurationsSinceInitiatedDownscale
1 parent 21ff470 commit ca5df98

File tree

2 files changed

+29
-1
lines changed

2 files changed

+29
-1
lines changed

pkg/controller/controller_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -941,6 +941,25 @@ func TestRolloutController_ReconcileStatefulsetWithDownscaleDelay(t *testing.T)
941941
"DELETE http://ingester-zone-b-2.ingester-zone-b.test.svc.cluster.local./prepare-delayed-downscale",
942942
},
943943
},
944+
945+
"scale up succeeds even if DELETE returns 409 Conflict (i.e partition state locked)": {
946+
statefulSets: []runtime.Object{
947+
mockStatefulSet("ingester-zone-b", withReplicas(2, 2),
948+
withMirrorReplicasAnnotations("test", customResourceGVK),
949+
withDelayedDownscaleAnnotations(time.Hour, "http://pod/prepare-delayed-downscale")),
950+
},
951+
httpResponses: map[string]httpResponse{
952+
"DELETE http://ingester-zone-b-0.ingester-zone-b.test.svc.cluster.local./prepare-delayed-downscale": {statusCode: http.StatusConflict, body: "partition state is locked"},
953+
"DELETE http://ingester-zone-b-1.ingester-zone-b.test.svc.cluster.local./prepare-delayed-downscale": {statusCode: http.StatusConflict, body: "partition state is locked"},
954+
},
955+
customResourceScaleSpecReplicas: 5,
956+
customResourceScaleStatusReplicas: 2,
957+
expectedPatchedSets: map[string][]string{"ingester-zone-b": {`{"spec":{"replicas":5}}`}},
958+
expectedHttpRequests: []string{
959+
"DELETE http://ingester-zone-b-0.ingester-zone-b.test.svc.cluster.local./prepare-delayed-downscale",
960+
"DELETE http://ingester-zone-b-1.ingester-zone-b.test.svc.cluster.local./prepare-delayed-downscale",
961+
},
962+
},
944963
}
945964

946965
for testName, testData := range tests {

pkg/controller/delay.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,8 @@ func callPrepareDownscaleAndReturnElapsedDurationsSinceInitiatedDownscale(ctx co
200200
}
201201

202202
if resp.StatusCode/100 != 2 {
203+
// Unlike `callCancelDelayedDownscale`, here we treat every non-2xx status code as an error that should block the downscale.
204+
// This includes the case where the request fails because the partition state change is locked.
203205
level.Error(epLogger).Log("msg", "unexpected status code returned when calling POST on endpoint", "status", resp.StatusCode, "response_body", string(body))
204206
return fmt.Errorf("HTTP POST request returned non-2xx status code: %v", resp.StatusCode)
205207
}
@@ -260,8 +262,15 @@ func callCancelDelayedDownscale(ctx context.Context, logger log.Logger, client h
260262
defer resp.Body.Close()
261263

262264
if resp.StatusCode/100 != 2 {
263-
err := errors.New("HTTP DELETE request returned non-2xx status code")
264265
body, readError := io.ReadAll(resp.Body)
266+
267+
// Handle 409 Conflict separately - this typically means that the partition state is locked by an engineer
268+
if resp.StatusCode == http.StatusConflict {
269+
level.Info(epLogger).Log("msg", "HTTP DELETE request returned 409 status code, delayed downscale cancellation skipped", "status", resp.StatusCode, "response_body", string(body))
270+
return nil
271+
}
272+
273+
err := errors.New("HTTP DELETE request returned non-2xx status code")
265274
level.Error(epLogger).Log("msg", "unexpected status code returned when calling DELETE on endpoint", "status", resp.StatusCode, "response_body", string(body))
266275
return errors.Join(err, readError)
267276
}

0 commit comments

Comments
 (0)