Skip to content

Commit 3b299a3

Browse files
Fix race in RemoveRemoteObjects where remote LWS is not deleted (#9201)
When MultiKueueGC deletes the remote workload before the reconciler processes the deletion, RemoveRemoteObjects would return early on remWl == nil without deleting the remote controller object (e.g., LWS). Move the DeleteRemoteObject call before the nil check to ensure the remote controller object is always deleted regardless of remote workload state.

Signed-off-by: Sohan Kunkerkar <sohank2602@gmail.com>
1 parent 78380d5 commit 3b299a3

File tree

1 file changed

+7
-3
lines changed

1 file changed

+7
-3
lines changed

pkg/controller/admissionchecks/multikueue/workload.go

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -127,14 +127,18 @@ func (g *wlGroup) bestMatchByCondition(conditionType string) (*metav1.Condition,
127127
return bestMatchCond, bestMatchRemote
128128
}
129129

130+
// RemoveRemoteObjects deletes the remote controller object and workload for a cluster.
131+
// The controller object is deleted first to handle cases where GC has already removed
132+
// the remote workload.
130133
func (g *wlGroup) RemoveRemoteObjects(ctx context.Context, cluster string) error {
134+
if err := g.jobAdapter.DeleteRemoteObject(ctx, g.remoteClients[cluster].client, g.controllerKey); err != nil {
135+
return fmt.Errorf("deleting remote controller object: %w", err)
136+
}
137+
131138
remWl := g.remotes[cluster]
132139
if remWl == nil {
133140
return nil
134141
}
135-
if err := g.jobAdapter.DeleteRemoteObject(ctx, g.remoteClients[cluster].client, g.controllerKey); err != nil {
136-
return fmt.Errorf("deleting remote controller object: %w", err)
137-
}
138142

139143
if controllerutil.RemoveFinalizer(remWl, kueue.ResourceInUseFinalizerName) {
140144
if err := g.remoteClients[cluster].client.Update(ctx, remWl); err != nil {

0 commit comments

Comments
 (0)