Skip to content

Commit 3620790

Browse files
fix: prevent finalizer from blocking subscription deletion when TRLP is in bad state
handleDeletion now uses best-effort cleanup: if reconcileTRLPForModel fails (e.g. Kuadrant webhook rejects a rebuild), it falls back to force-deleting the TRLP by labels. If that also fails, the finalizer is still removed so the subscription deletion is never permanently blocked. An orphaned TRLP will be garbage-collected via its HTTPRoute owner reference. Signed-off-by: Wen Liang <liangwen12year@gmail.com>
1 parent fae753e commit 3620790

2 files changed

Lines changed: 135 additions & 8 deletions

File tree

maas-controller/pkg/controller/maas/maassubscription_controller.go

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -684,9 +684,9 @@ func (r *MaaSSubscriptionReconciler) deleteModelTRLP(ctx context.Context, log lo
684684

685685
func (r *MaaSSubscriptionReconciler) handleDeletion(ctx context.Context, log logr.Logger, subscription *maasv1alpha1.MaaSSubscription) (ctrl.Result, error) {
686686
if controllerutil.ContainsFinalizer(subscription, maasSubscriptionFinalizer) {
687-
// For each model referenced by this subscription, rebuild the aggregated TokenRateLimitPolicy
688-
// without the deleted subscription's limits. If no other subscriptions reference the model,
689-
// the TRLP will be deleted. This ensures zero-downtime rate limiting during subscription removal.
687+
// Best-effort TRLP cleanup: try to rebuild cleanly, fall back to force-delete,
688+
// and proceed with finalizer removal even if cleanup fails entirely.
689+
// An orphaned TRLP is preferable to a permanently stuck subscription deletion.
690690
seen := make(map[string]struct{}, len(subscription.Spec.ModelRefs))
691691
for _, modelRef := range subscription.Spec.ModelRefs {
692692
k := modelRef.Namespace + "/" + modelRef.Name
@@ -696,14 +696,14 @@ func (r *MaaSSubscriptionReconciler) handleDeletion(ctx context.Context, log log
696696
seen[k] = struct{}{}
697697
log.Info("Rebuilding TokenRateLimitPolicy without deleted subscription", "model", modelRef.Namespace+"/"+modelRef.Name, "subscription", subscription.Name)
698698
if err := r.reconcileTRLPForModel(ctx, log, modelRef.Namespace, modelRef.Name); err != nil {
699-
log.Error(err, "failed to reconcile TokenRateLimitPolicy during deletion, will retry", "model", modelRef.Namespace+"/"+modelRef.Name)
700-
return ctrl.Result{}, err
699+
log.Error(err, "TRLP reconciliation failed during deletion, falling back to force-delete", "model", modelRef.Namespace+"/"+modelRef.Name)
700+
if delErr := r.deleteModelTRLP(ctx, log, modelRef.Namespace, modelRef.Name); delErr != nil {
701+
log.Error(delErr, "TRLP force-delete also failed, proceeding with finalizer removal anyway", "model", modelRef.Namespace+"/"+modelRef.Name)
702+
}
701703
}
702704
}
703-
// Also clean up stale TRLPs from modelRefs that were removed
704-
// before the CR was deleted (edge case: edit + delete before reconcile).
705705
if err := r.cleanupStaleTRLPs(ctx, log, subscription); err != nil {
706-
return ctrl.Result{}, err
706+
log.Error(err, "stale TRLP cleanup failed during deletion, proceeding with finalizer removal anyway")
707707
}
708708
controllerutil.RemoveFinalizer(subscription, maasSubscriptionFinalizer)
709709
if err := r.Update(ctx, subscription); err != nil {

maas-controller/pkg/controller/maas/maassubscription_controller_test.go

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ package maas
1818

1919
import (
2020
"context"
21+
"errors"
2122
"fmt"
2223
"strings"
2324
"testing"
@@ -31,6 +32,7 @@ import (
3132
ctrl "sigs.k8s.io/controller-runtime"
3233
"sigs.k8s.io/controller-runtime/pkg/client"
3334
"sigs.k8s.io/controller-runtime/pkg/client/fake"
35+
"sigs.k8s.io/controller-runtime/pkg/client/interceptor"
3436
gatewayapiv1 "sigs.k8s.io/gateway-api/apis/v1"
3537

3638
maasv1alpha1 "github.com/opendatahub-io/models-as-a-service/maas-controller/api/maas/v1alpha1"
@@ -1346,3 +1348,128 @@ func TestMaaSSubscriptionReconciler_WindowValuesInTRLP(t *testing.T) {
13461348
})
13471349
}
13481350
}
1351+
1352+
// TestHandleDeletion_ReconcileFails_FallsBackToDeleteTRLP verifies that when
1353+
// reconcileTRLPForModel fails during deletion (e.g. Kuadrant webhook rejects the
1354+
// TRLP update), the handler falls back to force-deleting the TRLP via labels and
1355+
// still removes the finalizer so the subscription deletion is not permanently blocked.
1356+
func TestHandleDeletion_ReconcileFails_FallsBackToDeleteTRLP(t *testing.T) {
1357+
const (
1358+
modelName = "shared-model"
1359+
namespace = "default"
1360+
trlpName = "maas-trlp-" + modelName
1361+
)
1362+
1363+
// MaaSModelRef + HTTPRoute are required so reconcileTRLPForModel gets past
1364+
// findHTTPRouteForModel and actually attempts the TRLP Update (which we fail).
1365+
model := &maasv1alpha1.MaaSModelRef{
1366+
ObjectMeta: metav1.ObjectMeta{Name: modelName, Namespace: namespace},
1367+
Spec: maasv1alpha1.MaaSModelSpec{ModelRef: maasv1alpha1.ModelReference{Kind: "ExternalModel", Name: modelName}},
1368+
}
1369+
route := &gatewayapiv1.HTTPRoute{
1370+
ObjectMeta: metav1.ObjectMeta{Name: modelName, Namespace: namespace},
1371+
}
1372+
existingTRLP := newPreexistingTRLP(trlpName, namespace, modelName, map[string]string{})
1373+
subA := newMaaSSubscription("sub-a", namespace, "team-a", modelName, 100)
1374+
subA.Finalizers = []string{maasSubscriptionFinalizer}
1375+
subB := newMaaSSubscription("sub-b", namespace, "team-b", modelName, 200)
1376+
1377+
c := fake.NewClientBuilder().
1378+
WithScheme(scheme).
1379+
WithRESTMapper(testRESTMapper()).
1380+
WithObjects(model, route, subA, subB, existingTRLP).
1381+
WithIndex(&maasv1alpha1.MaaSSubscription{}, "spec.modelRef", subscriptionModelRefIndexer).
1382+
WithInterceptorFuncs(interceptor.Funcs{
1383+
Update: func(ctx context.Context, cl client.WithWatch, obj client.Object, opts ...client.UpdateOption) error {
1384+
if u, ok := obj.(*unstructured.Unstructured); ok && u.GetKind() == "TokenRateLimitPolicy" {
1385+
return errors.New("simulated Kuadrant webhook rejection")
1386+
}
1387+
return cl.Update(ctx, obj, opts...)
1388+
},
1389+
}).
1390+
Build()
1391+
1392+
if err := c.Delete(context.Background(), subA); err != nil {
1393+
t.Fatalf("Delete sub-a: %v", err)
1394+
}
1395+
1396+
r := &MaaSSubscriptionReconciler{Client: c, Scheme: scheme}
1397+
req := ctrl.Request{NamespacedName: types.NamespacedName{Name: "sub-a", Namespace: namespace}}
1398+
if _, err := r.Reconcile(context.Background(), req); err != nil {
1399+
t.Fatalf("Reconcile: unexpected error: %v", err)
1400+
}
1401+
1402+
// TRLP should be deleted via the fallback deleteModelTRLP path
1403+
got := &unstructured.Unstructured{}
1404+
got.SetGroupVersionKind(schema.GroupVersionKind{Group: "kuadrant.io", Version: "v1alpha1", Kind: "TokenRateLimitPolicy"})
1405+
if err := c.Get(context.Background(), types.NamespacedName{Name: trlpName, Namespace: namespace}, got); !apierrors.IsNotFound(err) {
1406+
t.Errorf("expected TRLP to be deleted via fallback, but got err: %v", err)
1407+
}
1408+
1409+
// Finalizer should be removed (subscription should be gone from the store)
1410+
var sub maasv1alpha1.MaaSSubscription
1411+
if err := c.Get(context.Background(), types.NamespacedName{Name: "sub-a", Namespace: namespace}, &sub); !apierrors.IsNotFound(err) {
1412+
t.Errorf("expected sub-a to be fully deleted (finalizer removed), got err: %v", err)
1413+
}
1414+
}
1415+
1416+
// TestHandleDeletion_AllCleanupFails_StillRemovesFinalizer verifies that even when
1417+
// both TRLP reconciliation and force-delete fail, the finalizer is still removed
1418+
// so the subscription deletion is never permanently blocked.
1419+
func TestHandleDeletion_AllCleanupFails_StillRemovesFinalizer(t *testing.T) {
1420+
const (
1421+
modelName = "shared-model"
1422+
namespace = "default"
1423+
trlpName = "maas-trlp-" + modelName
1424+
)
1425+
1426+
model := &maasv1alpha1.MaaSModelRef{
1427+
ObjectMeta: metav1.ObjectMeta{Name: modelName, Namespace: namespace},
1428+
Spec: maasv1alpha1.MaaSModelSpec{ModelRef: maasv1alpha1.ModelReference{Kind: "ExternalModel", Name: modelName}},
1429+
}
1430+
route := &gatewayapiv1.HTTPRoute{
1431+
ObjectMeta: metav1.ObjectMeta{Name: modelName, Namespace: namespace},
1432+
}
1433+
existingTRLP := newPreexistingTRLP(trlpName, namespace, modelName, map[string]string{})
1434+
subA := newMaaSSubscription("sub-a", namespace, "team-a", modelName, 100)
1435+
subA.Finalizers = []string{maasSubscriptionFinalizer}
1436+
subB := newMaaSSubscription("sub-b", namespace, "team-b", modelName, 200)
1437+
1438+
// Fail TRLP Update (breaks reconcileTRLPForModel) AND TRLP List (breaks deleteModelTRLP fallback).
1439+
c := fake.NewClientBuilder().
1440+
WithScheme(scheme).
1441+
WithRESTMapper(testRESTMapper()).
1442+
WithObjects(model, route, subA, subB, existingTRLP).
1443+
WithIndex(&maasv1alpha1.MaaSSubscription{}, "spec.modelRef", subscriptionModelRefIndexer).
1444+
WithInterceptorFuncs(interceptor.Funcs{
1445+
Update: func(ctx context.Context, cl client.WithWatch, obj client.Object, opts ...client.UpdateOption) error {
1446+
if u, ok := obj.(*unstructured.Unstructured); ok && u.GetKind() == "TokenRateLimitPolicy" {
1447+
return errors.New("simulated TRLP update failure")
1448+
}
1449+
return cl.Update(ctx, obj, opts...)
1450+
},
1451+
List: func(ctx context.Context, cl client.WithWatch, list client.ObjectList, opts ...client.ListOption) error {
1452+
if ul, ok := list.(*unstructured.UnstructuredList); ok && ul.GetKind() == "TokenRateLimitPolicyList" {
1453+
return errors.New("simulated TRLP list failure")
1454+
}
1455+
return cl.List(ctx, list, opts...)
1456+
},
1457+
}).
1458+
Build()
1459+
1460+
if err := c.Delete(context.Background(), subA); err != nil {
1461+
t.Fatalf("Delete sub-a: %v", err)
1462+
}
1463+
1464+
r := &MaaSSubscriptionReconciler{Client: c, Scheme: scheme}
1465+
req := ctrl.Request{NamespacedName: types.NamespacedName{Name: "sub-a", Namespace: namespace}}
1466+
if _, err := r.Reconcile(context.Background(), req); err != nil {
1467+
t.Fatalf("Reconcile should not return error even when all cleanup fails, got: %v", err)
1468+
}
1469+
1470+
// Finalizer should be removed despite all cleanup failures
1471+
var sub maasv1alpha1.MaaSSubscription
1472+
if err := c.Get(context.Background(), types.NamespacedName{Name: "sub-a", Namespace: namespace}, &sub); !apierrors.IsNotFound(err) {
1473+
t.Errorf("expected sub-a to be fully deleted (finalizer removed despite cleanup failures), got err: %v", err)
1474+
}
1475+
}

0 commit comments

Comments
 (0)