Skip to content

Commit 1ae57fc

Browse files
jrhyness and claude committed
fix: delete workload CRs before operators to prevent stuck ReplicaSets
Fixes an issue where force-deleting the namespace leaves ReplicaSets in a broken state (desired != current, but no pods created): the force_delete_namespace function removes CR finalizers, which prevents the operators from cleaning up the resources they own.

Changes:
- Add a new step 10 that deletes the workload CRs (Kuadrant, Limitador, Authorino) BEFORE deleting the operators
- This allows the operators to cleanly delete Deployments/ReplicaSets/Pods through the normal Kubernetes ownership cascade
- Renumber the subsequent steps (11-17) for consistency

Order is now:
1. Delete workload CRs (triggers operator cleanup)
2. Delete OLM resources (operators, CSVs, catalog)
3. Force-delete namespace (cleanup stragglers)

This prevents the stuck ReplicaSet issue where desired=1 but current=0 and no pod creation attempts occur on subsequent deployments.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
1 parent 819feab commit 1ae57fc

File tree

1 file changed

+29
-15
lines changed

1 file changed

+29
-15
lines changed

.github/hack/cleanup-odh.sh

Lines changed: 29 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -117,8 +117,22 @@ echo "9. Deleting models-as-a-service namespace..."
117117
force_delete_namespace "models-as-a-service" \
118118
"maasauthpolicies.maas.opendatahub.io" "maassubscriptions.maas.opendatahub.io"
119119

120-
# 10. Delete policy engine OLM resources (before namespace deletion)
121-
echo "10. Cleaning up policy engine OLM resources..."
120+
# 10. Delete policy engine workload CRs (before operator cleanup)
121+
# This allows operators to cleanly delete Deployments/ReplicaSets before we delete the operators themselves
122+
echo "10. Deleting policy engine workload CRs..."
123+
for policy_ns in kuadrant-system rh-connectivity-link; do
124+
if kubectl get namespace "$policy_ns" &>/dev/null; then
125+
echo " Deleting workload CRs in $policy_ns..."
126+
# Delete high-level CRs to trigger operator cleanup of owned resources
127+
kubectl delete kuadrant --all -n "$policy_ns" --ignore-not-found --timeout=60s 2>/dev/null || true
128+
kubectl delete limitador --all -n "$policy_ns" --ignore-not-found --timeout=60s 2>/dev/null || true
129+
kubectl delete authorino --all -n "$policy_ns" --ignore-not-found --timeout=60s 2>/dev/null || true
130+
echo " ✅ Workload CRs deleted from $policy_ns"
131+
fi
132+
done
133+
134+
# 11. Delete policy engine OLM resources (before namespace deletion)
135+
echo "11. Cleaning up policy engine OLM resources..."
122136
# Kuadrant cleanup
123137
if kubectl get namespace kuadrant-system &>/dev/null; then
124138
echo " Cleaning up Kuadrant OLM resources..."
@@ -139,15 +153,15 @@ if kubectl get namespace rh-connectivity-link &>/dev/null; then
139153
echo " ✅ RHCL OLM resources cleaned"
140154
fi
141155

142-
# 11. Delete policy engine namespaces (Kuadrant or RHCL)
156+
# 12. Delete policy engine namespaces (Kuadrant or RHCL)
143157
for policy_ns in kuadrant-system rh-connectivity-link; do
144-
echo "11. Deleting $policy_ns namespace (if installed)..."
158+
echo "12. Deleting $policy_ns namespace (if installed)..."
145159
force_delete_namespace "$policy_ns" \
146160
"authorinos.operator.authorino.kuadrant.io" "kuadrants.kuadrant.io" "limitadors.limitador.kuadrant.io"
147161
done
148162

149-
# 12. Delete Keycloak identity provider (if installed)
150-
echo "12. Deleting Keycloak namespace (if installed)..."
163+
# 13. Delete Keycloak identity provider (if installed)
164+
echo "13. Deleting Keycloak namespace (if installed)..."
151165
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && cd ../.. && pwd)"
152166
if [[ -f "${SCRIPT_DIR}/scripts/cleanup-keycloak.sh" ]]; then
153167
# Pass --delete-crds if --include-crds was specified for this script
@@ -165,33 +179,33 @@ else
165179
fi
166180
fi
167181

168-
# 13. Delete llm namespace and model resources
169-
echo "13. Deleting LLM models and namespace..."
182+
# 14. Delete llm namespace and model resources
183+
echo "14. Deleting LLM models and namespace..."
170184
force_delete_namespace "llm" "llminferenceservice" "inferenceservice" "maasmodelrefs.maas.opendatahub.io"
171185

172-
# 14. Delete gateway resources in openshift-ingress
173-
echo "14. Deleting gateway resources..."
186+
# 15. Delete gateway resources in openshift-ingress
187+
echo "15. Deleting gateway resources..."
174188
kubectl delete gateway maas-default-gateway -n openshift-ingress --ignore-not-found 2>/dev/null || true
175189
kubectl delete envoyfilter -n openshift-ingress -l kuadrant.io/managed=true --ignore-not-found 2>/dev/null || true
176190
kubectl delete envoyfilter kuadrant-auth-tls-fix -n openshift-ingress --ignore-not-found 2>/dev/null || true
177191
kubectl delete authpolicy -n openshift-ingress --all --ignore-not-found 2>/dev/null || true
178192
kubectl delete ratelimitpolicy -n openshift-ingress --all --ignore-not-found 2>/dev/null || true
179193
kubectl delete tokenratelimitpolicy -n openshift-ingress --all --ignore-not-found 2>/dev/null || true
180194

181-
# 15. Delete MaaS RBAC (ClusterRoles, ClusterRoleBindings - can conflict with other managers)
182-
echo "15. Deleting MaaS RBAC..."
195+
# 16. Delete MaaS RBAC (ClusterRoles, ClusterRoleBindings - can conflict with other managers)
196+
echo "16. Deleting MaaS RBAC..."
183197
kubectl delete clusterrolebinding maas-api maas-controller-rolebinding --ignore-not-found 2>/dev/null || true
184198
kubectl delete clusterrole maas-api maas-controller-role --ignore-not-found 2>/dev/null || true
185199

186-
# 16. Optionally delete CRDs
200+
# 17. Optionally delete CRDs
187201
if $INCLUDE_CRDS; then
188-
echo "16. Deleting ODH CRDs..."
202+
echo "17. Deleting ODH CRDs..."
189203
kubectl delete crd datascienceclusters.datasciencecluster.opendatahub.io --ignore-not-found 2>/dev/null || true
190204
kubectl delete crd dscinitializations.dscinitialization.opendatahub.io --ignore-not-found 2>/dev/null || true
191205
kubectl delete crd datasciencepipelinesapplications.datasciencepipelinesapplications.opendatahub.io --ignore-not-found 2>/dev/null || true
192206
# Add more CRDs as needed
193207
else
194-
echo "16. Skipping CRD deletion (use --include-crds to remove CRDs)"
208+
echo "17. Skipping CRD deletion (use --include-crds to remove CRDs)"
195209
fi
196210

197211
echo ""

0 commit comments

Comments
 (0)