Skip to content

Commit 43d4287

Browse files
Pramodh Pallapothu and eriknordmark
authored and committed
Cleanup Replica sets only in single node mode
This commit fixes a serious bug in cluster configuration. Basically, whenever domainmgr restarts, there was legacy code that deleted all running VMs. That was implemented to handle a corner case where apps were deleted while the device was powered off. We want to delete any app config in kubernetes as soon as the device reboots. The assumption is that the device will resync config with the controller. That works fine for single-node cases. In a cluster setup, apps would have moved to other nodes when this device went down for reboot. Because of this bug, as soon as this device reboots it deletes VMs running on other nodes, basically bouncing all VMs. Added a new API to check if this device is in cluster mode and skip the deletion of VMs in that case. Also, we no longer deal directly with VMs; it's all VM replica sets now. So renamed CleanupVMs to CleanupVMRs. Moved the const to types/locationconstants.go. Changed the logging to Debugf. Signed-off-by: Pramodh Pallapothu <pramodh@zededa.com>
1 parent 75a8739 commit 43d4287

File tree

4 files changed

+77
-14
lines changed

4 files changed

+77
-14
lines changed

pkg/pillar/cmd/domainmgr/domainmgr.go

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -645,8 +645,20 @@ func Run(ps *pubsub.PubSub, loggerArg *logrus.Logger, logArg *base.LogObject, ar
645645
} else {
646646
// If device rebooted abruptly, kubernetes did not get time to stop the VMs.
647647
// They will be in failed state, so clean them up if they exists.
648-
count, err := kubeapi.CleanupStaleVMI()
649-
log.Noticef("domainmgr cleanup vmi count %d, %v", count, err)
648+
// We have to do this only on single node config. In the cluster setup the VMs
649+
// will be running on some other node after failover.
650+
// Even in single node one might wonder why we need to delete VMI when node is coming up
651+
// after reboot !! This is for very corner case, if the user deleted the app in the controller when
652+
// the device is powered off. Next config refresh will see app is gone and domainmgr will not do anything.
653+
// But kubernetes thinks app is still running and starts. So its safe to delete all replica sets at the start
654+
// on single node installs.
655+
clusterMode := kubeapi.IsClusterMode()
656+
657+
if !clusterMode {
658+
count, err := kubeapi.CleanupStaleVMIRs()
659+
log.Noticef("domainmgr cleanup vmirs count %d, %v", count, err)
660+
}
661+
650662
}
651663
}
652664

pkg/pillar/kubeapi/kubeapi.go

Lines changed: 53 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@ import (
1616
netclientset "github.com/k8snetworkplumbingwg/network-attachment-definition-client/pkg/client/clientset/versioned"
1717
"github.com/lf-edge/eve/pkg/pillar/base"
1818
"github.com/lf-edge/eve/pkg/pillar/pubsub"
19+
"github.com/lf-edge/eve/pkg/pillar/types"
20+
"github.com/sirupsen/logrus"
1921
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2022
"k8s.io/apimachinery/pkg/util/wait"
2123
"k8s.io/client-go/kubernetes"
@@ -364,8 +366,12 @@ func WaitForPVCReady(pvcName string, log *base.LogObject) error {
364366
return fmt.Errorf("WaitForPVCReady: time expired count %d, err %v", count, err2)
365367
}
366368

367-
// CleanupStaleVMI : delete all VMIs. Used by domainmgr on startup.
368-
func CleanupStaleVMI() (int, error) {
369+
// CleanupStaleVMIRs : delete all VMI replica sets on single node. Used by domainmgr on startup.
370+
// There are two replica set types.
371+
// 1) vmirs (VM replica sets)
372+
// 2) podrs (Pod replica sets, basically native containers)
373+
// Iterate through all replicasets and delete those.
374+
func CleanupStaleVMIRs() (int, error) {
369375
// Only wait for kubevirt if we are not in base-k3s mode.
370376
if err := registrationAppliedToCluster(); err == nil {
371377
// In base k3s mode, pillar not deploying kubevirt VM app instances
@@ -377,25 +383,63 @@ func CleanupStaleVMI() (int, error) {
377383
return 0, fmt.Errorf("couldn't get the Kube Config: %v", err)
378384
}
379385

380-
clientset, err := kubecli.GetKubevirtClientFromRESTConfig(kubeconfig)
386+
virtClient, err := kubecli.GetKubevirtClientFromRESTConfig(kubeconfig)
381387
if err != nil {
382388
return 0, fmt.Errorf("couldn't get the Kube client Config: %v", err)
383389
}
384390

385391
ctx := context.Background()
386392

387-
// get a list of our VMs
388-
vmiList, err := clientset.VirtualMachineInstance(EVEKubeNameSpace).List(ctx, &metav1.ListOptions{})
393+
// get a list of our VM replica sets
394+
vmrsList, err := virtClient.ReplicaSet(EVEKubeNameSpace).List(metav1.ListOptions{})
389395
if err != nil {
390-
return 0, fmt.Errorf("couldn't get the Kubevirt VMs: %v", err)
396+
return 0, fmt.Errorf("couldn't get the Kubevirt VM replcia sets: %v", err)
391397
}
392398

393399
var count int
394-
for _, vmi := range vmiList.Items {
395-
if err := clientset.VirtualMachineInstance(EVEKubeNameSpace).Delete(ctx, vmi.ObjectMeta.Name, &metav1.DeleteOptions{}); err != nil {
396-
return count, fmt.Errorf("delete vmi error: %v", err)
400+
for _, vmirs := range vmrsList.Items {
401+
402+
if err := virtClient.ReplicaSet(EVEKubeNameSpace).Delete(vmirs.ObjectMeta.Name, &metav1.DeleteOptions{}); err != nil {
403+
return count, fmt.Errorf("delete vmirs error: %v", err)
397404
}
398405
count++
399406
}
407+
408+
// Get list of native container pods replica sets
409+
podrsList, err := virtClient.AppsV1().ReplicaSets(EVEKubeNameSpace).List(ctx, metav1.ListOptions{})
410+
if err != nil {
411+
return count, fmt.Errorf("couldn't get the pod replica sets: %v", err)
412+
}
413+
414+
for _, podrs := range podrsList.Items {
415+
416+
err := virtClient.AppsV1().ReplicaSets(EVEKubeNameSpace).Delete(ctx, podrs.ObjectMeta.Name, metav1.DeleteOptions{})
417+
if err != nil {
418+
return count, fmt.Errorf("delete podrs error: %v", err)
419+
}
420+
count++
421+
}
422+
400423
return count, nil
401424
}
425+
426+
// IsClusterMode : Returns true if this node is part of a cluster by checking EdgeNodeClusterConfigFile
427+
// If EdgeNodeClusterConfigFile exists and is > 0 bytes then this node is part of a cluster.
428+
func IsClusterMode() bool {
429+
430+
fileInfo, err := os.Stat(types.EdgeNodeClusterConfigFile)
431+
if os.IsNotExist(err) {
432+
logrus.Debugf("This node is not in cluster mode")
433+
return false
434+
} else if err != nil {
435+
logrus.Errorf("Error checking file '%s': %v", types.EdgeNodeClusterConfigFile, err)
436+
return false
437+
}
438+
439+
if fileInfo.Size() > 0 {
440+
logrus.Debugf("This node is in cluster mode")
441+
return true
442+
}
443+
444+
return false
445+
}

pkg/pillar/kubeapi/nokube.go

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,9 @@ func WaitForKubernetes(string, *pubsub.PubSub, *time.Ticker,
1919
panic("WaitForKubernetes is not built")
2020
}
2121

22-
// CleanupStaleVMI in this file is just stub for non-kubevirt hypervisors.
23-
func CleanupStaleVMI() (int, error) {
24-
panic("CleanupStaleVMI is not built")
22+
// CleanupStaleVMIRs in this file is just stub for non-kubevirt hypervisors.
23+
func CleanupStaleVMIRs() (int, error) {
24+
panic("CleanupStaleVMIRs is not built")
2525
}
2626

2727
// GetPVCList in this file is just stub for non-kubevirt hypervisors.
@@ -40,3 +40,8 @@ func GetNodeDrainStatus(pubsub.Subscription, *base.LogObject) *NodeDrainStatus {
4040
// No need to query for inprogress operations, just a noop
4141
return &NodeDrainStatus{Status: NOTSUPPORTED}
4242
}
43+
44+
// IsClusterMode is a stub for non-kubevirt builds
45+
func IsClusterMode() bool {
46+
return false
47+
}

pkg/pillar/types/locationconsts.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,8 @@ const (
144144

145145
// LocalActiveAppConfigDir - directory to put JSON of the apps that are running.
146146
LocalActiveAppConfigDir = "/persist/vault/active-app-instance-config/"
147+
// EdgeNodeClusterConfigFile - the file which contains edgenodecluster config
148+
EdgeNodeClusterConfigFile = PersistStatusDir + "/zedagent/EdgeNodeClusterConfig/global.json"
147149
)
148150

149151
var (

0 commit comments

Comments
 (0)