Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Complete deletion of CnsFileAccessConfig instance if associated nodeVM instance is not found #3151

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion manifests/supervisorcluster/1.28/cns-csi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ rules:
verbs: ["create", "get", "list", "update", "watch"]
- apiGroups: ["cns.vmware.com"]
resources: ["cnsfilevolumeclients"]
verbs: ["get", "update", "create", "delete"]
verbs: ["get", "list", "update", "create", "delete"]
- apiGroups: ["cns.vmware.com"]
resources: ["cnsregistervolumes", "cnsunregistervolumes"]
verbs: ["get", "list", "watch", "update", "delete"]
Expand Down
2 changes: 1 addition & 1 deletion manifests/supervisorcluster/1.29/cns-csi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ rules:
verbs: ["create", "get", "list", "update", "watch"]
- apiGroups: ["cns.vmware.com"]
resources: ["cnsfilevolumeclients"]
verbs: ["get", "update", "create", "delete"]
verbs: ["get", "list", "update", "create", "delete"]
- apiGroups: ["cns.vmware.com"]
resources: ["cnsregistervolumes", "cnsunregistervolumes"]
verbs: ["get", "list", "watch", "update", "delete"]
Expand Down
2 changes: 1 addition & 1 deletion manifests/supervisorcluster/1.30/cns-csi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ rules:
verbs: ["create", "get", "list", "update", "watch"]
- apiGroups: ["cns.vmware.com"]
resources: ["cnsfilevolumeclients"]
verbs: ["get", "update", "create", "delete"]
verbs: ["get", "list", "update", "create", "delete"]
- apiGroups: ["cns.vmware.com"]
resources: ["cnsregistervolumes", "cnsunregistervolumes"]
verbs: ["get", "list", "watch", "update", "delete"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis"
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/cnsoperator/cnsfilevolumeclient/v1alpha1"
k8s "sigs.k8s.io/vsphere-csi-driver/v3/pkg/kubernetes"
cnsoperatortypes "sigs.k8s.io/vsphere-csi-driver/v3/pkg/syncer/cnsoperator/types"
)

// FileVolumeClient exposes an interface to support
Expand All @@ -49,6 +50,9 @@ type FileVolumeClient interface {
// clientVMIP for a given file volume. fileVolumeName is used
// to uniquely identify CnsFileVolumeClient instances.
RemoveClientVMFromIPList(ctx context.Context, fileVolumeName, clientVMName, clientVMIP string) error
// GetVMIPFromVMName returns the VMIP associated with a
// given client VM name.
GetVMIPFromVMName(ctx context.Context, fileVolumeName string, clientVMName string) (string, int, error)
}

// fileVolumeClient maintains a client to the API
Expand Down Expand Up @@ -186,6 +190,8 @@ func (f *fileVolumeClient) AddClientVMToIPList(ctx context.Context,
ObjectMeta: v1.ObjectMeta{
Name: instanceName,
Namespace: instanceNamespace,
// Add finalizer so that CnsFileVolumeClient instance doesn't get deleted abruptly
Finalizers: []string{cnsoperatortypes.CNSFinalizer},
},
Spec: v1alpha1.CnsFileVolumeClientSpec{
ExternalIPtoClientVms: map[string][]string{
Expand Down Expand Up @@ -282,9 +288,24 @@ func (f *fileVolumeClient) RemoveClientVMFromIPList(ctx context.Context,
delete(instance.Spec.ExternalIPtoClientVms, clientVMIP)
}
if len(instance.Spec.ExternalIPtoClientVms) == 0 {
log.Debugf("Deleting cnsfilevolumeclient instance %s from API server", fileVolumeName)
log.Infof("Deleting cnsfilevolumeclient instance %s from API server", fileVolumeName)
// Remove finalizer from CnsFileVolumeClient instance
err = removeFinalizer(ctx, f.client, instance)
if err != nil {
log.Errorf("failed to remove finalizer from cnsfilevolumeclient instance %s with error: %+v",
fileVolumeName, err)
}
err = f.client.Delete(ctx, instance)
if err != nil {
// In case of namespace deletion, we will have deletion timestamp added on the
// CnsFileVolumeClient instance. So, as soon as we delete finalizer, instance might
// get deleted immediately. In such cases we will get NotFound error here, return success
// if instance is already deleted.
if errors.IsNotFound(err) {
log.Infof("cnsfilevolumeclient instance %s seems to be already deleted.", fileVolumeName)
f.volumeLock.Delete(fileVolumeName)
return nil
}
log.Errorf("failed to delete cnsfilevolumeclient instance %s with error: %+v", fileVolumeName, err)
return err
}
Expand All @@ -302,3 +323,76 @@ func (f *fileVolumeClient) RemoveClientVMFromIPList(ctx context.Context,
log.Debugf("Could not find VM %s in list. Returning.", clientVMName)
return nil
}

// GetVMIPFromVMName returns the VM IP associated with a
// given client VM name.
// Callers need to specify fileVolumeName as a combination of
// "<SV-namespace>/<SV-PVC-name>". This combination is used to uniquely
// identify CnsFileVolumeClient instances.
// Returns an empty string if the instance doesn't exist OR if the
// input VM name is not present in this instance.
// Returns an error if any operations fails.
func (f *fileVolumeClient) GetVMIPFromVMName(ctx context.Context,
fileVolumeName string, clientVMName string) (string, int, error) {
log := logger.GetLogger(ctx)

log.Infof("Fetching VM IP from cnsfilevolumeclient %s for VM name %s", fileVolumeName, clientVMName)
actual, _ := f.volumeLock.LoadOrStore(fileVolumeName, &sync.Mutex{})
instanceLock, ok := actual.(*sync.Mutex)
if !ok {
return "", 0, fmt.Errorf("failed to cast lock for cnsfilevolumeclient instance: %s", fileVolumeName)
}
instanceLock.Lock()
defer instanceLock.Unlock()

instance := &v1alpha1.CnsFileVolumeClient{}
instanceNamespace, instanceName, err := cache.SplitMetaNamespaceKey(fileVolumeName)
if err != nil {
log.Errorf("failed to split key %s with error: %+v", fileVolumeName, err)
return "", 0, err
}
instanceKey := types.NamespacedName{
Namespace: instanceNamespace,
Name: instanceName,
}
err = f.client.Get(ctx, instanceKey, instance)
if err != nil {
log.Errorf("failed to get cnsfilevolumeclient instance %s with error: %+v", fileVolumeName, err)
return "", 0, err
}

// Verify if input VM name exists in Spec.ExternalIPtoClientVms
log.Debugf("Verifying if ExternalIPtoClientVms list has VM name: %s", clientVMName)
for vmIP, vmNames := range instance.Spec.ExternalIPtoClientVms {
for _, vmName := range vmNames {
if vmName == clientVMName {
return vmIP, len(vmNames), nil
}
}
}
return "", 0, err
}

// removeFinalizer will remove the CNS Finalizer = cns.vmware.com,
// from a given CnsFileVolumeClient instance.
func removeFinalizer(ctx context.Context, client client.Client,
instance *v1alpha1.CnsFileVolumeClient) error {
log := logger.GetLogger(ctx)
for i, finalizer := range instance.Finalizers {
if finalizer == cnsoperatortypes.CNSFinalizer {
log.Debugf("Removing %q finalizer from CnsFileVolumeClient instance with name: %q on namespace: %q",
cnsoperatortypes.CNSFinalizer, instance.Name, instance.Namespace)
instance.Finalizers = append(instance.Finalizers[:i], instance.Finalizers[i+1:]...)
// Update the instance after removing finalizer
err := client.Update(ctx, instance)
if err != nil {
log.Errorf("failed to update CnsFileVolumeClient instance with name: %q on namespace: %q",
instance.Name, instance.Namespace)
return err
}
break
}
}

return nil
}
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,54 @@ func (r *ReconcileCnsFileAccessConfig) Reconcile(ctx context.Context,
msg := fmt.Sprintf("Failed to get virtualmachine instance for the VM with name: %q. Error: %+v",
instance.Spec.VMName, err)
log.Error(msg)
// If virtualmachine instance is NotFound and if deletion timestamp is set on CnsFileAccessConfig instance,
// then proceed with the deletion of CnsFileAccessConfig instance.
if apierrors.IsNotFound(err) && instance.DeletionTimestamp != nil {
log.Infof("CnsFileAccessConfig instance %q has deletion timestamp set, but VM instance with "+
"name %q is not found. Processing the deletion of CnsFileAccessConfig instance.",
instance.Name, instance.Spec.VMName)
// Fetch the PVC and PV instance and get volume ID
skipConfigureVolumeACL := false
volumeID, err := cnsoperatorutil.GetVolumeID(ctx, r.client, instance.Spec.PvcName, instance.Namespace)
if err != nil {
if apierrors.IsNotFound(err) {
// If PVC instance is NotFound (deleted), then there is no need to configure ACL on file volume.
// TODO: When we support PV with retain policy on supervisor, then we need to configure ACL
// in cases where PVC is deleted but PV is not deleted.
skipConfigureVolumeACL = true
} else {
msg := fmt.Sprintf("Failed to get volumeID from pvcName: %q. Error: %+v", instance.Spec.PvcName, err)
log.Error(msg)
setInstanceError(ctx, r, instance, msg)
return reconcile.Result{RequeueAfter: timeout}, nil
}
}
if !skipConfigureVolumeACL {
err = r.removePermissionsForFileVolume(ctx, volumeID, instance)
if err != nil {
msg := fmt.Sprintf("Failed to remove file volume permissions with error: %+v", err)
log.Error(msg)
setInstanceError(ctx, r, instance, msg)
return reconcile.Result{RequeueAfter: timeout}, nil
}
}

// Remove finalizer from CnsFileAccessConfig CRD
removeFinalizerFromCRDInstance(ctx, instance)
err = updateCnsFileAccessConfig(ctx, r.client, instance)
if err != nil {
msg := fmt.Sprintf("failed to update CnsFileAccessConfig instance: %q on namespace: %q. Error: %+v",
instance.Name, instance.Namespace, err)
recordEvent(ctx, r, instance, v1.EventTypeWarning, msg)
return reconcile.Result{RequeueAfter: timeout}, nil
}
// Cleanup instance entry from backOffDuration map.
backOffDurationMapMutex.Lock()
delete(backOffDuration, instance.Name)
backOffDurationMapMutex.Unlock()
return reconcile.Result{}, nil
}

setInstanceError(ctx, r, instance, msg)
return reconcile.Result{RequeueAfter: timeout}, nil
}
Expand Down Expand Up @@ -412,6 +460,44 @@ func (r *ReconcileCnsFileAccessConfig) Reconcile(ctx context.Context,
return reconcile.Result{}, nil
}

// removePermissionsForFileVolume helps to remove net permissions for a given file volume.
// This method is used when we don't have VM instance. It fetches the VM IP from CNSFileVolumeClient
// instance for the VM name associated with CnsFileAccessConfig.
func (r *ReconcileCnsFileAccessConfig) removePermissionsForFileVolume(ctx context.Context,
volumeID string, instance *cnsfileaccessconfigv1alpha1.CnsFileAccessConfig) error {
log := logger.GetLogger(ctx)
cnsFileVolumeClientInstance, err := cnsfilevolumeclient.GetFileVolumeClientInstance(ctx)
if err != nil {
return logger.LogNewErrorf(log, "Failed to get CNSFileVolumeClient instance. Error: %+v", err)
}

vmIP, vmsAssociatedWithIP, err := cnsFileVolumeClientInstance.GetVMIPFromVMName(ctx,
instance.Namespace+"/"+instance.Spec.PvcName, instance.Spec.VMName)
if err != nil {
return logger.LogNewErrorf(log, "Failed to get VM IP from VM name in CNSFileVolumeClient instance. "+
"Error: %+v", err)
}
if vmsAssociatedWithIP == 1 {
// In case of VDS setup, we will always have 1 VM associated with IP. But, in case of
// SNAT setup, we may have multiple VMs associated with same IP. We will remove ACL
// permission only when it is the last VM associated with IP.
err = r.configureVolumeACLs(ctx, volumeID, vmIP, true)
if err != nil {
return logger.LogNewErrorf(log, "Failed to remove net permissions for file volume %q. Error: %+v",
volumeID, err)
}
}
err = cnsFileVolumeClientInstance.RemoveClientVMFromIPList(ctx,
instance.Namespace+"/"+instance.Spec.PvcName, instance.Spec.VMName, vmIP)
if err != nil {
return logger.LogNewErrorf(log, "Failed to remove VM %q with IP %q from IPList. Error: %+v",
instance.Spec.VMName, vmIP, err)
}
log.Infof("Successfully removed VM IP %q from IPList for CnsFileAccessConfig request with name: %q on "+
"namespace: %q", vmIP, instance.Name, instance.Namespace)
return nil
}

// configureNetPermissionsForFileVolume helps to add or remove net permissions
// for a given file volume. The callers of this method can remove or add net
// permissions by setting the parameter removePermission to true or false
Expand Down
76 changes: 76 additions & 0 deletions pkg/syncer/metadatasyncer.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ import (
commoncotypes "sigs.k8s.io/vsphere-csi-driver/v3/pkg/csi/service/common/commonco/types"
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/csi/service/logger"
csitypes "sigs.k8s.io/vsphere-csi-driver/v3/pkg/csi/types"
cnsfilevolumeclientv1alpha1 "sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/cnsoperator/cnsfilevolumeclient/v1alpha1"
triggercsifullsyncv1alpha1 "sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/cnsoperator/triggercsifullsync/v1alpha1"
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/cnsvolumeinfo"
cnsvolumeinfov1alpha1 "sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/cnsvolumeinfo/v1alpha1"
Expand All @@ -74,6 +75,7 @@ import (
csinodetopologyv1alpha1 "sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/csinodetopology/v1alpha1"
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/featurestates"
k8s "sigs.k8s.io/vsphere-csi-driver/v3/pkg/kubernetes"
cnsoperatortypes "sigs.k8s.io/vsphere-csi-driver/v3/pkg/syncer/cnsoperator/types"
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/syncer/storagepool"
)

Expand Down Expand Up @@ -290,6 +292,15 @@ func InitMetadataSyncer(ctx context.Context, clusterFlavor cnstypes.CnsClusterFl
}
}

// Check if finalizer is added on CnsFileVolumeClient CRs, if not then add a finalizer.
// We want to protect CnsFileVolumeClient from getting abruptly deleted, as it is being used
// in CnsFileAccessConfig CR. So, in case of upgrade we will add finalizer if it is missing.
err = addFinalizerOnCnsFileVolumeClientCRs(ctx)
if err != nil {
log.Errorf("Failed to add finalizer on CnsFileVolumeClient CRs. Error: %v", err)
return err
}

parseBool := func(CfgMap *v1.ConfigMap, featureName string, namespace string) bool {
var fssVal bool
if state, ok := CfgMap.Data[featureName]; ok {
Expand Down Expand Up @@ -986,6 +997,71 @@ func InitMetadataSyncer(ctx context.Context, clusterFlavor cnstypes.CnsClusterFl
return nil
}

// addFinalizerOnCnsFileVolumeClientCRs checks and adds finalizer on CnsFileVolumeClient CRs if it is missing.
func addFinalizerOnCnsFileVolumeClientCRs(ctx context.Context) error {
log := logger.GetLogger(ctx).WithOptions()
var stretchedSupervisor bool
if commonco.ContainerOrchestratorUtility.IsFSSEnabled(ctx, common.TKGsHA) {
clusterComputeResourceMoIds, err := common.GetClusterComputeResourceMoIds(ctx)
if err != nil {
log.Errorf("failed to get clusterComputeResourceMoIds. err: %v", err)
return err
}
if len(clusterComputeResourceMoIds) > 1 {
stretchedSupervisor = true
}
}
if !stretchedSupervisor || (stretchedSupervisor && IsWorkloadDomainIsolationSupported) {
// Prepare Config and NewClientForGroup for cnsOperatorClient
restConfig, err := config.GetConfig()
if err != nil {
log.Errorf("failed to get Kubernetes k8sconfig. Err: %+v", err)
return err
}
cnsOperatorClient, err := k8s.NewClientForGroup(ctx, restConfig, cnsoperatorv1alpha1.GroupName)
if err != nil {
log.Errorf("failed to create CnsOperator client. Err: %+v", err)
return err
}
// Get the list of all CnsFileVolumeClient CRs from all supervisor namespaces.
cnsFileVolumeClientList := &cnsfilevolumeclientv1alpha1.CnsFileVolumeClientList{}
err = cnsOperatorClient.List(ctx, cnsFileVolumeClientList)
if err != nil {
log.Errorf("failed to list CnsFileVolumeClient CRs from all supervisor namespaces. Error: %+v",
err)
return err
}

for _, cnsFileVolumeClient := range cnsFileVolumeClientList.Items {
// If cnsFileVolumeClient instance is marked for deletion, then no need to add finalizer
if cnsFileVolumeClient.DeletionTimestamp == nil {
cnsFinalizerExists := false
// Check if finalizer already exists.
for _, finalizer := range cnsFileVolumeClient.Finalizers {
if finalizer == cnsoperatortypes.CNSFinalizer {
cnsFinalizerExists = true
break
}
}
if !cnsFinalizerExists {
// Add finalizer.
cnsFileVolumeClient.Finalizers = append(cnsFileVolumeClient.Finalizers,
cnsoperatortypes.CNSFinalizer)
err = cnsOperatorClient.Update(ctx, &cnsFileVolumeClient)
if err != nil {
log.Errorf("failed to update CnsFileVolumeClient instance: %q on namespace: %q. Error: %+v",
cnsFileVolumeClient.Name, cnsFileVolumeClient.Namespace, err)
return err
}
log.Infof("successfully added finalizer on CnsFileVolumeClient instance: %q on namespace: %q",
cnsFileVolumeClient.Name, cnsFileVolumeClient.Namespace)
}
}
}
}
return nil
}

func initStorageQuotaPeriodicSync(ctx context.Context, metadataSyncer *metadataSyncInformer) error {
log := logger.GetLogger(ctx).WithOptions()
if int(PeriodicSyncIntervalInMin.Minutes()) == 0 {
Expand Down