Skip to content

Commit 7586cba

Browse files
committed
Complete deletion of CnsFileAccessConfig instance if associated nodeVM instance is not found
1 parent 4479e24 commit 7586cba

File tree

6 files changed

+260
-4
lines changed

6 files changed

+260
-4
lines changed

manifests/supervisorcluster/1.28/cns-csi.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ rules:
4747
verbs: ["create", "get", "list", "update", "watch"]
4848
- apiGroups: ["cns.vmware.com"]
4949
resources: ["cnsfilevolumeclients"]
50-
verbs: ["get", "update", "create", "delete"]
50+
verbs: ["get", "list", "update", "create", "delete"]
5151
- apiGroups: ["cns.vmware.com"]
5252
resources: ["cnsregistervolumes", "cnsunregistervolumes"]
5353
verbs: ["get", "list", "watch", "update", "delete"]

manifests/supervisorcluster/1.29/cns-csi.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ rules:
4747
verbs: ["create", "get", "list", "update", "watch"]
4848
- apiGroups: ["cns.vmware.com"]
4949
resources: ["cnsfilevolumeclients"]
50-
verbs: ["get", "update", "create", "delete"]
50+
verbs: ["get", "list", "update", "create", "delete"]
5151
- apiGroups: ["cns.vmware.com"]
5252
resources: ["cnsregistervolumes", "cnsunregistervolumes"]
5353
verbs: ["get", "list", "watch", "update", "delete"]

manifests/supervisorcluster/1.30/cns-csi.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ rules:
4747
verbs: ["create", "get", "list", "update", "watch"]
4848
- apiGroups: ["cns.vmware.com"]
4949
resources: ["cnsfilevolumeclients"]
50-
verbs: ["get", "update", "create", "delete"]
50+
verbs: ["get", "list", "update", "create", "delete"]
5151
- apiGroups: ["cns.vmware.com"]
5252
resources: ["cnsregistervolumes", "cnsunregistervolumes"]
5353
verbs: ["get", "list", "watch", "update", "delete"]

pkg/internalapis/cnsoperator/cnsfilevolumeclient/cnsfilevolumeclient.go

+95-1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ import (
3131
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis"
3232
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/cnsoperator/cnsfilevolumeclient/v1alpha1"
3333
k8s "sigs.k8s.io/vsphere-csi-driver/v3/pkg/kubernetes"
34+
cnsoperatortypes "sigs.k8s.io/vsphere-csi-driver/v3/pkg/syncer/cnsoperator/types"
3435
)
3536

3637
// FileVolumeClient exposes an interface to support
@@ -49,6 +50,9 @@ type FileVolumeClient interface {
4950
// clientVMIP for a given file volume. fileVolumeName is used
5051
// to uniquely identify CnsFileVolumeClient instances.
5152
RemoveClientVMFromIPList(ctx context.Context, fileVolumeName, clientVMName, clientVMIP string) error
53+
// GetVMIPFromVMName returns the VMIP associated with a
54+
// given client VM name.
55+
GetVMIPFromVMName(ctx context.Context, fileVolumeName string, clientVMName string) (string, int, error)
5256
}
5357

5458
// fileVolumeClient maintains a client to the API
@@ -186,6 +190,8 @@ func (f *fileVolumeClient) AddClientVMToIPList(ctx context.Context,
186190
ObjectMeta: v1.ObjectMeta{
187191
Name: instanceName,
188192
Namespace: instanceNamespace,
193+
// Add finalizer so that CnsFileVolumeClient instance doesn't get deleted abruptly
194+
Finalizers: []string{cnsoperatortypes.CNSFinalizer},
189195
},
190196
Spec: v1alpha1.CnsFileVolumeClientSpec{
191197
ExternalIPtoClientVms: map[string][]string{
@@ -282,9 +288,24 @@ func (f *fileVolumeClient) RemoveClientVMFromIPList(ctx context.Context,
282288
delete(instance.Spec.ExternalIPtoClientVms, clientVMIP)
283289
}
284290
if len(instance.Spec.ExternalIPtoClientVms) == 0 {
285-
log.Debugf("Deleting cnsfilevolumeclient instance %s from API server", fileVolumeName)
291+
log.Infof("Deleting cnsfilevolumeclient instance %s from API server", fileVolumeName)
292+
// Remove finalizer from CnsFileVolumeClient instance
293+
err = removeFinalizer(ctx, f.client, instance)
294+
if err != nil {
295+
log.Errorf("failed to remove finalizer from cnsfilevolumeclient instance %s with error: %+v",
296+
fileVolumeName, err)
297+
}
286298
err = f.client.Delete(ctx, instance)
287299
if err != nil {
300+
// In case of namespace deletion, we will have deletion timestamp added on the
301+
// CnsFileVolumeClient instance. So, as soon as we delete finalizer, instance might
302+
// get deleted immediately. In such cases we will get NotFound error here, return success
303+
// if instance is already deleted.
304+
if errors.IsNotFound(err) {
305+
log.Infof("cnsfilevolumeclient instance %s seems to be already deleted.", fileVolumeName)
306+
f.volumeLock.Delete(fileVolumeName)
307+
return nil
308+
}
288309
log.Errorf("failed to delete cnsfilevolumeclient instance %s with error: %+v", fileVolumeName, err)
289310
return err
290311
}
@@ -302,3 +323,76 @@ func (f *fileVolumeClient) RemoveClientVMFromIPList(ctx context.Context,
302323
log.Debugf("Could not find VM %s in list. Returning.", clientVMName)
303324
return nil
304325
}
326+
327+
// GetVMIPFromVMName returns the VM IP associated with a
328+
// given client VM name.
329+
// Callers need to specify fileVolumeName as a combination of
330+
// "<SV-namespace>/<SV-PVC-name>". This combination is used to uniquely
331+
// identify CnsFileVolumeClient instances.
332+
// Returns an empty string if the instance doesn't exist OR if the
333+
// input VM name is not present in this instance.
334+
// Returns an error if any operations fails.
335+
func (f *fileVolumeClient) GetVMIPFromVMName(ctx context.Context,
336+
fileVolumeName string, clientVMName string) (string, int, error) {
337+
log := logger.GetLogger(ctx)
338+
339+
log.Infof("Fetching VM IP from cnsfilevolumeclient %s for VM name %s", fileVolumeName, clientVMName)
340+
actual, _ := f.volumeLock.LoadOrStore(fileVolumeName, &sync.Mutex{})
341+
instanceLock, ok := actual.(*sync.Mutex)
342+
if !ok {
343+
return "", 0, fmt.Errorf("failed to cast lock for cnsfilevolumeclient instance: %s", fileVolumeName)
344+
}
345+
instanceLock.Lock()
346+
defer instanceLock.Unlock()
347+
348+
instance := &v1alpha1.CnsFileVolumeClient{}
349+
instanceNamespace, instanceName, err := cache.SplitMetaNamespaceKey(fileVolumeName)
350+
if err != nil {
351+
log.Errorf("failed to split key %s with error: %+v", fileVolumeName, err)
352+
return "", 0, err
353+
}
354+
instanceKey := types.NamespacedName{
355+
Namespace: instanceNamespace,
356+
Name: instanceName,
357+
}
358+
err = f.client.Get(ctx, instanceKey, instance)
359+
if err != nil {
360+
log.Errorf("failed to get cnsfilevolumeclient instance %s with error: %+v", fileVolumeName, err)
361+
return "", 0, err
362+
}
363+
364+
// Verify if input VM name exists in Spec.ExternalIPtoClientVms
365+
log.Debugf("Verifying if ExternalIPtoClientVms list has VM name: %s", clientVMName)
366+
for vmIP, vmNames := range instance.Spec.ExternalIPtoClientVms {
367+
for _, vmName := range vmNames {
368+
if vmName == clientVMName {
369+
return vmIP, len(vmNames), nil
370+
}
371+
}
372+
}
373+
return "", 0, err
374+
}
375+
376+
// removeFinalizer will remove the CNS Finalizer = cns.vmware.com,
377+
// from a given CnsFileVolumeClient instance.
378+
func removeFinalizer(ctx context.Context, client client.Client,
379+
instance *v1alpha1.CnsFileVolumeClient) error {
380+
log := logger.GetLogger(ctx)
381+
for i, finalizer := range instance.Finalizers {
382+
if finalizer == cnsoperatortypes.CNSFinalizer {
383+
log.Debugf("Removing %q finalizer from CnsFileVolumeClient instance with name: %q on namespace: %q",
384+
cnsoperatortypes.CNSFinalizer, instance.Name, instance.Namespace)
385+
instance.Finalizers = append(instance.Finalizers[:i], instance.Finalizers[i+1:]...)
386+
// Update the instance after removing finalizer
387+
err := client.Update(ctx, instance)
388+
if err != nil {
389+
log.Errorf("failed to update CnsFileVolumeClient instance with name: %q on namespace: %q",
390+
instance.Name, instance.Namespace)
391+
return err
392+
}
393+
break
394+
}
395+
}
396+
397+
return nil
398+
}

pkg/syncer/cnsoperator/controller/cnsfileaccessconfig/cnsfileaccessconfig_controller.go

+86
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,54 @@ func (r *ReconcileCnsFileAccessConfig) Reconcile(ctx context.Context,
246246
msg := fmt.Sprintf("Failed to get virtualmachine instance for the VM with name: %q. Error: %+v",
247247
instance.Spec.VMName, err)
248248
log.Error(msg)
249+
// If virtualmachine instance is NotFound and if deletion timestamp is set on CnsFileAccessConfig instance,
250+
// then proceed with the deletion of CnsFileAccessConfig instance.
251+
if apierrors.IsNotFound(err) && instance.DeletionTimestamp != nil {
252+
log.Infof("CnsFileAccessConfig instance %q has deletion timestamp set, but VM instance with "+
253+
"name %q is not found. Processing the deletion of CnsFileAccessConfig instance.",
254+
instance.Name, instance.Spec.VMName)
255+
// Fetch the PVC and PV instance and get volume ID
256+
skipConfigureVolumeACL := false
257+
volumeID, err := cnsoperatorutil.GetVolumeID(ctx, r.client, instance.Spec.PvcName, instance.Namespace)
258+
if err != nil {
259+
if apierrors.IsNotFound(err) {
260+
// If PVC instance is NotFound (deleted), then there is no need to configure ACL on file volume.
261+
// TODO: When we support PV with retain policy on supervisor, then we need to configure ACL
262+
// in cases where PVC is deleted but PV is not deleted.
263+
skipConfigureVolumeACL = true
264+
} else {
265+
msg := fmt.Sprintf("Failed to get volumeID from pvcName: %q. Error: %+v", instance.Spec.PvcName, err)
266+
log.Error(msg)
267+
setInstanceError(ctx, r, instance, msg)
268+
return reconcile.Result{RequeueAfter: timeout}, nil
269+
}
270+
}
271+
if !skipConfigureVolumeACL {
272+
err = r.removePermissionsForFileVolume(ctx, volumeID, instance)
273+
if err != nil {
274+
msg := fmt.Sprintf("Failed to remove file volume permissions with error: %+v", err)
275+
log.Error(msg)
276+
setInstanceError(ctx, r, instance, msg)
277+
return reconcile.Result{RequeueAfter: timeout}, nil
278+
}
279+
}
280+
281+
// Remove finalizer from CnsFileAccessConfig CRD
282+
removeFinalizerFromCRDInstance(ctx, instance)
283+
err = updateCnsFileAccessConfig(ctx, r.client, instance)
284+
if err != nil {
285+
msg := fmt.Sprintf("failed to update CnsFileAccessConfig instance: %q on namespace: %q. Error: %+v",
286+
instance.Name, instance.Namespace, err)
287+
recordEvent(ctx, r, instance, v1.EventTypeWarning, msg)
288+
return reconcile.Result{RequeueAfter: timeout}, nil
289+
}
290+
// Cleanup instance entry from backOffDuration map.
291+
backOffDurationMapMutex.Lock()
292+
delete(backOffDuration, instance.Name)
293+
backOffDurationMapMutex.Unlock()
294+
return reconcile.Result{}, nil
295+
}
296+
249297
setInstanceError(ctx, r, instance, msg)
250298
return reconcile.Result{RequeueAfter: timeout}, nil
251299
}
@@ -412,6 +460,44 @@ func (r *ReconcileCnsFileAccessConfig) Reconcile(ctx context.Context,
412460
return reconcile.Result{}, nil
413461
}
414462

463+
// removePermissionsForFileVolume helps to remove net permissions for a given file volume.
464+
// This method is used when we don't have VM instance. It fetches the VM IP from CNSFileVolumeClient
465+
// instance for the VM name associated with CnsFileAccessConfig.
466+
func (r *ReconcileCnsFileAccessConfig) removePermissionsForFileVolume(ctx context.Context,
467+
volumeID string, instance *cnsfileaccessconfigv1alpha1.CnsFileAccessConfig) error {
468+
log := logger.GetLogger(ctx)
469+
cnsFileVolumeClientInstance, err := cnsfilevolumeclient.GetFileVolumeClientInstance(ctx)
470+
if err != nil {
471+
return logger.LogNewErrorf(log, "Failed to get CNSFileVolumeClient instance. Error: %+v", err)
472+
}
473+
474+
vmIP, vmsAssociatedWithIP, err := cnsFileVolumeClientInstance.GetVMIPFromVMName(ctx,
475+
instance.Namespace+"/"+instance.Spec.PvcName, instance.Spec.VMName)
476+
if err != nil {
477+
return logger.LogNewErrorf(log, "Failed to get VM IP from VM name in CNSFileVolumeClient instance. "+
478+
"Error: %+v", err)
479+
}
480+
if vmsAssociatedWithIP == 1 {
481+
// In case of VDS setup, we will always have 1 VM associated with IP. But, in case of
482+
// SNAT setup, we may have multiple VMs associated with same IP. We will remove ACL
483+
// permission only when it is the last VM associated with IP.
484+
err = r.configureVolumeACLs(ctx, volumeID, vmIP, true)
485+
if err != nil {
486+
return logger.LogNewErrorf(log, "Failed to remove net permissions for file volume %q. Error: %+v",
487+
volumeID, err)
488+
}
489+
}
490+
err = cnsFileVolumeClientInstance.RemoveClientVMFromIPList(ctx,
491+
instance.Namespace+"/"+instance.Spec.PvcName, instance.Spec.VMName, vmIP)
492+
if err != nil {
493+
return logger.LogNewErrorf(log, "Failed to remove VM %q with IP %q from IPList. Error: %+v",
494+
instance.Spec.VMName, vmIP, err)
495+
}
496+
log.Infof("Successfully removed VM IP %q from IPList for CnsFileAccessConfig request with name: %q on "+
497+
"namespace: %q", vmIP, instance.Name, instance.Namespace)
498+
return nil
499+
}
500+
415501
// configureNetPermissionsForFileVolume helps to add or remove net permissions
416502
// for a given file volume. The callers of this method can remove or add net
417503
// permissions by setting the parameter removePermission to true or false

pkg/syncer/metadatasyncer.go

+76
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ import (
6565
commoncotypes "sigs.k8s.io/vsphere-csi-driver/v3/pkg/csi/service/common/commonco/types"
6666
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/csi/service/logger"
6767
csitypes "sigs.k8s.io/vsphere-csi-driver/v3/pkg/csi/types"
68+
cnsfilevolumeclientv1alpha1 "sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/cnsoperator/cnsfilevolumeclient/v1alpha1"
6869
triggercsifullsyncv1alpha1 "sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/cnsoperator/triggercsifullsync/v1alpha1"
6970
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/cnsvolumeinfo"
7071
cnsvolumeinfov1alpha1 "sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/cnsvolumeinfo/v1alpha1"
@@ -74,6 +75,7 @@ import (
7475
csinodetopologyv1alpha1 "sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/csinodetopology/v1alpha1"
7576
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/internalapis/featurestates"
7677
k8s "sigs.k8s.io/vsphere-csi-driver/v3/pkg/kubernetes"
78+
cnsoperatortypes "sigs.k8s.io/vsphere-csi-driver/v3/pkg/syncer/cnsoperator/types"
7779
"sigs.k8s.io/vsphere-csi-driver/v3/pkg/syncer/storagepool"
7880
)
7981

@@ -290,6 +292,15 @@ func InitMetadataSyncer(ctx context.Context, clusterFlavor cnstypes.CnsClusterFl
290292
}
291293
}
292294

295+
// Check if finalizer is added on CnsFileVolumeClient CRs, if not then add a finalizer.
296+
// We want to protect CnsFileVolumeClient from getting abruptly deleted, as it is being used
297+
// in CnsFileAccessConfig CR. So, in case of upgrade we will add finalizer if it is missing.
298+
err = addFinalizerOnCnsFileVolumeClientCRs(ctx)
299+
if err != nil {
300+
log.Errorf("Failed to add finalizer on CnsFileVolumeClient CRs. Error: %v", err)
301+
return err
302+
}
303+
293304
parseBool := func(CfgMap *v1.ConfigMap, featureName string, namespace string) bool {
294305
var fssVal bool
295306
if state, ok := CfgMap.Data[featureName]; ok {
@@ -986,6 +997,71 @@ func InitMetadataSyncer(ctx context.Context, clusterFlavor cnstypes.CnsClusterFl
986997
return nil
987998
}
988999

1000+
// addFinalizerOnCnsFileVolumeClientCRs checks and adds finalizer on CnsFileVolumeClient CRs if it is missing.
1001+
func addFinalizerOnCnsFileVolumeClientCRs(ctx context.Context) error {
1002+
log := logger.GetLogger(ctx).WithOptions()
1003+
var stretchedSupervisor bool
1004+
if commonco.ContainerOrchestratorUtility.IsFSSEnabled(ctx, common.TKGsHA) {
1005+
clusterComputeResourceMoIds, err := common.GetClusterComputeResourceMoIds(ctx)
1006+
if err != nil {
1007+
log.Errorf("failed to get clusterComputeResourceMoIds. err: %v", err)
1008+
return err
1009+
}
1010+
if len(clusterComputeResourceMoIds) > 1 {
1011+
stretchedSupervisor = true
1012+
}
1013+
}
1014+
if !stretchedSupervisor || (stretchedSupervisor && IsWorkloadDomainIsolationSupported) {
1015+
// Prepare Config and NewClientForGroup for cnsOperatorClient
1016+
restConfig, err := config.GetConfig()
1017+
if err != nil {
1018+
log.Errorf("failed to get Kubernetes k8sconfig. Err: %+v", err)
1019+
return err
1020+
}
1021+
cnsOperatorClient, err := k8s.NewClientForGroup(ctx, restConfig, cnsoperatorv1alpha1.GroupName)
1022+
if err != nil {
1023+
log.Errorf("failed to create CnsOperator client. Err: %+v", err)
1024+
return err
1025+
}
1026+
// Get the list of all CnsFileVolumeClient CRs from all supervisor namespaces.
1027+
cnsFileVolumeClientList := &cnsfilevolumeclientv1alpha1.CnsFileVolumeClientList{}
1028+
err = cnsOperatorClient.List(ctx, cnsFileVolumeClientList)
1029+
if err != nil {
1030+
log.Errorf("failed to list CnsFileVolumeClient CRs from all supervisor namespaces. Error: %+v",
1031+
err)
1032+
return err
1033+
}
1034+
1035+
for _, cnsFileVolumeClient := range cnsFileVolumeClientList.Items {
1036+
// If cnsFileVolumeClient instance is marked for deletion, then no need to add finalizer
1037+
if cnsFileVolumeClient.DeletionTimestamp == nil {
1038+
cnsFinalizerExists := false
1039+
// Check if finalizer already exists.
1040+
for _, finalizer := range cnsFileVolumeClient.Finalizers {
1041+
if finalizer == cnsoperatortypes.CNSFinalizer {
1042+
cnsFinalizerExists = true
1043+
break
1044+
}
1045+
}
1046+
if !cnsFinalizerExists {
1047+
// Add finalizer.
1048+
cnsFileVolumeClient.Finalizers = append(cnsFileVolumeClient.Finalizers,
1049+
cnsoperatortypes.CNSFinalizer)
1050+
err = cnsOperatorClient.Update(ctx, &cnsFileVolumeClient)
1051+
if err != nil {
1052+
log.Errorf("failed to update CnsFileVolumeClient instance: %q on namespace: %q. Error: %+v",
1053+
cnsFileVolumeClient.Name, cnsFileVolumeClient.Namespace, err)
1054+
return err
1055+
}
1056+
log.Infof("successfully added finalizer on CnsFileVolumeClient instance: %q on namespace: %q",
1057+
cnsFileVolumeClient.Name, cnsFileVolumeClient.Namespace)
1058+
}
1059+
}
1060+
}
1061+
}
1062+
return nil
1063+
}
1064+
9891065
func initStorageQuotaPeriodicSync(ctx context.Context, metadataSyncer *metadataSyncInformer) error {
9901066
log := logger.GetLogger(ctx).WithOptions()
9911067
if int(PeriodicSyncIntervalInMin.Minutes()) == 0 {

0 commit comments

Comments
 (0)