diff --git a/pkg/constants/constants.go b/pkg/constants/constants.go index 481e55499..ef375b724 100644 --- a/pkg/constants/constants.go +++ b/pkg/constants/constants.go @@ -201,6 +201,9 @@ const ( // deleted or having its disks promoted. VMProvKeepDisksExtraConfigKey = "vmprov.keepDisks" + // ExtraConfigVMDirNamespacePath is the path for DatastoreNamespaceManager.DeleteDirectory (non-TLD). + ExtraConfigVMDirNamespacePath = "vmoperator.vmware.com.vmDirNamespacePath" + // CSIVSphereVolumeSyncAnnotationKey is the annotation key for the CSIVSphereVolumeSync // annotation. CSIVSphereVolumeSyncAnnotationKey = "csi.vsphere.volume.sync" diff --git a/pkg/providers/vsphere/virtualmachine/delete.go b/pkg/providers/vsphere/virtualmachine/delete.go index 59758370d..261665a15 100644 --- a/pkg/providers/vsphere/virtualmachine/delete.go +++ b/pkg/providers/vsphere/virtualmachine/delete.go @@ -1,17 +1,26 @@ // © Broadcom. All Rights Reserved. -// The term “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. +// The term "Broadcom" refers to Broadcom Inc. and/or its subsidiaries. // SPDX-License-Identifier: Apache-2.0 package virtualmachine import ( + "context" "fmt" + "path" "github.com/go-logr/logr" + "github.com/vmware/govmomi/cns" + cnstypes "github.com/vmware/govmomi/cns/types" + "github.com/vmware/govmomi/fault" "github.com/vmware/govmomi/object" + "github.com/vmware/govmomi/vim25" vimtypes "github.com/vmware/govmomi/vim25/types" + "github.com/vmware/govmomi/vslm" + pkgconst "github.com/vmware-tanzu/vm-operator/pkg/constants" pkgctx "github.com/vmware-tanzu/vm-operator/pkg/context" + pkglog "github.com/vmware-tanzu/vm-operator/pkg/log" vmutil "github.com/vmware-tanzu/vm-operator/pkg/util/vsphere/vm" ) @@ -20,12 +29,52 @@ import ( var VMDeletePropertiesSelector = []string{ "recentTask", "config.extraConfig", + "config.files", + "config.hardware.device", "summary.runtime.connectionState", } +// CleanupVMDir removes the VM directory on the datastore and, when non-TLD, +// the namespace mapping. +func CleanupVMDir( + ctx context.Context, + vimClient *vim25.Client, + dc *object.Datacenter, + vmDirPath, namespacePath string) error { + + if vimClient == nil { + return nil + } + + ctx = context.WithoutCancel(ctx) + fm := object.NewFileManager(vimClient) + + if vmDirPath != "" { + task, err := fm.DeleteDatastoreFile(ctx, vmDirPath, dc) + if err != nil { + return err + } + if err := task.Wait(ctx); err != nil && + !fault.Is(err, &vimtypes.FileNotFound{}) { + return err + } + } + + if dc != nil && namespacePath != "" { + nm := object.NewDatastoreNamespaceManager(vimClient) + if err := nm.DeleteDirectory(ctx, dc, namespacePath); err != nil && + !fault.Is(err, &vimtypes.FileNotFound{}) { + return err + } + } + + return nil +} + func DeleteVirtualMachine( vmCtx pkgctx.VirtualMachineContext, - vcVM *object.VirtualMachine) error { + vcVM *object.VirtualMachine, + dc *object.Datacenter) error { if _, err := vmutil.SetAndWaitOnPowerState( logr.NewContext(vmCtx, vmCtx.Logger), @@ -50,5 +99,190 @@ func DeleteVirtualMachine( return fmt.Errorf("destroy VM task failed: %w", err) } + // After Destroy(), delete any FCD-backed disks left in the VM's directory + // (left behind because keepAfterDeleteVm=true) via the CNS DeleteVolume + // API. This MUST run after Destroy() so the disk is no longer attached + // and CNS permits the delete. Using the CNS API (not raw VSLM) keeps + // CNS's internal state consistent so the subsequent PVC cleanup path + // handles any duplicate delete gracefully with no VC UI error. + // See deleteFCDsInVMDir for full context. + deleteFCDsInVMDir( + vmCtx, + vcVM.Client(), + vmCtx.MoVM.Config) + + // Best-effort cleanup of VM dir after Destroy(); + // no error return since delete is still considered successful. + if dc == nil || + vmCtx.MoVM.Config == nil || + vmCtx.MoVM.Config.Files.VmPathName == "" { + return nil + } + + namespacePath, _ := object.OptionValueList( + vmCtx.MoVM.Config.ExtraConfig).GetString(pkgconst.ExtraConfigVMDirNamespacePath) + + _ = CleanupVMDir(vmCtx.Context, + vcVM.Client(), + dc, + path.Dir(vmCtx.MoVM.Config.Files.VmPathName), + namespacePath) + return nil +} + +// deleteFCDsInVMDir finds all FCD-backed disks in the VM's own directory and +// deletes them via the CNS DeleteVolume API (with deleteDisk=true) before the +// VM is destroyed. +// +// The unmanaged volumes registration process (unmanagedvolumes_register) +// registers a VM's ephemeral disks as CNS FCDs via CnsRegisterVolume, which +// sets keepAfterDeleteVm=true. During VM deletion, vcVM.Destroy() skips such +// disks because of this flag. CleanupVMDir() then removes the directory via +// the raw FileManager API without updating the FCD catalog, leaving a stale +// catalog entry pointing to a now-deleted file. +// +// The CNS DeleteVolume API (rather than the lower-level VSLM +// DeleteVStorageObject) is used because CNS tracks the deletion in its own +// internal state. When CNS's PVC cleanup path subsequently also tries to +// delete the same volume, CNS recognises it is already gone and handles the +// duplicate call gracefully — avoiding "not found" errors visible in the +// vCenter task UI. +// +// Only disks whose backing file is in the VM's own directory are affected; +// user-attached PVC disks in separate directories are not touched. +// +// Note: after CSI registers a VMDK as an FCD, the VM hardware config's +// backingObjectId field is not updated. The VSLM catalog is queried directly +// to find FCDs whose backing file path matches the VM's disk paths. +func deleteFCDsInVMDir( + ctx context.Context, + vimClient *vim25.Client, + config *vimtypes.VirtualMachineConfigInfo) { + + if config == nil { + return + } + if vimClient.ServiceContent.VStorageObjectManager == nil { + return + } + + vmDir := path.Dir(config.Files.VmPathName) + logger := pkglog.FromContextOrDefault(ctx) + + logger.Info( + "Scanning VM disks for FCD cleanup", + "vmDir", vmDir, + "numDevices", len(config.Hardware.Device)) + + // Collect the file paths of all disks in the VM's own directory, and + // capture one datastore MoRef to use for VSLM queries. + diskFilesInVMDir := map[string]struct{}{} + var datastoreMoRef *vimtypes.ManagedObjectReference + for _, dev := range config.Hardware.Device { + disk, ok := dev.(*vimtypes.VirtualDisk) + if !ok { + continue + } + bi := getVirtualDiskFileBackingInfo(disk) + if bi == nil || bi.FileName == "" || bi.Datastore == nil { + continue + } + logger.Info( + "Found disk in VM", + "fileName", bi.FileName, + "inVMDir", path.Dir(bi.FileName) == vmDir) + if path.Dir(bi.FileName) != vmDir { + continue + } + diskFilesInVMDir[bi.FileName] = struct{}{} + datastoreMoRef = bi.Datastore + } + + if len(diskFilesInVMDir) == 0 || datastoreMoRef == nil { + logger.Info( + "No disks found in VM directory, skipping FCD cleanup", + "vmDir", vmDir) + return + } + + // List all FCDs on the datastore and find those whose backing file + // matches a disk in the VM's directory. + mgr := vslm.NewObjectManager(vimClient) + ds := object.NewDatastore(vimClient, *datastoreMoRef) + + ids, err := mgr.List(ctx, ds) + if err != nil { + logger.Error(err, + "Failed to list FCDs on datastore for cleanup") + return + } + + // Build CNS client once; used to delete matched FCDs via CNS DeleteVolume + // so that CNS internal state is updated and its PVC cleanup path handles + // the subsequent duplicate delete gracefully (no VC UI errors). + cnsClient, err := cns.NewClient(ctx, vimClient) + if err != nil { + logger.Error(err, "Failed to create CNS client for FCD cleanup") + return + } + + for _, id := range ids { + obj, err := mgr.Retrieve(ctx, ds, id.Id) + if err != nil { + logger.Info( + "Failed to retrieve FCD, skipping", + "fcdID", id.Id, + "err", err) + continue + } + + // Extract the backing file path. + fb, ok := obj.Config.Backing.( + vimtypes.BaseBaseConfigInfoFileBackingInfo) + if !ok { + continue + } + filePath := fb.GetBaseConfigInfoFileBackingInfo().FilePath + + if _, found := diskFilesInVMDir[filePath]; !found { + continue + } + + logger.Info( + "Deleting FCD disk in VM directory via CNS", + "fcdID", id.Id, + "diskFile", filePath) + + task, err := cnsClient.DeleteVolume( + ctx, + []cnstypes.CnsVolumeId{{Id: id.Id}}, + true) + if err != nil { + logger.Error(err, + "Failed to initiate CNS volume delete", + "fcdID", id.Id, + "diskFile", filePath) + continue + } + if _, err := task.WaitForResult(ctx); err != nil { + logger.Error(err, + "CNS volume delete task failed", + "fcdID", id.Id, + "diskFile", filePath) + } + } +} + +// getVirtualDiskFileBackingInfo returns the VirtualDeviceFileBackingInfo for +// a virtual disk if the disk uses file-based backing, otherwise nil. +func getVirtualDiskFileBackingInfo( + disk *vimtypes.VirtualDisk) *vimtypes.VirtualDeviceFileBackingInfo { + + type hasFileBackingInfo interface { + GetVirtualDeviceFileBackingInfo() *vimtypes.VirtualDeviceFileBackingInfo + } + if fb, ok := disk.Backing.(hasFileBackingInfo); ok { + return fb.GetVirtualDeviceFileBackingInfo() + } return nil } diff --git a/pkg/providers/vsphere/virtualmachine/delete_test.go b/pkg/providers/vsphere/virtualmachine/delete_test.go index 782ade5e7..54e353ce5 100644 --- a/pkg/providers/vsphere/virtualmachine/delete_test.go +++ b/pkg/providers/vsphere/virtualmachine/delete_test.go @@ -57,7 +57,7 @@ func deleteTests() { Expect(err).ToNot(HaveOccurred()) Expect(t.Wait(ctx)).To(Succeed()) - err = virtualmachine.DeleteVirtualMachine(vmCtx, vcVM) + err = virtualmachine.DeleteVirtualMachine(vmCtx, vcVM, nil) Expect(err).ToNot(HaveOccurred()) Expect(ctx.GetVMFromMoID(moID)).To(BeNil()) @@ -71,7 +71,7 @@ func deleteTests() { Expect(err).ToNot(HaveOccurred()) Expect(state).To(Equal(vimtypes.VirtualMachinePowerStatePoweredOn)) - err = virtualmachine.DeleteVirtualMachine(vmCtx, vcVM) + err = virtualmachine.DeleteVirtualMachine(vmCtx, vcVM, nil) Expect(err).ToNot(HaveOccurred()) Expect(ctx.GetVMFromMoID(moID)).To(BeNil()) diff --git a/pkg/providers/vsphere/vmlifecycle/create_fastdeploy.go b/pkg/providers/vsphere/vmlifecycle/create_fastdeploy.go index 9a500c1e6..7cc4acb3b 100644 --- a/pkg/providers/vsphere/vmlifecycle/create_fastdeploy.go +++ b/pkg/providers/vsphere/vmlifecycle/create_fastdeploy.go @@ -14,7 +14,6 @@ import ( "time" "github.com/go-logr/logr" - "github.com/vmware/govmomi/fault" "github.com/vmware/govmomi/object" "github.com/vmware/govmomi/vim25" vimtypes "github.com/vmware/govmomi/vim25/types" @@ -23,6 +22,7 @@ import ( pkgconst "github.com/vmware-tanzu/vm-operator/pkg/constants" pkgctx "github.com/vmware-tanzu/vm-operator/pkg/context" pkglog "github.com/vmware-tanzu/vm-operator/pkg/log" + "github.com/vmware-tanzu/vm-operator/pkg/providers/vsphere/virtualmachine" pkgutil "github.com/vmware-tanzu/vm-operator/pkg/util" "github.com/vmware-tanzu/vm-operator/pkg/util/ptr" ) @@ -98,6 +98,13 @@ func fastDeploy( return nil, fmt.Errorf("failed to create vm dir: %w", err) } + // Store for DeleteDirectory at VM delete time. + createArgs.ConfigSpec.ExtraConfig = append( + createArgs.ConfigSpec.ExtraConfig, + &vimtypes.OptionValue{ + Key: pkgconst.ExtraConfigVMDirNamespacePath, + Value: vdp}) + // The vmDirUUIDPath value will look something like the following: // // /vmfs/volumes/vsan:a5982d36bd6d11f0-9fd50050569d3d0d/dfc05687-5880-431c-9924-a3a34837443e @@ -145,45 +152,14 @@ func fastDeploy( return } - // Use a context that is not cancelled when the parent is, so cleanup - // runs even if the request is cancelled. Preserves the VC opID so - // cleanup is correlated with the failed create in VC logs. - ctx := context.WithoutCancel(vmCtx.Context) - - // Delete the VM directory and its contents. - // Always use FileManager.DeleteDatastoreFile() first as it can delete - // non-empty directories recursively. Note that DeleteDirectory() on a - // non-empty directory will return an error, which is why we delete the - // directory contents first using DeleteDatastoreFile(). - t, err := fm.DeleteDatastoreFile(ctx, vmDirPath, datacenter) - if err != nil { - retErr = fmt.Errorf( - "failed to call delete api for vm dir %q: %w,%w", - vmDirPath, err, retErr) - return - } - - // Wait for the delete call to return. - if err := t.Wait(ctx); err != nil && - !fault.Is(err, &vimtypes.FileNotFound{}) { - - retErr = fmt.Errorf( - "failed to delete vm dir %q: %w,%w", - vmDirPath, err, retErr) - } - - // For non-TLD datastores, also clean up the namespace mapping. - if !createArgs.Datastores[0].TopLevelDirectoryCreateSupported { - if err := nm.DeleteDirectory( - ctx, - datacenter, - vmDirUUIDPath); err != nil && - !fault.Is(err, &vimtypes.FileNotFound{}) { - - retErr = fmt.Errorf( - "failed to delete vm dir namespace mapping %q: %w,%w", - vmDirUUIDPath, err, retErr) - } + // vmDirUUIDPath is set only for non-TLD; empty for TLD. + if err := virtualmachine.CleanupVMDir( + vmCtx.Context, + vimClient, + datacenter, + vmDirPath, + vmDirUUIDPath); err != nil { + retErr = fmt.Errorf("failed to cleanup vm dir: %w,%w", err, retErr) } }() diff --git a/pkg/providers/vsphere/vmprovider_vm.go b/pkg/providers/vsphere/vmprovider_vm.go index a94b32b2b..e43ed4a4f 100644 --- a/pkg/providers/vsphere/vmprovider_vm.go +++ b/pkg/providers/vsphere/vmprovider_vm.go @@ -455,7 +455,7 @@ func (vs *vSphereVMProvider) DeleteVirtualMachine( } } - return virtualmachine.DeleteVirtualMachine(vmCtx, vcVM) + return virtualmachine.DeleteVirtualMachine(vmCtx, vcVM, client.Datacenter()) } func (vs *vSphereVMProvider) PublishVirtualMachine(