@@ -98,10 +98,23 @@ var (
9898 vmMoidToHostMoid , volumeIDToVMMap map [string ]string
9999)
100100
101+ // volumeLock is the per-volume entry stored in snapshotLockManager.locks: a mutex plus a count of the goroutines that currently hold or are waiting for it.
102+ type volumeLock struct {
103+ mutex sync.Mutex // locked by acquireSnapshotLock and unlocked by releaseSnapshotLock
104+ refCount int // incremented in acquireSnapshotLock before blocking on mutex, decremented in releaseSnapshotLock; only touched while snapshotLockManager.mapMutex is held
105+ }
106+
107+ // snapshotLockManager manages per-volume locks for snapshot operations.
108+ type snapshotLockManager struct {
109+ locks map [string ]* volumeLock // keyed by volume ID; an entry is created on first acquire and deleted when its refCount returns to zero
110+ mapMutex sync.RWMutex // guards locks and each entry's refCount; the visible code only uses Lock/Unlock, never RLock
111+ }
112+
101113type controller struct {
102- manager * common.Manager
103- authMgr common.AuthorizationService
104- topologyMgr commoncotypes.ControllerTopologyService
114+ manager * common.Manager
115+ authMgr common.AuthorizationService
116+ topologyMgr commoncotypes.ControllerTopologyService
117+ snapshotLockMgr * snapshotLockManager // per-volume snapshot locks; assigned in Init and dereferenced without a nil check by acquireSnapshotLock/releaseSnapshotLock
105118 csi.UnimplementedControllerServer
106119}
107120
@@ -211,6 +224,12 @@ func (c *controller) Init(config *cnsconfig.Config, version string) error {
211224 CryptoClient : cryptoClient ,
212225 }
213226
227+ // Initialize snapshot lock manager
228+ c .snapshotLockMgr = & snapshotLockManager {
229+ locks : make (map [string ]* volumeLock ),
230+ }
231+ log .Info ("Initialized snapshot lock manager for per-volume serialization" )
232+
214233 vc , err := common .GetVCenter (ctx , c .manager )
215234 if err != nil {
216235 log .Errorf ("failed to get vcenter. err=%v" , err )
@@ -447,6 +466,53 @@ func (c *controller) ReloadConfiguration(reconnectToVCFromNewConfig bool) error
447466 return nil
448467}
449468
469+ // acquireSnapshotLock acquires a lock for the given volume ID.
470+ // It creates a new lock if one doesn't exist and increments the reference count.
471+ // The caller must call releaseSnapshotLock when done.
472+ func (c * controller ) acquireSnapshotLock (ctx context.Context , volumeID string ) {
473+ log := logger .GetLogger (ctx )
474+ c .snapshotLockMgr .mapMutex .Lock ()
475+ defer c .snapshotLockMgr .mapMutex .Unlock ()
476+
477+ vLock , exists := c .snapshotLockMgr .locks [volumeID ]
478+ if ! exists {
479+ vLock = & volumeLock {}
480+ c .snapshotLockMgr .locks [volumeID ] = vLock
481+ log .Debugf ("Created new lock for volume %q" , volumeID )
482+ }
483+ vLock .refCount ++
484+ log .Debugf ("Acquired lock for volume %q, refCount: %d" , volumeID , vLock .refCount )
485+
486+ // Unlock the map before acquiring the volume lock to avoid deadlock
487+ c .snapshotLockMgr .mapMutex .Unlock ()
488+ vLock .mutex .Lock ()
489+ c .snapshotLockMgr .mapMutex .Lock ()
490+ }
491+
492+ // releaseSnapshotLock releases the lock for the given volume ID.
493+ // It decrements the reference count and removes the lock if count reaches zero.
494+ func (c * controller ) releaseSnapshotLock (ctx context.Context , volumeID string ) {
495+ log := logger .GetLogger (ctx )
496+ c .snapshotLockMgr .mapMutex .Lock ()
497+ defer c .snapshotLockMgr .mapMutex .Unlock ()
498+
499+ vLock , exists := c .snapshotLockMgr .locks [volumeID ]
500+ if ! exists {
501+ log .Warnf ("Attempted to release non-existent lock for volume %q" , volumeID )
502+ return
503+ }
504+
505+ vLock .mutex .Unlock ()
506+ vLock .refCount --
507+ log .Debugf ("Released lock for volume %q, refCount: %d" , volumeID , vLock .refCount )
508+
509+ // Clean up the lock if reference count reaches zero
510+ if vLock .refCount == 0 {
511+ delete (c .snapshotLockMgr .locks , volumeID )
512+ log .Debugf ("Cleaned up lock for volume %q" , volumeID )
513+ }
514+ }
515+
450516// createBlockVolume creates a block volume based on the CreateVolumeRequest.
451517func (c * controller ) createBlockVolume (ctx context.Context , req * csi.CreateVolumeRequest ,
452518 isWorkloadDomainIsolationEnabled bool , clusterMoIds []string ) (
@@ -2446,8 +2512,47 @@ func (c *controller) CreateSnapshot(ctx context.Context, req *csi.CreateSnapshot
24462512 "Queried VolumeType: %v" , volumeType , cnsVolumeDetailsMap [volumeID ].VolumeType )
24472513 }
24482514
2449- // TODO: We may need to add logic to check the limit of max number of snapshots by using
2450- // GlobalMaxSnapshotsPerBlockVolume etc. variables in the future.
2515+ // Acquire lock for this volume to serialize snapshot operations
2516+ // Check snapshot limit if the feature is enabled
2517+ isSnapshotLimitWCPEnabled := commonco .ContainerOrchestratorUtility .IsFSSEnabled (ctx , common .SnapshotLimitWCP )
2518+ if isSnapshotLimitWCPEnabled {
2519+ c .acquireSnapshotLock (ctx , volumeID )
2520+ defer c .releaseSnapshotLock (ctx , volumeID )
2521+
2522+ // Extract namespace from request parameters
2523+ volumeSnapshotNamespace := req .Parameters [common .VolumeSnapshotNamespaceKey ]
2524+ if volumeSnapshotNamespace == "" {
2525+ return nil , logger .LogNewErrorCodef (log , codes .Internal ,
2526+ "volumesnapshot namespace is not set in the request parameters" )
2527+ }
2528+
2529+ // Get snapshot limit from namespace annotation
2530+ snapshotLimit , err := getSnapshotLimitFromNamespace (ctx , volumeSnapshotNamespace )
2531+ if err != nil {
2532+ return nil , logger .LogNewErrorCodef (log , codes .Internal ,
2533+ "failed to get snapshot limit for namespace %q: %v" , volumeSnapshotNamespace , err )
2534+ }
2535+ log .Infof ("Snapshot limit for namespace %q is set to %d" , volumeSnapshotNamespace , snapshotLimit )
2536+
2537+ // Query existing snapshots for this volume
2538+ snapshotList , _ , err := common .QueryVolumeSnapshotsByVolumeID (ctx , c .manager .VolumeManager , volumeID ,
2539+ common .QuerySnapshotLimit )
2540+ if err != nil {
2541+ return nil , logger .LogNewErrorCodef (log , codes .Internal ,
2542+ "failed to query snapshots for volume %q: %v" , volumeID , err )
2543+ }
2544+
2545+ // Check if the limit is exceeded
2546+ currentSnapshotCount := len (snapshotList )
2547+ if currentSnapshotCount >= snapshotLimit {
2548+ return nil , logger .LogNewErrorCodef (log , codes .FailedPrecondition ,
2549+ "the number of snapshots (%d) on the source volume %s has reached or exceeded " +
2550+ "the configured maximum (%d) for namespace %s" ,
2551+ currentSnapshotCount , volumeID , snapshotLimit , volumeSnapshotNamespace )
2552+ }
2553+ log .Infof ("Current snapshot count for volume %q is %d, within limit of %d" ,
2554+ volumeID , currentSnapshotCount , snapshotLimit )
2555+ }
24512556
24522557 // the returned snapshotID below is a combination of CNS VolumeID and CNS SnapshotID concatenated by the "+"
24532558 // sign. That is, a string of "<UUID>+<UUID>". Because, all other CNS snapshot APIs still require both
0 commit comments