@@ -61,6 +61,11 @@ type MultiClusterCache struct {
6161 restMapper meta.RESTMapper
6262 // newClientFunc returns a dynamic client for member cluster apiserver
6363 newClientFunc func (string ) (dynamic.Interface , error )
64+
65+ // activeWatchers tracks all active watch connections for each GVR
66+ // key: GVR string representation, value: list of active watch multiplexers
67+ activeWatchersLock sync.RWMutex
68+ activeWatchers map [string ][]* invalidatableWatchMux
6469}
6570
6671var _ Store = & MultiClusterCache {}
@@ -72,6 +77,7 @@ func NewMultiClusterCache(newClientFunc func(string) (dynamic.Interface, error),
7277 newClientFunc : newClientFunc ,
7378 cache : map [string ]* clusterCache {},
7479 registeredResources : map [schema.GroupVersionResource ]struct {}{},
80+ activeWatchers : map [string ][]* invalidatableWatchMux {},
7581 }
7682}
7783
@@ -116,19 +122,33 @@ func (c *MultiClusterCache) UpdateCache(resourcesByCluster map[string]map[schema
116122 }
117123
118124 // add/update cluster cache
125+ clustersAdded := false
126+ addedClusters := []string {}
119127 for clusterName , resources := range resourcesByCluster {
120128 cache , exist := c .cache [clusterName ]
121129 if ! exist {
122130 klog .Infof ("Add cache for cluster %v" , clusterName )
123131 cache = newClusterCache (clusterName , c .clientForClusterFunc (clusterName ), c .restMapper )
124132 c .cache [clusterName ] = cache
133+ // Any cluster being added to cache (whether new or recovered) should trigger invalidation
134+ // This is critical for cluster recovery scenarios where existing watch connections
135+ // don't include the recovered cluster's resources
136+ clustersAdded = true
137+ addedClusters = append (addedClusters , clusterName )
125138 }
126139 err := cache .updateCache (resources )
127140 if err != nil {
128141 return err
129142 }
130143 }
131144 c .registeredResources = registeredResources
145+
146+ // Only invalidate watches when clusters are added (not removed)
147+ // Cluster removal is already handled by cacher.Stop() -> terminateAllWatchers()
148+ if clustersAdded {
149+ klog .Infof ("Cluster topology changed (clusters added: %v), invalidating all active watches to trigger reconnection" , addedClusters )
150+ c .invalidateAllWatches ()
151+ }
132152 return nil
133153}
134154
@@ -347,7 +367,7 @@ func (c *MultiClusterCache) Watch(ctx context.Context, gvr schema.GroupVersionRe
347367 accessor .SetResourceVersion (responseResourceVersion .String ())
348368 }
349369
350- mux := newWatchMux ()
370+ mux := newInvalidatableWatchMux ()
351371 clusters := c .getClusterNames ()
352372 for i := range clusters {
353373 cluster := clusters [i ]
@@ -367,6 +387,9 @@ func (c *MultiClusterCache) Watch(ctx context.Context, gvr schema.GroupVersionRe
367387 })
368388 }
369389 mux .Start ()
390+
391+ // Register this watch so we can invalidate it when cluster topology changes
392+ c .registerWatch (gvr , mux )
370393 return mux , nil
371394}
372395
@@ -500,6 +523,62 @@ func (c *MultiClusterCache) getClusterResourceVersion(ctx context.Context, clust
500523 return listObj .GetResourceVersion (), nil
501524}
502525
526+ // registerWatch registers an active watch connection
527+ func (c * MultiClusterCache ) registerWatch (gvr schema.GroupVersionResource , mux * invalidatableWatchMux ) {
528+ c .activeWatchersLock .Lock ()
529+ defer c .activeWatchersLock .Unlock ()
530+
531+ key := gvr .String ()
532+ c .activeWatchers [key ] = append (c .activeWatchers [key ], mux )
533+
534+ // Set up cleanup when watch is stopped
535+ go func () {
536+ <- mux .StoppedCh ()
537+ c .unregisterWatch (gvr , mux )
538+ }()
539+ }
540+
541+ // unregisterWatch removes a watch connection from tracking
542+ func (c * MultiClusterCache ) unregisterWatch (gvr schema.GroupVersionResource , mux * invalidatableWatchMux ) {
543+ c .activeWatchersLock .Lock ()
544+ defer c .activeWatchersLock .Unlock ()
545+
546+ key := gvr .String ()
547+ watchers := c .activeWatchers [key ]
548+ for i , w := range watchers {
549+ if w == mux {
550+ // Remove from slice
551+ c .activeWatchers [key ] = append (watchers [:i ], watchers [i + 1 :]... )
552+ break
553+ }
554+ }
555+
556+ // Clean up empty entries
557+ if len (c .activeWatchers [key ]) == 0 {
558+ delete (c .activeWatchers , key )
559+ }
560+ }
561+
562+ // invalidateAllWatches sends invalidation events to all active watches
563+ // This causes clients to reconnect and get the updated cluster list
564+ func (c * MultiClusterCache ) invalidateAllWatches () {
565+ c .activeWatchersLock .RLock ()
566+ defer c .activeWatchersLock .RUnlock ()
567+
568+ totalWatches := 0
569+ for _ , watchers := range c .activeWatchers {
570+ totalWatches += len (watchers )
571+ for _ , mux := range watchers {
572+ // Send invalidation event asynchronously to avoid blocking
573+ go mux .Invalidate ()
574+ }
575+ }
576+
577+ if totalWatches > 0 {
578+ klog .Infof ("Sent invalidation signal to %d active watch connections" , totalWatches )
579+ }
580+ }
581+
503582// Inputs and outputs:
504583// o.ResourceVersion o.Continue | cluster options.ResourceVersion options.Continue mrv
505584// xxxx "" | "" xxxx "" decode(xxx)
0 commit comments