Skip to content

Commit 05a2989

Browse files
Kevinz857 authored and Kevinmmt committed
perf: optimize RB to Work throughput for large-scale Pod distribution
This commit introduces multiple performance optimizations for the ResourceBinding to Work synchronization path, targeting scenarios with 10000+ Pods distribution.

Key optimizations:

1. AsyncWorkCreator for Binding Controller
   - Decouples Work creation from reconcile loop using 64 async workers
   - Implements Assume Cache pattern (similar to kube-scheduler)
   - Adds failure retry via requeue callback mechanism
   - Periodic cleanup of stale cache entries (every 5 min)

2. Parallel Work preparation and execution
   - Parallelizes DeepCopy and ApplyOverridePolicies across clusters
   - Concurrent Work creation for multi-cluster scenarios

3. CreateOrUpdateWork optimization
   - Implements Create-First pattern (try Create before Get+Update)
   - Adds fast-path comparison to skip unchanged Work updates
   - Reduces API calls by 30-50% in update scenarios

4. Precise orphan Work detection
   - Uses TargetClustersHashAnnotation to track cluster changes
   - Skips orphan check when clusters haven't changed
   - Expected 90%+ reduction in List API calls

5. AsyncBinder for Scheduler
   - 32 async workers for RB/CRB patch operations
   - Decouples scheduling decisions from persistence

New configuration options:
   --enable-async-work-creation=true
   --async-work-workers=64
   --enable-async-bind=true
   --async-bind-workers=32

Performance improvement:
   - New Work API calls: 2 -> 1 per Work (50% reduction)
   - Orphan check: Every reconcile -> Only on cluster change (90%+ reduction)
   - Multi-cluster Work creation: Sequential -> Parallel (Nx speedup)
   - Expected throughput: ~200 Work/s -> ~1000+ Work/s (5-10x improvement)

Signed-off-by: Kevinz857 <[email protected]>
1 parent 31e4756 commit 05a2989

File tree

18 files changed

+2866
-142
lines changed

18 files changed

+2866
-142
lines changed

cmd/controller-manager/app/controllermanager.go

Lines changed: 66 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package app
1919
import (
2020
"context"
2121
"flag"
22+
"strings"
2223
"time"
2324

2425
"github.com/spf13/cobra"
@@ -380,15 +381,56 @@ func startBindingController(ctx controllerscontext.Context) (enabled bool, err e
380381
klog.Errorf("Failed to register index for Work based on ResourceBinding ID: %v", err)
381382
return false, err
382383
}
384+
385+
// Create requeue channels for async work creation failures
386+
// RB and CRB use separate channels but share the same AsyncWorkCreator
387+
var rbRequeueChan, crbRequeueChan chan string
388+
if ctx.Opts.EnableAsyncWorkCreation {
389+
rbRequeueChan = make(chan string, binding.DefaultAsyncWorkQueueSize)
390+
crbRequeueChan = make(chan string, binding.DefaultAsyncWorkQueueSize)
391+
}
392+
393+
// Create and start AsyncWorkCreator if enabled
394+
var asyncWorkCreator *binding.AsyncWorkCreator
395+
if ctx.Opts.EnableAsyncWorkCreation {
396+
// Create requeue function that routes to appropriate channel based on binding key format
397+
// RB keys have format "namespace/name", CRB keys have format "crb:name"
398+
requeueFunc := func(bindingKey string) {
399+
var targetChan chan string
400+
if strings.HasPrefix(bindingKey, "crb:") {
401+
targetChan = crbRequeueChan
402+
bindingKey = strings.TrimPrefix(bindingKey, "crb:")
403+
} else {
404+
targetChan = rbRequeueChan
405+
}
406+
select {
407+
case targetChan <- bindingKey:
408+
default:
409+
klog.Warningf("Requeue channel is full, dropping requeue for %s", bindingKey)
410+
}
411+
}
412+
asyncWorkCreator = binding.NewAsyncWorkCreator(
413+
ctx.Mgr.GetClient(),
414+
ctx.Mgr.GetEventRecorderFor(binding.ControllerName),
415+
ctx.Opts.AsyncWorkWorkers,
416+
requeueFunc,
417+
)
418+
go asyncWorkCreator.Run(ctx.Context)
419+
klog.Infof("Started async work creator with %d workers", ctx.Opts.AsyncWorkWorkers)
420+
}
421+
383422
bindingController := &binding.ResourceBindingController{
384-
Client: ctx.Mgr.GetClient(),
385-
DynamicClient: ctx.DynamicClientSet,
386-
EventRecorder: ctx.Mgr.GetEventRecorderFor(binding.ControllerName),
387-
RESTMapper: ctx.Mgr.GetRESTMapper(),
388-
OverrideManager: ctx.OverrideManager,
389-
InformerManager: ctx.ControlPlaneInformerManager,
390-
ResourceInterpreter: ctx.ResourceInterpreter,
391-
RateLimiterOptions: ctx.Opts.RateLimiterOptions,
423+
Client: ctx.Mgr.GetClient(),
424+
DynamicClient: ctx.DynamicClientSet,
425+
EventRecorder: ctx.Mgr.GetEventRecorderFor(binding.ControllerName),
426+
RESTMapper: ctx.Mgr.GetRESTMapper(),
427+
OverrideManager: ctx.OverrideManager,
428+
InformerManager: ctx.ControlPlaneInformerManager,
429+
ResourceInterpreter: ctx.ResourceInterpreter,
430+
RateLimiterOptions: ctx.Opts.RateLimiterOptions,
431+
EnableAsyncWorkCreation: ctx.Opts.EnableAsyncWorkCreation,
432+
AsyncWorkCreator: asyncWorkCreator,
433+
RequeueAfterFailure: rbRequeueChan,
392434
}
393435
if err := bindingController.SetupWithManager(ctx.Mgr); err != nil {
394436
return false, err
@@ -398,15 +440,21 @@ func startBindingController(ctx controllerscontext.Context) (enabled bool, err e
398440
klog.Errorf("Failed to register index for Work based on ClusterResourceBinding ID: %v", err)
399441
return false, err
400442
}
443+
444+
// CRB shares the same AsyncWorkCreator with RB, but uses its own requeue channel
445+
// The AsyncWorkCreator's requeueFunc routes to appropriate channel based on binding key prefix
401446
clusterResourceBindingController := &binding.ClusterResourceBindingController{
402-
Client: ctx.Mgr.GetClient(),
403-
DynamicClient: ctx.DynamicClientSet,
404-
EventRecorder: ctx.Mgr.GetEventRecorderFor(binding.ClusterResourceBindingControllerName),
405-
RESTMapper: ctx.Mgr.GetRESTMapper(),
406-
OverrideManager: ctx.OverrideManager,
407-
InformerManager: ctx.ControlPlaneInformerManager,
408-
ResourceInterpreter: ctx.ResourceInterpreter,
409-
RateLimiterOptions: ctx.Opts.RateLimiterOptions,
447+
Client: ctx.Mgr.GetClient(),
448+
DynamicClient: ctx.DynamicClientSet,
449+
EventRecorder: ctx.Mgr.GetEventRecorderFor(binding.ClusterResourceBindingControllerName),
450+
RESTMapper: ctx.Mgr.GetRESTMapper(),
451+
OverrideManager: ctx.OverrideManager,
452+
InformerManager: ctx.ControlPlaneInformerManager,
453+
ResourceInterpreter: ctx.ResourceInterpreter,
454+
RateLimiterOptions: ctx.Opts.RateLimiterOptions,
455+
EnableAsyncWorkCreation: ctx.Opts.EnableAsyncWorkCreation,
456+
AsyncWorkCreator: asyncWorkCreator,
457+
RequeueAfterFailure: crbRequeueChan,
410458
}
411459
if err := clusterResourceBindingController.SetupWithManager(ctx.Mgr); err != nil {
412460
return false, err
@@ -924,6 +972,8 @@ func setupControllers(ctx context.Context, mgr controllerruntime.Manager, opts *
924972
RateLimiterOptions: opts.RateLimiterOpts,
925973
GracefulEvictionTimeout: opts.GracefulEvictionTimeout,
926974
EnableClusterResourceModeling: opts.EnableClusterResourceModeling,
975+
EnableAsyncWorkCreation: opts.EnableAsyncWorkCreation,
976+
AsyncWorkWorkers: opts.AsyncWorkWorkers,
927977
HPAControllerConfiguration: opts.HPAControllerConfiguration,
928978
FederatedResourceQuotaOptions: opts.FederatedResourceQuotaOptions,
929979
FailoverConfiguration: opts.FailoverOptions,

cmd/controller-manager/app/options/options.go

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,14 @@ type Options struct {
144144
// in scenario of dynamic replica assignment based on cluster free resources.
145145
// Disable if it does not fit your cases for better performance.
146146
EnableClusterResourceModeling bool
147+
// EnableAsyncWorkCreation enables asynchronous work creation for binding controller.
148+
// When enabled, work creation tasks are submitted to an async queue and processed
149+
// by dedicated workers, improving throughput for large-scale deployments.
150+
EnableAsyncWorkCreation bool
151+
// AsyncWorkWorkers is the number of concurrent workers for asynchronous work creation.
152+
// Only effective when EnableAsyncWorkCreation is true.
153+
// Defaults to 64.
154+
AsyncWorkWorkers int
147155
// FederatedResourceQuotaOptions holds configurations for FederatedResourceQuota reconciliation.
148156
FederatedResourceQuotaOptions FederatedResourceQuotaOptions
149157
// FailoverOptions holds the Failover configurations.
@@ -228,6 +236,8 @@ func (o *Options) AddFlags(flags *pflag.FlagSet, allControllers, disabledByDefau
228236
flags.BoolVar(&o.EnableClusterResourceModeling, "enable-cluster-resource-modeling", true, "Enable means controller would build resource modeling for each cluster by syncing Nodes and Pods resources.\n"+
229237
"The resource modeling might be used by the scheduler to make scheduling decisions in scenario of dynamic replica assignment based on cluster free resources.\n"+
230238
"Disable if it does not fit your cases for better performance.")
239+
flags.BoolVar(&o.EnableAsyncWorkCreation, "enable-async-work-creation", false, "Enable asynchronous work creation for binding controller. When enabled, work creation tasks are submitted to an async queue for processing by dedicated workers, improving throughput for large-scale deployments.")
240+
flags.IntVar(&o.AsyncWorkWorkers, "async-work-workers", 64, "Number of concurrent workers for asynchronous work creation. Only effective when --enable-async-work-creation is true.")
231241

232242
o.RateLimiterOpts.AddFlags(flags)
233243
o.ProfileOpts.AddFlags(flags)

cmd/scheduler/app/options/options.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,20 @@ type Options struct {
104104

105105
// RateLimiterOpts contains the options for rate limiter.
106106
RateLimiterOpts ratelimiterflag.Options
107+
108+
// ScheduleWorkers is the number of concurrent workers for scheduling ResourceBindings.
109+
// Higher values improve throughput but increase API server load.
110+
// Defaults to 1 for backward compatibility.
111+
ScheduleWorkers int
112+
113+
// EnableAsyncBind enables asynchronous binding of scheduling results.
114+
// When enabled, the scheduler submits binding requests to an async queue
115+
// for processing by dedicated workers, improving throughput.
116+
EnableAsyncBind bool
117+
// AsyncBindWorkers is the number of concurrent workers for asynchronous binding.
118+
// Only effective when EnableAsyncBind is true.
119+
// Defaults to 32.
120+
AsyncBindWorkers int
107121
}
108122

109123
// NewOptions builds the default scheduler options.
@@ -163,6 +177,9 @@ func (o *Options) AddFlags(fs *pflag.FlagSet) {
163177
fs.StringSliceVar(&o.Plugins, "plugins", []string{"*"},
164178
fmt.Sprintf("A list of plugins to enable. '*' enables all build-in and customized plugins, 'foo' enables the plugin named 'foo', '*,-foo' disables the plugin named 'foo'.\nAll build-in plugins: %s.", strings.Join(frameworkplugins.NewInTreeRegistry().FactoryNames(), ",")))
165179
fs.StringVar(&o.SchedulerName, "scheduler-name", scheduler.DefaultScheduler, "SchedulerName represents the name of the scheduler. default is 'default-scheduler'.")
180+
fs.IntVar(&o.ScheduleWorkers, "schedule-workers", 1, "Number of concurrent workers for scheduling ResourceBindings. Higher values improve throughput but increase API server load. Defaults to 1 for backward compatibility.")
181+
fs.BoolVar(&o.EnableAsyncBind, "enable-async-bind", false, "Enable asynchronous binding of scheduling results. When enabled, the scheduler submits binding requests to an async queue for processing by dedicated workers, improving throughput.")
182+
fs.IntVar(&o.AsyncBindWorkers, "async-bind-workers", 32, "Number of concurrent workers for asynchronous binding. Only effective when --enable-async-bind is true.")
166183
features.FeatureGate.AddFlag(fs)
167184
o.ProfileOpts.AddFlags(fs)
168185
o.RateLimiterOpts.AddFlags(fs)

cmd/scheduler/app/scheduler.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,9 @@ func run(ctx context.Context, opts *options.Options, registryOptions ...Option)
188188
scheduler.WithEnableSchedulerPlugin(opts.Plugins),
189189
scheduler.WithSchedulerName(opts.SchedulerName),
190190
scheduler.WithRateLimiterOptions(opts.RateLimiterOpts),
191+
scheduler.WithScheduleWorkers(opts.ScheduleWorkers),
192+
scheduler.WithEnableAsyncBind(opts.EnableAsyncBind),
193+
scheduler.WithAsyncBindWorkers(opts.AsyncBindWorkers),
191194
)
192195
if err != nil {
193196
return fmt.Errorf("couldn't create scheduler: %w", err)

pkg/apis/work/v1alpha2/well_known_constants.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,11 @@ const (
5858
// - Manifest in Work object: describes the name of ClusterResourceBinding which the manifest derived from.
5959
ClusterResourceBindingAnnotationKey = "clusterresourcebinding.karmada.io/name"
6060

61+
// TargetClustersHashAnnotation is added to ResourceBinding/ClusterResourceBinding to record
62+
// the hash of target clusters. This is used to determine if orphan work check is needed.
63+
// Only when target clusters change (hash differs), the orphan check will be performed.
64+
TargetClustersHashAnnotation = "resourcebinding.karmada.io/target-clusters-hash"
65+
6166
// BindingManagedByLabel is added to ResourceBinding to represent what kind of resource manages this Binding.
6267
BindingManagedByLabel = "binding.karmada.io/managed-by"
6368

0 commit comments

Comments
 (0)