karmada-io
diff --git a/‎.gitignore‎
Lines changed: 3 additions & 0 deletions b/‎.gitignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎cmd/controller-manager/app/controllermanager.go‎
Lines changed: 66 additions & 16 deletions b/‎cmd/controller-manager/app/controllermanager.go‎
Lines changed: 66 additions & 16 deletions
diff --git a/‎cmd/controller-manager/app/options/options.go‎
Lines changed: 10 additions & 0 deletions b/‎cmd/controller-manager/app/options/options.go‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎cmd/scheduler/app/options/options.go‎
Lines changed: 17 additions & 0 deletions b/‎cmd/scheduler/app/options/options.go‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎cmd/scheduler/app/scheduler.go‎
Lines changed: 3 additions & 0 deletions b/‎cmd/scheduler/app/scheduler.go‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎pkg/apis/work/v1alpha2/well_known_constants.go‎
Lines changed: 5 additions & 0 deletions b/‎pkg/apis/work/v1alpha2/well_known_constants.go‎
Lines changed: 5 additions & 0 deletions
@@ -25,3 +25,6 @@ _output/
 # sub chart tgz
 charts/karmada/charts
 charts/karmada-operator/charts
+
+# Claude Code local context
+CLAUDE.local.md
@@ -19,6 +19,7 @@ package app
 import (
 	"context"
 	"flag"
+	"strings"
 	"time"
 
 	"github.com/spf13/cobra"
@@ -380,15 +381,56 @@ func startBindingController(ctx controllerscontext.Context) (enabled bool, err e
 		klog.Errorf("Failed to register index for Work based on ResourceBinding ID: %v", err)
 		return false, err
 	}
+
+	// Create requeue channels for async work creation failures; both stay nil when async work creation is disabled.
+	// RB and CRB use separate channels but share the same AsyncWorkCreator.
+	var rbRequeueChan, crbRequeueChan chan string
+	if ctx.Opts.EnableAsyncWorkCreation {
+		rbRequeueChan = make(chan string, binding.DefaultAsyncWorkQueueSize)
+		crbRequeueChan = make(chan string, binding.DefaultAsyncWorkQueueSize)
+	}
+
+	// Create and start AsyncWorkCreator if enabled
+	var asyncWorkCreator *binding.AsyncWorkCreator
+	if ctx.Opts.EnableAsyncWorkCreation {
+		// Create a requeue function that routes each key to the appropriate channel based on its prefix.
+		// CRB keys carry a "crb:" prefix, which is stripped before enqueueing; any other key (e.g. "namespace/name") is treated as an RB key.
+		requeueFunc := func(bindingKey string) {
+			var targetChan chan string
+			if strings.HasPrefix(bindingKey, "crb:") {
+				targetChan = crbRequeueChan
+				bindingKey = strings.TrimPrefix(bindingKey, "crb:")
+			} else {
+				targetChan = rbRequeueChan
+			}
+			select {
+			case targetChan <- bindingKey:
+			default:
+				klog.Warningf("Requeue channel is full, dropping requeue for %s", bindingKey)
+			}
+		}
+		asyncWorkCreator = binding.NewAsyncWorkCreator(
+			ctx.Mgr.GetClient(),
+			ctx.Mgr.GetEventRecorderFor(binding.ControllerName),
+			ctx.Opts.AsyncWorkWorkers,
+			requeueFunc,
+		)
+		go asyncWorkCreator.Run(ctx.Context)
+		klog.Infof("Started async work creator with %d workers", ctx.Opts.AsyncWorkWorkers)
+	}
+
 	bindingController := &binding.ResourceBindingController{
-		Client:              ctx.Mgr.GetClient(),
-		DynamicClient:       ctx.DynamicClientSet,
-		EventRecorder:       ctx.Mgr.GetEventRecorderFor(binding.ControllerName),
-		RESTMapper:          ctx.Mgr.GetRESTMapper(),
-		OverrideManager:     ctx.OverrideManager,
-		InformerManager:     ctx.ControlPlaneInformerManager,
-		ResourceInterpreter: ctx.ResourceInterpreter,
-		RateLimiterOptions:  ctx.Opts.RateLimiterOptions,
+		Client:                  ctx.Mgr.GetClient(),
+		DynamicClient:           ctx.DynamicClientSet,
+		EventRecorder:           ctx.Mgr.GetEventRecorderFor(binding.ControllerName),
+		RESTMapper:              ctx.Mgr.GetRESTMapper(),
+		OverrideManager:         ctx.OverrideManager,
+		InformerManager:         ctx.ControlPlaneInformerManager,
+		ResourceInterpreter:     ctx.ResourceInterpreter,
+		RateLimiterOptions:      ctx.Opts.RateLimiterOptions,
+		EnableAsyncWorkCreation: ctx.Opts.EnableAsyncWorkCreation,
+		AsyncWorkCreator:        asyncWorkCreator,
+		RequeueAfterFailure:     rbRequeueChan,
 	}
 	if err := bindingController.SetupWithManager(ctx.Mgr); err != nil {
 		return false, err
@@ -398,15 +440,21 @@ func startBindingController(ctx controllerscontext.Context) (enabled bool, err e
 		klog.Errorf("Failed to register index for Work based on ClusterResourceBinding ID: %v", err)
 		return false, err
 	}
+
+	// CRB shares the same AsyncWorkCreator with RB but uses its own requeue channel.
+	// The AsyncWorkCreator's requeueFunc routes each key to the appropriate channel based on the binding key prefix.
 	clusterResourceBindingController := &binding.ClusterResourceBindingController{
-		Client:              ctx.Mgr.GetClient(),
-		DynamicClient:       ctx.DynamicClientSet,
-		EventRecorder:       ctx.Mgr.GetEventRecorderFor(binding.ClusterResourceBindingControllerName),
-		RESTMapper:          ctx.Mgr.GetRESTMapper(),
-		OverrideManager:     ctx.OverrideManager,
-		InformerManager:     ctx.ControlPlaneInformerManager,
-		ResourceInterpreter: ctx.ResourceInterpreter,
-		RateLimiterOptions:  ctx.Opts.RateLimiterOptions,
+		Client:                  ctx.Mgr.GetClient(),
+		DynamicClient:           ctx.DynamicClientSet,
+		EventRecorder:           ctx.Mgr.GetEventRecorderFor(binding.ClusterResourceBindingControllerName),
+		RESTMapper:              ctx.Mgr.GetRESTMapper(),
+		OverrideManager:         ctx.OverrideManager,
+		InformerManager:         ctx.ControlPlaneInformerManager,
+		ResourceInterpreter:     ctx.ResourceInterpreter,
+		RateLimiterOptions:      ctx.Opts.RateLimiterOptions,
+		EnableAsyncWorkCreation: ctx.Opts.EnableAsyncWorkCreation,
+		AsyncWorkCreator:        asyncWorkCreator,
+		RequeueAfterFailure:     crbRequeueChan,
 	}
 	if err := clusterResourceBindingController.SetupWithManager(ctx.Mgr); err != nil {
 		return false, err
@@ -924,6 +972,8 @@ func setupControllers(ctx context.Context, mgr controllerruntime.Manager, opts *
 			RateLimiterOptions:                opts.RateLimiterOpts,
 			GracefulEvictionTimeout:           opts.GracefulEvictionTimeout,
 			EnableClusterResourceModeling:     opts.EnableClusterResourceModeling,
+			EnableAsyncWorkCreation:           opts.EnableAsyncWorkCreation,
+			AsyncWorkWorkers:                  opts.AsyncWorkWorkers,
 			HPAControllerConfiguration:        opts.HPAControllerConfiguration,
 			FederatedResourceQuotaOptions:     opts.FederatedResourceQuotaOptions,
 			FailoverConfiguration:             opts.FailoverOptions,
 
@@ -144,6 +144,14 @@ type Options struct {
 	// in scenario of dynamic replica assignment based on cluster free resources.
 	// Disable if it does not fit your cases for better performance.
 	EnableClusterResourceModeling bool
+	// EnableAsyncWorkCreation enables asynchronous work creation for binding controller.
+	// When enabled, work creation tasks are submitted to an async queue and processed
+	// by dedicated workers, improving throughput for large-scale deployments.
+	EnableAsyncWorkCreation bool
+	// AsyncWorkWorkers is the number of concurrent workers for asynchronous work creation.
+	// Only effective when EnableAsyncWorkCreation is true.
+	// Defaults to 64.
+	AsyncWorkWorkers int
 	// FederatedResourceQuotaOptions holds configurations for FederatedResourceQuota reconciliation.
 	FederatedResourceQuotaOptions FederatedResourceQuotaOptions
 	// FailoverOptions holds the Failover configurations.
@@ -228,6 +236,8 @@ func (o *Options) AddFlags(flags *pflag.FlagSet, allControllers, disabledByDefau
 	flags.BoolVar(&o.EnableClusterResourceModeling, "enable-cluster-resource-modeling", true, "Enable means controller would build resource modeling for each cluster by syncing Nodes and Pods resources.\n"+
 		"The resource modeling might be used by the scheduler to make scheduling decisions in scenario of dynamic replica assignment based on cluster free resources.\n"+
 		"Disable if it does not fit your cases for better performance.")
+	flags.BoolVar(&o.EnableAsyncWorkCreation, "enable-async-work-creation", false, "Enable asynchronous work creation for binding controller. When enabled, work creation tasks are submitted to an async queue for processing by dedicated workers, improving throughput for large-scale deployments.")
+	flags.IntVar(&o.AsyncWorkWorkers, "async-work-workers", 64, "Number of concurrent workers for asynchronous work creation. Only effective when --enable-async-work-creation is true.")
 
 	o.RateLimiterOpts.AddFlags(flags)
 	o.ProfileOpts.AddFlags(flags)
 
@@ -104,6 +104,20 @@ type Options struct {
 
 	// RateLimiterOpts contains the options for rate limiter.
 	RateLimiterOpts ratelimiterflag.Options
+
+	// ScheduleWorkers is the number of concurrent workers for scheduling ResourceBindings.
+	// Higher values improve throughput but increase API server load.
+	// Defaults to 1 for backward compatibility.
+	ScheduleWorkers int
+
+	// EnableAsyncBind enables asynchronous binding of scheduling results.
+	// When enabled, the scheduler submits binding requests to an async queue
+	// for processing by dedicated workers, improving throughput.
+	EnableAsyncBind bool
+	// AsyncBindWorkers is the number of concurrent workers for asynchronous binding.
+	// Only effective when EnableAsyncBind is true.
+	// Defaults to 32.
+	AsyncBindWorkers int
 }
 
 // NewOptions builds an default scheduler options.
@@ -163,6 +177,9 @@ func (o *Options) AddFlags(fs *pflag.FlagSet) {
 	fs.StringSliceVar(&o.Plugins, "plugins", []string{"*"},
 		fmt.Sprintf("A list of plugins to enable. '*' enables all build-in and customized plugins, 'foo' enables the plugin named 'foo', '*,-foo' disables the plugin named 'foo'.\nAll build-in plugins: %s.", strings.Join(frameworkplugins.NewInTreeRegistry().FactoryNames(), ",")))
 	fs.StringVar(&o.SchedulerName, "scheduler-name", scheduler.DefaultScheduler, "SchedulerName represents the name of the scheduler. default is 'default-scheduler'.")
+	fs.IntVar(&o.ScheduleWorkers, "schedule-workers", 1, "Number of concurrent workers for scheduling ResourceBindings. Higher values improve throughput but increase API server load. Defaults to 1 for backward compatibility.")
+	fs.BoolVar(&o.EnableAsyncBind, "enable-async-bind", false, "Enable asynchronous binding of scheduling results. When enabled, the scheduler submits binding requests to an async queue for processing by dedicated workers, improving throughput.")
+	fs.IntVar(&o.AsyncBindWorkers, "async-bind-workers", 32, "Number of concurrent workers for asynchronous binding. Only effective when --enable-async-bind is true.")
 	features.FeatureGate.AddFlag(fs)
 	o.ProfileOpts.AddFlags(fs)
 	o.RateLimiterOpts.AddFlags(fs)
 
@@ -188,6 +188,9 @@ func run(ctx context.Context, opts *options.Options, registryOptions ...Option)
 		scheduler.WithEnableSchedulerPlugin(opts.Plugins),
 		scheduler.WithSchedulerName(opts.SchedulerName),
 		scheduler.WithRateLimiterOptions(opts.RateLimiterOpts),
+		scheduler.WithScheduleWorkers(opts.ScheduleWorkers),
+		scheduler.WithEnableAsyncBind(opts.EnableAsyncBind),
+		scheduler.WithAsyncBindWorkers(opts.AsyncBindWorkers),
 	)
 	if err != nil {
 		return fmt.Errorf("couldn't create scheduler: %w", err)
 
@@ -58,6 +58,11 @@ const (
 	// - Manifest in Work object: describes the name of ClusterResourceBinding which the manifest derived from.
 	ClusterResourceBindingAnnotationKey = "clusterresourcebinding.karmada.io/name"
 
+	// TargetClustersHashAnnotation is added to ResourceBinding/ClusterResourceBinding to record
+	// the hash of its target clusters. It is used to decide whether an orphan Work check is needed:
+	// the check is performed only when the target clusters change (i.e. the hash differs).
+	TargetClustersHashAnnotation = "resourcebinding.karmada.io/target-clusters-hash"
+
 	// BindingManagedByLabel is added to ResourceBinding to represent what kind of resource manages this Binding.
 	BindingManagedByLabel = "binding.karmada.io/managed-by"