kubewharf · JustinChengLZ · Jan 26, 2026 · Mar 18, 2026 · Mar 18, 2026 · Mar 18, 2026
@@ -20,6 +20,7 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	cliflag "k8s.io/component-base/cli/flag"
 
+	"github.com/kubewharf/katalyst-api/pkg/consts"
 	"github.com/kubewharf/katalyst-core/cmd/katalyst-agent/app/options/qrm/gpustrategy"
 	qrmconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/qrm"
 )
@@ -28,10 +29,19 @@ type GPUOptions struct {
 	PolicyName                      string
 	GPUDeviceNames                  []string
 	GPUMemoryAllocatablePerGPU      string
+	MilliGPUAllocatablePerGPU       string
 	SkipGPUStateCorruption          bool
 	RDMADeviceNames                 []string
 	RequiredDeviceAffinity          bool
 	EnableKubeletCheckpointFallback bool
+	VirtualGPUPrefersSpreading      bool
+	VirtualGPUMemoryWeightEnvName   string
+	VirtualGPUComputeWeightEnvName  string
+	VirtualGPUTimesliceEnvName      string
+	VirtualGPUComputePolicyEnvName  string
+	GPUSelectionResultAnnotationKey string
+	VirtualGPUTimesliceEnvValue     int
+	VirtualGPUComputePolicyEnvValue int
 
 	GPUStrategyOptions *gpustrategy.GPUStrategyOptions
 }
@@ -41,10 +51,15 @@ func NewGPUOptions() *GPUOptions {
 		PolicyName:                      "static",
 		GPUDeviceNames:                  []string{"nvidia.com/gpu"},
 		GPUMemoryAllocatablePerGPU:      "100",
+		MilliGPUAllocatablePerGPU:       "1000",
 		RDMADeviceNames:                 []string{},
 		GPUStrategyOptions:              gpustrategy.NewGPUStrategyOptions(),
 		RequiredDeviceAffinity:          true,
 		EnableKubeletCheckpointFallback: true,
+		VirtualGPUPrefersSpreading:      false,
+		GPUSelectionResultAnnotationKey: consts.PodAnnotationGPUSelectionResultKey,
+		VirtualGPUTimesliceEnvValue:     300,
+		VirtualGPUComputePolicyEnvValue: 0,
 	}
 }
 
@@ -56,13 +71,31 @@ func (o *GPUOptions) AddFlags(fss *cliflag.NamedFlagSets) {
 	fs.StringSliceVar(&o.GPUDeviceNames, "gpu-resource-names", o.GPUDeviceNames, "The name of the GPU resource")
 	fs.StringVar(&o.GPUMemoryAllocatablePerGPU, "gpu-memory-allocatable-per-gpu",
 		o.GPUMemoryAllocatablePerGPU, "The total memory allocatable for each GPU, e.g. 100")
+	fs.StringVar(&o.MilliGPUAllocatablePerGPU, "gpu-milligpu-allocatable-per-gpu",
+		o.MilliGPUAllocatablePerGPU, "The total milliGPU allocatable for each GPU, e.g. 1000")
 	fs.BoolVar(&o.SkipGPUStateCorruption, "skip-gpu-state-corruption",
 		o.SkipGPUStateCorruption, "skip gpu state corruption, and it will be used after updating state properties")
 	fs.StringSliceVar(&o.RDMADeviceNames, "rdma-resource-names", o.RDMADeviceNames, "The name of the RDMA resource")
 	fs.BoolVar(&o.RequiredDeviceAffinity, "gpu-required-device-affinity", o.RequiredDeviceAffinity,
 		"required device affinity, and when true it will cause pods to admit fail if unable to meet device affinity")
 	fs.BoolVar(&o.EnableKubeletCheckpointFallback, "enable-kubelet-checkpoint-fallback", o.EnableKubeletCheckpointFallback,
 		"enable fallback to kubelet device plugin checkpoint for device allocation.")
+	fs.BoolVar(&o.VirtualGPUPrefersSpreading, "virtual-gpu-prefers-spreading",
+		o.VirtualGPUPrefersSpreading, "whether virtual GPU prefers spreading across devices")
+	fs.StringVar(&o.VirtualGPUMemoryWeightEnvName, "virtual-gpu-memory-weight-env-name",
+		o.VirtualGPUMemoryWeightEnvName, "The environment variable name for Virtual GPU memory weight")
+	fs.StringVar(&o.VirtualGPUComputeWeightEnvName, "virtual-gpu-compute-weight-env-name",
+		o.VirtualGPUComputeWeightEnvName, "The environment variable name for Virtual GPU compute weight")
+	fs.StringVar(&o.VirtualGPUTimesliceEnvName, "virtual-gpu-timeslice-env-name",
+		o.VirtualGPUTimesliceEnvName, "The environment variable name for Virtual GPU timeslice")
+	fs.StringVar(&o.VirtualGPUComputePolicyEnvName, "virtual-gpu-compute-policy-env-name",
+		o.VirtualGPUComputePolicyEnvName, "The environment variable name for Virtual GPU compute policy")
+	fs.StringVar(&o.GPUSelectionResultAnnotationKey, "gpu-selection-result-annotation-key",
+		o.GPUSelectionResultAnnotationKey, "The annotation key for GPU selection result")
+	fs.IntVar(&o.VirtualGPUTimesliceEnvValue, "virtual-gpu-timeslice-env-value",
+		o.VirtualGPUTimesliceEnvValue, "The environment variable value for Virtual GPU timeslice")
+	fs.IntVar(&o.VirtualGPUComputePolicyEnvValue, "virtual-gpu-compute-policy-env-value",
+		o.VirtualGPUComputePolicyEnvValue, "The environment variable value for Virtual GPU compute policy")
 	o.GPUStrategyOptions.AddFlags(fss)
 }
 
@@ -74,12 +107,25 @@ func (o *GPUOptions) ApplyTo(conf *qrmconfig.GPUQRMPluginConfig) error {
 		return err
 	}
 	conf.GPUMemoryAllocatablePerGPU = gpuMemory
+	milliGPU, err := resource.ParseQuantity(o.MilliGPUAllocatablePerGPU)
+	if err != nil {
+		return err
+	}
+	conf.MilliGPUAllocatablePerGPU = milliGPU
 	conf.SkipGPUStateCorruption = o.SkipGPUStateCorruption
 	conf.RDMADeviceNames = o.RDMADeviceNames
+	conf.RequiredDeviceAffinity = o.RequiredDeviceAffinity
+	conf.EnableKubeletCheckpointFallback = o.EnableKubeletCheckpointFallback
+	conf.VirtualGPUPrefersSpreading = o.VirtualGPUPrefersSpreading
+	conf.VirtualGPUMemoryWeightEnvName = o.VirtualGPUMemoryWeightEnvName
+	conf.VirtualGPUComputeWeightEnvName = o.VirtualGPUComputeWeightEnvName
+	conf.VirtualGPUTimesliceEnvName = o.VirtualGPUTimesliceEnvName
+	conf.VirtualGPUComputePolicyEnvName = o.VirtualGPUComputePolicyEnvName
+	conf.VirtualGPUTimesliceEnvValue = o.VirtualGPUTimesliceEnvValue
+	conf.VirtualGPUComputePolicyEnvValue = o.VirtualGPUComputePolicyEnvValue
+	conf.GPUSelectionResultAnnotationKey = o.GPUSelectionResultAnnotationKey
 	if err := o.GPUStrategyOptions.ApplyTo(conf.GPUStrategyConfig); err != nil {
 		return err
 	}
-	conf.RequiredDeviceAffinity = o.RequiredDeviceAffinity
-	conf.EnableKubeletCheckpointFallback = o.EnableKubeletCheckpointFallback
 	return nil
 }
@@ -42,6 +42,7 @@ type MemoryOptions struct {
 	EnableNonBindingShareCoresMemoryResourceCheck bool
 	EnableNUMAAllocationReactor                   bool
 	NUMABindResultResourceAllocationAnnotationKey string
+	ExtraMemoryResources                          []string
 
 	SockMemOptions
 	LogCacheOptions
@@ -157,6 +158,7 @@ func NewMemoryOptions() *MemoryOptions {
 			EnabledQoS:                 []string{apiconsts.PodAnnotationQoSLevelSharedCores},
 			MonGroupEnabledClosIDs:     []string{},
 		},
+		ExtraMemoryResources: []string{},
 	}
 }
 
@@ -235,6 +237,8 @@ func (o *MemoryOptions) AddFlags(fss *cliflag.NamedFlagSets) {
 		o.MonGroupEnabledClosIDs, "enabled-closid mon-groups")
 	fs.Float64Var(&o.MonGroupMaxCountRatio, "resctrl-mon-groups-max-count-ratio",
 		o.MonGroupMaxCountRatio, "ratio of mon_groups max count")
+	fs.StringSliceVar(&o.ExtraMemoryResources, "extra-memory-resources", o.ExtraMemoryResources,
+		"extra memory resources such as hugepages-*")
 }
 
 func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error {
@@ -273,6 +277,7 @@ func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error {
 	conf.EnabledQoS = o.EnabledQoS
 	conf.MonGroupEnabledClosIDs = o.MonGroupEnabledClosIDs
 	conf.MonGroupMaxCountRatio = o.MonGroupMaxCountRatio
+	conf.ExtraMemoryResources = o.ExtraMemoryResources
 
 	for _, reservation := range o.ReservedNumaMemory {
 		conf.ReservedNumaMemory[reservation.NumaNode] = reservation.Limits

@@ -20,7 +20,7 @@ require (
 	github.com/google/uuid v1.3.0
 	github.com/h2non/gock v1.2.0
 	github.com/klauspost/cpuid/v2 v2.2.6
-	github.com/kubewharf/katalyst-api v0.5.11-0.20260423040236-f1a2330d266e
+	github.com/kubewharf/katalyst-api v0.5.11-0.20260427134811-f4e50b5266f4
 	github.com/moby/sys/mountinfo v0.6.2
 	github.com/montanaflynn/stats v0.7.1
 	github.com/opencontainers/runc v1.1.6
@@ -176,6 +176,7 @@ require (
 )
 
 replace (
+	github.com/kubewharf/katalyst-api => github.com/luomingmeng/katalyst-api v0.0.0-20260428035600-507ef616baa0
 	k8s.io/api => k8s.io/api v0.24.6
 	k8s.io/apiextensions-apiserver => k8s.io/apiextensions-apiserver v0.24.6
 	k8s.io/apimachinery => k8s.io/apimachinery v0.24.6
@@ -197,7 +198,7 @@ replace (
 	k8s.io/kube-proxy => k8s.io/kube-proxy v0.24.6
 	k8s.io/kube-scheduler => k8s.io/kube-scheduler v0.24.6
 	k8s.io/kubectl => k8s.io/kubectl v0.24.6
-	k8s.io/kubelet => github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2
+	k8s.io/kubelet => github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.3
 	k8s.io/kubernetes => k8s.io/kubernetes v1.24.6
 	k8s.io/legacy-cloud-providers => k8s.io/legacy-cloud-providers v0.24.6
 	k8s.io/metrics => k8s.io/metrics v0.24.6

@@ -574,10 +574,8 @@ github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
 github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
 github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
 github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
-github.com/kubewharf/katalyst-api v0.5.11-0.20260423040236-f1a2330d266e h1:/0LP1rzsXEI09g3eqUZCJcyv/XGU8MofdFyi7NBtZA0=
-github.com/kubewharf/katalyst-api v0.5.11-0.20260423040236-f1a2330d266e/go.mod h1:BZMVGVl3EP0eCn5xsDgV41/gjYkoh43abIYxrB10e3k=
-github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2 h1:2KLMzgntDypiFJRX4fSQJCD+a6zIgHuhcAzd/7nAGmU=
-github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.2/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
+github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.3 h1:oFwpVVeDESqgzkse3iSODzHRKX495VvUgslu2hhepCc=
+github.com/kubewharf/kubelet v1.24.6-kubewharf-pre.3/go.mod h1:MxbSZUx3wXztFneeelwWWlX7NAAStJ6expqq7gY2J3c=
 github.com/kyoh86/exportloopref v0.1.7/go.mod h1:h1rDl2Kdj97+Kwh4gdz3ujE7XHmH51Q0lUiZ1z4NLj8=
 github.com/lib/pq v1.0.0/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
 github.com/libopenstorage/openstorage v1.0.0/go.mod h1:Sp1sIObHjat1BeXhfMqLZ14wnOzEhNx2YQedreMcUyc=
@@ -587,6 +585,8 @@ github.com/lightstep/lightstep-tracer-go v0.18.1/go.mod h1:jlF1pusYV4pidLvZ+XD0U
 github.com/lithammer/dedent v1.1.0/go.mod h1:jrXYCQtgg0nJiN+StA2KgR7w6CiQNv9Fd/Z9BP0jIOc=
 github.com/logrusorgru/aurora v0.0.0-20181002194514-a7b3b318ed4e/go.mod h1:7rIyQOR62GCctdiQpZ/zOJlFyk6y+94wXzv6RNZgaR4=
 github.com/lpabon/godbc v0.1.1/go.mod h1:Jo9QV0cf3U6jZABgiJ2skINAXb9j8m51r07g4KI92ZA=
+github.com/luomingmeng/katalyst-api v0.0.0-20260428035600-507ef616baa0 h1:1xk6xC1H9gzGehH8WBeT/9K2PDOtL/PsViOo8pdZHlQ=
+github.com/luomingmeng/katalyst-api v0.0.0-20260428035600-507ef616baa0/go.mod h1:BZMVGVl3EP0eCn5xsDgV41/gjYkoh43abIYxrB10e3k=
 github.com/lyft/protoc-gen-validate v0.0.13/go.mod h1:XbGvPuh87YZc5TdIa2/I4pLk0QoUACkjt2znoq26NVQ=
 github.com/magiconair/properties v1.8.0/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=
 github.com/magiconair/properties v1.8.1/go.mod h1:PppfXfuXeibc/6YijjN8zIbojt8czPbwD3XqdrwzmxQ=

@@ -145,8 +145,8 @@ func (am *AllocationMeta) GetSpecifiedNUMABindingNUMAID() (int, error) {
 	return GetSpecifiedNUMABindingNUMAID(am.Annotations)
 }
 
-// SetSpecifiedNUMABindingNUMAID set the numa id for AllocationInfo
-func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaID uint64) {
+// SetSpecifiedNUMABindingNUMAID set the numa ids for AllocationInfo
+func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaIDs []uint64) {
 	if am == nil {
 		return
 	}
@@ -155,7 +155,12 @@ func (am *AllocationMeta) SetSpecifiedNUMABindingNUMAID(numaID uint64) {
 		am.Annotations = make(map[string]string)
 	}
 
-	am.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint] = machine.NewCPUSet(int(numaID)).String()
+	intIDs := make([]int, len(numaIDs))
+	for i, id := range numaIDs {
+		intIDs[i] = int(id)
+	}
+
+	am.Annotations[cpuconsts.CPUStateAnnotationKeyNUMAHint] = machine.NewCPUSet(intIDs...).String()
 }
 
 // GetSpecifiedNUMABindingPoolName get numa_binding pool name
@@ -316,3 +321,14 @@ func (am *AllocationMeta) CheckDedicatedPool() bool {
 	}
 	return am.OwnerPoolName == PoolNameDedicated
 }
+
+// CheckDistributeEvenlyAcrossNuma returns true if the AllocationInfo is for pod with distribute evenly across numa
+// annotation enabled.
+func (am *AllocationMeta) CheckDistributeEvenlyAcrossNuma() bool {
+	if am == nil {
+		return false
+	}
+
+	return am.Annotations[consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNuma] ==
+		consts.PodAnnotationCPUEnhancementDistributeEvenlyAcrossNumaEnable
+}
@@ -284,7 +284,7 @@ func (p *DynamicPolicy) reclaimedCoresAllocationHandler(ctx context.Context,
 			// set reclaimed numa_binding NUMA ID to allocationInfo
 			if req.Hint != nil && len(req.Hint.Nodes) == 1 && (reclaimActualBindingNUMAs.Contains(int(req.Hint.Nodes[0])) ||
 				!nonReclaimActualBindingNUMAs.Equals(machine.NewCPUSet(int(req.Hint.Nodes[0])))) {
-				allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0])
+				allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes)
 			}
 		}
 
@@ -470,7 +470,7 @@ func (p *DynamicPolicy) dedicatedCoresWithNUMABindingAllocationHandler(ctx conte
 			return nil, fmt.Errorf("numa binding without numa exclusive allocation result numa node size is %d, "+
 				"not equal to 1", len(req.Hint.Nodes))
 		}
-		allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes[0])
+		allocationInfo.SetSpecifiedNUMABindingNUMAID(req.Hint.Nodes)
 	}
 
 	// update pod entries directly.
@@ -741,7 +741,7 @@ func (p *DynamicPolicy) allocateSharedNumaBindingCPUs(req *pluginapi.ResourceReq
 		InitTimestamp:   time.Now().Format(util.QRMTimeFormat),
 		RequestQuantity: reqFloat64,
 	}
-	allocationInfo.SetSpecifiedNUMABindingNUMAID(hint.Nodes[0])
+	allocationInfo.SetSpecifiedNUMABindingNUMAID(hint.Nodes)
 
 	if util.PodInplaceUpdateResizing(req) {
 		originAllocationInfo := p.state.GetAllocationInfo(allocationInfo.PodUid, allocationInfo.ContainerName)

@@ -277,7 +277,7 @@ func TestAllocateSharedNumaBindingCPUs(t *testing.T) {
 				0: machine.NewCPUSet(0, 1),
 			},
 		}
-		originAllocationInfo.SetSpecifiedNUMABindingNUMAID(0)
+		originAllocationInfo.SetSpecifiedNUMABindingNUMAID([]uint64{0})
 
 		policy.state.SetAllocationInfo(podUID, containerName, originAllocationInfo, false)
 

@@ -161,7 +161,9 @@ func (p *DynamicPolicy) dedicatedCoresWithNUMABindingHintHandler(_ context.Conte
 			(*commonstate.AllocationMeta).CheckDedicatedNUMABindingNUMAExclusive))
 
 		var extraErr error
-		hints, extraErr = util.GetHintsFromExtraStateFile(req.PodName, string(v1.ResourceCPU), p.extraStateFileAbsPath, availableNUMAs)
+		hints, extraErr = util.GetHintsFromExtraStateFile(req.PodName, p.extraStateFileAbsPath, availableNUMAs, []v1.ResourceName{
+			v1.ResourceCPU,
+		})
 		if extraErr != nil {
 			general.Infof("pod: %s/%s, container: %s GetHintsFromExtraStateFile failed with error: %v",
 				req.PodNamespace, req.PodName, req.ContainerName, extraErr)
@@ -303,10 +305,10 @@ func (p *DynamicPolicy) calculateHints(
 		maskCount := mask.Count()
 		if maskCount < minNUMAsCountNeeded {
 			return
-		} else if numaBinding && !numaExclusive && numaNumber <= 1 && maskCount > 1 {
+		} else if numaBinding && !numaExclusive && maskCount > 1 && numaNumber <= 1 {
 			// because it's hard to control memory allocation accurately,
 			// we only support numa_binding but not exclusive container with request smaller than 1 NUMA
-			// pods with distribute evenly across numa annotation can occupy more than 1 NUMA
+			// pods with numa number more than 1 can occupy more than 1 NUMA
 			return
 		}
 
@@ -371,7 +373,6 @@ func (p *DynamicPolicy) calculateHints(
 	if numaNumber != 0 {
 		minAffinitySize = numaNumber
 	}
-
 	// Update hint to be preferred if they have minimum number of NUMA nodes
 	for _, hint := range availableNumaHints {
 		if len(hint.Nodes) == minAffinitySize {