@@ -16,12 +16,53 @@ limitations under the License.
1616
1717package v1alpha1
1818
19- import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
20-
21- // PoolPolicy defines the proactive provisioning policy for idle launcher pods.
19+ import (
20+ "k8s.io/apimachinery/pkg/api/resource"
21+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
22+ )
23+
24+ // LauncherPoolPolicy defines the proactive provisioning policy for the idle launcher pods.
25+ // The LauncherPoolPolicy semantics should be defined as below:
26+ //
27+ // ## Multiple LauncherPoolPolicy Objects
28+ //
29+ // 1. **Additive Semantics**
30+ // - Multiple LauncherPoolPolicy objects follow additive semantics
31+ // - All policies across all objects are evaluated together
32+ // - Rules from different objects are combined to form the complete policy set
33+ //
34+ // 2. **Zero Objects Behavior**
35+ // - When no LauncherPoolPolicy objects exist, no proactive provisioning occurs
36+ // - System falls back to on-demand launcher pod creation
37+ //
38+ // ## Multiple LauncherPoolForNodeType Matching
39+ //
40+ // 1. **Multiple Matches**
41+ // - When multiple LauncherPoolForNodeType structs match a single Node (across same or different LauncherPoolPolicy objects):
42+ // - For each unique combination of (Node, Accelerator, LauncherConfig), select the highest LauncherCount value
43+ // - This forms the effective policy for that specific tuple
44+ //
45+ // 2. **Zero Matches**
46+ // - When no LauncherPoolForNodeType matches a Node:
47+ // - No pre-provisioning policy applies to that Node
48+ // - Launcher pods for that Node are created on-demand
49+ //
50+ // ## Multiple CountForLauncher with Same LauncherConfig
51+ //
52+ // 1. **Duplicate LauncherConfig Names**
53+ // - When multiple CountForLauncher structs specify the same LauncherConfigName for the same (Node, Accelerator) combination:
54+ // - Select the highest LauncherCount value among all matching entries
55+ // - This determines the target pool size for that LauncherConfig on that Node with that Accelerator
56+ //
57+ // 2. **Zero Matching CountForLauncher**
58+ // - When no CountForLauncher matches a specific (Node, Accelerator, LauncherConfig) tuple:
59+ // - No pre-provisioning occurs for that specific combination
60+ // - Launcher pods are created on-demand when needed
61+ //
62+ // +genclient
2263// +kubebuilder:object:root=true
2364// +kubebuilder:subresource:status
24- // +kubebuilder:resource:path=launcherpoolpolicies,scope=Cluster
65+ // +kubebuilder:resource:shortName=lpp
2566
2667type LauncherPoolPolicy struct {
2768 metav1.TypeMeta `json:",inline"`
@@ -60,29 +101,32 @@ type LauncherPoolForNodeType struct {
60101 // memory: ">=200Gi"
61102 // nvidia.com/gpu: ">=1"
62103 // cpu: ">=32"
63- // # Optional: also support capacity (if total capacity—not just allocatable—is desired)
64- // # capacity:
65- // # memory: ">=256Gi"
66104 EnhancedNodeSelector EnhancedNodeSelector `json:"enhancedNodeSelector"`
67105
106+ // PerAcceleratorCount lists, per accelerator selector, the launcher counts to pre-provision.
107+ PerAcceleratorCount []PerAcceleratorCount `json:"perAcceleratorCount"`
108+ }
109+
110+ // PerAcceleratorCount pairs an accelerator selector with the launcher counts to maintain for it.
111+ type PerAcceleratorCount struct {
112+ // AcceleratorSelector selects the accelerators that the counts below apply to.
113+ AcceleratorSelector AcceleratorSelector `json:"acceleratorSelector"`
114+
68115 // CountForLauncher is the total number of launchers for each LauncherConfig
69- // to maintain on each matching node.
70- // If two different counts are specified for the same (Node, LauncherConfig),
116+ // to maintain on each matching node per accelerator.
117+ // If two different counts are specified for the same (Node, Accelerator, LauncherConfig),
71118 // the higher count is used and will be populated into LauncherPoolPolicyStatus.
72- // If no CountForLauncher applies to a given (Node, LauncherConfig), this Node
119+ // If no CountForLauncher applies to a given (Node, Accelerator, LauncherConfig), this Node
73120 // will be ignored for this LauncherConfig.
74121 // +kubebuilder:validation:MinItems=1
75122 CountForLauncher []CountForLauncher `json:"countForLauncher"`
76123}
77124
78- // ResourceRequirementSpec defines resource requirements for a node.
79- type ResourceRequirementSpec struct {
125+ // ResourceRequirements defines resource requirements for a node.
126+ type ResourceRequirements struct {
80127 // Allocatable defines the allocatable resources for a node.
81128 // +kubebuilder:validation:Required
82- Allocatable map [string ]string `json:"allocatable,omitempty"`
83- // Capacity defines the capacity resources for a node.
84- // +kubebuilder:validation:Optional
85- Capacity map [string ]string `json:"capacity,omitempty"`
129+ Allocatable map [string ]resource.Quantity `json:"allocatable,omitempty"`
86130}
87131
88132// EnhancedNodeSelector defines node selector with label selector and resource requirements.
@@ -92,11 +136,25 @@ type EnhancedNodeSelector struct {
92136 LabelSelector * metav1.LabelSelector `json:"labelSelector,omitempty"`
93137 // ResourceRequirements defines the resource requirements for a node.
94138 // +kubebuilder:validation:Optional
95- ResourceRequirements * ResourceRequirementSpec `json:"resourceRequirements,omitempty"`
139+ ResourceRequirements * ResourceRequirements `json:"resourceRequirements,omitempty"`
140+ // AcceleratorSelector defines accelerator-specific selection criteria
141+ AcceleratorSelector * AcceleratorSelector `json:"acceleratorSelector,omitempty"`
142+ }
143+
144+ // AcceleratorSelector defines accelerator selection criteria
145+ type AcceleratorSelector struct {
146+ // Type specifies accelerator type (e.g., nvidia.com/gpu)
147+ Type string `json:"type,omitempty"`
148+
149+ // Memory specifies accelerator memory size requirement
150+ Memory * resource.Quantity `json:"memory,omitempty"`
151+
152+ // Count specifies required number of accelerators
153+ Count * int32 `json:"count,omitempty"`
96154}
97155
98156type CountForLauncher struct {
99- // LauncherConfigName references the name of the LauncherConfig this policy applies to.
157+ // LauncherConfigName is the name of the LauncherConfig this policy applies to.
100158 // +kubebuilder:validation:Required
101159 LauncherConfigName string `json:"launcherConfigName"`
102160
@@ -108,7 +166,7 @@ type CountForLauncher struct {
108166type LauncherPoolPolicyStatus struct {
109167 // `observedGeneration` is the `metadata.generation` last seen by the controller.
110168 // +optional
111- ObservedGeneration int32 `json:"observedGeneration,omitempty"`
169+ ObservedGeneration int64 `json:"observedGeneration,omitempty"`
112170
113171 // `errors` reports problems seen in the desired state of this object;
114172 // in particular, in the version reported by `observedGeneration`.
@@ -124,7 +182,3 @@ type LauncherPoolPolicyList struct {
124182 metav1.ListMeta `json:"metadata,omitempty"`
125183 Items []LauncherPoolPolicy `json:"items"`
126184}
127-
128- func init () {
129- SchemeBuilder .Register (& LauncherPoolPolicy {}, & LauncherPoolPolicyList {})
130- }
0 commit comments