Skip to content

Commit a701c33

Browse files
committed
Refine LauncherPoolPolicy struct
1 parent 5ed08a0 commit a701c33

23 files changed

+1558
-38
lines changed

api/v1alpha1/launcherpoolpolicy_types.go renamed to api/fma/v1alpha1/launcherpoolpolicy_types.go

Lines changed: 77 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,53 @@ limitations under the License.
1616

1717
package v1alpha1
1818

19-
import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
20-
21-
// PoolPolicy defines the proactive provisioning policy for idle launcher pods.
19+
import (
20+
"k8s.io/apimachinery/pkg/api/resource"
21+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
22+
)
23+
24+
// LauncherPoolPolicy defines the proactive provisioning policy for the idle launcher pods.
25+
// The semantics of LauncherPoolPolicy are defined as follows:
26+
//
27+
// ## Multiple LauncherPoolPolicy Objects
28+
//
29+
// 1. **Additive Semantics**
30+
// - Multiple LauncherPoolPolicy objects follow additive semantics
31+
// - All policies across all objects are evaluated together
32+
// - Rules from different objects are combined to form the complete policy set
33+
//
34+
// 2. **Zero Objects Behavior**
35+
// - When no LauncherPoolPolicy objects exist, no proactive provisioning occurs
36+
// - System falls back to on-demand launcher pod creation
37+
//
38+
// ## Multiple LauncherPoolForNodeType Matching
39+
//
40+
// 1. **Multiple Matches**
41+
// - When multiple LauncherPoolForNodeType structs match a single Node (across the same or different LauncherPoolPolicy objects):
42+
// - For each unique combination of (Node, Accelerator, LauncherConfig), select the highest LauncherCount value
43+
// - This forms the effective policy for that specific tuple
44+
//
45+
// 2. **Zero Matches**
46+
// - When no LauncherPoolForNodeType matches a Node:
47+
// - No pre-provisioning policy applies to that Node
48+
// - Launcher pods for that Node are created on-demand
49+
//
50+
// ## Multiple CountForLauncher with Same LauncherConfig
51+
//
52+
// 1. **Duplicate LauncherConfig Names**
53+
// - When multiple CountForLauncher structs specify the same LauncherConfigName for the same (Node, Accelerator) combination:
54+
// - Select the highest LauncherCount value among all matching entries
55+
// - This determines the target pool size for that LauncherConfig on that Node with that Accelerator
56+
//
57+
// 2. **Zero Matching CountForLauncher**
58+
// - When no CountForLauncher matches a specific (Node, Accelerator, LauncherConfig) tuple:
59+
// - No pre-provisioning occurs for that specific combination
60+
// - Launcher pods are created on-demand when needed
61+
//
62+
// +genclient
2263
// +kubebuilder:object:root=true
2364
// +kubebuilder:subresource:status
24-
// +kubebuilder:resource:path=launcherpoolpolicies,scope=Cluster
65+
// +kubebuilder:resource:shortName=lpp
2566

2667
type LauncherPoolPolicy struct {
2768
metav1.TypeMeta `json:",inline"`
@@ -60,29 +101,32 @@ type LauncherPoolForNodeType struct {
60101
// memory: ">=200Gi"
61102
// nvidia.com/gpu: ">=1"
62103
// cpu: ">=32"
63-
// # Optional: also support capacity (if total capacity—not just allocatable—is desired)
64-
// # capacity:
65-
// # memory: ">=256Gi"
66104
EnhancedNodeSelector EnhancedNodeSelector `json:"enhancedNodeSelector"`
67105

106+
// PerAcceleratorCount defines the launcher counts to pre-provision for each accelerator type.
107+
PerAcceleratorCount []PerAcceleratorCount `json:"perAcceleratorCount"`
108+
}
109+
110+
// PerAcceleratorCount defines configuration for specific accelerators
111+
type PerAcceleratorCount struct {
112+
// AcceleratorSelector identifies the accelerators to which the counts below apply.
113+
AcceleratorSelector AcceleratorSelector `json:"acceleratorSelector"`
114+
68115
// CountForLauncher is the total number of launchers for each LauncherConfig
69-
// to maintain on each matching node.
70-
// If two different counts are specified for the same (Node, LauncherConfig),
116+
// to maintain on each matching node per accelerator.
117+
// If two different counts are specified for the same (Node, Accelerator, LauncherConfig),
71118
// the higher count is used and will be populated into LauncherPoolPolicyStatus.
72-
// If no CountForLauncher applies to a given (Node, LauncherConfig), this Node
119+
// If no CountForLauncher applies to a given (Node, Accelerator, LauncherConfig), this Node
73120
// will be ignored for this LauncherConfig.
74121
// +kubebuilder:validation:MinItems=1
75122
CountForLauncher []CountForLauncher `json:"countForLauncher"`
76123
}
77124

78-
// ResourceRequirementSpec defines resource requirements for a node.
79-
type ResourceRequirementSpec struct {
125+
// ResourceRequirements defines resource requirements for a node.
126+
type ResourceRequirements struct {
80127
// Allocatable defines the allocatable resources for a node.
81128
// +kubebuilder:validation:Required
82-
Allocatable map[string]string `json:"allocatable,omitempty"`
83-
// Capacity defines the capacity resources for a node.
84-
// +kubebuilder:validation:Optional
85-
Capacity map[string]string `json:"capacity,omitempty"`
129+
Allocatable map[string]resource.Quantity `json:"allocatable,omitempty"`
86130
}
87131

88132
// EnhancedNodeSelector defines node selector with label selector and resource requirements.
@@ -92,11 +136,25 @@ type EnhancedNodeSelector struct {
92136
LabelSelector *metav1.LabelSelector `json:"labelSelector,omitempty"`
93137
// ResourceRequirements defines the resource requirements for a node.
94138
// +kubebuilder:validation:Optional
95-
ResourceRequirements *ResourceRequirementSpec `json:"resourceRequirements,omitempty"`
139+
ResourceRequirements *ResourceRequirements `json:"resourceRequirements,omitempty"`
140+
// AcceleratorSelector defines accelerator-specific selection criteria
141+
AcceleratorSelector *AcceleratorSelector `json:"acceleratorSelector,omitempty"`
142+
}
143+
144+
// AcceleratorSelector defines accelerator selection criteria
145+
type AcceleratorSelector struct {
146+
// Type specifies accelerator type (e.g., nvidia.com/gpu)
147+
Type string `json:"type,omitempty"`
148+
149+
// Memory specifies accelerator memory size requirement
150+
Memory *resource.Quantity `json:"memory,omitempty"`
151+
152+
// Count specifies required number of accelerators
153+
Count *int32 `json:"count,omitempty"`
96154
}
97155

98156
type CountForLauncher struct {
99-
// LauncherConfigName references the name of the LauncherConfig this policy applies to.
157+
// LauncherConfigName is the name of the LauncherConfig this policy applies to.
100158
// +kubebuilder:validation:Required
101159
LauncherConfigName string `json:"launcherConfigName"`
102160

@@ -108,7 +166,7 @@ type CountForLauncher struct {
108166
type LauncherPoolPolicyStatus struct {
109167
// `observedGeneration` is the `metadata.generation` last seen by the controller.
110168
// +optional
111-
ObservedGeneration int32 `json:"observedGeneration,omitempty"`
169+
ObservedGeneration int64 `json:"observedGeneration,omitempty"`
112170

113171
// `errors` reports problems seen in the desired state of this object;
114172
// in particular, in the version reported by `observedGeneration`.
@@ -124,7 +182,3 @@ type LauncherPoolPolicyList struct {
124182
metav1.ListMeta `json:"metadata,omitempty"`
125183
Items []LauncherPoolPolicy `json:"items"`
126184
}
127-
128-
func init() {
129-
SchemeBuilder.Register(&LauncherPoolPolicy{}, &LauncherPoolPolicyList{})
130-
}

0 commit comments

Comments
 (0)