Skip to content

Commit c69b8d5

Browse files
authored
Merge branch 'main' into hbc/karpenter-helmchart
2 parents 42a6f16 + a468304 commit c69b8d5

28 files changed

Lines changed: 1556 additions & 370 deletions

File tree

karpenter/cmd/controller/main.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ func main() {
8585
}, "azure")
8686
}
8787

88+
clusterVersion := lo.Must(utilsk8s.RetrieveClusterVersion(op.GetConfig()))
8889
clusterCA := lo.Must(utilsk8s.RetrieveClusterCA(op.GetConfig()))
8990

9091
// nebius cloud provider...
@@ -97,6 +98,7 @@ func main() {
9798
hubCloudProvider,
9899
flexoptions.MustNewNebiusSDK(ctx),
99100
op.GetClient(),
101+
clusterVersion,
100102
clusterCA,
101103
wgAlloc,
102104
)

karpenter/examples/nebius/cpu_nodepool.yaml

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,12 @@ spec:
1616
- key: karpenter.azure.com/sku-cpu
1717
operator: Gt
1818
values: ["8"]
19+
startupTaints:
20+
# NOTE: this startup taint is required to tell Karpenter not to take
21+
# the Cilium taint into account when making scheduling decisions.
22+
- key: "node.cilium.io/agent-not-ready"
23+
effect: "NoSchedule"
24+
value: "true"
1925
limits:
2026
cpu: "1000"
21-
memory: 1000Gi
27+
memory: 1000Gi

karpenter/examples/nebius/gpu_nodepool.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,12 @@ spec:
1717
- key: kubernetes.io/os
1818
operator: In
1919
values: ["linux"]
20+
startupTaints:
21+
# NOTE: this startup taint is required to tell Karpenter not to take
22+
# the Cilium taint into account when making scheduling decisions.
23+
- key: "node.cilium.io/agent-not-ready"
24+
effect: "NoSchedule"
25+
value: "true"
2026
limits:
2127
cpu: "1000"
2228
memory: 10000Gi

karpenter/go.mod

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,12 @@ require (
1010
github.com/go-logr/zapr v1.3.0
1111
github.com/nebius/gosdk v0.0.0-20260218100913-7fb27c45819a
1212
github.com/samber/lo v1.52.0
13+
golang.org/x/sync v0.19.0
1314
google.golang.org/grpc v1.79.1
14-
google.golang.org/protobuf v1.36.11
1515
k8s.io/api v0.35.1
1616
k8s.io/apimachinery v0.35.1
1717
k8s.io/client-go v0.35.1
18+
k8s.io/utils v0.0.0-20260108192941-914a6e750570
1819
sigs.k8s.io/controller-runtime v0.23.1
1920
sigs.k8s.io/karpenter v1.7.1
2021
)
@@ -166,13 +167,13 @@ require (
166167
golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 // indirect
167168
golang.org/x/net v0.49.0 // indirect
168169
golang.org/x/oauth2 v0.34.0 // indirect
169-
golang.org/x/sync v0.19.0 // indirect
170170
golang.org/x/sys v0.40.0 // indirect
171171
golang.org/x/term v0.39.0 // indirect
172172
golang.org/x/text v0.33.0 // indirect
173173
golang.org/x/time v0.14.0 // indirect
174174
gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect
175175
google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect
176+
google.golang.org/protobuf v1.36.11 // indirect
176177
gopkg.in/dnaeon/go-vcr.v3 v3.2.0 // indirect
177178
gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
178179
gopkg.in/inf.v0 v0.9.1 // indirect
@@ -186,7 +187,6 @@ require (
186187
k8s.io/csi-translation-lib v0.35.0 // indirect
187188
k8s.io/klog/v2 v2.130.1 // indirect
188189
k8s.io/kube-openapi v0.0.0-20260127142750-a19766b6e2d4 // indirect
189-
k8s.io/utils v0.0.0-20260108192941-914a6e750570 // indirect
190190
sigs.k8s.io/cloud-provider-azure/pkg/azclient v0.14.3 // indirect
191191
sigs.k8s.io/cluster-api v1.12.3 // indirect
192192
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect

karpenter/pkg/apis/crds/flex.aks.azure.com_nebiusnodeclasses.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,13 @@ spec:
6161
description: OSDiskSizeGB is the size of the OS disk in GB.
6262
format: int32
6363
type: integer
64+
maxPodsPerNode:
65+
default: 110
66+
description: |-
67+
MaxPodsPerNode is the maximum number of pods that can be scheduled on a single node.
68+
This value is advertised in the node's capacity and affects Karpenter's scheduling decisions.
69+
format: int32
70+
type: integer
6471
projectID:
6572
description: ProjectID is the nebius project id to launch nodes in.
6673
type: string

karpenter/pkg/apis/v1alpha1/nebius.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,12 @@ type NebiusNodeClassSpec struct {
7676
// +optional
7777
WireguardPeerCIDR *string `json:"wireguardPeerCIDR,omitempty"`
7878

79+
// MaxPodsPerNode is the maximum number of pods that can be scheduled on a single node.
80+
// This value is advertised in the node's capacity and affects Karpenter's scheduling decisions.
81+
// +default=110
82+
// +optional
83+
MaxPodsPerNode *int32 `json:"maxPodsPerNode,omitempty"`
84+
7985
// TODO: other fields (kubelet etc)
8086
}
8187

karpenter/pkg/apis/v1alpha1/zz_generated.deepcopy.go

Lines changed: 5 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

karpenter/pkg/cloudproviders/nebius/api.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,5 +21,6 @@ func IsQuotaError(err error) bool {
2121
// FIXME: check how to identify the error using nebius sdk
2222
errString := err.Error()
2323
return strings.Contains(errString, "Quota failure") ||
24-
strings.Contains(errString, "Not enough resources")
24+
strings.Contains(errString, "Not enough resources") ||
25+
strings.Contains(errString, "insufficient capacity") // example: insufficient capacity, rpc error: code = ResourceExhausted
2526
}

0 commit comments

Comments
 (0)