Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions karpenter/cmd/controller/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ func main() {
}, "azure")
}

clusterVersion := lo.Must(utilsk8s.RetrieveClusterVersion(op.GetConfig()))
clusterCA := lo.Must(utilsk8s.RetrieveClusterCA(op.GetConfig()))

// nebius cloud provider...
Expand All @@ -97,6 +98,7 @@ func main() {
hubCloudProvider,
flexoptions.MustNewNebiusSDK(ctx),
op.GetClient(),
clusterVersion,
clusterCA,
wgAlloc,
)
Expand Down
8 changes: 7 additions & 1 deletion karpenter/examples/nebius/cpu_nodepool.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,12 @@ spec:
- key: karpenter.azure.com/sku-cpu
operator: Gt
values: ["8"]
startupTaints:
# NOTE: this startup taint is required so that karpenter does not take
# the cilium taint into account when making scheduling decisions.
- key: "node.cilium.io/agent-not-ready"
effect: "NoSchedule"
value: "true"
limits:
cpu: "1000"
memory: 1000Gi
memory: 1000Gi
6 changes: 6 additions & 0 deletions karpenter/examples/nebius/gpu_nodepool.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ spec:
- key: kubernetes.io/os
operator: In
values: ["linux"]
startupTaints:
# NOTE: this startup taint is required so that karpenter does not take
# the cilium taint into account when making scheduling decisions.
- key: "node.cilium.io/agent-not-ready"
effect: "NoSchedule"
value: "true"
limits:
cpu: "1000"
memory: 10000Gi
Expand Down
6 changes: 3 additions & 3 deletions karpenter/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,12 @@ require (
github.com/go-logr/zapr v1.3.0
github.com/nebius/gosdk v0.0.0-20260218100913-7fb27c45819a
github.com/samber/lo v1.52.0
golang.org/x/sync v0.19.0
google.golang.org/grpc v1.79.1
google.golang.org/protobuf v1.36.11
k8s.io/api v0.35.1
k8s.io/apimachinery v0.35.1
k8s.io/client-go v0.35.1
k8s.io/utils v0.0.0-20260108192941-914a6e750570
sigs.k8s.io/controller-runtime v0.23.1
sigs.k8s.io/karpenter v1.7.1
)
Expand Down Expand Up @@ -166,13 +167,13 @@ require (
golang.org/x/exp v0.0.0-20250305212735-054e65f0b394 // indirect
golang.org/x/net v0.49.0 // indirect
golang.org/x/oauth2 v0.34.0 // indirect
golang.org/x/sync v0.19.0 // indirect
golang.org/x/sys v0.40.0 // indirect
golang.org/x/term v0.39.0 // indirect
golang.org/x/text v0.33.0 // indirect
golang.org/x/time v0.14.0 // indirect
gomodules.xyz/jsonpatch/v2 v2.5.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20251202230838-ff82c1b0f217 // indirect
google.golang.org/protobuf v1.36.11 // indirect
gopkg.in/dnaeon/go-vcr.v3 v3.2.0 // indirect
gopkg.in/evanphx/json-patch.v4 v4.13.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
Expand All @@ -186,7 +187,6 @@ require (
k8s.io/csi-translation-lib v0.35.0 // indirect
k8s.io/klog/v2 v2.130.1 // indirect
k8s.io/kube-openapi v0.0.0-20260127142750-a19766b6e2d4 // indirect
k8s.io/utils v0.0.0-20260108192941-914a6e750570 // indirect
sigs.k8s.io/cloud-provider-azure/pkg/azclient v0.14.3 // indirect
sigs.k8s.io/cluster-api v1.12.3 // indirect
sigs.k8s.io/json v0.0.0-20250730193827-2d320260d730 // indirect
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,13 @@ spec:
description: OSDiskSizeGB is the size of the OS disk in GB.
format: int32
type: integer
maxPodsPerNode:
default: 110
description: |-
MaxPodsPerNode is the maximum number of pods that can be scheduled on a single node.
This value is advertised in the node's capacity and affects Karpenter's scheduling decisions.
format: int32
type: integer
projectID:
description: ProjectID is the nebius project id to launch nodes in.
type: string
Expand Down
6 changes: 6 additions & 0 deletions karpenter/pkg/apis/v1alpha1/nebius.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,12 @@ type NebiusNodeClassSpec struct {
// +optional
WireguardPeerCIDR *string `json:"wireguardPeerCIDR,omitempty"`

// MaxPodsPerNode is the maximum number of pods that can be scheduled on a single node.
// This value is advertised in the node's capacity and affects Karpenter's scheduling decisions.
// +default=110
// +optional
MaxPodsPerNode *int32 `json:"maxPodsPerNode,omitempty"`

// TODO: other fields (kubelet etc)
}

Expand Down
5 changes: 5 additions & 0 deletions karpenter/pkg/apis/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion karpenter/pkg/cloudproviders/nebius/api.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,6 @@ func IsQuotaError(err error) bool {
// FIXME: check how to identify the error using nebius sdk
errString := err.Error()
return strings.Contains(errString, "Quota failure") ||
strings.Contains(errString, "Not enough resources")
strings.Contains(errString, "Not enough resources") ||
strings.Contains(errString, "insufficient capacity") // example: insufficient capacity, rpc error: code = ResourceExhausted
}
Loading