Skip to content

Commit 37265c7

Browse files
feat: implement CAPI failure domain contract for zones
Map existing CAPMOX zones to CAPI failure domains so that KubeadmControlPlane automatically distributes control plane nodes across Proxmox zones.

API changes:
- Add Nodes []string to ZoneConfigSpec for per-zone node lists
- Add ControlPlane *bool to ZoneConfigSpec (defaults to true)
- Add FailureDomains to ProxmoxClusterStatus (CAPI v1beta2 contract)
- Add spec.failureDomain to ProxmoxMachineSpec (InfraMachine contract)
- Add GetZoneNodes() helper on ProxmoxCluster
- Add FailureDomainNotReady condition reason

Controller changes:
- Add reconcileFailureDomains in cluster controller (sorted by name)
- Read Machine.Spec.FailureDomain in machine controller with retryable FailureDomainNotReady condition
- Use effectiveZone/effectiveAllowedNodes on MachineScope to avoid spec mutation from the controller
- Pass zone override to GetInClusterPools for IPAM pool selection
- Set Zone on NodeLocation in createVM
- Update scheduler to use GetEffectiveAllowedNodes()

Conversion:
- Restore Status.FailureDomains and Spec.FailureDomain in v1alpha1 conversion webhook
- Add manual conversion stub for ProxmoxMachineSpec.FailureDomain

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7d3d55d commit 37265c7

24 files changed

Lines changed: 528 additions & 22 deletions

api/v1alpha1/conversion.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -492,6 +492,14 @@ func Convert_v1alpha2_ProxmoxMachineTemplateResource_To_v1alpha1_ProxmoxMachineT
492492
return Convert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec(&in.Spec, &out.Spec, s)
493493
}
494494

495+
// Convert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec converts a
496+
// v1alpha2 ProxmoxMachineSpec (in) into a v1alpha1 ProxmoxMachineSpec (out).
497+
// The conversion is lossy: FailureDomain is intentionally dropped because it does
498+
// not exist in v1alpha1; it is restored from annotation on ConvertTo.
// This wrapper adds no logic of its own: it delegates to the generated
// autoConvert function and returns that function's error unchanged.
499+
func Convert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec(in *v1alpha2.ProxmoxMachineSpec, out *ProxmoxMachineSpec, s conversion.Scope) error {
500+
return autoConvert_v1alpha2_ProxmoxMachineSpec_To_v1alpha1_ProxmoxMachineSpec(in, out, s)
501+
}
502+
495503
func Convert_v1beta1_ObjectMeta_To_v1beta2_ObjectMeta(in *clusterv1beta1.ObjectMeta, out *clusterv1beta2.ObjectMeta, s conversion.Scope) error {
496504
if err := clusterv1beta1.Convert_v1beta1_ObjectMeta_To_v1beta2_ObjectMeta(in, out, s); err != nil {
497505
return err

api/v1alpha1/proxmoxcluster_conversion.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ func (src *ProxmoxCluster) ConvertTo(dstRaw conversion.Hub) error {
4141
// Restore lossy fields
4242
dst.Spec.ZoneConfigs = restored.Spec.ZoneConfigs
4343
dst.Status.InClusterZoneRef = restored.Status.InClusterZoneRef
44+
dst.Status.FailureDomains = restored.Status.FailureDomains
4445

4546
clusterv1.Convert_bool_To_Pointer_bool(src.Spec.ExternalManagedControlPlane, ok, restored.Spec.ExternalManagedControlPlane, &dst.Spec.ExternalManagedControlPlane)
4647

api/v1alpha1/proxmoxmachine_conversion.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,9 @@ func (src *ProxmoxMachine) ConvertTo(dstRaw conversion.Hub) error {
4141

4242
restoreProxmoxMachineSpec(&src.Spec, &dst.Spec, &restored.Spec, ok)
4343

44+
// Restore FailureDomain (v1alpha2-only field, set by CAPI machine controller).
45+
dst.Spec.FailureDomain = restored.Spec.FailureDomain
46+
4447
clusterv1.Convert_bool_To_Pointer_bool(src.Status.Ready, ok,
4548
restored.Status.Initialization.Provisioned,
4649
&dst.Status.Initialization.Provisioned)

api/v1alpha1/zz_generated.conversion.go

Lines changed: 7 additions & 10 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

api/v1alpha2/conditions_consts.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,12 @@ const (
9494
// documents a ProxmoxMachine assigning host addresses for Cluster API.
9595
ProxmoxMachineVirtualMachineProvisionedWaitingForClusterAPIMachineAddressesReason = "WaitingForClusterAPIMachineAddresses"
9696

97+
// ProxmoxMachineVirtualMachineProvisionedFailureDomainNotReadyReason documents
98+
// a ProxmoxMachine waiting for its failure domain (zone) to be configured
99+
// in the ProxmoxCluster. This is a transient condition that resolves when
100+
// the zone is added to spec.zoneConfig.
101+
ProxmoxMachineVirtualMachineProvisionedFailureDomainNotReadyReason = "FailureDomainNotReady"
102+
97103
// ProxmoxMachineVirtualMachineProvisionedVMProvisionFailedReason documents a failure
98104
// during virtual machine provisioning.
99105
ProxmoxMachineVirtualMachineProvisionedVMProvisionFailedReason = "VMProvisionFailed"

api/v1alpha2/proxmoxcluster_types.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
corev1 "k8s.io/api/core/v1"
2525
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2626
"k8s.io/utils/ptr"
27+
clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2"
2728
"sigs.k8s.io/controller-runtime/pkg/client"
2829
)
2930

@@ -148,6 +149,18 @@ type ZoneConfigSpec struct {
148149
// +listType=set
149150
// +kubebuilder:validation:MinItems=1
150151
DNSServers []string `json:"dnsServers,omitempty"`
152+
153+
// nodes specifies the Proxmox nodes that belong to this zone.
154+
// When set, machines assigned to this failure domain will only
155+
// be placed on these nodes.
156+
// +optional
157+
// +listType=set
158+
Nodes []string `json:"nodes,omitempty"`
159+
160+
// controlPlane indicates whether this zone is eligible for control plane machines.
161+
// Defaults to true when not set.
162+
// +optional
163+
ControlPlane *bool `json:"controlPlane,omitempty"`
151164
}
152165

153166
// IPConfigSpec contains information about available IP config.
@@ -232,6 +245,14 @@ type ProxmoxClusterStatus struct {
232245
// for different machines.
233246
// +optional
234247
NodeLocations *NodeLocations `json:"nodeLocations,omitempty"`
248+
249+
// failureDomains is a slice of failure domains synced from zone configurations.
250+
// This field is part of the Cluster API contract and is used by KubeadmControlPlane
251+
// to distribute control plane machines across zones.
252+
// +optional
253+
// +listType=map
254+
// +listMapKey=name
255+
FailureDomains []clusterv1.FailureDomain `json:"failureDomains,omitempty"`
235256
}
236257

237258
// ProxmoxClusterInitializationStatus provides observations of the ProxmoxCluster initialization process.
@@ -523,6 +544,18 @@ func (c *ProxmoxCluster) addNodeLocation(loc NodeLocation, isControlPlane bool)
523544
c.Status.NodeLocations.Workers = append(c.Status.NodeLocations.Workers, loc)
524545
}
525546

547+
// GetZoneNodes returns the Proxmox node names configured for the zone named zoneName.
548+
// It scans Spec.ZoneConfigs in order and returns the Nodes field of the first entry
// whose zone name equals zoneName (nil if that entry never had nodes set).
// It returns nil when no entry matches. The slice is returned as stored in the
// spec, not copied.
549+
func (c *ProxmoxCluster) GetZoneNodes(zoneName string) []string {
550+
for _, zc := range c.Spec.ZoneConfigs {
551+
// An entry with a nil Zone is compared as the empty zone name.
if ptr.Deref(zc.Zone, "") == zoneName {
552+
return zc.Nodes
553+
}
554+
}
555+
556+
return nil
557+
}
558+
526559
// init appends the ProxmoxCluster and ProxmoxClusterList types to the
// package-level objectTypes slice.
// NOTE(review): objectTypes is presumably consumed by scheme registration
// elsewhere in the package — confirm.
func init() {
527560
objectTypes = append(objectTypes, &ProxmoxCluster{}, &ProxmoxClusterList{})
528561
}

api/v1alpha2/proxmoxcluster_types_test.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,6 +216,40 @@ func TestRemoveNodeLocation(t *testing.T) {
216216
require.Len(t, cl.Status.NodeLocations.ControlPlane, 0)
217217
}
218218

219+
// TestGetZoneNodes exercises ProxmoxCluster.GetZoneNodes for four cases: a zone
// with explicit nodes, a zone without nodes, an unknown zone name, and a cluster
// that has no zone configs at all.
func TestGetZoneNodes(t *testing.T) {
220+
cl := &ProxmoxCluster{
221+
Spec: ProxmoxClusterSpec{
222+
ZoneConfigs: []ZoneConfigSpec{
223+
{
224+
Zone: ptr.To("zone-a"),
225+
Nodes: []string{"pve1", "pve2"},
226+
},
227+
{
228+
Zone: ptr.To("zone-b"),
229+
// Nodes is deliberately left unset for this zone.
230+
},
231+
},
232+
},
233+
}
234+
235+
// A known zone with nodes returns exactly the configured node list.
236+
nodes := cl.GetZoneNodes("zone-a")
237+
require.Equal(t, []string{"pve1", "pve2"}, nodes)
238+
239+
// A known zone without nodes returns nil.
240+
nodes = cl.GetZoneNodes("zone-b")
241+
require.Nil(t, nodes)
242+
243+
// An unknown zone name returns nil.
244+
nodes = cl.GetZoneNodes("zone-c")
245+
require.Nil(t, nodes)
246+
247+
// A cluster with no ZoneConfigs returns nil for any zone name.
248+
empty := &ProxmoxCluster{}
249+
nodes = empty.GetZoneNodes("anything")
250+
require.Nil(t, nodes)
251+
}
252+
219253
func TestSetInClusterIPPoolRef(t *testing.T) {
220254
cl := defaultCluster()
221255

api/v1alpha2/proxmoxmachine_types.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,12 @@ type ProxmoxMachineChecks struct {
6565
type ProxmoxMachineSpec struct {
6666
VirtualMachineCloneSpec `json:",inline"`
6767

68+
// failureDomain is the failure domain the machine is placed in.
69+
// This field is part of the Cluster API InfrastructureMachine contract
70+
// and is set by the CAPI machine controller.
71+
// +optional
72+
FailureDomain string `json:"failureDomain,omitempty"`
73+
6874
// providerID is the virtual machine BIOS UUID formatted as
6975
// proxmox://6c3fa683-bef9-4425-b413-eaa45a9d6191
7076
// +optional

api/v1alpha2/zz_generated.deepcopy.go

Lines changed: 17 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

config/crd/bases/infrastructure.cluster.x-k8s.io_proxmoxclusters.yaml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1244,6 +1244,11 @@ spec:
12441244
description: ZoneConfigSpec is the Network Configuration for further
12451245
deployment zones.
12461246
properties:
1247+
controlPlane:
1248+
description: |-
1249+
controlPlane indicates whether this zone is eligible for control plane machines.
1250+
Defaults to true when not set.
1251+
type: boolean
12471252
dnsServers:
12481253
description: dnsServers contains information about nameservers
12491254
used by the machines in this zone.
@@ -1328,6 +1333,15 @@ spec:
13281333
x-kubernetes-validations:
13291334
- message: IPv6Config addresses must be provided
13301335
rule: self.addresses.size() > 0
1336+
nodes:
1337+
description: |-
1338+
nodes specifies the Proxmox nodes that belong to this zone.
1339+
When set, machines assigned to this failure domain will only
1340+
be placed on these nodes.
1341+
items:
1342+
type: string
1343+
type: array
1344+
x-kubernetes-list-type: set
13311345
zone:
13321346
description: zone is the name of your deployment zone.
13331347
pattern: ^[a-z0-9A-Z](?:[a-z0-9A-Z-_.]{0,61}[a-z0-9A-Z])?$
@@ -1412,6 +1426,38 @@ spec:
14121426
x-kubernetes-list-map-keys:
14131427
- type
14141428
x-kubernetes-list-type: map
1429+
failureDomains:
1430+
description: |-
1431+
failureDomains is a slice of failure domains synced from zone configurations.
1432+
This field is part of the Cluster API contract and is used by KubeadmControlPlane
1433+
to distribute control plane machines across zones.
1434+
items:
1435+
description: |-
1436+
FailureDomain is the Schema for Cluster API failure domains.
1437+
It allows controllers to understand how many failure domains a cluster can optionally span across.
1438+
properties:
1439+
attributes:
1440+
additionalProperties:
1441+
type: string
1442+
description: attributes is a free form map of attributes an
1443+
infrastructure provider might use or require.
1444+
type: object
1445+
controlPlane:
1446+
description: controlPlane determines if this failure domain
1447+
is suitable for use by control plane machines.
1448+
type: boolean
1449+
name:
1450+
description: name is the name of the failure domain.
1451+
maxLength: 256
1452+
minLength: 1
1453+
type: string
1454+
required:
1455+
- name
1456+
type: object
1457+
type: array
1458+
x-kubernetes-list-map-keys:
1459+
- name
1460+
x-kubernetes-list-type: map
14151461
inClusterIPPoolRef:
14161462
description: inClusterIPPoolRef is the reference to the created in-cluster
14171463
IP pool.

0 commit comments

Comments
 (0)