From a17629ecc8b56327dc18fddaff16cb225fc339f8 Mon Sep 17 00:00:00 2001 From: Fabien Dupont Date: Tue, 10 Mar 2026 15:38:14 +0100 Subject: [PATCH] fix: Reject reserved IP addresses (network, broadcast, gateway) on interface update The instance inventory workflow was persisting IP addresses from the Site Controller without validation. This allowed reserved addresses (network, broadcast, gateway) to be assigned to interfaces, causing network issues. Add ValidateIPAddresses to check IPs against the interface's CIDR prefix. When reserved IPs are detected, the interface is set to Error status and invalid addresses are not persisted. The interface recovers automatically on the next inventory cycle once the Site Controller reports valid IPs. GetInterfaceCIDRAndGateway extracts the CIDR from Subnet (IPv4 and IPv6) or VpcPrefix, handling the case where VpcPrefix.Prefix already contains the prefix length from the IPAM library. Co-Authored-By: Claude Opus 4.6 (1M context) Signed-off-by: Fabien Dupont --- db/pkg/db/ipam/ipam.go | 85 ++++++++ db/pkg/db/ipam/ipam_test.go | 183 ++++++++++++++++++ workflow/pkg/activity/instance/instance.go | 18 +- .../pkg/activity/instance/instance_test.go | 78 ++++++++ 4 files changed, 363 insertions(+), 1 deletion(-) diff --git a/db/pkg/db/ipam/ipam.go b/db/pkg/db/ipam/ipam.go index 16c51162..45a82e6e 100644 --- a/db/pkg/db/ipam/ipam.go +++ b/db/pkg/db/ipam/ipam.go @@ -26,6 +26,7 @@ import ( cipam "github.com/nvidia/bare-metal-manager-rest/ipam" "github.com/uptrace/bun" + "go4.org/netipx" cdb "github.com/nvidia/bare-metal-manager-rest/db/pkg/db" cdbm "github.com/nvidia/bare-metal-manager-rest/db/pkg/db/model" @@ -242,3 +243,87 @@ func DeleteChildIpamEntryFromCidr(ctx context.Context, tx *cdb.Tx, dbSession *cd } return nil } + +// ValidateIPAddresses checks that none of the given IP addresses are reserved +// network addresses (network address, broadcast address, or gateway) for the +// specified CIDR prefix. It returns the list of valid IPs and the list of +// rejected IPs with reasons. +func ValidateIPAddresses(ipAddresses []string, cidr string, gateway string) (valid []string, rejected []string) { + if len(ipAddresses) == 0 { + return ipAddresses, nil + } + + prefix, err := netip.ParsePrefix(cidr) + if err != nil { + // If we can't parse the prefix, we can't validate — pass through + return ipAddresses, nil + } + + iprange := netipx.RangeOfPrefix(prefix) + networkAddr := iprange.From() + broadcastAddr := iprange.To() + + var gatewayAddr netip.Addr + if gateway != "" { + gatewayAddr, _ = netip.ParseAddr(gateway) + } + + for _, ipStr := range ipAddresses { + addr, err := netip.ParseAddr(ipStr) + if err != nil { + rejected = append(rejected, fmt.Sprintf("%s (invalid IP)", ipStr)) + continue + } + + if !prefix.Contains(addr) { + rejected = append(rejected, fmt.Sprintf("%s (not in prefix %s)", ipStr, cidr)) + continue + } + + if addr == networkAddr { + rejected = append(rejected, fmt.Sprintf("%s (network address of %s)", ipStr, cidr)) + continue + } + + if prefix.Addr().Is4() && addr == broadcastAddr { + rejected = append(rejected, fmt.Sprintf("%s (broadcast address of %s)", ipStr, cidr)) + continue + } + + if gatewayAddr.IsValid() && addr == gatewayAddr { + rejected = append(rejected, fmt.Sprintf("%s (gateway address of %s)", ipStr, cidr)) + continue + } + + valid = append(valid, ipStr) + } + + return valid, rejected +} + +// GetInterfaceCIDRAndGateway returns the CIDR and gateway for an interface +// based on its associated Subnet or VpcPrefix. +func GetInterfaceCIDRAndGateway(ifc *cdbm.Interface) (cidr string, gateway string) { + if ifc.Subnet != nil { + if ifc.Subnet.IPv4Prefix != nil { + cidr = fmt.Sprintf("%s/%d", *ifc.Subnet.IPv4Prefix, ifc.Subnet.PrefixLength) + if ifc.Subnet.IPv4Gateway != nil { + gateway = *ifc.Subnet.IPv4Gateway + } + } else if ifc.Subnet.IPv6Prefix != nil { + cidr = fmt.Sprintf("%s/%d", *ifc.Subnet.IPv6Prefix, ifc.Subnet.PrefixLength) + if ifc.Subnet.IPv6Gateway != nil { + gateway = *ifc.Subnet.IPv6Gateway + } + } + } else if ifc.VpcPrefix != nil { + // VpcPrefix.Prefix may already contain the prefix length (e.g. "192.172.0.0/24") + // from the IPAM library's Cidr format. Parse it to extract just the address. + if p, err := netip.ParsePrefix(ifc.VpcPrefix.Prefix); err == nil { + cidr = p.String() + } else { + cidr = fmt.Sprintf("%s/%d", ifc.VpcPrefix.Prefix, ifc.VpcPrefix.PrefixLength) + } + } + return cidr, gateway +} diff --git a/db/pkg/db/ipam/ipam_test.go b/db/pkg/db/ipam/ipam_test.go index 1fb1a534..79378ac9 100644 --- a/db/pkg/db/ipam/ipam_test.go +++ b/db/pkg/db/ipam/ipam_test.go @@ -846,3 +846,186 @@ func TestGetFirstIPFromCidr(t *testing.T) { }) } } + +func TestValidateIPAddresses(t *testing.T) { + tests := []struct { + name string + ipAddresses []string + cidr string + gateway string + expectedValid []string + expectedRejected []string + }{ + { + name: "all valid IPs in /24", + ipAddresses: []string{"192.168.1.10", "192.168.1.20"}, + cidr: "192.168.1.0/24", + gateway: "192.168.1.1", + expectedValid: []string{"192.168.1.10", "192.168.1.20"}, + expectedRejected: nil, + }, + { + name: "reject network address", + ipAddresses: []string{"192.168.1.0", "192.168.1.10"}, + cidr: "192.168.1.0/24", + gateway: "", + expectedValid: []string{"192.168.1.10"}, + expectedRejected: []string{"192.168.1.0 (network address of 192.168.1.0/24)"}, + }, + { + name: "reject broadcast address", + ipAddresses: []string{"192.168.1.255", "192.168.1.10"}, + cidr: "192.168.1.0/24", + gateway: "", + expectedValid: []string{"192.168.1.10"}, + expectedRejected: []string{"192.168.1.255 (broadcast address of 192.168.1.0/24)"}, + }, + { + name: "reject gateway address", + ipAddresses: []string{"192.168.1.1", "192.168.1.10"}, + cidr: "192.168.1.0/24", + gateway: "192.168.1.1", + expectedValid: []string{"192.168.1.10"}, + expectedRejected: []string{"192.168.1.1 (gateway address of 192.168.1.0/24)"}, + }, + { + name: "reject all reserved addresses at once", + ipAddresses: []string{"192.168.1.0", "192.168.1.1", "192.168.1.255", "192.168.1.100"}, + cidr: "192.168.1.0/24", + gateway: "192.168.1.1", + expectedValid: []string{"192.168.1.100"}, + expectedRejected: []string{"192.168.1.0 (network address of 192.168.1.0/24)", "192.168.1.1 (gateway address of 192.168.1.0/24)", "192.168.1.255 (broadcast address of 192.168.1.0/24)"}, + }, + { + name: "reject IP outside prefix", + ipAddresses: []string{"10.0.0.1", "192.168.1.10"}, + cidr: "192.168.1.0/24", + gateway: "", + expectedValid: []string{"192.168.1.10"}, + expectedRejected: []string{"10.0.0.1 (not in prefix 192.168.1.0/24)"}, + }, + { + name: "reject invalid IP string", + ipAddresses: []string{"not-an-ip", "192.168.1.10"}, + cidr: "192.168.1.0/24", + gateway: "", + expectedValid: []string{"192.168.1.10"}, + expectedRejected: []string{"not-an-ip (invalid IP)"}, + }, + { + name: "small /30 prefix - reject network and broadcast", + ipAddresses: []string{"10.0.0.0", "10.0.0.1", "10.0.0.2", "10.0.0.3"}, + cidr: "10.0.0.0/30", + gateway: "10.0.0.1", + expectedValid: []string{"10.0.0.2"}, + expectedRejected: []string{"10.0.0.0 (network address of 10.0.0.0/30)", "10.0.0.1 (gateway address of 10.0.0.0/30)", "10.0.0.3 (broadcast address of 10.0.0.0/30)"}, + }, + { + name: "IPv6 - reject network address only (no broadcast in IPv6)", + ipAddresses: []string{"2001:db8::", "2001:db8::1", "2001:db8::f"}, + cidr: "2001:db8::/124", + gateway: "", + expectedValid: []string{"2001:db8::1", "2001:db8::f"}, + expectedRejected: []string{"2001:db8:: (network address of 2001:db8::/124)"}, + }, + { + name: "empty IP list", + ipAddresses: []string{}, + cidr: "192.168.1.0/24", + gateway: "", + expectedValid: []string{}, + expectedRejected: nil, + }, + { + name: "invalid CIDR passes through", + ipAddresses: []string{"192.168.1.10"}, + cidr: "bad-cidr", + gateway: "", + expectedValid: []string{"192.168.1.10"}, + expectedRejected: nil, + }, + { + name: "no gateway - only network and broadcast rejected", + ipAddresses: []string{"192.168.1.0", "192.168.1.1", "192.168.1.255"}, + cidr: "192.168.1.0/24", + gateway: "", + expectedValid: []string{"192.168.1.1"}, + expectedRejected: []string{"192.168.1.0 (network address of 192.168.1.0/24)", "192.168.1.255 (broadcast address of 192.168.1.0/24)"}, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + valid, rejected := ValidateIPAddresses(tc.ipAddresses, tc.cidr, tc.gateway) + assert.Equal(t, tc.expectedValid, valid) + assert.Equal(t, tc.expectedRejected, rejected) + }) + } +} + +func TestGetInterfaceCIDRAndGateway(t *testing.T) { + ipv4Prefix := "192.168.1.0" + ipv4Gateway := "192.168.1.1" + ipv6Prefix := "2001:db8::" + ipv6Gateway := "2001:db8::1" + + t.Run("subnet interface with IPv4", func(t *testing.T) { + ifc := &cdbm.Interface{ + Subnet: &cdbm.Subnet{ + IPv4Prefix: &ipv4Prefix, + PrefixLength: 24, + IPv4Gateway: &ipv4Gateway, + }, + } + cidr, gw := GetInterfaceCIDRAndGateway(ifc) + assert.Equal(t, "192.168.1.0/24", cidr) + assert.Equal(t, "192.168.1.1", gw) + }) + + t.Run("subnet interface with IPv6 only", func(t *testing.T) { + ifc := &cdbm.Interface{ + Subnet: &cdbm.Subnet{ + IPv6Prefix: &ipv6Prefix, + PrefixLength: 64, + IPv6Gateway: &ipv6Gateway, + }, + } + cidr, gw := GetInterfaceCIDRAndGateway(ifc) + assert.Equal(t, "2001:db8::/64", cidr) + assert.Equal(t, "2001:db8::1", gw) + }) + + t.Run("vpc prefix interface with CIDR format", func(t *testing.T) { + // In production, VpcPrefix.Prefix is set from childPrefix.Cidr which + // includes the prefix length (e.g. "10.0.0.0/16") + ifc := &cdbm.Interface{ + VpcPrefix: &cdbm.VpcPrefix{ + Prefix: "10.0.0.0/16", + PrefixLength: 16, + }, + } + cidr, gw := GetInterfaceCIDRAndGateway(ifc) + assert.Equal(t, "10.0.0.0/16", cidr) + assert.Equal(t, "", gw) + }) + + t.Run("vpc prefix interface with bare IP format", func(t *testing.T) { + // Fallback: if Prefix is just an IP without prefix length + ifc := &cdbm.Interface{ + VpcPrefix: &cdbm.VpcPrefix{ + Prefix: "10.0.0.0", + PrefixLength: 16, + }, + } + cidr, gw := GetInterfaceCIDRAndGateway(ifc) + assert.Equal(t, "10.0.0.0/16", cidr) + assert.Equal(t, "", gw) + }) + + t.Run("no subnet or vpc prefix", func(t *testing.T) { + ifc := &cdbm.Interface{} + cidr, gw := GetInterfaceCIDRAndGateway(ifc) + assert.Equal(t, "", cidr) + assert.Equal(t, "", gw) + }) +} diff --git a/workflow/pkg/activity/instance/instance.go b/workflow/pkg/activity/instance/instance.go index ce2d8cf4..1d9488f7 100644 --- a/workflow/pkg/activity/instance/instance.go +++ b/workflow/pkg/activity/instance/instance.go @@ -32,6 +32,7 @@ import ( "go.temporal.io/sdk/client" cdb "github.com/nvidia/bare-metal-manager-rest/db/pkg/db" + "github.com/nvidia/bare-metal-manager-rest/db/pkg/db/ipam" cdbm "github.com/nvidia/bare-metal-manager-rest/db/pkg/db/model" "github.com/nvidia/bare-metal-manager-rest/db/pkg/db/paginator" cdbp "github.com/nvidia/bare-metal-manager-rest/db/pkg/db/paginator" @@ -1041,6 +1042,17 @@ func (mi ManageInstance) UpdateInstancesInDB(ctx context.Context, siteID uuid.UU ipAddresses := []string{} ipAddresses = append(ipAddresses, interfaceStatus.Addresses...) + // Validate IP addresses against the interface's prefix to reject + // network, broadcast, and gateway addresses + var ipValidationFailed bool + if cidr, gateway := ipam.GetInterfaceCIDRAndGateway(ifc); cidr != "" { + _, rejectedIPs := ipam.ValidateIPAddresses(ipAddresses, cidr, gateway) + if len(rejectedIPs) > 0 { + slogger.Error().Strs("rejected_ips", rejectedIPs).Str("Interface ID", ifc.ID.String()).Msg("interface has reserved IP addresses assigned by site controller") + ipValidationFailed = true + } + } + // Update device and device_instance if specified in the inventory var device *string var deviceInstance *int @@ -1052,7 +1064,11 @@ func (mi ManageInstance) UpdateInstancesInDB(ctx context.Context, siteID uuid.UU } var status *string - if controllerInstance.Status.Network.ConfigsSynced == cwsv1.SyncState_SYNCED { + if ipValidationFailed { + status = cdb.GetStrPtr(cdbm.InterfaceStatusError) + // Do not persist invalid IP addresses + ipAddresses = nil + } else if controllerInstance.Status.Network.ConfigsSynced == cwsv1.SyncState_SYNCED { status = cdb.GetStrPtr(cdbm.InterfaceStatusReady) } diff --git a/workflow/pkg/activity/instance/instance_test.go b/workflow/pkg/activity/instance/instance_test.go index 45fccf32..af843168 100644 --- a/workflow/pkg/activity/instance/instance_test.go +++ b/workflow/pkg/activity/instance/instance_test.go @@ -1802,6 +1802,40 @@ func TestManageInstance_UpdateInstancesInDB(t *testing.T) { ibInterface18_3 := util.TestBuildInfiniBandInterface(t, dbSession, instance18.ID, site.ID, partition1.ID, "MT2910 Family [ConnectX-7]", 2, true, nil, cdbm.InfiniBandInterfaceStatusDeleting, false) assert.NotNil(t, ibInterface18_3) + // Instance 19 receives a broadcast IP from the Site Controller which should be rejected + machine19 := util.TestBuildMachine(t, dbSession, ip.ID, site.ID, nil, cdb.GetBoolPtr(true), cdbm.MachineStatusReady) + instance19, err := instanceDAO.Create( + ctx, nil, + cdbm.InstanceCreateInput{ + Name: "test-instance-19", + Description: cdb.GetStrPtr("Test description"), + TenantID: tenant.ID, + InfrastructureProviderID: ip.ID, + SiteID: site.ID, + InstanceTypeID: &instanceType.ID, + VpcID: vpc.ID, + MachineID: &machine19.ID, + ControllerInstanceID: cdb.GetUUIDPtr(uuid.New()), + Hostname: cdb.GetStrPtr("test.com"), + OperatingSystemID: cdb.GetUUIDPtr(operatingSystem.ID), + IpxeScript: cdb.GetStrPtr("ipxe"), + AlwaysBootWithCustomIpxe: true, + UserData: cdb.GetStrPtr("userdata"), + Labels: map[string]string{}, + Status: cdbm.InstanceStatusProvisioning, + CreatedBy: tnu.ID, + }, + ) + assert.Nil(t, err) + + // Set created earlier than the inventory receipt interval + _, err = dbSession.DB.Exec("UPDATE instance SET updated = ? WHERE id = ?", time.Now().Add(-time.Duration(cwutil.InventoryReceiptInterval)*2), instance19.ID.String()) + assert.NoError(t, err) + + // Interface on vpcPrefix2 (192.172.0.0/24) - will receive broadcast address from inventory + ifcvpc_reserved_ip := util.TestBuildInterface(t, dbSession, &instance19.ID, nil, &vpcPrefix2.ID, true, nil, nil, nil, &tnu.ID, cdbm.InterfaceStatusPending) + assert.NotNil(t, ifcvpc_reserved_ip) + // Build DPU Extension Services and Deployments for testing dpuExtensionService1 := util.TestBuildDpuExtensionService(t, dbSession, "test-dpu-ext-service-1", site, tenant, "ovs-offload", cdb.GetStrPtr("1"), nil, []string{}, cdbm.DpuExtensionServiceStatusReady, ipu) assert.NotNil(t, dpuExtensionService1) @@ -2199,6 +2233,40 @@ func TestManageInstance_UpdateInstancesInDB(t *testing.T) { }, }, }, + // Instance 19: VPC Prefix interface with broadcast address - should be rejected + { + Id: &cwsv1.InstanceId{Value: instance19.ControllerInstanceID.String()}, + Config: &cwsv1.InstanceConfig{ + Network: &cwsv1.InstanceNetworkConfig{ + Interfaces: []*cwsv1.InstanceInterfaceConfig{ + { + FunctionType: cwsv1.InterfaceFunctionType_PHYSICAL_FUNCTION, + NetworkSegmentId: nil, + NetworkDetails: &cwsv1.InstanceInterfaceConfig_VpcPrefixId{ + VpcPrefixId: &cwsv1.VpcPrefixId{Value: vpcPrefix2.ID.String()}, + }, + }, + }, + }, + }, + Status: &cwsv1.InstanceStatus{ + Tenant: &cwsv1.InstanceTenantStatus{ + State: cwsv1.TenantState_READY, + }, + Network: &cwsv1.InstanceNetworkStatus{ + Interfaces: []*cwsv1.InstanceInterfaceStatus{ + { + MacAddress: &macAddress, + Addresses: []string{"192.172.0.255", "192.172.0.10"}, + }, + }, + ConfigsSynced: cwsv1.SyncState_SYNCED, + }, + Update: &cwsv1.InstanceUpdateStatus{ + UserApprovalReceived: false, + }, + }, + }, }, } @@ -2395,6 +2463,7 @@ func TestManageInstance_UpdateInstancesInDB(t *testing.T) { deletedDpuExtServiceDeployments []*cdbm.DpuExtensionServiceDeployment readyNVLinkInterfaces []*cdbm.NVLinkInterface deletingNVLinkInterfaces []*cdbm.NVLinkInterface + rejectedIPInterfaces []*cdbm.Interface requiredMetadataUpdate bool metadataInstanceUpdate *cdbm.Instance tpmCertificateUpdatedInstance *cdbm.Instance @@ -2432,6 +2501,7 @@ func TestManageInstance_UpdateInstancesInDB(t *testing.T) { deletedDpuExtServiceDeployments: []*cdbm.DpuExtensionServiceDeployment{dpuExtServiceDeployment2}, readyNVLinkInterfaces: []*cdbm.NVLinkInterface{nvlinkInterface1, nvlinkInterface2}, deletingNVLinkInterfaces: []*cdbm.NVLinkInterface{nvlinkInterface3}, + rejectedIPInterfaces: []*cdbm.Interface{ifcvpc_reserved_ip}, tpmCertificateUpdatedInstance: instance16, expectErr: false, }, @@ -2691,6 +2761,14 @@ func TestManageInstance_UpdateInstancesInDB(t *testing.T) { } } + // Verify interfaces with reserved IPs are set to Error with no IPs stored + for _, ifc := range tc.rejectedIPInterfaces { + rejectedIfc, serr := ifcDAO.GetByID(ctx, nil, ifc.ID, nil) + assert.Nil(t, serr) + assert.Equal(t, cdbm.InterfaceStatusError, rejectedIfc.Status, "interface with reserved IP should be in Error status") + assert.Nil(t, rejectedIfc.IPAddresses, "interface with reserved IP should not have IP addresses stored") + } + for _, instPropStatus := range tc.instanceInventory.NetworkSecurityGroupPropagations { updatedInstance, _ := instanceDAO.GetByID(ctx, nil, uuid.MustParse(instPropStatus.Id), nil)