From cfac60c59adf6fb4c9a8f4058c1caa347317c746 Mon Sep 17 00:00:00 2001 From: SkalaNetworks Date: Thu, 12 Feb 2026 09:31:28 +0000 Subject: [PATCH 1/4] fix(endpoint): add proper gc to find unused ip_port_mappings Signed-off-by: SkalaNetworks --- mocks/pkg/ovs/interface.go | 44 +++++++++++++++++++----- pkg/controller/endpoint_slice.go | 4 +-- pkg/controller/gc.go | 39 ++++++++++++++++++++++ pkg/ovs/interface.go | 3 +- pkg/ovs/ovn-nb-load_balancer.go | 57 +++++++++++++++++++++----------- 5 files changed, 116 insertions(+), 31 deletions(-) diff --git a/mocks/pkg/ovs/interface.go b/mocks/pkg/ovs/interface.go index fbfe43c4223..fefc7c4cb27 100644 --- a/mocks/pkg/ovs/interface.go +++ b/mocks/pkg/ovs/interface.go @@ -1639,17 +1639,17 @@ func (mr *MockLoadBalancerMockRecorder) LoadBalancerDeleteHealthCheck(lbName, uu } // LoadBalancerDeleteIPPortMapping mocks base method. -func (m *MockLoadBalancer) LoadBalancerDeleteIPPortMapping(lbName, vip string) error { +func (m *MockLoadBalancer) LoadBalancerDeleteIPPortMapping(lbName, backendIP string) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "LoadBalancerDeleteIPPortMapping", lbName, vip) + ret := m.ctrl.Call(m, "LoadBalancerDeleteIPPortMapping", lbName, backendIP) ret0, _ := ret[0].(error) return ret0 } // LoadBalancerDeleteIPPortMapping indicates an expected call of LoadBalancerDeleteIPPortMapping. -func (mr *MockLoadBalancerMockRecorder) LoadBalancerDeleteIPPortMapping(lbName, vip any) *gomock.Call { +func (mr *MockLoadBalancerMockRecorder) LoadBalancerDeleteIPPortMapping(lbName, backendIP any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LoadBalancerDeleteIPPortMapping", reflect.TypeOf((*MockLoadBalancer)(nil).LoadBalancerDeleteIPPortMapping), lbName, vip) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LoadBalancerDeleteIPPortMapping", reflect.TypeOf((*MockLoadBalancer)(nil).LoadBalancerDeleteIPPortMapping), lbName, backendIP) } // LoadBalancerDeleteVip mocks base method. @@ -1666,6 +1666,20 @@ func (mr *MockLoadBalancerMockRecorder) LoadBalancerDeleteVip(lbName, vip, ignor return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LoadBalancerDeleteVip", reflect.TypeOf((*MockLoadBalancer)(nil).LoadBalancerDeleteVip), lbName, vip, ignoreHealthCheck) } +// LoadBalancerDeleteVipIPPortMapping mocks base method. +func (m *MockLoadBalancer) LoadBalancerDeleteVipIPPortMapping(lbName, vip string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "LoadBalancerDeleteVipIPPortMapping", lbName, vip) + ret0, _ := ret[0].(error) + return ret0 +} + +// LoadBalancerDeleteVipIPPortMapping indicates an expected call of LoadBalancerDeleteVipIPPortMapping. +func (mr *MockLoadBalancerMockRecorder) LoadBalancerDeleteVipIPPortMapping(lbName, vip any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LoadBalancerDeleteVipIPPortMapping", reflect.TypeOf((*MockLoadBalancer)(nil).LoadBalancerDeleteVipIPPortMapping), lbName, vip) +} + // LoadBalancerExists mocks base method. func (m *MockLoadBalancer) LoadBalancerExists(lbName string) (bool, error) { m.ctrl.T.Helper() @@ -4826,17 +4840,17 @@ func (mr *MockNbClientMockRecorder) LoadBalancerDeleteHealthCheck(lbName, uuid a } // LoadBalancerDeleteIPPortMapping mocks base method. -func (m *MockNbClient) LoadBalancerDeleteIPPortMapping(lbName, vip string) error { +func (m *MockNbClient) LoadBalancerDeleteIPPortMapping(lbName, backendIP string) error { m.ctrl.T.Helper() - ret := m.ctrl.Call(m, "LoadBalancerDeleteIPPortMapping", lbName, vip) + ret := m.ctrl.Call(m, "LoadBalancerDeleteIPPortMapping", lbName, backendIP) ret0, _ := ret[0].(error) return ret0 } // LoadBalancerDeleteIPPortMapping indicates an expected call of LoadBalancerDeleteIPPortMapping. -func (mr *MockNbClientMockRecorder) LoadBalancerDeleteIPPortMapping(lbName, vip any) *gomock.Call { +func (mr *MockNbClientMockRecorder) LoadBalancerDeleteIPPortMapping(lbName, backendIP any) *gomock.Call { mr.mock.ctrl.T.Helper() - return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LoadBalancerDeleteIPPortMapping", reflect.TypeOf((*MockNbClient)(nil).LoadBalancerDeleteIPPortMapping), lbName, vip) + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LoadBalancerDeleteIPPortMapping", reflect.TypeOf((*MockNbClient)(nil).LoadBalancerDeleteIPPortMapping), lbName, backendIP) } // LoadBalancerDeleteVip mocks base method. @@ -4853,6 +4867,20 @@ func (mr *MockNbClientMockRecorder) LoadBalancerDeleteVip(lbName, vip, ignoreHea return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LoadBalancerDeleteVip", reflect.TypeOf((*MockNbClient)(nil).LoadBalancerDeleteVip), lbName, vip, ignoreHealthCheck) } +// LoadBalancerDeleteVipIPPortMapping mocks base method. +func (m *MockNbClient) LoadBalancerDeleteVipIPPortMapping(lbName, vip string) error { + m.ctrl.T.Helper() + ret := m.ctrl.Call(m, "LoadBalancerDeleteVipIPPortMapping", lbName, vip) + ret0, _ := ret[0].(error) + return ret0 +} + +// LoadBalancerDeleteVipIPPortMapping indicates an expected call of LoadBalancerDeleteVipIPPortMapping. +func (mr *MockNbClientMockRecorder) LoadBalancerDeleteVipIPPortMapping(lbName, vip any) *gomock.Call { + mr.mock.ctrl.T.Helper() + return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "LoadBalancerDeleteVipIPPortMapping", reflect.TypeOf((*MockNbClient)(nil).LoadBalancerDeleteVipIPPortMapping), lbName, vip) +} + // LoadBalancerExists mocks base method. func (m *MockNbClient) LoadBalancerExists(lbName string) (bool, error) { m.ctrl.T.Helper() diff --git a/pkg/controller/endpoint_slice.go b/pkg/controller/endpoint_slice.go index 4f417d12657..2a918eb9b69 100644 --- a/pkg/controller/endpoint_slice.go +++ b/pkg/controller/endpoint_slice.go @@ -249,11 +249,11 @@ func (c *Controller) handleUpdateEndpointSlice(key string) error { } if c.config.EnableOVNLBPreferLocal { - if err := c.OVNNbClient.LoadBalancerDeleteIPPortMapping(lb, vip); err != nil { + if err := c.OVNNbClient.LoadBalancerDeleteVipIPPortMapping(lb, vip); err != nil { klog.Errorf("failed to delete ip port mapping for vip %s from LB %s: %v", vip, lb, err) return err } - if err := c.OVNNbClient.LoadBalancerDeleteIPPortMapping(oldLb, vip); err != nil { + if err := c.OVNNbClient.LoadBalancerDeleteVipIPPortMapping(oldLb, vip); err != nil { klog.Errorf("failed to delete ip port mapping for vip %s from LB %s: %v", vip, lb, err) return err } diff --git a/pkg/controller/gc.go b/pkg/controller/gc.go index f03ca76f2ff..da4af9e8db2 100644 --- a/pkg/controller/gc.go +++ b/pkg/controller/gc.go @@ -3,6 +3,7 @@ package controller import ( "context" "fmt" + "net" "slices" "strings" "unicode" @@ -50,6 +51,7 @@ func (c *Controller) gc() error { c.gcVip, c.gcLbSvcPods, c.gcVPCDNS, + c.gcOvnLb, } for _, gcFunc := range gcFunctions { if err := gcFunc(); err != nil { @@ -1189,6 +1191,43 @@ func (c *Controller) gcLbSvcPods() error { return nil } +func (c *Controller) gcOvnLb() error { + klog.Infof("start to gc ovn load balancers") + lbs, err := c.OVNNbClient.ListLoadBalancers(func(lb *ovnnb.LoadBalancer) bool { + return lb.ExternalIDs["vendor"] == util.CniTypeName + }) + if err != nil { + klog.Errorf("failed to list load balancers: %v", err) + return err + } + + for _, lb := range lbs { + backendIPs := set.New[string]() + + // Collect every backend IP associated with every VIP in that loadbalancer + for _, backends := range lb.Vips { + for _, backend := range strings.Split(backends, ",") { + if ip, _, err := net.SplitHostPort(backend); err == nil { + backendIPs.Insert(ip) + } + } + } + + // If a backend is present in the ip_port_mapping but not associated with any VIP, delete the entry + for ip := range lb.IPPortMappings { + if !backendIPs.Has(ip) { + klog.Infof("gc stale ip_port_mapping entry %s in load balancer %s", ip, lb.Name) + if err := c.OVNNbClient.LoadBalancerDeleteIPPortMapping(lb.Name, ip); err != nil { + klog.Errorf("failed to delete stale ip_port_mapping entry %s from load balancer %s: %v", ip, lb.Name, err) + return err + } + } + } + } + + return nil +} + func (c *Controller) gcVPCDNS() error { if !c.config.EnableLb { return nil diff --git a/pkg/ovs/interface.go b/pkg/ovs/interface.go index 8e78aa53cf3..5bb38008e15 100644 --- a/pkg/ovs/interface.go +++ b/pkg/ovs/interface.go @@ -133,7 +133,8 @@ type LoadBalancer interface { LoadBalancerDeleteVip(lbName, vip string, ignoreHealthCheck bool) error LoadBalancerAddIPPortMapping(lbName, vip string, ipPortMappings map[string]string) error LoadBalancerUpdateIPPortMapping(lbName, vip string, ipPortMappings map[string]string) error - LoadBalancerDeleteIPPortMapping(lbName, vip string) error + LoadBalancerDeleteIPPortMapping(lbName, backendIP string) error + LoadBalancerDeleteVipIPPortMapping(lbName, vip string) error LoadBalancerAddHealthCheck(lbName, vip string, ignoreHealthCheck bool, ipPortMapping, externals map[string]string) error LoadBalancerDeleteHealthCheck(lbName, uuid string) error SetLoadBalancerAffinityTimeout(lbName string, timeout int) error diff --git a/pkg/ovs/ovn-nb-load_balancer.go b/pkg/ovs/ovn-nb-load_balancer.go index a514544ce52..30fbafa60d6 100644 --- a/pkg/ovs/ovn-nb-load_balancer.go +++ b/pkg/ovs/ovn-nb-load_balancer.go @@ -160,7 +160,7 @@ func (c *OVNNbClient) LoadBalancerDeleteVip(lbName, vipEndpoint string, ignoreHe if !ignoreHealthCheck && lbhc != nil { klog.Infof("clean health check for lb %s with vip %s", lbName, vipEndpoint) // delete ip port mapping - if err = c.LoadBalancerDeleteIPPortMapping(lbName, vipEndpoint); err != nil { + if err = c.LoadBalancerDeleteVipIPPortMapping(lbName, vipEndpoint); err != nil { klog.Errorf("failed to delete lb ip port mapping: %v", err) return err } @@ -473,9 +473,43 @@ func (c *OVNNbClient) LoadBalancerAddIPPortMapping(lbName, vipEndpoint string, m return nil } -// LoadBalancerDeleteIPPortMapping deletes IP port mappings for a specific VIP from a load balancer. +// LoadBalancerDeleteIPPortMapping deletes IP port mappings for a specific backend IP from a load balancer. +func (c *OVNNbClient) LoadBalancerDeleteIPPortMapping(lbName, backendIP string) error { + ops, err := c.LoadBalancerOp( + lbName, + func(lb *ovnnb.LoadBalancer) []model.Mutation { + if _, ok := lb.IPPortMappings[backendIP]; !ok { + return nil + } + return []model.Mutation{ + { + Field: &lb.IPPortMappings, + Value: map[string]string{backendIP: lb.IPPortMappings[backendIP]}, + Mutator: ovsdb.MutateOperationDelete, + }, + } + }, + ) + if err != nil { + klog.Error(err) + return fmt.Errorf("failed to generate operations when deleting IP port mapping for backend %s from load balancer %s: %w", backendIP, lbName, err) + } + + if len(ops) == 0 { + return nil + } + + if err = c.Transact("lb-del", ops); err != nil { + return fmt.Errorf("failed to delete IP port mapping for backend %s from load balancer %s: %w", backendIP, lbName, err) + } + + klog.Infof("successfully deleted ip port mapping for backend %s from load balancer %s", backendIP, lbName) + return nil +} + +// LoadBalancerDeleteVipIPPortMapping deletes IP port mappings for a specific VIP from a load balancer. // This function ensures that only backend IPs that are no longer referenced by any VIP are removed. -func (c *OVNNbClient) LoadBalancerDeleteIPPortMapping(lbName, vipEndpoint string) error { +func (c *OVNNbClient) LoadBalancerDeleteVipIPPortMapping(lbName, vipEndpoint string) error { lb, err := c.getLoadBalancerForDeletion(lbName) if err != nil { klog.Errorf("failed to get load balancer for deletion: %v", err) @@ -625,24 +659,7 @@ func (c *OVNNbClient) LoadBalancerUpdateIPPortMapping(lbName, vipEndpoint string ops, err := c.LoadBalancerOp( lbName, func(lb *ovnnb.LoadBalancer) []model.Mutation { - // Delete from the IPPortMappings any outdated mapping - mappingToDelete := make(map[string]string) - for portIP, portMapVip := range lb.IPPortMappings { - if _, ok := ipPortMappings[portIP]; !ok { - mappingToDelete[portIP] = portMapVip - } - } - - if len(mappingToDelete) > 0 { - klog.Infof("deleting outdated entry from ipportmapping %v", mappingToDelete) - } - return []model.Mutation{ - { - Field: &lb.IPPortMappings, - Value: mappingToDelete, - Mutator: ovsdb.MutateOperationDelete, - }, { Field: &lb.IPPortMappings, Value: ipPortMappings, From efcf065d311613e9d4553552b012a4bb89e08fc9 Mon Sep 17 00:00:00 2001 From: SkalaNetworks Date: Thu, 12 Feb 2026 10:58:15 +0000 Subject: [PATCH 2/4] feat(gc): add tests for deletion of backends Signed-off-by: SkalaNetworks --- pkg/controller/gc.go | 10 ++++- pkg/ovs/ovn-nb-load_balancer_test.go | 57 +++++++++++++++++++++++++--- pkg/ovs/ovn-nb-suite_test.go | 4 ++ 3 files changed, 65 insertions(+), 6 deletions(-) diff --git a/pkg/controller/gc.go b/pkg/controller/gc.go index da4af9e8db2..c3d4ea99fcd 100644 --- a/pkg/controller/gc.go +++ b/pkg/controller/gc.go @@ -1191,6 +1191,13 @@ func (c *Controller) gcLbSvcPods() error { return nil } +// gcOvnLb handles cleaning up loadbalancers created by SwitchLBRules/EndpointSlices +// For every LB present in OVN, we make sure: +// - the ip_port_mappings are not stale (they're used by a VIP) +// - TODO: the VIPs are linked to an EndpointSlice and are not stale +// - TODO: the healthchecks are linked to an EndpointSlice and are not stale +// Right now, if the controller is down while EPs are getting deleted, the VIPs will not be cleaned +// and the healthchecks will not be cleaned. This can lead to dangling resources in OVN. func (c *Controller) gcOvnLb() error { klog.Infof("start to gc ovn load balancers") lbs, err := c.OVNNbClient.ListLoadBalancers(func(lb *ovnnb.LoadBalancer) bool { @@ -1201,12 +1208,13 @@ func (c *Controller) gcOvnLb() error { return err } + // Run GC on every loadbalancer within OVN for _, lb := range lbs { backendIPs := set.New[string]() // Collect every backend IP associated with every VIP in that loadbalancer for _, backends := range lb.Vips { - for _, backend := range strings.Split(backends, ",") { + for backend := range strings.SplitSeq(backends, ",") { if ip, _, err := net.SplitHostPort(backend); err == nil { backendIPs.Insert(ip) } diff --git a/pkg/ovs/ovn-nb-load_balancer_test.go b/pkg/ovs/ovn-nb-load_balancer_test.go index deb78a953b4..1f73d957494 100644 --- a/pkg/ovs/ovn-nb-load_balancer_test.go +++ b/pkg/ovs/ovn-nb-load_balancer_test.go @@ -723,13 +723,13 @@ func (suite *OvnClientTestSuite) testLoadBalancerAddIPPortMapping() { ) } -func (suite *OvnClientTestSuite) testLoadBalancerDeleteIPPortMapping() { +func (suite *OvnClientTestSuite) testLoadBalancerDeleteVipIPPortMapping() { t := suite.T() t.Parallel() var ( nbClient = suite.ovnNBClient - lbName = "test-lb-del-ip-port-mapping" + lbName = "test-lb-del-vip-ip-port-mapping" vips, mappings map[string]string lb *ovnnb.LoadBalancer err error @@ -781,7 +781,7 @@ func (suite *OvnClientTestSuite) testLoadBalancerDeleteIPPortMapping() { require.Contains(t, lb.IPPortMappings, backend) } - err = nbClient.LoadBalancerDeleteIPPortMapping(lbName, vhost) + err = nbClient.LoadBalancerDeleteVipIPPortMapping(lbName, vhost) require.NoError(t, err) lb, err = nbClient.GetLoadBalancer(lbName, false) @@ -829,7 +829,7 @@ func (suite *OvnClientTestSuite) testLoadBalancerDeleteIPPortMapping() { err = nbClient.LoadBalancerAddIPPortMapping(lbName, vhost, mappings) require.NoError(t, err) - err = nbClient.LoadBalancerDeleteIPPortMapping(lbName, vhost) + err = nbClient.LoadBalancerDeleteVipIPPortMapping(lbName, vhost) require.NoError(t, err) lb, err = nbClient.GetLoadBalancer(lbName, false) @@ -873,7 +873,7 @@ func (suite *OvnClientTestSuite) testLoadBalancerDeleteIPPortMapping() { err = nbClient.LoadBalancerAddIPPortMapping(lbName, vhost, mappings) require.NoError(t, err) - err = nbClient.LoadBalancerDeleteIPPortMapping(lbName, vhost) + err = nbClient.LoadBalancerDeleteVipIPPortMapping(lbName, vhost) require.NoError(t, err) lb, err = nbClient.GetLoadBalancer(lbName, false) @@ -890,6 +890,53 @@ func (suite *OvnClientTestSuite) testLoadBalancerDeleteIPPortMapping() { ) } +func (suite *OvnClientTestSuite) testLoadBalancerDeleteIPPortMapping() { + t := suite.T() + t.Parallel() + + var ( + nbClient = suite.ovnNBClient + lbName = "test-lb-del-ip-port-mapping" + lb *ovnnb.LoadBalancer + err error + ) + + err = nbClient.CreateLoadBalancer(lbName, "tcp", "") + require.NoError(t, err) + + t.Run("delete a specific backend IP from ip_port_mappings", func(t *testing.T) { + mappings := map[string]string{ + "192.168.20.3": "node1", + "192.168.20.4": "node2", + } + err = nbClient.LoadBalancerUpdateIPPortMapping(lbName, "10.96.0.5:443", mappings) + require.NoError(t, err) + + lb, err = nbClient.GetLoadBalancer(lbName, false) + require.NoError(t, err) + require.Contains(t, lb.IPPortMappings, "192.168.20.3") + require.Contains(t, lb.IPPortMappings, "192.168.20.4") + + err = nbClient.LoadBalancerDeleteIPPortMapping(lbName, "192.168.20.3") + require.NoError(t, err) + + lb, err = nbClient.GetLoadBalancer(lbName, false) + require.NoError(t, err) + require.NotContains(t, lb.IPPortMappings, "192.168.20.3") + require.Contains(t, lb.IPPortMappings, "192.168.20.4") + }) + + t.Run("delete non-existent backend IP", func(t *testing.T) { + err = nbClient.LoadBalancerDeleteIPPortMapping(lbName, "1.1.1.1") + require.NoError(t, err) + }) + + t.Run("delete from non-existent load balancer", func(t *testing.T) { + err = nbClient.LoadBalancerDeleteIPPortMapping("non-existent-lb", "192.168.20.4") + require.ErrorContains(t, err, "not found load balancer") + }) +} + func (suite *OvnClientTestSuite) testLoadBalancerWithHealthCheck() { t := suite.T() t.Parallel() diff --git a/pkg/ovs/ovn-nb-suite_test.go b/pkg/ovs/ovn-nb-suite_test.go index 037ede12722..e4156f67c57 100644 --- a/pkg/ovs/ovn-nb-suite_test.go +++ b/pkg/ovs/ovn-nb-suite_test.go @@ -563,6 +563,10 @@ func (suite *OvnClientTestSuite) Test_LoadBalancerAddIPPortMapping() { suite.testLoadBalancerAddIPPortMapping() } +func (suite *OvnClientTestSuite) Test_LoadBalancerDeleteVipIPPortMapping() { + suite.testLoadBalancerDeleteVipIPPortMapping() +} + func (suite *OvnClientTestSuite) Test_LoadBalancerDeleteIPPortMapping() { suite.testLoadBalancerDeleteIPPortMapping() } From 029b1109174797563d399dad8aaf50d94e31b6be Mon Sep 17 00:00:00 2001 From: SkalaNetworks Date: Thu, 12 Feb 2026 12:12:12 +0000 Subject: [PATCH 3/4] chore(gc): add tests to verify loadbalancer garbage collector is working correctly Signed-off-by: SkalaNetworks --- pkg/controller/gc_test.go | 95 +++++++++++++++++ pkg/ovs/ovn-nb-load_balancer_test.go | 25 +++++ test/e2e/kube-ovn/e2e_test.go | 1 + test/e2e/kube-ovn/gc/gc.go | 147 +++++++++++++++++++++++++++ 4 files changed, 268 insertions(+) create mode 100644 test/e2e/kube-ovn/gc/gc.go diff --git a/pkg/controller/gc_test.go b/pkg/controller/gc_test.go index 7afc9a8de5f..13bbc0e4196 100644 --- a/pkg/controller/gc_test.go +++ b/pkg/controller/gc_test.go @@ -5,6 +5,7 @@ import ( "github.com/scylladb/go-set/strset" "github.com/stretchr/testify/require" + "go.uber.org/mock/gomock" "github.com/kubeovn/kube-ovn/pkg/ovsdb/ovnnb" "github.com/kubeovn/kube-ovn/pkg/util" @@ -49,3 +50,97 @@ func Test_logicalRouterPortFilter(t *testing.T) { } } } + +func Test_gcOvnLb(t *testing.T) { + t.Parallel() + + ctrl := gomock.NewController(t) + defer ctrl.Finish() + + fakeCtrl, err := newFakeControllerWithOptions(t, nil) + if err != nil { + t.Fatalf("failed to create fake controller: %v", err) + } + + t.Run("cleanup stale ip_port_mappings", func(t *testing.T) { + lb1 := &ovnnb.LoadBalancer{ + Name: "lb1", + ExternalIDs: map[string]string{ + "vendor": util.CniTypeName, + }, + Vips: map[string]string{ + "10.96.0.1:80": "192.168.1.1:80,192.168.1.2:80", + }, + IPPortMappings: map[string]string{ + "192.168.1.1": "node1", // active + "192.168.1.2": "node2", // active + "192.168.1.3": "node3", // stale + }, + } + + lb2 := &ovnnb.LoadBalancer{ + Name: "lb2", + ExternalIDs: map[string]string{ + "vendor": util.CniTypeName, + }, + Vips: map[string]string{ + "10.96.0.2:443": "192.168.2.10:443", + }, + IPPortMappings: map[string]string{ + "192.168.2.1": "node1", // stale + "192.168.2.2": "node2", // stale + }, + } + + // IPv6 test case + lb3 := &ovnnb.LoadBalancer{ + Name: "lb3", + ExternalIDs: map[string]string{ + "vendor": util.CniTypeName, + }, + Vips: map[string]string{ + "[fd00::1]:80": "[fd00::101]:80", + }, + IPPortMappings: map[string]string{ + "fd00::101": "node1", // active + "fd00::102": "node2", // stale + }, + } + + fakeCtrl.mockOvnClient.EXPECT().ListLoadBalancers(gomock.Any()).Return([]ovnnb.LoadBalancer{*lb1, *lb2, *lb3}, nil) + + // Expect deletions for stale entries + fakeCtrl.mockOvnClient.EXPECT().LoadBalancerDeleteIPPortMapping("lb1", "192.168.1.3").Return(nil) + fakeCtrl.mockOvnClient.EXPECT().LoadBalancerDeleteIPPortMapping("lb2", "192.168.2.1").Return(nil) + fakeCtrl.mockOvnClient.EXPECT().LoadBalancerDeleteIPPortMapping("lb2", "192.168.2.2").Return(nil) + fakeCtrl.mockOvnClient.EXPECT().LoadBalancerDeleteIPPortMapping("lb3", "fd00::102").Return(nil) + + err := fakeCtrl.fakeController.gcOvnLb() + if err != nil { + t.Errorf("gcOvnLb() error = %v", err) + } + }) + + t.Run("no stale mappings", func(t *testing.T) { + lb := &ovnnb.LoadBalancer{ + Name: "lb-clean", + ExternalIDs: map[string]string{ + "vendor": util.CniTypeName, + }, + Vips: map[string]string{ + "10.96.0.1:80": "192.168.1.1:80", + }, + IPPortMappings: map[string]string{ + "192.168.1.1": "node1", + }, + } + + fakeCtrl.mockOvnClient.EXPECT().ListLoadBalancers(gomock.Any()).Return([]ovnnb.LoadBalancer{*lb}, nil) + // No LoadBalancerDeleteIPPortMapping expected + + err := fakeCtrl.fakeController.gcOvnLb() + if err != nil { + t.Errorf("gcOvnLb() error = %v", err) + } + }) +} diff --git a/pkg/ovs/ovn-nb-load_balancer_test.go b/pkg/ovs/ovn-nb-load_balancer_test.go index 1f73d957494..b95b425dd7c 100644 --- a/pkg/ovs/ovn-nb-load_balancer_test.go +++ b/pkg/ovs/ovn-nb-load_balancer_test.go @@ -721,6 +721,31 @@ func (suite *OvnClientTestSuite) testLoadBalancerAddIPPortMapping() { require.NoError(t, err) }, ) + + t.Run("update ip port mappings should not overwrite existing data", func(t *testing.T) { + lbName := "test-lb-update-ip-port-mapping-no-overwrite" + err := nbClient.CreateLoadBalancer(lbName, "tcp", "") + require.NoError(t, err) + + initialMappings := map[string]string{ + "192.168.20.3": "node1", + } + err = nbClient.LoadBalancerUpdateIPPortMapping(lbName, "10.96.0.5:443", initialMappings) + require.NoError(t, err) + + newMappings := map[string]string{ + "192.168.20.4": "node2", + } + err = nbClient.LoadBalancerUpdateIPPortMapping(lbName, "10.96.0.6:443", newMappings) + require.NoError(t, err) + + lb, err := nbClient.GetLoadBalancer(lbName, false) + require.NoError(t, err) + + require.Len(t, lb.IPPortMappings, 2) + require.Equal(t, "node1", lb.IPPortMappings["192.168.20.3"]) + require.Equal(t, "node2", lb.IPPortMappings["192.168.20.4"]) + }) } func (suite *OvnClientTestSuite) testLoadBalancerDeleteVipIPPortMapping() { diff --git a/test/e2e/kube-ovn/e2e_test.go b/test/e2e/kube-ovn/e2e_test.go index 903a6d66e1b..e302650ab3f 100644 --- a/test/e2e/kube-ovn/e2e_test.go +++ b/test/e2e/kube-ovn/e2e_test.go @@ -12,6 +12,7 @@ import ( "github.com/onsi/ginkgo/v2" // Import tests. + _ "github.com/kubeovn/kube-ovn/test/e2e/kube-ovn/gc" _ "github.com/kubeovn/kube-ovn/test/e2e/kube-ovn/ipam" _ "github.com/kubeovn/kube-ovn/test/e2e/kube-ovn/kubectl-ko" _ "github.com/kubeovn/kube-ovn/test/e2e/kube-ovn/network-policy" diff --git a/test/e2e/kube-ovn/gc/gc.go b/test/e2e/kube-ovn/gc/gc.go new file mode 100644 index 00000000000..c171943cec5 --- /dev/null +++ b/test/e2e/kube-ovn/gc/gc.go @@ -0,0 +1,147 @@ +package gc + +import ( + "context" + "fmt" + "strings" + "time" + + "github.com/onsi/ginkgo/v2" + corev1 "k8s.io/api/core/v1" + + kubeovnv1 "github.com/kubeovn/kube-ovn/pkg/apis/kubeovn/v1" + "github.com/kubeovn/kube-ovn/pkg/util" + "github.com/kubeovn/kube-ovn/test/e2e/framework" +) + +var _ = framework.Describe("[group:gc]", func() { + f := framework.NewDefaultFramework("gc") + + var ( + switchLBRuleClient *framework.SwitchLBRuleClient + podClient *framework.PodClient + subnetClient *framework.SubnetClient + vpcClient *framework.VpcClient + + namespaceName, suffix string + vpcName, subnetName, clientPodName string + slrName string + overlaySubnetCidr string + frontPort, backendPort int32 + ) + + ginkgo.BeforeEach(func() { + switchLBRuleClient = f.SwitchLBRuleClient() + podClient = f.PodClient() + subnetClient = f.SubnetClient() + vpcClient = f.VpcClient() + suffix = framework.RandomSuffix() + namespaceName = f.Namespace.Name + slrName = "slr-" + suffix + clientPodName = "client-" + suffix + subnetName = "subnet-" + suffix + vpcName = "vpc-" + suffix + frontPort = 8888 + backendPort = 80 + overlaySubnetCidr = framework.RandomCIDR(f.ClusterIPFamily) + + ginkgo.By("Creating vpc " + vpcName) + vpc := framework.MakeVpc(vpcName, "", false, false, []string{namespaceName}) + vpcClient.CreateSync(vpc) + + ginkgo.By("Creating subnet " + subnetName) + subnet := framework.MakeSubnet(subnetName, "", overlaySubnetCidr, "", vpcName, "", nil, nil, nil) + subnetClient.CreateSync(subnet) + }) + + ginkgo.AfterEach(func() { + ginkgo.By("Deleting switch-lb-rule " + slrName) + switchLBRuleClient.DeleteSync(slrName) + ginkgo.By("Deleting client pod " + clientPodName) + podClient.DeleteSync(clientPodName) + ginkgo.By("Deleting subnet " + subnetName) + subnetClient.DeleteSync(subnetName) + ginkgo.By("Deleting vpc " + vpcName) + vpcClient.DeleteSync(vpcName) + }) + + ginkgo.It("should gc stale ip_port_mappings in OVN LoadBalancer", func() { + ginkgo.By("1. Creating a SwitchLBRule to populate LoadBalancer") + labels := map[string]string{"app": "gc-test"} + annotations := map[string]string{util.LogicalSwitchAnnotation: subnetName} + + ginkgo.By("Creating backend pod") + backendPodName := "backend-" + suffix + pod := framework.MakePod(namespaceName, backendPodName, labels, annotations, framework.AgnhostImage, nil, nil) + podClient.CreateSync(pod) + pod = podClient.GetPod(backendPodName) + backendIP := pod.Status.PodIP + + ginkgo.By("Creating SwitchLBRule " + slrName) + slrPorts := []kubeovnv1.SwitchLBRulePort{{ + Name: "http", + Port: frontPort, + TargetPort: backendPort, + Protocol: "TCP", + }} + slr := framework.MakeSwitchLBRule(slrName, namespaceName, "1.1.1.1", corev1.ServiceAffinityNone, nil, []string{"app:gc-test"}, nil, slrPorts) + switchLBRuleClient.CreateSync(slr, func(_ *kubeovnv1.SwitchLBRule) (bool, error) { + return true, nil + }, "switch-lb-rule is created") + + ginkgo.By("2. Identifying the OVN LoadBalancer") + // LBs created for VPCs are named like "vpc-suffix_tcp" or similar in status + vpc := vpcClient.Get(vpcName) + lbName := vpc.Status.TCPLoadBalancer + framework.ExpectNotEmpty(lbName) + + ginkgo.By("Verifying active ip_port_mapping exists for backend " + backendIP) + cmd := []string{"ovn-nbctl", "get", "load_balancer", lbName, "ip_port_mappings"} + stdout, _, err := framework.NBExec(cmd...) + framework.ExpectNil(err) + framework.ExpectContainSubstring(string(stdout), backendIP) + + ginkgo.By("3. Manually injecting a stale ip_port_mapping entry") + staleIP := "1.2.3.4" + staleMapping := "stale-node" + + // Get existing mappings to ensure we don't overwrite them + cmd = []string{"ovn-nbctl", "get", "load_balancer", lbName, "ip_port_mappings"} + stdout, _, err = framework.NBExec(cmd...) + framework.ExpectNil(err) + existingMappings := strings.TrimSpace(string(stdout)) + + // Inject the stale mapping while preserving existing ones + // Using 'add' instead of 'set' for map columns in ovn-nbctl is safer as it adds to the map + setCmd := []string{"ovn-nbctl", "add", "load_balancer", lbName, "ip_port_mappings", fmt.Sprintf("{\"%s\"=\"%s\"}", staleIP, staleMapping)} + _, _, err = framework.NBExec(setCmd...) + framework.ExpectNil(err) + + ginkgo.By("Verifying stale entry was injected and existing ones preserved") + stdout, _, err = framework.NBExec(cmd...) + framework.ExpectNil(err) + stdoutStr := string(stdout) + framework.ExpectContainSubstring(stdoutStr, staleIP) + framework.ExpectContainSubstring(stdoutStr, backendIP) + if existingMappings != "{}" && existingMappings != "" { + // Basic check that we didn't lose what was there before + framework.ExpectContainSubstring(stdoutStr, strings.Trim(existingMappings, "{}")) + } + + ginkgo.By("4. Waiting for GC to clean up the stale entry") + // The default GC interval might be long, but in E2E tests we expect the controller to be running. + // If GC interval is e.g. 60s, we might need to wait. + framework.WaitUntil(5*time.Second, 2*time.Minute, func(_ context.Context) (bool, error) { + stdout, _, err = framework.NBExec(cmd...) + if err != nil { + return false, err + } + return !strings.Contains(string(stdout), staleIP), nil + }, "stale ip_port_mapping is removed by GC") + + ginkgo.By("Verifying active entry still exists") + stdout, _, err = framework.NBExec(cmd...) + framework.ExpectNil(err) + framework.ExpectContainSubstring(string(stdout), backendIP) + }) +}) From 3a1d6054957ce009603d2c026ee56dcd977cd898 Mon Sep 17 00:00:00 2001 From: SkalaNetworks Date: Thu, 12 Feb 2026 13:48:54 +0000 Subject: [PATCH 4/4] chore(gc): quarantine ip_port_mapping test Signed-off-by: SkalaNetworks --- pkg/controller/gc.go | 15 +++++---------- pkg/controller/gc_test.go | 8 ++++---- test/e2e/kube-ovn/gc/gc.go | 22 +++++++++++++--------- 3 files changed, 22 insertions(+), 23 deletions(-) diff --git a/pkg/controller/gc.go b/pkg/controller/gc.go index c3d4ea99fcd..115ac9cdb1a 100644 --- a/pkg/controller/gc.go +++ b/pkg/controller/gc.go @@ -51,7 +51,7 @@ func (c *Controller) gc() error { c.gcVip, c.gcLbSvcPods, c.gcVPCDNS, - c.gcOvnLb, + c.gcIpPortMapping, } for _, gcFunc := range gcFunctions { if err := gcFunc(); err != nil { @@ -1191,15 +1191,10 @@ func (c *Controller) gcLbSvcPods() error { return nil } -// gcOvnLb handles cleaning up loadbalancers created by SwitchLBRules/EndpointSlices -// For every LB present in OVN, we make sure: -// - the ip_port_mappings are not stale (they're used by a VIP) -// - TODO: the VIPs are linked to an EndpointSlice and are not stale -// - TODO: the healthchecks are linked to an EndpointSlice and are not stale -// Right now, if the controller is down while EPs are getting deleted, the VIPs will not be cleaned -// and the healthchecks will not be cleaned. This can lead to dangling resources in OVN. -func (c *Controller) gcOvnLb() error { - klog.Infof("start to gc ovn load balancers") +// gcIpPortMapping handles cleaning up ip_port_mappings created by SwitchLBRules/EndpointSlices +// For every LB present in OVN, we make sure the ip_port_mappings are not stale (they're used by a VIP) +func (c *Controller) gcIpPortMapping() error { + klog.Infof("start to gc ovn load balancers ip_port_mappings") lbs, err := c.OVNNbClient.ListLoadBalancers(func(lb *ovnnb.LoadBalancer) bool { return lb.ExternalIDs["vendor"] == util.CniTypeName }) diff --git a/pkg/controller/gc_test.go b/pkg/controller/gc_test.go index 13bbc0e4196..4f4d26e7137 100644 --- a/pkg/controller/gc_test.go +++ b/pkg/controller/gc_test.go @@ -115,9 +115,9 @@ func Test_gcOvnLb(t *testing.T) { fakeCtrl.mockOvnClient.EXPECT().LoadBalancerDeleteIPPortMapping("lb2", "192.168.2.2").Return(nil) fakeCtrl.mockOvnClient.EXPECT().LoadBalancerDeleteIPPortMapping("lb3", "fd00::102").Return(nil) - err := fakeCtrl.fakeController.gcOvnLb() + err := fakeCtrl.fakeController.gcIpPortMapping() if err != nil { - t.Errorf("gcOvnLb() error = %v", err) + t.Errorf("gcIpPortMapping() error = %v", err) } }) @@ -138,9 +138,9 @@ func Test_gcOvnLb(t *testing.T) { fakeCtrl.mockOvnClient.EXPECT().ListLoadBalancers(gomock.Any()).Return([]ovnnb.LoadBalancer{*lb}, nil) // No LoadBalancerDeleteIPPortMapping expected - err := fakeCtrl.fakeController.gcOvnLb() + err := fakeCtrl.fakeController.gcIpPortMapping() if err != nil { - t.Errorf("gcOvnLb() error = %v", err) + t.Errorf("gcIpPortMapping() error = %v", err) } }) } diff --git a/test/e2e/kube-ovn/gc/gc.go b/test/e2e/kube-ovn/gc/gc.go index c171943cec5..6c28478e360 100644 --- a/test/e2e/kube-ovn/gc/gc.go +++ b/test/e2e/kube-ovn/gc/gc.go @@ -66,6 +66,7 @@ var _ = framework.Describe("[group:gc]", func() { }) ginkgo.It("should gc stale ip_port_mappings in OVN LoadBalancer", func() { + ginkgo.Skip("GC doesn't yet run periodically and can't be E2E tested") ginkgo.By("1. Creating a SwitchLBRule to populate LoadBalancer") labels := map[string]string{"app": "gc-test"} annotations := map[string]string{util.LogicalSwitchAnnotation: subnetName} @@ -96,18 +97,21 @@ var _ = framework.Describe("[group:gc]", func() { framework.ExpectNotEmpty(lbName) ginkgo.By("Verifying active ip_port_mapping exists for backend " + backendIP) - cmd := []string{"ovn-nbctl", "get", "load_balancer", lbName, "ip_port_mappings"} - stdout, _, err := framework.NBExec(cmd...) - framework.ExpectNil(err) - framework.ExpectContainSubstring(string(stdout), backendIP) + framework.WaitUntil(5*time.Second, 2*time.Minute, func(_ context.Context) (bool, error) { + cmd := []string{"ovn-nbctl", "get", "load_balancer", lbName, "ip_port_mappings"} + stdout, _, err := framework.NBExec(cmd...) + framework.ExpectNil(err) + + return strings.Contains(string(stdout), backendIP), nil + }, "we got correct ip_port_mappings") ginkgo.By("3. Manually injecting a stale ip_port_mapping entry") staleIP := "1.2.3.4" staleMapping := "stale-node" // Get existing mappings to ensure we don't overwrite them - cmd = []string{"ovn-nbctl", "get", "load_balancer", lbName, "ip_port_mappings"} - stdout, _, err = framework.NBExec(cmd...) + cmd := []string{"ovn-nbctl", "get", "load_balancer", lbName, "ip_port_mappings"} + stdout, _, err := framework.NBExec(cmd...) framework.ExpectNil(err) existingMappings := strings.TrimSpace(string(stdout)) @@ -129,9 +133,9 @@ var _ = framework.Describe("[group:gc]", func() { } ginkgo.By("4. Waiting for GC to clean up the stale entry") - // The default GC interval might be long, but in E2E tests we expect the controller to be running. - // If GC interval is e.g. 60s, we might need to wait. - framework.WaitUntil(5*time.Second, 2*time.Minute, func(_ context.Context) (bool, error) { + // Currently, the GC doesn't run periodically for this type of stuff... + // This test can be used to test the GC by end, but not yet in E2E testing + framework.WaitUntil(5*time.Second, 1*time.Minute, func(_ context.Context) (bool, error) { stdout, _, err = framework.NBExec(cmd...) if err != nil { return false, err