Skip to content

Commit ede1d71

Browse files
oilbeater and claude authored
fix(e2e): reuse OVSDB client in address set tests to avoid HA deadlock (#6368)
Cache the OVN NB client across WaitForAddressSet* calls instead of creating a new connection each time. In HA environments, repeated connect/monitor cycles can trigger a deadlock in libovsdb when a leader change occurs during the initial Monitor RPC setup: the Monitor goroutine holds rpcMutex.RLock while the leader-change handler blocks on rpcMutex.Lock, and Go's write-preferring RWMutex then also blocks the inactivity probe's RLock, leaving no recovery path.

Also tolerate transient "not connected" errors during the poll loop instead of failing immediately, giving libovsdb's automatic reconnection time to recover after a leader failover.

Signed-off-by: Mengxin Liu <liumengxinfly@gmail.com>
Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 43ff19a commit ede1d71

File tree

1 file changed

+22
-15
lines changed

1 file changed

+22
-15
lines changed

test/e2e/framework/ovn_address_set.go

Lines changed: 22 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -37,9 +37,13 @@ const (
3737
)
3838

3939
var (
40-
ovnnbAddrOnce sync.Once
41-
ovnnbAddrErr error
42-
ovnnbAddr string
40+
ovnnbAddrOnce sync.Once
41+
ovnnbAddrErr error
42+
ovnnbAddr string
43+
ovnnbClientOnce sync.Once
44+
ovnnbClientErr error
45+
ovnnbCachedNbCli *ovs.OVNNbClient
46+
ovnnbCachedModel map[string]model.Model
4347
)
4448

4549
func validateModelStructure(model model.Model, table string, expectedFields map[string]reflect.Type) {
@@ -61,8 +65,6 @@ func WaitForAddressSetCondition(condition func(rows any) (bool, error)) {
6165
ginkgo.GinkgoHelper()
6266

6367
client, models := getOVNNbClient(ovnnb.AddressSetTable)
64-
defer client.Close()
65-
6668
model := models[ovnnb.AddressSetTable]
6769
validateModelStructure(model, ovnnb.AddressSetTable, map[string]reflect.Type{
6870
"Name": reflect.TypeFor[string](),
@@ -76,7 +78,10 @@ func WaitForAddressSetCondition(condition func(rows any) (bool, error)) {
7678

7779
result := reflect.New(reflect.SliceOf(reflect.TypeOf(model).Elem())).Interface()
7880
if err := client.List(ctx, result); err != nil {
79-
return false, err
81+
// Transient errors (e.g. "not connected" during leader failover)
82+
// should be retried rather than immediately failing the poll.
83+
Logf("Failed to list address sets (will retry): %v", err)
84+
return false, nil
8085
}
8186

8287
return condition(result)
@@ -176,16 +181,18 @@ func getOVNNbClient(tables ...string) (*ovs.OVNNbClient, map[string]model.Model)
176181
})
177182
ExpectNoError(ovnnbAddrErr)
178183

179-
client, models, err := ovs.NewDynamicOvnNbClient(
180-
ovnnbAddr,
181-
ovnNbTimeoutSeconds,
182-
ovsdbConnTimeout,
183-
ovsdbInactivityTimeout,
184-
tables...,
185-
)
186-
ExpectNoError(err)
184+
ovnnbClientOnce.Do(func() {
185+
ovnnbCachedNbCli, ovnnbCachedModel, ovnnbClientErr = ovs.NewDynamicOvnNbClient(
186+
ovnnbAddr,
187+
ovnNbTimeoutSeconds,
188+
ovsdbConnTimeout,
189+
ovsdbInactivityTimeout,
190+
tables...,
191+
)
192+
})
193+
ExpectNoError(ovnnbClientErr)
187194

188-
return client, models
195+
return ovnnbCachedNbCli, ovnnbCachedModel
189196
}
190197

191198
func resolveOVNNbConnection() (string, error) {

0 commit comments

Comments
 (0)