Skip to content

Commit 37c4a41

Browse files
authored
CLOUDP-383543: fix flaky secondaries check using replSetGetStatus (#824)
## Summary

`client.secondaries` in pymongo relies on async background topology discovery — after `ismaster` initiates the connection, monitor threads continue discovering replica set members in the background. This means `client.secondaries` can return an empty or incomplete set immediately after connecting, causing intermittent test failures.

This PR replaces all usages of `client.secondaries` with a new `KubernetesTester.get_replica_set_secondaries()` helper that runs `replSetGetStatus` on the primary. This is a synchronous call that returns the authoritative replica set state directly from the server, bypassing the client-side topology cache entirely.

## Proof of Work

N/A — test-only change fixing flaky assertions.

## Checklist

- [x] Have you linked a Jira ticket and/or is the ticket in the title?
- [ ] Have you checked whether your Jira ticket required DOCSP changes?
- [x] Have you added a changelog file?
  - use the `skip-changelog` label if not needed
1 parent f6fe7bb commit 37c4a41

File tree

5 files changed

+18
-8
lines changed

5 files changed

+18
-8
lines changed

docker/mongodb-kubernetes-tests/kubetester/kubetester.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1334,7 +1334,7 @@ def yield_existing_csrs(csr_names, timeout=300):
13341334
return yield_existing_csrs(csr_names, timeout)
13351335

13361336
@staticmethod
1337-
def get_populated_mongo_client(hosts: list[str], ssl: bool = False) -> pymongo.MongoClient:
1337+
def get_connected_mongo_client(hosts: list[str], ssl: bool = False) -> pymongo.MongoClient:
13381338
mongodburi = KubernetesTester.build_mongodb_uri_for_rs(hosts)
13391339
options = {}
13401340
if ssl:
@@ -1346,6 +1346,16 @@ def get_populated_mongo_client(hosts: list[str], ssl: bool = False) -> pymongo.M
13461346

13471347
return client
13481348

1349+
@staticmethod
def get_replica_set_secondaries(client: pymongo.MongoClient) -> list:
    """Return the healthy SECONDARY members reported by ``replSetGetStatus``.

    The command is run through ``client.admin`` with a PRIMARY read
    preference, so the member list comes from the server's own status
    document. Use this instead of ``client.secondaries``: that property
    depends on pymongo's asynchronous topology discovery and can be empty
    or incomplete right after the client connects.

    Each returned element is a member entry from the ``members`` array of
    the status document whose ``stateStr`` is ``"SECONDARY"`` and whose
    ``health`` equals 1.
    """
    rs_status = client.admin.command(
        "replSetGetStatus",
        read_preference=pymongo.ReadPreference.PRIMARY,
    )

    healthy_secondaries = []
    for member in rs_status["members"]:
        # Skip anything that is not currently a secondary (primary, arbiter, ...).
        if member["stateStr"] != "SECONDARY":
            continue
        # Skip secondaries that the queried node reports as unhealthy.
        if member["health"] != 1:
            continue
        healthy_secondaries.append(member)
    return healthy_secondaries
1358+
13491359
def _get_pods(self, podname, qty=3):
13501360
return [podname.format(i) for i in range(qty)]
13511361

docker/mongodb-kubernetes-tests/tests/multicluster_shardedcluster/multi_cluster_sharded_geo_sharding.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -149,5 +149,5 @@ def test_assert_shard_primary_replicas(self, sc: MongoDB):
149149
}
150150
for shard_idx, cluster_idx in cluster_primary_member_mapping.items():
151151
shard_primary_hostname = sc.shard_hostname(shard_idx, 0, cluster_idx)
152-
client = KubernetesTester.get_populated_mongo_client(hosts=[shard_primary_hostname])
152+
client = KubernetesTester.get_connected_mongo_client(hosts=[shard_primary_hostname])
153153
assert client.is_primary

docker/mongodb-kubernetes-tests/tests/replicaset/replica_set_pv.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,10 +91,10 @@ def test_replica_set_was_configured(self):
9191
"Should connect to one of the mongods and check the replica set was correctly configured."
9292
hosts = ["rs001-pv-{}.rs001-pv-svc.{}.svc.cluster.local:27017".format(i, self.namespace) for i in range(3)]
9393

94-
client = self.get_populated_mongo_client(hosts=hosts)
94+
client = self.get_connected_mongo_client(hosts=hosts)
9595

9696
assert client.primary is not None
97-
assert len(client.secondaries) == 2
97+
assert len(KubernetesTester.get_replica_set_secondaries(client)) == 2
9898

9999

100100
@pytest.mark.e2e_replica_set_pv

docker/mongodb-kubernetes-tests/tests/shardedcluster/sharded_cluster.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,9 @@ def test_both_shards_are_configured(self, sc: MongoDB):
130130
hosts.append(hostname)
131131

132132
logger.debug(f"Checking for connectivity of hosts: {hosts}")
133-
client = KubernetesTester.get_populated_mongo_client(hosts=hosts)
133+
client = KubernetesTester.get_connected_mongo_client(hosts=hosts)
134134
assert client.primary is not None
135-
assert len(client.secondaries) == 2
135+
assert len(KubernetesTester.get_replica_set_secondaries(client)) == 2
136136

137137
def test_monitoring_versions(self, sc: MongoDB):
138138
"""Verifies that monitoring agent is configured for each process in the deployment"""

docker/mongodb-kubernetes-tests/tests/shardedcluster/sharded_cluster_pv.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,10 @@ def test_shard0_was_configured(self, sc: MongoDB):
9696
hostname = sc.shard_hostname(0, member_idx, cluster_member_client.cluster_index)
9797
hosts.append(hostname)
9898

99-
client = KubernetesTester.get_populated_mongo_client(hosts=hosts)
99+
client = KubernetesTester.get_connected_mongo_client(hosts=hosts)
100100

101101
assert client.primary is not None
102-
assert len(client.secondaries) == 2
102+
assert len(KubernetesTester.get_replica_set_secondaries(client)) == 2
103103

104104
def test_pvc_are_bound(self, sc: MongoDB):
105105
for cluster_member_client in get_member_cluster_clients_using_cluster_mapping(sc.name, sc.namespace):

0 commit comments

Comments
 (0)