Skip to content

Commit bb738bd

Browse files
committed
TMP test_restart.py
1 parent e7ce404 commit bb738bd

File tree

1 file changed

+44
-15
lines changed

1 file changed

+44
-15
lines changed

src/integration-tests/test_restart.py

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,19 @@
3636
pytestmark = order(2)
3737

3838

39+
def configure_cluster(cluster: Cluster, is_fsm: bool):
40+
'''
41+
Configure the `cluster` to FSM or non-FSM mode, based on the `is_fsm` flag.
42+
'''
43+
for broker in cluster.configurator.brokers.values():
44+
my_clusters = broker.clusters.my_clusters
45+
if my_clusters:
46+
cluster_attr = my_clusters[0].cluster_attributes
47+
cluster_attr.is_cslmode_enabled = is_fsm
48+
cluster_attr.is_fsmworkflow = is_fsm
49+
cluster_attr.doesFSMwriteQLIST = True
50+
cluster.deploy_domains()
51+
3952
def ensureMessageAtStorageLayer(cluster: Cluster, partitionId: int, queueUri: str, numMessages: int):
4053
'''
4154
Assert that in the `partitionId` of the `cluster`, there are exactly
@@ -178,13 +191,14 @@ def test_restart_between_non_FSM_and_FSM(cluster: Cluster, domain_urls: tc.Domai
178191
cluster.stop_nodes(prevent_leader_bounce=True)
179192

180193
# Reconfigure the cluster from non-FSM to FSM mode
181-
for broker in cluster.configurator.brokers.values():
182-
my_clusters = broker.clusters.my_clusters
183-
test_logger.info("TODO xxm: " + str(len(my_clusters)))
184-
if len(my_clusters) > 0:
185-
my_clusters[0].cluster_attributes.is_cslmode_enabled = True
186-
my_clusters[0].cluster_attributes.is_fsmworkflow = True
187-
cluster.deploy_domains()
194+
# for broker in cluster.configurator.brokers.values():
195+
# my_clusters = broker.clusters.my_clusters
196+
# test_logger.info("TODO xxm: " + str(len(my_clusters)))
197+
# if len(my_clusters) > 0:
198+
# my_clusters[0].cluster_attributes.is_cslmode_enabled = True
199+
# my_clusters[0].cluster_attributes.is_fsmworkflow = True
200+
# cluster.deploy_domains()
201+
configure_cluster(cluster, is_fsm=True)
188202

189203
cluster.start_nodes(wait_leader=True, wait_ready=True)
190204
# For a standard cluster, states have already been restored as part of
@@ -243,24 +257,39 @@ def test_restart_between_non_FSM_and_FSM(cluster: Cluster, domain_urls: tc.Domai
243257
consumer_foo.confirm(du.uri_fanout_2_foo, "+1", succeed=True)
244258
consumer_foo.close(du.uri_fanout_2_foo, succeed=True)
245259

260+
# Non-FSM mode has poor healing mechanism, and can have flaky dirty
261+
# shutdowns, so let's disable checking exit code here.
262+
#
263+
# To give an example, an in-sync node might attempt to syncrhonize with an
264+
# out-of-sync node, and become out-of-sync too. FSM mode is determined to
265+
# eliminate these kinds of defects.
266+
for node in cluster.nodes():
267+
node.check_exit_code = False
246268
cluster.stop_nodes(prevent_leader_bounce=True)
247269

248270
# Reconfigure the cluster from FSM to back to non-FSM mode
249-
for broker in cluster.configurator.brokers.values():
250-
my_clusters = broker.clusters.my_clusters
251-
test_logger.info("TODO xxm: " + str(len(my_clusters)))
252-
if len(my_clusters) > 0:
253-
my_clusters[0].cluster_attributes.is_cslmode_enabled = False
254-
my_clusters[0].cluster_attributes.is_fsmworkflow = False
255-
my_clusters[0].cluster_attributes.doesFSMwriteQLIST = True
256-
cluster.deploy_domains()
271+
# for broker in cluster.configurator.brokers.values():
272+
# my_clusters = broker.clusters.my_clusters
273+
# test_logger.info("TODO xxm: " + str(len(my_clusters)))
274+
# if len(my_clusters) > 0:
275+
# my_clusters[0].cluster_attributes.is_cslmode_enabled = False
276+
# my_clusters[0].cluster_attributes.is_fsmworkflow = False
277+
# my_clusters[0].cluster_attributes.doesFSMwriteQLIST = True
278+
# cluster.deploy_domains()
279+
configure_cluster(cluster, is_fsm=False)
257280

258281
cluster.start_nodes(wait_leader=True, wait_ready=True)
259282
# For a standard cluster, states have already been restored as part of
260283
# leader re-election.
261284
if cluster.is_single_node:
262285
producer.wait_state_restored()
263286

287+
# Non-FSM mode has poor healing mechanism, but restarting once more will
288+
# fix the flaky dirty shutdowns
289+
cluster.restart_nodes()
290+
if cluster.is_single_node:
291+
producer.wait_state_restored()
292+
264293
# The producers posts one more message on every queue
265294
producer.post(du.uri_priority, payload=["msg3"], wait_ack=True, succeed=True)
266295
producer.post(du.uri_fanout, payload=["fanout_msg3"], wait_ack=True, succeed=True)

0 commit comments

Comments
 (0)