Skip to content

Commit 56bb06f

Browse files
committed
fix(nemesis): increase timeout waiting node down
After investigating the logs: in the disrupt_refuse_connection_with_* nemeses, waiting for the node to be seen as down can take more than 10 minutes for test configurations with tablets, large data sets and multi-DC clusters, especially when the verification node and target nodes are in different DCs. Fixes: #10434
1 parent 8859a38 commit 56bb06f

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

sdcm/nemesis.py

+6-6
Original file line numberDiff line numberDiff line change
@@ -5522,18 +5522,19 @@ def drop_keyspace(node):
55225522
simulate_node_unavailability = node_operations.block_scylla_ports if use_iptables else node_operations.pause_scylla_with_sigstop
55235523
with self.run_nemesis(node_list=self.cluster.nodes,
55245524
nemesis_label=f"Running {simulate_node_unavailability.__name__}") as working_node, ExitStack() as stack:
5525+
stack.callback(drop_keyspace, node=working_node)
55255526
target_host_id = self.target_node.host_id
55265527
stack.callback(self._remove_node_add_node, verification_node=working_node, node_to_remove=self.target_node,
55275528
remove_node_host_id=target_host_id)
55285529

55295530
self.tester.create_keyspace(keyspace_name, replication_factor=3)
55305531
self.tester.create_table(name=table_name, keyspace_name=keyspace_name, key_type="bigint",
55315532
columns={"name": "text"})
5532-
stack.callback(drop_keyspace, node=working_node)
55335533

55345534
with simulate_node_unavailability(self.target_node):
5535-
# target node stopped by Contextmanger. Wait while its status will be updated
5536-
wait_for(node_operations.is_node_seen_as_down, timeout=600, throw_exc=True,
5535+
# The target node is stopped by the context manager. Wait until its status is updated.
5536+
# With tablets and multi-DC clusters this can take longer.
5537+
wait_for(node_operations.is_node_seen_as_down, step=10, timeout=3600, throw_exc=True,
55375538
down_node=self.target_node, verification_node=working_node, text=f"Wait other nodes see {self.target_node.name} as DOWN...")
55385539
self.log.debug("Remove node %s : hostid: %s with blocked scylla from cluster",
55395540
self.target_node.name, target_host_id)
@@ -5560,12 +5561,11 @@ def drop_keyspace(node):
55605561

55615562
with self.cluster.cql_connection_patient(working_node) as session:
55625563
LOGGER.debug("Check keyspace %s.%s is empty", keyspace_name, table_name)
5563-
result = list(session.execute(f"SELECT * from {keyspace_name}.{table_name}"))
5564+
result = list(session.execute(
5565+
f"SELECT * from {keyspace_name}.{table_name}", consistency_level=ConsistencyLevel.QUORUM))
55645566
LOGGER.debug("Query result %s", result)
55655567
assert not result, f"New rows were added from banned node, {result}"
55665568

5567-
drop_keyspace(working_node)
5568-
55695569

55705570
def disrupt_method_wrapper(method, is_exclusive=False): # pylint: disable=too-many-statements # noqa: PLR0915
55715571
"""

0 commit comments

Comments
 (0)