
Commit a06b7e1

fix(nemesis): add IPv6 support for refusing connections to a banned node

The disrupt_refuse_connection_with_* nemeses do not support IPv6.

- Added commands that block the ports on the IPv6 stack as well. When a banned node is still alive, c-s/s-b could connect to it and fail with a critical error, because the banned node reports the other cluster nodes as down.
- Added a new node_operation, block_loaders_payload_for_scylla_node, which blocks connections from the loader nodes to the Scylla node and avoids the critical c-s/s-b error when they connect to the banned node and fail to run.

Fixes: #10434
1 parent 1208c23 commit a06b7e1
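
The heart of the fix is that every iptables rule is now paired with an ip6tables rule, so the block also holds on dual-stack and IPv6-only clusters. Below is a minimal standalone sketch of the same pattern, using subprocess directly instead of SCT's remoter.sudo abstraction (the helper name and port handling are illustrative, not SCT API):

    import contextlib
    import subprocess

    @contextlib.contextmanager
    def block_port_dual_stack(port: int):
        # Append DROP rules on both stacks; assumes root and iptables/ip6tables on PATH.
        for tool in ("iptables", "ip6tables"):
            subprocess.run([tool, "-A", "INPUT", "-p", "tcp", "--dport", str(port), "-j", "DROP"], check=True)
        try:
            yield
        finally:
            # Mirror every -A (append) with a -D (delete) so the node is reachable again.
            for tool in ("iptables", "ip6tables"):
                subprocess.run([tool, "-D", "INPUT", "-p", "tcp", "--dport", str(port), "-j", "DROP"], check=False)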

File tree

2 files changed: +39 −9 lines changed


sdcm/nemesis.py

+6 −6
@@ -5525,15 +5525,15 @@ def drop_keyspace(node):
             target_host_id = self.target_node.host_id
             stack.callback(self._remove_node_add_node, verification_node=working_node, node_to_remove=self.target_node,
                            remove_node_host_id=target_host_id)
-
+            stack.enter_context(node_operations.block_loaders_payload_for_scylla_node(
+                self.target_node, loader_nodes=self.loaders.nodes))
             self.tester.create_keyspace(keyspace_name, replication_factor=3)
             self.tester.create_table(name=table_name, keyspace_name=keyspace_name, key_type="bigint",
                                      columns={"name": "text"})
-            stack.callback(drop_keyspace, node=working_node)
 
             with simulate_node_unavailability(self.target_node):
                 # target node stopped by Contextmanger. Wait while its status will be updated
-                wait_for(node_operations.is_node_seen_as_down, timeout=600, throw_exc=True,
+                wait_for(node_operations.is_node_seen_as_down, step=5, timeout=600, throw_exc=True,
                          down_node=self.target_node, verification_node=working_node, text=f"Wait other nodes see {self.target_node.name} as DOWN...")
                 self.log.debug("Remove node %s : hostid: %s with blocked scylla from cluster",
                                self.target_node.name, target_host_id)
@@ -5560,12 +5560,12 @@ def drop_keyspace(node):
 
         with self.cluster.cql_connection_patient(working_node) as session:
             LOGGER.debug("Check keyspace %s.%s is empty", keyspace_name, table_name)
-            result = list(session.execute(f"SELECT * from {keyspace_name}.{table_name}"))
+            stmt = SimpleStatement(f"SELECT * from {keyspace_name}.{table_name}",
+                                   consistency_level=ConsistencyLevel.QUORUM)
+            result = list(session.execute(stmt))
             LOGGER.debug("Query result %s", result)
             assert not result, f"New rows were added from banned node, {result}"
 
-        drop_keyspace(working_node)
-
 
 def disrupt_method_wrapper(method, is_exclusive=False):  # pylint: disable=too-many-statements  # noqa: PLR0915
     """
sdcm/utils/nemesis_utils/node_operations.py

+33 −3
@@ -9,31 +9,59 @@
 
 
 @contextlib.contextmanager
-def block_scylla_ports(target_node: "BaseNode", ports: list[int] | None = None):
+def block_scylla_ports(target_node: BaseNode, ports: list[int] | None = None):
     ports = ports or [7001, 7000, 9042, 9142, 19042, 19142]
     target_node.install_package("iptables")
     target_node.start_service("iptables", ignore_status=True)
     target_node.log.debug("Block connections %s", target_node.name)
     for port in ports:
         target_node.remoter.sudo(f"iptables -A INPUT -p tcp --dport {port} -j DROP")
         target_node.remoter.sudo(f"iptables -A OUTPUT -p tcp --dport {port} -j DROP")
+        target_node.remoter.sudo(f"ip6tables -A INPUT -p tcp --dport {port} -j DROP")
+        target_node.remoter.sudo(f"ip6tables -A OUTPUT -p tcp --dport {port} -j DROP")
     yield
     target_node.log.debug("Remove all iptable rules %s", target_node.name)
     for port in ports:
         target_node.remoter.sudo(f"iptables -D INPUT -p tcp --dport {port} -j DROP")
         target_node.remoter.sudo(f"iptables -D OUTPUT -p tcp --dport {port} -j DROP")
+        target_node.remoter.sudo(f"ip6tables -D INPUT -p tcp --dport {port} -j DROP")
+        target_node.remoter.sudo(f"ip6tables -D OUTPUT -p tcp --dport {port} -j DROP")
     target_node.stop_service("iptables", ignore_status=True)
 
 
 @contextlib.contextmanager
-def pause_scylla_with_sigstop(target_node: "BaseNode"):
+def pause_scylla_with_sigstop(target_node: BaseNode):
     target_node.log.debug("Send signal SIGSTOP to scylla process on node %s", target_node.name)
     target_node.remoter.sudo("pkill --signal SIGSTOP -e scylla", timeout=60)
     yield
     target_node.log.debug("Send signal SIGCONT to scylla process on node %s", target_node.name)
     target_node.remoter.sudo(cmd="pkill --signal SIGCONT -e scylla", timeout=60)
 
 
+@contextlib.contextmanager
+def block_loaders_payload_for_scylla_node(scylla_node: BaseNode, loader_nodes: list[BaseNode]):
+    ports = [9042, 9142, 19042, 19142]
+    scylla_node.install_package("iptables")
+    scylla_node.start_service("iptables", ignore_status=True)
+    loader_nodes_names = [node.name for node in loader_nodes]
+    blocking_ips = [node.ip_address for node in loader_nodes]
+    scylla_node.log.debug("Block connections on %s from loader nodes %s", scylla_node.name, loader_nodes_names)
+    for port in ports:
+        scylla_node.remoter.sudo(
+            f"iptables -A INPUT -s {','.join(blocking_ips)} -p tcp --dport {port} -j DROP", ignore_status=True)
+        scylla_node.remoter.sudo(
+            f"ip6tables -A INPUT -s {','.join(blocking_ips)} -p tcp --dport {port} -j DROP", ignore_status=True)
+    yield
+    # if scylla_node is alive, then delete the iptables rules
+    if scylla_node.remoter.is_up():
+        for port in ports:
+            scylla_node.remoter.sudo(
+                f"iptables -D INPUT -s {','.join(blocking_ips)} -p tcp --dport {port} -j DROP", ignore_status=True)
+            scylla_node.remoter.sudo(
+                f"ip6tables -D INPUT -s {','.join(blocking_ips)} -p tcp --dport {port} -j DROP", ignore_status=True)
+        scylla_node.stop_service("iptables", ignore_status=True)
+
+
 def is_node_removed_from_cluster(removed_node: BaseNode, verification_node: BaseNode) -> bool:
     LOGGER.debug("Verification node %s", verification_node.name)
     cluster_status: Optional[dict] = removed_node.parent_cluster.get_nodetool_status(
@@ -48,4 +76,6 @@ def is_node_removed_from_cluster(removed_node: BaseNode, verification_node: Base
 
 def is_node_seen_as_down(down_node: BaseNode, verification_node: BaseNode) -> bool:
     LOGGER.debug("Verification node %s", verification_node.name)
-    return down_node not in verification_node.parent_cluster.get_nodes_up_and_normal(verification_node)
+    nodes_status = verification_node.parent_cluster.get_nodetool_status(verification_node, dc_aware=False)
+    down_node_status = nodes_status.get(down_node.ip_address)
+    return (not down_node_status or down_node_status["state"] == "DN")
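
With this change is_node_seen_as_down consults nodetool status directly: a node counts as down when it is missing from the status map or reported in state DN (Down/Normal). A self-contained sketch of that decision over a plain dict (the map shape is a simplified stand-in for what get_nodetool_status returns):

    def is_seen_as_down(status_by_ip: dict[str, dict], node_ip: str) -> bool:
        # Down == absent from `nodetool status` output or reported as DN.
        node_status = status_by_ip.get(node_ip)
        return not node_status or node_status["state"] == "DN"

    status = {"10.0.0.1": {"state": "UN"}, "10.0.0.2": {"state": "DN"}}  # illustrative
    assert is_seen_as_down(status, "10.0.0.2")      # reported Down/Normal
    assert is_seen_as_down(status, "10.0.0.3")      # absent from the map
    assert not is_seen_as_down(status, "10.0.0.1")  # Up/Normal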
