Skip to content

Commit 98b7009

Browse files
author
yarongilor
committed
fix(nodetool rebuild): use repair instead of rebuild if no tablets support
If tablets do not support nodetool rebuild, the test should use repair as an alternative action. It should then disable load-balancing and repair all nodes in this datacenter. refs: scylladb/scylladb#17575 refs: scylladb/scylladb#20084 (comment)
1 parent 8dcdcc9 commit 98b7009

File tree

1 file changed

+26
-4
lines changed

1 file changed

+26
-4
lines changed

sdcm/nemesis.py

Lines changed: 26 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4116,6 +4116,16 @@ def decommission_post_action():
41164116
ParallelObject(objects=[trigger, watcher], timeout=full_operations_timeout).call_objects()
41174117
if new_node := decommission_post_action():
41184118
new_node.wait_node_fully_start()
4119+
is_rebuild_supported = SkipPerIssues('scylladb/scylladb#17575', params=self.tester.params)
4120+
# If tablets are in use and rebuild is not supported, run a DC repair instead.
4121+
if is_tablets_feature_enabled(self.target_node) and not is_rebuild_supported:
4122+
for node in [n for n in self.cluster.nodes if
4123+
n.dc_idx == self.target_node.dc_idx and n.db_up()]:
4124+
node.run_nodetool(sub_cmd="repair", long_running=True, retry=0)
4125+
else:
4126+
with adaptive_timeout(Operations.REBUILD, self.target_node, timeout=HOUR_IN_SEC * 48):
4127+
self.target_node.run_nodetool("rebuild", long_running=True, retry=0)
4128+
41194129
new_node.run_nodetool("rebuild", long_running=True, retry=0)
41204130
else:
41214131
self.target_node.wait_node_fully_start()
@@ -4147,8 +4157,14 @@ def start_and_interrupt_repair_streaming(self):
41474157

41484158
self.target_node.wait_node_fully_start()
41494159

4150-
with adaptive_timeout(Operations.REBUILD, self.target_node, timeout=HOUR_IN_SEC * 48):
4151-
self.target_node.run_nodetool("rebuild", long_running=True, retry=0)
4160+
is_rebuild_supported = SkipPerIssues('scylladb/scylladb#17575', params=self.tester.params)
4161+
# If tablets are in use and rebuild is not supported, run a DC repair instead.
4162+
if is_tablets_feature_enabled(self.target_node) and not is_rebuild_supported:
4163+
for node in [n for n in self.cluster.nodes if
4164+
n.dc_idx == self.target_node.dc_idx and n.db_up()]:
4165+
node.run_nodetool(sub_cmd="repair", long_running=True, retry=0)
4166+
with adaptive_timeout(Operations.REBUILD, self.target_node, timeout=HOUR_IN_SEC * 48):
4167+
self.target_node.run_nodetool("rebuild", long_running=True, retry=0)
41524168

41534169
def start_and_interrupt_rebuild_streaming(self):
41544170
"""
@@ -4178,8 +4194,14 @@ def start_and_interrupt_rebuild_streaming(self):
41784194
)
41794195
ParallelObject(objects=[trigger, watcher], timeout=timeout + 60).call_objects()
41804196
self.target_node.wait_node_fully_start(timeout=300)
4181-
with adaptive_timeout(Operations.REBUILD, self.target_node, timeout=HOUR_IN_SEC * 48):
4182-
self.target_node.run_nodetool("rebuild", long_running=True, retry=0)
4197+
is_rebuild_supported = SkipPerIssues('scylladb/scylladb#17575', params=self.tester.params)
4198+
# If tablets are in use and rebuild is not supported, run a DC repair instead.
4199+
if is_tablets_feature_enabled(self.target_node) and not is_rebuild_supported:
4200+
for node in [n for n in self.cluster.nodes if
4201+
n.dc_idx == self.target_node.dc_idx and n.db_up()]:
4202+
node.run_nodetool(sub_cmd="repair", long_running=True, retry=0)
4203+
with adaptive_timeout(Operations.REBUILD, self.target_node, timeout=HOUR_IN_SEC * 48):
4204+
self.target_node.run_nodetool("rebuild", long_running=True, retry=0)
41834205

41844206
def disrupt_decommission_streaming_err(self):
41854207
"""

0 commit comments

Comments
 (0)