Skip to content

Commit 9ab928c

Browse files
author
yarongilor
committed
fix(nodetool rebuild): use repair instead of rebuild if no tablets support
If there is no tablets support for nodetool rebuild, the test should use repair as an alternative action. It should then disable load-balancing and repair all nodes in this datacenter. refs: scylladb/scylladb#17575 refs: scylladb/scylladb#20084 (comment)
1 parent 8dcdcc9 commit 9ab928c

File tree

1 file changed

+32
-6
lines changed

1 file changed

+32
-6
lines changed

sdcm/nemesis.py

Lines changed: 32 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4114,12 +4114,26 @@ def decommission_post_action():
41144114
timeout=full_operations_timeout):
41154115

41164116
ParallelObject(objects=[trigger, watcher], timeout=full_operations_timeout).call_objects()
4117+
is_rebuild_supported = SkipPerIssues('scylladb/scylladb#17575', params=self.tester.params)
4118+
should_workaround_rebuild = is_tablets_feature_enabled(self.target_node) and not is_rebuild_supported
41174119
if new_node := decommission_post_action():
41184120
new_node.wait_node_fully_start()
4119-
new_node.run_nodetool("rebuild", long_running=True, retry=0)
4121+
# If tablets are in use and rebuild is not supported, run a DC repair instead.
4122+
if should_workaround_rebuild:
4123+
for node in [n for n in self.cluster.nodes if
4124+
n.dc_idx == new_node.dc_idx and n.db_up()]:
4125+
node.run_nodetool(sub_cmd="repair", long_running=True, retry=0)
4126+
else:
4127+
with adaptive_timeout(Operations.REBUILD, self.target_node, timeout=HOUR_IN_SEC * 48):
4128+
new_node.run_nodetool("rebuild", long_running=True, retry=0)
41204129
else:
41214130
self.target_node.wait_node_fully_start()
4122-
self.target_node.run_nodetool(sub_cmd="rebuild", long_running=True, retry=0)
4131+
if should_workaround_rebuild:
4132+
for node in [n for n in self.cluster.nodes if
4133+
n.dc_idx == self.target_node.dc_idx and n.db_up()]:
4134+
node.run_nodetool(sub_cmd="repair", long_running=True, retry=0)
4135+
else:
4136+
self.target_node.run_nodetool(sub_cmd="rebuild", long_running=True, retry=0)
41234137

41244138
def start_and_interrupt_repair_streaming(self):
41254139
"""
@@ -4147,8 +4161,14 @@ def start_and_interrupt_repair_streaming(self):
41474161

41484162
self.target_node.wait_node_fully_start()
41494163

4150-
with adaptive_timeout(Operations.REBUILD, self.target_node, timeout=HOUR_IN_SEC * 48):
4151-
self.target_node.run_nodetool("rebuild", long_running=True, retry=0)
4164+
is_rebuild_supported = SkipPerIssues('scylladb/scylladb#17575', params=self.tester.params)
4165+
# If tablets are in use and rebuild is not supported, run a DC repair instead.
4166+
if is_tablets_feature_enabled(self.target_node) and not is_rebuild_supported:
4167+
for node in [n for n in self.cluster.nodes if
4168+
n.dc_idx == self.target_node.dc_idx and n.db_up()]:
4169+
node.run_nodetool(sub_cmd="repair", long_running=True, retry=0)
4170+
with adaptive_timeout(Operations.REBUILD, self.target_node, timeout=HOUR_IN_SEC * 48):
4171+
self.target_node.run_nodetool("rebuild", long_running=True, retry=0)
41524172

41534173
def start_and_interrupt_rebuild_streaming(self):
41544174
"""
@@ -4178,8 +4198,14 @@ def start_and_interrupt_rebuild_streaming(self):
41784198
)
41794199
ParallelObject(objects=[trigger, watcher], timeout=timeout + 60).call_objects()
41804200
self.target_node.wait_node_fully_start(timeout=300)
4181-
with adaptive_timeout(Operations.REBUILD, self.target_node, timeout=HOUR_IN_SEC * 48):
4182-
self.target_node.run_nodetool("rebuild", long_running=True, retry=0)
4201+
is_rebuild_supported = SkipPerIssues('scylladb/scylladb#17575', params=self.tester.params)
4202+
# If tablets are in use and rebuild is not supported, run a DC repair instead.
4203+
if is_tablets_feature_enabled(self.target_node) and not is_rebuild_supported:
4204+
for node in [n for n in self.cluster.nodes if
4205+
n.dc_idx == self.target_node.dc_idx and n.db_up()]:
4206+
node.run_nodetool(sub_cmd="repair", long_running=True, retry=0)
4207+
with adaptive_timeout(Operations.REBUILD, self.target_node, timeout=HOUR_IN_SEC * 48):
4208+
self.target_node.run_nodetool("rebuild", long_running=True, retry=0)
41834209

41844210
def disrupt_decommission_streaming_err(self):
41854211
"""

0 commit comments

Comments
 (0)