Skip to content

Commit 3efbff8

Browse files
committed
test(nemesis.py): Add a ScyllaDiagnosisReport nemesis to dump diagnostics report and verify it.
Adds a new ScyllaDiagnosisReport nemesis that sends SIGQUIT to ScyllaDB so that it dumps its diagnostics report, and then verifies that the dump was logged. Fixes: #9443 Signed-off-by: Lakshmipathi.Ganapathi <[email protected]>
1 parent 70f3e92 commit 3efbff8

File tree

8 files changed

+87
-1
lines changed

8 files changed

+87
-1
lines changed

Diff for: collect_diagnosis_data_test.py

+25
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
#!/usr/bin/env python
2+
3+
# This program is free software; you can redistribute it and/or modify
4+
# it under the terms of the GNU Affero General Public License as published by
5+
# the Free Software Foundation; either version 3 of the License, or
6+
# (at your option) any later version.
7+
#
8+
# This program is distributed in the hope that it will be useful,
9+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
10+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11+
#
12+
# See LICENSE for more details.
13+
#
14+
# Copyright (c) 2025 ScyllaDB
15+
16+
from sdcm.tester import ClusterTester
17+
from sdcm import nemesis
18+
19+
20+
class ScyllaDiagnosisReport(ClusterTester):
    """Cluster test that runs the ``ScyllaDiagnosisReport`` nemesis a single time."""

    def test_diagnosis_data(self):  # pylint: disable=invalid-name
        """Build the nemesis on top of this tester and trigger one disruption."""
        diagnosis_nemesis = nemesis.ScyllaDiagnosisReport(
            tester_obj=self,
            termination_event=self.db_cluster.nemesis_termination_event,
        )
        diagnosis_nemesis.disrupt()

Diff for: configurations/nemesis/ScyllaDiagnosisReport.yaml

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
nemesis_class_name: 'ScyllaDiagnosisReport'
2+
user_prefix: 'ScyllaDiagnosisReport'

Diff for: data_dir/nemesis_classes.yml

+1
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@
6565
- RollingRestartConfigChangeInternodeCompression
6666
- ScyllaKillMonkey
6767
- SerialRestartOfElectedTopologyCoordinatorNemesis
68+
- ScyllaDiagnosisReport
6869
- SlaDecreaseSharesDuringLoad
6970
- SlaIncreaseSharesByAttachAnotherSlDuringLoad
7071
- SlaIncreaseSharesDuringLoad
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
#!groovy
2+
3+
// trick from https://github.com/jenkinsci/workflow-cps-global-lib-plugin/pull/43
4+
def lib = library identifier: 'sct@snapshot', retriever: legacySCM(scm)
5+
6+
longevityPipeline(
7+
params: params,
8+
9+
backend: 'aws',
10+
region: 'eu-west-1',
11+
test_name: 'collect_diagnosis_data_test.ScyllaDiagnosisReport.test_diagnosis_data',
12+
test_config: 'test-cases/features/diagnosis-report.yaml'
13+
)

Diff for: sdcm/cluster.py

+4
Original file line numberDiff line numberDiff line change
@@ -988,6 +988,10 @@ def instance_name(self) -> str:
988988
"""
989989
return self.name
990990

991+
@property
def is_diagnostics_logged(self):
    """Tell whether the SIGQUIT diagnostics-dump marker appears in the node's log.

    Greps ``/var/log/messages`` through the node's remoter for the line
    ``Diagnostics dump requested via SIGQUIT``. The command is run with
    ``ignore_status=True``, so a failing grep does not raise here.

    Returns:
        True when the grep command exits with status 0, False otherwise.
    """
    grep_result = self.remoter.run(
        "grep 'Diagnostics dump requested via SIGQUIT' /var/log/messages",
        ignore_status=True,
    )
    return grep_result.exit_status == 0
994+
991995
@property
992996
def is_spot(self):
993997
return False

Diff for: sdcm/nemesis.py

+28
Original file line numberDiff line numberDiff line change
@@ -702,6 +702,18 @@ def _kill_scylla_daemon(self):
702702
self.log.info('Waiting JMX services to be restarted after we killed them...')
703703
self.target_node.wait_jmx_up()
704704
self.cluster.wait_for_schema_agreement()
705+
706+
def _sigquit_scylla_daemon(self, timeout=30, poll_interval=1):
    """Send SIGQUIT to the scylla processes on the target node and verify the dump was logged.

    Runs ``pkill -3 scylla`` via sudo on ``self.target_node``, then polls
    ``self.target_node.is_diagnostics_logged`` until it is truthy or
    ``timeout`` seconds have elapsed.

    Args:
        timeout: Maximum number of seconds to wait for the diagnostics entry
            (defaults to the 30 seconds that used to be a fixed sleep).
        poll_interval: Seconds to sleep between two consecutive checks.

    Raises:
        Exception: When the diagnostics entry is not seen within ``timeout``.
            The broad type is kept so existing callers keep working.
    """
    self.log.info('Sending SIGQUIT to scylla processes in %s', self.target_node)
    result = self.target_node.remoter.sudo("pkill -3 scylla", ignore_status=True)
    # A failing pkill stays non-fatal (as before), but is no longer silent.
    # The result shape of sudo() is not visible here; assumed to mirror run() - TODO confirm.
    exit_status = getattr(result, "exit_status", 0)
    if exit_status != 0:
        self.log.warning("'pkill -3 scylla' exited with status %s on %s",
                         exit_status, self.target_node)

    # NOTE(review): the check only looks for the marker line in the log, so an entry
    # left by an earlier SIGQUIT would also satisfy it - confirm whether that matters.
    deadline = time.monotonic() + timeout
    while True:
        if self.target_node.is_diagnostics_logged:
            self.log.info("Diagnosis dump report is found")
            return
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(poll_interval, remaining))

    self.log.error("Diagnosis dump report is missing")
    raise Exception("Diagnosis dump report not found")
705717

706718
@decorate_with_context(ignore_raft_topology_cmd_failing)
707719
@target_all_nodes
@@ -1744,6 +1756,14 @@ def wait_for_old_node_to_removed():
17441756
def disrupt_kill_scylla(self):
17451757
self._kill_scylla_daemon()
17461758

1759+
def disrupt_sigquit_scylla(self, warmup_seconds=300):
    """Start a stress load, let it run for a while, then trigger the SIGQUIT diagnostics check.

    Reads ``stress_cmd`` from the tester params, starts it with
    ``run_stress_thread`` and, after ``warmup_seconds``, calls
    ``_sigquit_scylla_daemon``.

    Args:
        warmup_seconds: Seconds to let the load run before the signal is sent.
            Defaults to 300 (5 minutes), the previously hard-coded value.
    """
    stress_cmd = self.tester.params.get('stress_cmd')
    # NOTE(review): the value returned by run_stress_thread is discarded, so the
    # stress run itself is never checked here - confirm this is intended.
    self.tester.run_stress_thread(
        stress_cmd=stress_cmd, stress_num=1, stats_aggregate_cmds=False)
    # Give the load time to build up before sending the signal.
    if warmup_seconds > 0:
        time.sleep(warmup_seconds)
    self._sigquit_scylla_daemon()
1766+
17471767
def disrupt_no_corrupt_repair(self):
17481768

17491769
if SkipPerIssues("https://github.com/scylladb/scylladb/issues/18059", self.tester.params):
@@ -6612,6 +6632,14 @@ def disrupt(self):
66126632
self.disrupt_nodetool_flush_and_reshard_on_kubernetes()
66136633

66146634

6635+
class ScyllaDiagnosisReport(Nemesis):
    """Nemesis that runs the SIGQUIT diagnostics-report disruption."""

    # Class-level flags; their consumers are outside this view
    # (presumably used when selecting nemeses - confirm).
    disruptive = False
    kubernetes = False

    def disrupt(self):
        """Run ``disrupt_sigquit_scylla`` once."""
        self.disrupt_sigquit_scylla()
6641+
6642+
66156643
class ScyllaKillMonkey(Nemesis):
66166644
disruptive = True
66176645
supports_high_disk_utilization = True

Diff for: test-cases/features/diagnosis-report.yaml

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
test_duration: 30
2+
stress_cmd: "cassandra-stress write no-warmup cl=QUORUM duration=10m -schema 'replication(strategy=NetworkTopologyStrategy,replication_factor=3)' -mode cql3 native -rate threads=60 -pop seq=1..30000000"
3+
n_db_nodes: 3
4+
n_loaders: 1
5+
n_monitor_nodes: 1
6+
7+
instance_type_db: 'i4i.large'
8+
nemesis_class_name: 'ScyllaDiagnosisReport'
9+
nemesis_interval: 5
10+
11+
email_recipients: ['[email protected]']
12+
stress_image:
13+
cassandra-stress: 'scylladb/cassandra-stress:3.13.0'

Diff for: unit_tests/test_nemesis_sisyphus.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ def test_list_all_available_nemesis(generate_file=True):
8080
disruption_list, disruptions_dict, disruption_classes = sisyphus.get_list_of_disrupt_methods(
8181
subclasses_list=subclasses, export_properties=True)
8282

83-
assert len(disruption_list) == 92
83+
assert len(disruption_list) == 93
8484

8585
if generate_file:
8686
with open(sct_abs_path('data_dir/nemesis.yml'), 'w', encoding="utf-8") as outfile1:

0 commit comments

Comments
 (0)