3232import enum
3333from contextlib import ExitStack , contextmanager
3434from typing import Any , List , Optional , Tuple , Callable , Dict , Set , Union , Iterable
35- from functools import wraps , partial
35+ from functools import wraps , partial , cached_property
3636from collections import defaultdict , Counter , namedtuple
3737from concurrent .futures import ThreadPoolExecutor
3838from threading import Lock
@@ -242,7 +242,6 @@ def __init__(self, tester_obj, termination_event, *args, nemesis_selector=None,
242242 self .monitoring_set = tester_obj .monitors
243243 self .target_node : BaseNode = None
244244 self .disruptions_list = []
245- self .disruptions_cycle : Iterable [Callable ] | None = None
246245 self .termination_event = termination_event
247246 self .operation_log = []
248247 self .current_disruption = None
@@ -264,7 +263,6 @@ def __init__(self, tester_obj, termination_event, *args, nemesis_selector=None,
264263 self .task_used_streaming = None
265264 self .filter_seed = self .cluster .params .get ('nemesis_filter_seeds' )
266265 self .nemesis_seed = nemesis_seed or random .randint (0 , 1000 )
267- self ._random_sequence = None
268266 self ._add_drop_column_max_per_drop = 5
269267 self ._add_drop_column_max_per_add = 5
270268 self ._add_drop_column_max_column_name_size = 10
@@ -1808,80 +1806,43 @@ def _deprecated_disrupt_stop_start(self):
18081806 self .log .info ('StopStart %s' , self .target_node )
18091807 self .target_node .restart ()
18101808
1811- def call_random_disrupt_method (self , disrupt_methods = None , predefined_sequence = False ):
1812- # pylint: disable=too-many-branches
1813-
1814- if disrupt_methods is None :
1815- disrupt_methods = [attr [1 ] for attr in inspect .getmembers (self ) if
1816- attr [0 ].startswith ('disrupt_' ) and
1817- callable (attr [1 ])]
1818- else :
1819- disrupt_methods = [attr [1 ] for attr in inspect .getmembers (self ) if
1820- attr [0 ] in disrupt_methods and
1821- callable (attr [1 ])]
1822- if not disrupt_methods :
1823- self .log .warning ("No monkey to run" )
1824- return
1825- if not predefined_sequence :
1826- disrupt_method = random .choice (disrupt_methods )
1827- else :
1828- if not self ._random_sequence :
1829- # Generate random sequence, every method has same chance to be called.
1830- # Here we use multiple original methods list, it will increase the chance
1831- # to call same method continuously.
1832- #
1833- # Adjust the rate according to the test duration. Try to call more unique
1834- # methods and don't wait to long time to meet the balance if the test
1835- # duration is short.
1836- test_duration = self .cluster .params .get ('test_duration' )
1837- if test_duration < 600 : # less than 10 hours
1838- rate = 1
1839- elif test_duration < 4320 : # less than 3 days
1840- rate = 2
1841- else :
1842- rate = 3
1843- multiple_disrupt_methods = disrupt_methods * rate
1844- random .shuffle (multiple_disrupt_methods )
1845- self ._random_sequence = multiple_disrupt_methods
1846- # consume the random sequence
1847- disrupt_method = self ._random_sequence .pop ()
1809+ # Nemesis running code
18481810
1849- self .execute_disrupt_method (disrupt_method )
1811+ @cached_property
1812+ def all_disrupt_methods (self ):
1813+ return self .nemesis_registry .get_disrupt_methods ()
18501814
18511815 def execute_disrupt_method (self , disrupt_method ):
1816+ """Runs selected disrupt method"""
18521817 disrupt_method_name = disrupt_method .__name__ .replace ('disrupt_' , '' )
18531818 self .metrics_srv .event_start (disrupt_method_name )
18541819 try :
18551820 disrupt_method (self )
18561821 finally :
18571822 self .metrics_srv .event_stop (disrupt_method_name )
18581823
1859- def build_list_of_disruptions_to_execute (self , nemesis_selector : str | None = None , nemesis_multiply_factor = 1 ):
1860- """
1861- Builds the list of disruptions that should be excuted during a test.
1824+ def build_disruptions_by_name (self , disrupt_methods : List [str ]):
1825+ """Builds list of available disruptions according to function names"""
1826+ filtered = [func for func in self .all_disrupt_methods if func .__name__ in disrupt_methods ]
1827+ names = [func .__name__ for func in filtered ]
1828+ assert names == disrupt_methods , f"Unable to find these disrupt methods: { set (disrupt_methods ).difference (names )} "
1829+ return filtered
18621830
1863- nemesis_selector: should be retrived from the test yaml by using the "nemesis_selector".
1864- Here it kept for future usages and unit testing ability.
1865- more about nemesis_selector behaviour in sct_config.py
1831+ def build_disruptions_by_selector ( self , nemesis_selector : str | None = None ):
1832+ """
1833+ Filter available disruptions according to the logical phrase
18661834
1867- nemesis_multiply_factor: should be retrived from the test yaml by using the "nemesis_multiply_factor" .
1868- Here it kept for future usages and unit testing ability .
1869- more about nemesis_selector behaviour in sct_config.py
1835+ nemesis_selector: Logical phrase selector to filter available disruption methods .
1836+ To be able to be filtered, method needs to have corresponding class .
1837+ Usually retrieved from the test yaml by using the "nemesis_selector", more about nemesis_selector behaviour in sct_config.py
18701838 """
1871- nemesis_selector = nemesis_selector or self .nemesis_selector
18721839 if self ._is_it_on_kubernetes ():
18731840 if nemesis_selector :
18741841 nemesis_selector = nemesis_selector + " and kubernetes"
18751842 else :
18761843 nemesis_selector = "kubernetes"
1877- nemesis_multiply_factor = self .cluster .params .get ('nemesis_multiply_factor' ) or nemesis_multiply_factor
18781844 disruptions = self .nemesis_registry .get_disrupt_methods (nemesis_selector )
1879-
1880- if nemesis_multiply_factor :
1881- disruptions = disruptions * nemesis_multiply_factor
1882-
1883- self .disruptions_list .extend (disruptions )
1884- return self .disruptions_list
1845+ return disruptions
18851846
18861847 @property
18871848 def nemesis_selector (self ) -> str :
@@ -1905,22 +1866,34 @@ def nemesis_selector(self, value: str):
19051866
19061867 @property
19071868 def _disruption_list_names (self ):
1869+ """Returns name of all collected nemesis"""
19081870 return [nemesis .__name__ for nemesis in self .disruptions_list ]
19091871
1910- def shuffle_list_of_disruptions (self ):
1911- self .log .debug (f'nemesis_seed to be used is { self .nemesis_seed } ' )
1872+ def shuffle_list_of_disruptions (self , disruption_list : List , nemesis_multiply_factor : int | None = None ):
1873+ """
1874+ Randomizes list of disruptions
19121875
1876+ nemesis_multiply_factor: How many times to multiply the original list before shuffle
1877+ Useful for increasing probability of the same nemesis twice in a row
1878+ Usually retrieved from the test yaml by using the "nemesis_selector", more about nemesis_selector behaviour in sct_config.py
1879+ """
1880+ self .log .debug (f'nemesis_seed to be used is { self .nemesis_seed } ' )
19131881 self .log .debug (f"nemesis stack BEFORE SHUFFLE is { self ._disruption_list_names } " )
1914- random .Random (self .nemesis_seed ).shuffle (self .disruptions_list )
1882+ nemesis_multiply_factor = nemesis_multiply_factor or self .cluster .params .get ('nemesis_multiply_factor' ) or 1
1883+ random .Random (self .nemesis_seed ).shuffle (disruption_list * nemesis_multiply_factor )
19151884 self .log .info (f"List of Nemesis to execute: { self ._disruption_list_names } " )
19161885
1886+ @cached_property
1887+ def infinite_cycle (self ):
1888+ """Returns infinite cycle of all nemesis"""
1889+ return itertools .cycle (self .disruptions_list )
1890+
19171891 def call_next_nemesis (self ):
1892+ """Calls next nemesis in the order"""
19181893 assert self .disruptions_list , "no nemesis were selected"
1919- if not self .disruptions_cycle :
1920- self .disruptions_cycle = itertools .cycle (self .disruptions_list )
1921- self .log .debug (f'Selecting the next nemesis out of stack { self ._disruption_list_names [10 :]} ' )
1922- self .execute_disrupt_method (disrupt_method = next (self .disruptions_cycle ))
1894+ self .execute_disrupt_method (disrupt_method = next (self .infinite_cycle ))
19231895
1896+ # End of Nemesis running code
19241897 @latency_calculator_decorator (legend = "Run repair process with nodetool repair" )
19251898 def repair_nodetool_repair (self , node = None , publish_event = True ):
19261899 node = node if node else self .target_node
@@ -5628,8 +5601,8 @@ def wrapper(*args, **kwargs): # pylint: disable=too-many-statements # noqa: PL
56285601class SisyphusMonkey (Nemesis ):
56295602 def __init__ (self , * args , ** kwargs ):
56305603 super ().__init__ (* args , ** kwargs )
5631- self .build_list_of_disruptions_to_execute ( )
5632- self .shuffle_list_of_disruptions ()
5604+ self .disruptions_list = self . build_disruptions_by_selector ( self . nemesis_selector )
5605+ self .shuffle_list_of_disruptions (self . disruptions_list )
56335606
56345607 def disrupt (self ):
56355608 self .call_next_nemesis ()
@@ -5740,13 +5713,14 @@ class EnableDisableTableEncryptionAwsKmsProviderMonkey(Nemesis):
57405713
57415714 def __init__ (self , * args , ** kwargs ):
57425715 super ().__init__ (* args , ** kwargs )
5743- self .disrupt_methods_list = [
5716+ self .disruptions_list = self . build_disruptions_by_name ( [
57445717 'disrupt_enable_disable_table_encryption_aws_kms_provider_without_rotation' ,
57455718 'disrupt_enable_disable_table_encryption_aws_kms_provider_with_rotation' ,
5746- ]
5719+ ])
5720+ self .shuffle_list_of_disruptions (self .disruptions_list )
57475721
57485722 def disrupt (self ):
5749- self .call_random_disrupt_method ( disrupt_methods = self . disrupt_methods_list , predefined_sequence = True )
5723+ self .call_next_nemesis ( )
57505724
57515725
57525726class RestartThenRepairNodeMonkey (Nemesis ):
@@ -5974,12 +5948,6 @@ def disrupt(self):
59745948 self .disrupt_delete_overlapping_row_ranges ()
59755949
59765950
5977- class ChaosMonkey (Nemesis ):
5978-
5979- def disrupt (self ):
5980- self .call_random_disrupt_method ()
5981-
5982-
59835951class CategoricalMonkey (Nemesis ):
59845952 """Randomly picks disruptions to execute using the given categorical distribution.
59855953
@@ -6064,34 +6032,39 @@ def _random_disrupt(self):
60646032 self .execute_disrupt_method (method )
60656033
60666034
6067- CLOUD_LIMITED_CHAOS_MONKEY = ['disrupt_nodetool_cleanup' ,
6068- 'disrupt_nodetool_drain' , 'disrupt_nodetool_refresh' ,
6069- 'disrupt_stop_start_scylla_server' , 'disrupt_major_compaction' ,
6070- 'disrupt_modify_table' , 'disrupt_nodetool_enospc' ,
6071- 'disrupt_stop_wait_start_scylla_server' ,
6072- 'disrupt_soft_reboot_node' ,
6073- 'disrupt_truncate' ]
6074-
6075-
60766035class ScyllaCloudLimitedChaosMonkey (Nemesis ):
60776036
6078- def disrupt (self ):
6079- # Limit the nemesis scope to only one relevant to scylla cloud, where we defined we don't have AWS api access:
6080- self .call_random_disrupt_method (disrupt_methods = CLOUD_LIMITED_CHAOS_MONKEY )
6081-
6082-
6083- class AllMonkey (Nemesis ):
6037+ def __init__ (self , * args , ** kwargs ):
6038+ super ().__init__ (* args , ** kwargs )
6039+ self .disruptions_list = self .build_disruptions_by_name ([
6040+ 'disrupt_nodetool_cleanup' ,
6041+ 'disrupt_nodetool_drain' , 'disrupt_nodetool_refresh' ,
6042+ 'disrupt_stop_start_scylla_server' , 'disrupt_major_compaction' ,
6043+ 'disrupt_modify_table' , 'disrupt_nodetool_enospc' ,
6044+ 'disrupt_stop_wait_start_scylla_server' ,
6045+ 'disrupt_soft_reboot_node' ,
6046+ 'disrupt_truncate'
6047+ ])
6048+ self .shuffle_list_of_disruptions (self .disruptions_list )
60846049
60856050 def disrupt (self ):
6086- self .call_random_disrupt_method (predefined_sequence = True )
6051+ # Limit the nemesis scope to only one relevant to scylla cloud, where we defined we don't have AWS api access:
6052+ self .call_next_nemesis ()
60876053
60886054
60896055class MdcChaosMonkey (Nemesis ):
60906056
6057+ def __init__ (self , * args , ** kwargs ):
6058+ super ().__init__ (* args , ** kwargs )
6059+ self .disruptions_list = self .build_disruptions_by_name ([
6060+ 'disrupt_destroy_data_then_repair' ,
6061+ 'disrupt_no_corrupt_repair' ,
6062+ 'disrupt_nodetool_decommission'
6063+ ])
6064+ self .shuffle_list_of_disruptions (self .disruptions_list )
6065+
60916066 def disrupt (self ):
6092- self .call_random_disrupt_method (
6093- disrupt_methods = ['disrupt_destroy_data_then_repair' , 'disrupt_no_corrupt_repair' ,
6094- 'disrupt_nodetool_decommission' ])
6067+ self .call_next_nemesis ()
60956068
60966069
60976070class ModifyTableMonkey (Nemesis ):
@@ -6239,13 +6212,14 @@ class DisruptKubernetesNodeThenReplaceScyllaNode(Nemesis):
62396212
62406213 def __init__ (self , * args , ** kwargs ):
62416214 super ().__init__ (* args , ** kwargs )
6242- self .disrupt_methods_list = [
6215+ self .disruptions_list = self . build_disruptions_by_name ( [
62436216 'disrupt_drain_kubernetes_node_then_replace_scylla_node' ,
62446217 'disrupt_terminate_kubernetes_host_then_replace_scylla_node' ,
6245- ]
6218+ ])
6219+ self .shuffle_list_of_disruptions (self .disruptions_list )
62466220
62476221 def disrupt (self ):
6248- self .call_random_disrupt_method ( disrupt_methods = self . disrupt_methods_list )
6222+ self .call_next_nemesis ( )
62496223
62506224
62516225class DrainKubernetesNodeThenDecommissionAndAddScyllaNode (Nemesis ):
@@ -6270,13 +6244,14 @@ class DisruptKubernetesNodeThenDecommissionAndAddScyllaNode(Nemesis):
62706244
62716245 def __init__ (self , * args , ** kwargs ):
62726246 super ().__init__ (* args , ** kwargs )
6273- self .disrupt_methods_list = [
6247+ self .disruptions_list = self . build_disruptions_by_name ( [
62746248 'disrupt_drain_kubernetes_node_then_decommission_and_add_scylla_node' ,
62756249 'disrupt_terminate_kubernetes_host_then_decommission_and_add_scylla_node' ,
6276- ]
6250+ ])
6251+ self .shuffle_list_of_disruptions (self .disruptions_list )
62776252
62786253 def disrupt (self ):
6279- self .call_random_disrupt_method ( disrupt_methods = self . disrupt_methods_list )
6254+ self .call_next_nemesis ( )
62806255
62816256
62826257class K8sSetMonkey (Nemesis ):
@@ -6285,16 +6260,16 @@ class K8sSetMonkey(Nemesis):
62856260
62866261 def __init__ (self , * args , ** kwargs ):
62876262 super ().__init__ (* args , ** kwargs )
6288- self .disrupt_methods_list = [
6263+ self .disruptions_list = self . build_disruptions_by_name ( [
62896264 'disrupt_drain_kubernetes_node_then_replace_scylla_node' ,
62906265 'disrupt_terminate_kubernetes_host_then_replace_scylla_node' ,
62916266 'disrupt_drain_kubernetes_node_then_decommission_and_add_scylla_node' ,
62926267 'disrupt_terminate_kubernetes_host_then_decommission_and_add_scylla_node' ,
6293- ]
6268+ ])
6269+ self .shuffle_list_of_disruptions (self .disruptions_list )
62946270
62956271 def disrupt (self ):
6296- self .call_random_disrupt_method (
6297- disrupt_methods = self .disrupt_methods_list , predefined_sequence = True )
6272+ self .call_next_nemesis ()
62986273
62996274
63006275class OperatorNodeReplace (Nemesis ):
@@ -6466,7 +6441,7 @@ class ScyllaOperatorBasicOperationsMonkey(Nemesis):
64666441
64676442 def __init__ (self , * args , ** kwargs ):
64686443 super ().__init__ (* args , ** kwargs )
6469- self .disrupt_methods_list = [
6444+ self .disruptions_list = self . build_disruptions_by_name ( [
64706445 'disrupt_nodetool_flush_and_reshard_on_kubernetes' ,
64716446 'disrupt_rolling_restart_cluster' ,
64726447 'disrupt_grow_shrink_cluster' ,
@@ -6481,10 +6456,11 @@ def __init__(self, *args, **kwargs):
64816456 'disrupt_mgmt_repair_cli' ,
64826457 'disrupt_mgmt_backup_specific_keyspaces' ,
64836458 'disrupt_mgmt_backup' ,
6484- ]
6459+ ])
6460+ self .shuffle_list_of_disruptions (self .disruptions_list )
64856461
64866462 def disrupt (self ):
6487- self .call_random_disrupt_method ( disrupt_methods = self . disrupt_methods_list , predefined_sequence = True )
6463+ self .call_next_nemesis ( )
64886464
64896465
64906466class NemesisSequence (Nemesis ):
@@ -6552,8 +6528,8 @@ def disrupt(self):
65526528 self .disrupt_repair_streaming_err ()
65536529
65546530
6555- COMPLEX_NEMESIS = [NoOpMonkey , ChaosMonkey , ScyllaCloudLimitedChaosMonkey ,
6556- AllMonkey , MdcChaosMonkey , SisyphusMonkey ,
6531+ COMPLEX_NEMESIS = [NoOpMonkey , ScyllaCloudLimitedChaosMonkey ,
6532+ MdcChaosMonkey , SisyphusMonkey ,
65576533 DisruptKubernetesNodeThenReplaceScyllaNode ,
65586534 DisruptKubernetesNodeThenDecommissionAndAddScyllaNode ,
65596535 CategoricalMonkey ]
0 commit comments