Skip to content

Commit 4dc5a01

Browse files
committed
fix(ignore_topology_change_coordinator_errors): support sct_config parameter
PR scylladb/scylla-cluster-tests/scylladb#10386 filters some expected raft error messages globally, but that change broke the integration unit test (issue scylladb#10676). This commit fixes 2 things: - change the function signature so that it runs with the sct config passed in (for unit tests) or falls back to the running sct config - correctly filter error events. ignore_topology_change_coordinator_errors is a context manager created from a generator wrapped with contextlib.contextmanager, and the DbEventsFilter objects are entered as context managers through an ExitStack. To enable these context managers globally, `ignore_topology_change_coordinator_errors().__enter__()` was called, but once this call finished, all the DbEventsFilter context managers executed their __exit__() methods, so all the event filters were marked as expired and the appropriate events were not filtered out. To avoid that, the registered exits are moved to a new ExitStack object (`stack.pop_all()`), and all the __exit__ methods are triggered only after `yield`. This allows `ignore_topology_change_coordinator_errors` to be used as a context manager wrapping some code/functions/methods, or to be executed globally without the event filters expiring. Fixes scylladb#10676
1 parent 020154a commit 4dc5a01

File tree

3 files changed

+25
-14
lines changed

3 files changed

+25
-14
lines changed

sdcm/sct_events/group_common_events.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -72,14 +72,15 @@ def ignore_operation_errors():
7272

7373

7474
@contextmanager
75-
def ignore_topology_change_coordinator_errors():
75+
def ignore_topology_change_coordinator_errors(params: dict | None = None):
76+
params = params or TestConfig().tester_obj().params
7677
with ExitStack() as stack:
7778
if SkipPerIssues(
7879
issues=[
7980
"https://github.com/scylladb/scylladb/issues/20754",
8081
"https://github.com/scylladb/scylladb/issues/20950",
8182
],
82-
params=TestConfig().tester_obj().params,
83+
params=params,
8384
):
8485
# @piodul:
8586
#
@@ -98,17 +99,19 @@ def ignore_topology_change_coordinator_errors():
9899
line=r".*raft_topology - topology change coordinator fiber got error exceptions::unavailable_exception "
99100
r"\(Cannot achieve consistency level for cl ALL\.",
100101
))
101-
stack.enter_context(DbEventsFilter(
102-
db_event=DatabaseLogEvent.RUNTIME_ERROR,
103-
line=r".*raft_topology - topology change coordinator fiber got error std::runtime_error"
104-
r" \(raft topology: exec_global_command\(barrier\) failed with seastar::rpc::closed_erro"
105-
r"r \(connection is closed\)\)"
106-
))
107-
stack.enter_context(DbEventsFilter(
108-
db_event=DatabaseLogEvent.RUNTIME_ERROR,
109-
line=r".*raft_topology - drain rpc failed, proceed to fence old writes:.*connection is closed",
110-
))
102+
stack.enter_context(DbEventsFilter(
103+
db_event=DatabaseLogEvent.RUNTIME_ERROR,
104+
line=r".*raft_topology - topology change coordinator fiber got error std::runtime_error"
105+
r" \(raft topology: exec_global_command\(barrier\) failed with seastar::rpc::closed_error"
106+
r" \(connection is closed\)\)"
107+
))
108+
stack.enter_context(DbEventsFilter(
109+
db_event=DatabaseLogEvent.RUNTIME_ERROR,
110+
line=r".*raft_topology - drain rpc failed, proceed to fence old writes:.*connection is closed",
111+
))
112+
new_stack = stack.pop_all()
111113
yield
114+
new_stack.close()
112115

113116

114117
@contextmanager

sdcm/sct_events/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@ def enable_default_filters(sct_config: SCTConfiguration): # pylint: disable=unu
154154
# upgrades and any place where the race between raft global barrier and gossiper could
155155
# take place. So ignore such messages globally for any sct test.
156156
# TODO: this should be removed after gossiper will be removed.
157-
ignore_topology_change_coordinator_errors().__enter__()
157+
ignore_topology_change_coordinator_errors(sct_config).__enter__()
158158

159159

160160
__all__ = ("start_events_device", "stop_events_device", "enable_default_filters")

unit_tests/test_events.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,7 +412,7 @@ def test_default_filters(self):
412412
with environment(SCT_CLUSTER_BACKEND='docker'):
413413
enable_default_filters(SCTConfiguration())
414414

415-
with self.wait_for_n_events(self.get_events_logger(), count=4):
415+
with self.wait_for_n_events(self.get_events_logger(), count=5):
416416
DatabaseLogEvent.BACKTRACE() \
417417
.add_info(node="A",
418418
line_number=22,
@@ -439,6 +439,13 @@ def test_default_filters(self):
439439
".c.sct-project-1.internal/10.142.1.155:9042: Cannot achieve consistency level for cl ONE. Requires 1, alive 0",
440440
).publish()
441441

442+
DatabaseLogEvent.RUNTIME_ERROR().add_info(
443+
node="A",
444+
line_number=22,
445+
line="ERROR 2023-12-18 12:45:25,673 [shard 0: gms] raft_topology - topology change coordinator fiber got error std::runtime_error "
446+
"(raft topology: exec_global_command(barrier) failed with seastar::rpc::closed_error (connection is closed))"
447+
).publish()
448+
442449
log_content = self.get_event_log_file("events.log")
443450

444451
self.assertIn("other back trace", log_content)
@@ -451,6 +458,7 @@ def test_default_filters(self):
451458
error_log_content = self.get_event_log_file("error.log")
452459
assert 'data_dictionary::no_such_column_family' not in error_log_content
453460
assert 'Authentication error' in error_log_content
461+
assert 'topology change coordinator fiber got error' not in error_log_content
454462

455463
def test_failed_stall_during_filter(self):
456464
with self.wait_for_n_events(self.get_events_logger(), count=5, timeout=3):

0 commit comments

Comments
 (0)