-
Notifications
You must be signed in to change notification settings - Fork 110
Open
Labels
Description
Multiple unit tests are failing from time to time
10:47:34 FAILED unit_tests/test_aws_spot_provisioning.py::TestGetProvisionedFleetInstanceIds::test_fleet_scenarios[fleet_capacity_error] - assert 2 == 1
10:47:34 + where 2 = len([<LogRecord: paramiko.transport, 40, /usr/local/lib/python3.14/site-packages/paramiko/transport.py, 1938, "Exception (client): Error reading SSH protocol banner">, <LogRecord: sdcm.provision.aws.utils, 40, /tmp/jenkins/workspace/an_scylla-cluster-tests_PR-13086/sdcm/provision/aws/utils.py, 205, "Critical spot fleet provisioning failure in region %s for fleet request %s: State='%s', ActivityStatus='%s', Error='%s'. Error history: %s. This request cannot be fulfilled and provisioning will not retry.">])
10:47:34 + where [<LogRecord: paramiko.transport, 40, /usr/local/lib/python3.14/site-packages/paramiko/transport.py, 1938, "Exception (client): Error reading SSH protocol banner">, <LogRecord: sdcm.provision.aws.utils, 40, /tmp/jenkins/workspace/an_scylla-cluster-tests_PR-13086/sdcm/provision/aws/utils.py, 205, "Critical spot fleet provisioning failure in region %s for fleet request %s: State='%s', ActivityStatus='%s', Error='%s'. Error history: %s. This request cannot be fulfilled and provisioning will not retry.">] = <_pytest.logging.LogCaptureFixture object at 0x752adf69d6d0>.records
and
10:51:16 =================================== FAILURES ===================================
10:51:16 ___________________________ TestGrafana.test_grafana ___________________________
10:51:16 [gw0] linux -- Python 3.14.0 /usr/local/bin/python3
10:51:16
10:51:16 self = <unit_tests.test_sct_events_grafana.TestGrafana testMethod=test_grafana>
10:51:16
10:51:16 def test_grafana(self):
10:51:16 start_grafana_pipeline(_registry=self.events_processes_registry)
10:51:16 grafana_annotator = get_events_process(EVENTS_GRAFANA_ANNOTATOR_ID, _registry=self.events_processes_registry)
10:51:16 grafana_aggregator = get_events_process(EVENTS_GRAFANA_AGGREGATOR_ID, _registry=self.events_processes_registry)
10:51:16 grafana_postman = get_grafana_postman(_registry=self.events_processes_registry)
10:51:16
10:51:16 time.sleep(EVENTS_SUBSCRIBERS_START_DELAY)
10:51:16
10:51:16 try:
10:51:16 self.assertIsInstance(grafana_annotator, GrafanaAnnotator)
10:51:16 self.assertTrue(grafana_annotator.is_alive())
10:51:16 self.assertEqual(grafana_annotator._registry, self.events_main_device._registry)
10:51:16 self.assertEqual(grafana_annotator._registry, self.events_processes_registry)
10:51:16
10:51:16 self.assertIsInstance(grafana_aggregator, GrafanaEventAggregator)
10:51:16 self.assertTrue(grafana_aggregator.is_alive())
10:51:16 self.assertEqual(grafana_aggregator._registry, self.events_main_device._registry)
10:51:16 self.assertEqual(grafana_aggregator._registry, self.events_processes_registry)
10:51:16
10:51:16 self.assertIsInstance(grafana_postman, GrafanaEventPostman)
10:51:16 self.assertTrue(grafana_postman.is_alive())
10:51:16 self.assertEqual(grafana_postman._registry, self.events_main_device._registry)
10:51:16 self.assertEqual(grafana_postman._registry, self.events_processes_registry)
10:51:16
10:51:16 grafana_aggregator.time_window = 1
10:51:16
10:51:16 set_grafana_url("http://localhost/", _registry=self.events_processes_registry)
10:51:16 with unittest.mock.patch("requests.post") as mock:
10:51:16 for runs in range(1, 4):
10:51:16 with self.wait_for_n_events(grafana_annotator, count=10, timeout=1):
10:51:16 for _ in range(10):
10:51:16 self.events_main_device.publish_event(
10:51:16 ClusterHealthValidatorEvent.NodeStatus(severity=Severity.NORMAL)
10:51:16 )
10:51:16 time.sleep(1)
10:51:16 self.assertEqual(mock.call_count, 0)
10:51:16
10:51:16 start_posting_grafana_annotations(_registry=self.events_processes_registry)
10:51:16 wait_for(lambda: mock.call_count == runs * 5, timeout=10, step=0.1, throw_exc=False)
10:51:16
10:51:16 self.assertEqual(mock.call_count, runs * 5)
10:51:16 self.assertEqual(
10:51:16 mock.call_args.kwargs["json"]["tags"],
10:51:16 ["ClusterHealthValidatorEvent", "NORMAL", "events", "NodeStatus"],
10:51:16 )
10:51:16
10:51:16 self.assertEqual(self.events_main_device.events_counter, grafana_annotator.events_counter)
10:51:16 self.assertEqual(grafana_annotator.events_counter, grafana_aggregator.events_counter)
10:51:16 self.assertLessEqual(grafana_postman.events_counter, grafana_aggregator.events_counter)
10:51:16 finally:
10:51:16 grafana_annotator.stop(timeout=1)
10:51:16 > grafana_aggregator.stop(timeout=1)
10:51:16
10:51:16 unit_tests/test_sct_events_grafana.py:100:
10:51:16 _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
10:51:16
10:51:16 self = <GrafanaEventAggregator(Thread-3933, stopped daemon 126701696714432)>
10:51:16 timeout = 1
10:51:16
10:51:16 def stop(self, timeout: float = None) -> None:
10:51:16 events_process_class_name = self.__class__.__name__
10:51:16 LOGGER.debug("Stopping events process %s", events_process_class_name)
10:51:16 self.terminate()
10:51:16 LOGGER.debug("Waiting for events process %s to stop", events_process_class_name)
10:51:16 self.join(timeout)
10:51:16 if self.is_alive():
10:51:16 LOGGER.error("Events process %s is still alive after timeout", events_process_class_name)
10:51:16 > assert False, f"Events process {events_process_class_name} is still alive after timeout"
10:51:16 ^^^^^
10:51:16 E AssertionError: Events process GrafanaEventAggregator is still alive after timeout
10:51:16
10:51:16 sdcm/sct_events/events_processes.py:104: AssertionError
in all of the there lots of failures like that, indicating something was trying to open ssh connection and failing
10:47:34 Traceback (most recent call last):
10:47:34 File "/usr/local/lib/python3.14/site-packages/paramiko/transport.py", line 2179, in run
10:47:34 File "/usr/local/lib/python3.14/site-packages/paramiko/transport.py", line 2179, in run
10:47:34 self._check_banner()
10:47:34 self._check_banner()
10:47:34 ~~~~~~~~~~~~~~~~~~^^
10:47:34 ~~~~~~~~~~~~~~~~~~^^
10:47:34 File "/usr/local/lib/python3.14/site-packages/paramiko/transport.py", line 2367, in _check_banner
10:47:34 File "/usr/local/lib/python3.14/site-packages/paramiko/transport.py", line 2367, in _check_banner
10:47:34 raise SSHException(
10:47:34 raise SSHException(
10:47:34 "Error reading SSH protocol banner" + str(e)
10:47:34 "Error reading SSH protocol banner" + str(e)
10:47:34 )
10:47:34 )
10:47:34 paramiko.ssh_exception.SSHException: Error reading SSH protocol banner
not clear which test is leaving this effect
Logs
Copilot