|
| 1 | +"""Tests for memory leak fix in SentryMonitoredBroker traceback handling.""" |
| 2 | + |
| 3 | +import gc |
| 4 | +import traceback |
| 5 | +import weakref |
| 6 | +from unittest.mock import patch |
| 7 | + |
| 8 | +from insights.core.dr import MissingRequirements |
| 9 | +from insights.core.spec_factory import ContentException |
| 10 | + |
| 11 | +from ccx_messaging.monitored_broker import SentryMonitoredBroker |
| 12 | + |
| 13 | + |
| 14 | +def _make_exception_with_traceback(exc_type=Exception, msg="test exception"): |
| 15 | + """Raise and catch an exception so it has a real __traceback__.""" |
| 16 | + try: |
| 17 | + raise exc_type(msg) |
| 18 | + except exc_type as ex: |
| 19 | + tb_string = traceback.format_exc() |
| 20 | + return ex, tb_string |
| 21 | + |
| 22 | + |
| 23 | +def test_traceback_cleared_keyboard_interrupt(): |
| 24 | + """Verify __traceback__ is cleared for BaseException subclasses. |
| 25 | +
|
| 26 | + The implementation uses `isinstance(ex, BaseException)` (not Exception), |
| 27 | + which includes system exceptions like KeyboardInterrupt, SystemExit, and |
| 28 | + GeneratorExit. These can occur during graceful shutdown, signal handling, |
| 29 | + or async task cancellation. This test verifies the full BaseException |
| 30 | + hierarchy is handled correctly, not just regular exceptions. |
| 31 | +
|
| 32 | + KeyboardInterrupt exception is in some frameworks mapped to signal SIGTERM |
| 33 | + for graceful shutdown. |
| 34 | + """ |
| 35 | + broker = SentryMonitoredBroker() |
| 36 | + ex, tb = _make_exception_with_traceback(KeyboardInterrupt, "interrupt") |
| 37 | + assert ex.__traceback__ is not None |
| 38 | + |
| 39 | + with patch("ccx_messaging.monitored_broker.capture_exception"): |
| 40 | + broker.add_exception("component", ex, tb) |
| 41 | + |
| 42 | + assert ex.__traceback__ is None |
| 43 | + |
| 44 | + |
| 45 | +def test_formatted_traceback_preserved(): |
| 46 | + """The formatted traceback string must still be in broker.tracebacks.""" |
| 47 | + broker = SentryMonitoredBroker() |
| 48 | + ex, tb = _make_exception_with_traceback(Exception, "preserved test") |
| 49 | + |
| 50 | + with patch("ccx_messaging.monitored_broker.capture_exception"): |
| 51 | + broker.add_exception("component", ex, tb) |
| 52 | + |
| 53 | + assert broker.tracebacks[ex] == tb |
| 54 | + assert "preserved test" in broker.tracebacks[ex] |
| 55 | + |
| 56 | + |
| 57 | +def test_sentry_called_with_traceback(): |
| 58 | + """capture_exception must be called while __traceback__ is still set.""" |
| 59 | + broker = SentryMonitoredBroker() |
| 60 | + ex, tb = _make_exception_with_traceback(Exception, "sentry test") |
| 61 | + |
| 62 | + traceback_was_set = [] |
| 63 | + |
| 64 | + def mock_capture(captured_ex): |
| 65 | + traceback_was_set.append(captured_ex.__traceback__ is not None) |
| 66 | + |
| 67 | + with patch( |
| 68 | + "ccx_messaging.monitored_broker.capture_exception", |
| 69 | + side_effect=mock_capture, |
| 70 | + ): |
| 71 | + broker.add_exception("component", ex, tb) |
| 72 | + |
| 73 | + assert traceback_was_set == [True], ( |
| 74 | + "capture_exception must be called before __traceback__ is cleared" |
| 75 | + ) |
| 76 | + assert ex.__traceback__ is None |
| 77 | + |
| 78 | + |
| 79 | +def test_sentry_not_called_for_excluded_types(): |
| 80 | + """MissingRequirements and ContentException skip Sentry but still clear traceback.""" |
| 81 | + broker = SentryMonitoredBroker() |
| 82 | + |
| 83 | + # Don't use _make_exception_with_traceback helper here because these |
| 84 | + # insights-core exception types have different constructor signatures: |
| 85 | + # MissingRequirements expects a list, ContentException expects a string. |
| 86 | + # The inline approach is clearer than abstracting a helper with complex |
| 87 | + # parameter handling for just two test cases. |
| 88 | + for exc_type, msg in [ |
| 89 | + (MissingRequirements, ["req"]), |
| 90 | + (ContentException, "content"), |
| 91 | + ]: |
| 92 | + ex = exc_type(msg) |
| 93 | + try: |
| 94 | + raise ex |
| 95 | + except type(ex): |
| 96 | + tb = traceback.format_exc() |
| 97 | + |
| 98 | + with patch("ccx_messaging.monitored_broker.capture_exception") as mock: |
| 99 | + broker.add_exception("component", ex, tb) |
| 100 | + assert not mock.called |
| 101 | + assert ex.__traceback__ is None |
| 102 | + |
| 103 | + |
| 104 | +def test_traceback_cleared_even_when_sentry_fails(): |
| 105 | + """Traceback must be cleared even if capture_exception raises.""" |
| 106 | + broker = SentryMonitoredBroker() |
| 107 | + ex, tb = _make_exception_with_traceback(Exception, "sentry failure") |
| 108 | + |
| 109 | + with patch( |
| 110 | + "ccx_messaging.monitored_broker.capture_exception", |
| 111 | + side_effect=RuntimeError("sentry unavailable"), |
| 112 | + ): |
| 113 | + broker.add_exception("component", ex, tb) |
| 114 | + |
| 115 | + assert ex.__traceback__ is None |
| 116 | + |
| 117 | + |
| 118 | +def test_chained_exception_tracebacks(): |
| 119 | + """Chained exception tracebacks (__cause__, __context__) should also be cleared.""" |
| 120 | + broker = SentryMonitoredBroker() |
| 121 | + caught = None |
| 122 | + try: |
| 123 | + try: |
| 124 | + raise ValueError("original") |
| 125 | + except ValueError as orig: |
| 126 | + raise RuntimeError("wrapper") from orig |
| 127 | + except RuntimeError as ex: |
| 128 | + tb = traceback.format_exc() |
| 129 | + caught = ex |
| 130 | + |
| 131 | + assert caught.__traceback__ is not None |
| 132 | + assert caught.__cause__.__traceback__ is not None |
| 133 | + |
| 134 | + with patch("ccx_messaging.monitored_broker.capture_exception"): |
| 135 | + broker.add_exception("component", caught, tb) |
| 136 | + |
| 137 | + assert caught.__traceback__ is None |
| 138 | + assert caught.__cause__.__traceback__ is None |
| 139 | + |
| 140 | + |
| 141 | +def test_implicit_chained_exception_context(): |
| 142 | + """Implicit chaining (__context__) should also have traceback cleared.""" |
| 143 | + broker = SentryMonitoredBroker() |
| 144 | + caught = None |
| 145 | + try: |
| 146 | + try: |
| 147 | + raise ValueError("original") |
| 148 | + except ValueError: |
| 149 | + raise RuntimeError("during handling") # noqa: B904 |
| 150 | + except RuntimeError as ex: |
| 151 | + tb = traceback.format_exc() |
| 152 | + caught = ex |
| 153 | + |
| 154 | + assert caught.__traceback__ is not None |
| 155 | + assert caught.__context__.__traceback__ is not None |
| 156 | + |
| 157 | + with patch("ccx_messaging.monitored_broker.capture_exception"): |
| 158 | + broker.add_exception("component", caught, tb) |
| 159 | + |
| 160 | + assert caught.__traceback__ is None |
| 161 | + assert caught.__context__.__traceback__ is None |
| 162 | + |
| 163 | + |
| 164 | +def test_deep_chained_exception_tracebacks(): |
| 165 | + """Multi-level exception chains must have all tracebacks cleared.""" |
| 166 | + broker = SentryMonitoredBroker() |
| 167 | + caught = None |
| 168 | + try: |
| 169 | + try: |
| 170 | + try: |
| 171 | + raise ValueError("root cause") |
| 172 | + except ValueError as e1: |
| 173 | + raise TypeError("middle") from e1 |
| 174 | + except TypeError as e2: |
| 175 | + raise RuntimeError("outer") from e2 |
| 176 | + except RuntimeError as ex: |
| 177 | + tb = traceback.format_exc() |
| 178 | + caught = ex |
| 179 | + |
| 180 | + assert caught.__traceback__ is not None |
| 181 | + assert caught.__cause__.__traceback__ is not None |
| 182 | + assert caught.__cause__.__cause__.__traceback__ is not None |
| 183 | + |
| 184 | + with patch("ccx_messaging.monitored_broker.capture_exception"): |
| 185 | + broker.add_exception("component", caught, tb) |
| 186 | + |
| 187 | + assert caught.__traceback__ is None |
| 188 | + assert caught.__cause__.__traceback__ is None |
| 189 | + assert caught.__cause__.__cause__.__traceback__ is None |
| 190 | + |
| 191 | + |
| 192 | +def test_multiple_exceptions_all_tracebacks_cleared(): |
| 193 | + """All tracebacks must be cleared when multiple exceptions are added to one broker.""" |
| 194 | + broker = SentryMonitoredBroker() |
| 195 | + exceptions = [] |
| 196 | + |
| 197 | + for i in range(5): |
| 198 | + ex, tb = _make_exception_with_traceback(Exception, f"error {i}") |
| 199 | + with patch("ccx_messaging.monitored_broker.capture_exception"): |
| 200 | + broker.add_exception(f"component_{i}", ex, tb) |
| 201 | + exceptions.append(ex) |
| 202 | + |
| 203 | + for ex in exceptions: |
| 204 | + assert ex.__traceback__ is None, f"Traceback not cleared for: {ex}" |
| 205 | + |
| 206 | + |
| 207 | +def test_exception_without_traceback(): |
| 208 | + """Exceptions that were never raised (no __traceback__) should not crash.""" |
| 209 | + broker = SentryMonitoredBroker() |
| 210 | + ex = Exception("never raised") |
| 211 | + assert ex.__traceback__ is None |
| 212 | + |
| 213 | + with patch("ccx_messaging.monitored_broker.capture_exception"): |
| 214 | + broker.add_exception("component", ex, None) |
| 215 | + |
| 216 | + assert ex.__traceback__ is None |
| 217 | + |
| 218 | + |
| 219 | +def test_broker_collected_after_add_exception(): |
| 220 | + """Verify that brokers are collected by reference counting alone. |
| 221 | +
|
| 222 | + The memory leak manifests as circular references that prevent garbage |
| 223 | + collection: broker.exceptions -> ex -> __traceback__ -> frame -> broker. |
| 224 | +
|
| 225 | + This test disables Python's cyclic garbage collector to verify that |
| 226 | + reference counting ALONE (immediate collection) successfully reclaims |
| 227 | + brokers after exceptions are added. This proves that clearing |
| 228 | + __traceback__ breaks the circular reference chain. |
| 229 | +
|
| 230 | + Without the fix, brokers would remain alive until the next GC cycle, |
| 231 | + causing memory to accumulate over time in production (where thousands |
| 232 | + of exceptions are processed before GC runs). |
| 233 | + """ |
| 234 | + n_brokers = 5 |
| 235 | + refs = [] |
| 236 | + |
| 237 | + gc.collect() |
| 238 | + was_enabled = gc.isenabled() |
| 239 | + gc.disable() |
| 240 | + |
| 241 | + try: |
| 242 | + for _ in range(n_brokers): |
| 243 | + broker = SentryMonitoredBroker() |
| 244 | + ex, tb = _make_exception_with_traceback(Exception, "gc test") |
| 245 | + |
| 246 | + with patch("ccx_messaging.monitored_broker.capture_exception"): |
| 247 | + broker.add_exception("component", ex, tb) |
| 248 | + |
| 249 | + refs.append(weakref.ref(broker)) |
| 250 | + del broker, ex, tb |
| 251 | + |
| 252 | + collected = sum(1 for ref in refs if ref() is None) |
| 253 | + assert collected == n_brokers, ( |
| 254 | + f"Only {collected}/{n_brokers} brokers were collected " |
| 255 | + "by reference counting alone. Remaining brokers are held " |
| 256 | + "by circular references from ex.__traceback__ -> frame -> broker." |
| 257 | + ) |
| 258 | + finally: |
| 259 | + if was_enabled: |
| 260 | + gc.enable() |
| 261 | + gc.collect() |
| 262 | + |
| 263 | + |
| 264 | +def test_broker_collected_with_chained_exceptions(): |
| 265 | + """Brokers must be collectable even when exceptions have __cause__ chains.""" |
| 266 | + n_brokers = 5 |
| 267 | + refs = [] |
| 268 | + |
| 269 | + gc.collect() |
| 270 | + was_enabled = gc.isenabled() |
| 271 | + gc.disable() |
| 272 | + |
| 273 | + try: |
| 274 | + for _ in range(n_brokers): |
| 275 | + broker = SentryMonitoredBroker() |
| 276 | + caught = None |
| 277 | + try: |
| 278 | + try: |
| 279 | + raise ValueError("cause") |
| 280 | + except ValueError as orig: |
| 281 | + raise RuntimeError("effect") from orig |
| 282 | + except RuntimeError as ex: |
| 283 | + tb = traceback.format_exc() |
| 284 | + caught = ex |
| 285 | + |
| 286 | + with patch("ccx_messaging.monitored_broker.capture_exception"): |
| 287 | + broker.add_exception("component", caught, tb) |
| 288 | + |
| 289 | + refs.append(weakref.ref(broker)) |
| 290 | + del broker, caught, tb |
| 291 | + |
| 292 | + collected = sum(1 for ref in refs if ref() is None) |
| 293 | + assert collected == n_brokers, ( |
| 294 | + f"Only {collected}/{n_brokers} brokers collected with chained exceptions" |
| 295 | + ) |
| 296 | + finally: |
| 297 | + if was_enabled: |
| 298 | + gc.enable() |
| 299 | + gc.collect() |
0 commit comments