Skip to content

Commit c42e3c6

Browse files
eschuthoclaude
andauthored
fix(reports): improve error handling for report schedule execution (apache#35800)
Co-authored-by: Claude <[email protected]>
1 parent 3167a0d commit c42e3c6

File tree

2 files changed

+117
-23
lines changed

2 files changed

+117
-23
lines changed

superset/commands/report/execute.py

Lines changed: 78 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -177,19 +177,35 @@ def create_log(self, error_message: Optional[str] = None) -> None:
177177
"""
178178
Creates a Report execution log, uses the current computed last_value for Alerts
179179
"""
180-
log = ReportExecutionLog(
181-
scheduled_dttm=self._scheduled_dttm,
182-
start_dttm=self._start_dttm,
183-
end_dttm=datetime.utcnow(),
184-
value=self._report_schedule.last_value,
185-
value_row_json=self._report_schedule.last_value_row_json,
186-
state=self._report_schedule.last_state,
187-
error_message=error_message,
188-
report_schedule=self._report_schedule,
189-
uuid=self._execution_id,
190-
)
191-
db.session.add(log)
192-
db.session.commit() # pylint: disable=consider-using-transaction
180+
from sqlalchemy.orm.exc import StaleDataError
181+
182+
try:
183+
log = ReportExecutionLog(
184+
scheduled_dttm=self._scheduled_dttm,
185+
start_dttm=self._start_dttm,
186+
end_dttm=datetime.utcnow(),
187+
value=self._report_schedule.last_value,
188+
value_row_json=self._report_schedule.last_value_row_json,
189+
state=self._report_schedule.last_state,
190+
error_message=error_message,
191+
report_schedule=self._report_schedule,
192+
uuid=self._execution_id,
193+
)
194+
db.session.add(log)
195+
db.session.commit() # pylint: disable=consider-using-transaction
196+
except StaleDataError as ex:
197+
# Report schedule was modified or deleted by another process
198+
db.session.rollback()
199+
logger.warning(
200+
"Report schedule (execution %s) was modified or deleted "
201+
"during execution. This can occur when a report is deleted "
202+
"while running.",
203+
self._execution_id,
204+
)
205+
raise ReportScheduleUnexpectedError(
206+
"Report schedule was modified or deleted by another process "
207+
"during execution"
208+
) from ex
193209

194210
def _get_url(
195211
self,
@@ -743,7 +759,7 @@ class ReportNotTriggeredErrorState(BaseReportState):
743759
current_states = [ReportState.NOOP, ReportState.ERROR]
744760
initial = True
745761

746-
def next(self) -> None:
762+
def next(self) -> None: # noqa: C901
747763
self.update_report_schedule_and_log(ReportState.WORKING)
748764
try:
749765
# If it's an alert check if the alert is triggered
@@ -758,9 +774,21 @@ def next(self) -> None:
758774
if isinstance(first_ex, SupersetErrorsException):
759775
error_message = ";".join([error.message for error in first_ex.errors])
760776

761-
self.update_report_schedule_and_log(
762-
ReportState.ERROR, error_message=error_message
763-
)
777+
try:
778+
self.update_report_schedule_and_log(
779+
ReportState.ERROR, error_message=error_message
780+
)
781+
except ReportScheduleUnexpectedError as logging_ex:
782+
# Logging failed (likely StaleDataError), but we still want to
783+
# raise the original error so the root cause remains visible
784+
logger.warning(
785+
"Failed to log error for report schedule (execution %s) "
786+
"due to database issue",
787+
self._execution_id,
788+
exc_info=True,
789+
)
790+
# Re-raise the original exception, not the logging failure
791+
raise first_ex from logging_ex
764792

765793
# TODO (dpgaspar) convert this logic to a new state eg: ERROR_ON_GRACE
766794
if not self.is_in_error_grace_period():
@@ -776,12 +804,26 @@ def next(self) -> None:
776804
second_error_message = ";".join(
777805
[error.message for error in second_ex.errors]
778806
)
807+
except ReportScheduleUnexpectedError:
808+
# send_error failed due to logging issue, log and continue
809+
# to raise the original error
810+
logger.warning(
811+
"Failed to send error notification due to database issue",
812+
exc_info=True,
813+
)
779814
except Exception as second_ex: # pylint: disable=broad-except
780815
second_error_message = str(second_ex)
781816
finally:
782-
self.update_report_schedule_and_log(
783-
ReportState.ERROR, error_message=second_error_message
784-
)
817+
try:
818+
self.update_report_schedule_and_log(
819+
ReportState.ERROR, error_message=second_error_message
820+
)
821+
except ReportScheduleUnexpectedError:
822+
# Logging failed again, log it but don't let it hide first_ex
823+
logger.warning(
824+
"Failed to log final error state due to database issue",
825+
exc_info=True,
826+
)
785827
raise
786828

787829

@@ -851,9 +893,22 @@ def next(self) -> None:
851893
self.send()
852894
self.update_report_schedule_and_log(ReportState.SUCCESS)
853895
except Exception as ex: # pylint: disable=broad-except
854-
self.update_report_schedule_and_log(
855-
ReportState.ERROR, error_message=str(ex)
856-
)
896+
try:
897+
self.update_report_schedule_and_log(
898+
ReportState.ERROR, error_message=str(ex)
899+
)
900+
except ReportScheduleUnexpectedError as logging_ex:
901+
# Logging failed (likely StaleDataError), but we still want to
902+
# raise the original error so the root cause remains visible
903+
logger.warning(
904+
"Failed to log error for report schedule (execution %s) "
905+
"due to database issue",
906+
self._execution_id,
907+
exc_info=True,
908+
)
909+
# Re-raise the original exception, not the logging failure
910+
raise ex from logging_ex
911+
raise
857912

858913

859914
class ReportScheduleStateMachine: # pylint: disable=too-few-public-methods

tests/integration_tests/reports/commands/execute_dashboard_report_tests.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,8 +20,10 @@
2020

2121
import pytest
2222
from flask import current_app
23+
from sqlalchemy.orm.exc import StaleDataError
2324

2425
from superset.commands.dashboard.permalink.create import CreateDashboardPermalinkCommand
26+
from superset.commands.report.exceptions import ReportScheduleUnexpectedError
2527
from superset.commands.report.execute import AsyncExecuteReportScheduleCommand
2628
from superset.models.dashboard import Dashboard
2729
from superset.reports.models import ReportSourceFormat
@@ -118,3 +120,40 @@ def test_report_with_header_data(
118120
assert header_data.get("notification_source") == ReportSourceFormat.DASHBOARD
119121
assert header_data.get("notification_type") == report_schedule.type
120122
assert len(send_email_smtp_mock.call_args.kwargs["header_data"]) == 8
123+
124+
125+
@patch("superset.reports.notifications.email.send_email_smtp")
126+
@patch("superset.commands.report.execute.DashboardScreenshot")
127+
@pytest.mark.usefixtures("login_as_admin")
128+
def test_report_schedule_stale_data_error_preserves_cause(
129+
dashboard_screenshot_mock: MagicMock,
130+
send_email_smtp_mock: MagicMock,
131+
tabbed_dashboard: Dashboard, # noqa: F811
132+
) -> None:
133+
"""
134+
Test that when db.session.commit raises StaleDataError during logging,
135+
we surface ReportScheduleUnexpectedError while preserving the original
136+
StaleDataError as the cause.
137+
"""
138+
dashboard_screenshot_mock.get_screenshot.return_value = b"test-image"
139+
current_app.config["ALERT_REPORTS_NOTIFICATION_DRY_RUN"] = False
140+
141+
with create_dashboard_report(
142+
dashboard=tabbed_dashboard,
143+
extra={},
144+
name="test stale data error",
145+
) as report_schedule:
146+
# Mock db.session.commit to raise StaleDataError during log creation
147+
with patch("superset.db.session.commit") as mock_commit:
148+
mock_commit.side_effect = StaleDataError("test stale data")
149+
150+
# Execute the report and expect ReportScheduleUnexpectedError
151+
with pytest.raises(ReportScheduleUnexpectedError) as exc_info:
152+
AsyncExecuteReportScheduleCommand(
153+
str(uuid4()), report_schedule.id, datetime.utcnow()
154+
).run()
155+
156+
# Verify the original StaleDataError is preserved as the cause
157+
assert exc_info.value.__cause__ is not None
158+
assert isinstance(exc_info.value.__cause__, StaleDataError)
159+
assert str(exc_info.value.__cause__) == "test stale data"

0 commit comments

Comments
 (0)