|
| 1 | +from typing import Union |
| 2 | + |
| 3 | +from dagster import RunRequest |
| 4 | +from dagster._annotations import beta |
| 5 | +from dagster._core.definitions.decorators.job_decorator import job |
| 6 | +from dagster._core.definitions.decorators.op_decorator import op |
| 7 | +from dagster._core.definitions.decorators.schedule_decorator import schedule |
| 8 | +from dagster._core.definitions.definitions_class import Definitions |
| 9 | +from dagster._core.definitions.op_definition import OpDefinition |
| 10 | +from dagster._core.definitions.run_request import SkipReason |
| 11 | +from dagster._core.definitions.schedule_definition import ( |
| 12 | + DefaultScheduleStatus, |
| 13 | + ScheduleEvaluationContext, |
| 14 | +) |
| 15 | +from dagster._core.execution.context.op_execution_context import OpExecutionContext |
| 16 | +from dagster._core.storage.dagster_run import RunsFilter |
| 17 | +from dagster._grpc.client import DEFAULT_SENSOR_GRPC_TIMEOUT |
| 18 | +from dagster._time import datetime_from_timestamp, get_current_datetime |
| 19 | +from dagster_airlift.core.airflow_defs_data import AirflowDefinitionsData |
| 20 | +from dagster_airlift.core.airflow_instance import AirflowInstance |
| 21 | +from dagster_airlift.core.monitoring_job.event_stream import persist_events |
| 22 | +from dagster_airlift.core.monitoring_job.utils import ( |
| 23 | + augment_monitor_run_with_range_tags, |
| 24 | + get_range_from_run_history, |
| 25 | + structured_log, |
| 26 | +) |
| 27 | +from dagster_airlift.core.utils import monitoring_job_name |
| 28 | + |
# Default sensor gRPC timeout, less a 20-second margin.
MAIN_LOOP_TIMEOUT_SECONDS = DEFAULT_SENSOR_GRPC_TIMEOUT - 20
DEFAULT_AIRFLOW_SENSOR_INTERVAL_SECONDS = 30
# Lookback one minute in time for the initial setting of the cursor.
START_LOOKBACK_SECONDS = 60
| 32 | + |
| 33 | + |
| 34 | +# IMPROVEME BCOR-102: We should be able to replace the sensor from the original Airlift functionality with this job. |
@beta
def build_airflow_monitoring_defs(
    *,
    airflow_instance: AirflowInstance,
) -> Definitions:
    """The constructed job polls the Airflow instance for activity, and inserts asset events into Dagster's event log.

    Args:
        airflow_instance: The Airflow instance to monitor. Its ``name`` is used to
            derive both the job name and the schedule name.

    Returns:
        A ``Definitions`` object holding the monitoring job and a schedule for it.
        The schedule uses the cron expression ``* * * * *``, is running by default,
        and requests a new run unless the last run of the job has not finished.
    """
    job_name = monitoring_job_name(airflow_instance.name)

    @job(name=job_name)
    def airflow_monitoring_job():
        # The job consists of a single invocation of the monitoring op.
        monitoring_op = _build_monitoring_op(airflow_instance)
        monitoring_op()

    schedule_name = f"{airflow_instance.name}__airflow_monitoring_job_schedule"

    @schedule(
        name=schedule_name,
        job=airflow_monitoring_job,
        cron_schedule="* * * * *",
        default_status=DefaultScheduleStatus.RUNNING,
    )
    def airflow_monitoring_job_schedule(
        context: ScheduleEvaluationContext,
    ) -> Union[RunRequest, SkipReason]:
        """The schedule that runs the sensor job."""
        # Ask the instance for at most one run of this job; that run is treated
        # as the last run (None when the job has never been run).
        job_runs = context.instance.get_runs(
            filters=RunsFilter(job_name=airflow_monitoring_job.name),
            limit=1,
        )
        last_run = next(iter(job_runs), None)

        # Only one monitoring run at a time: skip while the last run is unfinished.
        if last_run and not last_run.is_finished:
            return SkipReason("Monitoring job is already running.")
        return RunRequest()

    return Definitions(
        jobs=[airflow_monitoring_job],
        schedules=[airflow_monitoring_job_schedule],
    )
| 75 | + |
| 76 | + |
def _build_monitoring_op(
    airflow_instance: AirflowInstance,
) -> OpDefinition:
    """Build the op that processes one time range of activity for ``airflow_instance``.

    The op is named via ``monitoring_job_op_name(airflow_instance)``.
    """
    op_name = monitoring_job_op_name(airflow_instance)

    @op(name=op_name)
    def monitor_dags(context: OpExecutionContext) -> None:
        """The main function that runs the sensor. It polls the Airflow instance for activity and emits asset events."""
        # HACK: the definitions data is resolved against the repository of the
        # current run, which assumes everything of interest lives in the same
        # repository (code location) as this job. Supporting anything outside it
        # would need a second pass.
        airflow_data = AirflowDefinitionsData(
            airflow_instance=airflow_instance, resolved_repository=context.repository_def
        )

        # Determine the time range to process from prior run history, then
        # record that range on the current run.
        now_timestamp = get_current_datetime().timestamp()
        range_start, range_end = get_range_from_run_history(context, now_timestamp)
        augment_monitor_run_with_range_tags(context, range_start, range_end)

        start_dt = datetime_from_timestamp(range_start)
        end_dt = datetime_from_timestamp(range_end)
        structured_log(context, f"Processing from {start_dt} to {end_dt}")

        persist_events(context, airflow_data, airflow_instance, range_start, range_end)

    return monitor_dags
| 103 | + |
| 104 | + |
def monitoring_job_op_name(airflow_instance: AirflowInstance) -> str:
    """Return the monitoring op's name: ``core_monitor__`` followed by the instance name."""
    prefix = "core_monitor__"
    return f"{prefix}{airflow_instance.name}"
0 commit comments