3
3
from functools import cached_property
4
4
from typing import AbstractSet , Union # noqa: UP035
5
5
6
- from dagster import AssetKey , AssetsDefinition , AssetSpec
6
+ from dagster import (
7
+ AssetKey ,
8
+ AssetsDefinition ,
9
+ AssetSpec ,
10
+ _check as check ,
11
+ )
7
12
from dagster ._annotations import beta , public
13
+ from dagster ._core .definitions .definitions_class import Definitions
14
+ from dagster ._core .definitions .job_definition import JobDefinition
15
+ from dagster ._core .definitions .unresolved_asset_job_definition import UnresolvedAssetJobDefinition
8
16
from dagster ._record import record
9
17
10
18
from dagster_airlift .core .airflow_instance import AirflowInstance
15
23
from dagster_airlift .core .serialization .serialized_data import DagHandle , TaskHandle
16
24
from dagster_airlift .core .utils import (
17
25
dag_handles_for_spec ,
26
+ get_producing_dag_ids ,
18
27
is_dag_mapped_asset_spec ,
19
28
is_peered_dag_asset_spec ,
20
29
is_task_mapped_asset_spec ,
26
35
# Type alias: a mapped asset is given either as a bare AssetSpec or as an AssetsDefinition.
MappedAsset = Union [AssetSpec , AssetsDefinition ]
27
36
28
37
38
def _is_mapped_asset(asset: MappedAsset) -> bool:
    """Report whether ``asset`` satisfies any of the Airflow mapping predicates.

    The task-mapped check runs first, then the dag-mapped check, and finally
    the peered-dag check; evaluation stops at the first predicate that holds.
    """
    for predicate in (is_task_mapped_asset_spec, is_dag_mapped_asset_spec):
        outcome = predicate(asset)
        if outcome:
            return outcome
    # Nothing matched so far: the peered-dag check decides the result.
    return is_peered_dag_asset_spec(asset)
44
+
45
+
29
46
@beta
30
47
@record
31
48
class AirflowDefinitionsData :
@@ -37,25 +54,61 @@ class AirflowDefinitionsData:
37
54
"""
38
55
39
56
airflow_instance : AirflowInstance
40
- airflow_mapped_assets : Sequence [MappedAsset ]
57
+ defs : Definitions
58
+
59
@property
def airflow_mapped_asset_specs(self) -> Mapping[AssetKey, AssetSpec]:
    """The assets that are mapped to Airflow tasks and dags.

    Walks ``self.defs.assets``, ignores entries that are neither an
    ``AssetsDefinition`` nor an ``AssetSpec``, and keeps every spec for which
    ``_is_mapped_asset`` holds.

    Returns:
        A mapping from asset key to its spec. When two specs share a key, the
        one encountered last is kept. The mapping is rebuilt on every access.
    """
    result = {}
    # ``Definitions.assets`` is optional; an unset value simply means "no assets".
    for asset in self.defs.assets or ():
        if not isinstance(asset, (AssetsDefinition, AssetSpec)):
            continue
        for spec in spec_iterator([asset]):
            if _is_mapped_asset(spec):
                result[spec.key] = spec
    return result
70
+
71
@property
def airflow_mapped_jobs(self) -> Sequence[Union[JobDefinition, UnresolvedAssetJobDefinition]]:
    """Jobs mapping to Airflow dags.

    A job qualifies when its ``dagster/external_job`` tag equals ``"airflow"``.

    Returns:
        The qualifying jobs, in the order they appear in ``self.defs.jobs``.
        The result is empty when no jobs are configured.
    """
    return [
        job
        # ``Definitions.jobs`` is optional; treat an unset value as "no jobs".
        for job in self.defs.jobs or ()
        # A job's tags may be None; such a job cannot be tagged as an Airflow job.
        if (job.tags or {}).get("dagster/external_job") == "airflow"
    ]
75
+
76
@property
def airflow_mapped_jobs_by_dag_handle(
    self,
) -> Mapping[DagHandle, Union[JobDefinition, UnresolvedAssetJobDefinition]]:
    """Airflow-mapped jobs keyed by a ``DagHandle``.

    Each handle is built from the job's ``dagster-airlift/dag_id`` tag. The
    job's tags are asserted to be non-None before the tag is looked up.
    """
    jobs_by_handle = {}
    for mapped_job in self.airflow_mapped_jobs:
        job_tags = check.not_none(mapped_job.tags)
        handle = DagHandle(dag_id=job_tags["dagster-airlift/dag_id"])
        jobs_by_handle[handle] = mapped_job
    return jobs_by_handle
85
+
86
@property
def assets_per_job(self) -> Mapping[str, AbstractSet[AssetKey]]:
    """Assets per job mapping to Airflow dags.

    Returns:
        A mapping from job name to the asset keys recorded in
        ``assets_produced_by_dags`` for that job's dag id. A job whose dag has
        no recorded assets maps to an empty set.
    """
    # ``assets_produced_by_dags`` is a plain property that rebuilds its mapping
    # on every access, so read it once instead of once per job.
    produced_by_dag = self.assets_produced_by_dags
    return {
        # ``.get`` keeps the lookup safe for any Mapping, not only a defaultdict.
        job.name: produced_by_dag.get(dag_handle.dag_id, set())
        for dag_handle, job in self.airflow_mapped_jobs_by_dag_handle.items()
    }
93
+
94
@property
def assets_produced_by_dags(self) -> Mapping[str, AbstractSet[AssetKey]]:
    """Asset keys grouped by the id of each Airflow dag that produces them.

    For every Airflow-mapped spec, the spec's key is added under each dag id
    reported by ``get_producing_dag_ids``. The returned object is a
    ``defaultdict(set)``, so looking up an unknown dag id yields an empty set.
    """
    keys_by_dag_id = defaultdict(set)
    for mapped_spec in self.airflow_mapped_asset_specs.values():
        for producing_dag_id in get_producing_dag_ids(mapped_spec):
            keys_for_dag = keys_by_dag_id[producing_dag_id]
            keys_for_dag.add(mapped_spec.key)
    return keys_by_dag_id
41
102
42
103
@public
@property
def instance_name(self) -> str:
    """The ``name`` of ``self.airflow_instance``."""
    instance = self.airflow_instance
    return instance.name
47
108
48
- @cached_property
49
- def all_asset_specs (self ) -> Sequence [AssetSpec ]:
50
- return list (spec_iterator (self .airflow_mapped_assets ))
51
-
52
109
@cached_property
def mapping_info(self) -> AirliftMetadataMappingInfo:
    """Airlift metadata mapping info built from the Airflow-mapped asset specs.

    Computed on first access and cached on the instance afterwards.
    """
    # NOTE(review): this passes the key -> spec mapping itself rather than its
    # values — confirm ``build_airlift_metadata_mapping_info`` accepts a mapping.
    mapped_specs = self.airflow_mapped_asset_specs
    return build_airlift_metadata_mapping_info(mapped_specs)
59
112
60
113
@public
61
114
def task_ids_in_dag (self , dag_id : str ) -> set [str ]:
@@ -80,7 +133,7 @@ def dag_ids_with_mapped_asset_keys(self) -> AbstractSet[str]:
80
133
@cached_property
81
134
def mapped_asset_keys_by_task_handle (self ) -> Mapping [TaskHandle , AbstractSet [AssetKey ]]:
82
135
asset_keys_per_handle = defaultdict (set )
83
- for spec in self .all_asset_specs :
136
+ for spec in self .airflow_mapped_asset_specs . values () :
84
137
if is_task_mapped_asset_spec (spec ):
85
138
task_handles = task_handles_for_spec (spec )
86
139
for task_handle in task_handles :
@@ -89,8 +142,9 @@ def mapped_asset_keys_by_task_handle(self) -> Mapping[TaskHandle, AbstractSet[As
89
142
90
143
@cached_property
91
144
def mapped_asset_keys_by_dag_handle (self ) -> Mapping [DagHandle , AbstractSet [AssetKey ]]:
145
+ """Assets specifically mapped to each dag."""
92
146
asset_keys_per_handle = defaultdict (set )
93
- for spec in self .all_asset_specs :
147
+ for spec in self .airflow_mapped_asset_specs . values () :
94
148
if is_dag_mapped_asset_spec (spec ):
95
149
dag_handles = dag_handles_for_spec (spec )
96
150
for dag_handle in dag_handles :
@@ -99,8 +153,9 @@ def mapped_asset_keys_by_dag_handle(self) -> Mapping[DagHandle, AbstractSet[Asse
99
153
100
154
@cached_property
101
155
def peered_dag_asset_keys_by_dag_handle (self ) -> Mapping [DagHandle , AbstractSet [AssetKey ]]:
156
+ """Autogenerated "peered" dag assets."""
102
157
asset_keys_per_handle = defaultdict (set )
103
- for spec in self .all_asset_specs :
158
+ for spec in self .airflow_mapped_asset_specs . values () :
104
159
if is_peered_dag_asset_spec (spec ):
105
160
dag_handles = peered_dag_handles_for_spec (spec )
106
161
for dag_handle in dag_handles :
0 commit comments