5
5
6
6
from dagster import AssetKey , AssetsDefinition , AssetSpec
7
7
from dagster ._annotations import beta , public
8
+ from dagster ._core .definitions .job_definition import JobDefinition
9
+ from dagster ._core .definitions .repository_definition .repository_definition import (
10
+ RepositoryDefinition ,
11
+ )
12
+ from dagster ._core .definitions .unresolved_asset_job_definition import UnresolvedAssetJobDefinition
8
13
from dagster ._record import record
9
14
10
15
from dagster_airlift .core .airflow_instance import AirflowInstance
14
19
)
15
20
from dagster_airlift .core .serialization .serialized_data import DagHandle , TaskHandle
16
21
from dagster_airlift .core .utils import (
22
+ dag_handle_from_job ,
17
23
dag_handles_for_spec ,
24
+ get_producing_dag_ids ,
25
+ is_airflow_mapped_job ,
18
26
is_dag_mapped_asset_spec ,
19
27
is_peered_dag_asset_spec ,
20
28
is_task_mapped_asset_spec ,
26
34
# Type alias: a "mapped asset" is either a bare AssetSpec or a full AssetsDefinition.
MappedAsset = Union [AssetSpec , AssetsDefinition ]
27
35
28
36
37
def _is_mapped_asset(asset: MappedAsset) -> bool:
    """Report whether ``asset`` passes any of the three Airflow mapping checks.

    The checks run in a fixed order -- task-mapped, dag-mapped, peered-dag --
    and stop at the first one that yields a truthy result.
    """
    task_mapped = is_task_mapped_asset_spec(asset)
    if task_mapped:
        return task_mapped
    dag_mapped = is_dag_mapped_asset_spec(asset)
    if dag_mapped:
        return dag_mapped
    # Last check: its result is returned as-is, truthy or not.
    return is_peered_dag_asset_spec(asset)
43
+
44
+
29
45
@beta
30
46
@record
31
47
class AirflowDefinitionsData :
@@ -37,25 +53,57 @@ class AirflowDefinitionsData:
37
53
"""
38
54
39
55
airflow_instance : AirflowInstance
40
- airflow_mapped_assets : Sequence [MappedAsset ]
56
+ resolved_repository : RepositoryDefinition
57
+
58
@property
def airflow_mapped_asset_specs(self) -> Mapping[AssetKey, AssetSpec]:
    """Specs from the resolved repository that are mapped to Airflow, keyed by asset key.

    Every spec yielded by ``spec_iterator`` over the repository's assets
    definitions is kept when ``_is_mapped_asset`` accepts it.
    """
    specs_by_key = {}
    assets_defs = self.resolved_repository.assets_defs_by_key.values()
    for candidate in spec_iterator(assets_defs):
        if _is_mapped_asset(candidate):
            # A later spec with the same key replaces an earlier one.
            specs_by_key[candidate.key] = candidate
    return specs_by_key
66
+
67
@property
def airflow_mapped_jobs(self) -> Sequence[JobDefinition]:
    """Jobs of the resolved repository that ``is_airflow_mapped_job`` accepts."""
    every_job = self.resolved_repository.get_all_jobs()
    return list(filter(is_airflow_mapped_job, every_job))
73
+
74
@property
def airflow_mapped_jobs_by_dag_handle(
    self,
) -> Mapping[DagHandle, Union[JobDefinition, UnresolvedAssetJobDefinition]]:
    """Airflow-mapped jobs keyed by the dag handle that ``dag_handle_from_job`` returns for each."""
    jobs_by_handle = {}
    for mapped_job in self.airflow_mapped_jobs:
        jobs_by_handle[dag_handle_from_job(mapped_job)] = mapped_job
    return jobs_by_handle
80
+
81
@property
def assets_per_job(self) -> Mapping[str, AbstractSet[AssetKey]]:
    """Map each Airflow-mapped job's name to the asset keys produced by its dag.

    Returns:
        A mapping keyed by ``job.name``. Each value is the entry of
        ``assets_produced_by_dags`` for the ``dag_id`` of the job's dag handle.
    """
    jobs_by_handle = self.airflow_mapped_jobs_by_dag_handle
    if not jobs_by_handle:
        # No mapped jobs: avoid building the per-dag index at all.
        return {}
    # ``assets_produced_by_dags`` is a plain (uncached) property that rebuilds
    # its index on every access, so read it once instead of once per job.
    # The index is a ``defaultdict(set)``: a dag id without producing specs
    # resolves to an empty set rather than raising ``KeyError``.
    produced_by_dag_id = self.assets_produced_by_dags
    return {
        job.name: produced_by_dag_id[dag_handle.dag_id]
        for dag_handle, job in jobs_by_handle.items()
    }
88
+
89
@property
def assets_produced_by_dags(self) -> Mapping[str, AbstractSet[AssetKey]]:
    """Group the keys of Airflow-mapped asset specs by producing dag id.

    For every mapped spec, each dag id reported by ``get_producing_dag_ids``
    receives that spec's key. The returned object is a ``defaultdict(set)``.
    """
    keys_by_dag_id = defaultdict(set)
    mapped_specs = self.airflow_mapped_asset_specs
    for asset_spec in mapped_specs.values():
        for producing_dag_id in get_producing_dag_ids(asset_spec):
            keys_by_dag_id[producing_dag_id].add(asset_spec.key)
    return keys_by_dag_id
41
97
42
98
@public
@property
def instance_name(self) -> str:
    """Name of the Airflow instance held by this object."""
    instance = self.airflow_instance
    return instance.name
47
103
48
- @cached_property
49
- def all_asset_specs (self ) -> Sequence [AssetSpec ]:
50
- return list (spec_iterator (self .airflow_mapped_assets ))
51
-
52
104
@cached_property
def mapping_info(self) -> AirliftMetadataMappingInfo:
    """Mapping info built from the Airflow-mapped asset specs (computed once, then cached)."""
    mapped_specs = self.airflow_mapped_asset_specs.values()
    return build_airlift_metadata_mapping_info(mapped_specs)
59
107
60
108
@public
61
109
def task_ids_in_dag (self , dag_id : str ) -> set [str ]:
@@ -80,7 +128,7 @@ def dag_ids_with_mapped_asset_keys(self) -> AbstractSet[str]:
80
128
@cached_property
81
129
def mapped_asset_keys_by_task_handle (self ) -> Mapping [TaskHandle , AbstractSet [AssetKey ]]:
82
130
asset_keys_per_handle = defaultdict (set )
83
- for spec in self .all_asset_specs :
131
+ for spec in self .airflow_mapped_asset_specs . values () :
84
132
if is_task_mapped_asset_spec (spec ):
85
133
task_handles = task_handles_for_spec (spec )
86
134
for task_handle in task_handles :
@@ -89,8 +137,9 @@ def mapped_asset_keys_by_task_handle(self) -> Mapping[TaskHandle, AbstractSet[As
89
137
90
138
@cached_property
91
139
def mapped_asset_keys_by_dag_handle (self ) -> Mapping [DagHandle , AbstractSet [AssetKey ]]:
140
+ """Assets specifically mapped to each dag."""
92
141
asset_keys_per_handle = defaultdict (set )
93
- for spec in self .all_asset_specs :
142
+ for spec in self .airflow_mapped_asset_specs . values () :
94
143
if is_dag_mapped_asset_spec (spec ):
95
144
dag_handles = dag_handles_for_spec (spec )
96
145
for dag_handle in dag_handles :
@@ -99,8 +148,9 @@ def mapped_asset_keys_by_dag_handle(self) -> Mapping[DagHandle, AbstractSet[Asse
99
148
100
149
@cached_property
101
150
def peered_dag_asset_keys_by_dag_handle (self ) -> Mapping [DagHandle , AbstractSet [AssetKey ]]:
151
+ """Autogenerated "peered" dag assets."""
102
152
asset_keys_per_handle = defaultdict (set )
103
- for spec in self .all_asset_specs :
153
+ for spec in self .airflow_mapped_asset_specs . values () :
104
154
if is_peered_dag_asset_spec (spec ):
105
155
dag_handles = peered_dag_handles_for_spec (spec )
106
156
for dag_handle in dag_handles :
0 commit comments