7
7
from typing import Dict
8
8
from typing import List
9
9
10
- from vdk .internal .core .errors import ErrorMessage
11
- from vdk .internal .core .errors import UserCodeError
10
+ from vdk .plugin .dag .exception import DagValidationException
12
11
13
12
log = logging .getLogger (__name__ )
14
13
Error = namedtuple ("Error" , ["TYPE" , "PERMISSION" , "REQUIREMENT" , "CONFLICT" ])
@@ -45,23 +44,10 @@ def validate(self, jobs: List[Dict]):
45
44
self ._check_dag_cycles (jobs )
46
45
log .debug ("Successfully validated the DAG!" )
47
46
48
- def _raise_error (
49
- self , error_type : str , reason : str , countermeasures : str , jobs : List [str ] = ""
50
- ):
51
- raise UserCodeError (
52
- ErrorMessage (
53
- "" ,
54
- "DAG failed due to a Data Job validation failure." ,
55
- f"There is a { error_type } error with job(s) { jobs } . " + reason ,
56
- "The DAG will not be built and will fail." ,
57
- countermeasures ,
58
- )
59
- )
60
-
61
47
def _validate_no_duplicates (self , jobs : List [Dict ]):
62
48
duplicated_jobs = list ({job ["job_name" ] for job in jobs if jobs .count (job ) > 1 })
63
49
if duplicated_jobs :
64
- self . _raise_error (
50
+ raise DagValidationException (
65
51
ERROR .CONFLICT ,
66
52
f"There are some duplicated jobs: { duplicated_jobs } ." ,
67
53
f"Remove the duplicated jobs from the list - each job can appear in the jobs list at most once. "
@@ -85,100 +71,111 @@ def _validate_job(self, job: Dict):
85
71
86
72
def _validate_job_type(self, job: Dict):
    """
    Ensure that a single entry of the DAG jobs list is a dict.

    :param job: the job definition to check
    :raises DagValidationException: with ERROR.TYPE when job is not a dict
    """
    if not isinstance(job, dict):
        # The offending value can be of any type (int, None, a list of
        # non-strings, ...). Rendering it with str() always succeeds, whereas
        # iterating and joining it can raise TypeError and hide the actual
        # validation failure.
        jobs = [str(job)]
        raise DagValidationException(
            ERROR.TYPE,
            "The job type is not dict.",
            f"Change the Data Job type. Current type is {type(job)}. Expected type is dict.",
            jobs,
        )
94
81
95
82
def _validate_allowed_and_required_keys(self, job: Dict):
    """
    Check the keys of a job dict against allowed_job_keys and required_job_keys.

    Disallowed keys are reported first; missing required keys are only
    looked for when no disallowed key is present.

    :param job: the job definition whose keys are checked
    :raises DagValidationException: with ERROR.PERMISSION when the job has a
        key that is not in allowed_job_keys, or with ERROR.REQUIREMENT when a
        key from required_job_keys is absent
    """
    unexpected = [name for name in job if name not in allowed_job_keys]
    if unexpected:
        raise DagValidationException(
            ERROR.PERMISSION,
            "One or more job dict keys are not allowed.",
            f"Remove the disallowed Data Job Dict keys. "
            f"Keys {unexpected} are not allowed. Allowed keys: {allowed_job_keys}.",
            None,
        )

    absent = [name for name in required_job_keys if name not in job]
    if absent:
        raise DagValidationException(
            ERROR.REQUIREMENT,
            "One or more job dict required keys are missing.",
            f"Add the missing required Data Job Dict keys. Keys {absent} "
            f"are missing. Required keys: {required_job_keys}.",
            None,
        )
112
101
113
102
def _validate_job_name(self, job: Dict):
    """
    Ensure that the job_name value of a job dict is a string.

    :param job: the job definition; its "job_name" key is read directly
    :raises DagValidationException: with ERROR.TYPE when job["job_name"] is
        not a str
    """
    job_name = job["job_name"]
    if not isinstance(job_name, str):
        # Identify the failing job by its (invalid) job_name value. Joining
        # the dict's keys, as was done before, only yields the concatenated
        # key names and says nothing about which job is affected.
        jobs = [str(job_name)]
        raise DagValidationException(
            ERROR.TYPE,
            "The type of the job dict key job_name is not string.",
            f"Change the Data Job Dict value of job_name. "
            f"Current type is {type(job_name)}. Expected type is string.",
            jobs,
        )
122
112
123
113
def _validate_dependencies(self, job_name: str, dependencies: List[str]):
    """
    Ensure that the depends_on value of a job is a list of strings.

    :param job_name: name of the job the dependencies belong to; reported in
        the raised error
    :param dependencies: the depends_on value to check
    :raises DagValidationException: with ERROR.TYPE when dependencies is not a
        list, or when at least one of its items is not a str
    """
    affected_jobs = [job_name]

    if not isinstance(dependencies, list):
        raise DagValidationException(
            ERROR.TYPE,
            "The type of the job dict depends_on key is not list.",
            f"Check the Data Job Dict type of the depends_on key. Current type "
            f"is {type(dependencies)}. Expected type is list.",
            affected_jobs,
        )

    bad_items = [dep for dep in dependencies if not isinstance(dep, str)]
    if not bad_items:
        return

    raise DagValidationException(
        ERROR.TYPE,
        "One or more items of the job dependencies list are not strings.",
        f"Check the Data Job Dict values of the depends_on list. "
        f"There are some non-string values: {bad_items}. Expected type is string.",
        affected_jobs,
    )
143
135
144
136
def _validate_team_name(self, job_name: str, team_name: str):
    """
    Ensure that the team_name value of a job is a string.

    :param job_name: name of the job being validated; reported in the raised
        error
    :param team_name: the team_name value to check
    :raises DagValidationException: with ERROR.TYPE when team_name is not a str
    """
    if not isinstance(team_name, str):
        jobs = [job_name]
        raise DagValidationException(
            ERROR.TYPE,
            # The reason must name team_name, the key actually being checked;
            # it previously said job_name (copied from the job_name check).
            "The type of the job dict key team_name is not string.",
            f"Change the Data Job Dict value of team_name. "
            f"Current type is {type(team_name)}. Expected type is string.",
            jobs,
        )
153
146
154
147
def _validate_fail_dag_on_error(self, job_name: str, fail_dag_on_error: bool):
    """
    Ensure that the fail_dag_on_error value of a job is a bool.

    :param job_name: name of the job being validated; reported in the raised
        error
    :param fail_dag_on_error: the fail_dag_on_error value to check
    :raises DagValidationException: with ERROR.TYPE when fail_dag_on_error is
        not a bool
    """
    if isinstance(fail_dag_on_error, bool):
        return

    actual_type = type(fail_dag_on_error)
    raise DagValidationException(
        ERROR.TYPE,
        "The type of the job dict key fail_dag_on_error is not bool (True/False).",
        f"Change the Data Job Dict value of fail_dag_on_error. Current type"
        f" is {actual_type}. Expected type is bool.",
        [job_name],
    )
163
157
164
158
def _validate_arguments(self, job_name: str, job_args: dict):
    """
    Ensure that the arguments value of a job is a JSON-serializable dict.

    :param job_name: name of the job being validated; reported in the raised
        error
    :param job_args: the arguments value to check
    :raises DagValidationException: with ERROR.TYPE when job_args is not a
        dict, or when json.dumps cannot serialize it
    """
    if not isinstance(job_args, dict):
        jobs = [job_name]
        raise DagValidationException(
            ERROR.TYPE,
            "The type of the job dict key arguments is not dict.",
            f"Change the Data Job Dict value of arguments. "
            f"Current type is {type(job_args)}. Expected type is dict.",
            jobs,
        )
    try:
        json.dumps(job_args)
    except (TypeError, ValueError) as e:
        # json.dumps raises TypeError for unsupported value types and
        # ValueError for circular references; both mean the arguments cannot
        # be serialized, so both are reported as a validation failure.
        reason = str(e)
        jobs = [job_name]
        raise DagValidationException(
            ERROR.TYPE,
            reason,
            f"Change the Data Job Dict value of arguments. "
            f"Current type is {type(job_args)} but not serializable as JSON.",
            jobs,
        ) from e
183
180
184
181
def _check_dag_cycles (self , jobs : List [Dict ]):
@@ -190,9 +187,10 @@ def _check_dag_cycles(self, jobs: List[Dict]):
190
187
# Preparing the sorter raises CycleError if cycles exist
191
188
topological_sorter .prepare ()
192
189
except graphlib .CycleError as e :
193
- self ._raise_error (
190
+ jobs1 = e .args [1 ][:- 1 ]
191
+ raise DagValidationException (
194
192
ERROR .CONFLICT ,
195
193
"There is a cycle in the DAG." ,
196
194
f"Change the depends_on list of the jobs that participate in the detected cycle: { e .args [1 ]} ." ,
197
- e . args [ 1 ][: - 1 ] ,
195
+ jobs1 ,
198
196
)
0 commit comments