Skip to content

Commit 61a666c

Browse files
committed
added reset checkpoint at the end of dataset pipeline
1 parent 7fa2174 commit 61a666c

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

dags/dataset.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,16 @@ def update_checkpoint(checkpoint:Checkpoint, stage:Stages):
156156
Get those image files and then, map them into an h5 file
157157
with resized and normalized images.
158158
"""
159+
160+
reset_checkpoint = PythonOperator(
161+
task_id="reset_checkpoint",
162+
python_callable=update_checkpoint,
163+
op_args=[checkpoint, Stages.GEN_IMAGES]
164+
)
165+
166+
reset_checkpoint.doc_md = """
167+
Reset checkpoint to start again.
168+
"""
159169

160170
command = f"cd {folder} && zip -r dataset-images.zip dataset/"
161171
pack_img = BashOperator(task_id="pack_images", bash_command=command)
@@ -210,7 +220,8 @@ def update_checkpoint(checkpoint:Checkpoint, stage:Stages):
210220
transtion_gen_to_remove >> remove_duplicates
211221
remove_duplicates >> transition_remove_to_transform
212222
transition_remove_to_transform >> transform_img
213-
transform_img >> pack_img
223+
transform_img >> reset_checkpoint
224+
reset_checkpoint >> pack_img
214225

215226
[gen_ghz, pack_img] >> trigger_dag_train
216227
[gen_ghz, pack_img] >> send_kaggle

0 commit comments

Comments
 (0)