16 | 16 | update_progress_artifact, |
17 | 17 | ) |
18 | 18 |
19 | | -from archiver.config.variables import Variables |
20 | | -from archiver.utils.datablocks import ArchiveInfo |
| 19 | +from config.variables import Variables |
| 20 | +from utils.datablocks import ArchiveInfo |
21 | 21 |
22 | | -from .utils import StoragePaths, report_archival_error |
| 22 | +from .flow_utils import StoragePaths, report_archival_error |
23 | 23 | from .task_utils import ( |
24 | 24 | generate_task_name_dataset, |
25 | 25 | generate_flow_name_job_id, |
26 | 26 | generate_subflow_run_name_job_id_dataset_id, |
27 | 27 | generate_sleep_for_task_name |
28 | 28 | ) |
29 | | -from archiver.scicat.scicat_interface import SciCatClient |
30 | | -from archiver.scicat.scicat_tasks import ( |
| 29 | +from scicat.scicat_interface import SciCatClient |
| 30 | +from scicat.scicat_tasks import ( |
31 | 31 | update_scicat_archival_job_status, |
32 | 32 | update_scicat_archival_dataset_lifecycle, |
33 | 33 | get_origdatablocks, |
34 | 34 | register_datablocks, |
35 | 35 | get_scicat_access_token, |
36 | 36 | get_job_datasetlist, |
| 37 | + reset_dataset |
37 | 38 | ) |
38 | | -from archiver.scicat.scicat_tasks import ( |
| 39 | +from scicat.scicat_tasks import ( |
39 | 40 | report_job_failure_system_error, |
40 | | - report_dataset_user_error, |
| 41 | + report_dataset_user_error |
41 | 42 | ) |
42 | | -from archiver.utils.datablocks import wait_for_free_space |
43 | | -from archiver.utils.model import OrigDataBlock, DataBlock |
44 | | -import archiver.utils.datablocks as datablocks_operations |
45 | | -from archiver.config.concurrency_limits import ConcurrencyLimits |
46 | | -from archiver.utils.s3_storage_interface import Bucket, get_s3_client |
47 | | -from archiver.utils.log import getLogger |
| 43 | + |
| 44 | +from utils.datablocks import wait_for_free_space |
| 45 | +from utils.model import OrigDataBlock, DataBlock |
| 46 | +import utils.datablocks as datablocks_operations |
| 47 | +from config.concurrency_limits import ConcurrencyLimits |
| 48 | +from utils.s3_storage_interface import Bucket, get_s3_client |
| 49 | +from utils.log import getLogger |
48 | 50 |
49 | 51 |
50 | 52 | def on_get_origdatablocks_error(dataset_id: str, task: Task, task_run: TaskRun, state: State): |
@@ -108,28 +110,6 @@ def update_progress(p): |
108 | 110 | return file_paths |
109 | 111 |
110 | 112 |
111 | | -# @task(task_run_name=generate_task_name_dataset) |
112 | | -# def create_datablocks(dataset_id: str, origDataBlocks: List[OrigDataBlock], file_paths: List[Path]) -> List[DataBlock]: |
113 | | -# """Prefect task to create datablocks. |
114 | | - |
115 | | -# Args: |
116 | | -# dataset_id (str): dataset id |
117 | | -# origDataBlocks (List[OrigDataBlock]): List of OrigDataBlocks (Pydantic Model) |
118 | | - |
119 | | -# Returns: |
120 | | -# List[DataBlock]: List of DataBlocks (Pydantic Model) |
121 | | -# """ |
122 | | - |
123 | | -# s3_client = get_s3_client() |
124 | | - |
125 | | -# progress_artifact_id = create_progress_artifact( |
126 | | -# progress=0.0, |
127 | | -# description="Create datablocks from datafiles", |
128 | | -# ) |
129 | | - |
130 | | -# return datablocks_operations.create_datablocks(s3_client, dataset_id, origDataBlocks, file_paths, update_progress) |
131 | | - |
132 | | - |
133 | 113 | @task(task_run_name=generate_task_name_dataset) |
134 | 114 | def create_tarfiles(dataset_id: str) -> List[ArchiveInfo]: |
135 | 115 | datablocks_scratch_folder = StoragePaths.scratch_archival_datablocks_folder(dataset_id) |
@@ -388,22 +368,29 @@ def on_dataset_flow_failure(flow: Flow, flow_run: FlowRun, state: State): |
388 | 368 | task_run=None, |
389 | 369 | token=scicat_token, |
390 | 370 | ) |
| 371 | + try: |
| 372 | + reset_dataset( |
| 373 | + dataset_id=flow_run.parameters["dataset_id"], |
| 374 | + token=scicat_token |
| 375 | + ) |
| 376 | + except Exception as e: |
| 377 | + getLogger().error(f"failed to reset dataset: {e}") |
391 | 378 | datablocks_operations.cleanup_lts_folder(flow_run.parameters["dataset_id"]) |
392 | 379 | datablocks_operations.cleanup_scratch(flow_run.parameters["dataset_id"]) |
393 | 380 | try: |
394 | 381 | s3_client = get_s3_client() |
395 | 382 | datablocks_operations.cleanup_s3_staging(s3_client, flow_run.parameters["dataset_id"]) |
396 | | - except: |
397 | | - pass |
| 383 | + except Exception as e: |
| 384 | + getLogger().error(f"failed to clean up S3 staging: {e}") |
398 | 385 |
399 | 386 |
400 | 387 | def cleanup_dataset(flow: Flow, flow_run: FlowRun, state: State): |
401 | 388 | try: |
402 | 389 | s3_client = get_s3_client() |
403 | 390 | datablocks_operations.cleanup_s3_landingzone(s3_client, flow_run.parameters["dataset_id"]) |
404 | 391 | datablocks_operations.cleanup_s3_staging(s3_client, flow_run.parameters["dataset_id"]) |
405 | | - except: |
406 | | - pass |
| 392 | + except Exception as e: |
| 393 | + getLogger().error(f"failed to clean up S3 staging or landing zone: {e}") |
407 | 394 | datablocks_operations.cleanup_scratch(flow_run.parameters["dataset_id"]) |
408 | 395 |
409 | 396 |
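For context on how these handlers are consumed (the registration itself is outside the lines shown here): Prefect calls flow-level on_failure/on_completion hooks with (flow, flow_run, state), which is why on_dataset_flow_failure and cleanup_dataset read the dataset id from flow_run.parameters, while task-level hooks such as on_get_origdatablocks_error receive (task, task_run, state) and need dataset_id bound in up front. The sketch below shows one way such a registration can look; the flow name, its parameters, and the keyword arguments of get_scicat_access_token and get_origdatablocks are illustrative assumptions, not code from this PR.

from functools import partial
from prefect import flow

@flow(
    name="archive_single_dataset",  # hypothetical flow name, for illustration only
    flow_run_name=generate_subflow_run_name_job_id_dataset_id,
    on_failure=[on_dataset_flow_failure],  # reports the failure to SciCat, resets the dataset, cleans LTS/scratch/staging
    on_completion=[cleanup_dataset],       # assumes cleanup_dataset is registered as a completion hook
)
def archive_single_dataset(dataset_id: str, job_id: str) -> None:
    token = get_scicat_access_token()  # assumed call signature
    # Bind dataset_id so the hook matches Prefect's (task, task_run, state) signature.
    origdatablocks = get_origdatablocks.with_options(
        on_failure=[partial(on_get_origdatablocks_error, dataset_id)]
    )(dataset_id=dataset_id, token=token)  # assumed keyword arguments
    ...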