@@ -1,3 +1,4 @@
+import base64
 from typing import List
 from prefect import task
 from uuid import UUID
@@ -18,7 +19,8 @@
 from utils.s3_storage_interface import Bucket, S3Storage, get_s3_client
 
 
-from prefect.artifacts import create_link_artifact
+from prefect.artifacts import create_link_artifact, create_markdown_artifact
+from utils.script_generation import generate_download_script
 
 scicat_instance: SciCatClient | None = None
 
@@ -197,7 +199,7 @@ def reset_dataset(dataset_id: str, token: SecretStr):
 
 
 @task
-def create_job_result_object_task(dataset_ids: List[str]) -> List[JobResultEntry]:
+def create_job_result_object_task(dataset_ids: List[str]) -> JobResultObject:
     access_token = get_scicat_access_token.submit()
     access_token.wait()
 
@@ -209,28 +211,52 @@ def create_job_result_object_task(dataset_ids: List[str]) -> List[JobResultEntry]:
         datablocks_future.wait()
         datablocks = datablocks_future.result()
 
-        dataset_job_results = create_job_result_object(dataset_id, datablocks)
+        dataset_job_results = create_job_result_entries(dataset_id, datablocks)
         job_results = job_results + dataset_job_results
 
-    return job_results
+    job_results_object = JobResultObject(result=job_results)
 
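+    # Generate a download script covering all entries and attach it, base64-encoded, to the result object.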
+    script = create_download_script(job_results)
+    job_results_object.downloadScript = base64.b64encode(bytes(script, 'utf-8'))
+
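+    # Publish the script as a markdown artifact as well, so it shows up in the Prefect UI.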
+    markdown = f"""Download script for all datablocks in this job\n```bash\n{script}\n```\n"""
+
+    create_markdown_artifact(
+        key="script", markdown=markdown)
+
+    return job_results_object
+
+def create_download_script(job_result_entries: List[JobResultEntry]) -> str:
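+    """Group datablock names and presigned URLs by dataset and render them as a download script."""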
+
+    dataset_to_datablocks = {}
+
+    for result in job_result_entries:
+        dataset_to_datablocks.setdefault(result.datasetId, []).append({"name": Path(result.archiveId).name, "url": result.url})
+
+    return generate_download_script(dataset_to_datablocks)
+
+
 
 def create_presigned_url(client: S3Storage, datablock: DataBlock):
     url = client.get_presigned_url(Bucket.retrieval_bucket(), datablock.archiveId)
     return url
 
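+# Replace characters that are not allowed in Prefect artifact keys with dashes.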
+def sanitize_name(name: str) -> str:
+    invalid_chars = ["/", ".", "_"]
+    sanitized_name = name
+    for c in invalid_chars:
+        sanitized_name = sanitized_name.replace(c, "-")
+    return sanitized_name
 
 @log
-def create_job_result_object(dataset_id: str, datablocks: List[DataBlock]) -> List[JobResultEntry]:
+def create_job_result_entries(dataset_id: str, datablocks: List[DataBlock]) -> List[JobResultEntry]:
     s3_client = get_s3_client()
     job_result_entries: List[JobResultEntry] = []
     for datablock in datablocks:
         url = create_presigned_url(s3_client, datablock)
 
-        invalid_chars = ["/", ".", "_"]
-        sanitized_name = str(Path(datablock.archiveId).name)
-        for c in invalid_chars:
-            sanitized_name = sanitized_name.replace(c, "-")
+        sanitized_name = sanitize_name(str(Path(datablock.archiveId).stem))
+
         create_link_artifact(
             key=sanitized_name,
             link=url,