@@ -36,6 +36,11 @@ workflow mt_coverage_merge {
3636 Int finalize_shard_n_partitions = 256
3737 Int finalize_union_n_partitions = 1000
3838
39+ # Step 5 (add_annotations) output bucket
40+ # Annotated outputs are written directly to GCS. A timestamp subdirectory
41+ # is created inside the task to prevent accidental overwrites.
42+ String annotated_output_bucket
43+
3944 }
4045
4146 if (defined (sample_list_tsv )) {
@@ -231,7 +236,8 @@ workflow mt_coverage_merge {
231236 coverage_tsv = process_tsv_files .processed_tsv , # Path to the coverage input TSV file
232237 vcf_mt = combined_mt_tar , # Path to the MatrixTable
233238 keep_all_samples = true ,
234- output_name = "annotated"
239+ output_name = "annotated" ,
240+ output_bucket = annotated_output_bucket
235241 }
236242
237243
@@ -241,15 +247,16 @@ workflow mt_coverage_merge {
241247 coverage_tsv = process_tsv_files .processed_tsv , # Path to the coverage input TSV file
242248 vcf_mt = combined_mt_tar , # Path to the MatrixTable
243249 keep_all_samples = false ,
244- output_name = "filt_annotated"
250+ output_name = "filt_annotated" ,
251+ output_bucket = annotated_output_bucket
245252 }
246253
247254 output {
248255 File processed_tsv = process_tsv_files .processed_tsv
249256 File output_coverage_db = annotate_coverage .output_db
250257 File combined_vcf = combined_mt_tar
251- File annotated_output_tar = annotated .annotated_output_tar
252- File filt_annotated_output_tar = filt_annotated .annotated_output_tar
258+ String annotated_output_gcs_path = annotated .annotated_output_gcs_path
259+ String filt_annotated_output_gcs_path = filt_annotated .annotated_output_gcs_path
253260 }
254261}
255262
@@ -1319,6 +1326,7 @@ task add_annotations {
13191326 File coverage_tsv # Path to the coverage input TSV file
13201327 File vcf_mt # Path to the MatrixTable
13211328 String output_name # directory output name
1329+ String output_bucket # GCS bucket/prefix to write annotated outputs into
13221330
13231331 # Runtime parameters
13241332 Int memory_gb = 96
@@ -1407,6 +1415,11 @@ task add_annotations {
14071415 test -f ./coverage_db/coverage.h5
14081416
14091417 # Run the add_annotations.py script baked inside mtSwirl clone
1418+ # Build a timestamped (UTC, one-second resolution) GCS output path so that separate runs do not overwrite each other unless they start within the same second.
1419+ TIMESTAMP = $(date -u +%Y %m %dT %H %M %SZ )
1420+ OUT_GCS = "~{sub(output_bucket, " /$", "")}/${TIMESTAMP} /~{output_name}"
1421+ echo "Annotated outputs will be written to: ${OUT_GCS} "
1422+
14101423 python3 /opt/mtSwirl/generate_mtdna_call_mt/add_annotations.py \
14111424 --sample-stats =~{coverage_tsv } \
14121425 ~{if keep_all_samples then "--keep-all-samples" else "" } \
@@ -1417,7 +1430,7 @@ task add_annotations {
14171430 -v ./~{output_name }/vep \
14181431 -a ~{coverage_tsv } \
14191432 -m "${VCF_MT_DIR} " \
1420- -d ./~{ output_name } \
1433+ -d " ${OUT_GCS} " \
14211434 --temp-dir ./tmp
14221435
14231436 echo "DONE WITH ANNOTATION"
@@ -1426,12 +1439,12 @@ task add_annotations {
14261439 echo "Contents of /tmp:"
14271440 ls -lh /tmp
14281441
1429- # Compress the annotated output directory
1430- tar -czf $WORK_DIR /annotated_output.tar.gz ~{ output_name }
1442+ # Record the GCS output path for the workflow output
1443+ echo -n " ${OUT_GCS} " > annotated_output_gcs_path.txt
14311444 >>>
14321445
14331446 output {
1434- File annotated_output_tar = "annotated_output.tar.gz"
1447+ String annotated_output_gcs_path = read_string ( "annotated_output_gcs_path.txt" )
14351448 }
14361449
14371450 runtime {
0 commit comments