@@ -11,6 +11,7 @@ import "../tasks/tasks_utils.wdl" as utils
1111import "demux_deplete.wdl"
1212import "assemble_refbased.wdl"
1313import "sarscov2_lineages.wdl"
14+ import "sarscov2_biosample_load.wdl"
1415
1516workflow sarscov2_illumina_full {
1617 meta {
@@ -49,6 +50,7 @@ workflow sarscov2_illumina_full {
4950 Int min_genome_bases = 24000
5051 Int max_vadr_alerts = 0
5152
53+ File ? sample_rename_map
5254
5355 String ? workspace_name
5456 String ? terra_project
@@ -61,10 +63,15 @@ workflow sarscov2_illumina_full {
6163 Int taxid = 2697049
6264 String gisaid_prefix = 'hCoV-19/'
6365
66+ # Broad production pipeline only: metadata ETL and NCBI BioSample registration
67+ if (length (biosample_attributes ) == 0 ) {
68+ call sarscov2_biosample_load .sarscov2_biosample_load
69+ }
70+
6471 # merge biosample attributes tables
6572 call utils .tsv_join as biosample_merge {
6673 input :
67- input_tsvs = biosample_attributes ,
74+ input_tsvs = select_all ( flatten ([[ sarscov2_biosample_load . biosample_attributes ], biosample_attributes ])) ,
6875 id_col = 'accession' ,
6976 out_basename = "biosample_attributes-merged"
7077 }
@@ -81,7 +88,8 @@ workflow sarscov2_illumina_full {
8188 biosample_map = biosample_merge .out_tsv ,
8289 instrument_model_user_specified = instrument_model ,
8390 sra_title = sra_title ,
84- read_structure = read_structure
91+ read_structure = read_structure ,
92+ sample_rename_map = select_first ([sample_rename_map , sarscov2_biosample_load .id_map_tsv ])
8593 }
8694 String flowcell_id = demux_deplete .run_id
8795
@@ -277,7 +285,7 @@ workflow sarscov2_illumina_full {
277285 # this decorates assembly_meta_tsv with collab/internal IDs, genome_status, and many other columns
278286 input :
279287 assembly_stats_tsv = assembly_meta_tsv .combined ,
280- collab_ids_tsv = collab_ids_tsv ,
288+ collab_ids_tsv = select_first ([ collab_ids_tsv , sarscov2_biosample_load . collab_ids_tsv ]) ,
281289 drop_file_cols = true ,
282290 min_unambig = min_genome_bases ,
283291 genome_status_json = filter_bad_ntc_batches .fail_meta_json
@@ -382,7 +390,7 @@ workflow sarscov2_illumina_full {
382390 call sarscov2 .sequencing_report {
383391 input :
384392 assembly_stats_tsv = download_entities_tsv .tsv_file ,
385- collab_ids_tsv = collab_ids_tsv ,
393+ collab_ids_tsv = select_first ([ collab_ids_tsv , sarscov2_biosample_load . collab_ids_tsv ]) ,
386394 max_date = demux_deplete .run_date ,
387395 min_unambig = min_genome_bases
388396 }
@@ -502,6 +510,9 @@ workflow sarscov2_illumina_full {
502510 String run_id = demux_deplete .run_id
503511
504512 File ? sequencing_reports = sequencing_report .all_zip
513+
514+ File ? id_map_tsv = sarscov2_biosample_load .id_map_tsv
515+ Array [File ] biosample_attributes_out = select_all (flatten ([[sarscov2_biosample_load .biosample_attributes ], biosample_attributes ]))
505516
506517 Array [String ] data_tables_out = select_first ([data_tables .tables , []])
507518 }
0 commit comments