@@ -38,6 +38,7 @@ workflow assemble_denovo_metagenomic {
3838 Array [String ] taxa_to_dehost = ["Vertebrata" ]
3939 Array [String ] taxa_to_avoid_assembly = ["Vertebrata" , "other sequences" , "Bacteria" ]
4040
41+ String table_name = "sample"
4142 }
4243
4344 Int min_scaffold_unambig = 300 # in base-pairs; any scaffolded assembly < this length will not be refined/polished
@@ -149,30 +150,21 @@ workflow assemble_denovo_metagenomic {
149150 kraken_summary_report = kraken2 .kraken2_summary_report
150151 }
151152
152- # download (multi-segment) genomes for each reference, fasta filename = colon-concatenated accession list
153- scatter (taxon in read_tsv (taxid_to_ref_accessions_tsv )) {
154- # taxon = [taxid, isolate_prefix, taxname, colon_delim_accession_list]
155- call utils .string_split {
156- input :
157- joined_string = taxon [3 ],
158- delimiter = ":"
159- }
160- call ncbi .download_annotations {
161- input :
162- accessions = string_split .tokens ,
163- combined_out_prefix = sub (taxon [3 ], ":" , "-" ) # singularity does not like colons in filenames
164- }
153+ # download (multi-segment) genomes for each reference, fasta filename = dash-concatenated accession list
154+ call ncbi .download_ref_genomes_from_tsv {
155+ input :
156+ ref_genomes_tsv = taxid_to_ref_accessions_tsv
165157 }
166158
167159 # subset reference genomes to those with ANI hits to contigs and cluster reference hits by any ANI similarity to each other
168160 call assembly .select_references {
169161 input :
170- reference_genomes_fastas = download_annotations . combined_fasta ,
162+ reference_genomes_fastas = download_ref_genomes_from_tsv . ref_genomes_fastas ,
171163 contigs_fasta = spades .contigs_fasta
172164 }
173165
174166 # assemble and produce stats for every reference cluster
175- Array [String ] assembly_header = ["entity:assembly_id" , "assembly_name" , "sample_id" , "sample_name" , "taxid" , "tax_name" , "tax_shortname" , "assembly_fasta" , "aligned_only_reads_bam" , "coverage_plot" , "assembly_length" , "assembly_length_unambiguous" , "reads_aligned" , "mean_coverage" , "percent_reference_covered" , "scaffolding_num_segments_recovered" , "reference_num_segments_required" , "reference_length" , "reference_accessions" , "skani_num_ref_clusters" , "skani_this_cluster_num_refs" , "skani_dist_tsv" , "scaffolding_ani" , "scaffolding_pct_ref_cov" , "intermediate_gapfill_fasta" , "assembly_preimpute_length_unambiguous" , "replicate_concordant_sites" , "replicate_discordant_snps" , "replicate_discordant_indels" , "replicate_discordant_vcf" , "isnvsFile" , "aligned_bam" , "coverage_tsv" , "read_pairs_aligned" , "bases_aligned" , "assembly_method" , "assembly_method_version" , "biosample_accession" , "batch_ids" , "sample " ]
167+ Array [String ] assembly_header = ["entity:assembly_id" , "assembly_name" , "sample_id" , "sample_name" , "taxid" , "tax_name" , "tax_shortname" , "assembly_fasta" , "aligned_only_reads_bam" , "coverage_plot" , "assembly_length" , "assembly_length_unambiguous" , "reads_aligned" , "mean_coverage" , "percent_reference_covered" , "scaffolding_num_segments_recovered" , "reference_num_segments_required" , "reference_length" , "reference_accessions" , "skani_num_ref_clusters" , "skani_this_cluster_num_refs" , "skani_dist_tsv" , "scaffolding_ani" , "scaffolding_pct_ref_cov" , "intermediate_gapfill_fasta" , "assembly_preimpute_length_unambiguous" , "replicate_concordant_sites" , "replicate_discordant_snps" , "replicate_discordant_indels" , "replicate_discordant_vcf" , "isnvsFile" , "aligned_bam" , "coverage_tsv" , "read_pairs_aligned" , "bases_aligned" , "assembly_method" , "assembly_method_version" , "biosample_accession" , "batch_ids" , "~{ table_name } " ]
176168 scatter (ref_cluster_tar in select_references .matched_reference_clusters_fastas_tars ) {
177169
178170 call utils .tar_extract {
@@ -197,9 +189,9 @@ workflow assemble_denovo_metagenomic {
197189 tsv = taxid_to_ref_accessions_tsv ,
198190 idx_col = "accessions" ,
199191 idx_val = sub (scaffold .scaffolding_chosen_ref_basename , "-" , ":" ),
200- add_header = ["taxid " , "isolate_prefix" , "taxname" , "accessions" ]
192+ add_header = ["tax_id " , "isolate_prefix" , "taxname" , "accessions" ]
201193 }
202- String taxid = tax_lookup .map ["taxid " ]
194+ String taxid = tax_lookup .map ["tax_id " ]
203195 String tax_name = tax_lookup .map ["taxname" ]
204196 String isolate_prefix = tax_lookup .map ["isolate_prefix" ]
205197
@@ -266,7 +258,7 @@ workflow assemble_denovo_metagenomic {
266258
267259 "batch_ids" : unique_batch_ids .sorted_unique_joined ,
268260
269- "sample " : '{"entityType":"sample ","entityName":"' + sample_id + '"}'
261+ "~{ table_name } " : '{"entityType":"~{ table_name } ","entityName":"' + sample_id + '"}'
270262 }
271263
272264 if (assembly_length_unambiguous > min_scaffold_unambig ) {
0 commit comments