Skip to content

Commit 89872c6

Browse files
authored
Merge pull request #584 from broadinstitute/ct-genbank_single-optional-cmt-input
genbank_single: made mapped bam input optional
2 parents c9f794b + 33f4952 commit 89872c6

File tree

2 files changed

+26
-17
lines changed

2 files changed

+26
-17
lines changed

pipes/WDL/workflows/genbank_single.wdl

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -14,19 +14,21 @@ workflow genbank_single {
1414
}
1515

1616
input {
17-
File assembly_fasta
18-
String assembly_id = basename(assembly_fasta, ".fasta")
19-
String ref_accessions_colon_delim
17+
File assembly_fasta
18+
String assembly_id = basename(assembly_fasta, ".fasta")
19+
File? aligned_bam
2020

21-
String biosample_accession
22-
Int tax_id
23-
String organism_name
21+
String ref_accessions_colon_delim
2422

25-
String email_address # required for fetching data from NCBI APIs
26-
File authors_sbt
23+
String biosample_accession
24+
Int tax_id
25+
String organism_name
2726

28-
String? biosample_attributes_json # if this is used, we will use this first
29-
File? biosample_attributes_tsv # if no json, we will read this tsv
27+
String email_address # required for fetching data from NCBI APIs
28+
File authors_sbt
29+
30+
String? biosample_attributes_json # if this is used, we will use this first
31+
File? biosample_attributes_tsv # if no json, we will read this tsv
3032
# if both are unspecified, we will fetch from NCBI via biosample_accession
3133
}
3234

@@ -39,6 +41,10 @@ workflow genbank_single {
3941
description: "Reference genome Genbank accessions, each segment/chromosome in the exact same count and order as the segments/chromosomes described in assemblies_fasta. List of accessions should be colon delimited.",
4042
patterns: ["*.fasta"]
4143
}
44+
aligned_bam: {
45+
description: "Optional: aligned BAM file to inspect for reporting sequencing platform, read depth, etc. in GenBank structured comments.",
46+
patterns: ["*.bam","*.sam"]
47+
}
4248
biosample_attributes_tsv: {
4349
description: "A post-submission attributes file from NCBI BioSample, which is available at https://submit.ncbi.nlm.nih.gov/subs/ and clicking on 'Download attributes file with BioSample accessions'.",
4450
patterns: ["*.txt", "*.tsv"]
@@ -127,9 +133,12 @@ workflow genbank_single {
127133
}
128134
File feature_tbl = select_first([vadr.feature_tbl, annot.feature_tbl])
129135
130-
call ncbi.structured_comments_from_aligned_bam {
131-
input:
132-
out_basename = assembly_id
136+
if(defined(aligned_bam)) {
137+
call ncbi.structured_comments_from_aligned_bam {
138+
input:
139+
out_basename = assembly_id,
140+
aligned_bam = select_first([aligned_bam])
141+
}
133142
}
134143
135144
if(genbank_special_taxa.table2asn_allowed) {
@@ -145,9 +154,9 @@ workflow genbank_single {
145154
}
146155
}
147156
if(!genbank_special_taxa.table2asn_allowed) {
148-
Array[File] special_submit_files = [assembly_fsa.sanitized_fasta,
157+
Array[File] special_submit_files = select_all([assembly_fsa.sanitized_fasta,
149158
structured_comments_from_aligned_bam.structured_comment_file,
150-
biosample_to_genbank.genbank_source_modifier_table]
159+
biosample_to_genbank.genbank_source_modifier_table])
151160
String special_basename_list = '["~{assembly_id}.fsa", "~{assembly_id}.cmt", "~{assembly_id}.src"]'
152161
}
153162
String basename_list_json = select_first([special_basename_list, '["~{assembly_id}.sqn"]'])
@@ -158,7 +167,7 @@ workflow genbank_single {
158167

159168
output {
160169
String genbank_mechanism = genbank_special_taxa.genbank_submission_mechanism
161-
File genbank_comment_file = structured_comments_from_aligned_bam.structured_comment_file
170+
File? genbank_comment_file = structured_comments_from_aligned_bam.structured_comment_file
162171
File genbank_source_table = biosample_to_genbank.genbank_source_modifier_table
163172
String genbank_isolate_name = biosample_to_genbank.isolate_name
164173
File annotation_tbl = feature_tbl

test/input/WDL/test_inputs-genbank_single-local.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
"genbank_single.tax_id": 3052310,
55
"genbank_single.organism_name": "Lassa virus",
66
"genbank_single.ref_accessions_colon_delim": "KM821997.1:KM821998.1",
7-
"genbank_single.structured_comments_from_aligned_bam.aligned_bam": "test/input/genbank/LASV_NGA_2018_0026-aligned.bam",
7+
"genbank_single.aligned_bam": "test/input/genbank/LASV_NGA_2018_0026-aligned.bam",
88
"genbank_single.biosample_attributes_tsv": "test/input/genbank/biosample-attributes-lasv.txt",
99
"genbank_single.structured_comments_from_aligned_bam.assembly_method": "placeholder assembly software",
1010
"genbank_single.structured_comments_from_aligned_bam.assembly_method_version": "5.4.3.2.1",

0 commit comments

Comments
 (0)