Skip to content

Commit cb8cc79

Browse files
committed
percolate batch ids from library/flowcell -> sample/assembly tables
1 parent 63dae6b commit cb8cc79

File tree

2 files changed

+21
-2
lines changed

2 files changed

+21
-2
lines changed

pipes/WDL/tasks/tasks_utils.wdl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1067,18 +1067,26 @@ task raise {
10671067
task unique_strings {
10681068
input {
10691069
Array[String] strings
1070+
String separator=","
10701071
}
10711072
Int disk_size = 50
10721073
command {
10731074
cat ~{write_lines(strings)} | sort | uniq > UNIQUE_OUT
1075+
python3<<CODE
1076+
with open('UNIQUE_OUT', 'rt') as inf:
1077+
rows = [line.strip() for line in inf]
1078+
with open('UNIQUE_OUT_JOIN', 'wt') as outf:
1079+
outf.write('~{separator}'.join(rows) + '\n')
1080+
CODE
10741081
}
10751082
output {
10761083
Array[String] sorted_unique = read_lines("UNIQUE_OUT")
1084+
String sorted_unique_joined = read_string("UNIQUE_OUT_JOIN")
10771085
}
10781086
runtime {
10791087
memory: "1 GB"
10801088
cpu: 1
1081-
docker: "ubuntu"
1089+
docker: "python:slim"
10821090
disks: "local-disk " + disk_size + " HDD"
10831091
disk: disk_size + " GB" # TES
10841092
dx_instance_type: "mem1_ssd1_v2_x2"

pipes/WDL/workflows/assemble_denovo_metagenomic.wdl

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@ workflow assemble_denovo_metagenomic {
2323

2424
Array[File]+ reads_bams
2525

26+
Array[String] batch_id_list
27+
2628
File ncbi_taxdump_tgz
2729

2830
File spikein_db
@@ -89,6 +91,11 @@ workflow assemble_denovo_metagenomic {
8991

9092
}
9193

94+
call utils.unique_strings as unique_batch_ids {
95+
input:
96+
strings = batch_id_list
97+
}
98+
9299
call read_utils.merge_and_reheader_bams as merge_raw_reads {
93100
input:
94101
in_bams = reads_bams
@@ -165,7 +172,7 @@ workflow assemble_denovo_metagenomic {
165172
}
166173
167174
# assemble and produce stats for every reference cluster
168-
Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "tax_shortname", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "reference_accessions", "skani_num_ref_clusters", "skani_this_cluster_num_refs", "skani_dist_tsv", "scaffolding_ani", "scaffolding_pct_ref_cov", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "assembly_method", "assembly_method_version", "biosample_accession", "sample"]
175+
Array[String] assembly_header = ["entity:assembly_id", "assembly_name", "sample_id", "sample_name", "taxid", "tax_name", "tax_shortname", "assembly_fasta", "aligned_only_reads_bam", "coverage_plot", "assembly_length", "assembly_length_unambiguous", "reads_aligned", "mean_coverage", "percent_reference_covered", "scaffolding_num_segments_recovered", "reference_num_segments_required", "reference_length", "reference_accessions", "skani_num_ref_clusters", "skani_this_cluster_num_refs", "skani_dist_tsv", "scaffolding_ani", "scaffolding_pct_ref_cov", "intermediate_gapfill_fasta", "assembly_preimpute_length_unambiguous", "replicate_concordant_sites", "replicate_discordant_snps", "replicate_discordant_indels", "replicate_discordant_vcf", "isnvsFile", "aligned_bam", "coverage_tsv", "read_pairs_aligned", "bases_aligned", "assembly_method", "assembly_method_version", "biosample_accession", "batch_ids", "sample"]
169176
scatter(ref_cluster_tar in select_references.matched_reference_clusters_fastas_tars) {
170177

171178
call utils.tar_extract {
@@ -257,6 +264,8 @@ workflow assemble_denovo_metagenomic {
257264

258265
"biosample_accession" : select_first([biosample_accession, ""]),
259266

267+
"batch_ids" : unique_batch_ids.sorted_unique_joined,
268+
260269
"sample": '{"entityType":"sample","entityName":"' + sample_id + '"}'
261270
}
262271

@@ -326,5 +335,7 @@ workflow assemble_denovo_metagenomic {
326335
Array[Int] assembly_all_lengths_unambig = assembly_length_unambiguous
327336
Array[Float] assembly_all_pct_ref_cov = percent_reference_covered
328337
Array[File] assembly_all_fastas = assembly_fasta
338+
339+
String batch_ids = unique_batch_ids.sorted_unique_joined
329340
}
330341
}

0 commit comments

Comments
 (0)