Skip to content

Commit 48aa7bd

Browse files
author
Sherrie Wang
committed
added parse quast report, fixed quast provenance parsing
1 parent 8946115 commit 48aa7bd

4 files changed

Lines changed: 157 additions & 28 deletions

File tree

bin/parse_quast_report.py

Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
#!/usr/bin/env python3
2+
3+
import argparse
4+
import collections
5+
import csv
6+
import json
7+
import sys
8+
9+
10+
def parse_transposed_quast_report(transposed_quast_report_path):
    """
    Parse a QUAST transposed report (tab-separated) into a list of records.

    Each data row of the report becomes one ``collections.OrderedDict`` whose
    keys are the pipeline's field names (``assembly_id``, ``total_length``,
    ...), in the order of the column table below.

    :param transposed_quast_report_path: Path to the tab-separated report.
    :return: List of ``OrderedDict`` records, one per data row. Numeric
        fields are ``int`` (or ``float`` for ``num_N_per_100_kb``); a value
        that is absent from a short row, or that cannot be parsed as a
        number, is stored as ``None``. ``assembly_id`` is kept as read.
    :raises KeyError: If an expected column is missing from the header.
    """
    # (report column header, output field name, converter). A converter of
    # None means the raw string is kept unchanged.
    columns = (
        ('Assembly', 'assembly_id', None),
        ('Total length', 'total_length', int),
        ('# contigs', 'num_contigs', int),
        ('Largest contig', 'largest_contig', int),
        ('N50', 'assembly_N50', int),
        ('N75', 'assembly_N75', int),
        ('L50', 'assembly_L50', int),
        ('L75', 'assembly_L75', int),
        ("# N's per 100 kbp", 'num_N_per_100_kb', float),
        ('# contigs (>= 0 bp)', 'num_contigs_gt_0_bp', int),
        ('# contigs (>= 1000 bp)', 'num_contigs_gt_1000_bp', int),
        ('# contigs (>= 5000 bp)', 'num_contigs_gt_5000_bp', int),
        ('# contigs (>= 10000 bp)', 'num_contigs_gt_10000_bp', int),
        ('# contigs (>= 25000 bp)', 'num_contigs_gt_25000_bp', int),
        ('# contigs (>= 50000 bp)', 'num_contigs_gt_50000_bp', int),
        ('Total length (>= 0 bp)', 'total_length_gt_0_bp', int),
        ('Total length (>= 1000 bp)', 'total_length_gt_1000_bp', int),
        ('Total length (>= 5000 bp)', 'total_length_gt_5000_bp', int),
        ('Total length (>= 10000 bp)', 'total_length_gt_10000_bp', int),
        ('Total length (>= 25000 bp)', 'total_length_gt_25000_bp', int),
        ('Total length (>= 50000 bp)', 'total_length_gt_50000_bp', int),
    )

    parsed_report = []
    with open(transposed_quast_report_path, 'r', newline='') as report_file:
        reader = csv.DictReader(report_file, dialect='excel-tab')
        for row in reader:
            record = collections.OrderedDict()
            for column, field, converter in columns:
                # A missing header column is a schema problem: let the
                # KeyError propagate rather than silently emitting blanks.
                raw_value = row[column]
                if converter is None:
                    record[field] = raw_value
                else:
                    record[field] = _convert_or_none(raw_value, converter)
            parsed_report.append(record)

    return parsed_report


def _convert_or_none(value, converter):
    """
    Apply ``converter`` to ``value``, returning ``None`` when it fails.

    ``ValueError`` covers non-numeric text; ``TypeError`` covers the ``None``
    that ``csv.DictReader`` substitutes for cells missing from a short row.
    """
    try:
        return converter(value)
    except (TypeError, ValueError):
        return None
86+
87+
88+
89+
def main():
    """
    Command-line entry point.

    Reads the transposed QUAST report named by the single positional
    argument, parses it, and writes the records to stdout as CSV with a
    header row.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('transposed_quast_report')
    cli_args = arg_parser.parse_args()

    # Output column order: identifier, summary metrics, the per-threshold
    # contig counts and total lengths, then the N-content metric last.
    size_thresholds = (0, 1000, 5000, 10000, 25000, 50000)
    output_fieldnames = [
        'assembly_id',
        'total_length',
        'num_contigs',
        'largest_contig',
        'assembly_N50',
        'assembly_N75',
        'assembly_L50',
        'assembly_L75',
    ]
    output_fieldnames.extend(
        'num_contigs_gt_{}_bp'.format(threshold) for threshold in size_thresholds
    )
    output_fieldnames.extend(
        'total_length_gt_{}_bp'.format(threshold) for threshold in size_thresholds
    )
    output_fieldnames.append('num_N_per_100_kb')

    records = parse_transposed_quast_report(cli_args.transposed_quast_report)

    csv_writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames)
    csv_writer.writeheader()
    csv_writer.writerows(records)


if __name__ == '__main__':
    main()

main.nf

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,8 +10,8 @@ include { parse_quast_report } from './modules/quast.nf'
1010

1111
process RUN_SHOVILL {
1212
tag "$sample_id"
13-
publishDir "${params.outdir}/${sample_id}", mode: 'copy'
14-
13+
publishDir "${params.outdir}/${sample_id}_contigs.fa", mode: 'copy'
14+
publishDir "${params.outdir}/${sample_id}_shovill.log", mode: 'copy'
1515

1616
input:
1717
tuple val(sample_id), path(reads_1), path(reads_2)
@@ -104,7 +104,7 @@ workflow {
104104
cutadapter(fastp.out.trimmed_reads)
105105
RUN_SHOVILL(cutadapter.out.out_reads)
106106

107-
quast(RUN_SHOVILL.out.assembly)
107+
quast(RUN_SHOVILL.out.contigs)
108108
parse_quast_report(quast.out.tsv)
109109

110110
ch_provenance = ch_fastq.map{ it -> it[0] }

modules/quast.nf

Lines changed: 23 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,56 +1,56 @@
11
process quast {
22

3-
tag { sample_id + ' / ' + assembly_mode }
3+
tag { sample_id + ' /short '}
44

55
input:
6-
tuple val(sample_id), path(assembly), val(assembler), val(assembly_mode)
6+
tuple val(sample_id), path(assembly)
77

88
output:
9-
tuple val(sample_id), path("${sample_id}_${assembler}_${assembly_mode}_quast.tsv"), val(assembler), val(assembly_mode), emit: tsv
10-
tuple val(sample_id), path("${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml"), emit: provenance
9+
tuple val(sample_id), path("${sample_id}_${params.assembler}_short_quast.tsv"), emit: tsv
10+
tuple val(sample_id), path("${sample_id}_${params.assembler}_short_quast_provenance.yml"), emit: provenance
1111

1212
script:
1313
"""
14-
printf -- "- process_name: quast\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
15-
printf -- " tools:\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
16-
printf -- " - tool_name: quast\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
17-
printf -- " tool_version: \$(quast --version | cut -d ' ' -f 2 | tr -d 'v')\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
18-
printf -- " parameters:\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
19-
printf -- " - parameter: --space-efficient\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
20-
printf -- " value: null\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
21-
printf -- " - parameter: --fast\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
22-
printf -- " value: null\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
23-
printf -- " - parameter: --min-contig\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
24-
printf -- " value: 0\\n" >> ${sample_id}_${assembler}_${assembly_mode}_quast_provenance.yml
14+
printf -- "- process_name: quast\\n" >> ${sample_id}_${params.assembler}_short_quast_provenance.yml
15+
printf -- " tools:\\n" >> ${sample_id}_${params.assembler}_short_quast_provenance.yml
16+
printf -- " - tool_name: quast\\n" >> ${sample_id}_${params.assembler}_short_quast_provenance.yml
17+
printf -- " tool_version: \$(quast.py --version 2>&1 | awk '/QUAST v/{gsub(/.*QUAST v/, ""); gsub(/ .*/, ""); print}')\\n" >> ${sample_id}_${params.assembler}_short_quast_provenance.yml
18+
printf -- " parameters:\\n" >> ${sample_id}_${params.assembler}_short_quast_provenance.yml
19+
printf -- " - parameter: --space-efficient\\n" >> ${sample_id}_${params.assembler}_short_quast_provenance.yml
20+
printf -- " value: null\\n" >> ${sample_id}_${params.assembler}_short_quast_provenance.yml
21+
printf -- " - parameter: --fast\\n" >> ${sample_id}_${params.assembler}_short_quast_provenance.yml
22+
printf -- " value: null\\n" >> ${sample_id}_${params.assembler}_short_quast_provenance.yml
23+
printf -- " - parameter: --min-contig\\n" >> ${sample_id}_${params.assembler}_short_quast_provenance.yml
24+
printf -- " value: 0\\n" >> ${sample_id}_${params.assembler}_short_quast_provenance.yml
2525
2626
quast \
2727
--threads ${task.cpus} \
2828
--space-efficient \
2929
--fast \
30-
--min-contig 0 \
30+
--min-contig 0 \
31+
--x-for-Nx 75\
3132
--output-dir ${sample_id} \
3233
${assembly}
3334
34-
mv ${sample_id}/transposed_report.tsv ${sample_id}_${assembler}_${assembly_mode}_quast.tsv
35+
mv ${sample_id}/transposed_report.tsv ${sample_id}_${params.assembler}_short_quast.tsv
3536
"""
3637
}
3738

3839
process parse_quast_report {
3940

40-
tag { sample_id + ' / ' + assembly_mode }
41+
tag { sample_id + ' /short '}
4142

4243
executor 'local'
4344

44-
publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_${assembler}_${assembly_mode}_quast.csv", mode: 'copy'
45+
publishDir "${params.outdir}/${sample_id}", pattern: "${sample_id}_${params.assembler}_short_quast.csv", mode: 'copy'
4546

4647
input:
47-
tuple val(sample_id), path(quast_report), val(assembler), val(assembly_mode)
48-
48+
tuple val(sample_id), path(quast_report)
4949
output:
50-
tuple val(sample_id), path("${sample_id}_${assembler}_${assembly_mode}_quast.csv")
50+
tuple val(sample_id), path("${sample_id}_${params.assembler}_short_quast.csv")
5151

5252
script:
5353
"""
54-
parse_quast_report.py ${quast_report} > ${sample_id}_${assembler}_${assembly_mode}_quast.csv
54+
parse_quast_report.py ${quast_report} > ${sample_id}_${params.assembler}_short_quast.csv
5555
"""
5656
}

nextflow.config

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
manifest {
22
author = 'Sherrie Wang'
33
name = 'BCCDC-PHL/nf-shovill'
4-
version = '0.0.7'
4+
version = '0.1.2'
55
description = 'BCCDC-PHL Bacterial Assembly'
66
mainScript = 'main.nf'
77
nextflowVersion = '>=20.01.0'
@@ -53,7 +53,7 @@ profiles {
5353
apptainer {
5454
apptainer.enabled = true
5555
apptainer.autoMounts = true
56-
process.container = "oras://community.wave.seqera.io/library/nf-shovill:b0425123a400cf5d"
56+
process.container = "oras://community.wave.seqera.io/library/nf-shovill:1a5464527d03159e"
5757
process.containerOptions = '--env PYTHONWARNINGS=ignore'
5858
if (params.cache){
5959
apptainer.cacheDir = params.cache

0 commit comments

Comments
 (0)