1+ #!/usr/bin/env python3
2+
3+ import argparse
4+ import collections
5+ import csv
6+ import json
7+ import sys
8+
9+
def parse_transposed_quast_report(transposed_quast_report_path):
    """
    Parse a tab-delimited transposed QUAST report into a list of records.

    Each data row of the file becomes one ``collections.OrderedDict`` whose
    keys are the output field names defined in ``field_lookup`` below, in
    that order. Every field except ``assembly_id`` is converted to ``int``
    (or ``float`` for ``num_N_per_100_kb``).

    A value is stored as ``None`` when it cannot be converted, when its
    column is absent from the report header, or when the row is shorter
    than the header. ``assembly_id`` is kept as the raw string (``None``
    if the column is missing).

    :param transposed_quast_report_path: Path to the tab-delimited report.
    :return: List of ``collections.OrderedDict``, one per data row.
    """
    # (report column header, output field name, converter). A converter of
    # None means "keep the raw string". The order here fixes the key order
    # of every returned record.
    field_lookup = [
        ('Assembly', 'assembly_id', None),
        ('Total length', 'total_length', int),
        ('# contigs', 'num_contigs', int),
        ('Largest contig', 'largest_contig', int),
        ('N50', 'assembly_N50', int),
        ('N75', 'assembly_N75', int),
        ('L50', 'assembly_L50', int),
        ('L75', 'assembly_L75', int),
        ("# N's per 100 kbp", 'num_N_per_100_kb', float),
        ('# contigs (>= 0 bp)', 'num_contigs_gt_0_bp', int),
        ('# contigs (>= 1000 bp)', 'num_contigs_gt_1000_bp', int),
        ('# contigs (>= 5000 bp)', 'num_contigs_gt_5000_bp', int),
        ('# contigs (>= 10000 bp)', 'num_contigs_gt_10000_bp', int),
        ('# contigs (>= 25000 bp)', 'num_contigs_gt_25000_bp', int),
        ('# contigs (>= 50000 bp)', 'num_contigs_gt_50000_bp', int),
        ('Total length (>= 0 bp)', 'total_length_gt_0_bp', int),
        ('Total length (>= 1000 bp)', 'total_length_gt_1000_bp', int),
        ('Total length (>= 5000 bp)', 'total_length_gt_5000_bp', int),
        ('Total length (>= 10000 bp)', 'total_length_gt_10000_bp', int),
        ('Total length (>= 25000 bp)', 'total_length_gt_25000_bp', int),
        ('Total length (>= 50000 bp)', 'total_length_gt_50000_bp', int),
    ]

    parsed_report = []
    with open(transposed_quast_report_path, 'r', newline='') as report_file:
        reader = csv.DictReader(report_file, dialect='excel-tab')
        for row in reader:
            record = collections.OrderedDict()
            for column_name, output_name, converter in field_lookup:
                # .get() instead of [] so that a report lacking a column
                # yields None for that field rather than raising KeyError.
                raw_value = row.get(column_name)
                if converter is None:
                    record[output_name] = raw_value
                    continue
                try:
                    record[output_name] = converter(raw_value)
                except (TypeError, ValueError):
                    # ValueError: non-numeric text in the cell.
                    # TypeError: raw_value is None (missing column, or a
                    # row shorter than the header).
                    record[output_name] = None
            parsed_report.append(record)

    return parsed_report
86+
87+
88+
def main():
    """
    Command-line entry point.

    Takes one positional argument, the path to a transposed QUAST report,
    parses it with ``parse_transposed_quast_report`` and writes the records
    to standard output as CSV with a header row.
    """
    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument('transposed_quast_report')
    cli_args = arg_parser.parse_args()

    # Output column order: summary statistics first, then the per-threshold
    # contig counts and total lengths, with the N-content column last.
    summary_columns = [
        'assembly_id',
        'total_length',
        'num_contigs',
        'largest_contig',
        'assembly_N50',
        'assembly_N75',
        'assembly_L50',
        'assembly_L75',
    ]
    contig_count_columns = [
        'num_contigs_gt_0_bp',
        'num_contigs_gt_1000_bp',
        'num_contigs_gt_5000_bp',
        'num_contigs_gt_10000_bp',
        'num_contigs_gt_25000_bp',
        'num_contigs_gt_50000_bp',
    ]
    total_length_columns = [
        'total_length_gt_0_bp',
        'total_length_gt_1000_bp',
        'total_length_gt_5000_bp',
        'total_length_gt_10000_bp',
        'total_length_gt_25000_bp',
        'total_length_gt_50000_bp',
    ]
    output_fieldnames = (
        summary_columns
        + contig_count_columns
        + total_length_columns
        + ['num_N_per_100_kb']
    )

    # Parse before creating the writer so nothing is printed if parsing fails.
    records = parse_transposed_quast_report(cli_args.transposed_quast_report)

    csv_writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames)
    csv_writer.writeheader()
    csv_writer.writerows(records)
125+
126+
127+
# Run main() only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
0 commit comments