@@ -9,14 +9,17 @@ def parse_seqtk_fqchk_output(seqtk_fqchk_output_path, quality_threshold):
9
9
with open (seqtk_fqchk_output_path , 'r' ) as f :
10
10
reader = csv .DictReader (f )
11
11
for row in reader :
12
- num_bases_and_avg_q = {}
12
+ parsed_row = {}
13
13
if row ['position' ] == 'ALL' :
14
14
percent_above_header = 'percent_bases_above_q' + str (quality_threshold )
15
- num_bases_and_avg_q ['num_bases' ] = int (row ['num_bases' ])
16
- num_bases_and_avg_q ['average_q' ] = float (row ['average_q' ])
17
- num_bases_and_avg_q [percent_above_header ] = float (row [percent_above_header ])
18
- output .append (num_bases_and_avg_q )
19
-
15
+ parsed_row ['num_bases' ] = int (row ['num_bases' ])
16
+ parsed_row ['average_q' ] = float (row ['average_q' ])
17
+ parsed_row [percent_above_header ] = float (row [percent_above_header ])
18
+ percent_g = float (row ['percent_g' ])
19
+ percent_c = float (row ['percent_c' ])
20
+ parsed_row ['percent_gc' ] = percent_g + percent_c
21
+ output .append (parsed_row )
22
+
20
23
return output
21
24
22
25
@@ -34,6 +37,8 @@ def main(args):
34
37
35
38
total_bases = sum ([x ['num_bases' ] for x in seqtk_fqchk_output ])
36
39
40
+ overall_percent_gc = sum ([x ['percent_gc' ] * x ['num_bases' ] for x in seqtk_fqchk_output ]) / total_bases
41
+
37
42
overall_average_q = sum ([x ['average_q' ] * x ['num_bases' ] for x in seqtk_fqchk_output ]) / total_bases
38
43
39
44
percent_above_header = 'percent_bases_above_q' + str (quality_threshold )
@@ -42,13 +47,15 @@ def main(args):
42
47
43
48
print (',' .join ([
44
49
'sample_id' ,
50
+ 'percent_gc' ,
45
51
'total_bases' ,
46
52
'average_base_quality' ,
47
53
percent_above_header ,
48
54
]))
49
55
50
56
print (',' .join ([
51
57
args .sample_id ,
58
+ str (round (overall_percent_gc , 3 )),
52
59
str (total_bases ),
53
60
str (round (overall_average_q , 3 )),
54
61
str (round (overall_percent_above_threshold , 3 )),
0 commit comments