8
8
def parse_read_summary (summary_path ):
9
9
read_summary_headers = []
10
10
read_summary_lines = []
11
- # Basic approach to parsing text between two specific lines
12
- # described here: https://stackoverflow.com/a/7559542/780188
11
+
12
+ replaced_fields = {'%>=q30' : 'percent_greater_than_q30' ,
13
+ '%_occupied' : 'percent_occupied' }
14
+
13
15
with open (summary_path ) as summary :
14
16
for line in summary :
15
17
if re .match ("^Level" , line ):
16
18
read_summary_headers = re .split ("\s*," , line .rstrip ())
17
19
read_summary_headers = [
18
20
x .lower ().replace (" " , "_" ) for x in read_summary_headers
19
21
]
20
- read_summary_headers = [
21
- x .replace ("%>=q30" , "percent_greater_than_q30" ) for x in read_summary_headers
22
- ]
22
+ for idx , header in enumerate (read_summary_headers ):
23
+ if header in replaced_fields :
24
+ read_summary_headers [idx ] = replaced_fields [header ]
25
+
23
26
break
24
27
for line in summary :
25
28
if re .match ("^Total" , line ):
26
- read_summary_lines .append (re .split ("\s* ," , line .rstrip ()))
29
+ read_summary_lines .append (re .split ("," , line .rstrip ()))
27
30
break
28
- read_summary_lines .append (re .split ("\s*," , line .rstrip ()))
31
+ else :
32
+ read_summary_lines .append (re .split ("," , line .rstrip ()))
29
33
30
34
read_summary = []
31
35
for line in read_summary_lines :
@@ -41,14 +45,15 @@ def parse_read_summary(summary_path):
41
45
42
46
return read_summary
43
47
48
+
44
49
def parse_read_summary_detail (summary_path ):
45
50
headers = [
46
51
'lane' ,
47
52
'surface' ,
48
53
'tiles' ,
49
54
'density' ,
50
55
'clusters_passing_filter' ,
51
- 'legacy_pasing_prephasing_rate ' ,
56
+ 'legacy_phasing_prephasing_rate ' ,
52
57
'phasing_slope_offset' ,
53
58
'prephasing_slope_offset' ,
54
59
'reads' ,
@@ -64,6 +69,31 @@ def parse_read_summary_detail(summary_path):
64
69
'percent_occupied' ,
65
70
'intensity_at_cycle_1' ,
66
71
]
72
+ average_stdev_fields = [
73
+ 'aligned' ,
74
+ 'clusters_passing_filter' ,
75
+ 'density' ,
76
+ 'error' ,
77
+ 'error_100' ,
78
+ 'error_75' ,
79
+ 'error_35' ,
80
+ 'intensity_at_cycle_1' ,
81
+ 'percent_occupied' ,
82
+ ]
83
+ slash_fields = { 'legacy_phasing_prephasing_rate' : {'numerator_field' : 'legacy_phasing_rate' ,
84
+ 'denominator_field' : 'legacy_prephasing_rate' },
85
+ 'phasing_slope_offset' : {'numerator_field' : 'phasing_slope' ,
86
+ 'denominator_field' : 'phasing_offset' },
87
+ 'prephasing_slope_offset' : {'numerator_field' : 'prephasing_slope' ,
88
+ 'denominator_field' : 'prephasing_offset' },
89
+ }
90
+ float_fields = [
91
+ 'percent_greater_than_q30' ,
92
+ 'reads' ,
93
+ 'reads_passing_filter' ,
94
+ 'yield' ,
95
+ ]
96
+
67
97
lines_by_read = {
68
98
'read_1' : [],
69
99
'read_i1' : [],
@@ -73,15 +103,13 @@ def parse_read_summary_detail(summary_path):
73
103
with open (summary_path ) as summary :
74
104
current_read = None
75
105
for line in summary :
76
- if re .match ("^Read 1$" , line ):
106
+ if re .match ("^Read 1\n $" , line ):
77
107
current_read = 'read_1'
78
- elif re .match ("^Read 2 \(I\)$" , line ):
108
+ elif re .match ("^Read 2 \(I\)\n $" , line ):
79
109
current_read = 'read_i1'
80
- elif re .match ("^Read 3 \(I\)$" , line ):
110
+ elif re .match ("^Read 3 \(I\)\n $" , line ):
81
111
current_read = 'read_i2'
82
- elif re .match ("^Read 4$" , line ):
83
- current_read = 'read_2'
84
- elif re .match ("^Read 4$" , line ):
112
+ elif re .match ("^Read 4$\n " , line ):
85
113
current_read = 'read_2'
86
114
elif re .match ("^Extracted" , line ) or re .match ("^Called" , line ) or re .match ("^Scored" , line ):
87
115
current_read = None
@@ -91,15 +119,33 @@ def parse_read_summary_detail(summary_path):
91
119
lines_by_read [current_read ].append (read_line_dict )
92
120
else :
93
121
pass
122
+
123
+ for field in average_stdev_fields :
124
+ string_value = read_line_dict [field ]
125
+ [average , stdev ] = [float (value ) for value in string_value .split (' +/- ' )]
126
+ read_line_dict [field ] = { 'average' : average ,
127
+ 'stdev' : stdev }
128
+
129
+ for field , num_denom in slash_fields .items ():
130
+ string_value = read_line_dict [field ]
131
+ numerator_field = num_denom ['numerator_field' ]
132
+ denominator_field = num_denom ['denominator_field' ]
133
+ [numerator , denominator ] = [float (value ) for value in string_value .split (' / ' )]
134
+ read_line_dict [numerator_field ] = numerator
135
+ read_line_dict [denominator_field ] = denominator
136
+ read_line_dict .pop (field , None )
94
137
95
138
return lines_by_read
96
139
140
+
97
141
def main(args):
    """Parse a run summary file and print the combined result as JSON.

    Both parsers are run over the same input file and their results are
    emitted together as a single JSON object on stdout.

    Args:
        args: Object exposing a ``summary`` attribute that holds the path
            handed to both parsers (presumably the argparse namespace
            built in the ``__main__`` guard -- confirm against caller).

    Output:
        A JSON object with two keys:
        ``read_summary``  -- the value returned by ``parse_read_summary``.
        ``read_details``  -- the value returned by
        ``parse_read_summary_detail``.
    """
    read_summary = parse_read_summary(args.summary)
    read_summary_detail = parse_read_summary_detail(args.summary)

    # Emit both views under one top-level object so consumers get a single
    # JSON document rather than having to choose one parser's output.
    output = {
        'read_summary': read_summary,
        'read_details': read_summary_detail,
    }
    print(json.dumps(output))
103
149
104
150
if __name__ == '__main__' :
105
151
parser = argparse .ArgumentParser ()
0 commit comments