@@ -12,18 +12,25 @@ pub fn describe(args: DescribeArgs) -> io::Result<()> {
12
12
let mut metrics = Metrics :: default ( ) ;
13
13
14
14
while reader. read_record ( & mut record) ? != 0 {
15
- visit ( & mut metrics, & record) ;
15
+ visit ( & mut metrics, & record) ? ;
16
16
}
17
17
18
18
print_metrics ( & metrics) ;
19
19
20
20
Ok ( ( ) )
21
21
}
22
22
23
/// Running total of error probabilities observed at one read position.
///
/// The mean error probability for the position is `sum / count`; the default
/// value has both fields set to zero.
#[derive(Clone, Copy, Debug, Default)]
struct ErrorProbability {
    /// Sum of the error probabilities added so far.
    sum: f64,
    /// Number of error probabilities added to `sum`.
    count: u64,
}
28
+
23
29
struct Metrics {
24
30
record_count : u64 ,
25
31
min_sequence_length : usize ,
26
32
max_sequence_length : usize ,
33
+ error_probability_sums_per_position : Vec < ErrorProbability > ,
27
34
}
28
35
29
36
impl Default for Metrics {
@@ -32,17 +39,56 @@ impl Default for Metrics {
32
39
record_count : 0 ,
33
40
min_sequence_length : usize:: MAX ,
34
41
max_sequence_length : usize:: MIN ,
42
+ error_probability_sums_per_position : Vec :: new ( ) ,
35
43
}
36
44
}
37
45
}
38
46
39
- fn visit ( metrics : & mut Metrics , record : & Record ) {
47
+ fn visit ( metrics : & mut Metrics , record : & Record ) -> io :: Result < ( ) > {
40
48
metrics. record_count += 1 ;
41
49
42
50
let read_length = record. sequence ( ) . len ( ) ;
43
51
44
52
metrics. min_sequence_length = metrics. min_sequence_length . min ( read_length) ;
45
53
metrics. max_sequence_length = metrics. max_sequence_length . max ( read_length) ;
54
+
55
+ if read_length > metrics. error_probability_sums_per_position . len ( ) {
56
+ metrics
57
+ . error_probability_sums_per_position
58
+ . resize ( read_length, ErrorProbability :: default ( ) ) ;
59
+ }
60
+
61
+ for ( quality_score, error_probability) in record
62
+ . quality_scores ( )
63
+ . iter ( )
64
+ . zip ( & mut metrics. error_probability_sums_per_position )
65
+ {
66
+ let q = decode_score ( * quality_score) ?;
67
+ let p = phred_score_to_error_probability ( q) ;
68
+ error_probability. sum += p;
69
+ error_probability. count += 1 ;
70
+ }
71
+
72
+ Ok ( ( ) )
73
+ }
74
+
75
/// Decodes a raw quality character into its numeric quality score.
///
/// The score is the byte value minus the `b'!'` offset, so `b'!'` maps to 0
/// and `b'~'` maps to 93.
///
/// # Errors
///
/// Returns an [`io::ErrorKind::InvalidData`] error when `c` lies outside the
/// printable range `b'!'..=b'~'`.
fn decode_score(c: u8) -> io::Result<u8> {
    const OFFSET: u8 = b'!';
    const MAX: u8 = b'~';

    match c {
        // In range, so the subtraction cannot underflow.
        OFFSET..=MAX => Ok(c - OFFSET),
        _ => Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "invalid quality score",
        )),
    }
}
81
+
82
// Phred scale constants: Q = -FACTOR * log_BASE(P), hence P = BASE^(-Q / FACTOR).
// https://en.wikipedia.org/wiki/Phred_quality_score#Definition
const BASE: f64 = 10.0;
const FACTOR: f64 = 10.0;

/// Converts a Phred quality score into the error probability it encodes,
/// computed as `BASE^(-n / FACTOR)`.
fn phred_score_to_error_probability(n: u8) -> f64 {
    let exponent = -f64::from(n) / FACTOR;
    BASE.powf(exponent)
}
89
+
90
+ fn error_probability_to_phred_score ( p : f64 ) -> f64 {
91
+ -FACTOR * p. log10 ( )
46
92
}
47
93
48
94
fn print_metrics ( metrics : & Metrics ) {
@@ -65,4 +111,50 @@ fn print_metrics(metrics: &Metrics) {
65
111
} ;
66
112
67
113
println ! ( "max_sequence_length\t {max_sequence_length}" ) ;
114
+
115
+ let avg_quality_score_per_position: Vec < _ > = metrics
116
+ . error_probability_sums_per_position
117
+ . iter ( )
118
+ . map ( |error_probability| {
119
+ let n = error_probability. count as f64 ;
120
+ let avg_error_probability = error_probability. sum / n;
121
+ error_probability_to_phred_score ( avg_error_probability)
122
+ } )
123
+ . collect ( ) ;
124
+
125
+ println ! ( "avg_quality_score_per_position\t {avg_quality_score_per_position:?}" ) ;
126
+ }
127
+
128
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that `actual` is within a small tolerance of `expected`.
    ///
    /// `powf` and `log10` have unspecified precision, so results are compared
    /// with a tolerance instead of exact equality.
    #[track_caller]
    fn assert_close(actual: f64, expected: f64) {
        const TOLERANCE: f64 = 1e-12;

        // Scale the tolerance with the expected magnitude, but never below the
        // absolute tolerance so that comparisons against zero still work.
        let allowed = TOLERANCE * expected.abs().max(1.0);

        assert!(
            (actual - expected).abs() <= allowed,
            "expected {expected}, got {actual}"
        );
    }

    #[test]
    fn test_decode_score() -> io::Result<()> {
        assert_eq!(decode_score(b'!')?, 0);
        assert_eq!(decode_score(b'~')?, 93);
        assert!(matches!(
            decode_score(0x00),
            Err(e) if e.kind() == io::ErrorKind::InvalidData
        ));
        Ok(())
    }

    #[test]
    fn test_phred_score_to_error_probability() {
        assert_close(phred_score_to_error_probability(0), 1.0);
        assert_close(phred_score_to_error_probability(10), 0.1);
        assert_close(phred_score_to_error_probability(20), 0.01);
        assert_close(phred_score_to_error_probability(30), 0.001);
        assert_close(phred_score_to_error_probability(40), 0.0001);
    }

    #[test]
    fn test_error_probability_to_phred_score() {
        assert_close(error_probability_to_phred_score(1.0), 0.0);
        assert_close(error_probability_to_phred_score(0.1), 10.0);
        assert_close(error_probability_to_phred_score(0.01), 20.0);
        assert_close(error_probability_to_phred_score(0.001), 30.0);
        assert_close(error_probability_to_phred_score(0.0001), 40.0);
    }
}
0 commit comments