@@ -5,6 +5,7 @@ use tracing::info;
5
5
use crate :: {
6
6
cli:: DescribeArgs ,
7
7
fastq:: { self , Record } ,
8
+ metrics,
8
9
} ;
9
10
10
11
pub fn describe ( args : DescribeArgs ) -> io:: Result < ( ) > {
@@ -13,154 +14,19 @@ pub fn describe(args: DescribeArgs) -> io::Result<()> {
13
14
let mut reader = fastq:: open ( args. src ) ?;
14
15
let mut record = Record :: default ( ) ;
15
16
16
- let mut metrics = Metrics :: default ( ) ;
17
+ let mut metrics = metrics :: default ( ) ;
17
18
18
19
while reader. read_record ( & mut record) ? != 0 {
19
- visit ( & mut metrics, & record) ?;
20
- }
21
-
22
- print_metrics ( & metrics) ;
23
-
24
- info ! ( "done" ) ;
25
-
26
- Ok ( ( ) )
27
- }
28
-
29
- #[ derive( Clone , Copy , Default ) ]
30
- struct ErrorProbability {
31
- sum : f64 ,
32
- count : u64 ,
33
- }
34
-
35
- struct Metrics {
36
- record_count : u64 ,
37
- min_sequence_length : usize ,
38
- max_sequence_length : usize ,
39
- error_probability_sums_per_position : Vec < ErrorProbability > ,
40
- }
41
-
42
- impl Default for Metrics {
43
- fn default ( ) -> Self {
44
- Self {
45
- record_count : 0 ,
46
- min_sequence_length : usize:: MAX ,
47
- max_sequence_length : usize:: MIN ,
48
- error_probability_sums_per_position : Vec :: new ( ) ,
20
+ for metric in & mut metrics {
21
+ metric. visit ( & record) ?;
49
22
}
50
23
}
51
- }
52
-
53
- fn visit ( metrics : & mut Metrics , record : & Record ) -> io:: Result < ( ) > {
54
- metrics. record_count += 1 ;
55
24
56
- let read_length = record. sequence ( ) . len ( ) ;
57
-
58
- metrics. min_sequence_length = metrics. min_sequence_length . min ( read_length) ;
59
- metrics. max_sequence_length = metrics. max_sequence_length . max ( read_length) ;
60
-
61
- if read_length > metrics. error_probability_sums_per_position . len ( ) {
62
- metrics
63
- . error_probability_sums_per_position
64
- . resize ( read_length, ErrorProbability :: default ( ) ) ;
25
+ for metric in & metrics {
26
+ metric. println ( ) ;
65
27
}
66
28
67
- for ( quality_score, error_probability) in record
68
- . quality_scores ( )
69
- . iter ( )
70
- . zip ( & mut metrics. error_probability_sums_per_position )
71
- {
72
- let q = decode_score ( * quality_score) ?;
73
- let p = phred_score_to_error_probability ( q) ;
74
- error_probability. sum += p;
75
- error_probability. count += 1 ;
76
- }
29
+ info ! ( "done" ) ;
77
30
78
31
Ok ( ( ) )
79
32
}
80
-
81
- fn decode_score ( c : u8 ) -> io:: Result < u8 > {
82
- const OFFSET : u8 = b'!' ;
83
-
84
- c. checked_sub ( OFFSET )
85
- . ok_or_else ( || io:: Error :: new ( io:: ErrorKind :: InvalidData , "invalid quality score" ) )
86
- }
87
-
88
- // https://en.wikipedia.org/wiki/Phred_quality_score#Definition
89
- const BASE : f64 = 10.0 ;
90
- const FACTOR : f64 = 10.0 ;
91
-
92
- fn phred_score_to_error_probability ( n : u8 ) -> f64 {
93
- BASE . powf ( -f64:: from ( n) / FACTOR )
94
- }
95
-
96
- fn error_probability_to_phred_score ( p : f64 ) -> f64 {
97
- -FACTOR * p. log10 ( )
98
- }
99
-
100
- fn print_metrics ( metrics : & Metrics ) {
101
- let record_count = metrics. record_count ;
102
-
103
- println ! ( "record_count\t {record_count}" ) ;
104
-
105
- let min_sequence_length = if record_count == 0 {
106
- 0
107
- } else {
108
- metrics. min_sequence_length
109
- } ;
110
-
111
- println ! ( "min_sequence_length\t {min_sequence_length}" ) ;
112
-
113
- let max_sequence_length = if record_count == 0 {
114
- 0
115
- } else {
116
- metrics. max_sequence_length
117
- } ;
118
-
119
- println ! ( "max_sequence_length\t {max_sequence_length}" ) ;
120
-
121
- let avg_quality_score_per_position: Vec < _ > = metrics
122
- . error_probability_sums_per_position
123
- . iter ( )
124
- . map ( |error_probability| {
125
- let n = error_probability. count as f64 ;
126
- let avg_error_probability = error_probability. sum / n;
127
- error_probability_to_phred_score ( avg_error_probability)
128
- } )
129
- . collect ( ) ;
130
-
131
- println ! ( "avg_quality_score_per_position\t {avg_quality_score_per_position:?}" ) ;
132
- }
133
-
134
- #[ cfg( test) ]
135
- mod tests {
136
- use super :: * ;
137
-
138
- #[ test]
139
- fn test_decode_score ( ) -> io:: Result < ( ) > {
140
- assert_eq ! ( decode_score( b'!' ) ?, 0 ) ;
141
- assert_eq ! ( decode_score( b'~' ) ?, 93 ) ;
142
- assert ! ( matches!(
143
- decode_score( 0x00 ) ,
144
- Err ( e) if e. kind( ) == io:: ErrorKind :: InvalidData
145
- ) ) ;
146
- Ok ( ( ) )
147
- }
148
-
149
- #[ test]
150
- fn test_phred_score_to_error_probability ( ) {
151
- assert_eq ! ( phred_score_to_error_probability( 0 ) , 1.0 ) ;
152
- assert_eq ! ( phred_score_to_error_probability( 10 ) , 0.1 ) ;
153
- assert_eq ! ( phred_score_to_error_probability( 20 ) , 0.01 ) ;
154
- assert_eq ! ( phred_score_to_error_probability( 30 ) , 0.001 ) ;
155
- assert_eq ! ( phred_score_to_error_probability( 40 ) , 0.0001 ) ;
156
- }
157
-
158
- #[ test]
159
- fn test_error_probability_to_phred_score ( ) {
160
- assert_eq ! ( error_probability_to_phred_score( 1.0 ) , 0.0 ) ;
161
- assert_eq ! ( error_probability_to_phred_score( 0.1 ) , 10.0 ) ;
162
- assert_eq ! ( error_probability_to_phred_score( 0.01 ) , 20.0 ) ;
163
- assert_eq ! ( error_probability_to_phred_score( 0.001 ) , 30.0 ) ;
164
- assert_eq ! ( error_probability_to_phred_score( 0.0001 ) , 40.0 ) ;
165
- }
166
- }
0 commit comments