Skip to content

Commit 54c03de

Browse files
committed
commands/describe: Track average Phred score per position
1 parent 921a9b1 commit 54c03de

File tree

1 file changed

+94
-2
lines changed

1 file changed

+94
-2
lines changed

src/commands/describe.rs

+94-2
Original file line numberDiff line numberDiff line change
@@ -12,18 +12,25 @@ pub fn describe(args: DescribeArgs) -> io::Result<()> {
1212
let mut metrics = Metrics::default();
1313

1414
while reader.read_record(&mut record)? != 0 {
15-
visit(&mut metrics, &record);
15+
visit(&mut metrics, &record)?;
1616
}
1717

1818
print_metrics(&metrics);
1919

2020
Ok(())
2121
}
2222

23+
/// Running total of the error probabilities observed at one read position.
#[derive(Clone, Copy, Default)]
struct ErrorProbability {
    /// Sum of the error probabilities added so far.
    sum: f64,
    /// Number of probabilities that were added to `sum`.
    count: u64,
}
28+
2329
struct Metrics {
2430
record_count: u64,
2531
min_sequence_length: usize,
2632
max_sequence_length: usize,
33+
error_probability_sums_per_position: Vec<ErrorProbability>,
2734
}
2835

2936
impl Default for Metrics {
@@ -32,17 +39,56 @@ impl Default for Metrics {
3239
record_count: 0,
3340
min_sequence_length: usize::MAX,
3441
max_sequence_length: usize::MIN,
42+
error_probability_sums_per_position: Vec::new(),
3543
}
3644
}
3745
}
3846

39-
fn visit(metrics: &mut Metrics, record: &Record) {
47+
fn visit(metrics: &mut Metrics, record: &Record) -> io::Result<()> {
4048
metrics.record_count += 1;
4149

4250
let read_length = record.sequence().len();
4351

4452
metrics.min_sequence_length = metrics.min_sequence_length.min(read_length);
4553
metrics.max_sequence_length = metrics.max_sequence_length.max(read_length);
54+
55+
if read_length > metrics.error_probability_sums_per_position.len() {
56+
metrics
57+
.error_probability_sums_per_position
58+
.resize(read_length, ErrorProbability::default());
59+
}
60+
61+
for (quality_score, error_probability) in record
62+
.quality_scores()
63+
.iter()
64+
.zip(&mut metrics.error_probability_sums_per_position)
65+
{
66+
let q = decode_score(*quality_score)?;
67+
let p = phred_score_to_error_probability(q);
68+
error_probability.sum += p;
69+
error_probability.count += 1;
70+
}
71+
72+
Ok(())
73+
}
74+
75+
/// Decodes an encoded quality character into its raw score by subtracting the `!` offset.
///
/// Only bytes in the printable range `b'!'..=b'~'` are accepted, which maps to scores
/// `0..=93`.
///
/// # Errors
///
/// Returns an `io::ErrorKind::InvalidData` error when `c` is outside `b'!'..=b'~'`.
fn decode_score(c: u8) -> io::Result<u8> {
    const OFFSET: u8 = b'!';
    const MAX: u8 = b'~';

    match c {
        // The subtraction cannot underflow: the pattern guarantees `c >= OFFSET`.
        OFFSET..=MAX => Ok(c - OFFSET),
        _ => Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "invalid quality score",
        )),
    }
}
81+
82+
// https://en.wikipedia.org/wiki/Phred_quality_score#Definition
//
// The two conversions below use Q = -FACTOR * log_BASE(P) and its inverse.
const BASE: f64 = 10.0;
const FACTOR: f64 = 10.0;

/// Converts a Phred score `n` into an error probability, `BASE^(-n / FACTOR)`.
fn phred_score_to_error_probability(n: u8) -> f64 {
    let exponent = -f64::from(n) / FACTOR;
    BASE.powf(exponent)
}
89+
90+
fn error_probability_to_phred_score(p: f64) -> f64 {
91+
-FACTOR * p.log10()
4692
}
4793

4894
fn print_metrics(metrics: &Metrics) {
@@ -65,4 +111,50 @@ fn print_metrics(metrics: &Metrics) {
65111
};
66112

67113
println!("max_sequence_length\t{max_sequence_length}");
114+
115+
let avg_quality_score_per_position: Vec<_> = metrics
116+
.error_probability_sums_per_position
117+
.iter()
118+
.map(|error_probability| {
119+
let n = error_probability.count as f64;
120+
let avg_error_probability = error_probability.sum / n;
121+
error_probability_to_phred_score(avg_error_probability)
122+
})
123+
.collect();
124+
125+
println!("avg_quality_score_per_position\t{avg_quality_score_per_position:?}");
126+
}
127+
128+
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_decode_score() -> io::Result<()> {
        // Both ends of the `!`..=`~` range decode to an offset-free score.
        for (encoded, expected) in [(b'!', 0), (b'~', 93)] {
            assert_eq!(decode_score(encoded)?, expected);
        }

        // A byte below the `!` offset cannot be decoded.
        let kind = decode_score(0x00).map_err(|e| e.kind());
        assert!(matches!(kind, Err(io::ErrorKind::InvalidData)));

        Ok(())
    }

    #[test]
    fn test_phred_score_to_error_probability() {
        let cases = [(0, 1.0), (10, 0.1), (20, 0.01), (30, 0.001), (40, 0.0001)];

        for (score, expected) in cases {
            assert_eq!(phred_score_to_error_probability(score), expected);
        }
    }

    #[test]
    fn test_error_probability_to_phred_score() {
        let cases = [(1.0, 0.0), (0.1, 10.0), (0.01, 20.0), (0.001, 30.0), (0.0001, 40.0)];

        for (probability, expected) in cases {
            assert_eq!(error_probability_to_phred_score(probability), expected);
        }
    }
}

0 commit comments

Comments
 (0)