Skip to content

Commit baefa86

Browse files
committed
fixes
1 parent 877f21c commit baefa86

File tree

5 files changed

+62
-52
lines changed

5 files changed

+62
-52
lines changed

Cargo.lock

-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,6 @@ serde-aux = "4.1.2"
4949
seq_io = "0.3.1"
5050
thiserror = "1.0.37"
5151
proglog = {version = "0.3.0", features = ["pretty_counts"] }
52-
lazy_static = "1.5.0"
5352

5453
[dev-dependencies]
5554
csv = "1.1.6"

rust-toolchain.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
[toolchain]
2-
channel = "nightly"
2+
channel = "1.85"
33
components = ["rustfmt", "clippy"]

src/lib/bitenc.rs

+18-8
Original file line numberDiff line numberDiff line change
@@ -424,19 +424,28 @@ impl BitEnc {
424424
/// corresponding (IUPAC) base in this sequence. E.g. If the other sequence is an
425425
/// N, it will not match anything but an N, and if the other base is an R, it
426426
/// will match R, V, D, and N, since the latter IUPAC codes allow both A and G.
427+
///
428+
/// # Panics
429+
///
430+
/// Panics if the lengths and widths of the two sequences are not the same.
431+
#[must_use]
427432
pub fn hamming(&self, other: &BitEnc, max_mismatches: u32) -> u32 {
428433
assert!(self.len == other.len, "Both bitenc sequences must have the same length");
429434
assert!(self.width == other.width, "Both bitenc sequences must have the same width");
430435
let mut count: u32 = 0;
431436
let values_per_block = self.usable_bits_per_block / self.width;
432437
for block_index in 0..self.nr_blocks() {
433-
let intersection = self.storage[block_index] & other.storage[block_index];
434-
if intersection != self.storage[block_index] {
438+
// Get the bits that are different across the two blocks. These represent differences
439+
// in values (bases), where multiple values (bases) are stored in each block. We
440+
// save the block differences so we can efficiently count the differences below.
441+
let block_diff = self.storage[block_index] & !other.storage[block_index];
442+
if block_diff != 0 {
443+
// Scan through the values (bases) in the block, counting those values (bases)
444+
// that are different.
435445
let mut shift_i = 0;
436446
for _ in 0..values_per_block {
437-
let intersection_sub = (intersection >> shift_i) & self.mask;
438-
let self_sub = (self.storage[block_index] >> shift_i) & self.mask;
439-
if intersection_sub != self_sub {
447+
let block_diff_sub = (block_diff >> shift_i) & self.mask;
448+
if block_diff_sub != 0 {
440449
count += 1;
441450
}
442451
shift_i += self.width;
@@ -528,9 +537,10 @@ mod tests {
528537
// | 42 42 42 42 | 17 17 23 42 | 17 17 17 17 | 17 17 17 17 | __ __ 17 17 |
529538

530539
let values: Vec<u8> = bitenc.iter().collect();
531-
assert_eq!(values, [
532-
42, 42, 42, 42, 42, 23, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17
533-
]);
540+
assert_eq!(
541+
values,
542+
[42, 42, 42, 42, 42, 23, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17]
543+
);
534544
assert_eq!(bitenc.nr_blocks(), 5);
535545
assert_eq!(bitenc.nr_symbols(), 18);
536546
}

src/lib/mod.rs

+43-41
Original file line numberDiff line numberDiff line change
@@ -3,50 +3,48 @@ pub mod bitenc;
33
pub mod samples;
44

55
use crate::bitenc::BitEnc;
6-
use lazy_static::lazy_static;
6+
use std::sync::LazyLock;
77

88
pub const DNA_BASES: [u8; 5] = *b"ACGTN";
99
pub const IUPAC_BASES: [u8; 15] = *b"ACGTMRWSYKVHDBN";
1010

11-
lazy_static! {
12-
pub static ref BASE_A: usize = 1;
13-
pub static ref BASE_C: usize = 2;
14-
pub static ref BASE_G: usize = 4;
15-
pub static ref BASE_T: usize = 8;
16-
pub static ref BASE_N: usize = 15;
17-
pub static ref DNA_MASKS: [u8; 256] = {
18-
let mut masks = [0; 256];
19-
let (a, c, g, t) = (1, 2, 4, 8);
20-
masks['A' as usize] = a;
21-
masks['C' as usize] = c;
22-
masks['G' as usize] = g;
23-
masks['T' as usize] = t;
24-
masks['U' as usize] = t;
25-
masks['N' as usize] = a | c | g | t;
26-
masks
27-
};
28-
pub static ref IUPAC_MASKS: [u8; 256] = {
29-
let mut masks = [0; 256];
30-
let (a, c, g, t) = (1, 2, 4, 8);
31-
masks['A' as usize] = a;
32-
masks['C' as usize] = c;
33-
masks['G' as usize] = g;
34-
masks['T' as usize] = t;
35-
masks['U' as usize] = t;
36-
masks['M' as usize] = a | c;
37-
masks['R' as usize] = a | g;
38-
masks['W' as usize] = a | t;
39-
masks['S' as usize] = c | g;
40-
masks['Y' as usize] = c | t;
41-
masks['K' as usize] = g | t;
42-
masks['V' as usize] = a | c | g;
43-
masks['H' as usize] = a | c | t;
44-
masks['D' as usize] = a | g | t;
45-
masks['B' as usize] = c | g | t;
46-
masks['N' as usize] = a | c | g | t;
47-
masks
48-
};
49-
}
11+
pub static BASE_A: LazyLock<usize> = LazyLock::new(|| 1);
12+
pub static BASE_C: LazyLock<usize> = LazyLock::new(|| 2);
13+
pub static BASE_G: LazyLock<usize> = LazyLock::new(|| 4);
14+
pub static BASE_T: LazyLock<usize> = LazyLock::new(|| 8);
15+
pub static BASE_N: LazyLock<usize> = LazyLock::new(|| 15);
16+
pub static DNA_MASKS: LazyLock<[u8; 256]> = LazyLock::new(|| {
17+
let mut masks = [0; 256];
18+
let (a, c, g, t) = (1, 2, 4, 8);
19+
masks['A' as usize] = a;
20+
masks['C' as usize] = c;
21+
masks['G' as usize] = g;
22+
masks['T' as usize] = t;
23+
masks['U' as usize] = t;
24+
masks['N' as usize] = a | c | g | t;
25+
masks
26+
});
27+
pub static IUPAC_MASKS: LazyLock<[u8; 256]> = LazyLock::new(|| {
28+
let mut masks = [0; 256];
29+
let (a, c, g, t) = (1, 2, 4, 8);
30+
masks['A' as usize] = a;
31+
masks['C' as usize] = c;
32+
masks['G' as usize] = g;
33+
masks['T' as usize] = t;
34+
masks['U' as usize] = t;
35+
masks['M' as usize] = a | c;
36+
masks['R' as usize] = a | g;
37+
masks['W' as usize] = a | t;
38+
masks['S' as usize] = c | g;
39+
masks['Y' as usize] = c | t;
40+
masks['K' as usize] = g | t;
41+
masks['V' as usize] = a | c | g;
42+
masks['H' as usize] = a | c | t;
43+
masks['D' as usize] = a | g | t;
44+
masks['B' as usize] = c | g | t;
45+
masks['N' as usize] = a | c | g | t;
46+
masks
47+
});
5048

5149
#[must_use]
5250
pub fn encode(bases: &[u8]) -> BitEnc {
@@ -56,7 +54,11 @@ pub fn encode(bases: &[u8]) -> BitEnc {
5654
IUPAC_MASKS[b'N' as usize]
5755
} else {
5856
let value = base.to_ascii_uppercase() as usize;
59-
if value < 256 { IUPAC_MASKS[value] } else { 0 }
57+
if value < 256 {
58+
IUPAC_MASKS[value]
59+
} else {
60+
0
61+
}
6062
};
6163
vec.push(bit);
6264
}

0 commit comments

Comments
 (0)