Skip to content

Commit 0d5d820

Browse files
authored
Replace HashMaps with a bit-vector for unique depth computation (#201)
2 parents 49594a6 + f02cfe4 commit 0d5d820

File tree

4 files changed

+34
-13
lines changed

4 files changed

+34
-13
lines changed

flatgfa/Cargo.lock

Lines changed: 7 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

flatgfa/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ path = "src/cli/main.rs"
1010
[dependencies]
1111
argh = "0.1.12"
1212
atoi = "2.0.0"
13+
bit-vec = "0.8.0"
1314
bstr = "1.10.0"
1415
memchr = "2.7.4"
1516
memmap = "0.7.0"

flatgfa/src/cli/cmds.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -204,7 +204,7 @@ pub fn depth(gfa: &flatgfa::FlatGFA) {
204204
"{}\t{}\t{}",
205205
name,
206206
depths[id.index()],
207-
uniq_paths[id.index()].len()
207+
uniq_paths[id.index()],
208208
);
209209
}
210210
}

flatgfa/src/ops/depth.rs

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,35 @@
11
use crate::flatgfa;
2-
use std::collections::HashSet;
2+
use bit_vec::BitVec;
33

4-
pub fn depth(gfa: &flatgfa::FlatGFA) -> (Vec<usize>, Vec<HashSet<usize>>) {
5-
// Initialize node depth
4+
/// Compute the *depth* of each segment in the variation graph.
5+
///
6+
/// The depth is defined to be the number of times that a path traverses a given
7+
/// segment. We return two values: the ordinary depth and the *unique* depth,
8+
/// which only counts each path that traverses a given segment once.
9+
///
10+
/// Both outputs are depth values indexed by segment ID.
11+
pub fn depth(gfa: &flatgfa::FlatGFA) -> (Vec<usize>, Vec<usize>) {
12+
// Our output vectors: the ordinary and unique depths of each segment.
613
let mut depths = vec![0; gfa.segs.len()];
7-
// Initialize uniq_paths
8-
let mut uniq_paths = Vec::<HashSet<usize>>::new();
9-
uniq_paths.resize(gfa.segs.len(), HashSet::new());
10-
// do not assume that each handle in `gfa.steps()` is unique
11-
for (idx, path) in gfa.paths.all().iter().enumerate() {
14+
let mut uniq_depths = vec![0; gfa.segs.len()];
15+
16+
// This bit vector keeps track of whether the current path has already
17+
// traversed a given segment, and therefore whether we should ignore
18+
// subsequent traversals (for the purpose of counting unique depth).
19+
let mut seen = BitVec::from_elem(gfa.segs.len(), false);
20+
21+
for path in gfa.paths.all().iter() {
22+
seen.clear(); // All segments are unseen.
1223
for step in &gfa.steps[path.steps] {
1324
let seg_id = step.segment().index();
14-
// Increment depths
1525
depths[seg_id] += 1;
16-
// Update uniq_paths
17-
uniq_paths[seg_id].insert(idx);
26+
if !seen[seg_id] {
27+
// The first traversal of this path over this segment.
28+
uniq_depths[seg_id] += 1;
29+
seen.set(seg_id, true);
30+
}
1831
}
1932
}
2033

21-
(depths, uniq_paths)
34+
(depths, uniq_depths)
2235
}

0 commit comments

Comments
 (0)