Skip to content

Commit a3a2b44

Browse files
committed
Resolve more PR change requests
1 parent eeb964a commit a3a2b44

File tree

6 files changed

+65
-77
lines changed

6 files changed

+65
-77
lines changed

flatgfa/src/flatgfa.rs

Lines changed: 40 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::ops::Range;
44
use std::str::FromStr;
55

66
use crate::{
7-
packedseq::{compress_into_buffer, PackedSeqView, SeqSpan},
7+
packedseq::{PackedSeqView, SeqSpan},
88
pool::{self, Id, Pool, Span, Store},
99
};
1010
use bstr::BStr;
@@ -435,13 +435,44 @@ impl<'a, P: StoreFamily<'a>> GFAStore<'a, P> {
435435
self.header.add_slice(version);
436436
}
437437

438-
/// Add a new segment to the GFA file.
439-
pub fn add_seg(&mut self, name: usize, seq: &[u8], optional: &[u8]) -> Id<Segment> {
440-
let mut compressed: Vec<u8> = Vec::new();
441-
let end_offset = compress_into_buffer(seq, &mut compressed);
442-
let byte_span = self.seq_data.add_slice(&compressed);
438+
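/// Add a new segment to the GFA file, packing the ASCII bases in `seq` two per byte; panics on any byte other than A, C, T, G, or N.
// NOTE(review): for an odd-length `seq` the loop exits via `break` with `high_nibble_end` still `true`, so `end` below appears to be one base too long (the removed `compress_into_buffer` returned `false` here) — please confirm.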
439+
pub fn compress_and_add_seg(
440+
&mut self,
441+
name: usize,
442+
seq: &[u8],
443+
optional: &[u8],
444+
) -> Id<Segment> {
445+
self.seq_data.reserve(seq.len());
446+
let mut high_nibble_end = true;
447+
let mut combined_item = 0;
448+
let start_id = self.seq_data.next_id();
449+
for i in 0..seq.len() {
450+
let item = seq[i];
451+
let converted: u8 = match item {
452+
65 => 0,
453+
67 => 1,
454+
84 => 2,
455+
71 => 3,
456+
78 => 4,
457+
_ => panic!("Not a Nucleotide!"),
458+
};
459+
if high_nibble_end {
460+
if i == seq.len() - 1 {
461+
self.seq_data.add(converted);
462+
break;
463+
}
464+
combined_item = converted;
465+
high_nibble_end = false;
466+
} else {
467+
combined_item |= converted << 4;
468+
self.seq_data.add(combined_item);
469+
high_nibble_end = true;
470+
}
471+
}
472+
let end_id = self.seq_data.next_id();
473+
let byte_span = Span::new(start_id, end_id);
443474
let start = SeqSpan::to_logical(byte_span.start.index(), false);
444-
let end = SeqSpan::to_logical(byte_span.end.index() - 1, end_offset) + 1;
475+
let end = SeqSpan::to_logical(byte_span.end.index() - 1, high_nibble_end) + 1;
445476
self.segs.add(Segment {
446477
name,
447478
seq: SeqSpan {
@@ -453,21 +484,13 @@ impl<'a, P: StoreFamily<'a>> GFAStore<'a, P> {
453484
}
454485

455486
/// Add a new segment with already compressed data
456-
pub fn add_seg_already_compressed(
457-
&mut self,
458-
name: usize,
459-
seq: PackedSeqView,
460-
optional: &[u8],
461-
) -> Id<Segment> {
487+
pub fn add_seg(&mut self, name: usize, seq: PackedSeqView, optional: &[u8]) -> Id<Segment> {
462488
let byte_span = self.seq_data.add_slice(seq.data);
463489
let start = SeqSpan::to_logical(byte_span.start.index(), seq.high_nibble_begin);
464490
let end = SeqSpan::to_logical(byte_span.end.index() - 1, seq.high_nibble_end) + 1;
465491
self.segs.add(Segment {
466492
name,
467-
seq: SeqSpan {
468-
start,
469-
len: (end - start) as u16,
470-
},
493+
seq: (start as usize..end as usize).into(),
471494
optional: self.optional_data.add_slice(optional),
472495
})
473496
}

flatgfa/src/ops/chop.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
use std::ops::Range;
2+
13
use crate::flatgfa::{self, Handle, Link, Orientation, Path, Segment};
24
use crate::packedseq::SeqSpan;
35
use crate::pool::{Id, Span, Store};
@@ -35,7 +37,7 @@ pub fn chop(gfa: &flatgfa::FlatGFA, max_size: usize, incl_links: bool) -> flatgf
3537
max_node_id += 1;
3638
seg_map.push(Span::new(id, flat.segs.next_id()));
3739
} else {
38-
let seq_range = seg.seq.to_range();
40+
let seq_range: Range<usize> = seg.seq.into();
3941
let seq_end = seq_range.end;
4042
let mut offset = seq_range.start;
4143
let segs_start = flat.segs.next_id();
@@ -46,11 +48,11 @@ pub fn chop(gfa: &flatgfa::FlatGFA, max_size: usize, incl_links: bool) -> flatgf
4648
// Generate a new segment of length c
4749
flat.segs.add(Segment {
4850
name: max_node_id,
49-
seq: SeqSpan::from_range(std::ops::Range {
50-
// Note for reviwer: Change made here
51+
seq: std::ops::Range {
5152
start: offset,
5253
end: offset + max_size,
53-
}),
54+
}
55+
.into(),
5456
optional: Span::new_empty(),
5557
});
5658
offset += max_size;
@@ -59,11 +61,11 @@ pub fn chop(gfa: &flatgfa::FlatGFA, max_size: usize, incl_links: bool) -> flatgf
5961
// Generate the last segment
6062
flat.segs.add(Segment {
6163
name: max_node_id,
62-
seq: SeqSpan::from_range(std::ops::Range {
63-
// Note for reviwer: Change made here
64+
seq: std::ops::Range {
6465
start: offset,
6566
end: seq_end,
66-
}),
67+
}
68+
.into(),
6769
optional: Span::new_empty(),
6870
});
6971
max_node_id += 1;

flatgfa/src/ops/extract.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ impl<'a> SubgraphBuilder<'a> {
3636
/// Add a segment from the source graph to this subgraph.
3737
fn include_seg(&mut self, seg_id: Id<Segment>) {
3838
let seg = &self.old.segs[seg_id];
39-
let new_seg_id = self.store.add_seg_already_compressed(
39+
let new_seg_id = self.store.add_seg(
4040
// Note for reviewer: change made here
4141
seg.name,
4242
self.old.get_seq(seg),

flatgfa/src/packedseq.rs

Lines changed: 2 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,7 @@ impl<'a> PackedSeqView<'a> {
284284
/// Given a pool of compressed data (`pool`), create a PackedSeqView in the range of `span`
285285
///
286286
pub fn from_pool(pool: Pool<'a, u8>, span: SeqSpan) -> Self {
287-
let slice = &pool.all()[span.start_byte_index()..span.end_byte_index()];
287+
let slice = &pool.all()[span.byte_range()];
288288
Self {
289289
data: slice,
290290
high_nibble_begin: span.get_nibble_begin(),
@@ -443,37 +443,12 @@ impl SeqSpan {
443443
self.len == 0
444444
}
445445

446-
/// Given `range`, returns the equivalent SeqSpan
447-
pub fn from_range(range: Range<usize>) -> Self {
448-
Self {
449-
start: range.start as u32,
450-
len: (range.end - range.start) as u16,
451-
}
452-
}
453-
454-
/// Returns the range that is equivalent to this SeqSpan
455-
pub fn to_range(&self) -> Range<usize> {
456-
Range {
457-
start: self.start as usize,
458-
end: self.end(),
459-
}
460-
}
461-
462446
// Returns the logical index of the element given the byte index and nibble offset
463447
pub fn to_logical(byte_index: usize, end_offset: bool) -> u32 {
464448
(byte_index * 2 + end_offset as usize) as u32
465449
}
466450

467-
// Returns the index of the starting byte
468-
pub fn start_byte_index(&self) -> usize {
469-
(self.start / 2) as usize
470-
}
471-
472-
// Returns the index one greater than the end byte index
473-
pub fn end_byte_index(&self) -> usize {
474-
self.end().div_ceil(2)
475-
}
476-
451+
// Returns a range of the bytes covered by this SeqSpan
477452
pub fn byte_range(&self) -> Range<usize> {
478453
Range {
479454
start: (self.start / 2) as usize,
@@ -549,30 +524,6 @@ pub fn export(seq: PackedSeqView, filename: &str) {
549524
seq.write_file(&mut mem);
550525
}
551526

552-
/// Takes a slice of uncompressed ASCII-encoded base pairs, compresses them and pushes them into `output`
553-
pub fn compress_into_buffer(input: &[u8], output: &mut Vec<u8>) -> bool {
554-
let mut high_nibble_end = true;
555-
for item in input {
556-
let converted: u8 = match item {
557-
65 => 0,
558-
67 => 1,
559-
84 => 2,
560-
71 => 3,
561-
78 => 4,
562-
_ => panic!("Not a Nucleotide!"),
563-
};
564-
if high_nibble_end {
565-
output.push(converted);
566-
high_nibble_end = false;
567-
} else {
568-
let last_index = output.len() - 1;
569-
output[last_index] |= converted << 4;
570-
high_nibble_end = true;
571-
}
572-
}
573-
high_nibble_end
574-
}
575-
576527
/// Takes a slice of compressed base pairs, decompresses them and pushes them into `output`
577528
pub fn decompress_into_buffer(input: PackedSeqView, output: &mut Vec<u8>) {
578529
if !input.high_nibble_begin {

flatgfa/src/parse.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@ impl<'a, P: flatgfa::StoreFamily<'a>> Parser<'a, P> {
136136
}
137137

138138
fn add_seg(&mut self, seg: gfaline::Segment) {
139-
let seg_id = self.flat.add_seg(seg.name, seg.seq, seg.data);
139+
let seg_id = self.flat.compress_and_add_seg(seg.name, seg.seq, seg.data);
140140
self.seg_ids.insert(seg.name, seg_id);
141141
}
142142

flatgfa/src/pool.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,9 @@ pub trait Store<T: Clone> {
147147
fn next_id(&self) -> Id<T> {
148148
Id::new(self.len())
149149
}
150+
151+
/// Ensure there is room for at least `num_elems` more elements; a fixed-capacity store panics if there is not.
152+
fn reserve(&mut self, num_elems: usize);
150153
}
151154

152155
/// A store that uses a `Vec` to allocate objects on the heap.
@@ -181,6 +184,10 @@ impl<T: Clone> Store<T> for HeapStore<T> {
181184
fn len(&self) -> usize {
182185
self.0.len()
183186
}
187+
188+
fn reserve(&mut self, num_elems: usize) {
189+
self.0.reserve(num_elems);
190+
}
184191
}
185192

186193
impl<T> Default for HeapStore<T> {
@@ -223,6 +230,11 @@ impl<T: Clone> Store<T> for FixedStore<'_, T> {
223230
fn len(&self) -> usize {
224231
self.0.len()
225232
}
233+
234+
fn reserve(&mut self, num_elems: usize) {
235+
let required_capacity = self.len() + num_elems;
236+
assert!(self.capacity() >= required_capacity);
237+
}
226238
}
227239

228240
impl<T> FixedStore<'_, T> {

0 commit comments

Comments
 (0)