Skip to content

Commit bb2c232

Browse files
committed
Add zerocopy interfacing
1 parent b10e4fd commit bb2c232

File tree

1 file changed

+74
-29
lines changed

1 file changed

+74
-29
lines changed

flatgfa/src/packedseq.rs

Lines changed: 74 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
1+
use crate::file::*;
2+
use crate::flatgfa;
3+
use crate::pool::*;
14
use std::fmt;
25

6+
use zerocopy::*;
7+
8+
/// Value that the `magic` field of a `PackedToc` must equal; `read_packed_toc` asserts it.
// NOTE(review): the value is zero, which looks like a placeholder — confirm the intended magic.
const MAGIC_NUMBER: u64 = 0x0000_0000;
9+
310
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
411
pub enum Nucleotide {
512
A,
@@ -45,34 +52,72 @@ impl From<Nucleotide> for char {
4552
/// A compressed, vector-like structure for storing nucleotide sequences.
///
/// Two nucleotides are stored per byte of `data`, one in each 4-bit nibble.
pub struct PackedSeqStore {
    /// A vector that stores a compressed encoding of this PackedSeqStore's sequence
    data: Vec<u8>,

    /// True if the final base pair in the sequence is stored at a
    /// high nibble
    high_nibble_end: bool,
}
5663

57-
impl PackedVec {
58-
/// Creates a new empty PackedVec
64+
#[derive(FromBytes, AsBytes, FromZeroes)]
65+
#[repr(packed)]
66+
pub struct PackedSeqView<'a> {
67+
data: Pool<'a, u8>,
68+
69+
/// True if the final base pair in the sequence is stored at a
70+
/// high nibble
71+
high_nibble_end: bool,
72+
}
73+
74+
#[derive(FromBytes, FromZeroes, AsBytes, Debug)]
75+
#[repr(packed)]
76+
pub struct PackedToc {
77+
magic: u64,
78+
size: Size,
79+
}
80+
81+
fn read_packed_toc(data: &[u8]) -> (&PackedToc, &[u8]) {
82+
let toc = PackedToc::ref_from_prefix(data).unwrap();
83+
let rest = &data[size_of::<PackedToc>()..];
84+
let magic = toc.magic;
85+
assert_eq!(magic, MAGIC_NUMBER);
86+
(toc, rest)
87+
}
88+
89+
/// Builds a `PackedSeqView` from serialized bytes: reads and validates the `PackedToc`
/// header with `read_packed_toc`, then slices the remaining bytes with `slice_prefix`.
///
/// Panics whenever `read_packed_toc` panics.
pub fn view(data: &[u8]) -> PackedSeqView {
90+
let (toc, rest) = read_packed_toc(data);
91+
92+
// Sequence bytes: a prefix of the post-header payload, sized by the header's `size`.
let data = slice_prefix(rest, toc.size);
93+
94+
// NOTE(review): this call is given the same `rest` as the call above, so it reads from
// the start of the payload rather than from whatever follows the sequence bytes —
// confirm whether `slice_prefix` is meant to hand back the remainder for this step.
// NOTE(review): the literal `1` is passed where the call above passes a `Size` —
// confirm the parameter type of `slice_prefix`.
let high_nibble_end = slice_prefix(rest, 1);
95+
96+
PackedSeqView {
97+
data: data.into(),
98+
// NOTE(review): the field is a `bool`; confirm a conversion from the `slice_prefix`
// result to `bool` actually exists.
high_nibble_end: high_nibble_end.into(),
99+
}
100+
}
101+
102+
impl PackedSeqStore {
103+
/// Creates a new empty PackedSeqStore
59104
pub fn new() -> Self {
60-
PackedVec {
105+
PackedSeqStore {
61106
data: Vec::new(),
62107
high_nibble_end: true,
63108
}
64109
}
65110

66-
/// Returns a compressed PackedVec given an uncompressed vector `arr`
111+
/// Returns a compressed PackedSeqStore given an uncompressed vector `arr`
67112
pub fn create(arr: Vec<Nucleotide>) -> Self {
68-
let mut new_vec = PackedVec::new();
113+
let mut new_vec = PackedSeqStore::new();
69114
for item in arr {
70115
new_vec.push(item);
71116
}
72117
new_vec
73118
}
74119

75-
/// Appends `input` to the end of this PackedVec
120+
/// Appends `input` to the end of this PackedSeqStore
76121
pub fn push(&mut self, input: Nucleotide) {
77122
let value = input.into();
78123
assert!(value <= 0xF);
@@ -98,7 +143,7 @@ impl PackedVec {
98143
self.data.is_empty()
99144
}
100145

101-
/// Returns the element of this PackedVec at index `index`
146+
/// Returns the element of this PackedSeqStore at index `index`
102147
pub fn get(&self, index: usize) -> Nucleotide {
103148
let i = index / 2;
104149
if index % 2 == 1 {
@@ -108,7 +153,7 @@ impl PackedVec {
108153
}
109154
}
110155

111-
/// Sets the element of this PackedVec at index `index` to `elem`
156+
/// Sets the element of this PackedSeqStore at index `index` to `elem`
112157
pub fn set(&mut self, index: usize, input: Nucleotide) {
113158
let elem: u8 = input.into();
114159
let i = index / 2;
@@ -128,23 +173,23 @@ impl PackedVec {
128173
arr
129174
}
130175

131-
/// Returns a uncompressed vector that contains the same sequence as this PackedVec
176+
/// Returns a uncompressed vector that contains the same sequence as this PackedSeqStore
132177
pub fn get_elements(&self) -> Vec<Nucleotide> {
133178
self.get_range(0..(self.len() - 1))
134179
}
135180
}
136181

137-
impl Default for PackedVec {
182+
impl Default for PackedSeqStore {
138183
fn default() -> Self {
139184
Self::new()
140185
}
141186
}
142187

143-
impl fmt::Display for PackedVec {
188+
impl fmt::Display for PackedSeqStore {
144189
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
145190
write!(f, "[")?;
146191
let mut i = 0;
147-
for item in PackedVecIterator::new(self) {
192+
for item in PackedSeqStoreIterator::new(self) {
148193
if i == 0 {
149194
i = 1;
150195
} else {
@@ -157,21 +202,21 @@ impl fmt::Display for PackedVec {
157202
}
158203
}
159204

160-
struct PackedVecIterator<'a> {
161-
data: &'a PackedVec,
205+
struct PackedSeqStoreIterator<'a> {
206+
data: &'a PackedSeqStore,
162207
cur_index: usize,
163208
}
164209

165-
impl<'a> PackedVecIterator<'a> {
166-
pub fn new(vec: &'a PackedVec) -> Self {
210+
impl<'a> PackedSeqStoreIterator<'a> {
211+
pub fn new(vec: &'a PackedSeqStore) -> Self {
167212
Self {
168213
data: vec,
169214
cur_index: 0,
170215
}
171216
}
172217
}
173218

174-
impl Iterator for PackedVecIterator<'_> {
219+
impl Iterator for PackedSeqStoreIterator<'_> {
175220
type Item = Nucleotide;
176221

177222
fn next(&mut self) -> Option<Self::Item> {
@@ -184,18 +229,18 @@ impl Iterator for PackedVecIterator<'_> {
184229
}
185230
}
186231

187-
/// A reference to a subsection of a nucleotide sequence stored in a PackedVec
232+
/// A reference to a subsection of a nucleotide sequence stored in a PackedSeqStore
188233
pub struct PackedSlice<'a> {
189234
/// The underlying vector that stores the sequence referenced by this slice
190-
vec_ref: &'a PackedVec,
235+
vec_ref: &'a PackedSeqStore,
191236

192237
/// The specific section of the sequence that this slice references
193238
span: std::ops::Range<usize>,
194239
}
195240

196241
/// Returns a PackedSlice given a compressed PackedSeqStore `vec` that acts as a reference
197242
/// to the section of `vec` contained within the index bounds of Span `s`.
198-
pub fn create_slice(vec: &PackedVec, s: std::ops::Range<usize>) -> PackedSlice<'_> {
243+
pub fn create_slice(vec: &PackedSeqStore, s: std::ops::Range<usize>) -> PackedSlice<'_> {
199244
PackedSlice {
200245
vec_ref: vec,
201246
span: s,
@@ -209,7 +254,7 @@ pub fn get_slice_seq(slice: PackedSlice<'_>) -> Vec<Nucleotide> {
209254

210255
#[test]
211256
fn test_vec() {
212-
let mut vec = PackedVec::create(vec![
257+
let mut vec = PackedSeqStore::create(vec![
213258
Nucleotide::A,
214259
Nucleotide::C,
215260
Nucleotide::G,
@@ -228,7 +273,7 @@ fn test_vec() {
228273

229274
#[test]
230275
fn test_vec_push() {
231-
let mut vec = PackedVec::create(vec![
276+
let mut vec = PackedSeqStore::create(vec![
232277
Nucleotide::A,
233278
Nucleotide::C,
234279
Nucleotide::G,
@@ -252,7 +297,7 @@ fn test_vec_push() {
252297
#[test]
253298
fn test_slice() {
254299
let span = 1..4;
255-
let vec = PackedVec::create(vec![
300+
let vec = PackedSeqStore::create(vec![
256301
Nucleotide::A,
257302
Nucleotide::C,
258303
Nucleotide::G,
@@ -270,7 +315,7 @@ fn test_slice() {
270315

271316
#[test]
272317
fn test_display_even() {
273-
let vec = PackedVec::create(vec![
318+
let vec = PackedSeqStore::create(vec![
274319
Nucleotide::C,
275320
Nucleotide::A,
276321
Nucleotide::T,
@@ -283,13 +328,13 @@ fn test_display_even() {
283328

284329
#[test]
285330
fn test_display_single() {
286-
let vec = PackedVec::create(vec![Nucleotide::T.into()]);
331+
let vec = PackedSeqStore::create(vec![Nucleotide::T.into()]);
287332
assert_eq!("[T]", vec.to_string());
288333
}
289334

290335
#[test]
291336
fn test_display_odd() {
292-
let vec = PackedVec::create(vec![
337+
let vec = PackedSeqStore::create(vec![
293338
Nucleotide::C,
294339
Nucleotide::A,
295340
Nucleotide::T,
@@ -303,7 +348,7 @@ fn test_display_odd() {
303348

304349
#[test]
305350
fn test_getter_setter() {
306-
let mut vec = PackedVec::create(vec![
351+
let mut vec = PackedSeqStore::create(vec![
307352
Nucleotide::A,
308353
Nucleotide::A,
309354
Nucleotide::T,

0 commit comments

Comments
 (0)