1+ use crate :: file:: * ;
2+ use crate :: flatgfa;
3+ use crate :: pool:: * ;
14use std:: fmt;
25
6+ use zerocopy:: * ;
7+
/// Magic number expected in the `magic` field of a `PackedToc` header;
/// `read_packed_toc` asserts that the stored value equals this constant.
// NOTE(review): an all-zero magic is also matched by a zero-filled buffer.
// Presumably this is a placeholder value; confirm the intended constant.
const MAGIC_NUMBER: u64 = 0x0000_0000;
9+
310#[ derive( Debug , PartialEq , Eq , Copy , Clone ) ]
411pub enum Nucleotide {
512 A ,
@@ -45,34 +52,72 @@ impl From<Nucleotide> for char {
/// A compressed, vector-like container for nucleotide sequences
/// - Every byte of the backing vector holds two base pairs
///
pub struct PackedSeqStore {
    /// Backing bytes holding the compressed encoding of this
    /// PackedSeqStore's sequence
    data: Vec<u8>,

    /// Whether the last base pair of the sequence sits in the high
    /// nibble of its byte
    high_nibble_end: bool,
}
5663
57- impl PackedVec {
58- /// Creates a new empty PackedVec
64+ #[ derive( FromBytes , AsBytes , FromZeroes ) ]
65+ #[ repr( packed) ]
66+ pub struct PackedSeqView < ' a > {
67+ data : Pool < ' a , u8 > ,
68+
69+ /// True if the final base pair in the sequence is stored at a
70+ /// high nibble
71+ high_nibble_end : bool ,
72+ }
73+
74+ #[ derive( FromBytes , FromZeroes , AsBytes , Debug ) ]
75+ #[ repr( packed) ]
76+ pub struct PackedToc {
77+ magic : u64 ,
78+ size : Size ,
79+ }
80+
81+ fn read_packed_toc ( data : & [ u8 ] ) -> ( & PackedToc , & [ u8 ] ) {
82+ let toc = PackedToc :: ref_from_prefix ( data) . unwrap ( ) ;
83+ let rest = & data[ size_of :: < PackedToc > ( ) ..] ;
84+ let magic = toc. magic ;
85+ assert_eq ! ( magic, MAGIC_NUMBER ) ;
86+ ( toc, rest)
87+ }
88+
89+ pub fn view ( data : & [ u8 ] ) -> PackedSeqView {
90+ let ( toc, rest) = read_packed_toc ( data) ;
91+
92+ let data = slice_prefix ( rest, toc. size ) ;
93+
94+ let high_nibble_end = slice_prefix ( rest, 1 ) ;
95+
96+ PackedSeqView {
97+ data : data. into ( ) ,
98+ high_nibble_end : high_nibble_end. into ( ) ,
99+ }
100+ }
101+
102+ impl PackedSeqStore {
103+ /// Creates a new empty PackedSeqStore
59104 pub fn new ( ) -> Self {
60- PackedVec {
105+ PackedSeqStore {
61106 data : Vec :: new ( ) ,
62107 high_nibble_end : true ,
63108 }
64109 }
65110
66- /// Returns a compressed PackedVec given an uncompressed vector `arr`
111+ /// Returns a compressed PackedSeqStore given an uncompressed vector `arr`
67112 pub fn create ( arr : Vec < Nucleotide > ) -> Self {
68- let mut new_vec = PackedVec :: new ( ) ;
113+ let mut new_vec = PackedSeqStore :: new ( ) ;
69114 for item in arr {
70115 new_vec. push ( item) ;
71116 }
72117 new_vec
73118 }
74119
75- /// Appends `input` to the end of this PackedVec
120+ /// Appends `input` to the end of this PackedSeqStore
76121 pub fn push ( & mut self , input : Nucleotide ) {
77122 let value = input. into ( ) ;
78123 assert ! ( value <= 0xF ) ;
@@ -98,7 +143,7 @@ impl PackedVec {
98143 self . data . is_empty ( )
99144 }
100145
101- /// Returns the element of this PackedVec at index `index`
146+ /// Returns the element of this PackedSeqStore at index `index`
102147 pub fn get ( & self , index : usize ) -> Nucleotide {
103148 let i = index / 2 ;
104149 if index % 2 == 1 {
@@ -108,7 +153,7 @@ impl PackedVec {
108153 }
109154 }
110155
111- /// Sets the element of this PackedVec at index `index` to `elem`
156+ /// Sets the element of this PackedSeqStore at index `index` to `elem`
112157 pub fn set ( & mut self , index : usize , input : Nucleotide ) {
113158 let elem: u8 = input. into ( ) ;
114159 let i = index / 2 ;
@@ -128,23 +173,23 @@ impl PackedVec {
128173 arr
129174 }
130175
131- /// Returns a uncompressed vector that contains the same sequence as this PackedVec
176+ /// Returns a uncompressed vector that contains the same sequence as this PackedSeqStore
132177 pub fn get_elements ( & self ) -> Vec < Nucleotide > {
133178 self . get_range ( 0 ..( self . len ( ) - 1 ) )
134179 }
135180}
136181
137- impl Default for PackedVec {
182+ impl Default for PackedSeqStore {
138183 fn default ( ) -> Self {
139184 Self :: new ( )
140185 }
141186}
142187
143- impl fmt:: Display for PackedVec {
188+ impl fmt:: Display for PackedSeqStore {
144189 fn fmt ( & self , f : & mut fmt:: Formatter < ' _ > ) -> fmt:: Result {
145190 write ! ( f, "[" ) ?;
146191 let mut i = 0 ;
147- for item in PackedVecIterator :: new ( self ) {
192+ for item in PackedSeqStoreIterator :: new ( self ) {
148193 if i == 0 {
149194 i = 1 ;
150195 } else {
@@ -157,21 +202,21 @@ impl fmt::Display for PackedVec {
157202 }
158203}
159204
160- struct PackedVecIterator < ' a > {
161- data : & ' a PackedVec ,
205+ struct PackedSeqStoreIterator < ' a > {
206+ data : & ' a PackedSeqStore ,
162207 cur_index : usize ,
163208}
164209
165- impl < ' a > PackedVecIterator < ' a > {
166- pub fn new ( vec : & ' a PackedVec ) -> Self {
210+ impl < ' a > PackedSeqStoreIterator < ' a > {
211+ pub fn new ( vec : & ' a PackedSeqStore ) -> Self {
167212 Self {
168213 data : vec,
169214 cur_index : 0 ,
170215 }
171216 }
172217}
173218
174- impl Iterator for PackedVecIterator < ' _ > {
219+ impl Iterator for PackedSeqStoreIterator < ' _ > {
175220 type Item = Nucleotide ;
176221
177222 fn next ( & mut self ) -> Option < Self :: Item > {
@@ -184,18 +229,18 @@ impl Iterator for PackedVecIterator<'_> {
184229 }
185230}
186231
187- /// A reference to a subsection of a nucleotide sequence stored in a PackedVec
232+ /// A reference to a subsection of a nucleotide sequence stored in a PackedSeqStore
188233pub struct PackedSlice < ' a > {
189234 /// The underlying vector that stores the sequence referenced by this slice
190- vec_ref : & ' a PackedVec ,
235+ vec_ref : & ' a PackedSeqStore ,
191236
192237 /// The specific section of the sequence that this slice references
193238 span : std:: ops:: Range < usize > ,
194239}
195240
196241/// Returns a PackedSlice given a compressed PackedSeqStore `vec` that acts as a reference
197242/// to the section of `vec` contained within the index bounds of the range `s`.
198- pub fn create_slice ( vec : & PackedVec , s : std:: ops:: Range < usize > ) -> PackedSlice < ' _ > {
243+ pub fn create_slice ( vec : & PackedSeqStore , s : std:: ops:: Range < usize > ) -> PackedSlice < ' _ > {
199244 PackedSlice {
200245 vec_ref : vec,
201246 span : s,
@@ -209,7 +254,7 @@ pub fn get_slice_seq(slice: PackedSlice<'_>) -> Vec<Nucleotide> {
209254
210255#[ test]
211256fn test_vec ( ) {
212- let mut vec = PackedVec :: create ( vec ! [
257+ let mut vec = PackedSeqStore :: create ( vec ! [
213258 Nucleotide :: A ,
214259 Nucleotide :: C ,
215260 Nucleotide :: G ,
@@ -228,7 +273,7 @@ fn test_vec() {
228273
229274#[ test]
230275fn test_vec_push ( ) {
231- let mut vec = PackedVec :: create ( vec ! [
276+ let mut vec = PackedSeqStore :: create ( vec ! [
232277 Nucleotide :: A ,
233278 Nucleotide :: C ,
234279 Nucleotide :: G ,
@@ -252,7 +297,7 @@ fn test_vec_push() {
252297#[ test]
253298fn test_slice ( ) {
254299 let span = 1 ..4 ;
255- let vec = PackedVec :: create ( vec ! [
300+ let vec = PackedSeqStore :: create ( vec ! [
256301 Nucleotide :: A ,
257302 Nucleotide :: C ,
258303 Nucleotide :: G ,
@@ -270,7 +315,7 @@ fn test_slice() {
270315
271316#[ test]
272317fn test_display_even ( ) {
273- let vec = PackedVec :: create ( vec ! [
318+ let vec = PackedSeqStore :: create ( vec ! [
274319 Nucleotide :: C ,
275320 Nucleotide :: A ,
276321 Nucleotide :: T ,
@@ -283,13 +328,13 @@ fn test_display_even() {
283328
284329#[ test]
285330fn test_display_single ( ) {
286- let vec = PackedVec :: create ( vec ! [ Nucleotide :: T . into( ) ] ) ;
331+ let vec = PackedSeqStore :: create ( vec ! [ Nucleotide :: T . into( ) ] ) ;
287332 assert_eq ! ( "[T]" , vec. to_string( ) ) ;
288333}
289334
290335#[ test]
291336fn test_display_odd ( ) {
292- let vec = PackedVec :: create ( vec ! [
337+ let vec = PackedSeqStore :: create ( vec ! [
293338 Nucleotide :: C ,
294339 Nucleotide :: A ,
295340 Nucleotide :: T ,
@@ -303,7 +348,7 @@ fn test_display_odd() {
303348
304349#[ test]
305350fn test_getter_setter ( ) {
306- let mut vec = PackedVec :: create ( vec ! [
351+ let mut vec = PackedSeqStore :: create ( vec ! [
307352 Nucleotide :: A ,
308353 Nucleotide :: A ,
309354 Nucleotide :: T ,
0 commit comments