Skip to content

Commit b853ffa

Browse files
committed
Merge branch 'main' into john-performance
2 parents 7e22822 + 2373bd5 commit b853ffa

File tree

11 files changed

+73
-44
lines changed

11 files changed

+73
-44
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ There are several things in this repository:
2828
`mygfa` and `slow_odgi`
2929
-----------------------
3030

31-
The `mygfa` library is an extremely simple Python library for representing (and parsing and emitting) GFA files. It emphasizes clarify over efficiency. Use `pip install mygfa` to get started, and read the [API documentation][mygfa-docs] for details.
31+
The `mygfa` library is an extremely simple Python library for representing (and parsing and emitting) GFA files. It emphasizes clarity over efficiency. Use `pip install mygfa` to get started, and read the [API documentation][mygfa-docs] for details.
3232

3333
Similarly, `slow_odgi` is a set of GFA analyses based on `mygfa`; it's meant to act as a *reference implementation* of the much faster functionality in [odgi][]. Check out [the slow_odgi README](slow_odgi/) for more details.
3434

flatgfa-py/src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ impl Store {
4242
}
4343

4444
/// Get the FlatGFA stored here.
45-
fn view(&self) -> FlatGFA {
45+
fn view(&self) -> FlatGFA<'_> {
4646
// TK It seems wasteful to check the type of store every time... and to construct
4747
// the view every time. It's probably possible to fix this with a self-reference,
4848
// e.g., with the `owning_ref` crate.

flatgfa/src/cli/cmds.rs

Lines changed: 34 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -14,20 +14,40 @@ use std::io::Write;
1414
/// print the FlatGFA table of contents
1515
#[derive(FromArgs, PartialEq, Debug)]
1616
#[argh(subcommand, name = "toc")]
17-
pub struct Toc {}
18-
19-
pub fn toc(gfa: &flatgfa::FlatGFA) {
20-
eprintln!("header: {}", gfa.header.len());
21-
eprintln!("segs: {}", gfa.segs.len());
22-
eprintln!("paths: {}", gfa.paths.len());
23-
eprintln!("links: {}", gfa.links.len());
24-
eprintln!("steps: {}", gfa.steps.len());
25-
eprintln!("seq_data: {}", gfa.seq_data.len());
26-
eprintln!("overlaps: {}", gfa.overlaps.len());
27-
eprintln!("alignment: {}", gfa.alignment.len());
28-
eprintln!("name_data: {}", gfa.name_data.len());
29-
eprintln!("optional_data: {}", gfa.optional_data.len());
30-
eprintln!("line_order: {}", gfa.line_order.len());
17+
pub struct Toc {
18+
/// show sizes in bytes instead of element counts
19+
#[argh(switch, short = 'b')]
20+
bytes: bool,
21+
}
22+
23+
pub fn toc(gfa: &flatgfa::FlatGFA, args: Toc) {
24+
if args.bytes {
25+
// Show sizes in bytes.
26+
println!("header: {}", gfa.header.size());
27+
println!("segs: {}", gfa.segs.size());
28+
println!("paths: {}", gfa.paths.size());
29+
println!("links: {}", gfa.links.size());
30+
println!("steps: {}", gfa.steps.size());
31+
println!("seq_data: {}", gfa.seq_data.size());
32+
println!("overlaps: {}", gfa.overlaps.size());
33+
println!("alignment: {}", gfa.alignment.size());
34+
println!("name_data: {}", gfa.name_data.size());
35+
println!("optional_data: {}", gfa.optional_data.size());
36+
println!("line_order: {}", gfa.line_order.size());
37+
} else {
38+
// Show element counts (which is what we record physically in the TOC).
39+
println!("header: {}", gfa.header.len());
40+
println!("segs: {}", gfa.segs.len());
41+
println!("paths: {}", gfa.paths.len());
42+
println!("links: {}", gfa.links.len());
43+
println!("steps: {}", gfa.steps.len());
44+
println!("seq_data: {}", gfa.seq_data.len());
45+
println!("overlaps: {}", gfa.overlaps.len());
46+
println!("alignment: {}", gfa.alignment.len());
47+
println!("name_data: {}", gfa.name_data.len());
48+
println!("optional_data: {}", gfa.optional_data.len());
49+
println!("line_order: {}", gfa.line_order.len());
50+
}
3151
}
3252

3353
/// list the paths

flatgfa/src/cli/main.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,11 +112,16 @@ fn main() -> Result<(), &'static str> {
112112
};
113113

114114
match args.command {
115+
<<<<<<< HEAD
115116
Some(Command::StatsFileSize(_)) => {
116117
cmds::stats_filesize(&gfa);
117118
}
118119
Some(Command::Toc(_)) => {
119120
cmds::toc(&gfa);
121+
=======
122+
Some(Command::Toc(sub_args)) => {
123+
cmds::toc(&gfa, sub_args);
124+
>>>>>>> main
120125
}
121126
Some(Command::Paths(_)) => {
122127
cmds::paths(&gfa);

flatgfa/src/file.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -182,7 +182,7 @@ fn read_toc_mut(data: &mut [u8]) -> (&mut Toc, &mut [u8]) {
182182
}
183183

184184
/// Get a FlatGFA backed by the data in a byte buffer.
185-
pub fn view(data: &[u8]) -> flatgfa::FlatGFA {
185+
pub fn view(data: &[u8]) -> flatgfa::FlatGFA<'_> {
186186
let (toc, rest) = read_toc(data);
187187

188188
let (header, rest) = slice_prefix(rest, toc.header);
@@ -216,7 +216,7 @@ pub fn view(data: &[u8]) -> flatgfa::FlatGFA {
216216
fn slice_vec_prefix<T: FromBytes + IntoBytes>(
217217
data: &mut [u8],
218218
size: Size,
219-
) -> (SliceVec<T>, &mut [u8]) {
219+
) -> (SliceVec<'_, T>, &mut [u8]) {
220220
let (prefix, rest) = <[T]>::mut_from_prefix_with_elems(data, size.capacity).unwrap();
221221
let vec = SliceVec::from_slice_len(prefix, size.len);
222222
(vec, rest)
@@ -252,13 +252,13 @@ fn slice_store<'a>(data: &'a mut [u8], toc: &Toc) -> flatgfa::FixedGFAStore<'a>
252252
}
253253

254254
/// Get a mutable FlatGFA `SliceStore` backed by a byte buffer.
255-
pub fn view_store(data: &mut [u8]) -> flatgfa::FixedGFAStore {
255+
pub fn view_store<'a>(data: &'a mut [u8]) -> flatgfa::FixedGFAStore<'a> {
256256
let (toc, rest) = read_toc_mut(data);
257257
slice_store(rest, toc)
258258
}
259259

260260
/// Initialize a buffer with an empty FlatGFA store.
261-
pub fn init(data: &mut [u8], toc: Toc) -> (&mut Toc, flatgfa::FixedGFAStore) {
261+
pub fn init(data: &mut [u8], toc: Toc) -> (&mut Toc, flatgfa::FixedGFAStore<'_>) {
262262
// Write the table of contents.
263263
assert!(data.len() == toc.size());
264264
toc.write_to_prefix(data).unwrap();

flatgfa/src/flatbed.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ impl<'a, P: StoreFamily<'a>> BEDStore<'a, P> {
6969
self.entries.add(BEDEntry { name, start, end })
7070
}
7171

72-
pub fn as_ref(&self) -> FlatBED {
72+
pub fn as_ref(&self) -> FlatBED<'_> {
7373
FlatBED {
7474
name_data: self.name_data.as_ref(),
7575
entries: self.entries.as_ref(),
@@ -108,7 +108,7 @@ pub type HeapBEDStore = BEDStore<'static, HeapFamily>;
108108

109109
type ParseResult<T> = Result<T, &'static str>;
110110
type PartialParseResult<'a, T> = ParseResult<(T, &'a [u8])>;
111-
fn parse_num<T: FromRadix10>(s: &[u8]) -> PartialParseResult<T> {
111+
fn parse_num<T: FromRadix10>(s: &[u8]) -> PartialParseResult<'_, T> {
112112
match T::from_radix_10(s) {
113113
(_, 0) => Err("expected number"),
114114
(num, used) => Ok((num, &s[used..])),

flatgfa/src/flatgfa.rs

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -363,7 +363,7 @@ impl<'a> FlatGFA<'a> {
363363
///
364364
/// A handle is a forward or backward traversal of a specific segment. This method
365365
/// gets the sequence in the orientation specified by the handle.
366-
pub fn get_seq_oriented(&self, handle: Handle) -> Sequence {
366+
pub fn get_seq_oriented(&self, handle: Handle) -> Sequence<'_> {
367367
let seg = self.get_handle_seg(handle);
368368
let seq_data = PackedSeqView::from_pool(self.seq_data, seg.seq);
369369
Sequence::new(seq_data, handle.orient())
@@ -401,7 +401,7 @@ impl<'a> FlatGFA<'a> {
401401
}
402402

403403
/// Look up a CIGAR alignment.
404-
pub fn get_alignment(&self, overlap: Span<AlignOp>) -> Alignment {
404+
pub fn get_alignment(&self, overlap: Span<AlignOp>) -> Alignment<'_> {
405405
Alignment {
406406
ops: &self.alignment[overlap],
407407
}
@@ -584,7 +584,7 @@ impl<'a, P: StoreFamily<'a>> GFAStore<'a, P> {
584584
}
585585

586586
/// Borrow a FlatGFA view of this data store.
587-
pub fn as_ref(&self) -> FlatGFA {
587+
pub fn as_ref(&self) -> FlatGFA<'_> {
588588
FlatGFA {
589589
header: self.header.as_ref(),
590590
segs: self.segs.as_ref(),

flatgfa/src/gfaline.rs

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ pub struct Path<'a> {
3434
}
3535

3636
/// Parse a single line of a GFA file.
37-
pub fn parse_line(line: &[u8]) -> LineResult {
37+
pub fn parse_line(line: &[u8]) -> LineResult<'_> {
3838
if line.len() < 2 || line[1] != b'\t' {
3939
return Err("expected marker and tab");
4040
}
@@ -49,20 +49,20 @@ pub fn parse_line(line: &[u8]) -> LineResult {
4949
}
5050

5151
/// Parse a header line, which looks like `H <data>`.
52-
fn parse_header(line: &[u8]) -> LineResult {
52+
fn parse_header(line: &[u8]) -> LineResult<'_> {
5353
Ok(Line::Header(line))
5454
}
5555

5656
/// Parse a segment line, which looks like `S <name> <seq> <data>`.
57-
fn parse_seg(line: &[u8]) -> LineResult {
57+
fn parse_seg(line: &[u8]) -> LineResult<'_> {
5858
let (name, rest) = parse_num(line)?;
5959
let rest = parse_byte(rest, b'\t')?;
6060
let (seq, data) = parse_field(rest)?;
6161
Ok(Line::Segment(Segment { name, seq, data }))
6262
}
6363

6464
/// Parse a link line, which looks like `L <from> <+-> <to> <+-> <CIGAR>`.
65-
fn parse_link(line: &[u8]) -> LineResult {
65+
fn parse_link(line: &[u8]) -> LineResult<'_> {
6666
let (from_seg, rest) = parse_num(line)?;
6767
let rest = parse_byte(rest, b'\t')?;
6868
let (from_orient, rest) = parse_orient(rest)?;
@@ -85,7 +85,7 @@ fn parse_link(line: &[u8]) -> LineResult {
8585
}
8686

8787
/// Parse a path line, which looks like `P <name> <steps> <*|CIGARs>`.
88-
fn parse_path(line: &[u8]) -> LineResult {
88+
fn parse_path(line: &[u8]) -> LineResult<'_> {
8989
let (name, rest) = parse_field(line)?;
9090
let (steps, rest) = parse_field(rest)?;
9191
let (overlaps, rest) = parse_maybe_overlap_list(rest)?;
@@ -100,7 +100,7 @@ fn parse_path(line: &[u8]) -> LineResult {
100100
}
101101

102102
/// Parse a *possible* overlap list, which may be `*` (empty).
103-
pub fn parse_maybe_overlap_list(s: &[u8]) -> PartialParseResult<Vec<Vec<AlignOp>>> {
103+
pub fn parse_maybe_overlap_list(s: &[u8]) -> PartialParseResult<'_, Vec<Vec<AlignOp>>> {
104104
if s == b"*" {
105105
Ok((vec![], &s[1..]))
106106
} else {
@@ -111,7 +111,7 @@ pub fn parse_maybe_overlap_list(s: &[u8]) -> PartialParseResult<Vec<Vec<AlignOp>
111111
/// Parse a comma-separated list of CIGAR strings.
112112
///
113113
/// TODO: This could be optimized to avoid accumulating into a vector.
114-
fn parse_overlap_list(s: &[u8]) -> PartialParseResult<Vec<Vec<AlignOp>>> {
114+
fn parse_overlap_list(s: &[u8]) -> PartialParseResult<'_, Vec<Vec<AlignOp>>> {
115115
let mut rest = s;
116116
let mut overlaps = vec![];
117117
while !rest.is_empty() {
@@ -126,7 +126,7 @@ fn parse_overlap_list(s: &[u8]) -> PartialParseResult<Vec<Vec<AlignOp>>> {
126126
}
127127

128128
/// Consume a chunk of a string up to a given marker byte.
129-
fn parse_until(line: &[u8], marker: u8) -> PartialParseResult<&[u8]> {
129+
fn parse_until(line: &[u8], marker: u8) -> PartialParseResult<'_, &[u8]> {
130130
let end = memchr::memchr(marker, line).unwrap_or(line.len());
131131
let rest = if end == line.len() {
132132
&[]
@@ -137,7 +137,7 @@ fn parse_until(line: &[u8], marker: u8) -> PartialParseResult<&[u8]> {
137137
}
138138

139139
/// Consume a string from the line, until a tab (or the end of the line).
140-
pub fn parse_field(line: &[u8]) -> PartialParseResult<&[u8]> {
140+
pub fn parse_field(line: &[u8]) -> PartialParseResult<'_, &[u8]> {
141141
parse_until(line, b'\t')
142142
}
143143

@@ -150,15 +150,15 @@ fn parse_byte(s: &[u8], byte: u8) -> ParseResult<&[u8]> {
150150
}
151151

152152
/// Parse a single integer.
153-
fn parse_num<T: FromRadix10>(s: &[u8]) -> PartialParseResult<T> {
153+
fn parse_num<T: FromRadix10>(s: &[u8]) -> PartialParseResult<'_, T> {
154154
match T::from_radix_10(s) {
155155
(_, 0) => Err("expected number"),
156156
(num, used) => Ok((num, &s[used..])),
157157
}
158158
}
159159

160160
/// Parse a segment orientation (+ or -).
161-
fn parse_orient(line: &[u8]) -> PartialParseResult<Orientation> {
161+
fn parse_orient(line: &[u8]) -> PartialParseResult<'_, Orientation> {
162162
if line.is_empty() {
163163
return Err("expected orientation");
164164
}
@@ -171,7 +171,7 @@ fn parse_orient(line: &[u8]) -> PartialParseResult<Orientation> {
171171
}
172172

173173
/// Parse a single CIGAR alignment operation (like `4D`).
174-
fn parse_align_op(s: &[u8]) -> PartialParseResult<AlignOp> {
174+
fn parse_align_op(s: &[u8]) -> PartialParseResult<'_, AlignOp> {
175175
let (len, rest) = parse_num::<u32>(s)?;
176176
let op = match rest[0] {
177177
b'M' => crate::flatgfa::AlignOpcode::Match,
@@ -186,7 +186,7 @@ fn parse_align_op(s: &[u8]) -> PartialParseResult<AlignOp> {
186186
/// Parse a complete CIGAR alignment string (like `3M2I`).
187187
///
188188
/// TODO This could be optimized to avoid collecting into a vector.
189-
fn parse_align(s: &[u8]) -> PartialParseResult<Vec<AlignOp>> {
189+
fn parse_align(s: &[u8]) -> PartialParseResult<'_, Vec<AlignOp>> {
190190
let mut rest = s;
191191
let mut align = vec![];
192192
while !rest.is_empty() && rest[0].is_ascii_digit() {

flatgfa/src/memfile.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ pub struct MemchrSplit<'a> {
3838
}
3939

4040
impl MemchrSplit<'_> {
41-
pub fn new(needle: u8, haystack: &[u8]) -> MemchrSplit {
41+
pub fn new(needle: u8, haystack: &[u8]) -> MemchrSplit<'_> {
4242
MemchrSplit {
4343
needle,
4444
haystack,

flatgfa/src/packedseq.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -385,8 +385,7 @@ impl PackedSeqStore {
385385
}
386386
}
387387

388-
/// Creates a PackedSeqView with the same data as this PackedSeqStore
389-
pub fn as_ref(&self) -> PackedSeqView {
388+
pub fn as_ref(&self) -> PackedSeqView<'_> {
390389
PackedSeqView {
391390
data: &self.data,
392391
high_nibble_end: self.high_nibble_end,

0 commit comments

Comments
 (0)