Skip to content

Commit 3a5a695

Browse files
committed
Update to noodles 0.96.0
GFF strings are now byte strings, and the BAM encoder no longer allows invalid characters in record data string values.
1 parent 47e4c4e commit 3a5a695

11 files changed

+95
-78
lines changed

Cargo.lock

+14-12
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+2-2
Original file line number | Diff line number | Diff line change
@@ -20,8 +20,8 @@ flate2 = "1.0.14"
2020
git-testament = "0.2.0"
2121
indexmap = "2.7.1"
2222
mimalloc = "0.1.43"
23-
noodles = { version = "0.95.0", features = ["bam", "bgzf", "core", "gff", "sam"] }
24-
noodles-bgzf = { version = "0.37.0", features = ["libdeflate"] }
23+
noodles = { version = "0.96.0", features = ["bam", "bgzf", "core", "gff", "sam"] }
24+
noodles-bgzf = { version = "0.38.0", features = ["libdeflate"] }
2525
rustc-hash = "2.1.1"
2626
thiserror = "2.0.11"
2727
tracing = "0.1.25"

src/commands/normalize.rs

+9-8
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ use std::{
55
path::{Path, PathBuf},
66
};
77

8+
use bstr::BString;
89
use thiserror::Error;
910
use tracing::info;
1011

@@ -79,7 +80,7 @@ where
7980
Ok(())
8081
}
8182

82-
fn read_counts<P>(src: P) -> Result<Vec<(String, u32)>, NormalizeError>
83+
fn read_counts<P>(src: P) -> Result<Vec<(BString, u32)>, NormalizeError>
8384
where
8485
P: AsRef<Path>,
8586
{
@@ -91,8 +92,8 @@ where
9192
}
9293

9394
fn calculate_feature_lengths(
94-
features: &HashMap<String, Vec<Feature>>,
95-
names: &[String],
95+
features: &HashMap<BString, Vec<Feature>>,
96+
names: &[BString],
9697
) -> io::Result<Vec<u32>> {
9798
normalization::calculate_feature_lengths(features, names)
9899
.map_err(|e| io::Error::new(io::ErrorKind::InvalidData, e))?
@@ -101,7 +102,7 @@ fn calculate_feature_lengths(
101102
.collect()
102103
}
103104

104-
fn write_normalized_counts<W>(writer: &mut W, names: &[String], values: &[f64]) -> io::Result<()>
105+
fn write_normalized_counts<W>(writer: &mut W, names: &[BString], values: &[f64]) -> io::Result<()>
105106
where
106107
W: Write,
107108
{
@@ -121,10 +122,10 @@ mod tests {
121122
#[test]
122123
fn test_write_normalized_counts() -> io::Result<()> {
123124
let names = [
124-
String::from("AADAT"),
125-
String::from("CLN3"),
126-
String::from("NEO1"),
127-
String::from("PAK4"),
125+
BString::from("AADAT"),
126+
BString::from("CLN3"),
127+
BString::from("NEO1"),
128+
BString::from("PAK4"),
128129
];
129130

130131
let values = [30.2, 3.7, 0.0, 14.5];

src/commands/quantify.rs

+17-10
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ use std::{
55
path::{Path, PathBuf},
66
};
77

8+
use bstr::{BString, ByteSlice};
89
use noodles::{bam, bgzf};
910
use thiserror::Error;
1011
use tracing::{info, warn};
@@ -60,7 +61,7 @@ where
6061
.map(Box::new)
6162
.map_err(|e| QuantifyError::OpenFile(e, src.into()))?
6263
} else {
63-
bgzf::reader::Builder
64+
bgzf::io::reader::Builder
6465
.build_from_path(src)
6566
.map(Box::new)
6667
.map_err(|e| QuantifyError::OpenFile(e, src.into()))?
@@ -160,14 +161,14 @@ fn strand_specification_from_option_or(
160161

161162
const DELIMITER: char = '\t';
162163

163-
fn write_counts<W>(writer: &mut W, feature_names: &[&String], counts: &Counts) -> io::Result<()>
164+
fn write_counts<W>(writer: &mut W, feature_names: &[&BString], counts: &Counts) -> io::Result<()>
164165
where
165166
W: Write,
166167
{
167168
const MISSING: u64 = 0;
168169

169170
for name in feature_names {
170-
let count = counts.get(name.as_str()).copied().unwrap_or(MISSING);
171+
let count = counts.get(name.as_bstr()).copied().unwrap_or(MISSING);
171172
writeln!(writer, "{name}{DELIMITER}{count}")?;
172173
}
173174

@@ -189,6 +190,8 @@ where
189190

190191
#[cfg(test)]
191192
mod tests {
193+
use bstr::BStr;
194+
192195
use super::*;
193196

194197
#[test]
@@ -228,15 +231,19 @@ mod tests {
228231

229232
#[test]
230233
fn test_write_counts() -> io::Result<()> {
231-
let counts = [("AADAT", 302), ("CLN3", 37), ("PAK4", 145)]
232-
.into_iter()
233-
.collect();
234+
let counts = [
235+
(BStr::new("AADAT"), 302),
236+
(BStr::new("CLN3"), 37),
237+
(BStr::new("PAK4"), 145),
238+
]
239+
.into_iter()
240+
.collect();
234241

235242
let names = [
236-
&String::from("AADAT"),
237-
&String::from("CLN3"),
238-
&String::from("NEO1"),
239-
&String::from("PAK4"),
243+
&BString::from("AADAT"),
244+
&BString::from("CLN3"),
245+
&BString::from("NEO1"),
246+
&BString::from("PAK4"),
240247
];
241248

242249
let mut buf = Vec::new();

src/count.rs

+4-3
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ use std::{
88
thread,
99
};
1010

11+
use bstr::BStr;
1112
use noodles::{bam, core::Position};
1213

1314
use self::context::Event;
@@ -247,7 +248,7 @@ fn count_record<'f>(
247248
strand_specification: StrandSpecification,
248249
is_reverse_complemented: bool,
249250
record: &bam::Record,
250-
intersections: &mut HashSet<&'f str>,
251+
intersections: &mut HashSet<&'f BStr>,
251252
) -> io::Result<Option<Event<'f>>> {
252253
let reference_sequence_id = record
253254
.reference_sequence_id()
@@ -280,7 +281,7 @@ fn count_record<'f>(
280281
}
281282

282283
fn intersect<'f>(
283-
intersections: &mut HashSet<&'f str>,
284+
intersections: &mut HashSet<&'f BStr>,
284285
interval_tree: &IntervalTree<Position, Entry<'f>>,
285286
intervals: MatchIntervals,
286287
strand_specification: StrandSpecification,
@@ -304,7 +305,7 @@ fn intersect<'f>(
304305
Ok(())
305306
}
306307

307-
fn resolve_intersections<'f>(intersections: &HashSet<&'f str>) -> Event<'f> {
308+
fn resolve_intersections<'f>(intersections: &HashSet<&'f BStr>) -> Event<'f> {
308309
if intersections.is_empty() {
309310
Event::Miss
310311
} else if intersections.len() == 1 {

src/count/context.rs

+11-9
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,12 @@
11
mod event;
22

3-
pub use self::event::Event;
3+
use bstr::BStr;
44

55
use std::collections::HashMap;
66

7-
pub type Counts<'f> = HashMap<&'f str, u64>;
7+
pub use self::event::Event;
8+
9+
pub type Counts<'f> = HashMap<&'f BStr, u64>;
810

911
#[derive(Default)]
1012
pub struct Context<'f> {
@@ -54,7 +56,7 @@ mod tests {
5456
fn test_add() {
5557
let mut ctx_a = Context::default();
5658

57-
ctx_a.counts.insert("AADAT", 2);
59+
ctx_a.counts.insert(BStr::new("AADAT"), 2);
5860
ctx_a.miss = 3;
5961
ctx_a.ambiguous = 5;
6062
ctx_a.low_quality = 8;
@@ -63,8 +65,8 @@ mod tests {
6365

6466
let mut ctx_b = Context::default();
6567

66-
ctx_b.counts.insert("AADAT", 2);
67-
ctx_b.counts.insert("CLN3", 3);
68+
ctx_b.counts.insert(BStr::new("AADAT"), 2);
69+
ctx_b.counts.insert(BStr::new("CLN3"), 3);
6870
ctx_b.miss = 5;
6971
ctx_b.ambiguous = 8;
7072
ctx_b.low_quality = 13;
@@ -74,8 +76,8 @@ mod tests {
7476
ctx_a.add(&ctx_b);
7577

7678
assert_eq!(ctx_a.counts.len(), 2);
77-
assert_eq!(ctx_a.counts["AADAT"], 4);
78-
assert_eq!(ctx_a.counts["CLN3"], 3);
79+
assert_eq!(ctx_a.counts[BStr::new("AADAT")], 4);
80+
assert_eq!(ctx_a.counts[BStr::new("CLN3")], 3);
7981

8082
assert_eq!(ctx_a.miss, 8);
8183
assert_eq!(ctx_a.ambiguous, 13);
@@ -87,15 +89,15 @@ mod tests {
8789
#[test]
8890
fn test_add_event() {
8991
let mut ctx = Context::default();
90-
ctx.add_event(Event::Hit("AADAT"));
92+
ctx.add_event(Event::Hit(BStr::new("AADAT")));
9193
ctx.add_event(Event::Miss);
9294
ctx.add_event(Event::Ambiguous);
9395
ctx.add_event(Event::LowQuality);
9496
ctx.add_event(Event::Unmapped);
9597
ctx.add_event(Event::Nonunique);
9698

9799
assert_eq!(ctx.counts.len(), 1);
98-
assert_eq!(ctx.counts["AADAT"], 1);
100+
assert_eq!(ctx.counts[BStr::new("AADAT")], 1);
99101

100102
assert_eq!(ctx.miss, 1);
101103
assert_eq!(ctx.ambiguous, 1);

src/count/context/event.rs

+3-1
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
1+
use bstr::BStr;
2+
13
#[derive(Clone, Debug, Eq, PartialEq)]
24
pub enum Event<'f> {
3-
Hit(&'f str),
5+
Hit(&'f BStr),
46
Miss,
57
Ambiguous,
68
LowQuality,

src/counts.rs

+5-4
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@ use std::{
33
num,
44
};
55

6+
use bstr::BString;
67
use thiserror::Error;
78

89
static HTSEQ_COUNT_META_PREFIX: &str = "__";
@@ -17,7 +18,7 @@ pub enum ReadCountsError {
1718
Io(#[from] io::Error),
1819
}
1920

20-
pub fn read<R>(reader: &mut R) -> Result<Vec<(String, u32)>, ReadCountsError>
21+
pub fn read<R>(reader: &mut R) -> Result<Vec<(BString, u32)>, ReadCountsError>
2122
where
2223
R: BufRead,
2324
{
@@ -116,9 +117,9 @@ __alignment_not_unique\t0
116117
let actual = read(&mut reader)?;
117118

118119
let expected = [
119-
(String::from("AADAT"), 302),
120-
(String::from("CLN3"), 37),
121-
(String::from("PAK4"), 145),
120+
(BString::from("AADAT"), 302),
121+
(BString::from("CLN3"), 37),
122+
(BString::from("PAK4"), 145),
122123
];
123124

124125
assert_eq!(actual, expected);

0 commit comments

Comments
 (0)