Skip to content

Commit 9032ab6

Browse files
committed
cli/commands/quantify: Map interval trees to reference sequence dictionary
1 parent 38a07d3 commit 9032ab6

File tree

10 files changed

+200
-46
lines changed

10 files changed

+200
-46
lines changed

Cargo.lock

+106
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ edition = "2021"
1212
[workspace.dependencies]
1313
anyhow = "1.0.55"
1414
clap = "4.4.0"
15+
indexmap = "2.6.0"
1516
noodles = "0.84.0"
1617
thiserror = "1.0.40"
1718
tracing = "0.1.30"

atlas-cli/Cargo.toml

+2-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,8 @@ path = "src/main.rs"
1111
anyhow.workspace = true
1212
atlas-core = { path = "../atlas-core", version = "0.1.0" }
1313
clap = { workspace = true, features = ["derive"] }
14-
noodles = { workspace = true, features = ["core"] }
14+
indexmap.workspace = true
15+
noodles = { workspace = true, features = ["bam", "core", "sam"] }
1516
thiserror.workspace = true
1617
tracing-subscriber.workspace = true
1718
tracing.workspace = true

atlas-cli/src/cli/quantify.rs

+3
Original file line numberDiff line numberDiff line change
@@ -15,4 +15,7 @@ pub struct Args {
1515
/// Input annotations file (GFF3).
1616
#[arg(long)]
1717
pub annotations: PathBuf,
18+
19+
/// Source input (BAM).
20+
pub src: PathBuf,
1821
}

atlas-cli/src/commands/normalize.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ where
118118
use atlas_core::features::read_features;
119119

120120
let mut reader = File::open(src).map(BufReader::new)?;
121-
let features = read_features(&mut reader, feature_type, feature_id)?;
121+
let (_, features) = read_features(&mut reader, feature_type, feature_id)?;
122122
Ok(features)
123123
}
124124

atlas-cli/src/commands/quantify.rs

+47-18
Original file line numberDiff line numberDiff line change
@@ -9,14 +9,15 @@ use atlas_core::{
99
collections::IntervalTree,
1010
features::{Feature, ReadFeaturesError},
1111
};
12-
use noodles::core::Position;
12+
use indexmap::IndexSet;
13+
use noodles::{bam, core::Position, sam};
1314
use thiserror::Error;
1415
use tracing::info;
1516

1617
use crate::cli::quantify;
1718

1819
type Features = HashMap<String, Vec<Feature>>;
19-
type IntervalTrees<'f> = HashMap<String, IntervalTree<Position, &'f str>>;
20+
type IntervalTrees<'f> = Vec<IntervalTree<Position, &'f str>>;
2021

2122
pub fn quantify(args: quantify::Args) -> Result<(), QuantifyError> {
2223
let annotations_src = &args.annotations;
@@ -28,12 +29,25 @@ pub fn quantify(args: quantify::Args) -> Result<(), QuantifyError> {
2829
feature_type, feature_id, "reading features"
2930
);
3031

31-
let features = read_features(annotations_src, feature_type, feature_id)?;
32+
let (reference_sequence_names, features) =
33+
read_features(annotations_src, feature_type, feature_id)?;
34+
35+
let src = &args.src;
36+
37+
info!(src = ?src, "reading alignment header");
38+
39+
let mut reader = bam::io::reader::Builder.build_from_path(src)?;
40+
let header = reader.read_header()?;
41+
42+
info!(
43+
reference_sequence_count = header.reference_sequences().len(),
44+
"read alignment header"
45+
);
3246

3347
info!(feature_count = features.len(), "read features");
3448
info!("building interval trees");
3549

36-
let interval_trees = build_interval_trees(&features);
50+
let interval_trees = build_interval_trees(&header, &reference_sequence_names, &features);
3751

3852
info!(
3953
interval_tree_count = interval_trees.len(),
@@ -51,36 +65,51 @@ pub enum QuantifyError {
5165
InvalidFeatures(#[from] ReadFeaturesError),
5266
}
5367

54-
fn read_features<P>(src: P, feature_type: &str, feature_id: &str) -> Result<Features, QuantifyError>
68+
fn read_features<P>(
69+
src: P,
70+
feature_type: &str,
71+
feature_id: &str,
72+
) -> Result<(IndexSet<String>, Features), QuantifyError>
5573
where
5674
P: AsRef<Path>,
5775
{
5876
use atlas_core::features::read_features;
5977

6078
let mut reader = File::open(src).map(BufReader::new)?;
61-
let features = read_features(&mut reader, feature_type, feature_id)?;
62-
Ok(features)
79+
let (reference_sequence_names, features) =
80+
read_features(&mut reader, feature_type, feature_id)?;
81+
Ok((reference_sequence_names, features))
6382
}
6483

65-
fn build_interval_trees(features: &Features) -> IntervalTrees<'_> {
66-
let mut interval_trees = IntervalTrees::default();
84+
fn build_interval_trees<'f>(
85+
header: &sam::Header,
86+
reference_sequence_names: &IndexSet<String>,
87+
features: &'f Features,
88+
) -> IntervalTrees<'f> {
89+
let reference_sequences = header.reference_sequences();
90+
91+
let mut interval_trees = Vec::new();
92+
interval_trees.resize_with(reference_sequences.len(), IntervalTree::default);
6793

6894
for (name, segments) in features {
6995
for feature in segments {
70-
let reference_sequence_name = &feature.reference_sequence_name;
71-
72-
let tree = if let Some(tree) = interval_trees.get_mut(reference_sequence_name) {
73-
tree
74-
} else {
75-
interval_trees
76-
.entry(reference_sequence_name.into())
77-
.or_default()
96+
let reference_sequenceid = feature.reference_sequence_id;
97+
let reference_sequence_name = reference_sequence_names
98+
.get_index(reference_sequenceid)
99+
.unwrap();
100+
101+
let Some(i) = reference_sequences.get_index_of(reference_sequence_name.as_bytes())
102+
else {
103+
continue;
78104
};
79105

106+
// SAFETY: `interval_trees.len() == reference_sequences.len()`.
107+
let tree = &mut interval_trees[i];
108+
80109
let start = feature.start;
81110
let end = feature.end;
82111

83-
tree.insert(start..=end, name)
112+
tree.insert(start..=end, name.as_str())
84113
}
85114
}
86115

atlas-core/Cargo.toml

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ version = "0.1.0"
44
edition.workspace = true
55

66
[dependencies]
7+
indexmap.workspace = true
78
ndarray = "0.16.1"
89
noodles = { workspace = true, features = ["core", "gff"] }
910
thiserror.workspace = true

0 commit comments

Comments
 (0)