@@ -9,14 +9,15 @@ use atlas_core::{
9
9
collections:: IntervalTree ,
10
10
features:: { Feature , ReadFeaturesError } ,
11
11
} ;
12
- use noodles:: core:: Position ;
12
+ use indexmap:: IndexSet ;
13
+ use noodles:: { bam, core:: Position , sam} ;
13
14
use thiserror:: Error ;
14
15
use tracing:: info;
15
16
16
17
use crate :: cli:: quantify;
17
18
18
19
type Features = HashMap < String , Vec < Feature > > ;
19
- type IntervalTrees < ' f > = HashMap < String , IntervalTree < Position , & ' f str > > ;
20
+ type IntervalTrees < ' f > = Vec < IntervalTree < Position , & ' f str > > ;
20
21
21
22
pub fn quantify ( args : quantify:: Args ) -> Result < ( ) , QuantifyError > {
22
23
let annotations_src = & args. annotations ;
@@ -28,12 +29,25 @@ pub fn quantify(args: quantify::Args) -> Result<(), QuantifyError> {
28
29
feature_type, feature_id, "reading features"
29
30
) ;
30
31
31
- let features = read_features ( annotations_src, feature_type, feature_id) ?;
32
+ let ( reference_sequence_names, features) =
33
+ read_features ( annotations_src, feature_type, feature_id) ?;
34
+
35
+ let src = & args. src ;
36
+
37
+ info ! ( src = ?src, "reading alignment header" ) ;
38
+
39
+ let mut reader = bam:: io:: reader:: Builder . build_from_path ( src) ?;
40
+ let header = reader. read_header ( ) ?;
41
+
42
+ info ! (
43
+ reference_sequence_count = header. reference_sequences( ) . len( ) ,
44
+ "read alignment header"
45
+ ) ;
32
46
33
47
info ! ( feature_count = features. len( ) , "read features" ) ;
34
48
info ! ( "building interval trees" ) ;
35
49
36
- let interval_trees = build_interval_trees ( & features) ;
50
+ let interval_trees = build_interval_trees ( & header , & reference_sequence_names , & features) ;
37
51
38
52
info ! (
39
53
interval_tree_count = interval_trees. len( ) ,
@@ -51,36 +65,51 @@ pub enum QuantifyError {
51
65
InvalidFeatures ( #[ from] ReadFeaturesError ) ,
52
66
}
53
67
54
- fn read_features < P > ( src : P , feature_type : & str , feature_id : & str ) -> Result < Features , QuantifyError >
68
+ fn read_features < P > (
69
+ src : P ,
70
+ feature_type : & str ,
71
+ feature_id : & str ,
72
+ ) -> Result < ( IndexSet < String > , Features ) , QuantifyError >
55
73
where
56
74
P : AsRef < Path > ,
57
75
{
58
76
use atlas_core:: features:: read_features;
59
77
60
78
let mut reader = File :: open ( src) . map ( BufReader :: new) ?;
61
- let features = read_features ( & mut reader, feature_type, feature_id) ?;
62
- Ok ( features)
79
+ let ( reference_sequence_names, features) =
80
+ read_features ( & mut reader, feature_type, feature_id) ?;
81
+ Ok ( ( reference_sequence_names, features) )
63
82
}
64
83
65
- fn build_interval_trees ( features : & Features ) -> IntervalTrees < ' _ > {
66
- let mut interval_trees = IntervalTrees :: default ( ) ;
84
+ fn build_interval_trees < ' f > (
85
+ header : & sam:: Header ,
86
+ reference_sequence_names : & IndexSet < String > ,
87
+ features : & ' f Features ,
88
+ ) -> IntervalTrees < ' f > {
89
+ let reference_sequences = header. reference_sequences ( ) ;
90
+
91
+ let mut interval_trees = Vec :: new ( ) ;
92
+ interval_trees. resize_with ( reference_sequences. len ( ) , IntervalTree :: default) ;
67
93
68
94
for ( name, segments) in features {
69
95
for feature in segments {
70
- let reference_sequence_name = & feature. reference_sequence_name ;
71
-
72
- let tree = if let Some ( tree ) = interval_trees . get_mut ( reference_sequence_name ) {
73
- tree
74
- } else {
75
- interval_trees
76
- . entry ( reference_sequence_name . into ( ) )
77
- . or_default ( )
96
+ let reference_sequenceid = feature. reference_sequence_id ;
97
+ let reference_sequence_name = reference_sequence_names
98
+ . get_index ( reference_sequenceid )
99
+ . unwrap ( ) ;
100
+
101
+ let Some ( i ) = reference_sequences . get_index_of ( reference_sequence_name . as_bytes ( ) )
102
+ else {
103
+ continue ;
78
104
} ;
79
105
106
// In bounds: `i` is an index into `reference_sequences`, and `interval_trees.len() == reference_sequences.len()`.
107
+ let tree = & mut interval_trees[ i] ;
108
+
80
109
let start = feature. start ;
81
110
let end = feature. end ;
82
111
83
- tree. insert ( start..=end, name)
112
+ tree. insert ( start..=end, name. as_str ( ) )
84
113
}
85
114
}
86
115
0 commit comments