@@ -7,14 +7,11 @@ use rayon::prelude::*;
77
88use crate :: types:: annotation:: { AnnotatedMS2Spectrum , FragmentAnnotation } ;
99use crate :: types:: ms2_spectrum:: MS2Spectrum ;
10- use crate :: utils:: parse_fragment ;
10+ use crate :: utils:: { build_theoretical_fragments , search_sorted_mz } ;
1111
12- use ordered_float:: OrderedFloat ;
1312use rustyms:: annotation:: model:: FragmentationModel ;
14- use rustyms:: annotation:: AnnotatableSpectrum ;
1513use rustyms:: chemistry:: MassMode ;
1614use rustyms:: prelude:: CompoundPeptidoformIon ;
17- use rustyms:: spectrum:: { PeakSpectrum , RawPeak , RawSpectrum } ;
1815use rustyms:: system:: f64:: MassOverCharge ;
1916use rustyms:: system:: mass_over_charge:: thomson;
2017
@@ -58,21 +55,19 @@ fn parse_tolerance(
5855
5956/// Annotate MS2 spectra with theoretical fragment ions.
6057#[ pyfunction]
61- #[ allow( clippy:: too_many_arguments) ]
6258pub fn annotate_ms2_spectra (
6359 py : Python < ' _ > ,
6460 spectra : Vec < Py < MS2Spectrum > > ,
6561 proformas : Vec < String > ,
66- seq_lens : Vec < usize > ,
6762 fragmentation_model : String ,
6863 mass_mode : String ,
6964 tolerance_value : f64 ,
7065 tolerance_mode : String ,
7166) -> PyResult < Vec < AnnotatedMS2Spectrum > > {
7267 let n = spectra. len ( ) ;
73- if proformas. len ( ) != n || seq_lens . len ( ) != n {
68+ if proformas. len ( ) != n {
7469 return Err ( PyException :: new_err (
75- "Input arrays must have identical length: spectra, proformas, seq_lens " ,
70+ "Input arrays must have identical length: spectra, proformas" ,
7671 ) ) ;
7772 }
7873
@@ -83,7 +78,6 @@ pub fn annotate_ms2_spectra(
8378 mz_f32 : Vec < f32 > ,
8479 intensity_f32 : Vec < f32 > ,
8580 precursor : Option < crate :: types:: precursor:: Precursor > ,
86- seq_len : usize ,
8781 precursor_charge : i32 ,
8882 proforma : String ,
8983 }
@@ -106,58 +100,77 @@ pub fn annotate_ms2_spectra(
106100 let spec_ref = spectra[ i] . bind ( py) ;
107101 let spec = spec_ref. borrow ( ) ;
108102
103+ let proforma = proformas[ i]
104+ . split ( '/' )
105+ . next ( )
106+ . unwrap_or ( & proformas[ i] )
107+ . to_string ( ) ;
108+
109109 owned. push ( OwnedSpec {
110110 id : spec. identifier . clone ( ) ,
111111 mz_f32 : spec. mz . clone ( ) ,
112112 intensity_f32 : spec. intensity . clone ( ) ,
113113 precursor : spec. precursor . clone ( ) ,
114- seq_len : seq_lens[ i] ,
115114 precursor_charge : spec
116115 . precursor
117116 . as_ref ( )
118117 . map ( |p| p. charge as i32 )
119118 . unwrap_or ( 0 ) ,
120- proforma : proformas[ i]
121- . split ( '/' )
122- . next ( )
123- . unwrap_or ( & proformas[ i] )
124- . to_string ( ) ,
119+ proforma,
125120 } ) ;
126121 }
127122
128123 let model = parse_fragmentation_model ( & fragmentation_model) ?;
129124 let mode = parse_mass_mode ( & mass_mode) ?;
130125 let tolerance = parse_tolerance ( tolerance_value, & tolerance_mode) ?;
131- let params = rustyms:: annotation:: model:: MatchingParameters :: default ( ) . tolerance ( tolerance) ;
132-
133- // Precompute theoretical fragments and parsed peptides per unique peptide+charge
134- type CacheEntry = ( CompoundPeptidoformIon , Vec < rustyms:: fragment:: Fragment > ) ;
135-
136- let mut frag_cache: HashMap < ( String , i32 ) , Arc < Option < CacheEntry > > > = HashMap :: new ( ) ;
137- for item in & owned {
138- let key = ( item. proforma . clone ( ) , item. precursor_charge ) ;
139- if item. precursor_charge <= 0 || frag_cache. contains_key ( & key) {
140- continue ;
141- }
142-
143- let entry = CompoundPeptidoformIon :: pro_forma ( & item. proforma , None )
144- . ok ( )
145- . map ( |peptide| {
146- let frag_charge = rustyms:: system:: isize:: Charge :: new :: < rustyms:: system:: e > (
147- item. precursor_charge as isize ,
148- ) ;
149- let frags = peptide. generate_theoretical_fragments ( frag_charge, & model) ;
150- ( peptide, frags)
151- } ) ;
152-
153- frag_cache. insert ( key, Arc :: new ( entry) ) ;
126+ // Precompute theoretical fragments per unique peptidoform+charge
127+ struct CacheEntry {
128+ fragments : Vec < crate :: utils:: CachedFragment > ,
129+ seq_len : usize ,
154130 }
131+ type FragCache = Arc < HashMap < ( String , i32 ) , Arc < Option < CacheEntry > > > > ;
132+
133+ let unique_keys: Vec < ( String , i32 ) > = {
134+ let mut seen = HashMap :: new ( ) ;
135+ for item in & owned {
136+ if item. precursor_charge > 0 {
137+ seen. entry ( ( item. proforma . clone ( ) , item. precursor_charge ) )
138+ . or_insert ( ( ) ) ;
139+ }
140+ }
141+ seen. into_keys ( ) . collect ( )
142+ } ;
155143
156- let frag_cache = Arc :: new ( frag_cache) ;
157- let params = Arc :: new ( params) ;
158-
159- // Release GIL and parallelize
144+ // Release GIL and parallelize both cache building and annotation
160145 let results: Result < Vec < AnnotatedMS2Spectrum > , String > = py. detach ( || {
146+ let frag_cache: FragCache = Arc :: new (
147+ unique_keys
148+ . into_par_iter ( )
149+ . map ( |( proforma, charge) | {
150+ let entry = CompoundPeptidoformIon :: pro_forma ( & proforma, None )
151+ . ok ( )
152+ . map ( |peptidoform| {
153+ let seq_len = peptidoform
154+ . peptidoforms ( )
155+ . next ( )
156+ . map ( |pf| pf. sequence ( ) . len ( ) )
157+ . unwrap_or ( 0 ) ;
158+ let frag_charge =
159+ rustyms:: system:: isize:: Charge :: new :: < rustyms:: system:: e > (
160+ charge as isize ,
161+ ) ;
162+ let fragments =
163+ build_theoretical_fragments ( & peptidoform, frag_charge, & model, mode) ;
164+ CacheEntry {
165+ fragments,
166+ seq_len,
167+ }
168+ } ) ;
169+ ( ( proforma, charge) , Arc :: new ( entry) )
170+ } )
171+ . collect ( ) ,
172+ ) ;
173+
161174 owned
162175 . into_par_iter ( )
163176 . map ( |item| {
@@ -168,53 +181,36 @@ pub fn annotate_ms2_spectra(
168181 ) ) ;
169182 }
170183
171- if item. mz_f32 . is_empty ( ) || item. seq_len == 0 || item . precursor_charge <= 0 {
184+ if item. mz_f32 . is_empty ( ) || item. precursor_charge <= 0 {
172185 return Ok ( item. empty_annotated ( ) ) ;
173186 }
174187
175188 let key = ( item. proforma . clone ( ) , item. precursor_charge ) ;
176189 let cache_entry = frag_cache. get ( & key) . and_then ( |e| e. as_ref ( ) . as_ref ( ) ) ;
177190
178- let ( peptide , frags ) = match cache_entry {
179- Some ( ( p , f ) ) if !f . is_empty ( ) => ( p , f ) ,
191+ let entry = match cache_entry {
192+ Some ( e ) if e . seq_len > 0 && !e . fragments . is_empty ( ) => e ,
180193 _ => return Ok ( item. empty_annotated ( ) ) ,
181194 } ;
182195
183- // Build RawSpectrum from f32 data (convert to f64 in-place, no pre-stored copy)
184- let mut spectrum = RawSpectrum :: default ( ) ;
185- spectrum. title = item. id . clone ( ) ;
186- spectrum. num_scans = 1 ;
187-
188- let peaks: Vec < RawPeak > = item
189- . mz_f32
190- . iter ( )
191- . zip ( item. intensity_f32 . iter ( ) )
192- . map ( |( & mz, & inten) | RawPeak {
193- mz : MassOverCharge :: new :: < thomson > ( mz as f64 ) ,
194- intensity : OrderedFloat ( inten as f64 ) ,
195- } )
196- . collect ( ) ;
197-
198- spectrum. extend ( peaks) ;
199-
200- let annotated = spectrum. annotate ( peptide. clone ( ) , frags. as_slice ( ) , & params, mode) ;
201-
202- let peak_annotations: Vec < Vec < FragmentAnnotation > > = annotated
203- . spectrum ( )
204- . map ( |peak| {
205- peak. annotation
206- . iter ( )
207- . filter_map ( |frag| {
208- let ( series, position, charge) = parse_fragment ( frag) ?;
209- Some ( FragmentAnnotation {
210- series : series. to_string ( ) ,
211- position,
212- charge,
213- } )
214- } )
215- . collect ( )
216- } )
217- . collect ( ) ;
196+ let mz_range = MassOverCharge :: new :: < thomson > ( 0.0 )
197+ ..=MassOverCharge :: new :: < thomson > ( f64:: MAX ) ;
198+ let mut peak_annotations = vec ! [ Vec :: new( ) ; item. mz_f32. len( ) ] ;
199+
200+ for frag in & entry. fragments {
201+ let fragment_mz = MassOverCharge :: new :: < thomson > ( frag. mz ) ;
202+ if !mz_range. contains ( & fragment_mz) {
203+ continue ;
204+ }
205+
206+ if let Some ( idx) = search_sorted_mz ( & item. mz_f32 , frag. mz , & tolerance) {
207+ peak_annotations[ idx] . push ( FragmentAnnotation {
208+ series : frag. series . to_string ( ) ,
209+ position : frag. position ,
210+ charge : frag. charge ,
211+ } ) ;
212+ }
213+ }
218214
219215 Ok ( AnnotatedMS2Spectrum {
220216 identifier : item. id ,
0 commit comments