11using Easy . Common . EasyComparer ;
2+ using MassSpectrometry ;
23using MathNet . Numerics . Distributions ;
34using MathNet . Numerics . Statistics ;
5+ using MzLibUtil ;
46using System ;
57using System . Collections . Generic ;
68using System . Data ;
79using System . Data . Entity . ModelConfiguration . Conventions ;
810using System . Linq ;
9- using MassSpectrometry ;
1011
1112namespace FlashLFQ
1213{
@@ -17,10 +18,11 @@ namespace FlashLFQ
1718 internal class MbrScorer
1819 {
1920 // Intensity and ppm distributions are specific to each acceptor file
20- private readonly Normal _logIntensityDistribution ;
21- private readonly Normal _ppmDistribution ;
22- private readonly Normal _scanCountDistribution ;
23- private readonly Gamma _isotopicCorrelationDistribution ;
21+ private Normal _logIntensityDistribution ;
22+ private Normal _ppmDistribution ;
23+ private Normal _scanCountDistribution ;
24+ private Gamma _isotopicCorrelationDistribution ;
25+
2426 // The logFcDistributions and rtDifference distributions are unique to each donor file - acceptor file pair
2527 private Dictionary < SpectraFileInfo , Normal > _logFcDistributionDictionary ;
2628 private Dictionary < SpectraFileInfo , Normal > _rtPredictionErrorDistributionDictionary ;
@@ -35,23 +37,69 @@ internal class MbrScorer
3537 /// </summary>
3638 internal MbrScorer (
3739 Dictionary < IIndexedPeak , ChromatographicPeak > apexToAcceptorFilePeakDict ,
38- List < ChromatographicPeak > acceptorFileMsmsPeaks ,
39- Normal ppmDistribution ,
40- Normal logIntensityDistribution )
40+ List < ChromatographicPeak > unambiguousAcceptorFilePeaks )
4141 {
4242 ApexToAcceptorFilePeakDict = apexToAcceptorFilePeakDict ;
43- UnambiguousMsMsAcceptorPeaks = acceptorFileMsmsPeaks . Where ( p => p . Apex != null && p . DetectionType != DetectionType . MBR && p . NumIdentificationsByFullSeq == 1 ) . ToList ( ) ;
44- MaxNumberOfScansObserved = acceptorFileMsmsPeaks . Max ( peak => peak . ScanCount ) ;
45- _logIntensityDistribution = logIntensityDistribution ;
46- _ppmDistribution = ppmDistribution ;
47- _isotopicCorrelationDistribution = GetIsotopicEnvelopeCorrDistribution ( ) ;
43+ UnambiguousMsMsAcceptorPeaks = unambiguousAcceptorFilePeaks ;
44+ MaxNumberOfScansObserved = unambiguousAcceptorFilePeaks . Max ( peak => peak . ScanCount ) ;
45+
46+ // Initialize the dictionaries that will hold the log fold change and RT prediction error distributions
47+ // for each donor file
4848 _logFcDistributionDictionary = new ( ) ;
4949 _rtPredictionErrorDistributionDictionary = new ( ) ;
50+ }
51+
52+ /// <summary>
53+ /// Constructs the distributions that are used to score MBR matches
54+ /// </summary>
55+ /// <returns>Returns true if the scorer was initialized successfully, false otherwise</returns>
56+ internal bool InitializeScorer ( )
57+ {
58+ if ( UnambiguousMsMsAcceptorPeaks . Count < 3 )
59+ return false ;
60+
61+ // Populate distributions for scoring MBR matches
62+ _logIntensityDistribution = GetLogIntensityDistribution ( ) ;
63+ _ppmDistribution = GetPpmErrorDistribution ( ) ;
64+ _isotopicCorrelationDistribution = GetIsotopicEnvelopeCorrDistribution ( ) ;
65+ _scanCountDistribution = GetScanCountDistribution ( ) ;
66+
67+ return IsValid ( ) ;
68+ }
69+
70+ private Normal GetPpmErrorDistribution ( )
71+ {
72+ // Construct a distribution of ppm errors for all MSMS peaks in the acceptor file
73+ List < double > ppmErrors = UnambiguousMsMsAcceptorPeaks . Select ( p => p . MassError ) . Where ( e => ! double . IsNaN ( e ) ) . ToList ( ) ;
74+ if ( ppmErrors . Count < 2 )
75+ return null ;
76+ double ppmSpread = ppmErrors . Count > 30 ? ppmErrors . InterquartileRange ( ) / 1.36 : ppmErrors . StandardDeviation ( ) ;
77+ Normal ppmDistribution = new Normal ( ppmErrors . Median ( ) , ppmSpread ) ;
78+ return ppmDistribution ;
79+ }
80+
81+ private Normal GetLogIntensityDistribution ( )
82+ {
83+ var logIntensities = UnambiguousMsMsAcceptorPeaks
84+ . Where ( p => p . Intensity > 0 )
85+ . Select ( p => Math . Log ( p . Intensity , 2 ) )
86+ . ToList ( ) ;
87+
88+ if ( logIntensities . Count < 2 )
89+ return null ;
90+
91+ double mean = logIntensities . Median ( ) ;
92+ double stdDev = logIntensities . InterquartileRange ( ) / 1.36 ;
93+ return new Normal ( mean , stdDev ) ;
94+ }
5095
51- // This is kludgey, because scan counts are discrete
96+ // This is kludgey, because scan counts are discrete
97+ private Normal GetScanCountDistribution ( )
98+ {
5299 List < double > scanList = UnambiguousMsMsAcceptorPeaks . Select ( peak => ( double ) peak . ScanCount ) . ToList ( ) ;
100+
53101 // build a normal distribution for the scan list of the acceptor peaks
54- _scanCountDistribution = new Normal ( scanList . Average ( ) , scanList . Count > 30 ? scanList . StandardDeviation ( ) : scanList . InterquartileRange ( ) / 1.36 ) ;
102+ return new Normal ( scanList . Average ( ) , scanList . Count > 30 ? scanList . StandardDeviation ( ) : scanList . InterquartileRange ( ) / 1.36 ) ;
55103 }
56104
57105 /// <summary>
@@ -61,11 +109,13 @@ internal MbrScorer(
61109 private Gamma GetIsotopicEnvelopeCorrDistribution ( )
62110 {
63111 var pearsonCorrs = UnambiguousMsMsAcceptorPeaks . Select ( p => 1 - p . IsotopicPearsonCorrelation ) . Where ( p => p > 0 ) . ToList ( ) ;
64- if ( pearsonCorrs . Count <= 1 ) return null ;
112+ if ( pearsonCorrs . Count < 2 ) return null ;
65113 double mean = pearsonCorrs . Mean ( ) ;
66114 double variance = pearsonCorrs . Variance ( ) ;
67115 var alpha = Math . Pow ( mean , 2 ) / variance ;
68116 var beta = mean / variance ;
117+ if ( ! Gamma . IsValidParameterSet ( alpha , beta ) )
118+ return null ;
69119 return new Gamma ( alpha , beta ) ;
70120 }
71121
@@ -98,20 +148,17 @@ internal void AddRtPredErrorDistribution(SpectraFileInfo donorFile, List<double>
98148 rtPredictionErrors . Add ( avgDiff - anchorPeptideRtDiffs [ i ] ) ;
99149 }
100150
101- Normal rtPredictionErrorDist = new Normal ( 0 , 0 ) ;
102151 // Default distribution. Effectively assigns a RT Score of zero if no alignment can be performed
103152 // between the donor and acceptor based on shared MS/MS IDs
104-
105- if ( rtPredictionErrors . Any ( ) )
153+ Normal rtPredictionErrorDist = new Normal ( 0 , 0 ) ;
154+ if ( rtPredictionErrors . Count >= 2 )
106155 {
107156 double medianRtError = rtPredictionErrors . Median ( ) ;
108157 double stdDevRtError = rtPredictionErrors . StandardDeviation ( ) ;
109- if ( stdDevRtError >= 0.0 && ! double . IsNaN ( medianRtError ) )
110- {
158+ if ( Normal . IsValidParameterSet ( medianRtError , stdDevRtError ) )
111159 rtPredictionErrorDist = new Normal ( medianRtError , 1 ) ;
112- }
113160 }
114-
161+
115162 _rtPredictionErrorDistributionDictionary . Add ( donorFile , rtPredictionErrorDist ) ;
116163 }
117164
@@ -139,6 +186,14 @@ internal double ScoreMbr(MbrChromatographicPeak acceptorPeak, ChromatographicPea
139186 * acceptorPeak . IsotopicDistributionScore , 0.20 ) ;
140187 }
141188
189+ /// <summary>
190+ /// Returns the standard deviation of the Ppm error distribution + the median of the Ppm error distribution
191+ /// </summary>
192+ internal double GetPpmErrorTolerance ( )
193+ {
194+ return _ppmDistribution . StdDev * 4 + Math . Abs ( _ppmDistribution . Median ) ;
195+ }
196+
142197 // Setting a minimum score prevents the MBR score from going to zero if one component of that score is 0
143198 // 3e-7 is the fraction of a normal distribution that lies at least 5 stdDev away from the mean
144199 private double _minScore = 3e-7 ;
@@ -189,10 +244,6 @@ internal double CalculateIntensityScore(double acceptorIntensity, Chromatographi
189244 /// <param name="idDonorPeaks"> List of peaks in the donoro file. </param>
190245 internal void CalculateFoldChangeBetweenFiles ( List < ChromatographicPeak > idDonorPeaks )
191246 {
192-
193- var donorFileLogIntensities = idDonorPeaks . Where ( p => p . Intensity > 0 ) . Select ( p => Math . Log ( p . Intensity , 2 ) ) . ToList ( ) ;
194- double medianDonorLogIntensity = donorFileLogIntensities . Median ( ) ;
195-
196247 // Find the difference in peptide intensities between donor and acceptor files
197248 // this intensity score creates a conservative bias in MBR
198249 List < double > listOfFoldChangesBetweenTheFiles = new List < double > ( ) ;
@@ -244,7 +295,19 @@ internal bool IsValid(SpectraFileInfo donorFile)
244295 {
245296 return _rtPredictionErrorDistributionDictionary . TryGetValue ( donorFile , out var rtDist )
246297 && rtDist != null
247- && _ppmDistribution != null
298+ && IsValid ( ) ;
299+ }
300+
301+ /// <summary>
302+ /// This method checks whether the scorer is validly parameterized and capable of scoring MBR transfers
303+ /// Notably, it is indifferent to the isotopic correlation distribution being null, as a null isotopic distribution correlation
304+ /// results in all MBR transfers receiving the minimum score for the isotopic distribution component.
305+ /// This could be changed in the future, but currently multiple tests results in null isotopic distributions, and will break if they can't do MBR
306+ /// </summary>
307+ /// <returns></returns>
308+ internal bool IsValid ( )
309+ {
310+ return _ppmDistribution != null
248311 && _scanCountDistribution != null
249312 && _logIntensityDistribution != null ;
250313 }
0 commit comments