Skip to content

Commit 1541fe2

Browse files
nbollistrishorts
andauthored
Averagine Generalization (#883)
* Built Averagtide * added averatide to decon structure * Broke out AverageResidue into its specific classes. * Injected AverageResidue into decon params and algorithm * Finished parameter injection and did some testing. * Changed naming to be rna specific * Cleanup * Comments. * Averatide rename --------- Co-authored-by: trishorts <mshort@chem.wisc.edu>
1 parent a01d497 commit 1541fe2

File tree

11 files changed

+184
-95
lines changed

11 files changed

+184
-95
lines changed

mzLib/MassSpectrometry/Deconvolution/Algorithms/ClassicDeconvolutionAlgorithm.cs

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -106,8 +106,8 @@ internal override IEnumerable<IsotopicEnvelope> Deconvolute(MzSpectrum spectrumT
106106
double testMostIntenseMass = candidateForMostIntensePeakMz.ToMass(chargeState);
107107

108108
//get the index of the theoretical isotopic envelope for an averagine model that's close in mass
109-
int massIndex = mostIntenseMasses.GetClosestIndex(testMostIntenseMass);
110-
109+
int massIndex = AverageResidueModel.GetMostIntenseMassIndex(testMostIntenseMass);
110+
111111
//create a list for each isotopic peak from this envelope. This is used to fine tune the monoisotopic mass and is populated in "FindIsotopicEnvelope"
112112
List<double> monoisotopicMassPredictions = new List<double>();
113113

@@ -166,16 +166,16 @@ internal override IEnumerable<IsotopicEnvelope> Deconvolute(MzSpectrum spectrumT
166166

167167
private IsotopicEnvelope FindIsotopicEnvelope(int massIndex, double candidateForMostIntensePeakMz, double candidateForMostIntensePeakIntensity, double testMostIntenseMass, int chargeState, double deconvolutionTolerancePpm, double intensityRatioLimit, List<double> monoisotopicMassPredictions)
168168
{
169-
double[] theoreticalMasses = allMasses[massIndex];
170-
double[] theoreticalIntensities = allIntensities[massIndex];
169+
double[] theoreticalMasses = AverageResidueModel.GetAllTheoreticalMasses(massIndex);
170+
double[] theoreticalIntensities = AverageResidueModel.GetAllTheoreticalIntensities(massIndex);
171171
//add "most intense peak"
172172
var listOfObservedPeaks = new List<(double, double)> { (candidateForMostIntensePeakMz, candidateForMostIntensePeakIntensity) };
173173
var listOfRatios = new List<double> { theoreticalIntensities[0] / candidateForMostIntensePeakIntensity }; // theoreticalIntensities and theoreticalMasses are sorted by intensity, so first is most intense
174174
// Assuming the test peak is most intense...
175175
// Try to find the rest of the isotopes!
176176
double differenceBetweenTheorAndActualMass = testMostIntenseMass - theoreticalMasses[0]; //mass difference actual-theoretical for the tallest peak (not necessarily the monoisotopic)
177177
double totalIntensity = candidateForMostIntensePeakIntensity;
178-
double monoisotopicMass = testMostIntenseMass - diffToMonoisotopic[massIndex]; //get the monoisotopic by taking the most intense mass minus the expected mass difference between most intense and monoisotopic
178+
double monoisotopicMass = testMostIntenseMass - AverageResidueModel.GetDiffToMonoisotopic(massIndex); //get the monoisotopic by taking the most intense mass minus the expected mass difference between most intense and monoisotopic
179179
monoisotopicMassPredictions.Add(monoisotopicMass);
180180
for (int indexToLookAt = 1; indexToLookAt < theoreticalIntensities.Length; indexToLookAt++) //cycle through all theoretical peaks in this envelope from most intense to least intense
181181
{

mzLib/MassSpectrometry/Deconvolution/Algorithms/DeconvolutionAlgorithm.cs

Lines changed: 3 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using Chemistry;
1+
using System.Collections.Generic;
52
using MzLibUtil;
63

74
namespace MassSpectrometry
@@ -11,57 +8,7 @@ namespace MassSpectrometry
118
/// </summary>
129
public abstract class DeconvolutionAlgorithm
1310
{
14-
// For ClassicDeconv. If not used elsewhere, move to that class
15-
#region Averagine Stuff
16-
17-
protected const int numAveraginesToGenerate = 1500;
18-
protected static readonly double[][] allMasses = new double[numAveraginesToGenerate][];
19-
protected static readonly double[][] allIntensities = new double[numAveraginesToGenerate][];
20-
protected static readonly double[] mostIntenseMasses = new double[numAveraginesToGenerate];
21-
protected static readonly double[] diffToMonoisotopic = new double[numAveraginesToGenerate];
22-
23-
static DeconvolutionAlgorithm()
24-
{
25-
// AVERAGINE
26-
const double averageC = 4.9384;
27-
const double averageH = 7.7583;
28-
const double averageO = 1.4773;
29-
const double averageN = 1.3577;
30-
const double averageS = 0.0417;
31-
32-
const double fineRes = 0.125;
33-
const double minRes = 1e-8;
34-
35-
for (int i = 0; i < numAveraginesToGenerate; i++)
36-
{
37-
double averagineMultiplier = (i + 1) / 2.0;
38-
//Console.Write("numAveragines = " + numAveragines);
39-
ChemicalFormula chemicalFormula = new ChemicalFormula();
40-
chemicalFormula.Add("C", Convert.ToInt32(averageC * averagineMultiplier));
41-
chemicalFormula.Add("H", Convert.ToInt32(averageH * averagineMultiplier));
42-
chemicalFormula.Add("O", Convert.ToInt32(averageO * averagineMultiplier));
43-
chemicalFormula.Add("N", Convert.ToInt32(averageN * averagineMultiplier));
44-
chemicalFormula.Add("S", Convert.ToInt32(averageS * averagineMultiplier));
45-
46-
{
47-
var chemicalFormulaReg = chemicalFormula;
48-
IsotopicDistribution ye = IsotopicDistribution.GetDistribution(chemicalFormulaReg, fineRes, minRes);
49-
var masses = ye.Masses.ToArray();
50-
var intensities = ye.Intensities.ToArray();
51-
Array.Sort(intensities, masses);
52-
Array.Reverse(intensities);
53-
Array.Reverse(masses);
54-
55-
mostIntenseMasses[i] = masses[0];
56-
diffToMonoisotopic[i] = masses[0] - chemicalFormulaReg.MonoisotopicMass;
57-
allMasses[i] = masses;
58-
allIntensities[i] = intensities;
59-
}
60-
}
61-
}
62-
63-
#endregion
64-
11+
protected readonly AverageResidue AverageResidueModel;
6512
protected readonly DeconvolutionParameters DeconvolutionParameters;
6613

6714
/// <summary>
@@ -72,6 +19,7 @@ static DeconvolutionAlgorithm()
7219
protected DeconvolutionAlgorithm(DeconvolutionParameters deconParameters)
7320
{
7421
DeconvolutionParameters = deconParameters;
22+
AverageResidueModel = deconParameters.AverageResidueModel;
7523
}
7624

7725
/// <summary>
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
namespace MassSpectrometry;
2+
3+
/// <summary>
4+
/// Represents the average residue and is used for most abundant isotopic peak to monoisotopic peak difference
5+
/// </summary>
6+
/// <remarks>CAREFUL: This is called a lot during <see cref="ClassicDeconvolutionAlgorithm"/> and care should be taken to precalculate as many values as possible when implementing a new type</remarks>
7+
public abstract class AverageResidue
8+
{
9+
protected const double FineRes = 0.125;
10+
protected const double MinRes = 1e-8;
11+
protected static readonly int NumAveraginesToGenerate = 1500;
12+
public abstract int GetMostIntenseMassIndex(double testMass);
13+
public abstract double[] GetAllTheoreticalMasses(int index);
14+
public abstract double[] GetAllTheoreticalIntensities(int index);
15+
public abstract double GetDiffToMonoisotopic(int index);
16+
}
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
using Chemistry;
2+
using System;
3+
using System.Linq;
4+
5+
namespace MassSpectrometry;
6+
7+
/// <summary>
8+
/// Represents the average Amino Acid and is used for most abundant isotopic peak to monoisotopic peak difference.
9+
/// </summary>
10+
/// <remarks>All instance methods return a reference to its static precalculated values</remarks>
11+
public sealed class Averagine : AverageResidue
12+
{
13+
public static readonly double[][] AllMasses = new double[NumAveraginesToGenerate][];
14+
public static readonly double[][] AllIntensities = new double[NumAveraginesToGenerate][];
15+
public static readonly double[] MostIntenseMasses = new double[NumAveraginesToGenerate];
16+
public static readonly double[] DiffToMonoisotopic = new double[NumAveraginesToGenerate];
17+
public override int GetMostIntenseMassIndex(double testMass) => MostIntenseMasses.GetClosestIndex(testMass);
18+
public override double[] GetAllTheoreticalMasses(int index) => AllMasses[index];
19+
public override double[] GetAllTheoreticalIntensities(int index) => AllIntensities[index];
20+
public override double GetDiffToMonoisotopic(int index) => DiffToMonoisotopic[index];
21+
static Averagine()
22+
{
23+
// Magic numbers determined by https://pmc.ncbi.nlm.nih.gov/articles/PMC6166224/
24+
double averageC = 4.9384;
25+
double averageH = 7.7583;
26+
double averageO = 1.4773;
27+
double averageN = 1.3577;
28+
double averageS = 0.0417;
29+
30+
for (int i = 0; i < NumAveraginesToGenerate; i++)
31+
{
32+
double averagineMultiplier = (i + 1) / 2.0;
33+
//Console.Write("numAveragines = " + numAveragines);
34+
ChemicalFormula chemicalFormula = new ChemicalFormula();
35+
chemicalFormula.Add("C", Convert.ToInt32(averageC * averagineMultiplier));
36+
chemicalFormula.Add("H", Convert.ToInt32(averageH * averagineMultiplier));
37+
chemicalFormula.Add("O", Convert.ToInt32(averageO * averagineMultiplier));
38+
chemicalFormula.Add("N", Convert.ToInt32(averageN * averagineMultiplier));
39+
chemicalFormula.Add("S", Convert.ToInt32(averageS * averagineMultiplier));
40+
41+
var chemicalFormulaReg = chemicalFormula;
42+
IsotopicDistribution ye = IsotopicDistribution.GetDistribution(chemicalFormulaReg, FineRes, MinRes);
43+
var masses = ye.Masses.ToArray();
44+
var intensities = ye.Intensities.ToArray();
45+
Array.Sort(intensities, masses);
46+
Array.Reverse(intensities);
47+
Array.Reverse(masses);
48+
49+
MostIntenseMasses[i] = masses[0];
50+
DiffToMonoisotopic[i] = masses[0] - chemicalFormulaReg.MonoisotopicMass;
51+
AllMasses[i] = masses;
52+
AllIntensities[i] = intensities;
53+
}
54+
}
55+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
using System;
2+
using System.Linq;
3+
using Chemistry;
4+
5+
namespace MassSpectrometry;
6+
7+
/// <summary>
8+
/// Represents the average RNA nucleotide and is used for most abundant isotopic peak to monoisotopic peak difference
9+
/// </summary>
10+
/// <remarks>All instance methods return a reference to its static precalculated values</remarks>
11+
public sealed class OxyriboAveragine : AverageResidue
12+
{
13+
public static readonly double[][] AllMasses = new double[NumAveraginesToGenerate][];
14+
public static readonly double[][] AllIntensities = new double[NumAveraginesToGenerate][];
15+
public static readonly double[] MostIntenseMasses = new double[NumAveraginesToGenerate];
16+
public static readonly double[] DiffToMonoisotopic = new double[NumAveraginesToGenerate];
17+
public override int GetMostIntenseMassIndex(double testMass) => MostIntenseMasses.GetClosestIndex(testMass);
18+
19+
public override double[] GetAllTheoreticalMasses(int index) => AllMasses[index];
20+
21+
public override double[] GetAllTheoreticalIntensities(int index) => AllIntensities[index];
22+
23+
public override double GetDiffToMonoisotopic(int index) => DiffToMonoisotopic[index];
24+
static OxyriboAveragine()
25+
{
26+
// Magic numbers determined by counting atoms in the main 4 canonical RNA bases.
27+
// This is not the best approach and future work should refine these numbers.
28+
// One possible approach is to also incorporate the residue frequency in RNA sequences.
29+
30+
double averageC = 9.5;
31+
double averageH = 12.75;
32+
double averageO = 3.75;
33+
double averageN = 5;
34+
35+
for (int i = 0; i < NumAveraginesToGenerate; i++)
36+
{
37+
double averagineMultiplier = (i + 1) / 4.0;
38+
ChemicalFormula chemicalFormula = new ChemicalFormula();
39+
chemicalFormula.Add("C", Convert.ToInt32(averageC * averagineMultiplier));
40+
chemicalFormula.Add("H", Convert.ToInt32(averageH * averagineMultiplier));
41+
chemicalFormula.Add("O", Convert.ToInt32(averageO * averagineMultiplier));
42+
chemicalFormula.Add("N", Convert.ToInt32(averageN * averagineMultiplier));
43+
44+
{
45+
var chemicalFormulaReg = chemicalFormula;
46+
IsotopicDistribution ye = IsotopicDistribution.GetDistribution(chemicalFormulaReg, FineRes, MinRes);
47+
var masses = ye.Masses.ToArray();
48+
var intensities = ye.Intensities.ToArray();
49+
Array.Sort(intensities, masses);
50+
Array.Reverse(intensities);
51+
Array.Reverse(masses);
52+
53+
MostIntenseMasses[i] = masses[0];
54+
DiffToMonoisotopic[i] = masses[0] - chemicalFormulaReg.MonoisotopicMass;
55+
AllMasses[i] = masses;
56+
AllIntensities[i] = intensities;
57+
}
58+
}
59+
}
60+
}

mzLib/MassSpectrometry/Deconvolution/DeconvolutionType.cs

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Text;
5-
using System.Threading.Tasks;
6-
7-
namespace MassSpectrometry
1+
namespace MassSpectrometry
82
{
93
public enum DeconvolutionType
104
{

mzLib/MassSpectrometry/Deconvolution/Parameters/ClassicDeconvolutionParameters.cs

Lines changed: 3 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Text;
5-
using System.Threading.Tasks;
6-
using MzLibUtil;
1+
#nullable enable
72

83
namespace MassSpectrometry
94
{
@@ -19,13 +14,8 @@ public class ClassicDeconvolutionParameters : DeconvolutionParameters
1914
/// <summary>
2015
/// Construct Classic deconvolution parameters
2116
/// </summary>
22-
/// <param name="minCharge"></param>
23-
/// <param name="maxCharge"></param>
24-
/// <param name="deconPpm"></param>
25-
/// <param name="intensityRatio"></param>
26-
/// <param name="range">Isolation range of the scan to be deconvoluted</param>
27-
public ClassicDeconvolutionParameters(int minCharge, int maxCharge, double deconPpm, double intensityRatio, Polarity polarity = Polarity.Positive)
28-
: base(minCharge, maxCharge, polarity)
17+
public ClassicDeconvolutionParameters(int minCharge, int maxCharge, double deconPpm, double intensityRatio, Polarity polarity = Polarity.Positive, AverageResidue? averageResidueModel = null)
18+
: base(minCharge, maxCharge, polarity, averageResidueModel)
2919
{
3020
IntensityRatioLimit = intensityRatio;
3121
DeconvolutionTolerancePpm = deconPpm;

mzLib/MassSpectrometry/Deconvolution/Parameters/DeconvolutionParameters.cs

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,4 @@
1-
using System;
2-
using System.Collections.Generic;
3-
using System.Linq;
4-
using System.Text;
5-
using System.Threading.Tasks;
6-
using MzLibUtil;
7-
1+
#nullable enable
82
namespace MassSpectrometry
93
{
104
/// <summary>
@@ -16,15 +10,17 @@ public abstract class DeconvolutionParameters
1610
public int MinAssumedChargeState { get; set; }
1711
public int MaxAssumedChargeState { get; set; }
1812
public Polarity Polarity { get; set; }
13+
public AverageResidue AverageResidueModel { get; set; }
1914

2015
/// <summary>
2116
/// Constructor should initialize all fields that are used by every deconvolution algorithm
2217
/// </summary>
23-
public DeconvolutionParameters(int minCharge, int maxCharge, Polarity polarity = Polarity.Positive)
18+
protected DeconvolutionParameters(int minCharge, int maxCharge, Polarity polarity = Polarity.Positive, AverageResidue? averageResidueModel = null)
2419
{
2520
MinAssumedChargeState = minCharge;
2621
MaxAssumedChargeState = maxCharge;
2722
Polarity = polarity;
23+
AverageResidueModel = averageResidueModel ?? new Averagine(); // Default to Averagine
2824
}
2925
}
3026
}

mzLib/MassSpectrometry/Deconvolution/Parameters/IsoDecDeconvolutionParameters.cs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using System.Runtime.InteropServices;
1+
#nullable enable
2+
using System.Runtime.InteropServices;
23

34
namespace MassSpectrometry;
45
public class IsoDecDeconvolutionParameters : DeconvolutionParameters
@@ -162,7 +163,7 @@ internal IsoSettings ToIsoSettings()
162163
/// <summary>
163164
/// Mass difference between isotopes
164165
/// </summary>
165-
public double MassDiffC { get; protected set; } = 1.0033;
166+
public double MassDiffC { get; protected set; } = 1.0033; // TODO: Determine if this should be adjusted dynamically by <see cref="AverageResidueModel"/> deconvolution of alternative data types.
166167

167168
/// <summary>
168169
/// Adduct Mass
@@ -196,8 +197,9 @@ public IsoDecDeconvolutionParameters(
196197
float[] mzWindow = null,
197198
int knockdownRounds = 5,
198199
float minAreaCovered = (float)0.20,
199-
float relativeDataThreshold = (float)0.05)
200-
: base(1, 50, polarity)
200+
float relativeDataThreshold = (float)0.05,
201+
AverageResidue? averageResidueModel = null)
202+
: base(1, 50, polarity, averageResidueModel) // average residue model is currently unused for setting any parameters in IsoDec.
201203
{
202204
PhaseRes = phaseRes;
203205
ReportMulitpleMonoisos = reportMultipleMonoisos;

mzLib/Test/TestDeconvolution.cs

Lines changed: 30 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ namespace Test
2121
[ExcludeFromCodeCoverage]
2222
public sealed class TestDeconvolution
2323
{
24-
2524
#region Old Deconvolution
2625

2726
[Test]
@@ -218,7 +217,7 @@ public static void CheckClassicGetMostAbundantObservedIsotopicMass(string peptid
218217
[TestCase(473.05, -4, 1896.26)] // GUAGUC +Na -H -4
219218
[TestCase(631.07, -3, 1896.26)] // GUAGUC +Na -H -3
220219
[TestCase(947.121, -2, 1896.26)] // GUAGUC +Na -H -2
221-
public void TestNegativeModeClassicDeconvolution(double expectedMz, int expectedCharge, double expectedMonoMass)
220+
public void TestNegativeModeClassicDeconvolution_Averagine(double expectedMz, int expectedCharge, double expectedMonoMass)
222221
{
223222
// get scan
224223
string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles",
@@ -239,6 +238,35 @@ public void TestNegativeModeClassicDeconvolution(double expectedMz, int expected
239238
Assert.That(expectedCharge, Is.EqualTo(resultsWithPeakOfInterest.Charge));
240239
}
241240

241+
[Test]
242+
[TestCase(373.85, -5, 1874.28)] // GUAGUC -5
243+
[TestCase(467.57, -4, 1874.28)] // GUAGUC -4
244+
[TestCase(623.75, -3, 1874.28)] // GUAGUC -3
245+
[TestCase(936.13, -2, 1874.28)] // GUAGUC -2
246+
[TestCase(473.05, -4, 1896.26)] // GUAGUC +Na -H -4
247+
[TestCase(631.07, -3, 1896.26)] // GUAGUC +Na -H -3
248+
[TestCase(947.121, -2, 1896.26)] // GUAGUC +Na -H -2
249+
public void TestNegativeModeClassicDeconvolution_Averagtide(double expectedMz, int expectedCharge, double expectedMonoMass)
250+
{
251+
// get scan
252+
string filePath = Path.Combine(TestContext.CurrentContext.TestDirectory, "DataFiles",
253+
"GUACUG_NegativeMode_Sliced.mzML");
254+
var scan = MsDataFileReader.GetDataFile(filePath).GetAllScansList().First();
255+
var tolerance = new PpmTolerance(20);
256+
257+
// set up deconvolution
258+
DeconvolutionParameters deconParams = new ClassicDeconvolutionParameters(-10, -1, 20, 3, Polarity.Negative, new OxyriboAveragine());
259+
260+
List<IsotopicEnvelope> deconvolutionResults = Deconvoluter.Deconvolute(scan, deconParams).ToList();
261+
// ensure each expected result is found, with correct mz, charge, and monoisotopic mass
262+
var resultsWithPeakOfInterest = deconvolutionResults.FirstOrDefault(envelope =>
263+
envelope.Peaks.Any(peak => tolerance.Within(peak.mz, expectedMz)));
264+
if (resultsWithPeakOfInterest is null) Assert.Fail();
265+
266+
Assert.That(expectedMonoMass, Is.EqualTo(resultsWithPeakOfInterest.MonoisotopicMass).Within(0.01));
267+
Assert.That(expectedCharge, Is.EqualTo(resultsWithPeakOfInterest.Charge));
268+
}
269+
242270
#endregion
243271

244272
#region IsoDec Deconvolution

0 commit comments

Comments
 (0)