Skip to content

Commit f21583e

Browse files
trishorts and MICHAEL SHORTREED authored
Top down biomarker (#621)
* correct Within calculation * update unit tests * first correct top-down biomarker test * remove unused test code * quotable protease * unquotable * add full length proteoform proteolysis products for biomarker search with unit tests * new strategy for adding proteolysis products * more complete biomarker addition * works for protein.xml databases * add unit test biomarkers with xml database * clean up * more clean up * fix unit test * adjust biomarker generation for three initiator methionine behaviors (cleave, retain, variable) * make sure decoy biomarkers for standard xml proteolysis products are generated * deal appropriately w/ n-terminal methionine cleavage * last little unit test tweaks * delete unused code * add biomarkers during protein database load * greater clarity for biomarker proteolysis product entries * change digest for top-down biomarker search to resolve issues in MM * bro. shouldn't have been that hard * cleanup * add unit test * don't need to cover unit tests with unit tests * increase unit test coverage of protein.cs * codemaid * biomarkers on protein with no methionine * unit test booster * ACs comments * more summary statements * deleted mzlib.sln scary Co-authored-by: MICHAEL SHORTREED <mrshortreed@wisc.edu>
1 parent 3148152 commit f21583e

File tree

9 files changed

+148
-162
lines changed

9 files changed

+148
-162
lines changed

mzLib/Proteomics/Protein/Protein.cs

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ public Protein(string sequence, string accession, string organism = null, List<T
6464
DatabaseReferences = databaseReferences ?? new List<DatabaseReference>();
6565
DisulfideBonds = disulfideBonds ?? new List<DisulfideBond>();
6666
SpliceSites = spliceSites ?? new List<SpliceSite>();
67+
6768
if (addBiomarkers)
6869
{
6970
this.AddBiomarkers();
@@ -236,7 +237,7 @@ public string GetEnsemblFastaHeader()
236237
/// Gets peptides for digestion of a protein
237238
/// </summary>
238239
public IEnumerable<PeptideWithSetModifications> Digest(DigestionParams digestionParams, List<Modification> allKnownFixedModifications,
239-
List<Modification> variableModifications, List<SilacLabel> silacLabels = null, (SilacLabel startLabel, SilacLabel endLabel)? turnoverLabels = null)
240+
List<Modification> variableModifications, List<SilacLabel> silacLabels = null, (SilacLabel startLabel, SilacLabel endLabel)? turnoverLabels = null, bool topDownBiomarkerSearch = false)
240241
{
241242
//can't be null
242243
allKnownFixedModifications = allKnownFixedModifications ?? new List<Modification>();
@@ -252,7 +253,7 @@ public IEnumerable<PeptideWithSetModifications> Digest(DigestionParams digestion
252253
IEnumerable<ProteolyticPeptide> unmodifiedPeptides =
253254
searchModeType == CleavageSpecificity.Semi ?
254255
digestion.SpeedySemiSpecificDigestion(this) :
255-
digestion.Digestion(this);
256+
digestion.Digestion(this, topDownBiomarkerSearch);
256257

257258
if (digestionParams.KeepNGlycopeptide || digestionParams.KeepOGlycopeptide)
258259
{
@@ -717,18 +718,18 @@ public void AddBiomarkers(bool addFullProtein = true, bool addForEachOrigninalPr
717718
AddIntactProteoformToProteolysisProducts(initiatorMethionineBehavior, minProductBaseSequenceLength);
718719
if (addNterminalDigestionBiomarkers)
719720
{
720-
AddBiomarkersToProteolysisProducts(1, BaseSequence.Length, true, false, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, "full proteoform biomarker");
721+
AddBiomarkersToProteolysisProducts(1, BaseSequence.Length, true, false, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, "full-length proteoform N-terminal digestion biomarker");
721722
}
722723
if (addCterminalDigestionBiomarkers)
723724
{
724-
AddBiomarkersToProteolysisProducts(1, BaseSequence.Length, false, true, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, "full proteoform biomarker");
725+
AddBiomarkersToProteolysisProducts(1, BaseSequence.Length, false, true, initiatorMethionineBehavior, minProductBaseSequenceLength, lengthOfProteolysis, "full-length proteoform C-terminal digestion biomarker");
725726
}
726727
}
727728

728729
if (addForEachOrigninalProteolysisProduct) // this does not include the original intact proteoform
729730
{
730731
RemoveMethionineWhenAppropriateFromExistingProduts(initiatorMethionineBehavior);
731-
List<ProteolysisProduct> existingProducts = ProteolysisProducts.Where(p => !p.Type.Contains("biomarker") && !p.Type.Contains("intact")).ToList();
732+
List<ProteolysisProduct> existingProducts = ProteolysisProducts.Where(p => !p.Type.Contains("biomarker") && !p.Type.Contains("full-length proteoform")).ToList();
732733
foreach (ProteolysisProduct product in existingProducts)
733734
{
734735
if (product.OneBasedBeginPosition.HasValue && product.OneBasedEndPosition.HasValue)
@@ -795,7 +796,7 @@ public void AddIntactProteoformToProteolysisProducts(InitiatorMethionineBehavior
795796
//when it's variable, we don't have to add anything here, we'll get an additional proteolysis product later.
796797
if (BaseSequence.Length >= minProductBaseSequenceLength)
797798
{
798-
_proteolysisProducts.Add(new ProteolysisProduct(1, BaseSequence.Length, "intact proteoform biomarker"));
799+
_proteolysisProducts.Add(new ProteolysisProduct(1, BaseSequence.Length, "full-length proteoform"));
799800
}
800801
}
801802
else if (initiatorMethionineBehavior == InitiatorMethionineBehavior.Cleave)
@@ -804,14 +805,14 @@ public void AddIntactProteoformToProteolysisProducts(InitiatorMethionineBehavior
804805
{
805806
if (BaseSequence.Length - 1 >= minProductBaseSequenceLength)
806807
{
807-
_proteolysisProducts.Add(new ProteolysisProduct(2, BaseSequence.Length, "intact proteoform biomarker"));
808+
_proteolysisProducts.Add(new ProteolysisProduct(2, BaseSequence.Length, "full-length proteoform"));
808809
}
809810
}
810811
else
811812
{
812813
if (BaseSequence.Length >= minProductBaseSequenceLength)
813814
{
814-
_proteolysisProducts.Add(new ProteolysisProduct(1, BaseSequence.Length, "intact proteoform biomarker"));
815+
_proteolysisProducts.Add(new ProteolysisProduct(1, BaseSequence.Length, "full-length proteoform"));
815816
}
816817
}
817818
}
@@ -822,16 +823,17 @@ public void AddIntactProteoformToProteolysisProducts(InitiatorMethionineBehavior
822823
/// This method allows for one missed cleavage between proteolysis products.
823824
/// </summary>
824825
/// <param name="minimumProductLength"></param>
826+
825827
public void CleaveOnceBetweenProteolysisProducts(int minimumProductLength = 7)
826828
{
827829
List<int> cleavagePostions = new();
828-
List<int> proteolysisProductEndPositions = _proteolysisProducts.Where(p => p.OneBasedEndPosition.HasValue).Select(p => p.OneBasedEndPosition.Value).ToList();
829-
830+
List<ProteolysisProduct> localProducts = _proteolysisProducts.Where(p => !p.Type.Contains("biomarker") && !p.Type.Contains("full-length proteoform")).ToList();
831+
List<int> proteolysisProductEndPositions = localProducts.Where(p => p.OneBasedEndPosition.HasValue).Select(p => p.OneBasedEndPosition.Value).ToList();
830832
if (proteolysisProductEndPositions.Count > 0)
831833
{
832834
foreach (int proteolysisProductEndPosition in proteolysisProductEndPositions)
833835
{
834-
if (_proteolysisProducts.Any(p => p.OneBasedBeginPosition == (proteolysisProductEndPosition + 1)))
836+
if (localProducts.Any(p => p.OneBasedBeginPosition == (proteolysisProductEndPosition + 1)))
835837
{
836838
cleavagePostions.Add(proteolysisProductEndPosition);
837839
}
@@ -844,6 +846,7 @@ public void CleaveOnceBetweenProteolysisProducts(int minimumProductLength = 7)
844846
{
845847
string leftType = $"N-terminal Portion of Singly Cleaved Protein(1-{position})";
846848
ProteolysisProduct leftProduct = new(1, position, leftType);
849+
847850
//here we're making sure a product with these begin/end positions isn't already present
848851
if (!_proteolysisProducts.Any(p => p.OneBasedBeginPosition == leftProduct.OneBasedBeginPosition && p.OneBasedEndPosition == leftProduct.OneBasedEndPosition))
849852
{
@@ -855,6 +858,7 @@ public void CleaveOnceBetweenProteolysisProducts(int minimumProductLength = 7)
855858
{
856859
string rightType = $"C-terminal Portion of Singly Cleaved Protein({position + 1}-{BaseSequence.Length})";
857860
ProteolysisProduct rightProduct = new(position + 1, BaseSequence.Length, rightType);
861+
858862
//here we're making sure a product with these begin/end positions isn't already present
859863
if (!_proteolysisProducts.Any(p => p.OneBasedBeginPosition == rightProduct.OneBasedBeginPosition && p.OneBasedEndPosition == rightProduct.OneBasedEndPosition))
860864
{

mzLib/Proteomics/ProteolyticDigestion/Protease.cs

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -13,15 +13,15 @@ public Protease(string name, CleavageSpecificity cleavageSpecificity, string psi
1313
PsiMsAccessionNumber = psiMSAccessionNumber;
1414
PsiMsName = psiMSName;
1515
DigestionMotifs = motifList ?? new List<DigestionMotif>();
16-
CleavageMod = modDetails;
16+
CleavageMod = modDetails;
1717
}
1818

1919
public string Name { get; }
2020
public CleavageSpecificity CleavageSpecificity { get; }
2121
public string PsiMsAccessionNumber { get; }
2222
public string PsiMsName { get; }
2323
public List<DigestionMotif> DigestionMotifs { get; }
24-
public Modification CleavageMod {get; set;}
24+
public Modification CleavageMod { get; set; }
2525

2626
public override string ToString()
2727
{
@@ -38,7 +38,6 @@ public override int GetHashCode()
3838
{
3939
return (Name ?? "").GetHashCode();
4040
}
41-
4241

4342
/// <summary>
4443
/// This method is used to determine cleavage specificity if the cleavage specificity is unknown
@@ -93,7 +92,7 @@ public CleavageSpecificity GetCleavageSpecificity(Protein protein, int startInde
9392
/// <param name="maxPeptideLength"></param>
9493
/// <returns></returns>
9594
internal List<ProteolyticPeptide> GetUnmodifiedPeptides(Protein protein, int maximumMissedCleavages, InitiatorMethionineBehavior initiatorMethionineBehavior,
96-
int minPeptideLength, int maxPeptideLength, Protease specificProtease)
95+
int minPeptideLength, int maxPeptideLength, Protease specificProtease, bool topDownBiomarkerSearch = false)
9796
{
9897
List<ProteolyticPeptide> peptides = new List<ProteolyticPeptide>();
9998

@@ -112,7 +111,7 @@ internal List<ProteolyticPeptide> GetUnmodifiedPeptides(Protein protein, int max
112111
//top-down
113112
else if (CleavageSpecificity == CleavageSpecificity.None)
114113
{
115-
if(specificProtease.Name != "top-down biomarker")
114+
if (!topDownBiomarkerSearch)//standard top-down
116115
{
117116
// retain methionine
118117
if ((initiatorMethionineBehavior != InitiatorMethionineBehavior.Cleave || protein[0] != 'M')
@@ -128,18 +127,14 @@ internal List<ProteolyticPeptide> GetUnmodifiedPeptides(Protein protein, int max
128127
peptides.Add(new ProteolyticPeptide(protein, 2, protein.Length, 0, CleavageSpecificity.Full, "full:M cleaved"));
129128
}
130129
}
131-
else
132-
{
133-
protein.AddBiomarkers(true, true, true, true, initiatorMethionineBehavior, minPeptideLength, 7);
134-
}
135130

136131
// Also digest using the proteolysis product start/end indices
137132
peptides.AddRange(
138133
protein.ProteolysisProducts
139134
.Where(proteolysisProduct => proteolysisProduct.OneBasedEndPosition.HasValue && proteolysisProduct.OneBasedBeginPosition.HasValue
140135
&& OkayLength(proteolysisProduct.OneBasedEndPosition.Value - proteolysisProduct.OneBasedBeginPosition.Value + 1, minPeptideLength, maxPeptideLength))
141136
.Select(proteolysisProduct =>
142-
new ProteolyticPeptide(protein, proteolysisProduct.OneBasedBeginPosition.Value, proteolysisProduct.OneBasedEndPosition.Value, 0, CleavageSpecificity.Full, proteolysisProduct.Type)));
137+
new ProteolyticPeptide(protein, proteolysisProduct.OneBasedBeginPosition.Value, proteolysisProduct.OneBasedEndPosition.Value, 0, CleavageSpecificity.None, proteolysisProduct.Type)));
143138
}
144139

145140
// Full proteolytic cleavage
@@ -548,7 +543,6 @@ private static IEnumerable<ProteolyticPeptide> FixedTermini(int nTerminusProtein
548543
return intervals.Concat(fixedCTermIntervals).Concat(fixedNTermIntervals);
549544
}
550545

551-
552546
/// <summary>
553547
/// Gets peptides for the singleN protease
554548
/// </summary>

mzLib/Proteomics/ProteolyticDigestion/ProteinDigestion.cs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -232,9 +232,9 @@ public IEnumerable<ProteolyticPeptide> SpeedySemiSpecificDigestion(Protein prote
232232
/// </summary>
233233
/// <param name="protein"></param>
234234
/// <returns></returns>
235-
public IEnumerable<ProteolyticPeptide> Digestion(Protein protein)
235+
public IEnumerable<ProteolyticPeptide> Digestion(Protein protein, bool topDownBiomarkerSearch = false)
236236
{
237-
return Protease.GetUnmodifiedPeptides(protein, MaximumMissedCleavages, InitiatorMethionineBehavior, MinPeptideLength, MaxPeptideLength, DigestionParams.SpecificProtease);
237+
return Protease.GetUnmodifiedPeptides(protein, MaximumMissedCleavages, InitiatorMethionineBehavior, MinPeptideLength, MaxPeptideLength, DigestionParams.SpecificProtease, topDownBiomarkerSearch);
238238
}
239239
}
240240
}

mzLib/Test/TestClassExtensions.cs

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
using NUnit.Framework;
2+
using System.Diagnostics.CodeAnalysis;
3+
using MzLibUtil;
4+
using System.IO;
5+
using System.Linq;
6+
using System;
7+
8+
namespace Test
{
    /// <summary>
    /// Unit tests for the helper methods exposed by <c>ClassExtensions</c>.
    /// </summary>
    [TestFixture]
    [ExcludeFromCodeCoverage]
    public class TestClassExtensions
    {
        /// <summary>
        /// Smooths 100 fixed input values with <c>ClassExtensions.BoxCarSmooth</c> using a
        /// window argument of 3 and compares the result, rounded to two decimals and
        /// rendered as text, against a hard-coded list of expected strings.
        /// </summary>
        [Test]
        public static void TestBoxCarSmooth()
        {
            double[] inputData = new double[] { 0.19, 0.69, 0.03, 0.85, 0.84, 0.46, 0.09, 0.05, 0.11, 0.5, 0.6, 0.78, 0.48, 0.66, 0.61, 0.78, 0.82, 0.18, 0.77, 0.14, 0.97, 0.48, 0.54, 0.98, 0.01, 0.38, 0.26, 0.4, 0.31, 0.41, 0.03, 0.2, 0.98, 0.36, 0.24, 0.51, 0.14, 0.96, 0.32, 0.9, 0.36, 0.57, 0.97, 0.07, 0.12, 0.73, 0.92, 0.51, 0.04, 0.2, 0.39, 0.32, 0.33, 0.62, 0.32, 0.68, 0.91, 0.3, 0.68, 0.22, 0.89, 0.27, 0.68, 0.08, 0.61, 0.25, 0.82, 0.73, 0.49, 0.76, 0.01, 0.15, 0.13, 0.96, 0.57, 0.58, 0.96, 0.93, 0.5, 0.45, 0.89, 0.44, 0.59, 0.68, 0.71, 0.85, 0.16, 0.18, 0.68, 0.37, 0.22, 0.81, 0.53, 0.26, 0.94, 0.52, 0.66, 0.55, 0.51, 0.14 };
            double[] mySmoothedArray = ClassExtensions.BoxCarSmooth(inputData, 3);

            // The leading and trailing expected entries match the mean of each run of three
            // consecutive inputs, rounded to two decimals.
            string[] expectedOutput = new string[] { "0.3", "0.52", "0.57", "0.72", "0.46", "0.2", "0.08", "0.22", "0.4", "0.63", "0.62", "0.64", "0.58", "0.68", "0.74", "0.59", "0.59", "0.36", "0.63", "0.53", "0.66", "0.67", "0.51", "0.46", "0.22", "0.35", "0.32", "0.37", "0.25", "0.21", "0.4", "0.51", "0.53", "0.37", "0.3", "0.54", "0.47", "0.73", "0.53", "0.61", "0.63", "0.54", "0.39", "0.31", "0.59", "0.72", "0.49", "0.25", "0.21", "0.3", "0.35", "0.42", "0.42", "0.54", "0.64", "0.63", "0.63", "0.4", "0.6", "0.46", "0.61", "0.34", "0.46", "0.31", "0.56", "0.6", "0.68", "0.66", "0.42", "0.31", "0.1", "0.41", "0.55", "0.7", "0.7", "0.82", "0.8", "0.63", "0.61", "0.59", "0.64", "0.57", "0.66", "0.75", "0.57", "0.4", "0.34", "0.41", "0.42", "0.47", "0.52", "0.53", "0.58", "0.57", "0.71", "0.58", "0.57", "0.4" };

            // Format with the invariant culture: the expected strings use '.' as the decimal
            // separator, so current-culture formatting would fail on decimal-comma locales.
            string[] actualOutput = mySmoothedArray
                .Select(v => Math.Round(v, 2).ToString(System.Globalization.CultureInfo.InvariantCulture))
                .ToArray();

            // The position of each smoothed value matters, so compare element-by-element
            // in order rather than as an unordered collection.
            CollectionAssert.AreEqual(expectedOutput, actualOutput);
        }
    }
}

0 commit comments

Comments
 (0)