Skip to content

Commit a01d497

Browse files
Fix The bug on RunErrorChecking (#885)
* revised the RevisedModifiedPeptides function * revised the run error checking * add the tester for merge * Edit the tester for the new detectionType * fix the bug and set intensity to 0 for ambiguous peaks * solve the ambiguous peak intensity problem * Fix the bug in the MBR score when all the peaks inside are ambiguous * change some comments --------- Co-authored-by: Alexander-Sol <41119316+Alexander-Sol@users.noreply.github.com>
1 parent 8490139 commit a01d497

File tree

9 files changed

+358
-21
lines changed

9 files changed

+358
-21
lines changed

mzLib/FlashLFQ/ChromatographicPeak.cs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,12 @@ public void MergeFeatureWith(ChromatographicPeak otherFeature, bool integrate)
118118
.Where(p => !thisFeaturesPeaks.Contains(p.IndexedPeak)));
119119
this.CalculateIntensityForThisFeature(integrate);
120120
}
121+
122+
// If a merge happens on an IsoTrack peak (IsoTrack_MBR or IsoTrack_MSMS), the detection type is set to MSMSAmbiguousPeakfinding
123+
if (DetectionType == DetectionType.IsoTrack_MBR || DetectionType == DetectionType.IsoTrack_MSMS)
124+
{
125+
DetectionType = DetectionType.MSMSAmbiguousPeakfinding;
126+
}
121127
}
122128

123129
/// <summary>
@@ -225,7 +231,7 @@ public override string ToString()
225231

226232
sb.Append(NumChargeStatesObserved.ToString(CultureInfo.InvariantCulture) + "\t");
227233

228-
// temporary way to distinguish between MBR, MBR_IsoTrack, IsoTrack_Ambiguous and MSMS peaks
234+
// temporary way to distinguish between MBR, MBR_IsoTrack, IsoTrack_Ambiguous, MSMSAmbiguousPeakfinding and MSMS peaks
229235
switch (this.DetectionType)
230236
{
231237
case DetectionType.IsoTrack_MBR:
@@ -234,6 +240,9 @@ public override string ToString()
234240
case DetectionType.IsoTrack_Ambiguous:
235241
sb.Append("IsoTrack_Ambiguous" + "\t");
236242
break;
243+
case DetectionType.MSMSAmbiguousPeakfinding:
244+
sb.Append("MSMSAmbiguousPeakfinding" + "\t");
245+
break;
237246
default:
238247
sb.Append("MSMS" + "\t");
239248
break;

mzLib/FlashLFQ/DetectionType.cs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ public enum DetectionType
66
MSMS, // The peak is detected from MS2ID
77
MBR, // The peak is detected from MBR
88
MSMSAmbiguousPeakfinding, // The peak is detected from more than one MS2ID
9+
IsoTrack_MSMS, // The peak is detected from MS2ID by IsoTrack, The Priority is IsoTrack_MSMS > MSMS
910
IsoTrack_MBR, // The peak is detected from MBR by IsoTrack
1011
IsoTrack_Ambiguous, // Multiple peptides are mapped to this peak by IsoTracker.
1112
MSMSIdentifiedButNotQuantified, // We have MS2ID but no peak for quantification

mzLib/FlashLFQ/FlashLFQResults.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -806,6 +806,7 @@ internal void RevisedModifiedPeptides()
806806
.SelectMany(p => p)
807807
.Where(p => p != null)
808808
.SelectMany(p=>p.Identifications)
809+
.Where(p=>p.BaseSequence == originalPeptide.BaseSequence) // Avoid removing any peptide with a different base sequence
809810
.DistinctBy(p=>p.ModifiedSequence)
810811
.Select(p=>p.ModifiedSequence)
811812
.ToList();
@@ -826,6 +827,7 @@ internal void RevisedModifiedPeptides()
826827
var allSeq = isoPeptidePeaks
827828
.Where(p => p != null)
828829
.SelectMany(p => p.Identifications)
830+
.Where(p=>p.BaseSequence == originalPeptide.BaseSequence) // do not output the peptide with different base sequence in the peptide result
829831
.Select(p => p.ModifiedSequence)
830832
.Distinct()
831833
.ToList();
@@ -842,6 +844,7 @@ internal void RevisedModifiedPeptides()
842844
var allSeq = isoPeptidePeaks
843845
.Where(p => p != null)
844846
.SelectMany(p => p.Identifications)
847+
.Where(p=>p.BaseSequence == originalPeptide.BaseSequence)// do not output the peptide with different base sequence that was merged in RunErrorCheck
845848
.Select(p => p.ModifiedSequence)
846849
.Distinct()
847850
.ToList();

mzLib/FlashLFQ/FlashLfqEngine.cs

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,10 @@ public FlashLfqResults Run()
232232
QuantifyIsobaricPeaks();
233233
_results.IsobaricPeptideDict = IsobaricPeptideDict;
234234
AddIsoPeaks();
235+
foreach (var file in _results.SpectraFiles)
236+
{
237+
RunErrorChecking(file);
238+
}
235239
}
236240
IsoTrackerIsRunning = false;
237241

@@ -831,8 +835,14 @@ private MbrScorer BuildMbrScorer(List<ChromatographicPeak> acceptorFileIdentifie
831835
.ToList();
832836
double medianAcceptorLogIntensity = acceptorFileLogIntensities.Median();
833837
Normal logIntensityDistribution = new Normal(acceptorFileLogIntensities.Median(), acceptorFileLogIntensities.InterquartileRange() / 1.36);
834-
835-
return new MbrScorer(apexToAcceptorFilePeakDict, acceptorFileIdentifiedPeaks, ppmDistribution, logIntensityDistribution);
838+
try // if the constructor fails, we don't want to crash the program
839+
{
840+
return new MbrScorer(apexToAcceptorFilePeakDict, acceptorFileIdentifiedPeaks, ppmDistribution, logIntensityDistribution);
841+
}
842+
catch
843+
{
844+
return null;
845+
}
836846
}
837847

838848
/// <summary>
@@ -1354,6 +1364,12 @@ private void RunErrorChecking(SpectraFileInfo spectraFile)
13541364
{
13551365
if (PeptideModifiedSequencesToQuantify.Contains(storedPeak.Identifications.First().ModifiedSequence))
13561366
{
1367+
// If the try peak is merged into the stored peak, the try peak should be labeled as MSMSAmbiguousPeakfinding so that its intensity is set to 0 in the peptide result
1368+
if (tryPeak.DetectionType == DetectionType.IsoTrack_MSMS ||
1369+
tryPeak.DetectionType == DetectionType.IsoTrack_MBR)
1370+
{
1371+
tryPeak.DetectionType = DetectionType.MSMSAmbiguousPeakfinding;
1372+
}
13571373
storedPeak.MergeFeatureWith(tryPeak, FlashParams.Integrate);
13581374
}
13591375
else
@@ -2009,7 +2025,7 @@ internal void CollectChromPeakInRuns(PeakRegion sharedPeak, List<Chromatographic
20092025
double peakStart = sharedPeak.StartRT ;
20102026
double PeakEnd = sharedPeak.EndRT ;
20112027
bool isMBR = false;
2012-
DetectionType detectionType = DetectionType.MSMS;
2028+
DetectionType detectionType = DetectionType.IsoTrack_MSMS;
20132029

20142030
// Check whether there is any Id in the XIC within the time window.
20152031
// Yes: use the Id from the same file. No: use the Id from other file, then set the detection type property as MBR.
@@ -2033,7 +2049,7 @@ internal void CollectChromPeakInRuns(PeakRegion sharedPeak, List<Chromatographic
20332049

20342050
break;
20352051
case 1: // If there is one Id from the same file in the time window, then detectionType should be IsoTrack_MSMS.
2036-
detectionType = DetectionType.MSMS;
2052+
detectionType = DetectionType.IsoTrack_MSMS;
20372053
break;
20382054
case > 1: // If there are more than one Id from the same file in the time window, then detectionType should be IsoTrack_Ambiguous.
20392055
detectionType = DetectionType.IsoTrack_Ambiguous;
@@ -2119,24 +2135,37 @@ internal ChromatographicPeak FindChromPeak(Tuple<double, double, double> rtInfo,
21192135
/// </summary>
21202136
internal void AddIsoPeaks()
21212137
{
2122-
21232138
foreach (var fileInfo in SpectraFileInfoList)
21242139
{
2125-
var allChromPeaksInFile = IsobaricPeptideDict
2140+
var allIsoTrackerPeaksInFile = IsobaricPeptideDict
21262141
.SelectMany(p => p.Value)
21272142
.SelectMany(p => p.Value)
21282143
.Where(peak => peak != null && peak.SpectraFileInfo.Equals(fileInfo))
21292144
.ToList();
2130-
//remove the repeated peaks from FlashLFQ with the same identification list
2131-
foreach (var peak in allChromPeaksInFile)
2145+
2146+
//remove the repeated peaks from FlashLFQ with the same identification list, the priority is IsoTrack > MSMS
2147+
foreach (var peak in allIsoTrackerPeaksInFile)
21322148
{
21332149
_results.Peaks[fileInfo].RemoveAll(p => IDsEqual(p.Identifications,peak.Identifications));
21342150
}
21352151

21362152
// Add the peaks into the result dictionary, and remove the duplicated peaks.
2137-
_results.Peaks[fileInfo].AddRange(allChromPeaksInFile);
2153+
// And we choose IsoTrack_MSMS as the priority.
2154+
_results.Peaks[fileInfo].AddRange(allIsoTrackerPeaksInFile);
21382155
_results.Peaks[fileInfo] = _results.Peaks[fileInfo]
2139-
.DistinctBy(peak => new { peak.ApexRetentionTime, peak.SpectraFileInfo, peak.Identifications.First().BaseSequence }).ToList();
2156+
.GroupBy(peak => new { peak.ApexRetentionTime, peak.SpectraFileInfo, peak.Identifications.First().BaseSequence })
2157+
.Select(group =>
2158+
{
2159+
// Prioritize IsoTrack_MSMS over other detection types
2160+
var prioritizedPeak = group
2161+
.OrderByDescending(p => p.DetectionType == DetectionType.IsoTrack_MSMS)
2162+
.ThenByDescending(p => p.DetectionType == DetectionType.IsoTrack_MBR)
2163+
.ThenByDescending(p => p.DetectionType == DetectionType.IsoTrack_Ambiguous)
2164+
.ThenByDescending(p => p.DetectionType == DetectionType.MSMS)
2165+
.First();
2166+
return prioritizedPeak;
2167+
})
2168+
.ToList();
21402169
}
21412170
}
21422171

mzLib/FlashLFQ/Peptide.cs

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,13 +158,37 @@ public void SetDetectionType(SpectraFileInfo fileInfo, DetectionType detectionTy
158158
}
159159
}
160160

161+
/// <summary>
162+
/// Set the isobaric peptide intensities and detection types by iterating through the peak list.
163+
/// </summary>
164+
/// <param name="peakList"></param>
161165
public void SetIsobaricPeptide(List<ChromatographicPeak> peakList)
162166
{
163167
foreach (var peak in peakList.Where(p=>p != null))
164168
{
165-
RetentionTimes[peak.SpectraFileInfo] = peak.ApexRetentionTime;
166-
Intensities[peak.SpectraFileInfo] = peak.Apex.Intensity;
167-
DetectionTypes[peak.SpectraFileInfo] = peak.DetectionType;
169+
if (peak.Identifications.Count > 1) // More than one id in this peak, the detectionType should be ambiguous and the intensity is set to 0
170+
{
171+
RetentionTimes[peak.SpectraFileInfo] = peak.ApexRetentionTime;
172+
Intensities[peak.SpectraFileInfo] = 0;
173+
if (peak.DetectionType != DetectionType.IsoTrack_Ambiguous) // If the detectionType is not IsoTrack_Ambiguous but there is more than one id, the extra id must have been added by RunErrorCheck
174+
{
175+
DetectionTypes[peak.SpectraFileInfo] = DetectionType.MSMSAmbiguousPeakfinding;
176+
}
177+
else
178+
{
179+
DetectionTypes[peak.SpectraFileInfo] = peak.DetectionType;
180+
}
181+
182+
}
183+
else
184+
{
185+
RetentionTimes[peak.SpectraFileInfo] = peak.ApexRetentionTime;
186+
// In some situations, the peak was merged in RunErrorCheck but still exists in IsoPeptideDict. RunErrorCheck labeled it as an ambiguous peak, so here we set its intensity to 0.
187+
Intensities[peak.SpectraFileInfo] = (peak.DetectionType == DetectionType.IsoTrack_Ambiguous || peak.DetectionType == DetectionType.MSMSAmbiguousPeakfinding)
188+
? 0
189+
: peak.Apex.Intensity;
190+
DetectionTypes[peak.SpectraFileInfo] = peak.DetectionType;
191+
}
168192
}
169193
}
170194

@@ -195,7 +219,7 @@ public string ToString(List<SpectraFileInfo> rawFiles, bool IsoTracker = false)
195219
orderedProteinGroups.Any() ? orderedProteinGroups.First().Organism + "\t" : "\t");
196220

197221
foreach (var file in rawFiles)
198-
{
222+
{
199223
double intensity = GetIntensity(file);
200224
str.Append(intensity + "\t");
201225
}

mzLib/Test/Test.csproj

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,6 +628,12 @@
628628
<None Update="XICData\AllPSMs.psmtsv">
629629
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
630630
</None>
631+
<None Update="XICData\AllPSMs_IDMergeConflict_2.psmtsv">
632+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
633+
</None>
634+
<None Update="XICData\AllPSMs_IDMergeConflict.psmtsv">
635+
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
636+
</None>
631637
<None Update="XICData\AllPSMs_IsoID.psmtsv">
632638
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
633639
</None>

0 commit comments

Comments
 (0)