Skip to content

Commit babcdb0

Browse files
author
stefanks
authored
Merge branch 'master' into stefanks-patch-1
2 parents c315af7 + ccb284e commit babcdb0

File tree

2 files changed

+36
-87
lines changed

2 files changed

+36
-87
lines changed

InternalLogic/AnalysisEngine.cs

Lines changed: 23 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -70,10 +70,8 @@ public static Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> A
7070
// TODO**: how to handle indistinguishable proteins?
7171
// TODO**: rewrite parsimony function using protein group objects (more efficient)
7272
// TODO**: add proteins with unique peptides first, makes algo faster, use parsimony on remainder
73-
// TODO**: how to handle multiple CompactPeptide objects that have the same basesequence (should be treated as 1 unaccounted-for
74-
// peptide, not 10 for example)
75-
// TODO**: if a peptide is shared between target and decoy proteins, remove its association with the target proteins, leave
76-
// only the decoy proteins
73+
74+
// if a PSM contains a decoy protein, remove all target proteins associated with that PSM
7775
foreach (var kvp in fullSequenceToProteinPeptideMatching)
7876
{
7977
bool psmContainsDecoyProtein = false;
@@ -170,52 +168,13 @@ public static Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> A
170168
if (!peptideListBaseSequences.Contains(peptideBaseSequence))
171169
{
172170
peptideListNoDuplicates.Add(peptide);
171+
peptideListBaseSequences.Add(peptideBaseSequence);
173172
}
174173
}
175174

176175
newDictNoDuplicatePeptides.Add(kvp.Key, peptideListNoDuplicates);
177176
}
178177

179-
/*
180-
// have found all PSMs but some of them are duplicate peptides - pick only the highest-scoring psm per peptide
181-
List<NewPsmWithFDR> newProteinGroupPsmList = new List<NewPsmWithFDR>();
182-
Dictionary<string, List<NewPsmWithFDR>> peptideSequenceToPsmMatching = new Dictionary<string, List<NewPsmWithFDR>>();
183-
foreach(var psm in proteinGroupPsmList)
184-
{
185-
CompactPeptide peptide = psm.thisPSM.newPsm.GetCompactPeptide(variableModifications, localizeableModifications);
186-
string peptideBaseSequence = string.Join("", peptide.BaseSequence.Select(b => char.ConvertFromUtf32(b)));
187-
List<NewPsmWithFDR> tempPsmList = new List<NewPsmWithFDR>();
188-
189-
if(peptideSequenceToPsmMatching.ContainsKey(peptideBaseSequence))
190-
{
191-
peptideSequenceToPsmMatching.TryGetValue(peptideBaseSequence, out tempPsmList);
192-
tempPsmList.Add(psm);
193-
}
194-
else
195-
{
196-
tempPsmList.Add(psm);
197-
peptideSequenceToPsmMatching.Add(peptideBaseSequence, tempPsmList);
198-
}
199-
}
200-
201-
// pick the best-scoring psm per peptide
202-
foreach(var kvp1 in peptideSequenceToPsmMatching)
203-
{
204-
double bestScoreSoFar = 0;
205-
NewPsmWithFDR bestPsm = null;
206-
207-
foreach(var psm in kvp1.Value)
208-
{
209-
if(psm.thisPSM.Score > bestScoreSoFar)
210-
{
211-
bestPsm = psm;
212-
}
213-
}
214-
215-
newProteinGroupPsmList.Add(bestPsm);
216-
}
217-
*/
218-
219178
// greedy algorithm adds the next protein that will account for the most unaccounted-for peptides
220179
Dictionary<Protein, HashSet<CompactPeptide>> parsimonyDict = new Dictionary<Protein, HashSet<CompactPeptide>>();
221180
HashSet<CompactPeptide> usedPeptides = new HashSet<CompactPeptide>();
@@ -232,7 +191,7 @@ public static Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> A
232191
if (bestProteinHasOnePeptide)
233192
{
234193
// attempt to find protein that best accounts for unaccounted-for peptides
235-
foreach (var kvp in newDict)
194+
foreach (var kvp in newDictNoDuplicatePeptides)
236195
{
237196
int comparisonProteinNewPeptides = 0;
238197

@@ -249,7 +208,7 @@ public static Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> A
249208
if (comparisonProteinNewPeptides > currentBestNumNewPeptides)
250209
{
251210
bestProtein = kvp.Key;
252-
bestProteinPeptideList = kvp.Value;
211+
newDict.TryGetValue(kvp.Key, out bestProteinPeptideList);
253212
currentBestNumNewPeptides = comparisonProteinNewPeptides;
254213
}
255214
}
@@ -303,7 +262,23 @@ public static Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> A
303262
}
304263
}
305264

265+
/*
306266
// add indistinguishable proteins to the protein group
267+
foreach (var kvp in newDict)
268+
{
269+
if (!parsimonyDict.ContainsKey(kvp.Key))
270+
{
271+
foreach(var kvp1 in parsimonyDict)
272+
{
273+
if(kvp.Value.SetEquals(kvp1.Value))
274+
{
275+
parsimonyDict.Add(kvp.Key, kvp.Value);
276+
}
277+
}
278+
}
279+
}
280+
*/
281+
307282

308283
// build protein group after parsimony and match it to its peptide
309284
Dictionary<CompactPeptide, HashSet<Protein>> peptideProteinListMatch = new Dictionary<CompactPeptide, HashSet<Protein>>();
@@ -329,6 +304,8 @@ public static Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> A
329304
}
330305
}
331306

307+
308+
332309
// construct the return dictionary (only parsimony proteins are used for the new virtual peptide list)
333310
Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> answer = new Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>>();
334311

InternalLogic/ProteinGroup.cs

Lines changed: 13 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@ public class ProteinGroup
1919

2020
internal ProteinGroup(HashSet<Protein> proteins, List<NewPsmWithFdr> psmList, HashSet<CompactPeptide> allUniquePeptides, List<MorpheusModification> variableModifications, List<MorpheusModification> localizeableModifications)
2121
{
22-
this.Proteins = proteins;
23-
this.PsmList = psmList;
22+
Proteins = proteins;
23+
PsmList = psmList;
2424
PeptideList = new List<CompactPeptide>();
2525
UniquePeptideList = new List<CompactPeptide>();
2626
proteinGroupScore = 0;
@@ -38,14 +38,18 @@ internal ProteinGroup(HashSet<Protein> proteins, List<NewPsmWithFdr> psmList, Ha
3838
{
3939
CompactPeptide peptide = psm.thisPSM.newPsm.GetCompactPeptide(variableModifications, localizeableModifications);
4040
PeptideList.Add(peptide);
41+
proteinGroupScore += psm.thisPSM.Score;
4142

42-
// calculate the protein group score
43+
// construct list of unique peptides
4344
if (allUniquePeptides.Contains(peptide))
4445
{
4546
UniquePeptideList.Add(peptide);
46-
proteinGroupScore += psm.thisPSM.Score;
47+
//proteinGroupScore += psm.thisPSM.Score;
4748
}
4849
}
50+
51+
if (UniquePeptideList.Count == 0)
52+
proteinGroupScore = 0;
4953
}
5054

5155
#endregion Internal Constructors
@@ -89,7 +93,7 @@ public override string ToString()
8993

9094
// list of proteins in the group
9195
foreach (Protein protein in Proteins)
92-
sb.Append("" + protein.Name + " ;; ");
96+
sb.Append("" + protein.FullDescription + " ;; ");
9397
sb.Append("\t");
9498

9599
// number of proteins in group
@@ -129,41 +133,8 @@ public override string ToString()
129133
sb.Append(isDecoy);
130134
sb.Append("\t");
131135

132-
return sb.ToString();
133-
/*
134-
// proteins in protein group
135-
foreach (Protein protein in Proteins)
136-
sb.Append("" + protein.FullDescription + " ;; ");
137-
sb.Append("\t");
138-
139-
// sequences of proteins in group
140-
foreach (Protein protein in Proteins)
141-
sb.Append("" + protein.BaseSequence + " ;; ");
142-
sb.Append("\t");
143-
144-
// length of each protein
145-
foreach (Protein protein in Proteins)
146-
sb.Append("" + protein.BaseSequence.Length + " ;; ");
147-
sb.Append("\t");
148-
149-
// number of proteins in group
150-
sb.Append("" + Proteins.Count);
151-
sb.Append("\t");
152-
153-
// number of PSMs for the group
154-
sb.Append("" + PsmList.Count);
155-
sb.Append("\t");
156-
157-
// number of unique peptides
158-
sb.Append("" + UniquePeptideList.Count());
159-
sb.Append("\t");
160-
161-
// summed psm precursor intensity
162-
sb.Append(summedIntensity);
163-
sb.Append("\t");
164-
165-
// summed unique peptide precursor intensity
166-
sb.Append(summedUniquePeptideIntensity);
136+
// cumulative target
137+
sb.Append(cumulativeTarget);
167138
sb.Append("\t");
168139

169140
// cumulative decoy
@@ -173,7 +144,8 @@ public override string ToString()
173144
// q value
174145
sb.Append(QValue * 100);
175146
sb.Append("\t");
176-
*/
147+
148+
return sb.ToString();
177149
}
178150

179151
#endregion Public Methods

0 commit comments

Comments
 (0)