@@ -70,10 +70,8 @@ public static Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> A
7070 // TODO**: how to handle indistinguishable proteins?
7171 // TODO**: rewrite parsimony function using protein group objects (more efficient)
7272 // TODO**: add proteins with unique peptides first, makes algo faster, use parsimony on remainder
73- // TODO**: how to handle multiple CompactPeptide objects that have the same basesequence (should be treated as 1 unaccounted-for
74- // peptide, not 10 for example)
75- // TODO**: if a peptide is shared between target and decoy proteins, remove its association with the target proteins, leave
76- // only the decoy proteins
73+
74+ // if psm contains decoy protein, remove all target proteins associated with the psm
7775 foreach ( var kvp in fullSequenceToProteinPeptideMatching )
7876 {
7977 bool psmContainsDecoyProtein = false ;
@@ -170,52 +168,13 @@ public static Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> A
170168 if ( ! peptideListBaseSequences . Contains ( peptideBaseSequence ) )
171169 {
172170 peptideListNoDuplicates . Add ( peptide ) ;
171+ peptideListBaseSequences . Add ( peptideBaseSequence ) ;
173172 }
174173 }
175174
176175 newDictNoDuplicatePeptides . Add ( kvp . Key , peptideListNoDuplicates ) ;
177176 }
178177
179- /*
180- // have found all PSMs but some of them are duplicate peptides - pick only the highest-scoring psm per peptide
181- List<NewPsmWithFDR> newProteinGroupPsmList = new List<NewPsmWithFDR>();
182- Dictionary<string, List<NewPsmWithFDR>> peptideSequenceToPsmMatching = new Dictionary<string, List<NewPsmWithFDR>>();
183- foreach(var psm in proteinGroupPsmList)
184- {
185- CompactPeptide peptide = psm.thisPSM.newPsm.GetCompactPeptide(variableModifications, localizeableModifications);
186- string peptideBaseSequence = string.Join("", peptide.BaseSequence.Select(b => char.ConvertFromUtf32(b)));
187- List<NewPsmWithFDR> tempPsmList = new List<NewPsmWithFDR>();
188-
189- if(peptideSequenceToPsmMatching.ContainsKey(peptideBaseSequence))
190- {
191- peptideSequenceToPsmMatching.TryGetValue(peptideBaseSequence, out tempPsmList);
192- tempPsmList.Add(psm);
193- }
194- else
195- {
196- tempPsmList.Add(psm);
197- peptideSequenceToPsmMatching.Add(peptideBaseSequence, tempPsmList);
198- }
199- }
200-
201- // pick the best-scoring psm per peptide
202- foreach(var kvp1 in peptideSequenceToPsmMatching)
203- {
204- double bestScoreSoFar = 0;
205- NewPsmWithFDR bestPsm = null;
206-
207- foreach(var psm in kvp1.Value)
208- {
209- if(psm.thisPSM.Score > bestScoreSoFar)
210- {
211- bestPsm = psm;
212- }
213- }
214-
215- newProteinGroupPsmList.Add(bestPsm);
216- }
217- */
218-
219178 // greedy algorithm adds the next protein that will account for the most unaccounted-for peptides
220179 Dictionary < Protein , HashSet < CompactPeptide > > parsimonyDict = new Dictionary < Protein , HashSet < CompactPeptide > > ( ) ;
221180 HashSet < CompactPeptide > usedPeptides = new HashSet < CompactPeptide > ( ) ;
@@ -232,7 +191,7 @@ public static Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> A
232191 if ( bestProteinHasOnePeptide )
233192 {
234193 // attempt to find protein that best accounts for unaccounted-for peptides
235- foreach ( var kvp in newDict )
194+ foreach ( var kvp in newDictNoDuplicatePeptides )
236195 {
237196 int comparisonProteinNewPeptides = 0 ;
238197
@@ -249,7 +208,7 @@ public static Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> A
249208 if ( comparisonProteinNewPeptides > currentBestNumNewPeptides )
250209 {
251210 bestProtein = kvp . Key ;
252- bestProteinPeptideList = kvp . Value ;
211+ newDict . TryGetValue ( kvp . Key , out bestProteinPeptideList ) ;
253212 currentBestNumNewPeptides = comparisonProteinNewPeptides ;
254213 }
255214 }
@@ -303,7 +262,23 @@ public static Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> A
303262 }
304263 }
305264
265+ /*
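306266 // add indistinguishable proteins to the protein group. NOTE(review): as written, the loop below calls parsimonyDict.Add while enumerating parsimonyDict, which would throw InvalidOperationException if re-enabled; collect the matching proteins first and add them after the loop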
267+ foreach (var kvp in newDict)
268+ {
269+ if (!parsimonyDict.ContainsKey(kvp.Key))
270+ {
271+ foreach(var kvp1 in parsimonyDict)
272+ {
273+ if(kvp.Value.SetEquals(kvp1.Value))
274+ {
275+ parsimonyDict.Add(kvp.Key, kvp.Value);
276+ }
277+ }
278+ }
279+ }
280+ */
281+
307282
308283 // build protein group after parsimony and match it to its peptide
309284 Dictionary < CompactPeptide , HashSet < Protein > > peptideProteinListMatch = new Dictionary < CompactPeptide , HashSet < Protein > > ( ) ;
@@ -329,6 +304,8 @@ public static Dictionary<CompactPeptide, HashSet<PeptideWithSetModifications>> A
329304 }
330305 }
331306
307+
308+
332309 // constructs return dictionary (only use parsimony proteins for the new virtual peptide list)
333310 Dictionary < CompactPeptide , HashSet < PeptideWithSetModifications > > answer = new Dictionary < CompactPeptide , HashSet < PeptideWithSetModifications > > ( ) ;
334311
0 commit comments