@@ -33,7 +33,7 @@ public Protein(string sequence, string accession, string organism = null, List<T
3333 IDictionary < int , List < Modification > > oneBasedModifications = null , List < ProteolysisProduct > proteolysisProducts = null ,
3434 string name = null , string fullName = null , bool isDecoy = false , bool isContaminant = false , List < DatabaseReference > databaseReferences = null ,
3535 List < SequenceVariation > sequenceVariations = null , List < SequenceVariation > appliedSequenceVariations = null , string sampleNameForVariants = null ,
36- List < DisulfideBond > disulfideBonds = null , List < SpliceSite > spliceSites = null , string databaseFilePath = null )
36+ List < DisulfideBond > disulfideBonds = null , List < SpliceSite > spliceSites = null , string databaseFilePath = null , bool addBiomarkers = false )
3737 {
3838 // Mandatory
3939 BaseSequence = sequence ;
@@ -64,6 +64,10 @@ public Protein(string sequence, string accession, string organism = null, List<T
6464 DatabaseReferences = databaseReferences ?? new List < DatabaseReference > ( ) ;
6565 DisulfideBonds = disulfideBonds ?? new List < DisulfideBond > ( ) ;
6666 SpliceSites = spliceSites ?? new List < SpliceSite > ( ) ;
67+ if ( addBiomarkers )
68+ {
69+ this . AddBiomarkers ( ) ;
70+ }
6771 }
6872
6973 /// <summary>
@@ -158,9 +162,9 @@ public Protein(string variantBaseSequence, Protein protein, IEnumerable<Sequence
158162
159163 //TODO: Generate all the proteolytic products as distinct proteins during XML reading and delete the ProteolysisProducts parameter
/// <summary>
/// Proteolysis products currently recorded for this protein, exposed as an enumerable
/// view over the private <c>_proteolysisProducts</c> collection.
/// </summary>
public IEnumerable<ProteolysisProduct> ProteolysisProducts => _proteolysisProducts;
162166
163- public IEnumerable < DatabaseReference > DatabaseReferences { get ; }
167+ public IEnumerable < DatabaseReference > DatabaseReferences { get ; }
164168 public string DatabaseFilePath { get ; }
165169
166170 /// <summary>
@@ -244,14 +248,11 @@ public IEnumerable<PeptideWithSetModifications> Digest(DigestionParams digestion
244248 variableModifications = variableModifications ?? new List < Modification > ( ) ;
245249 CleavageSpecificity searchModeType = digestionParams . SearchModeType ;
246250
247- ProteinDigestion digestion = new ProteinDigestion ( digestionParams , allKnownFixedModifications , variableModifications ) ;
248-
251+ ProteinDigestion digestion = new ( digestionParams , allKnownFixedModifications , variableModifications ) ;
249252 IEnumerable < ProteolyticPeptide > unmodifiedPeptides =
250- digestionParams . Protease . Name == "top-down biomarker" ?
251- digestion . Digestion ( this ) :
252253 searchModeType == CleavageSpecificity . Semi ?
253254 digestion . SpeedySemiSpecificDigestion ( this ) :
254- digestion . Digestion ( this ) ;
255+ digestion . Digestion ( this ) ;
255256
256257 if ( digestionParams . KeepNGlycopeptide || digestionParams . KeepOGlycopeptide )
257258 {
@@ -635,7 +636,6 @@ public void AddBiomarkersToProteolysisProducts(int fullProteinOneBasedBegin, int
635636 AddNterminalBiomarkers ( lengthOfProteolysis , fullProteinOneBasedBegin , fullProteinOneBasedEnd , minProductBaseSequenceLength , proteolyisisProductName ) ;
636637 }
637638 }
638-
639639 }
640640 else // initiator methionine cleavage is variable we have to deal both with keeping and deleting the M
641641 {
@@ -646,15 +646,13 @@ public void AddBiomarkersToProteolysisProducts(int fullProteinOneBasedBegin, int
646646 {
647647 AddNterminalBiomarkers ( lengthOfProteolysis + 1 , fullProteinOneBasedBegin , fullProteinOneBasedEnd , minProductBaseSequenceLength , proteolyisisProductName ) ;
648648 }
649-
650649 }
651650 //Digest C-terminus -- not effected by variable N-terminus behavior
652651 if ( addCterminalDigestionBiomarkers )
653652 {
654653 AddCterminalBiomarkers ( lengthOfProteolysis , fullProteinOneBasedEnd , fullProteinOneBasedBegin , minProductBaseSequenceLength , proteolyisisProductName ) ;
655654 }
656655 }
657-
658656 }
659657 else // sequence does not contain N-terminus
660658 {
@@ -712,7 +710,7 @@ private void AddNterminalBiomarkers(int lengthOfProteolysis, int fullProteinOneB
712710 /// <param name="initiatorMethionineBehavior"> this affects the intact proteoform as well as any original proteolysis products containing the N-terminus</param>
713711 /// <param name="minProductBaseSequenceLength"> the same as the min detectable peptide</param>
714712 /// <param name="lengthOfProteolysis"> the number of amino acids that can be removed from either end.</param>
715- public void AddBiomarkers ( bool addFullProtein , bool addForEachOrigninalProteolysisProduct , bool addNterminalDigestionBiomarkers , bool addCterminalDigestionBiomarkers , InitiatorMethionineBehavior initiatorMethionineBehavior , int minProductBaseSequenceLength , int lengthOfProteolysis )
713+ public void AddBiomarkers ( bool addFullProtein = true , bool addForEachOrigninalProteolysisProduct = true , bool addNterminalDigestionBiomarkers = true , bool addCterminalDigestionBiomarkers = true , InitiatorMethionineBehavior initiatorMethionineBehavior = InitiatorMethionineBehavior . Retain , int minProductBaseSequenceLength = 7 , int lengthOfProteolysis = 5 )
716714 {
717715 if ( addFullProtein ) //this loop adds the intact protoeoform and its proteolysis products to the proteolysis products list
718716 {
@@ -729,8 +727,8 @@ public void AddBiomarkers(bool addFullProtein, bool addForEachOrigninalProteolys
729727
730728 if ( addForEachOrigninalProteolysisProduct ) // this does not include the original intact proteoform
731729 {
732- RemoveMethionineWhenAppropriateFromExistingProduts ( initiatorMethionineBehavior ) ;
733- List < ProteolysisProduct > existingProducts = ProteolysisProducts . Where ( p => ! p . Type . Contains ( "biomarker" ) && ! p . Type . Contains ( "intact" ) ) . ToList ( ) ;
730+ RemoveMethionineWhenAppropriateFromExistingProduts ( initiatorMethionineBehavior ) ;
731+ List < ProteolysisProduct > existingProducts = ProteolysisProducts . Where ( p => ! p . Type . Contains ( "biomarker" ) && ! p . Type . Contains ( "intact" ) ) . ToList ( ) ;
734732 foreach ( ProteolysisProduct product in existingProducts )
735733 {
736734 if ( product . OneBasedBeginPosition . HasValue && product . OneBasedEndPosition . HasValue )
@@ -753,14 +751,15 @@ public void AddBiomarkers(bool addFullProtein, bool addForEachOrigninalProteolys
753751 }
754752 }
755753 }
754+ CleaveOnceBetweenProteolysisProducts ( ) ;
756755 }
757756
758757 /// <summary>
759758 /// When a protein has existing proteolysis products, we have to remove methionine when appropriate before creating additional proteolysis products
760759 /// </summary>
761760 /// <param name="existingProducts"></param>
762761 /// <param name="initiatorMethionineBehavior"></param>
763- private void RemoveMethionineWhenAppropriateFromExistingProduts ( InitiatorMethionineBehavior initiatorMethionineBehavior )
762+ public void RemoveMethionineWhenAppropriateFromExistingProduts ( InitiatorMethionineBehavior initiatorMethionineBehavior )
764763 {
765764 List < ProteolysisProduct > productsAtNterminusWithMethionine = _proteolysisProducts . Where ( p => ! p . Type . Contains ( "biomarker" ) && ! p . Type . Contains ( "intact" ) && p . OneBasedBeginPosition == 1 ) . ToList ( ) ;
766765
@@ -784,37 +783,82 @@ private void RemoveMethionineWhenAppropriateFromExistingProduts(InitiatorMethion
784783 {
785784 //here we don't want to do anything, we leave in the products with begin position = 1. Later we'll add an additional proteolysis product so that we get the right number
786785 }
787-
788786 }
789787 }
790788 }
791789 }
792790
/// <summary>
/// Adds a proteolysis product covering the intact proteoform to <c>_proteolysisProducts</c>,
/// provided the covered sequence has at least <paramref name="minProductBaseSequenceLength"/> residues.
/// </summary>
/// <param name="initiatorMethionineBehavior">
/// Retain or Variable: the product begins at position 1.
/// Cleave: the product begins at position 2 when <c>BaseSequence</c> starts with "M", otherwise at position 1.
/// Any other value adds nothing.
/// </param>
/// <param name="minProductBaseSequenceLength">Minimum number of residues the product must span to be added.</param>
public void AddIntactProteoformToProteolysisProducts(InitiatorMethionineBehavior initiatorMethionineBehavior, int minProductBaseSequenceLength)
{
    // Single label shared by every branch. Two of the three branches previously carried a
    // stray trailing space, so the product type differed depending on the path taken.
    const string intactProteoformType = "intact proteoform biomarker";

    int oneBasedBeginPosition;
    if (initiatorMethionineBehavior == InitiatorMethionineBehavior.Retain
        || initiatorMethionineBehavior == InitiatorMethionineBehavior.Variable)
    {
        // When it's variable, nothing extra is added here; per the original note, an
        // additional proteolysis product is added later.
        oneBasedBeginPosition = 1;
    }
    else if (initiatorMethionineBehavior == InitiatorMethionineBehavior.Cleave)
    {
        // Index check instead of Substring(0, 1) so an empty sequence does not throw.
        bool startsWithMethionine = BaseSequence.Length > 0 && BaseSequence[0] == 'M';
        oneBasedBeginPosition = startsWithMethionine ? 2 : 1;
    }
    else
    {
        return;
    }

    // Length of the span [oneBasedBeginPosition, BaseSequence.Length].
    int productLength = BaseSequence.Length - oneBasedBeginPosition + 1;
    if (productLength >= minProductBaseSequenceLength)
    {
        _proteolysisProducts.Add(new ProteolysisProduct(oneBasedBeginPosition, BaseSequence.Length, intactProteoformType));
    }
}
819+
820+ /// <summary>
821+ /// Proteins with multiple proteolysis products are not always fully cleaved; we observed proteolysis products with missed cleavages.
822+ /// This method allows for one missed cleavage between proteolysis products.
823+ /// </summary>
824+ /// <param name="minimumProductLength"></param>
825+ public void CleaveOnceBetweenProteolysisProducts ( int minimumProductLength = 7 )
826+ {
827+ List < int > cleavagePostions = new ( ) ;
828+ List < int > proteolysisProductEndPositions = _proteolysisProducts . Where ( p => p . OneBasedEndPosition . HasValue ) . Select ( p => p . OneBasedEndPosition . Value ) . ToList ( ) ;
829+
830+ if ( proteolysisProductEndPositions . Count > 0 )
831+ {
832+ foreach ( int proteolysisProductEndPosition in proteolysisProductEndPositions )
833+ {
834+ if ( _proteolysisProducts . Any ( p => p . OneBasedBeginPosition == ( proteolysisProductEndPosition + 1 ) ) )
835+ {
836+ cleavagePostions . Add ( proteolysisProductEndPosition ) ;
837+ }
838+ }
839+ }
840+
841+ foreach ( int position in cleavagePostions )
842+ {
843+ if ( position - 1 >= minimumProductLength )
844+ {
845+ string leftType = $ "N-terminal Portion of Singly Cleaved Protein(1-{ position } )";
846+ ProteolysisProduct leftProduct = new ( 1 , position , leftType ) ;
847+ //here we're making sure a product with these begin/end positions isn't already present
848+ if ( ! _proteolysisProducts . Any ( p => p . OneBasedBeginPosition == leftProduct . OneBasedBeginPosition && p . OneBasedEndPosition == leftProduct . OneBasedEndPosition ) )
849+ {
850+ _proteolysisProducts . Add ( leftProduct ) ;
851+ }
852+ }
853+
854+ if ( BaseSequence . Length - position - 1 >= minimumProductLength )
855+ {
856+ string rightType = $ "C-terminal Portion of Singly Cleaved Protein({ position + 1 } -{ BaseSequence . Length } )";
857+ ProteolysisProduct rightProduct = new ( position + 1 , BaseSequence . Length , rightType ) ;
858+ //here we're making sure a product with these begin/end positions isn't already present
859+ if ( ! _proteolysisProducts . Any ( p => p . OneBasedBeginPosition == rightProduct . OneBasedBeginPosition && p . OneBasedEndPosition == rightProduct . OneBasedEndPosition ) )
860+ {
861+ _proteolysisProducts . Add ( rightProduct ) ;
818862 }
819863 }
820864 }
0 commit comments