EMSL-Computing
diff --git a/‎DESCRIPTION
Lines changed: 2 additions & 2 deletions b/‎DESCRIPTION
Lines changed: 2 additions & 2 deletions
diff --git a/‎NAMESPACE
Lines changed: 1 addition & 0 deletions b/‎NAMESPACE
Lines changed: 1 addition & 0 deletions
diff --git a/‎R/annotated_spectrum_plot.R
Lines changed: 19 additions & 8 deletions b/‎R/annotated_spectrum_plot.R
Lines changed: 19 additions & 8 deletions
diff --git a/‎R/get_matched_peaks.R
Lines changed: 75 additions & 20 deletions b/‎R/get_matched_peaks.R
Lines changed: 75 additions & 20 deletions
diff --git a/‎R/get_peak_data.R
Lines changed: 15 additions & 3 deletions b/‎R/get_peak_data.R
Lines changed: 15 additions & 3 deletions
@@ -1,8 +1,8 @@
 Package: pspecterlib
 Type: Package
 Title: PSpecteR Library - Visualization tools for top-down and bottom-up proteomics data
-Version: 1.0.1
-Date: 2022-10-03
+Version: 1.1.0
+Date: 2023-04-13
 Authors@R: c(person("David", "Degnan", email = "[email protected]", role = c("aut", "cre")))
 Description: Provides functionality to visually examine LC-MS top-down and bottom-up proteomics data. Supports reading various mass spectrometry files, labeling spectra with fragmentation patterns, testing post-translational modifications, plotting where identified fragments map to reference sequences, and visualizing algorithmic output from database search tools (MSPathFinder) and metadata. 
 License: MIT + file LICENSE
 
@@ -27,6 +27,7 @@ export(is_sequence)
 export(make_mass_modified_ion)
 export(make_peak_data)
 export(ms1_plots)
+export(multiple_modifications)
 export(multiply_molforms)
 export(promex_feature_plot)
 export(scan_metadata_plot)
 
@@ -83,8 +83,19 @@ annotated_spectrum_plot <- function(PeakData,
   ###############
 
   # Set color vector
-  ColorVector <- c("a" = "forestgreen", "b" = "steelblue", "c" = "darkviolet",
-                   "x" = "pink3", "y" = "red", "z" = "darkorange", "Spectrum" = "black")
+  ColorVector <- c("a" = "forestgreen", "a+" = "forestgreen", "a++" = "forestgreen",
+                   "a-" = "forestgreen", "a--" = "forestgreen", "a^" = "forestgreen", "a^^" = "forestgreen",
+                   "b" = "steelblue", "b+" = "steelblue", "b++" = "steelblue",
+                   "b-" = "steelblue", "b--" = "steelblue", "b^" = "steelblue", "b^^" = "steelblue",
+                   "c" = "darkviolet", "c+" = "darkviolet", "c++" = "darkviolet",
+                   "c-" = "darkviolet", "c--" = "darkviolet", "c^" = "darkviolet", "c^^" = "darkviolet",
+                   "x" = "pink3", "x+" = "pink3", "x++" = "pink3",
+                   "x-" = "pink3", "x--" = "pink3", "x^" = "pink3", "x^^" = "pink3",
+                   "y" = "red", "y+" = "red", "y++" = "red",
+                   "y-" = "red", "y--" = "red", "y^" = "red", "y^^" = "red",
+                   "z" = "darkorange", "z+" = "darkorange", "z++" = "darkorange",
+                   "z-" = "darkorange", "z--" = "darkorange", "z^" = "darkorange", "z^^" = "darkorange",
+                   "Spectrum" = "black")
 
   # Return just the spectrum if no fragments identified
   if (is.null(MatchedPeaks)) {
@@ -132,7 +143,7 @@ annotated_spectrum_plot <- function(PeakData,
     Peaks <- merge(Peaks, FragmentTable, by = "M/Z Experimental", all = TRUE)
 
     # Set general type to a string
-    Peaks$`General Type`[is.na(Peaks$`General Type`)] <- "Spectrum"
+    Peaks$Type[is.na(Peaks$Type)] <- "Spectrum"
 
     # Remove ion at 0 peaks
     Peaks[Peaks$Intensity == 0, "Ion"] <- NA
@@ -158,8 +169,8 @@ annotated_spectrum_plot <- function(PeakData,
 
       # Set the base spectrum
       BaseSpectrum <- ggplot2::ggplot(Peaks, ggplot2::aes(x = `M/Z Experimental`,
-        y = Intensity, color = `General Type`, label = Ion)) +
-        ggplot2::theme_bw() + ggplot2::geom_line(size = 1) +
+        y = Intensity, color = Type, label = Ion)) +
+        ggplot2::theme_bw() + ggplot2::geom_line(linewidth = 1) +
         ggplot2::scale_color_manual(values = ColorVector) + ggplot2::xlab(bquote(italic(.("M/Z")))) +
         ggplot2::theme(legend.title = ggplot2::element_blank(), plot.title = ggplot2::element_text(hjust = 0.5))
 
@@ -181,13 +192,13 @@ annotated_spectrum_plot <- function(PeakData,
       p <- plotly::plot_ly()
 
       # Set fragment type order
-      FragOrder <- Peaks$`General Type`[Peaks$`General Type` != "Spectrum"] %>% unique() %>% sort()
+      FragOrder <- Peaks$Type[Peaks$Type != "Spectrum"] %>% unique() %>% sort()
       FragOrder <- c("Spectrum", FragOrder)
 
       for (FragType in FragOrder) {
 
         # Subset Peak Data frame
-        PeakSub <- Peaks[Peaks$`General Type` == FragType,]
+        PeakSub <- Peaks[Peaks$Type == FragType,]
 
         # Create a separate "Add Trace" for Spectrum
         if (FragType == "Spectrum") {
@@ -242,7 +253,7 @@ annotated_spectrum_plot <- function(PeakData,
           Text <- list(
             x = FragmentTable$`M/Z Experimental`[row] + LabelDistance,
             y = FragmentTable$`Intensity Experimental`[row],
-            text = htmltools::HTML(paste('<span style="color: ', ColorVector[FragmentTable$`General Type`[row]],
+            text = htmltools::HTML(paste('<span style="color: ', ColorVector[FragmentTable$Type[row]],
                                          '; font-size: ', LabelSize, 'pt;"> ', FragmentTable$Ion[row], "<sup>",
                                          FragmentTable$Z[row], "</sup>, ", FragmentTable$Isotope[row], "</span>", sep = "")),
             xref = "x", yref = "y", showarrow = FALSE
 
@@ -20,13 +20,19 @@
 #'    and "highest abundance" chooses the highest intensity peak within the PPM window. "closest peak"
 #'    is recommended for peaks that have been peak picked with an external tool, 
 #'    and "highest abundance" is recommended for noisy datasets or those with many peaks. 
+#' @param IsotopeAlgorithm "isopat" uses the isopat package to calculate isotopes, while 
+#'     "Rdisop" uses the Rdisop package. Though more accurate, Rdisop has been known to 
+#'     crash on Windows computers when called iteratively more than 1000 times. 
+#'     Default is Rdisop, though isopat is an alternative.
 #' @param AlternativeIonGroups A "modified_ion" object from "make_mass_modified_ion". Default is NULL.
 #' @param AlternativeSequence A proforma-acceptable string to calculate the literature 
 #'    fragments. The default is the sequence matched in the ScanMetadata file. Default is NULL.
 #' @param AlternativeSpectrum An alternative "peak_data" spectrum to use instead of the default 
 #'     PeakData. Mostly used by other packages. Default is NULL.
 #' @param AlternativeCharge A different charge value to test besides the one in the PeakData 
 #'     spectrum. 
+#' @param AlternativeGlossary Try a different glossary. See system.file("extdata", "Unimod_v20220602.csv", package = "pspecterlib")
+#'     for formatting. Default is NULL.
 #'
 #' @details
 #' The data.table outputted by this function contains 17 columns.
@@ -93,7 +99,8 @@
 #' BU_Match3 <- get_matched_peaks(
 #'  ScanMetadata = BU_ScanMetadata,
 #'  PeakData = BU_Peak, 
-#'  AlternativeIonGroups = make_mass_modified_ion(Ion = "y", Symbol = "+", AMU_Change = 1.00727647)
+#'  IonGroups = "b",
+#'  AlternativeIonGroups = make_mass_modified_ion(Ion = "y", Symbol = "^", AMU_Change = 1.00727647)
 #' )
 #' 
 #'
@@ -112,10 +119,12 @@ get_matched_peaks <- function(ScanMetadata = NULL,
                               MinimumAbundance = 1,
                               CorrelationScore = 0,
                               MatchingAlgorithm = "closest peak",
+                              IsotopeAlgorithm = "Rdisop",
                               AlternativeIonGroups = NULL,
                               AlternativeSequence = NULL,
                               AlternativeSpectrum = NULL,
                               AlternativeCharge = NULL,
+                              AlternativeGlossary = NULL,
                               ...) {
 
   .get_matched_peaks(
@@ -127,10 +136,12 @@ get_matched_peaks <- function(ScanMetadata = NULL,
     MinimumAbundance = MinimumAbundance,
     CorrelationScore = CorrelationScore,
     MatchingAlgorithm = MatchingAlgorithm,
+    IsotopeAlgorithm = IsotopeAlgorithm,
     AlternativeIonGroups = AlternativeIonGroups,
     AlternativeSequence = AlternativeSequence,
     AlternativeSpectrum = AlternativeSpectrum,
     AlternativeCharge = AlternativeCharge,
+    AlternativeGlossary = AlternativeGlossary,
     ...
   )
 
@@ -144,10 +155,12 @@ get_matched_peaks <- function(ScanMetadata = NULL,
                                MinimumAbundance,
                                CorrelationScore,
                                MatchingAlgorithm,
+                               IsotopeAlgorithm,
                                AlternativeIonGroups,
-                               AlternativeSequence = NULL,
-                               AlternativeSpectrum = NULL,
-                               AlternativeCharge = NULL,
+                               AlternativeSequence,
+                               AlternativeSpectrum,
+                               AlternativeCharge,
+                               AlternativeGlossary,
                                CorrelationScore_FilterNA = FALSE,
                                ChargeThresh = 5,
                                ChargeThresh2 = 10,
@@ -279,7 +292,6 @@ get_matched_peaks <- function(ScanMetadata = NULL,
       sort()
     if (length(toRm) > 0) {Fragments <- Fragments[-toRm,]}
 
-    
     # First, remove peaks that would never match 
     Fragments <- Fragments %>%
       dplyr::mutate(
@@ -291,6 +303,7 @@ get_matched_peaks <- function(ScanMetadata = NULL,
       ) %>%
       dplyr::filter(Within == TRUE) %>%
       dplyr::select(-c(`PPM Low`, `PPM High`, Within))
+  
 
     # Second take the minimum charge peak within each ppm bin to prioritize smaller charges. 
     # BinVal <- 0 # This is to count bins
@@ -322,8 +335,35 @@ get_matched_peaks <- function(ScanMetadata = NULL,
 
   # Get the sequence object 
   if (is.null(AlternativeSequence)) {
-    Sequence_Object <- ScanMetadata[ScanMetadata$`Scan Number` == ScanNumber, "Sequence"] %>% unlist() %>% convert_proforma()
-  } else {Sequence_Object <- convert_proforma(AlternativeSequence)}
+    
+    ExtractSeq <- ScanMetadata[ScanMetadata$`Scan Number` == ScanNumber, "Sequence"] %>% unlist()
+    
+    if (length(ExtractSeq) > 1) {
+      message(paste("Multiple sequences detected. Select one and pass it to AlternativeSequence. Your options are:", paste(ExtractSeq, collapse = ", ")))
+      return(NULL)
+    }
+    
+    if (is.na(ExtractSeq)) {
+      message("Sequence is NA")
+      return(NULL)
+    }
+    
+    if (is.null(AlternativeGlossary)) {
+      Sequence_Object <- convert_proforma(ExtractSeq)
+    } else {
+      Sequence_Object <- convert_proforma(ExtractSeq, AlternativeGlossary)
+    }
+    
+    
+  } else {
+    
+    if (is.null(AlternativeGlossary)) {
+      Sequence_Object <- convert_proforma(AlternativeSequence)
+    } else {
+      Sequence_Object <- convert_proforma(AlternativeSequence, AlternativeGlossary)
+    }
+    
+  }
 
   # Pull the sequence
   if (is.character(Sequence_Object)) {Sequence <- Sequence_Object} else {
@@ -332,13 +372,18 @@ get_matched_peaks <- function(ScanMetadata = NULL,
 
   # Get the precursor charge
   if (is.null(AlternativeCharge)) {
-    PrecursorCharge <- ScanMetadata[ScanMetadata$`Scan Number` == ScanNumber, "Precursor Charge"] %>% unlist()
+    PrecursorCharge <- ScanMetadata[ScanMetadata$`Scan Number` == ScanNumber, "Precursor Charge"] %>% unlist() %>% head(1)
   } else {PrecursorCharge <- AlternativeCharge}
 
   # Load Glossary
-  Glossary <- data.table::fread(
-    system.file("extdata", "Unimod_v20220602.csv", package = "pspecterlib")
-  )
+  if (is.null(AlternativeGlossary)) {
+    Glossary <- data.table::fread(
+      system.file("extdata", "Unimod_v20220602.csv", package = "pspecterlib")
+    )
+  } else {
+    Glossary <- AlternativeGlossary
+  }
+
 
   #################################
   ## 2. CALCULATE BASE FRAGMENTS ##
@@ -376,7 +421,18 @@ get_matched_peaks <- function(ScanMetadata = NULL,
       getIon <- AlternativeIonGroups$Ion[row]
 
       # Subset fragments
-      subFrag <- Fragments[Fragments$Type == getIon,]
+      subFrag <- MSnbase::calculateFragments(sequence = Sequence, type = getIon,
+                                             z = 1:PrecursorCharge) %>% data.table::data.table()
+      
+      # Rename Fragments
+      colnames(subFrag) <- c("M/Z", "Ion", "Type", "Position", "Z", "Sequence")
+      
+      # Exclude N-deamidated and C-dehydrated specific modifications
+      subFrag <- dplyr::filter(subFrag, !grepl("[.*_]", subFrag$Type))
+      
+      # Label the N-position. Remember that x,y,z fragments are determined from the C-terminus
+      subFrag$`N Position` <- ifelse(subFrag$Type %in% c("a", "b", "c"),
+                                     subFrag$Position, (nchar(Sequence) + 1) - Fragments$Position)
 
       # Proceed only if there's any fragments
       if (nrow(subFrag) > 0) {
@@ -465,6 +521,11 @@ get_matched_peaks <- function(ScanMetadata = NULL,
 
   # Trim down potential fragments to match
   Fragments <- cleanCalculatedFragments(Fragments)
+  
+  if (nrow(Fragments) == 0) {
+    message("No peaks matched.")
+    return(NULL)
+  }
 
   ###############################
   ## 5. ADD MOLECULAR FORMULAS ##
@@ -476,20 +537,14 @@ get_matched_peaks <- function(ScanMetadata = NULL,
   MolFormDF <- Fragments %>%
     dplyr::select(Sequence, Modifications) %>%
     unique()
-  
-  # Remove sequences with a single amino acid
-  MolFormDF <- MolFormDF %>% 
-    dplyr::mutate(Count = nchar(Sequence) > 1) %>% 
-    dplyr::filter(Count) %>% 
-    dplyr::select(-Count)
 
   # Iterate through, getting sequences and modifications and combining them
   MolFormDF$`Molecular Formula` <- lapply(1:nrow(MolFormDF), function(row) {
 
     # Step one: get sequence and modifications
     Seq <- MolFormDF$Sequence[row]
     Mod <- MolFormDF$Modifications[row]
-
+    
     # Step two: convert sequence to molecule object
     Atoms <- get_aa_molform(Seq)
 
@@ -559,7 +614,7 @@ get_matched_peaks <- function(ScanMetadata = NULL,
     IsotopeList <- do.call(dplyr::bind_rows, lapply(MolForms, function(MolForm) {
 
       # Get Isotope Relative Abundances
-      IsotopeResults <- calculate_iso_profile(as.molform(MolForm), min_abundance = MinimumAbundance)
+      IsotopeResults <- calculate_iso_profile(as.molform(MolForm), algorithm = IsotopeAlgorithm, min_abundance = MinimumAbundance)
       IsotopeResults$`Molecular Formula` = MolForm
       return(IsotopeResults)
 
 
@@ -6,6 +6,8 @@
 #' @param ScanMetadata Object of the scan_metadata class from get_scan_metadata. Required.
 #' @param ScanNumber Integer indicating which scan number to pull the peak data. Required.
 #' @param MinAbundance Filter out peaks with an abundance below threshold. Ranges from 0-100. Default is 0.
+#' @param MinIntensity Filter out peaks with an intensity below the threshold. Default is 0. 
+#'     If MinAbundance is 0, then the minimum intensity filter will be applied. 
 #'
 #' @details
 #' The data.table outputted by this function contains both the M/Z and Intensity vectors of the spectra.
@@ -38,7 +40,8 @@
 #' @export
 get_peak_data <- function(ScanMetadata,
                           ScanNumber,
-                          MinAbundance = 0) {
+                          MinAbundance = 0,
+                          MinIntensity = 0) {
 
   ##################
   ## CHECK INPUTS ##
@@ -66,13 +69,18 @@ get_peak_data <- function(ScanMetadata,
 
   }
 
-  # Assert that Intensity Minimum is an integer
+  # Assert that the Abundance Minimum is a numeric value
   if (!is.numeric(MinAbundance)) {
     stop("MinAbundance needs to be a numeric value.")
   }
   if (MinAbundance < 0 | MinAbundance > 100) {
     stop("MinAbundance needs to range between 0 and 100.")
   }
+  
+  # Assert that the Intensity Minimum is a numeric value
+  if (!is.numeric(MinIntensity)) {
+    stop("MinIntensity should be a number.")
+  }
 
   ####################
   ## PULL PEAK DATA ##
@@ -118,9 +126,13 @@ get_peak_data <- function(ScanMetadata,
 
   # Remove peaks that do not meet the minimum intensity value
   NumberPeaksPostFilter <- TotalNumberPeaks
-  if (MinAbundance > 0) {
+  
+  if (MinAbundance != 0) {
     Peaks <- subset(Peaks, Abundance >= MinAbundance)
     NumberPeaksPostFilter <- nrow(Peaks)
+  } else {
+    Peaks <- subset(Peaks, Intensity >= MinIntensity)
+    NumberPeaksPostFilter <- nrow(Peaks)
   }
 
   ##################