Skip to content

Commit 8fc1642

Browse files
authored
Merge pull request #7 from EMSL-Computing/develop
Merging fixes for version 2 of the application
2 parents 2b52803 + 55ba7d2 commit 8fc1642

26 files changed

+700
-340
lines changed

DESCRIPTION

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
Package: pspecterlib
22
Type: Package
33
Title: PSpecteR Library - Visualization tools for top-down and bottom-up proteomics data
4-
Version: 1.0.1
5-
Date: 2022-10-03
4+
Version: 1.1.0
5+
Date: 2023-04-13
66
Authors@R: c(person("David", "Degnan", email = "[email protected]", role = c("aut", "cre")))
77
Description: Provides functionality to visually examine LC-MS top-down and bottom-up proteomics data. Supports reading various mass spectrometry files, labeling spectra with fragmentation patterns, testing post-translational modifications, plotting where identified fragments map to reference sequences, and visualizing algorithmic output from database search tools (MSPathFinder) and metadata.
88
License: MIT + file LICENSE

NAMESPACE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ export(is_sequence)
2727
export(make_mass_modified_ion)
2828
export(make_peak_data)
2929
export(ms1_plots)
30+
export(multiple_modifications)
3031
export(multiply_molforms)
3132
export(promex_feature_plot)
3233
export(scan_metadata_plot)

R/annotated_spectrum_plot.R

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,19 @@ annotated_spectrum_plot <- function(PeakData,
8383
###############
8484

8585
# Set color vector
86-
ColorVector <- c("a" = "forestgreen", "b" = "steelblue", "c" = "darkviolet",
87-
"x" = "pink3", "y" = "red", "z" = "darkorange", "Spectrum" = "black")
86+
ColorVector <- c("a" = "forestgreen", "a+" = "forestgreen", "a++" = "forestgreen",
87+
"a-" = "forestgreen", "a--" = "forestgreen", "a^" = "forestgreen", "a^^" = "forestgreen",
88+
"b" = "steelblue", "b+" = "steelblue", "b++" = "steelblue",
89+
"b-" = "steelblue", "b--" = "steelblue", "b^" = "steelblue", "b^^" = "steelblue",
90+
"c" = "darkviolet", "c+" = "darkviolet", "c++" = "darkviolet",
91+
"c-" = "darkviolet", "c--" = "darkviolet", "c^" = "darkviolet", "c^^" = "darkviolet",
92+
"x" = "pink3", "x+" = "pink3", "x++" = "pink3",
93+
"x-" = "pink3", "x--" = "pink3", "x^" = "pink3", "x^^" = "pink3",
94+
"y" = "red", "y+" = "red", "y++" = "red",
95+
"y-" = "red", "y--" = "red", "y^" = "red", "y^^" = "red",
96+
"z" = "darkorange", "z+" = "darkorange", "z++" = "darkorange",
97+
"z-" = "darkorange", "z--" = "darkorange", "z^" = "darkorange", "z^^" = "darkorange",
98+
"Spectrum" = "black")
8899

89100
# Return just the spectrum if no fragments identified
90101
if (is.null(MatchedPeaks)) {
@@ -132,7 +143,7 @@ annotated_spectrum_plot <- function(PeakData,
132143
Peaks <- merge(Peaks, FragmentTable, by = "M/Z Experimental", all = TRUE)
133144

134145
# Set general type to a string
135-
Peaks$`General Type`[is.na(Peaks$`General Type`)] <- "Spectrum"
146+
Peaks$Type[is.na(Peaks$Type)] <- "Spectrum"
136147

137148
# Remove ion at 0 peaks
138149
Peaks[Peaks$Intensity == 0, "Ion"] <- NA
@@ -158,8 +169,8 @@ annotated_spectrum_plot <- function(PeakData,
158169

159170
# Set the base spectrum
160171
BaseSpectrum <- ggplot2::ggplot(Peaks, ggplot2::aes(x = `M/Z Experimental`,
161-
y = Intensity, color = `General Type`, label = Ion)) +
162-
ggplot2::theme_bw() + ggplot2::geom_line(size = 1) +
172+
y = Intensity, color = Type, label = Ion)) +
173+
ggplot2::theme_bw() + ggplot2::geom_line(linewidth = 1) +
163174
ggplot2::scale_color_manual(values = ColorVector) + ggplot2::xlab(bquote(italic(.("M/Z")))) +
164175
ggplot2::theme(legend.title = ggplot2::element_blank(), plot.title = ggplot2::element_text(hjust = 0.5))
165176

@@ -181,13 +192,13 @@ annotated_spectrum_plot <- function(PeakData,
181192
p <- plotly::plot_ly()
182193

183194
# Set fragment type order
184-
FragOrder <- Peaks$`General Type`[Peaks$`General Type` != "Spectrum"] %>% unique() %>% sort()
195+
FragOrder <- Peaks$Type[Peaks$Type != "Spectrum"] %>% unique() %>% sort()
185196
FragOrder <- c("Spectrum", FragOrder)
186197

187198
for (FragType in FragOrder) {
188199

189200
# Subset Peak Data frame
190-
PeakSub <- Peaks[Peaks$`General Type` == FragType,]
201+
PeakSub <- Peaks[Peaks$Type == FragType,]
191202

192203
# Create a separate "Add Trace" for Spectrum
193204
if (FragType == "Spectrum") {
@@ -242,7 +253,7 @@ annotated_spectrum_plot <- function(PeakData,
242253
Text <- list(
243254
x = FragmentTable$`M/Z Experimental`[row] + LabelDistance,
244255
y = FragmentTable$`Intensity Experimental`[row],
245-
text = htmltools::HTML(paste('<span style="color: ', ColorVector[FragmentTable$`General Type`[row]],
256+
text = htmltools::HTML(paste('<span style="color: ', ColorVector[FragmentTable$Type[row]],
246257
'; font-size: ', LabelSize, 'pt;"> ', FragmentTable$Ion[row], "<sup>",
247258
FragmentTable$Z[row], "</sup>, ", FragmentTable$Isotope[row], "</span>", sep = "")),
248259
xref = "x", yref = "y", showarrow = FALSE

R/get_matched_peaks.R

Lines changed: 75 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -20,13 +20,19 @@
2020
#' and "highest abundance" chooses the highest intensity peak within the PPM window. "closest peak"
2121
#' is recommended for peaks that have been peak picked with an external tool,
2222
#' and "highest abundance" is recommended for noisy datasets or those with many peaks.
23+
#' @param IsotopeAlgorithm "isopat" uses the isopat package to calculate isotopes, while
24+
#' "Rdisop" uses the Rdisop package. Though more accurate, Rdisop has been known to
25+
#' crash on Windows computers when called iteratively more than 1000 times.
26+
#' Default is Rdisop, though isopat is an alternative.
2327
#' @param AlternativeIonGroups A "modified_ion" object from "make_mass_modified_ion". Default is NULL.
2428
#' @param AlternativeSequence A proforma-acceptable string to calculate the literature
2529
#' fragments. The default is the sequence matched in the ScanMetadata file. Default is NULL.
2630
#' @param AlternativeSpectrum An alternative "peak_data" spectrum to use instead of the default
2731
#' PeakData. Mostly used by other packages. Default is NULL.
2832
#' @param AlternativeCharge A different charge value to test besides the one in the PeakData
2933
#' spectrum.
34+
#' @param AlternativeGlossary Try a different glossary. See system.file("extdata", "Unimod_v20220602.csv", package = "pspecterlib")
35+
#' for formatting.
3036
#'
3137
#' @details
3238
#' The data.table outputted by this function contains 17 columns.
@@ -93,7 +99,8 @@
9399
#' BU_Match3 <- get_matched_peaks(
94100
#' ScanMetadata = BU_ScanMetadata,
95101
#' PeakData = BU_Peak,
96-
#' AlternativeIonGroups = make_mass_modified_ion(Ion = "y", Symbol = "+", AMU_Change = 1.00727647)
102+
#' IonGroups = "b",
103+
#' AlternativeIonGroups = make_mass_modified_ion(Ion = "y", Symbol = "^", AMU_Change = 1.00727647)
97104
#' )
98105
#'
99106
#'
@@ -112,10 +119,12 @@ get_matched_peaks <- function(ScanMetadata = NULL,
112119
MinimumAbundance = 1,
113120
CorrelationScore = 0,
114121
MatchingAlgorithm = "closest peak",
122+
IsotopeAlgorithm = "Rdisop",
115123
AlternativeIonGroups = NULL,
116124
AlternativeSequence = NULL,
117125
AlternativeSpectrum = NULL,
118126
AlternativeCharge = NULL,
127+
AlternativeGlossary = NULL,
119128
...) {
120129

121130
.get_matched_peaks(
@@ -127,10 +136,12 @@ get_matched_peaks <- function(ScanMetadata = NULL,
127136
MinimumAbundance = MinimumAbundance,
128137
CorrelationScore = CorrelationScore,
129138
MatchingAlgorithm = MatchingAlgorithm,
139+
IsotopeAlgorithm = IsotopeAlgorithm,
130140
AlternativeIonGroups = AlternativeIonGroups,
131141
AlternativeSequence = AlternativeSequence,
132142
AlternativeSpectrum = AlternativeSpectrum,
133143
AlternativeCharge = AlternativeCharge,
144+
AlternativeGlossary = AlternativeGlossary,
134145
...
135146
)
136147

@@ -144,10 +155,12 @@ get_matched_peaks <- function(ScanMetadata = NULL,
144155
MinimumAbundance,
145156
CorrelationScore,
146157
MatchingAlgorithm,
158+
IsotopeAlgorithm,
147159
AlternativeIonGroups,
148-
AlternativeSequence = NULL,
149-
AlternativeSpectrum = NULL,
150-
AlternativeCharge = NULL,
160+
AlternativeSequence,
161+
AlternativeSpectrum,
162+
AlternativeCharge,
163+
AlternativeGlossary,
151164
CorrelationScore_FilterNA = FALSE,
152165
ChargeThresh = 5,
153166
ChargeThresh2 = 10,
@@ -279,7 +292,6 @@ get_matched_peaks <- function(ScanMetadata = NULL,
279292
sort()
280293
if (length(toRm) > 0) {Fragments <- Fragments[-toRm,]}
281294

282-
283295
# First, remove peaks that would never match
284296
Fragments <- Fragments %>%
285297
dplyr::mutate(
@@ -291,6 +303,7 @@ get_matched_peaks <- function(ScanMetadata = NULL,
291303
) %>%
292304
dplyr::filter(Within == TRUE) %>%
293305
dplyr::select(-c(`PPM Low`, `PPM High`, Within))
306+
294307

295308
# Second take the minimum charge peak within each ppm bin to prioritize smaller charges.
296309
# BinVal <- 0 # This is to count bins
@@ -322,8 +335,35 @@ get_matched_peaks <- function(ScanMetadata = NULL,
322335

323336
# Get the sequence object
324337
if (is.null(AlternativeSequence)) {
325-
Sequence_Object <- ScanMetadata[ScanMetadata$`Scan Number` == ScanNumber, "Sequence"] %>% unlist() %>% convert_proforma()
326-
} else {Sequence_Object <- convert_proforma(AlternativeSequence)}
338+
339+
ExtractSeq <- ScanMetadata[ScanMetadata$`Scan Number` == ScanNumber, "Sequence"] %>% unlist()
340+
341+
if (length(ExtractSeq) > 1) {
342+
message(paste("Multiple sequences detected. Select one and pass it to AlternativeSequence. Your options are:", paste(ExtractSeq, collapse = ", ")))
343+
return(NULL)
344+
}
345+
346+
if (is.na(ExtractSeq)) {
347+
message("Sequence is NA")
348+
return(NULL)
349+
}
350+
351+
if (is.null(AlternativeGlossary)) {
352+
Sequence_Object <- convert_proforma(ExtractSeq)
353+
} else {
354+
Sequence_Object <- convert_proforma(ExtractSeq, AlternativeGlossary)
355+
}
356+
357+
358+
} else {
359+
360+
if (is.null(AlternativeGlossary)) {
361+
Sequence_Object <- convert_proforma(AlternativeSequence)
362+
} else {
363+
Sequence_Object <- convert_proforma(AlternativeSequence, AlternativeGlossary)
364+
}
365+
366+
}
327367

328368
# Pull the sequence
329369
if (is.character(Sequence_Object)) {Sequence <- Sequence_Object} else {
@@ -332,13 +372,18 @@ get_matched_peaks <- function(ScanMetadata = NULL,
332372

333373
# Get the precursor charge
334374
if (is.null(AlternativeCharge)) {
335-
PrecursorCharge <- ScanMetadata[ScanMetadata$`Scan Number` == ScanNumber, "Precursor Charge"] %>% unlist()
375+
PrecursorCharge <- ScanMetadata[ScanMetadata$`Scan Number` == ScanNumber, "Precursor Charge"] %>% unlist() %>% head(1)
336376
} else {PrecursorCharge <- AlternativeCharge}
337377

338378
# Load Glossary
339-
Glossary <- data.table::fread(
340-
system.file("extdata", "Unimod_v20220602.csv", package = "pspecterlib")
341-
)
379+
if (is.null(AlternativeGlossary)) {
380+
Glossary <- data.table::fread(
381+
system.file("extdata", "Unimod_v20220602.csv", package = "pspecterlib")
382+
)
383+
} else {
384+
Glossary <- AlternativeGlossary
385+
}
386+
342387

343388
#################################
344389
## 2. CALCULATE BASE FRAGMENTS ##
@@ -376,7 +421,18 @@ get_matched_peaks <- function(ScanMetadata = NULL,
376421
getIon <- AlternativeIonGroups$Ion[row]
377422

378423
# Subset fragments
379-
subFrag <- Fragments[Fragments$Type == getIon,]
424+
subFrag <- MSnbase::calculateFragments(sequence = Sequence, type = getIon,
425+
z = 1:PrecursorCharge) %>% data.table::data.table()
426+
427+
# Rename Fragments
428+
colnames(subFrag) <- c("M/Z", "Ion", "Type", "Position", "Z", "Sequence")
429+
430+
# Exclude N-deamidated and C-dehydrated specific modifications
431+
subFrag <- dplyr::filter(subFrag, !grepl("[.*_]", subFrag$Type))
432+
433+
# Label the N-position. Remember that x,y,z fragments are determined from the C-terminus
434+
subFrag$`N Position` <- ifelse(subFrag$Type %in% c("a", "b", "c"),
435+
subFrag$Position, (nchar(Sequence) + 1) - Fragments$Position)
380436

381437
# Proceed only if there's any fragments
382438
if (nrow(subFrag) > 0) {
@@ -465,6 +521,11 @@ get_matched_peaks <- function(ScanMetadata = NULL,
465521

466522
# Trim down potential fragments to match
467523
Fragments <- cleanCalculatedFragments(Fragments)
524+
525+
if (nrow(Fragments) == 0) {
526+
message("No peaks matched.")
527+
return(NULL)
528+
}
468529

469530
###############################
470531
## 5. ADD MOLECULAR FORMULAS ##
@@ -476,20 +537,14 @@ get_matched_peaks <- function(ScanMetadata = NULL,
476537
MolFormDF <- Fragments %>%
477538
dplyr::select(Sequence, Modifications) %>%
478539
unique()
479-
480-
# Remove sequences with a single amino acid
481-
MolFormDF <- MolFormDF %>%
482-
dplyr::mutate(Count = nchar(Sequence) > 1) %>%
483-
dplyr::filter(Count) %>%
484-
dplyr::select(-Count)
485540

486541
# Iterate through, getting sequences and modifications and combining them
487542
MolFormDF$`Molecular Formula` <- lapply(1:nrow(MolFormDF), function(row) {
488543

489544
# Step one: get sequence and modifications
490545
Seq <- MolFormDF$Sequence[row]
491546
Mod <- MolFormDF$Modifications[row]
492-
547+
493548
# Step two: convert sequence to molecule object
494549
Atoms <- get_aa_molform(Seq)
495550

@@ -559,7 +614,7 @@ get_matched_peaks <- function(ScanMetadata = NULL,
559614
IsotopeList <- do.call(dplyr::bind_rows, lapply(MolForms, function(MolForm) {
560615

561616
# Get Isotope Relative Abundances
562-
IsotopeResults <- calculate_iso_profile(as.molform(MolForm), min_abundance = MinimumAbundance)
617+
IsotopeResults <- calculate_iso_profile(as.molform(MolForm), algorithm = IsotopeAlgorithm, min_abundance = MinimumAbundance)
563618
IsotopeResults$`Molecular Formula` = MolForm
564619
return(IsotopeResults)
565620

R/get_peak_data.R

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
#' @param ScanMetadata Object of the scan_metadata class from get_scan_metadata. Required.
77
#' @param ScanNumber Integer indicating which scan number to pull the peak data. Required.
88
#' @param MinAbundance Filter out peaks with an abundance below threshold. Ranges from 0-100. Default is 0.
9+
#' @param MinIntensity Filter out peaks with an intensity below the threshold. Default is 0.
10+
#' If MinAbundance is 0, then the minimum intensity filter will be applied.
911
#'
1012
#' @details
1113
#' The data.table outputted by this function contains both the M/Z and Intensity vectors of the spectra.
@@ -38,7 +40,8 @@
3840
#' @export
3941
get_peak_data <- function(ScanMetadata,
4042
ScanNumber,
41-
MinAbundance = 0) {
43+
MinAbundance = 0,
44+
MinIntensity = 0) {
4245

4346
##################
4447
## CHECK INPUTS ##
@@ -66,13 +69,18 @@ get_peak_data <- function(ScanMetadata,
6669

6770
}
6871

69-
# Assert that Intensity Minimum is an integer
72+
# Assert that the Abundance Minimum is a numeric value
7073
if (!is.numeric(MinAbundance)) {
7174
stop("MinAbundance needs to be a numeric value.")
7275
}
7376
if (MinAbundance < 0 | MinAbundance > 100) {
7477
stop("MinAbundance needs to range between 0 and 100.")
7578
}
79+
80+
# Assert that the Intensity Minimum is a numeric value
81+
if (!is.numeric(MinIntensity)) {
82+
stop("MinIntensity should be a number.")
83+
}
7684

7785
####################
7886
## PULL PEAK DATA ##
@@ -118,9 +126,13 @@ get_peak_data <- function(ScanMetadata,
118126

119127
# Remove peaks that do not meet the minimum intensity value
120128
NumberPeaksPostFilter <- TotalNumberPeaks
121-
if (MinAbundance > 0) {
129+
130+
if (MinAbundance != 0) {
122131
Peaks <- subset(Peaks, Abundance >= MinAbundance)
123132
NumberPeaksPostFilter <- nrow(Peaks)
133+
} else {
134+
Peaks <- subset(Peaks, Intensity >= MinIntensity)
135+
NumberPeaksPostFilter <- nrow(Peaks)
124136
}
125137

126138
##################

0 commit comments

Comments
 (0)