20
20
# ' and "highest abundance" chooses the highest intensity peak within the PPM window. "closest peak"
21
21
# ' is recommended for peaks that have been peak picked with an external tool,
22
22
# ' and "highest abundance" is recommended for noisy datasets or those with many peaks.
23
+ # ' @param IsotopeAlgorithm "isopat" uses the isopat package to calculate isotopes, while
24
+ # ' "Rdisop" uses the Rdisop package. Though more accurate, Rdisop has been known to
25
+ # ' crash on Windows computers when called iteratively more than 1000 times.
26
+ # ' Default is Rdisop, though isopat is an alternative.
23
27
# ' @param AlternativeIonGroups A "modified_ion" object from "make_mass_modified ions." Default is NULL.
24
28
# ' @param AlternativeSequence A proforma-acceptable string to calculate the literature
25
29
# ' fragments. The default is the sequence matched in the ScanMetadata file. Default is NULL.
26
30
# ' @param AlternativeSpectrum An alternative "peak_data" spectrum to use instead of the default
27
31
# ' PeakData. Mostly used by other packages. Default is NULL.
28
32
# ' @param AlternativeCharge A different charge value to test besides the one in the PeakData
29
33
# ' spectrum.
34
+ # ' @param AlternativeGlossary Try a different glossary. See system.file("extdata", "Unimod_v20220602.csv", package = "pspecterlib)
35
+ # ' for formatting.
30
36
# '
31
37
# ' @details
32
38
# ' The data.table outputted by this function contains 17 columns.
93
99
# ' BU_Match3 <- get_matched_peaks(
94
100
# ' ScanMetadata = BU_ScanMetadata,
95
101
# ' PeakData = BU_Peak,
96
- # ' AlternativeIonGroups = make_mass_modified_ion(Ion = "y", Symbol = "+", AMU_Change = 1.00727647)
102
+ # ' IonGroups = "b",
103
+ # ' AlternativeIonGroups = make_mass_modified_ion(Ion = "y", Symbol = "^", AMU_Change = 1.00727647)
97
104
# ' )
98
105
# '
99
106
# '
@@ -112,10 +119,12 @@ get_matched_peaks <- function(ScanMetadata = NULL,
112
119
MinimumAbundance = 1 ,
113
120
CorrelationScore = 0 ,
114
121
MatchingAlgorithm = " closest peak" ,
122
+ IsotopeAlgorithm = " Rdisop" ,
115
123
AlternativeIonGroups = NULL ,
116
124
AlternativeSequence = NULL ,
117
125
AlternativeSpectrum = NULL ,
118
126
AlternativeCharge = NULL ,
127
+ AlternativeGlossary = NULL ,
119
128
... ) {
120
129
121
130
.get_matched_peaks(
@@ -127,10 +136,12 @@ get_matched_peaks <- function(ScanMetadata = NULL,
127
136
MinimumAbundance = MinimumAbundance ,
128
137
CorrelationScore = CorrelationScore ,
129
138
MatchingAlgorithm = MatchingAlgorithm ,
139
+ IsotopeAlgorithm = IsotopeAlgorithm ,
130
140
AlternativeIonGroups = AlternativeIonGroups ,
131
141
AlternativeSequence = AlternativeSequence ,
132
142
AlternativeSpectrum = AlternativeSpectrum ,
133
143
AlternativeCharge = AlternativeCharge ,
144
+ AlternativeGlossary = AlternativeGlossary ,
134
145
...
135
146
)
136
147
@@ -144,10 +155,12 @@ get_matched_peaks <- function(ScanMetadata = NULL,
144
155
MinimumAbundance ,
145
156
CorrelationScore ,
146
157
MatchingAlgorithm ,
158
+ IsotopeAlgorithm ,
147
159
AlternativeIonGroups ,
148
- AlternativeSequence = NULL ,
149
- AlternativeSpectrum = NULL ,
150
- AlternativeCharge = NULL ,
160
+ AlternativeSequence ,
161
+ AlternativeSpectrum ,
162
+ AlternativeCharge ,
163
+ AlternativeGlossary ,
151
164
CorrelationScore_FilterNA = FALSE ,
152
165
ChargeThresh = 5 ,
153
166
ChargeThresh2 = 10 ,
@@ -279,7 +292,6 @@ get_matched_peaks <- function(ScanMetadata = NULL,
279
292
sort()
280
293
if (length(toRm ) > 0 ) {Fragments <- Fragments [- toRm ,]}
281
294
282
-
283
295
# First, remove peaks that would never match
284
296
Fragments <- Fragments %> %
285
297
dplyr :: mutate(
@@ -291,6 +303,7 @@ get_matched_peaks <- function(ScanMetadata = NULL,
291
303
) %> %
292
304
dplyr :: filter(Within == TRUE ) %> %
293
305
dplyr :: select(- c(`PPM Low` , `PPM High` , Within ))
306
+
294
307
295
308
# Second take the minimum charge peak within each ppm bin to prioritize smaller charges.
296
309
# BinVal <- 0 # This is to count bins
@@ -322,8 +335,35 @@ get_matched_peaks <- function(ScanMetadata = NULL,
322
335
323
336
# Get the sequence object
324
337
if (is.null(AlternativeSequence )) {
325
- Sequence_Object <- ScanMetadata [ScanMetadata $ `Scan Number` == ScanNumber , " Sequence" ] %> % unlist() %> % convert_proforma()
326
- } else {Sequence_Object <- convert_proforma(AlternativeSequence )}
338
+
339
+ ExtractSeq <- ScanMetadata [ScanMetadata $ `Scan Number` == ScanNumber , " Sequence" ] %> % unlist()
340
+
341
+ if (length(ExtractSeq ) > 1 ) {
342
+ message(paste(" Multiple sequences detected. Select one and pass it to AlternativeSequence. Your options are:" , paste(ExtractSeq , collapse = " , " )))
343
+ return (NULL )
344
+ }
345
+
346
+ if (is.na(ExtractSeq )) {
347
+ message(" Sequence is NA" )
348
+ return (NULL )
349
+ }
350
+
351
+ if (is.null(AlternativeGlossary )) {
352
+ Sequence_Object <- convert_proforma(ExtractSeq )
353
+ } else {
354
+ Sequence_Object <- convert_proforma(ExtractSeq , AlternativeGlossary )
355
+ }
356
+
357
+
358
+ } else {
359
+
360
+ if (is.null(AlternativeGlossary )) {
361
+ Sequence_Object <- convert_proforma(AlternativeSequence )
362
+ } else {
363
+ Sequence_Object <- convert_proforma(AlternativeSequence , AlternativeGlossary )
364
+ }
365
+
366
+ }
327
367
328
368
# Pull the sequence
329
369
if (is.character(Sequence_Object )) {Sequence <- Sequence_Object } else {
@@ -332,13 +372,18 @@ get_matched_peaks <- function(ScanMetadata = NULL,
332
372
333
373
# Get the precursor charge
334
374
if (is.null(AlternativeCharge )) {
335
- PrecursorCharge <- ScanMetadata [ScanMetadata $ `Scan Number` == ScanNumber , " Precursor Charge" ] %> % unlist()
375
+ PrecursorCharge <- ScanMetadata [ScanMetadata $ `Scan Number` == ScanNumber , " Precursor Charge" ] %> % unlist() % > % head( 1 )
336
376
} else {PrecursorCharge <- AlternativeCharge }
337
377
338
378
# Load Glossary
339
- Glossary <- data.table :: fread(
340
- system.file(" extdata" , " Unimod_v20220602.csv" , package = " pspecterlib" )
341
- )
379
+ if (is.null(AlternativeGlossary )) {
380
+ Glossary <- data.table :: fread(
381
+ system.file(" extdata" , " Unimod_v20220602.csv" , package = " pspecterlib" )
382
+ )
383
+ } else {
384
+ Glossary <- AlternativeGlossary
385
+ }
386
+
342
387
343
388
# ################################
344
389
# # 2. CALCULATE BASE FRAGMENTS ##
@@ -376,7 +421,18 @@ get_matched_peaks <- function(ScanMetadata = NULL,
376
421
getIon <- AlternativeIonGroups $ Ion [row ]
377
422
378
423
# Subset fragments
379
- subFrag <- Fragments [Fragments $ Type == getIon ,]
424
+ subFrag <- MSnbase :: calculateFragments(sequence = Sequence , type = getIon ,
425
+ z = 1 : PrecursorCharge ) %> % data.table :: data.table()
426
+
427
+ # Rename Fragments
428
+ colnames(subFrag ) <- c(" M/Z" , " Ion" , " Type" , " Position" , " Z" , " Sequence" )
429
+
430
+ # Exclude N-deamidated and C-dehydrated specific modifications
431
+ subFrag <- dplyr :: filter(subFrag , ! grepl(" [.*_]" , subFrag $ Type ))
432
+
433
+ # Label the N-position. Remember that x,y,z fragments are determined from the C-terminus
434
+ subFrag $ `N Position` <- ifelse(subFrag $ Type %in% c(" a" , " b" , " c" ),
435
+ subFrag $ Position , (nchar(Sequence ) + 1 ) - Fragments $ Position )
380
436
381
437
# Proceed only if there's any fragments
382
438
if (nrow(subFrag ) > 0 ) {
@@ -465,6 +521,11 @@ get_matched_peaks <- function(ScanMetadata = NULL,
465
521
466
522
# Trim down potential fragments to match
467
523
Fragments <- cleanCalculatedFragments(Fragments )
524
+
525
+ if (nrow(Fragments ) == 0 ) {
526
+ message(" No peaks matched." )
527
+ return (NULL )
528
+ }
468
529
469
530
# ##############################
470
531
# # 5. ADD MOLECULAR FORMULAS ##
@@ -476,20 +537,14 @@ get_matched_peaks <- function(ScanMetadata = NULL,
476
537
MolFormDF <- Fragments %> %
477
538
dplyr :: select(Sequence , Modifications ) %> %
478
539
unique()
479
-
480
- # Remove sequences with a single amino acid
481
- MolFormDF <- MolFormDF %> %
482
- dplyr :: mutate(Count = nchar(Sequence ) > 1 ) %> %
483
- dplyr :: filter(Count ) %> %
484
- dplyr :: select(- Count )
485
540
486
541
# Iterate through, getting sequences and modifications and combining them
487
542
MolFormDF $ `Molecular Formula` <- lapply(1 : nrow(MolFormDF ), function (row ) {
488
543
489
544
# Step one: get sequence and modifications
490
545
Seq <- MolFormDF $ Sequence [row ]
491
546
Mod <- MolFormDF $ Modifications [row ]
492
-
547
+
493
548
# Step two: convert sequence to molecule object
494
549
Atoms <- get_aa_molform(Seq )
495
550
@@ -559,7 +614,7 @@ get_matched_peaks <- function(ScanMetadata = NULL,
559
614
IsotopeList <- do.call(dplyr :: bind_rows , lapply(MolForms , function (MolForm ) {
560
615
561
616
# Get Isotope Relative Abundances
562
- IsotopeResults <- calculate_iso_profile(as.molform(MolForm ), min_abundance = MinimumAbundance )
617
+ IsotopeResults <- calculate_iso_profile(as.molform(MolForm ), algorithm = IsotopeAlgorithm , min_abundance = MinimumAbundance )
563
618
IsotopeResults $ `Molecular Formula` = MolForm
564
619
return (IsotopeResults )
565
620
0 commit comments