From 74743d67be8537f46c1587e53723dea02351a7d4 Mon Sep 17 00:00:00 2001 From: A Wokaty Date: Wed, 29 Oct 2025 11:37:38 -0400 Subject: [PATCH 1/9] bump x.y.z version to even y prior to creation of RELEASE_3_22 branch --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index ccba2b4..81efe68 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Chromatograms Title: Infrastructure for Chromatographic Mass Spectrometry Data -Version: 0.99.7 +Version: 1.0.0 Description: The Chromatograms packages defines an efficient infrastructure for storing and handling of chromatographic mass spectrometry data. It provides different implementations of *backends* to store and represent the From c10498e70aaaa10633af7a49cd453dd211842fc7 Mon Sep 17 00:00:00 2001 From: A Wokaty Date: Wed, 29 Oct 2025 11:37:38 -0400 Subject: [PATCH 2/9] bump x.y.z version to odd y following creation of RELEASE_3_22 branch --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 81efe68..887411b 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Chromatograms Title: Infrastructure for Chromatographic Mass Spectrometry Data -Version: 1.0.0 +Version: 1.1.0 Description: The Chromatograms packages defines an efficient infrastructure for storing and handling of chromatographic mass spectrometry data. It provides different implementations of *backends* to store and represent the From 876abea51608815e7e778aff97b7546822ff3765 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Tue, 13 Jan 2026 17:01:13 +0100 Subject: [PATCH 3/9] whitespaces --- R/ChromBackendMzR.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/ChromBackendMzR.R b/R/ChromBackendMzR.R index 43fdc1d..f621039 100644 --- a/R/ChromBackendMzR.R +++ b/R/ChromBackendMzR.R @@ -24,7 +24,7 @@ NULL #' #' Implementing functionalities with the `ChromBackendMzR` backend should be #' simplified as much as possible and reuse the methods already implemented for -#' `ChromBackendMemory` when possible. +#' `ChromBackendMemory` when possible. #' #' @param BPPARAM Parallel setup configuration. See [BiocParallel::bpparam()] #' for more information. From c2759e92fdc17eeae19b3efb9acdd6527fc7c4b6 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Wed, 21 Jan 2026 10:20:18 +0100 Subject: [PATCH 4/9] refactor vignette --- R/ChromBackendSpectra.R | 36 +-- R/helpers.R | 33 ++- man/Chromatograms.Rd | 10 +- tests/testthat/test_ChromBackendSpectra.R | 87 +++++++ tests/testthat/test_Chromatograms.R | 87 +++++++ tests/testthat/test_helpers.R | 18 +- vignettes/using-a-chromatograms-object.Rmd | 278 ++++++++++++++++++++- 7 files changed, 509 insertions(+), 40 deletions(-) diff --git a/R/ChromBackendSpectra.R b/R/ChromBackendSpectra.R index 9f45a88..37ff934 100644 --- a/R/ChromBackendSpectra.R +++ b/R/ChromBackendSpectra.R @@ -131,7 +131,8 @@ ChromBackendSpectra <- setClass( slots = c( inMemory = "logical", spectra = "Spectra", - summaryFun = "function" + summaryFun = "function", + spectraSortIndex = "integer" ), prototype = prototype( chromData = fillCoreChromVariables(data.frame()), @@ -140,7 +141,8 @@ ChromBackendSpectra <- setClass( spectra = Spectra(), version = "0.1", inMemory = FALSE, - summaryFun = sumi + summaryFun = sumi, + spectraSortIndex = integer() ) ) @@ -181,12 +183,13 @@ setMethod("backendInitialize", "ChromBackendSpectra", "it needs to be part of the `coreChromVariables()` ", "available.") ## Spectra object are not expected to be ordered by rtime, - ## so we fix that below. - spectra <- lapply(split(spectra, spectra$dataOrigin), - function(x) { - x[order(x$rtime)] - }) - spectra <- concatenateSpectra(spectra) + ## so we store a sort index instead of concatenating. + ## This allows us to keep disk-backed backends intact. + sort_idx <- order( + spectra$dataOrigin, + spectra$rtime + ) + object@spectraSortIndex <- sort_idx object@chromData <- chromData object@spectra <- spectra @@ -242,9 +245,12 @@ setMethod("factorize", "ChromBackendSpectra", drop = TRUE, sep = "_") levels(spectra_f) <- levels(cd$chromSpectraIndex) object@spectra$chromSpectraIndex <- droplevels(spectra_f) + ## Use sorted spectra for .ensure_rt_mz_columns + sorted_spectra <- .spectra(object)[object@spectraSortIndex] + sorted_spectra_f <- spectra_f[object@spectraSortIndex] object@chromData <- .ensure_rt_mz_columns(cd, - .spectra(object), - spectra_f) + sorted_spectra, + sorted_spectra_f) } else { object@spectra$chromSpectraIndex <- spectra_f full_sp <- do.call(rbindFill, @@ -276,9 +282,11 @@ setMethod( } ## Ensure chromSpectraIndex only contains relevant levels needed valid_f <- chromSpectraIndex(object) - current_vals <- as.character(.spectra(object)$chromSpectraIndex) + ## Apply the sort index to spectra for processing + sorted_spectra <- .spectra(object)[object@spectraSortIndex] + current_vals <- as.character(sorted_spectra$chromSpectraIndex) if (!setequal(unique(current_vals), levels(valid_f))) { - object@spectra$chromSpectraIndex <- factor( + sorted_spectra$chromSpectraIndex <- factor( current_vals, levels = levels(valid_f) ) @@ -287,8 +295,8 @@ setMethod( pd <- mapply(.process_peaks_data, cd = split(chromData(object), valid_f), s = split( - .spectra(object), - .spectra(object)$chromSpectraIndex + sorted_spectra, + sorted_spectra$chromSpectraIndex ), MoreArgs = list( columns = columns, diff --git a/R/helpers.R b/R/helpers.R index 6589b09..32765bb 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -328,19 +328,27 @@ #' - `backendInitialize()` for `ChrombackendSpectra` #' @noRd .spectra_format_chromData <- function(sps) { - data.frame( + res <- data.frame( msLevel = unique(sps$msLevel), rtMin = min(sps$rtime, na.rm = TRUE), rtMax = max(sps$rtime, na.rm = TRUE), mzMin = -Inf, mzMax = Inf, mz = Inf, - polarity = sps$polarity[1], - scanWindowLowerLimit = sps$scanWindowLowerLimit[1], - scanWindowUpperLimit = sps$scanWindowUpperLimit[1], dataOrigin = unique(sps$dataOrigin), chromSpectraIndex = unique(sps$chromSpectraIndex) ) + ## Add optional columns if present + if ("polarity" %in% Spectra::spectraVariables(sps)) { + res$polarity <- sps$polarity[1] + } + if ("scanWindowLowerLimit" %in% Spectra::spectraVariables(sps)) { + res$scanWindowLowerLimit <- sps$scanWindowLowerLimit[1] + } + if ("scanWindowUpperLimit" %in% Spectra::spectraVariables(sps)) { + res$scanWindowUpperLimit <- sps$scanWindowUpperLimit[1] + } + res } #' Used in: @@ -363,13 +371,16 @@ stop("Both 'rtMin' and 'rtMax' must be present if one", " is provided.") } else { - rt_range <- lapply(split(spectra$rtime, spectra_f), function(rt) { - list(rtMin = min(rt, na.rm = TRUE), - rtMax = max(rt, na.rm = TRUE)) - }) - rt_values <- do.call(rbind, rt_range) - chrom_data$rtMin <- rt_values[, "rtMin"] - chrom_data$rtMax <- rt_values[, "rtMax"] + levs <- levels(spectra_f) + if (is.null(levs)) { + levs <- unique(as.character(spectra_f)) + } + rt_mat <- vapply(levs, function(lvl) { + range(spectra$rtime[spectra_f == lvl], na.rm = TRUE) + }, numeric(2)) + chrom_idx <- as.character(chrom_data$chromSpectraIndex) + chrom_data$rtMin <- rt_mat[1, chrom_idx] + chrom_data$rtMax <- rt_mat[2, chrom_idx] } } chrom_data diff --git a/man/Chromatograms.Rd b/man/Chromatograms.Rd index ae8ba57..79876d7 100644 --- a/man/Chromatograms.Rd +++ b/man/Chromatograms.Rd @@ -218,14 +218,16 @@ Each row in the \code{peak.table} defines a region to extract, using minimum and maximum retention time (and m/z in the case of \code{chromBackendSpectra}) boundaries, and identifiers that uniquely match chromatograms in the object. -The resulting \strong{new} \code{Chromatograms} object contains only chromatograms overlapping -the specified regions, with updated metadata reflecting the extracted -boundaries. +The resulting \strong{new} \code{Chromatograms} object contains only chromatograms +overlapping the specified regions, with updated metadata reflecting the +extracted boundaries. This function is most commonly used to subset chromatographic data around detected peaks or predefined time/mass ranges, for example to reprocess, visualize, or quantify extracted chromatograms corresponding to known -features. +features. It's important to notes that filtering by m/z is only supported +when using a \code{ChromBackendSpectra} backend. if the \code{mzMin} and \code{mzMax} +columns are provided when using other backends, they will be ignored. } \examples{ diff --git a/tests/testthat/test_ChromBackendSpectra.R b/tests/testthat/test_ChromBackendSpectra.R index 0c96569..cad2eba 100644 --- a/tests/testthat/test_ChromBackendSpectra.R +++ b/tests/testthat/test_ChromBackendSpectra.R @@ -119,6 +119,93 @@ test_that("factorize() works", { "must be in chromData") }) +test_that("factorize() fills rt columns with numeric vectors", { + sp <- Spectra::Spectra(S4Vectors::DataFrame( + mz = replicate(4, c(1, 2), simplify = FALSE), + intensity = replicate(4, c(10, 20), simplify = FALSE), + rtime = c(20, 10, 5, 15), + msLevel = rep(1L, 4), + dataOrigin = c("A", "A", "B", "B") + )) + + cb <- ChromBackendSpectra() + cb@spectra <- sp + cb@chromData <- data.frame( + msLevel = 1L, + dataOrigin = c("B", "A") + ) + cb@spectraSortIndex <- order(sp$dataOrigin, sp$rtime) + + cb <- factorize(cb, factorize.by = "dataOrigin") + + expect_type(cb@chromData$rtMin, "double") + expect_type(cb@chromData$rtMax, "double") + expect_false(is.list(cb@chromData$rtMin)) + expect_equal(cb@chromData$rtMin, c(5, 10)) + expect_equal(cb@chromData$rtMax, c(15, 20)) +}) + +test_that("spectraSortIndex is set and used for sorting", { + sp <- Spectra::Spectra(S4Vectors::DataFrame( + mz = replicate(4, c(1, 2), simplify = FALSE), + intensity = replicate(4, c(10, 20), simplify = FALSE), + rtime = c(3, 1, 4, 2), + msLevel = rep(1L, 4), + dataOrigin = c("B", "A", "B", "A") + )) + + cb <- ChromBackendSpectra() + cb@spectra <- sp + csi <- interaction( + as.list(Spectra::spectraData(sp)[, c("msLevel", "dataOrigin"), drop = FALSE]), + drop = TRUE, sep = "_" + ) + cb@spectra$chromSpectraIndex <- csi + cb@chromData <- fillCoreChromVariables(data.frame( + msLevel = c(1L, 1L), + dataOrigin = c("A", "B"), + chromSpectraIndex = c("1_A", "1_B") + )) + cb@spectraSortIndex <- order(sp$dataOrigin, sp$rtime) + + expect_identical(cb@spectraSortIndex, order(sp$dataOrigin, sp$rtime)) +}) + +test_that("[ maintains spectra and spectraSortIndex", { + sp <- Spectra::Spectra(S4Vectors::DataFrame( + mz = replicate(4, c(1, 2), simplify = FALSE), + intensity = replicate(4, c(10, 20), simplify = FALSE), + rtime = c(3, 1, 4, 2), + msLevel = rep(1L, 4), + dataOrigin = c("B", "A", "B", "A") + )) + + cb <- ChromBackendSpectra() + cb@spectra <- sp + csi <- interaction( + as.list(Spectra::spectraData(sp)[, c("msLevel", "dataOrigin"), drop = FALSE]), + drop = TRUE, sep = "_" + ) + cb@spectra$chromSpectraIndex <- csi + cb@chromData <- fillCoreChromVariables(data.frame( + msLevel = c(1L, 1L), + dataOrigin = c("A", "B"), + chromSpectraIndex = c("1_A", "1_B") + )) + cb@spectraSortIndex <- order(sp$dataOrigin, sp$rtime) + + keep <- chromSpectraIndex(cb) == "1_A" + cb_sub <- cb[keep] + + expect_s4_class(cb_sub, "ChromBackendSpectra") + expect_identical(length(cb_sub), 1L) + expect_true(all(cb_sub@spectra$dataOrigin == "A")) + expect_identical( + cb_sub@spectraSortIndex, + order(cb_sub@spectra$dataOrigin, cb_sub@spectra$rtime) + ) +}) + test_that("chromSpectraIndex works", { expect_error( chromSpectraIndex(1), diff --git a/tests/testthat/test_Chromatograms.R b/tests/testthat/test_Chromatograms.R index 1686f28..f8685d6 100644 --- a/tests/testthat/test_Chromatograms.R +++ b/tests/testthat/test_Chromatograms.R @@ -28,6 +28,93 @@ test_that("Chromatograms works", { expect_identical(.processingQueue(c_sp), list()) }) +test_that("Chromatograms constructor from Spectra works with all parameters", { + ## Basic construction with defaults + chr <- Chromatograms(s) + expect_s4_class(chr, "Chromatograms") + expect_s4_class(.backend(chr), "ChromBackendSpectra") + expect_equal(length(chr), 3L) + + ## With summarize.method = "sum" (default) + chr_sum <- Chromatograms(s, summarize.method = "sum") + expect_s4_class(chr_sum, "Chromatograms") + expect_identical(.backend(chr_sum)@summaryFun, sumi) + + ## With summarize.method = "max" + chr_max <- Chromatograms(s, summarize.method = "max") + expect_s4_class(chr_max, "Chromatograms") + expect_identical(.backend(chr_max)@summaryFun, maxi) + + ## With empty chromData (should create default) + chr_empty_cd <- Chromatograms(s, chromData = data.frame()) + expect_s4_class(chr_empty_cd, "Chromatograms") + expect_true(nrow(chromData(chr_empty_cd)) > 0) + expect_true(all(coreChromVariables() %in% colnames(chromData(chr_empty_cd)))) + + ## With custom chromData + custom_cd <- data.frame( + msLevel = 1L, + dataOrigin = unique(s$dataOrigin), + customCol = "test" + ) + chr_custom <- Chromatograms(s, chromData = custom_cd) + expect_s4_class(chr_custom, "Chromatograms") + expect_true("customCol" %in% colnames(chromData(chr_custom))) + expect_equal(chromData(chr_custom)$customCol, "test") + + ## With custom factorize.by + chr_factby <- Chromatograms(s, factorize.by = "dataOrigin") + expect_s4_class(chr_factby, "Chromatograms") + expect_true(all(chromData(chr_factby)$dataOrigin == + chromData(chr_factby)$chromSpectraIndex)) + + ## With spectraVariables + chr_specvars <- Chromatograms(s, spectraVariables = c("polarity")) + expect_s4_class(chr_specvars, "Chromatograms") + if ("polarity" %in% Spectra::spectraVariables(s)) { + expect_true("polarity" %in% colnames(chromData(chr_specvars))) + } +}) + +test_that("Chromatograms constructor from ChromBackend works", { + ## From ChromBackendMemory + chr_mem <- Chromatograms(be) + expect_s4_class(chr_mem, "Chromatograms") + expect_s4_class(.backend(chr_mem), "ChromBackendMemory") + expect_equal(length(chr_mem), length(be)) + + ## From ChromBackendMzR + chr_mzr <- Chromatograms(be_mzr) + expect_s4_class(chr_mzr, "Chromatograms") + expect_s4_class(.backend(chr_mzr), "ChromBackendMzR") + expect_equal(length(chr_mzr), length(be_mzr)) + + ## From ChromBackendSpectra + chr_spec <- Chromatograms(be_sp) + expect_s4_class(chr_spec, "Chromatograms") + expect_s4_class(.backend(chr_spec), "ChromBackendSpectra") + expect_equal(length(chr_spec), length(be_sp)) + + ## With processingQueue + pq <- list(function(x) x) + chr_pq <- Chromatograms(be, processingQueue = pq) + expect_equal(length(.processingQueue(chr_pq)), 1) +}) + +test_that("Chromatograms constructor handles edge cases", { + ## Empty Spectra + empty_s <- Spectra() + chr_empty <- Chromatograms(empty_s) + expect_s4_class(chr_empty, "Chromatograms") + expect_equal(length(chr_empty), 0) + + ## Missing object (creates empty ChromBackendMemory) + chr_missing <- Chromatograms() + expect_s4_class(chr_missing, "Chromatograms") + expect_s4_class(.backend(chr_missing), "ChromBackendMemory") + expect_equal(length(chr_missing), 0) +}) + test_that("show, Chromatograms - ChromBackendMemory works", { expect_output(show(c_full), "ChromBackendMemory") res <- c_full diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index c7a4bae..a5d8d2f 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -354,33 +354,37 @@ test_that("ensure_rt_mz_columns correctly handles mz and rt columns", { paste, c(as.list(Spectra::spectraData(s)[, c("msLevel", "dataOrigin")]), sep = "_"))) - chrom_data <- data.frame(msLevel = c(1,2,3)) + levs <- levels(spectra_f) + chrom_data <- data.frame(msLevel = c(1,2,3), + chromSpectraIndex = levs[1:3]) chrom_data <- .ensure_rt_mz_columns(chrom_data, spectra, spectra_f) expect_equal(chrom_data$mzMin, c(-Inf, -Inf, -Inf)) expect_equal(chrom_data$mzMax, c(Inf, Inf, Inf)) - chrom_data <- data.frame(mzMin = c(100)) + chrom_data <- data.frame(mzMin = c(100), chromSpectraIndex = levs[1]) expect_error(.ensure_rt_mz_columns(chrom_data, spectra, spectra_f), "must be present if one is provided.") - chrom_data <- data.frame(mzMax = c(200)) + chrom_data <- data.frame(mzMax = c(200), chromSpectraIndex = levs[1]) expect_error(.ensure_rt_mz_columns(chrom_data, spectra, spectra_f), "must be present if one is provided.") - chrom_data <- data.frame(msLevel = c(1,2,3)) + chrom_data <- data.frame(msLevel = c(1,2,3), + chromSpectraIndex = levs[1:3]) chrom_data <- .ensure_rt_mz_columns(chrom_data, spectra, spectra_f) s_plit <- split(spectra, spectra_f) expect_equal(chrom_data$rtMin[[1]], min(s_plit[[1]]$rtime, na.rm = TRUE)) expect_equal(chrom_data$rtMax[[1]], max(s_plit[[1]]$rtime, na.rm = TRUE)) - chrom_data <- data.frame(rtMin = c(10)) + chrom_data <- data.frame(rtMin = c(10), chromSpectraIndex = levs[1]) expect_error(.ensure_rt_mz_columns(chrom_data, spectra, spectra_f), " must be present if one is provided.") - chrom_data <- data.frame(rtMax = c(50)) + chrom_data <- data.frame(rtMax = c(50), chromSpectraIndex = levs[1]) expect_error(.ensure_rt_mz_columns(chrom_data, spectra, spectra_f), "must be present if one is provided.") chrom_data <- data.frame(mzMin = c(100), mzMax = c(200), - rtMin = c(10), rtMax = c(50)) + rtMin = c(10), rtMax = c(50), + chromSpectraIndex = levs[1]) chrom_data <- .ensure_rt_mz_columns(chrom_data, spectra, spectra_f) expect_equal(chrom_data$mzMin, 100) expect_equal(chrom_data$mzMax, 200) diff --git a/vignettes/using-a-chromatograms-object.Rmd b/vignettes/using-a-chromatograms-object.Rmd index 194a120..558e406 100644 --- a/vignettes/using-a-chromatograms-object.Rmd +++ b/vignettes/using-a-chromatograms-object.Rmd @@ -93,6 +93,13 @@ could be used instead. The default backends are: - `ChromBackendSpectra`: this backend is used to generate chromatographic data from a `Spectra` object. I can be use to create TIC, BPC or EICs. +All backends provide a consistent interface through the `Chromatograms` object, +regardless of where or how the data is stored. The `ChromBackendSpectra` has +a special feature: it uses an internal sort index (`spectraSortIndex`) to +maintain retention time order without physically reordering the underlying +`Spectra` object. This is particularly important for disk-backed `Spectra` +objects, as it avoids loading all data into memory. + ## Chromatographic peaks data The *peaks data variables* information in the `Chromatograms` object can be @@ -213,6 +220,26 @@ The `Chromatograms` object `chr_mzr` now contains the chromatograms from the mzML file `MRM_file`. The chromatograms can be accessed and manipulated using the `Chromatograms` object's methods and functions. +It is also possible to create a `Chromatograms` object directly from a +`Spectra` object. This is particularly useful when you want to generate total +ion chromatograms (TIC), base peak chromatograms (BPC), or extracted ion +chromatograms (EIC) from spectral data: + +```{r, eval=FALSE} +library(Spectra) + +## Create TIC (default) +chr_tic <- Chromatograms(sp, summarize.method = "sum") + +## Create BPC +chr_bpc <- Chromatograms(sp, summarize.method = "max") + +## Create with custom factorization +chr_custom <- Chromatograms(sp, + factorize.by = "dataOrigin", + summarize.method = "sum") +``` + Basic information about the `Chromatograms` object can be accessed using functions such as `length()`, which tell us how many chromatograms are contained in the object: @@ -344,6 +371,11 @@ length(chr) The number of chromatograms in the `Chromatograms` object is reduced. +Note that for `ChromBackendSpectra`, when you subset the `Chromatograms` object, +the underlying `Spectra` object and its sort index are also properly subset and +updated. This ensures that peak data extraction remains efficient even after +subsetting operations. + # Lazy Processing and Parallelization @@ -495,6 +527,24 @@ much larger than before. print(object.size(chr_mzr), units = "Mb") ``` +## Choosing the right backend + +Different backends are suited for different use cases: + +- **`ChromBackendMemory`**: Best for small to medium datasets where fast access + is needed. All data is kept in memory, providing the fastest access but higher + memory consumption. + +- **`ChromBackendMzR`**: Ideal for large datasets stored in mzML/mzXML/CDF files. + Only metadata is kept in memory, while peak data is read on-demand, significantly + reducing memory footprint at the cost of slower data access. + +- **`ChromBackendSpectra`**: Perfect for generating chromatograms from spectral + data, especially when creating TICs, BPCs, or EICs from existing `Spectra` + objects. The backend intelligently handles both in-memory and disk-backed + `Spectra` objects through its internal sorting mechanism, avoiding unnecessary + memory consumption while maintaining good performance. + # Plotting chromatograms from a `Spectra` object @@ -520,6 +570,30 @@ backend. one chromatogram was generated per file. chr_s ``` +The `ChromBackendSpectra` backend provides flexibility in how chromatograms are +generated from spectral data. By default, separate chromatograms are created for +each combination of MS level and data origin. You can control this behavior with +the `factorize.by` parameter. + +Additionally, you can provide custom chromatogram metadata to define specific +m/z and retention time ranges: + +```{r} +## Create custom metadata for EIC extraction +custom_cd <- data.frame( + msLevel = c(1L, 1L), + dataOrigin = rep(dataOrigin(sp)[1], 2), + mzMin = c(100, 200), + mzMax = c(100.5, 200.5) +) + +chr_custom <- Chromatograms(sp, chromData = custom_cd) +chr_custom +``` + +This approach allows you to pre-define the chromatographic regions you want to +extract, which is useful for targeted analysis workflows. + Now, let's say we want to plot specific area of the chromatograms. ```{r} @@ -548,14 +622,210 @@ into one plot. plotChromatogramsOverlay(chr_s, col = col3) ``` -## Impute missing values +# Extracting chromatographic regions of interest + +The `chromExtract()` function allows you to extract specific regions of interest +from a `Chromatograms` object based on a peak table. This is particularly useful +when you want to focus on specific retention time windows or m/z ranges that +correspond to detected peaks or features of interest. + +## Basic extraction by retention time + +For backends like `ChromBackendMemory` and `ChromBackendMzR`, you can extract +regions based on retention time ranges: + +```{r} +## Define peaks of interest with retention time windows +peak_table <- data.frame( + rtMin = c(8, 11), + rtMax = c(10, 13), + msLevel = c(2L, 2L), + chromIndex = c(1L, 2L) +) + +## Extract those regions +chr_extracted <- chromExtract(chr, peak_table, + by = c("msLevel", "chromIndex")) + +chr_extracted +``` + +The resulting `Chromatograms` object contains only the data within the specified +retention time windows. Note that extra columns in `peak_table` are added to +the chromatogram metadata: + +```{r} +chromData(chr_extracted) +``` + +## Extraction with m/z filtering (ChromBackendSpectra only) + +When using `ChromBackendSpectra`, you can also filter by m/z ranges, which is +useful for extracting ion chromatograms (EICs) for specific mass windows: + +```{r} +## Define peak table with both retention time and m/z windows +peak_table_mz <- data.frame( + rtMin = c(125, 125), + rtMax = c(180, 180), + mzMin = c(100, 140), + mzMax = c(100.5, 140.5), + msLevel = c(1L, 1L), + dataOrigin = rep(dataOrigin(chr_s)[1], 2), + featureID = c("feature_1", "feature_2") +) + +## Extract EICs for these features +chr_eics <- chromExtract(chr_s, peak_table_mz, + by = c("msLevel", "dataOrigin")) + +chr_eics +``` + +Notice that the custom column `featureID` from the peak table is now part of +the chromatogram metadata: -TBD +```{r} +chromData(chr_eics) +``` + +This is particularly useful for linking extracted chromatograms back to +feature tables or peak detection results. + +# Imputing missing values in chromatograms + +Real chromatographic data often has gaps or missing intensity values at certain +retention times, which can occur due to instrumental limitations, data processing +artifacts, or sparse sampling. The `imputePeaksData()` function provides several +methods to interpolate these missing values, which can improve downstream +analysis and visualization. + +## Available imputation methods + +The package provides four imputation methods: + +- **"linear"**: Linear interpolation between known values. Fast and simple, good + for data with regular gaps. +- **"spline"**: Cubic spline interpolation. Provides smooth curves but may + introduce artifacts. +- **"gaussian"**: Gaussian kernel smoothing. Uses a Gaussian kernel to estimate + values based on neighboring points. +- **"loess"**: Locally weighted scatter plot smoothing. Provides robust smoothing + with local polynomial regression. + +## Example: Imputing an extracted ion chromatogram (EIC) + +Let's extract a narrow m/z range EIC and then apply different imputation methods: + +```{r} +## Create a specific EIC +eic_table <- data.frame( + rtMin = 125, + rtMax = 180, + mzMin = 100.01, + mzMax = 100.02, + msLevel = 1L, + dataOrigin = dataOrigin(chr_s)[1] +) + +chr_eic <- chromExtract(chr_s, eic_table, by = c("msLevel", "dataOrigin")) +chr_eic +``` + +Now let's examine the raw data and apply different imputation methods: + +```{r, fig.height=10, fig.width=8} +## Create copies for comparison +chr_linear <- imputePeaksData(chr_eic, method = "linear") +chr_spline <- imputePeaksData(chr_eic, method = "spline") +chr_gaussian <- imputePeaksData(chr_eic, method = "gaussian", + window = 5, sd = 2) +chr_loess <- imputePeaksData(chr_eic, method = "loess", span = 0.3) + +## Plot all methods for comparison +par(mfrow = c(3, 2), mar = c(4, 4, 2, 1)) + +## Original data +plotChromatograms(chr_eic, main = "Original EIC") + +## Linear interpolation +plotChromatograms(chr_linear, main = "Linear Imputation") + +## Spline interpolation +plotChromatograms(chr_spline, main = "Spline Imputation") + +## Gaussian smoothing +plotChromatograms(chr_gaussian, main = "Gaussian Smoothing (window=5, sd=2)") + +## LOESS smoothing +plotChromatograms(chr_loess, main = "LOESS Smoothing (span=0.3)") +``` +## Selecting the right imputation method -## Extracting chromatographic area of interest. +The choice of imputation method depends on your data characteristics and +analysis goals: -TBD +- Use **"linear"** for quick interpolation of small gaps in regularly sampled + data. +- Use **"spline"** for smooth curves when data is fairly regular, but be aware + it can overshoot. +- Use **"gaussian"** for local smoothing that preserves peak shapes while + filling gaps. +- Use **"loess"** when you want robust smoothing that adapts to local data + density. + +## Imputation in lazy evaluation pipelines + +For on-disk backends like `ChromBackendMzR`, imputation is particularly useful +when combined with the lazy evaluation queue. The imputation function is added +to the processing queue and is only applied when peak data is actually accessed: + +```{r} +## For on-disk backends, add imputation to the lazy queue +chr_mzr_imputed <- imputePeaksData( + chr_mzr, + method = "gaussian", + window = 5, + sd = 2 +) + +chr_mzr_imputed +``` + +The imputation is **not** performed immediately. Instead, it's stored in the +processing queue. When you call `peaksData()` on the object, the raw data is +read from the file and then imputation is applied on-the-fly: + +```{r} +## This reads from disk and applies imputation in one step +peak_data <- peaksData(chr_mzr_imputed[1]) +``` + +This approach is highly efficient for large datasets because: + +1. Data is only read from disk when needed +2. Imputation is applied on-the-fly during data access +3. No temporary files are created +4. Memory usage remains minimal + +You can verify the processing queue contains your imputation step: + +```{r} +length(chr_mzr_imputed@processingQueue) +``` + +And if you want to make the imputation permanent (for in-memory backends), +use `applyProcessing()`: + +```{r} +## For in-memory backends, you can persist the imputation +chr_in_memory <- setBackend(chr_mzr_imputed, ChromBackendMemory()) +chr_in_memory <- applyProcessing(chr_in_memory) + +# Now imputation is permanently applied +length(chr_in_memory@processingQueue) +``` # Session information From 11cfd82f5e16c897a6e5e90dfb21fb76bac486d9 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Wed, 21 Jan 2026 13:33:35 +0100 Subject: [PATCH 5/9] refactor: vignettes, bioc3.22 --- DESCRIPTION | 3 +- NAMESPACE | 1 + NEWS.md | 10 + R/ChromBackendSpectra.R | 115 ++++++--- R/Chromatograms.R | 9 +- R/helpers.R | 11 +- man/ChromBackendSpectra.Rd | 17 +- man/Chromatograms.Rd | 11 +- tests/testthat/test_ChromBackendSpectra.R | 258 +++++++++++++++++++-- tests/testthat/test_Chromatograms.R | 31 ++- vignettes/using-a-chromatograms-object.Rmd | 55 ++--- 11 files changed, 429 insertions(+), 92 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 887411b..97b965d 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: Chromatograms Title: Infrastructure for Chromatographic Mass Spectrometry Data -Version: 1.1.0 +Version: 1.1.1 Description: The Chromatograms packages defines an efficient infrastructure for storing and handling of chromatographic mass spectrometry data. It provides different implementations of *backends* to store and represent the @@ -41,6 +41,7 @@ Suggests: mzR (>= 2.41.4), MsBackendMetaboLights (>= 1.3.1), vdiffr, + IRanges, RColorBrewer License: Artistic-2.0 Encoding: UTF-8 diff --git a/NAMESPACE b/NAMESPACE index a8ab923..73e6b8a 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ export(ChromBackendMemory) export(ChromBackendMzR) export(ChromBackendSpectra) +export(chromSpectraIndex) export(coreChromVariables) export(corePeaksVariables) export(fillCoreChromVariables) diff --git a/NEWS.md b/NEWS.md index 5792b30..f15104a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,13 @@ +# Version 1.1.1 + +- Aligned the package with the Bioconductor 3.22 release. +- Expanded the vignette to cover ChromBackendSpectra usage, chromatogram + extraction with `chromExtract()`, and imputation workflows via + `imputePeaksData()`. +- Added `spectraSortIndex()` for `ChromBackendSpectra` to compute the desired + retention-time order on demand, avoiding the need to keep on-disk `Spectra` + objects sorted in memory. + # Version 0.99.7 ## Changes in 0.99.7 diff --git a/R/ChromBackendSpectra.R b/R/ChromBackendSpectra.R index 37ff934..a88bb31 100644 --- a/R/ChromBackendSpectra.R +++ b/R/ChromBackendSpectra.R @@ -42,6 +42,20 @@ NULL #' replacement is unsupported — modifications are temporary to optimize memory. #' The `inMemory` slot indicates this with `TRUE`. #' +#' **Spectra Sort Index**: The `ChromBackendSpectra` backend maintains a +#' `spectraSortIndex` slot that stores a sort order for the internal `Spectra` +#' object based on `dataOrigin` and `rtime`. This avoids the need to physically +#' reorder disk-backed `Spectra` objects, which would require loading all data +#' into memory. The sort index is automatically recalculated whenever the +#' `factorize()` method is called, ensuring it remains valid and consistent. +#' +#' **Factorize and Subsetting**: The `factorize()` method updates the +#' `chromSpectraIndex` in both `chromData` and the `spectra` object to reflect +#' the current grouping, and recalculates `spectraSortIndex` to maintain the +#' correct sort order. The `[` subsetting operator properly handles subsetting +#' of both `chromData`, `peaksData`, and `spectra`, while updating the +#' `spectraSortIndex` to reference valid positions in the subsetted data. +#' #' `ChromBackendSpectra` should reuse `ChromBackendMemory` methods whenever #' possible to keep implementations simple. #' @@ -213,7 +227,7 @@ setMethod("show", "ChromBackendSpectra", function(object) { }) #' @rdname ChromBackendSpectra -#' @note ensure that it returns a factor +#' @export chromSpectraIndex <- function(object) { if (!is(object, "ChromBackendSpectra")) stop("The object must be a 'ChromBackendSpectra' object.") @@ -231,35 +245,53 @@ setMethod("factorize", "ChromBackendSpectra", spectraVariables(.spectra(object)))) stop("All 'factorize.by' variables must be in the ", "Spectra object.") - spectra_f <- interaction(as.list( + + ## Create interaction factor from spectra + spectra_f <- interaction(as.list( spectraData(.spectra(object))[, - factorize.by, drop = FALSE]), + factorize.by, drop = FALSE]), drop = TRUE, sep = "_") - cd <- .chromData(object) - if (nrow(cd)) { - if (!all(factorize.by %in% chromVariables(object))) - stop("All 'factorize.by' variables must be in chromData.") - cd$chromSpectraIndex <- interaction(cd[, factorize.by, - drop = FALSE], - drop = TRUE, sep = "_") - levels(spectra_f) <- levels(cd$chromSpectraIndex) - object@spectra$chromSpectraIndex <- droplevels(spectra_f) - ## Use sorted spectra for .ensure_rt_mz_columns - sorted_spectra <- .spectra(object)[object@spectraSortIndex] - sorted_spectra_f <- spectra_f[object@spectraSortIndex] - object@chromData <- .ensure_rt_mz_columns(cd, - sorted_spectra, - sorted_spectra_f) - } else { - object@spectra$chromSpectraIndex <- spectra_f - full_sp <- do.call(rbindFill, - lapply(split(.spectra(object), spectra_f), - .spectra_format_chromData)) - rownames(full_sp) <- NULL - object@chromData <- full_sp - } - object + cd <- .chromData(object) + + if (nrow(cd)) { + ## chromData exists: validate and align spectra to it + if (!all(factorize.by %in% chromVariables(object))) + stop("All 'factorize.by' variables must be in chromData.") + + cd$chromSpectraIndex <- interaction(cd[, factorize.by, + drop = FALSE], + drop = TRUE, sep = "_") + + ## Align spectra factor to chromData levels + object@spectra$chromSpectraIndex <- factor(as.character(spectra_f), + levels = levels(cd$chromSpectraIndex)) + + ## Use sorted spectra for calculating retention time ranges + sorted_spectra <- .spectra(object)[object@spectraSortIndex] + sorted_spectra_f <- spectra_f[object@spectraSortIndex] + + ## Ensure rt/mz columns are properly set + object@chromData <- .ensure_rt_mz_columns(cd, + sorted_spectra, + sorted_spectra_f) + } else { + ## chromData is empty: create it from spectra + object@spectra$chromSpectraIndex <- spectra_f + full_sp <- do.call(rbindFill, + lapply(split(.spectra(object), spectra_f), + .spectra_format_chromData)) + rownames(full_sp) <- NULL + object@chromData <- full_sp + } + + ## Recalculate spectraSortIndex based on the current spectra ordering + object@spectraSortIndex <- order( + object@spectra$dataOrigin, + object@spectra$rtime + ) + + object }) #' @rdname hidden_aliases @@ -331,11 +363,38 @@ setMethod( #' @rdname hidden_aliases #' @importMethodsFrom S4Vectors [ [[ +#' @importFrom MsCoreUtils i2index #' @export setMethod("[", "ChromBackendSpectra", function(x, i, j, ...) { if (!length(i)) return(ChromBackendSpectra()) - callNextMethod() + + i <- i2index(i, length = length(x)) + + ## Subset chromData and peaksData via parent method + x@chromData <- .chromData(x)[i, , drop = FALSE] + x@peaksData <- .peaksData(x)[i] + + ## Determine which spectra to keep based on chromSpectraIndex + kept_indices <- chromSpectraIndex(x)[i] + spectra_keep <- x@spectra$chromSpectraIndex %in% kept_indices + + ## Subset the spectra object + x@spectra <- x@spectra[spectra_keep] + + ## Update spectraSortIndex to reflect the new ordering after subsetting + old_positions_kept <- which(spectra_keep) + mapping <- match(old_positions_kept, seq_along(spectra_keep)[spectra_keep]) + + kept_sort_positions <- x@spectraSortIndex %in% old_positions_kept + x@spectraSortIndex <- mapping[match(x@spectraSortIndex[kept_sort_positions], + old_positions_kept)] + + ## Ensure chromSpectraIndex levels are still consistent + x@chromData$chromSpectraIndex <- droplevels(x@chromData$chromSpectraIndex) + x@spectra$chromSpectraIndex <- droplevels(x@spectra$chromSpectraIndex) + + x }) #' @rdname hidden_aliases diff --git a/R/Chromatograms.R b/R/Chromatograms.R index f59b121..6a72364 100644 --- a/R/Chromatograms.R +++ b/R/Chromatograms.R @@ -17,7 +17,14 @@ NULL #' metadata. The chromatographic data is represented by a *backend* extending #' the virtual [ChromBackend] class which provides the raw data to the #' `Chromatograms` object. Different backends and their properties are -#' decribed in the [ChromBackend] class documentation. +#' described in the [ChromBackend] class documentation. +#' +#' **Available Backends**: The package provides several backends: +#' - `ChromBackendMemory`: Stores data in memory (default, ideal for small datasets). +#' - `ChromBackendMzR`: Reads peaks data from raw MS files on demand. +#' - `ChromBackendSpectra`: Generates chromatographic data from a `Spectra` object. +#' This backend supports both in-memory and file-backed `Spectra` objects, using +#' an internal `spectraSortIndex` to avoid physically reordering the spectra. #' #' @section Creation of objects: #' diff --git a/R/helpers.R b/R/helpers.R index 32765bb..8834513 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -516,13 +516,18 @@ ## - BackendInitialize, chrombackendSPectra method #' @noRd .map_spectra_vars <- function(object, spectraVariables) { - ## check variable validity� + ## check variable validity spectra <- .spectra(object) cd <- .chromData(object) if (!all(spectraVariables %in% spectraVariables(spectra))) stop("All 'spectraVariables' must exist in 'spectra'.") - if (any(spectraVariables %in% colnames(cd))) - stop("None of the 'spectraVariables' must already exist in 'chromData'.") + if (any(spectraVariables %in% colnames(cd))) { + existing <- intersect(spectraVariables, colnames(cd)) + non_replaceable <- vapply(existing, function(v) !all(is.na(cd[[v]])), logical(1)) + if (any(non_replaceable)) { + stop("None of the 'spectraVariables' must already exist in 'chromData'.") + } + } idx <- spectra$chromSpectraIndex spd <- spectraData(spectra, columns = spectraVariables) diff --git a/man/ChromBackendSpectra.Rd b/man/ChromBackendSpectra.Rd index 3d53305..c94c072 100644 --- a/man/ChromBackendSpectra.Rd +++ b/man/ChromBackendSpectra.Rd @@ -87,12 +87,23 @@ No \code{peaksData} is stored until the user calls a function that generates it replacement is unsupported — modifications are temporary to optimize memory. The \code{inMemory} slot indicates this with \code{TRUE}. +\strong{Spectra Sort Index}: The \code{ChromBackendSpectra} backend maintains a +\code{spectraSortIndex} slot that stores a sort order for the internal \code{Spectra} +object based on \code{dataOrigin} and \code{rtime}. This avoids the need to physically +reorder disk-backed \code{Spectra} objects, which would require loading all data +into memory. The sort index is automatically recalculated whenever the +\code{factorize()} method is called, ensuring it remains valid and consistent. + +\strong{Factorize and Subsetting}: The \code{factorize()} method updates the +\code{chromSpectraIndex} in both \code{chromData} and the \code{spectra} object to reflect +the current grouping, and recalculates \code{spectraSortIndex} to maintain the +correct sort order. The \code{[} subsetting operator properly handles subsetting +of both \code{chromData}, \code{peaksData}, and \code{spectra}, while updating the +\code{spectraSortIndex} to reference valid positions in the subsetted data. + \code{ChromBackendSpectra} should reuse \code{ChromBackendMemory} methods whenever possible to keep implementations simple. } -\note{ -ensure that it returns a factor -} \examples{ library(Spectra) library(MsBackendMetaboLights) diff --git a/man/Chromatograms.Rd b/man/Chromatograms.Rd index 79876d7..eae6b5f 100644 --- a/man/Chromatograms.Rd +++ b/man/Chromatograms.Rd @@ -127,7 +127,16 @@ The \code{Chromatograms} class encapsules chromatographic data and related metadata. The chromatographic data is represented by a \emph{backend} extending the virtual \link{ChromBackend} class which provides the raw data to the \code{Chromatograms} object. Different backends and their properties are -decribed in the \link{ChromBackend} class documentation. +described in the \link{ChromBackend} class documentation. + +\strong{Available Backends}: The package provides several backends: +\itemize{ +\item \code{ChromBackendMemory}: Stores data in memory (default, ideal for small datasets). +\item \code{ChromBackendMzR}: Reads peaks data from raw MS files on demand. +\item \code{ChromBackendSpectra}: Generates chromatographic data from a \code{Spectra} object. +This backend supports both in-memory and file-backed \code{Spectra} objects, using +an internal \code{spectraSortIndex} to avoid physically reordering the spectra. +} } \note{ This needs to be discussed, if we want for example to be able to set a diff --git a/tests/testthat/test_ChromBackendSpectra.R b/tests/testthat/test_ChromBackendSpectra.R index cad2eba..eee1a77 100644 --- a/tests/testthat/test_ChromBackendSpectra.R +++ b/tests/testthat/test_ChromBackendSpectra.R @@ -58,11 +58,12 @@ test_that("backendInitialize works", { ), "should be one of" ) - df <- data.frame(msLevel = 1:3, mz = 1:3, + df <- data.frame(msLevel = 1L, mz = 100, dataOrigin = dataOrigin(s)[1]) bd_tmp <- backendInitialize(ChromBackendSpectra(), spectra = s, chromData = df, - factorize.by = c("msLevel", "dataOrigin")) + factorize.by = c("msLevel", "dataOrigin") + ) expect_identical(bd_tmp@chromData[, colnames(df)], df) expect_true(all(c("rtMin", "rtMax", "mzMin", "mzMax", "chromSpectraIndex") %in% colnames(bd_tmp@chromData))) @@ -181,29 +182,244 @@ test_that("[ maintains spectra and spectraSortIndex", { )) cb <- ChromBackendSpectra() - cb@spectra <- sp - csi <- interaction( - as.list(Spectra::spectraData(sp)[, c("msLevel", "dataOrigin"), drop = FALSE]), - drop = TRUE, sep = "_" - ) - cb@spectra$chromSpectraIndex <- csi - cb@chromData <- fillCoreChromVariables(data.frame( - msLevel = c(1L, 1L), - dataOrigin = c("A", "B"), - chromSpectraIndex = c("1_A", "1_B") - )) - cb@spectraSortIndex <- order(sp$dataOrigin, sp$rtime) - - keep <- chromSpectraIndex(cb) == "1_A" + cb <- backendInitialize(cb, spectra = sp) + + # Verify initial state + expect_identical(length(cb), 2L) # 2 unique combinations of msLevel and dataOrigin + expect_identical(length(cb@spectra), 4L) # 4 spectra + expect_true(length(cb@spectraSortIndex) > 0) + + # Verify spectraSortIndex is correctly set + expected_sort <- order(sp$dataOrigin, sp$rtime) + expect_identical(cb@spectraSortIndex, expected_sort) + + # Get chromSpectraIndex before subsetting + all_chrom_idx <- chromSpectraIndex(cb) + expect_identical(length(all_chrom_idx), 2L) + + # Subset to keep only "1_A" chromatograms + keep <- all_chrom_idx == "1_A" cb_sub <- cb[keep] + # Verify subsetting worked correctly expect_s4_class(cb_sub, "ChromBackendSpectra") - expect_identical(length(cb_sub), 1L) + expect_identical(length(cb_sub), 1L) # Only 1 chromatogram kept + + # Verify chromData was subsetted + sub_chrom_idx <- chromSpectraIndex(cb_sub) + expect_identical(length(sub_chrom_idx), 1L) + expect_identical(as.character(sub_chrom_idx), "1_A") + + # Verify spectra were subsetted to only include spectra for "1_A" + expect_identical(length(cb_sub@spectra), 2L) # Only 2 spectra belong to "1_A" expect_true(all(cb_sub@spectra$dataOrigin == "A")) - expect_identical( - cb_sub@spectraSortIndex, - order(cb_sub@spectra$dataOrigin, cb_sub@spectra$rtime) - ) + + # Verify spectraSortIndex is valid and functional + expect_true(length(cb_sub@spectraSortIndex) > 0) + expect_true(all(cb_sub@spectraSortIndex <= length(cb_sub@spectra))) + + # Verify the sort index still provides correct ordering + sorted_rtime <- cb_sub@spectra$rtime[cb_sub@spectraSortIndex] + expect_true(all(diff(sorted_rtime) >= 0)) + + # Verify chromSpectraIndex is properly factorized + expect_true(is.factor(cb_sub@spectra$chromSpectraIndex)) + expect_identical(levels(cb_sub@spectra$chromSpectraIndex), "1_A") +}) + +test_that("factorize() handles empty chromData correctly", { + # Test the scenario from the vignette: creating Chromatograms from Spectra + # without providing explicit chromData + sp <- Spectra::Spectra(S4Vectors::DataFrame( + mz = IRanges::NumericList( + c(100, 101), c(100, 101), c(100, 101), c(100, 101), c(100, 101), + compress = FALSE + ), + intensity = IRanges::NumericList( + c(10, 20), c(15, 25), c(30, 5), c(12, 18), c(40, 2), + compress = FALSE + ), + rtime = c(100, 110, 120, 130, 140), + msLevel = rep(1L, 5), + dataOrigin = rep("example", 5) + )) + + # Create Chromatograms with empty chromData + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp, chromData = data.frame()) + + # Should create chromData from spectra + expect_identical(nrow(cb@chromData), 1L) # One chromatogram: 1_example + expect_identical(length(cb@spectra), 5L) # All 5 spectra retained + + # Verify chromSpectraIndex was created correctly + expect_true(is.factor(cb@spectra$chromSpectraIndex)) + expect_identical(as.character(unique(cb@spectra$chromSpectraIndex)), "1_example") + + # Verify spectraSortIndex is valid + expect_identical(length(cb@spectraSortIndex), 5L) + sorted_rtimes <- cb@spectra$rtime[cb@spectraSortIndex] + expect_identical(sorted_rtimes, c(100, 110, 120, 130, 140)) + + # Verify chromData has correct rt range + expect_equal(cb@chromData$rtMin, 100) + expect_equal(cb@chromData$rtMax, 140) +}) + +test_that("factorize() recalculates spectraSortIndex correctly - in-memory backend", { + sp <- Spectra::Spectra(S4Vectors::DataFrame( + mz = IRanges::NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), + compress = FALSE), + intensity = IRanges::NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), + compress = FALSE), + rtime = c(3, 1, 4, 2, 5), + msLevel = c(1L, 1L, 2L, 2L, 1L), + dataOrigin = rep("A", 5) + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Verify initial sort order is by dataOrigin, rtime + original_sort_idx <- cb@spectraSortIndex + sorted_rtimes <- sp$rtime[original_sort_idx] + expect_true(all(diff(sorted_rtimes) >= 0)) + + # Modify msLevel and refactorize + cb@spectra$msLevel <- c(2L, 1L, 1L, 2L, 1L) + cb <- factorize(cb) + + # After factorize, spectraSortIndex should be recalculated + # and should still produce correctly sorted rtimes + new_sort_idx <- cb@spectraSortIndex + sorted_rtimes_after <- cb@spectra$rtime[new_sort_idx] + expect_true(all(diff(sorted_rtimes_after) >= 0)) + + # Verify chromData reflects the new factorization + expect_identical(nrow(cb@chromData), 2L) # Now 2 groups: 1_A and 2_A +}) + +test_that("factorize() works correctly with on-disk spectra backend", { + # Use the pre-loaded on-disk spectra from test setup (be_sp) + cb_test <- be_sp + + # Verify initial state + expect_true(length(cb_test@spectra) > 0) + expect_true(length(cb_test@spectraSortIndex) > 0) + + # Verify sort index produces sorted rtimes within groups + sorted_rtimes <- cb_test@spectra$rtime[cb_test@spectraSortIndex] + sorted_dataOrigin <- cb_test@spectra$dataOrigin[cb_test@spectraSortIndex] + # Check rtimes are sorted within each dataOrigin group + for (do in unique(sorted_dataOrigin)) { + rtimes_in_group <- sorted_rtimes[sorted_dataOrigin == do] + expect_true(all(diff(rtimes_in_group) >= 0)) + } + + # Factorize with different grouping + cb_single <- factorize(cb_test, factorize.by = "dataOrigin") + + # Should have one row per unique dataOrigin + expect_true(nrow(cb_single@chromData) >= 1) + + # spectraSortIndex should still be valid + expect_true(all(cb_single@spectraSortIndex <= length(cb_single@spectra))) + sorted_rtimes_new <- cb_single@spectra$rtime[cb_single@spectraSortIndex] + sorted_dataOrigin_new <- cb_single@spectra$dataOrigin[cb_single@spectraSortIndex] + # Check rtimes are sorted within each dataOrigin group + for (do in unique(sorted_dataOrigin_new)) { + rtimes_in_group <- sorted_rtimes_new[sorted_dataOrigin_new == do] + expect_true(all(diff(rtimes_in_group) >= 0)) + } +}) + +test_that("factorize() maintains consistency between chromData and spectra", { + sp <- Spectra::Spectra(S4Vectors::DataFrame( + mz = replicate(6, c(1, 2), simplify = FALSE), + intensity = replicate(6, c(10, 20), simplify = FALSE), + rtime = c(1, 2, 3, 4, 5, 6), + msLevel = c(1L, 1L, 2L, 2L, 1L, 2L), + dataOrigin = c("A", "A", "A", "B", "B", "B") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Get the chromSpectraIndex values + chrom_idx <- chromSpectraIndex(cb) + spectra_idx <- cb@spectra$chromSpectraIndex + + # Both should have the same set of levels + expect_identical(sort(levels(chrom_idx)), sort(levels(spectra_idx))) + + # All chromSpectraIndex values in spectra should be in chromData levels + unique_spectra_idx <- unique(as.character(spectra_idx)) + expect_true(all(unique_spectra_idx %in% levels(spectra_idx))) + + # Verify consistency after subsetting + keep_idx <- chrom_idx == levels(chrom_idx)[1] + cb_sub <- cb[keep_idx] + + chrom_idx_sub <- chromSpectraIndex(cb_sub) + spectra_idx_sub <- cb_sub@spectra$chromSpectraIndex + + # After subsetting, both should have same single level + expect_identical(nlevels(chrom_idx_sub), 1L) + expect_identical(nlevels(spectra_idx_sub), 1L) + expect_identical(levels(chrom_idx_sub), levels(spectra_idx_sub)) +}) + +test_that("peaksData generation respects spectraSortIndex - in-memory", { + sp <- Spectra::Spectra(S4Vectors::DataFrame( + mz = replicate(4, c(100, 101), simplify = FALSE), + intensity = replicate(4, c(10, 20), simplify = FALSE), + rtime = c(3, 1, 4, 2), + msLevel = rep(1L, 4), + dataOrigin = c("B", "A", "B", "A") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Get peaksData + pd <- peaksData(cb) + + # Should return a list with one entry per chromatogram + expect_identical(length(pd), 2L) + + # Each entry should be a data.frame with rtime and intensity + for (i in seq_along(pd)) { + expect_true(is.data.frame(pd[[i]])) + expect_true("rtime" %in% colnames(pd[[i]])) + expect_true("intensity" %in% colnames(pd[[i]])) + } +}) + +test_that("subsetting and peaksData consistency - in-memory", { + sp <- Spectra::Spectra(S4Vectors::DataFrame( + mz = replicate(4, c(100, 101), simplify = FALSE), + intensity = replicate(4, c(10, 20), simplify = FALSE), + rtime = c(3, 1, 4, 2), + msLevel = rep(1L, 4), + dataOrigin = c("B", "A", "B", "A") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Subset to one chromatogram + keep <- chromSpectraIndex(cb) == "1_A" + cb_sub <- cb[keep] + + # Get peaksData from subsetted backend + pd_sub <- peaksData(cb_sub) + + # Should have one entry + expect_identical(length(pd_sub), 1L) + + # The data should be valid + expect_true(is.data.frame(pd_sub[[1]])) + expect_true(nrow(pd_sub[[1]]) > 0) }) test_that("chromSpectraIndex works", { diff --git a/tests/testthat/test_Chromatograms.R b/tests/testthat/test_Chromatograms.R index f8685d6..0ac1f93 100644 --- a/tests/testthat/test_Chromatograms.R +++ b/tests/testthat/test_Chromatograms.R @@ -49,7 +49,8 @@ test_that("Chromatograms constructor from Spectra works with all parameters", { chr_empty_cd <- Chromatograms(s, chromData = data.frame()) expect_s4_class(chr_empty_cd, "Chromatograms") expect_true(nrow(chromData(chr_empty_cd)) > 0) - expect_true(all(coreChromVariables() %in% colnames(chromData(chr_empty_cd)))) + expect_true(all(names(coreChromVariables()) %in% + chromVariables(chr_empty_cd))) ## With custom chromData custom_cd <- data.frame( @@ -60,7 +61,7 @@ test_that("Chromatograms constructor from Spectra works with all parameters", { chr_custom <- Chromatograms(s, chromData = custom_cd) expect_s4_class(chr_custom, "Chromatograms") expect_true("customCol" %in% colnames(chromData(chr_custom))) - expect_equal(chromData(chr_custom)$customCol, "test") + expect_equal(chromData(chr_custom)$customCol, rep("test", length(unique(s$dataOrigin)))) ## With custom factorize.by chr_factby <- Chromatograms(s, factorize.by = "dataOrigin") @@ -69,11 +70,28 @@ test_that("Chromatograms constructor from Spectra works with all parameters", { chromData(chr_factby)$chromSpectraIndex)) ## With spectraVariables - chr_specvars <- Chromatograms(s, spectraVariables = c("polarity")) + expect_error(Chromatograms(s, spectraVariables = "polarity"), + "must already exist in 'chromData'") + + + chr_specvars <- Chromatograms(s, spectraVariables = c("precursorMz")) expect_s4_class(chr_specvars, "Chromatograms") - if ("polarity" %in% Spectra::spectraVariables(s)) { - expect_true("polarity" %in% colnames(chromData(chr_specvars))) + if ("precursorMz" %in% Spectra::spectraVariables(s)) { + expect_true("precursorMz" %in% colnames(chromData(chr_specvars))) } + + ## spectraVariables should replace all-NA columns in provided chromData + sp_small <- Spectra::Spectra(S4Vectors::DataFrame( + rtime = c(1, 2), + msLevel = c(1L, 1L), + dataOrigin = c("A", "A"), + polarity = c(1L, 1L) + )) + cd_na <- data.frame(msLevel = 1L, dataOrigin = "A", polarity = NA_integer_) + chr_specvars_replace <- Chromatograms(sp_small, chromData = cd_na, + spectraVariables = c("polarity")) + expect_s4_class(chr_specvars_replace, "Chromatograms") + expect_identical(chromData(chr_specvars_replace)$polarity, 1L) }) test_that("Chromatograms constructor from ChromBackend works", { @@ -96,7 +114,7 @@ test_that("Chromatograms constructor from ChromBackend works", { expect_equal(length(chr_spec), length(be_sp)) ## With processingQueue - pq <- list(function(x) x) + pq <- list(ProcessingStep("smooth", list(method = "SavitzkyGolay", halfWindowSize = 2L))) chr_pq <- Chromatograms(be, processingQueue = pq) expect_equal(length(.processingQueue(chr_pq)), 1) }) @@ -224,4 +242,3 @@ test_that("chromExtract, Chromatograms works correctly", { "Some combinations in") }) - diff --git a/vignettes/using-a-chromatograms-object.Rmd b/vignettes/using-a-chromatograms-object.Rmd index 558e406..ef11a50 100644 --- a/vignettes/using-a-chromatograms-object.Rmd +++ b/vignettes/using-a-chromatograms-object.Rmd @@ -90,15 +90,18 @@ could be used instead. The default backends are: chromatographic peaks (retention time and intensity values) from the original mzML files on-demand. -- `ChromBackendSpectra`: this backend is used to generate chromatographic data - from a `Spectra` object. I can be use to create TIC, BPC or EICs. +- `ChromBackendSpectra`: this backend generates chromatographic data from a + `Spectra` object. It can be used to create Total Ion Chromatograms (TIC), + Base Peak Chromatograms (BPC), or Extracted Ion Chromatograms (EICs). It + supports both in-memory and file-backed `Spectra` objects. All backends provide a consistent interface through the `Chromatograms` object, regardless of where or how the data is stored. The `ChromBackendSpectra` has a special feature: it uses an internal sort index (`spectraSortIndex`) to maintain retention time order without physically reordering the underlying `Spectra` object. This is particularly important for disk-backed `Spectra` -objects, as it avoids loading all data into memory. +objects, as it avoids loading all data into memory. The sort index is +automatically maintained during subsetting and factorization operations. ## Chromatographic peaks data @@ -223,22 +226,8 @@ the `Chromatograms` object's methods and functions. It is also possible to create a `Chromatograms` object directly from a `Spectra` object. This is particularly useful when you want to generate total ion chromatograms (TIC), base peak chromatograms (BPC), or extracted ion -chromatograms (EIC) from spectral data: - -```{r, eval=FALSE} -library(Spectra) - -## Create TIC (default) -chr_tic <- Chromatograms(sp, summarize.method = "sum") - -## Create BPC -chr_bpc <- Chromatograms(sp, summarize.method = "max") - -## Create with custom factorization -chr_custom <- Chromatograms(sp, - factorize.by = "dataOrigin", - summarize.method = "sum") -``` +chromatograms (EIC) from spectral data. A worked example is provided in the +[plotting](#plotting-chromatograms-from-a-spectra-object) section below. Basic information about the `Chromatograms` object can be accessed using functions such as `length()`, which tell us how many chromatograms are @@ -548,18 +537,30 @@ Different backends are suited for different use cases: # Plotting chromatograms from a `Spectra` object -For this purpose let's create a `Chromatograms` object from public spectral -data. +For this purpose let's create a lightweight in-memory `Spectra` object and +derive a `Chromatograms` from it. This avoids any external downloads while +still illustrating the `ChromBackendSpectra` workflow. ```{r, message=FALSE} library(Spectra) -library(MsBackendMetaboLights) -be <- backendInitialize(MsBackendMetaboLights(), - mtblsId = "MTBLS39", - filePattern = c("63B.cdf") +library(IRanges) +sp <- Spectra( + DataFrame( + rtime = c(100, 110, 120, 130, 140), + msLevel = c(1L, 1L, 1L, 1L, 1L), + dataOrigin = rep("example", 5L), + mz = NumericList( + c(100, 101), c(100, 101), c(100, 101), c(100, 101), c(100, 101), + compress = FALSE + ), + intensity = NumericList( + c(10, 20), c(15, 25), c(30, 5), c(12, 18), c(40, 2), + compress = FALSE + ) + ), + source = MsBackendDataFrame() ) -sp <- Spectra(be) -sp <- setBackend(sp, MsBackendMemory()) + chr_s <- Chromatograms(sp) ``` From 5a5537d2d17abc4317b11b09061b9a1a618f8b43 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Wed, 21 Jan 2026 13:38:40 +0100 Subject: [PATCH 6/9] fix checks --- .github/workflows/check-bioc.yml | 2 +- R/ChromBackendSpectra.R | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml index 49df17b..5ba1470 100644 --- a/.github/workflows/check-bioc.yml +++ b/.github/workflows/check-bioc.yml @@ -53,7 +53,7 @@ jobs: fail-fast: false matrix: config: - - { os: ubuntu-latest, r: 'devel', bioc: '3.22', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } + - { os: ubuntu-latest, r: 'devel', bioc: '3.23', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" } - { os: macOS-latest, r: 'latest', bioc: '3.22'} - { os: windows-latest, r: 'latest', bioc: '3.22'} env: diff --git a/R/ChromBackendSpectra.R b/R/ChromBackendSpectra.R index a88bb31..4f105b5 100644 --- a/R/ChromBackendSpectra.R +++ b/R/ChromBackendSpectra.R @@ -246,7 +246,6 @@ setMethod("factorize", "ChromBackendSpectra", stop("All 'factorize.by' variables must be in the ", "Spectra object.") - ## Create interaction factor from spectra spectra_f <- interaction(as.list( spectraData(.spectra(object))[, factorize.by, drop = FALSE]), @@ -263,15 +262,12 @@ setMethod("factorize", "ChromBackendSpectra", drop = FALSE], drop = TRUE, sep = "_") - ## Align spectra factor to chromData levels object@spectra$chromSpectraIndex <- factor(as.character(spectra_f), levels = levels(cd$chromSpectraIndex)) - ## Use sorted spectra for calculating retention time ranges sorted_spectra <- .spectra(object)[object@spectraSortIndex] sorted_spectra_f <- spectra_f[object@spectraSortIndex] - ## Ensure rt/mz columns are properly set object@chromData <- .ensure_rt_mz_columns(cd, sorted_spectra, sorted_spectra_f) @@ -285,7 +281,6 @@ setMethod("factorize", "ChromBackendSpectra", object@chromData <- full_sp } - ## Recalculate spectraSortIndex based on the current spectra ordering object@spectraSortIndex <- order( object@spectra$dataOrigin, object@spectra$rtime From a07a7390067eba8c1c9136b36cba82286b969451 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Wed, 21 Jan 2026 15:14:29 +0100 Subject: [PATCH 7/9] fix jo's comments --- NAMESPACE | 1 + NEWS.md | 6 +- R/ChromBackendMemory.R | 11 +- R/ChromBackendSpectra.R | 100 +-- R/Chromatograms.R | 42 +- R/helpers.R | 27 +- man/ChromBackendMemory.Rd | 11 +- man/ChromBackendSpectra.Rd | 26 +- man/Chromatograms.Rd | 30 +- tests/testthat.R | 2 + tests/testthat/test_ChromBackendSpectra.R | 733 ++++++++++++++++++++- tests/testthat/test_Chromatograms.R | 67 +- vignettes/using-a-chromatograms-object.Rmd | 49 +- 13 files changed, 964 insertions(+), 141 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index 73e6b8a..236f694 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -124,6 +124,7 @@ importFrom(stats,filter) importFrom(stats,loess) importFrom(stats,predict) importFrom(stats,sd) +importFrom(stats,setNames) importFrom(stats,spline) importFrom(utils,capture.output) importFrom(utils,head) diff --git a/NEWS.md b/NEWS.md index f15104a..095783a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,6 @@ -# Version 1.1.1 +# Version 1.1 + +## Changes in 1.1.1 - Aligned the package with the Bioconductor 3.22 release. - Expanded the vignette to cover ChromBackendSpectra usage, chromatogram @@ -8,7 +10,7 @@ retention-time order on demand, avoiding the need to keep on-disk `Spectra` objects sorted in memory. -# Version 0.99.7 +# Version 0.99 ## Changes in 0.99.7 diff --git a/R/ChromBackendMemory.R b/R/ChromBackendMemory.R index 6f49916..1e915f5 100644 --- a/R/ChromBackendMemory.R +++ b/R/ChromBackendMemory.R @@ -41,11 +41,7 @@ NULL #' #' @examples #' -#' ## Create a ChromBackendMemory object -#' cbm <- ChromBackendMemory() -#' -#' ## Initialize the ChromBackendMemory object with a data.frame of -#' ## chromatographic data and a list of data.frame of peaks data +#' ## Method 1: Initialize backend directly #' cdata <- data.frame( #' msLevel = c(1L, 1L, 1L), #' mz = c(112.2, 123.3, 134.4), @@ -67,9 +63,14 @@ NULL #' ) #' ) #' +#' cbm <- ChromBackendMemory() #' cbm <- backendInitialize(cbm, chromData = cdata, peaksData = pdata) #' cbm #' +#' ## Method 2: Use Chromatograms constructor (recommended) +#' chr <- Chromatograms(ChromBackendMemory(), chromData = cdata, peaksData = pdata) +#' chr +#' NULL #' @noRd diff --git a/R/ChromBackendSpectra.R b/R/ChromBackendSpectra.R index 4f105b5..8ced3cd 100644 --- a/R/ChromBackendSpectra.R +++ b/R/ChromBackendSpectra.R @@ -21,6 +21,14 @@ NULL #' condensing the `Spectra` data corresponding to each unique combination of #' the `factorize.by` variables. #' +#' By "factorization" we mean the process of grouping spectra +#' into chromatograms based on specified variables. For example, using +#' `factorize.by = c("msLevel", "dataOrigin")` means that all MS1 spectra from +#' file "A" form one chromatogram, all MS2 spectra from file "A" form another, +#' and so on. Each unique combination of the factorization variables creates +#' a separate chromatogram. This is essential for organizing spectral data into +#' meaningful chromatographic traces that can be visualized and analyzed. +#' #' The *dataOrigin* core chromatogram variable should reflect the *dataOrigin* #' of the `Spectra` object. The `factorize.by` parameter defines the variables #' for grouping `Spectra` data into chromatographic data. The default is @@ -44,10 +52,14 @@ NULL #' #' **Spectra Sort Index**: The `ChromBackendSpectra` backend maintains a #' `spectraSortIndex` slot that stores a sort order for the internal `Spectra` -#' object based on `dataOrigin` and `rtime`. This avoids the need to physically -#' reorder disk-backed `Spectra` objects, which would require loading all data -#' into memory. The sort index is automatically recalculated whenever the -#' `factorize()` method is called, ensuring it remains valid and consistent. +#' object based on `dataOrigin` and `rtime`. To optimize performance, the sort +#' index is only computed and stored when the spectra are unsorted; if already +#' sorted (which is typical for most real-world data), `spectraSortIndex` remains +#' empty (`integer()`). This avoids unnecessary subsetting operations. The sort +#' index is automatically recalculated whenever the `factorize()` method is called, +#' ensuring it remains valid and consistent. This approach avoids the need to +#' physically reorder disk-backed `Spectra` objects, which would require loading +#' all data into memory. #' #' **Factorize and Subsetting**: The `factorize()` method updates the #' `chromSpectraIndex` in both `chromData` and the `spectra` object to reflect @@ -67,8 +79,10 @@ NULL #' `"dataOrigin"`. #' #' @param factorize.by A `character` vector of variables for grouping `Spectra` -#' data into chromatographic data. -#' Default: `c("msLevel", "dataOrigin")`. +#' data into chromatographic data (i.e., creating separate chromatograms +#' for each unique combination of these variables). +#' Default: `c("msLevel", "dataOrigin")`, which creates one chromatogram +#' per MS level per data file. #' If `chromData` is provided, it must contain these columns. #' #' @param object A `ChromBackendSpectra` object. @@ -182,12 +196,10 @@ setMethod("backendInitialize", "ChromBackendSpectra", if (!is(spectra, "Spectra")) stop("'spectra' must be a 'Spectra' object.") if (!length(spectra)) return(object) - if (!all(factorize.by %in% spectraVariables(spectra))) stop("All 'factorize.by' variables must exist in 'spectra'.") if (!is.data.frame(chromData)) stop("'chromData' must be a 'data.frame'.") - if(!nrow(chromData)) chromData <- fillCoreChromVariables(data.frame()) else validChromData(chromData) @@ -199,16 +211,17 @@ setMethod("backendInitialize", "ChromBackendSpectra", ## Spectra object are not expected to be ordered by rtime, ## so we store a sort index instead of concatenating. ## This allows us to keep disk-backed backends intact. + ## Only store sort index if data is actually unsorted (optimization). sort_idx <- order( spectra$dataOrigin, spectra$rtime ) - object@spectraSortIndex <- sort_idx + if (!identical(sort_idx, seq_along(spectra))) { + object@spectraSortIndex <- sort_idx + } object@chromData <- chromData object@spectra <- spectra - object <- factorize(object, factorize.by = factorize.by) - ## map additional spectraVariables if any if (length(spectraVariables)) { object <- .map_spectra_vars(object, @@ -245,29 +258,29 @@ setMethod("factorize", "ChromBackendSpectra", spectraVariables(.spectra(object)))) stop("All 'factorize.by' variables must be in the ", "Spectra object.") - spectra_f <- interaction(as.list( spectraData(.spectra(object))[, factorize.by, drop = FALSE]), drop = TRUE, sep = "_") - cd <- .chromData(object) if (nrow(cd)) { ## chromData exists: validate and align spectra to it if (!all(factorize.by %in% chromVariables(object))) stop("All 'factorize.by' variables must be in chromData.") - cd$chromSpectraIndex <- interaction(cd[, factorize.by, drop = FALSE], drop = TRUE, sep = "_") - object@spectra$chromSpectraIndex <- factor(as.character(spectra_f), levels = levels(cd$chromSpectraIndex)) - - sorted_spectra <- .spectra(object)[object@spectraSortIndex] - sorted_spectra_f <- spectra_f[object@spectraSortIndex] - + ## Apply sort index for processing if needed + if (length(object@spectraSortIndex)) { + sorted_spectra <- .spectra(object)[object@spectraSortIndex] + sorted_spectra_f <- spectra_f[object@spectraSortIndex] + } else { + sorted_spectra <- .spectra(object) + sorted_spectra_f <- spectra_f + } object@chromData <- .ensure_rt_mz_columns(cd, sorted_spectra, sorted_spectra_f) @@ -280,12 +293,16 @@ setMethod("factorize", "ChromBackendSpectra", rownames(full_sp) <- NULL object@chromData <- full_sp } - - object@spectraSortIndex <- order( + ## Recalculate sort index: only store if data is unsorted (optimization) + sort_idx <- order( object@spectra$dataOrigin, object@spectra$rtime ) - + if (!identical(sort_idx, seq_along(object@spectra))) { + object@spectraSortIndex <- sort_idx + } else { + object@spectraSortIndex <- integer() + } object }) @@ -309,8 +326,12 @@ setMethod( } ## Ensure chromSpectraIndex only contains relevant levels needed valid_f <- chromSpectraIndex(object) - ## Apply the sort index to spectra for processing - sorted_spectra <- .spectra(object)[object@spectraSortIndex] + ## Apply the sort index to spectra for processing (only if unsorted) + if (length(object@spectraSortIndex)) { + sorted_spectra <- .spectra(object)[object@spectraSortIndex] + } else { + sorted_spectra <- .spectra(object) + } current_vals <- as.character(sorted_spectra$chromSpectraIndex) if (!setequal(unique(current_vals), levels(valid_f))) { sorted_spectra$chromSpectraIndex <- factor( @@ -359,36 +380,35 @@ setMethod( #' @rdname hidden_aliases #' @importMethodsFrom S4Vectors [ [[ #' @importFrom MsCoreUtils i2index +#' @importFrom stats setNames #' @export setMethod("[", "ChromBackendSpectra", function(x, i, j, ...) { if (!length(i)) return(ChromBackendSpectra()) i <- i2index(i, length = length(x)) - - ## Subset chromData and peaksData via parent method + kept_indices <- chromSpectraIndex(x)[i] x@chromData <- .chromData(x)[i, , drop = FALSE] x@peaksData <- .peaksData(x)[i] - - ## Determine which spectra to keep based on chromSpectraIndex - kept_indices <- chromSpectraIndex(x)[i] spectra_keep <- x@spectra$chromSpectraIndex %in% kept_indices - - ## Subset the spectra object x@spectra <- x@spectra[spectra_keep] - ## Update spectraSortIndex to reflect the new ordering after subsetting - old_positions_kept <- which(spectra_keep) - mapping <- match(old_positions_kept, seq_along(spectra_keep)[spectra_keep]) - - kept_sort_positions <- x@spectraSortIndex %in% old_positions_kept - x@spectraSortIndex <- mapping[match(x@spectraSortIndex[kept_sort_positions], - old_positions_kept)] + ## Update spectraSortIndex: remap old positions to new positions + if (length(x@spectraSortIndex)) { + old_positions_kept <- which(spectra_keep) + ## Create mapping from old position to new position + ## e.g., if we kept positions c(2, 5, 7), they become c(1, 2, 3) + position_mapping <- setNames(seq_along(old_positions_kept), + old_positions_kept) + ## Keep only sort indices that reference kept positions + kept_sort_positions <- x@spectraSortIndex %in% old_positions_kept + x@spectraSortIndex <- as.integer( + position_mapping[as.character(x@spectraSortIndex[kept_sort_positions])] + ) + } - ## Ensure chromSpectraIndex levels are still consistent x@chromData$chromSpectraIndex <- droplevels(x@chromData$chromSpectraIndex) x@spectra$chromSpectraIndex <- droplevels(x@spectra$chromSpectraIndex) - x }) diff --git a/R/Chromatograms.R b/R/Chromatograms.R index 6a72364..0391cbd 100644 --- a/R/Chromatograms.R +++ b/R/Chromatograms.R @@ -195,37 +195,51 @@ NULL #' #' @examples #' +#' ## Create a Chromatograms object with ChromBackendMemory +#' cdata <- data.frame( +#' msLevel = c(1L, 1L, 1L), +#' mz = c(112.2, 123.3, 134.4), +#' dataOrigin = c("mem1", "mem2", "mem3") +#' ) +#' pdata <- list( +#' data.frame(rtime = c(2.1, 2.5, 3.0, 3.4, 3.9), +#' intensity = c(100, 250, 400, 300, 150)), +#' data.frame(rtime = c(3.5, 4.0, 4.5), +#' intensity = c(80, 120, 90)), +#' data.frame(rtime = c(5.1, 5.8, 6.3, 6.9, 7.5), +#' intensity = c(80, 500, 1200, 600, 120)) +#' ) +#' chr <- Chromatograms(ChromBackendMemory(), chromData = cdata, peaksData = pdata) +#' chr +#' +#' ## Create a Chromatograms object from a Spectra object #' library(MsBackendMetaboLights) #' library(Spectra) #' -#' ## Create a Chromatograms object from a Spectra object. #' be <- backendInitialize(MsBackendMetaboLights(), #' mtblsId = "MTBLS39", #' filePattern = c("63B.cdf") #' ) #' s <- Spectra(be) #' s <- setBackend(s, MsBackendMemory()) -#' be <- backendInitialize(new("ChromBackendSpectra"), s) -#' chr <- Chromatograms(be) +#' chr <- Chromatograms(s) #' #' ## Subset #' chr[1:2] #' -#' ## access a specific variables +#' ## Access a specific variable #' chr[["msLevel"]] #' chr$msLevel #' #' ## Replace data of a specific variable #' chr$msLevel <- c(2L, 2L, 2L) #' -#' ## Can re factorize the data +#' ## Re-factorize the data #' chr <- factorize(chr) #' -#' ## Can also change the backend into memory +#' ## Change the backend to memory #' chr <- setBackend(chr, ChromBackendMemory()) #' -#' chr -#' NULL setClassUnion("ChromBackendOrMissing", c("ChromBackend", "missing")) @@ -296,10 +310,16 @@ setMethod( if (missing(object)) { object <- ChromBackendMemory() } - new("Chromatograms", + ## Extract backend-specific parameters from ... and initialize backend + dots <- list(...) + if (length(dots) > 0 && length(object) == 0) { + ## Backend is empty, initialize it with provided parameters + object <- do.call(backendInitialize, c(list(object), dots)) + dots <- list() + } + do.call(new, c(list("Chromatograms", backend = object, - processingQueue = processingQueue, ... - ) + processingQueue = processingQueue), dots)) } ) diff --git a/R/helpers.R b/R/helpers.R index 8834513..7ab1ad5 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -296,11 +296,22 @@ #' @importFrom Spectra peaksData filterRanges #' @noRd .process_peaks_data <- function(cd, s, columns, fun, drop) { - s <- filterRanges(s, - spectraVariables = rep("rtime", nrow(cd)), - ranges = as.vector(rbind(cd$rtMin, cd$rtMax)), - match = "any" - ) + ## Handle single spectrum case: filterRanges fails with length(s) == 1 + if (length(s) > 1) { + s <- filterRanges(s, + spectraVariables = rep("rtime", nrow(cd)), + ranges = as.vector(rbind(cd$rtMin, cd$rtMax)), + match = "any" + ) + } else { + ## For single spectrum, manually filter by rtime range + if (length(s) == 1) { + rt_in_range <- s$rtime >= min(cd$rtMin) & s$rtime <= max(cd$rtMax) + if (!rt_in_range) { + s <- s[integer(0)] ## Return empty Spectra + } + } + } pd <- peaksData(s, columns = c("mz", "intensity")) do_rt <- "rtime" %in% columns do_int <- "intensity" %in% columns @@ -339,13 +350,13 @@ chromSpectraIndex = unique(sps$chromSpectraIndex) ) ## Add optional columns if present - if ("polarity" %in% Spectra::spectraVariables(sps)) { + if ("polarity" %in% spectraVariables(sps)) { res$polarity <- sps$polarity[1] } - if ("scanWindowLowerLimit" %in% Spectra::spectraVariables(sps)) { + if ("scanWindowLowerLimit" %in% spectraVariables(sps)) { res$scanWindowLowerLimit <- sps$scanWindowLowerLimit[1] } - if ("scanWindowUpperLimit" %in% Spectra::spectraVariables(sps)) { + if ("scanWindowUpperLimit" %in% spectraVariables(sps)) { res$scanWindowUpperLimit <- sps$scanWindowUpperLimit[1] } res diff --git a/man/ChromBackendMemory.Rd b/man/ChromBackendMemory.Rd index 6993a8c..6ef4ce8 100644 --- a/man/ChromBackendMemory.Rd +++ b/man/ChromBackendMemory.Rd @@ -45,11 +45,7 @@ parameter and a \code{list} of \code{data.frame} entries for peaks data using th } \examples{ -## Create a ChromBackendMemory object -cbm <- ChromBackendMemory() - -## Initialize the ChromBackendMemory object with a data.frame of -## chromatographic data and a list of data.frame of peaks data +## Method 1: Initialize backend directly cdata <- data.frame( msLevel = c(1L, 1L, 1L), mz = c(112.2, 123.3, 134.4), @@ -71,9 +67,14 @@ pdata <- list( ) ) +cbm <- ChromBackendMemory() cbm <- backendInitialize(cbm, chromData = cdata, peaksData = pdata) cbm +## Method 2: Use Chromatograms constructor (recommended) +chr <- Chromatograms(ChromBackendMemory(), chromData = cdata, peaksData = pdata) +chr + } \author{ Philippine Louail diff --git a/man/ChromBackendSpectra.Rd b/man/ChromBackendSpectra.Rd index c94c072..3379882 100644 --- a/man/ChromBackendSpectra.Rd +++ b/man/ChromBackendSpectra.Rd @@ -26,8 +26,10 @@ chromSpectraIndex(object) \item{spectra}{A \code{Spectra} object.} \item{factorize.by}{A \code{character} vector of variables for grouping \code{Spectra} -data into chromatographic data. -Default: \code{c("msLevel", "dataOrigin")}. +data into chromatographic data (i.e., creating separate chromatograms +for each unique combination of these variables). +Default: \code{c("msLevel", "dataOrigin")}, which creates one chromatogram +per MS level per data file. If \code{chromData} is provided, it must contain these columns.} \item{summarize.method}{A \code{character} string specifying intensity summary: @@ -64,6 +66,14 @@ An "rtMin", "rtMax", "mzMin", and "mzMax" column will be created by condensing the \code{Spectra} data corresponding to each unique combination of the \code{factorize.by} variables. +By "factorization" we mean the process of grouping spectra +into chromatograms based on specified variables. For example, using +\code{factorize.by = c("msLevel", "dataOrigin")} means that all MS1 spectra from +file "A" form one chromatogram, all MS2 spectra from file "A" form another, +and so on. Each unique combination of the factorization variables creates +a separate chromatogram. This is essential for organizing spectral data into +meaningful chromatographic traces that can be visualized and analyzed. + The \emph{dataOrigin} core chromatogram variable should reflect the \emph{dataOrigin} of the \code{Spectra} object. The \code{factorize.by} parameter defines the variables for grouping \code{Spectra} data into chromatographic data. The default is @@ -89,10 +99,14 @@ The \code{inMemory} slot indicates this with \code{TRUE}. \strong{Spectra Sort Index}: The \code{ChromBackendSpectra} backend maintains a \code{spectraSortIndex} slot that stores a sort order for the internal \code{Spectra} -object based on \code{dataOrigin} and \code{rtime}. This avoids the need to physically -reorder disk-backed \code{Spectra} objects, which would require loading all data -into memory. The sort index is automatically recalculated whenever the -\code{factorize()} method is called, ensuring it remains valid and consistent. +object based on \code{dataOrigin} and \code{rtime}. To optimize performance, the sort +index is only computed and stored when the spectra are unsorted; if already +sorted (which is typical for most real-world data), \code{spectraSortIndex} remains +empty (\code{integer()}). This avoids unnecessary subsetting operations. The sort +index is automatically recalculated whenever the \code{factorize()} method is called, +ensuring it remains valid and consistent. This approach avoids the need to +physically reorder disk-backed \code{Spectra} objects, which would require loading +all data into memory. \strong{Factorize and Subsetting}: The \code{factorize()} method updates the \code{chromSpectraIndex} in both \code{chromData} and the \code{spectra} object to reflect diff --git a/man/Chromatograms.Rd b/man/Chromatograms.Rd index eae6b5f..334d738 100644 --- a/man/Chromatograms.Rd +++ b/man/Chromatograms.Rd @@ -241,37 +241,51 @@ columns are provided when using other backends, they will be ignored. \examples{ +## Create a Chromatograms object with ChromBackendMemory +cdata <- data.frame( + msLevel = c(1L, 1L, 1L), + mz = c(112.2, 123.3, 134.4), + dataOrigin = c("mem1", "mem2", "mem3") +) +pdata <- list( + data.frame(rtime = c(2.1, 2.5, 3.0, 3.4, 3.9), + intensity = c(100, 250, 400, 300, 150)), + data.frame(rtime = c(3.5, 4.0, 4.5), + intensity = c(80, 120, 90)), + data.frame(rtime = c(5.1, 5.8, 6.3, 6.9, 7.5), + intensity = c(80, 500, 1200, 600, 120)) +) +chr <- Chromatograms(ChromBackendMemory(), chromData = cdata, peaksData = pdata) +chr + +## Create a Chromatograms object from a Spectra object library(MsBackendMetaboLights) library(Spectra) -## Create a Chromatograms object from a Spectra object. be <- backendInitialize(MsBackendMetaboLights(), mtblsId = "MTBLS39", filePattern = c("63B.cdf") ) s <- Spectra(be) s <- setBackend(s, MsBackendMemory()) -be <- backendInitialize(new("ChromBackendSpectra"), s) -chr <- Chromatograms(be) +chr <- Chromatograms(s) ## Subset chr[1:2] -## access a specific variables +## Access a specific variable chr[["msLevel"]] chr$msLevel ## Replace data of a specific variable chr$msLevel <- c(2L, 2L, 2L) -## Can re factorize the data +## Re-factorize the data chr <- factorize(chr) -## Can also change the backend into memory +## Change the backend to memory chr <- setBackend(chr, ChromBackendMemory()) -chr - } \seealso{ \link{chromData} for a general description of the chromatographic diff --git a/tests/testthat.R b/tests/testthat.R index 33d8af9..a4613a7 100644 --- a/tests/testthat.R +++ b/tests/testthat.R @@ -2,6 +2,8 @@ library(testthat) library(Chromatograms) library(Spectra) library(MsBackendMetaboLights) ## remove MsbackendMetaboLights dependency for test and examples +library(IRanges) +library(S4Vectors) ### Test ChromBackendSpectra be <- backendInitialize(MsBackendMetaboLights(), diff --git a/tests/testthat/test_ChromBackendSpectra.R b/tests/testthat/test_ChromBackendSpectra.R index eee1a77..41668a5 100644 --- a/tests/testthat/test_ChromBackendSpectra.R +++ b/tests/testthat/test_ChromBackendSpectra.R @@ -121,7 +121,7 @@ test_that("factorize() works", { }) test_that("factorize() fills rt columns with numeric vectors", { - sp <- Spectra::Spectra(S4Vectors::DataFrame( + sp <- Spectra(DataFrame( mz = replicate(4, c(1, 2), simplify = FALSE), intensity = replicate(4, c(10, 20), simplify = FALSE), rtime = c(20, 10, 5, 15), @@ -135,7 +135,11 @@ test_that("factorize() fills rt columns with numeric vectors", { msLevel = 1L, dataOrigin = c("B", "A") ) - cb@spectraSortIndex <- order(sp$dataOrigin, sp$rtime) + ## Set spectraSortIndex manually since this unsorted data + sort_idx <- order(sp$dataOrigin, sp$rtime) + if (!identical(sort_idx, seq_along(sp))) { + cb@spectraSortIndex <- sort_idx + } cb <- factorize(cb, factorize.by = "dataOrigin") @@ -146,10 +150,189 @@ test_that("factorize() fills rt columns with numeric vectors", { expect_equal(cb@chromData$rtMax, c(15, 20)) }) +test_that("spectraSortIndex is empty for pre-sorted data", { + ## Create spectra that are already sorted by dataOrigin and rtime + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(1, 2, 3, 4, 5), + msLevel = rep(1L, 5), + dataOrigin = rep("A", 5) + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + ## For pre-sorted data, spectraSortIndex should be empty + expect_identical(length(cb@spectraSortIndex), 0L) + expect_identical(cb@spectraSortIndex, integer()) +}) + +test_that("spectraSortIndex is set for unsorted data", { + ## Create spectra that are NOT sorted by rtime + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(5, 1, 3, 2, 4), + msLevel = rep(1L, 5), + dataOrigin = rep("A", 5) + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + ## For unsorted data, spectraSortIndex should be set + expect_true(length(cb@spectraSortIndex) > 0) + expected_sort <- order(sp$dataOrigin, sp$rtime) + expect_identical(cb@spectraSortIndex, expected_sort) + + ## Verify the sort index produces sorted rtimes + sorted_rtimes <- sp$rtime[cb@spectraSortIndex] + expect_identical(sorted_rtimes, c(1, 2, 3, 4, 5)) +}) + +test_that("spectraSortIndex is empty for pre-sorted data with multiple dataOrigins", { + ## Create spectra sorted by dataOrigin then rtime + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(1, 2, 3, 4, 5, 6), + msLevel = rep(1L, 6), + dataOrigin = c("A", "A", "A", "B", "B", "B") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + ## For pre-sorted data, spectraSortIndex should be empty + expect_identical(length(cb@spectraSortIndex), 0L) +}) + +test_that("spectraSortIndex is set for unsorted data with multiple dataOrigins", { + ## Create spectra NOT sorted by dataOrigin and rtime + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(3, 1, 2, 6, 4, 5), + msLevel = rep(1L, 6), + dataOrigin = c("B", "A", "A", "B", "A", "B") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + ## For unsorted data, spectraSortIndex should be set + expect_true(length(cb@spectraSortIndex) > 0) + expected_sort <- order(sp$dataOrigin, sp$rtime) + expect_identical(cb@spectraSortIndex, expected_sort) + + ## Verify sorting is correct + sorted_do <- sp$dataOrigin[cb@spectraSortIndex] + sorted_rt <- sp$rtime[cb@spectraSortIndex] + expect_identical(sorted_do, c("A", "A", "A", "B", "B", "B")) + expect_identical(sorted_rt, c(1, 2, 4, 3, 5, 6)) +}) + +test_that("factorize() clears spectraSortIndex when data is sorted", { + ## Create unsorted spectra + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(4, 1, 3, 2), + msLevel = rep(1L, 4), + dataOrigin = rep("A", 4) + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + ## Initially unsorted, so spectraSortIndex should be set + expect_true(length(cb@spectraSortIndex) > 0) + + ## Now manually sort the spectra and refactorize + sorted_indices <- order(sp$dataOrigin, sp$rtime) + cb@spectra <- sp[sorted_indices] + cb@spectra$rtime <- sp$rtime[sorted_indices] + cb <- factorize(cb) + + ## After refactorize with sorted data, spectraSortIndex should be empty + expect_identical(length(cb@spectraSortIndex), 0L) +}) + +test_that("factorize() recalculates spectraSortIndex for unsorted data", { + ## Create spectra and manually set it to unsorted state + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(1, 2, 3, 4), + msLevel = rep(1L, 4), + dataOrigin = rep("A", 4) + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + ## Initially sorted, so spectraSortIndex should be empty + expect_identical(length(cb@spectraSortIndex), 0L) + + ## Modify msLevel to create different factorization + cb@spectra$msLevel <- c(1L, 2L, 1L, 2L) + cb <- factorize(cb) + + ## spectraSortIndex should still be empty since data is still sorted by rtime + expect_identical(length(cb@spectraSortIndex), 0L) +}) + +test_that("peaksData works correctly with empty spectraSortIndex", { + ## Create pre-sorted spectra + sp <- Spectra(DataFrame( + mz = NumericList(c(100, 101), c(100, 101), c(100, 101), c(100, 101), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(1, 2, 3, 4), + msLevel = rep(1L, 4), + dataOrigin = rep("A", 4) + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + ## spectraSortIndex should be empty + expect_identical(length(cb@spectraSortIndex), 0L) + + ## peaksData should still work correctly + pd <- peaksData(cb) + expect_true(is.list(pd)) + expect_identical(length(pd), 1L) + expect_true(is.data.frame(pd[[1]])) +}) + +test_that("peaksData works correctly with populated spectraSortIndex", { + ## Create unsorted spectra + sp <- Spectra(DataFrame( + mz = NumericList(c(100, 101), c(100, 101), c(100, 101), c(100, 101), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(3, 1, 4, 2), + msLevel = rep(1L, 4), + dataOrigin = c("B", "A", "B", "A") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + ## spectraSortIndex should be set + expect_true(length(cb@spectraSortIndex) > 0) + + ## peaksData should still work correctly + pd <- peaksData(cb) + expect_true(is.list(pd)) + expect_identical(length(pd), 2L) + expect_true(all(sapply(pd, is.data.frame))) +}) + test_that("spectraSortIndex is set and used for sorting", { - sp <- Spectra::Spectra(S4Vectors::DataFrame( - mz = replicate(4, c(1, 2), simplify = FALSE), - intensity = replicate(4, c(10, 20), simplify = FALSE), + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), rtime = c(3, 1, 4, 2), msLevel = rep(1L, 4), dataOrigin = c("B", "A", "B", "A") @@ -158,7 +341,7 @@ test_that("spectraSortIndex is set and used for sorting", { cb <- ChromBackendSpectra() cb@spectra <- sp csi <- interaction( - as.list(Spectra::spectraData(sp)[, c("msLevel", "dataOrigin"), drop = FALSE]), + as.list(spectraData(sp)[, c("msLevel", "dataOrigin"), drop = FALSE]), drop = TRUE, sep = "_" ) cb@spectra$chromSpectraIndex <- csi @@ -167,15 +350,20 @@ test_that("spectraSortIndex is set and used for sorting", { dataOrigin = c("A", "B"), chromSpectraIndex = c("1_A", "1_B") )) - cb@spectraSortIndex <- order(sp$dataOrigin, sp$rtime) + ## Manually set spectraSortIndex since this is unsorted data + sort_idx <- order(sp$dataOrigin, sp$rtime) + if (!identical(sort_idx, seq_along(sp))) { + cb@spectraSortIndex <- sort_idx + } + expect_true(length(cb@spectraSortIndex) > 0) expect_identical(cb@spectraSortIndex, order(sp$dataOrigin, sp$rtime)) }) test_that("[ maintains spectra and spectraSortIndex", { - sp <- Spectra::Spectra(S4Vectors::DataFrame( - mz = replicate(4, c(1, 2), simplify = FALSE), - intensity = replicate(4, c(10, 20), simplify = FALSE), + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), rtime = c(3, 1, 4, 2), msLevel = rep(1L, 4), dataOrigin = c("B", "A", "B", "A") @@ -187,6 +375,7 @@ test_that("[ maintains spectra and spectraSortIndex", { # Verify initial state expect_identical(length(cb), 2L) # 2 unique combinations of msLevel and dataOrigin expect_identical(length(cb@spectra), 4L) # 4 spectra + # spectraSortIndex should be set since data is unsorted expect_true(length(cb@spectraSortIndex) > 0) # Verify spectraSortIndex is correctly set @@ -214,13 +403,17 @@ test_that("[ maintains spectra and spectraSortIndex", { expect_identical(length(cb_sub@spectra), 2L) # Only 2 spectra belong to "1_A" expect_true(all(cb_sub@spectra$dataOrigin == "A")) - # Verify spectraSortIndex is valid and functional - expect_true(length(cb_sub@spectraSortIndex) > 0) - expect_true(all(cb_sub@spectraSortIndex <= length(cb_sub@spectra))) - - # Verify the sort index still provides correct ordering - sorted_rtime <- cb_sub@spectra$rtime[cb_sub@spectraSortIndex] - expect_true(all(diff(sorted_rtime) >= 0)) + # Verify spectraSortIndex is valid (may be empty if subsetted data is sorted) + if (length(cb_sub@spectraSortIndex) > 0) { + expect_true(all(cb_sub@spectraSortIndex <= length(cb_sub@spectra))) + # Verify the sort index still provides correct ordering + sorted_rtime <- cb_sub@spectra$rtime[cb_sub@spectraSortIndex] + expect_true(all(diff(sorted_rtime) >= 0)) + } else { + # If spectraSortIndex is empty, spectra should already be sorted + sorted_rtime <- cb_sub@spectra$rtime + expect_true(all(diff(sorted_rtime) >= 0)) + } # Verify chromSpectraIndex is properly factorized expect_true(is.factor(cb_sub@spectra$chromSpectraIndex)) @@ -230,12 +423,12 @@ test_that("[ maintains spectra and spectraSortIndex", { test_that("factorize() handles empty chromData correctly", { # Test the scenario from the vignette: creating Chromatograms from Spectra # without providing explicit chromData - sp <- Spectra::Spectra(S4Vectors::DataFrame( - mz = IRanges::NumericList( + sp <- Spectra(DataFrame( + mz = NumericList( c(100, 101), c(100, 101), c(100, 101), c(100, 101), c(100, 101), compress = FALSE ), - intensity = IRanges::NumericList( + intensity = NumericList( c(10, 20), c(15, 25), c(30, 5), c(12, 18), c(40, 2), compress = FALSE ), @@ -256,10 +449,10 @@ test_that("factorize() handles empty chromData correctly", { expect_true(is.factor(cb@spectra$chromSpectraIndex)) expect_identical(as.character(unique(cb@spectra$chromSpectraIndex)), "1_example") - # Verify spectraSortIndex is valid - expect_identical(length(cb@spectraSortIndex), 5L) - sorted_rtimes <- cb@spectra$rtime[cb@spectraSortIndex] - expect_identical(sorted_rtimes, c(100, 110, 120, 130, 140)) + # Verify spectraSortIndex is valid (should be empty since data is pre-sorted) + expect_identical(length(cb@spectraSortIndex), 0L) + # For pre-sorted data, rtimes should already be in order + expect_identical(cb@spectra$rtime, c(100, 110, 120, 130, 140)) # Verify chromData has correct rt range expect_equal(cb@chromData$rtMin, 100) @@ -267,10 +460,10 @@ test_that("factorize() handles empty chromData correctly", { }) test_that("factorize() recalculates spectraSortIndex correctly - in-memory backend", { - sp <- Spectra::Spectra(S4Vectors::DataFrame( - mz = IRanges::NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), - intensity = IRanges::NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), rtime = c(3, 1, 4, 2, 5), msLevel = c(1L, 1L, 2L, 2L, 1L), @@ -334,7 +527,7 @@ test_that("factorize() works correctly with on-disk spectra backend", { }) test_that("factorize() maintains consistency between chromData and spectra", { - sp <- Spectra::Spectra(S4Vectors::DataFrame( + sp <- Spectra(DataFrame( mz = replicate(6, c(1, 2), simplify = FALSE), intensity = replicate(6, c(10, 20), simplify = FALSE), rtime = c(1, 2, 3, 4, 5, 6), @@ -370,7 +563,7 @@ test_that("factorize() maintains consistency between chromData and spectra", { }) test_that("peaksData generation respects spectraSortIndex - in-memory", { - sp <- Spectra::Spectra(S4Vectors::DataFrame( + sp <- Spectra(DataFrame( mz = replicate(4, c(100, 101), simplify = FALSE), intensity = replicate(4, c(10, 20), simplify = FALSE), rtime = c(3, 1, 4, 2), @@ -396,7 +589,7 @@ test_that("peaksData generation respects spectraSortIndex - in-memory", { }) test_that("subsetting and peaksData consistency - in-memory", { - sp <- Spectra::Spectra(S4Vectors::DataFrame( + sp <- Spectra(DataFrame( mz = replicate(4, c(100, 101), simplify = FALSE), intensity = replicate(4, c(10, 20), simplify = FALSE), rtime = c(3, 1, 4, 2), @@ -467,3 +660,483 @@ test_that("chromExtract works for ChromBackendSpectra", { expect_equal(length(.peaksData(out)), nrow(peak_tbl)) }) +test_that("[ subsetting with empty spectraSortIndex works", { + ## Create pre-sorted spectra (empty spectraSortIndex) + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(1, 2, 3, 4), + msLevel = c(1L, 1L, 2L, 2L), + dataOrigin = c("A", "A", "B", "B") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Verify initial state: pre-sorted data should have empty spectraSortIndex + expect_identical(length(cb@spectraSortIndex), 0L) + + # Subset to keep only first chromatogram (msLevel=1, dataOrigin=A) + keep <- chromSpectraIndex(cb) == levels(chromSpectraIndex(cb))[1] + cb_sub <- cb[keep] + + # Verify subset is correct + expect_identical(length(cb_sub), 1L) + expect_identical(length(cb_sub@spectra), 2L) + expect_true(all(cb_sub@spectra$msLevel == 1L)) + + # Verify spectraSortIndex is still empty (pre-sorted data stays pre-sorted) + expect_identical(length(cb_sub@spectraSortIndex), 0L) +}) + +test_that("[ subsetting with populated spectraSortIndex works", { + ## Create unsorted spectra (populated spectraSortIndex) + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(3, 1, 4, 2, 5, 6), # Unsorted + msLevel = c(1L, 1L, 2L, 2L, 1L, 2L), + dataOrigin = c("B", "A", "B", "A", "A", "B") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Verify initial state: unsorted data should have populated spectraSortIndex + expect_true(length(cb@spectraSortIndex) > 0) + original_sort_idx <- cb@spectraSortIndex + + # Subset to keep only "1_A" chromatogram (msLevel=1, dataOrigin=A) + keep <- chromSpectraIndex(cb) == "1_A" + cb_sub <- cb[keep] + + # Verify subset is correct + expect_identical(length(cb_sub), 1L) + expect_identical(length(cb_sub@spectra), 2L) + + # Verify spectra in subset have correct values + expect_true(all(cb_sub@spectra$msLevel == 1L)) + expect_true(all(cb_sub@spectra$dataOrigin == "A")) + + # Verify spectraSortIndex is remapped correctly + ## Original positions that map to "1_A" are positions 2 and 5 + ## After subsetting, they should be renumbered to 1, 2 + expect_true(all(cb_sub@spectraSortIndex <= length(cb_sub@spectra))) + + # Verify the sort index produces correct ordering + if (length(cb_sub@spectraSortIndex) > 0) { + sorted_rtimes <- cb_sub@spectra$rtime[cb_sub@spectraSortIndex] + expect_true(all(diff(sorted_rtimes) >= 0)) + } +}) + +test_that("[ subsetting remaps spectraSortIndex correctly", { + ## Test remapping with a specific example + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(5, 1, 3, 2, 4), # Unsorted + msLevel = rep(1L, 5), + dataOrigin = c("A", "A", "B", "B", "A") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Get the original spectraSortIndex + original_sort_idx <- cb@spectraSortIndex + expect_true(length(original_sort_idx) > 0) + + # Subset to keep only dataOrigin="A" (indices 1, 2, 5) + keep <- chromSpectraIndex(cb) == "1_A" + cb_sub <- cb[keep] + + # Verify we kept the right spectra + expect_identical(length(cb_sub@spectra), 3L) + kept_positions <- c(1, 2, 5) # These are the original positions we kept + expect_true(all(cb_sub@spectra$dataOrigin == "A")) + + # Verify spectraSortIndex is valid + expect_true(all(cb_sub@spectraSortIndex > 0)) + expect_true(all(cb_sub@spectraSortIndex <= length(cb_sub@spectra))) + + # Verify sort order is preserved + sorted_rtimes <- cb_sub@spectra$rtime[cb_sub@spectraSortIndex] + expect_true(all(diff(sorted_rtimes) >= 0)) +}) + +test_that("[ subsetting with multiple indices works", { + ## Test subsetting with multiple chromatograms + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(2, 1, 4, 3), # Unsorted + msLevel = c(1L, 1L, 2L, 2L), + dataOrigin = c("A", "A", "A", "A") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Subset to keep both chromatograms + keep <- c(TRUE, TRUE) # Keep both msLevel=1 and msLevel=2 + cb_sub <- cb[keep] + + # Verify all spectra are retained + expect_identical(length(cb_sub@spectra), 4L) + expect_identical(length(cb_sub), 2L) + + # Verify spectraSortIndex is still valid + if (length(cb_sub@spectraSortIndex) > 0) { + expect_true(all(cb_sub@spectraSortIndex <= length(cb_sub@spectra))) + sorted_do <- cb_sub@spectra$dataOrigin[cb_sub@spectraSortIndex] + sorted_rt <- cb_sub@spectra$rtime[cb_sub@spectraSortIndex] + # Verify sorting is correct + expect_true(all(diff(sorted_rt) >= 0)) + } +}) + +test_that("[ subsetting with single index works", { + ## Test subsetting to keep only one chromatogram + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(3, 1, 4, 2), # Unsorted + msLevel = c(1L, 1L, 2L, 2L), + dataOrigin = c("A", "A", "A", "A") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Subset to keep only first chromatogram + keep <- c(TRUE, FALSE) + cb_sub <- cb[keep] + + # Verify subset is correct + expect_identical(length(cb_sub), 1L) + expect_identical(length(cb_sub@spectra), 2L) + expect_true(all(cb_sub@spectra$msLevel == 1L)) +}) + +test_that("factorize() with single factorize.by variable works", { + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(1, 2, 3, 4, 5, 6), + msLevel = c(1L, 1L, 2L, 2L, 1L, 2L), + dataOrigin = c("A", "A", "A", "A", "B", "B") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) # Default: factorize.by = c("msLevel", "dataOrigin") + + # Verify initial state creates 4 groups (1_A, 1_B, 2_A, 2_B) + expect_identical(length(cb), 4L) + + # Refactorize by only dataOrigin + cb_fact <- factorize(cb, factorize.by = "dataOrigin") + + # After refactorization, chromSpectraIndex changes to only have dataOrigin groups + expect_true(is.factor(cb_fact@spectra$chromSpectraIndex)) + expect_identical(nlevels(cb_fact@spectra$chromSpectraIndex), 2L) # Now only 2 levels: A and B + + # Verify spectra are correctly assigned to groups + expect_true(all(cb_fact@spectra$chromSpectraIndex[cb_fact@spectra$dataOrigin == "A"] == "A")) + expect_true(all(cb_fact@spectra$chromSpectraIndex[cb_fact@spectra$dataOrigin == "B"] == "B")) +}) + +test_that("factorize() with multiple factorize.by variables works", { + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(1, 2, 3, 4, 5, 6), + msLevel = c(1L, 1L, 2L, 2L, 1L, 2L), + dataOrigin = c("A", "A", "A", "A", "B", "B") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Factorize by msLevel and dataOrigin (default) + cb_fact <- factorize(cb, factorize.by = c("msLevel", "dataOrigin")) + + # Should create 4 chromatograms + expect_identical(length(cb_fact), 4L) + expect_identical(nrow(cb_fact@chromData), 4L) + + # Verify chromData levels + levels_str <- sort(as.character(unique(cb_fact@spectra$chromSpectraIndex))) + expected_levels <- c("1_A", "1_B", "2_A", "2_B") + expect_identical(levels_str, sort(expected_levels)) +}) + +test_that("factorize() recalculates spectraSortIndex for unsorted data", { + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(5, 1, 3, 2, 6, 4), # Unsorted + msLevel = c(1L, 1L, 2L, 2L, 1L, 2L), + dataOrigin = c("A", "A", "A", "A", "B", "B") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Verify initial spectraSortIndex is set (unsorted data) + expect_true(length(cb@spectraSortIndex) > 0) + initial_sort_idx <- cb@spectraSortIndex + + # Modify rtime and refactorize + cb@spectra$rtime <- c(1, 2, 3, 4, 5, 6) # Make it sorted + cb <- factorize(cb) + + # After refactorize with sorted data, spectraSortIndex should be empty + expect_identical(length(cb@spectraSortIndex), 0L) +}) + +test_that("factorize() updates chromSpectraIndex correctly", { + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(10, 20), c(10, 20), c(10, 20), compress = FALSE), + rtime = c(1, 2, 3, 4), + msLevel = c(1L, 1L, 2L, 2L), + dataOrigin = rep("A", 4) + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Verify chromSpectraIndex is a factor + expect_true(is.factor(cb@spectra$chromSpectraIndex)) + + # Refactorize + cb <- factorize(cb) + + # Verify chromSpectraIndex is still a factor + expect_true(is.factor(cb@spectra$chromSpectraIndex)) + + # Verify all spectra have valid chromSpectraIndex + expect_true(all(!is.na(cb@spectra$chromSpectraIndex))) + + # Verify levels match chromData + expect_identical(sort(levels(cb@spectra$chromSpectraIndex)), + sort(as.character(cb@chromData$chromSpectraIndex))) +}) + +test_that("factorize() preserves spectra data integrity", { + sp <- Spectra(DataFrame( + mz = NumericList(c(100, 101), c(100, 101), c(100, 101), c(100, 101), compress = FALSE), + intensity = NumericList(c(10, 20), c(15, 25), c(30, 5), c(12, 18), compress = FALSE), + rtime = c(1, 2, 3, 4), + msLevel = c(1L, 1L, 2L, 2L), + dataOrigin = rep("A", 4) + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + original_mz <- cb@spectra$mz + original_intensity <- cb@spectra$intensity + + # Refactorize + cb <- factorize(cb) + + # Verify spectra data is preserved + expect_identical(cb@spectra$mz, original_mz) + expect_identical(cb@spectra$intensity, original_intensity) +}) + +test_that("[ subsetting with reordering works", { + ## Test that reordering chromatograms works correctly + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(15, 25), c(30, 5), c(12, 18), c(40, 2), c(50, 10), compress = FALSE), + rtime = c(1, 2, 3, 4, 5, 6), + msLevel = c(1L, 1L, 2L, 1L, 2L, 2L), + dataOrigin = c("A", "A", "A", "B", "B", "B") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Get original chromSpectraIndex levels + original_idx <- chromSpectraIndex(cb) + expect_identical(length(original_idx), 4L) # 4 chromatograms: 1_A, 1_B, 2_A, 2_B + + # Reorder: reverse order + cb_reordered <- cb[c(4, 3, 2, 1)] + + # Verify length is correct + expect_identical(length(cb_reordered), 4L) + + # Verify order is reversed + reordered_idx <- chromSpectraIndex(cb_reordered) + expect_identical(as.character(reordered_idx), as.character(original_idx[c(4, 3, 2, 1)])) + + # Verify chromData is reordered + expect_identical(nrow(cb_reordered@chromData), 4L) + + # Verify spectra are still correctly associated + expect_true(all(cb_reordered@spectra$chromSpectraIndex %in% reordered_idx)) + + # Verify spectraSortIndex is still valid + if (length(cb_reordered@spectraSortIndex) > 0) { + expect_true(all(cb_reordered@spectraSortIndex <= length(cb_reordered@spectra))) + } +}) + +test_that("[ subsetting with duplication works", { + ## Test that duplicating chromatograms works correctly + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(15, 25), c(30, 5), c(12, 18), compress = FALSE), + rtime = c(1, 2, 3, 4), + msLevel = c(1L, 1L, 2L, 2L), + dataOrigin = c("A", "A", "A", "A") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Get original chromSpectraIndex levels + original_idx <- chromSpectraIndex(cb) + expect_identical(length(original_idx), 2L) # 2 chromatograms: 1_A, 2_A + + # Duplicate: keep first chromatogram twice, then second + cb_dup <- cb[c(1, 1, 2)] + + # Verify length includes duplicates + expect_identical(length(cb_dup), 3L) + + # Verify chromData includes duplicates + expect_identical(nrow(cb_dup@chromData), 3L) + dup_idx <- chromSpectraIndex(cb_dup) + expect_identical(as.character(dup_idx), as.character(original_idx[c(1, 1, 2)])) + + # Verify spectra: keeps all unique spectra from subsetted chromatograms + ## Both chrom 1 and 2 are kept, so all 4 original spectra are kept + expect_identical(length(cb_dup@spectra), 4L) + + # Verify all spectra belong to the right chromatograms + expect_true(all(cb_dup@spectra$chromSpectraIndex %in% dup_idx)) + + # Verify spectraSortIndex is still valid + if (length(cb_dup@spectraSortIndex) > 0) { + expect_true(all(cb_dup@spectraSortIndex <= length(cb_dup@spectra))) + } + + # Verify peaksData works with duplicated chromatograms + pd <- peaksData(cb_dup) + expect_identical(length(pd), 3L) +}) + +test_that("[ subsetting with reordering and duplication works", { + ## Test combined reordering and duplication + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(15, 25), c(30, 5), c(12, 18), compress = FALSE), + rtime = c(1, 2, 3, 4), + msLevel = c(1L, 1L, 2L, 2L), + dataOrigin = c("A", "A", "A", "A") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + original_idx <- chromSpectraIndex(cb) + expect_identical(length(original_idx), 2L) # 2 chromatograms + + # Reorder and duplicate: c(2, 1, 1) + cb_mixed <- cb[c(2, 1, 1)] + + # Verify length is correct + expect_identical(length(cb_mixed), 3L) + + # Verify order matches requested subset + mixed_idx <- chromSpectraIndex(cb_mixed) + expect_identical(as.character(mixed_idx), as.character(original_idx[c(2, 1, 1)])) + + # Verify chromData has correct structure + expect_identical(nrow(cb_mixed@chromData), 3L) + + # Verify spectra are correctly associated + expect_true(all(cb_mixed@spectra$chromSpectraIndex %in% mixed_idx)) + + # Verify number of spectra: keeps unique spectra from both chromatograms + ## Both chrom 1 and 2 are in the subset, so all 4 spectra are kept + expect_identical(length(cb_mixed@spectra), 4L) + + # Verify spectraSortIndex is valid + if (length(cb_mixed@spectraSortIndex) > 0) { + expect_true(all(cb_mixed@spectraSortIndex > 0)) + expect_true(all(cb_mixed@spectraSortIndex <= length(cb_mixed@spectra))) + + # Verify sort index still provides correct ordering + sorted_rtimes <- cb_mixed@spectra$rtime[cb_mixed@spectraSortIndex] + sorted_do <- cb_mixed@spectra$dataOrigin[cb_mixed@spectraSortIndex] + + # Check rtimes are sorted within each dataOrigin group + for (do in unique(sorted_do)) { + rtimes_in_group <- sorted_rtimes[sorted_do == do] + expect_true(all(diff(rtimes_in_group) >= 0)) + } + } +}) + +test_that("[ subsetting with unsorted data and duplication works", { + ## Test duplication with unsorted spectra (non-empty spectraSortIndex) + sp <- Spectra(DataFrame( + mz = NumericList(c(1, 2), c(1, 2), c(1, 2), c(1, 2), compress = FALSE), + intensity = NumericList(c(10, 20), c(15, 25), c(30, 5), c(12, 18), compress = FALSE), + rtime = c(3, 1, 4, 2), # Unsorted + msLevel = c(1L, 1L, 2L, 2L), + dataOrigin = c("B", "A", "B", "A") + )) + + cb <- ChromBackendSpectra() + cb <- backendInitialize(cb, spectra = sp) + + # Verify initial state has populated spectraSortIndex + expect_true(length(cb@spectraSortIndex) > 0) + + original_idx <- chromSpectraIndex(cb) + expect_identical(length(original_idx), 4L) # 4 chromatograms: 1_A, 1_B, 2_A, 2_B + + # Duplicate and reorder: keep chromatogram 2, then 1 twice + cb_dup <- cb[c(2, 1, 1)] + + # Verify basic structure + expect_identical(length(cb_dup), 3L) + expect_identical(nrow(cb_dup@chromData), 3L) + + # Verify order + dup_idx <- chromSpectraIndex(cb_dup) + expect_identical(as.character(dup_idx), as.character(original_idx[c(2, 1, 1)])) + + # Verify spectra count: keeps unique spectra from chromatograms 1 and 2 + ## Chrom 1_A has 1 spectrum, chrom 1_B has 1 spectrum → 2 unique spectra + expect_identical(length(cb_dup@spectra), 2L) + + # Verify spectraSortIndex is valid after duplication + if (length(cb_dup@spectraSortIndex) > 0) { + expect_true(all(cb_dup@spectraSortIndex > 0)) + expect_true(all(cb_dup@spectraSortIndex <= length(cb_dup@spectra))) + + # Verify sorting is maintained + sorted_rtimes <- cb_dup@spectra$rtime[cb_dup@spectraSortIndex] + sorted_do <- cb_dup@spectra$dataOrigin[cb_dup@spectraSortIndex] + + # Within each dataOrigin, rtimes should be sorted + for (do in unique(sorted_do)) { + rtimes_in_group <- sorted_rtimes[sorted_do == do] + expect_true(all(diff(rtimes_in_group) >= 0)) + } + } + + # Verify peaksData works with duplicated chromatograms + pd <- peaksData(cb_dup) + expect_identical(length(pd), 3L) + expect_true(all(sapply(pd, is.data.frame))) +}) + diff --git a/tests/testthat/test_Chromatograms.R b/tests/testthat/test_Chromatograms.R index 0ac1f93..64fc66f 100644 --- a/tests/testthat/test_Chromatograms.R +++ b/tests/testthat/test_Chromatograms.R @@ -95,28 +95,51 @@ test_that("Chromatograms constructor from Spectra works with all parameters", { }) test_that("Chromatograms constructor from ChromBackend works", { - ## From ChromBackendMemory - chr_mem <- Chromatograms(be) - expect_s4_class(chr_mem, "Chromatograms") - expect_s4_class(.backend(chr_mem), "ChromBackendMemory") - expect_equal(length(chr_mem), length(be)) - - ## From ChromBackendMzR - chr_mzr <- Chromatograms(be_mzr) - expect_s4_class(chr_mzr, "Chromatograms") - expect_s4_class(.backend(chr_mzr), "ChromBackendMzR") - expect_equal(length(chr_mzr), length(be_mzr)) - - ## From ChromBackendSpectra - chr_spec <- Chromatograms(be_sp) - expect_s4_class(chr_spec, "Chromatograms") - expect_s4_class(.backend(chr_spec), "ChromBackendSpectra") - expect_equal(length(chr_spec), length(be_sp)) - - ## With processingQueue - pq <- list(ProcessingStep("smooth", list(method = "SavitzkyGolay", halfWindowSize = 2L))) - chr_pq <- Chromatograms(be, processingQueue = pq) - expect_equal(length(.processingQueue(chr_pq)), 1) + ## From ChromBackendMemory - already initialized + chr_mem <- Chromatograms(be) + expect_s4_class(chr_mem, "Chromatograms") + expect_s4_class(.backend(chr_mem), "ChromBackendMemory") + expect_equal(length(chr_mem), length(be)) + + ## From empty ChromBackendMemory with chromData and peaksData parameters + cdata <- data.frame( + msLevel = c(1L, 1L, 1L), + mz = c(112.2, 123.3, 134.4), + dataOrigin = c("mem1", "mem1", "mem1") + ) + pdata <- list( + data.frame(rtime = c(2.1, 2.5, 3.0, 3.4, 3.9), + intensity = c(100, 250, 400, 300, 150)), + data.frame(rtime = numeric(), intensity = numeric()), + data.frame(rtime = c(5.1, 5.8, 6.3, 6.9, 7.5), + intensity = c(80, 500, 1200, 600, 120)) + ) + chr <- Chromatograms(ChromBackendMemory(), chromData = cdata, peaksData = pdata) + expect_s4_class(chr, "Chromatograms") + expect_s4_class(.backend(chr), "ChromBackendMemory") + expect_equal(length(chr), 3L) + expect_identical(chromData(chr)$mz, cdata$mz) + expect_identical(peaksData(chr), pdata) + + ## From ChromBackendMzR + chr_mzr <- Chromatograms(be_mzr) + expect_s4_class(chr_mzr, "Chromatograms") + expect_s4_class(.backend(chr_mzr), "ChromBackendMzR") + expect_equal(length(chr_mzr), length(be_mzr)) + + ## From ChromBackendSpectra + chr_spec <- Chromatograms(be_sp) + expect_s4_class(chr_spec, "Chromatograms") + expect_s4_class(.backend(chr_spec), "ChromBackendSpectra") + expect_equal(length(chr_spec), length(be_sp)) + + ## With processingQueue + pq <- list(ProcessingStep( + "smooth", + list(method = "SavitzkyGolay", halfWindowSize = 2L) + )) + chr_pq <- Chromatograms(be, processingQueue = pq) + expect_equal(length(.processingQueue(chr_pq)), 1) }) test_that("Chromatograms constructor handles edge cases", { diff --git a/vignettes/using-a-chromatograms-object.Rmd b/vignettes/using-a-chromatograms-object.Rmd index ef11a50..74fd3ad 100644 --- a/vignettes/using-a-chromatograms-object.Rmd +++ b/vignettes/using-a-chromatograms-object.Rmd @@ -93,7 +93,9 @@ could be used instead. The default backends are: - `ChromBackendSpectra`: this backend generates chromatographic data from a `Spectra` object. It can be used to create Total Ion Chromatograms (TIC), Base Peak Chromatograms (BPC), or Extracted Ion Chromatograms (EICs). It - supports both in-memory and file-backed `Spectra` objects. + supports both in-memory and file-backed `Spectra` objects. The backend uses + **factorization** to group spectra into chromatograms based on variables like + MS level and data origin (see details below). All backends provide a consistent interface through the `Chromatograms` object, regardless of where or how the data is stored. The `ChromBackendSpectra` has @@ -365,6 +367,24 @@ the underlying `Spectra` object and its sort index are also properly subset and updated. This ensures that peak data extraction remains efficient even after subsetting operations. +### Re-factorizing after metadata changes + +If you modify the chromatogram metadata (particularly the factorization columns +like `msLevel` or `dataOrigin`), you may need to re-factorize the data to update +the groupings. This can be done using the `factorize()` function: + +```{r, eval=FALSE} +## Modify metadata +chr_s$msLevel <- rep(2L, length(chr_s)) + +## Re-factorize to update the groupings +chr_s <- factorize(chr_s@backend) +chr_s <- Chromatograms(chr_s) +``` + +This recalculates which spectra belong to which chromatograms based on the +updated metadata. + # Lazy Processing and Parallelization @@ -572,9 +592,30 @@ chr_s ``` The `ChromBackendSpectra` backend provides flexibility in how chromatograms are -generated from spectral data. By default, separate chromatograms are created for -each combination of MS level and data origin. You can control this behavior with -the `factorize.by` parameter. +generated from spectral data through a process called **factorization**. + +## Understanding Factorization + +Factorization is the process of grouping individual spectra into chromatograms +based on one or more variables. Think of it as creating separate "bins" where +each bin becomes one chromatogram. + +By default, the `factorize.by` parameter is set to `c("msLevel", "dataOrigin")`, +which means: + +- All MS1 spectra from file "A" → Chromatogram 1 +- All MS2 spectra from file "A" → Chromatogram 2 +- All MS1 spectra from file "B" → Chromatogram 3 +- All MS2 spectra from file "B" → Chromatogram 4 + +Each unique combination of the factorization variables creates a separate +chromatogram. This allows you to organize your spectral data into meaningful +chromatographic traces that can be visualized and analyzed together. + +You can customize the factorization behavior by changing the `factorize.by` +parameter. For example, using only `factorize.by = "dataOrigin"` would create +one chromatogram per file (combining all MS levels), while adding more variables +would create more granular groupings. Additionally, you can provide custom chromatogram metadata to define specific m/z and retention time ranges: From d26ead0cbd3d7afcc8cecd210cb561c0188362f0 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Thu, 22 Jan 2026 18:56:32 +0100 Subject: [PATCH 8/9] fix comment jo on documentation --- R/ChromBackendMzR.R | 286 ++++--- R/ChromBackendSpectra.R | 557 +++++++------ R/Chromatograms-chromData.R | 125 +-- R/Chromatograms-peaksData.R | 225 ++--- R/Chromatograms.R | 410 ++++----- R/helpers.R | 917 +++++++++++---------- man/ChromBackendMzR.Rd | 2 +- man/ChromBackendSpectra.Rd | 20 +- man/Chromatograms.Rd | 6 +- man/chromData.Rd | 6 +- man/peaksData.Rd | 6 +- vignettes/using-a-chromatograms-object.Rmd | 13 +- 12 files changed, 1413 insertions(+), 1160 deletions(-) diff --git a/R/ChromBackendMzR.R b/R/ChromBackendMzR.R index f621039..cad0092 100644 --- a/R/ChromBackendMzR.R +++ b/R/ChromBackendMzR.R @@ -18,13 +18,13 @@ NULL #' #' Note that the `ChromBackendMzR` backend is read-only and does not support #' direct modification of chromatographic data. However, it does support -#' `peaksData` slot replacement, which will modify the `peaksData` slot but not +#' `peaksData` slot replacement, which will modify the `@peaksData` slot but not #' the local mzML files. This is indicated by the "inMemory" slot being set to #' TRUE. #' #' Implementing functionalities with the `ChromBackendMzR` backend should be #' simplified as much as possible and reuse the methods already implemented for -#' `ChromBackendMemory` when possible. +#' `ChromBackendMemory` when possible. #' #' @param BPPARAM Parallel setup configuration. See [BiocParallel::bpparam()] #' for more information. @@ -55,23 +55,24 @@ NULL NULL #' @noRd -setClass("ChromBackendMzR", - contains = "ChromBackendMemory", - slots = c(inMemory = "logical"), - prototype = prototype( - chromData = fillCoreChromVariables(data.frame()), - peaksData = list(.EMPTY_PEAKS_DATA), - readonly = FALSE, - version = "0.1", - inMemory = FALSE - ) +setClass( + "ChromBackendMzR", + contains = "ChromBackendMemory", + slots = c(inMemory = "logical"), + prototype = prototype( + chromData = fillCoreChromVariables(data.frame()), + peaksData = list(.EMPTY_PEAKS_DATA), + readonly = FALSE, + version = "0.1", + inMemory = FALSE + ) ) #' @rdname ChromBackendMzR #' @export ChromBackendMzR ChromBackendMzR <- function() { - .check_mzR_package() - new("ChromBackendMzR") + .check_mzR_package() + new("ChromBackendMzR") } #' @rdname ChromBackendMzR @@ -79,48 +80,55 @@ ChromBackendMzR <- function() { #' @importFrom MsCoreUtils rbindFill #' @export setMethod( - "backendInitialize", "ChromBackendMzR", - function(object, files = character(), BPPARAM = bpparam(), ...) { - if (!length(files)) { - return(object) - } - if (!is.character(files)) { - stop( - "Parameter 'files' must be a character vector of ", - "file paths" - ) - } - files <- normalizePath(files, mustWork = FALSE) - chromData <- do.call( - rbindFill, - bplapply(files, FUN = function(fl) { - cbind(.mzR_format_chromData(fl)) - }, BPPARAM = BPPARAM) - ) - callNextMethod(object, chromData = chromData, ...) + "backendInitialize", + "ChromBackendMzR", + function(object, files = character(), BPPARAM = bpparam(), ...) { + if (!length(files)) { + return(object) } + if (!is.character(files)) { + stop( + "Parameter 'files' must be a character vector of ", + "file paths" + ) + } + files <- normalizePath(files, mustWork = FALSE) + chromData <- do.call( + rbindFill, + bplapply( + files, + FUN = function(fl) { + cbind(.mzR_format_chromData(fl)) + }, + BPPARAM = BPPARAM + ) + ) + callNextMethod(object, chromData = chromData, ...) + } ) #' @rdname hidden_aliases #' @export setMethod("show", "ChromBackendMzR", function(object) { - callNextMethod() - fls <- unique(dataOrigin(object)) - if (length(fls)) { - to <- min(3, length(fls)) - cat("\nfile(s):\n", paste(basename(fls[seq_len(to)]), collapse = "\n"), - "\n", - sep = "" - ) - if (length(fls) > 3) cat(" ...", length(fls) - 3, "more files\n") - } - if (object@inMemory) cat("\nPeaks data is cached in memory\n") + callNextMethod() + fls <- unique(dataOrigin(object)) + if (length(fls)) { + to <- min(3, length(fls)) + cat( + "\nfile(s):\n", + paste(basename(fls[seq_len(to)]), collapse = "\n"), + "\n", + sep = "" + ) + if (length(fls) > 3) cat(" ...", length(fls) - 3, "more files\n") + } + if (object@inMemory) cat("\nPeaks data is cached in memory\n") }) #' @rdname hidden_aliases #' @importMethodsFrom ProtGenerics backendParallelFactor setMethod("backendParallelFactor", "ChromBackendMzR", function(object, ...) { - factor(dataOrigin(object), levels = unique(dataOrigin(object))) + factor(dataOrigin(object), levels = unique(dataOrigin(object))) }) #' @rdname hidden_aliases @@ -129,47 +137,64 @@ setMethod("isReadOnly", "ChromBackendMzR", function(object) TRUE) #' @rdname hidden_aliases #' @importFrom BiocParallel bplapply -setMethod("peaksData", "ChromBackendMzR", - function(object, columns = peaksVariables(object), drop = FALSE, - BPPARAM = SerialParam(), ...) { - if (.inMemory(object) || !length(object)) return(callNextMethod()) - pv <- peaksVariables(object) - if (!any(columns %in% pv)) - stop("Some of the requested peaks variables are not", - " available") - ret <- all(pv %in% columns) - f <- factor(dataOrigin(object), - levels = unique(dataOrigin(object))) - pd <- bplapply(split(object, f = f), - function(ob) { - chr <- .get_chrom_data(fl = .chromData(ob)$dataOrigin[1L], - idx = chromIndex(ob)) - if (ret) chr - else lapply(chr, `[`, , columns, drop = drop) - }, BPPARAM = BPPARAM) - unsplit(pd, f = f) - }) +setMethod( + "peaksData", + "ChromBackendMzR", + function( + object, + columns = peaksVariables(object), + drop = FALSE, + BPPARAM = SerialParam(), + ... + ) { + if (.inMemory(object) || !length(object)) { + return(callNextMethod()) + } + pv <- peaksVariables(object) + if (!any(columns %in% pv)) { + stop("Some of the requested peaks variables are not", " available") + } + ret <- all(pv %in% columns) + f <- factor(dataOrigin(object), levels = unique(dataOrigin(object))) + pd <- bplapply( + split(object, f = f), + function(ob) { + chr <- .get_chrom_data( + fl = .chromData(ob)$dataOrigin[1L], + idx = chromIndex(ob) + ) + if (ret) { + chr + } else { + lapply(chr, `[`, , columns, drop = drop) + } + }, + BPPARAM = BPPARAM + ) + unsplit(pd, f = f) + } +) #' @rdname hidden_aliases setReplaceMethod("peaksData", "ChromBackendMzR", function(object, value) { - message( - "Please keep in mind the 'ChromBackendMzR' backend is read-only.", - " The peaksData slot will be modified but the changes will not", - " affect the local mzML files." - ) - object <- callNextMethod() - object@inMemory <- TRUE - object + message( + "Please keep in mind the 'ChromBackendMzR' backend is read-only.", + " The peaksData slot will be modified but the changes will not", + " affect the local mzML files." + ) + object <- callNextMethod() + object@inMemory <- TRUE + object }) #' @rdname hidden_aliases setReplaceMethod("chromData", "ChromBackendMzR", function(object, value) { - message( - "Please keep in mind the 'ChromBackendMzR' backend is read-only.", - " The chromData slot will be modified but the changes will not", - " affect the local mzML files." - ) - callNextMethod() + message( + "Please keep in mind the 'ChromBackendMzR' backend is read-only.", + " The chromData slot will be modified but the changes will not", + " affect the local mzML files." + ) + callNextMethod() }) #' @rdname hidden_aliases @@ -179,51 +204,64 @@ setMethod("supportsSetBackend", "ChromBackendMzR", function(object, ...) FALSE) #' @rdname hidden_aliases #' @importMethodsFrom S4Vectors [ [<- setMethod("[", "ChromBackendMzR", function(x, i, j, ...) { - if (!length(i)) { - return(ChromBackendMzR()) - } - callNextMethod() + if (!length(i)) { + return(ChromBackendMzR()) + } + callNextMethod() }) #' @rdname hidden_aliases -setMethod("chromExtract", "ChromBackendMzR", - function(object, peak.table, by, ...) { - required_cols <- c("rtMin", "rtMax", by) - .validate_chromExtract_input( - object = object, - peak.table = peak.table, - by = by, required_cols = required_cols - ) - - matched <- .match_chromdata_peaktable( - object = object, - peak.table = peak.table, - by = by - ) - cd <- .chromData(matched$object) - chrom_keys <- matched$chrom_keys - peak_keys <- matched$peak_keys - cd_split <- split(cd, chrom_keys) ## UT need to check that - pk_split <- split(peak.table, peak_keys) - - ## Check overlapping columns - overl_cols <- .check_overl_columns(peak.table = peak.table, - object = object, - required_cols = required_cols) - - ## Merge peak.table into chromData safely. - new_cdata <- mapply(function(cd_row, pks) { ## could switch to bpmapply ? - d <- suppressWarnings(cbind(cd_row, pks[!overl_cols])) - d[, names(peak.table)[overl_cols]] <- pks[, overl_cols] - d - }, cd_row = cd_split, - pks = pk_split, SIMPLIFY = FALSE) - - new_cdata <- do.call(rbind, new_cdata) - rownames(new_cdata) <- NULL - object@chromData <- new_cdata - object@peaksData <- replicate(nrow(new_cdata), - .EMPTY_PEAKS_DATA, - simplify = FALSE) - return(object) - }) +setMethod( + "chromExtract", + "ChromBackendMzR", + function(object, peak.table, by, ...) { + required_cols <- c("rtMin", "rtMax", by) + .validate_chromExtract_input( + object = object, + peak.table = peak.table, + by = by, + required_cols = required_cols + ) + + matched <- .match_chromdata_peaktable( + object = object, + peak.table = peak.table, + by = by + ) + cd <- .chromData(matched$object) + chrom_keys <- matched$chrom_keys + peak_keys <- matched$peak_keys + cd_split <- split(cd, chrom_keys) ## UT need to check that + pk_split <- split(peak.table, peak_keys) + + ## Check overlapping columns + overl_cols <- .check_overl_columns( + peak.table = peak.table, + object = object, + required_cols = required_cols + ) + + ## Merge peak.table into chromData safely. + new_cdata <- mapply( + function(cd_row, pks) { + ## could switch to bpmapply ? + d <- suppressWarnings(cbind(cd_row, pks[!overl_cols])) + d[, names(peak.table)[overl_cols]] <- pks[, overl_cols] + d + }, + cd_row = cd_split, + pks = pk_split, + SIMPLIFY = FALSE + ) + + new_cdata <- do.call(rbind, new_cdata) + rownames(new_cdata) <- NULL + object@chromData <- new_cdata + object@peaksData <- replicate( + nrow(new_cdata), + .EMPTY_PEAKS_DATA, + simplify = FALSE + ) + return(object) + } +) diff --git a/R/ChromBackendSpectra.R b/R/ChromBackendSpectra.R index 8ced3cd..b9a784d 100644 --- a/R/ChromBackendSpectra.R +++ b/R/ChromBackendSpectra.R @@ -14,7 +14,7 @@ NULL #' #' It can be initialized with a `Spectra` object, which is stored in the #' `spectra` slot of the backend. Users can also provide a `data.frame` -#' containing chromatographic metadata, stored in `chromData`. This metadata +#' containing chromatographic metadata, stored in `@chromData`. This metadata #' filters the `Spectra` object and generates `peaksData`. If `chromData` is #' not provided, a default `data.frame` is created from the `Spectra` data. #' An "rtMin", "rtMax", "mzMin", and "mzMax" column will be created by @@ -34,7 +34,7 @@ NULL #' for grouping `Spectra` data into chromatographic data. The default is #' `c("msLevel", "dataOrigin")`, which will define separate chromatograms for #' each combination of `msLevel` and `dataOrigin`. These variables must be in -#' both `Spectra` and `chromData` (if provided). +#' both the `spectraData()` of the `Spectra` and `chromData` (if provided). #' #' The `summarize.method` parameter defines how spectral data intensity is #' summarized: @@ -46,9 +46,9 @@ NULL #' #' @details #' No `peaksData` is stored until the user calls a function that generates it -#' (e.g., `rtime()`, `peaksData()`, `intensity()`). The `peaksData` slot +#' (e.g., `rtime()`, `peaksData()`, `intensity()`). The `@peaksData` slot #' replacement is unsupported — modifications are temporary to optimize memory. -#' The `inMemory` slot indicates this with `TRUE`. +#' The `@inMemory` slot indicates this with `TRUE`. #' #' **Spectra Sort Index**: The `ChromBackendSpectra` backend maintains a #' `spectraSortIndex` slot that stores a sort order for the internal `Spectra` @@ -62,10 +62,10 @@ NULL #' all data into memory. #' #' **Factorize and Subsetting**: The `factorize()` method updates the -#' `chromSpectraIndex` in both `chromData` and the `spectra` object to reflect +#' `chromSpectraIndex` in both `chromData` and the `@spectra` to reflect #' the current grouping, and recalculates `spectraSortIndex` to maintain the #' correct sort order. The `[` subsetting operator properly handles subsetting -#' of both `chromData`, `peaksData`, and `spectra`, while updating the +#' of both `@chromData`, `@peaksData`, and `@spectra`, while updating the #' `spectraSortIndex` to reference valid positions in the subsetted data. #' #' `ChromBackendSpectra` should reuse `ChromBackendMemory` methods whenever @@ -78,12 +78,12 @@ NULL #' `"dataOrigin"` column must match the `Spectra` object's #' `"dataOrigin"`. #' -#' @param factorize.by A `character` vector of variables for grouping `Spectra` -#' data into chromatographic data (i.e., creating separate chromatograms -#' for each unique combination of these variables). +#' @param factorize.by A `character` vector of `spectraVariables` for grouping +#' `Spectra` data into chromatographic data (i.e., creating separate +#' chromatograms for each unique combination of these variables). #' Default: `c("msLevel", "dataOrigin")`, which creates one chromatogram #' per MS level per data file. -#' If `chromData` is provided, it must contain these columns. +#' If `chromData` is provided, it **must** also contain these columns. #' #' @param object A `ChromBackendSpectra` object. #' @@ -154,162 +154,203 @@ NULL #' @noRd ChromBackendSpectra <- setClass( - "ChromBackendSpectra", - contains = "ChromBackendMemory", - slots = c( - inMemory = "logical", - spectra = "Spectra", - summaryFun = "function", - spectraSortIndex = "integer" - ), - prototype = prototype( - chromData = fillCoreChromVariables(data.frame()), - peaksData = list(.EMPTY_PEAKS_DATA), - readonly = TRUE, - spectra = Spectra(), - version = "0.1", - inMemory = FALSE, - summaryFun = sumi, - spectraSortIndex = integer() - ) + "ChromBackendSpectra", + contains = "ChromBackendMemory", + slots = c( + inMemory = "logical", + spectra = "Spectra", + summaryFun = "function", + spectraSortIndex = "integer" + ), + prototype = prototype( + chromData = fillCoreChromVariables(data.frame()), + peaksData = list(.EMPTY_PEAKS_DATA), + readonly = TRUE, + spectra = Spectra(), + version = "0.1", + inMemory = FALSE, + summaryFun = sumi, + spectraSortIndex = integer() + ) ) #' @rdname ChromBackendSpectra #' @importFrom methods new #' @export ChromBackendSpectra ChromBackendSpectra <- function() { - new("ChromBackendSpectra") + new("ChromBackendSpectra") } #' @rdname ChromBackendSpectra #' @importFrom methods callNextMethod #' @importFrom MsCoreUtils rbindFill sumi maxi -setMethod("backendInitialize", "ChromBackendSpectra", - function(object, spectra = Spectra(), - factorize.by = c("msLevel" , "dataOrigin"), - summarize.method = c("sum", "max"), - chromData = fillCoreChromVariables(), - spectraVariables = character(), - ...) { - summarize.method <- match.arg(summarize.method) - object@summaryFun <- if (summarize.method == "sum") sumi else maxi - if (!is(spectra, "Spectra")) - stop("'spectra' must be a 'Spectra' object.") - if (!length(spectra)) return(object) - if (!all(factorize.by %in% spectraVariables(spectra))) - stop("All 'factorize.by' variables must exist in 'spectra'.") - if (!is.data.frame(chromData)) - stop("'chromData' must be a 'data.frame'.") - if(!nrow(chromData)) - chromData <- fillCoreChromVariables(data.frame()) - else validChromData(chromData) - if (!all(factorize.by %in% colnames(chromData))) - stop("All 'factorize.by' variables must exist ", - "in 'chromData'. If no chromData was provided, ", - "it needs to be part of the `coreChromVariables()` ", - "available.") - ## Spectra object are not expected to be ordered by rtime, - ## so we store a sort index instead of concatenating. - ## This allows us to keep disk-backed backends intact. - ## Only store sort index if data is actually unsorted (optimization). - sort_idx <- order( - spectra$dataOrigin, - spectra$rtime - ) - if (!identical(sort_idx, seq_along(spectra))) { - object@spectraSortIndex <- sort_idx - } - object@chromData <- chromData - object@spectra <- spectra - object <- factorize(object, factorize.by = factorize.by) - ## map additional spectraVariables if any - if (length(spectraVariables)) { - object <- .map_spectra_vars(object, - spectraVariables = spectraVariables) - } - callNextMethod(object, chromData = .chromData(object)) - } - ) +setMethod( + "backendInitialize", + "ChromBackendSpectra", + function( + object, + spectra = Spectra(), + factorize.by = c("msLevel", "dataOrigin"), + summarize.method = c("sum", "max"), + chromData = fillCoreChromVariables(), + spectraVariables = character(), + ... + ) { + summarize.method <- match.arg(summarize.method) + object@summaryFun <- if (summarize.method == "sum") sumi else maxi + if (!is(spectra, "Spectra")) { + stop("'spectra' must be a 'Spectra' object.") + } + if (!length(spectra)) { + return(object) + } + if (!all(factorize.by %in% spectraVariables(spectra))) { + stop("All 'factorize.by' variables must exist in 'spectra'.") + } + if (!is.data.frame(chromData)) { + stop("'chromData' must be a 'data.frame'.") + } + if (!nrow(chromData)) { + chromData <- fillCoreChromVariables(data.frame()) + } else { + validChromData(chromData) + } + if (!all(factorize.by %in% colnames(chromData))) { + stop( + "All 'factorize.by' variables must exist ", + "in 'chromData'. If no chromData was provided, ", + "it needs to be part of the `coreChromVariables()` ", + "available." + ) + } + ## Spectra object are not expected to be ordered by rtime, + ## so we store a sort index instead of concatenating. + ## This allows us to keep disk-backed backends intact. + ## Only store sort index if data is actually unsorted (optimization). + sort_idx <- order( + spectra$dataOrigin, + spectra$rtime + ) + if (!identical(sort_idx, seq_along(spectra))) { + object@spectraSortIndex <- sort_idx + } + object@chromData <- chromData + object@spectra <- spectra + object <- factorize(object, factorize.by = factorize.by) + ## map additional spectraVariables if any + if (length(spectraVariables)) { + object <- .map_spectra_vars(object, spectraVariables = spectraVariables) + } + callNextMethod(object, chromData = .chromData(object)) + } +) #' @rdname hidden_aliases #' @importFrom methods callNextMethod setMethod("show", "ChromBackendSpectra", function(object) { - callNextMethod() - cat("\nThe Spectra object contains", length(object@spectra), "spectra\n") - if (.inMemory(object)) cat("\nPeaks data is cached in memory\n") + callNextMethod() + cat("\nThe Spectra object contains", length(object@spectra), "spectra\n") + if (.inMemory(object)) cat("\nPeaks data is cached in memory\n") }) #' @rdname ChromBackendSpectra #' @export chromSpectraIndex <- function(object) { - if (!is(object, "ChromBackendSpectra")) - stop("The object must be a 'ChromBackendSpectra' object.") - cd <- chromData(object, columns = "chromSpectraIndex", drop = TRUE) - if (!is.factor(cd)) - cd <- factor(cd) - cd <- droplevels(cd) - cd + if (!is(object, "ChromBackendSpectra")) { + stop("The object must be a 'ChromBackendSpectra' object.") + } + cd <- chromData(object, columns = "chromSpectraIndex", drop = TRUE) + if (!is.factor(cd)) { + cd <- factor(cd) + } + cd <- droplevels(cd) + cd } #' @rdname hidden_aliases -setMethod("factorize", "ChromBackendSpectra", - function(object, factorize.by = c("msLevel", "dataOrigin"),...) { - if (!all(factorize.by %in% - spectraVariables(.spectra(object)))) - stop("All 'factorize.by' variables must be in the ", - "Spectra object.") - spectra_f <- interaction(as.list( - spectraData(.spectra(object))[, - factorize.by, drop = FALSE]), - drop = TRUE, sep = "_") - cd <- .chromData(object) - - if (nrow(cd)) { - ## chromData exists: validate and align spectra to it - if (!all(factorize.by %in% chromVariables(object))) - stop("All 'factorize.by' variables must be in chromData.") - cd$chromSpectraIndex <- interaction(cd[, factorize.by, - drop = FALSE], - drop = TRUE, sep = "_") - object@spectra$chromSpectraIndex <- factor(as.character(spectra_f), - levels = levels(cd$chromSpectraIndex)) - ## Apply sort index for processing if needed - if (length(object@spectraSortIndex)) { - sorted_spectra <- .spectra(object)[object@spectraSortIndex] - sorted_spectra_f <- spectra_f[object@spectraSortIndex] - } else { - sorted_spectra <- .spectra(object) - sorted_spectra_f <- spectra_f - } - object@chromData <- .ensure_rt_mz_columns(cd, - sorted_spectra, - sorted_spectra_f) - } else { - ## chromData is empty: create it from spectra - object@spectra$chromSpectraIndex <- spectra_f - full_sp <- do.call(rbindFill, - lapply(split(.spectra(object), spectra_f), - .spectra_format_chromData)) - rownames(full_sp) <- NULL - object@chromData <- full_sp - } - ## Recalculate sort index: only store if data is unsorted (optimization) - sort_idx <- order( - object@spectra$dataOrigin, - object@spectra$rtime - ) - if (!identical(sort_idx, seq_along(object@spectra))) { - object@spectraSortIndex <- sort_idx - } else { - object@spectraSortIndex <- integer() - } - object - }) +setMethod( + "factorize", + "ChromBackendSpectra", + function(object, factorize.by = c("msLevel", "dataOrigin"), ...) { + if ( + !all( + factorize.by %in% + spectraVariables(.spectra(object)) + ) + ) { + stop("All 'factorize.by' variables must be in the ", "Spectra object.") + } + spectra_f <- interaction( + as.list( + spectraData(.spectra(object))[, + factorize.by, + drop = FALSE + ] + ), + drop = TRUE, + sep = "_" + ) + cd <- .chromData(object) + + if (nrow(cd)) { + ## chromData exists: validate and align spectra to it + if (!all(factorize.by %in% chromVariables(object))) { + stop("All 'factorize.by' variables must be in chromData.") + } + cd$chromSpectraIndex <- interaction( + cd[, factorize.by, drop = FALSE], + drop = TRUE, + sep = "_" + ) + object@spectra$chromSpectraIndex <- factor( + as.character(spectra_f), + levels = levels(cd$chromSpectraIndex) + ) + ## Apply sort index for processing if needed + if (length(object@spectraSortIndex)) { + sorted_spectra <- .spectra(object)[object@spectraSortIndex] + sorted_spectra_f <- spectra_f[object@spectraSortIndex] + } else { + sorted_spectra <- .spectra(object) + sorted_spectra_f <- spectra_f + } + object@chromData <- .ensure_rt_mz_columns( + cd, + sorted_spectra, + sorted_spectra_f + ) + } else { + ## chromData is empty: create it from spectra + object@spectra$chromSpectraIndex <- spectra_f + full_sp <- do.call( + rbindFill, + lapply(split(.spectra(object), spectra_f), .spectra_format_chromData) + ) + rownames(full_sp) <- NULL + object@chromData <- full_sp + } + ## Recalculate sort index: only store if data is unsorted (optimization) + sort_idx <- order( + object@spectra$dataOrigin, + object@spectra$rtime + ) + if (!identical(sort_idx, seq_along(object@spectra))) { + object@spectraSortIndex <- sort_idx + } else { + object@spectraSortIndex <- integer() + } + object + } +) #' @rdname hidden_aliases #' @importMethodsFrom ProtGenerics backendParallelFactor -setMethod("backendParallelFactor", "ChromBackendSpectra", function(object, ...) +setMethod( + "backendParallelFactor", + "ChromBackendSpectra", + function(object, ...) { factor() + } ) #' @rdname hidden_aliases @@ -318,63 +359,64 @@ setMethod("isReadOnly", "ChromBackendSpectra", function(object) TRUE) #' @rdname hidden_aliases setMethod( - "peaksData", "ChromBackendSpectra", - function(object, columns = peaksVariables(object), - drop = FALSE, ...) { - if (.inMemory(object) || !length(object)) { - return(callNextMethod()) - } - ## Ensure chromSpectraIndex only contains relevant levels needed - valid_f <- chromSpectraIndex(object) - ## Apply the sort index to spectra for processing (only if unsorted) - if (length(object@spectraSortIndex)) { - sorted_spectra <- .spectra(object)[object@spectraSortIndex] - } else { - sorted_spectra <- .spectra(object) - } - current_vals <- as.character(sorted_spectra$chromSpectraIndex) - if (!setequal(unique(current_vals), levels(valid_f))) { - sorted_spectra$chromSpectraIndex <- factor( - current_vals, - levels = levels(valid_f) - ) - } - ## Process peaks data - pd <- mapply(.process_peaks_data, - cd = split(chromData(object), valid_f), - s = split( - sorted_spectra, - sorted_spectra$chromSpectraIndex - ), - MoreArgs = list( - columns = columns, - fun = object@summaryFun, - drop = drop - ), - SIMPLIFY = FALSE - ) - unlist(pd, use.names = FALSE, recursive = FALSE) + "peaksData", + "ChromBackendSpectra", + function(object, columns = peaksVariables(object), drop = FALSE, ...) { + if (.inMemory(object) || !length(object)) { + return(callNextMethod()) } + ## Ensure chromSpectraIndex only contains relevant levels needed + valid_f <- chromSpectraIndex(object) + ## Apply the sort index to spectra for processing (only if unsorted) + if (length(object@spectraSortIndex)) { + sorted_spectra <- .spectra(object)[object@spectraSortIndex] + } else { + sorted_spectra <- .spectra(object) + } + current_vals <- as.character(sorted_spectra$chromSpectraIndex) + if (!setequal(unique(current_vals), levels(valid_f))) { + sorted_spectra$chromSpectraIndex <- factor( + current_vals, + levels = levels(valid_f) + ) + } + ## Process peaks data + pd <- mapply( + .process_peaks_data, + cd = split(chromData(object), valid_f), + s = split( + sorted_spectra, + sorted_spectra$chromSpectraIndex + ), + MoreArgs = list( + columns = columns, + fun = object@summaryFun, + drop = drop + ), + SIMPLIFY = FALSE + ) + unlist(pd, use.names = FALSE, recursive = FALSE) + } ) #' @rdname hidden_aliases setReplaceMethod("peaksData", "ChromBackendSpectra", function(object, value) { - message( - "The `peaksData` slot will be modified but the changes will not", - " affect the Spectra object." - ) - object <- callNextMethod() - object@inMemory <- TRUE - object + message( + "The `peaksData` slot will be modified but the changes will not", + " affect the Spectra object." + ) + object <- callNextMethod() + object@inMemory <- TRUE + object }) - #' @rdname hidden_aliases #' @export setMethod( - "supportsSetBackend", "ChromBackendSpectra", - function(object, ...) FALSE + "supportsSetBackend", + "ChromBackendSpectra", + function(object, ...) FALSE ) #' @rdname hidden_aliases @@ -383,76 +425,91 @@ setMethod( #' @importFrom stats setNames #' @export setMethod("[", "ChromBackendSpectra", function(x, i, j, ...) { - if (!length(i)) - return(ChromBackendSpectra()) - - i <- i2index(i, length = length(x)) - kept_indices <- chromSpectraIndex(x)[i] - x@chromData <- .chromData(x)[i, , drop = FALSE] - x@peaksData <- .peaksData(x)[i] - spectra_keep <- x@spectra$chromSpectraIndex %in% kept_indices - x@spectra <- x@spectra[spectra_keep] - - ## Update spectraSortIndex: remap old positions to new positions - if (length(x@spectraSortIndex)) { - old_positions_kept <- which(spectra_keep) - ## Create mapping from old position to new position - ## e.g., if we kept positions c(2, 5, 7), they become c(1, 2, 3) - position_mapping <- setNames(seq_along(old_positions_kept), - old_positions_kept) - ## Keep only sort indices that reference kept positions - kept_sort_positions <- x@spectraSortIndex %in% old_positions_kept - x@spectraSortIndex <- as.integer( - position_mapping[as.character(x@spectraSortIndex[kept_sort_positions])] - ) - } - - x@chromData$chromSpectraIndex <- droplevels(x@chromData$chromSpectraIndex) - x@spectra$chromSpectraIndex <- droplevels(x@spectra$chromSpectraIndex) - x + if (!length(i)) { + return(ChromBackendSpectra()) + } + + i <- i2index(i, length = length(x)) + kept_indices <- chromSpectraIndex(x)[i] + x@chromData <- .chromData(x)[i, , drop = FALSE] + x@peaksData <- .peaksData(x)[i] + spectra_keep <- x@spectra$chromSpectraIndex %in% kept_indices + x@spectra <- x@spectra[spectra_keep] + + ## Update spectraSortIndex: remap old positions to new positions + if (length(x@spectraSortIndex)) { + old_positions_kept <- which(spectra_keep) + ## Create mapping from old position to new position + ## e.g., if we kept positions c(2, 5, 7), they become c(1, 2, 3) + position_mapping <- setNames( + seq_along(old_positions_kept), + old_positions_kept + ) + ## Keep only sort indices that reference kept positions + kept_sort_positions <- x@spectraSortIndex %in% old_positions_kept + x@spectraSortIndex <- as.integer( + position_mapping[as.character(x@spectraSortIndex[kept_sort_positions])] + ) + } + + x@chromData$chromSpectraIndex <- droplevels(x@chromData$chromSpectraIndex) + x@spectra$chromSpectraIndex <- droplevels(x@spectra$chromSpectraIndex) + x }) #' @rdname hidden_aliases -setMethod("chromExtract", "ChromBackendSpectra", - function(object, peak.table, by, ...) { - required_cols <- c("rtMin", "rtMax", "mzMin", "mzMax", by) - .validate_chromExtract_input( - object = object, - peak.table = peak.table, - by = by, required_cols = required_cols - ) - - matched <- .match_chromdata_peaktable( - object = object, - peak.table = peak.table, - by = by - ) - cd <- .chromData(matched$object) - chrom_keys <- matched$chrom_keys - peak_keys <- matched$peak_keys - cd_split <- split(cd, chrom_keys) ## UT need to check that - pk_split <- split(peak.table, peak_keys) +setMethod( + "chromExtract", + "ChromBackendSpectra", + function(object, peak.table, by, ...) { + required_cols <- c("rtMin", "rtMax", "mzMin", "mzMax", by) + .validate_chromExtract_input( + object = object, + peak.table = peak.table, + by = by, + required_cols = required_cols + ) - ## Check overlapping columns - overl_cols <- .check_overl_columns(peak.table = peak.table, - object = object, - required_cols = required_cols) + matched <- .match_chromdata_peaktable( + object = object, + peak.table = peak.table, + by = by + ) + cd <- .chromData(matched$object) + chrom_keys <- matched$chrom_keys + peak_keys <- matched$peak_keys + cd_split <- split(cd, chrom_keys) ## UT need to check that + pk_split <- split(peak.table, peak_keys) - ## Merge peak.table into chromData safely. - new_cdata <- mapply(function(cd_row, pks) { ## could switch to bpmapply ? - d <- suppressWarnings(cbind(cd_row, pks[!overl_cols])) - d[, names(peak.table)[overl_cols]] <- pks[, overl_cols] - d - }, cd_row = cd_split, - pks = pk_split, SIMPLIFY = FALSE) + ## Check overlapping columns + overl_cols <- .check_overl_columns( + peak.table = peak.table, + object = object, + required_cols = required_cols + ) - new_cdata <- do.call(rbind, new_cdata) - rownames(new_cdata) <- NULL + ## Merge peak.table into chromData safely. + new_cdata <- mapply( + function(cd_row, pks) { + ## could switch to bpmapply ? + d <- suppressWarnings(cbind(cd_row, pks[!overl_cols])) + d[, names(peak.table)[overl_cols]] <- pks[, overl_cols] + d + }, + cd_row = cd_split, + pks = pk_split, + SIMPLIFY = FALSE + ) - object@chromData <- new_cdata - object@peaksData <- replicate(nrow(new_cdata), - .EMPTY_PEAKS_DATA, - simplify = FALSE) - object - }) + new_cdata <- do.call(rbind, new_cdata) + rownames(new_cdata) <- NULL + object@chromData <- new_cdata + object@peaksData <- replicate( + nrow(new_cdata), + .EMPTY_PEAKS_DATA, + simplify = FALSE + ) + object + } +) diff --git a/R/Chromatograms-chromData.R b/R/Chromatograms-chromData.R index 2d1dc76..b51fb26 100644 --- a/R/Chromatograms-chromData.R +++ b/R/Chromatograms-chromData.R @@ -49,7 +49,7 @@ #' `chromData()` function. it is also possible to access specific #' chromatograms variables using `$`. #' -#' `chromData` can be accessed, replaced but also filtered/subsetted. Refer to +#' `@chromData` can be accessed, replaced but also filtered/subsetted. Refer to #' the sections below for more details. #' #' @param columns A `character` vector of chromatograms variables to extract. @@ -116,9 +116,9 @@ #' @section Filter Chromatograms variables: #' #' Functions that filter `Chromatograms` based on chromatograms variables -#' (i.e, `chromData` ) will remove chromatographic data that do not meet the +#' (i.e, `@chromData` ) will remove chromatographic data that do not meet the #' specified conditions. This means that if a chromatogram is filtered out, its -#' corresponding `chromData` and `peaksData` will be removed from the object +#' corresponding `@chromData` and `@peaksData` will be removed from the object #' immediately. #' #' The available functions to filter chromatogram data are: @@ -178,63 +178,66 @@ NULL #' @rdname chromData setMethod( - "chromData", "Chromatograms", - function(object, columns = chromVariables(object), drop = FALSE) { - chromData(.backend(object), columns = columns, drop = drop) - } + "chromData", + "Chromatograms", + function(object, columns = chromVariables(object), drop = FALSE) { + chromData(.backend(object), columns = columns, drop = drop) + } ) #' @rdname chromData setReplaceMethod("chromData", "Chromatograms", function(object, value) { - chromData(object@backend) <- value - object + chromData(object@backend) <- value + object }) #' @rdname chromData setMethod("chromVariables", "Chromatograms", function(object) { - chromVariables(.backend(object)) + chromVariables(.backend(object)) }) #' @rdname chromData setMethod("chromIndex", "Chromatograms", function(object) { - chromIndex(.backend(object)) + chromIndex(.backend(object)) }) #' @rdname chromData setReplaceMethod("chromIndex", "Chromatograms", function(object, value) { - chromIndex(object@backend) <- value - object + chromIndex(object@backend) <- value + object }) #' @rdname chromData setMethod("collisionEnergy", "Chromatograms", function(object) { - collisionEnergy(.backend(object)) + collisionEnergy(.backend(object)) }) #' @rdname chromData setReplaceMethod("collisionEnergy", "Chromatograms", function(object, value) { - collisionEnergy(object@backend) <- value - object + collisionEnergy(object@backend) <- value + object }) #' @rdname chromData setMethod("dataOrigin", "Chromatograms", function(object) { - dataOrigin(.backend(object)) + dataOrigin(.backend(object)) }) #' @rdname chromData setReplaceMethod("dataOrigin", "Chromatograms", function(object, value) { - dataOrigin(object@backend) <- value - object + dataOrigin(object@backend) <- value + object }) #' @rdname chromData -setMethod("msLevel", "Chromatograms", function(object) msLevel(.backend(object))) +setMethod("msLevel", "Chromatograms", function(object) { + msLevel(.backend(object)) +}) #' @rdname chromData setReplaceMethod("msLevel", "Chromatograms", function(object, value) { - msLevel(object@backend) <- value - object + msLevel(object@backend) <- value + object }) #' @rdname chromData @@ -242,8 +245,8 @@ setMethod("mz", "Chromatograms", function(object) mz(.backend(object))) #' @rdname chromData setReplaceMethod("mz", "Chromatograms", function(object, value) { - mz(object@backend) <- value - object + mz(object@backend) <- value + object }) #' @rdname chromData @@ -251,8 +254,8 @@ setMethod("mzMax", "Chromatograms", function(object) mzMax(.backend(object))) #' @rdname chromData setReplaceMethod("mzMax", "Chromatograms", function(object, value) { - mzMax(object@backend) <- value - object + mzMax(object@backend) <- value + object }) #' @rdname chromData @@ -260,8 +263,8 @@ setMethod("mzMin", "Chromatograms", function(object) mzMin(.backend(object))) #' @rdname chromData setReplaceMethod("mzMin", "Chromatograms", function(object, value) { - mzMin(object@backend) <- value - object + mzMin(object@backend) <- value + object }) #' @rdname chromData @@ -269,82 +272,88 @@ setMethod("length", "Chromatograms", function(x) length(x@backend)) #' @rdname chromData setMethod("precursorMz", "Chromatograms", function(object) { - precursorMz(.backend(object)) + precursorMz(.backend(object)) }) #' @rdname chromData setReplaceMethod("precursorMz", "Chromatograms", function(object, value) { - precursorMz(object@backend) <- value - object + precursorMz(object@backend) <- value + object }) #' @rdname chromData setMethod("precursorMzMin", "Chromatograms", function(object) { - precursorMzMin(.backend(object)) + precursorMzMin(.backend(object)) }) #' @rdname chromData setReplaceMethod("precursorMzMin", "Chromatograms", function(object, value) { - precursorMzMin(object@backend) <- value - object + precursorMzMin(object@backend) <- value + object }) #' @rdname chromData setMethod("precursorMzMax", "Chromatograms", function(object) { - precursorMzMax(.backend(object)) + precursorMzMax(.backend(object)) }) #' @rdname chromData setReplaceMethod("precursorMzMax", "Chromatograms", function(object, value) { - precursorMzMax(object@backend) <- value - object + precursorMzMax(object@backend) <- value + object }) #' @rdname chromData setMethod("productMz", "Chromatograms", function(object) { - productMz(.backend(object)) + productMz(.backend(object)) }) #' @rdname chromData setReplaceMethod("productMz", "Chromatograms", function(object, value) { - productMz(object@backend) <- value - object + productMz(object@backend) <- value + object }) #' @rdname chromData setMethod("productMzMin", "Chromatograms", function(object) { - productMzMin(.backend(object)) + productMzMin(.backend(object)) }) #' @rdname chromData setReplaceMethod("productMzMin", "Chromatograms", function(object, value) { - productMzMin(object@backend) <- value - object + productMzMin(object@backend) <- value + object }) #' @rdname chromData setMethod("productMzMax", "Chromatograms", function(object) { - productMzMax(.backend(object)) + productMzMax(.backend(object)) }) #' @rdname chromData setReplaceMethod("productMzMax", "Chromatograms", function(object, value) { - productMzMax(object@backend) <- value - object + productMzMax(object@backend) <- value + object }) #' @rdname chromData setMethod( - "filterChromData", "Chromatograms", - function(object, - variables = character(), ranges = numeric(), - match = c("any", "all"), keep = TRUE) { - object@backend <- filterChromData(.backend(object), - variables = variables, - ranges = ranges, - match = match, - keep = keep - ) - object - } + "filterChromData", + "Chromatograms", + function( + object, + variables = character(), + ranges = numeric(), + match = c("any", "all"), + keep = TRUE + ) { + object@backend <- filterChromData( + .backend(object), + variables = variables, + ranges = ranges, + match = match, + keep = keep + ) + object + } ) diff --git a/R/Chromatograms-peaksData.R b/R/Chromatograms-peaksData.R index 1ae2154..ec19b67 100644 --- a/R/Chromatograms-peaksData.R +++ b/R/Chromatograms-peaksData.R @@ -82,7 +82,7 @@ #' #' @section Filter Peaks Variables: #' -#' Functions that filter a `Chromatograms`'s peaks data (i.e., `peaksData`). +#' Functions that filter a `Chromatograms`'s peaks data (i.e., `@peaksData`). #' These functions remove peaks data that do not meet the #' specified conditions. If a chromatogram in a `Chromatograms` object is #' filtered, only the corresponding peaks variable pairs (i.e., rows) in the @@ -101,13 +101,13 @@ #' In the case of a read-only backend, (such as the [ChromBackendMzR]), the #' replacement of the peaks data is not possible. The peaks data can be #' filtered, but the filtered data will not be saved in the backend. This means -#' the original mzml files will not be affected by computations performed on +#' the original mzML files will not be affected by computations performed on #' the [Chromatograms]. #' #' @section Impute Peaks Variables: #' #' `imputePeaksData` will impute missing values in a `Chromatograms`'s peaks data -#' (i.e., `peaksData`). This functions replace missing peaks data values with +#' (i.e., `@peaksData`). This functions replace missing peaks data values with #' specified imputation methods using various methods such as linear #' interpolation, spline interpolation, Gaussian kernel smoothing, or LOESS #' smoothing. This method modifies the peaks data in place and returns the @@ -177,130 +177,159 @@ NULL #' @rdname peaksData -setMethod("imputePeaksData", - signature = "Chromatograms", - function(object, - method = c("linear", "spline", "gaussian", "loess"), - span = 0.3, - sd = 1, - window = 2, - ...) { - method <- match.arg(method) - object <- addProcessing(object, imputePeaksData, - method = method, span = span, sd = sd, window = window - ) - object@processing <- .logging( - .processing(object), "Impute: replace missing peaks data ", - "using the '", method, "' method" - ) - object - } +setMethod( + "imputePeaksData", + signature = "Chromatograms", + function( + object, + method = c("linear", "spline", "gaussian", "loess"), + span = 0.3, + sd = 1, + window = 2, + ... + ) { + method <- match.arg(method) + object <- addProcessing( + object, + imputePeaksData, + method = method, + span = span, + sd = sd, + window = window + ) + object@processing <- .logging( + .processing(object), + "Impute: replace missing peaks data ", + "using the '", + method, + "' method" + ) + object + } ) #' @rdname peaksData -setMethod("filterPeaksData", - signature = "Chromatograms", - function(object, variables = character(), - ranges = numeric(), match = c("any", "all"), - keep = TRUE) { - object <- addProcessing(object, filterPeaksData, - variables = variables, ranges = ranges, - match = match, keep = keep - ) - object@processing <- .logging( - .processing(object), "Filter: remove peaks based ", - "on the variables: ", paste(variables, collapse = ", "), - "the ranges: ", paste(ranges, collapse = ", "), - "and the match condition: ", match - ) - object - } +setMethod( + "filterPeaksData", + signature = "Chromatograms", + function( + object, + variables = character(), + ranges = numeric(), + match = c("any", "all"), + keep = TRUE + ) { + object <- addProcessing( + object, + filterPeaksData, + variables = variables, + ranges = ranges, + match = match, + keep = keep + ) + object@processing <- .logging( + .processing(object), + "Filter: remove peaks based ", + "on the variables: ", + paste(variables, collapse = ", "), + "the ranges: ", + paste(ranges, collapse = ", "), + "and the match condition: ", + match + ) + object + } ) #' @rdname peaksData setMethod("intensity", signature = "Chromatograms", function(object, ...) { - peaksData(object, columns = "intensity", drop = TRUE) + peaksData(object, columns = "intensity", drop = TRUE) }) #' @rdname peaksData -setReplaceMethod("intensity", - signature = "Chromatograms", - function(object, value) { - if (isReadOnly(.backend(object))) { - stop("Cannot replace peaks data in a read-only backend") - } - intensity(object@backend) <- value - object - } +setReplaceMethod( + "intensity", + signature = "Chromatograms", + function(object, value) { + if (isReadOnly(.backend(object))) { + stop("Cannot replace peaks data in a read-only backend") + } + intensity(object@backend) <- value + object + } ) #' @rdname peaksData #' @importFrom MsCoreUtils between -setMethod("peaksData", - signature = "Chromatograms", - function(object, - columns = peaksVariables(object), - f = processingChunkFactor(object), - BPPARAM = bpparam(), drop = FALSE, ...) { - queue <- .processingQueue(object) - if (length(queue)) { - bd <- .run_process_queue(.backend(object), - queue = queue, - f = f, - BPPARAM = BPPARAM - ) - return(peaksData(bd, columns = columns, drop = drop)) - } - peaksData(.backend(object), columns = columns, drop = drop) - } +setMethod( + "peaksData", + signature = "Chromatograms", + function( + object, + columns = peaksVariables(object), + f = processingChunkFactor(object), + BPPARAM = bpparam(), + drop = FALSE, + ... + ) { + queue <- .processingQueue(object) + if (length(queue)) { + bd <- .run_process_queue( + .backend(object), + queue = queue, + f = f, + BPPARAM = BPPARAM + ) + return(peaksData(bd, columns = columns, drop = drop)) + } + peaksData(.backend(object), columns = columns, drop = drop) + } ) #' @rdname peaksData -setReplaceMethod("peaksData", - signature = "Chromatograms", - function(object, value) { - if (isReadOnly(.backend(object))) { - stop("Cannot replace peaks data in a read-only backend") - } - peaksData(object@backend) <- value - object - } +setReplaceMethod( + "peaksData", + signature = "Chromatograms", + function(object, value) { + if (isReadOnly(.backend(object))) { + stop("Cannot replace peaks data in a read-only backend") + } + peaksData(object@backend) <- value + object + } ) #' @rdname peaksData setMethod("peaksVariables", signature = "Chromatograms", function(object, ...) { - peaksVariables(.backend(object)) + peaksVariables(.backend(object)) }) #' @rdname peaksData setMethod("rtime", signature = "Chromatograms", function(object, ...) { - peaksData(object, columns = "rtime", drop = TRUE) + peaksData(object, columns = "rtime", drop = TRUE) }) #' @rdname peaksData -setReplaceMethod("rtime", - signature = "Chromatograms", - function(object, value) { - if (isReadOnly(.backend(object))) { - stop("Cannot replace peaks data in a read-only backend") - } - rtime(object@backend) <- value - object - } -) +setReplaceMethod("rtime", signature = "Chromatograms", function(object, value) { + if (isReadOnly(.backend(object))) { + stop("Cannot replace peaks data in a read-only backend") + } + rtime(object@backend) <- value + object +}) #' @rdname peaksData setMethod("lengths", signature = "Chromatograms", function(x) { - queue <- .processingQueue(x) - f <- processingChunkFactor(x) - if (length(queue)) { - bd <- .run_process_queue(.backend(x), - queue = queue, - f = f, - BPPARAM = bpparam() - ) - return(lengths(bd)) - } - lengths(.backend(x)) + queue <- .processingQueue(x) + f <- processingChunkFactor(x) + if (length(queue)) { + bd <- .run_process_queue( + .backend(x), + queue = queue, + f = f, + BPPARAM = bpparam() + ) + return(lengths(bd)) + } + lengths(.backend(x)) }) - diff --git a/R/Chromatograms.R b/R/Chromatograms.R index 0391cbd..4c79a28 100644 --- a/R/Chromatograms.R +++ b/R/Chromatograms.R @@ -52,21 +52,21 @@ NULL #' The *chromatograms variables* information in the `Chromatograms` object can #' be accessed using the `chromData()` function. Specific chromatograms #' variables can be accessed by either precising the `"columns"` parameter in -#' `chromData()` or using `$`. `chromData` can be accessed, replaced but +#' `chromData()` or using `$`. `@chromData` can be accessed, replaced but #' also filtered/subsetted. Refer to the [chromData] documentation for more #' details. #' #' The *peaks data variables* information in the `Chromatograms` object can be #' accessed using the `peaksData()` function. Specific peaks variables can be #' accessed by either precising the `"columns"` parameter in `peaksData()` or -#' using `$`. `peaksData` can be accessed, replaced but also +#' using `$`. `@peaksData` can be accessed, replaced but also #' filtered/subsetted. Refer to the [peaksData] documentation for more details. #' #' @section Processing of `Chromatograms` objects: #' #' Functions that process the chromatograms data in some ways can be applied to #' the object either directly or by using the `processingQueue` mechanism. The -#' `processingQueue` is a list of processing steps that are stored within the +#' `@processingQueue` is a list of processing steps that are stored within the #' object and only applied when needed. This was created so that the data can be #' processed in a single step and is very useful for larger datasets. This is #' even more true as this processing queue will call function that can be @@ -268,83 +268,97 @@ setClassUnion("ChromBackendOrMissing", c("ChromBackend", "missing")) #' @slot version `character(1)` the version of the `Chromatograms` object. #' #' @noRd -setClass("Chromatograms", - slots = c( - backend = "ChromBackend", - processingQueue = "list", - processing = "character", - processingChunkSize = "numeric", - version = "character" - ), - prototype = prototype( - version = "0.1", - processingChunkSize = Inf, - processingQueue = list(), - processing = character() - ) +setClass( + "Chromatograms", + slots = c( + backend = "ChromBackend", + processingQueue = "list", + processing = "character", + processingChunkSize = "numeric", + version = "character" + ), + prototype = prototype( + version = "0.1", + processingChunkSize = Inf, + processingQueue = list(), + processing = character() + ) ) setValidity("Chromatograms", function(object) { - msg <- character() - if (!is(.backend(object), "ChromBackend")) { - msg <- ("backend must be a ChromBackend object") - } - if (!is.numeric(processingChunkSize(object)) || - length(processingChunkSize(object)) != 1) { - msg <- c(msg, "processingChunkSize must be a numeric value") - } - msg <- c(msg, .valid_processing_queue(.processingQueue(object))) - if (length(msg)) { - msg - } else { - TRUE - } + msg <- character() + if (!is(.backend(object), "ChromBackend")) { + msg <- ("backend must be a ChromBackend object") + } + if ( + !is.numeric(processingChunkSize(object)) || + length(processingChunkSize(object)) != 1 + ) { + msg <- c(msg, "processingChunkSize must be a numeric value") + } + msg <- c(msg, .valid_processing_queue(.processingQueue(object))) + if (length(msg)) { + msg + } else { + TRUE + } }) #' @rdname Chromatograms #' @export setMethod( - "Chromatograms", "ChromBackendOrMissing", - function(object = ChromBackendMemory(), - processingQueue = list(), ...) { - if (missing(object)) { - object <- ChromBackendMemory() - } - ## Extract backend-specific parameters from ... and initialize backend - dots <- list(...) - if (length(dots) > 0 && length(object) == 0) { - ## Backend is empty, initialize it with provided parameters - object <- do.call(backendInitialize, c(list(object), dots)) - dots <- list() - } - do.call(new, c(list("Chromatograms", - backend = object, - processingQueue = processingQueue), dots)) + "Chromatograms", + "ChromBackendOrMissing", + function(object = ChromBackendMemory(), processingQueue = list(), ...) { + if (missing(object)) { + object <- ChromBackendMemory() + } + ## Extract backend-specific parameters from ... and initialize backend + dots <- list(...) + if (length(dots) > 0 && length(object) == 0) { + ## Backend is empty, initialize it with provided parameters + object <- do.call(backendInitialize, c(list(object), dots)) + dots <- list() } + do.call( + new, + c( + list( + "Chromatograms", + backend = object, + processingQueue = processingQueue + ), + dots + ) + ) + } ) #' @rdname Chromatograms #' @importFrom methods new #' @export setMethod( - "Chromatograms", "Spectra", - function(object, summarize.method = c("sum", "max"), - chromData = data.frame(), - factorize.by = c("msLevel", "dataOrigin"), - spectraVariables = character(), ...) { - bd <- backendInitialize(ChromBackendSpectra(), - spectra = object, - factorize.by = factorize.by, - chromData = chromData, - summarize.method = summarize.method, - spectraVariables = spectraVariables, - ... - ) - new("Chromatograms", - backend = bd, - processingQueue = list(), ... - ) - } + "Chromatograms", + "Spectra", + function( + object, + summarize.method = c("sum", "max"), + chromData = data.frame(), + factorize.by = c("msLevel", "dataOrigin"), + spectraVariables = character(), + ... + ) { + bd <- backendInitialize( + ChromBackendSpectra(), + spectra = object, + factorize.by = factorize.by, + chromData = chromData, + summarize.method = summarize.method, + spectraVariables = spectraVariables, + ... + ) + new("Chromatograms", backend = bd, processingQueue = list(), ...) + } ) @@ -356,37 +370,48 @@ setMethod( #' #' @exportMethod show setMethod( - "show", "Chromatograms", - function(object) { - cat("Chromatographic data (", class(object)[1L], ") with ", - length(.backend(object)), " chromatograms in a ", - class(.backend(object)), " backend:\n", - sep = "" + "show", + "Chromatograms", + function(object) { + cat( + "Chromatographic data (", + class(object)[1L], + ") with ", + length(.backend(object)), + " chromatograms in a ", + class(.backend(object)), + " backend:\n", + sep = "" + ) + if (length(.backend(object))) { + txt <- capture.output(show(.backend(object))) + cat(txt[-1], sep = "\n") + } + if (length(.processingQueue(object))) { + cat( + "Lazy evaluation queue:", + length(.processingQueue(object)), + "processing step(s)\n" + ) + } + lp <- length(.processing(object)) + if (lp) { + lps <- .processing(object) + if (lp > 3) { + lps <- lps[seq_len(3)] + lps <- c( + lps, + paste0( + "...", + lp - 3, + " more processings. ", + "Use 'processingLog' to list all." + ) ) - if (length(.backend(object))) { - txt <- capture.output(show(.backend(object))) - cat(txt[-1], sep = "\n") - } - if (length(.processingQueue(object))) { - cat( - "Lazy evaluation queue:", length(.processingQueue(object)), - "processing step(s)\n" - ) - } - lp <- length(.processing(object)) - if (lp) { - lps <- .processing(object) - if (lp > 3) { - lps <- lps[seq_len(3)] - lps <- c(lps, paste0( - "...", lp - 3, - " more processings. ", - "Use 'processingLog' to list all." - )) - } - cat("Processing:\n", paste(lps, collapse = "\n "), "\n") - } + } + cat("Processing:\n", paste(lps, collapse = "\n "), "\n") } + } ) #' @rdname Chromatograms @@ -401,60 +426,72 @@ setMethod( #' #' @exportMethod setBackend setMethod( - "setBackend", c("Chromatograms", "ChromBackend"), - function(object, backend, f = processingChunkFactor(object), - BPPARAM = SerialParam(), ...) { - backend_class <- class(.backend(object)) - BPPARAM <- backendBpparam(.backend(object), BPPARAM) - BPPARAM <- backendBpparam(backend, BPPARAM) - if (!supportsSetBackend(backend)) { - stop(class(backend), " does not support 'setBackend'") - } - if (!length(f) || length(levels(f)) == 1 || !length(object)) { - bd_new <- backendInitialize(backend, - peaksData = peaksData(object), - chromData = chromData(object) - ) - } else { - bd_new <- bplapply( - split(.backend(object), f = f), - function(z, ...) { - backendInitialize(backend, - peaksData = peaksData(z), - chromData = chromData(z), - BPPARAM = SerialParam() - ) - }, ..., - BPPARAM = BPPARAM - ) - bd_new <- backendMerge(bd_new) - } - if (any(colnames(chromData(bd_new)) %in% c("rtMin", "rtMax"))) - chromData(bd_new) <- chromData(bd_new)[, - !colnames(chromData(bd_new)) %in% - c("rtMin", "rtMax")] - object@backend <- bd_new - object@processing <- .logging( - object@processing, - "Switch backend from ", - backend_class, " to ", - class(.backend(object)) - ) - object + "setBackend", + c("Chromatograms", "ChromBackend"), + function( + object, + backend, + f = processingChunkFactor(object), + BPPARAM = SerialParam(), + ... + ) { + backend_class <- class(.backend(object)) + BPPARAM <- backendBpparam(.backend(object), BPPARAM) + BPPARAM <- backendBpparam(backend, BPPARAM) + if (!supportsSetBackend(backend)) { + stop(class(backend), " does not support 'setBackend'") + } + if (!length(f) || length(levels(f)) == 1 || !length(object)) { + bd_new <- backendInitialize( + backend, + peaksData = peaksData(object), + chromData = chromData(object) + ) + } else { + bd_new <- bplapply( + split(.backend(object), f = f), + function(z, ...) { + backendInitialize( + backend, + peaksData = peaksData(z), + chromData = chromData(z), + BPPARAM = SerialParam() + ) + }, + ..., + BPPARAM = BPPARAM + ) + bd_new <- backendMerge(bd_new) + } + if (any(colnames(chromData(bd_new)) %in% c("rtMin", "rtMax"))) { + chromData(bd_new) <- chromData(bd_new)[, + !colnames(chromData(bd_new)) %in% + c("rtMin", "rtMax") + ] } + object@backend <- bd_new + object@processing <- .logging( + object@processing, + "Switch backend from ", + backend_class, + " to ", + class(.backend(object)) + ) + object + } ) #' @rdname Chromatograms #' @export setMethod("$", signature = "Chromatograms", function(x, name) { - .backend(x)[[name]] + .backend(x)[[name]] }) #' @rdname Chromatograms #' @export setReplaceMethod("$", signature = "Chromatograms", function(x, name, value) { - x@backend[[name]] <- value - x + x@backend[[name]] <- value + x }) #' @rdname Chromatograms @@ -462,73 +499,76 @@ setReplaceMethod("$", signature = "Chromatograms", function(x, name, value) { #' @importFrom MsCoreUtils i2index #' @export setMethod("[", "Chromatograms", function(x, i, j, ..., drop = FALSE) { - if (!missing(j)) { - stop("Subsetting 'Chromatograms' by columns is not (yet) supported") - } - if (missing(i)) { - return(x) - } - slot(x, "backend", check = FALSE) <- extractByIndex( - .backend(x), i2index(i, length(x)) - ) - x + if (!missing(j)) { + stop("Subsetting 'Chromatograms' by columns is not (yet) supported") + } + if (missing(i)) { + return(x) + } + slot(x, "backend", check = FALSE) <- extractByIndex( + .backend(x), + i2index(i, length(x)) + ) + x }) #' @rdname Chromatograms #' @export setMethod("[[", "Chromatograms", function(x, i, j, ...) { - if (!is.character(i)) { - stop( - "'i' is supposed to be a character defining the chromatogram or ", - "peak variable to access." - ) - } - if (!missing(j)) { - stop("'j' is not supported.") - } - if (!(i %in% peaksVariables(x)) && !(i %in% chromVariables(x))) { - stop("No variable '", i, "' available") - } else { - do.call("[[", list(.backend(x), i)) - } + if (!is.character(i)) { + stop( + "'i' is supposed to be a character defining the chromatogram or ", + "peak variable to access." + ) + } + if (!missing(j)) { + stop("'j' is not supported.") + } + if (!(i %in% peaksVariables(x)) && !(i %in% chromVariables(x))) { + stop("No variable '", i, "' available") + } else { + do.call("[[", list(.backend(x), i)) + } }) #' @rdname Chromatograms #' #' @export setReplaceMethod("[[", "Chromatograms", function(x, i, j, ..., value) { - if (!is.character(i)) { - stop( - "'i' is supposed to be a character defining the chromatogram ", - "or peak variable to replace or create." - ) - } - if (!(i %in% peaksVariables(x)) && !(i %in% chromVariables(x))) { - stop("No variable '", i, "' available") - } - if (!missing(j)) { - stop("'j' is not supported.") - } - x@backend <- do.call("[[<-", list(.backend(x), i = i, value = value)) - x + if (!is.character(i)) { + stop( + "'i' is supposed to be a character defining the chromatogram ", + "or peak variable to replace or create." + ) + } + if (!(i %in% peaksVariables(x)) && !(i %in% chromVariables(x))) { + stop("No variable '", i, "' available") + } + if (!missing(j)) { + stop("'j' is not supported.") + } + x@backend <- do.call("[[<-", list(.backend(x), i = i, value = value)) + x }) #' @rdname Chromatograms #' @export setMethod( - "factorize", "Chromatograms", - function(object, factorize.by = c("msLevel", "dataOrigin"), ...) { - object@backend <- factorize(.backend(object), ...) - object - } + "factorize", + "Chromatograms", + function(object, factorize.by = c("msLevel", "dataOrigin"), ...) { + object@backend <- factorize(.backend(object), ...) + object + } ) #' @rdname Chromatograms #' @export -setMethod("chromExtract", "Chromatograms", function(object, peak.table, by, ...) { +setMethod( + "chromExtract", + "Chromatograms", + function(object, peak.table, by, ...) { new_bd <- chromExtract(.backend(object), peak.table, by, ...) return(Chromatograms(new_bd)) -}) - - - + } +) diff --git a/R/helpers.R b/R/helpers.R index 7ab1ad5..92f04ec 100644 --- a/R/helpers.R +++ b/R/helpers.R @@ -10,27 +10,27 @@ #' @importFrom MsCoreUtils vapply1c rbindFill #' @noRd .df_combine <- function(objects) { - if (length(objects) == 1) { - return(objects[[1]]) - } - if (!all(vapply1c(objects, class) == class(objects[[1]]))) { - stop("Can only merge backends of the same type: ", class(objects[[1]])) - } - res <- objects[[1]] - pv <- names(.peaksData(res)[[1]]) - for (i in 2:length(objects)) { - res@chromData <- rbindFill(.chromData(res), .chromData(objects[[i]])) - pv2 <- peaksVariables(objects[[i]]) - if (length(pv) == length(pv2) && all(pv == pv2)) { - res@peaksData <- c(.peaksData(res), .peaksData(objects[[i]])) - } else { - stop( - "Provided objects have different sets of peak variables. ", - "Combining such objects is currently not supported." - ) - } + if (length(objects) == 1) { + return(objects[[1]]) + } + if (!all(vapply1c(objects, class) == class(objects[[1]]))) { + stop("Can only merge backends of the same type: ", class(objects[[1]])) + } + res <- objects[[1]] + pv <- names(.peaksData(res)[[1]]) + for (i in 2:length(objects)) { + res@chromData <- rbindFill(.chromData(res), .chromData(objects[[i]])) + pv2 <- peaksVariables(objects[[i]]) + if (length(pv) == length(pv2) && all(pv == pv2)) { + res@peaksData <- c(.peaksData(res), .peaksData(objects[[i]])) + } else { + stop( + "Provided objects have different sets of peak variables. ", + "Combining such objects is currently not supported." + ) } - res + } + res } #' Helper function to check the order and data types of columns @@ -40,22 +40,25 @@ #' - `validPeaksData()` #' @noRd .check_column_order_and_types <- function(df, expected_cols, expected_types) { - if (!identical(colnames(df)[seq_len(2)], expected_cols)) { - return(paste0("Columns should be in the order 'rtime', 'intensity'.")) - } - invalid_cols <- vapply(expected_cols, function(col) { - !is(df[[col]], expected_types[[col]]) - }, logical(1)) - if (any(invalid_cols)) { - invalid_col_names <- expected_cols[invalid_cols] - return(paste0( - "The peaksData variable(s) ", paste(invalid_col_names, - collapse = ", " - ), - " have the wrong data type." - )) - } - return(NULL) + if (!identical(colnames(df)[seq_len(2)], expected_cols)) { + return(paste0("Columns should be in the order 'rtime', 'intensity'.")) + } + invalid_cols <- vapply( + expected_cols, + function(col) { + !is(df[[col]], expected_types[[col]]) + }, + logical(1) + ) + if (any(invalid_cols)) { + invalid_col_names <- expected_cols[invalid_cols] + return(paste0( + "The peaksData variable(s) ", + paste(invalid_col_names, collapse = ", "), + " have the wrong data type." + )) + } + return(NULL) } #' Helper function to check the properties of the 'rtime' column. @@ -65,18 +68,18 @@ #' - `validPeaksData()` #' @noRd .check_rtime <- function(df) { - if (nrow(df) == 0) { - return(NULL) - } - if (any(is.na(df$rtime))) { - return("'rtime' column contains NA values.") - } + if (nrow(df) == 0) { + return(NULL) + } + if (any(is.na(df$rtime))) { + return("'rtime' column contains NA values.") + } - if (!all(diff(df$rtime) > 0)) { - return("'rtime' column is not strictly increasing.") ## does it need to strictly increase ? - } + if (!all(diff(df$rtime) > 0)) { + return("'rtime' column is not strictly increasing.") ## does it need to strictly increase ? + } - return(NULL) + return(NULL) } #' Function to apply the processing queue to the backend, return a peaksData. @@ -90,29 +93,40 @@ #' #' @importFrom BiocParallel bplapply SerialParam #' @noRd -.run_process_queue <- function(object, queue, f = factor(), - BPPARAM = SerialParam()) { - BPPARAM <- backendBpparam(object, BPPARAM) - if (!length(f) || length(levels(f)) == 1) { - for (i in seq_along(queue)) { - object <- do.call(queue[[i]]@FUN, c(object, queue[[i]]@ARGS)) - } - return(object) - } - if (!is(f, "factor")) stop("f must be a factor") - if (length(f) != length(object)) { - stop( - "length 'f' has to be equal to the length of 'object' (", - length(object), ")" - ) - } - processed_data <- bplapply(split(object, f), function(x) { - for (i in seq_along(queue)) { - x <- do.call(queue[[i]]@FUN, c(x, queue[[i]]@ARGS)) - } - x - }, BPPARAM = BPPARAM) - backendMerge(processed_data) +.run_process_queue <- function( + object, + queue, + f = factor(), + BPPARAM = SerialParam() +) { + BPPARAM <- backendBpparam(object, BPPARAM) + if (!length(f) || length(levels(f)) == 1) { + for (i in seq_along(queue)) { + object <- do.call(queue[[i]]@FUN, c(object, queue[[i]]@ARGS)) + } + return(object) + } + if (!is(f, "factor")) { + stop("f must be a factor") + } + if (length(f) != length(object)) { + stop( + "length 'f' has to be equal to the length of 'object' (", + length(object), + ")" + ) + } + processed_data <- bplapply( + split(object, f), + function(x) { + for (i in seq_along(queue)) { + x <- do.call(queue[[i]]@FUN, c(x, queue[[i]]@ARGS)) + } + x + }, + BPPARAM = BPPARAM + ) + backendMerge(processed_data) } #' Function to validate each peaksData entry @@ -122,23 +136,30 @@ #' - `validPeaksData()` #' @noRd .validate_entry <- function(df, i, expected_cols, expected_types) { - msgs <- NULL - if (!is.data.frame(df)) { - msgs <- c(msgs, paste0( - "Entry ", i, ": all 'peaksData' ", - "entries should ", - "be of class 'data.frame'" - )) - } else { - msgs <- c( - msgs, .check_column_order_and_types( - df, expected_cols, - expected_types - ), - .check_rtime(df) - ) - } - return(msgs) + msgs <- NULL + if (!is.data.frame(df)) { + msgs <- c( + msgs, + paste0( + "Entry ", + i, + ": all 'peaksData' ", + "entries should ", + "be of class 'data.frame'" + ) + ) + } else { + msgs <- c( + msgs, + .check_column_order_and_types( + df, + expected_cols, + expected_types + ), + .check_rtime(df) + ) + } + return(msgs) } #' Function to validate the processingQueue slot of a Chromatograms object @@ -148,10 +169,10 @@ #' @importFrom MsCoreUtils vapply1l #' @noRd .valid_processing_queue <- function(x) { - if (length(x) && !all(vapply1l(x, inherits, "ProcessingStep"))) { - stop("'processingQueue' should only contain ProcessingStep objects.") - } - NULL + if (length(x) && !all(vapply1l(x, inherits, "ProcessingStep"))) { + stop("'processingQueue' should only contain ProcessingStep objects.") + } + NULL } #' function to loop through query column and check if within corresponding @@ -162,50 +183,54 @@ #' @importFrom MsCoreUtils between #' @noRd .filter_ranges <- function(query, ranges, match) { - nc <- ncol(query) - nr <- nrow(query) - if (length(ranges) != 2 * nc) { - stop( - "Length of 'ranges' needs to be twice the length of the ", - "parameter 'query'" - ) - } - - # Compute within_ranges for each column of the query - within_ranges <- vapply(seq_len(nc), function(i) { - pairs <- c(ranges[2 * i - 1], ranges[2 * i]) - between(query[[i]], pairs) - }, logical(nrow(query))) - - if (match == "all") { - if (nr == 1) { - return(as.integer(all(within_ranges))) - } - return(which(rowSums(within_ranges) == nc)) - } + nc <- ncol(query) + nr <- nrow(query) + if (length(ranges) != 2 * nc) { + stop( + "Length of 'ranges' needs to be twice the length of the ", + "parameter 'query'" + ) + } + + # Compute within_ranges for each column of the query + within_ranges <- vapply( + seq_len(nc), + function(i) { + pairs <- c(ranges[2 * i - 1], ranges[2 * i]) + between(query[[i]], pairs) + }, + logical(nrow(query)) + ) + + if (match == "all") { if (nr == 1) { - return(as.integer(any(within_ranges))) - } - return(which(rowSums(within_ranges) > 0)) + return(as.integer(all(within_ranges))) + } + return(which(rowSums(within_ranges) == nc)) + } + if (nr == 1) { + return(as.integer(any(within_ranges))) + } + return(which(rowSums(within_ranges) > 0)) } #' Used in: #' - `filterPeaksData()` #' @noRd .logging <- function(x, ...) { - c(x, paste0(..., " [", date(), "]")) + c(x, paste0(..., " [", date(), "]")) } #' Used In: #' - `ChromBackendMzR()` #' @noRd .check_mzR_package <- function() { - if (!requireNamespace("mzR", quietly = TRUE)) { - stop( - "The use of 'ChromBackendMzR' requires package 'mzR'. ", - "Install it using 'BiocManager::install(\"mzR\")'" - ) - } + if (!requireNamespace("mzR", quietly = TRUE)) { + stop( + "The use of 'ChromBackendMzR' requires package 'mzR'. ", + "Install it using 'BiocManager::install(\"mzR\")'" + ) + } } #' Function to create chromData form mzml file @@ -214,30 +239,32 @@ #' Helper function to format chromatographic data from mzR files. #' @noRd .mzR_format_chromData <- function(file) { - .check_mzR_package() - msd <- mzR::openMSfile(file) - on.exit(mzR::close(msd)) - tmp <- mzR::chromatogramHeader(msd) - colnames(tmp)[colnames(tmp) == - "chromatogramIndex"] <- "chromIndex" - colnames(tmp)[colnames(tmp) == - "precursorCollisionEnergy"] <- "collisionEnergy" - colnames(tmp)[colnames(tmp) == - "productIsolationWindowTargetMZ"] <- "productMz" - colnames(tmp)[colnames(tmp) == - "precursorIsolationWindowTargetMZ"] <- "precursorMz" - tmp$dataOrigin <- file - tmp + .check_mzR_package() + msd <- mzR::openMSfile(file) + on.exit(mzR::close(msd)) + tmp <- mzR::chromatogramHeader(msd) + colnames(tmp)[colnames(tmp) == "chromatogramIndex"] <- "chromIndex" + colnames(tmp)[ + colnames(tmp) == "precursorCollisionEnergy" + ] <- "collisionEnergy" + colnames(tmp)[ + colnames(tmp) == "productIsolationWindowTargetMZ" + ] <- "productMz" + colnames(tmp)[ + colnames(tmp) == "precursorIsolationWindowTargetMZ" + ] <- "precursorMz" + tmp$dataOrigin <- file + tmp } #' Used In: #' - `peaksData()` for `ChromBackendMzR` class #' @noRd .get_chrom_data <- function(fl, idx) { - .check_mzR_package() - msd <- mzR::openMSfile(fl) - on.exit(mzR::close(msd)) - mzR::chromatogram(msd, idx, drop = FALSE) + .check_mzR_package() + msd <- mzR::openMSfile(fl) + on.exit(mzR::close(msd)) + mzR::chromatogram(msd, idx, drop = FALSE) } #' Helper function to plot a single chromatogram. @@ -249,179 +276,207 @@ #' @importFrom graphics plot.new plot.window plot.xy axis box title par #' @importFrom grDevices dev.hold dev.flush xy.coords n2mfrow #' @noRd -.plot_single_chromatogram <- function(x, xlab = "rtime (s)", - ylab = "intensity", - type = "l", xlim = numeric(), - ylim = numeric(), - main = paste("m/z", round(mz(x), 1)), - col = "#00000080", add = FALSE, - axes = TRUE, frame.plot = axes, - orientation = 1, ...) { - v <- peaksData(x)[[1L]] - rts <- v$rtime - raw_ints <- v[, "intensity"] - ints <- orientation * raw_ints - if (!length(xlim)) { - xlim <- range(rts, na.rm = TRUE) - } - if (!length(ylim)) { - ylim <- range(orientation * c(0, max(abs(ints), na.rm = TRUE))) - } - if (any(is.infinite(xlim))) { - xlim <- c(0, 0) - } - if (any(is.infinite(ylim))) { - ylim <- c(0, 0) - } - if (!add) { - dev.hold() - on.exit(dev.flush()) - plot.new() - plot.window(xlim = xlim, ylim = ylim) - } - if (!add) { - if (axes) { - axis(side = 1, ...) - axis(side = 2, ...) - } - if (frame.plot) { - box(...) - } - title(main = main, xlab = xlab, ylab = ylab, ...) - } - plot.xy(xy.coords(rts, ints), type = type, col = col, ...) +.plot_single_chromatogram <- function( + x, + xlab = "rtime (s)", + ylab = "intensity", + type = "l", + xlim = numeric(), + ylim = numeric(), + main = paste("m/z", round(mz(x), 1)), + col = "#00000080", + add = FALSE, + axes = TRUE, + frame.plot = axes, + orientation = 1, + ... +) { + v <- peaksData(x)[[1L]] + rts <- v$rtime + raw_ints <- v[, "intensity"] + ints <- orientation * raw_ints + if (!length(xlim)) { + xlim <- range(rts, na.rm = TRUE) + } + if (!length(ylim)) { + ylim <- range(orientation * c(0, max(abs(ints), na.rm = TRUE))) + } + if (any(is.infinite(xlim))) { + xlim <- c(0, 0) + } + if (any(is.infinite(ylim))) { + ylim <- c(0, 0) + } + if (!add) { + dev.hold() + on.exit(dev.flush()) + plot.new() + plot.window(xlim = xlim, ylim = ylim) + } + if (!add) { + if (axes) { + axis(side = 1, ...) + axis(side = 2, ...) + } + if (frame.plot) { + box(...) + } + title(main = main, xlab = xlab, ylab = ylab, ...) + } + plot.xy(xy.coords(rts, ints), type = type, col = col, ...) } #' Used In: #' - `peaksData` for `ChromBackendSpectra` class. #' @importFrom Spectra peaksData filterRanges #' @noRd .process_peaks_data <- function(cd, s, columns, fun, drop) { - ## Handle single spectrum case: filterRanges fails with length(s) == 1 - if (length(s) > 1) { - s <- filterRanges(s, - spectraVariables = rep("rtime", nrow(cd)), - ranges = as.vector(rbind(cd$rtMin, cd$rtMax)), - match = "any" - ) - } else { - ## For single spectrum, manually filter by rtime range - if (length(s) == 1) { - rt_in_range <- s$rtime >= min(cd$rtMin) & s$rtime <= max(cd$rtMax) - if (!rt_in_range) { - s <- s[integer(0)] ## Return empty Spectra - } - } + ## Handle single spectrum case: filterRanges fails with length(s) == 1 + if (length(s) > 1) { + s <- filterRanges( + s, + spectraVariables = rep("rtime", nrow(cd)), + ranges = as.vector(rbind(cd$rtMin, cd$rtMax)), + match = "any" + ) + } else { + ## For single spectrum, manually filter by rtime range + if (length(s) == 1) { + rt_in_range <- s$rtime >= min(cd$rtMin) & s$rtime <= max(cd$rtMax) + if (!rt_in_range) { + s <- s[integer(0)] ## Return empty Spectra + } + } + } + pd <- peaksData(s, columns = c("mz", "intensity")) + do_rt <- "rtime" %in% columns + do_int <- "intensity" %in% columns + rt <- rtime(s) + lapply(seq_len(nrow(cd)), function(i) { + ## only keep the first rt if there is duplication + keep <- between(rt, c(cd$rtMin[i], cd$rtMax[i])) & !duplicated(rt) + df <- as.data.frame(matrix(ncol = 0, nrow = sum(keep))) + if (do_rt) { + df$rtime <- rt[keep] + } + if (do_int) { + df$intensity <- vapply( + pd[keep], + function(z) { + fun(z[ + between(z[, "mz"], c(cd$mzMin[i], cd$mzMax[i])), + "intensity" + ]) + }, + NA_real_, + USE.NAMES = FALSE + ) } - pd <- peaksData(s, columns = c("mz", "intensity")) - do_rt <- "rtime" %in% columns - do_int <- "intensity" %in% columns - rt <- rtime(s) - lapply(seq_len(nrow(cd)), function(i) { - ## only keep the first rt if there is duplication - keep <- between(rt, c(cd$rtMin[i], cd$rtMax[i])) & !duplicated(rt) - df <- as.data.frame(matrix(ncol = 0, nrow = sum(keep))) - if (do_rt) { - df$rtime <- rt[keep] - } - if (do_int) { - df$intensity <- vapply(pd[keep], function(z) { - fun(z[ - between(z[, "mz"], c(cd$mzMin[i], cd$mzMax[i])), - "intensity" - ]) - }, NA_real_, USE.NAMES = FALSE) - } - df[, columns, drop = drop] - }) + df[, columns, drop = drop] + }) } #' Used in: #' - `backendInitialize()` for `ChrombackendSpectra` #' @noRd .spectra_format_chromData <- function(sps) { - res <- data.frame( - msLevel = unique(sps$msLevel), - rtMin = min(sps$rtime, na.rm = TRUE), - rtMax = max(sps$rtime, na.rm = TRUE), - mzMin = -Inf, - mzMax = Inf, - mz = Inf, - dataOrigin = unique(sps$dataOrigin), - chromSpectraIndex = unique(sps$chromSpectraIndex) - ) - ## Add optional columns if present - if ("polarity" %in% spectraVariables(sps)) { - res$polarity <- sps$polarity[1] - } - if ("scanWindowLowerLimit" %in% spectraVariables(sps)) { - res$scanWindowLowerLimit <- sps$scanWindowLowerLimit[1] - } - if ("scanWindowUpperLimit" %in% spectraVariables(sps)) { - res$scanWindowUpperLimit <- sps$scanWindowUpperLimit[1] - } - res + res <- data.frame( + msLevel = unique(sps$msLevel), + rtMin = min(sps$rtime, na.rm = TRUE), + rtMax = max(sps$rtime, na.rm = TRUE), + mzMin = -Inf, + mzMax = Inf, + mz = Inf, + dataOrigin = unique(sps$dataOrigin), + chromSpectraIndex = unique(sps$chromSpectraIndex) + ) + ## Add optional columns if present + if ("polarity" %in% spectraVariables(sps)) { + res$polarity <- sps$polarity[1] + } + if ("scanWindowLowerLimit" %in% spectraVariables(sps)) { + res$scanWindowLowerLimit <- sps$scanWindowLowerLimit[1] + } + if ("scanWindowUpperLimit" %in% spectraVariables(sps)) { + res$scanWindowUpperLimit <- sps$scanWindowUpperLimit[1] + } + res } #' Used in: #' - `factorize()` for `ChrombackendSpectra` #' @noRd .ensure_rt_mz_columns <- function(chrom_data, spectra, spectra_f) { - if (!all(c("mzMin", "mzMax") %in% colnames(chrom_data))) { - if ("mzMin" %in% colnames(chrom_data) || - "mzMax" %in% colnames(chrom_data)) { - stop("Both 'mzMin' and 'mzMax' must be present if one", - " is provided.") - } else { - chrom_data$mzMin <- -Inf - chrom_data$mzMax <- Inf - } - } - if (!all(c("rtMin", "rtMax") %in% colnames(chrom_data))) { - if ("rtMin" %in% colnames(chrom_data) || "rtMax" %in% - colnames(chrom_data)) { - stop("Both 'rtMin' and 'rtMax' must be present if one", - " is provided.") - } else { - levs <- levels(spectra_f) - if (is.null(levs)) { - levs <- unique(as.character(spectra_f)) - } - rt_mat <- vapply(levs, function(lvl) { - range(spectra$rtime[spectra_f == lvl], na.rm = TRUE) - }, numeric(2)) - chrom_idx <- as.character(chrom_data$chromSpectraIndex) - chrom_data$rtMin <- rt_mat[1, chrom_idx] - chrom_data$rtMax <- rt_mat[2, chrom_idx] - } - } - chrom_data + if (!all(c("mzMin", "mzMax") %in% colnames(chrom_data))) { + if ( + "mzMin" %in% colnames(chrom_data) || "mzMax" %in% colnames(chrom_data) + ) { + stop("Both 'mzMin' and 'mzMax' must be present if one", " is provided.") + } else { + chrom_data$mzMin <- -Inf + chrom_data$mzMax <- Inf + } + } + if (!all(c("rtMin", "rtMax") %in% colnames(chrom_data))) { + if ( + "rtMin" %in% + colnames(chrom_data) || + "rtMax" %in% + colnames(chrom_data) + ) { + stop("Both 'rtMin' and 'rtMax' must be present if one", " is provided.") + } else { + levs <- levels(spectra_f) + if (is.null(levs)) { + levs <- unique(as.character(spectra_f)) + } + rt_mat <- vapply( + levs, + function(lvl) { + range(spectra$rtime[spectra_f == lvl], na.rm = TRUE) + }, + numeric(2) + ) + chrom_idx <- as.character(chrom_data$chromSpectraIndex) + chrom_data$rtMin <- rt_mat[1, chrom_idx] + chrom_data$rtMax <- rt_mat[2, chrom_idx] + } + } + chrom_data } #' Used in: #' - `chromExtract()`. #' @noRd -.validate_chromExtract_input <- function(object, - peak.table, - by, - required_cols = c("rtMin", "rtMax", - by)) { - cd <- .chromData(object) - if (!all(required_cols %in% names(peak.table))) { - stop("`peak.table` must contain columns: ", paste(required_cols, - collapse = ", "), ".") - } +.validate_chromExtract_input <- function( + object, + peak.table, + by, + required_cols = c("rtMin", "rtMax", by) +) { + cd <- .chromData(object) + if (!all(required_cols %in% names(peak.table))) { + stop( + "`peak.table` must contain columns: ", + paste(required_cols, collapse = ", "), + "." + ) + } - if (anyNA(peak.table$rtMin) || anyNA(peak.table$rtMax)) { - stop("Columns 'rtMin' and 'rtMax' in `peak.table` cannot ", - "contain NA values.") - } - if (!all(by %in% names(cd))) { - stop("All 'by' columns must be present in chromData(object).") - } - if (nrow(cd) != nrow(unique(cd[, by, drop = FALSE]))) { - stop("Combinations of 'by' columns must uniquely identify rows ", - "in chromData.") - } + if (anyNA(peak.table$rtMin) || anyNA(peak.table$rtMax)) { + stop( + "Columns 'rtMin' and 'rtMax' in `peak.table` cannot ", + "contain NA values." + ) + } + if (!all(by %in% names(cd))) { + stop("All 'by' columns must be present in chromData(object).") + } + if (nrow(cd) != nrow(unique(cd[, by, drop = FALSE]))) { + stop( + "Combinations of 'by' columns must uniquely identify rows ", + "in chromData." + ) + } } @@ -429,44 +484,46 @@ #' - `chromExract()` #' @noRd .match_chromdata_peaktable <- function(object, peak.table, by) { - cd <- .chromData(object) - chrom_keys <- interaction(cd[, by, drop = FALSE], drop = TRUE) - peak_keys <- interaction(peak.table[, by, drop = FALSE], drop = TRUE) - - # ensure all peak.table keys exist in chromData - missing_keys <- setdiff(levels(peak_keys), levels(chrom_keys)) - if (length(missing_keys)) { - stop("Some combinations in `peak.table` do not exist in chromData: ", - paste(missing_keys, collapse = ", ")) - } - - ## Subset chromdata and only keep the row of interest. - keep_idx <- chrom_keys %in% peak_keys - object <- object[keep_idx] - chrom_keys <- droplevels(chrom_keys[keep_idx]) + cd <- .chromData(object) + chrom_keys <- interaction(cd[, by, drop = FALSE], drop = TRUE) + peak_keys <- interaction(peak.table[, by, drop = FALSE], drop = TRUE) + + # ensure all peak.table keys exist in chromData + missing_keys <- setdiff(levels(peak_keys), levels(chrom_keys)) + if (length(missing_keys)) { + stop( + "Some combinations in `peak.table` do not exist in chromData: ", + paste(missing_keys, collapse = ", ") + ) + } - # align factor levels (so splitting matches between cd and peak.table) - shared_levels <- intersect(levels(peak_keys), levels(chrom_keys)) - chrom_keys <- factor(as.character(chrom_keys), levels = shared_levels) - peak_keys <- factor(as.character(peak_keys), levels = shared_levels) + ## Subset chromdata and only keep the row of interest. + keep_idx <- chrom_keys %in% peak_keys + object <- object[keep_idx] + chrom_keys <- droplevels(chrom_keys[keep_idx]) - list(object = object, chrom_keys = chrom_keys, peak_keys = peak_keys) + # align factor levels (so splitting matches between cd and peak.table) + shared_levels <- intersect(levels(peak_keys), levels(chrom_keys)) + chrom_keys <- factor(as.character(chrom_keys), levels = shared_levels) + peak_keys <- factor(as.character(peak_keys), levels = shared_levels) + list(object = object, chrom_keys = chrom_keys, peak_keys = peak_keys) } #' Used in: #' - `chromExtract()` #' @noRd .check_overl_columns <- function(object, peak.table, required_cols) { - overl_cols <- names(peak.table) %in% chromVariables(object) - extra_cols <- setdiff(names(peak.table)[overl_cols], required_cols) - if (length(extra_cols)) { - warning( "The following columns in `peak.table` already exist in ", - "`chromData` and will be replaced in the output: ", - paste(extra_cols, collapse = ", ") - ) - } - overl_cols + overl_cols <- names(peak.table) %in% chromVariables(object) + extra_cols <- setdiff(names(peak.table)[overl_cols], required_cols) + if (length(extra_cols)) { + warning( + "The following columns in `peak.table` already exist in ", + "`chromData` and will be replaced in the output: ", + paste(extra_cols, collapse = ", ") + ) + } + overl_cols } @@ -474,131 +531,153 @@ #' - `imputePeaksData()` #' @importFrom stats approx filter loess spline dnorm sd predict #' @noRd -.impute <- function(x, method, - window = 2, span = 0.25, sd = 1) { - if (all(is.na(x))) return(x) - - na_idx <- which(is.na(x)) - if (length(na_idx) == 0) return(x) - - not_na_idx <- which(!is.na(x)) - x_out <- seq_along(x) - - x[na_idx] <- switch(method, - linear = approx(not_na_idx, x[not_na_idx], - xout = na_idx, rule = 2)$y, - - spline = spline(not_na_idx, x[not_na_idx], - xout = na_idx, method = "natural")$y, - gaussian = { - # Create symmetric Gaussian kernel - kernel_range <- -window:window - w <- dnorm(kernel_range, mean = 0, sd = sd) - w <- w / sum(w) - - # Fill missing with linear approx to allow smoothing - x_filled <- x - x_filled[is.na(x_filled)] <- approx(not_na_idx, x[not_na_idx], - xout = which(is.na(x_filled)), - rule = 2)$y - smoothed <- filter(x_filled, filter = w, sides = 2, - circular = FALSE) - smoothed[na_idx] - }, - loess = { - fit <- loess(x[not_na_idx] ~ not_na_idx, span = span) - predict(fit, newdata = na_idx) - } +.impute <- function(x, method, window = 2, span = 0.25, sd = 1) { + if (all(is.na(x))) { + return(x) + } + + na_idx <- which(is.na(x)) + if (length(na_idx) == 0) { + return(x) + } + + not_na_idx <- which(!is.na(x)) + x_out <- seq_along(x) + + x[na_idx] <- switch( + method, + linear = approx(not_na_idx, x[not_na_idx], xout = na_idx, rule = 2)$y, + + spline = spline( + not_na_idx, + x[not_na_idx], + xout = na_idx, + method = "natural" + )$y, + gaussian = { + # Create symmetric Gaussian kernel + kernel_range <- -window:window + w <- dnorm(kernel_range, mean = 0, sd = sd) + w <- w / sum(w) + + # Fill missing with linear approx to allow smoothing + x_filled <- x + x_filled[is.na(x_filled)] <- approx( + not_na_idx, + x[not_na_idx], + xout = which(is.na(x_filled)), + rule = 2 + )$y + smoothed <- filter(x_filled, filter = w, sides = 2, circular = FALSE) + smoothed[na_idx] + }, + loess = { + fit <- loess(x[not_na_idx] ~ not_na_idx, span = span) + predict(fit, newdata = na_idx) + } + ) + # Fallback for any remaining NAs + na_remaining <- is.na(x) + if (any(na_remaining)) { + warning( + "Method chosen could not fill all NAs. ", + "Falling back to linear interpolation ", + "for these positions." ) - # Fallback for any remaining NAs - na_remaining <- is.na(x) - if (any(na_remaining)) { - warning("Method chosen could not fill all NAs. ", - "Falling back to linear interpolation ", - "for these positions.") - x[na_remaining] <- approx(not_na_idx, x[not_na_idx], - xout = which(na_remaining), - rule = 2)$y - } - x + x[na_remaining] <- approx( + not_na_idx, + x[not_na_idx], + xout = which(na_remaining), + rule = 2 + )$y + } + x } + ## Used in: -## - BackendInitialize, chrombackendSPectra method +## - BackendInitialize, ChromBackendSpectra method #' @noRd .map_spectra_vars <- function(object, spectraVariables) { - ## check variable validity - spectra <- .spectra(object) - cd <- .chromData(object) - if (!all(spectraVariables %in% spectraVariables(spectra))) - stop("All 'spectraVariables' must exist in 'spectra'.") - if (any(spectraVariables %in% colnames(cd))) { - existing <- intersect(spectraVariables, colnames(cd)) - non_replaceable <- vapply(existing, function(v) !all(is.na(cd[[v]])), logical(1)) - if (any(non_replaceable)) { - stop("None of the 'spectraVariables' must already exist in 'chromData'.") - } - } - idx <- spectra$chromSpectraIndex - spd <- spectraData(spectra, columns = spectraVariables) - - ## Aggregate and simplify singletons - aggregated <- as.data.frame( - lapply(spectraVariables, function(var) { - res <- tapply(spd[[var]], idx, unique, simplify = FALSE) - ## If each element is length 1, unlist to atomic vector - if (all(lengths(res) == 1L)) { - res <- unlist(res, use.names = TRUE) - } - res - }), - stringsAsFactors = FALSE + # Check variable validity for mapping from the Spectra object stored in the internal @spectra slot + spectra <- .spectra(object) # the Spectra object from @spectra slot + cd <- .chromData(object) # the chromData data.frame from @chromData slot + if (!all(spectraVariables %in% spectraVariables(spectra))) { + stop("All 'spectraVariables' must exist in the Spectra object.") + } + if (any(spectraVariables %in% colnames(cd))) { + existing <- intersect(spectraVariables, colnames(cd)) + non_replaceable <- vapply( + existing, + function(v) !all(is.na(cd[[v]])), + logical(1) ) - names(aggregated) <- spectraVariables - - ## match order and combine - aggregated <- aggregated[as.character(cd$chromSpectraIndex), , drop = FALSE] - cd <- cbind(cd, aggregated) - rownames(cd) <- NULL - object@chromData <- cd - object + if (any(non_replaceable)) { + stop( + "None of the 'spectraVariables' must already exist in the chromData data.frame." + ) + } + } + idx <- spectra$chromSpectraIndex + spd <- spectraData(spectra, columns = spectraVariables) + + ## Aggregate and simplify singletons + aggregated <- as.data.frame( + lapply(spectraVariables, function(var) { + res <- tapply(spd[[var]], idx, unique, simplify = FALSE) + ## If each element is length 1, unlist to atomic vector + if (all(lengths(res) == 1L)) { + res <- unlist(res, use.names = TRUE) + } + res + }), + stringsAsFactors = FALSE + ) + names(aggregated) <- spectraVariables + + ## match order and combine + aggregated <- aggregated[as.character(cd$chromSpectraIndex), , drop = FALSE] + cd <- cbind(cd, aggregated) + rownames(cd) <- NULL + object@chromData <- cd + object } -## Below are internal accessors functions, these are used ubiquitously in the -## package. They directly access the slots. these are NOT to be used by general -## users. +#' Below are internal accessor functions, used ubiquitously in the package. +#' These directly access the internal slots (e.g., @chromData, @peaksData, @spectra, etc.). +#' These are NOT to be used by general users. #' @noRd .backend <- function(object) { - object@backend + object@backend } .peaksData <- function(object) { - if (is(object, "Chromatograms")) { - return(object@backend@peaksData) - } - if (is(object, "ChromBackend")) { - return(object@peaksData) - } - stop("'object' must be of class 'Chromatograms' or 'ChromBackend'.") + if (is(object, "Chromatograms")) { + return(object@backend@peaksData) + } + if (is(object, "ChromBackend")) { + return(object@peaksData) + } + stop("'object' must be of class 'Chromatograms' or 'ChromBackend'.") } .chromData <- function(object) { - if (is(object, "Chromatograms")) { - return(object@backend@chromData) - } - if (is(object, "ChromBackend")) { - return(object@chromData) - } - stop("'object' must be of class 'Chromatograms' or 'ChromBackend'.") + if (is(object, "Chromatograms")) { + return(object@backend@chromData) + } + if (is(object, "ChromBackend")) { + return(object@chromData) + } + stop("'object' must be of class 'Chromatograms' or 'ChromBackend'.") } .inMemory <- function(object) { - object@inMemory + object@inMemory } .processing <- function(object) { - object@processing + object@processing } .processingQueue <- function(object) { - object@processingQueue + object@processingQueue } .spectra <- function(object) { - object@spectra + object@spectra } diff --git a/man/ChromBackendMzR.Rd b/man/ChromBackendMzR.Rd index 324a255..c4ca328 100644 --- a/man/ChromBackendMzR.Rd +++ b/man/ChromBackendMzR.Rd @@ -34,7 +34,7 @@ file path of the mzML file from which the chromatographic data was read. Note that the \code{ChromBackendMzR} backend is read-only and does not support direct modification of chromatographic data. However, it does support -\code{peaksData} slot replacement, which will modify the \code{peaksData} slot but not +\code{peaksData} slot replacement, which will modify the \verb{@peaksData} slot but not the local mzML files. This is indicated by the "inMemory" slot being set to TRUE. diff --git a/man/ChromBackendSpectra.Rd b/man/ChromBackendSpectra.Rd index 3379882..943c096 100644 --- a/man/ChromBackendSpectra.Rd +++ b/man/ChromBackendSpectra.Rd @@ -25,12 +25,12 @@ chromSpectraIndex(object) \item{spectra}{A \code{Spectra} object.} -\item{factorize.by}{A \code{character} vector of variables for grouping \code{Spectra} -data into chromatographic data (i.e., creating separate chromatograms -for each unique combination of these variables). +\item{factorize.by}{A \code{character} vector of \code{spectraVariables} for grouping +\code{Spectra} data into chromatographic data (i.e., creating separate +chromatograms for each unique combination of these variables). Default: \code{c("msLevel", "dataOrigin")}, which creates one chromatogram per MS level per data file. -If \code{chromData} is provided, it must contain these columns.} +If \code{chromData} is provided, it \strong{must} also contain these columns.} \item{summarize.method}{A \code{character} string specifying intensity summary: \code{"sum"} (default) or \code{"max"}.} @@ -59,7 +59,7 @@ summarizing chromatographic data from \code{\link[Spectra:Spectra]{Spectra::Spec It can be initialized with a \code{Spectra} object, which is stored in the \code{spectra} slot of the backend. Users can also provide a \code{data.frame} -containing chromatographic metadata, stored in \code{chromData}. This metadata +containing chromatographic metadata, stored in \verb{@chromData}. This metadata filters the \code{Spectra} object and generates \code{peaksData}. If \code{chromData} is not provided, a default \code{data.frame} is created from the \code{Spectra} data. An "rtMin", "rtMax", "mzMin", and "mzMax" column will be created by @@ -79,7 +79,7 @@ of the \code{Spectra} object. The \code{factorize.by} parameter defines the vari for grouping \code{Spectra} data into chromatographic data. The default is \code{c("msLevel", "dataOrigin")}, which will define separate chromatograms for each combination of \code{msLevel} and \code{dataOrigin}. These variables must be in -both \code{Spectra} and \code{chromData} (if provided). +both the \code{spectraData()} of the \code{Spectra} and \code{chromData} (if provided). The \code{summarize.method} parameter defines how spectral data intensity is summarized: @@ -93,9 +93,9 @@ method must be called to update \code{chromSpectraIndex}. } \details{ No \code{peaksData} is stored until the user calls a function that generates it -(e.g., \code{rtime()}, \code{peaksData()}, \code{intensity()}). The \code{peaksData} slot +(e.g., \code{rtime()}, \code{peaksData()}, \code{intensity()}). The \verb{@peaksData} slot replacement is unsupported — modifications are temporary to optimize memory. -The \code{inMemory} slot indicates this with \code{TRUE}. +The \verb{@inMemory} slot indicates this with \code{TRUE}. \strong{Spectra Sort Index}: The \code{ChromBackendSpectra} backend maintains a \code{spectraSortIndex} slot that stores a sort order for the internal \code{Spectra} @@ -109,10 +109,10 @@ physically reorder disk-backed \code{Spectra} objects, which would require loadi all data into memory. \strong{Factorize and Subsetting}: The \code{factorize()} method updates the -\code{chromSpectraIndex} in both \code{chromData} and the \code{spectra} object to reflect +\code{chromSpectraIndex} in both \code{chromData} and the \verb{@spectra} to reflect the current grouping, and recalculates \code{spectraSortIndex} to maintain the correct sort order. The \code{[} subsetting operator properly handles subsetting -of both \code{chromData}, \code{peaksData}, and \code{spectra}, while updating the +of both \verb{@chromData}, \verb{@peaksData}, and \verb{@spectra}, while updating the \code{spectraSortIndex} to reference valid positions in the subsetted data. \code{ChromBackendSpectra} should reuse \code{ChromBackendMemory} methods whenever diff --git a/man/Chromatograms.Rd b/man/Chromatograms.Rd index 334d738..6368167 100644 --- a/man/Chromatograms.Rd +++ b/man/Chromatograms.Rd @@ -173,14 +173,14 @@ is one chromatogram. The \emph{chromatograms variables} information in the \code{Chromatograms} object can be accessed using the \code{chromData()} function. Specific chromatograms variables can be accessed by either precising the \code{"columns"} parameter in -\code{chromData()} or using \code{$}. \code{chromData} can be accessed, replaced but +\code{chromData()} or using \code{$}. \verb{@chromData} can be accessed, replaced but also filtered/subsetted. Refer to the \link{chromData} documentation for more details. The \emph{peaks data variables} information in the \code{Chromatograms} object can be accessed using the \code{peaksData()} function. Specific peaks variables can be accessed by either precising the \code{"columns"} parameter in \code{peaksData()} or -using \code{$}. \code{peaksData} can be accessed, replaced but also +using \code{$}. \verb{@peaksData} can be accessed, replaced but also filtered/subsetted. Refer to the \link{peaksData} documentation for more details. } @@ -189,7 +189,7 @@ filtered/subsetted. Refer to the \link{peaksData} documentation for more details Functions that process the chromatograms data in some ways can be applied to the object either directly or by using the \code{processingQueue} mechanism. The -\code{processingQueue} is a list of processing steps that are stored within the +\verb{@processingQueue} is a list of processing steps that are stored within the object and only applied when needed. This was created so that the data can be processed in a single step and is very useful for larger datasets. This is even more true as this processing queue will call function that can be diff --git a/man/chromData.Rd b/man/chromData.Rd index 70290d8..ba82289 100644 --- a/man/chromData.Rd +++ b/man/chromData.Rd @@ -180,7 +180,7 @@ The \emph{chromatograms variables} information can be accessed using the \code{chromData()} function. it is also possible to access specific chromatograms variables using \code{$}. -\code{chromData} can be accessed, replaced but also filtered/subsetted. Refer to +\verb{@chromData} can be accessed, replaced but also filtered/subsetted. Refer to the sections below for more details. } \section{Chromatograms variables and accessor functions}{ @@ -221,9 +221,9 @@ the product's isolation window. Functions that filter \code{Chromatograms} based on chromatograms variables -(i.e, \code{chromData} ) will remove chromatographic data that do not meet the +(i.e, \verb{@chromData} ) will remove chromatographic data that do not meet the specified conditions. This means that if a chromatogram is filtered out, its -corresponding \code{chromData} and \code{peaksData} will be removed from the object +corresponding \verb{@chromData} and \verb{@peaksData} will be removed from the object immediately. The available functions to filter chromatogram data are: diff --git a/man/peaksData.Rd b/man/peaksData.Rd index ae8e656..4c8438f 100644 --- a/man/peaksData.Rd +++ b/man/peaksData.Rd @@ -134,7 +134,7 @@ to the sections below for more details. \section{Filter Peaks Variables}{ -Functions that filter a \code{Chromatograms}'s peaks data (i.e., \code{peaksData}). +Functions that filter a \code{Chromatograms}'s peaks data (i.e., \verb{@peaksData}). These functions remove peaks data that do not meet the specified conditions. If a chromatogram in a \code{Chromatograms} object is filtered, only the corresponding peaks variable pairs (i.e., rows) in the @@ -154,7 +154,7 @@ in the object, but it removes the specified peaks data (e.g., "rtime" and In the case of a read-only backend, (such as the \link{ChromBackendMzR}), the replacement of the peaks data is not possible. The peaks data can be filtered, but the filtered data will not be saved in the backend. This means -the original mzml files will not be affected by computations performed on +the original mzML files will not be affected by computations performed on the \link{Chromatograms}. } @@ -162,7 +162,7 @@ the \link{Chromatograms}. \code{imputePeaksData} will impute missing values in a \code{Chromatograms}'s peaks data -(i.e., \code{peaksData}). This functions replace missing peaks data values with +(i.e., \verb{@peaksData}). This functions replace missing peaks data values with specified imputation methods using various methods such as linear interpolation, spline interpolation, Gaussian kernel smoothing, or LOESS smoothing. This method modifies the peaks data in place and returns the diff --git a/vignettes/using-a-chromatograms-object.Rmd b/vignettes/using-a-chromatograms-object.Rmd index 74fd3ad..e1ac555 100644 --- a/vignettes/using-a-chromatograms-object.Rmd +++ b/vignettes/using-a-chromatograms-object.Rmd @@ -378,8 +378,9 @@ the groupings. This can be done using the `factorize()` function: chr_s$msLevel <- rep(2L, length(chr_s)) ## Re-factorize to update the groupings -chr_s <- factorize(chr_s@backend) -chr_s <- Chromatograms(chr_s) +chr_s <- factorize(chr_s) + +chromData(chr_s) ``` This recalculates which spectra belong to which chromatograms based on the @@ -604,7 +605,7 @@ By default, the `factorize.by` parameter is set to `c("msLevel", "dataOrigin")`, which means: - All MS1 spectra from file "A" → Chromatogram 1 -- All MS2 spectra from file "A" → Chromatogram 2 +- All MS2 spectra from file "A" → Chromatogram 2 - All MS1 spectra from file "B" → Chromatogram 3 - All MS2 spectra from file "B" → Chromatogram 4 @@ -686,7 +687,7 @@ peak_table <- data.frame( ) ## Extract those regions -chr_extracted <- chromExtract(chr, peak_table, +chr_extracted <- chromExtract(chr, peak_table, by = c("msLevel", "chromIndex")) chr_extracted @@ -718,7 +719,7 @@ peak_table_mz <- data.frame( ) ## Extract EICs for these features -chr_eics <- chromExtract(chr_s, peak_table_mz, +chr_eics <- chromExtract(chr_s, peak_table_mz, by = c("msLevel", "dataOrigin")) chr_eics @@ -780,7 +781,7 @@ Now let's examine the raw data and apply different imputation methods: ## Create copies for comparison chr_linear <- imputePeaksData(chr_eic, method = "linear") chr_spline <- imputePeaksData(chr_eic, method = "spline") -chr_gaussian <- imputePeaksData(chr_eic, method = "gaussian", +chr_gaussian <- imputePeaksData(chr_eic, method = "gaussian", window = 5, sd = 2) chr_loess <- imputePeaksData(chr_eic, method = "loess", span = 0.3) From 5ee9cc4dc0c36427718ec70609e2d1f3beda4ae9 Mon Sep 17 00:00:00 2001 From: Philippine Louail <127301965+philouail@users.noreply.github.com> Date: Thu, 22 Jan 2026 19:05:07 +0100 Subject: [PATCH 9/9] fix test --- tests/testthat/test_Chromatograms.R | 408 +++++----- tests/testthat/test_helpers.R | 1150 ++++++++++++++------------- 2 files changed, 807 insertions(+), 751 deletions(-) diff --git a/tests/testthat/test_Chromatograms.R b/tests/testthat/test_Chromatograms.R index 64fc66f..4109f5f 100644 --- a/tests/testthat/test_Chromatograms.R +++ b/tests/testthat/test_Chromatograms.R @@ -1,97 +1,107 @@ test_that("Chromatograms works", { - ## empty object - expect_true(is(.backend(c_empty), "ChromBackendMemory")) - expect_true(is(c_empty, "Chromatograms")) - expect_true(processingChunkSize(c_empty)== Inf) - expect_true(c_empty@version == "0.1") - expect_identical(.processingQueue(c_empty), list()) - - ## object with backend - expect_true(is(.backend(c_full), "ChromBackendMemory")) - expect_true(is(c_full, "Chromatograms")) - expect_true(processingChunkSize(c_full) == Inf) - expect_true(c_full@version == "0.1") - expect_identical(.processingQueue(c_full), list()) - - expect_equal(processingChunkSize(c_full), Inf) - c_chunk <- c_full - processingChunkSize(c_chunk) <- 2 - expect_equal(processingChunkSize(c_chunk), 2) - expect_equal(levels(processingChunkFactor(c_chunk)), c("1", "2")) - - ## method with Spectra works - c_sp <- Chromatograms(s[1:2]) - expect_true(is(.backend(c_sp), "ChromBackendSpectra")) - expect_true(is(c_sp, "Chromatograms")) - expect_true(processingChunkSize(c_sp) == Inf) - expect_true(c_sp@version == "0.1") - expect_identical(.processingQueue(c_sp), list()) + ## empty object + expect_true(is(.backend(c_empty), "ChromBackendMemory")) + expect_true(is(c_empty, "Chromatograms")) + expect_true(processingChunkSize(c_empty) == Inf) + expect_true(c_empty@version == "0.1") + expect_identical(.processingQueue(c_empty), list()) + + ## object with backend + expect_true(is(.backend(c_full), "ChromBackendMemory")) + expect_true(is(c_full, "Chromatograms")) + expect_true(processingChunkSize(c_full) == Inf) + expect_true(c_full@version == "0.1") + expect_identical(.processingQueue(c_full), list()) + + expect_equal(processingChunkSize(c_full), Inf) + c_chunk <- c_full + processingChunkSize(c_chunk) <- 2 + expect_equal(processingChunkSize(c_chunk), 2) + expect_equal(levels(processingChunkFactor(c_chunk)), c("1", "2")) + + ## method with Spectra works + c_sp <- Chromatograms(s[1:2]) + expect_true(is(.backend(c_sp), "ChromBackendSpectra")) + expect_true(is(c_sp, "Chromatograms")) + expect_true(processingChunkSize(c_sp) == Inf) + expect_true(c_sp@version == "0.1") + expect_identical(.processingQueue(c_sp), list()) }) test_that("Chromatograms constructor from Spectra works with all parameters", { - ## Basic construction with defaults - chr <- Chromatograms(s) - expect_s4_class(chr, "Chromatograms") - expect_s4_class(.backend(chr), "ChromBackendSpectra") - expect_equal(length(chr), 3L) - - ## With summarize.method = "sum" (default) - chr_sum <- Chromatograms(s, summarize.method = "sum") - expect_s4_class(chr_sum, "Chromatograms") - expect_identical(.backend(chr_sum)@summaryFun, sumi) - - ## With summarize.method = "max" - chr_max <- Chromatograms(s, summarize.method = "max") - expect_s4_class(chr_max, "Chromatograms") - expect_identical(.backend(chr_max)@summaryFun, maxi) - - ## With empty chromData (should create default) - chr_empty_cd <- Chromatograms(s, chromData = data.frame()) - expect_s4_class(chr_empty_cd, "Chromatograms") - expect_true(nrow(chromData(chr_empty_cd)) > 0) - expect_true(all(names(coreChromVariables()) %in% - chromVariables(chr_empty_cd))) - - ## With custom chromData - custom_cd <- data.frame( - msLevel = 1L, - dataOrigin = unique(s$dataOrigin), - customCol = "test" - ) - chr_custom <- Chromatograms(s, chromData = custom_cd) - expect_s4_class(chr_custom, "Chromatograms") - expect_true("customCol" %in% colnames(chromData(chr_custom))) - expect_equal(chromData(chr_custom)$customCol, rep("test", length(unique(s$dataOrigin)))) - - ## With custom factorize.by - chr_factby <- Chromatograms(s, factorize.by = "dataOrigin") - expect_s4_class(chr_factby, "Chromatograms") - expect_true(all(chromData(chr_factby)$dataOrigin == - chromData(chr_factby)$chromSpectraIndex)) - - ## With spectraVariables - expect_error(Chromatograms(s, spectraVariables = "polarity"), - "must already exist in 'chromData'") - - - chr_specvars <- Chromatograms(s, spectraVariables = c("precursorMz")) - expect_s4_class(chr_specvars, "Chromatograms") - if ("precursorMz" %in% Spectra::spectraVariables(s)) { - expect_true("precursorMz" %in% colnames(chromData(chr_specvars))) - } - - ## spectraVariables should replace all-NA columns in provided chromData - sp_small <- Spectra::Spectra(S4Vectors::DataFrame( - rtime = c(1, 2), - msLevel = c(1L, 1L), - dataOrigin = c("A", "A"), - polarity = c(1L, 1L) - )) - cd_na <- data.frame(msLevel = 1L, dataOrigin = "A", polarity = NA_integer_) - chr_specvars_replace <- Chromatograms(sp_small, chromData = cd_na, - spectraVariables = c("polarity")) - expect_s4_class(chr_specvars_replace, "Chromatograms") - expect_identical(chromData(chr_specvars_replace)$polarity, 1L) + ## Basic construction with defaults + chr <- Chromatograms(s) + expect_s4_class(chr, "Chromatograms") + expect_s4_class(.backend(chr), "ChromBackendSpectra") + expect_equal(length(chr), 3L) + + ## With summarize.method = "sum" (default) + chr_sum <- Chromatograms(s, summarize.method = "sum") + expect_s4_class(chr_sum, "Chromatograms") + expect_identical(.backend(chr_sum)@summaryFun, sumi) + + ## With summarize.method = "max" + chr_max <- Chromatograms(s, summarize.method = "max") + expect_s4_class(chr_max, "Chromatograms") + expect_identical(.backend(chr_max)@summaryFun, maxi) + + ## With empty chromData (should create default) + chr_empty_cd <- Chromatograms(s, chromData = data.frame()) + expect_s4_class(chr_empty_cd, "Chromatograms") + expect_true(nrow(chromData(chr_empty_cd)) > 0) + expect_true(all( + names(coreChromVariables()) %in% + chromVariables(chr_empty_cd) + )) + + ## With custom chromData + custom_cd <- data.frame( + msLevel = 1L, + dataOrigin = unique(s$dataOrigin), + customCol = "test" + ) + chr_custom <- Chromatograms(s, chromData = custom_cd) + expect_s4_class(chr_custom, "Chromatograms") + expect_true("customCol" %in% colnames(chromData(chr_custom))) + expect_equal( + chromData(chr_custom)$customCol, + rep("test", length(unique(s$dataOrigin))) + ) + + ## With custom factorize.by + chr_factby <- Chromatograms(s, factorize.by = "dataOrigin") + expect_s4_class(chr_factby, "Chromatograms") + expect_true(all( + chromData(chr_factby)$dataOrigin == chromData(chr_factby)$chromSpectraIndex + )) + + ## With spectraVariables + expect_error( + Chromatograms(s, spectraVariables = "polarity"), + "must already exist in the chromData" + ) + + chr_specvars <- Chromatograms(s, spectraVariables = c("precursorMz")) + expect_s4_class(chr_specvars, "Chromatograms") + if ("precursorMz" %in% Spectra::spectraVariables(s)) { + expect_true("precursorMz" %in% colnames(chromData(chr_specvars))) + } + + ## spectraVariables should replace all-NA columns in provided chromData + sp_small <- Spectra::Spectra(S4Vectors::DataFrame( + rtime = c(1, 2), + msLevel = c(1L, 1L), + dataOrigin = c("A", "A"), + polarity = c(1L, 1L) + )) + cd_na <- data.frame(msLevel = 1L, dataOrigin = "A", polarity = NA_integer_) + chr_specvars_replace <- Chromatograms( + sp_small, + chromData = cd_na, + spectraVariables = c("polarity") + ) + expect_s4_class(chr_specvars_replace, "Chromatograms") + expect_identical(chromData(chr_specvars_replace)$polarity, 1L) }) test_that("Chromatograms constructor from ChromBackend works", { @@ -100,27 +110,35 @@ test_that("Chromatograms constructor from ChromBackend works", { expect_s4_class(chr_mem, "Chromatograms") expect_s4_class(.backend(chr_mem), "ChromBackendMemory") expect_equal(length(chr_mem), length(be)) - + ## From empty ChromBackendMemory with chromData and peaksData parameters cdata <- data.frame( - msLevel = c(1L, 1L, 1L), - mz = c(112.2, 123.3, 134.4), - dataOrigin = c("mem1", "mem1", "mem1") + msLevel = c(1L, 1L, 1L), + mz = c(112.2, 123.3, 134.4), + dataOrigin = c("mem1", "mem1", "mem1") ) pdata <- list( - data.frame(rtime = c(2.1, 2.5, 3.0, 3.4, 3.9), - intensity = c(100, 250, 400, 300, 150)), - data.frame(rtime = numeric(), intensity = numeric()), - data.frame(rtime = c(5.1, 5.8, 6.3, 6.9, 7.5), - intensity = c(80, 500, 1200, 600, 120)) + data.frame( + rtime = c(2.1, 2.5, 3.0, 3.4, 3.9), + intensity = c(100, 250, 400, 300, 150) + ), + data.frame(rtime = numeric(), intensity = numeric()), + data.frame( + rtime = c(5.1, 5.8, 6.3, 6.9, 7.5), + intensity = c(80, 500, 1200, 600, 120) + ) + ) + chr <- Chromatograms( + ChromBackendMemory(), + chromData = cdata, + peaksData = pdata ) - chr <- Chromatograms(ChromBackendMemory(), chromData = cdata, peaksData = pdata) expect_s4_class(chr, "Chromatograms") expect_s4_class(.backend(chr), "ChromBackendMemory") expect_equal(length(chr), 3L) expect_identical(chromData(chr)$mz, cdata$mz) expect_identical(peaksData(chr), pdata) - + ## From ChromBackendMzR chr_mzr <- Chromatograms(be_mzr) expect_s4_class(chr_mzr, "Chromatograms") @@ -143,125 +161,125 @@ test_that("Chromatograms constructor from ChromBackend works", { }) test_that("Chromatograms constructor handles edge cases", { - ## Empty Spectra - empty_s <- Spectra() - chr_empty <- Chromatograms(empty_s) - expect_s4_class(chr_empty, "Chromatograms") - expect_equal(length(chr_empty), 0) - - ## Missing object (creates empty ChromBackendMemory) - chr_missing <- Chromatograms() - expect_s4_class(chr_missing, "Chromatograms") - expect_s4_class(.backend(chr_missing), "ChromBackendMemory") - expect_equal(length(chr_missing), 0) + ## Empty Spectra + empty_s <- Spectra() + chr_empty <- Chromatograms(empty_s) + expect_s4_class(chr_empty, "Chromatograms") + expect_equal(length(chr_empty), 0) + + ## Missing object (creates empty ChromBackendMemory) + chr_missing <- Chromatograms() + expect_s4_class(chr_missing, "Chromatograms") + expect_s4_class(.backend(chr_missing), "ChromBackendMemory") + expect_equal(length(chr_missing), 0) }) test_that("show, Chromatograms - ChromBackendMemory works", { - expect_output(show(c_full), "ChromBackendMemory") - res <- c_full - res@processing <- c("a", "b", "c", "d") - expect_output(show(res), "1 more processings") - res@processingQueue <- list("a", "b", "c", "d") - expect_output(show(res), "4 processing step") + expect_output(show(c_full), "ChromBackendMemory") + res <- c_full + res@processing <- c("a", "b", "c", "d") + expect_output(show(res), "1 more processings") + res@processingQueue <- list("a", "b", "c", "d") + expect_output(show(res), "4 processing step") }) test_that("show, Chromatograms - ChromBackendMzR works", { - expect_output(show(c_mzr), "ChromBackendMzR") - res <- c_mzr - res@processing <- c("a", "b", "c", "d") - expect_output(show(res), "1 more processings") - res@processingQueue <- list("a", "b", "c", "d") - expect_output(show(res), "4 processing step") + expect_output(show(c_mzr), "ChromBackendMzR") + res <- c_mzr + res@processing <- c("a", "b", "c", "d") + expect_output(show(res), "1 more processings") + res@processingQueue <- list("a", "b", "c", "d") + expect_output(show(res), "4 processing step") }) test_that("setBackend works correctly", { - c_mzr_new <- setBackend(c_mzr, backend = ChromBackendMemory()) - expect_s4_class(.backend(c_mzr_new), "ChromBackendMemory") - expect_identical(chromData(c_mzr_new), chromData(c_mzr)) - expect_identical(peaksData(c_mzr_new), peaksData(c_mzr)) - expect_identical(peaksData(c_mzr_new), peaksData(c_mzr)) - - processingChunkSize(c_mzr) <- 100 - f <- processingChunkFactor(c_mzr) - expect_true(length(levels(f)) > 1) - c_mzr_new <- setBackend(c_mzr, backend = ChromBackendMemory(), f = f) - expect_s4_class(.backend(c_mzr_new), "ChromBackendMemory") - expect_identical(chromData(c_mzr_new), chromData(c_mzr)) - expect_identical(peaksData(c_mzr_new), peaksData(c_mzr)) - expect_identical(.peaksData(c_mzr_new), peaksData(c_mzr)) - - expect_error( - setBackend(c_mzr, backend = ChromBackendMzR()), - "does not support" - ) + c_mzr_new <- setBackend(c_mzr, backend = ChromBackendMemory()) + expect_s4_class(.backend(c_mzr_new), "ChromBackendMemory") + expect_identical(chromData(c_mzr_new), chromData(c_mzr)) + expect_identical(peaksData(c_mzr_new), peaksData(c_mzr)) + expect_identical(peaksData(c_mzr_new), peaksData(c_mzr)) - c_sp_new <- setBackend(c_sp, backend = ChromBackendMemory()) - expect_true(!all(c("rtMin", "rtMax") %in% colnames(chromData(c_sp_new)))) + processingChunkSize(c_mzr) <- 100 + f <- processingChunkFactor(c_mzr) + expect_true(length(levels(f)) > 1) + c_mzr_new <- setBackend(c_mzr, backend = ChromBackendMemory(), f = f) + expect_s4_class(.backend(c_mzr_new), "ChromBackendMemory") + expect_identical(chromData(c_mzr_new), chromData(c_mzr)) + expect_identical(peaksData(c_mzr_new), peaksData(c_mzr)) + expect_identical(.peaksData(c_mzr_new), peaksData(c_mzr)) + + expect_error( + setBackend(c_mzr, backend = ChromBackendMzR()), + "does not support" + ) + + c_sp_new <- setBackend(c_sp, backend = ChromBackendMemory()) + expect_true(!all(c("rtMin", "rtMax") %in% colnames(chromData(c_sp_new)))) }) test_that("$ works correctly", { - expect_identical(msLevel(c_full), c_full$msLevel) - expect_identical(chromIndex(c_mzr), c_mzr$chromIndex) - expect_identical(intensity(c_full), c_full$intensity) - expect_identical(intensity(c_mzr), c_mzr$intensity) - tmp <- c_full - tmp$msLevel <- c(2L, 2L, 3L) - expect_identical(msLevel(tmp), c(2L, 2L, 3L)) - tmp$intensity <- lapply(tmp$intensity, function(x) x + 10) - expect_false(identical(intensity(tmp), intensity(c_full))) + expect_identical(msLevel(c_full), c_full$msLevel) + expect_identical(chromIndex(c_mzr), c_mzr$chromIndex) + expect_identical(intensity(c_full), c_full$intensity) + expect_identical(intensity(c_mzr), c_mzr$intensity) + tmp <- c_full + tmp$msLevel <- c(2L, 2L, 3L) + expect_identical(msLevel(tmp), c(2L, 2L, 3L)) + tmp$intensity <- lapply(tmp$intensity, function(x) x + 10) + expect_false(identical(intensity(tmp), intensity(c_full))) }) test_that("[ works correctly", { - c_sub <- c_full[1:2] - expect_true(is(c_sub, "Chromatograms")) - expect_equal(nrow(chromData(c_sub)), 2) - expect_equal(length(peaksData(c_sub)), 2) - expect_error(c_full[1:2, 1], "by columns is not") - - c_sub <- c_full[1] - expect_equal(c_sub, c_sub[]) + c_sub <- c_full[1:2] + expect_true(is(c_sub, "Chromatograms")) + expect_equal(nrow(chromData(c_sub)), 2) + expect_equal(length(peaksData(c_sub)), 2) + expect_error(c_full[1:2, 1], "by columns is not") + + c_sub <- c_full[1] + expect_equal(c_sub, c_sub[]) }) test_that("[[ works properly", { - expect_error(c_full[[1]], "character") - expect_error(c_full[["test", 1]], "not supported") - expect_error(c_full[["test"]], "No variable") - expect_equal(c_full[["msLevel"]], msLevel(c_full)) - expect_equal(c_full[["msLevel"]], .backend(c_full)[["msLevel"]]) - - ## replace - expect_error(c_full[[1]] <- 1, "character defining the chromatogram") - expect_error(c_full[["msLevel", 4]] <- 1, "not supported") - expect_error(c_full[["test"]] <- 1, "No variable") - repet <- c_full - repet[["msLevel"]] <- rep(2L, length(repet)) - expect_false(identical(msLevel(repet), msLevel(c_full))) + expect_error(c_full[[1]], "character") + expect_error(c_full[["test", 1]], "not supported") + expect_error(c_full[["test"]], "No variable") + expect_equal(c_full[["msLevel"]], msLevel(c_full)) + expect_equal(c_full[["msLevel"]], .backend(c_full)[["msLevel"]]) + + ## replace + expect_error(c_full[[1]] <- 1, "character defining the chromatogram") + expect_error(c_full[["msLevel", 4]] <- 1, "not supported") + expect_error(c_full[["test"]] <- 1, "No variable") + repet <- c_full + repet[["msLevel"]] <- rep(2L, length(repet)) + expect_false(identical(msLevel(repet), msLevel(c_full))) }) test_that("factorize() works", { - tmp <- c_sp - tmp$msLevel <- c(1L, 2L, 3L) - idx_before <- chromSpectraIndex(.backend(tmp)) - tmp <- factorize(tmp) - idx_after <- chromSpectraIndex(.backend(tmp)) - expect_false(identical(idx_before, idx_after)) + tmp <- c_sp + tmp$msLevel <- c(1L, 2L, 3L) + idx_before <- chromSpectraIndex(.backend(tmp)) + tmp <- factorize(tmp) + idx_after <- chromSpectraIndex(.backend(tmp)) + expect_false(identical(idx_before, idx_after)) }) test_that("chromExtract, Chromatograms works correctly", { - peak.table <- data.frame( - msLevel = 1L, - dataOrigin = "mem1", - rtMin = 12.5, - rtMax = 14.0 - ) - res <- chromExtract(c_full, peak.table, by = c("msLevel", "dataOrigin")) - expect_s4_class(res, "Chromatograms") - expect_true(length(res) == nrow(peak.table)) - - pk_nomatch <- transform(peak.table, dataOrigin = "no_such_sample") - expect_error(chromExtract(c_full, pk_nomatch, - by = c("msLevel", "dataOrigin")), - "Some combinations in") + peak.table <- data.frame( + msLevel = 1L, + dataOrigin = "mem1", + rtMin = 12.5, + rtMax = 14.0 + ) + res <- chromExtract(c_full, peak.table, by = c("msLevel", "dataOrigin")) + expect_s4_class(res, "Chromatograms") + expect_true(length(res) == nrow(peak.table)) + pk_nomatch <- transform(peak.table, dataOrigin = "no_such_sample") + expect_error( + chromExtract(c_full, pk_nomatch, by = c("msLevel", "dataOrigin")), + "Some combinations in" + ) }) diff --git a/tests/testthat/test_helpers.R b/tests/testthat/test_helpers.R index a5d8d2f..81c9449 100644 --- a/tests/testthat/test_helpers.R +++ b/tests/testthat/test_helpers.R @@ -1,599 +1,637 @@ test_that(".df_combine works as expected", { - combined_backend <- .df_combine(list(be, be_cd)) - expect_equal(nrow(combined_backend@chromData), nrow(cdata) + nrow(cdata)) - expect_equal( - length(.peaksData(combined_backend)), - length(.peaksData(be)) + length(.peaksData(be_cd)) - ) - - expect_equal(.df_combine(list(be)), be) - incompatible_data <- list( - data.frame( - rtime = c(10.0, 12.0), intensity = c(200, 150), - other_col = c("test", "test") - ), - data.frame( - rtime = c(30.1, 31.2), intensity = c(110, 90), - other_col = c("test", "test") - ) - ) - be_incompatible <- backendInitialize(be_empty, - chromData = cdata, - peaksData = incompatible_data - ) - - expect_error( - .df_combine(c(be, be_incompatible)), - "Provided objects have different sets of peak variables." - ) - - setClass("DummyBackend", - contains = "ChromBackend" - ) - dm <- new("DummyBackend") - - expect_error( - .df_combine(c(be, dm)), - "merge backends of the same type" - ) + combined_backend <- .df_combine(list(be, be_cd)) + expect_equal(nrow(combined_backend@chromData), nrow(cdata) + nrow(cdata)) + expect_equal( + length(.peaksData(combined_backend)), + length(.peaksData(be)) + length(.peaksData(be_cd)) + ) + + expect_equal(.df_combine(list(be)), be) + incompatible_data <- list( + data.frame( + rtime = c(10.0, 12.0), + intensity = c(200, 150), + other_col = c("test", "test") + ), + data.frame( + rtime = c(30.1, 31.2), + intensity = c(110, 90), + other_col = c("test", "test") + ) + ) + be_incompatible <- backendInitialize( + be_empty, + chromData = cdata, + peaksData = incompatible_data + ) + + expect_error( + .df_combine(c(be, be_incompatible)), + "Provided objects have different sets of peak variables." + ) + + setClass("DummyBackend", contains = "ChromBackend") + dm <- new("DummyBackend") + + expect_error( + .df_combine(c(be, dm)), + "merge backends of the same type" + ) }) test_that(".filter_ranges helper function works correctly", { - query <- data.frame( - mz = c(112.2, 123.3, 134.4), - chromIndex = c(1L, 2L, 3L) - ) - ranges <- numeric() - expect_error(.filter_ranges(query, ranges, match = "any"), "needs to be") - ranges <- c(1, 2, 3) - expect_error( - .filter_ranges(query, ranges, match = "any"), - "Length of 'ranges'" - ) - - # All ranges match (full data retained) - ranges <- c(100, 200, 1, 3) - res <- .filter_ranges(query, ranges, match = "all") - expect_equal(res, c(1, 2, 3)) - res <- .filter_ranges(query, ranges, match = "any") - expect_equal(res, c(1, 2, 3)) - - # No matches (empty result) - ranges <- c(500, 600, 4, 5) - res <- .filter_ranges(query, ranges, match = "any") - expect_equal(res, integer(0)) - - # Partially overlapping ranges - ranges <- c(120, 130, 4, 5) - res <- .filter_ranges(query, ranges, match = "any") - expect_equal(res, 2) - res <- .filter_ranges(query, ranges, match = "all") - expect_equal(res, integer(0)) - - # Single row edge case - query_single <- data.frame(mz = 125.0, chromIndex = 2L) - ranges <- c(120, 130, 1, 3) - res <- .filter_ranges(query_single, ranges, match = "all") - expect_equal(res, 1) - res <- .filter_ranges(query_single, ranges, match = "any") - expect_equal(res, 1) - - # Edge case: empty query - query_empty <- data.frame(mz = numeric(), chromIndex = integer()) - ranges <- c(100, 200, 1, 3) - res <- .filter_ranges(query_empty, ranges, match = "all") - expect_equal(res, integer(0)) - res <- .filter_ranges(query_empty, ranges, match = "any") - expect_equal(res, integer(0)) + query <- data.frame( + mz = c(112.2, 123.3, 134.4), + chromIndex = c(1L, 2L, 3L) + ) + ranges <- numeric() + expect_error(.filter_ranges(query, ranges, match = "any"), "needs to be") + ranges <- c(1, 2, 3) + expect_error( + .filter_ranges(query, ranges, match = "any"), + "Length of 'ranges'" + ) + + # All ranges match (full data retained) + ranges <- c(100, 200, 1, 3) + res <- .filter_ranges(query, ranges, match = "all") + expect_equal(res, c(1, 2, 3)) + res <- .filter_ranges(query, ranges, match = "any") + expect_equal(res, c(1, 2, 3)) + + # No matches (empty result) + ranges <- c(500, 600, 4, 5) + res <- .filter_ranges(query, ranges, match = "any") + expect_equal(res, integer(0)) + + # Partially overlapping ranges + ranges <- c(120, 130, 4, 5) + res <- .filter_ranges(query, ranges, match = "any") + expect_equal(res, 2) + res <- .filter_ranges(query, ranges, match = "all") + expect_equal(res, integer(0)) + + # Single row edge case + query_single <- data.frame(mz = 125.0, chromIndex = 2L) + ranges <- c(120, 130, 1, 3) + res <- .filter_ranges(query_single, ranges, match = "all") + expect_equal(res, 1) + res <- .filter_ranges(query_single, ranges, match = "any") + expect_equal(res, 1) + + # Edge case: empty query + query_empty <- data.frame(mz = numeric(), chromIndex = integer()) + ranges <- c(100, 200, 1, 3) + res <- .filter_ranges(query_empty, ranges, match = "all") + expect_equal(res, integer(0)) + res <- .filter_ranges(query_empty, ranges, match = "any") + expect_equal(res, integer(0)) }) test_that(".check_column_order_and_types works", { - df_valid <- data.frame( - rtime = c(1, 2, 3), - intensity = c(10, 20, 30) - ) - df_invalid_order <- data.frame( - intensity = c(10, 20, 30), - rtime = c(1, 2, 3) - ) - df_invalid_type <- data.frame( - rtime = c("a", "b", "c"), - intensity = c(10, 20, 30) - ) - df_empty <- data.frame(rtime = numeric(), intensity = numeric()) - - expect_null(.check_column_order_and_types( - df_valid, - names(.CORE_PEAKS_VARIABLES), - .CORE_PEAKS_VARIABLES - )) - expect_equal( - .check_column_order_and_types( - df_invalid_order, - names(.CORE_PEAKS_VARIABLES), - .CORE_PEAKS_VARIABLES - ), - "Columns should be in the order 'rtime', 'intensity'." - ) - expect_equal( - .check_column_order_and_types( - df_invalid_type, - names(.CORE_PEAKS_VARIABLES), - .CORE_PEAKS_VARIABLES - ), - "The peaksData variable(s) rtime have the wrong data type." - ) - expect_null(.check_column_order_and_types( - df_empty, - names(.CORE_PEAKS_VARIABLES), - .CORE_PEAKS_VARIABLES - )) + df_valid <- data.frame( + rtime = c(1, 2, 3), + intensity = c(10, 20, 30) + ) + df_invalid_order <- data.frame( + intensity = c(10, 20, 30), + rtime = c(1, 2, 3) + ) + df_invalid_type <- data.frame( + rtime = c("a", "b", "c"), + intensity = c(10, 20, 30) + ) + df_empty <- data.frame(rtime = numeric(), intensity = numeric()) + + expect_null(.check_column_order_and_types( + df_valid, + names(.CORE_PEAKS_VARIABLES), + .CORE_PEAKS_VARIABLES + )) + expect_equal( + .check_column_order_and_types( + df_invalid_order, + names(.CORE_PEAKS_VARIABLES), + .CORE_PEAKS_VARIABLES + ), + "Columns should be in the order 'rtime', 'intensity'." + ) + expect_equal( + .check_column_order_and_types( + df_invalid_type, + names(.CORE_PEAKS_VARIABLES), + .CORE_PEAKS_VARIABLES + ), + "The peaksData variable(s) rtime have the wrong data type." + ) + expect_null(.check_column_order_and_types( + df_empty, + names(.CORE_PEAKS_VARIABLES), + .CORE_PEAKS_VARIABLES + )) }) test_that(".check_rtime works", { - df_valid <- data.frame( - rtime = c(1, 2, 3), - intensity = c(10, 20, 30) - ) - df_invalid_na <- data.frame( - rtime = c(1, NA, 3), - intensity = c(10, 20, 30) - ) - df_invalid_increasing <- data.frame( - rtime = c(3, 2, 1), - intensity = c(10, 20, 30) - ) - df_empty <- data.frame(rtime = numeric(), intensity = numeric()) - - expect_null(.check_rtime(df_valid)) - expect_equal( - .check_rtime(df_invalid_na), - "'rtime' column contains NA values." - ) - expect_equal( - .check_rtime(df_invalid_increasing), - "'rtime' column is not strictly increasing." - ) - expect_null(.check_rtime(df_empty)) + df_valid <- data.frame( + rtime = c(1, 2, 3), + intensity = c(10, 20, 30) + ) + df_invalid_na <- data.frame( + rtime = c(1, NA, 3), + intensity = c(10, 20, 30) + ) + df_invalid_increasing <- data.frame( + rtime = c(3, 2, 1), + intensity = c(10, 20, 30) + ) + df_empty <- data.frame(rtime = numeric(), intensity = numeric()) + + expect_null(.check_rtime(df_valid)) + expect_equal( + .check_rtime(df_invalid_na), + "'rtime' column contains NA values." + ) + expect_equal( + .check_rtime(df_invalid_increasing), + "'rtime' column is not strictly increasing." + ) + expect_null(.check_rtime(df_empty)) }) test_that(".validate_entry works", { - df_valid <- data.frame(rtime = c(1, 2, 3), intensity = c(10, 20, 30)) - df_empty <- data.frame(rtime = numeric(), intensity = numeric()) - - expect_null(.validate_entry( - df_valid, 1, names(.CORE_PEAKS_VARIABLES), - .CORE_PEAKS_VARIABLES - )) - expect_null(.validate_entry( - df_empty, 3, - names(.CORE_PEAKS_VARIABLES), - .CORE_PEAKS_VARIABLES - )) - expect_equal( - .validate_entry( - c(1, 2, 3), 1, - names(.CORE_PEAKS_VARIABLES), - .CORE_PEAKS_VARIABLES - ), - "Entry 1: all 'peaksData' entries should be of class 'data.frame'" - ) + df_valid <- data.frame(rtime = c(1, 2, 3), intensity = c(10, 20, 30)) + df_empty <- data.frame(rtime = numeric(), intensity = numeric()) + + expect_null(.validate_entry( + df_valid, + 1, + names(.CORE_PEAKS_VARIABLES), + .CORE_PEAKS_VARIABLES + )) + expect_null(.validate_entry( + df_empty, + 3, + names(.CORE_PEAKS_VARIABLES), + .CORE_PEAKS_VARIABLES + )) + expect_equal( + .validate_entry( + c(1, 2, 3), + 1, + names(.CORE_PEAKS_VARIABLES), + .CORE_PEAKS_VARIABLES + ), + "Entry 1: all 'peaksData' entries should be of class 'data.frame'" + ) }) test_that(".run_process_queue ChromBackendMemory work", { - result <- .run_process_queue(c_empty@backend, - f = processingChunkFactor(c_empty), - queue = c_empty@processingQueue - ) - expect_equal(result, c_empty@backend) - - result <- .run_process_queue(c_full@backend, - f = processingChunkFactor(c_full), - queue = c_full@processingQueue - ) - expect_equal(result, c_full@backend) - - c_queued <- filterPeaksData(c_full, - variables = c("rtime"), - ranges = c(12.5, 45.5) - ) - c_queued <- filterPeaksData(c_queued, - variables = c("intensity"), - ranges = c(100, 200) - ) - - # this test for f = factor() and queue >1 - result <- .run_process_queue(c_queued@backend, - f = processingChunkFactor(c_queued), - queue = c_queued@processingQueue - ) - expect_true(inherits(result, "ChromBackend")) - peaks_result <- peaksData(result) - expect_equal(length(peaks_result), length(peaksData(c_full@backend))) - expect_false(identical(peaks_result, peaksData(c_full@backend))) - - f <- factor(c(1, 1, 2)) - c_queued <- filterPeaksData(c_full, - variables = c("rtime"), - ranges = c(12.5, 45.5) - ) - c_queued <- filterPeaksData(c_queued, - variables = c("intensity"), - ranges = c(100, 200) - ) - - result <- .run_process_queue(c_queued@backend, - f = f, - queue = c_queued@processingQueue - ) - - expect_true(inherits(result, "ChromBackend")) - expect_equal(length(peaksData(result)), length(peaksData(c_full@backend))) - - split_data <- split(c_full@backend, f) - expect_equal(length(split_data), length(levels(f))) - - f <- factor(c(1, 2)) - tmp <- c_full - tmp@processingQueue <- list(1) - expect_error( - .run_process_queue(tmp@backend, - f = f, - queue = tmp@processingQueue - ), - "length 'f' has to be equal to the length of 'object'" - ) - - f <- c(1, 2, 3) - expect_error( - .run_process_queue(tmp@backend, - f = f, queue = tmp@processingQueue - ), - "f must be a factor" - ) + result <- .run_process_queue( + c_empty@backend, + f = processingChunkFactor(c_empty), + queue = c_empty@processingQueue + ) + expect_equal(result, c_empty@backend) + + result <- .run_process_queue( + c_full@backend, + f = processingChunkFactor(c_full), + queue = c_full@processingQueue + ) + expect_equal(result, c_full@backend) + + c_queued <- filterPeaksData( + c_full, + variables = c("rtime"), + ranges = c(12.5, 45.5) + ) + c_queued <- filterPeaksData( + c_queued, + variables = c("intensity"), + ranges = c(100, 200) + ) + + # this test for f = factor() and queue >1 + result <- .run_process_queue( + c_queued@backend, + f = processingChunkFactor(c_queued), + queue = c_queued@processingQueue + ) + expect_true(inherits(result, "ChromBackend")) + peaks_result <- peaksData(result) + expect_equal(length(peaks_result), length(peaksData(c_full@backend))) + expect_false(identical(peaks_result, peaksData(c_full@backend))) + + f <- factor(c(1, 1, 2)) + c_queued <- filterPeaksData( + c_full, + variables = c("rtime"), + ranges = c(12.5, 45.5) + ) + c_queued <- filterPeaksData( + c_queued, + variables = c("intensity"), + ranges = c(100, 200) + ) + + result <- .run_process_queue( + c_queued@backend, + f = f, + queue = c_queued@processingQueue + ) + + expect_true(inherits(result, "ChromBackend")) + expect_equal(length(peaksData(result)), length(peaksData(c_full@backend))) + + split_data <- split(c_full@backend, f) + expect_equal(length(split_data), length(levels(f))) + + f <- factor(c(1, 2)) + tmp <- c_full + tmp@processingQueue <- list(1) + expect_error( + .run_process_queue(tmp@backend, f = f, queue = tmp@processingQueue), + "length 'f' has to be equal to the length of 'object'" + ) + + f <- c(1, 2, 3) + expect_error( + .run_process_queue(tmp@backend, f = f, queue = tmp@processingQueue), + "f must be a factor" + ) }) test_that(".run_processing_queue, ChromBackendMzr work", { - ## without factor and queue == 1 - c_queued <- filterPeaksData(c_mzr, - variables = c("rtime"), - ranges = c(12.5, 25.5), keep = FALSE - ) - - result1 <- .run_process_queue(c_queued@backend, - f = processingChunkFactor(c_queued), - queue = c_queued@processingQueue - ) - - expect_true(result1@inMemory) - expect_false(c_queued@backend@inMemory) - - expect_true(inherits(result1, "ChromBackendMzR")) - expect_false(identical( - lengths(rtime(result1)), - lengths(rtime(c_mzr@backend)) - )) - expect_equal(length(peaksData(result1)), length(peaksData(c_mzr@backend))) - - ## with levels(factor) > 1 and queue == 1 - processingChunkSize(c_queued) <- 100 - f <- processingChunkFactor(c_queued) # > 1 - result2 <- .run_process_queue(c_queued@backend, - f = f, - queue = c_queued@processingQueue - ) - expect_true(inherits(result2, "ChromBackendMzR")) - expect_equal(length(peaksData(result2)), length(peaksData(c_mzr@backend))) - expect_false(identical( - lengths(rtime(result2)), - lengths(rtime(c_mzr@backend)) - )) - expect_true(result2@inMemory) - expect_false(c_queued@backend@inMemory) - expect_identical(result1, result2) - - - ## without factor and queue > 1 - c_queued <- filterPeaksData(c_queued, - variables = c("intensity"), - ranges = c(45, 50) - ) - result3 <- .run_process_queue(c_queued@backend, - f = factor(), - queue = c_queued@processingQueue - ) - - expect_true(inherits(result3, "ChromBackendMzR")) - expect_equal(length(peaksData(result3)), length(peaksData(c_mzr@backend))) - expect_false(identical( - lengths(rtime(result3)), - lengths(rtime(c_mzr@backend)) - )) - expect_true(result3@inMemory) - expect_false(c_queued@backend@inMemory) - - - ## with factor and queue > 1 - f <- processingChunkFactor(c_queued) - result4 <- .run_process_queue(c_queued@backend, - f = f, - queue = c_queued@processingQueue - ) - expect_true(inherits(result4, "ChromBackendMzR")) - expect_equal( - length(peaksData(result4)), - length(peaksData(c_mzr@backend)) - ) - expect_false(identical( - lengths(rtime(result4)), - lengths(rtime(c_mzr@backend)) - )) - expect_true(result4@inMemory) - expect_false(c_queued@backend@inMemory) - expect_identical(result3, result4) + ## without factor and queue == 1 + c_queued <- filterPeaksData( + c_mzr, + variables = c("rtime"), + ranges = c(12.5, 25.5), + keep = FALSE + ) + + result1 <- .run_process_queue( + c_queued@backend, + f = processingChunkFactor(c_queued), + queue = c_queued@processingQueue + ) + + expect_true(result1@inMemory) + expect_false(c_queued@backend@inMemory) + + expect_true(inherits(result1, "ChromBackendMzR")) + expect_false(identical( + lengths(rtime(result1)), + lengths(rtime(c_mzr@backend)) + )) + expect_equal(length(peaksData(result1)), length(peaksData(c_mzr@backend))) + + ## with levels(factor) > 1 and queue == 1 + processingChunkSize(c_queued) <- 100 + f <- processingChunkFactor(c_queued) # > 1 + result2 <- .run_process_queue( + c_queued@backend, + f = f, + queue = c_queued@processingQueue + ) + expect_true(inherits(result2, "ChromBackendMzR")) + expect_equal(length(peaksData(result2)), length(peaksData(c_mzr@backend))) + expect_false(identical( + lengths(rtime(result2)), + lengths(rtime(c_mzr@backend)) + )) + expect_true(result2@inMemory) + expect_false(c_queued@backend@inMemory) + expect_identical(result1, result2) + + ## without factor and queue > 1 + c_queued <- filterPeaksData( + c_queued, + variables = c("intensity"), + ranges = c(45, 50) + ) + result3 <- .run_process_queue( + c_queued@backend, + f = factor(), + queue = c_queued@processingQueue + ) + + expect_true(inherits(result3, "ChromBackendMzR")) + expect_equal(length(peaksData(result3)), length(peaksData(c_mzr@backend))) + expect_false(identical( + lengths(rtime(result3)), + lengths(rtime(c_mzr@backend)) + )) + expect_true(result3@inMemory) + expect_false(c_queued@backend@inMemory) + + ## with factor and queue > 1 + f <- processingChunkFactor(c_queued) + result4 <- .run_process_queue( + c_queued@backend, + f = f, + queue = c_queued@processingQueue + ) + expect_true(inherits(result4, "ChromBackendMzR")) + expect_equal( + length(peaksData(result4)), + length(peaksData(c_mzr@backend)) + ) + expect_false(identical( + lengths(rtime(result4)), + lengths(rtime(c_mzr@backend)) + )) + expect_true(result4@inMemory) + expect_false(c_queued@backend@inMemory) + expect_identical(result3, result4) }) test_that(".valid_processing_queue works correctly", { - expect_null(.valid_processing_queue(list())) - valid_queue <- list(new("ProcessingStep")) - expect_null(.valid_processing_queue(valid_queue)) - - invalid_queue <- list("not_a_processing_step") - expect_error( - .valid_processing_queue(invalid_queue), - "'processingQueue' should only contain ProcessingStep objects." - ) + expect_null(.valid_processing_queue(list())) + valid_queue <- list(new("ProcessingStep")) + expect_null(.valid_processing_queue(valid_queue)) + + invalid_queue <- list("not_a_processing_step") + expect_error( + .valid_processing_queue(invalid_queue), + "'processingQueue' should only contain ProcessingStep objects." + ) }) test_that("ensure_rt_mz_columns correctly handles mz and rt columns", { - spectra <- s - spectra_f <- factor( - do.call( - paste, - c(as.list(Spectra::spectraData(s)[, c("msLevel", "dataOrigin")]), - sep = "_"))) - levs <- levels(spectra_f) - chrom_data <- data.frame(msLevel = c(1,2,3), - chromSpectraIndex = levs[1:3]) - chrom_data <- .ensure_rt_mz_columns(chrom_data, spectra, spectra_f) - expect_equal(chrom_data$mzMin, c(-Inf, -Inf, -Inf)) - expect_equal(chrom_data$mzMax, c(Inf, Inf, Inf)) - - chrom_data <- data.frame(mzMin = c(100), chromSpectraIndex = levs[1]) - expect_error(.ensure_rt_mz_columns(chrom_data, spectra, spectra_f), - "must be present if one is provided.") - - chrom_data <- data.frame(mzMax = c(200), chromSpectraIndex = levs[1]) - expect_error(.ensure_rt_mz_columns(chrom_data, spectra, spectra_f), - "must be present if one is provided.") - chrom_data <- data.frame(msLevel = c(1,2,3), - chromSpectraIndex = levs[1:3]) - chrom_data <- .ensure_rt_mz_columns(chrom_data, spectra, spectra_f) - s_plit <- split(spectra, spectra_f) - expect_equal(chrom_data$rtMin[[1]], min(s_plit[[1]]$rtime, na.rm = TRUE)) - expect_equal(chrom_data$rtMax[[1]], max(s_plit[[1]]$rtime, na.rm = TRUE)) - - chrom_data <- data.frame(rtMin = c(10), chromSpectraIndex = levs[1]) - expect_error(.ensure_rt_mz_columns(chrom_data, spectra, spectra_f), - " must be present if one is provided.") - chrom_data <- data.frame(rtMax = c(50), chromSpectraIndex = levs[1]) - expect_error(.ensure_rt_mz_columns(chrom_data, spectra, spectra_f), - "must be present if one is provided.") - - chrom_data <- data.frame(mzMin = c(100), mzMax = c(200), - rtMin = c(10), rtMax = c(50), - chromSpectraIndex = levs[1]) - chrom_data <- .ensure_rt_mz_columns(chrom_data, spectra, spectra_f) - expect_equal(chrom_data$mzMin, 100) - expect_equal(chrom_data$mzMax, 200) - expect_equal(chrom_data$rtMin, 10) - expect_equal(chrom_data$rtMax, 50) + spectra <- s + spectra_f <- factor( + do.call( + paste, + c( + as.list(Spectra::spectraData(s)[, c("msLevel", "dataOrigin")]), + sep = "_" + ) + ) + ) + levs <- levels(spectra_f) + chrom_data <- data.frame(msLevel = c(1, 2, 3), chromSpectraIndex = levs[1:3]) + chrom_data <- .ensure_rt_mz_columns(chrom_data, spectra, spectra_f) + expect_equal(chrom_data$mzMin, c(-Inf, -Inf, -Inf)) + expect_equal(chrom_data$mzMax, c(Inf, Inf, Inf)) + + chrom_data <- data.frame(mzMin = c(100), chromSpectraIndex = levs[1]) + expect_error( + .ensure_rt_mz_columns(chrom_data, spectra, spectra_f), + "must be present if one is provided." + ) + + chrom_data <- data.frame(mzMax = c(200), chromSpectraIndex = levs[1]) + expect_error( + .ensure_rt_mz_columns(chrom_data, spectra, spectra_f), + "must be present if one is provided." + ) + chrom_data <- data.frame(msLevel = c(1, 2, 3), chromSpectraIndex = levs[1:3]) + chrom_data <- .ensure_rt_mz_columns(chrom_data, spectra, spectra_f) + s_plit <- split(spectra, spectra_f) + expect_equal(chrom_data$rtMin[[1]], min(s_plit[[1]]$rtime, na.rm = TRUE)) + expect_equal(chrom_data$rtMax[[1]], max(s_plit[[1]]$rtime, na.rm = TRUE)) + + chrom_data <- data.frame(rtMin = c(10), chromSpectraIndex = levs[1]) + expect_error( + .ensure_rt_mz_columns(chrom_data, spectra, spectra_f), + " must be present if one is provided." + ) + chrom_data <- data.frame(rtMax = c(50), chromSpectraIndex = levs[1]) + expect_error( + .ensure_rt_mz_columns(chrom_data, spectra, spectra_f), + "must be present if one is provided." + ) + + chrom_data <- data.frame( + mzMin = c(100), + mzMax = c(200), + rtMin = c(10), + rtMax = c(50), + chromSpectraIndex = levs[1] + ) + chrom_data <- .ensure_rt_mz_columns(chrom_data, spectra, spectra_f) + expect_equal(chrom_data$mzMin, 100) + expect_equal(chrom_data$mzMax, 200) + expect_equal(chrom_data$rtMin, 10) + expect_equal(chrom_data$rtMax, 50) }) test_that(".validate_chromExtract_input works correctly", { - cdata <- data.frame( - msLevel = c(1L, 1L, 1L), - dataOrigin = c("A", "B", "C") - ) - be <- backendInitialize(new("ChromBackendMemory"), chromData = cdata) - - peak_tbl <- data.frame( - rtMin = c(1, 2, 3), - rtMax = c(5, 6, 7), - msLevel = c(1L, 1L, 1L), - dataOrigin = c("A", "B", "C") - ) - - # should pass - expect_silent( - .validate_chromExtract_input(be, peak_tbl, - by = c("msLevel", "dataOrigin")) - ) - - # missing required column - bad_tbl <- peak_tbl[, !names(peak_tbl) %in% "rtMax", drop = FALSE] - expect_error( - .validate_chromExtract_input(be, bad_tbl, - by = c("msLevel", "dataOrigin")), - "must contain columns" - ) - - # NA in rtMin - bad_tbl2 <- peak_tbl - bad_tbl2$rtMin[1] <- NA - expect_error( - .validate_chromExtract_input(be, bad_tbl2, - by = c("msLevel", "dataOrigin")), - "cannot contain NA" - ) - - # missing 'by' columns in chromData - bad_be <- backendInitialize(new("ChromBackendMemory"), - chromData = cdata[, "msLevel", drop = FALSE]) - expect_error( - .validate_chromExtract_input(bad_be, peak_tbl, - by = c("msLevel", "dataOrigin")), - "must be present" - ) - - ## unique - expect_error( - .validate_chromExtract_input(bad_be, peak_tbl, - by = "msLevel"), - "must uniquely identify rows" - ) - + cdata <- data.frame( + msLevel = c(1L, 1L, 1L), + dataOrigin = c("A", "B", "C") + ) + be <- backendInitialize(new("ChromBackendMemory"), chromData = cdata) + + peak_tbl <- data.frame( + rtMin = c(1, 2, 3), + rtMax = c(5, 6, 7), + msLevel = c(1L, 1L, 1L), + dataOrigin = c("A", "B", "C") + ) + + # should pass + expect_silent( + .validate_chromExtract_input(be, peak_tbl, by = c("msLevel", "dataOrigin")) + ) + + # missing required column + bad_tbl <- peak_tbl[, !names(peak_tbl) %in% "rtMax", drop = FALSE] + expect_error( + .validate_chromExtract_input(be, bad_tbl, by = c("msLevel", "dataOrigin")), + "must contain columns" + ) + + # NA in rtMin + bad_tbl2 <- peak_tbl + bad_tbl2$rtMin[1] <- NA + expect_error( + .validate_chromExtract_input(be, bad_tbl2, by = c("msLevel", "dataOrigin")), + "cannot contain NA" + ) + + # missing 'by' columns in chromData + bad_be <- backendInitialize( + new("ChromBackendMemory"), + chromData = cdata[, "msLevel", drop = FALSE] + ) + expect_error( + .validate_chromExtract_input( + bad_be, + peak_tbl, + by = c("msLevel", "dataOrigin") + ), + "must be present" + ) + + ## unique + expect_error( + .validate_chromExtract_input(bad_be, peak_tbl, by = "msLevel"), + "must uniquely identify rows" + ) }) test_that(".match_chromdata_peaktable aligns correctly", { - tmp_cdata <- data.frame( - msLevel = c(1L, 1L, 2L), - dataOrigin = c("A", "B", "A") - ) - tmp <- backendInitialize(new("ChromBackendMemory"), chromData = tmp_cdata) - - peak_tbl <- data.frame( - msLevel = c(1L, 2L), - dataOrigin = c("A", "A"), - rtMin = c(1, 2), - rtMax = c(5, 6) - ) - - matched <- .match_chromdata_peaktable(tmp, peak_tbl, - by = c("msLevel", "dataOrigin")) - - # Expect a subset of object - expect_s4_class(matched$object, "ChromBackendMemory") - expect_equal(length(matched$chrom_keys), nrow(.chromData(matched$object))) - - # Check factor levels alignment - expect_true(all(levels(matched$peak_keys) %in% levels(matched$chrom_keys))) - - # missing key should error - bad_tbl <- data.frame( - msLevel = 3L, dataOrigin = "Z", rtMin = 1, rtMax = 2 - ) - expect_error( - .match_chromdata_peaktable(tmp, bad_tbl, - by = c("msLevel", "dataOrigin")), - "do not exist" - ) + tmp_cdata <- data.frame( + msLevel = c(1L, 1L, 2L), + dataOrigin = c("A", "B", "A") + ) + tmp <- backendInitialize(new("ChromBackendMemory"), chromData = tmp_cdata) + + peak_tbl <- data.frame( + msLevel = c(1L, 2L), + dataOrigin = c("A", "A"), + rtMin = c(1, 2), + rtMax = c(5, 6) + ) + + matched <- .match_chromdata_peaktable( + tmp, + peak_tbl, + by = c("msLevel", "dataOrigin") + ) + + # Expect a subset of object + expect_s4_class(matched$object, "ChromBackendMemory") + expect_equal(length(matched$chrom_keys), nrow(.chromData(matched$object))) + + # Check factor levels alignment + expect_true(all(levels(matched$peak_keys) %in% levels(matched$chrom_keys))) + + # missing key should error + bad_tbl <- data.frame( + msLevel = 3L, + dataOrigin = "Z", + rtMin = 1, + rtMax = 2 + ) + expect_error( + .match_chromdata_peaktable(tmp, bad_tbl, by = c("msLevel", "dataOrigin")), + "do not exist" + ) }) test_that(".check_overl_columns warns correctly", { - tmp_cdata <- data.frame( - msLevel = 1L, - dataOrigin = "X", - mz = 100, extracol = "info" - ) - tmp <- backendInitialize(new("ChromBackendMemory"), chromData = tmp_cdata) - - peak_tbl <- data.frame( - rtMin = 1, rtMax = 2, mzMin = 99, mzMax = 101, - msLevel = 1L, dataOrigin = "X", mz = 123, extracol = "test" - ) - - req_cols <- c("rtMin", "rtMax", "mzMin", "mzMax", "msLevel", "dataOrigin") - - expect_warning( - overl <- .check_overl_columns(tmp, peak_tbl, req_cols), - "already exist" - ) - - # overlapping should include "mz" - expect_true(all(c("mz", "extracol") %in% names(peak_tbl)[overl])) - + tmp_cdata <- data.frame( + msLevel = 1L, + dataOrigin = "X", + mz = 100, + extracol = "info" + ) + tmp <- backendInitialize(new("ChromBackendMemory"), chromData = tmp_cdata) + + peak_tbl <- data.frame( + rtMin = 1, + rtMax = 2, + mzMin = 99, + mzMax = 101, + msLevel = 1L, + dataOrigin = "X", + mz = 123, + extracol = "test" + ) + + req_cols <- c("rtMin", "rtMax", "mzMin", "mzMax", "msLevel", "dataOrigin") + + expect_warning( + overl <- .check_overl_columns(tmp, peak_tbl, req_cols), + "already exist" + ) + + # overlapping should include "mz" + expect_true(all(c("mz", "extracol") %in% names(peak_tbl)[overl])) }) test_that(".impute() works correctly and without warnings", { - # Base signal with gaps - x <- c(1:5, NA, 7:10, NA, 12:15, rep(NA, 2), 18:20) - - ## linear - expect_silent({ - res_lin <- .impute(x, method = "linear") - }) - expect_false(anyNA(res_lin)) - expect_true(all(diff(res_lin) > 0)) # still increasing - - ## spline - expect_silent({ - res_spl <- .impute(x, method = "spline") - }) - expect_false(anyNA(res_spl)) - expect_equal(length(res_spl), length(x)) - - ## Gaussian - expect_silent( - res_gauss <- .impute(x, method = "gaussian", window = 2, sd = 1) - ) - expect_false(anyNA(res_gauss)) - expect_equal(length(res_gauss), length(x)) - - ## loess - expect_warning({ - res_loess <- .impute(x, method = "loess", span = 0.3) - "could not fill all NAs" - }) - expect_false(anyNA(res_loess)) - expect_equal(length(res_loess), length(x)) - - ## Consecutive NAs - x_na <- c(1, 2, NA, NA, 5, 6, 7, 8, NA, NA, 11, 12) - expect_silent({ - res_consec <- .impute(x_na, method = "linear") - }) - expect_false(anyNA(res_consec)) - expect_equal(length(res_consec), length(x_na)) - - ## No NA - x_nomiss <- 1:10 - expect_silent({ - res_nomiss <- .impute(x_nomiss, method = "spline") - }) - expect_identical(res_nomiss, x_nomiss) - - ## all Nas returns NA - x_allna <- rep(NA_real_, 8) - expect_silent({ - res_allna <- .impute(x_allna, method = "gaussian") - }) - expect_true(all(is.na(res_allna))) + # Base signal with gaps + x <- c(1:5, NA, 7:10, NA, 12:15, rep(NA, 2), 18:20) + + ## linear + expect_silent({ + res_lin <- .impute(x, method = "linear") + }) + expect_false(anyNA(res_lin)) + expect_true(all(diff(res_lin) > 0)) # still increasing + + ## spline + expect_silent({ + res_spl <- .impute(x, method = "spline") + }) + expect_false(anyNA(res_spl)) + expect_equal(length(res_spl), length(x)) + + ## Gaussian + expect_silent( + res_gauss <- .impute(x, method = "gaussian", window = 2, sd = 1) + ) + expect_false(anyNA(res_gauss)) + expect_equal(length(res_gauss), length(x)) + + ## loess + expect_warning({ + res_loess <- .impute(x, method = "loess", span = 0.3) + "could not fill all NAs" + }) + expect_false(anyNA(res_loess)) + expect_equal(length(res_loess), length(x)) + + ## Consecutive NAs + x_na <- c(1, 2, NA, NA, 5, 6, 7, 8, NA, NA, 11, 12) + expect_silent({ + res_consec <- .impute(x_na, method = "linear") + }) + expect_false(anyNA(res_consec)) + expect_equal(length(res_consec), length(x_na)) + + ## No NA + x_nomiss <- 1:10 + expect_silent({ + res_nomiss <- .impute(x_nomiss, method = "spline") + }) + expect_identical(res_nomiss, x_nomiss) + + ## all Nas returns NA + x_allna <- rep(NA_real_, 8) + expect_silent({ + res_allna <- .impute(x_allna, method = "gaussian") + }) + expect_true(all(is.na(res_allna))) }) test_that(".map_spectra_vars() correctly maps spectra variables", { - vars <- c("scanIndex", "mtbls_id") - - mapped <- .map_spectra_vars(be_sp, vars) - - expect_s4_class(mapped, "ChromBackendSpectra") - cd <- chromData(mapped) - - expect_true(all(vars %in% names(cd))) - - expect_true(all(vapply(cd$mtbls_id, function(x) length(unique(x)) == 1, - logical(1)))) - - expect_true(all(vapply(cd$scanIndex, function(x) length(x) >= 1, - logical(1)))) - - expect_identical( - rownames(cd), - rownames(chromData(be_sp)) - ) - expect_error( - .map_spectra_vars(be_sp, c("scanIndex", "fake_var")), - "must exist in 'spectra'" - ) - - cd_names <- names(chromData(be_sp)) - fake_var <- cd_names[1L] # pick a real chromData column - expect_error( - .map_spectra_vars(be_sp, fake_var), - "must already exist in 'chromData'" - ) + vars <- c("scanIndex", "mtbls_id") + + mapped <- .map_spectra_vars(be_sp, vars) + + expect_s4_class(mapped, "ChromBackendSpectra") + cd <- chromData(mapped) + + expect_true(all(vars %in% names(cd))) + + expect_true(all(vapply( + cd$mtbls_id, + function(x) length(unique(x)) == 1, + logical(1) + ))) + + expect_true(all(vapply(cd$scanIndex, function(x) length(x) >= 1, logical(1)))) + + expect_identical( + rownames(cd), + rownames(chromData(be_sp)) + ) + expect_error( + .map_spectra_vars(be_sp, c("scanIndex", "fake_var")), + "must exist in the Spectra" + ) + + cd_names <- names(chromData(be_sp)) + fake_var <- cd_names[1L] # pick a real chromData column + expect_error( + .map_spectra_vars(be_sp, fake_var), + "must already exist in the chromData" + ) }) - -