rformassspectrometry · philouail · Jan 22, 2026 · Oct 29, 2025 · Oct 29, 2025 · Jan 13, 2026
diff --git a/.github/workflows/check-bioc.yml b/.github/workflows/check-bioc.yml
@@ -53,7 +53,7 @@ jobs:
       fail-fast: false
       matrix:
         config:
-          - { os: ubuntu-latest, r: 'devel', bioc: '3.22', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
+          - { os: ubuntu-latest, r: 'devel', bioc: '3.23', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
           - { os: macOS-latest, r: 'latest', bioc: '3.22'}
           - { os: windows-latest, r: 'latest', bioc: '3.22'}
     env:

diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: Chromatograms
 Title: Infrastructure for Chromatographic Mass Spectrometry Data
-Version: 0.99.7
+Version: 1.1.1
 Description: The Chromatograms packages defines an efficient infrastructure
    for storing and handling of chromatographic mass spectrometry data. It
    provides different implementations of *backends* to store and represent the
@@ -41,6 +41,7 @@ Suggests:
     mzR (>= 2.41.4),
     MsBackendMetaboLights (>= 1.3.1),
     vdiffr,
+    IRanges,
     RColorBrewer
 License: Artistic-2.0
 Encoding: UTF-8

diff --git a/NAMESPACE b/NAMESPACE
@@ -3,6 +3,7 @@
 export(ChromBackendMemory)
 export(ChromBackendMzR)
 export(ChromBackendSpectra)
+export(chromSpectraIndex)
 export(coreChromVariables)
 export(corePeaksVariables)
 export(fillCoreChromVariables)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,13 @@
+# Version 1.1.1
+
+- Aligned the package with the Bioconductor 3.22 release.
+- Expanded the vignette to cover ChromBackendSpectra usage, chromatogram
+  extraction with `chromExtract()`, and imputation workflows via
+  `imputePeaksData()`.
+- Added a `spectraSortIndex` slot to `ChromBackendSpectra` that stores the
+  `dataOrigin`/retention-time order of the spectra, so that on-disk `Spectra`
+  objects no longer need to be physically reordered (and loaded into memory).
+
 # Version 0.99.7
 
 ## Changes in 0.99.7

diff --git a/R/ChromBackendMzR.R b/R/ChromBackendMzR.R
@@ -24,7 +24,7 @@ NULL
 #'
 #' Implementing functionalities with the `ChromBackendMzR` backend should be
 #' simplified as much as possible and reuse the methods already implemented for
-#' `ChromBackendMemory` when possible.
+#' `ChromBackendMemory` when possible. 
 #'
 #' @param BPPARAM Parallel setup configuration. See [BiocParallel::bpparam()]
 #'        for more information.

diff --git a/R/ChromBackendSpectra.R b/R/ChromBackendSpectra.R
@@ -42,6 +42,20 @@ NULL
 #' replacement is unsupported — modifications are temporary to optimize memory.
 #' The `inMemory` slot indicates this with `TRUE`.
 #'
+#' **Spectra Sort Index**: The `ChromBackendSpectra` backend maintains a
+#' `spectraSortIndex` slot that stores a sort order for the internal `Spectra`
+#' object based on `dataOrigin` and `rtime`. This avoids the need to physically
+#' reorder disk-backed `Spectra` objects, which would require loading all data
+#' into memory. The sort index is automatically recalculated whenever the
+#' `factorize()` method is called, ensuring it remains valid and consistent.
+#'
+#' **Factorize and Subsetting**: The `factorize()` method updates the
+#' `chromSpectraIndex` in both `chromData` and the `spectra` object to reflect
+#' the current grouping, and recalculates `spectraSortIndex` to maintain the
+#' correct sort order. The `[` subsetting operator subsets `chromData`,
+#' `peaksData`, and `spectra` together, and updates the `spectraSortIndex` so
+#' that it references valid positions in the subsetted data.
+#'
 #' `ChromBackendSpectra` should reuse `ChromBackendMemory` methods whenever
 #' possible to keep implementations simple.
 #'
@@ -131,7 +145,8 @@ ChromBackendSpectra <- setClass(
     slots = c(
         inMemory = "logical",
         spectra = "Spectra",
-        summaryFun = "function"
+        summaryFun = "function",
+        spectraSortIndex = "integer"
     ),
     prototype = prototype(
         chromData = fillCoreChromVariables(data.frame()),
@@ -140,7 +155,8 @@ ChromBackendSpectra <- setClass(
         spectra = Spectra(),
         version = "0.1",
         inMemory = FALSE,
-        summaryFun = sumi
+        summaryFun = sumi,
+        spectraSortIndex = integer()
     )
 )
 
@@ -181,12 +197,13 @@ setMethod("backendInitialize", "ChromBackendSpectra",
                        "it needs to be part of the `coreChromVariables()` ",
                        "available.")
               ## Spectra object are not expected to be ordered by rtime,
-              ## so we fix that below.
-              spectra <- lapply(split(spectra, spectra$dataOrigin),
-                                function(x) {
-                  x[order(x$rtime)]
-              })
-              spectra <- concatenateSpectra(spectra)
+              ## so we store a sort index instead of reordering them.
+              ## This allows us to keep disk-backed backends intact.
+              sort_idx <- order(
+                  spectra$dataOrigin,
+                  spectra$rtime
+              )
+              object@spectraSortIndex <- sort_idx
               object@chromData <- chromData
               object@spectra <- spectra
 
@@ -210,7 +227,7 @@ setMethod("show", "ChromBackendSpectra", function(object) {
 })
 
 #' @rdname ChromBackendSpectra
-#' @note ensure that it returns a factor
+#' @export
 chromSpectraIndex <- function(object) {
     if (!is(object, "ChromBackendSpectra"))
         stop("The object must be a 'ChromBackendSpectra' object.")
@@ -228,32 +245,48 @@ setMethod("factorize", "ChromBackendSpectra",
                      spectraVariables(.spectra(object))))
                   stop("All 'factorize.by' variables must be in the ",
                        "Spectra object.")
-           spectra_f <- interaction(as.list(
+
+            spectra_f <- interaction(as.list(
                spectraData(.spectra(object))[,
-                                                    factorize.by, drop = FALSE]),
+                                            factorize.by, drop = FALSE]),
                drop = TRUE, sep = "_")
 
-           cd <- .chromData(object)
-          if (nrow(cd)) {
-              if (!all(factorize.by %in% chromVariables(object)))
-                  stop("All 'factorize.by' variables must be in chromData.")
-              cd$chromSpectraIndex <- interaction(cd[, factorize.by,
-                                                     drop = FALSE],
-                                                  drop = TRUE, sep = "_")
-              levels(spectra_f) <- levels(cd$chromSpectraIndex)
-              object@spectra$chromSpectraIndex <- droplevels(spectra_f)
-              object@chromData <- .ensure_rt_mz_columns(cd,
-                                                        .spectra(object),
-                                                        spectra_f)
-          } else {
-              object@spectra$chromSpectraIndex <- spectra_f
-              full_sp <- do.call(rbindFill,
-                                 lapply(split(.spectra(object), spectra_f),
-                                        .spectra_format_chromData))
-              rownames(full_sp) <- NULL
-              object@chromData <- full_sp
-              }
-          object
+            cd <- .chromData(object)
+
+            if (nrow(cd)) {
+                ## chromData exists: validate and align spectra to it
+                if (!all(factorize.by %in% chromVariables(object)))
+                    stop("All 'factorize.by' variables must be in chromData.")
+
+                cd$chromSpectraIndex <- interaction(cd[, factorize.by,
+                                                        drop = FALSE],
+                                                     drop = TRUE, sep = "_")
+
+                object@spectra$chromSpectraIndex <- factor(as.character(spectra_f),
+                                                           levels = levels(cd$chromSpectraIndex))
+
+                sorted_spectra <- .spectra(object)[object@spectraSortIndex]
+                sorted_spectra_f <- spectra_f[object@spectraSortIndex]
+
+                object@chromData <- .ensure_rt_mz_columns(cd,
+                                                          sorted_spectra,
+                                                          sorted_spectra_f)
+            } else {
+                ## chromData is empty: create it from spectra
+                object@spectra$chromSpectraIndex <- spectra_f
+                full_sp <- do.call(rbindFill,
+                                   lapply(split(.spectra(object), spectra_f),
+                                          .spectra_format_chromData))
+                rownames(full_sp) <- NULL
+                object@chromData <- full_sp
+            }
+
+            object@spectraSortIndex <- order(
+                object@spectra$dataOrigin,
+                object@spectra$rtime
+            )
+
+            object
           })
 
 #' @rdname hidden_aliases
@@ -276,9 +309,11 @@ setMethod(
         }
         ## Ensure chromSpectraIndex only contains relevant levels needed
         valid_f <- chromSpectraIndex(object)
-        current_vals <- as.character(.spectra(object)$chromSpectraIndex)
+        ## Apply the sort index to spectra for processing
+        sorted_spectra <- .spectra(object)[object@spectraSortIndex]
+        current_vals <- as.character(sorted_spectra$chromSpectraIndex)
         if (!setequal(unique(current_vals), levels(valid_f))) {
-            object@spectra$chromSpectraIndex <- factor(
+            sorted_spectra$chromSpectraIndex <- factor(
                 current_vals,
                 levels = levels(valid_f)
             )
@@ -287,8 +322,8 @@ setMethod(
         pd <- mapply(.process_peaks_data,
             cd = split(chromData(object), valid_f),
             s = split(
-                .spectra(object),
-                .spectra(object)$chromSpectraIndex
+                sorted_spectra,
+                sorted_spectra$chromSpectraIndex
             ),
             MoreArgs = list(
                 columns = columns,
@@ -323,11 +358,38 @@ setMethod(
 
 #' @rdname hidden_aliases
 #' @importMethodsFrom S4Vectors [ [[
+#' @importFrom MsCoreUtils i2index
 #' @export
 setMethod("[", "ChromBackendSpectra", function(x, i, j, ...) {
     if (!length(i))
         return(ChromBackendSpectra())
-    callNextMethod()
+
+    i <- i2index(i, length = length(x))
+
+    ## Resolve the chromSpectraIndex values of the requested chromatograms
+    ## *before* `chromData` is subset; looking them up afterwards would
+    ## apply `i` a second time to the already-subset data.
+    kept_indices <- chromSpectraIndex(x)[i]
+
+    ## Subset chromData and peaksData directly (replaces callNextMethod())
+    x@chromData <- .chromData(x)[i, , drop = FALSE]
+    x@peaksData <- .peaksData(x)[i]
+
+    ## Keep only the spectra that belong to the retained chromatograms
+    spectra_keep <- x@spectra$chromSpectraIndex %in% kept_indices
+    x@spectra <- x@spectra[spectra_keep]
+
+    ## Drop sort-index entries of removed spectra and renumber the remaining
+    ## ones by their rank among the kept positions (sort order is preserved).
+    old_positions_kept <- which(spectra_keep)
+    kept_sort <- x@spectraSortIndex[x@spectraSortIndex %in% old_positions_kept]
+    x@spectraSortIndex <- match(kept_sort, old_positions_kept)
+
+    ## Ensure chromSpectraIndex levels are still consistent
+    x@chromData$chromSpectraIndex <- droplevels(x@chromData$chromSpectraIndex)
+    x@spectra$chromSpectraIndex <- droplevels(x@spectra$chromSpectraIndex)
+
+    x
 })
 
 #' @rdname hidden_aliases

diff --git a/R/Chromatograms.R b/R/Chromatograms.R
@@ -17,7 +17,14 @@ NULL
 #' metadata. The chromatographic data is represented by a *backend* extending
 #' the virtual [ChromBackend] class which provides the raw data to the
 #' `Chromatograms` object. Different backends and their properties are
-#' decribed in the [ChromBackend] class documentation.
+#' described in the [ChromBackend] class documentation.
+#'
+#' **Available Backends**: The package provides several backends:
+#' - `ChromBackendMemory`: Stores data in memory (default, ideal for small datasets).
+#' - `ChromBackendMzR`: Reads peaks data from raw MS files on demand.
+#' - `ChromBackendSpectra`: Generates chromatographic data from a `Spectra` object.
+#'   This backend supports both in-memory and file-backed `Spectra` objects, using
+#'   an internal `spectraSortIndex` to avoid physically reordering the spectra.
 #'
 #' @section Creation of objects:
 #'

diff --git a/R/helpers.R b/R/helpers.R
@@ -328,19 +328,27 @@
 #' - `backendInitialize()` for `ChrombackendSpectra`
 #' @noRd
 .spectra_format_chromData <- function(sps) {
-    data.frame(
+    res <- data.frame(
         msLevel = unique(sps$msLevel),
         rtMin = min(sps$rtime, na.rm = TRUE),
         rtMax = max(sps$rtime, na.rm = TRUE),
         mzMin = -Inf,
         mzMax = Inf,
         mz = Inf,
-        polarity = sps$polarity[1],
-        scanWindowLowerLimit = sps$scanWindowLowerLimit[1],
-        scanWindowUpperLimit = sps$scanWindowUpperLimit[1],
         dataOrigin = unique(sps$dataOrigin),
         chromSpectraIndex = unique(sps$chromSpectraIndex)
     )
+    ## Optional variables: copied from the first spectrum, only if `sps` has them
+    if ("polarity" %in% Spectra::spectraVariables(sps)) {
+        res$polarity <- sps$polarity[1]
+    }
+    if ("scanWindowLowerLimit" %in% Spectra::spectraVariables(sps)) {
+        res$scanWindowLowerLimit <- sps$scanWindowLowerLimit[1]
+    }
+    if ("scanWindowUpperLimit" %in% Spectra::spectraVariables(sps)) {
+        res$scanWindowUpperLimit <- sps$scanWindowUpperLimit[1]
+    }
+    res
 }
 
 #' Used in:
@@ -363,13 +371,16 @@
             stop("Both 'rtMin' and 'rtMax' must be present if one",
                  " is provided.")
         } else {
-            rt_range <- lapply(split(spectra$rtime, spectra_f), function(rt) {
-                list(rtMin = min(rt, na.rm = TRUE),
-                     rtMax = max(rt, na.rm = TRUE))
-            })
-            rt_values <- do.call(rbind, rt_range)
-            chrom_data$rtMin <- rt_values[, "rtMin"]
-            chrom_data$rtMax <- rt_values[, "rtMax"]
+            levs <- levels(spectra_f)
+            if (is.null(levs)) {
+                levs <- unique(as.character(spectra_f))
+            }
+            rt_mat <- vapply(levs, function(lvl) {
+                range(spectra$rtime[spectra_f == lvl], na.rm = TRUE)
+            }, numeric(2))
+            chrom_idx <- as.character(chrom_data$chromSpectraIndex)
+            chrom_data$rtMin <- rt_mat[1, chrom_idx]
+            chrom_data$rtMax <- rt_mat[2, chrom_idx]
         }
     }
     chrom_data
@@ -505,13 +516,18 @@
 ## - BackendInitialize, chrombackendSPectra method
 #' @noRd
 .map_spectra_vars <- function(object, spectraVariables) {
-    ## check variable validity�
+    ## check variable validity
     spectra <- .spectra(object)
     cd <- .chromData(object)
     if (!all(spectraVariables %in% spectraVariables(spectra)))
         stop("All 'spectraVariables' must exist in 'spectra'.")
-    if (any(spectraVariables %in% colnames(cd)))
-        stop("None of the 'spectraVariables' must already exist in 'chromData'.")
+    if (any(spectraVariables %in% colnames(cd))) {
+        existing <- intersect(spectraVariables, colnames(cd))
+        non_replaceable <- vapply(existing, function(v) !all(is.na(cd[[v]])), logical(1))
+        if (any(non_replaceable)) {
+            stop("None of the 'spectraVariables' must already exist in 'chromData'.")
+        }
+    }
     idx <- spectra$chromSpectraIndex
     spd <- spectraData(spectra, columns = spectraVariables)
 

diff --git a/man/ChromBackendSpectra.Rd b/man/ChromBackendSpectra.Rd