Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/check-bioc.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ jobs:
fail-fast: false
matrix:
config:
- { os: ubuntu-latest, r: 'devel', bioc: '3.22', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
- { os: ubuntu-latest, r: 'devel', bioc: '3.23', cont: "bioconductor/bioconductor_docker:devel", rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest" }
- { os: macOS-latest, r: 'latest', bioc: '3.22'}
- { os: windows-latest, r: 'latest', bioc: '3.22'}
env:
Expand Down
3 changes: 2 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: Chromatograms
Title: Infrastructure for Chromatographic Mass Spectrometry Data
Version: 0.99.7
Version: 1.1.1
Description: The Chromatograms package defines an efficient infrastructure
for storing and handling of chromatographic mass spectrometry data. It
provides different implementations of *backends* to store and represent the
Expand Down Expand Up @@ -41,6 +41,7 @@ Suggests:
mzR (>= 2.41.4),
MsBackendMetaboLights (>= 1.3.1),
vdiffr,
IRanges,
RColorBrewer
License: Artistic-2.0
Encoding: UTF-8
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
export(ChromBackendMemory)
export(ChromBackendMzR)
export(ChromBackendSpectra)
export(chromSpectraIndex)
export(coreChromVariables)
export(corePeaksVariables)
export(fillCoreChromVariables)
Expand Down Expand Up @@ -123,6 +124,7 @@ importFrom(stats,filter)
importFrom(stats,loess)
importFrom(stats,predict)
importFrom(stats,sd)
importFrom(stats,setNames)
importFrom(stats,spline)
importFrom(utils,capture.output)
importFrom(utils,head)
Expand Down
14 changes: 13 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
# Version 0.99.7
# Version 1.1

## Changes in 1.1.1

- Aligned the package with the Bioconductor 3.22 release.
- Expanded the vignette to cover ChromBackendSpectra usage, chromatogram
extraction with `chromExtract()`, and imputation workflows via
`imputePeaksData()`.
- Added `spectraSortIndex()` for `ChromBackendSpectra` to compute the desired
retention-time order on demand, avoiding the need to keep on-disk `Spectra`
objects sorted in memory.

# Version 0.99

## Changes in 0.99.7

Expand Down
11 changes: 6 additions & 5 deletions R/ChromBackendMemory.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,7 @@ NULL
#'
#' @examples
#'
#' ## Create a ChromBackendMemory object
#' cbm <- ChromBackendMemory()
#'
#' ## Initialize the ChromBackendMemory object with a data.frame of
#' ## chromatographic data and a list of data.frame of peaks data
#' ## Method 1: Initialize backend directly
#' cdata <- data.frame(
#' msLevel = c(1L, 1L, 1L),
#' mz = c(112.2, 123.3, 134.4),
Expand All @@ -67,9 +63,14 @@ NULL
#' )
#' )
#'
#' cbm <- ChromBackendMemory()
#' cbm <- backendInitialize(cbm, chromData = cdata, peaksData = pdata)
#' cbm
#'
#' ## Method 2: Use Chromatograms constructor (recommended)
#' chr <- Chromatograms(ChromBackendMemory(), chromData = cdata, peaksData = pdata)
#' chr
#'
NULL

#' @noRd
Expand Down
2 changes: 1 addition & 1 deletion R/ChromBackendMzR.R
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ NULL
#'
#' Implementing functionalities with the `ChromBackendMzR` backend should be
#' simplified as much as possible and reuse the methods already implemented for
#' `ChromBackendMemory` when possible.
#' `ChromBackendMemory` when possible.
#'
#' @param BPPARAM Parallel setup configuration. See [BiocParallel::bpparam()]
#' for more information.
Expand Down
170 changes: 126 additions & 44 deletions R/ChromBackendSpectra.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,14 @@ NULL
#' condensing the `Spectra` data corresponding to each unique combination of
#' the `factorize.by` variables.
#'
#' By "factorization" we mean the process of grouping spectra
#' into chromatograms based on specified variables. For example, using
#' `factorize.by = c("msLevel", "dataOrigin")` means that all MS1 spectra from
#' file "A" form one chromatogram, all MS2 spectra from file "A" form another,
#' and so on. Each unique combination of the factorization variables creates
#' a separate chromatogram. This is essential for organizing spectral data into
#' meaningful chromatographic traces that can be visualized and analyzed.
#'
#' The *dataOrigin* core chromatogram variable should reflect the *dataOrigin*
#' of the `Spectra` object. The `factorize.by` parameter defines the variables
#' for grouping `Spectra` data into chromatographic data. The default is
Expand All @@ -42,6 +50,24 @@ NULL
#' replacement is unsupported — modifications are temporary to optimize memory.
#' The `inMemory` slot indicates this with `TRUE`.
#'
#' **Spectra Sort Index**: The `ChromBackendSpectra` backend maintains a
#' `spectraSortIndex` slot that stores a sort order for the internal `Spectra`
#' object based on `dataOrigin` and `rtime`. The sort order is always
#' computed, but to optimize performance the index is only stored when the
#' spectra are unsorted; if already
#' sorted (which is typical for most real-world data), `spectraSortIndex` remains
#' empty (`integer()`). This avoids unnecessary subsetting operations. The sort
#' index is automatically recalculated whenever the `factorize()` method is called,
#' ensuring it remains valid and consistent. This approach avoids the need to
#' physically reorder disk-backed `Spectra` objects, which would require loading
#' all data into memory.
#'
#' **Factorize and Subsetting**: The `factorize()` method updates the
#' `chromSpectraIndex` in both `chromData` and the `spectra` object to reflect
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you mean the actual Spectra object you should write Spectra, not spectra. If you mean the slot, you could actually also write @spectra or write that it's the Spectra stored in the internal @spectra slot? Actually, the same/similar thing maybe also for the chromData - and all other internal slots

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok i went through the package and tried to follow this.

#' the current grouping, and recalculates `spectraSortIndex` to maintain the
#' correct sort order. The `[` subsetting operator properly handles subsetting
#' of `chromData`, `peaksData`, and `spectra` together, while updating the
#' `spectraSortIndex` to reference valid positions in the subsetted data.
#'
#' `ChromBackendSpectra` should reuse `ChromBackendMemory` methods whenever
#' possible to keep implementations simple.
#'
Expand All @@ -53,8 +79,10 @@ NULL
#' `"dataOrigin"`.
#'
#' @param factorize.by A `character` vector of variables for grouping `Spectra`
#' data into chromatographic data.
#' Default: `c("msLevel", "dataOrigin")`.
#' data into chromatographic data (i.e., creating separate chromatograms
#' for each unique combination of these variables).
#' Default: `c("msLevel", "dataOrigin")`, which creates one chromatogram
#' per MS level per data file.
#' If `chromData` is provided, it must contain these columns.
#'
#' @param object A `ChromBackendSpectra` object.
Expand Down Expand Up @@ -131,7 +159,8 @@ ChromBackendSpectra <- setClass(
slots = c(
inMemory = "logical",
spectra = "Spectra",
summaryFun = "function"
summaryFun = "function",
spectraSortIndex = "integer"
),
prototype = prototype(
chromData = fillCoreChromVariables(data.frame()),
Expand All @@ -140,7 +169,8 @@ ChromBackendSpectra <- setClass(
spectra = Spectra(),
version = "0.1",
inMemory = FALSE,
summaryFun = sumi
summaryFun = sumi,
spectraSortIndex = integer()
)
)

Expand All @@ -166,12 +196,10 @@ setMethod("backendInitialize", "ChromBackendSpectra",
if (!is(spectra, "Spectra"))
stop("'spectra' must be a 'Spectra' object.")
if (!length(spectra)) return(object)

if (!all(factorize.by %in% spectraVariables(spectra)))
stop("All 'factorize.by' variables must exist in 'spectra'.")
if (!is.data.frame(chromData))
stop("'chromData' must be a 'data.frame'.")

if(!nrow(chromData))
chromData <- fillCoreChromVariables(data.frame())
else validChromData(chromData)
Expand All @@ -181,17 +209,19 @@ setMethod("backendInitialize", "ChromBackendSpectra",
"it needs to be part of the `coreChromVariables()` ",
"available.")
        ## Spectra objects are not expected to be ordered by rtime,
## so we fix that below.
spectra <- lapply(split(spectra, spectra$dataOrigin),
function(x) {
x[order(x$rtime)]
})
spectra <- concatenateSpectra(spectra)
## so we store a sort index instead of concatenating.
## This allows us to keep disk-backed backends intact.
## Only store sort index if data is actually unsorted (optimization).
sort_idx <- order(
spectra$dataOrigin,
spectra$rtime
)
if (!identical(sort_idx, seq_along(spectra))) {
object@spectraSortIndex <- sort_idx
}
object@chromData <- chromData
object@spectra <- spectra

object <- factorize(object, factorize.by = factorize.by)

## map additional spectraVariables if any
if (length(spectraVariables)) {
object <- .map_spectra_vars(object,
Expand All @@ -210,7 +240,7 @@ setMethod("show", "ChromBackendSpectra", function(object) {
})

#' @rdname ChromBackendSpectra
#' @note ensure that it returns a factor
#' @export
chromSpectraIndex <- function(object) {
if (!is(object, "ChromBackendSpectra"))
stop("The object must be a 'ChromBackendSpectra' object.")
Expand All @@ -228,32 +258,52 @@ setMethod("factorize", "ChromBackendSpectra",
spectraVariables(.spectra(object))))
stop("All 'factorize.by' variables must be in the ",
"Spectra object.")
spectra_f <- interaction(as.list(
spectra_f <- interaction(as.list(
spectraData(.spectra(object))[,
factorize.by, drop = FALSE]),
factorize.by, drop = FALSE]),
drop = TRUE, sep = "_")

cd <- .chromData(object)
if (nrow(cd)) {
if (!all(factorize.by %in% chromVariables(object)))
stop("All 'factorize.by' variables must be in chromData.")
cd$chromSpectraIndex <- interaction(cd[, factorize.by,
drop = FALSE],
drop = TRUE, sep = "_")
levels(spectra_f) <- levels(cd$chromSpectraIndex)
object@spectra$chromSpectraIndex <- droplevels(spectra_f)
object@chromData <- .ensure_rt_mz_columns(cd,
.spectra(object),
spectra_f)
} else {
object@spectra$chromSpectraIndex <- spectra_f
full_sp <- do.call(rbindFill,
lapply(split(.spectra(object), spectra_f),
.spectra_format_chromData))
rownames(full_sp) <- NULL
object@chromData <- full_sp
}
object
cd <- .chromData(object)

if (nrow(cd)) {
## chromData exists: validate and align spectra to it
if (!all(factorize.by %in% chromVariables(object)))
stop("All 'factorize.by' variables must be in chromData.")
cd$chromSpectraIndex <- interaction(cd[, factorize.by,
drop = FALSE],
drop = TRUE, sep = "_")
object@spectra$chromSpectraIndex <- factor(as.character(spectra_f),
levels = levels(cd$chromSpectraIndex))
## Apply sort index for processing if needed
if (length(object@spectraSortIndex)) {
sorted_spectra <- .spectra(object)[object@spectraSortIndex]
sorted_spectra_f <- spectra_f[object@spectraSortIndex]
} else {
sorted_spectra <- .spectra(object)
sorted_spectra_f <- spectra_f
}
object@chromData <- .ensure_rt_mz_columns(cd,
sorted_spectra,
sorted_spectra_f)
} else {
## chromData is empty: create it from spectra
object@spectra$chromSpectraIndex <- spectra_f
full_sp <- do.call(rbindFill,
lapply(split(.spectra(object), spectra_f),
.spectra_format_chromData))
rownames(full_sp) <- NULL
object@chromData <- full_sp
}
## Recalculate sort index: only store if data is unsorted (optimization)
sort_idx <- order(
object@spectra$dataOrigin,
object@spectra$rtime
)
if (!identical(sort_idx, seq_along(object@spectra))) {
object@spectraSortIndex <- sort_idx
} else {
object@spectraSortIndex <- integer()
}
object
})

#' @rdname hidden_aliases
Expand All @@ -276,9 +326,15 @@ setMethod(
}
## Ensure chromSpectraIndex only contains relevant levels needed
valid_f <- chromSpectraIndex(object)
current_vals <- as.character(.spectra(object)$chromSpectraIndex)
## Apply the sort index to spectra for processing (only if unsorted)
if (length(object@spectraSortIndex)) {
sorted_spectra <- .spectra(object)[object@spectraSortIndex]
} else {
sorted_spectra <- .spectra(object)
}
current_vals <- as.character(sorted_spectra$chromSpectraIndex)
if (!setequal(unique(current_vals), levels(valid_f))) {
object@spectra$chromSpectraIndex <- factor(
sorted_spectra$chromSpectraIndex <- factor(
current_vals,
levels = levels(valid_f)
)
Expand All @@ -287,8 +343,8 @@ setMethod(
pd <- mapply(.process_peaks_data,
cd = split(chromData(object), valid_f),
s = split(
.spectra(object),
.spectra(object)$chromSpectraIndex
sorted_spectra,
sorted_spectra$chromSpectraIndex
),
MoreArgs = list(
columns = columns,
Expand Down Expand Up @@ -323,11 +379,37 @@ setMethod(

#' @rdname hidden_aliases
#' @importMethodsFrom S4Vectors [ [[
#' @importFrom MsCoreUtils i2index
#' @importFrom stats setNames
#' @export
setMethod("[", "ChromBackendSpectra", function(x, i, j, ...) {
    ## Subset the backend by chromatogram index `i`, keeping the `chromData`,
    ## `peaksData`, `spectra` and `spectraSortIndex` slots consistent with
    ## each other. An empty `i` returns a fresh, empty `ChromBackendSpectra`.
    ## `j` and `...` are accepted for signature compatibility but not used.
    if (!length(i))
        return(ChromBackendSpectra())

    ## Resolve `i` against the number of chromatograms, then subset the
    ## per-chromatogram slots directly (no call to the parent method, whose
    ## result would be discarded anyway).
    i <- i2index(i, length = length(x))
    kept_indices <- chromSpectraIndex(x)[i]
    x@chromData <- .chromData(x)[i, , drop = FALSE]
    x@peaksData <- .peaksData(x)[i]

    ## Retain only the spectra belonging to one of the kept chromatograms.
    spectra_keep <- x@spectra$chromSpectraIndex %in% kept_indices
    x@spectra <- x@spectra[spectra_keep]

    ## Update spectraSortIndex: remap old positions to new positions.
    ## e.g., if we kept positions c(2, 5, 7), they become c(1, 2, 3).
    ## `match()` yields the new position of every sort-index entry, or NA for
    ## entries pointing at spectra that were dropped; the NAs are removed
    ## while the relative (sorted) order of the remaining entries is kept.
    if (length(x@spectraSortIndex)) {
        new_positions <- match(x@spectraSortIndex, which(spectra_keep))
        x@spectraSortIndex <- new_positions[!is.na(new_positions)]
    }

    ## Drop factor levels that no longer occur after subsetting.
    x@chromData$chromSpectraIndex <- droplevels(x@chromData$chromSpectraIndex)
    x@spectra$chromSpectraIndex <- droplevels(x@spectra$chromSpectraIndex)
    x
})

#' @rdname hidden_aliases
Expand Down
Loading