diff --git a/NAMESPACE b/NAMESPACE index 0efc422a..4990232d 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -13,10 +13,14 @@ S3method(py_to_r,collections.abc.Mapping) S3method(r_to_py,AbstractAnnData) export(AnnData) export(AnnDataView) +export(HDF5AnnData) +export(InMemoryAnnData) +export(ReticulateAnnData) export(as_AnnData) export(generate_dataset) export(get_generator_types) export(read_h5ad) +export(register_anndata_coercions) export(write_h5ad) importFrom(Matrix,as.matrix) importFrom(Matrix,sparseMatrix) diff --git a/NEWS.md b/NEWS.md index c51344af..058fb63e 100644 --- a/NEWS.md +++ b/NEWS.md @@ -5,6 +5,7 @@ * Implemented an `AnnDataView` class, which provides a lazy view of an `AnnData` object without copying data (PR #1096) * Implemented S3 methods for `AbstractAnnData` objects: `dim`, `nrow`, `ncol`, `dimnames`, `rownames`, `colnames`, and `[` (PR #1096) * Add `ReticulateAnnData` class for seamless Python integration via **{reticulate}** (PR #322) +* Added S4 `as()` coercions linking AnnData implementations with `SingleCellExperiment` and `Seurat` objects (PR #358) ## Major changes @@ -29,6 +30,7 @@ - Add checks for type arguments to `generate_dataset()` (PR #354) - Generalise the layers created by `generate_dataset()` when `format = "Seurat"` (PR #354) +- Fix inconsistency in `output_class` argument values across conversion functions (PR #358) ## Bug fixes diff --git a/R/AnnDataView.R b/R/AnnDataView.R index c52e33c3..3ecaa039 100644 --- a/R/AnnDataView.R +++ b/R/AnnDataView.R @@ -349,7 +349,6 @@ AnnDataView <- R6::R6Class( #' @param context_name Name for error messages ("observations" or "variables") #' #' @return Integer vector of indices, or NULL if subset is NULL -#' @keywords internal #' @noRd convert_to_indices <- function( subset, diff --git a/R/HDF5AnnData.R b/R/HDF5AnnData.R index 6ec276d4..2c12977a 100644 --- a/R/HDF5AnnData.R +++ b/R/HDF5AnnData.R @@ -6,6 +6,7 @@ #' See [AnnData-usage] for details on creating and using `AnnData` objects. 
#' #' @return An `HDF5AnnData` object +#' @export #' #' @seealso [AnnData-usage] for details on creating and using `AnnData` objects #' diff --git a/R/InMemoryAnnData.R b/R/InMemoryAnnData.R index 1e03aea3..3b7f42cc 100644 --- a/R/InMemoryAnnData.R +++ b/R/InMemoryAnnData.R @@ -10,6 +10,7 @@ #' @seealso [AnnData-usage] for details on creating and using `AnnData` objects #' #' @family AnnData classes +#' @export #' #' @examples #' ## complete example diff --git a/R/ReticulateAnnData.R b/R/ReticulateAnnData.R index b712fbd8..fb33e8f6 100644 --- a/R/ReticulateAnnData.R +++ b/R/ReticulateAnnData.R @@ -8,6 +8,7 @@ #' See [AnnData-usage] for details on creating and using `AnnData` objects. #' #' @return A `ReticulateAnnData` object +#' @export #' #' @seealso [AnnData-usage] for details on creating and using `AnnData` objects #' diff --git a/R/anndataR-package.R b/R/anndataR-package.R index 97507bf7..ef987a41 100644 --- a/R/anndataR-package.R +++ b/R/anndataR-package.R @@ -12,6 +12,11 @@ ## usethis namespace: end NULL +.onLoad <- function(libname, pkgname) { + # Register S4 coercion methods + .register_as_coercions() +} + .onAttach <- function(libname, pkgname) { # Check if the R anndata package is loaded and warn about conflicts if ("anndata" %in% loadedNamespaces()) { diff --git a/R/as-coercions.R b/R/as-coercions.R new file mode 100644 index 00000000..ea33d889 --- /dev/null +++ b/R/as-coercions.R @@ -0,0 +1,270 @@ +#' Coercion helpers for `as()` +#' +#' These helper registrations wire up S4-style `as()` conversions so that +#' AnnData implementations (including [`InMemoryAnnData`], [`HDF5AnnData`], and +#' [`ReticulateAnnData`]) as well as [`SingleCellExperiment`] and +#' [`SeuratObject::Seurat`] objects can be coerced +#' between one another without the caller needing to know the underlying helper +#' functions. 
Because `as()` cannot accept additional arguments, conversions +#' that require them (such as writing HDF5-backed AnnData objects) raise an +#' informative error pointing to the richer interface. +#' +#' @noRd +NULL + +# Class compatibility registrations ----------------------------------------- + +.register_oldclass <- function(class, super = character()) { + if (!methods::isClass(class)) { + methods::setOldClass(c(class, super)) + } +} + +.as_abort_extra_args <- function(from, to, helper) { + cli::cli_abort( + c( + "Can't coerce {.cls {from}} to {.cls {to}} with {.fun as} as extra arguments are required", + "i" = helper + ), + call = rlang::caller_env() + ) +} + +.warn_as_limited <- function(recommendation) { + cli::cli_warn( + c( + "Using {.fun as} to coerce objects limits control over data mapping", + "i" = recommendation + ), + call = rlang::caller_env() + ) +} + +# Handler constructors ------------------------------------------------------- + +.make_convert_handler <- function(converter, warn = NULL, pre = NULL) { + force(warn) + force(pre) + + function(from) { + if (!is.null(pre)) { + pre(from) + } + if (!is.null(warn)) { + .warn_as_limited(warn) + } + converter(from) + } +} + +.make_abort_handler <- function(from_class, to_class, helper) { + force(from_class) + force(to_class) + force(helper) + + function(from) { + .as_abort_extra_args(from_class, to_class, helper) + } +} + +.register_set_as_rules <- function(rules) { + for (rule in rules) { + tryCatch( + methods::setAs(rule$from, rule$to, rule$handler), + error = function(e) { + # Silently skip if environment is locked (e.g., during devtools::document()) + if (!grepl("locked", e$message)) { + stop(e) # Re-throw if it's not a locking error + } + } + ) + } +} + +.format_control_recommendation <- function(call_expr) { + sprintf( + "Prefer {.code %s} for fine-grained control over data mapping", + call_expr + ) +} + +#' Register S4 coercion methods +#' +#' This function registers all S4 coercion methods for 
converting between +#' `AnnData` objects and other formats. It's called automatically when +#' \pkg{anndataR} is loaded, but can also be called manually if you load +#' \pkg{SingleCellExperiment}, \pkg{Seurat}, or \pkg{SeuratObject} after loading +#' \pkg{anndataR}. +#' +#' @return NULL (invisibly). Called for its side effect of registering S4 methods. +#' @export +#' @examples +#' \dontrun{ +#' # If you load suggested packages after anndataR: +#' library(anndataR) +#' library(SingleCellExperiment) +#' register_anndata_coercions() # Now as() will work +#' } +register_anndata_coercions <- function() { + .register_as_coercions() + invisible(NULL) +} + +.register_as_coercions <- function() { + # Register old-style classes for S4 compatibility + .register_oldclass("AbstractAnnData", "R6") + .register_oldclass("InMemoryAnnData", c("AbstractAnnData", "R6")) + .register_oldclass("HDF5AnnData", c("AbstractAnnData", "R6")) + .register_oldclass("ReticulateAnnData", c("AbstractAnnData", "R6")) + .register_oldclass("AnnDataView", c("AbstractAnnData", "R6")) + + # AnnData <-> AnnData coercion rules -------------------------------------- + warn_ann_inmemory <- .format_control_recommendation( + "adata$as_InMemoryAnnData(...)" + ) + warn_ann_reticulate <- .format_control_recommendation( + "adata$as_ReticulateAnnData(...)" + ) + + anndata_rules <- list( + list( + from = "AbstractAnnData", + to = "InMemoryAnnData", + handler = .make_convert_handler( + converter = as_InMemoryAnnData, + warn = warn_ann_inmemory + ) + ), + list( + from = "AbstractAnnData", + to = "ReticulateAnnData", + handler = .make_convert_handler( + converter = as_ReticulateAnnData, + warn = warn_ann_reticulate + ) + ), + list( + from = "AbstractAnnData", + to = "HDF5AnnData", + handler = .make_abort_handler( + from_class = "AbstractAnnData", + to_class = "HDF5AnnData", + helper = "Use {.code adata$as_HDF5AnnData(file = )} to provide the output file" + ) + ) + ) + + .register_set_as_rules(anndata_rules) + + # 
SingleCellExperiment coercion rules --------------------------------------- + + # Only register coercion methods if SingleCellExperiment is available + # This prevents NOTEs about undefined classes during package load + if (rlang::is_installed("SingleCellExperiment")) { + warn_customise <- .format_control_recommendation("as_AnnData(...)") + warn_sce <- .format_control_recommendation( + "adata$as_SingleCellExperiment(...)" + ) + + single_cell_rules <- list( + list( + from = "SingleCellExperiment", + to = "InMemoryAnnData", + handler = .make_convert_handler( + converter = function(from) { + as_AnnData(from, output_class = "InMemoryAnnData") + }, + warn = warn_customise + ) + ), + list( + from = "SingleCellExperiment", + to = "ReticulateAnnData", + handler = .make_convert_handler( + converter = function(from) { + as_AnnData(from, output_class = "ReticulateAnnData") + }, + warn = warn_customise + ) + ), + list( + from = "SingleCellExperiment", + to = "HDF5AnnData", + handler = .make_abort_handler( + from_class = "SingleCellExperiment", + to_class = "HDF5AnnData", + helper = paste( + "Use {.code as_AnnData(from, output_class = \"HDF5AnnData\",", + "filename = )} to provide the output file" + ) + ) + ), + list( + from = "AbstractAnnData", + to = "SingleCellExperiment", + handler = .make_convert_handler( + converter = as_SingleCellExperiment, + warn = warn_sce + ) + ) + ) + + .register_set_as_rules(single_cell_rules) + } + + # Seurat coercion rules ----------------------------------------------------- + + # Only register coercion methods if SeuratObject is available + # This prevents NOTEs about undefined classes during package load + if (rlang::is_installed("SeuratObject")) { + warn_customise <- .format_control_recommendation("as_AnnData(...)") + warn_seurat <- .format_control_recommendation("adata$as_Seurat(...)") + + seurat_rules <- list( + list( + from = "Seurat", + to = "InMemoryAnnData", + handler = .make_convert_handler( + converter = function(from) { + 
as_AnnData(from, output_class = "InMemoryAnnData") + }, + warn = warn_customise + ) + ), + list( + from = "Seurat", + to = "ReticulateAnnData", + handler = .make_convert_handler( + converter = function(from) { + as_AnnData(from, output_class = "ReticulateAnnData") + }, + warn = warn_customise + ) + ), + list( + from = "Seurat", + to = "HDF5AnnData", + handler = .make_abort_handler( + from_class = "Seurat", + to_class = "HDF5AnnData", + helper = paste( + "Use {.code as_AnnData(from, output_class = \"HDF5AnnData\",", + "filename = )} to provide the output file" + ) + ) + ), + list( + from = "AbstractAnnData", + to = "Seurat", + handler = .make_convert_handler( + converter = as_Seurat, + warn = warn_seurat + ) + ) + ) + + .register_set_as_rules(seurat_rules) + } + + invisible(NULL) +} diff --git a/R/as_AnnData.R b/R/as_AnnData.R index 0c7c66d0..6588cff3 100644 --- a/R/as_AnnData.R +++ b/R/as_AnnData.R @@ -162,7 +162,7 @@ as_AnnData <- function( varp_mapping = TRUE, uns_mapping = TRUE, assay_name = NULL, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c("InMemoryAnnData", "HDF5AnnData", "ReticulateAnnData"), ... ) { UseMethod("as_AnnData", x) @@ -182,7 +182,7 @@ as_AnnData.SingleCellExperiment <- function( varp_mapping = TRUE, uns_mapping = TRUE, assay_name = TRUE, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c("InMemoryAnnData", "HDF5AnnData", "ReticulateAnnData"), ... ) { from_SingleCellExperiment( @@ -215,7 +215,7 @@ as_AnnData.Seurat <- function( varp_mapping = TRUE, uns_mapping = TRUE, assay_name = NULL, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c("InMemoryAnnData", "HDF5AnnData", "ReticulateAnnData"), ... 
) { from_Seurat( diff --git a/R/from_Seurat.R b/R/from_Seurat.R index f49f2d9a..bfaeeddb 100644 --- a/R/from_Seurat.R +++ b/R/from_Seurat.R @@ -30,7 +30,7 @@ from_Seurat <- function( obsp_mapping = TRUE, varp_mapping = TRUE, uns_mapping = TRUE, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c("InMemoryAnnData", "HDF5AnnData", "ReticulateAnnData"), ... ) { check_requires("Converting Seurat to AnnData", c("SeuratObject", "Seurat")) diff --git a/R/from_SingleCellExperiment.R b/R/from_SingleCellExperiment.R index 88a5e6a2..b7c0e04e 100644 --- a/R/from_SingleCellExperiment.R +++ b/R/from_SingleCellExperiment.R @@ -28,7 +28,7 @@ from_SingleCellExperiment <- function( obsp_mapping = TRUE, varp_mapping = TRUE, uns_mapping = TRUE, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c("InMemoryAnnData", "HDF5AnnData", "ReticulateAnnData"), ... ) { check_requires( diff --git a/man/as_AnnData.Rd b/man/as_AnnData.Rd index fe935e98..e66cf3b2 100644 --- a/man/as_AnnData.Rd +++ b/man/as_AnnData.Rd @@ -18,7 +18,7 @@ as_AnnData( varp_mapping = TRUE, uns_mapping = TRUE, assay_name = NULL, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c("InMemoryAnnData", "HDF5AnnData", "ReticulateAnnData"), ... ) @@ -34,7 +34,7 @@ as_AnnData( varp_mapping = TRUE, uns_mapping = TRUE, assay_name = TRUE, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c("InMemoryAnnData", "HDF5AnnData", "ReticulateAnnData"), ... ) @@ -50,7 +50,7 @@ as_AnnData( varp_mapping = TRUE, uns_mapping = TRUE, assay_name = NULL, - output_class = c("InMemory", "HDF5AnnData", "ReticulateAnnData"), + output_class = c("InMemoryAnnData", "HDF5AnnData", "ReticulateAnnData"), ... 
) } diff --git a/man/generate_dataset.Rd b/man/generate_dataset.Rd index af050e62..0010834c 100644 --- a/man/generate_dataset.Rd +++ b/man/generate_dataset.Rd @@ -81,8 +81,8 @@ complete object, use \code{format = "AnnData"} followed by \code{adata$as_SingleCellExperiment()} or \code{adata$as_Seurat()}. Use \code{get_generator_types()} to get a list of the available types for each -slot, or for a specific slot by setting \verb{slot = }. If \code{example = TRUE}, only -the example types are returned. +slot, or for a specific slot by setting \code{slot}. If \code{example = TRUE}, only the +example types are returned. } \examples{ # Generate all types as a list diff --git a/man/register_anndata_coercions.Rd b/man/register_anndata_coercions.Rd new file mode 100644 index 00000000..f1b9100b --- /dev/null +++ b/man/register_anndata_coercions.Rd @@ -0,0 +1,26 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/as-coercions.R +\name{register_anndata_coercions} +\alias{register_anndata_coercions} +\title{Register S4 coercion methods} +\usage{ +register_anndata_coercions() +} +\value{ +NULL (invisibly). Called for its side effect of registering S4 methods. +} +\description{ +This function registers all S4 coercion methods for converting between +\code{AnnData} objects and other formats. It's called automatically when +\pkg{anndataR} is loaded, but can also be called manually if you load +\pkg{SingleCellExperiment}, \pkg{Seurat}, or \pkg{SeuratObject} after loading +\pkg{anndataR}. 
+} +\examples{ +\dontrun{ +# If you load suggested packages after anndataR: +library(anndataR) +library(SingleCellExperiment) +register_anndata_coercions() # Now as() will work +} +} diff --git a/tests/testthat/test-as-coercions.R b/tests/testthat/test-as-coercions.R new file mode 100644 index 00000000..946d0d1e --- /dev/null +++ b/tests/testthat/test-as-coercions.R @@ -0,0 +1,105 @@ +test_that("as() from SingleCellExperiment to InMemoryAnnData warns and coerces", { + skip_if_not_installed("SingleCellExperiment") + + counts <- matrix(as.numeric(1:6), nrow = 3, ncol = 2) + colnames(counts) <- paste0("cell", 1:2) + rownames(counts) <- paste0("gene", 1:3) + + sce <- SingleCellExperiment::SingleCellExperiment( + assays = list(counts = counts) + ) + + expect_warning( + ad <- methods::as(sce, "InMemoryAnnData"), + "Prefer `as_AnnData\\(" + ) + expect_true(inherits(ad, "InMemoryAnnData")) + expect_equal(ad$n_obs(), ncol(sce)) + expect_equal(ad$n_vars(), nrow(sce)) + + expect_warning( + sce_roundtrip <- methods::as(ad, "SingleCellExperiment"), + "as_SingleCellExperiment" + ) + expect_s4_class(sce_roundtrip, "SingleCellExperiment") + expect_equal(dim(sce_roundtrip), dim(sce)) +}) + +test_that("as() refuses HDF5AnnData without extra arguments", { + skip_if_not_installed("SingleCellExperiment") + + sce <- SingleCellExperiment::SingleCellExperiment( + assays = list(counts = matrix(1, nrow = 1, ncol = 1)) + ) + + expect_error( + methods::as(sce, "HDF5AnnData"), + "Use `as_AnnData" + ) +}) + +test_that("as() converts AnnDataView to InMemoryAnnData with warning", { + ad <- AnnData(X = matrix(1:4, nrow = 2)) + view <- ad[1, ] + expect_warning( + view_materialised <- methods::as(view, "InMemoryAnnData"), + "adata\\$as_InMemoryAnnData" + ) + expect_true(inherits(view_materialised, "InMemoryAnnData")) + expect_equal(view_materialised$n_obs(), 1L) + expect_equal(view_materialised$n_vars(), ad$n_vars()) + + expect_error( + methods::as(ad, "HDF5AnnData"), + "adata\\$as_HDF5AnnData" + ) 
+}) + +test_that("as() converts AnnData to ReticulateAnnData when available", { + skip_if_no_anndata_py() + + ad <- AnnData(X = matrix(1:4, nrow = 2)) + + expect_warning( + ad_reticulate <- methods::as(ad, "ReticulateAnnData"), + "adata\\$as_ReticulateAnnData" + ) + expect_true(inherits(ad_reticulate, "ReticulateAnnData")) + + expect_warning( + ad_roundtrip <- methods::as(ad_reticulate, "InMemoryAnnData"), + "adata\\$as_InMemoryAnnData" + ) + expect_true(inherits(ad_roundtrip, "InMemoryAnnData")) + expect_equal(ad_roundtrip$n_obs(), ad$n_obs()) + expect_equal(ad_roundtrip$n_vars(), ad$n_vars()) +}) + +test_that("as() between Seurat and AnnData warns appropriately", { + skip_if_not_installed("Seurat") + skip_if_not_installed("SeuratObject") + skip_if_not_installed("Matrix") + + counts <- Matrix::Matrix(matrix(1:4, nrow = 2), sparse = TRUE) + colnames(counts) <- paste0("cell", 1:2) + rownames(counts) <- paste0("gene", 1:2) + + seurat_obj <- SeuratObject::CreateSeuratObject(counts = counts) + + expect_warning( + ad <- methods::as(seurat_obj, "InMemoryAnnData"), + "Prefer `as_AnnData\\(" + ) + expect_true(inherits(ad, "InMemoryAnnData")) + + expect_warning( + seurat_roundtrip <- methods::as(ad, "Seurat"), + "as_Seurat" + ) + expect_true(inherits(seurat_roundtrip, "Seurat")) + + expect_error( + methods::as(seurat_obj, "HDF5AnnData"), + "Use `as_AnnData" + ) +})