|
| 1 | +#' Make a [SpatialExperiment-class][SpatialExperiment::SpatialExperiment-class] object `spatialLIBD`-compatible |
| 2 | +#' |
| 3 | +#' Adds gene info from a reference GTF, SpaceRanger analysis results, and other |
| 4 | +#' information to make a [SpatialExperiment-class][SpatialExperiment::SpatialExperiment-class] |
| 5 | +#' object compatible with `spatialLIBD::run_app()`. |
| 6 | +#' |
| 7 | +#' @inheritParams read10xVisiumWrapper |
| 8 | +#' @param spe A [SpatialExperiment-class][SpatialExperiment::SpatialExperiment-class] |
| 9 | +#' object, such as one created by initially running `VisiumIO::TENxVisium()` or |
| 10 | +#' `VisiumIO::TENxVisiumHD()`. |
| 11 | +#' |
| 12 | +spe_add_info <- function( |
| 13 | + spe, |
| 14 | + samples, |
| 15 | + sample_id = NULL, |
| 16 | + reference_gtf = NULL, |
| 17 | + chrM = "chrM", |
| 18 | + gtf_cols = c( |
| 19 | + "source", "type", "gene_id", "gene_version", "gene_name", |
| 20 | + "gene_type" |
| 21 | + ), |
| 22 | + add_analysis = TRUE, |
| 23 | + verbose = TRUE |
| 24 | + ) { |
| 25 | + if (!all(c("gene_name", "gene_id") %in% gtf_cols)) { |
| 26 | + stop("'gtf_cols' must include 'gene_name' and 'gene_id'", call. = FALSE) |
| 27 | + } |
| 28 | + |
| 29 | + # Attempt to guess the GTF path using the Spaceranger web_summary.html |
| 30 | + if (missing(reference_gtf)) { |
| 31 | + summary_file <- file.path(samples[1], "web_summary.html") |
| 32 | + web <- readLines(summary_file) |
| 33 | + |
| 34 | + # For spaceranger versions before 3.0 |
| 35 | + reference_path <- gsub( |
| 36 | + '.*"', |
| 37 | + "", |
| 38 | + regmatches( |
| 39 | + web, regexpr('\\["Reference Path", *"[/|A-z|0-9|-]+', web) |
| 40 | + ) |
| 41 | + ) |
| 42 | + |
| 43 | + # For recent spaceranger versions (3.0.0+?) |
| 44 | + if (length(reference_path) == 0) { |
| 45 | + reference_path <- sub( |
| 46 | + ".*--transcriptome=(\\S*).*", |
| 47 | + "\\1", |
| 48 | + web[grep("--transcriptome=", web)] |
| 49 | + ) |
| 50 | + } |
| 51 | + reference_gtf <- list.files( |
| 52 | + file.path(reference_path, "genes"), "^genes\\.gtf(\\.gz)?$", |
| 53 | + full.names = TRUE |
| 54 | + ) |
| 55 | + } |
| 56 | + reference_gtf <- reference_gtf[file.exists(reference_gtf)] |
| 57 | + if (length(reference_gtf) > 1) { |
| 58 | + stop( |
| 59 | + "More than one 'reference_gtf' was provided or detected. Manually specify the path to just one 'reference_gtf'. If different GTF files were used, then different genes will have been quantified and thus cannot be merged naively into a single SpatialExperiment object. If that's the case, we recommend you build separate SPE objects based on the different 'reference_gtf' files used.", |
| 60 | + call. = FALSE |
| 61 | + ) |
| 62 | + } else if (length(reference_gtf) == 0) { |
| 63 | + stop( |
| 64 | + "No 'reference_gtf' files were detected. Please check that the files are available.", |
| 65 | + call. = FALSE |
| 66 | + ) |
| 67 | + } |
| 68 | + |
| 69 | + ## Read in the gene information from the annotation GTF file |
| 70 | + if (verbose) message(Sys.time(), " rtracklayer::import: reading the reference GTF file") |
| 71 | + gtf <- rtracklayer::import(reference_gtf) |
| 72 | + gtf <- gtf[gtf$type == "gene"] |
| 73 | + names(gtf) <- gtf$gene_id |
| 74 | + |
| 75 | + ## Match the genes |
| 76 | + if (verbose) message(Sys.time(), " adding gene information to the SPE object") |
| 77 | + match_genes <- match(rownames(spe), gtf$gene_id) |
| 78 | + |
| 79 | + if (all(is.na(match_genes))) { |
| 80 | + ## Protect against scenario where one set has GENCODE IDs and the other one has ENSEMBL IDs. |
| 81 | + warning("Gene IDs did not match. This typically happens when you are not using the same GTF file as the one that was used by SpaceRanger. For example, one file uses GENCODE IDs and the other one ENSEMBL IDs. read10xVisiumWrapper() will try to convert them to ENSEMBL IDs.", call. = FALSE) |
| 82 | + match_genes <- match( |
| 83 | + gsub("\\..*", "", rownames(spe)), gsub("\\..*", "", gtf$gene_id) |
| 84 | + ) |
| 85 | + } |
| 86 | + |
| 87 | + if (any(is.na(match_genes))) { |
| 88 | + warning( |
| 89 | + "Dropping ", sum(is.na(match_genes)), " out of ", |
| 90 | + length(match_genes), |
| 91 | + " genes for which we don't have information on the reference GTF file. This typically happens when you are not using the same GTF file as the one that was used by SpaceRanger.", |
| 92 | + call. = FALSE |
| 93 | + ) |
| 94 | + ## Drop the few genes for which we don't have information |
| 95 | + spe <- spe[!is.na(match_genes), ] |
| 96 | + match_genes <- match_genes[!is.na(match_genes)] |
| 97 | + } |
| 98 | + |
| 99 | + ## Keep only some columns from the gtf |
| 100 | + mcols(gtf) <- mcols(gtf)[, gtf_cols[gtf_cols %in% colnames(mcols(gtf))]] |
| 101 | + |
| 102 | + ## Add the gene info to our SPE object |
| 103 | + rowRanges(spe) <- gtf[match_genes] |
| 104 | +} |
0 commit comments