Skip to content

Commit b88a39e

Browse files
committed
Begin a function providing the functionality from read10xVisiumWrapper without importing the SPE
1 parent b7bae6f commit b88a39e

File tree

1 file changed

+104
-0
lines changed

1 file changed

+104
-0
lines changed

R/spe_add_info.R

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#' Make a [SpatialExperiment-class][SpatialExperiment::SpatialExperiment-class] object `spatialLIBD`-compatible
2+
#'
3+
#' Adds gene info from a reference GTF, SpaceRanger analysis results, and other
4+
#' information to make a [SpatialExperiment-class][SpatialExperiment::SpatialExperiment-class]
5+
#' object compatible with `spatialLIBD::run_app()`.
6+
#'
7+
#' @inheritParams read10xVisiumWrapper
8+
#' @param spe A [SpatialExperiment-class][SpatialExperiment::SpatialExperiment-class]
9+
#' object, such as one created by initially running `VisiumIO::TENxVisium()` or
10+
#' `VisiumIO::TENxVisiumHD()`.
11+
#'
12+
spe_add_info <- function(
13+
spe,
14+
samples,
15+
sample_id = NULL,
16+
reference_gtf = NULL,
17+
chrM = "chrM",
18+
gtf_cols = c(
19+
"source", "type", "gene_id", "gene_version", "gene_name",
20+
"gene_type"
21+
),
22+
add_analysis = TRUE,
23+
verbose = TRUE
24+
) {
25+
if (!all(c("gene_name", "gene_id") %in% gtf_cols)) {
26+
stop("'gtf_cols' must include 'gene_name' and 'gene_id'", call. = FALSE)
27+
}
28+
29+
# Attempt to guess the GTF path using the Spaceranger web_summary.html
30+
if (missing(reference_gtf)) {
31+
summary_file <- file.path(samples[1], "web_summary.html")
32+
web <- readLines(summary_file)
33+
34+
# For spaceranger versions before 3.0
35+
reference_path <- gsub(
36+
'.*"',
37+
"",
38+
regmatches(
39+
web, regexpr('\\["Reference Path", *"[/|A-z|0-9|-]+', web)
40+
)
41+
)
42+
43+
# For recent spaceranger versions (3.0.0+?)
44+
if (length(reference_path) == 0) {
45+
reference_path <- sub(
46+
".*--transcriptome=(\\S*).*",
47+
"\\1",
48+
web[grep("--transcriptome=", web)]
49+
)
50+
}
51+
reference_gtf <- list.files(
52+
file.path(reference_path, "genes"), "^genes\\.gtf(\\.gz)?$",
53+
full.names = TRUE
54+
)
55+
}
56+
reference_gtf <- reference_gtf[file.exists(reference_gtf)]
57+
if (length(reference_gtf) > 1) {
58+
stop(
59+
"More than one 'reference_gtf' was provided or detected. Manually specify the path to just one 'reference_gtf'. If different GTF files were used, then different genes will have been quantified and thus cannot be merged naively into a single SpatialExperiment object. If that's the case, we recommend you build separate SPE objects based on the different 'reference_gtf' files used.",
60+
call. = FALSE
61+
)
62+
} else if (length(reference_gtf) == 0) {
63+
stop(
64+
"No 'reference_gtf' files were detected. Please check that the files are available.",
65+
call. = FALSE
66+
)
67+
}
68+
69+
## Read in the gene information from the annotation GTF file
70+
if (verbose) message(Sys.time(), " rtracklayer::import: reading the reference GTF file")
71+
gtf <- rtracklayer::import(reference_gtf)
72+
gtf <- gtf[gtf$type == "gene"]
73+
names(gtf) <- gtf$gene_id
74+
75+
## Match the genes
76+
if (verbose) message(Sys.time(), " adding gene information to the SPE object")
77+
match_genes <- match(rownames(spe), gtf$gene_id)
78+
79+
if (all(is.na(match_genes))) {
80+
## Protect against scenario where one set has GENCODE IDs and the other one has ENSEMBL IDs.
81+
warning("Gene IDs did not match. This typically happens when you are not using the same GTF file as the one that was used by SpaceRanger. For example, one file uses GENCODE IDs and the other one ENSEMBL IDs. read10xVisiumWrapper() will try to convert them to ENSEMBL IDs.", call. = FALSE)
82+
match_genes <- match(
83+
gsub("\\..*", "", rownames(spe)), gsub("\\..*", "", gtf$gene_id)
84+
)
85+
}
86+
87+
if (any(is.na(match_genes))) {
88+
warning(
89+
"Dropping ", sum(is.na(match_genes)), " out of ",
90+
length(match_genes),
91+
" genes for which we don't have information on the reference GTF file. This typically happens when you are not using the same GTF file as the one that was used by SpaceRanger.",
92+
call. = FALSE
93+
)
94+
## Drop the few genes for which we don't have information
95+
spe <- spe[!is.na(match_genes), ]
96+
match_genes <- match_genes[!is.na(match_genes)]
97+
}
98+
99+
## Keep only some columns from the gtf
100+
mcols(gtf) <- mcols(gtf)[, gtf_cols[gtf_cols %in% colnames(mcols(gtf))]]
101+
102+
## Add the gene info to our SPE object
103+
rowRanges(spe) <- gtf[match_genes]
104+
}

0 commit comments

Comments
 (0)