Skip to content

Commit 10f65e3

Browse files
committed
scvi scanvi initial commit
1 parent 11aa131 commit 10f65e3

4 files changed

Lines changed: 137 additions & 0 deletions

File tree

pipeline_versions.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ PeakCalling 1.0.1 2025-08-11
1616
Pipeline Name Version Date of Last Commit
1717
RNAWithUMIsPipeline 1.0.20 2026-01-21
1818
ReblockGVCF 2.4.4 2026-01-29
19+
ScviScanvi 1.0.0 2026-02-06
1920
SlideSeq 3.6.4 2026-01-22
2021
SlideTags 1.0.7 2026-01-26
2122
UltimaGenomicsJointGenotyping 1.2.3 2025-08-11
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# 1.0.0
2+
2026-02-06 (Date of Last Commit)
3+
4+
* Initial release of the ScviScanvi pipeline for cell type label transfer on Multiome data
5+
* Integrated SCVI and SCANVI models using the scvi-scanvi docker image (1.0.0-1.2-1756234975)
6+
* Added GPU support for accelerated model training
7+
* Outputs SCANVI predictions, annotated ATAC, and annotated GEX h5ad files
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
version 1.0
2+
3+
import "../../../tasks/wdl/Utilities.wdl" as utils
4+
5+
# ScviScanvi: top-level workflow that forwards three h5ad inputs and the
# requested compute/GPU resources to the MultiomeLabelTransfer task, and
# exposes that task's three h5ad outputs plus the pipeline version string.
workflow ScviScanvi {

  meta {
    description: "Pipeline for cell type label transfer on Multiome data using SCVI and SCANVI models. Integrates single-cell RNA (GEX) and ATAC data with an annotated reference to transfer cell type labels via semi-supervised deep generative models."
    allowNestedInputs: true
  }

  input {
    # h5ad files (all three are required)
    File gex_h5ad
    File atac_h5ad
    File ref_h5ad

    # Cloud selection and compute resources passed through to the task
    String cloud_provider
    Int disk_size = 500
    Int mem_size = 64
    Int nthreads = 8

    # GPU request passed through to the task
    String gpu_type = "nvidia-tesla-t4"
    Int gpu_count = 1
  }

  # Version reported in the workflow outputs
  String pipeline_version = "1.0.0"

  # cloud_provider must be exactly 'gcp' or 'azure'; for any other value the
  # ErrorWithMessage utility task is invoked with an explanatory message.
  Boolean cloud_provider_is_supported = (cloud_provider == "gcp") || (cloud_provider == "azure")
  if (!cloud_provider_is_supported) {
    call utils.ErrorWithMessage as ErrorMessageIncorrectInput {
      input:
        message = "cloud_provider must be supplied with either 'gcp' or 'azure'."
    }
  }

  # Image name and tag, without the registry prefix
  String scvi_scanvi_docker = "scvi-scanvi:1.0.0-1.2-1756234975"

  # Registry prefix: the GCR prefix for 'gcp', the ACR prefix for anything else
  String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
  String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
  String docker_prefix = if cloud_provider != "gcp" then acr_docker_prefix else gcr_docker_prefix

  # Fully qualified image handed to the task
  String scvi_scanvi_docker_path = docker_prefix + scvi_scanvi_docker

  call MultiomeLabelTransfer {
    input:
      gex_h5ad = gex_h5ad,
      atac_h5ad = atac_h5ad,
      ref_h5ad = ref_h5ad,
      docker_path = scvi_scanvi_docker_path,
      disk_size = disk_size,
      mem_size = mem_size,
      nthreads = nthreads,
      gpu_type = gpu_type,
      gpu_count = gpu_count
  }

  output {
    File scanvi_predictions_h5ad = MultiomeLabelTransfer.scanvi_predictions_h5ad
    File atac_annotated_h5ad = MultiomeLabelTransfer.atac_annotated_h5ad
    File gex_annotated_h5ad = MultiomeLabelTransfer.gex_annotated_h5ad
    String pipeline_version_out = pipeline_version
  }
}
67+
68+
# MultiomeLabelTransfer: runs /usr/local/multiome_label_transfer.py from the
# supplied docker image on the GEX, ATAC and reference h5ad files, requesting
# the given CPU, memory, disk and GPU resources, and collects three h5ad
# outputs by fixed file name.
task MultiomeLabelTransfer {
  input {
    File gex_h5ad
    File atac_h5ad
    File ref_h5ad

    # Runtime attributes
    String docker_path
    Int disk_size = 500
    Int mem_size = 64
    Int nthreads = 8
    String gpu_type = "nvidia-tesla-t4"
    Int gpu_count = 1
  }

  parameter_meta {
    gex_h5ad: "Gene expression AnnData h5ad file from Multiome/Optimus pipeline output."
    atac_h5ad: "ATAC cell-by-bin AnnData h5ad file from Multiome/PeakCalling pipeline output."
    ref_h5ad: "Annotated reference AnnData h5ad file with cell type labels in obs['final_annotation']."
    docker_path: "Docker image path containing the scvi-scanvi runtime environment."
    disk_size: "Disk size in GB."
    mem_size: "Memory size in GB."
    nthreads: "Number of CPUs requested for the task."
    gpu_type: "GPU type for accelerated model training."
    gpu_count: "Number of GPUs to use."
  }

  command <<<
    set -euo pipefail

    # Paths are quoted so that a localized path containing whitespace or
    # glob characters is passed to the script as a single argument.
    python3 /usr/local/multiome_label_transfer.py \
      --gex-file "~{gex_h5ad}" \
      --atac-file "~{atac_h5ad}" \
      --ref-file "~{ref_h5ad}"
  >>>

  runtime {
    docker: docker_path
    disks: "local-disk ~{disk_size} SSD"
    memory: "~{mem_size} GiB"
    cpu: nthreads
    gpuType: gpu_type
    gpuCount: gpu_count
    # NOTE(review): pinned to a single zone, presumably for GPU availability; confirm.
    zones: ["us-central1-c"]
  }

  output {
    # Fixed file names, presumably written by multiome_label_transfer.py into
    # the task working directory; verify against the script.
    File scanvi_predictions_h5ad = "SCANVI_predictions.h5ad"
    File atac_annotated_h5ad = "atac_annotated_matrix.h5ad"
    File gex_annotated_h5ad = "gex_annotated_matrix.h5ad"
  }
}
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
{
2+
"ScviScanvi.cloud_provider": "gcp",
3+
"ScviScanvi.gex_h5ad": "gs://broad-gotc-test-storage/Multiome/truth/scientific/10k_PBMC/10k_PBMC_gex.h5ad",
4+
"ScviScanvi.atac_h5ad": "gs://broad-gotc-test-storage/PeakCalling/input/scientific/10k_PBMC/10k_PBMC_atac.metrics.h5ad",
5+
"ScviScanvi.ref_h5ad": "gs://broad-gotc-test-storage/ScviScanvi/input/pbmc_reference.h5ad",
6+
"ScviScanvi.disk_size": 500,
7+
"ScviScanvi.mem_size": 64,
8+
"ScviScanvi.nthreads": 8,
9+
"ScviScanvi.gpu_type": "nvidia-tesla-t4",
10+
"ScviScanvi.gpu_count": 1
11+
}

0 commit comments

Comments
 (0)