Skip to content

Commit 791ad93

Browse files
authored
Merge pull request #16 from openpipelines-bio/add-visium-support
add visium support
2 parents 115ef4b + f7bdc88 commit 791ad93

11 files changed

Lines changed: 176 additions & 5 deletions

File tree

CHANGELOG.MD

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22

33
## MINOR CHANGES
44

5+
* Added support for Visium data ingestion and QC reporting (PR #16).
6+
7+
# openpipeline_qc 0.2.0
8+
9+
## MINOR CHANGES
10+
511
* `src/workflows/generate_qc_report`: Add min and max constraints to `--max_samples_per_report`, to limit the number of samples per report between 1 and 20 (PR #11).
612

713
* Update `openpipeline` dependency to v4.0.0 and `siqc` to version v0.2.0 (PR #15).

resources_test_scripts/qc_sample_data.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,14 @@ viash run src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml --engine docker -- \
147147
--output "$OUT_DIR_SPATIAL"/xenium_dataset.json \
148148
--output_reporting_json "$OUT_DIR_SPATIAL"/xenium_report_structure.json
149149

150+
viash run src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml --engine docker -- \
151+
--input "$OUT_DIR_SPATIAL"/visium_tiny.qc.h5mu \
152+
--input "$OUT_DIR_SPATIAL"/visium_tiny.qc.h5mu \
153+
--ingestion_method visium \
154+
--min_num_nonzero_vars 1 \
155+
--output "$OUT_DIR_SPATIAL"/visium_dataset.json \
156+
--output_reporting_json "$OUT_DIR_SPATIAL"/visium_report_structure.json
157+
150158
# remove all state yaml files
151159
rm "$OUT_DIR"/*.yaml
152160
rm "$OUT_DIR_SPATIAL"/*.yaml

resources_test_scripts/spatial_qc_sample_data.sh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ param_list:
1111
input: s3://openpipelines-bio/openpipeline_spatial/resources_test/xenium/xenium_tiny.h5mu
1212
- id: Lung5_Rep2_tiny
1313
input: s3://openpipelines-bio/openpipeline_spatial/resources_test/cosmx/Lung5_Rep2_tiny.h5mu
14+
- id: visium_tiny
15+
input: s3://openpipelines-bio/openpipeline_spatial/resources_test/visium/Visium_FFPE_Human_Ovarian_Cancer_tiny.h5mu
1416
var_name_mitochondrial_genes: mitochondrial
1517
var_name_ribosomal_genes: ribosomal
1618
output: '\$id.qc.h5mu'

src/ingestion_qc/generate_html/config.vsh.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ engines:
5252
- git
5353
- type: docker
5454
run: |
55-
cd /opt && git clone -b v0.2.0 https://github.com/openpipelines-bio/siqc.git \
55+
cd /opt && git clone -b v0.2.1 https://github.com/openpipelines-bio/siqc.git \
5656
&& cd siqc && npm install \
5757
&& true
5858

src/ingestion_qc/generate_html/script.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ ABSOLUTE_OUTPUT=$(realpath $par_output_qc_report)
55
cd /opt/siqc
66
mkdir src/data
77

8-
npm run cli render -- --data "$ABSOLUTE_INPUT_DATA" --structure "$ABSOLUTE_INPUT_STRUCTURE" --output "$ABSOLUTE_OUTPUT"
8+
npm run cli render -- --data "$ABSOLUTE_INPUT_DATA" --structure "$ABSOLUTE_INPUT_STRUCTURE" --output "$ABSOLUTE_OUTPUT"

src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ argument_groups:
3434
choices:
3535
- cellranger_multi
3636
- xenium
37+
- visium
3738
description: Method that was used to ingest the data - this will define the structure of the report that is generated.
3839
- name: --obs_sample_id
3940
type: string
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
{
2+
"categories": [
3+
{
4+
"name": "Sample QC",
5+
"key": "sample_summary_stats",
6+
"additionalAxes": false,
7+
"defaultFilters": []
8+
},
9+
{
10+
"name": "Cell RNA QC",
11+
"key": "cell_rna_stats",
12+
"additionalAxes": true,
13+
"defaultFilters": [
14+
{
15+
"type": "histogram",
16+
"visualizationType": "histogram",
17+
"field": "total_counts",
18+
"label": "Total UMI per cell",
19+
"description": "Total number of RNA molecules (UMIs) detected per cell. For Visium data each 'cell' is a capture spot; low values typically indicate spots with little or no tissue coverage, or low-quality spots that should be filtered out.",
20+
"cutoffMin": null,
21+
"cutoffMax": null,
22+
"zoomMax": null,
23+
"nBins": 50,
24+
"groupBy": "sample_id",
25+
"yAxisType": "linear"
26+
},
27+
{
28+
"type": "histogram",
29+
"visualizationType": "histogram",
30+
"field": "num_nonzero_vars",
31+
"label": "Number of non-zero genes per cell",
32+
"description": "Count of unique genes detected in each cell. Low gene counts often indicate poor-quality cells.",
33+
"cutoffMin": null,
34+
"cutoffMax": null,
35+
"zoomMax": null,
36+
"nBins": 50,
37+
"groupBy": "sample_id",
38+
"yAxisType": "linear"
39+
},
40+
{
41+
"type": "histogram",
42+
"visualizationType": "histogram",
43+
"field": "fraction_mitochondrial",
44+
"label": "Fraction UMI of mitochondrial genes per cell",
45+
"description": "Proportion of a cell's RNA that comes from mitochondrial genes.",
46+
"cutoffMin": null,
47+
"cutoffMax": null,
48+
"nBins": 50,
49+
"groupBy": "sample_id",
50+
"yAxisType": "linear"
51+
},
52+
{
53+
"type": "histogram",
54+
"visualizationType": "histogram",
55+
"field": "fraction_ribosomal",
56+
"label": "Fraction UMI of ribosomal genes per cell",
57+
"description": "Proportion of a cell's RNA that comes from ribosomal protein genes. Extreme values may indicate stress responses or cell cycle abnormalities.",
58+
"cutoffMin": null,
59+
"cutoffMax": null,
60+
"nBins": 50,
61+
"groupBy": "sample_id",
62+
"yAxisType": "linear"
63+
}
64+
]
65+
}
66+
]
67+
}

src/ingestion_qc/h5mu_to_qc_json/script.py

Lines changed: 31 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -243,6 +243,31 @@ def generate_xenium_stats(mod_obs, sample_id, required_keys):
243243
**{key: mod_obs[key] for key in metadata_obs_keys}
244244
}
245245
)
246+
247+
return cell_rna_stats
248+
249+
def generate_visium_stats(mod_obs, sample_id, required_keys):
    """Assemble the per-observation RNA QC table for a Visium sample.

    Args:
        mod_obs: Observation table of the modality. It must already carry
            the ``x_coord`` and ``y_coord`` columns (``main`` copies them
            from ``obsm["spatial"]`` before calling this function).
        sample_id: Sample identifier values, one per observation; stored as
            a categorical ``sample_id`` column.
        required_keys: Names of the QC columns that are normalised by
            ``format_required_columns`` and copied into the result.

    Returns:
        A ``pandas.DataFrame`` holding, in order: ``sample_id``, the
        required QC columns, the two spatial coordinate columns, and every
        metadata column reported by ``format_categorical_columns``.
    """
    spatial_columns = ["x_coord", "y_coord"]

    # Normalise the QC columns that every ingestion method must provide.
    obs = format_required_columns(required_keys, mod_obs)

    # Store the spatial coordinates as half-precision floats.
    # NOTE(review): float16 tops out at 65504 and keeps only ~3 significant
    # decimal digits - confirm that Visium pixel coordinates fit this range.
    for column in spatial_columns:
        obs[column] = obs[column].astype("float16")

    # Collect the categorical metadata columns that the report can group by.
    grouping_keys, obs = format_categorical_columns(obs)

    # Later updates overwrite the value of an existing key but keep its
    # original position, exactly like chained ``**`` unpacking would.
    columns = {"sample_id": pd.Categorical(sample_id)}
    columns.update({key: obs[key] for key in required_keys})
    columns.update({key: obs[key] for key in spatial_columns})
    columns.update({key: obs[key] for key in grouping_keys})

    return pd.DataFrame(columns)
248273

@@ -286,7 +311,7 @@ def main(par):
286311
barcodes_original_count = mod_obs.shape[0]
287312

288313
# Add coordinates to obs before filtering
289-
if par["ingestion_method"] == "xenium":
314+
if par["ingestion_method"] == "xenium" or par["ingestion_method"] == "visium":
290315
mod_obs["x_coord"] = mod_obsm["spatial"][:, 0]
291316
mod_obs["y_coord"] = mod_obsm["spatial"][:, 1]
292317

@@ -340,6 +365,9 @@ def main(par):
340365

341366
if par["ingestion_method"] == "xenium":
342367
cell_rna_stats = generate_xenium_stats(mod_obs, sample_id, required_keys)
368+
369+
if par["ingestion_method"] == "visium":
370+
cell_rna_stats = generate_visium_stats(mod_obs, sample_id, required_keys)
343371

344372
cell_stats_dfs.append(cell_rna_stats)
345373
sample_stats_dfs.append(sample_summary_stats)
@@ -374,7 +402,8 @@ def main(par):
374402

375403
report_structures = {
376404
"cellranger_multi": os.path.join(meta["resources_dir"], "report_structure/cellranger.json"),
377-
"xenium": os.path.join(meta["resources_dir"], "report_structure/xenium.json")
405+
"xenium": os.path.join(meta["resources_dir"], "report_structure/xenium.json"),
406+
"visium": os.path.join(meta["resources_dir"], "report_structure/visium.json")
378407
}
379408

380409
logger.info(f"Writing output report structure json to {par['output_reporting_json']}")

src/workflows/generate_qc_report/config.vsh.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ argument_groups:
3535
choices:
3636
- cellranger_multi
3737
- xenium
38+
- visium
3839
- name: --sample_metadata
3940
type: file
4041
required: false
@@ -131,6 +132,12 @@ test_resources:
131132
- type: nextflow_script
132133
path: test.nf
133134
entrypoint: test_with_cellbender
135+
- type: nextflow_script
136+
path: test.nf
137+
entrypoint: test_xenium
138+
- type: nextflow_script
139+
path: test.nf
140+
entrypoint: test_visium
134141

135142
dependencies:
136143
- name: metadata/add_id

src/workflows/generate_qc_report/integration_test.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,10 @@ nextflow run . \
3535
-entry test_multiple_reports \
3636
-c src/configs/labels_ci.config \
3737
-resume
38+
39+
nextflow run . \
40+
-main-script src/workflows/generate_qc_report/test.nf \
41+
-profile docker,no_publish,local \
42+
-entry test_visium \
43+
-c src/configs/labels_ci.config \
44+
-resume

0 commit comments

Comments
 (0)