Skip to content

Commit 088ba74

Browse files
authored
Merge pull request #12 from openpipelines-bio/add-cosmx-ingestion
add cosmx ingestion
2 parents 791ad93 + bc7e4a3 commit 088ba74

11 files changed

Lines changed: 490 additions & 6 deletions

File tree

CHANGELOG.MD

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# openpipeline_qc 0.2.0
22

3+
## NEW FEATURES
4+
5+
* Add support for CosMx data in the QC Reporting workflow (PR #12).
6+
37
## MINOR CHANGES
48

59
* Added support for Visium data ingestion and QC reporting (PR #16).

inspect_visium.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
2+
import mudata as mu
3+
import pandas as pd
4+
5+
def inspect_h5mu(filepath, name, modality="rna"):
    """Print a quick summary of one modality's cell annotations in an h5mu file.

    Opens ``filepath`` with ``mu.read_h5mu`` and prints, for the requested
    modality, the ``.obs`` column names, the first ``.obs`` rows and the
    ``.obsm`` keys. This is a best-effort diagnostic helper: any failure is
    reported on stdout rather than raised, and the function returns ``None``.

    Args:
        filepath: Path of the h5mu file to open.
        name: Human-readable label shown in the banner line.
        modality: Key in ``mdata.mod`` to inspect. Defaults to ``"rna"``,
            which is the modality the original script always inspected.
    """
    print(f"--- Inspecting {name} ({filepath}) ---")
    try:
        mdata = mu.read_h5mu(filepath)
        if modality not in mdata.mod:
            # A bare KeyError would print only the key name; report what the
            # file actually contains so the message is actionable.
            raise LookupError(
                f"modality {modality!r} not found; "
                f"available modalities: {list(mdata.mod)}"
            )
        adata = mdata.mod[modality]

        obs_columns = adata.obs.columns.tolist()
        print(f"Obs columns ({len(obs_columns)}):")
        print(obs_columns)

        print("\nObs head:")
        print(adata.obs.head())

        print(f"\nObsm keys: {list(adata.obsm.keys())}")
    except Exception as e:
        # Deliberately broad: this is an inspection script, so any failure
        # is reported and the run continues instead of aborting.
        print(f"Error reading {filepath}: {e}")
    print("\n")
21+
22+
if __name__ == "__main__":
    # Guarded so that importing this module does not trigger a file read;
    # running it as a script still inspects the Visium test resource.
    inspect_h5mu("resources_test/spatial_qc_sample_data/visium_tiny.qc.h5mu", "Visium")

resources_test_scripts/qc_sample_data.sh

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,14 @@ viash run src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml --engine docker -- \
147147
--output "$OUT_DIR_SPATIAL"/xenium_dataset.json \
148148
--output_reporting_json "$OUT_DIR_SPATIAL"/xenium_report_structure.json
149149

150+
viash run src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml --engine docker -- \
151+
--input "$OUT_DIR_SPATIAL"/Lung5_Rep2_tiny.qc.h5mu \
152+
--input "$OUT_DIR_SPATIAL"/Lung5_Rep2_tiny.qc.h5mu \
153+
--ingestion_method cosmx \
154+
--min_num_nonzero_vars 1 \
155+
--output "$OUT_DIR_SPATIAL"/cosmx_dataset.json \
156+
--output_reporting_json "$OUT_DIR_SPATIAL"/cosmx_report_structure.json
157+
150158
viash run src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml --engine docker -- \
151159
--input "$OUT_DIR_SPATIAL"/visium_tiny.qc.h5mu \
152160
--input "$OUT_DIR_SPATIAL"/visium_tiny.qc.h5mu \

src/ingestion_qc/h5mu_to_qc_json/config.vsh.yaml

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ argument_groups:
3434
choices:
3535
- cellranger_multi
3636
- xenium
37+
- cosmx
3738
- visium
3839
description: Method that was used to ingest the data - this will define the structure of the report that is generated.
3940
- name: --obs_sample_id
@@ -133,7 +134,41 @@ argument_groups:
133134
description: The key in the h5mu .obs field that contains the number of control codewords.
134135
default: control_codeword_counts
135136

136-
# - name: Options for CosMx reports
137+
- name: Options for CosMx reports
138+
arguments:
139+
- name: --obs_area
140+
type: string
141+
description: The key in the h5mu .obs field that contains the cell area.
142+
default: Area
143+
- name: --obs_aspect_ratio
144+
type: string
145+
description: The key in the h5mu .obs field that contains the aspect ratio.
146+
default: AspectRatio
147+
- name: --obs_dapi_stain
148+
type: string
149+
multiple: true
150+
description: The keys in the h5mu .obs field that contain the DAPI stain intensity.
151+
default: [Mean.DAPI, Max.DAPI]
152+
- name: --obs_membrane_stain
153+
type: string
154+
multiple: true
155+
description: The keys in the h5mu .obs field that contain the membrane stain intensity.
156+
default: [Mean.MembraneStain, Max.MembraneStain]
157+
- name: --obs_panck_stain
158+
type: string
159+
multiple: true
160+
description: The keys in the h5mu .obs field that contain the PanCK stain intensity.
161+
default: [Mean.PanCK, Max.PanCK]
162+
- name: --obs_cd45_stain
163+
type: string
164+
multiple: true
165+
description: The keys in the h5mu .obs field that contain the CD45 stain intensity.
166+
default: [Mean.CD45, Max.CD45]
167+
- name: --obs_cd3_stain
168+
type: string
169+
multiple: true
170+
description: The keys in the h5mu .obs field that contain the CD3 stain intensity.
171+
default: [Mean.CD3, Max.CD3]
137172

138173
resources:
139174
- type: python_script
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
{
2+
"categories": [
3+
{
4+
"name": "Sample QC",
5+
"key": "sample_summary_stats",
6+
"additionalAxes": false,
7+
"defaultFilters": []
8+
},
9+
{
10+
"name": "Cell RNA QC",
11+
"key": "cell_rna_stats",
12+
"additionalAxes": true,
13+
"defaultFilters": [
14+
{
15+
"type": "histogram",
16+
"visualizationType": "histogram",
17+
"field": "total_counts",
18+
"label": "Total UMI per cell",
19+
"description": "Total number of RNA molecules detected per cell. Low values typically indicate empty droplets or low-quality cells that should be filtered out.",
20+
"cutoffMin": null,
21+
"cutoffMax": null,
22+
"zoomMax": null,
23+
"nBins": 50,
24+
"groupBy": "sample_id",
25+
"yAxisType": "linear"
26+
},
27+
{
28+
"type": "histogram",
29+
"visualizationType": "histogram",
30+
"field": "num_nonzero_vars",
31+
"label": "Number of non-zero genes per cell",
32+
"description": "Count of unique genes detected in each cell. Low gene counts often indicate poor-quality cells.",
33+
"cutoffMin": null,
34+
"cutoffMax": null,
35+
"zoomMax": null,
36+
"nBins": 50,
37+
"groupBy": "sample_id",
38+
"yAxisType": "linear"
39+
},
40+
{
41+
"type": "histogram",
42+
"visualizationType": "histogram",
43+
"field": "fraction_mitochondrial",
44+
"label": "Fraction UMI of mitochondrial genes per cell",
45+
"description": "Proportion of a cell's RNA from mitochondrial genes.",
46+
"cutoffMin": null,
47+
"cutoffMax": null,
48+
"nBins": 50,
49+
"groupBy": "sample_id",
50+
"yAxisType": "linear"
51+
},
52+
{
53+
"type": "histogram",
54+
"visualizationType": "histogram",
55+
"field": "fraction_ribosomal",
56+
"label": "Fraction UMI of ribosomal genes per cell",
57+
"description": "Proportion of a cell's RNA from ribosomal protein genes. Extreme values may indicate stress responses or cell cycle abnormalities.",
58+
"cutoffMin": null,
59+
"cutoffMax": null,
60+
"nBins": 50,
61+
"groupBy": "sample_id",
62+
"yAxisType": "linear"
63+
},
64+
{
65+
"type": "histogram",
66+
"visualizationType": "histogram",
67+
"field": "Area",
68+
"label": "Cell Area",
69+
"description": "Area of the segmented cells.",
70+
"cutoffMin": null,
71+
"cutoffMax": null,
72+
"nBins": 50,
73+
"groupBy": "sample_id",
74+
"yAxisType": "linear"
75+
},
76+
{
77+
"type": "histogram",
78+
"visualizationType": "histogram",
79+
"field": "AspectRatio",
80+
"label": "Aspect Ratio",
81+
"description": "Aspect ratio of the segmented cells.",
82+
"cutoffMin": null,
83+
"cutoffMax": null,
84+
"nBins": 50,
85+
"groupBy": "sample_id",
86+
"yAxisType": "linear"
87+
},
88+
{
89+
"type": "histogram",
90+
"visualizationType": "histogram",
91+
"field": "Mean.DAPI",
92+
"label": "Mean DAPI Intensity",
93+
"description": "Mean intensity of DAPI stain.",
94+
"cutoffMin": null,
95+
"cutoffMax": null,
96+
"nBins": 50,
97+
"groupBy": "sample_id",
98+
"yAxisType": "linear"
99+
},
100+
{
101+
"type": "histogram",
102+
"visualizationType": "histogram",
103+
"field": "Mean.MembraneStain",
104+
"label": "Mean Membrane Stain Intensity",
105+
"description": "Mean intensity of Membrane stain.",
106+
"cutoffMin": null,
107+
"cutoffMax": null,
108+
"nBins": 50,
109+
"groupBy": "sample_id",
110+
"yAxisType": "linear"
111+
},
112+
{
113+
"type": "histogram",
114+
"visualizationType": "histogram",
115+
"field": "Mean.PanCK",
116+
"label": "Mean PanCK Intensity",
117+
"description": "Mean intensity of PanCK stain.",
118+
"cutoffMin": null,
119+
"cutoffMax": null,
120+
"nBins": 50,
121+
"groupBy": "sample_id",
122+
"yAxisType": "linear"
123+
},
124+
{
125+
"type": "histogram",
126+
"visualizationType": "histogram",
127+
"field": "Mean.CD45",
128+
"label": "Mean CD45 Intensity",
129+
"description": "Mean intensity of CD45 stain.",
130+
"cutoffMin": null,
131+
"cutoffMax": null,
132+
"nBins": 50,
133+
"groupBy": "sample_id",
134+
"yAxisType": "linear"
135+
},
136+
{
137+
"type": "histogram",
138+
"visualizationType": "histogram",
139+
"field": "Mean.CD3",
140+
"label": "Mean CD3 Intensity",
141+
"description": "Mean intensity of CD3 stain.",
142+
"cutoffMin": null,
143+
"cutoffMax": null,
144+
"nBins": 50,
145+
"groupBy": "sample_id",
146+
"yAxisType": "linear"
147+
},
148+
{
149+
"type": "histogram",
150+
"visualizationType": "histogram",
151+
"field": "Max.DAPI",
152+
"label": "Max DAPI Intensity",
153+
"description": "Maximum intensity of DAPI stain.",
154+
"cutoffMin": null,
155+
"cutoffMax": null,
156+
"nBins": 50,
157+
"groupBy": "sample_id",
158+
"yAxisType": "linear"
159+
},
160+
{
161+
"type": "histogram",
162+
"visualizationType": "histogram",
163+
"field": "Max.MembraneStain",
164+
"label": "Max Membrane Stain Intensity",
165+
"description": "Maximum intensity of Membrane stain.",
166+
"cutoffMin": null,
167+
"cutoffMax": null,
168+
"nBins": 50,
169+
"groupBy": "sample_id",
170+
"yAxisType": "linear"
171+
},
172+
{
173+
"type": "histogram",
174+
"visualizationType": "histogram",
175+
"field": "Max.PanCK",
176+
"label": "Max PanCK Intensity",
177+
"description": "Maximum intensity of PanCK stain.",
178+
"cutoffMin": null,
179+
"cutoffMax": null,
180+
"nBins": 50,
181+
"groupBy": "sample_id",
182+
"yAxisType": "linear"
183+
},
184+
{
185+
"type": "histogram",
186+
"visualizationType": "histogram",
187+
"field": "Max.CD45",
188+
"label": "Max CD45 Intensity",
189+
"description": "Maximum intensity of CD45 stain.",
190+
"cutoffMin": null,
191+
"cutoffMax": null,
192+
"nBins": 50,
193+
"groupBy": "sample_id",
194+
"yAxisType": "linear"
195+
},
196+
{
197+
"type": "histogram",
198+
"visualizationType": "histogram",
199+
"field": "Max.CD3",
200+
"label": "Max CD3 Intensity",
201+
"description": "Maximum intensity of CD3 stain.",
202+
"cutoffMin": null,
203+
"cutoffMax": null,
204+
"nBins": 50,
205+
"groupBy": "sample_id",
206+
"yAxisType": "linear"
207+
}
208+
]
209+
}
210+
]
211+
}

0 commit comments

Comments
 (0)