Skip to content

Commit b067b89

Browse files
committed
Merge branch 'u/morriscb/codeOceanExecuteScript'
2 parents 2bbe391 + b0400e8 commit b067b89

File tree

3 files changed

+99
-1
lines changed

3 files changed

+99
-1
lines changed

intro.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,9 @@ Available notebooks:
5858
(Kimberly Siletti)**
5959

6060
## Release Notes
61+
* **[abc_atlas_access (v0.1.2)]**
62+
* Added script for selecting genes from expression matrices for use on
63+
CodeOcean.
6164
* **[abc_atlas_access (v0.1.1)]**
6265
* Fixed compatibility issue with read-only local caches specifically for
6366
FUSE-style file mounts such as those used on CodeOcean.

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
44

55
[project]
66
name = "abc_atlas_access"
7-
version = "0.1.1"
7+
version = "0.1.2"
88
description = "A package for accessing/processing data from the ABC Atlas"
99
dependencies = [
1010
"anndata",

scripts/create_gene_expression.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
import argparse
2+
from pathlib import Path
3+
4+
from abc_atlas_access.abc_atlas_cache.abc_project_cache import AbcProjectCache
5+
from abc_atlas_access.abc_atlas_cache.anndata_utils import get_gene_data
6+
7+
8+
# Maps each supported --species choice to the ABC Atlas directory name that
# is passed to ``get_metadata_dataframe`` for that species.
SPECIES_DIRECTORIES = {
    "human": "WHB-10Xv3",
    "mouse": "WMB-10X",
}


def parse_gene_list(raw: str) -> list:
    """Split a comma-separated string of gene symbols into a clean list.

    Spaces are removed from every entry and empty entries (produced by an
    empty string, a trailing comma or doubled commas) are dropped, so
    ``""`` yields ``[]`` rather than ``[""]``.

    Parameters
    ----------
    raw:
        The value given to ``--genes``, e.g. ``"Slc17a7, Gad1"``.

    Returns
    -------
    list of str
        Gene symbols in the order they were given.
    """
    symbols = (symbol.replace(" ", "") for symbol in raw.split(","))
    return [symbol for symbol in symbols if symbol]


def build_parser() -> argparse.ArgumentParser:
    """Create the command line parser for this script."""
    parser = argparse.ArgumentParser(
        description="Load expression matrix data from the ABC Atlas and "
                    "extract specific genes across multiple files and all "
                    "cells."
    )
    parser.add_argument(
        "--abc_atlas_cache_path",
        type=str,
        default="/root/capsule/data/abc_atlas",
        help="Path to the ABC Atlas cache directory."
    )
    parser.add_argument(
        "--manifest_version",
        type=str,
        default="releases/20240330/manifest.json",
        help="The version of the ABC Atlas manifest to use."
    )
    parser.add_argument(
        "--use_s3_cache",
        action="store_true",
        help="Use an S3 cache where the data is downloaded to disk instead of "
             "a local cache already stored on disk."
    )
    parser.add_argument(
        "--species",
        help="Which data to load? `mouse` or `human`?",
        choices=list(SPECIES_DIRECTORIES),
        # There is no sensible default species; requiring it makes argparse
        # reject the call up front instead of failing after the cache and
        # manifest have already been loaded.
        required=True
    )
    parser.add_argument(
        "--use_raw",
        action="store_true",
        help="Use raw gene expression values instead of log2 values."
    )
    parser.add_argument(
        "--output_file_path",
        type=str,
        help="Path to file to write to.",
        default="~/capsule/results/gene_data.csv"
    )
    parser.add_argument(
        '--genes',
        type=str,
        default="",
        help="A comma-separated list of gene symbols to extract from the "
             "expression matrix."
    )
    return parser


def main(argv=None) -> None:
    """Extract the requested genes from the ABC Atlas and write them to CSV.

    Parameters
    ----------
    argv:
        Optional list of command line arguments. ``None`` (the default)
        makes argparse read ``sys.argv``.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    genes = parse_gene_list(args.genes)
    if not genes:
        # Fail fast: with no gene symbols there is nothing to extract, so
        # stop before touching the cache.
        parser.error("--genes must contain at least one gene symbol.")

    print("Loading ABC Atlas cache from:", args.abc_atlas_cache_path)
    cache_path = Path(args.abc_atlas_cache_path)
    if args.use_s3_cache:
        abc_cache = AbcProjectCache.from_s3_cache(cache_path)
    else:
        abc_cache = AbcProjectCache.from_local_cache(cache_path)
    abc_cache.load_manifest(args.manifest_version)

    # argparse restricts --species to the keys of SPECIES_DIRECTORIES.
    directory_name = SPECIES_DIRECTORIES[args.species]

    cell = abc_cache.get_metadata_dataframe(
        directory=directory_name,
        file_name='cell_metadata'
    ).set_index('cell_label')
    gene = abc_cache.get_metadata_dataframe(
        directory=directory_name,
        file_name='gene'
    ).set_index('gene_identifier')

    print("Processing genes:", genes)
    gene_data = get_gene_data(
        abc_atlas_cache=abc_cache,
        all_cells=cell,
        all_genes=gene,
        selected_genes=genes,
        data_type="raw" if args.use_raw else "log2"
    )

    # The default output path starts with "~": expand it explicitly and make
    # sure the destination directory exists before writing.
    output_path = Path(args.output_file_path).expanduser()
    output_path.parent.mkdir(parents=True, exist_ok=True)
    print("Writing gene data to:", output_path)
    gene_data.to_csv(output_path)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)