1+ import argparse
2+ from pathlib import Path
3+
4+ from abc_atlas_access .abc_atlas_cache .abc_project_cache import AbcProjectCache
5+ from abc_atlas_access .abc_atlas_cache .anndata_utils import get_gene_data
6+
7+
# Maps the `--species` choice to the ABC Atlas directory whose cell/gene
# metadata is loaded for extraction.
SPECIES_DIRECTORIES = {
    "human": "WHB-10Xv3",
    "mouse": "WMB-10X",
}


def parse_gene_list(raw_genes):
    """Split a comma-separated string of gene symbols into a clean list.

    All space characters are removed from each entry, and entries that are
    empty afterwards (from an empty string, doubled commas, or a trailing
    comma) are dropped so no blank symbol is passed downstream.

    Parameters
    ----------
    raw_genes : str
        Comma-separated gene symbols, e.g. ``"Slc17a7, Gad1"``.

    Returns
    -------
    list of str
        The non-empty gene symbols in their original order.
    """
    cleaned = (symbol.replace(" ", "") for symbol in raw_genes.split(","))
    return [symbol for symbol in cleaned if symbol]


def build_parser():
    """Create the command-line argument parser for this script.

    Returns
    -------
    argparse.ArgumentParser
        Parser exposing the cache location, manifest version, species,
        expression data type, gene list, and output path options.
    """
    parser = argparse.ArgumentParser(
        description="Load expression matrix data from the ABC Atlas and "
                    "extract specific genes across multiple files and all "
                    "cells."
    )
    parser.add_argument(
        "--abc_atlas_cache_path",
        type=str,
        default="/root/capsule/data/abc_atlas",
        help="Path to the ABC Atlas cache directory."
    )
    parser.add_argument(
        "--manifest_version",
        type=str,
        default="releases/20240330/manifest.json",
        help="The version of the ABC Atlas manifest to use."
    )
    parser.add_argument(
        "--use_s3_cache",
        action="store_true",
        help="Use an S3 cache where the data is downloaded to disk instead of "
             "a local cache already stored on disk."
    )
    parser.add_argument(
        "--species",
        help="Which data to load? `mouse` or `human`?",
        choices=list(SPECIES_DIRECTORIES),
        # Required so a missing species is rejected at parse time instead of
        # after the cache and manifest have already been loaded.
        required=True
    )
    parser.add_argument(
        "--use_raw",
        action="store_true",
        help="Use raw gene expression values instead of log2 values."
    )
    parser.add_argument(
        "--output_file_path",
        type=str,
        help="Path to file to write to.",
        default="~/capsule/results/gene_data.csv"
    )
    parser.add_argument(
        "--genes",
        type=str,
        default="",
        help="A comma-separated list of gene symbols to extract from the "
             "expression matrix."
    )
    return parser


def main(argv=None):
    """Extract the requested genes from the ABC Atlas and write them to CSV.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments to parse. ``None`` (the default) makes
        argparse read ``sys.argv[1:]``.

    Raises
    ------
    SystemExit
        Raised by argparse when the arguments are invalid, including when
        ``--species`` is missing or ``--genes`` contains no gene symbols.
    """
    parser = build_parser()
    args = parser.parse_args(argv)

    # Validate cheap inputs before touching the cache: loading the manifest
    # and metadata (possibly from S3) is the expensive part of this script.
    genes = parse_gene_list(args.genes)
    if not genes:
        parser.error("--genes must list at least one gene symbol.")

    # Expand `~` and create the destination directory up front so a bad
    # output location is reported before the extraction runs, not after.
    output_path = Path(args.output_file_path).expanduser()
    output_path.parent.mkdir(parents=True, exist_ok=True)

    print("Loading ABC Atlas cache from:", args.abc_atlas_cache_path)
    cache_path = Path(args.abc_atlas_cache_path)
    if args.use_s3_cache:
        abc_cache = AbcProjectCache.from_s3_cache(cache_path)
    else:
        abc_cache = AbcProjectCache.from_local_cache(cache_path)
    abc_cache.load_manifest(args.manifest_version)

    # argparse `choices` guarantees the key exists.
    directory_name = SPECIES_DIRECTORIES[args.species]

    cell_metadata = abc_cache.get_metadata_dataframe(
        directory=directory_name,
        file_name='cell_metadata'
    ).set_index('cell_label')
    gene_metadata = abc_cache.get_metadata_dataframe(
        directory=directory_name,
        file_name='gene'
    ).set_index('gene_identifier')

    print("Processing genes:", genes)
    gene_data = get_gene_data(
        abc_atlas_cache=abc_cache,
        all_cells=cell_metadata,
        all_genes=gene_metadata,
        selected_genes=genes,
        data_type="raw" if args.use_raw else "log2"
    )

    print("Writing gene data to:", args.output_file_path)
    gene_data.to_csv(output_path)


if __name__ == "__main__":
    main()
0 commit comments