Skip to content

Commit 4cbdefe

Browse files
committed
Added a Python script to transform each TSV file into CSV files named after the corresponding table
1 parent e2dd708 commit 4cbdefe

2 files changed

Lines changed: 45 additions & 0 deletions

File tree

src/ontology/ak-ontology.Makefile

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ exports/CL.tsv: imports/CL_import.owl | exports/
2020
--header 'ID|Label|SubClassOf [ID]' \
2121
--sort ID \
2222
--export $@
23+
python3 ../scripts/ontology_table_transform.py exports/CL.tsv CellOntology
2324

2425
exports/DOID.tsv: imports/DOID_import.owl | exports/
2526
$(ROBOT) extract \
@@ -30,6 +31,7 @@ exports/DOID.tsv: imports/DOID_import.owl | exports/
3031
--header 'ID|Label|SubClassOf [ID]' \
3132
--sort ID \
3233
--export $@
34+
python3 ../scripts/ontology_table_transform.py exports/DOID.tsv TODO
3335

3436
exports/PATO.tsv: imports/PATO_import.owl | exports/
3537
$(ROBOT) extract \
@@ -40,6 +42,7 @@ exports/PATO.tsv: imports/PATO_import.owl | exports/
4042
--header 'ID|Label|SubClassOf [ID]' \
4143
--sort ID \
4244
--export $@
45+
python3 ../scripts/ontology_table_transform.py exports/PATO.tsv TODO
4346

4447
exports/UO.tsv: imports/UO_import.owl | exports/
4548
$(ROBOT) extract \
@@ -50,6 +53,7 @@ exports/UO.tsv: imports/UO_import.owl | exports/
5053
--header 'ID|Label|SubClassOf [ID]' \
5154
--sort ID \
5255
--export $@
56+
python3 ../scripts/ontology_table_transform.py exports/UO.tsv TODO
5357

5458
exports/UBERON.tsv: imports/UBERON_import.owl | exports/
5559
$(ROBOT) extract \
@@ -60,6 +64,7 @@ exports/UBERON.tsv: imports/UBERON_import.owl | exports/
6064
--header 'ID|Label|SubClassOf [ID]' \
6165
--sort ID \
6266
--export $@
67+
python3 ../scripts/ontology_table_transform.py exports/UBERON.tsv TODO
6368

6469
exports/OBI.tsv: imports/OBI_import.owl | exports/
6570
$(ROBOT) extract \
@@ -70,6 +75,7 @@ exports/OBI.tsv: imports/OBI_import.owl | exports/
7075
--header 'ID|Label|SubClassOf [ID]' \
7176
--sort ID \
7277
--export $@
78+
python3 ../scripts/ontology_table_transform.py exports/OBI.tsv BiomedicalInvestigations
7379

7480
exports/MRO.tsv: imports/MRO_import.owl | exports/
7581
$(ROBOT) extract \
@@ -80,6 +86,7 @@ exports/MRO.tsv: imports/MRO_import.owl | exports/
8086
--header 'ID|Label|SubClassOf [ID]' \
8187
--sort ID \
8288
--export $@
89+
python3 ../scripts/ontology_table_transform.py exports/MRO.tsv TODO
8390

8491
exports/NCBITaxon.tsv: imports/NCBITaxon_import.owl | exports/
8592
$(ROBOT) extract \
@@ -90,3 +97,4 @@ exports/NCBITaxon.tsv: imports/NCBITaxon_import.owl | exports/
9097
--header 'ID|Label|SubClassOf [ID]' \
9198
--sort ID \
9299
--export $@
100+
python3 ../scripts/ontology_table_transform.py exports/NCBITaxon.tsv TODO
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
import pandas as pd
2+
import os
3+
import sys
4+
5+
file_dir = 'ak-ontology/src/ontology/exports'
6+
save_file_dir = "/ak_data/ak-data-load/ontology"
7+
8+
# name of the TSV file that needs to be converted to CSV
9+
filename = f'{sys.argv[1]}'
10+
table_name = sys.argv[2]
11+
# filenames for the CSV files, which should match the table names in the database
12+
ontology_term_filename = f'{table_name}.csv'
13+
term_parents_filename = f'{table_name}_parent.csv'
14+
print("====================================================================================")
15+
print(f'Now processing: {filename} for {table_name} and {table_name}_parent table.')
16+
tsv_path = os.path.join(file_dir, filename)
17+
df = pd.read_csv(tsv_path, sep = '\t', dtype=str)
18+
#set column names based on the keys of the table
19+
df.columns = ["term_id", "term_label", "parent_term_id"]
20+
df["parent_term_id"] = df["parent_term_id"].fillna(df["term_id"])
21+
# keep only the ontology terms and their labels
22+
ontology_term = df[["term_id", "term_label"]]
23+
# split '|'-separated parents so that each parent gets its own row
24+
df["parent_term_id"] = df["parent_term_id"].str.split("|")
25+
term_parents = df.explode("parent_term_id").reset_index(drop=True)
26+
# get only ontology terms and their parent
27+
term_parents = term_parents[["term_id", "parent_term_id"]]
28+
term_parents.rename(columns={"term_id": f"{table_name}_term_id"}, inplace=True)
29+
print(term_parents.head())
30+
# save these CSV files in the ak-data-load ontology directory
31+
ontology_term.to_csv(f'{save_file_dir}/{ontology_term_filename}',index = False)
32+
term_parents.to_csv(f'{save_file_dir}/{term_parents_filename}',index = False)
33+
34+
print(f"Done creating {ontology_term_filename} and {term_parents_filename} file.")
35+
print("====================================================================================")
36+
37+

0 commit comments

Comments
 (0)