@@ -57,8 +57,8 @@ def ingest_ko_search(db: Session) -> None:
57
57
def get_search_records_from_delimeted_file (
58
58
file ,
59
59
term_key ,
60
- text_key ,
61
60
records ,
61
+ text_key = None ,
62
62
delimeter = "\t " ,
63
63
fallback_text_key = None ,
64
64
fieldnames = None ,
@@ -75,8 +75,10 @@ def get_search_records_from_delimeted_file(
75
75
continue
76
76
if fallback_text_key :
77
77
records [row [term_key ]] = row [text_key ] or row [fallback_text_key ]
78
- else :
78
+ elif text_key :
79
79
records [row [term_key ]] = row [text_key ]
80
+ else :
81
+ records [row [term_key ]] = ""
80
82
except FileNotFoundError :
81
83
errors ["kegg_search" ].add (f"Missing { file } " )
82
84
@@ -100,7 +102,7 @@ def get_search_records_from_delimeted_file(
100
102
101
103
cog_function_headers = ["function_code" , "sequence" , "definition" ]
102
104
103
- delimeted_files : Dict [str , List [Dict [str , Union [str , List [str ]]]]] = {
105
+ delimeted_files : Dict [str , List [Dict [str , Union [None , str , List [str ]]]]] = {
104
106
PATHWAY_FILE : [
105
107
{
106
108
"term_key" : "image_id" ,
@@ -117,22 +119,24 @@ def get_search_records_from_delimeted_file(
117
119
"hierarchy" : "cog" ,
118
120
}
119
121
],
122
+ # Cog pathways and terms come out of the same file
120
123
COG_PATHWAY_DEFS : [
124
+ # Pathways
121
125
{
122
126
"fieldnames" : cog_def_headers ,
123
127
"term_key" : cog_def_headers [4 ],
124
- "text_key" : cog_def_headers [ 4 ],
128
+ "text_key" : None , # COG pathways just have a name
125
129
"hierarchy" : "cog" ,
126
- }
127
- ],
128
- COG_TERM_DEFS : [
130
+ },
131
+ # Terms
129
132
{
130
133
"fieldnames" : cog_def_headers ,
131
134
"term_key" : cog_def_headers [0 ],
132
135
"text_key" : cog_def_headers [2 ],
133
136
"hierarchy" : "cog" ,
134
- }
137
+ },
135
138
],
139
+ # PFAM terms and clans come out of the same file
136
140
PFAM_TERM_DEFS : [
137
141
{
138
142
"fieldnames" : pfam_headers ,
@@ -157,28 +161,28 @@ def get_search_records():
157
161
"cog" : {},
158
162
}
159
163
160
- def ingest_tree (node : dict ) -> None :
164
+ def ingest_tree (node : dict , hierarchy : str ) -> None :
161
165
if not node .get ("children" , False ):
162
166
term , * text = node ["name" ].split (" " , maxsplit = 1 )
163
167
if "BR:" not in term :
164
168
# Skip over BRITE term hierarchies that have no children
165
- records [term ] = text [0 ] if text else ""
169
+ records [hierarchy ][ term ] = text [0 ] if text else ""
166
170
167
171
for child in node .get ("children" , ()):
168
- ingest_tree (child )
172
+ ingest_tree (child , hierarchy )
169
173
170
174
for url in [MODULE_URL , ORTHOLOGY_URL ]:
171
175
req = requests .get (url )
172
176
req .raise_for_status ()
173
- ingest_tree (req .json ())
177
+ ingest_tree (req .json (), "ko" )
174
178
175
179
for file , keys in delimeted_files .items ():
176
180
for key_set in keys :
177
181
get_search_records_from_delimeted_file (
178
182
file ,
179
183
key_set ["term_key" ],
180
- key_set ["text_key" ],
181
184
records [str (key_set ["hierarchy" ])],
185
+ text_key = key_set ["text_key" ],
182
186
fallback_text_key = key_set .get ("fallback_text_key" , None ),
183
187
fieldnames = key_set .get ("fieldnames" , None ),
184
188
)
0 commit comments