Skip to content

Commit 54a24ea

Browse files
authored
Merge pull request #1459 from microbiomedata/1451-fix-cog-pathway-ingest
Properly ingest ko terms and COG pathways
2 parents 8b53504 + 1219e8d commit 54a24ea

File tree

2 files changed

+25
-14
lines changed

2 files changed

+25
-14
lines changed

nmdc_server/ingest/kegg.py

+17-13
Original file line numberDiff line numberDiff line change
@@ -57,8 +57,8 @@ def ingest_ko_search(db: Session) -> None:
5757
def get_search_records_from_delimeted_file(
5858
file,
5959
term_key,
60-
text_key,
6160
records,
61+
text_key=None,
6262
delimeter="\t",
6363
fallback_text_key=None,
6464
fieldnames=None,
@@ -75,8 +75,10 @@ def get_search_records_from_delimeted_file(
7575
continue
7676
if fallback_text_key:
7777
records[row[term_key]] = row[text_key] or row[fallback_text_key]
78-
else:
78+
elif text_key:
7979
records[row[term_key]] = row[text_key]
80+
else:
81+
records[row[term_key]] = ""
8082
except FileNotFoundError:
8183
errors["kegg_search"].add(f"Missing {file}")
8284

@@ -100,7 +102,7 @@ def get_search_records_from_delimeted_file(
100102

101103
cog_function_headers = ["function_code", "sequence", "definition"]
102104

103-
delimeted_files: Dict[str, List[Dict[str, Union[str, List[str]]]]] = {
105+
delimeted_files: Dict[str, List[Dict[str, Union[None, str, List[str]]]]] = {
104106
PATHWAY_FILE: [
105107
{
106108
"term_key": "image_id",
@@ -117,22 +119,24 @@ def get_search_records_from_delimeted_file(
117119
"hierarchy": "cog",
118120
}
119121
],
122+
# COG pathways and terms come out of the same file
120123
COG_PATHWAY_DEFS: [
124+
# Pathways
121125
{
122126
"fieldnames": cog_def_headers,
123127
"term_key": cog_def_headers[4],
124-
"text_key": cog_def_headers[4],
128+
"text_key": None, # COG pathways just have a name
125129
"hierarchy": "cog",
126-
}
127-
],
128-
COG_TERM_DEFS: [
130+
},
131+
# Terms
129132
{
130133
"fieldnames": cog_def_headers,
131134
"term_key": cog_def_headers[0],
132135
"text_key": cog_def_headers[2],
133136
"hierarchy": "cog",
134-
}
137+
},
135138
],
139+
# PFAM terms and clans come out of the same file
136140
PFAM_TERM_DEFS: [
137141
{
138142
"fieldnames": pfam_headers,
@@ -157,28 +161,28 @@ def get_search_records():
157161
"cog": {},
158162
}
159163

160-
def ingest_tree(node: dict) -> None:
164+
def ingest_tree(node: dict, hierarchy: str) -> None:
161165
if not node.get("children", False):
162166
term, *text = node["name"].split(" ", maxsplit=1)
163167
if "BR:" not in term:
164168
# Skip over BRITE term hierarchies that have no children
165-
records[term] = text[0] if text else ""
169+
records[hierarchy][term] = text[0] if text else ""
166170

167171
for child in node.get("children", ()):
168-
ingest_tree(child)
172+
ingest_tree(child, hierarchy)
169173

170174
for url in [MODULE_URL, ORTHOLOGY_URL]:
171175
req = requests.get(url)
172176
req.raise_for_status()
173-
ingest_tree(req.json())
177+
ingest_tree(req.json(), "ko")
174178

175179
for file, keys in delimeted_files.items():
176180
for key_set in keys:
177181
get_search_records_from_delimeted_file(
178182
file,
179183
key_set["term_key"],
180-
key_set["text_key"],
181184
records[str(key_set["hierarchy"])],
185+
text_key=key_set["text_key"],
182186
fallback_text_key=key_set.get("fallback_text_key", None),
183187
fieldnames=key_set.get("fieldnames", None),
184188
)

web/src/components/FilterGene.vue

+8-1
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,16 @@ export default defineComponent({
5858
return request(() => props.geneTypeParams.searchFunction(search.value || ''));
5959
}
6060
61+
function getTermDisplayText(term: string, text: string) {
62+
if (text) {
63+
return `${term}: ${text}`;
64+
}
65+
return term;
66+
}
67+
6168
watch(search, async () => {
6269
const resp = (await geneSearch())
63-
.map((v: KeggTermSearchResponse) => ({ text: `${v.term}: ${v.text}`, value: v.term }));
70+
.map((v: KeggTermSearchResponse) => ({ text: getTermDisplayText(v.term, v.text), value: v.term }));
6471
if (resp.length === 0 && search.value && props.geneTypeParams.searchWithInputText(search.value)) {
6572
resp.push({ value: search.value, text: search.value });
6673
}

0 commit comments

Comments
 (0)