Skip to content

Commit d4e4ab5

Browse files
authored
Merge pull request #1353 from microbiomedata/1342-additional-pathway-prefixes
Support pathway search with multiple prefixes
2 parents feac95b + b38715d commit d4e4ab5

File tree

5 files changed

+76
-20
lines changed

5 files changed

+76
-20
lines changed

nmdc_server/crud.py

+22-2
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import re
12
from datetime import datetime
23
from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, cast
34
from uuid import UUID
@@ -224,6 +225,14 @@ def list_omics_processing_data_objects(db: Session, id: str) -> Query:
224225
)
225226

226227

228+
# KEGG
229+
def get_pathway_prefix(term) -> Optional[str]:
230+
pathway_prefixes = set(["map", "ko", "ec", "rn", "org"])
231+
pathway_re = f"^({'|'.join(re.escape(p) for p in pathway_prefixes)})"
232+
match = re.match(pathway_re, term)
233+
return match.group(0) if match else None
234+
235+
227236
def list_ko_terms_for_module(db: Session, module: str) -> List[str]:
228237
q = db.query(models.KoTermToModule.term).filter(models.KoTermToModule.module.ilike(module))
229238
return [row[0] for row in q]
@@ -235,13 +244,24 @@ def list_ko_terms_for_pathway(db: Session, pathway: str) -> List[str]:
235244

236245

237246
def kegg_text_search(db: Session, query: str, limit: int) -> List[models.KoTermText]:
247+
pathway_prefix = get_pathway_prefix(query)
248+
term = query.replace(pathway_prefix, "map") if pathway_prefix else query
238249
q = (
239250
db.query(models.KoTermText)
240-
.filter(models.KoTermText.text.ilike(f"%{query}%") | models.KoTermText.term.ilike(query))
251+
.filter(models.KoTermText.text.ilike(f"%{term}%") | models.KoTermText.term.ilike(term))
241252
.order_by(models.KoTermText.term)
242253
.limit(limit)
243254
)
244-
return list(q)
255+
results = list(q)
256+
if pathway_prefix:
257+
default_pathway_prefix = "map"
258+
# Transform pathway results to match given prefix. They are ingested with the
259+
# 'map' prefix, but can be searched for with various other prefixes.
260+
for term_text in results:
261+
if term_text.term.startswith(default_pathway_prefix):
262+
term_text.term = term_text.term.replace(default_pathway_prefix, pathway_prefix)
263+
term_text.text = term_text.text.replace(default_pathway_prefix, pathway_prefix)
264+
return results
245265

246266

247267
# biosample

nmdc_server/query.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -292,10 +292,9 @@ def groups(self) -> Iterator[Tuple[str, Iterator[BaseConditionSchema]]]:
292292
def transform_condition(self, db, condition: BaseConditionSchema) -> List[BaseConditionSchema]:
293293
# Transform KEGG.(PATH|MODULE) queries into their respective ORTHOLOGY terms
294294
if condition.key == "Table.gene_function:id" and type(condition.value) is str:
295-
if condition.value.startswith(KeggTerms.PATHWAY[0]):
296-
searchable_name = condition.value.replace(
297-
KeggTerms.PATHWAY[0], KeggTerms.PATHWAY[1]
298-
)
295+
if any([condition.value.startswith(val) for val in KeggTerms.PATHWAY[0]]):
296+
prefix = [val for val in KeggTerms.PATHWAY[0] if condition.value.startswith(val)][0]
297+
searchable_name = condition.value.replace(prefix, KeggTerms.PATHWAY[1])
299298
ko_terms = db.query(models.KoTermToPathway.term).filter(
300299
models.KoTermToPathway.pathway.ilike(searchable_name)
301300
)

nmdc_server/table.py

+9-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,15 @@
2323

2424
class KeggTerms:
2525
ORTHOLOGY = ("KEGG.ORTHOLOGY:K", "K")
26-
PATHWAY = ("KEGG.PATHWAY:MAP", "MAP")
26+
PATHWAY = (
27+
[
28+
"KEGG.PATHWAY:MAP",
29+
"KEGG.PATHWAY:EC",
30+
"KEGG.PATHWAY:RN",
31+
"KEGG.PATHWAY:KO",
32+
],
33+
"MAP",
34+
)
2735
MODULE = ("KEGG.MODULE:M", "M")
2836

2937

web/src/components/FilterKegg.vue

+2-2
Original file line numberDiff line numberDiff line change
@@ -101,8 +101,8 @@ export default defineComponent({
101101
<p>
102102
KEGG Gene Function search filters results to
103103
samples that have at least one of the chosen KEGG terms.
104-
Orthology, Pathway, and Module are supported.
105-
Expected format: <code>K00000, M00000 or MAP00000</code>
104+
Orthology, Module, and Pathway are supported.
105+
Expected formats: <code>K00000, M00000, map00000, ko00000, rn00000, and ec00000</code>
106106
</p>
107107
<p class="text-subtitle-2">
108108
More information at <a href="https://www.genome.jp/kegg/">genome.jp/kegg</a>

web/src/encoding.ts

+40-11
Original file line numberDiff line numberDiff line change
@@ -21,26 +21,54 @@ export interface FieldsData {
2121
encode?: (input: string) => string,
2222
}
2323

24-
const KeggPrefix = {
24+
interface PrefixInfo {
25+
pattern: RegExp;
26+
short: Function;
27+
long: Function;
28+
urlBase: string;
29+
}
30+
31+
const pathwayRegex = /^((map:?)|(path:?)|(ko:?)|(ec:?)|(rn:?)|(kegg.pathway:(map|path|ec|ko|rn)))(?=\d{5})/i;
32+
33+
function pathwayPrefixShort(v: string) {
34+
const match = v.match(pathwayRegex);
35+
if (match) {
36+
const prefix = match[8] ? match[8] : match[1].replace(/:$/, '');
37+
return prefix.toLowerCase();
38+
}
39+
return 'map';
40+
}
41+
42+
function pathwayPrefixLong(v: string) {
43+
const match = v.match(pathwayRegex);
44+
if (match) {
45+
const prefix = match[7] ? match[7] : match[1].replace(match[1], `kegg.pathway:${match[1]}`);
46+
return prefix.toUpperCase();
47+
}
48+
return 'KEGG.PATHWAY.MAP';
49+
}
50+
51+
const KeggPrefix: Record<string, PrefixInfo> = {
2552
ORTHOLOGY: {
26-
pattern: /^((ko?:?)|(kegg\.orthology:k))(?=\d{5})/i,
27-
short: 'k',
28-
long: 'KEGG.ORTHOLOGY:K',
53+
pattern: /^((k:?)|(kegg\.orthology:k))(?=\d{5})/i,
54+
short: () => 'k',
55+
long: () => 'KEGG.ORTHOLOGY:K',
2956
urlBase: 'https://www.genome.jp/entry/',
3057
},
3158
PATHWAY: {
32-
pattern: /^((map:?)|(path:?)|(kegg.pathway:map))(?=\d{5})/i,
33-
short: 'map',
34-
long: 'KEGG.PATHWAY:MAP',
59+
pattern: pathwayRegex,
60+
short: pathwayPrefixShort,
61+
long: pathwayPrefixLong,
3562
urlBase: 'https://www.genome.jp/kegg-bin/show_pathway?',
3663
},
3764
MODULE: {
3865
pattern: /^((m:?)|(kegg.module:m))(?=\d{5})/i,
39-
short: 'M',
40-
long: 'KEGG.MODULE:M',
41-
urlBase: 'https://www.genome.jp/brite/',
66+
short: () => 'M',
67+
long: () => 'KEGG.MODULE:M',
68+
urlBase: 'https://www.kegg.jp/entry/',
4269
},
4370
};
71+
4472
/**
4573
* Encode a string as either the long or short variant of
4674
* a KEGG identifier term.
@@ -51,7 +79,8 @@ function keggEncode(v: string, url = false) {
5179
const {
5280
pattern, short, long, urlBase,
5381
} = prefixes[i];
54-
const transformed = v.replace(pattern, url ? short : long);
82+
const replacement = url ? short(v) : long(v);
83+
const transformed = v.replace(pattern, replacement);
5584
if (transformed !== v) {
5685
if (url) {
5786
return urlBase + transformed;

0 commit comments

Comments
 (0)