Skip to content

Commit 6b08941

Browse files
authored
Updates (#122)
- Fix PathwayCommons getter
- Re-enable PathwayCommons and DisGeNet getters
- Refactor OLS getter constructor to use high-level `bioregistry.Resource` interface
- Skip constructing OLS getters for non-canonical resources (e.g., `orphanet.ordo`)
- Catch `KeyError` in `bioversions.get_version` for when a non-existent key is given
1 parent 0a8da4b commit 6b08941

7 files changed

Lines changed: 64 additions & 43 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,7 @@ dependencies = [
6060
"more_click",
6161
"pyyaml",
6262
"tqdm",
63-
"bioregistry[align]>=0.13.0",
63+
"bioregistry[align]>=0.13.47",
6464
"lxml",
6565
"pydantic>=2.0",
6666
"psycopg2-binary",

src/bioversions/sources/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,8 +155,6 @@
155155
#: These are broken beyond fixing at the moment
156156
SKIPPED = {
157157
DrugBankGetter,
158-
PathwayCommonsGetter,
159-
DisGeNetGetter,
160158
# Upper-level classes
161159
OBOFoundryGetter,
162160
UnversionedGetter,
@@ -185,8 +183,10 @@ def resolve(name: str | type[Getter], strict: Literal[True] = ...) -> VersionRes
185183
@lru_cache(None)
186184
def resolve(name: str | type[Getter], strict: bool = True) -> VersionResult | None:
187185
"""Resolve the database name to a :class:`Bioversion` instance."""
188-
getter = getter_resolver.lookup(name)
189186
try:
187+
# this can throw a key error if it can't be looked up
188+
getter = getter_resolver.lookup(name)
189+
# this can throw all sorts of errors during resolution
190190
rv = getter.resolve()
191191
except Exception:
192192
if strict:

src/bioversions/sources/flybase.py

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,14 @@
22

33
import re
44

5-
from bioversions.utils import Getter, VersionType, get_soup
5+
from bioversions.utils import HUMAN_BROWSER_AGENT, Getter, VersionType, get_soup
66

77
__all__ = [
88
"FlybaseGetter",
99
]
1010

1111
URL = "https://s3ftp.flybase.org/releases/"
1212
PATTERN = re.compile(r"FB\d{4}_\d{2}")
13-
AGENT = (
14-
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
15-
"(KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"
16-
)
1713

1814

1915
class FlybaseGetter(Getter):
@@ -26,7 +22,7 @@ class FlybaseGetter(Getter):
2622

2723
def get(self) -> str:
2824
"""Get the latest FlyBase version."""
29-
soup = get_soup(URL, user_agent=AGENT)
25+
soup = get_soup(URL, user_agent=HUMAN_BROWSER_AGENT)
3026
releases = [
3127
match.group().removeprefix("FB")
3228
for anchor_tag in soup.find_all("a", href=True)

src/bioversions/sources/ols.py

Lines changed: 20 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
import bioregistry
77
from bioregistry.external.ols import get_ols_processing
8-
from bioregistry.resolve import get_name
98
from class_resolver import ClassResolver
109

1110
from bioversions.utils import Getter, VersionType
@@ -15,14 +14,11 @@
1514
ols_processing = get_ols_processing()
1615

1716

18-
def _get_version_type(bioregistry_id: str) -> VersionType | None:
19-
ols_id = bioregistry.get_ols_prefix(bioregistry_id)
20-
if ols_id is None:
21-
raise ValueError(f"Missing OLS prefix for bioregistry:{bioregistry_id}")
17+
def _get_version_type(resource: bioregistry.Resource, ols_id: str) -> VersionType | None:
2218
ols_config = ols_processing.get(ols_id)
2319
if ols_config is None:
2420
raise ValueError(
25-
f"Missing OLS configuration for bioregistry:{bioregistry_id} / ols:{ols_id}"
21+
f"Missing OLS configuration for bioregistry:{resource.prefix} / ols:{ols_id}"
2622
)
2723

2824
ols_version_type = ols_config.version_type
@@ -32,60 +28,57 @@ def _get_version_type(bioregistry_id: str) -> VersionType | None:
3228
elif ols_version_type:
3329
return VersionType[ols_version_type.name]
3430
else:
35-
logger.warning("[%s] missing version type", bioregistry_id)
31+
logger.warning("[%s] missing version type", resource.prefix)
3632
return None
3733

3834

39-
def make_ols_getter(bioregistry_id: str) -> type[Getter] | None:
35+
def make_ols_getter(resource: bioregistry.Resource) -> type[Getter] | None:
4036
"""Make a getter from OLS."""
41-
ols_id = bioregistry.get_ols_prefix(bioregistry_id)
37+
ols_id = resource.get_ols_prefix()
4238
if ols_id is None:
4339
return None
4440

45-
resource = bioregistry.get_resource(bioregistry_id)
46-
if resource is None:
47-
logger.warning(f"Invalid bioregistry prefix: {bioregistry_id}")
48-
return None
4941
if resource.ols is None:
50-
logger.warning("[%s] Missing information in OLS", bioregistry_id)
51-
return None
42+
raise RuntimeError(f"{resource.prefix} is mapped to OLS {ols_id} but is missing OLS data")
43+
5244
version = resource.ols.get("version")
5345
if version is None:
54-
logger.debug("[%s] no OLS version", bioregistry_id)
46+
logger.debug("[%s] no OLS version", resource)
5547
return None
56-
57-
_brid = bioregistry_id
58-
_name = get_name(_brid)
48+
_name = resource.get_name()
5949
if _name is None:
6050
return None
61-
_version_type = _get_version_type(bioregistry_id)
51+
_version_type = _get_version_type(resource, ols_id=ols_id)
6252
if _version_type is None:
6353
return None
6454

6555
class OlsGetter(Getter):
6656
"""A getter for OLS data from the Bioregistry."""
6757

68-
bioregistry_id: ClassVar[str] = _brid
69-
name: ClassVar[str] = _name
70-
version_type: ClassVar[str] = _version_type # type:ignore
58+
bioregistry_id: ClassVar[str] = resource.prefix
59+
name: ClassVar[str] = cast(str, _name)
60+
version_type: ClassVar[VersionType] = cast(VersionType, _version_type)
7161

7262
def get(self) -> str:
7363
"""Get the version from the Bioregistry."""
7464
return cast(str, version)
7565

76-
return type(f"{_brid.title()}Getter", (OlsGetter,), locals())
66+
class_name = f"{resource.prefix.title()}Getter"
67+
return type(class_name, (OlsGetter,), locals())
7768

7869

7970
def extend_ols(version_getter_resolver: ClassResolver[Getter]) -> None:
8071
"""Add OLS lookup."""
81-
for bioregistry_id in bioregistry.read_registry():
72+
for resource in bioregistry.resources():
73+
if resource.provides or resource.has_canonical or resource.part_of:
74+
continue
8275
try:
83-
version_getter_resolver.lookup(bioregistry_id)
76+
version_getter_resolver.lookup(resource.prefix)
8477
except KeyError:
8578
pass
8679
else:
8780
continue
88-
getter = make_ols_getter(bioregistry_id)
81+
getter = make_ols_getter(resource)
8982
if getter is None:
9083
continue
9184
version_getter_resolver.register(getter)

src/bioversions/sources/pathwaycommons.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
"""A getter for Pathway Commons."""
22

3-
from bioversions.utils import Getter, VersionType, find_soup_text, get_soup
3+
from bioversions.utils import Getter, VersionType, get_soup
44

55
__all__ = [
66
"PathwayCommonsGetter",
77
]
88

9-
URL = "https://www.pathwaycommons.org/"
9+
URL = "https://download.baderlab.org/PathwayCommons/PC2/"
1010

1111

1212
class PathwayCommonsGetter(Getter):
@@ -18,10 +18,15 @@ class PathwayCommonsGetter(Getter):
1818
def get(self) -> str:
1919
"""Get the latest Pathway Commons version number."""
2020
soup = get_soup(URL)
21-
boost_text = find_soup_text(soup, {"class": "boost"})
22-
boost_text = boost_text[len("Version ") :]
23-
boost_text = boost_text.split(":")[0]
24-
return boost_text
21+
hrefs = {
22+
int(anchor.attrs["href"].lstrip("v").rstrip("/"))
23+
for anchor in soup.find_all("a")
24+
if anchor.attrs is not None
25+
and "href" in anchor.attrs
26+
and isinstance(anchor.attrs["href"], str)
27+
and anchor.attrs["href"].startswith("v")
28+
}
29+
return str(max(hrefs))
2530

2631

2732
if __name__ == "__main__":

src/bioversions/utils.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@
4848

4949
BIOVERSIONS_USER_AGENT = f"bioversions v{VERSION}"
5050

51+
HUMAN_BROWSER_AGENT = (
52+
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
53+
"(KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36"
54+
)
55+
5156

5257
def get_soup(
5358
url: str,

tests/test_bioversions.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88

99
import bioversions
1010
from bioversions.sources import BioGRIDGetter, WikiPathwaysGetter, getter_resolver
11+
from bioversions.sources.ols import make_ols_getter
1112
from bioversions.utils import get_obo_version, get_obograph_json_version, get_owl_xml_version
1213

1314
YYYYMMDD = re.compile("\\d{4}-\\d{2}-\\d{2}")
@@ -25,11 +26,32 @@ def test_bioregistry_ids(self) -> None:
2526
with self.subTest(name=getter.name):
2627
self.assertIn(getter.bioregistry_id, prefixes)
2728

29+
def test_ols(self) -> None:
30+
"""Test getting getters."""
31+
non_canonical_resource = bioregistry.get_resource("orphanet.ordo", strict=True)
32+
g1 = make_ols_getter(non_canonical_resource)
33+
self.assertIsNone(
34+
g1,
35+
msg="As of https://github.com/biopragmatics/bioregistry/pull/1935, `orpha` is "
36+
"the canonical prefix with the OLS link",
37+
)
38+
39+
canonical_resource = bioregistry.get_resource("orpha", strict=True)
40+
g = make_ols_getter(canonical_resource)
41+
self.assertIsNotNone(g)
42+
43+
def test_bad_lookup(self) -> None:
44+
"""Test looking up a missing key."""
45+
self.assertIsNone(bioversions.get_version("asasgaasgagasg", strict=False))
46+
with self.assertRaises(KeyError):
47+
bioversions.get_version("asasgaasgagasg", strict=True)
48+
2849
def test_get(self) -> None:
2950
"""Test getters."""
3051
prefixes = [
3152
"reactome",
3253
"kegg",
54+
"orpha",
3355
]
3456
for prefix in prefixes:
3557
with self.subTest(prefix=prefix):

0 commit comments

Comments
 (0)