Skip to content

Commit a634cd8

Browse files
authored
Export additional functionality (#85)
- export functionality for iterating over all results (both successes and failures) - rename tuples and export them from the top level - remove the resources DrugBank, DisGeNet, and PathwayCommons. These resources still exist, but it is no longer possible to get version information about them
1 parent 15d216a commit a634cd8

4 files changed

Lines changed: 54 additions & 38 deletions

File tree

src/bioversions/__init__.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
11
"""What's the current version for each biological database?""" # noqa:D400
22

3-
from .sources import get_rows, get_version, resolve
3+
from .sources import VersionFailure, clear_cache, get_rows, get_version, iter_versions, resolve
4+
from .utils import VersionResult
45

56
__all__ = [
7+
"VersionFailure",
8+
"VersionResult",
9+
"clear_cache",
610
"get_rows",
711
"get_version",
12+
"iter_versions",
813
"resolve",
914
]

src/bioversions/resources/update.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
write_export,
1616
write_versions,
1717
)
18-
from bioversions.sources import FailureTuple, _iter_versions
18+
from bioversions.sources import VersionFailure, iter_versions
1919
from bioversions.version import get_git_hash
2020

2121
__all__ = [
@@ -49,8 +49,8 @@ def _update(force: bool): # noqa:C901
4949

5050
changes = False
5151
failure_tuples = []
52-
for bv in _iter_versions(use_tqdm=True):
53-
if isinstance(bv, FailureTuple):
52+
for bv in iter_versions(use_tqdm=True):
53+
if isinstance(bv, VersionFailure):
5454
failure_tuples.append(bv)
5555
continue
5656

src/bioversions/sources/__init__.py

Lines changed: 42 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
from __future__ import annotations
44

55
import ftplib
6-
import logging
76
import traceback
87
from collections.abc import Iterable, Mapping
98
from functools import lru_cache
109
from typing import Literal, NamedTuple, overload
1110

1211
from tqdm import tqdm
12+
from tqdm.contrib.logging import logging_redirect_tqdm
1313

1414
from .antibodyregistry import AntibodyRegistryGetter
1515
from .bigg import BiGGGetter
@@ -71,15 +71,23 @@
7171
from .uniprot import UniProtGetter
7272
from .wikipathways import WikiPathwaysGetter
7373
from .zfin import ZfinGetter
74-
from ..utils import Bioversion, Getter, norm, refresh_daily
74+
from ..utils import Getter, VersionResult, norm, refresh_daily
7575

7676
__all__ = [
77+
"VersionFailure",
78+
"clear_cache",
7779
"get_rows",
7880
"get_version",
81+
"iter_versions",
7982
"resolve",
8083
]
8184

82-
logger = logging.getLogger(__name__)
85+
#: These are broken beyond fixing at the moment
86+
SKIPPED = [
87+
DrugBankGetter,
88+
PathwayCommonsGetter,
89+
DisGeNetGetter,
90+
]
8391

8492

8593
@lru_cache(maxsize=1)
@@ -90,7 +98,6 @@ def get_getters() -> list[type[Getter]]:
9098
BioGRIDGetter,
9199
ChEMBLGetter,
92100
ComplexPortalGetter,
93-
DrugBankGetter,
94101
DrugCentralGetter,
95102
ExPASyGetter,
96103
IntActGetter,
@@ -109,7 +116,6 @@ def get_getters() -> list[type[Getter]]:
109116
RheaGetter,
110117
StringDBGetter,
111118
HomoloGeneGetter,
112-
DisGeNetGetter,
113119
MeshGetter,
114120
DGIGetter,
115121
FlybaseGetter,
@@ -131,7 +137,6 @@ def get_getters() -> list[type[Getter]]:
131137
SwissLipidGetter,
132138
ITISGetter,
133139
DepMapGetter,
134-
PathwayCommonsGetter,
135140
UMLSGetter,
136141
HGNCGetter,
137142
RGDGetter,
@@ -167,7 +172,7 @@ def get_getter_dict() -> Mapping[str, type[Getter]]:
167172
return rv
168173

169174

170-
def resolve(name: str, use_cache: bool = True) -> Bioversion:
175+
def resolve(name: str, use_cache: bool = True) -> VersionResult:
171176
"""Resolve the database name to a :class:`VersionResult` instance."""
172177
if use_cache:
173178
return _resolve_helper_cached(name)
@@ -176,11 +181,16 @@ def resolve(name: str, use_cache: bool = True) -> Bioversion:
176181

177182

178183
@refresh_daily
179-
def _resolve_helper_cached(name: str) -> Bioversion:
184+
def _resolve_helper_cached(name: str) -> VersionResult:
180185
return _resolve_helper(name)
181186

182187

183-
def _resolve_helper(name: str) -> Bioversion:
188+
def clear_cache() -> None:
189+
"""Clear the cache."""
190+
_resolve_helper_cached.clear_cache()
191+
192+
193+
def _resolve_helper(name: str) -> VersionResult:
184194
norm_name = norm(name)
185195
getter: type[Getter] = get_getter_dict()[norm_name]
186196
return getter.resolve()
@@ -217,16 +227,16 @@ def get_version(name: str, *, strict: bool = True) -> str | None:
217227
return rv
218228

219229

220-
def get_rows(use_tqdm: bool | None = False) -> list[Bioversion]:
230+
def get_rows(use_tqdm: bool | None = False) -> list[VersionResult]:
221231
"""Get the rows, refreshing once per day."""
222232
return [
223233
bioversion
224-
for bioversion in _iter_versions(use_tqdm=use_tqdm)
225-
if isinstance(bioversion, Bioversion)
234+
for bioversion in iter_versions(use_tqdm=use_tqdm)
235+
if isinstance(bioversion, VersionResult)
226236
]
227237

228238

229-
class FailureTuple(NamedTuple):
239+
class VersionFailure(NamedTuple):
230240
"""Holds information about failures."""
231241

232242
name: str
@@ -235,22 +245,23 @@ class FailureTuple(NamedTuple):
235245
trace: str
236246

237247

238-
def _iter_versions(
248+
def iter_versions(
239249
use_tqdm: bool | None = False,
240-
) -> Iterable[Bioversion | FailureTuple]:
241-
it = tqdm(get_getters(), disable=not use_tqdm)
242-
243-
for cls in it:
244-
it.set_postfix(name=cls.name)
245-
try:
246-
yv = resolve(cls.name)
247-
except (OSError, AttributeError, ftplib.error_perm):
248-
msg = f"failed to resolve {cls.name}"
249-
tqdm.write(msg)
250-
yield FailureTuple(cls.name, cls.__name__, msg, traceback.format_exc())
251-
except (ValueError, KeyError) as e:
252-
msg = f"issue parsing {cls.name}: {e}"
253-
tqdm.write(msg)
254-
yield FailureTuple(cls.name, cls.__name__, msg, traceback.format_exc())
255-
else:
256-
yield yv
250+
) -> Iterable[VersionResult | VersionFailure]:
251+
"""Iterate over versions, yielding a result or a failure for each resource."""
252+
with logging_redirect_tqdm():
253+
it = tqdm(get_getters(), disable=not use_tqdm, desc="Getting versions", unit="resource")
254+
for cls in it:
255+
it.set_postfix(name=cls.name)
256+
try:
257+
yv = resolve(cls.name)
258+
except (OSError, AttributeError, ftplib.error_perm):
259+
msg = f"[{cls.bioregistry_id or cls.name}] failed to resolve"
260+
tqdm.write(msg)
261+
yield VersionFailure(cls.name, cls.__name__, msg, traceback.format_exc())
262+
except (ValueError, KeyError) as e:
263+
msg = f"[{cls.bioregistry_id or cls.name}] issue parsing: {e}"
264+
tqdm.write(msg)
265+
yield VersionFailure(cls.name, cls.__name__, msg, traceback.format_exc())
266+
else:
267+
yield yv

src/bioversions/utils.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def homepage_version_transform(version: str) -> str:
158158
return version
159159

160160

161-
class Bioversion(pydantic.BaseModel):
161+
class VersionResult(pydantic.BaseModel):
162162
"""A dataclass for information about a database and version."""
163163

164164
#: The database name
@@ -217,9 +217,9 @@ def print(cls, sep: str = "\t", file=None):
217217
print(*x, sep=sep, file=file)
218218

219219
@classmethod
220-
def resolve(cls) -> Bioversion:
220+
def resolve(cls) -> VersionResult:
221221
"""Get a VersionResult data container with the data for this database."""
222-
return Bioversion(
222+
return VersionResult(
223223
name=cls.name,
224224
version=cls.version,
225225
classname=cls.__name__,

0 commit comments

Comments
 (0)