Skip to content

Commit 7d4229e

Browse files
deeenes and claude committed
Add utils subpackage and data loader from pypath_common
Migrate general-purpose utility functions from pypath_common._misc, _constants, and _process into pkg_infra.utils. This allows packages (cachedir, dlmachine, omnipath-utils) to depend on pkg-infra instead of the soon-to-be-retired pypath_common.

- utils/constants.py: SIMPLE_TYPES, LIST_LIKE, BOOLEAN_*, etc.
- utils/_process.py: swap_dict
- utils/_misc.py: ~80 functions (to_set, first, md5, ext, etc.)
- data/__init__.py: simplified package data loader (load, path, builtins)
- numpy, psutil, tabulate made optional imports
- Bump version to 0.1.0

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 3ddea0f commit 7d4229e

10 files changed

Lines changed: 3176 additions & 34 deletions

File tree

.bumpversion.cfg

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
[bumpversion]
2-
current_version = 0.0.1
2+
current_version = 0.1.0
33
commit = True
44
tag = True
5-
files = pyproject.toml pkg_infra/_metadata.py
5+
files = pyproject.toml src/pkg_infra/_metadata.py
66
parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)
77
serialize = {major}.{minor}.{patch}

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ name = "pkg-infra"
4848
# ]
4949
readme = "README.md"
5050
requires-python = ">=3.10"
51-
version = "0.0.2"
51+
version = "0.1.0"
5252

5353
[project.optional-dependencies]
5454
dev = [

src/pkg_infra/_metadata.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323

2424
import toml
2525

26-
_VERSION = '0.0.1'
26+
_VERSION = '0.1.0'
2727

2828

2929
def get_metadata() -> dict:

src/pkg_infra/data/__init__.py

Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
"""Access built-in data files shipped with Python packages.
2+
3+
Provides a simple API to load YAML, JSON, and other data files from a
4+
package's ``data/`` or ``_data/`` subdirectory.
5+
6+
Example::
7+
8+
from pkg_infra.data import load
9+
10+
# Load data from the calling package's data/ directory
11+
config = load('default_config.yaml')
12+
13+
# Load data from a specific package
14+
ids = load('id_types.json', module='omnipath_utils')
15+
"""
16+
17+
from __future__ import annotations
18+
19+
from typing import Any, Callable
20+
import os
21+
import json
22+
import pathlib as pl
23+
import functools
24+
import collections
25+
import logging
26+
27+
import yaml
28+
29+
# Names exported by ``from pkg_infra.data import *``.
__all__ = ['builtins', 'load', 'path']

_logger = logging.getLogger(__name__)

# JSON reader that keeps the key order of the file by building OrderedDicts.
_load_json = functools.partial(
    json.load,
    object_pairs_hook=collections.OrderedDict,
)

# NOTE(review): FullLoader is not intended for untrusted YAML; confirm that
# only files shipped with packages ever reach this reader.
_load_yaml = functools.partial(yaml.load, Loader=yaml.FullLoader)

# Recognised data file extensions (lower case, without the dot) mapped to the
# callable that parses an open file of that type. ``None`` marks extensions
# without a dedicated parser; callers have to substitute their own reader
# for those.
_FORMATS = {
    'json': _load_json,
    'yaml': _load_yaml,
    'txt': None,
    '': None,
}
42+
43+
44+
def _caller_module() -> str:
    """Return the top-level package name of the caller's caller.

    Walks two frames up the call stack: the first step reaches the
    function that invoked this helper, the second reaches whoever called
    that function. The ``__name__`` found in that frame's globals is cut
    at the first dot; ``'__main__'`` is used when ``__name__`` is absent.
    """

    import inspect

    here = inspect.currentframe()

    try:
        target = here.f_back.f_back
        qualified = target.f_globals.get('__name__', '__main__')
        return qualified.split('.')[0]
    finally:
        # Release the frame reference explicitly, as the inspect
        # documentation advises, to avoid keeping frames alive in a cycle.
        del here
57+
58+
def _module_datadir(module: str) -> pl.Path | None:
59+
"""Find the data directory for a module."""
60+
61+
import importlib
62+
63+
try:
64+
mod = importlib.import_module(module)
65+
except ModuleNotFoundError:
66+
return None
67+
68+
if mod_path := getattr(mod, '__path__', None):
69+
base = pl.Path(mod_path[0])
70+
elif mod_file := getattr(mod, '__file__', None):
71+
base = pl.Path(mod_file).parent
72+
else:
73+
return None
74+
75+
for dirname in ('data', '_data'):
76+
datadir = base / dirname
77+
if datadir.is_dir():
78+
return datadir
79+
80+
return None
81+
82+
83+
def path(label: str, module: str | None = None) -> pl.Path | None:
84+
"""Find path to a data file shipped with a package.
85+
86+
Args:
87+
label: Filename or label of a built-in dataset.
88+
module: Package name. Defaults to the calling package.
89+
90+
Returns:
91+
Path to the file, or None if not found.
92+
"""
93+
94+
if os.path.exists(label):
95+
return pl.Path(label).absolute()
96+
97+
available = builtins(module or _caller_module())
98+
stem = label.rsplit('.', maxsplit=1)[0] if '.' in label else label
99+
return available.get(label) or available.get(stem)
100+
101+
102+
def _read_lines(fp):
    """Fallback reader: return the lines of an open text file as a list."""

    return fp.readlines()


def _read_delimited(fp, sep='\t', **kwargs):
    """Fallback reader for delimited text: return rows as lists of fields."""

    import csv

    return list(csv.reader(fp, delimiter=sep, **kwargs))


def load(
    label: str,
    module: str | None = None,
    reader: Callable | None = None,
    **kwargs,
) -> Any:
    """Load a data file shipped with a package.

    When no ``reader`` is given, one is chosen from the file extension:
    the parser registered in ``_FORMATS`` if there is one, a field
    splitting reader for ``.tsv`` files (rows as lists of strings, tab
    separated unless ``sep`` is passed), and a plain line reader for
    everything else.

    Args:
        label: Filename or label of a built-in dataset.
        module: Package name. Defaults to the calling package.
        reader: Custom reader function, called with the open file object
            and ``kwargs``. Auto-detected from extension if None.
        kwargs: Extra arguments passed to the reader.

    Returns:
        The loaded data (typically dict or list), or None if the data
        file could not be found.
    """

    # Resolve the caller here, in this frame, so that the default module
    # is the package that called ``load``.
    module = module or _caller_module()
    _path = path(label, module)

    if not _path:
        _logger.debug(
            'Could not find built-in data `%s` in module `%s`.', label, module,
        )
        return None

    if not reader:
        # ``suffix`` is empty for names without a dot, so a file that is
        # merely *named* like a format is not mistaken for that format.
        ext = _path.suffix[1:].lower()

        if ext == 'tsv':
            # A line reader cannot accept ``sep``; split the fields instead.
            kwargs.setdefault('sep', '\t')
            reader = _read_delimited
        else:
            # Some registered extensions map to None (no dedicated parser);
            # ``or`` covers both those and unregistered extensions.
            reader = _FORMATS.get(ext) or _read_lines

    _logger.debug(
        'Loading built-in data `%s` from module `%s`; path: `%s`.',
        label, module, _path,
    )

    # Explicit encoding keeps the result independent of the platform locale.
    with open(_path, encoding='utf-8') as fp:
        return reader(fp, **kwargs)
142+
143+
144+
def builtins(module: str | None = None) -> dict[str, pl.Path]:
    """Collect the built-in data files a package ships.

    The package's data directory is walked recursively; every file whose
    lower-cased extension is a key of ``_FORMATS`` is reported.

    Args:
        module: Package name. Defaults to the calling package.

    Returns:
        Dict mapping each file's path relative to the data directory,
        with the extension dropped, to its full path. An empty dict if the
        package has no data directory.
    """

    # Resolved in this frame so the default is the caller of ``builtins``.
    module = module or _caller_module()
    datadir = _module_datadir(module)

    if not (datadir and datadir.is_dir()):
        return {}

    found = {}

    for dirpath, _subdirs, filenames in os.walk(datadir):
        here = pl.Path(dirpath)

        for filename in filenames:
            entry = pl.Path(filename)

            if entry.suffix[1:].lower() not in _FORMATS:
                continue

            # Files sharing a stem in one directory share a key; the one
            # visited last wins.
            key = str((here / entry.stem).relative_to(datadir))
            found[key] = here / filename

    return found

src/pkg_infra/utils.py

Lines changed: 0 additions & 22 deletions
This file was deleted.

src/pkg_infra/utils/__init__.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
"""General-purpose utility functions.
2+
3+
Migrated from pypath_common._misc, _constants, and _process.
4+
Organized into submodules for clarity; all public functions are
5+
re-exported here for convenience::
6+
7+
from pkg_infra.utils import to_set, first, ext, swap_dict
8+
"""
9+
10+
from pkg_infra.utils.constants import * # noqa: F401, F403
11+
from pkg_infra.utils._process import swap_dict # noqa: F401
12+
from pkg_infra.utils._misc import * # noqa: F401, F403
13+
14+
# Preserved from original utils.py
15+
import datetime
16+
17+
def get_timestamp_now() -> str:
    """Return the current UTC time as a compact ``YYYYMMDDTHHMMSSZ`` string."""
    utc_now = datetime.datetime.now(tz=datetime.timezone.utc)
    return f"{utc_now:%Y%m%dT%H%M%SZ}"

0 commit comments

Comments
 (0)