Skip to content

Commit 17adccf

Browse files
sneakers-the-rat, rly, and mavaylon1
authored
cache __TYPE_MAP and init submodules (#1931)
Co-authored-by: Ryan Ly <[email protected]> Co-authored-by: Matthew Avaylon <[email protected]>
1 parent b9f9e5a commit 17adccf

File tree

5 files changed

+120
-32
lines changed

5 files changed

+120
-32
lines changed

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,6 @@ tests/coverage/htmlcov
7777

7878
# Version
7979
_version.py
80+
81+
.core_typemap_version
82+
core_typemap.pkl

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# PyNWB Changelog
22

3+
## PyNWB 2.8.3 (Upcoming)
4+
5+
### Performance
6+
- Cache the global type map to speed up import by 3X. @sneakers-the-rat [#1931](https://github.com/NeurodataWithoutBorders/pynwb/pull/1931)
7+
38
## PyNWB 2.8.2 (September 9, 2024)
49

510
### Enhancements and minor changes

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ exclude = [
9898
"__pycache__",
9999
"build/",
100100
"dist/",
101-
"src/nwb-schema",
101+
"src/pynwb/nwb-schema",
102102
"docs/source/conf.py",
103103
"docs/notebooks/*",
104104
"src/pynwb/_due.py",

src/pynwb/__init__.py

Lines changed: 111 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import os.path
55
from pathlib import Path
66
from copy import deepcopy
7+
import subprocess
8+
import pickle
79
from warnings import warn
810
import h5py
911

@@ -23,6 +25,16 @@
2325
from .spec import NWBDatasetSpec, NWBGroupSpec, NWBNamespace # noqa E402
2426
from .validate import validate # noqa: F401, E402
2527

28+
try:
29+
# see https://effigies.gitlab.io/posts/python-packaging-2023/
30+
from ._version import __version__
31+
except ImportError: # pragma: no cover
32+
# this is a relatively slower method for getting the version string
33+
from importlib.metadata import version # noqa: E402
34+
35+
__version__ = version("pynwb")
36+
del version
37+
2638

2739
@docval({'name': 'config_path', 'type': str, 'doc': 'Path to the configuration file.'},
2840
{'name': 'type_map', 'type': TypeMap, 'doc': 'The TypeMap.', 'default': None},
@@ -51,7 +63,7 @@ def unload_type_config(**kwargs):
5163
type_map = kwargs['type_map'] or get_type_map()
5264
hdmf_unload_type_config(type_map=type_map)
5365

54-
def __get_resources():
66+
def __get_resources() -> dict:
5567
try:
5668
from importlib.resources import files
5769
except ImportError:
@@ -61,27 +73,35 @@ def __get_resources():
6173
__location_of_this_file = files(__name__)
6274
__core_ns_file_name = 'nwb.namespace.yaml'
6375
__schema_dir = 'nwb-schema/core'
76+
cached_core_typemap = __location_of_this_file / 'core_typemap.pkl'
77+
cached_version_indicator = __location_of_this_file / '.core_typemap_version'
6478

6579
ret = dict()
6680
ret['namespace_path'] = str(__location_of_this_file / __schema_dir / __core_ns_file_name)
81+
ret['cached_typemap_path'] = str(cached_core_typemap)
82+
ret['cached_version_indicator'] = str(cached_version_indicator)
6783
return ret
6884

6985

7086
def _get_resources():
    """
    Deprecated wrapper around the private ``__get_resources`` helper.

    Emits a ``DeprecationWarning`` and then returns whatever
    ``__get_resources()`` returns.
    """
    # LEGACY: Needed to support legacy implementation.
    # TODO: Remove this in PyNWB 3.0.
    # stacklevel=2 attributes the warning to the code that called this function
    # instead of to this module, so the default warning filters can surface it
    # to the user who actually needs to migrate.
    warn(
        "The function '_get_resources' is deprecated and will be removed in a future release.",
        DeprecationWarning,
        stacklevel=2,
    )
    return __get_resources()
7391

7492

75-
# a global namespace catalog
76-
global __NS_CATALOG
93+
# a global type map
7794
global __TYPE_MAP
7895

79-
__NS_CATALOG = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace)
96+
__ns_catalog = NamespaceCatalog(NWBGroupSpec, NWBDatasetSpec, NWBNamespace)
8097

8198
hdmf_typemap = hdmf.common.get_type_map()
82-
__TYPE_MAP = TypeMap(__NS_CATALOG)
99+
__TYPE_MAP = TypeMap(__ns_catalog)
83100
__TYPE_MAP.merge(hdmf_typemap, ns_catalog=True)
84101

102+
# load the core namespace, i.e. base NWB specification
103+
__resources = __get_resources()
104+
85105

86106
@docval({'name': 'extensions', 'type': (str, TypeMap, list),
87107
'doc': 'a path to a namespace, a TypeMap, or a list consisting of paths to namespaces and TypeMaps',
@@ -139,22 +159,95 @@ def load_namespaces(**kwargs):
139159
namespace_path = getargs('namespace_path', kwargs)
140160
return __TYPE_MAP.load_namespaces(namespace_path)
141161

162+
def available_namespaces():
    """Return the namespaces held by the namespace catalog of the global type map."""
    catalog = __TYPE_MAP.namespace_catalog
    return catalog.namespaces
142165

143-
# load the core namespace, i.e. base NWB specification
144-
__resources = __get_resources()
145-
if os.path.exists(__resources['namespace_path']):
146-
load_namespaces(__resources['namespace_path'])
147-
else:
148-
raise RuntimeError(
149-
"'core' is not a registered namespace. If you installed PyNWB locally using a git clone, you need to "
150-
"use the --recurse_submodules flag when cloning. See developer installation instructions here: "
151-
"https://pynwb.readthedocs.io/en/stable/install_developers.html#install-from-git-repository"
152-
)
153166

167+
def __git_cmd(*args) -> subprocess.CompletedProcess:
    """
    Run ``git`` with the directory of this file as its working tree,
    independent of the process' current working directory.

    Any folder inside a git repository is a valid target for ``git -C``, so no
    attempt is made to walk up to the repository root: when the package is
    *not* inside a git repo, the inner ``pynwb`` directory is the only location
    we are guaranteed to know about.

    Args:
        *args: Arguments appended to the ``git -C <package dir>`` command line.

    Returns:
        subprocess.CompletedProcess: The finished process with stdout and
        stderr captured. The return code is not checked here; callers
        inspect ``returncode`` themselves.
    """
    package_dir = Path(__file__).parent
    command = ["git", "-C", str(package_dir)]
    command.extend(args)
    return subprocess.run(command, capture_output=True)
178+
179+
180+
def __clone_submodules():
    """
    Initialize the git submodules (which hold the NWB core schema) in place.

    Raises:
        RuntimeError: If the package directory is not inside a git repository,
            or if ``git submodule update --init --recursive`` exits non-zero.
            In the latter case the captured stdout/stderr are included in the
            message.
    """
    if __git_cmd('rev-parse').returncode != 0:  # pragma: no cover
        raise RuntimeError("Package is not installed from a git repository, can't clone submodules")

    warn(
        'NWB core schema not found in cloned installation, initializing submodules...',
        stacklevel=1)
    res = __git_cmd('submodule', 'update', '--init', '--recursive')
    if res.returncode != 0:  # pragma: no cover
        def _as_text(stream) -> str:
            # The git helper captures output without text mode, so the streams
            # are bytes; concatenating them to str directly raises TypeError
            # and hides the real failure. Accept str/None too so this keeps
            # working if the helper ever switches to text mode.
            if stream is None:
                return ''
            if isinstance(stream, bytes):
                return stream.decode(errors='replace')
            return str(stream)

        raise RuntimeError(
            'Exception while initializing submodules, got:\n'
            'stdout:\n' + ('-'*20) + _as_text(res.stdout) + "\nstderr:\n" + ('-'*20) + _as_text(res.stderr))
192+
193+
194+
def __load_core_namespace(final:bool=False):
    """
    Load the core namespace into __TYPE_MAP,
    either by loading a pickled version or creating one anew and pickling it.

    We keep a dotfile next to it that tracks what version of pynwb created it,
    so that we invalidate it when the code changes.

    The on-disk cache is strictly best-effort: if the package directory is not
    writable, or the cached pickle cannot be read back, the type map is built
    from the schema files in memory and importing the package still succeeds.

    Args:
        final (bool): This function tries again if the submodules aren't cloned,
            but it shouldn't go into an infinite loop.
            If final is ``True``, don't recurse.
    """
    global __TYPE_MAP
    global __resources

    typemap_cache = Path(__resources['cached_typemap_path'])
    version_file = Path(__resources['cached_version_indicator'])

    def _remove_quietly(path):
        # Cache cleanup must never break the import (e.g. read-only installs).
        try:
            path.unlink(missing_ok=True)
        except OSError:
            pass

    # A missing or unreadable version indicator is treated like a mismatch.
    try:
        cached_version = version_file.read_text().strip()
    except (OSError, UnicodeDecodeError):
        cached_version = None

    # if the version indicator doesn't match the current version (or is absent),
    # scrap the cached typemap, forcing re-creation
    cache_is_current = cached_version == __version__
    if not cache_is_current:
        _remove_quietly(typemap_cache)

    # load pickled typemap if we have a trustworthy one
    if cache_is_current and typemap_cache.exists():
        try:
            # NOTE(security): unpickling executes code; this is only acceptable
            # because the file lives inside the package directory and was
            # written by this function.
            with open(typemap_cache, 'rb') as f:
                cached_type_map = pickle.load(f)  # type: TypeMap
        except (OSError, EOFError, pickle.UnpicklingError, AttributeError, ImportError, IndexError):
            # truncated/corrupt/incompatible cache: drop it and rebuild below
            _remove_quietly(typemap_cache)
        else:
            __TYPE_MAP = cached_type_map
            return

    # otherwise make a new one and try to cache it
    if os.path.exists(__resources['namespace_path']):
        load_namespaces(__resources['namespace_path'])
        try:
            with open(typemap_cache, 'wb') as f:
                pickle.dump(__TYPE_MAP, f, protocol=pickle.HIGHEST_PROTOCOL)
            version_file.write_text(__version__)
        except (OSError, pickle.PicklingError):
            # could not persist the cache; don't leave a half-written one behind
            _remove_quietly(typemap_cache)
            _remove_quietly(version_file)
        return

    # otherwise, we don't have the schema and try and initialize from submodules,
    # afterwards trying to load the namespace again
    try:
        __clone_submodules()
    except (FileNotFoundError, OSError, RuntimeError) as e:  # pragma: no cover
        if 'core' not in available_namespaces():
            warn(
                "'core' is not a registered namespace. If you installed PyNWB locally using a git clone, "
                "you need to use the --recurse-submodules flag when cloning. "
                "See developer installation instructions here: "
                "https://pynwb.readthedocs.io/en/stable/install_developers.html#install-from-git-repository\n"
                f"Got exception: \n{e}"
            )
    if not final:
        __load_core_namespace(final=True)
250+
__load_core_namespace()
158251

159252

160253
# a function to register a container classes with the global map
@@ -427,15 +520,7 @@ def export(self, **kwargs):
427520
from hdmf.data_utils import DataChunkIterator # noqa: F401,E402
428521
from hdmf.backends.hdf5 import H5DataIO # noqa: F401,E402
429522

430-
try:
431-
# see https://effigies.gitlab.io/posts/python-packaging-2023/
432-
from ._version import __version__
433-
except ImportError: # pragma: no cover
434-
# this is a relatively slower method for getting the version string
435-
from importlib.metadata import version # noqa: E402
436523

437-
__version__ = version("pynwb")
438-
del version
439524

440525
from ._due import due, BibTeX # noqa: E402
441526
due.cite(

tests/back_compat/test_import_structure.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,14 @@ def test_outer_import_structure(self):
3030
"TimeSeries",
3131
"TypeMap",
3232
"_HDF5IO",
33-
"__NS_CATALOG",
34-
"__TYPE_MAP",
3533
"__builtins__",
3634
"__cached__",
3735
"__doc__",
3836
"__file__",
39-
"__get_resources",
40-
"__io",
4137
"__loader__",
4238
"__name__",
4339
"__package__",
4440
"__path__",
45-
"__resources",
4641
"__spec__",
4742
"__version__",
4843
"_due",

0 commit comments

Comments
 (0)