5353# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
5454#
5555# In PyNWB the process of iterating over large data arrays is implemented via the concept of
56- # :py:class:`~pynwb.form. data_utils.DataChunk` and :py:class:`~pynwb.form .data_utils.AbstractDataChunkIterator`.
56+ # :py:class:`~hdmf.data_utils.DataChunk` and :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`.
5757#
58- # * :py:class:`~pynwb.form .data_utils.DataChunk` is a simple data structure used to describe
58+ # * :py:class:`~hdmf.data_utils.DataChunk` is a simple data structure used to describe
5959# a subset of a larger data array (i.e., a data chunk), consisting of:
6060#
6161# * ``DataChunk.data`` : the array with the data value(s) of the chunk and
6262# * ``DataChunk.selection`` : the NumPy index tuple describing the location of the chunk in the whole array.
6363#
64- # * :py:class:`~pynwb.form .data_utils.AbstractDataChunkIterator` then defines a class for iterating over large
65- # data arrays one-:py:class:`~pynwb.form .data_utils.DataChunk`-at-a-time.
64+ # * :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` then defines a class for iterating over large
65+ # data arrays one-:py:class:`~hdmf.data_utils.DataChunk`-at-a-time.
6666#
67- # * :py:class:`~pynwb.form .data_utils.DataChunkIterator` is a specific implementation of an
68- # :py:class:`~pynwb.form .data_utils.AbstractDataChunkIterator` that accepts any iterable and assumes
69- # that we iterate over the first dimension of the data array. :py:class:`~pynwb.form .data_utils.DataChunkIterator`
67+ # * :py:class:`~hdmf.data_utils.DataChunkIterator` is a specific implementation of an
68+ # :py:class:`~hdmf.data_utils.AbstractDataChunkIterator` that accepts any iterable and assumes
69+ # that we iterate over the first dimension of the data array. :py:class:`~hdmf.data_utils.DataChunkIterator`
7070# also supports buffered read, i.e., multiple values from the input iterator can be combined to a single chunk.
7171# This is useful for buffered I/O operations, e.g., to improve performance by accumulating data in memory and
7272# writing larger blocks at once.
7777# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
7878#
7979# On the front end, all a user needs to do is to create or wrap their data in a
80- # :py:class:`~pynwb.form .data_utils.AbstractDataChunkIterator`. The I/O backend (e.g.,
81- # :py:class:`~pynwb.form .backends.hdf5.h5tools.HDF5IO` or :py:class:`~pynwb.NWBHDF5IO`) then
80+ # :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`. The I/O backend (e.g.,
81+ # :py:class:`~hdmf.backends.hdf5.h5tools.HDF5IO` or :py:class:`~pynwb.NWBHDF5IO`) then
8282# implements the iterative processing of the data chunk iterators. PyNWB also provides with
83- # :py:class:`~pynwb.form .data_utils.DataChunkIterator` a specific implementation of a data chunk iterator
83+ # :py:class:`~hdmf.data_utils.DataChunkIterator` a specific implementation of a data chunk iterator
8484# which we can use to wrap common iterable types (e.g., generators, lists, or numpy arrays).
8585# For more advanced use cases we then need to implement our own derived class of
86- # :py:class:`~pynwb.form .data_utils.AbstractDataChunkIterator`.
86+ # :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`.
8787#
8888# .. tip::
8989#
90- # Currently the HDF5 I/O backend of PyNWB (:py:class:`~pynwb.form .backends.hdf5.h5tools.HDF5IO`,
90+ # Currently the HDF5 I/O backend of PyNWB (:py:class:`~hdmf.backends.hdf5.h5tools.HDF5IO`,
9191# :py:class:`~pynwb.NWBHDF5IO`) processes iterative data writes one-dataset-at-a-time. This means that
9292# while you may have an arbitrary number of iterative data writes, the write is performed in order.
9393# In the future we may use a queuing process to enable the simultaneous processing of multiple iterative writes at
@@ -172,7 +172,7 @@ def iter_sin(chunk_length=10, max_chunks=100):
172172# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
173173#
174174
175- from pynwb . form .data_utils import DataChunkIterator
175+ from hdmf.data_utils import DataChunkIterator
176176
177177data = DataChunkIterator(data=iter_sin(10))
178178
@@ -201,22 +201,22 @@ def iter_sin(chunk_length=10, max_chunks=100):
201201#
202202# maxshape=(None, 10), recommended_data_shape=(1, 10), dtype=float64
203203#
204- # As we can see :py:class:`~pynwb.form .data_utils.DataChunkIterator` automatically recommends
204+ # As we can see :py:class:`~hdmf.data_utils.DataChunkIterator` automatically recommends
205205# in its ``maxshape`` that the first dimensions of our array should be unlimited (``None``) and the second
206- # dimension be ``10`` (i.e., the length of our chunk. Since :py:class:`~pynwb.form .data_utils.DataChunkIterator`
206+ # dimension be ``10`` (i.e., the length of our chunk). Since :py:class:`~hdmf.data_utils.DataChunkIterator`
207207# has no way of knowing the minimum size of the array it automatically recommends the size of the first
208208# chunk as the minimum size (i.e., ``(1, 10)``) and also infers the data type automatically from the first chunk.
209209# To further customize this behavior we may also define the ``maxshape``, ``dtype``, and ``buffer_size`` when
210- # we create the :py:class:`~pynwb.form .data_utils.DataChunkIterator`.
210+ # we create the :py:class:`~hdmf.data_utils.DataChunkIterator`.
211211#
212212# .. tip::
213213#
214- # We here used :py:class:`~pynwb.form .data_utils.DataChunkIterator` to conveniently wrap our data stream.
215- # :py:class:`~pynwb.form .data_utils.DataChunkIterator` assumes that our generators yields in **consecutive order**
214+ # We here used :py:class:`~hdmf.data_utils.DataChunkIterator` to conveniently wrap our data stream.
215+ # :py:class:`~hdmf.data_utils.DataChunkIterator` assumes that our generator yields in **consecutive order**
216216# a **single** complete element along the **first dimension** of our array (i.e., iterate over the first
217217# axis and yield one-element-at-a-time). This behavior is useful in many practical cases. However, if
218218# this strategy does not match our needs, then you can alternatively implement our own derived
219- # :py:class:`~pynwb.form .data_utils.AbstractDataChunkIterator`. We show an example of this next.
219+ # :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`. We show an example of this next.
220220#
221221
222222
@@ -227,7 +227,7 @@ def iter_sin(chunk_length=10, max_chunks=100):
227227# Step 1: Create a data chunk iterator for our sparse matrix
228228# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
229229
230- from pynwb . form .data_utils import AbstractDataChunkIterator , DataChunk
230+ from hdmf.data_utils import AbstractDataChunkIterator, DataChunk
231231
232232
233233class SparseMatrixIterator(AbstractDataChunkIterator):
@@ -306,8 +306,8 @@ def maxshape(self):
306306
307307#####################
308308# In order to also enable compression and other advanced HDF5 dataset I/O features we can then also
309- # wrap our data via :py:class:`~pynwb.form .backends.hdf5.h5_utils.H5DataIO`.
310- from pynwb . form .backends .hdf5 .h5_utils import H5DataIO
309+ # wrap our data via :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`.
310+ from hdmf.backends.hdf5.h5_utils import H5DataIO
311311matrix2 = SparseMatrixIterator(shape=(xsize, ysize),
312312 num_chunks=num_chunks,
313313 chunk_shape=chunk_shape)
@@ -318,7 +318,7 @@ def maxshape(self):
318318######################
319319# We can now also customize the chunking, fillvalue, and other settings
320320#
321- from pynwb . form .backends .hdf5 .h5_utils import H5DataIO
321+ from hdmf.backends.hdf5.h5_utils import H5DataIO
322322
323323# Increase the chunk size and add compression
324324matrix3 = SparseMatrixIterator(shape=(xsize, ysize),
@@ -427,7 +427,7 @@ def maxshape(self):
427427#
428428# **Advantages:**
429429#
430- # * We only need to hold one :py:class:`~pynwb.form .data_utils.DataChunk` in memory at any given time
430+ # * We only need to hold one :py:class:`~hdmf.data_utils.DataChunk` in memory at any given time
431431# * Only the data chunks in the HDF5 file that contain non-default values are ever being allocated
432432# * The overall size of our file is reduced significantly
433433# * Reduced I/O load
@@ -437,7 +437,7 @@ def maxshape(self):
437437#
438438# With great power comes great responsibility **!** I/O and storage cost will depend among others on the chunk size,
439439# compression options, and the write pattern, i.e., the number and structure of the
440- # :py:class:`~pynwb.form .data_utils.DataChunk` objects written. For example, using ``(1,1)`` chunks and writing them
440+ # :py:class:`~hdmf.data_utils.DataChunk` objects written. For example, using ``(1,1)`` chunks and writing them
441441# one value at a time would result in poor I/O performance in most practical cases, because of the large number of
442442# chunks and large number of small I/O operations required.
443443#
@@ -489,7 +489,7 @@ def maxshape(self):
489489# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
490490#
491491# Note, we here use a generator for simplicity but we could equally well also implement our own
492- # :py:class:`~pynwb.form .data_utils.AbstractDataChunkIterator`.
492+ # :py:class:`~hdmf.data_utils.AbstractDataChunkIterator`.
493493
494494
495495def iter_largearray(filename, shape, dtype='float64'):
@@ -510,7 +510,7 @@ def iter_largearray(filename, shape, dtype='float64'):
510510# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
511511#
512512
513- from pynwb . form .data_utils import DataChunkIterator
513+ from hdmf.data_utils import DataChunkIterator
514514
515515data = DataChunkIterator(data=iter_largearray(filename='basic_sparse_iterwrite_testdata.npy',
516516 shape=datashape),
@@ -530,8 +530,8 @@ def iter_largearray(filename, shape, dtype='float64'):
530530# .. tip::
531531#
532532# Again, if we want to explicitly control how our data will be chunked (compressed etc.)
533- # in the HDF5 file then we need to wrap our :py:class:`~pynwb.form .data_utils.DataChunkIterator`
534- # using :py:class:`~pynwb.form .backends.hdf5.h5_utils.H5DataIO`
533+ # in the HDF5 file then we need to wrap our :py:class:`~hdmf.data_utils.DataChunkIterator`
534+ # using :py:class:`~hdmf.backends.hdf5.h5_utils.H5DataIO`
535535
536536####################
537537# Discussion
@@ -589,7 +589,7 @@ def iter_largearray(filename, shape, dtype='float64'):
589589# Step 1: Create a data chunk iterator for our multifile array
590590# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
591591
592- from pynwb . form .data_utils import AbstractDataChunkIterator , DataChunk # noqa
592+ from hdmf.data_utils import AbstractDataChunkIterator, DataChunk  # noqa
593593
594594
595595class MultiFileArrayIterator(AbstractDataChunkIterator):
@@ -666,16 +666,16 @@ def maxshape(self):
666666#
667667# Common mistakes that will result in errors on write:
668668#
669- # * The size of a :py:class:`~pynwb.form .data_utils.DataChunk` does not match the selection.
670- # * The selection for the :py:class:`~pynwb.form .data_utils.DataChunk` is not supported by h5py
669+ # * The size of a :py:class:`~hdmf.data_utils.DataChunk` does not match the selection.
670+ # * The selection for the :py:class:`~hdmf.data_utils.DataChunk` is not supported by h5py
671671# (e.g., unordered lists etc.)
672672#
673673# Other common mistakes:
674674#
675675# * Choosing inappropriate chunk sizes. This typically means bad performance with regard to I/O and/or storage cost.
676676# * Using auto chunking without supplying a good recommended_data_shape. h5py auto chunking can only make a good
677677# guess of what the chunking should be if it (at least roughly) knows what the shape of the array will be.
678- # * Trying to wrap a data generator using the default :py:class:`~pynwb.form .data_utils.DataChunkIterator`
678+ # * Trying to wrap a data generator using the default :py:class:`~hdmf.data_utils.DataChunkIterator`
679679# when the generator does not comply with the assumptions of the default implementation (i.e., yield
680680# individual, complete elements along the first dimension of the array one-at-a-time). Depending on the generator,
681681# this may or may not result in an error on write, but the array you are generating will probably end up
0 commit comments