@@ -680,4 +680,95 @@ def maxshape(self):
680680# individual, complete elements along the first dimension of the array one-at-a-time). Depending on the generator,
681681# this may or may not result in an error on write, but the array you are generating will probably end up
682682# at least not having the intended shape.
# * The shape of the chunks returned by the ``DataChunkIterator`` does not match the shape of the chunks of the
#   target HDF5 dataset. This can result in slow I/O performance, for example, when each chunk of an HDF5 dataset
#   needs to be updated multiple times on write. For example, when using compression this would mean that HDF5
#   may have to read, decompress, update, compress, and write a particular chunk each time it is updated.
683687#
688+ #
689+
690+ ####################
691+ # Alternative Approach: User-defined dataset write
692+ # ----------------------------------------------------
693+ #
# In the above cases we used the built-in capabilities of PyNWB to perform iterative data write. To
# gain more fine-grained control of the write process we can alternatively use PyNWB to set up the full
# structure of our NWB:N file and then update select datasets afterwards. This approach is useful, e.g.,
# in the context of parallel write and any time we need to optimize write patterns.
698+ #
699+ #
700+
701+ ####################
702+ # Step 1: Initially allocate the data as empty
703+ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
704+ #
from hdmf.backends.hdf5.h5_utils import H5DataIO

# Wrap an initially empty (0 x 10) float array so HDF5 creates a resizable,
# chunked, compressed dataset that we can fill in after the file is written.
empty_data = H5DataIO(
    data=np.empty(shape=(0, 10), dtype='float'),
    maxshape=(None, 10),  # <-- Make the time dimension resizable
    chunks=(131072, 2),   # <-- Use 2MB chunks (131072 * 2 float64 values)
    compression='gzip',   # <-- Enable GZip compression
    compression_opts=4,   # <-- GZip aggression
    shuffle=True,         # <-- Enable shuffle filter
    fillvalue=np.nan,     # <-- Use NAN as fillvalue
)

write_test_file(filename='basic_alternative_custom_write.nwb', data=empty_data)
717+
718+ ####################
719+ # Step 2: Get the dataset(s) to be updated
720+ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
721+ #
from pynwb import NWBHDF5IO  # noqa

# Open in append mode: the next step writes into the dataset through this handle.
io = NWBHDF5IO('basic_alternative_custom_write.nwb', mode='a')
nwbfile = io.read()
data = nwbfile.get_acquisition('synthetic_timeseries').data

# Inspect the dataset layout before writing into it
print("Shape %s, Chunks: %s, Maxshape=%s" % (data.shape, data.chunks, data.maxshape))
730+
731+ ####################
732+ # ``[Out]:``
733+ #
734+ # .. code-block:: python
735+ #
736+ # Shape (0, 10), Chunks: (131072, 2), Maxshape=(None, 10)
737+ #
738+
739+ ####################
740+ # Step 3: Implement custom write
741+ # ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
742+ #
743+
# Grow the time dimension, then fill selected timestep ranges directly.
data.resize((8, 10))  # <-- Allocate the space we need
data[:3, :] = 1       # <-- Write timesteps 0,1,2
data[3:6, :] = 2      # <-- Write timesteps 3,4,5; timesteps 6,7 stay uninitialized (NaN fillvalue)
io.close()            # <-- Close the file
748+
749+
750+ ####################
751+ # Check the results
752+ # ^^^^^^^^^^^^^^^^^
753+
from pynwb import NWBHDF5IO  # noqa

# Re-open the file read-only: this step only inspects the data, so there is
# no need for a writable handle. The context manager guarantees the file is
# closed even if reading raises.
with NWBHDF5IO('basic_alternative_custom_write.nwb', mode='r') as io:
    nwbfile = io.read()
    data = nwbfile.get_acquisition('synthetic_timeseries').data
    print(data[:])
761+
762+ ####################
763+ # ``[Out]:``
764+ #
765+ # .. code-block:: python
766+ #
767+ # [[ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
768+ # [ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
769+ # [ 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
770+ # [ 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
771+ # [ 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
772+ # [ 2. 2. 2. 2. 2. 2. 2. 2. 2. 2.]
773+ # [ nan nan nan nan nan nan nan nan nan nan]
774+ # [ nan nan nan nan nan nan nan nan nan nan]]
0 commit comments