silx-kit
diff --git a/‎doc/information.rst‎
Lines changed: 1 addition & 1 deletion b/‎doc/information.rst‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎lib/SPERR/CMakeLists.txt‎
Lines changed: 2 additions & 1 deletion b/‎lib/SPERR/CMakeLists.txt‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎lib/SPERR/README.md‎
Lines changed: 10 additions & 5 deletions b/‎lib/SPERR/README.md‎
Lines changed: 10 additions & 5 deletions
diff --git a/‎lib/SPERR/handout/SC24-Tutorial-SPERR.pdf‎
784 KB b/‎lib/SPERR/handout/SC24-Tutorial-SPERR.pdf‎
784 KB
diff --git a/‎lib/SPERR/include/CDF97.h‎
Lines changed: 23 additions & 37 deletions b/‎lib/SPERR/include/CDF97.h‎
Lines changed: 23 additions & 37 deletions
diff --git a/‎lib/SPERR/include/sperr_helper.h‎
Lines changed: 8 additions & 0 deletions b/‎lib/SPERR/include/sperr_helper.h‎
Lines changed: 8 additions & 0 deletions
diff --git a/‎lib/SPERR/src/Bitmask.cpp‎
Lines changed: 23 additions & 28 deletions b/‎lib/SPERR/src/Bitmask.cpp‎
Lines changed: 23 additions & 28 deletions
@@ -77,7 +77,7 @@ HDF5 compression filters and compression libraries sources were obtained from:
 * `SZ plugin <https://github.com/szcompressor/SZ2>`_
   (commit `f466775 <https://github.com/szcompressor/SZ2/tree/f4667759ead6a902110e80ff838ccdfddbc8dcd7>`_)
   using `SZ <https://github.com/szcompressor/SZ2>`_, ZLib and ZStd.
-* `H5Z-SPERR plugin <https://github.com/NCAR/H5Z-SPERR>`_ (v0.2.3) using `SPERR <https://github.com/NCAR/SPERR>`_ (v0.8.2).
+* `H5Z-SPERR plugin <https://github.com/NCAR/H5Z-SPERR>`_ (v0.2.3) using `SPERR <https://github.com/NCAR/SPERR>`_ (v0.8.4).
 * `SZ3 plugin <https://github.com/szcompressor/SZ3>`_
   (commit `4bbe9df7e4bcb <https://github.com/szcompressor/SZ3/commit/4bbe9df7e4bcb6ae6339fcb3033100da07fe7434>`_)
   using `SZ3 <https://github.com/szcompressor/SZ3>`_ and ZStd.
 
@@ -2,7 +2,7 @@
 
 cmake_minimum_required(VERSION 3.14)
 
-project(SPERR VERSION 0.8.2 LANGUAGES CXX DESCRIPTION "Lossy Scientific Compression with SPERR")
+project(SPERR VERSION 0.8.4 LANGUAGES CXX DESCRIPTION "Lossy Scientific Compression with SPERR")
 
 if(NOT CMAKE_CXX_STANDARD)
     set(CMAKE_CXX_STANDARD "20" CACHE STRING "Choose the C++ Standard to use." FORCE)
@@ -32,6 +32,7 @@ option( BUILD_SHARED_LIBS "Build shared SPERR library" ON )
 option( BUILD_UNIT_TESTS "Build unit tests using GoogleTest" ON )
 option( BUILD_CLI_UTILITIES "Build a set of command line utilities" ON )
 option( USE_OMP "Use OpenMP parallelization on 3D volumes" OFF )
+option( ENABLE_AVX2 "Enable AVX2 instruction set compilation" ON )
 option( SPERR_PREFER_RPATH "Set RPATH; this can fight with package managers so turn off when building for them" ON )
 mark_as_advanced(FORCE SPERR_PREFER_RPATH)
 
 
@@ -27,16 +27,19 @@ mkdir SPERR/build                               # create the build directory
 cd SPERR/build                                  # enter the build directory
 cmake ..                                        # use cmake to configure the project
 cmake -DUSE_OMP=ON ..                           # Optional: enable OpenMP on 3D volumes.
-cmake -DCMAKE_INSTALL_PREFIX=/my/install/dir .. # Optional: specify a directory to install SPERR. The default is /usr/local .
+cmake -DENABLE_AVX2=OFF ..                      # Optional: disable AVX2 instructions. The code is slightly faster with AVX2.
 cmake -DCMAKE_CXX_STANDARD=17 ..                # Optional: use C++17 rather than C++20. The code is slightly faster with C++20.
+cmake -DCMAKE_INSTALL_PREFIX=/my/install/dir .. # Optional: specify a directory to install SPERR. The default is /usr/local .
 make -j 8                                       # build the project
 ctest .                                         # run unit tests, which should have 100% tests passed
 make install                                    # install the library and CLI tools to a specified directory.
 ```
 
 ## Plugin for HDF5
 SPERR is available as a *dynamically loaded plugin* for HDF5 with a registered ID of `32028`.
-This plugin is available at this [repo](https://github.com/NCAR/H5Z-SPERR).
+This plugin, H5Z-SPERR, is available at this [repo](https://github.com/NCAR/H5Z-SPERR).
+
+In the Python ecosystem, H5Z-SPERR is available through the [hdf5plugin](https://github.com/silx-kit/hdf5plugin) package.
 
 ## Wrapper for Fortran
 A Fortran wrapper for SPERR has also been created by [ofmla](https://github.com/ofmla) 
@@ -67,6 +70,8 @@ If SPERR benefits your work, please kindly cite [this publication](https://ieeex
 (Author's copy is available [here](https://vast.ucar.edu/pdfs/SPERR_IPDPS.pdf).)
 
 ## Presentations
-- FZ Workshop Hands-on: Feb 15 2024, Sarasota, FL. ([handout and examples](https://vast.ucar.edu/pdfs/Li_FZ2024.pdf))
-- SC'23 Tutorial on lossy scientific data compression: Nov 13 2023, Denver CO. ([slides](https://vast.ucar.edu/pdfs/Li_SC23_Slides.pdf))
-- IPDPS'23 Lossy Scientific Data Compression With SPERR: May 18 2023, St. Petersburg, FL. ([slides](https://vast.ucar.edu/pdfs/Li_IPDPS23_Slides.pdf))
+- SC'24 Tutorial: Nov 18 2024, Atlanta, GA. ([slides](./handout/SC24-Tutorial-SPERR.pdf) 
+  by [lindstro](https://github.com/lindstro))
+- FZ Workshop: Feb 15 2024, Sarasota, FL. ([handout and examples](https://vast.ucar.edu/pdfs/Li_FZ2024.pdf))
+- SC'23 Tutorial: Nov 13 2023, Denver, CO. ([slides](https://vast.ucar.edu/pdfs/Li_SC23_Slides.pdf))
+- IPDPS'23: May 18 2023, St. Petersburg, FL. ([slides](https://vast.ucar.edu/pdfs/Li_IPDPS23_Slides.pdf))
@@ -18,6 +18,11 @@ namespace sperr {
 
 class CDF97 {
  public:
+  //
+  // Destructor
+  //
+  ~CDF97();
+
   //
   // Input
   //
@@ -57,49 +62,34 @@ class CDF97 {
   void idwt3d_multi_res(std::vector<vecd_type>&);
 
  private:
-  using itd_type = vecd_type::iterator;
-  using citd_type = vecd_type::const_iterator;
-
   //
   // Private methods helping DWT.
   //
 
   // Multiple levels of 1D DWT/IDWT on a given array of length array_len.
-  void m_dwt1d(itd_type array, size_t array_len, size_t num_of_xforms);
-  void m_idwt1d(itd_type array, size_t array_len, size_t num_of_xforms);
+  void m_dwt1d(double* array, size_t array_len, size_t num_of_xforms);
+  void m_idwt1d(double* array, size_t array_len, size_t num_of_xforms);
 
   // Multiple levels of 2D DWT/IDWT on a given plane by repeatedly invoking
   // m_dwt2d_one_level(). The plane has a dimension (len_xy[0], len_xy[1]).
-  void m_dwt2d(itd_type plane, std::array<size_t, 2> len_xy, size_t num_of_xforms);
-  void m_idwt2d(itd_type plane, std::array<size_t, 2> len_xy, size_t num_of_xforms);
+  void m_dwt2d(double* plane, std::array<size_t, 2> len_xy, size_t num_of_xforms);
+  void m_idwt2d(double* plane, std::array<size_t, 2> len_xy, size_t num_of_xforms);
 
   // Perform one level of interleaved 3D dwt/idwt on a given volume (m_dims),
   // specifically on its top left (len_xyz) subset.
-  void m_dwt3d_one_level(itd_type vol, std::array<size_t, 3> len_xyz);
-  void m_idwt3d_one_level(itd_type vol, std::array<size_t, 3> len_xyz);
+  void m_dwt3d_one_level(std::array<size_t, 3> len_xyz);
+  void m_idwt3d_one_level(std::array<size_t, 3> len_xyz);
 
   // Perform one level of 2D dwt/idwt on a given plane (m_dims),
   // specifically on its top left (len_xy) subset.
-  void m_dwt2d_one_level(itd_type plane, std::array<size_t, 2> len_xy);
-  void m_idwt2d_one_level(itd_type plane, std::array<size_t, 2> len_xy);
-
-  // Perform one level of 1D dwt/idwt on a given array (array_len).
-  // A buffer space (tmp_buf) should be passed in for
-  // this method to work on with length at least 2*array_len.
-  void m_dwt1d_one_level(itd_type array, size_t array_len);
-  void m_idwt1d_one_level(itd_type array, size_t array_len);
+  void m_dwt2d_one_level(double* plane, std::array<size_t, 2> len_xy);
+  void m_idwt2d_one_level(double* plane, std::array<size_t, 2> len_xy);
 
   // Separate even and odd indexed elements to be at the front and back of the dest array.
-  // Note 1: sufficient memory space should be allocated by the caller.
-  // Note 2: two versions for even and odd length input.
-  void m_gather_even(citd_type begin, citd_type end, itd_type dest) const;
-  void m_gather_odd(citd_type begin, citd_type end, itd_type dest) const;
-
   // Interleave low and high pass elements to be at even and odd positions of the dest array.
-  // Note 1: sufficient memory space should be allocated by the caller.
-  // Note 2: two versions for even and odd length input.
-  void m_scatter_even(citd_type begin, citd_type end, itd_type dest) const;
-  void m_scatter_odd(citd_type begin, citd_type end, itd_type dest) const;
+  // Note: sufficient memory space should be allocated by the caller.
+  void m_gather(const double* begin, size_t len, double* dest) const;
+  void m_scatter(const double* begin, size_t len, double* dest) const;
 
   // Two flavors of 3D transforms.
   // They should be invoked by the `dwt3d()` and `idwt3d()` public methods, not users, though.
@@ -112,28 +102,24 @@ class CDF97 {
   // It is UB if `subdims` exceeds the full dimension (`m_dims`).
   // It is UB if `dst` does not point to a big enough space.
   auto m_sub_slice(std::array<size_t, 2> subdims) const -> vecd_type;
-  void m_sub_volume(dims_type subdims, itd_type dst) const;
+  void m_sub_volume(dims_type subdims, double* dst) const;
 
   //
-  // Methods from QccPack, so keep their original names, interface, and the use of raw pointers.
+  // Methods from QccPack with slight changes to combine the even and odd length cases.
   //
-  void QccWAVCDF97AnalysisSymmetricEvenEven(double* signal, size_t signal_length);
-  void QccWAVCDF97AnalysisSymmetricOddEven(double* signal, size_t signal_length);
-  void QccWAVCDF97SynthesisSymmetricEvenEven(double* signal, size_t signal_length);
-  void QccWAVCDF97SynthesisSymmetricOddEven(double* signal, size_t signal_length);
+  void QccWAVCDF97AnalysisSymmetric(double* signal, size_t signal_length);
+  void QccWAVCDF97SynthesisSymmetric(double* signal, size_t signal_length);
 
   //
   // Private data members
   //
   vecd_type m_data_buf;          // Holds the entire input data.
   dims_type m_dims = {0, 0, 0};  // Dimension of the data volume
 
-  // Temporary buffers that are big enough for any (1D column * 2) or any 2D
-  // slice. Note: `m_qcc_buf` should be used by m_***_one_level() functions and
-  // should not be used by higher-level functions. `m_slice_buf` is only used by
-  // wavelet-packet transforms.
-  vecd_type m_qcc_buf;
+  // Temporary buffers that are big enough for any 1D column or any 2D slice.
   vecd_type m_slice_buf;
+  double* m_aligned_buf = nullptr;
+  size_t m_aligned_buf_bytes = 0;  // num. of bytes
 
   //
   // Note on the coefficients and constants:
 
@@ -66,6 +66,11 @@ enum class RTNType {
 //
 // Helper functions
 //
+
+// Allocate and deallocate a chunk of ALIGNED memory, for both UNIX and Windows.
+auto aligned_malloc(size_t alignment, size_t size) -> void*;
+void aligned_free(void* p);
+
 // Given a certain length, how many transforms to be performed?
 auto num_of_xforms(size_t len) -> size_t;
 
@@ -179,6 +184,9 @@ auto chunk_volume(dims_type vol_dim, dims_type chunk_dim) -> std::vector<std::ar
 template <typename T>
 auto calc_mean_var(const T*, size_t len, size_t omp_nthreads = 0) -> std::array<T, 2>;
 
+template <typename T>
+auto any_ge(const T* buf, size_t len, T threshold) -> bool;
+
 };  // namespace sperr
 
 #endif
@@ -10,13 +10,9 @@
 
 sperr::Bitmask::Bitmask(size_t nbits)
 {
-  if (nbits > 0) {
-    auto num_longs = nbits / 64;
-    if (nbits - num_longs * 64 != 0)
-      num_longs++;
-    m_buf.assign(num_longs, 0);
-    m_num_bits = nbits;
-  }
+  auto num_longs = (nbits + 63) / 64;  // ceil(nbits / 64): number of 64-bit longs needed; 0 when nbits == 0
+  m_buf.assign(num_longs, 0);  // every long starts out as zero
+  m_num_bits = nbits;  // now recorded unconditionally, including for nbits == 0
 }
 
 auto sperr::Bitmask::size() const -> size_t
@@ -26,37 +22,35 @@ auto sperr::Bitmask::size() const -> size_t
 
 void sperr::Bitmask::resize(size_t nbits)
 {
-  auto num_longs = nbits / 64;
-  if (nbits - num_longs * 64 != 0)
-    num_longs++;
+  auto num_longs = (nbits + 63) / 64;  // ceil(nbits / 64), computed without the divide-then-bump branch
   m_buf.resize(num_longs, 0);
   m_num_bits = nbits;
 }
 
 auto sperr::Bitmask::rlong(size_t idx) const -> uint64_t
 {
-  return m_buf[idx / 64];
+  return m_buf[idx >> 6];  // idx >> 6 == idx / 64: return the whole long that holds bit `idx`
 }
 
 auto sperr::Bitmask::rbit(size_t idx) const -> bool
 {
-  auto div = idx / 64;
-  auto rem = idx - div * 64;
+  auto div = idx >> 6;  // idx / 64: index of the long that holds the bit
+  auto rem = idx & 63;  // idx % 64: position of the bit inside that long
   auto word = m_buf[div];
   word &= uint64_t{1} << rem;
-  return (word != 0);
+  return word;  // implicit integer-to-bool conversion: true iff the masked bit is set
 }
 
 template <bool Position>
 auto sperr::Bitmask::has_true(size_t start, size_t len) const -> int64_t
 {
-  auto long_idx = start / 64;
+  auto long_idx = start >> 6;
   auto processed_bits = int64_t{0};
   auto word = m_buf[long_idx];
   auto answer = uint64_t{0};
 
   // Collect the remaining bits from the start long.
-  auto begin_idx = start - long_idx * 64;
+  auto begin_idx = start & 63;
   auto nbits = std::min(size_t{64}, begin_idx + len);
   for (auto i = begin_idx; i < nbits; i++) {
     answer |= word & (uint64_t{1} << i);
@@ -144,26 +138,27 @@ auto sperr::Bitmask::count_true() const -> size_t
 
 void sperr::Bitmask::wlong(size_t idx, uint64_t value)
 {
-  m_buf[idx / 64] = value;
+  m_buf[idx >> 6] = value;  // idx >> 6 == idx / 64: overwrite the whole long that holds bit `idx`
 }
 
 void sperr::Bitmask::wbit(size_t idx, bool bit)
 {
-  const auto wstart = idx / 64;
-  const auto mask = uint64_t{1} << (idx - wstart * 64);
-
+  const auto wstart = idx >> 6;  // idx / 64: index of the long that holds the bit
   auto word = m_buf[wstart];
-  if (bit)
-    word |= mask;
-  else
-    word &= ~mask;
+
+  auto mask1 = uint64_t{1} << (idx & 63);  // single-bit mask at position idx % 64
+  word &= ~mask1;  // unconditionally clear the target bit first
+
+  auto mask2 = uint64_t{bit} << (idx & 63);  // `bit` converts to 0 or 1, so this is either 0 or the same single-bit mask
+  word |= mask2;  // sets the bit only when `bit` is true; takes the place of the removed if/else
+
   m_buf[wstart] = word;
 }
 
 void sperr::Bitmask::wtrue(size_t idx)
 {
-  const auto wstart = idx / 64;
-  const auto mask = uint64_t{1} << (idx - wstart * 64);
+  const auto wstart = idx >> 6;
+  const auto mask = uint64_t{1} << (idx & 63);
 
   auto word = m_buf[wstart];
   word |= mask;
@@ -172,8 +167,8 @@ void sperr::Bitmask::wtrue(size_t idx)
 
 void sperr::Bitmask::wfalse(size_t idx)
 {
-  const auto wstart = idx / 64;
-  const auto mask = uint64_t{1} << (idx - wstart * 64);
+  const auto wstart = idx >> 6;
+  const auto mask = uint64_t{1} << (idx & 63);
 
   auto word = m_buf[wstart];
   word &= ~mask;