Skip to content

Commit bf9bf6b

Browse files
authored
Merge pull request #356 from t20100/sperr-0.8.3
Merging without review: almost no change in hdf5plugin itself
2 parents 5f95b7a + a0f5a00 commit bf9bf6b

File tree

14 files changed

+540
-485
lines changed

14 files changed

+540
-485
lines changed

doc/information.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ HDF5 compression filters and compression libraries sources were obtained from:
7777
* `SZ plugin <https://github.com/szcompressor/SZ2>`_
7878
(commit `f466775 <https://github.com/szcompressor/SZ2/tree/f4667759ead6a902110e80ff838ccdfddbc8dcd7>`_)
7979
using `SZ <https://github.com/szcompressor/SZ2>`_, ZLib and ZStd.
80-
* `H5Z-SPERR plugin <https://github.com/NCAR/H5Z-SPERR>`_ (v0.2.3) using `SPERR <https://github.com/NCAR/SPERR>`_ (v0.8.2).
80+
* `H5Z-SPERR plugin <https://github.com/NCAR/H5Z-SPERR>`_ (v0.2.3) using `SPERR <https://github.com/NCAR/SPERR>`_ (v0.8.4).
8181
* `SZ3 plugin <https://github.com/szcompressor/SZ3>`_
8282
(commit `4bbe9df7e4bcb <https://github.com/szcompressor/SZ3/commit/4bbe9df7e4bcb6ae6339fcb3033100da07fe7434>`_)
8383
using `SZ3 <https://github.com/szcompressor/SZ3>`_ and ZStd.

lib/SPERR/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
cmake_minimum_required(VERSION 3.14)
44

5-
project(SPERR VERSION 0.8.2 LANGUAGES CXX DESCRIPTION "Lossy Scientific Compression with SPERR")
5+
project(SPERR VERSION 0.8.4 LANGUAGES CXX DESCRIPTION "Lossy Scientific Compression with SPERR")
66

77
if(NOT CMAKE_CXX_STANDARD)
88
set(CMAKE_CXX_STANDARD "20" CACHE STRING "Choose the C++ Standard to use." FORCE)
@@ -32,6 +32,7 @@ option( BUILD_SHARED_LIBS "Build shared SPERR library" ON )
3232
option( BUILD_UNIT_TESTS "Build unit tests using GoogleTest" ON )
3333
option( BUILD_CLI_UTILITIES "Build a set of command line utilities" ON )
3434
option( USE_OMP "Use OpenMP parallelization on 3D volumes" OFF )
35+
option( ENABLE_AVX2 "Enable AVX2 instruction set compilation" ON )
3536
option( SPERR_PREFER_RPATH "Set RPATH; this can fight with package managers so turn off when building for them" ON )
3637
mark_as_advanced(FORCE SPERR_PREFER_RPATH)
3738

lib/SPERR/README.md

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,16 +27,19 @@ mkdir SPERR/build # create the build directory
2727
cd SPERR/build # enter the build directory
2828
cmake .. # use cmake to configure the project
2929
cmake -DUSE_OMP=ON .. # Optional: enable OpenMP on 3D volumes.
30-
cmake -DCMAKE_INSTALL_PREFIX=/my/install/dir .. # Optional: specify a directory to install SPERR. The default is /usr/local .
30+
cmake -DENABLE_AVX2=OFF .. # Optional: disable AVX2 instructions. The code is slightly faster with AVX2.
3131
cmake -DCMAKE_CXX_STANDARD=17 .. # Optional: use C++17 rather than C++20. The code is slightly faster with C++20.
32+
cmake -DCMAKE_INSTALL_PREFIX=/my/install/dir .. # Optional: specify a directory to install SPERR. The default is /usr/local .
3233
make -j 8 # build the project
3334
ctest . # run unit tests; 100% of the tests should pass
3435
make install # install the library and CLI tools to a specified directory.
3536
```
3637

3738
## Plugin for HDF5
3839
SPERR is available as a *dynamically loaded plugin* for HDF5 with a registered ID of `32028`.
39-
This plugin is available at this [repo](https://github.com/NCAR/H5Z-SPERR).
40+
This plugin, H5Z-SPERR, is available at this [repo](https://github.com/NCAR/H5Z-SPERR).
41+
42+
In the Python ecosystem, H5Z-SPERR is available through the [hdf5plugin](https://github.com/silx-kit/hdf5plugin) package.
4043

4144
## Wrapper for Fortran
4245
A Fortran wrapper for SPERR has also been created by [ofmla](https://github.com/ofmla)
@@ -67,6 +70,8 @@ If SPERR benefits your work, please kindly cite [this publication](https://ieeex
6770
(Author's copy is available [here](https://vast.ucar.edu/pdfs/SPERR_IPDPS.pdf).)
6871

6972
## Presentations
70-
- FZ Workshop Hands-on: Feb 15 2024, Sarasota, FL. ([handout and examples](https://vast.ucar.edu/pdfs/Li_FZ2024.pdf))
71-
- SC'23 Tutorial on lossy scientific data compression: Nov 13 2023, Denver CO. ([slides](https://vast.ucar.edu/pdfs/Li_SC23_Slides.pdf))
72-
- IPDPS'23 Lossy Scientific Data Compression With SPERR: May 18 2023, St. Petersburg, FL. ([slides](https://vast.ucar.edu/pdfs/Li_IPDPS23_Slides.pdf))
73+
- SC'24 Tutorial: Nov 18 2024, Atlanta, GA. ([slides](./handout/SC24-Tutorial-SPERR.pdf)
74+
by [lindstro](https://github.com/lindstro))
75+
- FZ Workshop: Feb 15 2024, Sarasota, FL. ([handout and examples](https://vast.ucar.edu/pdfs/Li_FZ2024.pdf))
76+
- SC'23 Tutorial: Nov 13 2023, Denver CO. ([slides](https://vast.ucar.edu/pdfs/Li_SC23_Slides.pdf))
77+
- IPDPS'23: May 18 2023, St. Petersburg, FL. ([slides](https://vast.ucar.edu/pdfs/Li_IPDPS23_Slides.pdf))
784 KB
Binary file not shown.

lib/SPERR/include/CDF97.h

Lines changed: 23 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,11 @@ namespace sperr {
1818

1919
class CDF97 {
2020
public:
21+
//
22+
// Destructor
23+
//
24+
~CDF97();
25+
2126
//
2227
// Input
2328
//
@@ -57,49 +62,34 @@ class CDF97 {
5762
void idwt3d_multi_res(std::vector<vecd_type>&);
5863

5964
private:
60-
using itd_type = vecd_type::iterator;
61-
using citd_type = vecd_type::const_iterator;
62-
6365
//
6466
// Private methods helping DWT.
6567
//
6668

6769
// Multiple levels of 1D DWT/IDWT on a given array of length array_len.
68-
void m_dwt1d(itd_type array, size_t array_len, size_t num_of_xforms);
69-
void m_idwt1d(itd_type array, size_t array_len, size_t num_of_xforms);
70+
void m_dwt1d(double* array, size_t array_len, size_t num_of_xforms);
71+
void m_idwt1d(double* array, size_t array_len, size_t num_of_xforms);
7072

7173
// Multiple levels of 2D DWT/IDWT on a given plane by repeatedly invoking
7274
// m_dwt2d_one_level(). The plane has a dimension (len_xy[0], len_xy[1]).
73-
void m_dwt2d(itd_type plane, std::array<size_t, 2> len_xy, size_t num_of_xforms);
74-
void m_idwt2d(itd_type plane, std::array<size_t, 2> len_xy, size_t num_of_xforms);
75+
void m_dwt2d(double* plane, std::array<size_t, 2> len_xy, size_t num_of_xforms);
76+
void m_idwt2d(double* plane, std::array<size_t, 2> len_xy, size_t num_of_xforms);
7577

7678
// Perform one level of interleaved 3D dwt/idwt on a given volume (m_dims),
7779
// specifically on its top left (len_xyz) subset.
78-
void m_dwt3d_one_level(itd_type vol, std::array<size_t, 3> len_xyz);
79-
void m_idwt3d_one_level(itd_type vol, std::array<size_t, 3> len_xyz);
80+
void m_dwt3d_one_level(std::array<size_t, 3> len_xyz);
81+
void m_idwt3d_one_level(std::array<size_t, 3> len_xyz);
8082

8183
// Perform one level of 2D dwt/idwt on a given plane (m_dims),
8284
// specifically on its top left (len_xy) subset.
83-
void m_dwt2d_one_level(itd_type plane, std::array<size_t, 2> len_xy);
84-
void m_idwt2d_one_level(itd_type plane, std::array<size_t, 2> len_xy);
85-
86-
// Perform one level of 1D dwt/idwt on a given array (array_len).
87-
// A buffer space (tmp_buf) should be passed in for
88-
// this method to work on with length at least 2*array_len.
89-
void m_dwt1d_one_level(itd_type array, size_t array_len);
90-
void m_idwt1d_one_level(itd_type array, size_t array_len);
85+
void m_dwt2d_one_level(double* plane, std::array<size_t, 2> len_xy);
86+
void m_idwt2d_one_level(double* plane, std::array<size_t, 2> len_xy);
9187

9288
// Separate even and odd indexed elements to be at the front and back of the dest array.
93-
// Note 1: sufficient memory space should be allocated by the caller.
94-
// Note 2: two versions for even and odd length input.
95-
void m_gather_even(citd_type begin, citd_type end, itd_type dest) const;
96-
void m_gather_odd(citd_type begin, citd_type end, itd_type dest) const;
97-
9889
// Interleave low and high pass elements to be at even and odd positions of the dest array.
99-
// Note 1: sufficient memory space should be allocated by the caller.
100-
// Note 2: two versions for even and odd length input.
101-
void m_scatter_even(citd_type begin, citd_type end, itd_type dest) const;
102-
void m_scatter_odd(citd_type begin, citd_type end, itd_type dest) const;
90+
// Note: sufficient memory space should be allocated by the caller.
91+
void m_gather(const double* begin, size_t len, double* dest) const;
92+
void m_scatter(const double* begin, size_t len, double* dest) const;
10393

10494
// Two flavors of 3D transforms.
10595
// They should only be invoked by the public `dwt3d()` and `idwt3d()` methods, not directly by users.
@@ -112,28 +102,24 @@ class CDF97 {
112102
// It is UB if `subdims` exceeds the full dimension (`m_dims`).
113103
// It is UB if `dst` does not point to a big enough space.
114104
auto m_sub_slice(std::array<size_t, 2> subdims) const -> vecd_type;
115-
void m_sub_volume(dims_type subdims, itd_type dst) const;
105+
void m_sub_volume(dims_type subdims, double* dst) const;
116106

117107
//
118-
// Methods from QccPack, so keep their original names, interface, and the use of raw pointers.
108+
// Methods from QccPack with slight changes to combine the even and odd length cases.
119109
//
120-
void QccWAVCDF97AnalysisSymmetricEvenEven(double* signal, size_t signal_length);
121-
void QccWAVCDF97AnalysisSymmetricOddEven(double* signal, size_t signal_length);
122-
void QccWAVCDF97SynthesisSymmetricEvenEven(double* signal, size_t signal_length);
123-
void QccWAVCDF97SynthesisSymmetricOddEven(double* signal, size_t signal_length);
110+
void QccWAVCDF97AnalysisSymmetric(double* signal, size_t signal_length);
111+
void QccWAVCDF97SynthesisSymmetric(double* signal, size_t signal_length);
124112

125113
//
126114
// Private data members
127115
//
128116
vecd_type m_data_buf; // Holds the entire input data.
129117
dims_type m_dims = {0, 0, 0}; // Dimension of the data volume
130118

131-
// Temporary buffers that are big enough for any (1D column * 2) or any 2D
132-
// slice. Note: `m_qcc_buf` should be used by m_***_one_level() functions and
133-
// should not be used by higher-level functions. `m_slice_buf` is only used by
134-
// wavelet-packet transforms.
135-
vecd_type m_qcc_buf;
119+
// Temporary buffers that are big enough for any 1D column or any 2D slice.
136120
vecd_type m_slice_buf;
121+
double* m_aligned_buf = nullptr;
122+
size_t m_aligned_buf_bytes = 0; // num. of bytes
137123

138124
//
139125
// Note on the coefficients and constants:

lib/SPERR/include/sperr_helper.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,11 @@ enum class RTNType {
6666
//
6767
// Helper functions
6868
//
69+
70+
// Allocate and deallocate a chunk of ALIGNED memory, for both UNIX and Windows.
71+
auto aligned_malloc(size_t alignment, size_t size) -> void*;
72+
void aligned_free(void* p);
73+
6974
// Given a certain length, how many transforms to be performed?
7075
auto num_of_xforms(size_t len) -> size_t;
7176

@@ -179,6 +184,9 @@ auto chunk_volume(dims_type vol_dim, dims_type chunk_dim) -> std::vector<std::ar
179184
template <typename T>
180185
auto calc_mean_var(const T*, size_t len, size_t omp_nthreads = 0) -> std::array<T, 2>;
181186

187+
template <typename T>
188+
auto any_ge(const T* buf, size_t len, T threshold) -> bool;
189+
182190
}; // namespace sperr
183191

184192
#endif

lib/SPERR/src/Bitmask.cpp

Lines changed: 23 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,9 @@
1010

1111
sperr::Bitmask::Bitmask(size_t nbits)
1212
{
13-
if (nbits > 0) {
14-
auto num_longs = nbits / 64;
15-
if (nbits - num_longs * 64 != 0)
16-
num_longs++;
17-
m_buf.assign(num_longs, 0);
18-
m_num_bits = nbits;
19-
}
13+
auto num_longs = (nbits + 63) / 64;
14+
m_buf.assign(num_longs, 0);
15+
m_num_bits = nbits;
2016
}
2117

2218
auto sperr::Bitmask::size() const -> size_t
@@ -26,37 +22,35 @@ auto sperr::Bitmask::size() const -> size_t
2622

2723
void sperr::Bitmask::resize(size_t nbits)
2824
{
29-
auto num_longs = nbits / 64;
30-
if (nbits - num_longs * 64 != 0)
31-
num_longs++;
25+
auto num_longs = (nbits + 63) / 64;
3226
m_buf.resize(num_longs, 0);
3327
m_num_bits = nbits;
3428
}
3529

3630
auto sperr::Bitmask::rlong(size_t idx) const -> uint64_t
3731
{
38-
return m_buf[idx / 64];
32+
return m_buf[idx >> 6];
3933
}
4034

4135
auto sperr::Bitmask::rbit(size_t idx) const -> bool
4236
{
43-
auto div = idx / 64;
44-
auto rem = idx - div * 64;
37+
auto div = idx >> 6; // idx / 64
38+
auto rem = idx & 63; // idx % 64
4539
auto word = m_buf[div];
4640
word &= uint64_t{1} << rem;
47-
return (word != 0);
41+
return word;
4842
}
4943

5044
template <bool Position>
5145
auto sperr::Bitmask::has_true(size_t start, size_t len) const -> int64_t
5246
{
53-
auto long_idx = start / 64;
47+
auto long_idx = start >> 6;
5448
auto processed_bits = int64_t{0};
5549
auto word = m_buf[long_idx];
5650
auto answer = uint64_t{0};
5751

5852
// Collect the remaining bits from the start long.
59-
auto begin_idx = start - long_idx * 64;
53+
auto begin_idx = start & 63;
6054
auto nbits = std::min(size_t{64}, begin_idx + len);
6155
for (auto i = begin_idx; i < nbits; i++) {
6256
answer |= word & (uint64_t{1} << i);
@@ -144,26 +138,27 @@ auto sperr::Bitmask::count_true() const -> size_t
144138

145139
void sperr::Bitmask::wlong(size_t idx, uint64_t value)
146140
{
147-
m_buf[idx / 64] = value;
141+
m_buf[idx >> 6] = value;
148142
}
149143

150144
void sperr::Bitmask::wbit(size_t idx, bool bit)
151145
{
152-
const auto wstart = idx / 64;
153-
const auto mask = uint64_t{1} << (idx - wstart * 64);
154-
146+
const auto wstart = idx >> 6;
155147
auto word = m_buf[wstart];
156-
if (bit)
157-
word |= mask;
158-
else
159-
word &= ~mask;
148+
149+
auto mask1 = uint64_t{1} << (idx & 63);
150+
word &= ~mask1;
151+
152+
auto mask2 = uint64_t{bit} << (idx & 63);
153+
word |= mask2;
154+
160155
m_buf[wstart] = word;
161156
}
162157

163158
void sperr::Bitmask::wtrue(size_t idx)
164159
{
165-
const auto wstart = idx / 64;
166-
const auto mask = uint64_t{1} << (idx - wstart * 64);
160+
const auto wstart = idx >> 6;
161+
const auto mask = uint64_t{1} << (idx & 63);
167162

168163
auto word = m_buf[wstart];
169164
word |= mask;
@@ -172,8 +167,8 @@ void sperr::Bitmask::wtrue(size_t idx)
172167

173168
void sperr::Bitmask::wfalse(size_t idx)
174169
{
175-
const auto wstart = idx / 64;
176-
const auto mask = uint64_t{1} << (idx - wstart * 64);
170+
const auto wstart = idx >> 6;
171+
const auto mask = uint64_t{1} << (idx & 63);
177172

178173
auto word = m_buf[wstart];
179174
word &= ~mask;

0 commit comments

Comments
 (0)