Skip to content

Commit 7d8d5de

Browse files
committed
feat: add auto_packed_int_vector that adapts bit width automatically
1 parent 154f17e commit 7d8d5de

File tree

2 files changed

+542
-35
lines changed

2 files changed

+542
-35
lines changed

include/dwarfs/internal/packed_int_vector.h

Lines changed: 162 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -28,30 +28,38 @@
2828

2929
#pragma once
3030

31+
#include <algorithm>
32+
#include <bit>
3133
#include <concepts>
34+
#include <cstddef>
3235
#include <cstdint>
3336
#include <limits>
37+
#include <stdexcept>
3438
#include <type_traits>
3539
#include <vector>
3640

3741
#include <dwarfs/bit_view.h>
3842

3943
namespace dwarfs::internal {
4044

41-
template <std::integral T>
45+
namespace detail {
46+
47+
template <std::integral T, bool AutoBitWidth = false>
4248
requires(!std::same_as<T, bool>)
43-
class packed_int_vector {
49+
class basic_packed_int_vector {
4450
public:
4551
using value_type = T;
4652
using underlying_type = std::make_unsigned_t<T>;
47-
using size_type = size_t;
53+
using size_type = std::size_t;
54+
55+
static constexpr bool auto_bit_width = AutoBitWidth;
4856

4957
static constexpr size_type bits_per_block{
5058
std::numeric_limits<underlying_type>::digits};
5159

5260
class value_proxy {
5361
public:
54-
value_proxy(packed_int_vector& vec, size_type i)
62+
value_proxy(basic_packed_int_vector& vec, size_type i)
5563
: vec_{vec}
5664
, i_{i} {}
5765

@@ -63,43 +71,94 @@ class packed_int_vector {
6371
}
6472

6573
private:
66-
packed_int_vector& vec_;
74+
basic_packed_int_vector& vec_;
6775
size_type i_;
6876
};
6977

70-
packed_int_vector() = default;
71-
packed_int_vector(size_type bits)
72-
: bits_{bits} {}
73-
packed_int_vector(size_type bits, size_type size)
78+
basic_packed_int_vector() = default;
79+
80+
explicit basic_packed_int_vector(size_type bits)
81+
: bits_{checked_bits(bits)} {}
82+
83+
basic_packed_int_vector(size_type bits, size_type size)
7484
: size_{size}
75-
, bits_{bits}
76-
, data_{min_data_size(size, bits)} {}
85+
, bits_{checked_bits(bits)}
86+
, data_(min_data_size(size, bits_)) {}
87+
88+
basic_packed_int_vector(basic_packed_int_vector const&) = default;
89+
basic_packed_int_vector(basic_packed_int_vector&&) = default;
90+
basic_packed_int_vector& operator=(basic_packed_int_vector const&) = default;
91+
basic_packed_int_vector& operator=(basic_packed_int_vector&&) = default;
92+
93+
/// Returns the number of bits needed to store `value`.
///
/// Zero needs no bits. For unsigned `T` the result is the block width minus
/// the number of leading zero bits. For signed `T` the redundant leading
/// bits (zeros for positive values, ones for negative values) are dropped
/// and one bit is added back.
static constexpr size_type required_bits(T value) noexcept {
  if (value == 0) {
    return 0;
  }

  auto const raw = static_cast<underlying_type>(value);

  if constexpr (std::is_unsigned_v<T>) {
    return bits_per_block - std::countl_zero(raw);
  } else {
    // value is non-zero here, so "not negative" means strictly positive.
    auto const redundant =
        value < 0 ? std::countl_one(raw) : std::countl_zero(raw);
    return bits_per_block - redundant + 1;
  }
}
109+
110+
/// Returns the largest required_bits(value) over all stored elements.
///
/// The scan stops early once the result reaches bits_per_block, since no
/// element can need more than that.
size_type required_bits() const {
  size_type widest = 0;

  for (size_type idx = 0; idx < size_; ++idx) {
    if (widest >= bits_per_block) {
      break;
    }
    auto const needed = required_bits(get(idx));
    if (needed > widest) {
      widest = needed;
    }
  }

  return widest;
}
82117

83118
/// Discards all contents and re-initializes the vector with `size` elements
/// of `bits` width, backed by freshly value-initialized blocks.
///
/// The new storage is allocated before any member is modified; the old
/// storage is released when the temporary goes out of scope.
///
/// @throws std::invalid_argument if `bits` is rejected by checked_bits().
void reset(size_type bits = 0, size_type size = 0) {
  auto const width = checked_bits(bits);
  std::vector<underlying_type> fresh(min_data_size(size, width));

  bits_ = width;
  size_ = size;
  data_.swap(fresh);
}
89125

90126
/// Changes the number of elements to `new_size`.
///
/// Elements added by growing are set to `value`. With AutoBitWidth, growing
/// first widens the storage (if needed) so that `value` fits.
void resize(size_type new_size, T value = T{}) {
  auto const prev_size = size_;

  if constexpr (AutoBitWidth) {
    // Only growing writes `value`, so only then can the width need to change.
    if (prev_size < new_size) {
      ensure_bits(required_bits(value), new_size);
    }
  }

  data_.resize(min_data_size(new_size, bits_));

  for (size_type idx = prev_size; idx < new_size; ++idx) {
    write_value(data_, bits_, idx, value);
  }

  size_ = new_size;
}
98139

99140
/// Reserves backing storage for `size` elements at the current bit width.
void reserve(size_type size) {
  auto const blocks = min_data_size(size, bits_);
  data_.reserve(blocks);
}
100141

101142
/// Forwards to shrink_to_fit() of the backing block vector.
void shrink_to_fit() {
  data_.shrink_to_fit();
}
102143

144+
void optimize_storage()
145+
requires AutoBitWidth
146+
{
147+
auto const new_bits = required_bits();
148+
if (new_bits == bits_) {
149+
data_.shrink_to_fit();
150+
} else {
151+
repack_data(new_bits, size_);
152+
}
153+
}
154+
155+
/// Repacks all elements at a width of `new_bits`; does nothing if the width
/// is unchanged.
///
/// Any width accepted by checked_bits() is allowed, not only narrower ones.
/// NOTE(review): values that do not fit in `new_bits` are presumably
/// truncated by the bit_view write - confirm against bit_view.
///
/// @throws std::invalid_argument if `new_bits` is rejected by checked_bits().
void truncate_to_bits(size_type new_bits) {
  auto const width = checked_bits(new_bits);

  if (width == bits_) {
    return;
  }

  repack_data(width, size_);
}
161+
103162
/// Returns how many elements of the current width fit into the allocated
/// blocks, or 0 when the bit width is zero.
size_type capacity() const {
  if (bits_ == 0) {
    return 0;
  }

  auto const total_bits = data_.capacity() * bits_per_block;
  return total_bits / bits_;
}
@@ -118,41 +177,47 @@ class packed_int_vector {
118177

119178
bool empty() const { return size_ == 0; }
120179

121-
T operator[](size_type i) const { return this->get(i); }
180+
/// Returns the element at index `i` by value; performs no bounds check.
T operator[](size_type i) const {
  return this->get(i);
}
122181

123182
/// Returns the element at index `i` by value.
///
/// @throws std::out_of_range if `i` is not less than the current size.
T at(size_type i) const {
  if (i < size_) {
    return get(i);
  }

  throw std::out_of_range("basic_packed_int_vector::at");
}
129188

130189
/// Reads the element at index `i`; performs no bounds check.
///
/// With a bit width of zero there is no storage, so every element reads as
/// T{}. Otherwise the value is read through bit_view; the argument is
/// presumably {bit offset, bit count} - confirm against bit_view.
T get(size_type i) const {
  if (bits_ == 0) {
    return T{};
  }

  return bit_view(data_.data()).template read<T>({i * bits_, bits_});
}
135194

136195
/// Returns a proxy bound to this vector and index `i`; no bounds check.
value_proxy operator[](size_type i) {
  return value_proxy{*this, i};
}
137196

138197
/// Returns a proxy bound to this vector and index `i`.
///
/// @throws std::out_of_range if `i` is not less than the current size.
value_proxy at(size_type i) {
  if (i < size_) {
    return value_proxy{*this, i};
  }

  throw std::out_of_range("basic_packed_int_vector::at");
}
144203

145204
/// Stores `value` at index `i`; performs no bounds check.
///
/// With AutoBitWidth the storage is widened first if `value` needs more bits
/// than the current width. With a bit width of zero nothing is written.
void set(size_type i, T value) {
  if constexpr (AutoBitWidth) {
    ensure_bits(required_bits(value), size_);
  }

  if (bits_ > 0) {
    bit_view(data_.data()).write({i * bits_, bits_}, value);
  }
}
150211

151212
/// Appends `value` as a new last element.
///
/// With AutoBitWidth the storage is widened first if `value` needs more bits
/// than the current width. The size is updated only after the block vector
/// has been resized and the value written.
void push_back(T value) {
  auto const grown = size_ + 1;

  if constexpr (AutoBitWidth) {
    ensure_bits(required_bits(value), grown);
  }

  data_.resize(min_data_size(grown, bits_));
  write_value(data_, bits_, size_, value);
  size_ = grown;
}
157222

158223
void pop_back() {
@@ -166,11 +231,11 @@ class packed_int_vector {
166231

167232
/// Returns the last element by value; the vector must not be empty.
T back() const {
  auto const last = size_ - 1;
  return get(last);
}
168233

169-
value_proxy back() { return this->operator[](size_ - 1); }
234+
/// Returns a proxy for the last element; the vector must not be empty.
value_proxy back() {
  return value_proxy{*this, size_ - 1};
}
170235

171236
/// Returns the element at index 0 by value.
T front() const {
  return get(size_type{0});
}
172237

173-
value_proxy front() { return this->operator[](0); }
238+
/// Returns a proxy for the element at index 0.
value_proxy front() {
  return value_proxy{*this, size_type{0}};
}
174239

175240
std::vector<T> unpack() const {
176241
std::vector<T> result(size_);
@@ -181,13 +246,75 @@ class packed_int_vector {
181246
}
182247

183248
private:
249+
/// Validates a bit width and returns it unchanged.
///
/// @throws std::invalid_argument if `bits` exceeds bits_per_block.
static size_type checked_bits(size_type bits) {
  if (bits <= bits_per_block) {
    return bits;
  }

  throw std::invalid_argument("basic_packed_int_vector: invalid bit width");
}
255+
184256
/// Returns the number of blocks needed to hold `size` elements of `bits`
/// bits each, i.e. ceil(size * bits / bits_per_block); 0 when `bits` is 0.
///
/// The product `size * bits` is never formed directly, because it can wrap
/// around for very large sizes and silently yield a too-small block count.
/// Writing size = whole * bits_per_block + rest gives
///   ceil(size * bits / bits_per_block)
///     = whole * bits + ceil(rest * bits / bits_per_block),
/// and for bits <= bits_per_block neither term can exceed `size`.
static constexpr size_type min_data_size(size_type size, size_type bits) {
  if (bits == 0) {
    return 0;
  }

  auto const whole = size / bits_per_block;
  auto const rest = size % bits_per_block;

  // rest < bits_per_block, so rest * bits stays tiny for any valid width.
  return whole * bits + (rest * bits + bits_per_block - 1) / bits_per_block;
}
259+
260+
static void write_value(std::vector<underlying_type>& data, size_type bits,
261+
size_type i, T value) {
262+
if (bits > 0) {
263+
bit_view(data.data()).write({i * bits, bits}, value);
264+
}
265+
}
266+
267+
static void fill_values(std::vector<underlying_type>& data, size_type bits,
268+
size_type first, size_type last, T value) {
269+
for (size_type i = first; i < last; ++i) {
270+
write_value(data, bits, i, value);
271+
}
272+
}
273+
274+
/// Widens the storage to `needed_bits` per element if that exceeds the
/// current width, sizing the new storage for `new_size` elements.
/// The width is never reduced here.
void ensure_bits(size_type needed_bits, size_type new_size)
  requires AutoBitWidth
{
  if (needed_bits > bits_) {
    repack_data(needed_bits, new_size);
  }
}
283+
284+
/// Rebuilds the block storage with `new_bits` per element and room for
/// `new_size` elements, copying over as many existing elements as fit.
///
/// Nothing is copied when either the old or the new width is zero; the new
/// storage then stays value-initialized. `size_` is left unchanged.
void repack_data(size_type new_bits, size_type new_size) {
  std::vector<underlying_type> repacked(min_data_size(new_size, new_bits));

  auto const count = (new_bits == 0 || bits_ == 0)
                         ? size_type{0}
                         : std::min(size_, new_size);

  if (count > 0) {
    auto source = bit_view(data_.data());
    auto target = bit_view(repacked.data());

    for (size_type idx = 0; idx < count; ++idx) {
      auto const element = source.template read<T>({idx * bits_, bits_});
      target.write({idx * new_bits, new_bits}, element);
    }
  }

  data_.swap(repacked);
  bits_ = new_bits;
}
187304

188305
size_type size_{0};
189306
size_type bits_{0};
190307
std::vector<underlying_type> data_;
191308
};
192309

310+
} // namespace detail
311+
312+
template <std::integral T>
313+
requires(!std::same_as<T, bool>)
314+
using packed_int_vector = detail::basic_packed_int_vector<T, false>;
315+
316+
template <std::integral T>
317+
requires(!std::same_as<T, bool>)
318+
using auto_packed_int_vector = detail::basic_packed_int_vector<T, true>;
319+
193320
} // namespace dwarfs::internal

0 commit comments

Comments
 (0)