Skip to content

Commit 1a2f139

Browse files
tanjialiangmeta-codesync[bot]
authored andcommitted
Let EncodingFactory be an instantiable class (facebookincubator#607)
Summary: Pull Request resolved: facebookincubator#607 Introduce `encoding::Decoder` class that bundles a decode function with `Encoding::Options` into a single object. This replaces the pattern of threading a bare `std::function` encoding factory through multiple layers (`NimbleData`, `ChunkedDecoder`, `NimbleParams`). Named constructors: - `encoding::Decoder::create(Options)` — uses `EncodingFactory::decode` - `encoding::Decoder::createLegacy(Options)` — uses `legacy::EncodingFactory::decode` This is a pure refactoring with no behavioral change. It simplifies the selective reader code by replacing 10+ line inline lambda ternaries with concise `encoding::Decoder::create()` / `createLegacy()` calls, and prepares the codebase for threading additional options (like varint row counts) without touching every callsite. The `Decoder` class lives in `namespace facebook::nimble::encoding` and in a separate BUCK target (`encoding_decoder`) to avoid a circular dependency between `encodings` and `legacy/encodings`. Reviewed By: xiaoxmeng Differential Revision: D97367204
1 parent 788a1d3 commit 1a2f139

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

46 files changed

+224
-254
lines changed

dwio/nimble/encodings/DeltaEncoding.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,16 +95,17 @@ DeltaEncoding<T>::DeltaEncoding(
9595
deltasBuffer_(&memoryPool),
9696
restatementsBuffer_(&memoryPool),
9797
isRestatementsBuffer_(&memoryPool) {
98+
const EncodingFactory factory;
9899
auto pos = data.data() + Encoding::kPrefixSize;
99100
const uint32_t restatementsOffset = encoding::readUint32(pos);
100101
const uint32_t isRestatementsOffset = encoding::readUint32(pos);
101-
deltas_ = EncodingFactory::decode(
102+
deltas_ = factory.create(
102103
memoryPool, {pos, restatementsOffset}, stringBufferFactory);
103104
pos += restatementsOffset;
104-
restatements_ = EncodingFactory::decode(
105+
restatements_ = factory.create(
105106
memoryPool, {pos, isRestatementsOffset}, stringBufferFactory);
106107
pos += isRestatementsOffset;
107-
isRestatements_ = EncodingFactory::decode(
108+
isRestatements_ = factory.create(
108109
memoryPool,
109110
{pos, static_cast<size_t>(data.end() - pos)},
110111
stringBufferFactory);

dwio/nimble/encodings/DictionaryEncoding.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -92,20 +92,20 @@ DictionaryEncoding<T>::DictionaryEncoding(
9292
: TypedEncoding<T, physicalType>{pool, data, options},
9393
alphabet_{this->pool_},
9494
indicesBuffer_{this->pool_} {
95+
const EncodingFactory factory{options};
9596
const auto* pos = data.data() + this->dataOffset();
9697
const uint32_t alphabetSize = encoding::readUint32(pos);
97-
alphabetEncoding_ = EncodingFactory::decode(
98-
*this->pool_, {pos, alphabetSize}, stringBufferFactory, options);
98+
alphabetEncoding_ =
99+
factory.create(*this->pool_, {pos, alphabetSize}, stringBufferFactory);
99100
const uint32_t alphabetCount = alphabetEncoding_->rowCount();
100101
alphabet_.resize(alphabetCount);
101102
alphabetEncoding_->materialize(alphabetCount, alphabet_.data());
102103

103104
pos += alphabetSize;
104-
indicesEncoding_ = EncodingFactory::decode(
105+
indicesEncoding_ = factory.create(
105106
*this->pool_,
106107
{pos, static_cast<size_t>(data.end() - pos)},
107-
stringBufferFactory,
108-
options);
108+
stringBufferFactory);
109109
}
110110

111111
template <typename T>

dwio/nimble/encodings/EncodingFactory.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,11 @@ static std::span<const typename TypeTraits<T>::physicalType> toPhysicalSpan(
4040
}
4141
} // namespace
4242

43-
std::unique_ptr<Encoding> EncodingFactory::decode(
43+
std::unique_ptr<Encoding> EncodingFactory::create(
4444
velox::memory::MemoryPool& memoryPool,
4545
std::string_view data,
46-
std::function<void*(uint32_t)> stringBufferFactory,
47-
const Encoding::Options& options) {
46+
std::function<void*(uint32_t)> stringBufferFactory) const {
47+
const auto& options = options_;
4848
// Maybe we should have a magic number of encodings too? Hrm.
4949
const EncodingType encodingType = static_cast<EncodingType>(data[0]);
5050
const DataType dataType = static_cast<DataType>(data[1]);

dwio/nimble/encodings/EncodingFactory.h

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,20 @@ class EncodingSelectionPolicy;
3131

3232
class EncodingFactory {
3333
public:
34-
static std::unique_ptr<Encoding> decode(
34+
explicit EncodingFactory(Encoding::Options options = {})
35+
: options_{options} {}
36+
37+
virtual ~EncodingFactory() = default;
38+
39+
/// Creates an Encoding from serialized data using this factory's options.
40+
virtual std::unique_ptr<Encoding> create(
3541
velox::memory::MemoryPool& memoryPool,
3642
std::string_view data,
37-
std::function<void*(uint32_t)> stringBufferFactory,
38-
const Encoding::Options& options = {});
43+
std::function<void*(uint32_t)> stringBufferFactory) const;
44+
45+
const Encoding::Options& options() const {
46+
return options_;
47+
}
3948

4049
template <typename T>
4150
static std::string_view encode(
@@ -53,6 +62,8 @@ class EncodingFactory {
5362
const Encoding::Options& options = {});
5463

5564
private:
65+
Encoding::Options options_{};
66+
5667
template <typename T>
5768
static std::string_view encode(
5869
EncodingSelection<typename TypeTraits<T>::physicalType>&& selection,

dwio/nimble/encodings/MainlyConstantEncoding.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,15 @@ MainlyConstantEncoding<std::string_view>::MainlyConstantEncoding(
2424
std::function<void*(uint32_t)> stringBufferFactory,
2525
const Encoding::Options& options)
2626
: MainlyConstantEncodingBase<std::string_view>(memoryPool, data, options) {
27+
const EncodingFactory factory{options};
2728
const char* pos = data.data() + this->dataOffset();
2829
const uint32_t isCommonBytes = encoding::readUint32(pos);
29-
isCommon_ = EncodingFactory::decode(
30-
*this->pool_, {pos, isCommonBytes}, stringBufferFactory, options);
30+
isCommon_ =
31+
factory.create(*this->pool_, {pos, isCommonBytes}, stringBufferFactory);
3132
pos += isCommonBytes;
3233
const uint32_t otherValuesBytes = encoding::readUint32(pos);
33-
otherValues_ = EncodingFactory::decode(
34-
*this->pool_, {pos, otherValuesBytes}, stringBufferFactory, options);
34+
otherValues_ = factory.create(
35+
*this->pool_, {pos, otherValuesBytes}, stringBufferFactory);
3536
pos += otherValuesBytes;
3637
commonValue_ = encoding::read<physicalType>(pos);
3738
NIMBLE_CHECK(pos == data.end(), "Unexpected mainly constant encoding end");

dwio/nimble/encodings/MainlyConstantEncoding.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -339,14 +339,15 @@ MainlyConstantEncoding<T>::MainlyConstantEncoding(
339339
std::function<void*(uint32_t)> stringBufferFactory,
340340
const Encoding::Options& options)
341341
: MainlyConstantEncodingBase<T>(memoryPool, data, options) {
342+
const EncodingFactory factory{options};
342343
const char* pos = data.data() + this->dataOffset();
343344
const uint32_t isCommonBytes = encoding::readUint32(pos);
344-
this->isCommon_ = EncodingFactory::decode(
345-
*this->pool_, {pos, isCommonBytes}, stringBufferFactory, options);
345+
this->isCommon_ =
346+
factory.create(*this->pool_, {pos, isCommonBytes}, stringBufferFactory);
346347
pos += isCommonBytes;
347348
const uint32_t otherValuesBytes = encoding::readUint32(pos);
348-
this->otherValues_ = EncodingFactory::decode(
349-
*this->pool_, {pos, otherValuesBytes}, stringBufferFactory, options);
349+
this->otherValues_ = factory.create(
350+
*this->pool_, {pos, otherValuesBytes}, stringBufferFactory);
350351
pos += otherValuesBytes;
351352
this->commonValue_ = encoding::read<physicalType>(pos);
352353
NIMBLE_CHECK(pos == data.end(), "Unexpected mainly constant encoding end");

dwio/nimble/encodings/NullableEncoding.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -99,16 +99,16 @@ NullableEncoding<T>::NullableEncoding(
9999
: TypedEncoding<T, physicalType>(memoryPool, data, options),
100100
indicesBuffer_(this->pool_),
101101
nullBuffer_(this->pool_) {
102+
const EncodingFactory factory{options};
102103
const char* pos = data.data() + this->dataOffset();
103104
const uint32_t nonNullsBytes = encoding::readUint32(pos);
104-
nonNullValues_ = EncodingFactory::decode(
105-
*this->pool_, {pos, nonNullsBytes}, stringBufferFactory, options);
105+
nonNullValues_ =
106+
factory.create(*this->pool_, {pos, nonNullsBytes}, stringBufferFactory);
106107
pos += nonNullsBytes;
107-
nulls_ = EncodingFactory::decode(
108+
nulls_ = factory.create(
108109
*this->pool_,
109110
{pos, static_cast<size_t>(data.end() - pos)},
110-
stringBufferFactory,
111-
options);
111+
stringBufferFactory);
112112
NIMBLE_DCHECK_EQ(
113113
Encoding::rowCount(), nulls_->rowCount(), "Nulls count mismatch.");
114114
}

dwio/nimble/encodings/RleEncoding.h

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,12 @@ class RLEEncodingBase
6161
std::function<void*(uint32_t)> stringBufferFactory,
6262
const Encoding::Options& options = {})
6363
: TypedEncoding<T, physicalType>(memoryPool, data, options),
64-
materializedRunLengths_{EncodingFactory::decode(
64+
materializedRunLengths_{EncodingFactory(options).create(
6565
memoryPool,
6666
{data.data() + this->dataOffset() + 4,
6767
*reinterpret_cast<const uint32_t*>(
6868
data.data() + this->dataOffset())},
69-
stringBufferFactory,
70-
options)} {}
69+
stringBufferFactory)} {}
7170

7271
void reset() {
7372
materializedRunLengths_.reset();
@@ -286,14 +285,13 @@ RLEEncoding<T>::RLEEncoding(
286285
data,
287286
stringBufferFactory,
288287
options),
289-
values_{EncodingFactory::decode(
288+
values_{EncodingFactory(options).create(
290289
memoryPool,
291290
{internal::RLEEncodingBase<T, RLEEncoding<T>>::getValuesStart(),
292291
static_cast<size_t>(
293292
data.end() -
294293
internal::RLEEncodingBase<T, RLEEncoding<T>>::getValuesStart())},
295-
stringBufferFactory,
296-
options)} {
294+
stringBufferFactory)} {
297295
internal::RLEEncodingBase<T, RLEEncoding<T>>::reset();
298296
}
299297

dwio/nimble/encodings/SparseBoolEncoding.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,11 @@ SparseBoolEncoding::SparseBoolEncoding(
3131
: TypedEncoding<bool, bool>{memoryPool, data, options},
3232
sparseValue_{static_cast<bool>(data[this->dataOffset()])},
3333
indicesUncompressed_{&memoryPool},
34-
indices_{EncodingFactory::decode(
34+
indices_{EncodingFactory(options).create(
3535
memoryPool,
3636
{data.data() + this->dataOffset() + kPrefixSize,
3737
data.size() - this->dataOffset() - kPrefixSize},
38-
stringBufferFactory,
39-
options)} {
38+
stringBufferFactory)} {
4039
reset();
4140
}
4241

dwio/nimble/encodings/TrivialEncoding.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,8 @@ TrivialEncoding<std::string_view>::TrivialEncoding(
3333
const auto dataCompressionType =
3434
static_cast<CompressionType>(encoding::readChar(pos));
3535
const auto lengthsSize = encoding::readUint32(pos);
36-
lengths_ = EncodingFactory::decode(
37-
memoryPool, {pos, lengthsSize}, stringBufferFactory, options);
36+
lengths_ = EncodingFactory(options).create(
37+
memoryPool, {pos, lengthsSize}, stringBufferFactory);
3838
blob_ = pos + lengthsSize;
3939

4040
if (dataCompressionType != CompressionType::Uncompressed) {

0 commit comments

Comments
 (0)