Skip to content

Commit 454e69d

Browse files
tanjialiangfacebook-github-bot
authored andcommitted
Introduce encoding::Decoder to bundle encoding factory with options
Summary: Introduce `encoding::Decoder` class that bundles a decode function with `Encoding::Options` into a single object. This replaces the pattern of threading a bare `std::function` encoding factory through multiple layers (`NimbleData`, `ChunkedDecoder`, `NimbleParams`). Named constructors: - `encoding::Decoder::create(Options)` — uses `EncodingFactory::decode` - `encoding::Decoder::createLegacy(Options)` — uses `legacy::EncodingFactory::decode` This is a pure refactoring with no behavioral change. It simplifies the selective reader code by replacing 10+ line inline lambda ternaries with concise `encoding::Decoder::create()` / `createLegacy()` calls, and prepares the codebase for threading additional options (like varint row counts) without touching every callsite. The `Decoder` class lives in `namespace facebook::nimble::encoding` and in a separate BUCK target (`encoding_decoder`) to avoid a circular dependency between `encodings` and `legacy/encodings`. Differential Revision: D97367204
1 parent 7830071 commit 454e69d

File tree

11 files changed

+164
-101
lines changed

11 files changed

+164
-101
lines changed
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#include "dwio/nimble/encodings/EncodingDecoder.h"
17+
18+
#include "dwio/nimble/encodings/EncodingFactory.h"
19+
#include "dwio/nimble/encodings/legacy/EncodingFactory.h"
20+
21+
namespace facebook::nimble::encoding {
22+
23+
/* static */ Decoder Decoder::create(Encoding::Options options) {
24+
return Decoder(
25+
[options](
26+
std::string_view data,
27+
StringBufferFactory stringBufferFactory,
28+
velox::memory::MemoryPool* pool) -> std::unique_ptr<Encoding> {
29+
return EncodingFactory::decode(
30+
*pool, data, std::move(stringBufferFactory), options);
31+
},
32+
options);
33+
}
34+
35+
/* static */ Decoder Decoder::createLegacy(Encoding::Options options) {
36+
return Decoder(
37+
[](std::string_view data,
38+
StringBufferFactory stringBufferFactory,
39+
velox::memory::MemoryPool* pool) -> std::unique_ptr<Encoding> {
40+
return legacy::EncodingFactory::decode(
41+
*pool, data, std::move(stringBufferFactory));
42+
},
43+
options);
44+
}
45+
46+
} // namespace facebook::nimble::encoding
Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/*
2+
* Copyright (c) Meta Platforms, Inc. and affiliates.
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
#pragma once
17+
18+
#include <functional>
19+
#include <memory>
20+
#include <string_view>
21+
22+
#include "dwio/nimble/encodings/Encoding.h"
23+
24+
namespace facebook::nimble::encoding {
25+
26+
/// Bundles an encoding decode function with its options.
27+
///
28+
/// This replaces the pattern of threading a bare std::function encoding factory
29+
/// alongside a separate `bool useVarintRowCount` through multiple layers.
30+
/// Consumers can query `useVarintRowCount()` from the same object that
31+
/// performs decoding.
32+
class Decoder {
33+
public:
34+
using StringBufferFactory = std::function<void*(uint32_t)>;
35+
36+
/// Named constructors — declared here, defined in EncodingDecoder.cpp
37+
/// to avoid header dependencies on EncodingFactory and
38+
/// legacy::EncodingFactory.
39+
static Decoder create(Encoding::Options options = {});
40+
static Decoder createLegacy(Encoding::Options options = {});
41+
42+
std::unique_ptr<Encoding> decode(
43+
std::string_view data,
44+
StringBufferFactory stringBufferFactory,
45+
velox::memory::MemoryPool* pool) const {
46+
return decodeFunc_(data, std::move(stringBufferFactory), pool);
47+
}
48+
49+
bool useVarintRowCount() const {
50+
return options_.useVarintRowCount;
51+
}
52+
53+
private:
54+
using DecodeFunction = std::function<std::unique_ptr<Encoding>(
55+
std::string_view,
56+
StringBufferFactory,
57+
velox::memory::MemoryPool*)>;
58+
59+
Decoder(DecodeFunction decodeFunc, Encoding::Options options)
60+
: decodeFunc_{std::move(decodeFunc)}, options_{std::move(options)} {}
61+
62+
const DecodeFunction decodeFunc_;
63+
const Encoding::Options options_{};
64+
};
65+
66+
} // namespace facebook::nimble::encoding

dwio/nimble/encodings/tests/ReadWithVisitorTest.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929

3030
#include "dwio/nimble/common/Buffer.h"
3131
#include "dwio/nimble/common/tests/NimbleFileWriter.h"
32+
#include "dwio/nimble/encodings/EncodingDecoder.h"
3233
#include "dwio/nimble/encodings/EncodingFactory.h"
3334
#include "dwio/nimble/encodings/EncodingUtils.h"
3435
#include "dwio/nimble/encodings/tests/EncodingLayoutTestHelper.h"
@@ -123,11 +124,7 @@ class ReadWithVisitorTest : public ::testing::Test,
123124
ctx.readerBase->nimbleSchema(),
124125
*ctx.streams,
125126
ctx.rowSizeTracker.get(),
126-
[](memory::MemoryPool& pool,
127-
std::string_view data,
128-
std::function<void*(uint32_t)> sbf) -> std::unique_ptr<Encoding> {
129-
return EncodingFactory::decode(pool, data, std::move(sbf));
130-
});
127+
encoding::Decoder::create());
131128

132129
auto reader = buildColumnReader(
133130
rowType,

dwio/nimble/velox/selective/ChunkedDecoder.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,14 +49,14 @@ void ChunkedDecoder::loadNextChunk() {
4949
inputData_ += length;
5050
inputSize_ -= length;
5151
currentStringBuffers_.clear();
52-
encoding_ = encodingFactory_(
53-
*pool_,
52+
encoding_ = decoder_.decode(
5453
std::string_view(chunkData, chunkSize),
5554
[&](uint32_t totalLength) {
5655
auto& buffer = currentStringBuffers_.emplace_back(
5756
velox::AlignedBuffer::allocate<char>(totalLength, pool_));
5857
return buffer->asMutable<void>();
59-
});
58+
},
59+
pool_);
6060
remainingValues_ = encoding_->rowCount();
6161
NIMBLE_CHECK_GT(remainingValues_, 0);
6262
VLOG(1) << encoding_->debugString();

dwio/nimble/velox/selective/ChunkedDecoder.h

Lines changed: 4 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -29,23 +29,14 @@ class ChunkedDecoder {
2929
public:
3030
ChunkedDecoder(
3131
std::unique_ptr<velox::dwio::common::SeekableInputStream> input,
32-
bool decodeValuesWithNulls,
3332
std::shared_ptr<index::StreamIndex> streamIndex,
33+
bool decodeValuesWithNulls,
3434
velox::memory::MemoryPool* pool,
35-
std::function<std::unique_ptr<Encoding>(
36-
velox::memory::MemoryPool&,
37-
std::string_view,
38-
std::function<void*(uint32_t)>)> encodingFactory =
39-
[](velox::memory::MemoryPool& pool,
40-
std::string_view data,
41-
std::function<void*(uint32_t)> stringBufferFactory)
42-
-> std::unique_ptr<Encoding> {
43-
return EncodingFactory::decode(pool, data, stringBufferFactory);
44-
})
35+
encoding::Decoder decoder = encoding::Decoder::create())
4536
: input_{std::move(input)},
4637
pool_{pool},
4738
decodeValuesWithNulls_{decodeValuesWithNulls},
48-
encodingFactory_{std::move(encodingFactory)},
39+
decoder_{std::move(decoder)},
4940
streamIndex_{std::move(streamIndex)},
5041
streamRowCount_{
5142
streamIndex_ ? std::optional<uint32_t>(streamIndex_->rowCount())
@@ -418,11 +409,7 @@ class ChunkedDecoder {
418409
// encode nulls alongside values). When false, decode values without nulls
419410
// (standard case for scalar types).
420411
const bool decodeValuesWithNulls_;
421-
const std::function<std::unique_ptr<Encoding>(
422-
velox::memory::MemoryPool&,
423-
std::string_view,
424-
std::function<void*(uint32_t)>)>
425-
encodingFactory_;
412+
const encoding::Decoder decoder_;
426413
// Optional stream index for accelerating skip operations
427414
const std::shared_ptr<index::StreamIndex> streamIndex_;
428415
// Total row count in the stream, set from stream index if available.

dwio/nimble/velox/selective/FlatMapColumnReader.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -118,9 +118,10 @@ std::vector<KeyNode<T>> makeKeyNodes(
118118
if (inMapInput != nullptr) {
119119
node.inMap = std::make_unique<ChunkedDecoder>(
120120
std::move(inMapInput),
121-
/*decodeValuesWithNulls=*/false,
122121
/*streamIndex=*/nullptr,
123-
&memoryPool);
122+
/*decodeValuesWithNulls=*/false,
123+
&memoryPool,
124+
params.decoder());
124125
} else {
125126
// Missing in-map stream: either the writer skipped it because all rows
126127
// are in-map (value streams exist), or the key has no data in this

dwio/nimble/velox/selective/NimbleData.cpp

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -28,16 +28,13 @@ NimbleData::NimbleData(
2828
StripeStreams& streams,
2929
memory::MemoryPool& memoryPool,
3030
ChunkedDecoder* inMapDecoder,
31-
std::function<std::unique_ptr<Encoding>(
32-
velox::memory::MemoryPool&,
33-
std::string_view,
34-
std::function<void*(uint32_t)>)> encodingFactory,
31+
encoding::Decoder decoder,
3532
bool getStringBuffersFromDecoder)
3633
: nimbleType_(nimbleType),
3734
streams_(&streams),
3835
pool_(&memoryPool),
3936
inMapDecoder_(inMapDecoder),
40-
encodingFactory_{encodingFactory} {
37+
decoder_{std::move(decoder)} {
4138
switch (nimbleType->kind()) {
4239
case Kind::Scalar:
4340
// Nulls in scalar types will be decoded along with values.
@@ -161,8 +158,8 @@ ChunkedDecoder NimbleData::makeScalarDecoder() {
161158
const auto streamId = nimbleType_->asScalar().scalarDescriptor().offset();
162159
return ChunkedDecoder(
163160
streams_->enqueue(streamId),
164-
/*decodeValuesWithNulls=*/false,
165161
streams_->streamIndex(streamId),
162+
/*decodeValuesWithNulls=*/false,
166163
pool_);
167164
}
168165

@@ -172,8 +169,8 @@ ChunkedDecoder NimbleData::makeMicrosDecoder() {
172169
nimbleType_->asTimestampMicroNano().microsDescriptor().offset();
173170
return ChunkedDecoder(
174171
streams_->enqueue(streamId),
175-
/*decodeValuesWithNulls=*/false,
176172
streams_->streamIndex(streamId),
173+
/*decodeValuesWithNulls=*/false,
177174
pool_);
178175
}
179176

@@ -183,8 +180,8 @@ ChunkedDecoder NimbleData::makeNanosDecoder() {
183180
nimbleType_->asTimestampMicroNano().nanosDescriptor().offset();
184181
return ChunkedDecoder(
185182
streams_->enqueue(streamId),
186-
/*decodeValuesWithNulls=*/false,
187183
streams_->streamIndex(streamId),
184+
/*decodeValuesWithNulls=*/false,
188185
pool_);
189186
}
190187

@@ -211,8 +208,8 @@ std::unique_ptr<ChunkedDecoder> NimbleData::makeDecoder(
211208
}
212209
return std::make_unique<ChunkedDecoder>(
213210
std::move(input),
214-
decodeValuesWithNulls,
215211
streams_->streamIndex(descriptor.offset()),
212+
decodeValuesWithNulls,
216213
pool_);
217214
}
218215

@@ -224,7 +221,7 @@ std::unique_ptr<velox::dwio::common::FormatData> NimbleParams::toFormatData(
224221
*streams_,
225222
pool(),
226223
inMapDecoder_,
227-
encodingFactory_,
224+
decoder_,
228225
getStringBuffersFromDecoder_);
229226
}
230227

dwio/nimble/velox/selective/NimbleData.h

Lines changed: 11 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
#pragma once
1818

19+
#include "dwio/nimble/encodings/EncodingDecoder.h"
1920
#include "dwio/nimble/encodings/EncodingFactory.h"
2021
#include "dwio/nimble/velox/selective/ReaderBase.h"
2122
#include "dwio/nimble/velox/selective/RowSizeTracker.h"
@@ -32,10 +33,7 @@ class NimbleData : public velox::dwio::common::FormatData {
3233
StripeStreams& streams,
3334
velox::memory::MemoryPool& memoryPool,
3435
ChunkedDecoder* inMapDecoder,
35-
std::function<std::unique_ptr<Encoding>(
36-
velox::memory::MemoryPool&,
37-
std::string_view,
38-
std::function<void*(uint32_t)>)> encodingFactory,
36+
encoding::Decoder decoder,
3937
bool getStringBuffersFromDecoder);
4038

4139
/// Read internal node nulls. For leaf nodes, we only copy `incomingNulls' if
@@ -110,11 +108,7 @@ class NimbleData : public velox::dwio::common::FormatData {
110108
ChunkedDecoder* const inMapDecoder_;
111109
std::unique_ptr<ChunkedDecoder> nullsDecoder_;
112110
velox::BufferPtr inMap_;
113-
std::function<std::unique_ptr<Encoding>(
114-
velox::memory::MemoryPool&,
115-
std::string_view,
116-
std::function<void*(uint32_t)>)>
117-
encodingFactory_;
111+
encoding::Decoder decoder_;
118112
};
119113

120114
class NimbleParams : public velox::dwio::common::FormatParams {
@@ -125,18 +119,15 @@ class NimbleParams : public velox::dwio::common::FormatParams {
125119
const std::shared_ptr<const Type>& nimbleType,
126120
StripeStreams& streams,
127121
RowSizeTracker* rowSizeTracker,
128-
std::function<std::unique_ptr<Encoding>(
129-
velox::memory::MemoryPool&,
130-
std::string_view,
131-
std::function<void*(uint32_t)>)> encodingFactory,
122+
encoding::Decoder decoder,
132123
bool getStringBuffersFromDecoder = false,
133124
bool preserveFlatMapsInMemory = false)
134125
: FormatParams(pool, stats),
135126
nimbleType_(nimbleType),
136127
streams_(&streams),
137128
rowSizeTracker_(rowSizeTracker),
138129
preserveFlatMapsInMemory_(preserveFlatMapsInMemory),
139-
encodingFactory_(std::move(encodingFactory)),
130+
decoder_(std::move(decoder)),
140131
getStringBuffersFromDecoder_{getStringBuffersFromDecoder} {}
141132

142133
std::unique_ptr<velox::dwio::common::FormatData> toFormatData(
@@ -150,7 +141,7 @@ class NimbleParams : public velox::dwio::common::FormatParams {
150141
type,
151142
*streams_,
152143
rowSizeTracker_,
153-
encodingFactory_,
144+
decoder_,
154145
getStringBuffersFromDecoder_,
155146
preserveFlatMapsInMemory_);
156147
}
@@ -175,17 +166,17 @@ class NimbleParams : public velox::dwio::common::FormatParams {
175166
return rowSizeTracker_;
176167
}
177168

169+
const encoding::Decoder& decoder() const {
170+
return decoder_;
171+
}
172+
178173
private:
179174
const std::shared_ptr<const Type> nimbleType_;
180175
StripeStreams* const streams_{nullptr};
181176
RowSizeTracker* const rowSizeTracker_{nullptr};
182177
const bool preserveFlatMapsInMemory_{false};
183178
ChunkedDecoder* inMapDecoder_{nullptr};
184-
std::function<std::unique_ptr<Encoding>(
185-
velox::memory::MemoryPool&,
186-
std::string_view,
187-
std::function<void*(uint32_t)> stringBufferFactory)>
188-
encodingFactory_;
179+
encoding::Decoder decoder_;
189180
bool getStringBuffersFromDecoder_{false};
190181
};
191182

dwio/nimble/velox/selective/SelectiveNimbleIndexReader.cpp

Lines changed: 3 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@
1818

1919
#include <utility>
2020

21+
#include "dwio/nimble/encodings/EncodingDecoder.h"
2122
#include "dwio/nimble/encodings/EncodingFactory.h"
22-
#include "dwio/nimble/encodings/legacy/EncodingFactory.h"
2323
#include "dwio/nimble/index/IndexReader.h"
2424
#include "dwio/nimble/velox/SchemaUtils.h"
2525
#include "dwio/nimble/velox/selective/ColumnReader.h"
@@ -571,19 +571,8 @@ void SelectiveNimbleIndexReader::loadStripeWithIndex(uint32_t stripeIndex) {
571571
readerBase_->nimbleSchema(),
572572
streams_,
573573
options_.trackRowSize() ? rowSizeTracker_.get() : nullptr,
574-
options_.passStringBuffersFromDecoder()
575-
? [](velox::memory::MemoryPool& pool,
576-
std::string_view data,
577-
std::function<void*(uint32_t)> stringBufferFactory)
578-
-> std::unique_ptr<Encoding> {
579-
return EncodingFactory::decode(pool, data, std::move(stringBufferFactory));
580-
}
581-
: [](velox::memory::MemoryPool& pool,
582-
std::string_view data,
583-
std::function<void*(uint32_t)> stringBufferFactory)
584-
-> std::unique_ptr<Encoding> {
585-
return legacy::EncodingFactory::decode(pool, data, std::move(stringBufferFactory));
586-
},
574+
options_.passStringBuffersFromDecoder() ? encoding::Decoder::create()
575+
: encoding::Decoder::createLegacy(),
587576
options_.passStringBuffersFromDecoder(),
588577
options_.preserveFlatMapsInMemory());
589578

0 commit comments

Comments
 (0)