Skip to content

Commit 7830071

Browse files
xiaoxmeng authored and meta-codesync[bot] committed
feat: Add IO stats to NimbleIndexProjector (facebookincubator#604)
Summary:
Pull Request resolved: facebookincubator#604

CONTEXT:
NimbleIndexProjector reports projection stats (numReadBytes, numReadStripes, etc.) but doesn't track actual disk IO metrics like raw bytes read, overread from coalescing, and IO time. These are useful for understanding IO efficiency and coalescing behavior.

WHAT:
- Add rawBytesRead, rawOverreadBytes, totalScanTimeUs fields to NimbleIndexProjector::Stats, overwritten from IoStatistics at the end of each project() call.
- Add ReaderBase::create(ReadFile, ReaderOptions, maxMergeDistance) overload that internally creates BufferedInput with IoStatistics tracking. The existing create(BufferedInput, ...) overload continues to work without IO stats.
- Expose ReaderBase::ioStatistics() accessor (returns nullptr when IO stats are not enabled).
- Update NimbleIndexProjector tests to use the new ReaderBase factory and verify IO stats are non-zero for hits and zero for misses.

Reviewed By: tanjialiang

Differential Revision: D97691764

fbshipit-source-id: a5f6763af01a956d260b11bf316f3070374a6ab3
1 parent 9e30cb0 commit 7830071

File tree

5 files changed

+146
-41
lines changed

5 files changed

+146
-41
lines changed

dwio/nimble/serializer/tests/ProjectorTest.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1561,7 +1561,7 @@ TEST_F(ProjectorTest, projectFlatMapNonExistentKey) {
15611561
subfields,
15621562
pool_.get(),
15631563
{.projectVersion = SerializationVersion::kCompact}),
1564-
"Key '999' not found in FlatMapType");
1564+
"Cannot project entire FlatMap column without key subscripts");
15651565
}
15661566

15671567
// Test stream indices are correct.

dwio/nimble/velox/SchemaUtils.cpp

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -580,8 +580,10 @@ void resolveSubfield(
580580
subfield.toString());
581581
const auto& flatMap = current->asFlatMap();
582582
const auto childIdx = flatMap.findChild(keyName);
583-
NIMBLE_CHECK(
584-
childIdx.has_value(), "Key '{}' not found in FlatMapType", keyName);
583+
if (!childIdx.has_value()) {
584+
// Key not in file schema — skip this subfield silently.
585+
return;
586+
}
585587

586588
selectedChildren[current].insert(*childIdx);
587589
current = flatMap.childAt(*childIdx).get();

dwio/nimble/velox/index/NimbleIndexProjector.cpp

Lines changed: 29 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,8 @@
2222
#include "dwio/nimble/serializer/SerializerImpl.h"
2323
#include "dwio/nimble/velox/SchemaUtils.h"
2424
#include "folly/ScopeGuard.h"
25+
#include "velox/common/base/SuccinctPrinter.h"
26+
#include "velox/dwio/common/BufferedInput.h"
2527

2628
namespace facebook::nimble {
2729

@@ -30,21 +32,35 @@ using namespace facebook::velox; // NOLINT(google-build-using-namespace)
3032
std::string NimbleIndexProjector::Stats::toString() const {
3133
return fmt::format(
3234
"Stats(numReadStripes={}, numScannedRows={}, numProjectedRows={}, numReadRows={}, "
33-
"numReadBytes={}, lookupTiming=[{}], scanTiming=[{}], projectionTiming=[{}])",
35+
"numReadBytes={}, rawBytesRead={}, rawOverreadBytes={}, numStorageReads={}, "
36+
"lookupTiming=[{}], scanTiming=[{}], projectionTiming=[{}])",
3437
numReadStripes,
3538
numScannedRows,
3639
numProjectedRows,
3740
numReadRows,
38-
numReadBytes,
41+
velox::succinctBytes(numReadBytes),
42+
velox::succinctBytes(rawBytesRead),
43+
velox::succinctBytes(rawOverreadBytes),
44+
numStorageReads,
3945
lookupTiming.toString(),
4046
scanTiming.toString(),
4147
projectionTiming.toString());
4248
}
4349

4450
NimbleIndexProjector::NimbleIndexProjector(
45-
std::shared_ptr<ReaderBase> readerBase,
46-
const std::vector<Subfield>& projectedSubfields)
47-
: readerBase_(std::move(readerBase)),
51+
std::shared_ptr<velox::ReadFile> readFile,
52+
const std::vector<Subfield>& projectedSubfields,
53+
const velox::dwio::common::ReaderOptions& options)
54+
: ioStatistics_{std::make_shared<velox::io::IoStatistics>()},
55+
readerBase_{ReaderBase::create(
56+
std::make_unique<velox::dwio::common::BufferedInput>(
57+
std::move(readFile),
58+
options.memoryPool(),
59+
velox::dwio::common::MetricsLog::voidLog(),
60+
ioStatistics_.get(),
61+
/*ioStats=*/nullptr,
62+
options.maxCoalesceDistance()),
63+
options)},
4864
pool_{readerBase_->pool()},
4965
tabletIndex_{readerBase_->tablet().index()},
5066
numStripes_{readerBase_->tablet().stripeCount()},
@@ -99,6 +115,8 @@ NimbleIndexProjector::Result NimbleIndexProjector::project(
99115
}
100116
processStripe(stripeIndex, requestIndices, result);
101117
}
118+
119+
updateIoStats();
102120
return result;
103121
}
104122

@@ -309,4 +327,10 @@ NimbleIndexProjector::lookupRowRanges(
309327
return result;
310328
}
311329

330+
void NimbleIndexProjector::updateIoStats() {
331+
stats_.rawBytesRead = ioStatistics_->rawBytesRead();
332+
stats_.rawOverreadBytes = ioStatistics_->rawOverreadBytes();
333+
stats_.numStorageReads = ioStatistics_->read().count();
334+
}
335+
312336
} // namespace facebook::nimble

dwio/nimble/velox/index/NimbleIndexProjector.h

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
#include "dwio/nimble/velox/RowRange.h"
2727
#include "dwio/nimble/velox/SchemaUtils.h"
2828
#include "dwio/nimble/velox/selective/ReaderBase.h"
29+
#include "velox/common/io/IoStatistics.h"
2930
#include "velox/common/time/CpuWallTimer.h"
3031
#include "velox/serializers/KeyEncoder.h"
3132
#include "velox/type/Subfield.h"
@@ -60,8 +61,9 @@ class NimbleIndexProjector {
6061
// TODO: projectedSubfields currently must match file schema column names.
6162
// Add table-to-file column name mapping for schema evolution support.
6263
NimbleIndexProjector(
63-
std::shared_ptr<ReaderBase> readerBase,
64-
const std::vector<Subfield>& projectedSubfields);
64+
std::shared_ptr<velox::ReadFile> readFile,
65+
const std::vector<Subfield>& projectedSubfields,
66+
const velox::dwio::common::ReaderOptions& options);
6567

6668
~NimbleIndexProjector() = default;
6769

@@ -140,6 +142,16 @@ class NimbleIndexProjector {
140142
/// serialization overhead like headers and trailers).
141143
uint64_t numReadBytes{0};
142144

145+
/// Total bytes read from disk, including coalesced/merged regions and
146+
/// index/key streams. Only available when created with the ReadFile
147+
/// overload; 0 otherwise.
148+
uint64_t rawBytesRead{0};
149+
/// Bytes read from disk but not requested by the projector, due to
150+
/// BufferedInput merging adjacent regions (coalescing overhead).
151+
uint64_t rawOverreadBytes{0};
152+
/// Number of storage read operations (pread syscalls).
153+
uint64_t numStorageReads{0};
154+
143155
/// Time spent looking up stripes and row ranges via the tablet index.
144156
velox::CpuWallTiming lookupTiming;
145157
/// Time spent loading stripe stream data from tablet.
@@ -205,6 +217,9 @@ class NimbleIndexProjector {
205217
readerBase_->tablet().stripeRowCount(stripeIndex));
206218
}
207219

220+
void updateIoStats();
221+
222+
const std::shared_ptr<velox::io::IoStatistics> ioStatistics_;
208223
const std::shared_ptr<ReaderBase> readerBase_;
209224
velox::memory::MemoryPool* const pool_;
210225
const TabletIndex* const tabletIndex_;

0 commit comments

Comments
 (0)