Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions dwio/nimble/velox/selective/SelectiveNimbleReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,17 @@ std::vector<velox::core::SortOrder> toVeloxSortOrders(

namespace {

// Registers a metrics entry for 'node' (keyed by its id and type kind) in
// 'metricsSet', then does the same for each non-null child subtree. A parent
// is always registered before any of its descendants.
void registerColumnMetrics(
    const velox::dwio::common::TypeWithId& node,
    velox::dwio::common::ColumnMetricsSet& metricsSet) {
  metricsSet.getOrCreate(node.id(), node.type()->kind());

  const auto childCount = node.size();
  for (uint32_t childIdx = 0; childIdx < childCount; ++childIdx) {
    const auto* childNode = node.childAt(childIdx).get();
    if (childNode == nullptr) {
      // Nothing to register for an absent child slot.
      continue;
    }
    registerColumnMetrics(*childNode, metricsSet);
  }
}

class SelectiveNimbleRowReader : public dwio::common::RowReader {
public:
SelectiveNimbleRowReader(
Expand All @@ -97,6 +108,12 @@ class SelectiveNimbleRowReader : public dwio::common::RowReader {
streams_(readerBase_),
rowSizeTracker_{
std::make_unique<RowSizeTracker>(readerBase->fileSchemaWithId())} {
if (options.collectColumnStats()) {
columnReaderStatistics_.columnMetricsSet.emplace();
registerColumnMetrics(
*readerBase_->fileSchemaWithId(),
*columnReaderStatistics_.columnMetricsSet);
}
initReadRange();
initIndexBounds();
if (options.eagerFirstStripeLoad()) {
Expand Down Expand Up @@ -303,6 +320,7 @@ uint64_t SelectiveNimbleRowReader::next(
// Folds the statistics accumulated by this row reader into 'stats': the
// skipped-stripe counter is added to 'stats.skippedStrides', and the reader's
// column reader statistics are merged into 'stats.columnReaderStats'.
void SelectiveNimbleRowReader::updateRuntimeStats(
    dwio::common::RuntimeStatistics& stats) const {
  // Stripes skipped by this reader are reported through the stride counter.
  stats.skippedStrides += skippedStripes_;

  auto& mergedColumnStats = stats.columnReaderStats;
  mergedColumnStats.mergeFrom(columnReaderStatistics_);
}

void SelectiveNimbleRowReader::resetFilterCaches() {
Expand Down
53 changes: 53 additions & 0 deletions dwio/nimble/velox/selective/tests/SelectiveNimbleReaderTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1909,6 +1909,59 @@ TEST_P(SelectiveNimbleReaderTest, mapAsStructAllNulls) {
velox::test::assertEqualVectors(expected, batch);
}

// Verifies that a row reader created with column stats collection enabled
// reports non-zero decode CPU time for the scanned columns once the whole
// file has been read and the lazily loaded children have been materialized.
TEST_P(SelectiveNimbleReaderTest, columnDecodeMetrics) {
  const bool passStringBuffersFromDecoder = GetParam();
  const int numRows = 10'000;
  auto input = makeRowVector({
      makeFlatVector<int64_t>(numRows, [](auto i) { return i; }),
      makeFlatVector<std::string>(
          numRows, [](auto i) { return std::to_string(i); }),
  });
  auto scanSpec = std::make_shared<common::ScanSpec>("root");
  scanSpec->addAllChildFields(*asRowType(input->type()));
  auto file = test::createNimbleFile(*rootPool(), input);
  auto readFile = std::make_shared<InMemoryReadFile>(file);
  auto factory =
      dwio::common::getReaderFactory(dwio::common::FileFormat::NIMBLE);
  dwio::common::ReaderOptions options(pool());
  options.setScanSpec(scanSpec);
  auto reader = factory->createReader(
      std::make_unique<dwio::common::BufferedInput>(readFile, *pool()),
      options);
  dwio::common::RowReaderOptions rowOptions;
  rowOptions.setScanSpec(scanSpec);
  rowOptions.setRequestedType(asRowType(input->type()));
  rowOptions.setPassStringBuffersFromDecoder(passStringBuffersFromDecoder);
  rowOptions.setCollectColumnStats(true);
  rowOptions.setEagerFirstStripeLoad(true);
  auto rowReader = reader->createRowReader(rowOptions);

  VectorPtr result = BaseVector::create(asRowType(input->type()), 0, pool());
  uint64_t totalRows = 0;
  while (auto n = rowReader->next(1'000, result)) {
    totalRows += n;
    // Force materialization of lazy child vectors so that readWithTiming()
    // is invoked on the leaf column readers.
    auto* row = result->as<RowVector>();
    // Guard the downcast before dereferencing it below.
    ASSERT_NE(row, nullptr);
    const auto numChildren = row->childrenSize();
    // Unsigned index avoids a signed/unsigned comparison with childrenSize().
    for (uint32_t i = 0; i < numChildren; ++i) {
      row->childAt(i)->loadedVector();
    }
  }
  // Compare like-signed values; totalRows is unsigned.
  EXPECT_EQ(totalRows, static_cast<uint64_t>(numRows))
      << "should read all rows";

  dwio::common::RuntimeStatistics stats;
  rowReader->updateRuntimeStats(stats);
  ASSERT_TRUE(stats.columnReaderStats.columnMetricsSet.has_value());

  // Directly check that nodes 1 and 2 recorded decode CPU time.
  // NOTE(review): presumably ids 1 and 2 are the two top-level columns (with
  // the root row at id 0) -- confirm against the file schema numbering.
  auto* col1 = stats.columnReaderStats.columnMetricsSet->getOrCreate(1);
  EXPECT_GT(col1->decodeCPUTimeNanos.count(), 0)
      << "column 1 decode CPU time should be > 0";
  auto* col2 = stats.columnReaderStats.columnMetricsSet->getOrCreate(2);
  EXPECT_GT(col2->decodeCPUTimeNanos.count(), 0)
      << "column 2 decode CPU time should be > 0";
}

INSTANTIATE_TEST_CASE_P(
SelectiveNimbleReaderTestSuite,
SelectiveNimbleReaderTest,
Expand Down
Loading