diff --git a/dwio/nimble/velox/selective/SelectiveNimbleReader.cpp b/dwio/nimble/velox/selective/SelectiveNimbleReader.cpp
index 523ba32c..8ac9e703 100644
--- a/dwio/nimble/velox/selective/SelectiveNimbleReader.cpp
+++ b/dwio/nimble/velox/selective/SelectiveNimbleReader.cpp
@@ -87,6 +87,22 @@ std::vector toVeloxSortOrders(
 
 namespace {
 
+// Registers 'node' and, recursively, every non-null descendant of it in
+// 'metricsSet' by calling getOrCreate() with the node's id and type kind.
+// Parents are registered before their children.
+void registerColumnMetrics(
+    const velox::dwio::common::TypeWithId& node,
+    velox::dwio::common::ColumnMetricsSet& metricsSet) {
+  metricsSet.getOrCreate(node.id(), node.type()->kind());
+  for (uint32_t childIdx = 0; childIdx < node.size(); ++childIdx) {
+    const auto* childNode = node.childAt(childIdx).get();
+    if (childNode == nullptr) {
+      continue;
+    }
+    registerColumnMetrics(*childNode, metricsSet);
+  }
+}
+
 class SelectiveNimbleRowReader : public dwio::common::RowReader {
  public:
   SelectiveNimbleRowReader(
@@ -97,6 +113,14 @@ class SelectiveNimbleRowReader : public dwio::common::RowReader {
         streams_(readerBase_),
         rowSizeTracker_{
             std::make_unique(readerBase->fileSchemaWithId())} {
+    // When the row reader options request column stats, create the metrics
+    // set and register every node of the file schema in it.
+    if (options.collectColumnStats()) {
+      columnReaderStatistics_.columnMetricsSet.emplace();
+      registerColumnMetrics(
+          *readerBase_->fileSchemaWithId(),
+          *columnReaderStatistics_.columnMetricsSet);
+    }
     initReadRange();
     initIndexBounds();
     if (options.eagerFirstStripeLoad()) {
@@ -303,6 +327,8 @@ uint64_t SelectiveNimbleRowReader::next(
 void SelectiveNimbleRowReader::updateRuntimeStats(
     dwio::common::RuntimeStatistics& stats) const {
   stats.skippedStrides += skippedStripes_;
+  // Merge this reader's column reader statistics into the caller's stats.
+  stats.columnReaderStats.mergeFrom(columnReaderStatistics_);
 }
 
 void SelectiveNimbleRowReader::resetFilterCaches() {
diff --git a/dwio/nimble/velox/selective/tests/SelectiveNimbleReaderTest.cpp b/dwio/nimble/velox/selective/tests/SelectiveNimbleReaderTest.cpp
index 0e7dee7f..a4aa71bb 100644
--- a/dwio/nimble/velox/selective/tests/SelectiveNimbleReaderTest.cpp
+++ b/dwio/nimble/velox/selective/tests/SelectiveNimbleReaderTest.cpp
@@ -1909,6 +1909,59 @@ TEST_P(SelectiveNimbleReaderTest, mapAsStructAllNulls) {
   velox::test::assertEqualVectors(expected, batch);
 }
 
+// Writes a two-column file, scans all of it with column stats collection
+// enabled, and checks that metrics entries 1 and 2 report decode activity.
+TEST_P(SelectiveNimbleReaderTest, columnDecodeMetrics) {
+  const int numRows = 10'000;
+  auto input = makeRowVector({
+      makeFlatVector(numRows, [](auto i) { return i; }),
+      makeFlatVector(
+          numRows, [](auto i) { return std::to_string(i); }),
+  });
+  auto scanSpec = std::make_shared("root");
+  scanSpec->addAllChildFields(*asRowType(input->type()));
+  auto file = test::createNimbleFile(*rootPool(), input);
+  auto readFile = std::make_shared(file);
+  auto factory =
+      dwio::common::getReaderFactory(dwio::common::FileFormat::NIMBLE);
+  dwio::common::ReaderOptions options(pool());
+  options.setScanSpec(scanSpec);
+  auto reader = factory->createReader(
+      std::make_unique(readFile, *pool()),
+      options);
+  dwio::common::RowReaderOptions rowOptions;
+  rowOptions.setScanSpec(scanSpec);
+  rowOptions.setRequestedType(asRowType(input->type()));
+  rowOptions.setPassStringBuffersFromDecoder(GetParam());
+  rowOptions.setCollectColumnStats(true);
+  rowOptions.setEagerFirstStripeLoad(true);
+  auto rowReader = reader->createRowReader(rowOptions);
+
+  VectorPtr result = BaseVector::create(asRowType(input->type()), 0, pool());
+  uint64_t totalRows = 0;
+  while (auto n = rowReader->next(1'000, result)) {
+    totalRows += n;
+    // Force materialization of lazy child vectors so that readWithTiming()
+    // is invoked on the leaf column readers.
+    for (const auto& child : result->as()->children()) {
+      child->loadedVector();
+    }
+  }
+  EXPECT_EQ(totalRows, numRows) << "should read all rows";
+
+  dwio::common::RuntimeStatistics stats;
+  rowReader->updateRuntimeStats(stats);
+  ASSERT_TRUE(stats.columnReaderStats.columnMetricsSet.has_value());
+
+  // Both column entries must report a non-zero decode count.
+  auto* col1 = stats.columnReaderStats.columnMetricsSet->getOrCreate(1);
+  EXPECT_GT(col1->decodeCPUTimeNanos.count(), 0)
+      << "column 1 decode count should be > 0";
+  auto* col2 = stats.columnReaderStats.columnMetricsSet->getOrCreate(2);
+  EXPECT_GT(col2->decodeCPUTimeNanos.count(), 0)
+      << "column 2 decode count should be > 0";
+}
+
 INSTANTIATE_TEST_CASE_P(
     SelectiveNimbleReaderTestSuite,
     SelectiveNimbleReaderTest,