1616#include < boost/lexical_cast.hpp>
1717#include < boost/uuid/uuid_generators.hpp>
1818#include < boost/uuid/uuid_io.hpp>
19- #include < cmath>
2019
2120#include " velox/common/base/Exceptions.h"
2221#include " velox/common/base/Fs.h"
@@ -1588,94 +1587,13 @@ bool Expr::applyFunctionWithPeeling(
15881587 return true ;
15891588}
15901589
1591- std::unique_ptr<CpuWallTimer> Expr::cpuWallTimer (const EvalCtx& context) {
1592- // 1. Compile-time tracking (set via trackCpuUsage_) always wins.
1593- if (trackCpuUsage_) {
1594- return std::make_unique<CpuWallTimer>(stats_.timing );
1595- }
1596-
1597- // 2. Adaptive per-function sampling.
1598- if (context.adaptiveCpuSamplingEnabled ()) {
1599- switch (adaptiveState_) {
1600- case AdaptiveCpuSamplingState::kWarmup :
1601- // Warmup batch: just run the function, no timing.
1602- return nullptr ;
1603- case AdaptiveCpuSamplingState::kCalibrating : {
1604- // Measure function execution time (without CpuWallTimer).
1605- // Timer overhead is measured once per ExprSet and shared via EvalCtx.
1606- calibrationStopWatch_.emplace ();
1607- return nullptr ;
1608- }
1609- case AdaptiveCpuSamplingState::kAlwaysTrack :
1610- return std::make_unique<CpuWallTimer>(stats_.timing );
1611- case AdaptiveCpuSamplingState::kSampling :
1612- if (++adaptiveSamplingCounter_ % adaptiveSamplingRate_ == 0 ) {
1613- return std::make_unique<CpuWallTimer>(stats_.timing );
1614- }
1615- return nullptr ;
1616- }
1617- }
1618-
1619- return nullptr ;
1620- }
1621-
1622- void Expr::finalizeAdaptiveCalibration (
1623- double maxOverheadPct,
1624- uint64_t timerOverheadNanos) {
1625- switch (adaptiveState_) {
1626- case AdaptiveCpuSamplingState::kWarmup : {
1627- adaptiveState_ = AdaptiveCpuSamplingState::kCalibrating ;
1628- break ;
1629- }
1630- case AdaptiveCpuSamplingState::kCalibrating : {
1631- calibrationFunctionWallNanos_ +=
1632- calibrationStopWatch_->elapsed ().wallNanos ;
1633- calibrationStopWatch_.reset ();
1634-
1635- if (++calibrationBatchCount_ < kCalibrationBatches ) {
1636- break ;
1637- }
1638-
1639- // Use the shared timer overhead measurement, scaled by calibration
1640- // batch count. The overhead per invocation is a platform constant
1641- // measured once per ExprSet.
1642- auto totalTimerOverhead = timerOverheadNanos * calibrationBatchCount_;
1643-
1644- if (calibrationFunctionWallNanos_ > 0 && maxOverheadPct > 0 ) {
1645- double overheadPct = 100.0 * static_cast <double >(totalTimerOverhead) /
1646- static_cast <double >(calibrationFunctionWallNanos_);
1647-
1648- if (overheadPct > maxOverheadPct) {
1649- adaptiveSamplingRate_ =
1650- static_cast <uint32_t >(std::ceil (overheadPct / maxOverheadPct));
1651- // Start counter at rate-1 so the first post-calibration batch is
1652- // always timed (++counter hits rate, which passes % rate == 0).
1653- adaptiveSamplingCounter_ = adaptiveSamplingRate_ - 1 ;
1654- adaptiveState_ = AdaptiveCpuSamplingState::kSampling ;
1655- } else {
1656- adaptiveState_ = AdaptiveCpuSamplingState::kAlwaysTrack ;
1657- }
1658- } else {
1659- // Function ~0ns — timer dominates. Aggressive sampling.
1660- adaptiveSamplingRate_ = 100 ;
1661- adaptiveSamplingCounter_ = adaptiveSamplingRate_ - 1 ;
1662- adaptiveState_ = AdaptiveCpuSamplingState::kSampling ;
1663- }
1664- break ;
1665- }
1666- default :
1667- VELOX_UNREACHABLE (
1668- " Unexpected adaptive sampling state in finalizeAdaptiveCalibration" );
1669- }
1670- }
1671-
16721590void Expr::applyFunction (
16731591 const SelectivityVector& rows,
16741592 EvalCtx& context,
16751593 VectorPtr& result) {
16761594 stats_.numProcessedVectors += 1 ;
16771595 stats_.numProcessedRows += rows.countSelected ();
1678- auto timer = cpuWallTimer (context );
1596+ auto timer = cpuWallTimer ();
16791597
16801598 computeIsAsciiForInputs (vectorFunction_.get (), inputValues_, rows);
16811599 auto isAscii = type ()->isVarchar ()
@@ -1715,14 +1633,6 @@ void Expr::applyFunction(
17151633 result->asUnchecked <SimpleVector<StringView>>()->setIsAscii (
17161634 isAscii.value (), rows);
17171635 }
1718-
1719- // Only do Adaptive Calibration if the adaptive sampling is on and we are in
1720- // warmup or calibrating state.
1721- if (context.adaptiveCpuSamplingEnabled () && isCalibrating ()) {
1722- finalizeAdaptiveCalibration (
1723- context.adaptiveCpuSamplingMaxOverheadPct (),
1724- context.timerOverheadNanos ());
1725- }
17261636}
17271637
17281638void Expr::evalSpecialFormWithStats (
@@ -1731,17 +1641,9 @@ void Expr::evalSpecialFormWithStats(
17311641 VectorPtr& result) {
17321642 stats_.numProcessedVectors += 1 ;
17331643 stats_.numProcessedRows += rows.countSelected ();
1734- auto timer = cpuWallTimer (context );
1644+ auto timer = cpuWallTimer ();
17351645
17361646 evalSpecialForm (rows, context, result);
1737-
1738- // Only do Adaptive Calibration if the adaptive sampling is on and we are in
1739- // warmup or calibrating state.
1740- if (context.adaptiveCpuSamplingEnabled () && isCalibrating ()) {
1741- finalizeAdaptiveCalibration (
1742- context.adaptiveCpuSamplingMaxOverheadPct (),
1743- context.timerOverheadNanos ());
1744- }
17451647}
17461648
17471649namespace {
@@ -1971,14 +1873,7 @@ ExprSet::ExprSet(
19711873 core::ExecCtx* execCtx,
19721874 bool enableConstantFolding,
19731875 bool lazyDereference)
1974- : execCtx_(execCtx),
1975- lazyDereference_ (lazyDereference),
1976- adaptiveCpuSampling_(
1977- execCtx->queryCtx ()->queryConfig().exprAdaptiveCpuSampling()),
1978- adaptiveCpuSamplingMaxOverheadPct_(
1979- execCtx->queryCtx ()
1980- ->queryConfig()
1981- .exprAdaptiveCpuSamplingMaxOverheadPct()) {
1876+ : execCtx_(execCtx), lazyDereference_(lazyDereference) {
19821877 exprs_ = compileExpressions (sources, execCtx, this , enableConstantFolding);
19831878 if (lazyDereference_) {
19841879 validateLazyDereference (exprs_);
@@ -1991,24 +1886,6 @@ ExprSet::ExprSet(
19911886}
19921887
19931888namespace {
1994-
1995- // / If the expression is in adaptive sampling mode, extrapolate timing stats
1996- // / to approximate full-population values. Otherwise, return raw stats.
1997- exec::ExprStats adjustStats (const exec::Expr& expr) {
1998- if (expr.isAdaptiveSampling () && expr.stats ().timing .count > 0 ) {
1999- exec::ExprStats adjusted = expr.stats ();
2000- double ratio = static_cast <double >(adjusted.numProcessedVectors ) /
2001- static_cast <double >(adjusted.timing .count );
2002- adjusted.timing .cpuNanos = static_cast <uint64_t >(
2003- static_cast <double >(adjusted.timing .cpuNanos ) * ratio);
2004- adjusted.timing .wallNanos = static_cast <uint64_t >(
2005- static_cast <double >(adjusted.timing .wallNanos ) * ratio);
2006- adjusted.timing .count = adjusted.numProcessedVectors ;
2007- return adjusted;
2008- }
2009- return expr.stats ();
2010- }
2011-
20121889void addStats (
20131890 const exec::Expr& expr,
20141891 std::unordered_map<std::string, exec::ExprStats>& stats,
@@ -2027,7 +1904,7 @@ void addStats(
20271904 bool emptyStats =
20281905 !expr.stats ().numProcessedRows && !expr.stats ().defaultNullRowsSkipped ;
20291906 if (!emptyStats && !excludeSplFormExpr) {
2030- stats[expr.name ()].add (adjustStats ( expr));
1907+ stats[expr.name ()].add (expr. stats ( ));
20311908 }
20321909
20331910 for (const auto & input : expr.inputs ()) {
@@ -2139,24 +2016,6 @@ void printInputAndExprs(
21392016}
21402017} // namespace
21412018
2142- void ExprSet::initializeAdaptiveCpuSampling (EvalCtx& context) {
2143- context.setAdaptiveCpuSamplingEnabled (true );
2144- context.setAdaptiveCpuSamplingMaxOverheadPct (
2145- adaptiveCpuSamplingMaxOverheadPct_);
2146-
2147- // Measure CpuWallTimer overhead once per ExprSet (platform constant).
2148- if (!timerOverheadMeasured_) {
2149- CpuWallTiming dummyTiming;
2150- DeltaCpuWallTimeStopWatch overheadWatch;
2151- {
2152- auto dummy = std::make_unique<CpuWallTimer>(dummyTiming);
2153- }
2154- timerOverheadNanos_ = overheadWatch.elapsed ().wallNanos ;
2155- timerOverheadMeasured_ = true ;
2156- }
2157- context.setTimerOverheadNanos (timerOverheadNanos_);
2158- }
2159-
21602019void ExprSet::eval (
21612020 int32_t begin,
21622021 int32_t end,
@@ -2170,11 +2029,6 @@ void ExprSet::eval(
21702029 clearSharedSubexprs ();
21712030 }
21722031
2173- // Apply adaptive per-function CPU sampling if configured.
2174- if (adaptiveCpuSampling_) {
2175- initializeAdaptiveCpuSampling (context);
2176- }
2177-
21782032 if (!lazyDereference_) {
21792033 // Make sure LazyVectors, referenced by multiple expressions, are loaded for
21802034 // all the "rows".
0 commit comments