Skip to content

Commit e6e04d0

Browse files
Rajeev975meta-codesync[bot]
authored andcommitted
feat(expr-eval): Fix flaky adaptiveCpuSamplingPerFunctionRates test (facebookincubator#17002)
Summary: Pull Request resolved: facebookincubator#17002 Previous diff (facebookincubator#16646) had a flaky test because the test asserted that slow_add must be in kAlwaysTrack state and plus must be in kSampling state. Replaced absolute state assertions with a relative comparison: slow_add sampling rate must be ≤ plus sampling rate. This is robust because both functions share the same timerOverheadNanos_ per ExprSet, so measurement noise affects both equally and cannot flip the relative ordering. Differential Revision: D99126870
1 parent 4a966b2 commit e6e04d0

File tree

3 files changed

+26
-41
lines changed

3 files changed

+26
-41
lines changed

velox/expression/Expr.h

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -736,32 +736,32 @@ class Expr {
736736
/// Runtime statistics. CPU time, wall time and number of processed rows.
737737
ExprStats stats_;
738738

739-
/// Per-function adaptive CPU sampling state machine.
739+
// Per-function adaptive CPU sampling state machine.
740740
enum class AdaptiveCpuSamplingState : uint8_t {
741-
/// First batch: warm up caches, discard timing.
741+
// First batch: warm up caches, discard timing.
742742
kWarmup,
743-
/// Next N batches: measure CpuWallTimer overhead and function cost.
743+
// Next N batches: measure CpuWallTimer overhead and function cost.
744744
kCalibrating,
745-
/// Calibration complete: overhead is acceptable, always track.
745+
// Calibration complete: overhead is acceptable, always track.
746746
kAlwaysTrack,
747-
/// Calibration complete: overhead is too high, sample at computed rate.
747+
// Calibration complete: overhead is too high, sample at computed rate.
748748
kSampling,
749749
};
750750

751-
/// Number of calibration batches (more batches = less noise).
751+
// Number of calibration batches (more batches = less noise).
752752
static constexpr uint32_t kCalibrationBatches = 5;
753753

754754
AdaptiveCpuSamplingState adaptiveState_{AdaptiveCpuSamplingState::kWarmup};
755755

756-
/// Stopwatch for measuring function execution wall time during calibration.
756+
// Stopwatch for measuring function execution wall time during calibration.
757757
std::optional<DeltaCpuWallTimeStopWatch> calibrationStopWatch_;
758-
/// Accumulated function wall time (without timer) during calibration.
758+
// Accumulated function wall time (without timer) during calibration.
759759
uint64_t calibrationFunctionWallNanos_{0};
760-
/// Counter for calibration batches.
760+
// Counter for calibration batches.
761761
uint32_t calibrationBatchCount_{0};
762-
/// Computed sampling rate: 0 = always track, N = track every N-th batch.
762+
// Computed sampling rate: 0 = always track, N = track every N-th batch.
763763
uint32_t adaptiveSamplingRate_{0};
764-
/// Counter for sampling cadence.
764+
// Counter for sampling cadence.
765765
uint32_t adaptiveSamplingCounter_{0};
766766

767767
// If true computeMetaData returns, otherwise meta data is computed and the

velox/expression/benchmarks/CpuTimeTrackingBenchmark.cpp

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,10 +52,10 @@ struct MultiplyFunction {
5252
}
5353
};
5454

55-
/// Element-wise >= comparison on two int64 arrays. Mirrors the core logic of
56-
/// array_gte UDF — a representative "expensive" function because it
57-
/// iterates over array elements, allocates an output array, and touches more
58-
/// memory per row than a simple scalar op like multiply.
55+
// Element-wise >= comparison on two int64 arrays. Mirrors the core logic of
56+
// array_gte UDF — a representative "expensive" function because it
57+
// iterates over array elements, allocates an output array, and touches more
58+
// memory per row than a simple scalar op like multiply.
5959
template <typename T>
6060
struct ArrayGteFunction {
6161
VELOX_DEFINE_FUNCTION_TYPES(T);

velox/expression/tests/ExprStatsTest.cpp

Lines changed: 11 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@
2828
using namespace facebook::velox;
2929
using namespace facebook::velox::test;
3030

31-
/// A deliberately expensive scalar function used to test adaptive CPU sampling.
32-
/// The loop makes the per-row cost high enough that clock_gettime overhead is
33-
/// negligible. volatile prevents the compiler from optimizing away the loop.
31+
// A deliberately expensive scalar function used to test adaptive CPU sampling.
32+
// The loop makes the per-row cost high enough that clock_gettime overhead is
33+
// negligible. volatile prevents the compiler from optimizing away the loop.
3434
template <typename T>
3535
struct SlowAddFunction {
3636
template <typename TInput>
@@ -724,35 +724,20 @@ TEST_F(ExprStatsTest, adaptiveCpuSamplingPerFunctionRates) {
724724
ASSERT_NE(plusExpr, nullptr) << "Failed to find 'plus' expression";
725725
ASSERT_NE(slowAddExpr, nullptr) << "Failed to find 'slow_add' expression";
726726

727-
// Cheap function (plus) should be in sampling mode with rate > 1.
728-
ASSERT_TRUE(plusExpr->isAdaptiveSampling())
729-
<< "Expected cheap function 'plus' to be in sampling mode";
730-
ASSERT_GT(plusExpr->adaptiveSamplingRate(), 1u)
731-
<< "Expected sampling rate > 1 for cheap function";
732-
733-
// Expensive function (slow_add) should always track (not sampling).
734-
ASSERT_FALSE(slowAddExpr->isAdaptiveSampling())
735-
<< "Expected expensive function 'slow_add' to always track";
727+
// The expensive function (slow_add) should have a lower or equal sampling
728+
// rate compared to the cheap function (plus).
729+
ASSERT_LE(
730+
slowAddExpr->adaptiveSamplingRate(), plusExpr->adaptiveSamplingRate())
731+
<< "Expected expensive function to have lower or equal sampling rate "
732+
<< "than cheap function. slow_add rate: "
733+
<< slowAddExpr->adaptiveSamplingRate()
734+
<< ", plus rate: " << plusExpr->adaptiveSamplingRate();
736735

737736
// Both functions should have timing data.
738737
auto stats = exprSet->stats();
739738
ASSERT_GT(stats["plus"].timing.cpuNanos, 0u);
740739
ASSERT_GT(stats["slow_add"].timing.cpuNanos, 0u);
741740

742-
// slow_add is always-track after calibration. It won't have timing for the
743-
// warmup + calibration batches (1 + 5 = 6), but all post-calibration batches
744-
// should be tracked.
745-
constexpr uint64_t kCalibrationOverhead = 6; // 1 warmup + 5 calibration
746-
ASSERT_EQ(
747-
stats["slow_add"].timing.count,
748-
stats["slow_add"].numProcessedVectors - kCalibrationOverhead);
749-
750-
// plus is in sampling mode. Stats should be adjusted (extrapolated) so
751-
// timing.count matches numProcessedVectors.
752-
ASSERT_EQ(stats["plus"].timing.count, stats["plus"].numProcessedVectors);
753-
ASSERT_GT(stats["plus"].timing.cpuNanos, 0u);
754-
755-
// Verify the sampling rate for plus is reasonable (should be > 1).
756741
LOG(INFO) << "plus sampling rate: " << plusExpr->adaptiveSamplingRate()
757742
<< ", slow_add sampling rate: "
758743
<< slowAddExpr->adaptiveSamplingRate();

0 commit comments

Comments
 (0)