Restrict stopping criterion parameter usage in command line (#174)

psvvsp · Sergey Pavlov · alliepiper · web-flow · commit 433376fd8306 · 2025-04-30T15:53:45.000-04:00
* Restrict stopping criterion parameter usage in command line.
* Update docs for stopping criterion.
* Add convenience benchmark_base API for criterion params.
* Add more test cases for stopping criterion parsing.

---------

Co-authored-by: Sergey Pavlov <psvvsp89@gmail.com>
Co-authored-by: Allison Piper <alliepiper16@gmail.com>
diff --git a/docs/cli_help.md b/docs/cli_help.md
@@ -83,36 +83,6 @@
   * Applies to the most recent `--benchmark`, or all benchmarks if specified
     before any `--benchmark` arguments.
 
-* `--min-samples <count>`
-  * Gather at least `<count>` samples per measurement.
-  * Default is 10 samples.
-  * Applies to the most recent `--benchmark`, or all benchmarks if specified
-    before any `--benchmark` arguments.
-
-* `--stopping-criterion <criterion>`
-  * After `--min-samples` is satisfied, use `<criterion>` to detect if enough
-    samples were collected.
-  * Only applies to Cold measurements.
-  * Default is stdrel (`--stopping-criterion stdrel`)
-
-* `--min-time <seconds>`
-  * Accumulate at least `<seconds>` of execution time per measurement.
-  * Only applies to `stdrel` stopping criterion.
-  * Default is 0.5 seconds.
-  * If both GPU and CPU times are gathered, this applies to GPU time only.
-  * Applies to the most recent `--benchmark`, or all benchmarks if specified
-    before any `--benchmark` arguments.
-
-* `--max-noise <value>`
-  * Gather samples until the error in the measurement drops below `<value>`.
-  * Noise is specified as the percent relative standard deviation.
-  * Default is 0.5% (`--max-noise 0.5`)
-  * Only applies to `stdrel` stopping criterion.
-  * Only applies to Cold measurements.
-  * If both GPU and CPU times are gathered, this applies to GPU noise only.
-  * Applies to the most recent `--benchmark`, or all benchmarks if specified
-    before any `--benchmark` arguments.
-
 * `--skip-time <seconds>`
   * Skip a measurement when a warmup run executes in less than `<seconds>`.
   * Default is -1 seconds (disabled).
@@ -123,16 +93,6 @@
   * Applies to the most recent `--benchmark`, or all benchmarks if specified
     before any `--benchmark` arguments.
 
-* `--timeout <seconds>`
-  * Measurements will timeout after `<seconds>` have elapsed.
-  * Default is 15 seconds.
-  * `<seconds>` is walltime, not accumulated sample time.
-  * If a measurement times out, the default markdown log will print a warning to
-    report any outstanding termination criteria (min samples, min time, max
-    noise).
-  * Applies to the most recent `--benchmark`, or all benchmarks if specified
-    before any `--benchmark` arguments.
-
 * `--throttle-threshold <value>`
   * Set the GPU throttle threshold as percentage of the device's default clock rate.
   * Default is 75.
@@ -166,3 +126,68 @@
   * Intended for use with external profiling tools.
   * Applies to the most recent `--benchmark`, or all benchmarks if specified
     before any `--benchmark` arguments.
+
+## Stopping Criteria
+
+* `--timeout <seconds>`
+  * Measurements will timeout after `<seconds>` have elapsed.
+  * Default is 15 seconds.
+  * `<seconds>` is walltime, not accumulated sample time.
+  * If a measurement times out, the default markdown log will print a warning to
+    report any outstanding termination criteria (min samples, min time, max
+    noise).
+  * Applies to the most recent `--benchmark`, or all benchmarks if specified
+    before any `--benchmark` arguments.
+
+* `--min-samples <count>`
+  * Gather at least `<count>` samples per measurement before checking any
+    other stopping criterion besides the timeout.
+  * Default is 10 samples.
+  * Applies to the most recent `--benchmark`, or all benchmarks if specified
+    before any `--benchmark` arguments.
+
+* `--stopping-criterion <criterion>`
+  * After `--min-samples` is satisfied, use `<criterion>` to detect if enough
+    samples were collected.
+  * Only applies to Cold and CPU-only measurements.
+  * If both GPU and CPU times are gathered, GPU time is used for stopping
+    analysis.
+  * Stopping criteria provided by NVBench are:
+    * "stdrel": (default) Converges to a minimal relative standard deviation
+      (stdev / mean).
+    * "entropy": Converges based on the cumulative entropy of all samples.
+  * Each stopping criterion may provide additional parameters to customize
+    behavior, as detailed below:
+
+### "stdrel" Stopping Criterion Parameters
+
+* `--min-time <seconds>`
+  * Accumulate at least `<seconds>` of execution time per measurement.
+  * Only applies to `stdrel` stopping criterion.
+  * Default is 0.5 seconds.
+  * Applies to the most recent `--benchmark`, or all benchmarks if specified
+    before any `--benchmark` arguments.
+
+* `--max-noise <value>`
+  * Gather samples until the error in the measurement drops below `<value>`.
+  * Noise is specified as the percent relative standard deviation (stdev/mean).
+  * Default is 0.5% (`--max-noise 0.5`)
+  * Applies to the most recent `--benchmark`, or all benchmarks if specified
+    before any `--benchmark` arguments.
+
+### "entropy" Stopping Criterion Parameters
+
+* `--max-angle <value>`
+  * Maximum linear regression angle of cumulative entropy.
+  * Smaller values give more accurate results.
+  * Default is 0.048.
+  * Applies to the most recent `--benchmark`, or all benchmarks if specified
+    before any `--benchmark` arguments.
+
+* `--min-r2 <value>`
+  * Minimum coefficient of determination for linear regression of cumulative
+    entropy.
+  * Larger values give more accurate results.
+  * Default is 0.36.
+  * Applies to the most recent `--benchmark`, or all benchmarks if specified
+    before any `--benchmark` arguments.
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -16,7 +16,6 @@ set(example_srcs
 add_custom_target(nvbench.example.all)
 add_dependencies(nvbench.all nvbench.example.all)
 
-
 function (nvbench_add_examples_target target_prefix cuda_std)
   add_custom_target(${target_prefix}.all)
   add_dependencies(nvbench.example.all ${target_prefix}.all)
@@ -29,9 +28,15 @@ function (nvbench_add_examples_target target_prefix cuda_std)
     target_include_directories(${example_name} PRIVATE "${CMAKE_CURRENT_LIST_DIR}")
     target_link_libraries(${example_name} PRIVATE nvbench::main)
     set_target_properties(${example_name} PROPERTIES COMPILE_FEATURES cuda_std_${cuda_std})
+
+    set(example_args --timeout 0.1)
+    # The custom_criterion example doesn't support the --min-time argument:
+    if (NOT "${example_src}" STREQUAL "custom_criterion.cu")
+      list(APPEND example_args --min-time 1e-5)
+    endif()
+
     add_test(NAME ${example_name}
-      COMMAND "$<TARGET_FILE:${example_name}>" --timeout 0.1 --min-time 1e-5
-    )
+      COMMAND "$<TARGET_FILE:${example_name}>" ${example_args})
 
     # These should not deadlock. If they do, it may be that the CUDA context was created before
     # setting CUDA_MODULE_LOAD=EAGER in main, see NVIDIA/nvbench#136.
diff --git a/nvbench/benchmark_base.cuh b/nvbench/benchmark_base.cuh
@@ -266,21 +266,52 @@ struct benchmark_base
     return *this;
   }
 
-  [[nodiscard]] nvbench::criterion_params &get_criterion_params() { return m_criterion_params; }
-  [[nodiscard]] const nvbench::criterion_params &get_criterion_params() const
-  {
-    return m_criterion_params;
-  }
-
   /// Control the stopping criterion for the measurement loop.
   /// @{
   [[nodiscard]] const std::string &get_stopping_criterion() const { return m_stopping_criterion; }
-  benchmark_base &set_stopping_criterion(std::string criterion)
+  benchmark_base &set_stopping_criterion(std::string criterion);
+  /// @}
+
+  [[nodiscard]] bool has_criterion_param(const std::string &name) const
   {
-    m_stopping_criterion = std::move(criterion);
+    return m_criterion_params.has_value(name);
+  }
+
+  [[nodiscard]] nvbench::int64_t get_criterion_param_int64(const std::string &name) const
+  {
+    return m_criterion_params.get_int64(name);
+  }
+  benchmark_base &set_criterion_param_int64(const std::string &name, nvbench::int64_t value)
+  {
+    m_criterion_params.set_int64(name, value);
     return *this;
   }
-  /// @}
+
+  [[nodiscard]] nvbench::float64_t get_criterion_param_float64(const std::string &name) const
+  {
+    return m_criterion_params.get_float64(name);
+  }
+  benchmark_base &set_criterion_param_float64(const std::string &name, nvbench::float64_t value)
+  {
+    m_criterion_params.set_float64(name, value);
+    return *this;
+  }
+
+  [[nodiscard]] std::string get_criterion_param_string(const std::string &name) const
+  {
+    return m_criterion_params.get_string(name);
+  }
+  benchmark_base &set_criterion_param_string(const std::string &name, std::string value)
+  {
+    m_criterion_params.set_string(name, std::move(value));
+    return *this;
+  }
+
+  [[nodiscard]] nvbench::criterion_params &get_criterion_params() { return m_criterion_params; }
+  [[nodiscard]] const nvbench::criterion_params &get_criterion_params() const
+  {
+    return m_criterion_params;
+  }
 
 protected:
   friend struct nvbench::runner_base;
diff --git a/nvbench/benchmark_base.cxx b/nvbench/benchmark_base.cxx
@@ -17,6 +17,7 @@
  */
 
 #include <nvbench/benchmark_base.cuh>
+#include <nvbench/criterion_manager.cuh>
 #include <nvbench/detail/transform_reduce.cuh>
 
 namespace nvbench
@@ -88,4 +89,14 @@ std::size_t benchmark_base::get_config_count() const
   return per_device_count * m_devices.size();
 }
 
+// Selects the stopping criterion by name and replaces m_criterion_params with that
+// criterion's get_params(). The lookup runs before any member is modified, so if it
+// fails (presumably by throwing for an unknown name -- confirm) the state is untouched.
+benchmark_base &benchmark_base::set_stopping_criterion(std::string criterion)
+{
+  m_criterion_params   = criterion_manager::get().get_criterion(criterion).get_params();
+  m_stopping_criterion = std::move(criterion);
+  return *this;
+}
+
 } // namespace nvbench
diff --git a/nvbench/criterion_manager.cuh b/nvbench/criterion_manager.cuh
@@ -50,6 +50,9 @@ public:
 
   using params_description = std::vector<std::pair<std::string, nvbench::named_values::type>>;
   params_description get_params_description() const;
+
+  using params_map = std::unordered_map<std::string, params_description>;
+  params_map get_params_description_map() const;
 };
 
 /**
diff --git a/nvbench/criterion_manager.cxx b/nvbench/criterion_manager.cxx
@@ -104,4 +104,23 @@ nvbench::criterion_manager::params_description criterion_manager::get_params_des
   return desc;
 }
 
+// Maps each criterion name in m_map to the (parameter name, type) pairs of its get_params().
+criterion_manager::params_map criterion_manager::get_params_description_map() const
+{
+  params_map result;
+
+  for (const auto &[criterion_name, criterion] : m_map)
+  {
+    params_description &desc         = result[criterion_name];
+    nvbench::criterion_params params = criterion->get_params();
+
+    // Iterate by const reference so each parameter name is not copied per iteration.
+    for (const auto &param : params.get_names())
+    {
+      desc.emplace_back(param, params.get_type(param));
+    }
+  }
+  return result;
+}
+
 } // namespace nvbench
diff --git a/nvbench/detail/measure_cold.cu b/nvbench/detail/measure_cold.cu
@@ -30,6 +30,7 @@
 #include <algorithm>
 #include <chrono>
 #include <limits>
+#include <optional>
 #include <thread>
 
 namespace nvbench::detail
@@ -387,19 +388,30 @@ void measure_cold_base::generate_summaries()
 
     if (m_max_time_exceeded)
     {
-      const auto timeout   = m_walltime_timer.get_duration();
-      const auto max_noise = m_criterion_params.get_float64("max-noise");
-      const auto min_time  = m_criterion_params.get_float64("min-time");
+      const auto timeout = m_walltime_timer.get_duration();
 
-      if (cuda_noise > max_noise)
+      // Returns the float64 criterion parameter `name`, or std::nullopt when
+      // m_criterion_params has no such value (not every criterion defines every parameter).
+      auto get_param = [this](const std::string &name) -> std::optional<nvbench::float64_t> {
+        if (m_criterion_params.has_value(name))
+        {
+          return m_criterion_params.get_float64(name);
+        }
+        return std::nullopt;
+      };
+
+      const auto max_noise = get_param("max-noise");
+      const auto min_time  = get_param("min-time");
+
+      if (max_noise && cuda_noise > *max_noise)
       {
         printer.log(nvbench::log_level::warn,
                     fmt::format("Current measurement timed out ({:0.2f}s) "
                                 "while over noise threshold ({:0.2f}% > "
                                 "{:0.2f}%)",
                                 timeout,
                                 cuda_noise * 100,
-                                max_noise * 100));
+                                *max_noise * 100));
       }
       if (m_total_samples < m_min_samples)
       {
@@ -410,15 +422,15 @@ void measure_cold_base::generate_summaries()
                                 m_total_samples,
                                 m_min_samples));
       }
-      if (m_total_cuda_time < min_time)
+      if (min_time && m_total_cuda_time < *min_time)
       {
         printer.log(nvbench::log_level::warn,
                     fmt::format("Current measurement timed out ({:0.2f}s) "
                                 "before accumulating min_time ({:0.2f}s < "
                                 "{:0.2f}s)",
                                 timeout,
                                 m_total_cuda_time,
-                                min_time));
+                                *min_time));
       }
     }
 
diff --git a/nvbench/option_parser.cu b/nvbench/option_parser.cu
diff --git a/testing/option_parser.cu b/testing/option_parser.cu