scylladb · piodul · Nov 24, 2022 · May 30, 2023 · Nov 18, 2022 · Nov 18, 2022
diff --git a/src/bin/cql-stress-scylla-bench/args.rs b/src/bin/cql-stress-scylla-bench/args.rs
@@ -42,7 +42,8 @@ pub(crate) struct ScyllaBenchArgs {
     pub password: String,
     pub mode: Mode,
     pub latency_type: LatencyType,
-    pub max_retries_per_op: u64,
+    pub max_consecutive_errors_per_op: u64,
+    pub max_errors_in_total: u64,
     pub concurrency: u64,
     pub maximum_rate: u64,
 
@@ -174,9 +175,15 @@ where
     let max_errors_at_row = flag.u64_var(
         "error-at-row-limit",
         0,
-        "the maximum number of attempts allowed for a single operation. \
+        "the maximum number of consecutive errors allowed. \
         After exceeding it, the workflow will terminate with an error. \
-        Set to 0 if you want to have unlimited retries",
+        Set to 0 if you want to disable this limit",
+    );
+    let max_errors = flag.u64_var(
+        "error-limit",
+        0,
+        "the number of total errors after which the workflow should stop and fail; \
+        set it to 0 (the default) to disable this limit",
     );
     let concurrency = flag.u64_var("concurrency", 16, "number of used tasks");
     let maximum_rate = flag.u64_var(
@@ -330,7 +337,10 @@ where
         // Zero means unlimited tries,
         // and #tries == #retries + 1,
         // therefore just subtract with wraparound and treat u64::MAX as infinity
-        let max_retries_per_op = max_errors_at_row.get().wrapping_sub(1);
+        let max_consecutive_errors_per_op = max_errors_at_row.get().wrapping_sub(1);
+
+        // As above: 0 disables the limit; wrapping_sub(1) turns it into u64::MAX (no limit)
+        let max_errors_in_total = max_errors.get().wrapping_sub(1);
 
         let hdr_latency_resolution = match hdr_latency_units.get().as_str() {
             "ns" => 1,
@@ -376,7 +386,8 @@ where
             mode,
             concurrency,
             latency_type,
-            max_retries_per_op,
+            max_consecutive_errors_per_op,
+            max_errors_in_total,
             maximum_rate,
             test_duration: test_duration.get(),
             partition_count,
@@ -418,6 +429,22 @@ impl ScyllaBenchArgs {
         println!("Mode:\t\t\t {}", show_mode(&self.mode));
         println!("Workload:\t\t {}", show_workload(&self.workload));
         println!("Timeout:\t\t {}", format_duration(self.timeout));
+        if self.max_consecutive_errors_per_op == u64::MAX {
+            println!("Max error number at row: unlimited");
+        } else {
+            println!(
+                "Max error number at row: {}",
+                self.max_consecutive_errors_per_op as u128 + 1,
+            );
+        }
+        if self.max_errors_in_total == u64::MAX {
+            println!("Max error number:\t unlimited");
+        } else {
+            println!(
+                "Max error number:\t {}",
+                self.max_errors_in_total as u128 + 1,
+            );
+        }
         println!(
             "Consistency level:\t {}",
             show_consistency_level(&self.consistency_level)

diff --git a/src/bin/cql-stress-scylla-bench/main.rs b/src/bin/cql-stress-scylla-bench/main.rs
@@ -93,13 +93,19 @@ async fn main() -> Result<()> {
                 combined_stats.combine(&partial_stats);
             }
             result = &mut run_finished => {
+                let errors = match &result {
+                    Ok(_) => &[],
+                    Err(err) => err.errors.as_slice(),
+                };
+                // Combine stats for the last time
+                let partial_stats = sharded_stats.get_combined_and_clear();
+                combined_stats.combine(&partial_stats);
+                printer.print_final(&combined_stats, errors, &mut std::io::stdout())?;
                 if result.is_ok() {
-                    // Combine stats for the last time
-                    let partial_stats = sharded_stats.get_combined_and_clear();
-                    combined_stats.combine(&partial_stats);
-                    printer.print_final(&combined_stats, &mut std::io::stdout())?;
+                    return Ok(());
+                } else {
+                    return Err(anyhow::anyhow!("Benchmark failed"));
                 }
-                return result.context("An error occurred during the benchmark");
             }
         }
     }
@@ -145,7 +151,8 @@ async fn prepare(args: Arc<ScyllaBenchArgs>, stats: Arc<ShardedStats>) -> Result
         concurrency: args.concurrency,
         rate_limit_per_second,
         operation_factory,
-        max_retries_per_op: args.max_retries_per_op as usize,
+        max_consecutive_errors_per_op: args.max_consecutive_errors_per_op,
+        max_errors_in_total: args.max_errors_in_total,
     })
 }
 

diff --git a/src/bin/cql-stress-scylla-bench/stats.rs b/src/bin/cql-stress-scylla-bench/stats.rs
@@ -252,7 +252,12 @@ impl StatsPrinter {
         Ok(())
     }
 
-    pub fn print_final(&self, stats: &Stats, out: &mut impl Write) -> Result<()> {
+    pub fn print_final(
+        &self,
+        stats: &Stats,
+        errors: &[anyhow::Error],
+        out: &mut impl Write,
+    ) -> Result<()> {
         let time = Instant::now() - self.start_time;
         writeln!(out)?;
         writeln!(out, "Results:")?;
@@ -274,7 +279,16 @@ impl StatsPrinter {
             self.print_final_latency_histogram("c-o fixed latency", &ls.co_fixed, out)?;
         }
 
-        // TODO: "critical errors"
+        if !errors.is_empty() {
+            writeln!(
+                out,
+                "\nFollowing critical errors were caught during the run:"
+            )?;
+            for err in errors {
+                // The alternate `{:#}` format keeps the error and its causes on a single line
+                writeln!(out, "    {:#}", err)?;
+            }
+        }
 
         Ok(())
     }

diff --git a/src/configuration.rs b/src/configuration.rs
@@ -33,9 +33,12 @@ pub struct Configuration {
     /// during the stress.
     pub operation_factory: Arc<dyn OperationFactory>,
 
-    /// The maximum number of attempts an operation should be retried
-    /// before giving up.
-    pub max_retries_per_op: usize,
+    /// The maximum number of consecutive errors allowed before giving up.
+    pub max_consecutive_errors_per_op: u64,
+
+    /// The maximum total number of errors allowed across the whole test.
+    /// Once this number is exceeded, the benchmark is stopped.
+    pub max_errors_in_total: u64,
 }
 
 /// Contains all necessary context needed to execute an Operation.