Skip to content

Commit ad6b189

Browse files
liyuying0000 and copybara-github
authored and committed
Collect average "iterations" in parallel_bench.
PiperOrigin-RevId: 731762429 Change-Id: Ia1f1b795232a9ef2939c7ee4d7fde3a58e1ea3bb
1 parent f411e9b commit ad6b189

File tree

7 files changed

+68
-10
lines changed

7 files changed

+68
-10
lines changed

fleetbench/parallel/parallel_bench_lib.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,8 @@ class BenchmarkMetrics:
9191
per_iteration_wall_time: float
9292
# per benchmark iteration cpu time
9393
per_iteration_cpu_time: float
94+
# per benchmark iteration
95+
per_bm_run_iteration: int
9496

9597

9698
class ParallelBench:
@@ -179,6 +181,7 @@ def _PreRun(
179181
total_duration=1.0,
180182
per_iteration_wall_time=0.0,
181183
per_iteration_cpu_time=0.0,
184+
per_bm_run_iteration=0,
182185
)
183186
]
184187
for benchmark in self.benchmarks.keys()
@@ -306,6 +309,7 @@ def _RunSchedulingLoop(self) -> None:
306309
total_duration=r.duration,
307310
per_iteration_wall_time=r.bm_wall_time,
308311
per_iteration_cpu_time=r.bm_cpu_time,
312+
per_bm_run_iteration=r.iteration,
309313
)
310314
)
311315

@@ -373,6 +377,7 @@ def ConvertToDataFrame(self) -> pd.DataFrame:
373377
"Benchmark": benchmark,
374378
"WallTimes": t.per_iteration_wall_time,
375379
"CPUTimes": t.per_iteration_cpu_time,
380+
"Iterations": t.per_bm_run_iteration,
376381
}
377382
data.append(entry)
378383
runtimes = pd.DataFrame(data)
@@ -443,6 +448,7 @@ def Run(
443448
total_duration=r.duration,
444449
per_iteration_wall_time=r.bm_wall_time,
445450
per_iteration_cpu_time=r.bm_cpu_time,
451+
per_bm_run_iteration=r.iteration,
446452
)
447453
)
448454

fleetbench/parallel/parallel_bench_lib_test.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,13 +49,15 @@ def tearDown(self):
4949
@mock.patch.object(bm, "GetSubBenchmarks", autospec=True)
5050
@mock.patch.object(run.Run, "Execute", autospec=True)
5151
@mock.patch.object(cpu, "Utilization", autospec=True)
52+
@mock.patch.object(reporter, "GenerateBenchmarkReport", autospec=True)
5253
@mock.patch.object(reporter, "SaveBenchmarkResults", autospec=True)
5354
@flagsaver.flagsaver(
5455
benchmark_dir=absltest.get_default_test_tmpdir(),
5556
)
5657
def testRun(
5758
self,
5859
mock_save_benchmark_results,
60+
mock_generate_benchmark_report,
5961
mock_utilization,
6062
mock_execute,
6163
mock_get_subbenchmarks,
@@ -81,6 +83,7 @@ def fake_utilization(unused_cpus):
8183

8284
mock_utilization.side_effect = fake_utilization
8385

86+
mock_generate_benchmark_report.return_value = pd.DataFrame()
8487
mock_save_benchmark_results.return_value = None
8588

8689
self.pb = parallel_bench_lib.ParallelBench(
@@ -126,23 +129,27 @@ def test_convert_to_dataframe(self):
126129
total_duration=10,
127130
per_iteration_wall_time=1,
128131
per_iteration_cpu_time=1,
132+
per_bm_run_iteration=2,
129133
),
130134
parallel_bench_lib.BenchmarkMetrics(
131135
total_duration=2,
132136
per_iteration_wall_time=3.01,
133137
per_iteration_cpu_time=3,
138+
per_bm_run_iteration=4,
134139
),
135140
]
136141
self.pb.runtimes["BM_Test2"] = [
137142
parallel_bench_lib.BenchmarkMetrics(
138143
total_duration=10,
139144
per_iteration_wall_time=1,
140145
per_iteration_cpu_time=1,
146+
per_bm_run_iteration=10,
141147
),
142148
parallel_bench_lib.BenchmarkMetrics(
143149
total_duration=4,
144150
per_iteration_wall_time=4,
145151
per_iteration_cpu_time=5,
152+
per_bm_run_iteration=8,
146153
),
147154
]
148155
self.pb.utilization_samples.append((pd.Timestamp.now(), 0.5))
@@ -151,8 +158,18 @@ def test_convert_to_dataframe(self):
151158
self.assertEqual(
152159
df.to_dict("records"),
153160
[
154-
{"Benchmark": "BM_Test1", "WallTimes": 3.01, "CPUTimes": 3},
155-
{"Benchmark": "BM_Test2", "WallTimes": 4, "CPUTimes": 5},
161+
{
162+
"Benchmark": "BM_Test1",
163+
"WallTimes": 3.01,
164+
"CPUTimes": 3,
165+
"Iterations": 4,
166+
},
167+
{
168+
"Benchmark": "BM_Test2",
169+
"WallTimes": 4,
170+
"CPUTimes": 5,
171+
"Iterations": 8,
172+
},
156173
],
157174
)
158175

fleetbench/parallel/reporter.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,9 +103,13 @@ def GenerateBenchmarkReport(
103103
Count=("WallTimes", "count"),
104104
Mean_Wall_Time=("WallTimes", "mean"),
105105
Mean_CPU_Time=("CPUTimes", "mean"),
106+
Mean_Iterations=("Iterations", "mean"),
106107
)
107108
.round(3)
108109
)
110+
grouped_results["Mean_Iterations"] = grouped_results[
111+
"Mean_Iterations"
112+
].astype(int)
109113

110114
# Combine perf_counter_df and benchmark run results on the same
111115
# "benchmark" entry.
@@ -134,6 +138,7 @@ def SaveBenchmarkResults(output_dir, df: pd.DataFrame) -> None:
134138
columns={
135139
"Mean_Wall_Time": "real_time",
136140
"Mean_CPU_Time": "cpu_time",
141+
"Mean_Iterations": "iterations",
137142
}
138143
)
139144
data = df.reset_index().to_dict(orient="records")

fleetbench/parallel/reporter_test.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -115,9 +115,24 @@ def test_aggregate_constext_inconsistent_data(self):
115115

116116
def test_generate_benchmark_report(self):
117117
benchmark_df = pd.DataFrame([
118-
{"Benchmark": "BM_Test1", "WallTimes": 3, "CPUTimes": 3},
119-
{"Benchmark": "BM_Test2", "WallTimes": 4, "CPUTimes": 5},
120-
{"Benchmark": "BM_Test1", "WallTimes": 6, "CPUTimes": 7},
118+
{
119+
"Benchmark": "BM_Test1",
120+
"WallTimes": 3,
121+
"CPUTimes": 3,
122+
"Iterations": 10,
123+
},
124+
{
125+
"Benchmark": "BM_Test2",
126+
"WallTimes": 4,
127+
"CPUTimes": 5,
128+
"Iterations": 20,
129+
},
130+
{
131+
"Benchmark": "BM_Test1",
132+
"WallTimes": 6,
133+
"CPUTimes": 7,
134+
"Iterations": 50,
135+
},
121136
])
122137
perf_counter_df = pd.DataFrame([
123138
{"Benchmark": "BM_Test1", "instructions": 130.0, "cycles": 3.0},
@@ -134,6 +149,7 @@ def test_generate_benchmark_report(self):
134149
"Count": 2,
135150
"Mean_Wall_Time": 4.5,
136151
"Mean_CPU_Time": 5.0,
152+
"Mean_Iterations": 30,
137153
"instructions": 130.0,
138154
"cycles": 3.0,
139155
},
@@ -142,6 +158,7 @@ def test_generate_benchmark_report(self):
142158
"Count": 1,
143159
"Mean_Wall_Time": 4,
144160
"Mean_CPU_Time": 5.0,
161+
"Mean_Iterations": 20,
145162
"instructions": 200.0,
146163
"cycles": 2.0,
147164
},

fleetbench/parallel/result.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,4 @@ class Result:
3030
duration: Optional[float] = None
3131
bm_wall_time: Optional[float] = None
3232
bm_cpu_time: Optional[float] = None
33+
iteration: Optional[int] = None

fleetbench/parallel/run.py

Lines changed: 11 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,16 @@ def Execute(self) -> result.Result:
5252

5353
with open(self.out_file, "r") as f:
5454
benchmark_output = f.read()
55-
benchmark_cpu_time, benchmark_wall_time = self._GetBenchmarkRuntime(
56-
benchmark_output
55+
benchmark_cpu_time, benchmark_wall_time, iteration = (
56+
self._GetBenchmarkRuntime(benchmark_output)
5757
)
5858
return result.Result(
5959
benchmark=self.benchmark.Name(),
6060
rc=proc.returncode,
6161
duration=end - start,
6262
bm_wall_time=benchmark_wall_time,
6363
bm_cpu_time=benchmark_cpu_time,
64+
iteration=iteration,
6465
result=benchmark_output,
6566
stdout=proc.stdout,
6667
stderr=proc.stderr,
@@ -72,6 +73,12 @@ def _CommandLine(self) -> list[str]:
7273
f"--benchmark_out={self.out_file}",
7374
]
7475

75-
def _GetBenchmarkRuntime(self, benchmark_output: str) -> tuple[float, float]:
76+
def _GetBenchmarkRuntime(
77+
self, benchmark_output: str
78+
) -> tuple[float, float, int]:
7679
data = json.loads(benchmark_output)
77-
return data["benchmarks"][0]["cpu_time"], data["benchmarks"][0]["real_time"]
80+
return (
81+
data["benchmarks"][0]["cpu_time"],
82+
data["benchmarks"][0]["real_time"],
83+
data["benchmarks"][0]["iterations"],
84+
)

fleetbench/parallel/run_test.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,11 @@ class RunTest(absltest.TestCase):
3333
def testRun(self, mock_run):
3434
output_file = self.create_tempfile()
3535

36-
data = {"benchmarks": [{"cpu_time": 12.345, "real_time": 12.3}]}
36+
data = {
37+
"benchmarks": [
38+
{"cpu_time": 12.345, "real_time": 12.3, "iterations": 10}
39+
]
40+
}
3741
json_object = json.dumps(data, indent=4)
3842
with open(output_file.full_path, "w") as f:
3943
f.write(json_object)
@@ -56,6 +60,7 @@ def testRun(self, mock_run):
5660
self.assertGreater(result.duration, 0)
5761
self.assertEqual(result.bm_cpu_time, 12.345)
5862
self.assertEqual(result.bm_wall_time, 12.3)
63+
self.assertEqual(result.iteration, 10)
5964
self.assertEqual(result.rc, 0)
6065

6166
@flagsaver.flagsaver(

0 commit comments

Comments
 (0)