google
diff --git a/fleetbench/parallel/BUILD b/fleetbench/parallel/BUILD
Lines changed: 21 additions & 1 deletion
diff --git a/fleetbench/parallel/parallel_bench_lib.py b/fleetbench/parallel/parallel_bench_lib.py
Lines changed: 3 additions & 72 deletions
diff --git a/fleetbench/parallel/parallel_bench_lib_test.py b/fleetbench/parallel/parallel_bench_lib_test.py
Lines changed: 28 additions & 50 deletions
@@ -56,7 +56,7 @@ py_library(
     deps = [
         ":benchmark",
         ":cpu",
-        ":result",
+        ":reporter",
         ":run",
         ":worker",
         "@com_google_absl_py//absl/logging",
@@ -65,6 +65,15 @@ py_library(
     ],
 )
 
+py_library(
+    name = "reporter",
+    srcs = ["reporter.py"],  # Module whose GenerateBenchmarkReport/SaveBenchmarkResults parallel_bench_lib.py now calls.
+    deps = [
+        "@com_google_absl_py//absl/logging",
+        requirement("pandas"),
+    ],
+)
+
 py_binary(
     name = "parallel_bench",
     testonly = True,
@@ -86,6 +95,7 @@ py_test(
         ":benchmark",
         ":cpu",
         ":parallel_bench_lib",
+        ":reporter",
         ":result",
         ":run",
         "@com_google_absl_py//absl/testing:absltest",
@@ -134,3 +144,13 @@ py_test(
         "@com_google_absl_py//absl/testing:flagsaver",
     ],
 )
+
+py_test(
+    name = "reporter_test",
+    srcs = ["reporter_test.py"],
+    deps = [
+        ":reporter",  # Presumably the library under test; reporter_test.py is not shown here.
+        "@com_google_absl_py//absl/testing:absltest",
+        requirement("pandas"),
+    ],
+)
@@ -26,6 +26,7 @@
 
 from fleetbench.parallel import benchmark as bm
 from fleetbench.parallel import cpu
+from fleetbench.parallel import reporter
 from fleetbench.parallel import run
 from fleetbench.parallel import worker
 
@@ -308,76 +309,6 @@ def _RunSchedulingLoop(self) -> None:
               )
           )
 
-  def GenerateBenchmarkReport(
-      self,
-      df: pd.DataFrame,
-      perf_counter_df: pd.DataFrame | None,
-  ) -> pd.DataFrame:
-    """Generates a DataFrame of aggregated benchmark results.
-
-    Args:
-      df: A DataFrame of benchmark results.
-      perf_counter_df: A DataFrame of performance counter results.
-
-    Returns:
-      A DataFrame of aggregated benchmark results.
-    """
-
-    # Remove "fleetbench (" prefix and ")" suffix
-    df["Benchmark"] = (
-        df["Benchmark"]
-        .astype(str)
-        .str.replace(r"fleetbench \((.*)\)", r"\1", regex=True)
-    )
-
-    grouped_results = (
-        df.groupby("Benchmark")
-        .agg(
-            Count=("WallTimes", "count"),
-            Mean_Wall_Time=("WallTimes", "mean"),
-            Mean_CPU_Time=("CPUTimes", "mean"),
-        )
-        .round(3)
-    )
-
-    # Combine perf_counter_df and benchmark run results on the same
-    # "benchmark" entry.
-    if perf_counter_df is not None:
-      grouped_results = pd.merge(
-          grouped_results, perf_counter_df, on="Benchmark", how="left"
-      )
-
-    print(grouped_results.to_string())
-    return grouped_results
-
-  def SaveBenchmarkResults(self, df: pd.DataFrame) -> None:
-    """Saves benchmark results to a JSON file for predictiveness analysis."""
-
-    file_name = os.path.join(self.temp_root, "results.json")
-
-    # Convert DataFrame to a list of dictionaries (one for each row)
-    # Rename the column "Benchmark" to "Name"
-    # TODO: This only works for open source benchmark version.
-
-    # We use "Benchmark" column as the index, and rename it to "name"
-    df.index.name = "name"
-    df = df.rename(
-        columns={
-            "Mean_Wall_Time": "real_time",
-            "Mean_CPU_Time": "cpu_time",
-        }
-    )
-    data = df.reset_index().to_dict(orient="records")
-
-    try:
-      with open(file_name, "w") as json_file:
-        json.dump(
-            data, json_file, indent=4
-        )  # Serialize and write with indentation
-        logging.info("Summary results successfully written to %s", file_name)
-    except (IOError, json.JSONDecodeError) as e:
-      print(f"Error writing JSON data: {e}")
-
   def GeneratePerfCounterDataFrame(
       self, benchmark_perf_counters: str
   ) -> pd.DataFrame | None:
@@ -461,8 +392,8 @@ def PostProcessBenchmarkResults(self, benchmark_perf_counters: str) -> None:
     df = self.ConvertToDataFrame()
 
     perf_counter_df = self.GeneratePerfCounterDataFrame(benchmark_perf_counters)
-    df = self.GenerateBenchmarkReport(df, perf_counter_df)
-    self.SaveBenchmarkResults(df)
+    df = reporter.GenerateBenchmarkReport(df, perf_counter_df)
+    reporter.SaveBenchmarkResults(self.temp_root, df)
 
   def Run(
       self,
 
@@ -23,6 +23,7 @@
 from fleetbench.parallel import benchmark as bm
 from fleetbench.parallel import cpu
 from fleetbench.parallel import parallel_bench_lib
+from fleetbench.parallel import reporter
 from fleetbench.parallel import result
 from fleetbench.parallel import run
 
@@ -48,10 +49,17 @@ def tearDown(self):
   @mock.patch.object(bm, "GetSubBenchmarks", autospec=True)
   @mock.patch.object(run.Run, "Execute", autospec=True)
   @mock.patch.object(cpu, "Utilization", autospec=True)
+  @mock.patch.object(reporter, "SaveBenchmarkResults", autospec=True)
   @flagsaver.flagsaver(
       benchmark_dir=absltest.get_default_test_tmpdir(),
   )
-  def testRun(self, mock_utilization, mock_execute, mock_get_subbenchmarks):
+  def testRun(
+      self,
+      mock_save_benchmark_results,
+      mock_utilization,
+      mock_execute,
+      mock_get_subbenchmarks,
+  ):
     mock_get_subbenchmarks.return_value = ["BM_Test1", "BM_Test2"]
     mock_execute.return_value = result.Result(
         benchmark="fake_bench (BM_Test1)",
@@ -72,6 +80,9 @@ def fake_utilization(unused_cpus):
       return fake_utilizations[min(mock_utilization.call_count - 1, 1)]
 
     mock_utilization.side_effect = fake_utilization
+
+    mock_save_benchmark_results.return_value = None
+
     self.pb = parallel_bench_lib.ParallelBench(
         cpus=[0, 1],
         cpu_affinity=False,
@@ -193,57 +204,24 @@ def test_generate_perf_counter_dataframe(self):
     ]).set_index("Benchmark")
     pd.testing.assert_frame_equal(df, expected_df)
 
-  def test_generate_benchmark_report(self):
-    benchmark_df = pd.DataFrame([
-        {"Benchmark": "BM_Test1", "WallTimes": 3, "CPUTimes": 3},
-        {"Benchmark": "BM_Test2", "WallTimes": 4, "CPUTimes": 5},
-        {"Benchmark": "BM_Test1", "WallTimes": 6, "CPUTimes": 7},
-    ])
-    perf_counter_df = pd.DataFrame([
-        {"Benchmark": "BM_Test1", "instructions": 130.0, "cycles": 3.0},
-        {"Benchmark": "BM_Test2", "instructions": 200.0, "cycles": 2.0},
-    ])
-    combined_df = self.pb.GenerateBenchmarkReport(benchmark_df, perf_counter_df)
-    self.assertIsInstance(combined_df, pd.DataFrame)
-    self.assertLen(combined_df, 2)  # Two benchmarks: BM_Test1 and BM_Test2
-    expected_df = pd.DataFrame([
-        {
-            "Benchmark": "BM_Test1",
-            "Count": 2,
-            "Mean_Wall_Time": 4.5,
-            "Mean_CPU_Time": 5.0,
-            "instructions": 130.0,
-            "cycles": 3.0,
-        },
-        {
-            "Benchmark": "BM_Test2",
-            "Count": 1,
-            "Mean_Wall_Time": 4,
-            "Mean_CPU_Time": 5.0,
-            "instructions": 200.0,
-            "cycles": 2.0,
-        },
+  @mock.patch.object(reporter, "SaveBenchmarkResults", autospec=True)
+  @mock.patch.object(reporter, "GenerateBenchmarkReport", autospec=True)
+  @mock.patch.object(
+      parallel_bench_lib.ParallelBench, "ConvertToDataFrame", autospec=True
+  )
+  def test_post_processing_benchmark_results(
+      self,
+      mock_convert_to_dataframe,
+      mock_generate_benchmark_report,
+      mock_save_benchmark_results,
+  ):
+    mock_convert_to_dataframe.return_value = pd.DataFrame([
+        {"Benchmark": "test_benchmark1", "WallTimes": 10, "CPUTimes": 10},
     ])
-    pd.testing.assert_frame_equal(combined_df, expected_df)
 
-  def test_save_benchmark_results(self):
-    df = pd.DataFrame([
-        {"Benchmark": "BM_Test1", "Mean_Wall_Time": 1, "Mean_CPU_Time": 1},
-        {"Benchmark": "BM_Test2", "Mean_Wall_Time": 1, "Mean_CPU_Time": 2},
-    ]).set_index("Benchmark")
-
-    self.pb.SaveBenchmarkResults(df)
-    file_name = os.path.join(absltest.get_default_test_tmpdir(), "results.json")
-    self.assertTrue(os.path.exists(file_name))
-    with open(file_name, "r") as json_file:
-      data = json.load(json_file)
-    self.assertEqual(
-        data,
-        [
-            {"name": "BM_Test1", "real_time": 1, "cpu_time": 1},
-            {"name": "BM_Test2", "real_time": 1, "cpu_time": 2},
-        ],
-    )
+    self.pb.PostProcessBenchmarkResults("instructions,cycles")
+    mock_generate_benchmark_report.assert_called_once()
+    mock_save_benchmark_results.assert_called_once()
 
 
 class ParseBenchmarkWeightsTest(absltest.TestCase):