Use math.fsum for duration aggregation in SimpleProfiler (#21525)

KAVYANSHTYAGI · bhimrazy · web-flow · commit 215d7d9737fc · 2026-02-03T17:50:29.000+05:45
* Update test_mlflow.py

* test_mlflow.py

* Optimize SimpleProfiler duration aggregation

* Re-run CI

* Add tests for SimpleProfiler extended report functionality

---------

Co-authored-by: bhimrazy <bhimrajyadav977@gmail.com>
diff --git a/src/lightning/pytorch/profilers/simple.py b/src/lightning/pytorch/profilers/simple.py
@@ -14,13 +14,13 @@
 """Profiler to check if there are any bottlenecks in your code."""
 
 import logging
+import math
 import os
 import time
 from collections import defaultdict
 from pathlib import Path
 from typing import Optional, Union
 
-import torch
 from typing_extensions import override
 
 from lightning.pytorch.profilers.profiler import Profiler
@@ -86,9 +86,8 @@ def _make_report_extended(self) -> tuple[_TABLE_DATA_EXTENDED, float, float]:
         report = []
 
         for a, d in self.recorded_durations.items():
-            d_tensor = torch.tensor(d)
             len_d = len(d)
-            sum_d = torch.sum(d_tensor).item()
+            sum_d = math.fsum(d)
             percentage_d = 100.0 * sum_d / total_duration
 
             report.append((a, sum_d / len_d, len_d, sum_d, percentage_d))
@@ -100,8 +99,7 @@ def _make_report_extended(self) -> tuple[_TABLE_DATA_EXTENDED, float, float]:
     def _make_report(self) -> _TABLE_DATA:
         report = []
         for action, d in self.recorded_durations.items():
-            d_tensor = torch.tensor(d)
-            sum_d = torch.sum(d_tensor).item()
+            sum_d = math.fsum(d)
 
             report.append((action, sum_d / len(d), sum_d))
 
diff --git a/tests/tests_pytorch/profilers/test_profiler.py b/tests/tests_pytorch/profilers/test_profiler.py
@@ -194,6 +194,29 @@ def test_simple_profiler_logs(tmp_path, caplog, simple_profiler):
     assert caplog.text.count("Profiler Report") == 2
 
 
+def test_simple_profiler_uses_math_fsum(monkeypatch):
+    profiler = SimpleProfiler()
+    profiler.recorded_durations["action"] = [1.0, 2.0, 3.0]
+    profiler.start_time = 0.0
+
+    fsum_calls: list[list[float]] = []
+
+    def _fake_fsum(values):
+        fsum_calls.append(list(values))
+        return sum(values)
+
+    monkeypatch.setattr("lightning.pytorch.profilers.simple.math.fsum", _fake_fsum)
+
+    # Test non-extended report
+    profiler._make_report()
+    assert fsum_calls == [[1.0, 2.0, 3.0]]
+
+    # Test extended report
+    fsum_calls.clear()
+    profiler._make_report_extended()
+    assert fsum_calls == [[1.0, 2.0, 3.0]]
+
+
 @pytest.mark.parametrize("extended", [True, False])
 @patch("time.perf_counter", return_value=70)
 def test_simple_profiler_summary(tmp_path, extended):