Skip to content

Commit bb8c49d

Browse files
Resolved the reviewers' comments
1 parent 8f09c93 commit bb8c49d

1 file changed

Lines changed: 4 additions & 13 deletions

File tree

src/lightning/fabric/utilities/throughput.py

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -88,13 +88,10 @@ class Throughput:
8888
world_size: Number of devices available across hosts. Global metrics are not included if the world size is 1.
8989
window_size: Number of batches to use for a rolling average.
9090
separator: Key separator to use when creating per-device and global metrics.
91-
using_sparse_model: User intent for whether the model uses structured sparsity.
92-
``True`` scales ``available_flops`` by ``sparse_cuda_acceleration_factor`` to reflect the higher
93-
theoretical peak achievable with structured sparsity (e.g. NVIDIA's 2:4 sparse Tensor Cores on
94-
Ampere and later). ``False`` keeps the dense peak. ``None`` (the default) keeps the dense peak
95-
and emits a one-time warning so users explicitly choose; set this flag to silence the warning.
96-
sparse_cuda_acceleration_factor: Multiplier applied to ``available_flops`` when ``using_sparse_model``
97-
is ``True``. Defaults to ``2.0``, matching the 2x speedup of NVIDIA 2:4 structured sparsity.
91+
using_sparse_model: Whether the model uses structured sparsity. If ``True``, scales ``available_flops`` by
92+
``sparse_cuda_acceleration_factor``. ``None`` (default) assumes dense and warns once if ``available_flops`` is set.
93+
sparse_cuda_acceleration_factor: Multiplier applied to ``available_flops`` when ``using_sparse_model`` is
94+
``True``. Defaults to ``2.0`` for NVIDIA 2:4 structured sparsity.
9895
9996
"""
10097

@@ -107,14 +104,8 @@ def __init__(
107104
using_sparse_model: Optional[bool] = None,
108105
sparse_cuda_acceleration_factor: float = 2.0,
109106
) -> None:
110-
# For sparse models, hardware can achieve a higher theoretical peak (e.g. NVIDIA's 2:4 structured
111-
# sparsity doubles tensor-core throughput). Scaling the peak here keeps MFU = measured / peak
112-
# consistent with the sparse ceiling, so users get a meaningful utilization figure.
113-
# If `available_flops is None` the peak is unknown, so MFU is skipped in `compute()` regardless
114-
# (see the `if self.available_flops:` guard), and we leave it as None rather than erroring.
115107
assert sparse_cuda_acceleration_factor >= 1.0, "sparse acceleration factor cannot reduce peak FLOPs"
116108
if using_sparse_model is None and available_flops is not None:
117-
# the user didn't tell us their intent; default to dense and warn so MFU isn't silently ambiguous
118109
rank_zero_warn(
119110
"MFU assumes dense model FLOPs (no sparsity acceleration)."
120111
" Set 'using_sparse_model=True' for mfu to use sparse flops."

0 commit comments

Comments
 (0)