fix(tutorial): correct inverted error bars in grouped_gemm benchmark

niyunsheng · niyunsheng · commit 4bc754234776 · 2026-02-11T21:56:16.000+08:00
The grouped_gemm tutorial plots absolute latency (`runtime(ms)`) on the y-axis, but both benchmark functions (`benchmark_square_matrices` and `benchmark_batches`) returned `(ms, max_ms, min_ms)`, which swapped the lower and upper bounds of the error bars in the generated plots. This commit changes the return order to `(ms, min_ms, max_ms)` so that the error bars are drawn with the correct lower and upper bounds.
diff --git a/python/tutorials/08-grouped-gemm.py b/python/tutorials/08-grouped-gemm.py
@@ -487,7 +487,7 @@ def benchmark_square_matrices(N, provider):
         ms, min_ms, max_ms = triton.testing.do_bench(
             lambda: triton_tma_perf_fn(d_a_ptrs, d_b_t_ptrs, d_c_ptrs, d_g_sizes, d_g_lds, group_size, dtype=torch.
                                        float16), quantiles=quantiles)
-    return ms, max_ms, min_ms
+    return ms, min_ms, max_ms
 
 
 @triton.testing.perf_report(
@@ -558,7 +558,7 @@ def benchmark_batches(M, provider):
         ms, min_ms, max_ms = triton.testing.do_bench(
             lambda: triton_tma_perf_fn(d_a_ptrs, d_b_t_ptrs, d_c_ptrs, d_g_sizes, d_g_t_lds, group_size, dtype=torch.
                                        float16), quantiles=quantiles)
-    return ms, max_ms, min_ms
+    return ms, min_ms, max_ms
 
 
 benchmark_square_matrices.run(show_plots=True, print_data=True)