Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 28 additions & 6 deletions superbench/benchmarks/micro_benchmarks/hipblaslt_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ def _process_raw_result(self, cmd_idx, raw_output):
lines = raw_output.splitlines()
index = None

# Find the line containing 'hipblaslt-Gflops'
# Find the header line containing 'hipblaslt-Gflops'
for i, line in enumerate(lines):
if 'hipblaslt-Gflops' in line:
index = i
Expand All @@ -119,16 +119,38 @@ def _process_raw_result(self, cmd_idx, raw_output):
if index is None:
raise ValueError('Line with "hipblaslt-Gflops" not found in the log.')

# Split the line into fields using a comma as the delimiter
# Parse the header to find the column index of 'hipblaslt-Gflops'.
# This is needed because hipBLASLt output format varies across versions:
# - v600 (old): 23 columns, Gflops at index -2
# - v1500 (new): 34 columns, added a_type/b_type/c_type/scaleA-D/amaxD/
# bias_type/aux_type/GB_s columns, Gflops at index -3
# Using header-based lookup ensures compatibility with both formats
# and any future column additions.
Comment on lines +123 to +128
Copy link

Copilot AI Mar 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the implementation no longer relies on positional indices, the comment stating exact positions (e.g., 'Gflops at index -2/-3') can become misleading as formats evolve. Consider removing the index claims and focusing the comment on the key guarantee (header-driven lookup + header/data width validation). Also, the comment mentions GB_s but the sample header uses hipblaslt-GB/s.

Suggested change
# This is needed because hipBLASLt output format varies across versions:
# - v600 (old): 23 columns, Gflops at index -2
# - v1500 (new): 34 columns, added a_type/b_type/c_type/scaleA-D/amaxD/
# bias_type/aux_type/GB_s columns, Gflops at index -3
# Using header-based lookup ensures compatibility with both formats
# and any future column additions.
# This is needed because the hipBLASLt output format varies across versions:
# - v600 (old): 23 columns.
# - v1500 (new): 34 columns, adding a_type/b_type/c_type/scaleA-D/amaxD/
# bias_type/aux_type/hipblaslt-GB/s columns.
# Using header-based lookup (plus header/data width validation) ensures
# compatibility across existing formats and resilience to future changes.

Copilot uses AI. Check for mistakes.
header_fields = lines[index].strip().split(',')
# Strip leading markers like '[0]' or '[0]:' from the first header field
header_fields[0] = header_fields[0].split(']')[-1].lstrip(':')
gflops_col = None
for col_idx, col_name in enumerate(header_fields):
if 'hipblaslt-Gflops' in col_name:
Copy link

Copilot AI Mar 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The column lookup uses substring matching (in), which can accidentally match unexpected column names (e.g., a future column like hipblaslt-Gflops-peak). This should be an exact match after normalizing whitespace (e.g., col_name.strip() == 'hipblaslt-Gflops') to ensure the parser reliably targets the intended metric.

Suggested change
if 'hipblaslt-Gflops' in col_name:
if col_name.strip() == 'hipblaslt-Gflops':

Copilot uses AI. Check for mistakes.
gflops_col = col_idx
break
if gflops_col is None:
raise ValueError('Column "hipblaslt-Gflops" not found in header.')

# Split the data line into fields using a comma as the delimiter
fields = lines[index + 1].strip().split(',')

# Check the number of fields and the format of the first two fields
if len(fields) != 23:
raise ValueError('Invalid result')
# Validate that the data line has the same number of columns as the header
if len(fields) != len(header_fields):
raise ValueError(
f'Field count mismatch: header has {len(header_fields)} columns '
f'but data has {len(fields)} columns'
)

# batch_count (index 3) and m,n,k (indices 4-6) are stable across all known formats
self._result.add_result(
f'{self._precision_in_commands[cmd_idx]}_{fields[3]}_{"_".join(fields[4:7])}_flops',
float(fields[-2]) / 1000
float(fields[gflops_col]) / 1000
)
except BaseException as e:
self._result.set_return_code(ReturnCode.MICROBENCHMARK_RESULT_PARSING_FAILURE)
Expand Down
31 changes: 30 additions & 1 deletion tests/benchmarks/micro_benchmarks/test_hipblaslt_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def test_hipblaslt_gemm_result_parsing(self):
benchmark._args = SimpleNamespace(shapes=['896,896,896'], in_types=['fp16'], log_raw_data=False)
benchmark._result = BenchmarkResult(self.benchmark_name, BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=1)

# Old format (hipBLASLt v600, 23 columns)
example_raw_output = """
hipBLASLt version: 600
hipBLASLt git version: 52776da
Expand All @@ -101,7 +102,7 @@ def test_hipblaslt_gemm_result_parsing(self):
[0]transA,transB,grouped_gemm,batch_count,m,n,k,alpha,lda,stride_a,beta,ldb,stride_b,ldc,stride_c,ldd,stride_d,d_type,compute_type,activation_type,bias_vector,hipblaslt-Gflops,us
N,N,0,1,896,896,896,1,896,802816,0,896,802816,896,802816,896,802816,fp16_r,f32_r,none,0, 58624.5, 24.54
"""
# Positive case - valid raw output
# Positive case - valid raw output (old format)
self.assertTrue(benchmark._process_raw_result(0, example_raw_output))
self.assertEqual(ReturnCode.SUCCESS, benchmark.return_code)

Expand All @@ -110,3 +111,31 @@ def test_hipblaslt_gemm_result_parsing(self):

# Negative case - invalid raw output
self.assertFalse(benchmark._process_raw_result(1, 'HipBLAS API failed'))

def test_hipblaslt_gemm_result_parsing_new_format(self):
    """Test hipblaslt-bench result parsing with the new 34-column format (hipBLASLt v1500+).

    The newer output adds a_type/b_type/c_type, scaleA-D, amaxD, bias_type,
    aux_type and hipblaslt-GB/s columns, so the Gflops value is no longer at a
    fixed position; the parser must locate it from the header line.
    """
    benchmark = self.get_benchmark()
    self.assertTrue(benchmark._preprocess())
    benchmark._args = SimpleNamespace(shapes=['4096,4096,4096'], in_types=['fp16'], log_raw_data=False)
    benchmark._result = BenchmarkResult(self.benchmark_name, BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=1)

    # New format (hipBLASLt v1500, 34 columns) - includes a_type, b_type, c_type, d_type,
    # scaleA, scaleB, scaleC, scaleD, amaxD, bias_type, aux_type, and hipblaslt-GB/s columns
    example_raw_output_new = """
hipBLASLt version: 1500
hipBLASLt git version: 8c69191d
Query device success: there are 1 devices. (Target device ID is 0)
Device ID 0 : gfx942:sramecc+:xnack-
with 205.6 GB memory, max. SCLK 2100 MHz, max. MCLK 1300 MHz, compute capability 9.4
maxGridDimX 2147483647, sharedMemPerBlock 65.5 KB, maxThreadsPerBlock 1024, warpSize 64

Is supported 1 / Total solutions: 1
[0]:transA,transB,grouped_gemm,batch_count,m,n,k,alpha,lda,stride_a,beta,ldb,stride_b,ldc,stride_c,ldd,stride_d,a_type,b_type,c_type,d_type,compute_type,scaleA,scaleB,scaleC,scaleD,amaxD,activation_type,bias_vector,bias_type,aux_type,hipblaslt-Gflops,hipblaslt-GB/s,us
N,N,0,1,4096,4096,4096,1,4096,16777216,0,4096,16777216,4096,16777216,4096,16777216,f16_r,f16_r,f16_r,f16_r,f32_r,0,0,0,0,0,none,0,f16_r,f16_r,678209,462.62,202.65
"""
    # Positive case - valid raw output (new format)
    self.assertTrue(benchmark._process_raw_result(0, example_raw_output_new))
    self.assertEqual(ReturnCode.SUCCESS, benchmark.return_code)

    # Assert on the metric under test (correct header-driven Gflops extraction)
    # rather than pinning the total metric count, and compare the float with a
    # tolerance instead of exact equality to avoid representation flakiness.
    self.assertIn('fp16_1_4096_4096_4096_flops', benchmark.result)
    self.assertAlmostEqual(678.209, benchmark.result['fp16_1_4096_4096_4096_flops'][0], places=3)
Comment on lines +140 to +141
Copy link

Copilot AI Mar 20, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

self.assertEqual(2, len(benchmark.result)) is a brittle assertion because it couples the test to the total number of emitted metrics rather than the behavior under test (correct Gflops extraction). Prefer asserting that the expected key exists (and optionally that no parsing error occurred) without pinning the total metric count. Also, comparing floats with assertEqual can be flaky due to floating-point representation; use assertAlmostEqual(..., places=...) for the numeric check.

Suggested change
self.assertEqual(2, len(benchmark.result))
self.assertEqual(678.209, benchmark.result['fp16_1_4096_4096_4096_flops'][0])
self.assertIn('fp16_1_4096_4096_4096_flops', benchmark.result)
self.assertAlmostEqual(678.209, benchmark.result['fp16_1_4096_4096_4096_flops'][0], places=3)

Copilot uses AI. Check for mistakes.
Loading