-
Notifications
You must be signed in to change notification settings - Fork 86
Enhancement - Fix hipblaslt-gemm result parsing for hipBLASLt v1500+ output format #791
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
|
|
@@ -110,7 +110,7 @@ def _process_raw_result(self, cmd_idx, raw_output): | |||||
| lines = raw_output.splitlines() | ||||||
| index = None | ||||||
|
|
||||||
| # Find the line containing 'hipblaslt-Gflops' | ||||||
| # Find the header line containing 'hipblaslt-Gflops' | ||||||
| for i, line in enumerate(lines): | ||||||
| if 'hipblaslt-Gflops' in line: | ||||||
| index = i | ||||||
|
|
@@ -119,16 +119,38 @@ def _process_raw_result(self, cmd_idx, raw_output): | |||||
| if index is None: | ||||||
| raise ValueError('Line with "hipblaslt-Gflops" not found in the log.') | ||||||
|
|
||||||
| # Split the line into fields using a comma as the delimiter | ||||||
| # Parse the header to find the column index of 'hipblaslt-Gflops'. | ||||||
| # This is needed because hipBLASLt output format varies across versions: | ||||||
| # - v600 (old): 23 columns, Gflops at index -2 | ||||||
| # - v1500 (new): 34 columns, added a_type/b_type/c_type/scaleA-D/amaxD/ | ||||||
| # bias_type/aux_type/GB_s columns, Gflops at index -3 | ||||||
| # Using header-based lookup ensures compatibility with both formats | ||||||
| # and any future column additions. | ||||||
| header_fields = lines[index].strip().split(',') | ||||||
| # Strip leading markers like '[0]' or '[0]:' from the first header field | ||||||
| header_fields[0] = header_fields[0].split(']')[-1].lstrip(':') | ||||||
| gflops_col = None | ||||||
| for col_idx, col_name in enumerate(header_fields): | ||||||
| if 'hipblaslt-Gflops' in col_name: | ||||||
|
||||||
| if 'hipblaslt-Gflops' in col_name: | |
| if col_name.strip() == 'hipblaslt-Gflops': |
| Original file line number | Diff line number | Diff line change | ||||||||
|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -87,6 +87,7 @@ def test_hipblaslt_gemm_result_parsing(self): | |||||||||
| benchmark._args = SimpleNamespace(shapes=['896,896,896'], in_types=['fp16'], log_raw_data=False) | ||||||||||
| benchmark._result = BenchmarkResult(self.benchmark_name, BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=1) | ||||||||||
|
|
||||||||||
| # Old format (hipBLASLt v600, 23 columns) | ||||||||||
| example_raw_output = """ | ||||||||||
| hipBLASLt version: 600 | ||||||||||
| hipBLASLt git version: 52776da | ||||||||||
|
|
@@ -101,7 +102,7 @@ def test_hipblaslt_gemm_result_parsing(self): | |||||||||
| [0]transA,transB,grouped_gemm,batch_count,m,n,k,alpha,lda,stride_a,beta,ldb,stride_b,ldc,stride_c,ldd,stride_d,d_type,compute_type,activation_type,bias_vector,hipblaslt-Gflops,us | ||||||||||
| N,N,0,1,896,896,896,1,896,802816,0,896,802816,896,802816,896,802816,fp16_r,f32_r,none,0, 58624.5, 24.54 | ||||||||||
| """ | ||||||||||
| # Positive case - valid raw output | ||||||||||
| # Positive case - valid raw output (old format) | ||||||||||
| self.assertTrue(benchmark._process_raw_result(0, example_raw_output)) | ||||||||||
| self.assertEqual(ReturnCode.SUCCESS, benchmark.return_code) | ||||||||||
|
|
||||||||||
|
|
@@ -110,3 +111,31 @@ def test_hipblaslt_gemm_result_parsing(self): | |||||||||
|
|
||||||||||
| # Negative case - invalid raw output | ||||||||||
| self.assertFalse(benchmark._process_raw_result(1, 'HipBLAS API failed')) | ||||||||||
|
|
||||||||||
| def test_hipblaslt_gemm_result_parsing_new_format(self): | ||||||||||
| """Test hipblaslt-bench benchmark result parsing with new 34-column format (hipBLASLt v1500+).""" | ||||||||||
| benchmark = self.get_benchmark() | ||||||||||
| self.assertTrue(benchmark._preprocess()) | ||||||||||
| benchmark._args = SimpleNamespace(shapes=['4096,4096,4096'], in_types=['fp16'], log_raw_data=False) | ||||||||||
| benchmark._result = BenchmarkResult(self.benchmark_name, BenchmarkType.MICRO, ReturnCode.SUCCESS, run_count=1) | ||||||||||
|
|
||||||||||
| # New format (hipBLASLt v1500, 34 columns) - includes a_type, b_type, c_type, d_type, | ||||||||||
| # scaleA, scaleB, scaleC, scaleD, amaxD, bias_type, aux_type, and hipblaslt-GB/s columns | ||||||||||
| example_raw_output_new = """ | ||||||||||
| hipBLASLt version: 1500 | ||||||||||
| hipBLASLt git version: 8c69191d | ||||||||||
| Query device success: there are 1 devices. (Target device ID is 0) | ||||||||||
| Device ID 0 : gfx942:sramecc+:xnack- | ||||||||||
| with 205.6 GB memory, max. SCLK 2100 MHz, max. MCLK 1300 MHz, compute capability 9.4 | ||||||||||
| maxGridDimX 2147483647, sharedMemPerBlock 65.5 KB, maxThreadsPerBlock 1024, warpSize 64 | ||||||||||
|
|
||||||||||
| Is supported 1 / Total solutions: 1 | ||||||||||
| [0]:transA,transB,grouped_gemm,batch_count,m,n,k,alpha,lda,stride_a,beta,ldb,stride_b,ldc,stride_c,ldd,stride_d,a_type,b_type,c_type,d_type,compute_type,scaleA,scaleB,scaleC,scaleD,amaxD,activation_type,bias_vector,bias_type,aux_type,hipblaslt-Gflops,hipblaslt-GB/s,us | ||||||||||
| N,N,0,1,4096,4096,4096,1,4096,16777216,0,4096,16777216,4096,16777216,4096,16777216,f16_r,f16_r,f16_r,f16_r,f32_r,0,0,0,0,0,none,0,f16_r,f16_r,678209,462.62,202.65 | ||||||||||
| """ | ||||||||||
| # Positive case - valid raw output (new format) | ||||||||||
| self.assertTrue(benchmark._process_raw_result(0, example_raw_output_new)) | ||||||||||
| self.assertEqual(ReturnCode.SUCCESS, benchmark.return_code) | ||||||||||
|
|
||||||||||
| self.assertEqual(2, len(benchmark.result)) | ||||||||||
| self.assertEqual(678.209, benchmark.result['fp16_1_4096_4096_4096_flops'][0]) | ||||||||||
|
Comment on lines +140 to +141
|
||||||||||
| self.assertEqual(2, len(benchmark.result)) | |
| self.assertEqual(678.209, benchmark.result['fp16_1_4096_4096_4096_flops'][0]) | |
| self.assertIn('fp16_1_4096_4096_4096_flops', benchmark.result) | |
| self.assertAlmostEqual(678.209, benchmark.result['fp16_1_4096_4096_4096_flops'][0], places=3) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Since the implementation no longer relies on positional indices, the comment stating exact positions (e.g., 'Gflops at index -2/-3') can become misleading as formats evolve. Consider removing the index claims and focusing the comment on the key guarantee (header-driven lookup + header/data width validation). Also, the comment mentions `GB_s` but the sample header uses `hipblaslt-GB/s`.