Skip to content

Commit 51d0748

Browse files
authored
Testing: Defer leading dimension calculation when not specified in YAML (#467)
Benchmarking: Fix run to run variation caused by memory bloating in case lds not mentioned The YAML parser's computeMaxMinLegalLD() pre-computed a single worst-case leading dimension across all cartesian-product parameter combinations. This over-allocated LD will cause memory bloating. Replace the parse-time LD computation with a ld=-1 sentinel that defers resolution to the point of use: - Remove computeMaxMinLegalLD() from yaml_parser.cc,emit ld=-1 when LDA/LDB/LDC are absent in YAML - Update check_valid_params() in test_gemm.cc, test_batch_gemm.cc to resolve ld=-1 into the correct per-config minimum LD before validating constraints - Add checkValidGemmParams() in bench_gemm.cc to validate configs before fixture allocation, skipping invalid cartesian combinations early and avoiding unnecessary Matrix construction - Fixed testsuite segfaults being caused when negative ld's are passed - Refactored unit tests to use new sentinel leading dimension = -1 The Matrix constructor handles leadingDim==-1 by computing the correct value from rows/cols, so the sentinel flows cleanly through both test and benchmark paths. AMD-Internal: [CPUPL-8118] [CPUPL-8146] Signed-off-by: Vishal A <Vishal.Akula@amd.com>
1 parent 32ffabb commit 51d0748

10 files changed

Lines changed: 420 additions & 394 deletions

File tree

bench/bench_gemm.cc

Lines changed: 109 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@ class OptimizedGemmBenchmark : public ConcreteUAL
119119

120120
const size_t alignment = 4096;
121121

122+
// Create matrices
122123
A_ = Matrix(a_rows, a_cols, a_type_, layout_, config.lda, transA_,
123124
false, alignment);
124125
B_ = Matrix(b_rows, b_cols, b_type_, layout_, config.ldb, transB_,
@@ -271,6 +272,91 @@ class OptimizedGemmBenchmark : public ConcreteUAL
271272
// Typedef for DLP backend
272273
using OptimizedGemmBenchmarkDlp = OptimizedGemmBenchmark<UalDlp>;
273274

275+
// ============================================================================
276+
// BENCHMARK PARAMETER VALIDATION
277+
// ============================================================================
278+
279+
/**
280+
* @brief Validate GEMM parameters before benchmark registration
281+
*
282+
* Checks dimension, layout, type, and leading dimension constraints to skip
283+
* invalid configurations early, avoiding unnecessary fixture allocation.
284+
* Logic mirrors UalRef::checkValidGemmParams but operates directly on
285+
* GemmBenchConfig without requiring Matrix objects.
286+
*
287+
* @param config The benchmark configuration to validate
288+
* @return true if parameters are valid, false otherwise
289+
*/
290+
static bool
291+
checkValidGemmParams(const GemmBenchConfig& config)
292+
{
293+
md_t m = config.m;
294+
md_t n = config.n;
295+
md_t k = config.k;
296+
297+
// Validate dimensions
298+
if (m <= 0 || n <= 0 || k <= 0) {
299+
return false;
300+
}
301+
302+
bool row_stored = (config.storage_format == MatrixLayout::ROW_MAJOR);
303+
bool col_stored = (config.storage_format == MatrixLayout::COLUMN_MAJOR);
304+
305+
bool nota = !config.transA;
306+
bool notb = !config.transB;
307+
bool ta = config.transA;
308+
bool tb = config.transB;
309+
310+
// Physical matrix dimensions
311+
md_t a_rows = ta ? k : m;
312+
md_t a_cols = ta ? m : k;
313+
md_t b_rows = tb ? n : k;
314+
md_t b_cols = tb ? k : n;
315+
316+
// LD = -1 means not specified in YAML; compute from dimensions.
317+
// Row-major: LD = cols, Col-major: LD = rows.
318+
md_t lda = config.lda;
319+
md_t ldb = config.ldb;
320+
md_t ldc = config.ldc;
321+
322+
if (lda == -1) {
323+
lda = row_stored ? a_cols : a_rows;
324+
}
325+
if (ldb == -1) {
326+
ldb = row_stored ? b_cols : b_rows;
327+
}
328+
if (ldc == -1) {
329+
ldc = row_stored ? n : m;
330+
}
331+
332+
// Validate leading dimensions
333+
// Matrix A leading dimension checks
334+
if (row_stored && ((nota && (lda < k)) || (ta && (lda < m)))) {
335+
return false;
336+
}
337+
if (col_stored && ((nota && (lda < m)) || (ta && (lda < k)))) {
338+
return false;
339+
}
340+
341+
// Matrix B leading dimension checks
342+
if (row_stored && ((notb && (ldb < n)) || (tb && (ldb < k)))) {
343+
return false;
344+
}
345+
if (col_stored && ((notb && (ldb < k)) || (tb && (ldb < n)))) {
346+
return false;
347+
}
348+
349+
// Matrix C leading dimension checks
350+
if (row_stored && (ldc < n)) {
351+
return false;
352+
}
353+
if (col_stored && (ldc < m)) {
354+
return false;
355+
}
356+
357+
return true;
358+
}
359+
274360
// ============================================================================
275361
// OPTIMIZED BENCHMARK REGISTRATION
276362
// ============================================================================
@@ -295,7 +381,16 @@ registerOptimizedBenchmarks(const std::vector<GemmBenchConfig>& configs)
295381
numa_node = std::atoi(numa_env);
296382
}
297383

384+
std::cerr << "================================================"
385+
<< std::endl;
298386
for (const auto& config : configs) {
387+
388+
if (!checkValidGemmParams(config)) {
389+
std::cerr << "Skipping Invalid Configuration : " << config.name
390+
<< std::endl;
391+
continue;
392+
}
393+
299394
// Create fixture ONCE per benchmark
300395
auto fixture =
301396
std::make_unique<OptimizedGemmBenchmarkDlp>(config, numa_node);
@@ -311,6 +406,8 @@ registerOptimizedBenchmarks(const std::vector<GemmBenchConfig>& configs)
311406
->Unit(benchmark::kMillisecond)
312407
->MinTime(3.0);
313408
}
409+
std::cerr << "================================================"
410+
<< std::endl;
314411
}
315412

316413
// ============================================================================
@@ -336,10 +433,10 @@ main(int argc, char** argv)
336433
std::string yaml_file = parser.getYamlFile();
337434
if (yaml_file.empty()) {
338435
yaml_file = BENCH_CONFIG_DIR "/gemm_bench_f32_basic_config.yaml";
339-
std::cout << "Using default YAML configuration file: " << yaml_file
436+
std::cerr << "Using default YAML configuration file: " << yaml_file
340437
<< std::endl;
341438
} else {
342-
std::cout << "Using YAML configuration file: " << yaml_file
439+
std::cerr << "Using YAML configuration file: " << yaml_file
343440
<< std::endl;
344441
}
345442

@@ -361,27 +458,27 @@ main(int argc, char** argv)
361458
return 1;
362459
}
363460

364-
std::cout << "=== AOCL-DLP Benchmark ===" << std::endl;
365-
std::cout << "Configuration file: " << yaml_file << std::endl;
366-
std::cout << "Loaded " << configs.size() << " configurations" << std::endl;
461+
std::cerr << "=== AOCL-DLP Benchmark ===" << std::endl;
462+
std::cerr << "Configuration file: " << yaml_file << std::endl;
463+
std::cerr << "Loaded " << configs.size() << " configurations" << std::endl;
367464

368465
#ifdef DLP_ENABLE_OPENMP
369-
std::cout << "OpenMP: Enabled" << std::endl;
370-
std::cout << " OMP_NUM_THREADS = " << omp_get_max_threads() << std::endl;
466+
std::cerr << "OpenMP: Enabled" << std::endl;
467+
std::cerr << " OMP_NUM_THREADS = " << omp_get_max_threads() << std::endl;
371468
char* omp_proc_bind = std::getenv("OMP_PROC_BIND");
372469
if (omp_proc_bind) {
373-
std::cout << " OMP_PROC_BIND = " << omp_proc_bind << std::endl;
470+
std::cerr << " OMP_PROC_BIND = " << omp_proc_bind << std::endl;
374471
} else {
375-
std::cout << " OMP_PROC_BIND = (not set)" << std::endl;
472+
std::cerr << " OMP_PROC_BIND = (not set)" << std::endl;
376473
}
377474
char* omp_places = std::getenv("OMP_PLACES");
378475
if (omp_places) {
379-
std::cout << " OMP_PLACES = " << omp_places << std::endl;
476+
std::cerr << " OMP_PLACES = " << omp_places << std::endl;
380477
} else {
381-
std::cout << " OMP_PLACES = (not set)" << std::endl;
478+
std::cerr << " OMP_PLACES = (not set)" << std::endl;
382479
}
383480
#else
384-
std::cout << "OpenMP: Disabled" << std::endl;
481+
std::cerr << "OpenMP: Disabled" << std::endl;
385482
#endif
386483

387484
// Register all benchmarks

tests/classic/test_batch_gemm.cc

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -145,9 +145,9 @@ makeF32Group(md_t m,
145145
md_t b_rows = transB ? n : k;
146146
md_t b_cols = transB ? k : n;
147147

148-
Matrix A(a_rows, a_cols, MatrixType::f32, layoutA, 0, transA);
149-
Matrix B(b_rows, b_cols, MatrixType::f32, layoutB, 0, transB);
150-
Matrix C(m, n, MatrixType::f32, layoutC, 0, false);
148+
Matrix A(a_rows, a_cols, MatrixType::f32, layoutA, -1, transA);
149+
Matrix B(b_rows, b_cols, MatrixType::f32, layoutB, -1, transB);
150+
Matrix C(m, n, MatrixType::f32, layoutC, -1, false);
151151

152152
A.fillRandom(static_cast<uint32_t>(42 + seed_offset + i));
153153
B.fillRandom(static_cast<uint32_t>(142 + seed_offset + i));
@@ -588,6 +588,24 @@ check_valid_batch_params(const BatchGemmTestConfig& config)
588588
}
589589
}
590590

591+
// Physical matrix dimensions
592+
md_t a_rows = transA ? k : m;
593+
md_t a_cols = transA ? m : k;
594+
md_t b_rows = transB ? n : k;
595+
md_t b_cols = transB ? k : n;
596+
597+
// LD < 0 means not specified in YAML; compute from dimensions.
598+
// Row-major: LD = cols, Col-major: LD = rows.
599+
if (lda == -1) {
600+
lda = row_stored ? a_cols : a_rows;
601+
}
602+
if (ldb == -1) {
603+
ldb = row_stored ? b_cols : b_rows;
604+
}
605+
if (ldc == -1) {
606+
ldc = row_stored ? n : m;
607+
}
608+
591609
// Leading dimension checks for matrix A
592610
// Skip for reordered matrices as they have custom layouts
593611
if (!reorderA) {

tests/classic/test_comprehensive_postops.cc

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -256,11 +256,11 @@ TEST_F(ComprehensivePostOpsTest, PostOpsWithGemmIntegrationTest)
256256
std::cout << "=== PostOps GEMM Integration Test ===" << std::endl;
257257

258258
// Create matrices for testing
259-
Matrix A(10, 10, MatrixType::f32, MatrixLayout::ROW_MAJOR, 0, false);
260-
Matrix B(10, 10, MatrixType::f32, MatrixLayout::ROW_MAJOR, 0, false);
261-
Matrix C_dlp(10, 10, MatrixType::f32, MatrixLayout::ROW_MAJOR, 0,
259+
Matrix A(10, 10, MatrixType::f32, MatrixLayout::ROW_MAJOR, -1, false);
260+
Matrix B(10, 10, MatrixType::f32, MatrixLayout::ROW_MAJOR, -1, false);
261+
Matrix C_dlp(10, 10, MatrixType::f32, MatrixLayout::ROW_MAJOR, -1,
262262
false);
263-
Matrix C_ref(10, 10, MatrixType::f32, MatrixLayout::ROW_MAJOR, 0,
263+
Matrix C_ref(10, 10, MatrixType::f32, MatrixLayout::ROW_MAJOR, -1,
264264
false);
265265

266266
// Initialize matrices
@@ -382,9 +382,9 @@ TEST_F(ComprehensivePostOpsTest, BackwardCompatibilityTest)
382382
std::cout << " PostOps: CORRECTLY ABSENT" << std::endl;
383383

384384
// Test normal GEMM operation
385-
Matrix A(4, 4, MatrixType::f32, MatrixLayout::ROW_MAJOR, 0, false);
386-
Matrix B(4, 4, MatrixType::f32, MatrixLayout::ROW_MAJOR, 0, false);
387-
Matrix C(4, 4, MatrixType::f32, MatrixLayout::ROW_MAJOR, 0, false);
385+
Matrix A(4, 4, MatrixType::f32, MatrixLayout::ROW_MAJOR, -1, false);
386+
Matrix B(4, 4, MatrixType::f32, MatrixLayout::ROW_MAJOR, -1, false);
387+
Matrix C(4, 4, MatrixType::f32, MatrixLayout::ROW_MAJOR, -1, false);
388388

389389
A.fillValue(0.5f);
390390
B.fillValue(0.2f);

0 commit comments

Comments
 (0)