Skip to content

Commit c6bc478

Browse files
authored
Add ExtendedTestBase shared base class for benchmark and functional tests (#3689)
### Summary Extracts shared test infrastructure from `BenchmarkBase` into a new `ExtendedTestBase` base class, reducing code duplication between benchmark and functional tests. This addresses the reviewer feedback on #3648 about sharing common logic between `benchmark_base.py` and `functional_base.py`. ### Changes #### New: `utils/extended_test_base.py` - Created the `ExtendedTestBase` class with the following shared infrastructure: execute_command(), get_gpu_architecture(), detect_gpu_count(), create_test_result(), calculate_statistics(), upload_results() #### Updated: `benchmark/scripts/benchmark_base.py` - BenchmarkBase now inherits from ExtendedTestBase - Removed duplicated methods: execute_command, _detect_gpu_count, calculate_statistics, upload_results #### Updated: `benchmark/scripts/test_rccl_benchmark.py` - self._detect_gpu_count() → self.detect_gpu_count() (now inherited from base) #### Updated: `utils/__init__.py`, `README.md`, `utils/README.md` - Added ExtendedTestBase to exports and documentation #### Inheritance Hierarchy ``` ExtendedTestBase (utils/extended_test_base.py) ├── BenchmarkBase (benchmark/scripts/benchmark_base.py) │ ├── ROCfftBenchmark, RCCLBenchmark, ROCblasBenchmark, ... └── FunctionalBase (functional/scripts/functional_base.py) ← will inherit in follow-up PR ├── MIOpenDriverConv, RcclTestInfra, ... ``` #### Follow-up - Update FunctionalBase to inherit from ExtendedTestBase (after this PR merges) --------- Signed-off-by: Lenine Ajagappane <Lenine.Ajagappane@amd.com>
1 parent 864f3d3 commit c6bc478

6 files changed

Lines changed: 271 additions & 187 deletions

File tree

tests/extended_tests/README.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,13 +82,14 @@ extended_tests/
8282
│ └── README.md # Functional-specific docs (placeholder - tests to be added in follow-up PRs)
8383
8484
└── utils/ # SHARED utilities for all test types
85+
├── extended_test_base.py # ExtendedTestBase - shared base class for all tests
86+
├── extended_test_client.py # ExtendedTestClient - system detection & result reporting
8587
├── exceptions.py # Custom exception classes
8688
│ ├── BenchmarkExecutionError # Execution/parsing failures
8789
│ ├── BenchmarkResultError # Result validation failures
8890
│ └── FrameworkException # Base exception
8991
9092
├── logger.py # Logging utilities
91-
├── extended_test_client.py # ExtendedTestClient API
9293
├── constants.py # Global constants
9394
9495
├── config/ # Configuration parsers

tests/extended_tests/benchmark/scripts/benchmark_base.py

Lines changed: 39 additions & 180 deletions
Original file line numberDiff line numberDiff line change
@@ -3,30 +3,26 @@
33

44
"""Base class for benchmark tests with common functionality."""
55

6-
import os
7-
import shlex
86
import shutil
9-
import subprocess
107
import sys
118
from pathlib import Path
12-
from typing import Dict, List, Tuple, Any, IO
9+
from typing import Dict, List, Any
1310
from prettytable import PrettyTable
1411

1512
# Add parent directory to path for utils import
1613
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
17-
# Add build_tools/github_actions to path for github_actions_utils
18-
sys.path.insert(
19-
0, str(Path(__file__).resolve().parents[4] / "build_tools" / "github_actions")
20-
)
21-
from utils import ExtendedTestClient, HardwareDetector
14+
2215
from utils.logger import log
2316
from utils.exceptions import TestExecutionError
24-
from github_actions_utils import gha_append_step_summary
17+
from utils.extended_test_base import ExtendedTestBase, gha_append_step_summary
2518

2619

27-
class BenchmarkBase:
20+
class BenchmarkBase(ExtendedTestBase):
2821
"""Base class providing common benchmark logic.
2922
23+
Inherits shared infrastructure from ExtendedTestBase (execute_command,
24+
create_test_result, calculate_statistics, upload_results, etc.).
25+
3026
Child classes must implement run_benchmarks() and parse_results().
3127
"""
3228

@@ -37,88 +33,9 @@ def __init__(self, benchmark_name: str, display_name: str = None):
3733
benchmark_name: Internal benchmark name (e.g., 'rocfft')
3834
display_name: Display name for reports (e.g., 'ROCfft'), defaults to benchmark_name
3935
"""
36+
super().__init__(benchmark_name, display_name or benchmark_name.upper())
4037
self.benchmark_name = benchmark_name
41-
self.display_name = display_name or benchmark_name.upper()
42-
43-
# Environment variables
44-
self.therock_bin_dir = os.getenv("THEROCK_BIN_DIR")
45-
self.artifact_run_id = os.getenv("ARTIFACT_RUN_ID")
46-
self.amdgpu_families = os.getenv("AMDGPU_FAMILIES")
4738
self.script_dir = Path(__file__).resolve().parent
48-
self.therock_dir = Path(__file__).resolve().parents[4]
49-
50-
# Initialize test client (will be set in run())
51-
self.client = None
52-
53-
def execute_command(
54-
self, cmd: List[str], log_file_handle: IO, env: Dict[str, str] = None
55-
) -> int:
56-
"""Execute a command and stream output to log file.
57-
58-
Args:
59-
cmd: Command list to execute
60-
log_file_handle: File handle to write output
61-
env: Optional environment variables to set
62-
63-
Returns:
64-
Exit code from the command
65-
"""
66-
log.info(f"++ Exec [{self.therock_dir}]$ {shlex.join(cmd)}")
67-
log_file_handle.write(f"{shlex.join(cmd)}\n")
68-
69-
# Merge custom env with current environment
70-
process_env = os.environ.copy()
71-
if env:
72-
process_env.update(env)
73-
74-
process = subprocess.Popen(
75-
cmd,
76-
cwd=self.therock_dir,
77-
stdout=subprocess.PIPE,
78-
stderr=subprocess.STDOUT,
79-
text=True,
80-
bufsize=1,
81-
env=process_env,
82-
)
83-
84-
for line in process.stdout:
85-
log.info(line.strip())
86-
log_file_handle.write(f"{line}")
87-
88-
process.wait()
89-
return process.returncode
90-
91-
def _detect_gpu_count(self) -> int:
92-
"""Detect the number of available GPUs using HardwareDetector.
93-
94-
Returns:
95-
Number of GPUs detected
96-
97-
Raises:
98-
RuntimeError: If no GPUs detected or detection fails
99-
"""
100-
try:
101-
detector = HardwareDetector()
102-
gpu_list = detector.detect_gpu()
103-
gpu_count = len(gpu_list)
104-
105-
if gpu_count == 0:
106-
raise RuntimeError(
107-
"No GPUs detected. Benchmarks require at least one GPU. "
108-
"Ensure ROCm drivers are installed and GPU devices are accessible."
109-
)
110-
111-
log.info(f"Detected {gpu_count} GPU(s)")
112-
return gpu_count
113-
114-
except RuntimeError:
115-
# Re-raise RuntimeError as-is
116-
raise
117-
except Exception as e:
118-
raise RuntimeError(
119-
f"Failed to detect GPUs: {e}. "
120-
"Ensure ROCm drivers are installed and GPU devices are accessible."
121-
) from e
12239

12340
def _validate_openmpi(self) -> None:
12441
"""Check if OpenMPI is installed and available in the system.
@@ -143,7 +60,11 @@ def create_test_result(
14360
flag: str,
14461
**kwargs,
14562
) -> Dict[str, Any]:
146-
"""Create a standardized test result dictionary.
63+
"""Create a standardized benchmark test result dictionary.
64+
65+
Overrides ExtendedTestBase.create_test_result to enforce benchmark-specific
66+
required fields (score, unit, flag) and provide defaults for
67+
batch_size and ngpu.
14768
14869
Args:
14970
test_name: Benchmark name
@@ -157,91 +78,22 @@ def create_test_result(
15778
Returns:
15879
Dict[str, Any]: Test result dictionary with test data and configuration
15980
"""
160-
# Extract common parameters with defaults
161-
batch_size = kwargs.get("batch_size", 0)
162-
ngpu = kwargs.get("ngpu", 1)
163-
164-
# Build test config with all parameters
165-
test_config = {
166-
"test_name": test_name,
167-
"sub_test_name": subtest_name,
168-
"python_version": f"{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}",
169-
"environment_dependencies": [],
170-
"batch_size": batch_size,
171-
"ngpu": ngpu,
172-
}
173-
174-
# Add any additional kwargs to test_config
175-
for key, value in kwargs.items():
176-
if key not in ["batch_size", "ngpu"]:
177-
test_config[key] = value
178-
179-
return {
180-
"test_name": test_name,
181-
"subtest": subtest_name,
182-
"batch_size": batch_size,
183-
"ngpu": ngpu,
184-
"status": status,
185-
"score": float(score),
186-
"unit": unit,
187-
"flag": flag,
188-
"test_config": test_config,
189-
}
190-
191-
def calculate_statistics(
192-
self, test_results: List[Dict[str, Any]]
193-
) -> Dict[str, Any]:
194-
"""Calculate test statistics from results.
195-
196-
Args:
197-
test_results: List of test result dictionaries with 'status' key
198-
199-
Returns:
200-
Dictionary with:
201-
- passed: Number of passed tests
202-
- failed: Number of failed tests
203-
- total: Total number of tests
204-
- overall_status: 'PASS' if no failures, else 'FAIL'
205-
"""
206-
passed = sum(1 for r in test_results if r.get("status") == "PASS")
207-
failed = sum(1 for r in test_results if r.get("status") == "FAIL")
208-
overall_status = "PASS" if failed == 0 else "FAIL"
209-
210-
return {
211-
"passed": passed,
212-
"failed": failed,
213-
"total": len(test_results),
214-
"overall_status": overall_status,
215-
}
216-
217-
def upload_results(
218-
self, test_results: List[Dict[str, Any]], stats: Dict[str, Any]
219-
) -> bool:
220-
"""Upload results to API and save locally."""
221-
log.info("Uploading Results to API")
222-
success = self.client.upload_results(
223-
test_name=f"{self.benchmark_name}_benchmark",
224-
test_results=test_results,
225-
test_status=stats["overall_status"],
226-
test_metadata={
227-
"artifact_run_id": self.artifact_run_id,
228-
"amdgpu_families": self.amdgpu_families,
229-
"benchmark_name": self.benchmark_name,
230-
"total_subtests": stats["total"],
231-
"passed_subtests": stats["passed"],
232-
"failed_subtests": stats["failed"],
233-
},
234-
save_local=True,
235-
output_dir=str(self.script_dir / "results"),
81+
# Extract benchmark-specific parameters with defaults
82+
batch_size = kwargs.pop("batch_size", 0)
83+
ngpu = kwargs.pop("ngpu", 1)
84+
85+
return super().create_test_result(
86+
test_name=test_name,
87+
subtest_name=subtest_name,
88+
status=status,
89+
score=float(score),
90+
unit=unit,
91+
flag=flag,
92+
batch_size=batch_size,
93+
ngpu=ngpu,
94+
**kwargs,
23695
)
23796

238-
if success:
239-
log.info("Results uploaded successfully")
240-
else:
241-
log.info("Results saved locally only (API upload disabled or failed)")
242-
243-
return success
244-
24597
def compare_with_lkg(self, tables: Any) -> Any:
24698
"""Compare results with Last Known Good baseline."""
24799
log.info("Comparing results with LKG")
@@ -319,10 +171,6 @@ def run(self) -> int:
319171
"""Execute benchmark workflow and return exit code (0=PASS, 1=FAIL)."""
320172
log.info(f"Initializing {self.display_name} Benchmark Test")
321173

322-
# Initialize extended test client and print system info
323-
self.client = ExtendedTestClient(auto_detect=True)
324-
self.client.print_system_summary()
325-
326174
# Run benchmarks (implemented by child class)
327175
self.run_benchmarks()
328176

@@ -338,7 +186,18 @@ def run(self) -> int:
338186
log.info(f"Test Summary: {stats['passed']} passed, {stats['failed']} failed")
339187

340188
# Upload results
341-
self.upload_results(test_results, stats)
189+
self.upload_results(
190+
test_results=test_results,
191+
stats=stats,
192+
test_type="benchmark",
193+
output_dir=str(self.script_dir / "results"),
194+
extra_metadata={
195+
"benchmark_name": self.benchmark_name,
196+
"total_subtests": stats["total"],
197+
"passed_subtests": stats["passed"],
198+
"failed_subtests": stats["failed"],
199+
},
200+
)
342201

343202
# Compare with LKG (compares each table individually and prints results)
344203
final_tables = self.compare_with_lkg(tables)

tests/extended_tests/benchmark/scripts/test_rccl_benchmark.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
sys.path.insert(0, str(Path(__file__).resolve().parents[2])) # For extended_tests/utils
1818
sys.path.insert(0, str(Path(__file__).parent)) # For benchmark_base
1919
from benchmark_base import BenchmarkBase, run_benchmark_main
20+
from github_actions_utils import get_visible_gpu_count
2021
from utils.logger import log
2122

2223

@@ -26,7 +27,7 @@ class RCCLBenchmark(BenchmarkBase):
2627
def __init__(self):
2728
super().__init__(benchmark_name="rccl", display_name="RCCL")
2829
self.log_file = self.script_dir / "rccl_bench.log"
29-
self.ngpu = self._detect_gpu_count()
30+
self.ngpu = get_visible_gpu_count(therock_bin_dir=self.therock_bin_dir)
3031

3132
# Validate OpenMPI is available (from base class)
3233
self._validate_openmpi()

tests/extended_tests/utils/README.md

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@ Utility modules organized into logical subdirectories for maintainability and sc
77
```
88
extended_tests/utils/
99
├── __init__.py # Public exports
10-
├── extended_test_client.py # Main ExtendedTestClient API
10+
├── extended_test_base.py # ExtendedTestBase - shared base class for all tests
11+
├── extended_test_client.py # ExtendedTestClient - system detection & result reporting
1112
├── constants.py # Framework constants
1213
├── exceptions.py # Custom exceptions
1314
├── logger.py # Logging configuration
@@ -36,12 +37,28 @@ extended_tests/utils/
3637

3738
## Usage
3839

39-
### From Benchmark Scripts
40+
### From Extended Test Base Classes
4041

41-
Benchmark scripts add `extended_tests/` to `sys.path`, then import:
42+
Both `BenchmarkBase` and `FunctionalBase` inherit from `ExtendedTestBase`, which provides
43+
shared infrastructure (command execution, GPU detection, result creation, statistics, uploads):
4244

4345
```python
44-
# Import path setup (already done in benchmark_base.py)
46+
# In benchmark_base.py / functional_base.py
47+
from utils.extended_test_base import ExtendedTestBase
48+
49+
50+
class BenchmarkBase(ExtendedTestBase): ...
51+
52+
53+
class FunctionalBase(ExtendedTestBase): ...
54+
```
55+
56+
### From Test Scripts
57+
58+
Test scripts add `extended_tests/` to `sys.path`, then import:
59+
60+
```python
61+
# Import path setup (already done in base classes)
4562
sys.path.insert(
4663
0, str(Path(__file__).resolve().parents[2])
4764
) # Adds extended_tests/ to path
@@ -52,6 +69,7 @@ from utils.constants import Constants
5269
from utils.exceptions import ConfigurationError
5370

5471
# Main API classes
72+
from utils.extended_test_base import ExtendedTestBase
5573
from utils.extended_test_client import ExtendedTestClient
5674
from utils.system.system_detector import SystemDetector
5775
from utils.config.config_helper import ConfigHelper
@@ -80,10 +98,11 @@ from utils.results import ResultsHandler, ResultsAPI
8098

8199
### Root Level
82100

101+
- **extended_test_base.py** - `ExtendedTestBase` shared base class for benchmark and functional tests (command execution, GPU detection, test result creation, statistics, result uploads)
102+
- **extended_test_client.py** - `ExtendedTestClient` API for system detection and result reporting
83103
- **constants.py** - Framework constants and defaults
84104
- **exceptions.py** - Custom exception classes
85105
- **logger.py** - Logging configuration
86-
- **extended_test_client.py** - Main ExtendedTestClient API
87106

88107
### Config
89108

tests/extended_tests/utils/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
"exceptions",
2222
# Main API
2323
"ExtendedTestClient",
24+
# Shared test base class
25+
"ExtendedTestBase",
2426
# Commonly used exports
2527
"SystemContext",
2628
"SystemDetector",

0 commit comments

Comments (0)