Merge pull request #48 from redhat-performance/fix/RPOPC-1316-version-conflation

grdumas · web-flow · commit fe9c9db52135 · 2026-06-15T20:13:14.000-04:00
RPOPC-1316: Fix BaseProcessor.build_test_info() version conflation
diff --git a/src/chronicler/processors/base_processor.py b/src/chronicler/processors/base_processor.py
@@ -196,10 +196,42 @@ def build_metadata(self) -> Metadata:
         )
 
     def build_test_info(self) -> TestInfo:
-        """Build test information section"""
+        """
+        Build test information section.
+
+        By default, this method extracts the wrapper version from the test_info
+        file (the orchestrator's test repository version, e.g., "v2.8") and sets
+        BOTH test.version and test.wrapper_version to this same value.
+
+        IMPORTANT: This is the intentional base behavior, but it is often semantically incorrect.
+        For benchmarks with independent versioning (e.g., FIO 3.36, STREAMS 5.10),
+        processors MUST override this method to extract and return the benchmark's
+        own version in test.version while preserving wrapper_version.
+
+        Examples of processors that should override:
+        - FIO: test.version = "fio-3.36", wrapper_version = "v2.1"
+        - STREAMS: test.version = "5.10", wrapper_version = "v2.8"
+        - CoreMark: test.version = "v1.01", wrapper_version = "v2.0"
+
+        See VERSION_CONFLATION_IMPACT.md for full analysis of affected processors.
+
+        Override pattern:
+            def build_test_info(self) -> TestInfo:
+                base_info = super().build_test_info()
+                benchmark_version = self._extract_benchmark_version()
+                return TestInfo(
+                    name=self.get_test_name(),
+                    version=benchmark_version or base_info.version,
+                    wrapper_version=base_info.wrapper_version
+                )
+
+        Returns:
+            TestInfo with name, version, and wrapper_version (both the wrapper version, or "unknown")
+        """
         test_name = self.get_test_name()
 
-        # Try to get version from test_info file
+        # Extract wrapper version from orchestrator's test_info file
+        # This contains the test wrapper repository version (e.g., "v2.8" from "v2.8.tar.gz")
         test_info_file = self.result_dir / "test_info"
         version = None
 
@@ -211,15 +243,28 @@ def build_test_info(self) -> TestInfo:
                 # Find test in test_info
                 for key, test_data in test_info_data.items():
                     if test_data.get('test_name') == test_name:
-                        version = test_data.get('repo_file', '').replace('.tar.gz', '')
+                        # Extract wrapper version from repo_file
+                        # Handle various archive extensions (.tar.gz, .tar.xz, .zip, etc.)
+                        repo_file = test_data.get('repo_file', '')
+                        if isinstance(repo_file, str):
+                            # Strip common archive extensions
+                            for ext in ['.tar.gz', '.tar.xz', '.tar.bz2', '.zip', '.tgz']:
+                                if repo_file.endswith(ext):
+                                    version = repo_file[:-len(ext)]
+                                    break
+                            else:
+                                # No known extension found: use as-is (None when empty)
+                                version = repo_file if repo_file else None
                         break
-            except (OSError, json.JSONDecodeError, KeyError, TypeError) as e:
+            except (OSError, json.JSONDecodeError, KeyError, TypeError, AttributeError) as e:
                 logger.warning(f"Failed to parse test_info: {e}")
 
         return TestInfo(
             name=test_name,
-            version=version or "unknown",
-            wrapper_version=version or "unknown"
+            # NOTE: Both fields set to wrapper version by default
+            # Processors with independent benchmark versions MUST override to fix this
+            version=version or "unknown",           # Should be benchmark version (override needed)
+            wrapper_version=version or "unknown"    # Correct: wrapper repository version
         )
 
     def build_system_under_test(self) -> SystemUnderTest:
diff --git a/tests/test_base_processor.py b/tests/test_base_processor.py
@@ -0,0 +1,156 @@
+"""
+Tests for BaseProcessor.build_test_info() method.
+
+Tests verify wrapper version extraction from test_info file
+and document the current behavior where both version and
+wrapper_version fields are set to the same value.
+"""
+
+import json
+import pytest
+from pathlib import Path
+
+from chronicler.processors.base_processor import BaseProcessor
+from chronicler.schema import TestInfo
+
+pytestmark = pytest.mark.unit
+
+
+class MinimalProcessor(BaseProcessor):
+    """Minimal concrete processor for testing BaseProcessor methods."""
+
+    def get_test_name(self) -> str:
+        return "minimal_test"
+
+    def parse_runs(self, extracted_result):
+        return {}
+
+
+def test_build_test_info_extracts_wrapper_version_from_test_info(tmp_path):
+    """
+    Verify build_test_info() extracts wrapper version from test_info file.
+
+    The test_info file contains wrapper repository versions (e.g., "v2.8")
+    which should be extracted and set to both version and wrapper_version
+    fields (current behavior).
+    """
+    # Setup: Create test_info file with wrapper version
+    test_info_data = {
+        "minimal_test": {
+            "test_name": "minimal_test",
+            "repo_file": "v2.8.tar.gz"
+        }
+    }
+    test_info_file = tmp_path / "test_info"
+    test_info_file.write_text(json.dumps(test_info_data))
+
+    # Execute
+    processor = MinimalProcessor(str(tmp_path))
+    result = processor.build_test_info()
+
+    # Verify
+    assert isinstance(result, TestInfo)
+    assert result.name == "minimal_test"
+    assert result.version == "v2.8", "Should extract wrapper version from repo_file"
+    assert result.wrapper_version == "v2.8", "Should set wrapper_version to same value"
+
+
+def test_build_test_info_returns_unknown_when_no_test_info_file(tmp_path):
+    """
+    When test_info file is missing, both version fields should be "unknown".
+    """
+    # Execute (no test_info file created)
+    processor = MinimalProcessor(str(tmp_path))
+    result = processor.build_test_info()
+
+    # Verify
+    assert result.version == "unknown"
+    assert result.wrapper_version == "unknown"
+
+
+def test_build_test_info_returns_unknown_when_test_not_in_test_info(tmp_path):
+    """
+    When test_info exists but doesn't contain the test, return "unknown".
+    """
+    # Setup: Create test_info with different test
+    test_info_data = {
+        "other_test": {
+            "test_name": "other_test",
+            "repo_file": "v1.0.tar.gz"
+        }
+    }
+    test_info_file = tmp_path / "test_info"
+    test_info_file.write_text(json.dumps(test_info_data))
+
+    # Execute
+    processor = MinimalProcessor(str(tmp_path))
+    result = processor.build_test_info()
+
+    # Verify
+    assert result.version == "unknown"
+    assert result.wrapper_version == "unknown"
+
+
+def test_build_test_info_handles_malformed_json(tmp_path):
+    """
+    Malformed test_info file should fall back to "unknown" (the logged warning is not asserted).
+    """
+    # Setup: Create invalid JSON
+    test_info_file = tmp_path / "test_info"
+    test_info_file.write_text("{ invalid json }")
+
+    # Execute
+    processor = MinimalProcessor(str(tmp_path))
+    result = processor.build_test_info()
+
+    # Verify
+    assert result.version == "unknown"
+    assert result.wrapper_version == "unknown"
+
+
+def test_build_test_info_handles_non_string_repo_file(tmp_path):
+    """
+    Non-string repo_file values should be handled gracefully.
+
+    If test_info contains valid JSON but repo_file is null, an integer,
+    or other non-string value, the processor should handle it gracefully
+    rather than raising AttributeError.
+    """
+    # Setup: Create test_info with null repo_file
+    test_info_data = {
+        "minimal_test": {
+            "test_name": "minimal_test",
+            "repo_file": None
+        }
+    }
+    test_info_file = tmp_path / "test_info"
+    test_info_file.write_text(json.dumps(test_info_data))
+
+    # Execute
+    processor = MinimalProcessor(str(tmp_path))
+    result = processor.build_test_info()
+
+    # Verify - should fall back to "unknown" rather than crash
+    assert result.version == "unknown"
+    assert result.wrapper_version == "unknown"
+
+
+def test_build_test_info_handles_non_dict_test_info(tmp_path):
+    """
+    Non-dict test_info data should be handled gracefully.
+
+    If test_info contains a list or other non-dict structure,
+    the processor should handle it gracefully rather than raising
+    AttributeError when calling .items().
+    """
+    # Setup: Create test_info with list instead of dict
+    test_info_file = tmp_path / "test_info"
+    test_info_file.write_text("[]")
+
+    # Execute
+    processor = MinimalProcessor(str(tmp_path))
+    result = processor.build_test_info()
+
+    # Verify - should fall back to "unknown" rather than crash
+    assert result.version == "unknown"
+    assert result.wrapper_version == "unknown"