From c399628375362085af6204a2dea978f2fee36e82 Mon Sep 17 00:00:00 2001
From: Guygored
Date: Sun, 7 Dec 2025 08:29:38 +0200
Subject: [PATCH 1/5] Creating NNO dashboard

Adding new utils for generating a new dashboard for NNO
---
 .github/workflows/generate_matrix_page.yaml   |  43 +-
 workflows/README.md                           |   3 +-
 workflows/common/__init__.py                  |  94 +++++
 workflows/common/data_fetching.py             | 177 ++++++++
 workflows/common/data_structures.py           |  59 +++
 workflows/common/gcs_utils.py                 | 130 ++++++
 workflows/common/html_builders.py             | 162 ++++++++
 workflows/common/validation.py                |  90 +++++
 .../gpu_operator_dashboard/fetch_ci_data.py   |  98 +----
 .../generate_ci_dashboard.py                  |  34 +-
 workflows/nno_dashboard/README.md             | 182 +++++++++
 workflows/nno_dashboard/__init__.py           |   3 +
 workflows/nno_dashboard/fetch_ci_data.py      | 378 ++++++++++++++++++
 .../nno_dashboard/generate_ci_dashboard.py    | 342 ++++++++++++++++
 workflows/nno_dashboard/requirements.txt      |   4 +
 workflows/nno_dashboard/templates/header.html |  12 +
 .../nno_dashboard/templates/main_table.html   |   9 +
 .../templates/test_flavor_section.html        |  15 +
 18 files changed, 1718 insertions(+), 117 deletions(-)
 create mode 100644 workflows/common/data_fetching.py
 create mode 100644 workflows/common/data_structures.py
 create mode 100644 workflows/common/gcs_utils.py
 create mode 100644 workflows/common/html_builders.py
 create mode 100644 workflows/common/validation.py
 create mode 100644 workflows/nno_dashboard/README.md
 create mode 100644 workflows/nno_dashboard/__init__.py
 create mode 100644 workflows/nno_dashboard/fetch_ci_data.py
 create mode 100644 workflows/nno_dashboard/generate_ci_dashboard.py
 create mode 100644 workflows/nno_dashboard/requirements.txt
 create mode 100644 workflows/nno_dashboard/templates/header.html
 create mode 100644 workflows/nno_dashboard/templates/main_table.html
 create mode 100644 workflows/nno_dashboard/templates/test_flavor_section.html

diff --git a/.github/workflows/generate_matrix_page.yaml b/.github/workflows/generate_matrix_page.yaml
index 0b614398b..10b122508 100644
--- a/.github/workflows/generate_matrix_page.yaml
+++ b/.github/workflows/generate_matrix_page.yaml
@@ -32,8 +32,12 @@ jobs:
     steps:
       - name: Set dynamic env vars
         run: |
+          # GPU Operator dashboard paths
           echo "DASHBOARD_DATA_FILEPATH=${DASHBOARD_OUTPUT_DIR}/gpu_operator_matrix.json" >> "$GITHUB_ENV"
           echo "DASHBOARD_HTML_FILEPATH=${DASHBOARD_OUTPUT_DIR}/gpu_operator_matrix.html" >> "$GITHUB_ENV"
+          # Network Operator dashboard paths
+          echo "NNO_DASHBOARD_DATA_FILEPATH=${DASHBOARD_OUTPUT_DIR}/network_operator_matrix.json" >> "$GITHUB_ENV"
+          echo "NNO_DASHBOARD_HTML_FILEPATH=${DASHBOARD_OUTPUT_DIR}/network_operator_matrix.html" >> "$GITHUB_ENV"
           echo "GH_PAGES_BRANCH=${{ github.event.inputs.gh_pages_branch || 'gh-pages' }}" >> "$GITHUB_ENV"
         env:
           DASHBOARD_OUTPUT_DIR: ${{ env.DASHBOARD_OUTPUT_DIR }}
@@ -67,27 +71,62 @@ jobs:
       - name: Install Dependencies
         run: |
           pip install -r workflows/gpu_operator_dashboard/requirements.txt
+          pip install -r workflows/nno_dashboard/requirements.txt
       - name: Fetch CI Data
         run: |
          echo "Processing PR: ${{ steps.determine_pr.outputs.PR_NUMBER }}"
+          # GPU Operator
           python -m workflows.gpu_operator_dashboard.fetch_ci_data \
             --pr_number "${{ steps.determine_pr.outputs.PR_NUMBER }}" \
             --baseline_data_filepath "${{ env.DASHBOARD_DATA_FILEPATH }}" \
             --merged_data_filepath "${{ env.DASHBOARD_DATA_FILEPATH }}"
+          # Network Operator
+          python -m workflows.nno_dashboard.fetch_ci_data \
+            --pr_number "${{ steps.determine_pr.outputs.PR_NUMBER }}" \
+            --baseline_data_filepath "${{ env.NNO_DASHBOARD_DATA_FILEPATH }}" \
+            --merged_data_filepath "${{ env.NNO_DASHBOARD_DATA_FILEPATH }}"
       - name: Generate HTML Dashboard (only if JSON changed)
         run: |
           cd "${{ env.DASHBOARD_OUTPUT_DIR }}"
+
+          # Check if GPU Operator JSON changed
+          GPU_CHANGED=false
           if [[ ${{ github.event_name }} == "pull_request_target" ]] && git diff --exit-code gpu_operator_matrix.json; then
-            echo "no changes"
+            echo "GPU Operator: no changes"
+          else
+            echo "GPU Operator: changes detected"
+            GPU_CHANGED=true
+          fi
+
+          # Check if Network Operator JSON changed
+          NNO_CHANGED=false
+          if [[ ${{ github.event_name }} == "pull_request_target" ]] && git diff --exit-code network_operator_matrix.json; then
+            echo "Network Operator: no changes"
           else
-            cd "${{ github.workspace }}"
+            echo "Network Operator: changes detected"
+            NNO_CHANGED=true
+          fi
+
+          cd "${{ github.workspace }}"
+
+          # Generate GPU Operator dashboard if changed
+          if [ "$GPU_CHANGED" = true ]; then
+            echo "Generating GPU Operator dashboard..."
             python -m workflows.gpu_operator_dashboard.generate_ci_dashboard \
               --dashboard_data_filepath "${{ env.DASHBOARD_DATA_FILEPATH }}" \
               --dashboard_html_filepath "${{ env.DASHBOARD_HTML_FILEPATH }}"
           fi
+
+          # Generate Network Operator dashboard if changed
+          if [ "$NNO_CHANGED" = true ]; then
+            echo "Generating Network Operator dashboard..."
+            python -m workflows.nno_dashboard.generate_ci_dashboard \
+              --dashboard_data_filepath "${{ env.NNO_DASHBOARD_DATA_FILEPATH }}" \
+              --dashboard_html_filepath "${{ env.NNO_DASHBOARD_HTML_FILEPATH }}"
+          fi
       - name: Deploy HTML to GitHub Pages
         uses: JamesIves/github-pages-deploy-action@v4
diff --git a/workflows/README.md b/workflows/README.md
index 768d3978e..e84979d73 100644
--- a/workflows/README.md
+++ b/workflows/README.md
@@ -6,8 +6,9 @@ This directory contains multiple workflows for automating various aspects of th
 
 - [gpu_operator_versions/](./gpu_operator_versions/) — Automation for updating versions and triggering CI jobs
 - [gpu_operator_dashboard/](./gpu_operator_dashboard/) — CI dashboard generation for NVIDIA GPU Operator test results
+- [nno_dashboard/](./nno_dashboard/) — CI dashboard generation for NVIDIA Network Operator test results
 - [microshift_dashboard/](./microshift_dashboard/) — MicroShift NVIDIA Device Plugin testing dashboard
-- Shared modules: [utils.py](./utils.py), [templates.py](./templates.py)
+- [common/](./common/) — Shared utilities: logging, templates, GCS access, HTML builders, data structures
 
 See the individual README files in each subdirectory for detailed information.
 
diff --git a/workflows/common/__init__.py b/workflows/common/__init__.py
index e69de29bb..09661a8dd 100644
--- a/workflows/common/__init__.py
+++ b/workflows/common/__init__.py
@@ -0,0 +1,94 @@
+"""
+Common utilities shared across NVIDIA CI workflows.
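+
+The submodules below are re-exported here so that callers can import the
+shared helpers directly from the package, for example:
+
+    from workflows.common import logger, load_template, TestResult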
+""" + +from workflows.common.utils import get_logger, logger +from workflows.common.templates import load_template +from workflows.common.data_structures import ( + TestResult, + OCP_FULL_VERSION, + OPERATOR_VERSION, + GPU_OPERATOR_VERSION, + STATUS_SUCCESS, + STATUS_FAILURE, + STATUS_ABORTED, +) +from workflows.common.gcs_utils import ( + http_get_json, + fetch_gcs_file_content, + build_prow_job_url, + fetch_filtered_files, + build_job_history_url, + GCS_API_BASE_URL, + GCS_MAX_RESULTS_PER_REQUEST, +) +from workflows.common.html_builders import ( + build_toc, + build_notes, + build_history_bar, + build_last_updated_footer, + sanitize_id, +) +from workflows.common.validation import ( + is_valid_ocp_version, + has_valid_semantic_versions, + is_infrastructure_type, +) +from workflows.common.data_fetching import ( + build_version_lookups, + build_finished_lookup, + extract_test_status, + extract_timestamp, + determine_repo_from_job_name, + convert_sets_to_lists_recursive, + merge_job_history_links, + int_or_none, +) + +__all__ = [ + # Utils + "get_logger", + "logger", + "load_template", + + # Data structures + "TestResult", + "OCP_FULL_VERSION", + "OPERATOR_VERSION", + "GPU_OPERATOR_VERSION", + "STATUS_SUCCESS", + "STATUS_FAILURE", + "STATUS_ABORTED", + + # GCS utilities + "http_get_json", + "fetch_gcs_file_content", + "build_prow_job_url", + "fetch_filtered_files", + "build_job_history_url", + "GCS_API_BASE_URL", + "GCS_MAX_RESULTS_PER_REQUEST", + + # HTML builders + "build_toc", + "build_notes", + "build_history_bar", + "build_last_updated_footer", + "sanitize_id", + + # Validation + "is_valid_ocp_version", + "has_valid_semantic_versions", + "is_infrastructure_type", + + # Data fetching + "build_version_lookups", + "build_finished_lookup", + "extract_test_status", + "extract_timestamp", + "determine_repo_from_job_name", + "convert_sets_to_lists_recursive", + "merge_job_history_links", + "int_or_none", +] + diff --git a/workflows/common/data_fetching.py b/workflows/common/data_fetching.py new file mode 100644 index 000000000..e69484815 --- /dev/null +++ b/workflows/common/data_fetching.py @@ -0,0 +1,177 @@ +""" +Common data fetching patterns for CI dashboards. +Shared logic for building file lookups and processing builds. +""" + +import json +from typing import Dict, Any, List, Tuple, Optional +from workflows.common.utils import logger +from workflows.common.gcs_utils import fetch_gcs_file_content + + +def build_version_lookups( + version_files_list: List[Tuple[str, List[Dict[str, Any]]]] +) -> Dict[str, Dict[str, str]]: + """ + Build lookup dictionaries for version files organized by build directory. + + Args: + version_files_list: List of tuples (file_type, file_items) + e.g., [("ocp", ocp_files), ("operator", operator_files)] + + Returns: + Dict mapping file_type to {build_dir: content} + e.g., {"ocp": {build_dir: "4.17.16"}, "operator": {build_dir: "25.4.0"}} + """ + version_lookups = {} + + for file_type, file_items in version_files_list: + lookup = {} + for file_item in file_items: + path = file_item["name"] + build_dir = path.rsplit("/", 1)[0] + try: + content = fetch_gcs_file_content(path) + lookup[build_dir] = content.strip() + except Exception as e: + logger.warning(f"Failed to fetch {file_type} from {path}: {e}") + version_lookups[file_type] = lookup + + return version_lookups + + +def build_finished_lookup( + finished_files: List[Dict[str, Any]] +) -> Dict[str, Dict[str, Any]]: + """ + Build lookup dictionary for finished.json files by build directory. 
+ + Args: + finished_files: List of finished.json file items from GCS + + Returns: + Dict mapping build_dir to parsed finished.json content + """ + finished_lookup = {} + + for finished_item in finished_files: + finished_path = finished_item["name"] + build_dir = finished_path.rsplit("/", 1)[0] + try: + content = fetch_gcs_file_content(finished_path) + finished_lookup[build_dir] = json.loads(content) + except Exception as e: + logger.warning(f"Failed to fetch/parse finished.json from {finished_path}: {e}") + + return finished_lookup + + +def extract_test_status( + finished_json: Dict[str, Any], + status_success: str, + status_failure: str, + status_aborted: str +) -> str: + """ + Extract and normalize test status from finished.json. + + Args: + finished_json: Parsed finished.json content + status_success: String constant for success status + status_failure: String constant for failure status + status_aborted: String constant for aborted status + + Returns: + Normalized test status string + """ + result_str = finished_json.get("result", "UNKNOWN").upper() + if result_str in [status_success, status_failure, status_aborted]: + return result_str + return status_failure + + +def extract_timestamp(finished_json: Dict[str, Any]) -> int: + """ + Extract timestamp from finished.json. + + Args: + finished_json: Parsed finished.json content + + Returns: + Unix timestamp (defaults to 0 if not found) + """ + return finished_json.get("timestamp", 0) + + +def determine_repo_from_job_name(job_name: str) -> str: + """ + Determine repository from job name pattern. + + Args: + job_name: Job name string + + Returns: + Repository identifier ('openshift_release' or 'rh-ecosystem-edge_nvidia-ci') + """ + return "openshift_release" if job_name.startswith("rehearse-") else "rh-ecosystem-edge_nvidia-ci" + + +def convert_sets_to_lists_recursive(data: Any) -> Any: + """ + Recursively convert sets to sorted lists for JSON serialization. + + Args: + data: Any data structure that may contain sets + + Returns: + Data structure with sets converted to sorted lists + """ + if isinstance(data, set): + return sorted(list(data)) + elif isinstance(data, dict): + return {k: convert_sets_to_lists_recursive(v) for k, v in data.items()} + elif isinstance(data, list): + return [convert_sets_to_lists_recursive(item) for item in data] + else: + return data + + +def merge_job_history_links( + new_links: Any, + existing_links: Any +) -> List[str]: + """ + Merge and deduplicate job history links. + + Args: + new_links: New links (can be set or list) + existing_links: Existing links (can be set or list) + + Returns: + Sorted list of unique links + """ + # Convert both to sets + new_set = set(new_links) if isinstance(new_links, (set, list)) else set() + existing_set = set(existing_links) if isinstance(existing_links, (set, list)) else set() + + # Merge and return sorted list + all_links = new_set | existing_set + return sorted(list(all_links)) + + +def int_or_none(value: Optional[str]) -> Optional[int]: + """ + Convert string to int or None for unlimited. + + Args: + value: String value to convert + + Returns: + Integer or None + """ + if value is None: + return None + if value.lower() in ('none', 'unlimited'): + return None + return int(value) + diff --git a/workflows/common/data_structures.py b/workflows/common/data_structures.py new file mode 100644 index 000000000..6348d8b9e --- /dev/null +++ b/workflows/common/data_structures.py @@ -0,0 +1,59 @@ +""" +Shared data structures and constants for CI dashboards. 
+""" + +from dataclasses import dataclass +from typing import Any, Dict, Optional + +# Constants for version field names (shared across dashboards) +OCP_FULL_VERSION = "ocp_full_version" +OPERATOR_VERSION = "operator_version" # Generic operator version field + +# GPU Operator specific (for backward compatibility) +GPU_OPERATOR_VERSION = "gpu_operator_version" + +# Constants for job statuses +STATUS_SUCCESS = "SUCCESS" +STATUS_FAILURE = "FAILURE" +STATUS_ABORTED = "ABORTED" + + +@dataclass(frozen=True) +class TestResult: + """Represents a single test run result (shared data structure).""" + ocp_full_version: str + operator_version: str # Can be GPU or Network operator version + test_status: str + prow_job_url: str + job_timestamp: str + test_flavor: Optional[str] = None # Optional: for dashboards with test flavors (NNO) + + def to_dict(self) -> Dict[str, Any]: + """Convert TestResult to dictionary format for JSON serialization.""" + result = { + OCP_FULL_VERSION: self.ocp_full_version, + "operator_version": self.operator_version, + "test_status": self.test_status, + "prow_job_url": self.prow_job_url, + "job_timestamp": self.job_timestamp, + } + # Include test_flavor only if it's set + if self.test_flavor is not None: + result["test_flavor"] = self.test_flavor + return result + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "TestResult": + """Create TestResult from dictionary.""" + # Handle backward compatibility with GPU operator data + operator_version = data.get("operator_version") or data.get(GPU_OPERATOR_VERSION) + + return cls( + ocp_full_version=data[OCP_FULL_VERSION], + operator_version=operator_version, + test_status=data["test_status"], + prow_job_url=data["prow_job_url"], + job_timestamp=data["job_timestamp"], + test_flavor=data.get("test_flavor"), + ) + diff --git a/workflows/common/gcs_utils.py b/workflows/common/gcs_utils.py new file mode 100644 index 000000000..5e28d9494 --- /dev/null +++ b/workflows/common/gcs_utils.py @@ -0,0 +1,130 @@ +""" +GCS (Google Cloud Storage) utilities for fetching CI test artifacts. +Shared across GPU Operator and Network Operator dashboards. +""" + +import re +import urllib.parse +from typing import Dict, Any + +import requests + +from workflows.common.utils import logger + +# GCS API base URL for test-platform-results bucket +GCS_API_BASE_URL = "https://storage.googleapis.com/storage/v1/b/test-platform-results/o" + +# Maximum number of results per GCS API request for pagination +GCS_MAX_RESULTS_PER_REQUEST = 1000 + + +def http_get_json(url: str, params: Dict[str, Any] = None, headers: Dict[str, str] = None) -> Dict[str, Any]: + """ + Send an HTTP GET request and return the JSON response. + + Args: + url: URL to fetch + params: Optional query parameters + headers: Optional HTTP headers + + Returns: + Parsed JSON response + + Raises: + requests.HTTPError: If the request fails + """ + response = requests.get(url, params=params, headers=headers, timeout=30) + response.raise_for_status() + return response.json() + + +def fetch_gcs_file_content(file_path: str) -> str: + """ + Fetch the raw text content from a file in GCS. 
+ + Args: + file_path: Path to the file in GCS (e.g., "pr-logs/pull/...") + + Returns: + File content as string + + Raises: + requests.HTTPError: If the file cannot be fetched + """ + logger.info(f"Fetching file content for {file_path}") + response = requests.get( + url=f"{GCS_API_BASE_URL}/{urllib.parse.quote_plus(file_path)}", + params={"alt": "media"}, + timeout=30, + ) + response.raise_for_status() + return response.content.decode("UTF-8") + + +def build_prow_job_url(finished_json_path: str) -> str: + """ + Build a Prow job URL from a finished.json file path. + + Args: + finished_json_path: Path to finished.json file (e.g., "pr-logs/pull/.../finished.json") + + Returns: + Full URL to the Prow job artifacts page + """ + directory_path = finished_json_path[:-len('/finished.json')] + return f"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/{directory_path}" + + +def fetch_filtered_files(pr_number: str, glob_pattern: str) -> list[Dict[str, Any]]: + """ + Fetch files from GCS matching a specific pattern for a PR. + + Args: + pr_number: Pull request number + glob_pattern: Glob pattern to match files (e.g., "*/finished.json", "*/ocp.version") + + Returns: + List of file metadata dictionaries from GCS API + """ + all_items = [] + + # Search in both possible PR locations + for prefix in [ + f"pr-logs/pull/rh-ecosystem-edge_nvidia-ci/{pr_number}/", + f"pr-logs/pull/openshift_release/{pr_number}/" + ]: + page_token = None # Reset pagination token for each prefix + while True: + params = { + "prefix": prefix, + "delimiter": "", + "matchGlob": glob_pattern, + "maxResults": GCS_MAX_RESULTS_PER_REQUEST, + } + if page_token: + params["pageToken"] = page_token + + data = http_get_json(GCS_API_BASE_URL, params=params) + items = data.get("items", []) + all_items.extend(items) + + page_token = data.get("nextPageToken") + if not page_token: + break + + logger.info(f"Found {len(all_items)} files matching pattern '{glob_pattern}' for PR #{pr_number}") + return all_items + + +def build_job_history_url(job_name: str) -> str: + """ + Build a Prow job history URL for a given job name. + + Args: + job_name: Name of the CI job + + Returns: + Full URL to the job history page + """ + return f"https://prow.ci.openshift.org/job-history/gs/test-platform-results/pr-logs/directory/{job_name}" + diff --git a/workflows/common/html_builders.py b/workflows/common/html_builders.py new file mode 100644 index 000000000..7c31b3605 --- /dev/null +++ b/workflows/common/html_builders.py @@ -0,0 +1,162 @@ +""" +Shared HTML building utilities for CI dashboards. +""" + +import html +from typing import List, Dict, Any +from datetime import datetime, timezone + + +def build_toc(ocp_keys: List[str]) -> str: + """ + Build a Table of Contents (TOC) for OpenShift versions. + + Args: + ocp_keys: List of OCP version strings to include in TOC + + Returns: + HTML string containing the TOC + """ + toc_links = ", ".join( + f'{ocp_version}' for ocp_version in ocp_keys + ) + return f""" +
+
<div class="toc">
+      <h2>OpenShift Versions</h2>
+      {toc_links}
+    </div>
+    """
+
+
+def build_notes(notes: List[str]) -> str:
+    """
+    Build an HTML snippet with manual notes for an OCP version.
+
+    Args:
+        notes: List of note strings to display
+
+    Returns:
+        HTML string containing the notes section, or empty string if no notes
+    """
+    if not notes:
+        return ""
+
+    # Escape HTML in notes for safety
+    escaped_notes = [html.escape(note) for note in notes]
+    items = "\n".join(f'<li>• {n}</li>' for n in escaped_notes)
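+    # Illustrative example (not part of the API): build_notes(["Known flaky on
+    # DOCA4"]) renders a single "• Known flaky on DOCA4" bullet inside the
+    # notes block below; an empty list short-circuits to "" above.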
+    return f"""
+    <div class="notes">
+      <h4>Notes</h4>
+      <ul>
+        {items}
+      </ul>
+    </div>
+    """
+
+
+def build_history_bar(
+    results: List[Dict[str, Any]],
+    title: str,
+    success_key: str = "test_status",
+    success_value: str = "SUCCESS"
+) -> str:
+    """
+    Build a history bar showing test result squares (success/failure/aborted).
+
+    Args:
+        results: List of test result dictionaries
+        title: Title to display above the history bar
+        success_key: Key in result dict to check for status
+        success_value: Value that indicates success
+
+    Returns:
+        HTML string containing the history bar
+    """
+    if not results:
+        return ""
+
+    # Sort by timestamp, most recent first
+    sorted_results = sorted(
+        results, key=lambda r: int(r.get("job_timestamp", 0)), reverse=True
+    )
+
+    leftmost_result = sorted_results[0]
+    last_date = datetime.fromtimestamp(
+        int(leftmost_result.get("job_timestamp", 0)), timezone.utc
+    ).strftime("%Y-%m-%d %H:%M:%S UTC")
+
+    history_html = f"""
+    <div class="history-section">
+      <span class="history-title">{html.escape(title)}</span>
+      <span class="history-last-date">Last Job Date: {last_date}</span>
+      <div class="history-bar">
+    """
+
+    for result in sorted_results:
+        status = result.get(success_key, "Unknown").upper()
+        if status == success_value:
+            status_class = "history-success"
+        elif status == "FAILURE":
+            status_class = "history-failure"
+        else:
+            status_class = "history-aborted"
+
+        timestamp_str = datetime.fromtimestamp(
+            int(result.get("job_timestamp", 0)), timezone.utc
+        ).strftime("%Y-%m-%d %H:%M:%S UTC")
+
+        prow_url = result.get("prow_job_url", "#")
+        history_html += f"""
+        <a href="{prow_url}" class="history-square {status_class}"
+           title="Status: {status} | Timestamp: {timestamp_str}">
+        </a>
+        """
+
+    history_html += "</div></div>"
+    return history_html
+
+
+def build_last_updated_footer(timestamp: str = None) -> str:
+    """
+    Build a footer showing when the dashboard was last updated.
+
+    Args:
+        timestamp: Optional timestamp string. If not provided, uses current time.
+
+    Returns:
+        HTML string containing the footer
+    """
+    if timestamp is None:
+        timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")
+
+    return f"""
+    <div class="footer">
+      Last updated: {timestamp} UTC
+    </div>
+  </body>
+</html>
+"""
+
+
+def sanitize_id(text: str) -> str:
+    """
+    Sanitize a string to be used as an HTML ID.
+
+    Args:
+        text: String to sanitize
+
+    Returns:
+        Sanitized string safe for use as HTML ID
+    """
+    # Replace spaces and special characters with hyphens
+    sanitized = text.lower().replace(" ", "-").replace("_", "-")
+    # Remove any characters that aren't alphanumeric or hyphens
+    sanitized = "".join(c for c in sanitized if c.isalnum() or c == "-")
+    return sanitized
diff --git a/workflows/common/validation.py b/workflows/common/validation.py
new file mode 100644
index 000000000..3d3a654ac
--- /dev/null
+++ b/workflows/common/validation.py
@@ -0,0 +1,90 @@
+"""
+Validation utilities for CI dashboards.
+Shared validation logic for GPU Operator and Network Operator dashboards.
+"""
+
+from typing import Dict, Any
+import semver
+
+
+def is_valid_ocp_version(version: str) -> bool:
+    """
+    Check if a version string is a valid OpenShift version (not an infrastructure type).
+
+    Args:
+        version: Version string to validate
+
+    Returns:
+        True if valid OCP version, False otherwise
+
+    Examples:
+        >>> is_valid_ocp_version("4.17.16")
+        True
+        >>> is_valid_ocp_version("doca4")
+        False
+        >>> is_valid_ocp_version("bare-metal")
+        False
+    """
+    invalid_keys = ["doca4", "bare-metal", "hosted", "unknown"]
+    if version.lower() in invalid_keys:
+        return False
+    if not version or not version[0].isdigit():
+        return False
+    if '.' not in version:
+        return False
+    parts = version.split('.')
+    if len(parts) < 2:
+        return False
+    try:
+        int(parts[0])
+        int(parts[1])
+        return True
+    except (ValueError, IndexError):
+        return False
+
+
+def has_valid_semantic_versions(result: Dict[str, Any], ocp_key: str = "ocp_full_version", operator_key: str = "operator_version") -> bool:
+    """
+    Check if both OCP and operator versions contain valid semantic versions.
+
+    Args:
+        result: Test result dictionary containing version fields
+        ocp_key: Key name for OCP version in result dict
+        operator_key: Key name for operator version in result dict
+
+    Returns:
+        True if both versions are valid semantic versions, False otherwise
+    """
+    try:
+        ocp_version = result.get(ocp_key, "")
+        operator_version = result.get(operator_key, "")
+
+        if not ocp_version or not operator_version:
+            return False
+
+        # Parse OCP version (should be like "4.14.1")
+        semver.VersionInfo.parse(ocp_version)
+
+        # Parse operator version (may have suffix like "23.9.0(bundle)" - extract version part)
+        operator_version_clean = operator_version.split("(")[0].strip()
+        semver.VersionInfo.parse(operator_version_clean)
+
+    except (ValueError, TypeError):
+        return False
+    else:
+        return True
+
+
+def is_infrastructure_type(value: str) -> bool:
+    """
+    Check if a string is an infrastructure type rather than a version.
+ + Args: + value: String to check + + Returns: + True if it's an infrastructure type, False otherwise + """ + infrastructure_types = ["doca4", "bare-metal", "hosted", "unknown"] + return value.lower() in infrastructure_types + diff --git a/workflows/gpu_operator_dashboard/fetch_ci_data.py b/workflows/gpu_operator_dashboard/fetch_ci_data.py index d2e1f823b..b91be2157 100644 --- a/workflows/gpu_operator_dashboard/fetch_ci_data.py +++ b/workflows/gpu_operator_dashboard/fetch_ci_data.py @@ -11,6 +11,18 @@ import semver from workflows.common.utils import logger +from workflows.common.gcs_utils import ( + GCS_API_BASE_URL, + GCS_MAX_RESULTS_PER_REQUEST, + http_get_json, + fetch_gcs_file_content, + build_prow_job_url, + fetch_filtered_files, +) +from workflows.common.data_fetching import ( + int_or_none, + merge_job_history_links, +) # Constants for version field names @@ -22,13 +34,6 @@ STATUS_FAILURE = "FAILURE" STATUS_ABORTED = "ABORTED" - -# ============================================================================= -# Constants -# ============================================================================= - -GCS_API_BASE_URL = "https://storage.googleapis.com/storage/v1/b/test-platform-results/o" - # Regular expression to match test result paths. TEST_RESULT_PATH_REGEX = re.compile( r"pr-logs/pull/(?P[^/]+)/(?P\d+)/" @@ -37,37 +42,11 @@ r"(?P[^/]+)" ) -# Maximum number of results per GCS API request for pagination -GCS_MAX_RESULTS_PER_REQUEST = 1000 - # ============================================================================= # Data Fetching & JSON Update Functions # ============================================================================= -def http_get_json(url: str, params: Dict[str, Any] = None, headers: Dict[str, str] = None) -> Dict[str, Any]: - """Send an HTTP GET request and return the JSON response.""" - response = requests.get(url, params=params, headers=headers, timeout=30) - response.raise_for_status() - return response.json() - - -def fetch_gcs_file_content(file_path: str) -> str: - """Fetch the raw text content from a file in GCS.""" - logger.info(f"Fetching file content for {file_path}") - response = requests.get( - url=f"{GCS_API_BASE_URL}/{urllib.parse.quote_plus(file_path)}", - params={"alt": "media"}, - timeout=30, - ) - response.raise_for_status() - return response.content.decode("UTF-8") - - -def build_prow_job_url(finished_json_path: str) -> str: - directory_path = finished_json_path[:-len('/finished.json')] - return f"https://gcsweb-ci.apps.ci.l2s4.p1.openshiftapps.com/gcs/test-platform-results/{directory_path}" - # --- Pydantic Model and Domain Model for Test Results --- @@ -131,40 +110,6 @@ def has_exact_versions(self) -> bool: return True -def fetch_filtered_files(pr_number: str, glob_pattern: str) -> List[Dict[str, Any]]: - """Fetch files matching a specific glob pattern for a PR.""" - logger.info(f"Fetching files matching pattern: {glob_pattern}") - - params = { - "prefix": f"pr-logs/pull/rh-ecosystem-edge_nvidia-ci/{pr_number}/", - "alt": "json", - "matchGlob": glob_pattern, - "maxResults": str(GCS_MAX_RESULTS_PER_REQUEST), - "projection": "noAcl", - } - headers = {"Accept": "application/json"} - - all_items = [] - next_page_token = None - - # Handle pagination - while True: - if next_page_token: - params["pageToken"] = next_page_token - - response_data = http_get_json( - GCS_API_BASE_URL, params=params, headers=headers) - items = response_data.get("items", []) - all_items.extend(items) - - next_page_token = response_data.get("nextPageToken") - if not 
next_page_token: - break - - logger.info(f"Found {len(all_items)} files matching {glob_pattern}") - return all_items - - def fetch_pr_files(pr_number: str) -> Tuple[List[Dict[str, Any]], List[Dict[str, Any]], List[Dict[str, Any]]]: """Fetch all required file types for a PR using targeted filtering.""" logger.info(f"Fetching files for PR #{pr_number}") @@ -593,11 +538,10 @@ def merge_ocp_version_results( new_job_history_links = new_version_data.get("job_history_links", set()) existing_job_history_links = merged_version_data.get("job_history_links", []) - # Convert existing list back to set, then merge with new set - all_job_history_links = set(existing_job_history_links) - all_job_history_links.update(new_job_history_links) - # Convert back to sorted list for JSON serialization - merged_version_data["job_history_links"] = sorted(list(all_job_history_links)) + # Use common merge function + merged_version_data["job_history_links"] = merge_job_history_links( + new_job_history_links, existing_job_history_links + ) return merged_version_data @@ -628,20 +572,10 @@ def merge_and_save_results( logger.info(f"Results saved to {output_file}") - - # ============================================================================= # Main Workflow: Update JSON # ============================================================================= -def int_or_none(value: Optional[str]) -> Optional[int]: - """Convert string to int or None for unlimited.""" - if value is None: - return None - if value.lower() in ('none', 'unlimited'): - return None - return int(value) - def main() -> None: parser = argparse.ArgumentParser(description="Test Matrix Utility") diff --git a/workflows/gpu_operator_dashboard/generate_ci_dashboard.py b/workflows/gpu_operator_dashboard/generate_ci_dashboard.py index c55d6c819..6c52d4f04 100644 --- a/workflows/gpu_operator_dashboard/generate_ci_dashboard.py +++ b/workflows/gpu_operator_dashboard/generate_ci_dashboard.py @@ -8,41 +8,11 @@ from workflows.common.utils import logger from workflows.common.templates import load_template +from workflows.common.validation import has_valid_semantic_versions from workflows.gpu_operator_dashboard.fetch_ci_data import ( OCP_FULL_VERSION, GPU_OPERATOR_VERSION, STATUS_ABORTED) -def has_valid_semantic_versions(result: Dict[str, Any]) -> bool: - """ - Check if both ocp_full_version and gpu_operator_version contain valid semantic versions. 
- - Args: - result: Test result dictionary containing version fields - - Returns: - True if both versions are valid semantic versions, False otherwise - """ - try: - ocp_version = result.get(OCP_FULL_VERSION, "") - gpu_version = result.get(GPU_OPERATOR_VERSION, "") - - if not ocp_version or not gpu_version: - return False - - # Parse OCP version (should be like "4.14.1") - semver.VersionInfo.parse(ocp_version) - - # Parse GPU operator version (may have suffix like "23.9.0(bundle)" - extract version part) - gpu_version_clean = gpu_version.split("(")[0].strip() - semver.VersionInfo.parse(gpu_version_clean) - - except (ValueError, TypeError): - logger.warning(f"Invalid semantic version in result: ocp={result.get(OCP_FULL_VERSION)}, gpu={result.get(GPU_OPERATOR_VERSION)}") - return False - else: - return True - - def generate_test_matrix(ocp_data: Dict[str, Dict[str, Any]]) -> str: """ Build the final HTML report by: @@ -67,7 +37,7 @@ def generate_test_matrix(ocp_data: Dict[str, Dict[str, Any]]) -> str: for r in release_results: # Only include entries with valid semantic versions # Ignore ABORTED results for regular (non-bundle) results - if has_valid_semantic_versions(r) and r.get("test_status") != STATUS_ABORTED: + if has_valid_semantic_versions(r, OCP_FULL_VERSION, GPU_OPERATOR_VERSION) and r.get("test_status") != STATUS_ABORTED: regular_results.append(r) notes_html = build_notes(notes) table_rows_html = build_catalog_table_rows(regular_results) diff --git a/workflows/nno_dashboard/README.md b/workflows/nno_dashboard/README.md new file mode 100644 index 000000000..7a2dd8788 --- /dev/null +++ b/workflows/nno_dashboard/README.md @@ -0,0 +1,182 @@ +# NVIDIA Network Operator CI Dashboard + +This module generates an HTML dashboard displaying CI test results for NVIDIA Network Operator on Red Hat OpenShift. + +## Overview + +The dashboard fetches test results from OpenShift CI (Prow) stored in Google Cloud Storage and generates an interactive HTML page showing: + +- Test results organized by OpenShift version +- Multiple test flavors (infrastructure types, RDMA configurations, GPU tests) +- Success/failure status with links to detailed test logs +- Historical test data + +## Test Flavors + +Network Operator tests run across multiple configurations: + +### Infrastructure Types +- **DOCA4**: Tests on DOCA4 infrastructure +- **Bare Metal**: Tests on bare metal servers +- **Hosted**: Tests in hosted environments + +### Test Types +- **RDMA Legacy SR-IOV**: Legacy SR-IOV RDMA testing +- **RDMA Shared Device**: Shared device RDMA testing +- **RDMA SR-IOV**: SR-IOV with RDMA +- **E2E**: End-to-end integration tests + +### GPU Support +Tests can run with or without GPU: +- **with GPU**: Tests including GPU/GPUDirect functionality +- **(no suffix)**: Tests without GPU + +### Example Flavors +- `DOCA4 - RDMA Legacy SR-IOV` +- `Bare Metal - E2E` +- `Hosted - RDMA SR-IOV with GPU` +- `DOCA4 - RDMA Shared Device` + +## Data Structure + +The dashboard uses a JSON file with this structure: + +```json +{ + "4.17.16": { + "notes": [], + "bundle_tests": [], + "release_tests": [], + "job_history_links": [...], + "test_flavors": { + "DOCA4 - RDMA Legacy SR-IOV": { + "results": [ + { + "ocp_full_version": "4.17.16", + "operator_version": "25.4.0", + "test_status": "SUCCESS", + "prow_job_url": "https://...", + "job_timestamp": 1756406663, + "test_flavor": "DOCA4 - RDMA Legacy SR-IOV" + } + ], + "job_history_links": [...] 
+ } + } + } +} +``` + +## Scripts + +### fetch_ci_data.py + +Fetches test results from GCS for a specific PR. + +```bash +python -m workflows.nno_dashboard.fetch_ci_data \ + --pr_number 67673 \ + --baseline_data_filepath output/network_operator_matrix.json \ + --merged_data_filepath output/network_operator_matrix.json +``` + +**What it does:** +1. Fetches `finished.json`, `ocp.version`, and `operator.version` files from GCS +2. Extracts test flavor from job name (infrastructure + test type + GPU) +3. Validates OpenShift versions (filters out infrastructure types) +4. Organizes results by OCP version and test flavor +5. Merges with existing data + +### generate_ci_dashboard.py + +Generates HTML dashboard from JSON data. + +```bash +python -m workflows.nno_dashboard.generate_ci_dashboard \ + --dashboard_data_filepath output/network_operator_matrix.json \ + --dashboard_html_filepath output/network_operator_matrix.html +``` + +**What it does:** +1. Loads JSON data +2. Filters valid OCP versions +3. Builds HTML sections for each test flavor +4. Groups results by OCP and operator versions +5. Creates clickable links with success/failure styling + +## Shared Utilities + +This module uses shared utilities from `workflows/common/`: + +- **gcs_utils**: GCS API access (fetch files, build URLs) +- **html_builders**: HTML generation (TOC, notes, footers) +- **data_structures**: `TestResult` dataclass and constants +- **templates**: Template loading utilities +- **utils**: Logging + +## Example Job Names + +Network Operator CI jobs follow this pattern: + +``` +pull-ci-rh-ecosystem-edge-nvidia-ci-main-{infrastructure}-nvidia-network-operator-{test-type} +``` + +Examples: +- `pull-ci-...-doca4-nvidia-network-operator-legacy-sriov-rdma` +- `pull-ci-...-bare-metal-nvidia-network-operator-bare-metal-e2e-doca4-latest` +- `pull-ci-...-hosted-nvidia-network-operator-sriov-rdma-gpu` + +## Dashboard Output + +The generated HTML includes: + +- **Table of Contents**: Quick navigation to OCP versions +- **OCP Version Sections**: One per OpenShift version +- **Test Flavor Tables**: One table per flavor showing: + - OpenShift version + - Network Operator version (clickable to test logs) + - Color-coded success/failure status + +## Development + +### Adding New Test Flavors + +1. Update `extract_test_flavor_from_job_name()` in `fetch_ci_data.py` +2. Add pattern matching for the new flavor +3. Test with actual PR data + +### Modifying HTML Layout + +1. Edit templates in `templates/`: + - `header.html`: Page header and scripts + - `main_table.html`: OCP version container + - `test_flavor_section.html`: Individual flavor tables +2. Use `{placeholders}` for dynamic content +3. 
Regenerate dashboard to test changes + +## Integration + +This module is called by the GitHub Actions workflow in `.github/workflows/generate_matrix_page.yaml`: + +```yaml +- name: Fetch Network Operator CI Data + run: | + python -m workflows.nno_dashboard.fetch_ci_data \ + --pr_number "${{ steps.determine_pr.outputs.PR_NUMBER }}" \ + --baseline_data_filepath "${{ env.NNO_DASHBOARD_DATA_FILEPATH }}" \ + --merged_data_filepath "${{ env.NNO_DASHBOARD_DATA_FILEPATH }}" + +- name: Generate Network Operator HTML Dashboard + run: | + python -m workflows.nno_dashboard.generate_ci_dashboard \ + --dashboard_data_filepath "${{ env.NNO_DASHBOARD_DATA_FILEPATH }}" \ + --dashboard_html_filepath "${{ env.NNO_DASHBOARD_HTML_FILEPATH }}" +``` + +## See Also + +- [GPU Operator Dashboard](../gpu_operator_dashboard/) - Similar dashboard for GPU Operator +- [Common Utilities](../common/) - Shared code across dashboards +- [GitHub Actions Workflow](../../.github/workflows/generate_matrix_page.yaml) - CI automation + diff --git a/workflows/nno_dashboard/__init__.py b/workflows/nno_dashboard/__init__.py new file mode 100644 index 000000000..feb024a42 --- /dev/null +++ b/workflows/nno_dashboard/__init__.py @@ -0,0 +1,3 @@ +""" +NVIDIA Network Operator CI Dashboard generation tools. +""" diff --git a/workflows/nno_dashboard/fetch_ci_data.py b/workflows/nno_dashboard/fetch_ci_data.py new file mode 100644 index 000000000..5b50e1e66 --- /dev/null +++ b/workflows/nno_dashboard/fetch_ci_data.py @@ -0,0 +1,378 @@ +#!/usr/bin/env python3 +""" +Fetch Network Operator CI data from OpenShift CI (Prow) and store results. + +This script fetches test results for NVIDIA Network Operator from Google Cloud Storage +where OpenShift CI stores Prow job artifacts. +""" + +import argparse +import json +import re +from typing import Dict, Any, List, Tuple + +from workflows.common import ( + logger, + fetch_gcs_file_content, + fetch_filtered_files, + build_prow_job_url, + build_job_history_url, + TestResult, + OCP_FULL_VERSION, + OPERATOR_VERSION, + STATUS_SUCCESS, + STATUS_FAILURE, + STATUS_ABORTED, + is_valid_ocp_version, +) + + +# Regular expression to match Network Operator test result paths +# Example: pr-logs/pull/openshift_release/67673/rehearse-67673-pull-ci-rh-ecosystem-edge-nvidia-ci-main-doca4-nvidia-network-operator-legacy-sriov-rdma/1961127149603655680 +NNO_TEST_PATH_REGEX = re.compile( + r"pr-logs/pull/(?P[^/]+)/(?P\d+)/" + r"(?P(?:rehearse-\d+-)?pull-ci-rh-ecosystem-edge-nvidia-ci-main-" + r"(?P[^-]+)-nvidia-network-operator-(?P.+))/" + r"(?P[^/]+)" +) + + +def extract_test_flavor_from_job_name(job_name: str) -> str: + """ + Extract test flavor from Network Operator job name. + + Test flavors combine: + - Infrastructure: DOCA4, Bare Metal, Hosted + - Test type: Legacy SR-IOV RDMA, Shared Device RDMA, E2E, etc. 
+ - GPU presence: with GPU or without + + Examples: + - "pull-ci-...-doca4-nvidia-network-operator-legacy-sriov-rdma" -> "DOCA4 - RDMA Legacy SR-IOV" + - "pull-ci-...-bare-metal-nvidia-network-operator-bare-metal-e2e-doca4-latest" -> "Bare Metal - E2E" + - "pull-ci-...-hosted-nvidia-network-operator-sriov-rdma-gpu" -> "Hosted - RDMA SR-IOV with GPU" + + Args: + job_name: Full job name from Prow + + Returns: + Human-readable test flavor string + """ + job_lower = job_name.lower() + + # Identify infrastructure type + infrastructure = None + if "doca4" in job_lower and "bare-metal" not in job_lower: + infrastructure = "DOCA4" + elif "bare-metal" in job_lower: + infrastructure = "Bare Metal" + elif "hosted" in job_lower: + infrastructure = "Hosted" + + # Identify RDMA/test type + rdma_type = None + if "legacy-sriov-rdma" in job_lower or "rdma-legacy-sriov" in job_lower: + rdma_type = "RDMA Legacy SR-IOV" + elif "shared-device-rdma" in job_lower or "rdma-shared-dev" in job_lower: + rdma_type = "RDMA Shared Device" + elif "sriov" in job_lower and "rdma" in job_lower: + rdma_type = "RDMA SR-IOV" + elif "rdma" in job_lower: + rdma_type = "RDMA" + + # Identify test type if not RDMA + test_type = None + if not rdma_type: + if "bare-metal-e2e" in job_lower: + test_type = "E2E" + elif "nvidia-network-operator-e2e" in job_lower or "-e2e" in job_lower: + test_type = "E2E" + + # Check for GPU + has_gpu = "gpu" in job_lower or "gpudirect" in job_lower + + # Build flavor string + parts = [] + + if infrastructure: + parts.append(infrastructure) + + if rdma_type: + if has_gpu: + parts.append(f"{rdma_type} with GPU") + else: + parts.append(rdma_type) + elif test_type: + if has_gpu: + parts.append(f"{test_type} with GPU") + else: + parts.append(test_type) + elif has_gpu: + parts.append("with GPU") + + if not parts: + if infrastructure: + return infrastructure + return "Standard" + + return " - ".join(parts) + + +def process_single_nno_build( + pr_number: str, + job_name: str, + build_id: str, + finished_file: Dict[str, Any], + ocp_file: str, + operator_file: str +) -> TestResult: + """ + Process a single Network Operator build and create a TestResult. 
+ + Args: + pr_number: PR number + job_name: Job name + build_id: Build ID + finished_file: Parsed finished.json dict + ocp_file: ocp.version content string + operator_file: operator.version content string + + Returns: + TestResult object containing the build information + """ + from workflows.common.data_fetching import extract_test_status, extract_timestamp, determine_repo_from_job_name + + # Extract test flavor from job name + test_flavor = extract_test_flavor_from_job_name(job_name) + + # Get OCP version + ocp_version = ocp_file if ocp_file else "Unknown" + + # Get operator version + operator_version = operator_file if operator_file else "Unknown" + + # Get test status using common function + test_status = extract_test_status(finished_file, STATUS_SUCCESS, STATUS_FAILURE, STATUS_ABORTED) + + # Get timestamp using common function + timestamp = extract_timestamp(finished_file) + + # Build Prow URL (construct the finished.json path) + repo = determine_repo_from_job_name(job_name) + finished_path = f"pr-logs/pull/{repo}/{pr_number}/{job_name}/{build_id}/finished.json" + prow_url = build_prow_job_url(finished_path) + + return TestResult( + ocp_full_version=ocp_version, + operator_version=operator_version, + test_status=test_status, + prow_job_url=prow_url, + job_timestamp=str(timestamp), + test_flavor=test_flavor + ) + + +def process_tests_for_pr(pr_number: str, results_by_ocp: Dict[str, Dict[str, Any]]) -> None: + from workflows.common.data_fetching import build_version_lookups + + logger.info(f"Fetching Network Operator test data for PR #{pr_number}") + + # Fetch all relevant files + finished_files = fetch_filtered_files(pr_number, "**/nvidia-network-operator*/finished.json") + ocp_version_files = fetch_filtered_files(pr_number, "**/nvidia-network-operator*/ocp.version") + operator_version_files = fetch_filtered_files(pr_number, "**/nvidia-network-operator*/operator.version") + + logger.info(f"Found {len(finished_files)} finished.json files") + + # Build lookup dictionaries using common function + version_lookups = build_version_lookups([ + ("ocp", ocp_version_files), + ("operator", operator_version_files) + ]) + ocp_lookup = version_lookups["ocp"] + operator_lookup = version_lookups["operator"] + + # Process each finished.json file + processed_count = 0 + for finished_item in finished_files: + finished_path = finished_item["name"] + build_dir = finished_path.rsplit("/", 1)[0] + + # Parse the path to extract job information + match = NNO_TEST_PATH_REGEX.search(finished_path) + if not match: + logger.warning(f"Could not parse path: {finished_path}") + continue + + job_name = match.group("job_name") + build_id = match.group("build_id") + + logger.info(f"Processing build {build_id} for job {job_name}") + + # Fetch finished.json content + try: + finished_content_str = fetch_gcs_file_content(finished_path) + finished_json = json.loads(finished_content_str) + except Exception as e: + logger.warning(f"Failed to fetch/parse finished.json from {finished_path}: {e}") + continue + + # Get OCP and operator versions + ocp_content = ocp_lookup.get(build_dir) + operator_content = operator_lookup.get(build_dir) + + if not ocp_content: + logger.warning(f"Missing ocp.version for {build_dir}") + continue + if not operator_content: + logger.warning(f"Missing operator.version for {build_dir}") + continue + + # Create TestResult + try: + result = process_single_nno_build( + pr_number, + job_name, + build_id, + finished_json, + ocp_content, + operator_content + ) + except Exception as e: + logger.error(f"Failed 
to process build {build_id}: {e}") + continue + + # Validate OCP version + if not is_valid_ocp_version(result.ocp_full_version): + logger.warning(f"Skipping result - invalid OCP version '{result.ocp_full_version}' for job {job_name}") + continue + + # Initialize OCP version structure if needed + ocp_version = result.ocp_full_version + if ocp_version not in results_by_ocp: + results_by_ocp[ocp_version] = { + "notes": [], + "bundle_tests": [], + "release_tests": [], + "job_history_links": set(), + "test_flavors": {} + } + + # Add job history link + job_history_url = build_job_history_url(job_name) + results_by_ocp[ocp_version]["job_history_links"].add(job_history_url) + + # Add result to appropriate test flavor + test_flavor = result.test_flavor + if test_flavor not in results_by_ocp[ocp_version]["test_flavors"]: + results_by_ocp[ocp_version]["test_flavors"][test_flavor] = { + "results": [], + "job_history_links": set() + } + + results_by_ocp[ocp_version]["test_flavors"][test_flavor]["results"].append(result.to_dict()) + results_by_ocp[ocp_version]["test_flavors"][test_flavor]["job_history_links"].add(job_history_url) + + processed_count += 1 + + logger.info(f"Processed {processed_count} builds for PR #{pr_number}") + + +def merge_and_save_results( + new_results: Dict[str, Dict[str, Any]], + output_filepath: str, + existing_results: Dict[str, Dict[str, Any]] = None +) -> None: + """ + Merge new results with existing results and save to file. + + Args: + new_results: New test results to add + output_filepath: Path to save merged results + existing_results: Existing results to merge with (optional) + """ + from workflows.common.data_fetching import merge_job_history_links, convert_sets_to_lists_recursive + + if existing_results is None: + existing_results = {} + + # Merge results + merged = dict(existing_results) + for ocp_version, version_data in new_results.items(): + if ocp_version not in merged: + merged[ocp_version] = version_data + # Convert sets to lists for JSON serialization + merged[ocp_version]["job_history_links"] = merge_job_history_links( + version_data.get("job_history_links", set()), [] + ) + for flavor, flavor_data in merged[ocp_version].get("test_flavors", {}).items(): + flavor_data["job_history_links"] = merge_job_history_links( + flavor_data.get("job_history_links", set()), [] + ) + else: + # Merge test flavors + existing_flavors = merged[ocp_version].get("test_flavors", {}) + new_flavors = version_data.get("test_flavors", {}) + + for flavor, flavor_data in new_flavors.items(): + if flavor not in existing_flavors: + existing_flavors[flavor] = flavor_data + existing_flavors[flavor]["job_history_links"] = merge_job_history_links( + flavor_data.get("job_history_links", set()), [] + ) + else: + # Merge results + existing_flavors[flavor]["results"].extend(flavor_data["results"]) + + # Merge job history links using common function + existing_flavors[flavor]["job_history_links"] = merge_job_history_links( + flavor_data.get("job_history_links", set()), + existing_flavors[flavor].get("job_history_links", []) + ) + + merged[ocp_version]["test_flavors"] = existing_flavors + + # Merge global job history links using common function + merged[ocp_version]["job_history_links"] = merge_job_history_links( + version_data.get("job_history_links", set()), + merged[ocp_version].get("job_history_links", []) + ) + + # Save to file + with open(output_filepath, 'w') as f: + json.dump(merged, f, indent=2) + + logger.info(f"Saved merged results to {output_filepath}") + + +def main() -> None: + """Main 
entry point.""" + parser = argparse.ArgumentParser(description="Fetch Network Operator CI data") + parser.add_argument("--pr_number", required=True, help="PR number to process") + parser.add_argument("--baseline_data_filepath", help="Path to existing JSON data") + parser.add_argument("--merged_data_filepath", required=True, help="Path to save merged data") + + args = parser.parse_args() + + # Load existing data if available + existing_data = {} + if args.baseline_data_filepath: + try: + with open(args.baseline_data_filepath, 'r') as f: + existing_data = json.load(f) + logger.info(f"Loaded existing data from {args.baseline_data_filepath}") + except FileNotFoundError: + logger.info("No existing data file found, starting fresh") + except json.JSONDecodeError as e: + logger.warning(f"Failed to parse existing data: {e}, starting fresh") + + # Fetch new data + new_data = {} + process_tests_for_pr(args.pr_number, new_data) + + # Merge and save + merge_and_save_results(new_data, args.merged_data_filepath, existing_data) + + +if __name__ == "__main__": + main() + diff --git a/workflows/nno_dashboard/generate_ci_dashboard.py b/workflows/nno_dashboard/generate_ci_dashboard.py new file mode 100644 index 000000000..52b34c8eb --- /dev/null +++ b/workflows/nno_dashboard/generate_ci_dashboard.py @@ -0,0 +1,342 @@ +#!/usr/bin/env python3 +""" +Generate Network Operator HTML dashboard from JSON data. +Matrix-style layout organized by Network Operator version. +""" + +import argparse +import json +import semver +from typing import Dict, List, Any, Set +from datetime import datetime, timezone +from collections import defaultdict + +from workflows.common import ( + logger, + load_template, + OCP_FULL_VERSION, + OPERATOR_VERSION, + is_valid_ocp_version, + sanitize_id, +) + + +# Map test flavors from job names to display columns +FLAVOR_COLUMN_MAP = { + "DOCA4 - RDMA Legacy SR-IOV": "Legacy SR-IOV Ethernet", + "Bare Metal - RDMA Legacy SR-IOV": "Legacy SR-IOV Ethernet", + "Hosted - RDMA Legacy SR-IOV": "Legacy SR-IOV Ethernet", + + "DOCA4 - RDMA Legacy SR-IOV with GPU": "Legacy SR-IOV Ethernet + GPU Direct", + "Bare Metal - RDMA Legacy SR-IOV with GPU": "Legacy SR-IOV Ethernet + GPU Direct", + + "DOCA4 - RDMA": "Shared InfiniBand", + "Bare Metal - RDMA": "Shared InfiniBand", + "Hosted - RDMA": "Shared InfiniBand", + + "DOCA4 - RDMA with GPU": "Shared InfiniBand + GPU Direct", + "Bare Metal - RDMA with GPU": "Shared InfiniBand + GPU Direct", + + "DOCA4 - RDMA Shared Device": "Shared Ethernet", + "Bare Metal - RDMA Shared Device": "Shared Ethernet", + "Hosted - RDMA Shared Device": "Shared Ethernet", + + "DOCA4 - RDMA Shared Device with GPU": "Shared Ethernet + GPU Direct", + "Bare Metal - RDMA Shared Device with GPU": "Shared Ethernet + GPU Direct", + + "DOCA4 - E2E": "Legacy SR-IOV Ethernet", + "Bare Metal - E2E": "Legacy SR-IOV Ethernet", + "Hosted - E2E": "Legacy SR-IOV Ethernet", +} + +# Column order for the matrix table +COLUMN_ORDER = [ + "GPU Operator", + "Shared Ethernet", + "Shared Ethernet + GPU Direct", + "Shared InfiniBand", + "Shared InfiniBand + GPU Direct", + "Legacy SR-IOV Ethernet", + "Legacy SR-IOV Ethernet + GPU Direct", +] + + +def restructure_data_by_nno_version(ocp_data: Dict[str, Dict[str, Any]]) -> Dict[str, Dict[str, Any]]: + """ + Restructure data from OCP-centric to NNO-centric. 
+ + Input structure (OCP-centric): + { + "4.17.16": { + "test_flavors": { + "DOCA4 - RDMA Legacy SR-IOV": { + "results": [{operator_version: "25.4.0", ...}] + } + } + } + } + + Output structure (NNO-centric): + { + "25.4.0": { + "ocp_versions": { + "4.17": { + "gpu_operators": { + "25.10.0": { + "Legacy SR-IOV Ethernet": { + "status": "SUCCESS", + "url": "...", + "hardware": "ConnectX-5 Ex" + } + } + } + } + } + } + } + """ + nno_data = defaultdict(lambda: {"ocp_versions": defaultdict(lambda: {"gpu_operators": defaultdict(dict)})}) + + for ocp_full, ocp_info in ocp_data.items(): + if not is_valid_ocp_version(ocp_full): + continue + + # Get major.minor OCP version (e.g., "4.17.16" -> "4.17") + ocp_parts = ocp_full.split('.') + ocp_major_minor = f"{ocp_parts[0]}.{ocp_parts[1]}" + + test_flavors = ocp_info.get("test_flavors", {}) + + for flavor_name, flavor_data in test_flavors.items(): + # Map flavor to column + column = FLAVOR_COLUMN_MAP.get(flavor_name, "Other") + + for result in flavor_data.get("results", []): + nno_version = result.get("operator_version") or result.get("gpu_operator_version", "Unknown") + gpu_operator_version = "master-latest" # TODO: Extract from test data if available + + status = result.get("test_status", "UNKNOWN") + url = result.get("prow_job_url", "#") + + # Store result + if column not in nno_data[nno_version]["ocp_versions"][ocp_major_minor]["gpu_operators"][gpu_operator_version]: + nno_data[nno_version]["ocp_versions"][ocp_major_minor]["gpu_operators"][gpu_operator_version][column] = { + "status": status, + "url": url, + "hardware": "ConnectX-6 Dx" # TODO: Extract from test data if available + } + + return dict(nno_data) + + +def build_matrix_table(ocp_version: str, gpu_operators_data: Dict[str, Dict[str, Any]]) -> str: + """ + Build a matrix table for a specific OCP version. + Only includes columns that have actual data (removes empty columns). 
+ + Args: + ocp_version: OCP version (e.g., "4.20") + gpu_operators_data: Dictionary of GPU operator versions and their test results + + Returns: + HTML table string + """ + if not gpu_operators_data: + return "" + + # Sort GPU operator versions + sorted_gpu_ops = sorted(gpu_operators_data.keys()) + + # Determine which columns have data (not all empty) + columns_with_data = set() + for gpu_op_version in sorted_gpu_ops: + test_results = gpu_operators_data[gpu_op_version] + for column in COLUMN_ORDER[1:]: # Skip "GPU Operator" column + if column in test_results: + columns_with_data.add(column) + + # Build list of columns to display (in order) + active_columns = ["GPU Operator"] # Always include first column + for column in COLUMN_ORDER[1:]: + if column in columns_with_data: + active_columns.append(column) + + # If no data columns, don't build table + if len(active_columns) == 1: + return "" + + # Build table header + table_html = f""" + + + + +""" + + for column in active_columns: + table_html += f" \n" + + table_html += """ + + +""" + + # Build table rows (one per GPU operator version) + for gpu_op_version in sorted_gpu_ops: + test_results = gpu_operators_data[gpu_op_version] + + table_html += f""" + +""" + + # Add cells for each active test flavor column + for column in active_columns[1:]: # Skip "GPU Operator" column + if column in test_results: + result = test_results[column] + status = result.get("status", "UNKNOWN") + url = result.get("url", "#") + hardware = result.get("hardware", "") + + if status == "SUCCESS": + icon = "✓" + css_class = "success-link" + elif status == "FAILURE": + icon = "✗" + css_class = "failed-link" + else: + icon = "⚬" + css_class = "pending" + + if status in ["SUCCESS", "FAILURE"]: + table_html += f' \n' + else: + # This shouldn't happen since we filtered columns, but keep as fallback + table_html += ' \n' + + table_html += " \n" + + table_html += """ +
<th>{column}</th>\n"
+
+    table_html += """
+        </tr>
+      </thead>
+      <tbody>
+"""
+
+    # Build table rows (one per GPU operator version)
+    for gpu_op_version in sorted_gpu_ops:
+        test_results = gpu_operators_data[gpu_op_version]
+
+        table_html += f"""
+        <tr>
+          <td class="gpu-operator-version">{gpu_op_version}</td>
+"""
+
+        # Add cells for each active test flavor column
+        for column in active_columns[1:]:  # Skip "GPU Operator" column
+            if column in test_results:
+                result = test_results[column]
+                status = result.get("status", "UNKNOWN")
+                url = result.get("url", "#")
+                hardware = result.get("hardware", "")
+
+                if status == "SUCCESS":
+                    icon = "✓"
+                    css_class = "success-link"
+                elif status == "FAILURE":
+                    icon = "✗"
+                    css_class = "failed-link"
+                else:
+                    icon = "⚬"
+                    css_class = "pending"
+
+                if status in ["SUCCESS", "FAILURE"]:
+                    table_html += f'          <td><a href="{url}" class="{css_class}">{icon}</a>'
+                else:
+                    table_html += f'          <td><span class="{css_class}">{icon}</span>'
+
+                if hardware:
+                    table_html += f'<br><span class="hardware">{hardware}</span>'
+                table_html += '</td>\n'
+            else:
+                # This shouldn't happen since we filtered columns, but keep as fallback
+                table_html += '          <td></td>\n'
+
+        table_html += "        </tr>\n"
+
+    table_html += """
+      </tbody>
+    </table>
+"""
+
+    return table_html
    No valid test data found.

    " + html_content += """ +
    + Last updated: """ + datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") + """ UTC +
    + +""" + return html_content + + # Sort NNO versions + sorted_nno_versions = sorted(nno_data.keys(), reverse=True) + + # Build TOC + toc_links = [] + for nno_version in sorted_nno_versions: + version_id = sanitize_id(nno_version) + toc_links.append(f'{nno_version}') + + html_content += f""" +
    +
    Network Operator Versions
    + {", ".join(toc_links)} +
    +""" + + # Build sections for each NNO version + for nno_version in sorted_nno_versions: + nno_info = nno_data[nno_version] + version_id = sanitize_id(nno_version) + + html_content += f""" +
    +
    + Network Operator {nno_version} +
    +""" + + # Sort OCP versions + ocp_versions = nno_info.get("ocp_versions", {}) + sorted_ocp_versions = sorted(ocp_versions.keys(), reverse=True) + + # Build matrix table for each OCP version + for ocp_version in sorted_ocp_versions: + gpu_operators_data = ocp_versions[ocp_version].get("gpu_operators", {}) + table_html = build_matrix_table(ocp_version, gpu_operators_data) + html_content += table_html + + html_content += "
    \n\n" + + # Add footer + timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S") + html_content += f"""
    + Last updated: {timestamp} UTC +
    + + +""" + + return html_content + + +def main(): + """Main entry point.""" + parser = argparse.ArgumentParser(description="Generate Network Operator CI Dashboard HTML") + parser.add_argument("--dashboard_data_filepath", required=True, help="Path to JSON data file") + parser.add_argument("--dashboard_html_filepath", required=True, help="Path to output HTML file") + + args = parser.parse_args() + + # Load JSON data + logger.info(f"Loading data from {args.dashboard_data_filepath}") + with open(args.dashboard_data_filepath, 'r') as f: + ocp_data = json.load(f) + + # Get templates directory + import os + script_dir = os.path.dirname(os.path.abspath(__file__)) + templates_dir = os.path.join(script_dir, "templates") + + # Generate HTML + logger.info("Generating HTML dashboard") + html_content = generate_test_matrix(ocp_data, templates_dir) + + # Save HTML + logger.info(f"Saving HTML to {args.dashboard_html_filepath}") + with open(args.dashboard_html_filepath, 'w') as f: + f.write(html_content) + + logger.info("Dashboard generation complete") + + +if __name__ == "__main__": + main() diff --git a/workflows/nno_dashboard/requirements.txt b/workflows/nno_dashboard/requirements.txt new file mode 100644 index 000000000..826f7b1da --- /dev/null +++ b/workflows/nno_dashboard/requirements.txt @@ -0,0 +1,4 @@ +requests>=2.31.0 +semver>=3.0.0 +pydantic>=2.0.0 + diff --git a/workflows/nno_dashboard/templates/header.html b/workflows/nno_dashboard/templates/header.html new file mode 100644 index 000000000..2df235b42 --- /dev/null +++ b/workflows/nno_dashboard/templates/header.html @@ -0,0 +1,12 @@ + + + + + + + Test Matrix: NVIDIA Network Operator on Red Hat OpenShift + + + + +

    Test Matrix: NVIDIA Network Operator on Red Hat OpenShift

diff --git a/workflows/nno_dashboard/templates/main_table.html b/workflows/nno_dashboard/templates/main_table.html
new file mode 100644
index 000000000..61a430d0c
--- /dev/null
+++ b/workflows/nno_dashboard/templates/main_table.html
@@ -0,0 +1,9 @@
+<div class="ocp-section">
+    <h2 class="ocp-title">
+        OpenShift {ocp_key}
+    </h2>
+    {notes}
+
+    {test_flavors_sections}
+
+</div>
diff --git a/workflows/nno_dashboard/templates/test_flavor_section.html b/workflows/nno_dashboard/templates/test_flavor_section.html
new file mode 100644
index 000000000..f4fe7c649
--- /dev/null
+++ b/workflows/nno_dashboard/templates/test_flavor_section.html
@@ -0,0 +1,15 @@
+<div class="flavor-section">
+    <table class="flavor-table">
+        <thead>
+            <tr>
+                <th>OpenShift</th>
+                <th>NVIDIA Network Operator</th>
+            </tr>
+        </thead>
+        <tbody>
+            {flavor_table_rows}
+        </tbody>
+    </table>
+</div>
+
    + From 66a9a576fa4e4057230974fff0b64bd4fa67731a Mon Sep 17 00:00:00 2001 From: Guygored Date: Sun, 7 Dec 2025 10:36:08 +0200 Subject: [PATCH 2/5] fix: Network Operator dashboard data fetching Critical fixes for NNO dashboard data collection: 1. Fixed file structure detection: - Network Operator jobs have finished.json at build root (not in artifacts/) - Fetch all finished.json files and filter for 'nvidia-network-operator' in path - Ensures finished.json is at correct location 2. Added robust version file detection: - Try multiple possible artifact paths for ocp.version and operator.version - Patterns: **/network-operator-e2e/, **/artifacts/, **/nvidia-network-operator*/ - Filter results to only include Network Operator jobs - Handles various test configurations (DOCA4, bare-metal, hosted, etc.) 3. Improved file filtering: - Filter out nested artifacts to avoid duplicates - Verify files are in expected locations These changes fix the issue where GitHub Actions was finding 0 Network Operator files despite PR #67673 containing Network Operator jobs. Related to: PR #67673 testing and dashboard generation --- .gitignore | 2 + workflows/nno_dashboard/fetch_ci_data.py | 48 +++++++++++++++++-- .../output/network_operator_matrix.json | 1 + 3 files changed, 47 insertions(+), 4 deletions(-) create mode 100644 workflows/test_matrix_dashboard/output/network_operator_matrix.json diff --git a/.gitignore b/.gitignore index 06cf92b78..ac23b99e1 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ __pycache__/ venv/ *.pyc +.DS_Store +.vscode/ \ No newline at end of file diff --git a/workflows/nno_dashboard/fetch_ci_data.py b/workflows/nno_dashboard/fetch_ci_data.py index 5b50e1e66..92a9c1ec8 100644 --- a/workflows/nno_dashboard/fetch_ci_data.py +++ b/workflows/nno_dashboard/fetch_ci_data.py @@ -176,10 +176,50 @@ def process_tests_for_pr(pr_number: str, results_by_ocp: Dict[str, Dict[str, Any logger.info(f"Fetching Network Operator test data for PR #{pr_number}") - # Fetch all relevant files - finished_files = fetch_filtered_files(pr_number, "**/nvidia-network-operator*/finished.json") - ocp_version_files = fetch_filtered_files(pr_number, "**/nvidia-network-operator*/ocp.version") - operator_version_files = fetch_filtered_files(pr_number, "**/nvidia-network-operator*/operator.version") + # Fetch all finished.json files, then filter for network operator jobs + # Network operator finished.json files are at the build root, so we need to: + # 1. Fetch all finished.json files + # 2. 
Filter for paths containing "nvidia-network-operator" in the job name + all_finished_files = fetch_filtered_files(pr_number, "**/finished.json") + + # Filter for Network Operator jobs by checking if job name contains "nvidia-network-operator" + finished_files = [] + for file_item in all_finished_files: + path = file_item.get("name", "") + # Check if this is a network operator job by looking for the pattern in the path + if "nvidia-network-operator" in path and path.endswith("/finished.json"): + # Additional check: must be at build root, not nested in artifacts + # Path should look like: .../rehearse-X-...-nvidia-network-operator-.../BUILD_ID/finished.json + if path.count("/finished.json") == 1 and "/artifacts/" not in path.split("/finished.json")[0]: + finished_files.append(file_item) + + # Now fetch version files - try multiple possible artifact paths + # Network Operator artifacts can be in different locations depending on test type + ocp_version_files = [] + operator_version_files = [] + + # Try common artifact patterns + version_patterns = [ + "**/network-operator-e2e/ocp.version", + "**/artifacts/ocp.version", + "**/nvidia-network-operator*/ocp.version", + ] + + for pattern in version_patterns: + files = fetch_filtered_files(pr_number, pattern) + for file_item in files: + path = file_item.get("name", "") + # Only include if it's part of a network operator job + if "nvidia-network-operator" in path: + ocp_version_files.append(file_item) + + for pattern in version_patterns: + operator_pattern = pattern.replace("ocp.version", "operator.version") + files = fetch_filtered_files(pr_number, operator_pattern) + for file_item in files: + path = file_item.get("name", "") + if "nvidia-network-operator" in path: + operator_version_files.append(file_item) logger.info(f"Found {len(finished_files)} finished.json files") diff --git a/workflows/test_matrix_dashboard/output/network_operator_matrix.json b/workflows/test_matrix_dashboard/output/network_operator_matrix.json new file mode 100644 index 000000000..0967ef424 --- /dev/null +++ b/workflows/test_matrix_dashboard/output/network_operator_matrix.json @@ -0,0 +1 @@ +{} From beb982738d2e99da5b44061cd9610dfbd7475f36 Mon Sep 17 00:00:00 2001 From: Guygored Date: Sun, 7 Dec 2025 10:53:07 +0200 Subject: [PATCH 3/5] fix: Network Operator version file lookup key mismatch The version files were being fetched correctly but the lookup dictionary keys didn't match the build directory keys used during processing. Problem: - Version files are deeply nested: .../BUILD_ID/artifacts/.../ocp.version - Was using parent directory as key (just the artifacts dir) - Build processing was looking up by full build path - Result: 'Missing ocp.version' warnings despite files being found Solution: - Extract BUILD_ID from version file paths using regex - Build lookup dictionary with full build path as key: pr-logs/pull/{repo}/{pr}/{job_name}/{build_id} - Use same key format when looking up versions during processing - Now keys match and versions are found! This fixes the issue where GitHub Actions found version files but processed 0 builds due to lookup key mismatch. 
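Example of the mismatch (illustrative paths; the build ID below is made up):

    version file: pr-logs/pull/<repo>/67673/rehearse-...-nvidia-network-operator-.../1234567890/artifacts/.../ocp.version
    old key:      .../1234567890/artifacts/...   (parent directory of the file)
    new key:      pr-logs/pull/<repo>/67673/rehearse-...-nvidia-network-operator-.../1234567890
    lookup key:   pr-logs/pull/<repo>/67673/rehearse-...-nvidia-network-operator-.../1234567890

Both sides now derive the same key, so the versions resolve during processing.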
--- workflows/nno_dashboard/fetch_ci_data.py | 49 +++++++++++++++++++----- 1 file changed, 40 insertions(+), 9 deletions(-) diff --git a/workflows/nno_dashboard/fetch_ci_data.py b/workflows/nno_dashboard/fetch_ci_data.py index 92a9c1ec8..d6149b852 100644 --- a/workflows/nno_dashboard/fetch_ci_data.py +++ b/workflows/nno_dashboard/fetch_ci_data.py @@ -223,19 +223,45 @@ def process_tests_for_pr(pr_number: str, results_by_ocp: Dict[str, Dict[str, Any logger.info(f"Found {len(finished_files)} finished.json files") - # Build lookup dictionaries using common function - version_lookups = build_version_lookups([ - ("ocp", ocp_version_files), - ("operator", operator_version_files) - ]) - ocp_lookup = version_lookups["ocp"] - operator_lookup = version_lookups["operator"] + # Build lookup dictionaries by BUILD_ID (not by parent directory) + # Network Operator version files are deeply nested, so we need to extract the build ID from the path + ocp_lookup = {} + operator_lookup = {} + + for file_item in ocp_version_files: + path = file_item["name"] + # Extract build ID from path using regex + match = NNO_TEST_PATH_REGEX.search(path) + if match: + build_id = match.group("build_id") + pr_num = match.group("pr_number") + job_name = match.group("job_name") + # Use the full build directory path as key + build_dir_key = f"pr-logs/pull/{match.group('repo')}/{pr_num}/{job_name}/{build_id}" + try: + content = fetch_gcs_file_content(path) + ocp_lookup[build_dir_key] = content.strip() + except Exception as e: + logger.warning(f"Failed to fetch OCP version from {path}: {e}") + + for file_item in operator_version_files: + path = file_item["name"] + match = NNO_TEST_PATH_REGEX.search(path) + if match: + build_id = match.group("build_id") + pr_num = match.group("pr_number") + job_name = match.group("job_name") + build_dir_key = f"pr-logs/pull/{match.group('repo')}/{pr_num}/{job_name}/{build_id}" + try: + content = fetch_gcs_file_content(path) + operator_lookup[build_dir_key] = content.strip() + except Exception as e: + logger.warning(f"Failed to fetch operator version from {path}: {e}") # Process each finished.json file processed_count = 0 for finished_item in finished_files: finished_path = finished_item["name"] - build_dir = finished_path.rsplit("/", 1)[0] # Parse the path to extract job information match = NNO_TEST_PATH_REGEX.search(finished_path) @@ -245,6 +271,11 @@ def process_tests_for_pr(pr_number: str, results_by_ocp: Dict[str, Dict[str, Any job_name = match.group("job_name") build_id = match.group("build_id") + pr_num = match.group("pr_number") + repo = match.group("repo") + + # Build the lookup key (same format as we used when building the lookup dictionaries) + build_dir = f"pr-logs/pull/{repo}/{pr_num}/{job_name}/{build_id}" logger.info(f"Processing build {build_id} for job {job_name}") @@ -256,7 +287,7 @@ def process_tests_for_pr(pr_number: str, results_by_ocp: Dict[str, Dict[str, Any logger.warning(f"Failed to fetch/parse finished.json from {finished_path}: {e}") continue - # Get OCP and operator versions + # Get OCP and operator versions using the build directory key ocp_content = ocp_lookup.get(build_dir) operator_content = operator_lookup.get(build_dir) From 677c9214d4aefbd2c4678e4647e4de6bfb751504 Mon Sep 17 00:00:00 2001 From: Guygored Date: Sun, 7 Dec 2025 11:11:18 +0200 Subject: [PATCH 4/5] debug: Add logging to troubleshoot version lookup keys Added debug logging to show: - The build_dir key being used for lookup - Available keys in ocp_lookup dictionary - Available keys in operator_lookup 
dictionary This will help identify why lookups are failing. --- workflows/nno_dashboard/fetch_ci_data.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/workflows/nno_dashboard/fetch_ci_data.py b/workflows/nno_dashboard/fetch_ci_data.py index d6149b852..7b18e84e7 100644 --- a/workflows/nno_dashboard/fetch_ci_data.py +++ b/workflows/nno_dashboard/fetch_ci_data.py @@ -278,6 +278,9 @@ def process_tests_for_pr(pr_number: str, results_by_ocp: Dict[str, Dict[str, Any build_dir = f"pr-logs/pull/{repo}/{pr_num}/{job_name}/{build_id}" logger.info(f"Processing build {build_id} for job {job_name}") + logger.debug(f"Build dir key: {build_dir}") + logger.debug(f"Available OCP keys: {list(ocp_lookup.keys())[:3] if ocp_lookup else 'EMPTY'}") + logger.debug(f"Available Operator keys: {list(operator_lookup.keys())[:3] if operator_lookup else 'EMPTY'}") # Fetch finished.json content try: From a38ffe2885bb21eafac9d65e475db326eab1b4a9 Mon Sep 17 00:00:00 2001 From: Guygored Date: Sun, 7 Dec 2025 11:37:42 +0200 Subject: [PATCH 5/5] Fix NNO data fetching: Extract build_id from correct path position The issue was in how we extracted the build_id from version file paths: - Version files are deeply nested: .../build_id/artifacts/.../ocp.version - Using regex on the full path matched 'artifacts' as build_id - Solution: Extract build_id from path position (parts[5]) This fixes the 'Missing ocp.version' warnings and allows proper data collection. --- workflows/nno_dashboard/fetch_ci_data.py | 53 ++++++++----------- .../output/network_operator_matrix.json | 2 +- 2 files changed, 24 insertions(+), 31 deletions(-) diff --git a/workflows/nno_dashboard/fetch_ci_data.py b/workflows/nno_dashboard/fetch_ci_data.py index 7b18e84e7..18d098f55 100644 --- a/workflows/nno_dashboard/fetch_ci_data.py +++ b/workflows/nno_dashboard/fetch_ci_data.py @@ -223,38 +223,34 @@ def process_tests_for_pr(pr_number: str, results_by_ocp: Dict[str, Dict[str, Any logger.info(f"Found {len(finished_files)} finished.json files") - # Build lookup dictionaries by BUILD_ID (not by parent directory) - # Network Operator version files are deeply nested, so we need to extract the build ID from the path + # Build lookup dictionaries by BUILD_ID only (simplest approach) + # Extract build_id from the known path structure: + # pr-logs/pull/{repo}/{pr}/{job_name}/{build_id}/... 
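+ # Illustrative example (hypothetical build ID), splitting on "/":
+ #   "pr-logs/pull/repo/67673/job-name/1234567890/artifacts/e2e/ocp.version"
+ #   -> parts[5] == "1234567890"
+ # This holds no matter how deeply the version file is nested under the build dir.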
ocp_lookup = {} operator_lookup = {} for file_item in ocp_version_files: path = file_item["name"] - # Extract build ID from path using regex - match = NNO_TEST_PATH_REGEX.search(path) - if match: - build_id = match.group("build_id") - pr_num = match.group("pr_number") - job_name = match.group("job_name") - # Use the full build directory path as key - build_dir_key = f"pr-logs/pull/{match.group('repo')}/{pr_num}/{job_name}/{build_id}" + # Extract build_id from the 6th path segment (0-indexed: 0=pr-logs, 1=pull, 2=repo, 3=pr, 4=job_name, 5=build_id) + parts = path.split("/") + if len(parts) >= 6: + build_id = parts[5] try: content = fetch_gcs_file_content(path) - ocp_lookup[build_dir_key] = content.strip() + ocp_lookup[build_id] = content.strip() + logger.info(f"Added OCP version for build {build_id}") except Exception as e: logger.warning(f"Failed to fetch OCP version from {path}: {e}") for file_item in operator_version_files: path = file_item["name"] - match = NNO_TEST_PATH_REGEX.search(path) - if match: - build_id = match.group("build_id") - pr_num = match.group("pr_number") - job_name = match.group("job_name") - build_dir_key = f"pr-logs/pull/{match.group('repo')}/{pr_num}/{job_name}/{build_id}" + parts = path.split("/") + if len(parts) >= 6: + build_id = parts[5] try: content = fetch_gcs_file_content(path) - operator_lookup[build_dir_key] = content.strip() + operator_lookup[build_id] = content.strip() + logger.info(f"Added Operator version for build {build_id}") except Exception as e: logger.warning(f"Failed to fetch operator version from {path}: {e}") @@ -263,8 +259,11 @@ def process_tests_for_pr(pr_number: str, results_by_ocp: Dict[str, Dict[str, Any for finished_item in finished_files: finished_path = finished_item["name"] + # Remove /finished.json from path before parsing (regex expects path without filename) + path_without_file = finished_path.rsplit("/finished.json", 1)[0] if finished_path.endswith("/finished.json") else finished_path + # Parse the path to extract job information - match = NNO_TEST_PATH_REGEX.search(finished_path) + match = NNO_TEST_PATH_REGEX.search(path_without_file) if not match: logger.warning(f"Could not parse path: {finished_path}") continue @@ -274,13 +273,7 @@ def process_tests_for_pr(pr_number: str, results_by_ocp: Dict[str, Dict[str, Any pr_num = match.group("pr_number") repo = match.group("repo") - # Build the lookup key (same format as we used when building the lookup dictionaries) - build_dir = f"pr-logs/pull/{repo}/{pr_num}/{job_name}/{build_id}" - logger.info(f"Processing build {build_id} for job {job_name}") - logger.debug(f"Build dir key: {build_dir}") - logger.debug(f"Available OCP keys: {list(ocp_lookup.keys())[:3] if ocp_lookup else 'EMPTY'}") - logger.debug(f"Available Operator keys: {list(operator_lookup.keys())[:3] if operator_lookup else 'EMPTY'}") # Fetch finished.json content try: @@ -290,15 +283,15 @@ def process_tests_for_pr(pr_number: str, results_by_ocp: Dict[str, Dict[str, Any logger.warning(f"Failed to fetch/parse finished.json from {finished_path}: {e}") continue - # Get OCP and operator versions using the build directory key - ocp_content = ocp_lookup.get(build_dir) - operator_content = operator_lookup.get(build_dir) + # Get OCP and operator versions using the build_id + ocp_content = ocp_lookup.get(build_id) + operator_content = operator_lookup.get(build_id) if not ocp_content: - logger.warning(f"Missing ocp.version for {build_dir}") + logger.warning(f"Missing ocp.version for build {build_id}") continue if not operator_content: - 
logger.warning(f"Missing operator.version for {build_dir}") + logger.warning(f"Missing operator.version for build {build_id}") continue # Create TestResult diff --git a/workflows/test_matrix_dashboard/output/network_operator_matrix.json b/workflows/test_matrix_dashboard/output/network_operator_matrix.json index 0967ef424..9e26dfeeb 100644 --- a/workflows/test_matrix_dashboard/output/network_operator_matrix.json +++ b/workflows/test_matrix_dashboard/output/network_operator_matrix.json @@ -1 +1 @@ -{} +{} \ No newline at end of file