diff --git a/.dockerignore b/.dockerignore
index 5b34fe7..55a3fdb 100644
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,4 +1,9 @@
 node_modules
 .venv
 .ijwb
-.idea
\ No newline at end of file
+.idea
+temp
+csaf_analysis
+bazel-*
+.git
+container_data
\ No newline at end of file
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
index 2454fd2..394e28c 100644
--- a/.github/workflows/test.yaml
+++ b/.github/workflows/test.yaml
@@ -37,6 +37,8 @@ jobs:
         bazel test //apollo/tests:test_auth --test_output=all
         bazel test //apollo/tests:test_validation --test_output=all
         bazel test //apollo/tests:test_admin_routes_supported_products --test_output=all
+        bazel test //apollo/tests:test_api_osv --test_output=all
+        bazel test //apollo/tests:test_database_service --test_output=all
 
     - name: Integration Tests
       run: ./build/scripts/test.bash
diff --git a/apollo/rhcsaf/__init__.py b/apollo/rhcsaf/__init__.py
index 762cedf..95175c4 100644
--- a/apollo/rhcsaf/__init__.py
+++ b/apollo/rhcsaf/__init__.py
@@ -4,24 +4,65 @@
 from common.logger import Logger
 from apollo.rpm_helpers import parse_nevra
 
-# Initialize Info before Logger for this module
-
 logger = Logger()
 
+EUS_CPE_PRODUCTS = frozenset([
+    "rhel_eus",  # Extended Update Support
+    "rhel_e4s",  # Update Services for SAP Solutions
+    "rhel_aus",  # Advanced Update Support (IBM Power)
+    "rhel_tus",  # Telecommunications Update Service
+])
+
+EUS_PRODUCT_NAME_KEYWORDS = frozenset([
+    "e4s",
+    "eus",
+    "aus",
+    "tus",
+    "extended update support",
+    "update services for sap",
+    "advanced update support",
+    "telecommunications update service",
+])
+
+
+def _is_eus_product(product_name: str, cpe: str) -> bool:
+    """
+    Detects if a product is EUS-related based on product name and CPE.
+
+    Args:
+        product_name: Full product name (e.g., "Red Hat Enterprise Linux AppStream E4S (v.9.0)")
+        cpe: CPE string (e.g., "cpe:/a:redhat:rhel_e4s:9.0::appstream")
+
+    Returns:
+        True if product is EUS/E4S/AUS/TUS, False otherwise
+    """
+    if cpe:
+        parts = cpe.split(":")
+        if len(parts) > 3:
+            cpe_product = parts[3]
+            if cpe_product in EUS_CPE_PRODUCTS:
+                return True
+
+    if product_name:
+        name_lower = product_name.lower()
+        for keyword in EUS_PRODUCT_NAME_KEYWORDS:
+            if keyword in name_lower:
+                return True
+
+    return False
+
+
 def extract_rhel_affected_products_for_db(csaf: dict) -> set:
     """
     Extracts all needed info for red_hat_advisory_affected_products table from CSAF product_tree.
     Expands 'noarch' to all main arches and maps names to user-friendly values.
     Returns a set of tuples: (variant, name, major_version, minor_version, arch)
     """
-    # Maps architecture short names to user-friendly product names
     arch_name_map = {
         "aarch64": "Red Hat Enterprise Linux for ARM 64",
         "x86_64": "Red Hat Enterprise Linux for x86_64",
         "s390x": "Red Hat Enterprise Linux for IBM z Systems",
         "ppc64le": "Red Hat Enterprise Linux for Power, little endian",
     }
-    # List of main architectures to expand 'noarch'
     main_arches = list(arch_name_map.keys())
     affected_products = set()
     product_tree = csaf.get("product_tree", {})
@@ -29,25 +70,20 @@ def extract_rhel_affected_products_for_db(csaf: dict) -> set:
         logger.warning("No product tree found in CSAF document")
         return affected_products
 
-    # Iterate over all vendor branches in the product tree
     for vendor_branch in product_tree.get("branches", []):
-        # Find the product_family branch for RHEL
         family_branch = None
         arches = set()
         for branch in vendor_branch.get("branches", []):
             if branch.get("category") == "product_family" and branch.get("name") == "Red Hat Enterprise Linux":
                 family_branch = branch
-            # Collect all architecture branches at the same level as product_family
             elif branch.get("category") == "architecture":
                 arch = branch.get("name")
                 if arch:
                     arches.add(arch)
-        # If 'noarch' is present, expand to all main architectures
         if "noarch" in arches:
             arches = set(main_arches)
         if not family_branch:
             continue
-        # Find the product_name branch for CPE/version info
         prod_name = None
         cpe = None
         for branch in family_branch.get("branches", []):
@@ -59,24 +95,24 @@ def extract_rhel_affected_products_for_db(csaf: dict) -> set:
         if not prod_name or not cpe:
             continue
 
-        # Parses the CPE string to extract major and minor version numbers
+        if _is_eus_product(prod_name, cpe):
+            logger.debug(f"Skipping EUS product: {prod_name}")
+            continue
+
         # Example CPE: "cpe:/a:redhat:enterprise_linux:9::appstream"
-        parts = cpe.split(":")  # Split the CPE string by colon
+        parts = cpe.split(":")
         major = None
         minor = None
         if len(parts) > 4:
-            version = parts[4]  # The version is typically the 5th field (index 4)
+            version = parts[4]
             if version:
                 if "." in version:
-                    # If the version contains a dot, split into major and minor
                     major, minor = version.split(".", 1)
                     major = int(major)
                     minor = int(minor)
                 else:
-                    # If no dot, only major version is present
                     major = int(version)
-        # For each architecture, add a tuple with product info to the set
         for arch in arches:
             name = arch_name_map.get(arch)
             if name is None:
@@ -84,26 +120,142 @@ def extract_rhel_affected_products_for_db(csaf: dict) -> set:
                 continue
             if major:
                 affected_products.add((
-                    family_branch.get("name"),  # variant (e.g., "Red Hat Enterprise Linux")
-                    name,  # user-friendly architecture name
-                    major,  # major version number
-                    minor,  # minor version number (may be None)
-                    arch  # architecture short name
+                    family_branch.get("name"),
+                    name,
+                    major,
+                    minor,
+                    arch
                 ))
     logger.debug(f"Number of affected products: {len(affected_products)}")
     return affected_products
 
+
+def _traverse_for_eus(branches, product_eus_map=None):
+    """
+    Recursively traverse CSAF branches to build EUS product map.
+
+    Args:
+        branches: List of CSAF branch dictionaries to traverse
+        product_eus_map: Optional dict to accumulate results
+
+    Returns:
+        Dict mapping product_id to boolean indicating if product is EUS
+    """
+    if product_eus_map is None:
+        product_eus_map = {}
+
+    for branch in branches:
+        category = branch.get("category")
+
+        if category == "product_name":
+            prod = branch.get("product", {})
+            product_id = prod.get("product_id")
+
+            if product_id:
+                product_name = prod.get("name", "")
+                cpe = prod.get("product_identification_helper", {}).get("cpe", "")
+                is_eus = _is_eus_product(product_name, cpe)
+                product_eus_map[product_id] = is_eus
+
+        if "branches" in branch:
+            _traverse_for_eus(branch["branches"], product_eus_map)
+
+    return product_eus_map
+
+
+def _extract_packages_from_branches(branches, product_eus_map, packages=None):
+    """
+    Recursively traverse CSAF branches to extract package NEVRAs.
+
+    Args:
+        branches: List of CSAF branch dictionaries to traverse
+        product_eus_map: Dict mapping product_id to EUS status
+        packages: Optional set to accumulate results
+
+    Returns:
+        Set of NEVRA strings
+    """
+    if packages is None:
+        packages = set()
+
+    for branch in branches:
+        category = branch.get("category")
+
+        if category == "product_version":
+            prod = branch.get("product", {})
+            product_id = prod.get("product_id")
+            purl = prod.get("product_identification_helper", {}).get("purl")
+
+            if not product_id:
+                continue
+
+            if purl and not purl.startswith("pkg:rpm/"):
+                continue
+
+            # Product IDs for packages can have format: "AppStream-9.0.0.Z.E4S:package-nevra"
+            # or just "package-nevra" for packages in product_version entries
+            skip_eus = False
+            for eus_prod_id, is_eus in product_eus_map.items():
+                if is_eus and (":" in product_id and product_id.startswith(eus_prod_id + ":")):
+                    skip_eus = True
+                    break
+
+            if skip_eus:
+                continue
+
+            # Format: "package-epoch:version-release.arch" or "package-epoch:version-release.arch::module:stream"
+            packages.add(product_id.split("::")[0])
+
+        if "branches" in branch:
+            _extract_packages_from_branches(branch["branches"], product_eus_map, packages)
+
+    return packages
+
+
+def _extract_packages_from_product_tree(csaf: dict) -> set:
+    """
+    Extracts fixed packages from CSAF product_tree using product_id fields.
+    Handles both regular and modular packages by extracting NEVRAs directly from product_id.
+    Filters out EUS products.
+
+    Args:
+        csaf: CSAF document dict
+
+    Returns:
+        Set of NEVRA strings
+    """
+    product_tree = csaf.get("product_tree", {})
+
+    if not product_tree:
+        return set()
+
+    product_eus_map = {}
+    for vendor_branch in product_tree.get("branches", []):
+        product_eus_map = _traverse_for_eus(vendor_branch.get("branches", []), product_eus_map)
+
+    packages = set()
+    for vendor_branch in product_tree.get("branches", []):
+        packages = _extract_packages_from_branches(vendor_branch.get("branches", []), product_eus_map, packages)
+
+    return packages
+
+
 def red_hat_advisory_scraper(csaf: dict):
     # At the time of writing there are ~254 advisories that do not have any vulnerabilities.
     if not csaf.get("vulnerabilities"):
         logger.warning("No vulnerabilities found in CSAF document")
         return None
 
-    # red_hat_advisories table values
-    red_hat_issued_at = csaf["document"]["tracking"]["initial_release_date"]  # "2025-02-24T03:42:46+00:00"
-    red_hat_updated_at = csaf["document"]["tracking"]["current_release_date"]  # "2025-04-17T12:08:56+00:00"
-    name = csaf["document"]["tracking"]["id"]  # "RHSA-2025:1234"
-    red_hat_synopsis = csaf["document"]["title"]  # "Red Hat Bug Fix Advisory: Red Hat Quay v3.13.4 bug fix release"
+    name = csaf["document"]["tracking"]["id"]
+
+    red_hat_affected_products = extract_rhel_affected_products_for_db(csaf)
+    if not red_hat_affected_products:
+        logger.info(f"Skipping advisory {name}: all products are EUS-only")
+        return None
+
+    red_hat_issued_at = csaf["document"]["tracking"]["initial_release_date"]
+    red_hat_updated_at = csaf["document"]["tracking"]["current_release_date"]
+    red_hat_synopsis = csaf["document"]["title"]
     red_hat_description = None
     topic = None
     for item in csaf["document"]["notes"]:
@@ -112,59 +264,31 @@ def red_hat_advisory_scraper(csaf: dict):
         elif item["category"] == "summary":
             topic = item["text"]
     kind_lookup = {"RHSA": "Security", "RHBA": "Bug Fix", "RHEA": "Enhancement"}
-    kind = kind_lookup[name.split("-")[0]]  # "RHSA-2025:1234" --> "Security"
-    severity = csaf["document"]["aggregate_severity"]["text"]  # "Important"
+    kind = kind_lookup[name.split("-")[0]]
+    severity = csaf["document"]["aggregate_severity"]["text"]
 
-    # To maintain consistency with the existing database, we need to replace the
+    # To maintain consistency with the existing database, replace
     # "Red Hat [KIND] Advisory:" prefixes with the severity level.
     red_hat_synopsis = red_hat_synopsis.replace("Red Hat Bug Fix Advisory: ", f"{severity}:")
     red_hat_synopsis = red_hat_synopsis.replace("Red Hat Security Advisory:", f"{severity}:")
     red_hat_synopsis = red_hat_synopsis.replace("Red Hat Enhancement Advisory: ", f"{severity}:")
 
-    # red_hat_advisory_packages table values
-    red_hat_fixed_packages = set()
+    red_hat_fixed_packages = _extract_packages_from_product_tree(csaf)
+
     red_hat_cve_set = set()
     red_hat_bugzilla_set = set()
-    product_id_suffix_list = (
-        ".aarch64",
-        ".i386",
-        ".i686",
-        ".noarch",
-        ".ppc",
-        ".ppc64",
-        ".ppc64le",
-        ".s390",
-        ".s390x",
-        ".src",
-        ".x86_64"
-    )  # TODO: find a better way to filter product IDs. This is a workaround for the fact that
-    # the product IDs in the CSAF documents also contain artifacts like container images
-    # and we only are interested in RPMs.
+
     for vulnerability in csaf["vulnerabilities"]:
-        for product_id in vulnerability["product_status"]["fixed"]:
-            if product_id.endswith(product_id_suffix_list):
-                # These IDs are in the format product:package_nevra
-                # ie- AppStream-9.4.0.Z.EUS:rsync-0:3.2.3-19.el9_4.1.aarch64"
-                split_on_colon = product_id.split(":")
-                product = split_on_colon[0]
-                package_nevra = ":".join(split_on_colon[-2:])
-                red_hat_fixed_packages.add(package_nevra)
-
-        # red_hat_advisory_cves table values. Many older advisories do not have CVEs and so we need to handle that.
         cve_id = vulnerability.get("cve", None)
         cve_cvss3_scoring_vector = vulnerability.get("scores", [{}])[0].get("cvss_v3", {}).get("vectorString", None)
         cve_cvss3_base_score = vulnerability.get("scores", [{}])[0].get("cvss_v3", {}).get("baseScore", None)
         cve_cwe = vulnerability.get("cwe", {}).get("id", None)
         red_hat_cve_set.add((cve_id, cve_cvss3_scoring_vector, cve_cvss3_base_score, cve_cwe))
 
-        # red_hat_advisory_bugzilla_bugs table values
         for bug_id in vulnerability.get("ids", []):
             if bug_id.get("system_name") == "Red Hat Bugzilla ID":
                 red_hat_bugzilla_set.add(bug_id["text"])
 
-    # red_hat_advisory_affected_products table values
-    red_hat_affected_products = extract_rhel_affected_products_for_db(csaf)
-
     return {
         "red_hat_issued_at": str(red_hat_issued_at),
         "red_hat_updated_at": str(red_hat_updated_at),
diff --git a/apollo/rhworker/poll_rh_activities.py b/apollo/rhworker/poll_rh_activities.py
index e592136..85a4380 100644
--- a/apollo/rhworker/poll_rh_activities.py
+++ b/apollo/rhworker/poll_rh_activities.py
@@ -651,8 +651,11 @@ async def fetch_csv_with_dates(session, url):
     releases = await fetch_csv_with_dates(session, base_url + "releases.csv")
    deletions = await fetch_csv_with_dates(session, base_url + "deletions.csv")
 
-    # Merge changes and releases, keeping the most recent timestamp for each advisory
-    all_advisories = {**changes, **releases}
+    # Merge changes and releases, giving changes.csv precedence:
+    # changes.csv contains the most recent modification time for each advisory,
+    # while releases.csv contains the original publication dates.
+    # Letting changes.csv win ensures updates to existing advisories are caught.
+    all_advisories = {**releases, **changes}
     # Remove deletions
     for advisory_id in deletions:
         all_advisories.pop(advisory_id, None)
diff --git a/apollo/server/routes/admin_supported_products.py b/apollo/server/routes/admin_supported_products.py
index 730ef5b..c9eddb8 100644
--- a/apollo/server/routes/admin_supported_products.py
+++ b/apollo/server/routes/admin_supported_products.py
@@ -1296,6 +1296,8 @@ async def _get_mirror_config_data(mirror: SupportedProductsRhMirror) -> Dict[str
 
 def _json_serializer(obj):
     """Custom JSON serializer for non-standard types"""
     if isinstance(obj, Decimal):
+        if obj % 1 == 0:
+            return int(obj)
         return float(obj)
     raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")
diff --git a/apollo/server/routes/admin_workflows.py b/apollo/server/routes/admin_workflows.py
index ef319dc..cfb26ec 100644
--- a/apollo/server/routes/admin_workflows.py
+++ b/apollo/server/routes/admin_workflows.py
@@ -21,7 +21,8 @@ async def admin_workflows(request: Request, user: User = Depends(admin_user_sche
     """Render admin workflows page for manual workflow triggering"""
     db_service = DatabaseService()
     env_info = await db_service.get_environment_info()
-
+    index_state = await db_service.get_last_indexed_at()
+
     return templates.TemplateResponse(
         "admin_workflows.jinja", {
             "request": request,
@@ -29,6 +30,8 @@ async def admin_workflows(request: Request, user: User = Depends(admin_user_sche
             "env_name": env_info["environment"],
             "is_production": env_info["is_production"],
             "reset_allowed": env_info["reset_allowed"],
+            "last_indexed_at": index_state.get("last_indexed_at_iso"),
+            "last_indexed_exists": index_state.get("exists", False),
         }
     )
 
@@ -92,6 +95,39 @@ async def trigger_poll_rhcsaf(
     return RedirectResponse(url="/admin/workflows", status_code=303)
 
 
+@router.post("/workflows/update-index-timestamp")
+async def update_index_timestamp(
+    request: Request,
+    new_timestamp: str = Form(...),
+    user: User = Depends(admin_user_scheme)
+):
+    """Update the last_indexed_at timestamp in red_hat_index_state"""
+    try:
+        # Parse the timestamp
+        timestamp_dt = datetime.fromisoformat(new_timestamp.replace("Z", "+00:00"))
+
+        db_service = DatabaseService()
+        result = await db_service.update_last_indexed_at(timestamp_dt, user.email)
+
+        Logger().info(f"Admin user {user.email} updated last_indexed_at to {new_timestamp}")
+
+        # Store success message in session
+        request.session["workflow_message"] = result["message"]
+        request.session["workflow_type"] = "success"
+
+    except ValueError as e:
+        Logger().error(f"Invalid timestamp format: {str(e)}")
+        request.session["workflow_message"] = f"Invalid timestamp format: {str(e)}"
+        request.session["workflow_type"] = "error"
+
+    except Exception as e:
+        Logger().error(f"Error updating last_indexed_at: {str(e)}")
+        request.session["workflow_message"] = f"Error updating timestamp: {str(e)}"
+        request.session["workflow_type"] = "error"
+
+    return RedirectResponse(url="/admin/workflows", status_code=303)
+
+
 @router.get("/workflows/database/preview-reset")
 async def preview_database_reset(
     request: Request,
diff --git a/apollo/server/routes/api_osv.py b/apollo/server/routes/api_osv.py
index f0022ee..debf89a 100644
--- a/apollo/server/routes/api_osv.py
+++ b/apollo/server/routes/api_osv.py
@@ -143,7 +143,6 @@ def to_osv_advisory(ui_url: str, advisory: Advisory) -> OSVAdvisory:
     for pkg in affected_packages:
         x = pkg[0]
         nevra = pkg[1]
-        # Only process "src" packages
         if nevra.group(5) != "src":
             continue
         if x.nevra in processed_nvra:
@@ -198,11 +197,9 @@ def to_osv_advisory(ui_url: str, advisory: Advisory) -> OSVAdvisory:
     if advisory.red_hat_advisory:
         osv_credits.append(OSVCredit(name="Red Hat"))
 
-    # Calculate severity by finding the highest CVSS score
     highest_cvss_base_score = 0.0
     final_score_vector = None
     for x in advisory.cves:
-        # Convert cvss3_scoring_vector to a float
         base_score = x.cvss3_base_score
         if base_score and base_score != "UNKNOWN":
             base_score = float(base_score)
@@ -255,15 +252,14 @@ async def get_advisories_osv(
         cve,
         synopsis,
         severity,
-        kind="Security",
+        kind=None,
         fetch_related=True,
     )
-
     count = fetch_adv[0]
     advisories = fetch_adv[1]
     ui_url = await get_setting(UI_URL)
-    osv_advisories = [to_osv_advisory(ui_url, x) for x in advisories]
-    page = create_page(osv_advisories, count, params)
+    osv_advisories = [to_osv_advisory(ui_url, adv) for adv in advisories if adv.cves]
+    page = create_page(osv_advisories, len(osv_advisories), params)
 
     state = await RedHatIndexState.first()
     page.last_updated_at = (
@@ -282,7 +278,7 @@ async def get_advisories_osv(
 )
 async def get_advisory_osv(advisory_id: str):
     advisory = (
-        await Advisory.filter(name=advisory_id, kind="Security")
+        await Advisory.filter(name=advisory_id)
         .prefetch_related(
             "packages",
             "cves",
@@ -295,7 +291,7 @@ async def get_advisory_osv(advisory_id: str):
         .get_or_none()
     )
 
-    if not advisory:
+    if not advisory or not advisory.cves:
         raise HTTPException(404)
 
     ui_url = await get_setting(UI_URL)
diff --git a/apollo/server/services/database_service.py b/apollo/server/services/database_service.py
index 78d6fb0..0a66800 100644
--- a/apollo/server/services/database_service.py
+++ b/apollo/server/services/database_service.py
@@ -123,4 +123,67 @@ async def get_environment_info(self) -> Dict[str, str]:
             "environment": env_name,
             "is_production": self.is_production_environment(),
             "reset_allowed": not self.is_production_environment()
-        }
\ No newline at end of file
+        }
+
+    async def get_last_indexed_at(self) -> Dict[str, Any]:
+        """
+        Get the current last_indexed_at timestamp from red_hat_index_state
+
+        Returns:
+            Dictionary with timestamp information
+        """
+        index_state = await RedHatIndexState.first()
+
+        if not index_state or not index_state.last_indexed_at:
+            return {
+                "last_indexed_at": None,
+                "last_indexed_at_iso": None,
+                "exists": False
+            }
+
+        return {
+            "last_indexed_at": index_state.last_indexed_at,
+            "last_indexed_at_iso": index_state.last_indexed_at.isoformat(),
+            "exists": True
+        }
+
+    async def update_last_indexed_at(self, new_timestamp: datetime, user_email: str) -> Dict[str, Any]:
+        """
+        Update the last_indexed_at timestamp in red_hat_index_state
+
+        Args:
+            new_timestamp: New timestamp to set
+            user_email: Email of user making the change (for logging)
+
+        Returns:
+            Dictionary with operation results
+
+        Raises:
+            RuntimeError: If the timestamp cannot be updated
+        """
+        logger = Logger()
+
+        try:
+            # Get or create index state
+            index_state = await RedHatIndexState.first()
+
+            old_timestamp = None
+            if index_state:
+                old_timestamp = index_state.last_indexed_at
+                index_state.last_indexed_at = new_timestamp
+                await index_state.save()
+                logger.info(f"Updated last_indexed_at by {user_email}: {old_timestamp} -> {new_timestamp}")
+            else:
+                await RedHatIndexState.create(last_indexed_at=new_timestamp)
+                logger.info(f"Created last_indexed_at by {user_email}: {new_timestamp}")
+
+            return {
+                "success": True,
+                "old_timestamp": old_timestamp.isoformat() if old_timestamp else None,
+                "new_timestamp": new_timestamp.isoformat(),
+                "message": f"Successfully updated last_indexed_at to {new_timestamp.isoformat()}"
+            }
+
+        except Exception as e:
+            logger.error(f"Failed to update last_indexed_at: {e}")
+            raise RuntimeError(f"Failed to update timestamp: {e}") from e
\ No newline at end of file
diff --git a/apollo/server/templates/admin_workflows.jinja b/apollo/server/templates/admin_workflows.jinja
index 6dd396a..b3f7916 100644
--- a/apollo/server/templates/admin_workflows.jinja
+++ b/apollo/server/templates/admin_workflows.jinja
@@ -71,7 +71,7 @@
Polls Red Hat for new CSAF (Common Security Advisory Framework) advisories.
-