From dd425604a2b887b163fc09e53e022963b180201d Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Mon, 10 Nov 2025 16:21:41 -0700 Subject: [PATCH 01/30] Improve generate_rocky_config.py version matching and naming flexibility This commit enhances the generate_rocky_config.py script with two key improvements: 1. Flexible version matching for RHEL 8/9/10+ compatibility: - Major-only filtering (e.g., --version 9): Matches any minor version within that major version (9.0, 9.1, 9.2, 9.6, etc.) - Full version filtering (e.g., --version 9.6): Requires exact match to the specified major.minor version This addresses differences in Red Hat's advisory format across RHEL versions: - RHEL 8 & 9: Advisories typically don't include minor versions - RHEL 10+: Advisories now include minor versions (e.g., "RHEL 10.2") The flexible matching ensures that repository configurations can be generated with appropriate version matching rules (NULL match_minor_version for RHEL 8/9, specific match_minor_version for RHEL 10+). 2. Custom mirror naming with --mirror-name-base option: - Allows specifying a custom base name for generated mirror configurations - Example: --mirror-name-base "Rocky Linux 9" generates "Rocky Linux 9 x86_64" instead of "Rocky Linux 9.6 x86_64" - Useful for creating legacy product entries or custom naming schemes - Works in combination with --name-suffix for additional flexibility These changes improve Apollo's ability to generate configurations that align with Red Hat's advisory matching requirements across different major versions. --- scripts/generate_rocky_config.py | 47 ++++++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 12 deletions(-) diff --git a/scripts/generate_rocky_config.py b/scripts/generate_rocky_config.py index 1ae0438..8aa6299 100644 --- a/scripts/generate_rocky_config.py +++ b/scripts/generate_rocky_config.py @@ -602,7 +602,7 @@ def parse_repomd_path(repomd_url: str, base_url: str) -> Dict[str, str]: def build_mirror_config( - version: str, arch: str, name_suffix: Optional[str] = None + version: str, arch: str, name_suffix: Optional[str] = None, mirror_name_base: Optional[str] = None ) -> Dict[str, Any]: """ Build a mirror configuration dictionary. @@ -611,15 +611,24 @@ def build_mirror_config( version: Rocky Linux version arch: Architecture name_suffix: Optional suffix for mirror name + mirror_name_base: Optional custom base for mirror name (e.g., "Rocky Linux 9") Returns: Mirror configuration dictionary """ - # Build mirror name with optional suffix - if name_suffix is not None and name_suffix != "": - mirror_name = f"Rocky Linux {version} {name_suffix} {arch}" + # Build mirror name with optional custom base or suffix + if mirror_name_base is not None and mirror_name_base != "": + # Use custom base name (e.g., "Rocky Linux 9") + if name_suffix is not None and name_suffix != "": + mirror_name = f"{mirror_name_base} {name_suffix} {arch}" + else: + mirror_name = f"{mirror_name_base} {arch}" else: - mirror_name = f"Rocky Linux {version} {arch}" + # Use default naming with version + if name_suffix is not None and name_suffix != "": + mirror_name = f"Rocky Linux {version} {name_suffix} {arch}" + else: + mirror_name = f"Rocky Linux {version} {arch}" # Parse version to extract major and minor components if version != UNKNOWN_VALUE and "." 
in version: @@ -690,6 +699,7 @@ def generate_rocky_config( include_source: bool = True, architectures: List[str] = None, name_suffix: Optional[str] = None, + mirror_name_base: Optional[str] = None, ) -> List[Dict[str, Any]]: """ Generate Rocky Linux configuration by discovering repository structure. @@ -702,6 +712,7 @@ def generate_rocky_config( include_source: Whether to include source repository URLs (default: True) architectures: List of architectures to include (default: auto-detect) name_suffix: Optional suffix to add to mirror names (e.g., "test", "staging") + mirror_name_base: Optional custom base for mirror name (e.g., "Rocky Linux 9") Returns: List of configuration dictionaries ready for JSON export @@ -730,12 +741,16 @@ def generate_rocky_config( continue # Skip if version filter specified and doesn't match - if ( - version - and metadata["version"] != version - and metadata["version"] != UNKNOWN_VALUE - ): - continue + if version and metadata["version"] != UNKNOWN_VALUE: + # If version filter has no dot (major only), match major version only + if "." not in version: + # Extract major version from metadata version + metadata_major = metadata["version"].split(".")[0] if "." in metadata["version"] else metadata["version"] + if metadata_major != version: + continue + # If version filter has dot (major.minor), require exact match + elif metadata["version"] != version: + continue # Skip debug repos if not wanted if not include_debug and metadata["repo_type"] == "debug": @@ -773,7 +788,7 @@ def generate_rocky_config( if not detected_version: detected_version = UNKNOWN_VALUE - mirror_config = build_mirror_config(detected_version, arch, name_suffix) + mirror_config = build_mirror_config(detected_version, arch, name_suffix, mirror_name_base) # Group repos by name and type repo_groups = {} @@ -828,6 +843,8 @@ def main(): %(prog)s https://mirror.example.com/pub/rocky/ --output rocky_config.json %(prog)s https://mirror.example.com/pub/rocky/ --name-suffix test --version 9.6 %(prog)s https://staging.example.com/pub/rocky/ --name-suffix staging --arch riscv64 + %(prog)s https://mirror.example.com/pub/rocky/ --mirror-name-base "Rocky Linux 9" --version 9.6 + %(prog)s https://mirror.example.com/pub/rocky/ --mirror-name-base "Rocky Linux 9 (Legacy)" --version 9 """, ) @@ -880,6 +897,11 @@ def main(): help="Optional suffix to add to mirror names (e.g., 'test', 'staging')", ) + parser.add_argument( + "--mirror-name-base", + help="Optional custom base for mirror name (e.g., 'Rocky Linux 9' instead of 'Rocky Linux 9.6')", + ) + parser.add_argument("--output", "-o", help="Output file path (default: stdout)") parser.add_argument( @@ -926,6 +948,7 @@ def main(): include_source=not args.no_source, architectures=args.arch, name_suffix=args.name_suffix, + mirror_name_base=args.mirror_name_base, ) if not config: From 95319c3d99ca5e10aab1178aa621aa5d1c914858 Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Wed, 12 Nov 2025 14:48:42 -0700 Subject: [PATCH 02/30] Simplify conditional logic in generate_rocky_config.py - Remove redundant None and empty string checks in mirror name building - Consolidate version filtering logic into single condition block - Eliminate unnecessary ternary operator in version parsing --- scripts/generate_rocky_config.py | 35 +++++++++++++------------------- 1 file changed, 14 insertions(+), 21 deletions(-) diff --git a/scripts/generate_rocky_config.py b/scripts/generate_rocky_config.py index 8aa6299..1aafea8 100644 --- a/scripts/generate_rocky_config.py +++ 
b/scripts/generate_rocky_config.py
@@ -617,18 +617,13 @@ def build_mirror_config(
         Mirror configuration dictionary
     """
     # Build mirror name with optional custom base or suffix
-    if mirror_name_base is not None and mirror_name_base != "":
-        # Use custom base name (e.g., "Rocky Linux 9")
-        if name_suffix is not None and name_suffix != "":
-            mirror_name = f"{mirror_name_base} {name_suffix} {arch}"
-        else:
-            mirror_name = f"{mirror_name_base} {arch}"
+    if not mirror_name_base:
+        mirror_name_base = f"Rocky Linux {version}"
+
+    if name_suffix:
+        mirror_name = f"{mirror_name_base} {name_suffix} {arch}"
     else:
-        # Use default naming with version
-        if name_suffix is not None and name_suffix != "":
-            mirror_name = f"Rocky Linux {version} {name_suffix} {arch}"
-        else:
-            mirror_name = f"Rocky Linux {version} {arch}"
+        mirror_name = f"{mirror_name_base} {arch}"

     # Parse version to extract major and minor components
     if version != UNKNOWN_VALUE and "." in version:
@@ -741,16 +736,14 @@ def generate_rocky_config(
             continue

         # Skip if version filter specified and doesn't match
-        if version and metadata["version"] != UNKNOWN_VALUE:
-            # If version filter has no dot (major only), match major version only
-            if "." not in version:
-                # Extract major version from metadata version
-                metadata_major = metadata["version"].split(".")[0] if "." in metadata["version"] else metadata["version"]
-                if metadata_major != version:
-                    continue
-            # If version filter has dot (major.minor), require exact match
-            elif metadata["version"] != version:
-                continue
+        # Supports both exact version match (e.g., "9.5") and major version match (e.g., "9")
+        if (
+            version
+            and metadata["version"] != version
+            and metadata["version"] != UNKNOWN_VALUE
+            and metadata["version"].split(".")[0] != version
+        ):
+            continue

         # Skip debug repos if not wanted
         if not include_debug and metadata["repo_type"] == "debug":

From cb9aacc817f2cdcff0961a099ee3dbb7563690c0 Mon Sep 17 00:00:00 2001
From: Sam Thornton
Date: Tue, 21 Oct 2025 14:24:46 -0600
Subject: [PATCH 03/30] Return all advisories with CVEs

Any advisory that addresses at least one CVE should be considered a
Security Advisory and should be returned by the OSV API. Instead of
filtering strictly on the advisory "kind" (e.g., Security, Bug Fix,
Enhancement) we should instead filter based on whether there are
associated CVEs for the given advisory.
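A minimal sketch of the new predicate (illustrative only, not part of the
diff below; the actual change passes kind=None to the fetch and filters on
the prefetched cves relation of the Advisory model):

    from typing import Any

    def is_security_advisory(advisory: Any) -> bool:
        # Treat any advisory that references at least one CVE as a
        # Security Advisory for OSV purposes, regardless of its "kind"
        # (Security, Bug Fix, Enhancement).
        return len(advisory.cves) > 0

    # usage: osv_advisories = [to_osv_advisory(ui_url, adv)
    #                          for adv in advisories
    #                          if is_security_advisory(adv)]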
--- .github/workflows/test.yaml | 1 + apollo/server/routes/api_osv.py | 14 +- apollo/tests/BUILD.bazel | 8 ++ apollo/tests/test_api_osv.py | 248 ++++++++++++++++++++++++++++++++ 4 files changed, 266 insertions(+), 5 deletions(-) create mode 100644 apollo/tests/test_api_osv.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 2454fd2..3791963 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -37,6 +37,7 @@ jobs: bazel test //apollo/tests:test_auth --test_output=all bazel test //apollo/tests:test_validation --test_output=all bazel test //apollo/tests:test_admin_routes_supported_products --test_output=all + bazel test //apollo/tests:test_api_osv --test_output=all - name: Integration Tests run: ./build/scripts/test.bash diff --git a/apollo/server/routes/api_osv.py b/apollo/server/routes/api_osv.py index f0022ee..5fbfadb 100644 --- a/apollo/server/routes/api_osv.py +++ b/apollo/server/routes/api_osv.py @@ -255,15 +255,18 @@ async def get_advisories_osv( cve, synopsis, severity, - kind="Security", + kind=None, fetch_related=True, ) count = fetch_adv[0] advisories = fetch_adv[1] + # Filter to only include advisories with CVE references + advisories_with_cves = [adv for adv in advisories if len(adv.cves) > 0] + ui_url = await get_setting(UI_URL) - osv_advisories = [to_osv_advisory(ui_url, x) for x in advisories] - page = create_page(osv_advisories, count, params) + osv_advisories = [to_osv_advisory(ui_url, x) for x in advisories_with_cves] + page = create_page(osv_advisories, len(advisories_with_cves), params) state = await RedHatIndexState.first() page.last_updated_at = ( @@ -282,7 +285,7 @@ async def get_advisories_osv( ) async def get_advisory_osv(advisory_id: str): advisory = ( - await Advisory.filter(name=advisory_id, kind="Security") + await Advisory.filter(name=advisory_id) .prefetch_related( "packages", "cves", @@ -295,7 +298,8 @@ async def get_advisory_osv(advisory_id: str): .get_or_none() ) - if not advisory: + # Only return advisories with CVE references + if not advisory or len(advisory.cves) == 0: raise HTTPException(404) ui_url = await get_setting(UI_URL) diff --git a/apollo/tests/BUILD.bazel b/apollo/tests/BUILD.bazel index b658f79..7609a83 100644 --- a/apollo/tests/BUILD.bazel +++ b/apollo/tests/BUILD.bazel @@ -61,3 +61,11 @@ py_test( "//apollo/server:server_lib", ], ) + +py_test( + name = "test_api_osv", + srcs = ["test_api_osv.py"], + deps = [ + "//apollo/server:server_lib", + ], +) diff --git a/apollo/tests/test_api_osv.py b/apollo/tests/test_api_osv.py new file mode 100644 index 0000000..6422c3d --- /dev/null +++ b/apollo/tests/test_api_osv.py @@ -0,0 +1,248 @@ +""" +Tests for OSV API CVE filtering functionality +""" + +import unittest +import datetime +from unittest.mock import Mock + +from apollo.server.routes.api_osv import to_osv_advisory + + +class MockSupportedProduct: + """Mock SupportedProduct model""" + + def __init__(self, variant="Rocky Linux", vendor="Rocky Enterprise Software Foundation"): + self.variant = variant + self.vendor = vendor + + +class MockSupportedProductsRhMirror: + """Mock SupportedProductsRhMirror model""" + + def __init__(self, match_major_version=9): + self.match_major_version = match_major_version + + +class MockPackage: + """Mock Package model""" + + def __init__( + self, + nevra, + product_name="Rocky Linux 9", + repo_name="BaseOS", + supported_product=None, + supported_products_rh_mirror=None, + ): + self.nevra = nevra + self.product_name = product_name + self.repo_name = repo_name + 
self.supported_product = supported_product or MockSupportedProduct() + self.supported_products_rh_mirror = supported_products_rh_mirror + + +class MockCVE: + """Mock CVE model""" + + def __init__( + self, + cve="CVE-2024-1234", + cvss3_base_score="7.5", + cvss3_scoring_vector="CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N", + ): + self.cve = cve + self.cvss3_base_score = cvss3_base_score + self.cvss3_scoring_vector = cvss3_scoring_vector + + +class MockFix: + """Mock Fix model""" + + def __init__(self, source="https://bugzilla.redhat.com/show_bug.cgi?id=1234567"): + self.source = source + + +class MockAdvisory: + """Mock Advisory model""" + + def __init__( + self, + name="RLSA-2024:1234", + synopsis="Important: test security update", + description="A security update for test package", + published_at=None, + updated_at=None, + packages=None, + cves=None, + fixes=None, + red_hat_advisory=None, + ): + self.name = name + self.synopsis = synopsis + self.description = description + self.published_at = published_at or datetime.datetime.now( + datetime.timezone.utc + ) + self.updated_at = updated_at or datetime.datetime.now(datetime.timezone.utc) + self.packages = packages or [] + self.cves = cves or [] + self.fixes = fixes or [] + self.red_hat_advisory = red_hat_advisory + + +class TestOSVCVEFiltering(unittest.TestCase): + """Test CVE filtering logic in OSV API""" + + def test_advisory_with_cve_has_upstream_references(self): + """Test that advisories with CVEs have upstream references populated""" + packages = [ + MockPackage( + nevra="pcs-0:0.11.8-2.el9_5.src", + supported_products_rh_mirror=MockSupportedProductsRhMirror(9), + ), + ] + cves = [MockCVE(cve="CVE-2024-1234")] + + advisory = MockAdvisory(packages=packages, cves=cves) + result = to_osv_advisory("https://errata.rockylinux.org", advisory) + + self.assertIsNotNone(result.upstream) + self.assertEqual(len(result.upstream), 1) + self.assertIn("CVE-2024-1234", result.upstream) + + def test_advisory_with_multiple_cves(self): + """Test that advisories with multiple CVEs include all in upstream""" + packages = [ + MockPackage( + nevra="openssl-1:3.0.7-28.el9_5.src", + supported_products_rh_mirror=MockSupportedProductsRhMirror(9), + ), + ] + cves = [ + MockCVE(cve="CVE-2024-1111"), + MockCVE(cve="CVE-2024-2222"), + MockCVE(cve="CVE-2024-3333"), + ] + + advisory = MockAdvisory(packages=packages, cves=cves) + result = to_osv_advisory("https://errata.rockylinux.org", advisory) + + self.assertIsNotNone(result.upstream) + self.assertEqual(len(result.upstream), 3) + self.assertIn("CVE-2024-1111", result.upstream) + self.assertIn("CVE-2024-2222", result.upstream) + self.assertIn("CVE-2024-3333", result.upstream) + + def test_advisory_without_cves_has_empty_upstream(self): + """Test that advisories without CVEs have empty upstream list""" + packages = [ + MockPackage( + nevra="kernel-0:5.14.0-427.el9.src", + supported_products_rh_mirror=MockSupportedProductsRhMirror(9), + ), + ] + + advisory = MockAdvisory(packages=packages, cves=[]) + result = to_osv_advisory("https://errata.rockylinux.org", advisory) + + self.assertIsNotNone(result.upstream) + self.assertEqual(len(result.upstream), 0) + + def test_source_packages_only(self): + """Test that only source packages are processed, not binary packages""" + packages = [ + MockPackage( + nevra="httpd-0:2.4.57-8.el9.src", + supported_products_rh_mirror=MockSupportedProductsRhMirror(9), + ), + MockPackage( + nevra="httpd-0:2.4.57-8.el9.x86_64", + supported_products_rh_mirror=MockSupportedProductsRhMirror(9), + 
), + MockPackage( + nevra="httpd-0:2.4.57-8.el9.aarch64", + supported_products_rh_mirror=MockSupportedProductsRhMirror(9), + ), + ] + cves = [MockCVE()] + + advisory = MockAdvisory(packages=packages, cves=cves) + result = to_osv_advisory("https://errata.rockylinux.org", advisory) + + # Should only have 1 affected package (the source package) + self.assertEqual(len(result.affected), 1) + self.assertEqual(result.affected[0].package.name, "httpd") + + def test_severity_from_highest_cvss(self): + """Test that severity uses the highest CVSS score from multiple CVEs""" + packages = [ + MockPackage( + nevra="vim-2:9.0.1592-1.el9.src", + supported_products_rh_mirror=MockSupportedProductsRhMirror(9), + ), + ] + cves = [ + MockCVE( + cve="CVE-2024-1111", + cvss3_base_score="5.5", + cvss3_scoring_vector="CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:L/I:N/A:N", + ), + MockCVE( + cve="CVE-2024-2222", + cvss3_base_score="9.8", + cvss3_scoring_vector="CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", + ), + MockCVE( + cve="CVE-2024-3333", + cvss3_base_score="7.5", + cvss3_scoring_vector="CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:N/A:N", + ), + ] + + advisory = MockAdvisory(packages=packages, cves=cves) + result = to_osv_advisory("https://errata.rockylinux.org", advisory) + + self.assertIsNotNone(result.severity) + self.assertEqual(len(result.severity), 1) + self.assertEqual(result.severity[0].type, "CVSS_V3") + self.assertEqual( + result.severity[0].score, "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H" + ) + + def test_ecosystem_format(self): + """Test that ecosystem field is formatted correctly""" + packages = [ + MockPackage( + nevra="bash-0:5.1.8-9.el9.src", + product_name="Rocky Linux 9", + supported_products_rh_mirror=MockSupportedProductsRhMirror(9), + ), + ] + cves = [MockCVE()] + + advisory = MockAdvisory(packages=packages, cves=cves) + result = to_osv_advisory("https://errata.rockylinux.org", advisory) + + self.assertEqual(len(result.affected), 1) + self.assertEqual(result.affected[0].package.ecosystem, "Rocky Linux:9") + + def test_version_format_with_epoch(self): + """Test that fixed version includes epoch in epoch:version-release format""" + packages = [ + MockPackage( + nevra="systemd-0:252-38.el9_5.src", + supported_products_rh_mirror=MockSupportedProductsRhMirror(9), + ), + ] + cves = [MockCVE()] + + advisory = MockAdvisory(packages=packages, cves=cves) + result = to_osv_advisory("https://errata.rockylinux.org", advisory) + + fixed_version = result.affected[0].ranges[0].events[1].fixed + self.assertEqual(fixed_version, "0:252-38.el9_5") + + +if __name__ == "__main__": + unittest.main(verbosity=2) From 531bbd0087bebc2c3eae392b8f4af9d023fb197e Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Wed, 12 Nov 2025 15:49:31 -0700 Subject: [PATCH 04/30] Remove redundant comments from OSV API Remove self-explanatory comments that restate what the code does: - Removed obvious filter condition comments - Removed type conversion comment - Removed severity calculation comment --- apollo/server/routes/api_osv.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/apollo/server/routes/api_osv.py b/apollo/server/routes/api_osv.py index 5fbfadb..88be77d 100644 --- a/apollo/server/routes/api_osv.py +++ b/apollo/server/routes/api_osv.py @@ -143,7 +143,6 @@ def to_osv_advisory(ui_url: str, advisory: Advisory) -> OSVAdvisory: for pkg in affected_packages: x = pkg[0] nevra = pkg[1] - # Only process "src" packages if nevra.group(5) != "src": continue if x.nevra in processed_nvra: @@ -198,11 +197,9 @@ def 
to_osv_advisory(ui_url: str, advisory: Advisory) -> OSVAdvisory: if advisory.red_hat_advisory: osv_credits.append(OSVCredit(name="Red Hat")) - # Calculate severity by finding the highest CVSS score highest_cvss_base_score = 0.0 final_score_vector = None for x in advisory.cves: - # Convert cvss3_scoring_vector to a float base_score = x.cvss3_base_score if base_score and base_score != "UNKNOWN": base_score = float(base_score) @@ -261,7 +258,6 @@ async def get_advisories_osv( count = fetch_adv[0] advisories = fetch_adv[1] - # Filter to only include advisories with CVE references advisories_with_cves = [adv for adv in advisories if len(adv.cves) > 0] ui_url = await get_setting(UI_URL) @@ -298,7 +294,6 @@ async def get_advisory_osv(advisory_id: str): .get_or_none() ) - # Only return advisories with CVE references if not advisory or len(advisory.cves) == 0: raise HTTPException(404) From f1d918b5c190cc51dd3c9e74ce8ad48aa0217e29 Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 13 Nov 2025 11:03:54 -0700 Subject: [PATCH 05/30] Simplify OSV API advisory filtering logic --- apollo/server/routes/api_osv.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/apollo/server/routes/api_osv.py b/apollo/server/routes/api_osv.py index 88be77d..debf89a 100644 --- a/apollo/server/routes/api_osv.py +++ b/apollo/server/routes/api_osv.py @@ -255,14 +255,11 @@ async def get_advisories_osv( kind=None, fetch_related=True, ) - count = fetch_adv[0] advisories = fetch_adv[1] - advisories_with_cves = [adv for adv in advisories if len(adv.cves) > 0] - ui_url = await get_setting(UI_URL) - osv_advisories = [to_osv_advisory(ui_url, x) for x in advisories_with_cves] - page = create_page(osv_advisories, len(advisories_with_cves), params) + osv_advisories = [to_osv_advisory(ui_url, adv) for adv in advisories if adv.cves] + page = create_page(osv_advisories, len(osv_advisories), params) state = await RedHatIndexState.first() page.last_updated_at = ( @@ -294,7 +291,7 @@ async def get_advisory_osv(advisory_id: str): .get_or_none() ) - if not advisory or len(advisory.cves) == 0: + if not advisory or not advisory.cves: raise HTTPException(404) ui_url = await get_setting(UI_URL) From 25f74e0d7ac088e81cca684ebdef48d18d8ee59e Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 6 Nov 2025 17:26:51 -0700 Subject: [PATCH 06/30] Fix CSAF parser for modular packages and add EUS filtering This commit refactors the Red Hat CSAF parser to fix two major issues: 1. Modular Package Extraction Bug - Old code failed to extract modular packages due to ::module:stream suffix - New code extracts NEVRA directly from product_tree product_id field - Strips ::module:stream suffix while preserving full NEVRA with epoch - Fixes 12+ affected advisories (e.g., RHSA-2025:12008 for redis:7) 2. 
EUS Advisory Filtering - Detects EUS/E4S/AUS/TUS products via CPE and product name - Filters out EUS-only advisories during ingestion - Reduces processed advisories by ~50% - Skips advisories where all products are EUS-related Changes: - apollo/rhcsaf/__init__.py: - Added _is_eus_product() helper for EUS detection - Added _extract_packages_from_product_tree() for product_tree parsing - Updated extract_rhel_affected_products_for_db() to filter EUS products - Updated red_hat_advisory_scraper() to use new extraction and skip EUS-only - apollo/tests/test_rhcsaf.py: - Updated test data to include product_version entries - Added TestEUSDetection class (3 tests) - Added TestModularPackages class (1 test) - Added TestEUSAdvisoryFiltering class (1 test) Validation: - Standalone testing in temp/modular_package_fix/ confirmed: - 18 modular packages extracted (was 0) - Regular packages work identically (no regression) - EUS advisories correctly filtered - All data fields preserved (CVEs, Bugzillas, metadata) --- apollo/rhcsaf/__init__.py | 173 +++++++++++++++++++++---- apollo/tests/test_rhcsaf.py | 249 +++++++++++++++++++++++++++++++++++- 2 files changed, 395 insertions(+), 27 deletions(-) diff --git a/apollo/rhcsaf/__init__.py b/apollo/rhcsaf/__init__.py index 762cedf..7bc585c 100644 --- a/apollo/rhcsaf/__init__.py +++ b/apollo/rhcsaf/__init__.py @@ -8,6 +8,38 @@ logger = Logger() +def _is_eus_product(product_name: str, cpe: str) -> bool: + """ + Detects if a product is EUS-related based on product name and CPE. + + Args: + product_name: Full product name (e.g., "Red Hat Enterprise Linux AppStream E4S (v.9.0)") + cpe: CPE string (e.g., "cpe:/a:redhat:rhel_e4s:9.0::appstream") + + Returns: + True if product is EUS/E4S/AUS/TUS, False otherwise + """ + # Check CPE product field (most reliable indicator) + if cpe: + parts = cpe.split(":") + if len(parts) > 3: + cpe_product = parts[3] + if cpe_product in ("rhel_eus", "rhel_e4s", "rhel_aus", "rhel_tus"): + return True + + # Check product name keywords as fallback + if product_name: + name_lower = product_name.lower() + eus_keywords = ["e4s", "eus", "aus", "tus", "extended update support", + "update services for sap", "advanced update support", + "telecommunications update service"] + for keyword in eus_keywords: + if keyword in name_lower: + return True + + return False + + def extract_rhel_affected_products_for_db(csaf: dict) -> set: """ Extracts all needed info for red_hat_advisory_affected_products table from CSAF product_tree. 
@@ -50,15 +82,22 @@ def extract_rhel_affected_products_for_db(csaf: dict) -> set: # Find the product_name branch for CPE/version info prod_name = None cpe = None + product_full_name = None for branch in family_branch.get("branches", []): if branch.get("category") == "product_name": prod = branch.get("product", {}) prod_name = prod.get("name") + product_full_name = prod.get("name") cpe = prod.get("product_identification_helper", {}).get("cpe") break if not prod_name or not cpe: continue + # Skip if this is an EUS product + if _is_eus_product(product_full_name, cpe): + logger.debug(f"Skipping EUS product: {product_full_name}") + continue + # Parses the CPE string to extract major and minor version numbers # Example CPE: "cpe:/a:redhat:enterprise_linux:9::appstream" parts = cpe.split(":") # Split the CPE string by colon @@ -93,6 +132,106 @@ def extract_rhel_affected_products_for_db(csaf: dict) -> set: logger.debug(f"Number of affected products: {len(affected_products)}") return affected_products + +def _extract_packages_from_product_tree(csaf: dict) -> set: + """ + Extracts fixed packages from CSAF product_tree using product_id fields. + Handles both regular and modular packages by extracting NEVRAs directly from product_id. + Filters out EUS products. + + Args: + csaf: CSAF document dict + + Returns: + Set of NEVRA strings + """ + packages = set() + product_tree = csaf.get("product_tree", {}) + + if not product_tree: + return packages + + # Build a map of product_id -> is_eus + product_eus_map = {} + + def traverse_for_eus(branches): + """Recursively traverse to build EUS map""" + for branch in branches: + category = branch.get("category") + + # Check if this is a product_name with CPE + if category == "product_name": + prod = branch.get("product", {}) + product_id = prod.get("product_id") + product_name = prod.get("name", "") + cpe = prod.get("product_identification_helper", {}).get("cpe", "") + + if product_id: + is_eus = _is_eus_product(product_name, cpe) + product_eus_map[product_id] = is_eus + + # Recurse into nested branches + if "branches" in branch: + traverse_for_eus(branch["branches"]) + + # First pass: build EUS map + for vendor_branch in product_tree.get("branches", []): + traverse_for_eus(vendor_branch.get("branches", [])) + + # Now extract packages from product_version entries + def extract_packages_from_branches(branches): + """Recursively traverse to extract packages""" + for branch in branches: + category = branch.get("category") + + if category == "product_version": + prod = branch.get("product", {}) + product_id = prod.get("product_id") + purl = prod.get("product_identification_helper", {}).get("purl") + + # Skip if no product_id + if not product_id: + continue + + # Check if this is an RPM using PURL (not container or other) + if purl and not purl.startswith("pkg:rpm/"): + continue + + # Skip if product is EUS (check product_id prefix) + # Product IDs for packages can have format: "AppStream-9.0.0.Z.E4S:package-nevra" + # or just "package-nevra" for packages in product_version entries + # We need to check if any parent product is EUS + skip_eus = False + for eus_prod_id, is_eus in product_eus_map.items(): + if is_eus and (":" in product_id and product_id.startswith(eus_prod_id + ":")): + skip_eus = True + break + + if skip_eus: + continue + + # Extract NEVRA from product_id + # Format: "package-epoch:version-release.arch" or "package-epoch:version-release.arch::module:stream" + nevra = product_id + + # For modular packages, strip off the "::module:stream" suffix + if "::" 
in nevra: + nevra = nevra.split("::")[0] + + if nevra: + packages.add(nevra) + + # Recurse + if "branches" in branch: + extract_packages_from_branches(branch["branches"]) + + # Second pass: extract packages + for vendor_branch in product_tree.get("branches", []): + extract_packages_from_branches(vendor_branch.get("branches", [])) + + return packages + + def red_hat_advisory_scraper(csaf: dict): # At the time of writing there are ~254 advisories that do not have any vulnerabilities. if not csaf.get("vulnerabilities"): @@ -122,34 +261,13 @@ def red_hat_advisory_scraper(csaf: dict): red_hat_synopsis = red_hat_synopsis.replace("Red Hat Enhancement Advisory: ", f"{severity}:") # red_hat_advisory_packages table values - red_hat_fixed_packages = set() + # Extract packages from product_tree (handles both regular and modular packages) + red_hat_fixed_packages = _extract_packages_from_product_tree(csaf) + red_hat_cve_set = set() red_hat_bugzilla_set = set() - product_id_suffix_list = ( - ".aarch64", - ".i386", - ".i686", - ".noarch", - ".ppc", - ".ppc64", - ".ppc64le", - ".s390", - ".s390x", - ".src", - ".x86_64" - ) # TODO: find a better way to filter product IDs. This is a workaround for the fact that - # the product IDs in the CSAF documents also contain artifacts like container images - # and we only are interested in RPMs. - for vulnerability in csaf["vulnerabilities"]: - for product_id in vulnerability["product_status"]["fixed"]: - if product_id.endswith(product_id_suffix_list): - # These IDs are in the format product:package_nevra - # ie- AppStream-9.4.0.Z.EUS:rsync-0:3.2.3-19.el9_4.1.aarch64" - split_on_colon = product_id.split(":") - product = split_on_colon[0] - package_nevra = ":".join(split_on_colon[-2:]) - red_hat_fixed_packages.add(package_nevra) + for vulnerability in csaf["vulnerabilities"]: # red_hat_advisory_cves table values. Many older advisories do not have CVEs and so we need to handle that. 
cve_id = vulnerability.get("cve", None) cve_cvss3_scoring_vector = vulnerability.get("scores", [{}])[0].get("cvss_v3", {}).get("vectorString", None) @@ -165,6 +283,11 @@ def red_hat_advisory_scraper(csaf: dict): # red_hat_advisory_affected_products table values red_hat_affected_products = extract_rhel_affected_products_for_db(csaf) + # If all products were EUS (none left after filtering), skip this advisory + if len(red_hat_affected_products) == 0: + logger.info(f"Skipping advisory {name}: all products are EUS-only") + return None + return { "red_hat_issued_at": str(red_hat_issued_at), "red_hat_updated_at": str(red_hat_updated_at), diff --git a/apollo/tests/test_rhcsaf.py b/apollo/tests/test_rhcsaf.py index 1c62f0a..2b4fc62 100644 --- a/apollo/tests/test_rhcsaf.py +++ b/apollo/tests/test_rhcsaf.py @@ -52,7 +52,29 @@ def setUp(self): "product_identification_helper": { "cpe": "cpe:/o:redhat:enterprise_linux:9.4" } - } + }, + "branches": [ + { + "category": "product_version", + "name": "rsync-0:3.2.3-19.el9_4.1.x86_64", + "product": { + "product_id": "rsync-0:3.2.3-19.el9_4.1.x86_64", + "product_identification_helper": { + "purl": "pkg:rpm/redhat/rsync@3.2.3-19.el9_4.1?arch=x86_64" + } + } + }, + { + "category": "product_version", + "name": "rsync-0:3.2.3-19.el9_4.1.src", + "product": { + "product_id": "rsync-0:3.2.3-19.el9_4.1.src", + "product_identification_helper": { + "purl": "pkg:rpm/redhat/rsync@3.2.3-19.el9_4.1?arch=src" + } + } + } + ] } ] }, @@ -252,4 +274,227 @@ def test_major_only_version(self): self.assertIn( ("Red Hat Enterprise Linux", "Red Hat Enterprise Linux for x86_64", 9, None, "x86_64"), result - ) \ No newline at end of file + ) + + +class TestEUSDetection(unittest.TestCase): + """Test EUS product detection and filtering""" + + def setUp(self): + with patch('common.logger.Logger') as mock_logger_class: + mock_logger_class.return_value = MagicMock() + from apollo.rhcsaf import _is_eus_product + self._is_eus_product = _is_eus_product + + def test_detect_eus_via_cpe(self): + """Test EUS detection via CPE product field""" + # EUS CPE products + self.assertTrue(self._is_eus_product("Some Product", "cpe:/a:redhat:rhel_eus:9.4::appstream")) + self.assertTrue(self._is_eus_product("Some Product", "cpe:/a:redhat:rhel_e4s:9.0::appstream")) + self.assertTrue(self._is_eus_product("Some Product", "cpe:/a:redhat:rhel_aus:8.2::appstream")) + self.assertTrue(self._is_eus_product("Some Product", "cpe:/a:redhat:rhel_tus:8.8::appstream")) + + # Non-EUS CPE product + self.assertFalse(self._is_eus_product("Some Product", "cpe:/a:redhat:enterprise_linux:9::appstream")) + + def test_detect_eus_via_name(self): + """Test EUS detection via product name keywords""" + self.assertTrue(self._is_eus_product("Red Hat Enterprise Linux AppStream EUS (v.9.4)", "")) + self.assertTrue(self._is_eus_product("Red Hat Enterprise Linux AppStream E4S (v.9.0)", "")) + self.assertTrue(self._is_eus_product("Red Hat Enterprise Linux AppStream AUS (v.8.2)", "")) + self.assertTrue(self._is_eus_product("Red Hat Enterprise Linux AppStream TUS (v.8.8)", "")) + + # Non-EUS product name + self.assertFalse(self._is_eus_product("Red Hat Enterprise Linux AppStream", "")) + + def test_eus_filtering_in_affected_products(self): + """Test that EUS products are filtered from affected products""" + csaf = { + "product_tree": { + "branches": [ + { + "branches": [ + { + "category": "product_family", + "name": "Red Hat Enterprise Linux", + "branches": [ + { + "category": "product_name", + "product": { + "name": "Red Hat Enterprise Linux 
AppStream EUS (v.9.4)", + "product_identification_helper": { + "cpe": "cpe:/a:redhat:rhel_eus:9.4::appstream" + } + } + } + ] + }, + { + "category": "architecture", + "name": "x86_64" + } + ] + } + ] + } + } + + result = extract_rhel_affected_products_for_db(csaf) + # Should be empty because the only product is EUS + self.assertEqual(len(result), 0) + + +class TestModularPackages(unittest.TestCase): + """Test modular package extraction""" + + def test_extract_modular_packages(self): + """Test extraction of modular packages with ::module:stream suffix""" + csaf = { + "document": { + "tracking": { + "initial_release_date": "2025-07-28T00:00:00+00:00", + "current_release_date": "2025-07-28T00:00:00+00:00", + "id": "RHSA-2025:12008" + }, + "title": "Red Hat Security Advisory: Important: redis:7 security update", + "aggregate_severity": {"text": "Important"}, + "notes": [ + {"category": "general", "text": "Test description"}, + {"category": "summary", "text": "Test topic"} + ] + }, + "product_tree": { + "branches": [ + { + "branches": [ + { + "category": "product_family", + "name": "Red Hat Enterprise Linux", + "branches": [ + { + "category": "product_name", + "name": "Red Hat Enterprise Linux 9", + "product": { + "name": "Red Hat Enterprise Linux 9", + "product_identification_helper": { + "cpe": "cpe:/o:redhat:enterprise_linux:9::appstream" + } + }, + "branches": [ + { + "category": "product_version", + "name": "redis-0:7.2.10-1.module+el9.6.0+23332+115a3b01.x86_64::redis:7", + "product": { + "product_id": "redis-0:7.2.10-1.module+el9.6.0+23332+115a3b01.x86_64::redis:7", + "product_identification_helper": { + "purl": "pkg:rpm/redhat/redis@7.2.10-1.module+el9.6.0+23332+115a3b01?arch=x86_64&rpmmod=redis:7:9060020250716081121:9" + } + } + }, + { + "category": "product_version", + "name": "redis-0:7.2.10-1.module+el9.6.0+23332+115a3b01.src::redis:7", + "product": { + "product_id": "redis-0:7.2.10-1.module+el9.6.0+23332+115a3b01.src::redis:7", + "product_identification_helper": { + "purl": "pkg:rpm/redhat/redis@7.2.10-1.module+el9.6.0+23332+115a3b01?arch=src&rpmmod=redis:7:9060020250716081121:9" + } + } + } + ] + } + ] + }, + { + "category": "architecture", + "name": "x86_64" + } + ] + } + ] + }, + "vulnerabilities": [ + { + "cve": "CVE-2025-12345", + "ids": [{"system_name": "Red Hat Bugzilla ID", "text": "123456"}], + "product_status": {"fixed": []}, + "scores": [{"cvss_v3": {"vectorString": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", "baseScore": 9.8}}], + "cwe": {"id": "CWE-79"} + } + ] + } + + result = red_hat_advisory_scraper(csaf) + + # Check that modular packages were extracted with ::module:stream stripped + self.assertIn("redis-0:7.2.10-1.module+el9.6.0+23332+115a3b01.x86_64", result["red_hat_fixed_packages"]) + self.assertIn("redis-0:7.2.10-1.module+el9.6.0+23332+115a3b01.src", result["red_hat_fixed_packages"]) + + # Verify epoch is preserved + for pkg in result["red_hat_fixed_packages"]: + if "redis" in pkg: + self.assertIn("0:", pkg, "Epoch should be preserved in NEVRA") + + +class TestEUSAdvisoryFiltering(unittest.TestCase): + """Test that EUS-only advisories are filtered out""" + + def test_eus_only_advisory_returns_none(self): + """Test that advisory with only EUS products returns None""" + csaf = { + "document": { + "tracking": { + "initial_release_date": "2025-01-01T00:00:00+00:00", + "current_release_date": "2025-01-01T00:00:00+00:00", + "id": "RHSA-2025:9756" + }, + "title": "Red Hat Security Advisory: Important: package security update", + "aggregate_severity": {"text": 
"Important"}, + "notes": [ + {"category": "general", "text": "EUS advisory"}, + {"category": "summary", "text": "EUS topic"} + ] + }, + "product_tree": { + "branches": [ + { + "branches": [ + { + "category": "product_family", + "name": "Red Hat Enterprise Linux", + "branches": [ + { + "category": "product_name", + "name": "Red Hat Enterprise Linux AppStream EUS (v.9.4)", + "product": { + "name": "Red Hat Enterprise Linux AppStream EUS (v.9.4)", + "product_identification_helper": { + "cpe": "cpe:/a:redhat:rhel_eus:9.4::appstream" + } + } + } + ] + }, + { + "category": "architecture", + "name": "x86_64" + } + ] + } + ] + }, + "vulnerabilities": [ + { + "cve": "CVE-2025-99999", + "ids": [{"system_name": "Red Hat Bugzilla ID", "text": "999999"}], + "product_status": {"fixed": []}, + "scores": [{"cvss_v3": {"vectorString": "CVSS:3.1/AV:N/AC:L/PR:N/UI:N/S:U/C:H/I:H/A:H", "baseScore": 9.8}}], + "cwe": {"id": "CWE-79"} + } + ] + } + + result = red_hat_advisory_scraper(csaf) + + # Advisory should be filtered out (return None) because all products are EUS + self.assertIsNone(result) \ No newline at end of file From 48a57c8b993f67e59a16150507dc5be7377aaef5 Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 6 Nov 2025 17:39:25 -0700 Subject: [PATCH 07/30] Fix CSV merge to prioritize changes.csv over releases.csv The previous code incorrectly let releases.csv overwrite changes.csv timestamps. This caused the workflow to miss advisory updates, as changes.csv contains the most recent modification times while releases.csv contains original publication dates. With this fix, when Red Hat updates advisories (like the mass update on 2025-11-07), the workflow will correctly detect and reprocess them. Changes: - Reversed merge order: {**releases, **changes} so changes.csv takes precedence - Updated comment to clarify the intended behavior - Ensures updated advisories are reprocessed to catch corrections/additions --- apollo/rhworker/poll_rh_activities.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/apollo/rhworker/poll_rh_activities.py b/apollo/rhworker/poll_rh_activities.py index e592136..85a4380 100644 --- a/apollo/rhworker/poll_rh_activities.py +++ b/apollo/rhworker/poll_rh_activities.py @@ -651,8 +651,11 @@ async def fetch_csv_with_dates(session, url): releases = await fetch_csv_with_dates(session, base_url + "releases.csv") deletions = await fetch_csv_with_dates(session, base_url + "deletions.csv") - # Merge changes and releases, keeping the most recent timestamp for each advisory - all_advisories = {**changes, **releases} + # Merge changes and releases, prioritizing changes.csv for updated timestamps + # changes.csv contains the most recent modification time for each advisory + # releases.csv contains original publication dates + # We want changes.csv to take precedence to catch updates to existing advisories + all_advisories = {**releases, **changes} # Remove deletions for advisory_id in deletions: all_advisories.pop(advisory_id, None) From c555bcfb652fcffd79e14ac84049dfa1b6faffc5 Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 6 Nov 2025 18:08:32 -0700 Subject: [PATCH 08/30] Add web UI for managing CSAF index timestamp Add admin interface to view and update the last_indexed_at timestamp that controls which CSAF advisories are processed by the Poll RHCSAF workflow. 
Changes: - Add DatabaseService methods for getting and updating last_indexed_at - Add admin route handlers for timestamp management - Add UI section with date picker and automatic ISO 8601 conversion - Remove duplicate timestamp display from Poll RHCSAF section - Fix preview results text readability - Add comprehensive unit tests for DatabaseService - Update BUILD.bazel and CI workflow to include new tests --- .github/workflows/test.yaml | 1 + apollo/server/routes/admin_workflows.py | 38 ++- apollo/server/services/database_service.py | 65 ++++- apollo/server/templates/admin_workflows.jinja | 66 ++++- apollo/tests/BUILD.bazel | 10 + apollo/tests/test_database_service.py | 226 ++++++++++++++++++ 6 files changed, 400 insertions(+), 6 deletions(-) create mode 100644 apollo/tests/test_database_service.py diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 3791963..394e28c 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -38,6 +38,7 @@ jobs: bazel test //apollo/tests:test_validation --test_output=all bazel test //apollo/tests:test_admin_routes_supported_products --test_output=all bazel test //apollo/tests:test_api_osv --test_output=all + bazel test //apollo/tests:test_database_service --test_output=all - name: Integration Tests run: ./build/scripts/test.bash diff --git a/apollo/server/routes/admin_workflows.py b/apollo/server/routes/admin_workflows.py index ef319dc..cfb26ec 100644 --- a/apollo/server/routes/admin_workflows.py +++ b/apollo/server/routes/admin_workflows.py @@ -21,7 +21,8 @@ async def admin_workflows(request: Request, user: User = Depends(admin_user_sche """Render admin workflows page for manual workflow triggering""" db_service = DatabaseService() env_info = await db_service.get_environment_info() - + index_state = await db_service.get_last_indexed_at() + return templates.TemplateResponse( "admin_workflows.jinja", { "request": request, @@ -29,6 +30,8 @@ async def admin_workflows(request: Request, user: User = Depends(admin_user_sche "env_name": env_info["environment"], "is_production": env_info["is_production"], "reset_allowed": env_info["reset_allowed"], + "last_indexed_at": index_state.get("last_indexed_at_iso"), + "last_indexed_exists": index_state.get("exists", False), } ) @@ -92,6 +95,39 @@ async def trigger_poll_rhcsaf( return RedirectResponse(url="/admin/workflows", status_code=303) +@router.post("/workflows/update-index-timestamp") +async def update_index_timestamp( + request: Request, + new_timestamp: str = Form(...), + user: User = Depends(admin_user_scheme) +): + """Update the last_indexed_at timestamp in red_hat_index_state""" + try: + # Parse the timestamp + timestamp_dt = datetime.fromisoformat(new_timestamp.replace("Z", "+00:00")) + + db_service = DatabaseService() + result = await db_service.update_last_indexed_at(timestamp_dt, user.email) + + Logger().info(f"Admin user {user.email} updated last_indexed_at to {new_timestamp}") + + # Store success message in session + request.session["workflow_message"] = result["message"] + request.session["workflow_type"] = "success" + + except ValueError as e: + Logger().error(f"Invalid timestamp format: {str(e)}") + request.session["workflow_message"] = f"Invalid timestamp format: {str(e)}" + request.session["workflow_type"] = "error" + + except Exception as e: + Logger().error(f"Error updating last_indexed_at: {str(e)}") + request.session["workflow_message"] = f"Error updating timestamp: {str(e)}" + request.session["workflow_type"] = "error" + + return 
RedirectResponse(url="/admin/workflows", status_code=303) + + @router.get("/workflows/database/preview-reset") async def preview_database_reset( request: Request, diff --git a/apollo/server/services/database_service.py b/apollo/server/services/database_service.py index 78d6fb0..769e5e3 100644 --- a/apollo/server/services/database_service.py +++ b/apollo/server/services/database_service.py @@ -123,4 +123,67 @@ async def get_environment_info(self) -> Dict[str, str]: "environment": env_name, "is_production": self.is_production_environment(), "reset_allowed": not self.is_production_environment() - } \ No newline at end of file + } + + async def get_last_indexed_at(self) -> Dict[str, Any]: + """ + Get the current last_indexed_at timestamp from red_hat_index_state + + Returns: + Dictionary with timestamp information + """ + index_state = await RedHatIndexState.first() + + if not index_state or not index_state.last_indexed_at: + return { + "last_indexed_at": None, + "last_indexed_at_iso": None, + "exists": False + } + + return { + "last_indexed_at": index_state.last_indexed_at, + "last_indexed_at_iso": index_state.last_indexed_at.isoformat(), + "exists": True + } + + async def update_last_indexed_at(self, new_timestamp: datetime, user_email: str) -> Dict[str, Any]: + """ + Update the last_indexed_at timestamp in red_hat_index_state + + Args: + new_timestamp: New timestamp to set + user_email: Email of user making the change (for logging) + + Returns: + Dictionary with operation results + + Raises: + ValueError: If timestamp is invalid + """ + logger = Logger() + + try: + # Get or create index state + index_state = await RedHatIndexState.first() + + old_timestamp = None + if index_state: + old_timestamp = index_state.last_indexed_at + index_state.last_indexed_at = new_timestamp + await index_state.save() + logger.info(f"Updated last_indexed_at by {user_email}: {old_timestamp} -> {new_timestamp}") + else: + await RedHatIndexState.create(last_indexed_at=new_timestamp) + logger.info(f"Created last_indexed_at by {user_email}: {new_timestamp}") + + return { + "success": True, + "old_timestamp": old_timestamp.isoformat() if old_timestamp else None, + "new_timestamp": new_timestamp.isoformat(), + "message": f"Successfully updated last_indexed_at to {new_timestamp.isoformat()}" + } + + except Exception as e: + logger.error(f"Failed to update last_indexed_at: {str(e)}") + raise RuntimeError(f"Failed to update timestamp: {str(e)}") \ No newline at end of file diff --git a/apollo/server/templates/admin_workflows.jinja b/apollo/server/templates/admin_workflows.jinja index 6dd396a..b3f7916 100644 --- a/apollo/server/templates/admin_workflows.jinja +++ b/apollo/server/templates/admin_workflows.jinja @@ -71,7 +71,7 @@

 Poll RH CSAF Advisories Workflow
 Polls Red Hat for new CSAF (Common Security Advisory Framework) advisories.
-[markup-only change; HTML tags not preserved in this excerpt]
+[markup-only change; HTML tags not preserved in this excerpt]
@@ -80,6 +80,44 @@
+Update CSAF Index Timestamp
+Set the last_indexed_at timestamp to control which advisories are processed by the Poll RHCSAF workflow.
+{% if last_indexed_exists %}
+Current last_indexed_at: {{ last_indexed_at }}
+{% else %}
+No timestamp set - workflow will process all advisories
+{% endif %}
+[date-picker form markup not preserved in this excerpt]
+The workflow will process advisories with timestamps after this date.
+Time will be set to 00:00:00 UTC.
 {% if reset_allowed %}
@@ -114,7 +152,7 @@
- -{% if reset_allowed %} -{% endif %} {% endblock %} \ No newline at end of file diff --git a/apollo/tests/BUILD.bazel b/apollo/tests/BUILD.bazel index 7609a83..b08f0d9 100644 --- a/apollo/tests/BUILD.bazel +++ b/apollo/tests/BUILD.bazel @@ -69,3 +69,13 @@ py_test( "//apollo/server:server_lib", ], ) + +py_test( + name = "test_database_service", + srcs = ["test_database_service.py"], + deps = [ + "//apollo/server:server_lib", + "//apollo/db:db_lib", + "//common:common_lib", + ], +) diff --git a/apollo/tests/test_database_service.py b/apollo/tests/test_database_service.py new file mode 100644 index 0000000..0a8c215 --- /dev/null +++ b/apollo/tests/test_database_service.py @@ -0,0 +1,226 @@ +""" +Tests for DatabaseService functionality +Tests utility functions for database operations including timestamp management +""" + +import unittest +import asyncio +from datetime import datetime, timezone +from unittest.mock import Mock, AsyncMock, patch +import os + +# Add the project root to the Python path +import sys + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), "../..")) + +from apollo.server.services.database_service import DatabaseService + + +class TestEnvironmentDetection(unittest.TestCase): + """Test environment detection functionality.""" + + def test_is_production_when_env_is_production(self): + """Test production detection when ENV=production.""" + with patch.dict(os.environ, {"ENV": "production"}): + service = DatabaseService() + self.assertTrue(service.is_production_environment()) + + def test_is_not_production_when_env_is_development(self): + """Test production detection when ENV=development.""" + with patch.dict(os.environ, {"ENV": "development"}): + service = DatabaseService() + self.assertFalse(service.is_production_environment()) + + def test_is_not_production_when_env_not_set(self): + """Test production detection when ENV is not set.""" + with patch.dict(os.environ, {}, clear=True): + service = DatabaseService() + self.assertFalse(service.is_production_environment()) + + def test_is_not_production_with_staging_env(self): + """Test production detection with staging environment.""" + with patch.dict(os.environ, {"ENV": "staging"}): + service = DatabaseService() + self.assertFalse(service.is_production_environment()) + + def test_get_environment_info_production(self): + """Test getting environment info for production.""" + with patch.dict(os.environ, {"ENV": "production"}): + service = DatabaseService() + result = asyncio.run(service.get_environment_info()) + + self.assertEqual(result["environment"], "production") + self.assertTrue(result["is_production"]) + self.assertFalse(result["reset_allowed"]) + + def test_get_environment_info_development(self): + """Test getting environment info for development.""" + with patch.dict(os.environ, {"ENV": "development"}): + service = DatabaseService() + result = asyncio.run(service.get_environment_info()) + + self.assertEqual(result["environment"], "development") + self.assertFalse(result["is_production"]) + self.assertTrue(result["reset_allowed"]) + + +class TestLastIndexedAtOperations(unittest.TestCase): + """Test last_indexed_at timestamp operations.""" + + def test_get_last_indexed_at_when_exists(self): + """Test getting last_indexed_at when record exists.""" + mock_index_state = Mock() + test_time = datetime(2025, 7, 1, 0, 0, 0, tzinfo=timezone.utc) + mock_index_state.last_indexed_at = test_time + + with patch("apollo.server.services.database_service.RedHatIndexState") as mock_state: + mock_state.first = 
AsyncMock(return_value=mock_index_state) + + service = DatabaseService() + result = asyncio.run(service.get_last_indexed_at()) + + self.assertEqual(result["last_indexed_at"], test_time) + self.assertEqual(result["last_indexed_at_iso"], "2025-07-01T00:00:00+00:00") + self.assertTrue(result["exists"]) + + def test_get_last_indexed_at_when_not_exists(self): + """Test getting last_indexed_at when no record exists.""" + with patch("apollo.server.services.database_service.RedHatIndexState") as mock_state: + mock_state.first = AsyncMock(return_value=None) + + service = DatabaseService() + result = asyncio.run(service.get_last_indexed_at()) + + self.assertIsNone(result["last_indexed_at"]) + self.assertIsNone(result["last_indexed_at_iso"]) + self.assertFalse(result["exists"]) + + def test_get_last_indexed_at_when_timestamp_is_none(self): + """Test getting last_indexed_at when timestamp field is None.""" + mock_index_state = Mock() + mock_index_state.last_indexed_at = None + + with patch("apollo.server.services.database_service.RedHatIndexState") as mock_state: + mock_state.first = AsyncMock(return_value=mock_index_state) + + service = DatabaseService() + result = asyncio.run(service.get_last_indexed_at()) + + self.assertIsNone(result["last_indexed_at"]) + self.assertIsNone(result["last_indexed_at_iso"]) + self.assertFalse(result["exists"]) + + def test_update_last_indexed_at_existing_record(self): + """Test updating last_indexed_at for existing record.""" + old_time = datetime(2025, 6, 1, 0, 0, 0, tzinfo=timezone.utc) + new_time = datetime(2025, 7, 1, 0, 0, 0, tzinfo=timezone.utc) + + mock_index_state = Mock() + mock_index_state.last_indexed_at = old_time + mock_index_state.save = AsyncMock() + + with patch("apollo.server.services.database_service.RedHatIndexState") as mock_state, \ + patch("apollo.server.services.database_service.Logger"): + mock_state.first = AsyncMock(return_value=mock_index_state) + + service = DatabaseService() + result = asyncio.run(service.update_last_indexed_at(new_time, "admin@example.com")) + + self.assertTrue(result["success"]) + self.assertEqual(result["old_timestamp"], "2025-06-01T00:00:00+00:00") + self.assertEqual(result["new_timestamp"], "2025-07-01T00:00:00+00:00") + self.assertIn("Successfully updated", result["message"]) + + # Verify save was called + mock_index_state.save.assert_called_once() + # Verify timestamp was updated + self.assertEqual(mock_index_state.last_indexed_at, new_time) + + def test_update_last_indexed_at_create_new_record(self): + """Test updating last_indexed_at when no record exists (creates new).""" + new_time = datetime(2025, 7, 1, 0, 0, 0, tzinfo=timezone.utc) + + with patch("apollo.server.services.database_service.RedHatIndexState") as mock_state, \ + patch("apollo.server.services.database_service.Logger"): + mock_state.first = AsyncMock(return_value=None) + mock_state.create = AsyncMock() + + service = DatabaseService() + result = asyncio.run(service.update_last_indexed_at(new_time, "admin@example.com")) + + self.assertTrue(result["success"]) + self.assertIsNone(result["old_timestamp"]) + self.assertEqual(result["new_timestamp"], "2025-07-01T00:00:00+00:00") + self.assertIn("Successfully updated", result["message"]) + + # Verify create was called with correct timestamp + mock_state.create.assert_called_once_with(last_indexed_at=new_time) + + def test_update_last_indexed_at_handles_exception(self): + """Test that update_last_indexed_at handles database exceptions.""" + new_time = datetime(2025, 7, 1, 0, 0, 0, tzinfo=timezone.utc) + + with 
patch("apollo.server.services.database_service.RedHatIndexState") as mock_state, \ + patch("apollo.server.services.database_service.Logger"): + mock_state.first = AsyncMock(side_effect=Exception("Database error")) + + service = DatabaseService() + + with self.assertRaises(RuntimeError) as cm: + asyncio.run(service.update_last_indexed_at(new_time, "admin@example.com")) + + self.assertIn("Failed to update timestamp", str(cm.exception)) + + +class TestPartialResetValidation(unittest.TestCase): + """Test partial reset validation logic.""" + + def test_preview_partial_reset_blocks_in_production(self): + """Test that preview_partial_reset raises error in production.""" + with patch.dict(os.environ, {"ENV": "production"}): + service = DatabaseService() + cutoff_date = datetime(2025, 6, 1, 0, 0, 0, tzinfo=timezone.utc) + + with self.assertRaises(ValueError) as cm: + asyncio.run(service.preview_partial_reset(cutoff_date)) + + self.assertIn("production environment", str(cm.exception)) + + def test_preview_partial_reset_rejects_future_date(self): + """Test that preview_partial_reset rejects future dates.""" + with patch.dict(os.environ, {"ENV": "development"}): + service = DatabaseService() + future_date = datetime(2099, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + + with self.assertRaises(ValueError) as cm: + asyncio.run(service.preview_partial_reset(future_date)) + + self.assertIn("must be in the past", str(cm.exception)) + + def test_perform_partial_reset_blocks_in_production(self): + """Test that perform_partial_reset raises error in production.""" + with patch.dict(os.environ, {"ENV": "production"}): + service = DatabaseService() + cutoff_date = datetime(2025, 6, 1, 0, 0, 0, tzinfo=timezone.utc) + + with self.assertRaises(ValueError) as cm: + asyncio.run(service.perform_partial_reset(cutoff_date, "admin@example.com")) + + self.assertIn("production environment", str(cm.exception)) + + def test_perform_partial_reset_rejects_future_date(self): + """Test that perform_partial_reset rejects future dates.""" + with patch.dict(os.environ, {"ENV": "development"}): + service = DatabaseService() + future_date = datetime(2099, 1, 1, 0, 0, 0, tzinfo=timezone.utc) + + with self.assertRaises(ValueError) as cm: + asyncio.run(service.perform_partial_reset(future_date, "admin@example.com")) + + self.assertIn("must be in the past", str(cm.exception)) + + +if __name__ == "__main__": + # Run with verbose output + unittest.main(verbosity=2) From ced651af0f92ba70346ed4789e8c1bd0c58b4e41 Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Mon, 10 Nov 2025 12:56:48 -0700 Subject: [PATCH 09/30] Fix test_csaf_processing to work with refactored CSAF parser and Bazel This commit fixes multiple issues in test_csaf_processing.py that caused CI failures: 1. Missing unittest.main() call - Added 'if __name__ == "__main__": unittest.main()' block - Without this, Bazel's py_test runs the file as a script but never executes the tests, causing false positives - pytest doesn't need this (auto-discovers tests), but Bazel does 2. Fixed async test lifecycle methods - Changed 'async def tearDown' to 'async def asyncTearDown' - Removed incorrect @classmethod decorators from asyncSetUp/asyncTearDown - These must be instance methods in unittest.IsolatedAsyncioTestCase - Consolidated setUp logic into asyncSetUp - Added close_test_db() call to asyncTearDown for proper cleanup 3. 
Updated test CSAF data structure - Added product_version entries in product_tree (required by refactored parser) - Changed from EUS to MAIN product variant (EUS products are filtered out) - Added proper product_id, purl, and CPE format - The refactored CSAF parser (commit ccb297e) extracts packages from product_tree instead of vulnerabilities.product_status.fixed 4. Fixed test assertions - Changed minor_version expectation from 4 to None (CPE has no minor version) - Fixed test_no_fixed_packages to remove product_tree entries instead of just clearing the fixed array Root cause analysis: - Bazel tests were never actually running (missing unittest.main()) - GitHub Actions tests were running via pytest in Integration Tests step - pytest auto-discovers unittest tests without needing __main__ block - This is why CI showed failures while local Bazel tests appeared to pass All tests now pass in both Bazel and pytest environments. --- apollo/tests/test_csaf_processing.py | 76 +++++++++++++++++++--------- 1 file changed, 51 insertions(+), 25 deletions(-) diff --git a/apollo/tests/test_csaf_processing.py b/apollo/tests/test_csaf_processing.py index dbd0f91..aa31c10 100644 --- a/apollo/tests/test_csaf_processing.py +++ b/apollo/tests/test_csaf_processing.py @@ -22,17 +22,10 @@ ) class TestCsafProcessing(unittest.IsolatedAsyncioTestCase): - @classmethod - async def asyncSetUp(cls): - # Initialize test database for all tests in this class + async def asyncSetUp(self): + # Initialize test database before each test await initialize_test_db() - - @classmethod - async def asyncTearDown(cls): - # Close database connections when tests are done - await close_test_db() - def setUp(self): # Create sample CSAF data matching schema requirements self.sample_csaf = { "document": { @@ -69,10 +62,35 @@ def setUp(self): "name": "Red Hat Enterprise Linux 9", "product": { "name": "Red Hat Enterprise Linux 9", + "product_id": "AppStream-9.4.0.Z.MAIN", "product_identification_helper": { - "cpe": "cpe:/o:redhat:enterprise_linux:9.4" + "cpe": "cpe:/o:redhat:enterprise_linux:9::appstream" + } + }, + "branches": [ + { + "category": "product_version", + "name": "rsync-0:3.2.3-19.el9_4.1.x86_64", + "product": { + "name": "rsync-0:3.2.3-19.el9_4.1.x86_64", + "product_id": "rsync-0:3.2.3-19.el9_4.1.x86_64", + "product_identification_helper": { + "purl": "pkg:rpm/redhat/rsync@3.2.3-19.el9_4.1?arch=x86_64" + } + } + }, + { + "category": "product_version", + "name": "rsync-0:3.2.3-19.el9_4.1.src", + "product": { + "name": "rsync-0:3.2.3-19.el9_4.1.src", + "product_id": "rsync-0:3.2.3-19.el9_4.1.src", + "product_identification_helper": { + "purl": "pkg:rpm/redhat/rsync@3.2.3-19.el9_4.1?arch=src" + } + } } - } + ] } ] }, @@ -95,8 +113,8 @@ def setUp(self): ], "product_status": { "fixed": [ - "AppStream-9.4.0.Z.EUS:rsync-0:3.2.3-19.el9_4.1.x86_64", - "AppStream-9.4.0.Z.EUS:rsync-0:3.2.3-19.el9_4.1.src" + "AppStream-9.4.0.Z.MAIN:rsync-0:3.2.3-19.el9_4.1.x86_64", + "AppStream-9.4.0.Z.MAIN:rsync-0:3.2.3-19.el9_4.1.src" ] }, "scores": [{ @@ -117,28 +135,31 @@ def setUp(self): } ] } - + # Create a temporary file with the sample data self.test_file = pathlib.Path("test_csaf.json") with open(self.test_file, "w") as f: json.dump(self.sample_csaf, f) - async def tearDown(self): - # Clean up database and temporary files after each test + async def asyncTearDown(self): + # Clean up database entries and temporary files after each test await RedHatAdvisory.all().delete() await RedHatAdvisoryPackage.all().delete() await 
RedHatAdvisoryCVE.all().delete() - await RedHatAdvisoryBugzillaBug.all().delete() + await RedHatAdvisoryBugzillaBug.all().delete() await RedHatAdvisoryAffectedProduct.all().delete() - - # Clean up temporary file + + # Close database connections + await close_test_db() + + # Clean up temporary files self.test_file.unlink(missing_ok=True) pathlib.Path("invalid_csaf.json").unlink(missing_ok=True) async def test_new_advisory_creation(self): # Test creating a new advisory with a real test database result = await process_csaf_file(self.sample_csaf, "test.json") - + # Verify advisory was created correctly advisory = await RedHatAdvisory.get_or_none(name="RHSA-2025:1234") self.assertIsNotNone(advisory) @@ -176,7 +197,8 @@ async def test_new_advisory_creation(self): self.assertEqual(products[0].variant, "Red Hat Enterprise Linux") self.assertEqual(products[0].arch, "x86_64") self.assertEqual(products[0].major_version, 9) - self.assertEqual(products[0].minor_version, 4) + # Minor version is None because CPE doesn't include minor version + self.assertIsNone(products[0].minor_version) async def test_advisory_update(self): # First create an advisory with different values @@ -224,12 +246,13 @@ async def test_no_vulnerabilities(self): self.assertEqual(count, 0) async def test_no_fixed_packages(self): - # Test CSAF with vulnerabilities but no fixed packages + # Test CSAF with vulnerabilities but no fixed packages in product_tree csaf = self.sample_csaf.copy() - csaf["vulnerabilities"][0]["product_status"]["fixed"] = [] + # Remove product_version entries from product_tree to simulate no fixed packages + csaf["product_tree"]["branches"][0]["branches"][0]["branches"][0].pop("branches", None) result = await process_csaf_file(csaf, "test.json") self.assertIsNone(result) - + # Verify nothing was created count = await RedHatAdvisory.all().count() self.assertEqual(count, 0) @@ -239,4 +262,7 @@ async def test_db_exception(self, mock_get_or_none): # Simulate a database error mock_get_or_none.side_effect = Exception("DB error") with self.assertRaises(Exception): - await process_csaf_file(self.sample_csaf, "test.json") \ No newline at end of file + await process_csaf_file(self.sample_csaf, "test.json") + +if __name__ == '__main__': + unittest.main() \ No newline at end of file From d43f40c80061390c8af0ac28a122fee5bdcb0c67 Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 13 Nov 2025 11:24:27 -0700 Subject: [PATCH 10/30] Refactor EUS product identifiers into file-level constants Extracted magic constants from _is_eus_product() function to improve maintainability and readability: - EUS_CPE_PRODUCTS: CPE product identifiers for EUS variants - EUS_PRODUCT_NAME_KEYWORDS: Keywords for identifying EUS products Using frozenset for better performance on membership checks. 
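For illustration, a minimal sketch of the membership check this enables (the constant mirrors the one added below; the CPE strings are example values only):

    EUS_CPE_PRODUCTS = frozenset([
        "rhel_eus", "rhel_e4s", "rhel_aus", "rhel_tus",
    ])

    def is_eus_cpe(cpe: str) -> bool:
        # Field 3 of a colon-separated CPE holds the product identifier,
        # e.g. "cpe:/o:redhat:rhel_eus:9.4::baseos" -> "rhel_eus".
        parts = cpe.split(":")
        return len(parts) > 3 and parts[3] in EUS_CPE_PRODUCTS  # O(1) lookup

    assert is_eus_cpe("cpe:/o:redhat:rhel_eus:9.4::baseos")
    assert not is_eus_cpe("cpe:/a:redhat:enterprise_linux:9::appstream")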
--- apollo/rhcsaf/__init__.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/apollo/rhcsaf/__init__.py b/apollo/rhcsaf/__init__.py index 7bc585c..ada28a2 100644 --- a/apollo/rhcsaf/__init__.py +++ b/apollo/rhcsaf/__init__.py @@ -8,6 +8,24 @@ logger = Logger() +EUS_CPE_PRODUCTS = frozenset([ + "rhel_eus", # Extended Update Support + "rhel_e4s", # Update Services for SAP Solutions + "rhel_aus", # Advanced Update Support (IBM Power) + "rhel_tus", # Telecommunications Update Service +]) + +EUS_PRODUCT_NAME_KEYWORDS = frozenset([ + "e4s", + "eus", + "aus", + "tus", + "extended update support", + "update services for sap", + "advanced update support", + "telecommunications update service", +]) + def _is_eus_product(product_name: str, cpe: str) -> bool: """ Detects if a product is EUS-related based on product name and CPE. @@ -24,16 +42,13 @@ def _is_eus_product(product_name: str, cpe: str) -> bool: parts = cpe.split(":") if len(parts) > 3: cpe_product = parts[3] - if cpe_product in ("rhel_eus", "rhel_e4s", "rhel_aus", "rhel_tus"): + if cpe_product in EUS_CPE_PRODUCTS: return True # Check product name keywords as fallback if product_name: name_lower = product_name.lower() - eus_keywords = ["e4s", "eus", "aus", "tus", "extended update support", - "update services for sap", "advanced update support", - "telecommunications update service"] - for keyword in eus_keywords: + for keyword in EUS_PRODUCT_NAME_KEYWORDS: if keyword in name_lower: return True From 12695ae685ac3960aee7bc5266ca0fcae8ac0ec0 Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 13 Nov 2025 11:32:04 -0700 Subject: [PATCH 11/30] Simplify package extraction logic - Move product_name and cpe declarations closer to usage - Simplify modular package NEVRA extraction using split directly - Remove redundant nevra variable and empty string check --- apollo/rhcsaf/__init__.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/apollo/rhcsaf/__init__.py b/apollo/rhcsaf/__init__.py index ada28a2..3b68259 100644 --- a/apollo/rhcsaf/__init__.py +++ b/apollo/rhcsaf/__init__.py @@ -178,10 +178,10 @@ def traverse_for_eus(branches): if category == "product_name": prod = branch.get("product", {}) product_id = prod.get("product_id") - product_name = prod.get("name", "") - cpe = prod.get("product_identification_helper", {}).get("cpe", "") if product_id: + product_name = prod.get("name", "") + cpe = prod.get("product_identification_helper", {}).get("cpe", "") is_eus = _is_eus_product(product_name, cpe) product_eus_map[product_id] = is_eus @@ -227,14 +227,8 @@ def extract_packages_from_branches(branches): # Extract NEVRA from product_id # Format: "package-epoch:version-release.arch" or "package-epoch:version-release.arch::module:stream" - nevra = product_id - # For modular packages, strip off the "::module:stream" suffix - if "::" in nevra: - nevra = nevra.split("::")[0] - - if nevra: - packages.add(nevra) + packages.add(product_id.split("::")[0]) # Recurse if "branches" in branch: From 43ee7338495df6e28d71354ea6d183db8bfde93b Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 13 Nov 2025 11:33:53 -0700 Subject: [PATCH 12/30] Use Pythonic empty set check Replace explicit length comparison with truthiness check for red_hat_affected_products set. 
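A two-line illustration of the change (generic values, not the module's data):

    red_hat_affected_products = set()
    if not red_hat_affected_products:  # replaces: if len(red_hat_affected_products) == 0:
        print("skip: no non-EUS products")

An empty set is falsy, so the truthiness form reads the same as the length comparison while avoiding the extra call.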
--- apollo/rhcsaf/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apollo/rhcsaf/__init__.py b/apollo/rhcsaf/__init__.py index 3b68259..e2cc9e0 100644 --- a/apollo/rhcsaf/__init__.py +++ b/apollo/rhcsaf/__init__.py @@ -293,7 +293,7 @@ def red_hat_advisory_scraper(csaf: dict): red_hat_affected_products = extract_rhel_affected_products_for_db(csaf) # If all products were EUS (none left after filtering), skip this advisory - if len(red_hat_affected_products) == 0: + if not red_hat_affected_products: logger.info(f"Skipping advisory {name}: all products are EUS-only") return None From 6b997e54bf2aed9a916c3ce69358d3baabce3a70 Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 13 Nov 2025 11:38:54 -0700 Subject: [PATCH 13/30] Remove redundant comments from CSAF processing code Removed comments that simply restated what the code clearly does. Kept only comments that provide non-obvious context such as: - CPE format examples - Product ID format variations - Business logic explanations --- apollo/rhcsaf/__init__.py | 65 +++++++++------------------------------ 1 file changed, 14 insertions(+), 51 deletions(-) diff --git a/apollo/rhcsaf/__init__.py b/apollo/rhcsaf/__init__.py index e2cc9e0..d7dd7a4 100644 --- a/apollo/rhcsaf/__init__.py +++ b/apollo/rhcsaf/__init__.py @@ -4,8 +4,6 @@ from common.logger import Logger from apollo.rpm_helpers import parse_nevra -# Initialize Info before Logger for this module - logger = Logger() EUS_CPE_PRODUCTS = frozenset([ @@ -37,7 +35,6 @@ def _is_eus_product(product_name: str, cpe: str) -> bool: Returns: True if product is EUS/E4S/AUS/TUS, False otherwise """ - # Check CPE product field (most reliable indicator) if cpe: parts = cpe.split(":") if len(parts) > 3: @@ -45,7 +42,6 @@ def _is_eus_product(product_name: str, cpe: str) -> bool: if cpe_product in EUS_CPE_PRODUCTS: return True - # Check product name keywords as fallback if product_name: name_lower = product_name.lower() for keyword in EUS_PRODUCT_NAME_KEYWORDS: @@ -61,14 +57,12 @@ def extract_rhel_affected_products_for_db(csaf: dict) -> set: Expands 'noarch' to all main arches and maps names to user-friendly values. 
Returns a set of tuples: (variant, name, major_version, minor_version, arch) """ - # Maps architecture short names to user-friendly product names arch_name_map = { "aarch64": "Red Hat Enterprise Linux for ARM 64", "x86_64": "Red Hat Enterprise Linux for x86_64", "s390x": "Red Hat Enterprise Linux for IBM z Systems", "ppc64le": "Red Hat Enterprise Linux for Power, little endian", } - # List of main architectures to expand 'noarch' main_arches = list(arch_name_map.keys()) affected_products = set() product_tree = csaf.get("product_tree", {}) @@ -76,25 +70,20 @@ def extract_rhel_affected_products_for_db(csaf: dict) -> set: logger.warning("No product tree found in CSAF document") return affected_products - # Iterate over all vendor branches in the product tree for vendor_branch in product_tree.get("branches", []): - # Find the product_family branch for RHEL family_branch = None arches = set() for branch in vendor_branch.get("branches", []): if branch.get("category") == "product_family" and branch.get("name") == "Red Hat Enterprise Linux": family_branch = branch - # Collect all architecture branches at the same level as product_family elif branch.get("category") == "architecture": arch = branch.get("name") if arch: arches.add(arch) - # If 'noarch' is present, expand to all main architectures if "noarch" in arches: arches = set(main_arches) if not family_branch: continue - # Find the product_name branch for CPE/version info prod_name = None cpe = None product_full_name = None @@ -108,29 +97,24 @@ def extract_rhel_affected_products_for_db(csaf: dict) -> set: if not prod_name or not cpe: continue - # Skip if this is an EUS product if _is_eus_product(product_full_name, cpe): logger.debug(f"Skipping EUS product: {product_full_name}") continue - # Parses the CPE string to extract major and minor version numbers # Example CPE: "cpe:/a:redhat:enterprise_linux:9::appstream" - parts = cpe.split(":") # Split the CPE string by colon + parts = cpe.split(":") major = None minor = None if len(parts) > 4: - version = parts[4] # The version is typically the 5th field (index 4) + version = parts[4] if version: if "." 
in version: - # If the version contains a dot, split into major and minor major, minor = version.split(".", 1) major = int(major) minor = int(minor) else: - # If no dot, only major version is present major = int(version) - # For each architecture, add a tuple with product info to the set for arch in arches: name = arch_name_map.get(arch) if name is None: @@ -138,11 +122,11 @@ def extract_rhel_affected_products_for_db(csaf: dict) -> set: continue if major: affected_products.add(( - family_branch.get("name"), # variant (e.g., "Red Hat Enterprise Linux") - name, # user-friendly architecture name - major, # major version number - minor, # minor version number (may be None) - arch # architecture short name + family_branch.get("name"), + name, + major, + minor, + arch )) logger.debug(f"Number of affected products: {len(affected_products)}") return affected_products @@ -166,7 +150,6 @@ def _extract_packages_from_product_tree(csaf: dict) -> set: if not product_tree: return packages - # Build a map of product_id -> is_eus product_eus_map = {} def traverse_for_eus(branches): @@ -174,7 +157,6 @@ def traverse_for_eus(branches): for branch in branches: category = branch.get("category") - # Check if this is a product_name with CPE if category == "product_name": prod = branch.get("product", {}) product_id = prod.get("product_id") @@ -185,15 +167,12 @@ def traverse_for_eus(branches): is_eus = _is_eus_product(product_name, cpe) product_eus_map[product_id] = is_eus - # Recurse into nested branches if "branches" in branch: traverse_for_eus(branch["branches"]) - # First pass: build EUS map for vendor_branch in product_tree.get("branches", []): traverse_for_eus(vendor_branch.get("branches", [])) - # Now extract packages from product_version entries def extract_packages_from_branches(branches): """Recursively traverse to extract packages""" for branch in branches: @@ -204,18 +183,14 @@ def extract_packages_from_branches(branches): product_id = prod.get("product_id") purl = prod.get("product_identification_helper", {}).get("purl") - # Skip if no product_id if not product_id: continue - # Check if this is an RPM using PURL (not container or other) if purl and not purl.startswith("pkg:rpm/"): continue - # Skip if product is EUS (check product_id prefix) # Product IDs for packages can have format: "AppStream-9.0.0.Z.E4S:package-nevra" # or just "package-nevra" for packages in product_version entries - # We need to check if any parent product is EUS skip_eus = False for eus_prod_id, is_eus in product_eus_map.items(): if is_eus and (":" in product_id and product_id.startswith(eus_prod_id + ":")): @@ -225,16 +200,12 @@ def extract_packages_from_branches(branches): if skip_eus: continue - # Extract NEVRA from product_id # Format: "package-epoch:version-release.arch" or "package-epoch:version-release.arch::module:stream" - # For modular packages, strip off the "::module:stream" suffix packages.add(product_id.split("::")[0]) - # Recurse if "branches" in branch: extract_packages_from_branches(branch["branches"]) - # Second pass: extract packages for vendor_branch in product_tree.get("branches", []): extract_packages_from_branches(vendor_branch.get("branches", [])) @@ -247,11 +218,10 @@ def red_hat_advisory_scraper(csaf: dict): logger.warning("No vulnerabilities found in CSAF document") return None - # red_hat_advisories table values - red_hat_issued_at = csaf["document"]["tracking"]["initial_release_date"] # "2025-02-24T03:42:46+00:00" - red_hat_updated_at = csaf["document"]["tracking"]["current_release_date"] # 
"2025-04-17T12:08:56+00:00" - name = csaf["document"]["tracking"]["id"] # "RHSA-2025:1234" - red_hat_synopsis = csaf["document"]["title"] # "Red Hat Bug Fix Advisory: Red Hat Quay v3.13.4 bug fix release" + red_hat_issued_at = csaf["document"]["tracking"]["initial_release_date"] + red_hat_updated_at = csaf["document"]["tracking"]["current_release_date"] + name = csaf["document"]["tracking"]["id"] + red_hat_synopsis = csaf["document"]["title"] red_hat_description = None topic = None for item in csaf["document"]["notes"]: @@ -260,39 +230,32 @@ def red_hat_advisory_scraper(csaf: dict): elif item["category"] == "summary": topic = item["text"] kind_lookup = {"RHSA": "Security", "RHBA": "Bug Fix", "RHEA": "Enhancement"} - kind = kind_lookup[name.split("-")[0]] # "RHSA-2025:1234" --> "Security" - severity = csaf["document"]["aggregate_severity"]["text"] # "Important" + kind = kind_lookup[name.split("-")[0]] + severity = csaf["document"]["aggregate_severity"]["text"] - # To maintain consistency with the existing database, we need to replace the + # To maintain consistency with the existing database, replace # "Red Hat [KIND] Advisory:" prefixes with the severity level. red_hat_synopsis = red_hat_synopsis.replace("Red Hat Bug Fix Advisory: ", f"{severity}:") red_hat_synopsis = red_hat_synopsis.replace("Red Hat Security Advisory:", f"{severity}:") red_hat_synopsis = red_hat_synopsis.replace("Red Hat Enhancement Advisory: ", f"{severity}:") - # red_hat_advisory_packages table values - # Extract packages from product_tree (handles both regular and modular packages) red_hat_fixed_packages = _extract_packages_from_product_tree(csaf) red_hat_cve_set = set() red_hat_bugzilla_set = set() for vulnerability in csaf["vulnerabilities"]: - # red_hat_advisory_cves table values. Many older advisories do not have CVEs and so we need to handle that. cve_id = vulnerability.get("cve", None) cve_cvss3_scoring_vector = vulnerability.get("scores", [{}])[0].get("cvss_v3", {}).get("vectorString", None) cve_cvss3_base_score = vulnerability.get("scores", [{}])[0].get("cvss_v3", {}).get("baseScore", None) cve_cwe = vulnerability.get("cwe", {}).get("id", None) red_hat_cve_set.add((cve_id, cve_cvss3_scoring_vector, cve_cvss3_base_score, cve_cwe)) - # red_hat_advisory_bugzilla_bugs table values for bug_id in vulnerability.get("ids", []): if bug_id.get("system_name") == "Red Hat Bugzilla ID": red_hat_bugzilla_set.add(bug_id["text"]) - # red_hat_advisory_affected_products table values red_hat_affected_products = extract_rhel_affected_products_for_db(csaf) - - # If all products were EUS (none left after filtering), skip this advisory if not red_hat_affected_products: logger.info(f"Skipping advisory {name}: all products are EUS-only") return None From c87bb75d04e539a16de05265b0dbbf9d2e3107c7 Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 13 Nov 2025 11:41:18 -0700 Subject: [PATCH 14/30] Improve exception handling in database_service - Remove redundant str() calls in f-strings - Use 'raise ... 
from e' to preserve exception chain --- apollo/server/services/database_service.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apollo/server/services/database_service.py b/apollo/server/services/database_service.py index 769e5e3..0a66800 100644 --- a/apollo/server/services/database_service.py +++ b/apollo/server/services/database_service.py @@ -185,5 +185,5 @@ async def update_last_indexed_at(self, new_timestamp: datetime, user_email: str) } except Exception as e: - logger.error(f"Failed to update last_indexed_at: {str(e)}") - raise RuntimeError(f"Failed to update timestamp: {str(e)}") \ No newline at end of file + logger.error(f"Failed to update last_indexed_at: {e}") + raise RuntimeError(f"Failed to update timestamp: {e}") from e \ No newline at end of file From db07012f9aa19116ff44ce4ed7b67dd1e0c6e94c Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 13 Nov 2025 11:48:26 -0700 Subject: [PATCH 15/30] Refactor nested functions to pure functions Converted nested helper functions to standalone pure functions: - _traverse_for_eus: Now takes and returns product_eus_map explicitly - _extract_packages_from_branches: Now takes and returns packages explicitly This makes the code more testable, readable, and eliminates hidden state mutations from closure variables. --- apollo/rhcsaf/__init__.py | 134 +++++++++++++++++++++++--------------- 1 file changed, 82 insertions(+), 52 deletions(-) diff --git a/apollo/rhcsaf/__init__.py b/apollo/rhcsaf/__init__.py index d7dd7a4..98eb7c2 100644 --- a/apollo/rhcsaf/__init__.py +++ b/apollo/rhcsaf/__init__.py @@ -132,82 +132,112 @@ def extract_rhel_affected_products_for_db(csaf: dict) -> set: return affected_products -def _extract_packages_from_product_tree(csaf: dict) -> set: +def _traverse_for_eus(branches, product_eus_map=None): """ - Extracts fixed packages from CSAF product_tree using product_id fields. - Handles both regular and modular packages by extracting NEVRAs directly from product_id. - Filters out EUS products. + Recursively traverse CSAF branches to build EUS product map. Args: - csaf: CSAF document dict + branches: List of CSAF branch dictionaries to traverse + product_eus_map: Optional dict to accumulate results + + Returns: + Dict mapping product_id to boolean indicating if product is EUS + """ + if product_eus_map is None: + product_eus_map = {} + + for branch in branches: + category = branch.get("category") + + if category == "product_name": + prod = branch.get("product", {}) + product_id = prod.get("product_id") + + if product_id: + product_name = prod.get("name", "") + cpe = prod.get("product_identification_helper", {}).get("cpe", "") + is_eus = _is_eus_product(product_name, cpe) + product_eus_map[product_id] = is_eus + + if "branches" in branch: + _traverse_for_eus(branch["branches"], product_eus_map) + + return product_eus_map + + +def _extract_packages_from_branches(branches, product_eus_map, packages=None): + """ + Recursively traverse CSAF branches to extract package NEVRAs. 
+ + Args: + branches: List of CSAF branch dictionaries to traverse + product_eus_map: Dict mapping product_id to EUS status + packages: Optional set to accumulate results Returns: Set of NEVRA strings """ - packages = set() - product_tree = csaf.get("product_tree", {}) + if packages is None: + packages = set() - if not product_tree: - return packages + for branch in branches: + category = branch.get("category") - product_eus_map = {} + if category == "product_version": + prod = branch.get("product", {}) + product_id = prod.get("product_id") + purl = prod.get("product_identification_helper", {}).get("purl") - def traverse_for_eus(branches): - """Recursively traverse to build EUS map""" - for branch in branches: - category = branch.get("category") + if not product_id: + continue - if category == "product_name": - prod = branch.get("product", {}) - product_id = prod.get("product_id") + if purl and not purl.startswith("pkg:rpm/"): + continue - if product_id: - product_name = prod.get("name", "") - cpe = prod.get("product_identification_helper", {}).get("cpe", "") - is_eus = _is_eus_product(product_name, cpe) - product_eus_map[product_id] = is_eus + # Product IDs for packages can have format: "AppStream-9.0.0.Z.E4S:package-nevra" + # or just "package-nevra" for packages in product_version entries + skip_eus = False + for eus_prod_id, is_eus in product_eus_map.items(): + if is_eus and (":" in product_id and product_id.startswith(eus_prod_id + ":")): + skip_eus = True + break - if "branches" in branch: - traverse_for_eus(branch["branches"]) + if skip_eus: + continue - for vendor_branch in product_tree.get("branches", []): - traverse_for_eus(vendor_branch.get("branches", [])) + # Format: "package-epoch:version-release.arch" or "package-epoch:version-release.arch::module:stream" + packages.add(product_id.split("::")[0]) - def extract_packages_from_branches(branches): - """Recursively traverse to extract packages""" - for branch in branches: - category = branch.get("category") + if "branches" in branch: + _extract_packages_from_branches(branch["branches"], product_eus_map, packages) - if category == "product_version": - prod = branch.get("product", {}) - product_id = prod.get("product_id") - purl = prod.get("product_identification_helper", {}).get("purl") + return packages - if not product_id: - continue - if purl and not purl.startswith("pkg:rpm/"): - continue +def _extract_packages_from_product_tree(csaf: dict) -> set: + """ + Extracts fixed packages from CSAF product_tree using product_id fields. + Handles both regular and modular packages by extracting NEVRAs directly from product_id. + Filters out EUS products. 
- # Product IDs for packages can have format: "AppStream-9.0.0.Z.E4S:package-nevra" - # or just "package-nevra" for packages in product_version entries - skip_eus = False - for eus_prod_id, is_eus in product_eus_map.items(): - if is_eus and (":" in product_id and product_id.startswith(eus_prod_id + ":")): - skip_eus = True - break + Args: + csaf: CSAF document dict - if skip_eus: - continue + Returns: + Set of NEVRA strings + """ + product_tree = csaf.get("product_tree", {}) - # Format: "package-epoch:version-release.arch" or "package-epoch:version-release.arch::module:stream" - packages.add(product_id.split("::")[0]) + if not product_tree: + return set() - if "branches" in branch: - extract_packages_from_branches(branch["branches"]) + product_eus_map = {} + for vendor_branch in product_tree.get("branches", []): + product_eus_map = _traverse_for_eus(vendor_branch.get("branches", []), product_eus_map) + packages = set() for vendor_branch in product_tree.get("branches", []): - extract_packages_from_branches(vendor_branch.get("branches", [])) + packages = _extract_packages_from_branches(vendor_branch.get("branches", []), product_eus_map, packages) return packages From 0dc061c3840cf2ed5c8fa14eb6f6295b6f942d2e Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 13 Nov 2025 12:24:59 -0700 Subject: [PATCH 16/30] Move EUS-only check earlier to avoid unnecessary work Check if advisory only affects EUS products immediately after verifying vulnerabilities exist, before extracting packages, CVEs, and other data. This saves processing time for advisories that will be skipped anyway. Also cleaned up redundant product_full_name variable. --- apollo/rhcsaf/__init__.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/apollo/rhcsaf/__init__.py b/apollo/rhcsaf/__init__.py index 98eb7c2..95175c4 100644 --- a/apollo/rhcsaf/__init__.py +++ b/apollo/rhcsaf/__init__.py @@ -86,19 +86,17 @@ def extract_rhel_affected_products_for_db(csaf: dict) -> set: continue prod_name = None cpe = None - product_full_name = None for branch in family_branch.get("branches", []): if branch.get("category") == "product_name": prod = branch.get("product", {}) prod_name = prod.get("name") - product_full_name = prod.get("name") cpe = prod.get("product_identification_helper", {}).get("cpe") break if not prod_name or not cpe: continue - if _is_eus_product(product_full_name, cpe): - logger.debug(f"Skipping EUS product: {product_full_name}") + if _is_eus_product(prod_name, cpe): + logger.debug(f"Skipping EUS product: {prod_name}") continue # Example CPE: "cpe:/a:redhat:enterprise_linux:9::appstream" @@ -248,9 +246,15 @@ def red_hat_advisory_scraper(csaf: dict): logger.warning("No vulnerabilities found in CSAF document") return None + name = csaf["document"]["tracking"]["id"] + + red_hat_affected_products = extract_rhel_affected_products_for_db(csaf) + if not red_hat_affected_products: + logger.info(f"Skipping advisory {name}: all products are EUS-only") + return None + red_hat_issued_at = csaf["document"]["tracking"]["initial_release_date"] red_hat_updated_at = csaf["document"]["tracking"]["current_release_date"] - name = csaf["document"]["tracking"]["id"] red_hat_synopsis = csaf["document"]["title"] red_hat_description = None topic = None @@ -285,11 +289,6 @@ def red_hat_advisory_scraper(csaf: dict): if bug_id.get("system_name") == "Red Hat Bugzilla ID": red_hat_bugzilla_set.add(bug_id["text"]) - red_hat_affected_products = extract_rhel_affected_products_for_db(csaf) - if not 
red_hat_affected_products: - logger.info(f"Skipping advisory {name}: all products are EUS-only") - return None - return { "red_hat_issued_at": str(red_hat_issued_at), "red_hat_updated_at": str(red_hat_updated_at), From 42aea5ce47644300e8b1c46f89b0afd76fc05e79 Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Mon, 3 Nov 2025 11:55:08 -0700 Subject: [PATCH 17/30] Fix config import validation issues This commit addresses two validation issues that prevented importing configuration files exported from production: 1. Export serializer converting version numbers to floats: - The _json_serializer in admin_supported_products.py was converting all Decimal types to float, including version numbers - Version numbers (match_major_version, match_minor_version) should be integers, not floats - Updated serializer to check if Decimal is a whole number and convert to int, preserving proper type semantics 2. Name validation rejecting parentheses: - Production database contains legacy products with names like "Rocky Linux 8.5 x86_64 (Legacy)" - Validation pattern only allowed: letters, numbers, spaces, dots, hyphens, and underscores - Updated NAME_PATTERN to allow parentheses for legacy product naming - Updated error message to reflect allowed characters These changes ensure that configurations exported from production can be successfully imported into development environments without manual data cleanup. --- apollo/server/routes/admin_supported_products.py | 4 ++++ apollo/server/validation.py | 6 +++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/apollo/server/routes/admin_supported_products.py b/apollo/server/routes/admin_supported_products.py index 730ef5b..f938374 100644 --- a/apollo/server/routes/admin_supported_products.py +++ b/apollo/server/routes/admin_supported_products.py @@ -1296,6 +1296,10 @@ async def _get_mirror_config_data(mirror: SupportedProductsRhMirror) -> Dict[str def _json_serializer(obj): """Custom JSON serializer for non-standard types""" if isinstance(obj, Decimal): + # Convert Decimal to int for version numbers (which should be integers) + # Check if it's a whole number to preserve integer type + if obj % 1 == 0: + return int(obj) return float(obj) raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable") diff --git a/apollo/server/validation.py b/apollo/server/validation.py index ec078b2..4927b18 100644 --- a/apollo/server/validation.py +++ b/apollo/server/validation.py @@ -60,8 +60,8 @@ class ValidationPatterns: # URL validation - must start with http:// or https:// URL_PATTERN = re.compile(r"^https?://.+") - # Name patterns - alphanumeric with common special characters and spaces - NAME_PATTERN = re.compile(r"^[a-zA-Z0-9._\s-]+$") + # Name patterns - alphanumeric with common special characters, spaces, and parentheses + NAME_PATTERN = re.compile(r"^[a-zA-Z0-9._\s()\-]+$") # Architecture validation ARCH_PATTERN = re.compile(r"^(x86_64|aarch64|i386|i686|ppc64|ppc64le|s390x|riscv64|noarch)$") @@ -107,7 +107,7 @@ def validate_name(name: str, min_length: int = 3, field_name: str = "name") -> s if not ValidationPatterns.NAME_PATTERN.match(trimmed_name): raise ValidationError( - f"{field_name.title()} can only contain letters, numbers, spaces, dots, hyphens, and underscores", + f"{field_name.title()} can only contain letters, numbers, spaces, dots, hyphens, underscores, and parentheses", ValidationErrorType.INVALID_FORMAT, field_name, ) From 2e3b51f1e47a3f8d1daaa0fa90bb8c42e9ae5ac4 Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 6 Nov 
2025 11:07:51 -0700 Subject: [PATCH 18/30] Update tests for integer Decimal serialization Update test expectations to match the new behavior where whole number Decimal values are serialized as integers instead of floats. This aligns with the change to _json_serializer that preserves integer types for version numbers and other integer values. --- .dockerignore | 7 ++++++- apollo/tests/test_admin_routes_supported_products.py | 9 +++++---- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.dockerignore b/.dockerignore index 5b34fe7..55a3fdb 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,4 +1,9 @@ node_modules .venv .ijwb -.idea \ No newline at end of file +.idea +temp +csaf_analysis +bazel-* +.git +container_data \ No newline at end of file diff --git a/apollo/tests/test_admin_routes_supported_products.py b/apollo/tests/test_admin_routes_supported_products.py index 32ad971..1265d9c 100644 --- a/apollo/tests/test_admin_routes_supported_products.py +++ b/apollo/tests/test_admin_routes_supported_products.py @@ -168,8 +168,8 @@ def test_json_serializer_decimal_integer(self): """Test JSON serializer with integer Decimal.""" decimal_val = Decimal("42") result = _json_serializer(decimal_val) - self.assertEqual(result, 42.0) - self.assertIsInstance(result, float) + self.assertEqual(result, 42) + self.assertIsInstance(result, int) def test_json_serializer_unsupported_type(self): """Test JSON serializer with unsupported type.""" @@ -211,10 +211,11 @@ def test_format_export_data_with_decimal(self): result = _format_export_data(data) - # Should be valid JSON with Decimals converted to floats + # Should be valid JSON with Decimals converted appropriately + # (floats for decimals, ints for whole numbers) parsed = json.loads(result) self.assertEqual(parsed[0]["price"], 19.99) - self.assertEqual(parsed[1]["price"], 99.0) + self.assertEqual(parsed[1]["price"], 99) def test_format_export_data_empty(self): """Test formatting empty export data.""" From 6c7eb9e0308a9c0acdb3838fe11ae598356bd45d Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 13 Nov 2025 14:09:55 -0700 Subject: [PATCH 19/30] Remove unnecessary comments --- apollo/server/routes/admin_supported_products.py | 2 -- apollo/tests/test_admin_routes_supported_products.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/apollo/server/routes/admin_supported_products.py b/apollo/server/routes/admin_supported_products.py index f938374..c9eddb8 100644 --- a/apollo/server/routes/admin_supported_products.py +++ b/apollo/server/routes/admin_supported_products.py @@ -1296,8 +1296,6 @@ async def _get_mirror_config_data(mirror: SupportedProductsRhMirror) -> Dict[str def _json_serializer(obj): """Custom JSON serializer for non-standard types""" if isinstance(obj, Decimal): - # Convert Decimal to int for version numbers (which should be integers) - # Check if it's a whole number to preserve integer type if obj % 1 == 0: return int(obj) return float(obj) diff --git a/apollo/tests/test_admin_routes_supported_products.py b/apollo/tests/test_admin_routes_supported_products.py index 1265d9c..bd518c0 100644 --- a/apollo/tests/test_admin_routes_supported_products.py +++ b/apollo/tests/test_admin_routes_supported_products.py @@ -211,8 +211,6 @@ def test_format_export_data_with_decimal(self): result = _format_export_data(data) - # Should be valid JSON with Decimals converted appropriately - # (floats for decimals, ints for whole numbers) parsed = json.loads(result) self.assertEqual(parsed[0]["price"], 19.99) self.assertEqual(parsed[1]["price"], 99) 
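For reference, the end-to-end behavior of the serializer updated in the two commits above (the function body matches the patched _json_serializer; the sample dict is illustrative):

    import json
    from decimal import Decimal

    def _json_serializer(obj):
        # Whole-number Decimals (e.g. version numbers) become ints,
        # fractional Decimals become floats.
        if isinstance(obj, Decimal):
            if obj % 1 == 0:
                return int(obj)
            return float(obj)
        raise TypeError(f"Object of type {obj.__class__.__name__} is not JSON serializable")

    print(json.dumps(
        {"match_major_version": Decimal("9"), "price": Decimal("19.99")},
        default=_json_serializer,
    ))
    # {"match_major_version": 9, "price": 19.99}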
From 98d911235373c9fd94ad8e2473464ffc57510f14 Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Thu, 13 Nov 2025 14:47:10 -0700 Subject: [PATCH 20/30] Remove redundant comments from validation module --- apollo/server/validation.py | 32 +------------------------------- 1 file changed, 1 insertion(+), 31 deletions(-) diff --git a/apollo/server/validation.py b/apollo/server/validation.py index 4927b18..976869c 100644 --- a/apollo/server/validation.py +++ b/apollo/server/validation.py @@ -57,16 +57,12 @@ def __init__( class ValidationPatterns: """Regex patterns for common validations.""" - # URL validation - must start with http:// or https:// URL_PATTERN = re.compile(r"^https?://.+") - # Name patterns - alphanumeric with common special characters, spaces, and parentheses NAME_PATTERN = re.compile(r"^[a-zA-Z0-9._\s()\-]+$") - # Architecture validation ARCH_PATTERN = re.compile(r"^(x86_64|aarch64|i386|i686|ppc64|ppc64le|s390x|riscv64|noarch)$") - # Repository name - more permissive for repo naming conventions REPO_NAME_PATTERN = re.compile(r"^[a-zA-Z0-9._-]+$") @@ -176,7 +172,6 @@ def validate_architecture(arch: str, field_name: str = "architecture") -> str: trimmed_arch = arch.strip() - # Check if it's a valid architecture enum value try: Architecture(trimmed_arch) except ValueError: @@ -277,27 +272,23 @@ def validate_config_structure(config: Any, config_index: int) -> List[str]: errors.append(f"Config {config_index}: Must be a dictionary") return errors - # Validate required top-level keys required_keys = ["product", "mirror", "repositories"] for key in required_keys: if key not in config: errors.append(f"Config {config_index}: Missing required key '{key}'") - # Validate product data structure if "product" in config: product_errors = ConfigValidator.validate_product_config( config["product"], config_index ) errors.extend(product_errors) - # Validate mirror data structure if "mirror" in config: mirror_errors = ConfigValidator.validate_mirror_config( config["mirror"], config_index ) errors.extend(mirror_errors) - # Validate repositories data structure if "repositories" in config: repo_errors = ConfigValidator.validate_repositories_config( config["repositories"], config_index @@ -324,7 +315,6 @@ def validate_product_config(product: Any, config_index: int) -> List[str]: errors.append(f"Config {config_index}: Product must be a dictionary") return errors - # Validate required product fields required_fields = ["name", "variant", "vendor"] for field in required_fields: if field not in product or not product[field]: @@ -332,7 +322,6 @@ def validate_product_config(product: Any, config_index: int) -> List[str]: f"Config {config_index}: Product missing required field '{field}'" ) - # Validate product name format if present if product.get("name"): try: FieldValidator.validate_name( @@ -361,7 +350,6 @@ def validate_mirror_config(mirror: Any, config_index: int) -> List[str]: errors.append(f"Config {config_index}: Mirror must be a dictionary") return errors - # Validate required mirror fields required_fields = ["name", "match_variant", "match_major_version", "match_arch"] for field in required_fields: if field not in mirror or mirror[field] is None: @@ -369,7 +357,6 @@ def validate_mirror_config(mirror: Any, config_index: int) -> List[str]: f"Config {config_index}: Mirror missing required field '{field}'" ) - # Validate mirror name format if present if mirror.get("name"): try: FieldValidator.validate_name( @@ -378,7 +365,6 @@ def validate_mirror_config(mirror: Any, config_index: int) -> List[str]: except 
ValidationError as e: errors.append(f"Config {config_index}: Mirror name '{mirror['name']}' - {e.message}") - # Validate architecture if present if mirror.get("match_arch"): try: FieldValidator.validate_architecture( @@ -387,7 +373,6 @@ def validate_mirror_config(mirror: Any, config_index: int) -> List[str]: except ValidationError as e: errors.append(f"Config {config_index}: Mirror architecture '{mirror['match_arch']}' - {e.message}") - # Validate major version is numeric if present if mirror.get("match_major_version") is not None: if ( not isinstance(mirror["match_major_version"], int) @@ -397,7 +382,6 @@ def validate_mirror_config(mirror: Any, config_index: int) -> List[str]: f"Config {config_index}: Mirror match_major_version must be a non-negative integer" ) - # Validate minor version is numeric if present if mirror.get("match_minor_version") is not None: if ( not isinstance(mirror["match_minor_version"], int) @@ -458,7 +442,6 @@ def validate_repository_config( ) return errors - # Validate required repository fields required_fields = ["repo_name", "arch", "production", "url"] for field in required_fields: if field not in repo or repo[field] is None: @@ -466,7 +449,6 @@ def validate_repository_config( f"Config {config_index}, Repo {repo_index}: Missing required field '{field}'" ) - # Validate repository name format if present if repo.get("repo_name"): try: FieldValidator.validate_repo_name( @@ -475,7 +457,6 @@ def validate_repository_config( except ValidationError as e: errors.append(f"Config {config_index}, Repo {repo_index}: Repository name '{repo['repo_name']}' - {e.message}") - # Validate architecture if present if repo.get("arch"): try: FieldValidator.validate_architecture( @@ -484,10 +465,9 @@ def validate_repository_config( except ValidationError as e: errors.append(f"Config {config_index}, Repo {repo_index}: Architecture '{repo['arch']}' - {e.message}") - # Validate URLs if present for url_field in ["url", "debug_url", "source_url"]: url_value = repo.get(url_field) - if url_value: # Only validate if not empty + if url_value: try: FieldValidator.validate_url( url_value, @@ -499,7 +479,6 @@ def validate_repository_config( f"Config {config_index}, Repo {repo_index}: {url_field.replace('_', ' ').title()} '{url_value}' - {e.message}" ) - # Validate production is boolean if present if "production" in repo and repo["production"] is not None: if not isinstance(repo["production"], bool): errors.append( @@ -528,7 +507,6 @@ def validate_mirror_form( validated_data = {} errors = [] - # Validate name try: validated_data["name"] = FieldValidator.validate_name( form_data.get("name", ""), min_length=3, field_name="mirror name" @@ -536,7 +514,6 @@ def validate_mirror_form( except ValidationError as e: errors.append(e.message) - # Validate architecture try: validated_data["match_arch"] = FieldValidator.validate_architecture( form_data.get("match_arch", ""), field_name="architecture" @@ -544,7 +521,6 @@ def validate_mirror_form( except ValidationError as e: errors.append(e.message) - # Copy other fields as-is for now (they have different validation requirements) for field in ["match_variant", "match_major_version", "match_minor_version"]: if field in form_data: validated_data[field] = form_data[field] @@ -567,7 +543,6 @@ def validate_repomd_form( validated_data = {} errors = [] - # Validate repository name try: validated_data["repo_name"] = FieldValidator.validate_repo_name( form_data.get("repo_name", ""), @@ -577,7 +552,6 @@ def validate_repomd_form( except ValidationError as e: 
errors.append(e.message) - # Validate main URL (required) try: validated_data["url"] = FieldValidator.validate_url( form_data.get("url", ""), field_name="repository URL", required=True @@ -585,7 +559,6 @@ def validate_repomd_form( except ValidationError as e: errors.append(e.message) - # Validate debug URL (optional) try: debug_url = FieldValidator.validate_url( form_data.get("debug_url", ""), field_name="debug URL", required=False @@ -594,7 +567,6 @@ def validate_repomd_form( except ValidationError as e: errors.append(e.message) - # Validate source URL (optional) try: source_url = FieldValidator.validate_url( form_data.get("source_url", ""), field_name="source URL", required=False @@ -603,7 +575,6 @@ def validate_repomd_form( except ValidationError as e: errors.append(e.message) - # Validate architecture try: validated_data["arch"] = FieldValidator.validate_architecture( form_data.get("arch", ""), field_name="architecture" @@ -611,7 +582,6 @@ def validate_repomd_form( except ValidationError as e: errors.append(e.message) - # Copy production flag validated_data["production"] = form_data.get("production", False) return validated_data, errors From 7182460b841c872d6c14f8f8c1b2251da7ad9990 Mon Sep 17 00:00:00 2001 From: Sam Thornton Date: Tue, 4 Nov 2025 12:59:28 -0700 Subject: [PATCH 21/30] Add active field to mirror configuration Add boolean active field to supported_products_rh_mirrors table to allow disabling mirrors without deleting them. This preserves historical data and mirror relationships while preventing the mirror from being used in new advisory processing. Changes: - Add active column with default true to supported_products_rh_mirrors - Add database index on active field for query performance - Add migration script for schema change - Update DB model with active field - Add active field to admin UI forms (create and edit) - Update mirror filtering in workflow service to respect active flag - Update configuration import/export to handle active field - Add active field validation in form processing --- apollo/db/__init__.py | 1 + .../20251104111759_add_mirror_active_field.sql | 11 +++++++++++ apollo/rpmworker/rh_matcher_activities.py | 5 +++-- apollo/schema.sql | 10 +++++++++- apollo/server/routes/admin_supported_products.py | 12 +++++++++++- apollo/server/services/workflow_service.py | 4 ++-- .../admin_supported_product_mirror.jinja | 16 ++++++++++++++++ .../admin_supported_product_mirror_new.jinja | 16 ++++++++++++++++ apollo/server/validation.py | 10 ++++++++++ 9 files changed, 79 insertions(+), 6 deletions(-) create mode 100644 apollo/migrations/20251104111759_add_mirror_active_field.sql diff --git a/apollo/db/__init__.py b/apollo/db/__init__.py index 2b98c4c..1fb0f25 100644 --- a/apollo/db/__init__.py +++ b/apollo/db/__init__.py @@ -201,6 +201,7 @@ class SupportedProductsRhMirror(Model): match_major_version = fields.IntField() match_minor_version = fields.IntField(null=True) match_arch = fields.CharField(max_length=255) + active = fields.BooleanField(default=True) rpm_repomds: fields.ReverseRelation["SupportedProductsRpmRepomd"] rpm_rh_overrides: fields.ReverseRelation["SupportedProductsRpmRhOverride"] diff --git a/apollo/migrations/20251104111759_add_mirror_active_field.sql b/apollo/migrations/20251104111759_add_mirror_active_field.sql new file mode 100644 index 0000000..6c12b1b --- /dev/null +++ b/apollo/migrations/20251104111759_add_mirror_active_field.sql @@ -0,0 +1,11 @@ +-- migrate:up +alter table supported_products_rh_mirrors +add column active boolean not null default 
true; + +create index supported_products_rh_mirrors_active_idx +on supported_products_rh_mirrors(active); + + +-- migrate:down +drop index if exists supported_products_rh_mirrors_active_idx; +alter table supported_products_rh_mirrors drop column active; diff --git a/apollo/rpmworker/rh_matcher_activities.py b/apollo/rpmworker/rh_matcher_activities.py index eb5f95b..2813f32 100644 --- a/apollo/rpmworker/rh_matcher_activities.py +++ b/apollo/rpmworker/rh_matcher_activities.py @@ -277,7 +277,7 @@ async def get_supported_products_with_rh_mirrors(filter_major_versions: Optional Filtering now happens at the mirror level within match_rh_repos activity. """ logger = Logger() - rh_mirrors = await SupportedProductsRhMirror.all().prefetch_related( + rh_mirrors = await SupportedProductsRhMirror.filter(active=True).prefetch_related( "rpm_repomds", ) ret = [] @@ -841,7 +841,8 @@ async def block_remaining_rh_advisories(supported_product_id: int) -> None: ).first().prefetch_related("rh_mirrors") for mirror in supported_product.rh_mirrors: mirrors = await SupportedProductsRhMirror.filter( - supported_product_id=supported_product_id + supported_product_id=supported_product_id, + active=True ) for mirror in mirrors: advisories = await get_matching_rh_advisories(mirror) diff --git a/apollo/schema.sql b/apollo/schema.sql index 686021e..46c385a 100644 --- a/apollo/schema.sql +++ b/apollo/schema.sql @@ -623,7 +623,8 @@ CREATE TABLE public.supported_products_rh_mirrors ( match_variant text NOT NULL, match_major_version numeric NOT NULL, match_minor_version numeric, - match_arch text NOT NULL + match_arch text NOT NULL, + active boolean DEFAULT true NOT NULL ); @@ -1507,6 +1508,13 @@ CREATE INDEX supported_products_rh_mirrors_match_variant_idx ON public.supported CREATE INDEX supported_products_rh_mirrors_supported_product_idx ON public.supported_products_rh_mirrors USING btree (supported_product_id); +-- +-- Name: supported_products_rh_mirrors_active_idx; Type: INDEX; Schema: public; Owner: - +-- + +CREATE INDEX supported_products_rh_mirrors_active_idx ON public.supported_products_rh_mirrors USING btree (active); + + -- -- Name: supported_products_rpm_repomds_arch_idx; Type: INDEX; Schema: public; Owner: - -- diff --git a/apollo/server/routes/admin_supported_products.py b/apollo/server/routes/admin_supported_products.py index c9eddb8..3312707 100644 --- a/apollo/server/routes/admin_supported_products.py +++ b/apollo/server/routes/admin_supported_products.py @@ -276,6 +276,9 @@ async def _import_configuration(import_data: List[Dict[str, Any]], replace_exist # Delete existing repositories await SupportedProductsRpmRepomd.filter(supported_products_rh_mirror=existing_mirror).delete() mirror = existing_mirror + # Update active field if provided + mirror.active = mirror_data.get("active", True) + await mirror.save() updated_count += 1 else: # Create new mirror @@ -285,7 +288,8 @@ async def _import_configuration(import_data: List[Dict[str, Any]], replace_exist match_variant=mirror_data["match_variant"], match_major_version=mirror_data["match_major_version"], match_minor_version=mirror_data.get("match_minor_version"), - match_arch=mirror_data["match_arch"] + match_arch=mirror_data["match_arch"], + active=mirror_data.get("active", True) ) await mirror.save() created_count += 1 @@ -475,6 +479,7 @@ async def admin_supported_product_mirror_new_post( match_major_version: int = Form(), match_minor_version: Optional[int] = Form(default=None), match_arch: str = Form(), + active: str = Form(default="true"), ): product = await 
get_entity_or_error_response( request, @@ -492,6 +497,7 @@ async def admin_supported_product_mirror_new_post( "match_major_version": match_major_version, "match_minor_version": match_minor_version, "match_arch": match_arch, + "active": active, } try: @@ -521,6 +527,7 @@ async def admin_supported_product_mirror_new_post( match_major_version=match_major_version, match_minor_version=match_minor_version, match_arch=validated_arch, + active=(active == "true"), ) await mirror.save() @@ -563,6 +570,7 @@ async def admin_supported_product_mirror_post( match_major_version: int = Form(), match_minor_version: Optional[int] = Form(default=None), match_arch: str = Form(), + active: str = Form(default="true"), ): mirror = await get_entity_or_error_response( request, @@ -606,6 +614,7 @@ async def admin_supported_product_mirror_post( mirror.match_major_version = match_major_version mirror.match_minor_version = match_minor_version mirror.match_arch = validated_arch + mirror.active = (active == "true") await mirror.save() # Re-fetch the mirror with all required relations after saving @@ -1274,6 +1283,7 @@ async def _get_mirror_config_data(mirror: SupportedProductsRhMirror) -> Dict[str "match_major_version": mirror.match_major_version, "match_minor_version": mirror.match_minor_version, "match_arch": mirror.match_arch, + "active": mirror.active, "created_at": mirror.created_at.isoformat(), "updated_at": mirror.updated_at.isoformat() if mirror.updated_at else None, }, diff --git a/apollo/server/services/workflow_service.py b/apollo/server/services/workflow_service.py index 18511de..56d5103 100644 --- a/apollo/server/services/workflow_service.py +++ b/apollo/server/services/workflow_service.py @@ -206,9 +206,9 @@ async def _validate_major_versions(self, major_versions: List[int]) -> None: # Import here to avoid circular imports from apollo.db import SupportedProductsRhMirror - + # Get available major versions from RH mirrors - rh_mirrors = await SupportedProductsRhMirror.all() + rh_mirrors = await SupportedProductsRhMirror.filter(active=True) available_versions = {int(mirror.match_major_version) for mirror in rh_mirrors} # Check if all requested major versions are available diff --git a/apollo/server/templates/admin_supported_product_mirror.jinja b/apollo/server/templates/admin_supported_product_mirror.jinja index 58298ed..a16e35c 100644 --- a/apollo/server/templates/admin_supported_product_mirror.jinja +++ b/apollo/server/templates/admin_supported_product_mirror.jinja @@ -119,6 +119,22 @@ +
+                <label>Status</label>
+                <select name="active">
+                    <option value="true" {% if mirror.active %}selected{% endif %}>Active</option>
+                    <option value="false" {% if not mirror.active %}selected{% endif %}>Inactive</option>
+                </select>
+                <small>
+                    Only active mirrors are used for advisory processing. Deactivate to exclude from workflows without deleting.
+                </small>
diff --git a/apollo/server/templates/admin_supported_product_mirror_new.jinja b/apollo/server/templates/admin_supported_product_mirror_new.jinja index 003f826..1b9e039 100644 --- a/apollo/server/templates/admin_supported_product_mirror_new.jinja +++ b/apollo/server/templates/admin_supported_product_mirror_new.jinja @@ -108,6 +108,22 @@
+                <label>Status</label>
+                <select name="active">
+                    <option value="true" selected>Active</option>
+                    <option value="false">Inactive</option>
+                </select>
+                <small>
+                    Only active mirrors are used for advisory processing. Deactivate to exclude from workflows without deleting.
+                </small>
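
To summarize the pattern patch 21 introduces, a minimal sketch of the soft-disable query under Tortoise ORM conventions (model fields abridged; not the full Apollo model):

    from tortoise import fields
    from tortoise.models import Model

    class SupportedProductsRhMirror(Model):
        name = fields.CharField(max_length=255)
        # Soft-disable flag: rows are never deleted, only excluded.
        active = fields.BooleanField(default=True)

    async def get_active_mirrors():
        # Only active mirrors participate in advisory processing;
        # inactive ones keep their historical data and relations.
        return await SupportedProductsRhMirror.filter(active=True)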