From 8540a208c830623e87bb8c4a1982c9b1ed0374fe Mon Sep 17 00:00:00 2001 From: adekoder Date: Wed, 29 Apr 2026 13:13:53 +0100 Subject: [PATCH] feat: Add support for multiple matching algorithm versions --- infra/production.nix | 1 + infra/staging.nix | 1 + src/project/settings.py | 19 ++ src/shared/listeners/algorithms/__init__.py | 57 +++++ src/shared/listeners/algorithms/v1.py | 196 +++++++++++++++++ src/shared/listeners/automatic_linkage.py | 201 ++---------------- src/shared/listeners/cache_suggestions.py | 5 +- .../commands/regenerate_cached_suggestions.py | 22 +- ...vationclusterproposal_algorithm_version.py | 18 ++ src/shared/models/linkage.py | 18 ++ src/shared/tests/conftest.py | 2 + src/shared/tests/test_algorithm_registry.py | 151 +++++++++++++ src/shared/tests/test_linkage.py | 26 ++- src/shared/tests/test_proposal_queryset.py | 117 ++++++++++ src/shared/tests/test_suggestion_caching.py | 32 ++- src/webview/suggestions/views/lists.py | 3 +- .../tests/test_algorithm_version_filter.py | 143 +++++++++++++ 17 files changed, 805 insertions(+), 207 deletions(-) create mode 100644 src/shared/listeners/algorithms/__init__.py create mode 100644 src/shared/listeners/algorithms/v1.py create mode 100644 src/shared/migrations/0083_cvederivationclusterproposal_algorithm_version.py create mode 100644 src/shared/tests/test_algorithm_registry.py create mode 100644 src/shared/tests/test_proposal_queryset.py create mode 100644 src/webview/tests/test_algorithm_version_filter.py diff --git a/infra/production.nix b/infra/production.nix index ade2d715c..be6b84d17 100644 --- a/infra/production.nix +++ b/infra/production.nix @@ -105,6 +105,7 @@ in EMAIL_USE_SSL = true; EMAIL_HOST_USER = "noreply-securitytracker@nixos.org"; DEFAULT_FROM_EMAIL = "noreply-securitytracker@nixos.org"; + ACTIVE_MATCHING_ALGORITHM_VERSION = 1; }; secrets = { diff --git a/infra/staging.nix b/infra/staging.nix index 63dd4dd74..36528ee0b 100644 --- a/infra/staging.nix +++ b/infra/staging.nix @@ -114,6 
+114,7 @@ in GH_SECURITY_TEAM = "sectracker-testing-security"; GH_COMMITTERS_TEAM = "sectracker-testing-committers"; EMAIL_BACKEND = "django.core.mail.backends.console.EmailBackend"; + ACTIVE_MATCHING_ALGORITHM_VERSION = 1; }; secrets = { diff --git a/src/project/settings.py b/src/project/settings.py index c1ff2966b..31b864c6b 100644 --- a/src/project/settings.py +++ b/src/project/settings.py @@ -169,6 +169,25 @@ class DjangoSettings(BaseModel): """, default=1_000, ) + ACTIVE_MATCHING_ALGORITHM_VERSION: int = Field( + description=""" + Controls which registered matching algorithm version is used when + linking CVEs to derivations. Must match a VERSION defined in + shared/listeners/algorithms/. Bump this setting to activate a new + algorithm version without changing code. + """, + default=1, + ) + CANDIDATE_MATCHING_ALGORITHM_VERSION: int | None = Field( + description=""" + Optional. When set, identifies a new algorithm version being evaluated + in parallel. The candidate version does not run automatically — it is + only invoked by the test-run management command, which generates proposals + tagged with this version number for later metric comparison. + Set to None when no candidate is under evaluation. + """, + default=None, + ) SHOW_DEMO_DISCLAIMER: bool = Field( description=""" When set to True, the application will display a disclaimer about diff --git a/src/shared/listeners/algorithms/__init__.py b/src/shared/listeners/algorithms/__init__.py new file mode 100644 index 000000000..d72ff9b3a --- /dev/null +++ b/src/shared/listeners/algorithms/__init__.py @@ -0,0 +1,57 @@ +""" +Algorithm registry for CVE-to-derivation matching. + +Each algorithm version lives in its own module (v1.py, v2.py, ...) and must expose: + - VERSION + - build_new_links + +Modules register themselves by calling `register()`. 
The listener in shared/listeners/automatic_linkage.py imports them and delegates to the active version. +""" + +from typing import Protocol + +from django.conf import settings + +from shared.models.cve import Container + + +class MatchingAlgorithm(Protocol): + VERSION: int + + def build_new_links(self, container: Container) -> bool: ... + + +_registry: dict[int, MatchingAlgorithm] = {} + + +def register(module: MatchingAlgorithm) -> None: + """Register an algorithm module under its VERSION.""" + _registry[module.VERSION] = module + + +def _resolve(version: int) -> MatchingAlgorithm: + """Return the registered algorithm that matches the version.""" + if not _registry: + raise RuntimeError("No matching algorithm registered.") + try: + return _registry[version] + except KeyError: + raise KeyError(f"No matching algorithm registered for version {version}.") + + +def current_algorithm() -> MatchingAlgorithm: + """Return the active algorithm (ACTIVE_MATCHING_ALGORITHM_VERSION).""" + return _resolve(settings.ACTIVE_MATCHING_ALGORITHM_VERSION) + + +# TODO (@adekoder) will be used when we create the process to run the inactive new algorithm +# version +def candidate_algorithm() -> MatchingAlgorithm | None: + """ + Return the candidate algorithm (CANDIDATE_MATCHING_ALGORITHM_VERSION), + or None if no candidate is configured. + """ + version = settings.CANDIDATE_MATCHING_ALGORITHM_VERSION + if version is None: + return None + return _resolve(version) diff --git a/src/shared/listeners/algorithms/v1.py b/src/shared/listeners/algorithms/v1.py new file mode 100644 index 000000000..a183a3729 --- /dev/null +++ b/src/shared/listeners/algorithms/v1.py @@ -0,0 +1,196 @@ +""" +Matching algorithm version 1. + +Candidates are found by matching package name / product name (case-insensitive +substring) against derivation names in the latest completed evaluation of each +major channel. No version constraint checking is applied at this stage. 
+""" + +import logging +import sys + +from django.conf import settings +from django.db.models import ( + Case, + Exists, + F, + IntegerField, + OuterRef, + Q, + QuerySet, + Value, + When, + Window, +) +from django.db.models.functions import RowNumber + +from shared.models.cve import AffectedProduct, Container, Cpe +from shared.models.linkage import CVEDerivationClusterProposal, ProvenanceFlags +from shared.models.nix_evaluation import MAJOR_CHANNELS, NixDerivation, NixEvaluation + +from . import register + +VERSION: int = 1 + +logger = logging.getLogger(__name__) + + +def produce_linkage_candidates( + container: Container, + filtered_affected: QuerySet[AffectedProduct], +) -> dict[NixDerivation, ProvenanceFlags]: + # FIXME(@fricklerhandwerk): This will fall apart when we obtain the channel structure dynamically [ref:channel-structure] + active_channels_q = Q() + for ch in MAJOR_CHANNELS: + active_channels_q |= Q(channel__channel_branch__contains=ch) + + latest_complete_channels = ( + NixEvaluation.objects.filter( + active_channels_q, + state=NixEvaluation.EvaluationState.COMPLETED, + ) + .annotate( + row_num=Window( + expression=RowNumber(), + partition_by=[F("channel")], + order_by=F("updated_at").desc(), + ), + ) + .filter(row_num=1) + ) + + package_names = ( + filtered_affected.exclude(package_name__isnull=True) + .values_list("package_name", flat=True) + .distinct() + ) + products = ( + filtered_affected.exclude(product__isnull=True) + .values_list("product", flat=True) + .distinct() + ) + + package_q = Q() + for name in package_names: + package_q |= Q(name__icontains=name) + + product_q = Q() + for product in products: + product_q |= Q(name__icontains=product) + + if not package_q | product_q: + return {} + + annotations = {} + if package_q: + annotations["package_match"] = Case( + When(package_q, then=Value(ProvenanceFlags.PACKAGE_NAME_MATCH)), + default=Value(0), + output_field=IntegerField(), + ) + if product_q: + annotations["product_match"] = Case( + 
When(product_q, then=Value(ProvenanceFlags.PRODUCT_MATCH)), + default=Value(0), + output_field=IntegerField(), + ) + + candidates: dict[NixDerivation, ProvenanceFlags] = {} + matches = NixDerivation.objects.filter( + package_q | product_q, + parent_evaluation__in=list(latest_complete_channels), + ).annotate(**annotations) + for drv in matches.iterator(): + flags = getattr(drv, "package_match", 0) | getattr(drv, "product_match", 0) + candidates[drv] = ProvenanceFlags(flags) + + return candidates + + +def build_new_links(container: Container) -> bool: + if container.cve.triaged: + logger.info( + "Container received for '%s', but already triaged, skipping linkage.", + container.cve, + ) + return False + + if CVEDerivationClusterProposal.objects.filter( + cve=container.cve, algorithm_version=VERSION + ).exists(): + logger.info("Suggestion already exists for '%s', skipping", container.cve) + return False + + if container.tags.filter(value="exclusively-hosted-service").exists(): + logger.info( + "Container for '%s' is exclusively-hosted-service, rejecting without match.", + container.cve, + ) + CVEDerivationClusterProposal.objects.create( + cve=container.cve, + status=CVEDerivationClusterProposal.Status.REJECTED, + rejection_reason=CVEDerivationClusterProposal.RejectionReason.EXCLUSIVELY_HOSTED_SERVICE, + algorithm_version=VERSION, + ) + return True + + has_any_cpe = Exists(Cpe.objects.filter(affectedproduct=OuterRef("pk"))) + has_non_hardware_cpe = Exists( + Cpe.objects.filter(affectedproduct=OuterRef("pk")).exclude( + name__istartswith="cpe:2.3:h:" + ) + ) + filtered_affected = container.affected.exclude(has_any_cpe & ~has_non_hardware_cpe) + + if container.affected.exists() and not filtered_affected.exists(): + logger.info( + "Container for '%s' has only hardware CPEs, rejecting without match.", + container.cve, + ) + CVEDerivationClusterProposal.objects.create( + cve=container.cve, + status=CVEDerivationClusterProposal.Status.REJECTED, + 
rejection_reason=CVEDerivationClusterProposal.RejectionReason.HARDWARE_ONLY_CPE, + algorithm_version=VERSION, + ) + return True + + drvs = produce_linkage_candidates(container, filtered_affected) + if not drvs: + logger.info("No derivations matching '%s', ignoring", container.cve) + return False + + if len(drvs) > settings.MAX_MATCHES: + logger.warning( + "More than '%d' derivations matching '%s', ignoring", + settings.MAX_MATCHES, + container.cve, + ) + return False + + proposal = CVEDerivationClusterProposal.objects.create( + cve=container.cve, + algorithm_version=VERSION, + ) + + drvs_throughs = [ + CVEDerivationClusterProposal.derivations.through( + proposal_id=proposal.pk, derivation_id=drv.pk, provenance_flags=flags + ) + for drv, flags in drvs.items() + ] + + CVEDerivationClusterProposal.derivations.through.objects.bulk_create(drvs_throughs) + + if drvs_throughs: + logger.info( + "Matching suggestion for '%s': %d derivations found.", + container.cve, + len(drvs_throughs), + ) + + return True + + +# Self-register when imported +register(sys.modules[__name__]) # type: ignore[arg-type] diff --git a/src/shared/listeners/automatic_linkage.py b/src/shared/listeners/automatic_linkage.py index 1bb9ba8a3..a6955fb41 100644 --- a/src/shared/listeners/automatic_linkage.py +++ b/src/shared/listeners/automatic_linkage.py @@ -1,195 +1,20 @@ -import logging +""" +CVE-to-derivation matching listener. + +Delegates to the active algorithm version (ACTIVE_MATCHING_ALGORITHM_VERSION) in the algorithms registry. +To add a new algorithm version, create shared/listeners/algorithms/vN.py and +import it here so it self-registers. +""" import pgpubsub -from django.conf import settings -from django.db import models -from django.db.models import ( - Case, - Exists, - F, - IntegerField, - OuterRef, - Q, - Value, - When, - Window, -) -from django.db.models.functions import RowNumber +# Import all algorithm modules so they self-register. 
+import shared.listeners.algorithms.v1 # noqa: F401 from shared.channels import ContainerChannel -from shared.models.cve import Container, Cpe -from shared.models.linkage import CVEDerivationClusterProposal, ProvenanceFlags -from shared.models.nix_evaluation import MAJOR_CHANNELS, NixDerivation, NixEvaluation - -logger = logging.getLogger(__name__) - - -def produce_linkage_candidates( - container: Container, - filtered_affected: models.QuerySet, -) -> dict[NixDerivation, ProvenanceFlags]: - # FIXME(@fricklerhandwerk): This will fall apart when we obtain the channel structure dynamically [ref:channel-structure] - active_channels_q = Q() - for ch in MAJOR_CHANNELS: - active_channels_q |= Q(channel__channel_branch__contains=ch) - - latest_complete_channels = ( - NixEvaluation.objects.filter( - active_channels_q, - state=NixEvaluation.EvaluationState.COMPLETED, - ) - .annotate( - row_num=Window( - expression=RowNumber(), - partition_by=[F("channel")], - order_by=F("updated_at").desc(), - ), - ) - .filter(row_num=1) - ) - - package_names = ( - filtered_affected.exclude(package_name__isnull=True) - .values_list("package_name", flat=True) - .distinct() - ) - products = ( - filtered_affected.exclude(product__isnull=True) - .values_list("product", flat=True) - .distinct() - ) - - package_q = Q() - for name in package_names: - package_q |= Q(name__icontains=name) - - product_q = Q() - for product in products: - product_q |= Q(name__icontains=product) - - # This does not seem to happen in practice though - if not package_q | product_q: - return {} - - annotations = {} - if package_q: - annotations["package_match"] = Case( - When(package_q, then=Value(ProvenanceFlags.PACKAGE_NAME_MATCH)), - default=Value(0), - output_field=IntegerField(), - ) - if product_q: - annotations["product_match"] = Case( - When(product_q, then=Value(ProvenanceFlags.PRODUCT_MATCH)), - default=Value(0), - output_field=IntegerField(), - ) - - # Methodology: - # We start with a large list and we remove 
things as we sort out that list. - # Our initialization must be as large as possible. - # TODO: record what is used to expand the candidate list. - candidates: dict[NixDerivation, ProvenanceFlags] = {} - # TODO: improve accuracy by using bigrams similarity with a `| Q(...)` query. - matches = NixDerivation.objects.filter( - package_q | product_q, - parent_evaluation__in=list(latest_complete_channels), - ).annotate(**annotations) - for drv in matches.iterator(): - flags = getattr(drv, "package_match", 0) | getattr(drv, "product_match", 0) - candidates[drv] = ProvenanceFlags(flags) - - # TODO: restrain further the list by checking all version constraints. - # TODO: restrain further the list by checking hardware constraints or kernel constraints. - # Remove anything that says that it's *not* the list of potential kernel that are in use: - # macOS, Linux, Windows, *BSD. - # TODO: teach it about newcomers kernels such as Redox. - - return candidates - - -def build_new_links(container: Container) -> bool: - if container.cve.triaged: - logger.info( - "Container received for '%s', but already triaged, skipping linkage.", - container.cve, - ) - return False - - if CVEDerivationClusterProposal.objects.filter(cve=container.cve).exists(): - logger.info("Suggestion already exists for '%s', skipping", container.cve) - return False - - if container.tags.filter(value="exclusively-hosted-service").exists(): - logger.info( - "Container for '%s' is exclusively-hosted-service, rejecting without match.", - container.cve, - ) - CVEDerivationClusterProposal.objects.create( - cve=container.cve, - status=CVEDerivationClusterProposal.Status.REJECTED, - rejection_reason=CVEDerivationClusterProposal.RejectionReason.EXCLUSIVELY_HOSTED_SERVICE, - ) - return True - - # FIXME(@fricklerhandwerk): This only works because we're validating syntax on ingestion. - # Use a proper parsing library such as https://github.com/nilp0inter/cpe to work on structured data. 
- # That particular one looks like the best candidate, but appears unmaintained (or could just be very stable); needs thorough review before adopting it. - has_any_cpe = Exists(Cpe.objects.filter(affectedproduct=OuterRef("pk"))) - has_non_hardware_cpe = Exists( - Cpe.objects.filter(affectedproduct=OuterRef("pk")).exclude( - name__istartswith="cpe:2.3:h:" - ) - ) - filtered_affected = container.affected.exclude(has_any_cpe & ~has_non_hardware_cpe) - - if container.affected.exists() and not filtered_affected.exists(): - logger.info( - "Container for '%s' has only hardware CPEs, rejecting without match.", - container.cve, - ) - CVEDerivationClusterProposal.objects.create( - cve=container.cve, - status=CVEDerivationClusterProposal.Status.REJECTED, - rejection_reason=CVEDerivationClusterProposal.RejectionReason.HARDWARE_ONLY_CPE, - ) - return True - - drvs = produce_linkage_candidates(container, filtered_affected) - if not drvs: - logger.info("No derivations matching '%s', ignoring", container.cve) - return False - - if len(drvs) > settings.MAX_MATCHES: - logger.warning( - "More than '%d' derivations matching '%s', ignoring", - settings.MAX_MATCHES, - container.cve, - ) - return False - - proposal = CVEDerivationClusterProposal.objects.create(cve=container.cve) - - drvs_throughs = [ - CVEDerivationClusterProposal.derivations.through( - proposal_id=proposal.pk, derivation_id=drv.pk, provenance_flags=flags - ) - for drv, flags in drvs.items() - ] - - # We create all the set in one shot. 
- CVEDerivationClusterProposal.derivations.through.objects.bulk_create(drvs_throughs) - - if drvs_throughs: - logger.info( - "Matching suggestion for '%s': %d derivations found.", - container.cve, - len(drvs_throughs), - ) - - return True +from shared.listeners.algorithms import current_algorithm +from shared.models.cve import Container @pgpubsub.post_insert_listener(ContainerChannel) -def build_new_links_following_new_containers(old: Container, new: Container) -> None: - build_new_links(new) +def match_derivations_on_container_insert(old: Container, new: Container) -> bool: + return current_algorithm().build_new_links(new) diff --git a/src/shared/listeners/cache_suggestions.py b/src/shared/listeners/cache_suggestions.py index 5497e4b11..d7b3229aa 100644 --- a/src/shared/listeners/cache_suggestions.py +++ b/src/shared/listeners/cache_suggestions.py @@ -274,7 +274,10 @@ def cache_new_suggestions(suggestion: CVEDerivationClusterProposal) -> None: def cache_new_suggestions_following_new_container( old: CVEDerivationClusterProposal, new: CVEDerivationClusterProposal ) -> None: - cache_new_suggestions(new) + + # Only cache suggestions for proposals generated by the active algorithm version. + if new.is_active_algorithm_match: + cache_new_suggestions(new) def is_version_affected(version_statuses: list[str]) -> Version.Status: diff --git a/src/shared/management/commands/regenerate_cached_suggestions.py b/src/shared/management/commands/regenerate_cached_suggestions.py index 9115c203d..c82adea6f 100644 --- a/src/shared/management/commands/regenerate_cached_suggestions.py +++ b/src/shared/management/commands/regenerate_cached_suggestions.py @@ -28,18 +28,26 @@ def handle(self, *args: Any, **options: Any) -> None: label = "all" deleted_count, _ = CachedSuggestions.objects.all().delete() self.stdout.write(f"Purged {deleted_count} cached suggestion(s).") - proposals = CVEDerivationClusterProposal.objects.order_by( - "-updated_at" - ).iterator() + proposals = ( + 
CVEDerivationClusterProposal.objects.active() + .order_by("-updated_at") + .iterator() + ) else: label = "stale/missing" stale = CachedSuggestions.objects.stale() num_stale = stale.count() - proposals = CVEDerivationClusterProposal.objects.filter( - Q(pk__in=stale.values("proposal_id")) - | ~Exists(CachedSuggestions.objects.filter(proposal_id=OuterRef("pk"))) - ).order_by("-updated_at") + proposals = ( + CVEDerivationClusterProposal.objects.active() + .filter( + Q(pk__in=stale.values("proposal_id")) + | ~Exists( + CachedSuggestions.objects.filter(proposal_id=OuterRef("pk")) + ) + ) + .order_by("-updated_at") + ) num_total = proposals.count() num_missing = num_total - num_stale self.stdout.write( diff --git a/src/shared/migrations/0083_cvederivationclusterproposal_algorithm_version.py b/src/shared/migrations/0083_cvederivationclusterproposal_algorithm_version.py new file mode 100644 index 000000000..81724e73f --- /dev/null +++ b/src/shared/migrations/0083_cvederivationclusterproposal_algorithm_version.py @@ -0,0 +1,18 @@ +# Generated by Django 5.2.12 on 2026-04-29 12:07 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('shared', '0082_alter_derivationclusterproposallink_derivation_and_more'), + ] + + operations = [ + migrations.AddField( + model_name='cvederivationclusterproposal', + name='algorithm_version', + field=models.IntegerField(default=1, help_text='Version of the matching algorithm that generated this proposal'), + ), + ] diff --git a/src/shared/models/linkage.py b/src/shared/models/linkage.py index 216e0bc9e..82e80417c 100644 --- a/src/shared/models/linkage.py +++ b/src/shared/models/linkage.py @@ -2,6 +2,7 @@ from typing import Any import pghistory +from django.conf import settings from django.db import models from django.db.models.signals import post_delete, post_save from django.dispatch import receiver @@ -12,6 +13,12 @@ from shared.models.nix_evaluation import NixDerivation, NixMaintainer, 
TimeStampMixin +class CVEDerivationClusterProposalQuerySet(models.QuerySet): + def active(self) -> "CVEDerivationClusterProposalQuerySet": + """Return only proposals generated by the currently active algorithm version.""" + return self.filter(algorithm_version=settings.ACTIVE_MATCHING_ALGORITHM_VERSION) + + @pghistory.track( fields=["status", "rejection_reason"], model_name="CVEDerivationClusterProposalStatusEvent", @@ -21,6 +28,8 @@ class CVEDerivationClusterProposal(TimeStampMixin): A proposal to link a CVE to a set of derivations. """ + objects = CVEDerivationClusterProposalQuerySet.as_manager() + class Status(models.TextChoices): PENDING = "pending", _("pending") REJECTED = "rejected", _("rejected") @@ -75,6 +84,11 @@ class RejectionReason(models.TextChoices): help_text=_("Reason for rejection (automatic or manual)"), ) + algorithm_version = models.IntegerField( + default=1, + help_text=_("Version of the matching algorithm that generated this proposal"), + ) + @property def is_frozen(self) -> bool: return self.status not in [ @@ -91,6 +105,10 @@ def references(self) -> list[Reference]: def is_cache_stale(self) -> bool: return not getattr(self, "cached", None) or self.cached.is_stale + @property + def is_active_algorithm_match(self) -> bool: + return self.algorithm_version == settings.ACTIVE_MATCHING_ALGORITHM_VERSION + def ensure_fresh_cache(self) -> None: """Regenerate stale or missing cache for this suggestion.""" if self.is_cache_stale: diff --git a/src/shared/tests/conftest.py b/src/shared/tests/conftest.py index 1c477aea4..d9c948438 100644 --- a/src/shared/tests/conftest.py +++ b/src/shared/tests/conftest.py @@ -258,11 +258,13 @@ def wrapped( status: CVEDerivationClusterProposal.Status = CVEDerivationClusterProposal.Status.PENDING, rejection_reason: CVEDerivationClusterProposal.RejectionReason | None = None, age: timedelta = timedelta(0), + algorithm_version: int = settings.ACTIVE_MATCHING_ALGORITHM_VERSION, ) -> CVEDerivationClusterProposal: suggestion = 
CVEDerivationClusterProposal.objects.create( status=status, rejection_reason=rejection_reason, cve=container.cve, + algorithm_version=algorithm_version, ) if age > timedelta(0): diff --git a/src/shared/tests/test_algorithm_registry.py b/src/shared/tests/test_algorithm_registry.py new file mode 100644 index 000000000..ceefe9822 --- /dev/null +++ b/src/shared/tests/test_algorithm_registry.py @@ -0,0 +1,151 @@ +from collections.abc import Callable, Generator + +import pytest +from django.test import override_settings + +from shared.listeners.algorithms import ( + _registry, + _resolve, + candidate_algorithm, + current_algorithm, + register, +) +from shared.models.cve import Container + + +class _StubV1: + VERSION: int = 1 + + def build_new_links(self, container: Container) -> bool: + return True + + +class _StubV2: + VERSION: int = 2 + + def build_new_links(self, container: Container) -> bool: + return True + + +@pytest.fixture(autouse=True) +def clean_registry() -> Generator: + """ + Isolate each test by clearing the registry before and after. 
+ """ + _registry.clear() + yield + _registry.clear() + + +@pytest.fixture +def registry_with_v1() -> type[_StubV1]: + register(_StubV1()) + return _StubV1 + + +@pytest.fixture +def registry_with_v1_and_v2() -> None: + register(_StubV1()) + register(_StubV2()) + + +def test_register_module_versions() -> None: + register(_StubV1()) + register(_StubV2()) + assert 1 in _registry + assert 2 in _registry + + +def test_register_overwrites_same_version() -> None: + """Re-registering the same version replaces the previous entry.""" + first = _StubV1() + second = _StubV1() + register(first) + register(second) + assert _registry[1] is second + + +def test_resolve_returns_correct_module( + registry_with_v1: Callable[..., type[_StubV1]], +) -> None: + result = _resolve(1) + assert result.VERSION == 1 + + +def test_resolve_raises_key_error_for_unknown_version( + registry_with_v1: Callable[..., type[_StubV1]], +) -> None: + with pytest.raises(KeyError, match="99"): + _resolve(99) + + +def test_resolve_raises_runtime_error_when_registry_empty() -> None: + with pytest.raises(RuntimeError, match="No matching algorithm registered"): + _resolve(1) + + +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) +def test_current_algorithm_returns_active_version( + registry_with_v1: Callable[..., type[_StubV1]], +) -> None: + algo = current_algorithm() + assert algo.VERSION == 1 + + +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=2) +def test_current_algorithm_returns_correct_version_when_multiple_registered( + registry_with_v1_and_v2: None, +) -> None: + algo = current_algorithm() + assert algo.VERSION == 2 + + +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=99) +def test_current_algorithm_raises_when_version_not_registered( + registry_with_v1: Callable[..., type[_StubV1]], +) -> None: + with pytest.raises(KeyError, match="99"): + current_algorithm() + + +@override_settings(CANDIDATE_MATCHING_ALGORITHM_VERSION=None) +def 
test_candidate_algorithm_returns_none_when_not_configured( + registry_with_v1: Callable[..., type[_StubV1]], +) -> None: + assert candidate_algorithm() is None + + +@override_settings( + ACTIVE_MATCHING_ALGORITHM_VERSION=1, + CANDIDATE_MATCHING_ALGORITHM_VERSION=2, +) +def test_candidate_algorithm_returns_candidate_version( + registry_with_v1_and_v2: None, +) -> None: + algo = candidate_algorithm() + assert algo is not None + assert algo.VERSION == 2 + + +@override_settings( + ACTIVE_MATCHING_ALGORITHM_VERSION=1, + CANDIDATE_MATCHING_ALGORITHM_VERSION=2, +) +def test_candidate_algorithm_raises_when_version_not_registered( + registry_with_v1: Callable[..., type[_StubV1]], +) -> None: + """Candidate version is configured but the module was never registered.""" + with pytest.raises(KeyError, match="2"): + candidate_algorithm() + + +@override_settings( + ACTIVE_MATCHING_ALGORITHM_VERSION=1, + CANDIDATE_MATCHING_ALGORITHM_VERSION=2, +) +def test_active_and_candidate_are_independent(registry_with_v1_and_v2: None) -> None: + """Changing which version is candidate does not affect the active one.""" + assert current_algorithm().VERSION == 1 + candidate = candidate_algorithm() + assert candidate is not None + assert candidate.VERSION == 2 diff --git a/src/shared/tests/test_linkage.py b/src/shared/tests/test_linkage.py index e7e0b2ba5..532aa1d9c 100644 --- a/src/shared/tests/test_linkage.py +++ b/src/shared/tests/test_linkage.py @@ -2,8 +2,9 @@ from datetime import timedelta import pytest +from django.test import override_settings -from shared.listeners.automatic_linkage import build_new_links +from shared.listeners.automatic_linkage import match_derivations_on_container_insert from shared.listeners.cache_suggestions import cache_new_suggestions from shared.models.cve import Container, Tag from shared.models.linkage import ( @@ -19,6 +20,7 @@ ) +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) def test_link_only_latest_eval( make_container: Callable[..., Container], 
make_channel: Callable[..., NixChannel], @@ -67,7 +69,7 @@ def test_link_only_latest_eval( ) container = make_container(package_name="foo", affected_version="<3.2") - match = build_new_links(container) + match = match_derivations_on_container_insert(old=container, new=container) assert match suggestion = CVEDerivationClusterProposal.objects.first() assert suggestion @@ -80,6 +82,7 @@ def test_link_only_latest_eval( cache_new_suggestions(suggestion) +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) def test_link_only_major_channels( make_container: Callable[..., Container], make_channel: Callable[..., NixChannel], @@ -97,9 +100,10 @@ def test_link_only_major_channels( make_drv(pname="foo", evaluation=old_eval) container = make_container(package_name="foo") - assert not build_new_links(container) + assert not match_derivations_on_container_insert(old=container, new=container) +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) @pytest.mark.parametrize( "package_name,product,drv_pname,expected_flags", [ @@ -133,7 +137,7 @@ def test_link_product_or_package_name( container = make_container(package_name=package_name, product=product) drv = make_drv(pname=drv_pname) - match = build_new_links(container) + match = match_derivations_on_container_insert(old=container, new=container) if expected_flags: assert match @@ -145,6 +149,7 @@ def test_link_product_or_package_name( assert not match +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) def test_exclusively_hosted_service_creates_rejected_proposal( make_container: Callable[..., Container], ) -> None: @@ -153,7 +158,7 @@ def test_exclusively_hosted_service_creates_rejected_proposal( tag, _ = Tag.objects.get_or_create(value="exclusively-hosted-service") container.tags.add(tag) - result = build_new_links(container) + result = match_derivations_on_container_insert(old=container, new=container) assert result is True proposal = CVEDerivationClusterProposal.objects.get(cve=container.cve) @@ -165,6 +170,7 @@ def 
test_exclusively_hosted_service_creates_rejected_proposal( assert proposal.derivations.count() == 0 +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) def test_hardware_cpe_produces_no_match( make_container: Callable[..., Container], make_drv: Callable[..., NixDerivation], @@ -176,7 +182,7 @@ def test_hardware_cpe_produces_no_match( ) make_drv(pname="some_router") - assert build_new_links(container) is True + assert match_derivations_on_container_insert(old=container, new=container) is True proposal = CVEDerivationClusterProposal.objects.get(cve=container.cve) assert proposal.status == CVEDerivationClusterProposal.Status.REJECTED assert ( @@ -186,6 +192,7 @@ def test_hardware_cpe_produces_no_match( assert proposal.derivations.count() == 0 +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) def test_application_cpe_produces_match( make_container: Callable[..., Container], make_drv: Callable[..., NixDerivation], @@ -197,9 +204,10 @@ def test_application_cpe_produces_match( ) make_drv(pname="myapp") - assert build_new_links(container) + assert match_derivations_on_container_insert(old=container, new=container) +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) def test_mixed_cpe_parts_skips_hardware_only_affected_products( make_container: Callable[..., Container], make_drv: Callable[..., NixDerivation], @@ -221,11 +229,11 @@ def test_mixed_cpe_parts_skips_hardware_only_affected_products( make_drv(pname="some_router", version="1.0") make_drv(pname="myapp", version="1.0", attribute="myapp") - build_new_links(hw_container) + match_derivations_on_container_insert(old=hw_container, new=hw_container) assert not CVEDerivationClusterProposal.objects.get( cve=hw_container.cve ).derivations.exists() - assert build_new_links(app_container) + assert match_derivations_on_container_insert(old=app_container, new=app_container) suggestion = CVEDerivationClusterProposal.objects.get(cve=app_container.cve) assert 
suggestion.derivations.filter(name__startswith="myapp").exists() assert not suggestion.derivations.filter(name__startswith="some_router").exists() diff --git a/src/shared/tests/test_proposal_queryset.py b/src/shared/tests/test_proposal_queryset.py new file mode 100644 index 000000000..e1785aed7 --- /dev/null +++ b/src/shared/tests/test_proposal_queryset.py @@ -0,0 +1,117 @@ +from collections.abc import Callable + +from django.test import override_settings + +from shared.models.cve import Container +from shared.models.linkage import CVEDerivationClusterProposal, ProvenanceFlags +from shared.models.nix_evaluation import NixDerivation + + +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) +def test_active_returns_proposal_matching_active_version( + make_suggestion: Callable[..., CVEDerivationClusterProposal], +) -> None: + """A proposal whose version equals the active version appears in .active().""" + proposal = make_suggestion() + + qs = CVEDerivationClusterProposal.objects.active() + + assert proposal in qs + + +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) +def test_active_excludes_proposal_with_different_version( + make_suggestion: Callable[..., CVEDerivationClusterProposal], +) -> None: + """A proposal from a candidate (non-active) algorithm is hidden from .active().""" + candidate_proposal = make_suggestion(algorithm_version=2) + + qs = CVEDerivationClusterProposal.objects.active() + + assert candidate_proposal not in qs + + +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) +def test_active_returns_only_matching_version_when_both_exist( + make_suggestion: Callable[..., CVEDerivationClusterProposal], + make_drv: Callable[..., NixDerivation], + make_container: Callable[..., Container], +) -> None: + """When proposals exist for two versions, .active() returns only the active one.""" + active_proposal = make_suggestion() + + # Build a second container so we can create a second, independent proposal + second_container = 
make_container(cve_id="CVE-2025-9999") + second_drv = make_drv(pname="bar", attribute="bar") + candidate_proposal = make_suggestion( + algorithm_version=2, + container=second_container, + drvs={second_drv: ProvenanceFlags.PACKAGE_NAME_MATCH}, + ) + + qs = CVEDerivationClusterProposal.objects.active() + + assert active_proposal in qs + assert candidate_proposal not in qs + assert qs.count() == 1 + + +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) +def test_all_returns_proposals_of_any_version( + make_suggestion: Callable[..., CVEDerivationClusterProposal], + make_drv: Callable[..., NixDerivation], + make_container: Callable[..., Container], +) -> None: + """.objects.all() is unrestricted and returns every proposal regardless of version.""" + v1_proposal = make_suggestion() + + second_container = make_container(cve_id="CVE-2025-8888") + second_drv = make_drv(pname="baz", attribute="baz") + v2_proposal = make_suggestion( + algorithm_version=2, + container=second_container, + drvs={second_drv: ProvenanceFlags.PACKAGE_NAME_MATCH}, + ) + + all_qs = CVEDerivationClusterProposal.objects.all() + + assert v1_proposal in all_qs + assert v2_proposal in all_qs + assert all_qs.count() == 2 + + +def test_active_respects_setting_change( + make_suggestion: Callable[..., CVEDerivationClusterProposal], + make_drv: Callable[..., NixDerivation], + make_container: Callable[..., Container], +) -> None: + """Changing ACTIVE_MATCHING_ALGORITHM_VERSION switches which proposals .active() returns.""" + v1_proposal = make_suggestion() + + second_container = make_container(cve_id="CVE-2025-7777") + second_drv = make_drv(pname="qux", attribute="qux") + v2_proposal = make_suggestion( + algorithm_version=2, + container=second_container, + drvs={second_drv: ProvenanceFlags.PACKAGE_NAME_MATCH}, + ) + + with override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1): + qs_v1_active = CVEDerivationClusterProposal.objects.active() + assert v1_proposal in qs_v1_active + assert v2_proposal not in 
qs_v1_active + + with override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=2): + qs_v2_active = CVEDerivationClusterProposal.objects.active() + assert v2_proposal in qs_v2_active + assert v1_proposal not in qs_v2_active + + +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=99) +def test_active_returns_empty_when_no_version_matches( + make_suggestion: Callable[..., CVEDerivationClusterProposal], +) -> None: + """With only a version-1 proposal stored, .active() is empty under version 99.""" + make_suggestion(algorithm_version=1) + + assert CVEDerivationClusterProposal.objects.active().count() == 0 diff --git a/src/shared/tests/test_suggestion_caching.py b/src/shared/tests/test_suggestion_caching.py index 183bd9ab6..57dab009b 100644 --- a/src/shared/tests/test_suggestion_caching.py +++ b/src/shared/tests/test_suggestion_caching.py @@ -1,7 +1,12 @@ from collections.abc import Callable from datetime import timedelta -from shared.listeners.cache_suggestions import cache_new_suggestions +from django.test import override_settings + +from shared.listeners.cache_suggestions import ( + cache_new_suggestions, + cache_new_suggestions_following_new_container, +) from shared.models.cached import CachedSuggestions from shared.models.cve import Container from shared.models.linkage import ( @@ -83,3 +88,28 @@ def test_cache_stale_check( cached.schema_version = CachedSuggestions.CURRENT_SCHEMA_VERSION - 1 # type: ignore cached.save() assert cached.is_stale is True + + +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) +def test_listener_caches_proposal_with_active_algorithm_version( + suggestion: CVEDerivationClusterProposal, +) -> None: + """Listener caches a proposal whose version matches the active version.""" + cache_new_suggestions_following_new_container(old=suggestion, new=suggestion) + + assert CachedSuggestions.objects.filter(proposal=suggestion).exists() + + +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) +def test_listener_skips_caching_for_inactive_algorithm_version( + suggestion:
CVEDerivationClusterProposal, +) -> None: + """Listener does not cache a proposal whose version differs from the active version.""" + CVEDerivationClusterProposal.objects.filter(pk=suggestion.pk).update( + algorithm_version=2 + ) + suggestion.refresh_from_db() + + cache_new_suggestions_following_new_container(old=suggestion, new=suggestion) + + assert not CachedSuggestions.objects.filter(proposal=suggestion).exists() diff --git a/src/webview/suggestions/views/lists.py b/src/webview/suggestions/views/lists.py index f5cb5124f..05a3800f0 100644 --- a/src/webview/suggestions/views/lists.py +++ b/src/webview/suggestions/views/lists.py @@ -55,7 +55,8 @@ def get_queryset(self) -> QuerySet[CVEDerivationClusterProposal]: if self.package_filter is not None: query_filters &= Q(cached__payload__packages__has_key=self.package_filter) return ( - CVEDerivationClusterProposal.objects.select_related( + CVEDerivationClusterProposal.objects.active() + .select_related( "cached", ) .prefetch_related("cve__container__references__tags") diff --git a/src/webview/tests/test_algorithm_version_filter.py b/src/webview/tests/test_algorithm_version_filter.py new file mode 100644 index 000000000..5e54489d3 --- /dev/null +++ b/src/webview/tests/test_algorithm_version_filter.py @@ -0,0 +1,143 @@ +""" +Tests verifying that suggestion list views only surface proposals generated by the +currently active matching algorithm version. + +A proposal whose algorithm_version differs from ACTIVE_MATCHING_ALGORITHM_VERSION +must not appear in any list view, even when it has a valid cache entry. 
+""" + +from collections.abc import Callable + +import pytest +from django.test import override_settings +from django.urls import reverse +from playwright.sync_api import Page, expect +from pytest_django.live_server_helper import LiveServer + +from shared.listeners.cache_suggestions import cache_new_suggestions +from shared.models.cve import Container +from shared.models.linkage import CVEDerivationClusterProposal, ProvenanceFlags +from shared.models.nix_evaluation import NixDerivation + + +@pytest.mark.parametrize( + "url_path, status", + [ + ( + reverse("webview:suggestion:untriaged_suggestions"), + CVEDerivationClusterProposal.Status.PENDING, + ), + ( + reverse("webview:suggestion:accepted_suggestions"), + CVEDerivationClusterProposal.Status.ACCEPTED, + ), + ( + reverse("webview:suggestion:dismissed_suggestions"), + CVEDerivationClusterProposal.Status.REJECTED, + ), + ], +) +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) +def test_list_view_shows_active_version_only( + live_server: LiveServer, + as_staff: Page, + make_suggestion: Callable[..., CVEDerivationClusterProposal], + make_drv: Callable[..., NixDerivation], + make_container: Callable[..., Container], + url_path: str, + status: CVEDerivationClusterProposal.Status, +) -> None: + """Active-version proposal is visible; same-status candidate-version proposal is not.""" + drv = make_drv() + + active_container = make_container(cve_id="CVE-2025-1001") + active_proposal = make_suggestion( + container=active_container, + drvs={drv: ProvenanceFlags.PACKAGE_NAME_MATCH}, + status=status, + ) + cache_new_suggestions(active_proposal) + + candidate_container = make_container(cve_id="CVE-2025-1002") + candidate_drv = make_drv(pname="candidate-pkg", attribute="candidate-pkg") + candidate_proposal = make_suggestion( + container=candidate_container, + drvs={candidate_drv: ProvenanceFlags.PACKAGE_NAME_MATCH}, + status=status, + algorithm_version=2, + ) + cache_new_suggestions(candidate_proposal) + + 
as_staff.goto(live_server.url + url_path) + + expect(as_staff.locator(f"#suggestion-{active_proposal.pk}")).to_be_visible() + expect(as_staff.locator(f"#suggestion-{candidate_proposal.pk}")).not_to_be_visible() + + +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=1) +def test_by_package_view_shows_active_version_only( + live_server: LiveServer, + as_staff: Page, + make_suggestion: Callable[..., CVEDerivationClusterProposal], + make_drv: Callable[..., NixDerivation], + make_container: Callable[..., Container], +) -> None: + """The by-package view also filters by active algorithm version.""" + shared_drv = make_drv(pname="shared-pkg", attribute="shared-pkg") + + active_container = make_container(cve_id="CVE-2025-2001") + active_proposal = make_suggestion( + container=active_container, + drvs={shared_drv: ProvenanceFlags.PACKAGE_NAME_MATCH}, + ) + cache_new_suggestions(active_proposal) + + candidate_container = make_container(cve_id="CVE-2025-2002") + candidate_proposal = make_suggestion( + container=candidate_container, + drvs={shared_drv: ProvenanceFlags.PACKAGE_NAME_MATCH}, + algorithm_version=2, + ) + cache_new_suggestions(candidate_proposal) + + url = reverse( + "webview:suggestion:suggestions_by_package", + kwargs={"package_name": shared_drv.attribute}, + ) + as_staff.goto(live_server.url + url) + + expect(as_staff.locator(f"#suggestion-{active_proposal.pk}")).to_be_visible() + expect(as_staff.locator(f"#suggestion-{candidate_proposal.pk}")).not_to_be_visible() + + +@override_settings(ACTIVE_MATCHING_ALGORITHM_VERSION=2) +def test_switching_active_version_changes_visible_proposals( + live_server: LiveServer, + as_staff: Page, + make_suggestion: Callable[..., CVEDerivationClusterProposal], + make_drv: Callable[..., NixDerivation], + make_container: Callable[..., Container], +) -> None: + """When the active version is bumped to 2, only v2 proposals appear in the list.""" + drv = make_drv() + + v1_container = make_container(cve_id="CVE-2025-3001") + 
v1_proposal = make_suggestion( + container=v1_container, + drvs={drv: ProvenanceFlags.PACKAGE_NAME_MATCH}, + ) + cache_new_suggestions(v1_proposal) + + v2_container = make_container(cve_id="CVE-2025-3002") + v2_drv = make_drv(pname="v2-pkg", attribute="v2-pkg") + v2_proposal = make_suggestion( + container=v2_container, + drvs={v2_drv: ProvenanceFlags.PACKAGE_NAME_MATCH}, + algorithm_version=2, + ) + cache_new_suggestions(v2_proposal) + + as_staff.goto(live_server.url + reverse("webview:suggestion:untriaged_suggestions")) + + expect(as_staff.locator(f"#suggestion-{v2_proposal.pk}")).to_be_visible() + expect(as_staff.locator(f"#suggestion-{v1_proposal.pk}")).not_to_be_visible()