Skip to content

Commit 8540a20

Browse files
committed
feat: Add support for multiple matching algorithm versions
1 parent e3ead57 commit 8540a20

17 files changed

Lines changed: 805 additions & 207 deletions

infra/production.nix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ in
105105
EMAIL_USE_SSL = true;
106106
EMAIL_HOST_USER = "[email protected]";
107107
DEFAULT_FROM_EMAIL = "[email protected]";
108+
ACTIVE_MATCHING_ALGORITHM_VERSION = 1;
108109
};
109110

110111
secrets = {

infra/staging.nix

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ in
114114
GH_SECURITY_TEAM = "sectracker-testing-security";
115115
GH_COMMITTERS_TEAM = "sectracker-testing-committers";
116116
EMAIL_BACKEND = "django.core.mail.backends.console.EmailBackend";
117+
ACTIVE_MATCHING_ALGORITHM_VERSION = 1;
117118
};
118119

119120
secrets = {

src/project/settings.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,25 @@ class DjangoSettings(BaseModel):
169169
""",
170170
default=1_000,
171171
)
172+
ACTIVE_MATCHING_ALGORITHM_VERSION: int = Field(
173+
description="""
174+
Controls which registered matching algorithm version is used when
175+
linking CVEs to derivations. Must match a VERSION defined in
176+
shared/listeners/algorithms/. Bump this setting to activate a new
177+
algorithm version without changing code.
178+
""",
179+
default=1,
180+
)
181+
CANDIDATE_MATCHING_ALGORITHM_VERSION: int | None = Field(
182+
description="""
183+
Optional. When set, identifies a new algorithm version being evaluated
184+
in parallel. The candidate version does not run automatically — it is
185+
only invoked by the test-run management command, which generates proposals
186+
tagged with this version number for later metric comparison.
187+
Set to None when no candidate is under evaluation.
188+
""",
189+
default=None,
190+
)
172191
SHOW_DEMO_DISCLAIMER: bool = Field(
173192
description="""
174193
When set to True, the application will display a disclaimer about
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
"""
2+
Algorithm registry for CVE-to-derivation matching.
3+
4+
Each algorithm version lives in its own module (v1.py, v2.py, ...) and must expose:
5+
- VERSION
6+
- build_new_links
7+
8+
Modules register themselves by calling `register()`. Callers look up the active
version with `current_algorithm()`.
9+
"""
10+
11+
from typing import Protocol
12+
13+
from django.conf import settings
14+
15+
from shared.models.cve import Container
16+
17+
18+
class MatchingAlgorithm(Protocol):
    """Structural interface that every registered matching algorithm satisfies.

    Algorithm versions live in their own modules and are registered as-is, so
    both members below are expected as module-level names of such a module.
    """

    # Integer key under which the algorithm is stored in the registry.
    VERSION: int

    def build_new_links(self, container: Container) -> bool:
        """Run the algorithm for ``container`` and report the outcome as a bool."""
        ...
22+
23+
24+
# All registered algorithms, keyed by their VERSION.
_registry: dict[int, MatchingAlgorithm] = {}


def register(module: MatchingAlgorithm) -> None:
    """Store ``module`` in the registry under ``module.VERSION``.

    An existing entry for the same version is replaced.
    """
    version = module.VERSION
    _registry[version] = module
30+
31+
32+
def _resolve(version: int) -> MatchingAlgorithm:
    """Return the registered algorithm that matches ``version``.

    Raises:
        RuntimeError: If no algorithm has been registered at all.
        KeyError: If algorithms are registered, but none under ``version``.
    """
    if not _registry:
        raise RuntimeError("No matching algorithm registered.")
    try:
        return _registry[version]
    except KeyError:
        # The failed dict lookup carries nothing beyond the key itself, so
        # suppress it rather than leaving an implicit "during handling of the
        # above exception" chain in the traceback.
        raise KeyError(
            f"No matching algorithm registered for version {version}."
        ) from None
40+
41+
42+
def current_algorithm() -> MatchingAlgorithm:
    """Look up the algorithm selected by ``settings.ACTIVE_MATCHING_ALGORITHM_VERSION``."""
    active_version = settings.ACTIVE_MATCHING_ALGORITHM_VERSION
    return _resolve(active_version)
45+
46+
47+
# TODO(@adekoder): will be used once the process that runs the inactive, new
# algorithm version exists.
def candidate_algorithm() -> MatchingAlgorithm | None:
    """
    Look up the algorithm selected by
    ``settings.CANDIDATE_MATCHING_ALGORITHM_VERSION``.

    Returns None when that setting is None, i.e. no candidate is configured.
    """
    candidate_version = settings.CANDIDATE_MATCHING_ALGORITHM_VERSION
    return None if candidate_version is None else _resolve(candidate_version)
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
"""
2+
Matching algorithm version 1.
3+
4+
Candidates are found by matching package name / product name (case-insensitive
5+
substring) against derivation names in the latest completed evaluation of each
6+
major channel. No version constraint checking is applied at this stage.
7+
"""
8+
9+
import logging
10+
import sys
11+
12+
from django.conf import settings
13+
from django.db.models import (
14+
Case,
15+
Exists,
16+
F,
17+
IntegerField,
18+
OuterRef,
19+
Q,
20+
QuerySet,
21+
Value,
22+
When,
23+
Window,
24+
)
25+
from django.db.models.functions import RowNumber
26+
27+
from shared.models.cve import AffectedProduct, Container, Cpe
28+
from shared.models.linkage import CVEDerivationClusterProposal, ProvenanceFlags
29+
from shared.models.nix_evaluation import MAJOR_CHANNELS, NixDerivation, NixEvaluation
30+
31+
from . import register
32+
33+
# Version number of this algorithm; proposals created by this module are
# tagged with it, and the module is registered under it.
VERSION: int = 1

# Module-scoped logger for linkage decisions.
logger = logging.getLogger(__name__)
36+
37+
38+
def produce_linkage_candidates(
    container: Container,
    filtered_affected: QuerySet[AffectedProduct],
) -> dict[NixDerivation, ProvenanceFlags]:
    """Collect derivations whose name contains an affected package or product name.

    Only derivations that belong to the most recently updated COMPLETED
    evaluation of each major channel are searched. Matching is a
    case-insensitive substring test on the derivation name.

    Args:
        container: Not consulted here; all names come from ``filtered_affected``.
        filtered_affected: Affected products supplying the non-NULL
            ``package_name`` and ``product`` values to search for.

    Returns:
        A mapping from each matching derivation to the flags recording whether
        a package name, a product name, or both matched. Empty when there are
        no names to search for.
    """

    def any_of(lookup: str, values) -> Q:
        """OR together one ``lookup=value`` condition per value (empty Q if none)."""
        combined = Q()
        for value in values:
            combined |= Q(**{lookup: value})
        return combined

    def distinct_non_null(field: str):
        """Distinct non-NULL values of ``field`` among the affected products."""
        return (
            filtered_affected.exclude(**{f"{field}__isnull": True})
            .values_list(field, flat=True)
            .distinct()
        )

    def flag_case(condition: Q, flag) -> Case:
        """Annotation yielding ``flag`` where ``condition`` holds, else 0."""
        return Case(
            When(condition, then=Value(flag)),
            default=Value(0),
            output_field=IntegerField(),
        )

    package_q = any_of("name__icontains", distinct_non_null("package_name"))
    product_q = any_of("name__icontains", distinct_non_null("product"))
    name_q = package_q | product_q
    if not name_q:
        # Neither package names nor products: nothing to search for.
        return {}

    # FIXME(@fricklerhandwerk): This will fall apart when we obtain the channel structure dynamically [ref:channel-structure]
    active_channels_q = any_of("channel__channel_branch__contains", MAJOR_CHANNELS)

    # Number the evaluations of each channel from newest to oldest, then keep
    # only the newest one per channel.
    newest_first = Window(
        expression=RowNumber(),
        partition_by=[F("channel")],
        order_by=F("updated_at").desc(),
    )
    latest_complete_channels = (
        NixEvaluation.objects.filter(
            active_channels_q,
            state=NixEvaluation.EvaluationState.COMPLETED,
        )
        .annotate(row_num=newest_first)
        .filter(row_num=1)
    )

    # Only annotate for the kinds of names that are actually present; the
    # getattr() defaults below cover the annotation that was left out.
    annotations = {}
    if package_q:
        annotations["package_match"] = flag_case(
            package_q, ProvenanceFlags.PACKAGE_NAME_MATCH
        )
    if product_q:
        annotations["product_match"] = flag_case(
            product_q, ProvenanceFlags.PRODUCT_MATCH
        )

    matches = NixDerivation.objects.filter(
        name_q,
        parent_evaluation__in=list(latest_complete_channels),
    ).annotate(**annotations)

    return {
        drv: ProvenanceFlags(
            getattr(drv, "package_match", 0) | getattr(drv, "product_match", 0)
        )
        for drv in matches.iterator()
    }
108+
109+
110+
def build_new_links(container: Container) -> bool:
    """Create a proposal tagged with this algorithm VERSION for the container's CVE.

    Returns:
        True when a proposal row was created, either an outright rejection
        (exclusively hosted service, hardware-only CPEs) or a proposal with
        linked derivations. False when nothing was created: the CVE is
        already triaged, a proposal for this version already exists, no
        derivation matched, or more than ``settings.MAX_MATCHES`` matched.
    """
    cve = container.cve
    proposals = CVEDerivationClusterProposal.objects

    def reject(reason) -> bool:
        """Record a REJECTED proposal for the CVE with the given reason."""
        proposals.create(
            cve=cve,
            status=CVEDerivationClusterProposal.Status.REJECTED,
            rejection_reason=reason,
            algorithm_version=VERSION,
        )
        return True

    if cve.triaged:
        logger.info(
            "Container received for '%s', but already triaged, skipping linkage.",
            cve,
        )
        return False

    already_proposed = proposals.filter(cve=cve, algorithm_version=VERSION).exists()
    if already_proposed:
        logger.info("Suggestion already exists for '%s', skipping", cve)
        return False

    if container.tags.filter(value="exclusively-hosted-service").exists():
        logger.info(
            "Container for '%s' is exclusively-hosted-service, rejecting without match.",
            cve,
        )
        return reject(
            CVEDerivationClusterProposal.RejectionReason.EXCLUSIVELY_HOSTED_SERVICE
        )

    # Drop affected products that have CPEs, all of which are hardware CPEs.
    # Products without any CPE are kept.
    has_any_cpe = Exists(Cpe.objects.filter(affectedproduct=OuterRef("pk")))
    has_non_hardware_cpe = Exists(
        Cpe.objects.filter(affectedproduct=OuterRef("pk")).exclude(
            name__istartswith="cpe:2.3:h:"
        )
    )
    hardware_only = has_any_cpe & ~has_non_hardware_cpe
    filtered_affected = container.affected.exclude(hardware_only)

    if container.affected.exists() and not filtered_affected.exists():
        logger.info(
            "Container for '%s' has only hardware CPEs, rejecting without match.",
            cve,
        )
        return reject(CVEDerivationClusterProposal.RejectionReason.HARDWARE_ONLY_CPE)

    drvs = produce_linkage_candidates(container, filtered_affected)
    if not drvs:
        logger.info("No derivations matching '%s', ignoring", cve)
        return False

    max_matches = settings.MAX_MATCHES
    if len(drvs) > max_matches:
        logger.warning(
            "More than '%d' derivations matching '%s', ignoring",
            max_matches,
            cve,
        )
        return False

    proposal = proposals.create(cve=cve, algorithm_version=VERSION)

    through_model = CVEDerivationClusterProposal.derivations.through
    links = [
        through_model(
            proposal_id=proposal.pk,
            derivation_id=drv.pk,
            provenance_flags=flags,
        )
        for drv, flags in drvs.items()
    ]
    through_model.objects.bulk_create(links)

    # `drvs` is non-empty at this point, so there is always at least one link.
    logger.info(
        "Matching suggestion for '%s': %d derivations found.",
        cve,
        len(links),
    )
    return True
193+
194+
195+
# Import side effect: hand this module object to the registry, which stores
# it under VERSION.
register(sys.modules[__name__])  # type: ignore[arg-type]

0 commit comments

Comments
 (0)