Skip to content

Commit 584b077

Browse files
authored
Merge pull request #1863 from aboutcode-org/make-adv-id-unique
Make advisory content_id a unique field
2 parents 3fb3e75 + b99b0c0 commit 584b077

17 files changed

+283
-120
lines changed

Diff for: .github/workflows/docs.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ on: [push, pull_request]
44

55
jobs:
66
build:
7-
runs-on: ubuntu-20.04
7+
runs-on: ubuntu-22.04
88

99
strategy:
1010
max-parallel: 4

Diff for: .github/workflows/main.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ env:
99

1010
jobs:
1111
build:
12-
runs-on: ubuntu-20.04
12+
runs-on: ubuntu-22.04
1313

1414
services:
1515
postgres:

Diff for: .github/workflows/pypi-release.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ on:
2121
jobs:
2222
build-pypi-distribs:
2323
name: Build and publish library to PyPI
24-
runs-on: ubuntu-20.04
24+
runs-on: ubuntu-22.04
2525

2626
steps:
2727
- uses: actions/checkout@master

Diff for: docs/source/conf.py

+2
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
"https://www.softwaretestinghelp.com/how-to-write-good-bug-report/", # Cloudflare protection
3737
"https://www.openssl.org/news/vulnerabilities.xml", # OpenSSL legacy advisory URL, not longer available
3838
"https://example.org/api/non-existent-packages",
39+
"https://github.com/aboutcode-org/vulnerablecode/pull/495/commits",
40+
"https://nvd.nist.gov/products/cpe",
3941
]
4042

4143
# Add any Sphinx extension module names here, as strings. They can be

Diff for: vulnerabilities/import_runner.py

+15-9
Original file line numberDiff line numberDiff line change
@@ -104,24 +104,30 @@ def process_advisories(
104104
advisories = []
105105
for data in advisory_datas:
106106
content_id = compute_content_id(advisory_data=data)
107+
advisory = {
108+
"summary": data.summary,
109+
"affected_packages": [pkg.to_dict() for pkg in data.affected_packages],
110+
"references": [ref.to_dict() for ref in data.references],
111+
"date_published": data.date_published,
112+
"weaknesses": data.weaknesses,
113+
"created_by": importer_name,
114+
"date_collected": datetime.datetime.now(tz=datetime.timezone.utc),
115+
}
107116
try:
108117
aliases = get_or_create_aliases(aliases=data.aliases)
109118
obj, created = Advisory.objects.get_or_create(
110119
unique_content_id=content_id,
111120
url=data.url,
112-
defaults={
113-
"summary": data.summary,
114-
"affected_packages": [pkg.to_dict() for pkg in data.affected_packages],
115-
"references": [ref.to_dict() for ref in data.references],
116-
"date_published": data.date_published,
117-
"weaknesses": data.weaknesses,
118-
"created_by": importer_name,
119-
"date_collected": datetime.datetime.now(tz=datetime.timezone.utc),
120-
},
121+
defaults=advisory,
121122
)
122123
obj.aliases.add(*aliases)
123124
if not obj.date_imported:
124125
advisories.append(obj)
126+
except Advisory.MultipleObjectsReturned:
127+
logger.error(
128+
f"Multiple Advisories returned: unique_content_id: {content_id}, url: {data.url}, advisory: {advisory!r}"
129+
)
130+
raise
125131
except Exception as e:
126132
logger.error(
127133
f"Error while processing {data!r} with aliases {data.aliases!r}: {e!r} \n {traceback_format_exc()}"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
# Generated by Django 4.2.17 on 2025-04-04 16:08
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
7+
8+
dependencies = [
9+
("vulnerabilities", "0090_migrate_advisory_aliases"),
10+
]
11+
12+
operations = [
13+
migrations.AlterUniqueTogether(
14+
name="advisory",
15+
unique_together=set(),
16+
),
17+
migrations.AlterField(
18+
model_name="advisory",
19+
name="unique_content_id",
20+
field=models.CharField(
21+
help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex",
22+
max_length=64,
23+
unique=True,
24+
),
25+
),
26+
migrations.AlterField(
27+
model_name="advisory",
28+
name="url",
29+
field=models.URLField(help_text="Link to the advisory on the upstream website"),
30+
),
31+
]

Diff for: vulnerabilities/models.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -1321,6 +1321,7 @@ class Advisory(models.Model):
13211321
max_length=64,
13221322
blank=False,
13231323
null=False,
1324+
unique=True,
13241325
help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex",
13251326
)
13261327
aliases = models.ManyToManyField(
@@ -1355,14 +1356,14 @@ class Advisory(models.Model):
13551356
"vulnerabilities.pipeline.nginx_importer.NginxImporterPipeline",
13561357
)
13571358
url = models.URLField(
1358-
blank=True,
1359+
blank=False,
1360+
null=False,
13591361
help_text="Link to the advisory on the upstream website",
13601362
)
13611363

13621364
objects = AdvisoryQuerySet.as_manager()
13631365

13641366
class Meta:
1365-
unique_together = ["unique_content_id", "date_published", "url"]
13661367
ordering = ["date_published", "unique_content_id"]
13671368

13681369
def save(self, *args, **kwargs):

Diff for: vulnerabilities/pipes/advisory.py

+22-16
Original file line numberDiff line numberDiff line change
@@ -43,20 +43,27 @@ def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable =
4343
aliases = get_or_create_aliases(aliases=advisory.aliases)
4444
content_id = compute_content_id(advisory_data=advisory)
4545
try:
46+
default_data = {
47+
"summary": advisory.summary,
48+
"affected_packages": [pkg.to_dict() for pkg in advisory.affected_packages],
49+
"references": [ref.to_dict() for ref in advisory.references],
50+
"date_published": advisory.date_published,
51+
"weaknesses": advisory.weaknesses,
52+
"created_by": pipeline_id,
53+
"date_collected": datetime.now(timezone.utc),
54+
}
55+
4656
advisory_obj, _ = Advisory.objects.get_or_create(
4757
unique_content_id=content_id,
4858
url=advisory.url,
49-
defaults={
50-
"summary": advisory.summary,
51-
"affected_packages": [pkg.to_dict() for pkg in advisory.affected_packages],
52-
"references": [ref.to_dict() for ref in advisory.references],
53-
"date_published": advisory.date_published,
54-
"weaknesses": advisory.weaknesses,
55-
"created_by": pipeline_id,
56-
"date_collected": datetime.now(timezone.utc),
57-
},
59+
defaults=default_data,
5860
)
5961
advisory_obj.aliases.add(*aliases)
62+
except Advisory.MultipleObjectsReturned:
63+
logger.error(
64+
f"Multiple Advisories returned: unique_content_id: {content_id}, url: {advisory.url}, advisory: {advisory!r}"
65+
)
66+
raise
6067
except Exception as e:
6168
if logger:
6269
logger(
@@ -137,19 +144,18 @@ def import_advisory(
137144
},
138145
)
139146
vulnerability.severities.add(vulnerability_severity)
147+
if not created and logger:
148+
logger(
149+
f"Severity updated for reference {ref.url!r} to value: {severity.value!r} "
150+
f"and scoring_elements: {severity.scoring_elements!r}",
151+
level=logging.DEBUG,
152+
)
140153
except:
141154
if logger:
142155
logger(
143156
f"Failed to create VulnerabilitySeverity for: {severity} with error:\n{traceback_format_exc()}",
144157
level=logging.ERROR,
145158
)
146-
if not created:
147-
if logger:
148-
logger(
149-
f"Severity updated for reference {ref.url!r} to value: {severity.value!r} "
150-
f"and scoring_elements: {severity.scoring_elements!r}",
151-
level=logging.DEBUG,
152-
)
153159

154160
for affected_purl in affected_purls or []:
155161
vulnerable_package, _ = Package.objects.get_or_create_from_purl(purl=affected_purl)

Diff for: vulnerabilities/tests/conftest.py

-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ def no_rmtree(monkeypatch):
2525
# Step 2: Run test for importer only if it is activated (pytestmark = pytest.mark.skipif(...))
2626
# Step 3: Migrate all the tests
2727
collect_ignore = [
28-
"test_models.py",
2928
"test_rust.py",
3029
"test_suse_backports.py",
3130
"test_suse.py",

Diff for: vulnerabilities/tests/pipelines/test_populate_vulnerability_summary_pipeline.py

+4
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def test_populate_missing_summaries_from_nvd(self):
4343
created_by="nvd_importer",
4444
date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
4545
unique_content_id="Test",
46+
url="https://test.com",
4647
)
4748
adv.aliases.add(alias)
4849

@@ -110,6 +111,7 @@ def test_non_nvd_advisory_ignored(self):
110111
created_by="other_importer",
111112
date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
112113
unique_content_id="Test",
114+
url="https://test.com",
113115
)
114116

115117
adv.aliases.add(alias)
@@ -138,6 +140,7 @@ def test_multiple_matching_advisories(self):
138140
created_by="nvd_importer",
139141
date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
140142
unique_content_id="Test",
143+
url="https://test.com",
141144
)
142145

143146
adv1.aliases.add(alias)
@@ -147,6 +150,7 @@ def test_multiple_matching_advisories(self):
147150
created_by="nvd_importer",
148151
date_collected=datetime.datetime(2024, 1, 2, tzinfo=pytz.UTC),
149152
unique_content_id="Test-1",
153+
url="https://test.com",
150154
)
151155

152156
adv2.aliases.add(alias)

Diff for: vulnerabilities/tests/pipelines/test_remove_duplicate_advisories.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ def setUp(self):
3232
)
3333
],
3434
references=[Reference(url="https://example.com/vuln1")],
35+
url="https://test.url/",
3536
)
3637

3738
def test_remove_duplicates_keeps_oldest(self):
@@ -49,9 +50,10 @@ def test_remove_duplicates_keeps_oldest(self):
4950
]
5051

5152
advisories = []
52-
for date in dates:
53+
for i, date in enumerate(dates):
5354
advisory = Advisory.objects.create(
54-
unique_content_id=compute_content_id(advisory_data=self.advisory_data),
55+
unique_content_id=f"incorrect-content-id{i}",
56+
url=self.advisory_data.url,
5557
summary=self.advisory_data.summary,
5658
affected_packages=[pkg.to_dict() for pkg in self.advisory_data.affected_packages],
5759
references=[ref.to_dict() for ref in self.advisory_data.references],
@@ -77,6 +79,7 @@ def test_different_content_preserved(self):
7779
# Create two advisories with different content
7880
advisory1 = Advisory.objects.create(
7981
unique_content_id="test-id1",
82+
url="https://test.url/",
8083
summary="Summary 1",
8184
affected_packages=[],
8285
date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
@@ -87,6 +90,7 @@ def test_different_content_preserved(self):
8790

8891
advisory2 = Advisory.objects.create(
8992
unique_content_id="test-id2",
93+
url="https://test.url/",
9094
summary="Summary 2",
9195
affected_packages=[],
9296
references=[],
@@ -111,6 +115,7 @@ def test_recompute_content_ids(self):
111115
# Create advisory without content ID
112116
advisory = Advisory.objects.create(
113117
unique_content_id="incorrect-content-id",
118+
url=self.advisory_data.url,
114119
summary=self.advisory_data.summary,
115120
affected_packages=[pkg.to_dict() for pkg in self.advisory_data.affected_packages],
116121
references=[ref.to_dict() for ref in self.advisory_data.references],
@@ -125,4 +130,4 @@ def test_recompute_content_ids(self):
125130
# Check that content ID was updated
126131
advisory.refresh_from_db()
127132
expected_content_id = compute_content_id(advisory_data=self.advisory_data)
128-
self.assertNotEqual(advisory.unique_content_id, expected_content_id)
133+
self.assertEqual(advisory.unique_content_id, expected_content_id)

0 commit comments

Comments
 (0)