Make advisory content_id a unique field #1863

Merged
merged 5 commits on Apr 18, 2025
2 changes: 1 addition & 1 deletion .github/workflows/docs.yml
@@ -4,7 +4,7 @@ on: [push, pull_request]

jobs:
build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04

strategy:
max-parallel: 4
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
@@ -9,7 +9,7 @@ env:

jobs:
build:
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04

services:
postgres:
2 changes: 1 addition & 1 deletion .github/workflows/pypi-release.yml
@@ -21,7 +21,7 @@ on:
jobs:
build-pypi-distribs:
name: Build and publish library to PyPI
-    runs-on: ubuntu-20.04
+    runs-on: ubuntu-22.04

steps:
- uses: actions/checkout@master
2 changes: 2 additions & 0 deletions docs/source/conf.py
@@ -36,6 +36,8 @@
    "https://www.softwaretestinghelp.com/how-to-write-good-bug-report/", # Cloudflare protection
    "https://www.openssl.org/news/vulnerabilities.xml", # OpenSSL legacy advisory URL, not longer available
    "https://example.org/api/non-existent-packages",
+    "https://github.com/aboutcode-org/vulnerablecode/pull/495/commits",
+    "https://nvd.nist.gov/products/cpe",
]

# Add any Sphinx extension module names here, as strings. They can be
24 changes: 15 additions & 9 deletions vulnerabilities/import_runner.py
@@ -104,24 +104,30 @@ def process_advisories(
advisories = []
for data in advisory_datas:
content_id = compute_content_id(advisory_data=data)
+        advisory = {
+            "summary": data.summary,
+            "affected_packages": [pkg.to_dict() for pkg in data.affected_packages],
+            "references": [ref.to_dict() for ref in data.references],
+            "date_published": data.date_published,
+            "weaknesses": data.weaknesses,
+            "created_by": importer_name,
+            "date_collected": datetime.datetime.now(tz=datetime.timezone.utc),
+        }
try:
aliases = get_or_create_aliases(aliases=data.aliases)
obj, created = Advisory.objects.get_or_create(
unique_content_id=content_id,
url=data.url,
-                defaults={
-                    "summary": data.summary,
-                    "affected_packages": [pkg.to_dict() for pkg in data.affected_packages],
-                    "references": [ref.to_dict() for ref in data.references],
-                    "date_published": data.date_published,
-                    "weaknesses": data.weaknesses,
-                    "created_by": importer_name,
-                    "date_collected": datetime.datetime.now(tz=datetime.timezone.utc),
-                },
+                defaults=advisory,
)
obj.aliases.add(*aliases)
if not obj.date_imported:
advisories.append(obj)
+        except Advisory.MultipleObjectsReturned:
+            logger.error(
+                f"Multiple Advisories returned: unique_content_id: {content_id}, url: {data.url}, advisory: {advisory!r}"
+            )
+            raise
except Exception as e:
logger.error(
f"Error while processing {data!r} with aliases {data.aliases!r}: {e!r} \n {traceback_format_exc()}"
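For context, here is a minimal sketch of how a sha256-based content id along these lines can be derived from advisory fields. The project's actual compute_content_id helper may select and canonicalize fields differently, so the function name and field handling below are assumptions for illustration only.

import hashlib
import json


def sketch_content_id(advisory: dict) -> str:
    # Hypothetical illustration: hash a canonical JSON rendering of the advisory
    # content into the 64-character sha256 hex digest that unique_content_id stores.
    # The real compute_content_id may normalize and select fields differently.
    canonical = json.dumps(advisory, sort_keys=True, default=str)
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()


# Identical content always maps to the same id, which the new unique
# constraint then enforces at the database level.
print(sketch_content_id({"summary": "Example", "affected_packages": []}))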
@@ -0,0 +1,31 @@
# Generated by Django 4.2.17 on 2025-04-04 16:08

from django.db import migrations, models


class Migration(migrations.Migration):

    dependencies = [
        ("vulnerabilities", "0090_migrate_advisory_aliases"),
    ]

    operations = [
        migrations.AlterUniqueTogether(
            name="advisory",
            unique_together=set(),
        ),
        migrations.AlterField(
            model_name="advisory",
            name="unique_content_id",
            field=models.CharField(
                help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex",
                max_length=64,
                unique=True,
            ),
        ),
        migrations.AlterField(
            model_name="advisory",
            name="url",
            field=models.URLField(help_text="Link to the advisory on the upstream website"),
        ),
    ]
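A unique constraint cannot be applied while duplicate unique_content_id values remain in the table, so a pre-flight check along these lines can confirm the data is clean before migrating. This is a hypothetical snippet for a Django shell, not part of this change set.

from django.db.models import Count

from vulnerabilities.models import Advisory

# Content ids that appear on more than one Advisory row would block the
# unique=True constraint introduced by this migration.
duplicates = (
    Advisory.objects.values("unique_content_id")
    .annotate(rows=Count("id"))
    .filter(rows__gt=1)
)
for dup in duplicates:
    print(dup["unique_content_id"], dup["rows"])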
5 changes: 3 additions & 2 deletions vulnerabilities/models.py
@@ -1321,6 +1321,7 @@ class Advisory(models.Model):
max_length=64,
blank=False,
null=False,
+        unique=True,
help_text="A 64 character unique identifier for the content of the advisory since we use sha256 as hex",
)
aliases = models.ManyToManyField(
@@ -1355,14 +1356,14 @@
"vulnerabilities.pipeline.nginx_importer.NginxImporterPipeline",
)
url = models.URLField(
-        blank=True,
+        blank=False,
null=False,
help_text="Link to the advisory on the upstream website",
)

objects = AdvisoryQuerySet.as_manager()

class Meta:
-        unique_together = ["unique_content_id", "date_published", "url"]
ordering = ["date_published", "unique_content_id"]

def save(self, *args, **kwargs):
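With the composite unique_together replaced by a single-column constraint, duplicate content ids are rejected by the database even when date_published or url differ. A rough sketch of the expected behavior follows; the field values are placeholders and the exact set of required fields is an assumption.

import datetime

from django.db import IntegrityError

from vulnerabilities.models import Advisory

now = datetime.datetime.now(tz=datetime.timezone.utc)
obj, created = Advisory.objects.get_or_create(
    unique_content_id="a" * 64,  # placeholder sha256 hex digest
    url="https://example.com/advisory",
    defaults={"summary": "example", "created_by": "example_importer", "date_collected": now},
)

try:
    # A second row with the same content id now violates the unique constraint,
    # regardless of its url or date_published.
    Advisory.objects.create(
        unique_content_id="a" * 64,
        url="https://example.com/other-advisory",
        summary="duplicate",
        created_by="example_importer",
        date_collected=now,
    )
except IntegrityError:
    pass  # rejected by unique=True on unique_content_id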
38 changes: 22 additions & 16 deletions vulnerabilities/pipes/advisory.py
@@ -43,20 +43,27 @@ def insert_advisory(advisory: AdvisoryData, pipeline_id: str, logger: Callable =
aliases = get_or_create_aliases(aliases=advisory.aliases)
content_id = compute_content_id(advisory_data=advisory)
try:
+        default_data = {
+            "summary": advisory.summary,
+            "affected_packages": [pkg.to_dict() for pkg in advisory.affected_packages],
+            "references": [ref.to_dict() for ref in advisory.references],
+            "date_published": advisory.date_published,
+            "weaknesses": advisory.weaknesses,
+            "created_by": pipeline_id,
+            "date_collected": datetime.now(timezone.utc),
+        }

advisory_obj, _ = Advisory.objects.get_or_create(
unique_content_id=content_id,
url=advisory.url,
-            defaults={
-                "summary": advisory.summary,
-                "affected_packages": [pkg.to_dict() for pkg in advisory.affected_packages],
-                "references": [ref.to_dict() for ref in advisory.references],
-                "date_published": advisory.date_published,
-                "weaknesses": advisory.weaknesses,
-                "created_by": pipeline_id,
-                "date_collected": datetime.now(timezone.utc),
-            },
+            defaults=default_data,
)
advisory_obj.aliases.add(*aliases)
+    except Advisory.MultipleObjectsReturned:
+        logger.error(
+            f"Multiple Advisories returned: unique_content_id: {content_id}, url: {advisory.url}, advisory: {advisory!r}"
+        )
+        raise
except Exception as e:
if logger:
logger(
@@ -137,19 +144,18 @@ def import_advisory(
},
)
vulnerability.severities.add(vulnerability_severity)
+                if not created and logger:
+                    logger(
+                        f"Severity updated for reference {ref.url!r} to value: {severity.value!r} "
+                        f"and scoring_elements: {severity.scoring_elements!r}",
+                        level=logging.DEBUG,
+                    )
except:
if logger:
logger(
f"Failed to create VulnerabilitySeverity for: {severity} with error:\n{traceback_format_exc()}",
level=logging.ERROR,
)
-            if not created:
-                if logger:
-                    logger(
-                        f"Severity updated for reference {ref.url!r} to value: {severity.value!r} "
-                        f"and scoring_elements: {severity.scoring_elements!r}",
-                        level=logging.DEBUG,
-                    )

for affected_purl in affected_purls or []:
vulnerable_package, _ = Package.objects.get_or_create_from_purl(purl=affected_purl)
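For reference, a short usage sketch of the updated insert_advisory. The AdvisoryData and Reference constructor arguments are inferred from the tests further down, the import paths are assumptions, and all values are illustrative.

from vulnerabilities.importer import AdvisoryData, Reference
from vulnerabilities.pipes.advisory import insert_advisory

advisory = AdvisoryData(
    aliases=["CVE-2025-0001"],
    summary="Example advisory",
    affected_packages=[],
    references=[Reference(url="https://example.com/advisory")],
    url="https://example.com/advisory",
)

# Re-running the same pipeline recomputes the same content id, so
# get_or_create() returns the existing row instead of inserting a duplicate.
insert_advisory(advisory=advisory, pipeline_id="example_pipeline")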
1 change: 0 additions & 1 deletion vulnerabilities/tests/conftest.py
@@ -25,7 +25,6 @@ def no_rmtree(monkeypatch):
# Step 2: Run test for importer only if it is activated (pytestmark = pytest.mark.skipif(...))
# Step 3: Migrate all the tests
collect_ignore = [
-    "test_models.py",
"test_rust.py",
"test_suse_backports.py",
"test_suse.py",
@@ -43,6 +43,7 @@ def test_populate_missing_summaries_from_nvd(self):
created_by="nvd_importer",
date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
unique_content_id="Test",
+            url="https://test.com",
)
adv.aliases.add(alias)

@@ -110,6 +111,7 @@ def test_non_nvd_advisory_ignored(self):
created_by="other_importer",
date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
unique_content_id="Test",
+            url="https://test.com",
)

adv.aliases.add(alias)
@@ -138,6 +140,7 @@ def test_multiple_matching_advisories(self):
created_by="nvd_importer",
date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
unique_content_id="Test",
+            url="https://test.com",
)

adv1.aliases.add(alias)
@@ -147,6 +150,7 @@
created_by="nvd_importer",
date_collected=datetime.datetime(2024, 1, 2, tzinfo=pytz.UTC),
unique_content_id="Test-1",
+            url="https://test.com",
)

adv2.aliases.add(alias)
@@ -32,6 +32,7 @@ def setUp(self):
)
],
references=[Reference(url="https://example.com/vuln1")],
+            url="https://test.url/",
)

def test_remove_duplicates_keeps_oldest(self):
Expand All @@ -49,9 +50,10 @@ def test_remove_duplicates_keeps_oldest(self):
]

advisories = []
-        for date in dates:
+        for i, date in enumerate(dates):
            advisory = Advisory.objects.create(
-                unique_content_id=compute_content_id(advisory_data=self.advisory_data),
+                unique_content_id=f"incorrect-content-id{i}",
+                url=self.advisory_data.url,
summary=self.advisory_data.summary,
affected_packages=[pkg.to_dict() for pkg in self.advisory_data.affected_packages],
references=[ref.to_dict() for ref in self.advisory_data.references],
@@ -77,6 +79,7 @@ def test_different_content_preserved(self):
# Create two advisories with different content
advisory1 = Advisory.objects.create(
unique_content_id="test-id1",
+            url="https://test.url/",
summary="Summary 1",
affected_packages=[],
date_collected=datetime.datetime(2024, 1, 1, tzinfo=pytz.UTC),
@@ -87,6 +90,7 @@

advisory2 = Advisory.objects.create(
unique_content_id="test-id2",
+            url="https://test.url/",
summary="Summary 2",
affected_packages=[],
references=[],
@@ -111,6 +115,7 @@ def test_recompute_content_ids(self):
# Create advisory without content ID
advisory = Advisory.objects.create(
unique_content_id="incorrect-content-id",
+            url=self.advisory_data.url,
summary=self.advisory_data.summary,
affected_packages=[pkg.to_dict() for pkg in self.advisory_data.affected_packages],
references=[ref.to_dict() for ref in self.advisory_data.references],
@@ -125,4 +130,4 @@
# Check that content ID was updated
advisory.refresh_from_db()
expected_content_id = compute_content_id(advisory_data=self.advisory_data)
-        self.assertNotEqual(advisory.unique_content_id, expected_content_id)
+        self.assertEqual(advisory.unique_content_id, expected_content_id)