Skip to content

Optimize export management command #1868

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 30, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 34 additions & 5 deletions vulnerabilities/management/commands/export.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,16 @@
# See https://github.com/aboutcode-org/vulnerablecode for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#
import itertools
import logging
from itertools import groupby
from pathlib import Path
from timeit import default_timer as timer
from traceback import format_exc as traceback_format_exc

import saneyaml
from aboutcode.pipeline import LoopProgress
from aboutcode.pipeline import humanize_time
from django.core.management.base import BaseCommand
from django.core.management.base import CommandError
from packageurl import PackageURL
Expand All @@ -26,7 +31,7 @@ def serialize_severity(sev):
"score": sev.value,
"scoring_system": sev.scoring_system,
"scoring_elements": sev.scoring_elements,
"published_at": sev.published_at,
"published_at": str(sev.published_at),
"url": sev.url,
}

Expand Down Expand Up @@ -88,8 +93,22 @@ def export_data(self, base_path: Path):
"""
i = 0
seen_vcid = set()
export_start_time = timer()

for i, (purl_without_version, package_versions) in enumerate(packages_by_type_ns_name(), 1):
distinct_packages_count = (
Package.objects.values("type", "namespace", "name")
.distinct("type", "namespace", "name")
.count()
)

progress = LoopProgress(
total_iterations=distinct_packages_count,
progress_step=1,
logger=self.stdout.write,
)
for i, (purl_without_version, package_versions) in enumerate(
progress.iter(packages_by_type_ns_name()), 1
):
pkg_version = None
try:
package_urls = []
Expand All @@ -108,7 +127,11 @@ def export_data(self, base_path: Path):
}
package_vulnerabilities.append(package_data)

for vuln in pkg_version.vulnerabilities:
vulnerabilities = itertools.chain(
pkg_version.affected_by_vulnerabilities.all(),
pkg_version.fixing_vulnerabilities.all(),
)
for vuln in vulnerabilities:
vcid = vuln.vulnerability_id
# do not write the same file twice
if vcid in seen_vcid:
Expand All @@ -131,9 +154,15 @@ def export_data(self, base_path: Path):
self.stdout.write(f"Processed {i} package. Last PURL: {purl_without_version}")

except Exception as e:
raise Exception(f"Failed to process Package: {pkg_version}") from e
self.stdout.write(
self.style.ERROR(
f"Failed to process Package {pkg_version}: {e!r} \n {traceback_format_exc()}"
)
)

self.stdout.write(f"Exported data for: {i} package and {len(seen_vcid)} vulnerabilities.")
export_run_time = timer() - export_start_time
self.stdout.write(f"Export completed in {humanize_time(export_run_time)}")


def by_purl_type_ns_name(package):
Expand All @@ -159,7 +188,7 @@ def packages_by_type_ns_name():
"fixing_vulnerabilities__weaknesses",
"fixing_vulnerabilities__severities",
)
.paginated()
.iterator()
)

for tp_ns_name, packages in groupby(qs, key=by_purl_type_ns_name):
Expand Down
4 changes: 0 additions & 4 deletions vulnerabilities/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -994,10 +994,6 @@ def next_non_vulnerable_version(self):
next_non_vulnerable, _ = self.get_non_vulnerable_versions()
return next_non_vulnerable.version if next_non_vulnerable else None

@property
def vulnerabilities(self):
return self.affected_by_vulnerabilities.all() | self.fixing_vulnerabilities.all()

@property
def latest_non_vulnerable_version(self):
"""
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ severities:
- score: '7.0'
scoring_system: cvssv3_vector
scoring_elements: CVSS:3.0/AV:N/AC:L/PR:N/UI:N/S:U/C:N/I:N/A:H
published_at:
published_at: None
url: https://..
weaknesses:
- CWE-15
Expand Down
4 changes: 1 addition & 3 deletions vulnerabilities/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -428,9 +428,7 @@ def test_affecting_vulnerabilities_vulnerabilityqueryset_method(self):
searched_for_package = self.package_pypi_redis_4_1_1

# Return a queryset of Vulnerabilities that affect this Package.
this_package_vulnerabilities = (
searched_for_package.vulnerabilities.affecting_vulnerabilities()
)
this_package_vulnerabilities = searched_for_package.affected_by

assert this_package_vulnerabilities[0] == self.vuln_VCID_g2fu_45jw_aaan
assert this_package_vulnerabilities[1] == self.vuln_VCID_rqe1_dkmg_aaad
Expand Down