Skip to content

Commit 91bdcf9

Browse files
authored
Merge pull request #10 from cisagov/improvement/support-nvd-2.0-json
Support NVD 2.0 JSON
2 parents c5e87d2 + cc6472a commit 91bdcf9

File tree

9 files changed

+476
-100
lines changed

9 files changed

+476
-100
lines changed

README.md

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ and then load the data into your database.
5656

5757
```python
5858
import asyncio
59-
from cyhy_cvesync import DEFAULT_CVE_URL_PATTERN
59+
from cyhy_cvesync import DEFAULT_CVE_AUTHORITATIVE_SOURCE, DEFAULT_CVE_URL_PATTERN
6060
from cyhy_cvesync.cve_sync import process_urls
6161
from cyhy_db import initialize_db
6262
from cyhy_db.models import CVEDoc
@@ -73,7 +73,7 @@ async def main():
7373
cve_url = DEFAULT_CVE_URL_PATTERN.format(year=2024)
7474
print(f"Processing CVE data from: {cve_url}...")
7575
created_cve_docs_count, updated_cve_docs_count, deleted_cve_docs_count = await process_urls(
76-
[cve_url], cve_data_gzipped=True, concurrency=1)
76+
[cve_url], cve_data_gzipped=True, concurrency=1, cve_authoritative_source=DEFAULT_CVE_AUTHORITATIVE_SOURCE)
7777

7878
print(f"Created CVE documents: {created_cve_docs_count}")
7979
print(f"Updated CVE documents: {updated_cve_docs_count}")
@@ -90,12 +90,12 @@ Output:
9090

9191
```console
9292
CVE documents in DB before sync: 20
93-
Processing CVE data from: https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2024.json.gz...
93+
Processing CVE data from: https://nvd.nist.gov/feeds/json/cve/2.0/nvdcve-2.0-2024.json.gz...
9494
Deleting outdated CVE docs ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
95-
Created CVE documents: 12174
95+
Created CVE documents: 18272
9696
Updated CVE documents: 0
9797
Deleted CVE documents: 0
98-
CVE documents in DB after sync: 12194
98+
CVE documents in DB after sync: 18292
9999
```
100100

101101
### Environment Variables ###

src/cyhy_cvesync/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,11 +7,12 @@
77
# directly used, it populates the value package_name.__version__, which is
88
# used to get version information about this Python package.
99

10+
DEFAULT_CVE_AUTHORITATIVE_SOURCE = "nvd@nist.gov"
1011
DEFAULT_CVE_URL_PATTERN = (
11-
"https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-{year}.json.gz"
12+
"https://nvd.nist.gov/feeds/json/cve/2.0/nvdcve-2.0-{year}.json.gz"
1213
)
1314

1415
from ._version import __version__ # noqa: F401, E402
1516
from .main import do_cve_sync # noqa: E402
1617

17-
__all__ = [DEFAULT_CVE_URL_PATTERN, "do_cve_sync"]
18+
# __all__ must contain the *names* of the public objects as strings. Listing
# the constants themselves would put their values (an e-mail-style source
# identifier and a URL pattern) into __all__, which breaks
# `from cyhy_cvesync import *`.
__all__ = [
    "DEFAULT_CVE_AUTHORITATIVE_SOURCE",
    "DEFAULT_CVE_URL_PATTERN",
    "do_cve_sync",
]

src/cyhy_cvesync/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
"""This file defines the version of this module."""
22

3-
__version__ = "1.2.1"
3+
__version__ = "2.0.0"

src/cyhy_cvesync/cve_sync.py

Lines changed: 40 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020
ALLOWED_URL_SCHEMES = ["http", "https"]
2121
CVE_URL_RETRY_WAIT_SEC = 5
2222
MAX_CVE_URL_RETRIES = 10
23+
# Preferred CVSS metrics listed in order of preference
24+
PREFERRED_CVSS_METRICS = ["cvssMetricV31", "cvssMetricV30", "cvssMetricV2"]
2325

2426
# Map to track existing CVE documents that were not updated
2527
cve_map: Dict[str, CVEDoc] = {}
@@ -28,12 +30,15 @@
2830
logger = logging.getLogger(f"{CYHY_ROOT_LOGGER}.{__name__}")
2931

3032

31-
async def process_cve_json(cve_json: dict) -> Tuple[int, int]:
33+
async def process_cve_json(
34+
cve_json: dict, cve_authoritative_source: str
35+
) -> Tuple[int, int]:
3236
"""
3337
Process the provided CVEs JSON and update the database with their contents.
3438
3539
Args:
3640
cve_json (dict): The JSON data containing information about CVEs.
41+
cve_authoritative_source (str): The authoritative source for CVE data.
3742
3843
Returns:
3944
Tuple[int, int]: A tuple containing the counts of created and updated
@@ -45,19 +50,21 @@ async def process_cve_json(cve_json: dict) -> Tuple[int, int]:
4550
created_cve_docs_count = 0
4651
updated_cve_docs_count = 0
4752

48-
if cve_json.get("CVE_data_type") != "CVE":
53+
if cve_json.get("format") != "NVD_CVE":
4954
raise ValueError("JSON does not look like valid CVE data.")
5055

51-
cve_items = cve_json.get("CVE_Items", [])
56+
cve_items = cve_json.get("vulnerabilities", [])
5257

5358
logger.info(
5459
"Async task %d: Starting to process %d CVEs",
5560
id(asyncio.current_task()),
5661
len(cve_items),
5762
)
63+
# Create a set of preferred CVSS metrics for quick lookup
64+
preferred_cvss_metrics_set = set(PREFERRED_CVSS_METRICS)
5865
for cve in cve_items:
5966
try:
60-
cve_id = cve["cve"]["CVE_data_meta"]["ID"]
67+
cve_id = cve["cve"]["id"]
6168
except KeyError:
6269
# JSON might be malformed, so we'll log what the CVE object looks like
6370
# and then raise an error
@@ -67,21 +74,36 @@ async def process_cve_json(cve_json: dict) -> Tuple[int, int]:
6774
if not cve_id:
6875
raise ValueError("CVE ID is empty.")
6976

70-
# Only process CVEs that have CVSS V2 or V3 data
71-
if any(k in cve["impact"] for k in ["baseMetricV2", "baseMetricV3"]):
77+
# Only process CVEs that have our preferred CVSS metrics
78+
metrics = cve.get("cve", {}).get("metrics", {}).keys()
79+
if metrics & preferred_cvss_metrics_set:
7280
# Check if the CVE document already exists in the database
7381
global cve_map
7482
async with cve_map_lock:
7583
cve_doc = cve_map.pop(cve_id, None)
7684

77-
version = "V3" if "baseMetricV3" in cve["impact"] else "V2"
85+
# Grab newest CVSS metrics from the authoritative source
86+
cvss_base_score = None
87+
cvss_version_temp = None
7888
try:
79-
cvss_base_score = cve["impact"]["baseMetric" + version][
80-
"cvss" + version
81-
]["baseScore"]
82-
cvss_version_temp = cve["impact"]["baseMetric" + version][
83-
"cvss" + version
84-
]["version"]
89+
for v in PREFERRED_CVSS_METRICS:
90+
if v in cve["cve"].get("metrics", {}):
91+
for metric in cve["cve"]["metrics"][v]:
92+
if metric.get("source") == cve_authoritative_source:
93+
cvss_base_score = metric["cvssData"]["baseScore"]
94+
cvss_version_temp = metric["cvssData"]["version"]
95+
break
96+
if cvss_base_score is not None:
97+
# Break out of outer loop
98+
break
99+
100+
if cvss_base_score is None or cvss_version_temp is None:
101+
logger.debug(
102+
"Skipping %s; no preferred CVSS metrics found from authoritative source (%s).",
103+
cve_id,
104+
cve_authoritative_source,
105+
)
106+
continue
85107
except KeyError:
86108
logger.error("CVE object: %s", cve)
87109
raise ValueError("JSON does not look like valid CVE data.")
@@ -168,6 +190,7 @@ async def process_urls(
168190
cve_urls: List[str],
169191
cve_data_gzipped: bool,
170192
concurrency: int,
193+
cve_authoritative_source: str,
171194
) -> Tuple[int, int, int]:
172195
"""
173196
Process URLs containing CVE data.
@@ -180,6 +203,7 @@ async def process_urls(
180203
cve_urls (List[str]): A list of URLs containing CVE data.
181204
cve_data_gzipped (bool): A flag indicating whether the CVE data is gzipped.
182205
concurrency (int): The number of concurrent URL requests to make and process.
206+
cve_authoritative_source (str): The authoritative source for CVE data.
183207
184208
Returns:
185209
Tuple[int, int, int]: A tuple containing the counts of created, updated,
@@ -201,7 +225,9 @@ async def process_single_url(
201225
async with semaphore:
202226
logging.info("Processing URL: %s", cve_url)
203227
cve_json = await fetch_cve_data(session, cve_url, cve_data_gzipped)
204-
created_count, updated_count = await process_cve_json(cve_json)
228+
created_count, updated_count = await process_cve_json(
229+
cve_json, cve_authoritative_source
230+
)
205231
async with cve_docs_count_lock:
206232
created_cve_docs_count += created_count
207233
updated_cve_docs_count += updated_count

src/cyhy_cvesync/main.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,10 @@ async def do_cve_sync(
6060
# Fetch the CVE URLs and put the CVE data into the database
6161
created_cve_docs_count, updated_cve_docs_count, deleted_cve_docs_count = (
6262
await process_urls(
63-
cve_urls, config.cvesync.json_url_gzipped, config.cvesync.url_concurrency
63+
cve_urls,
64+
config.cvesync.json_url_gzipped,
65+
config.cvesync.url_concurrency,
66+
config.cvesync.cve_authoritative_source,
6467
)
6568
)
6669

src/cyhy_cvesync/models/config_model.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,18 @@
66
# Third-Party Libraries
77
from pydantic import BaseModel, ConfigDict, Field
88

9-
from .. import DEFAULT_CVE_URL_PATTERN
9+
from .. import DEFAULT_CVE_AUTHORITATIVE_SOURCE, DEFAULT_CVE_URL_PATTERN
1010

1111

1212
class CVESync(BaseModel):
1313
"""Definition of a CVE Sync configuration."""
1414

1515
model_config = ConfigDict(extra="forbid")
1616

17+
cve_authoritative_source: str = Field(
18+
default=DEFAULT_CVE_AUTHORITATIVE_SOURCE,
19+
description="The authoritative source for CVE data",
20+
)
1721
db_auth_uri: str = Field(
1822
pattern=r"^mongodb://", description="MongoDB connection URI"
1923
)

tests/test_config_model.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,11 @@
55
import pytest
66

77
# cisagov Libraries
8-
from cyhy_cvesync.models.config_model import DEFAULT_CVE_URL_PATTERN, CVESync
8+
from cyhy_cvesync.models.config_model import (
9+
DEFAULT_CVE_AUTHORITATIVE_SOURCE,
10+
DEFAULT_CVE_URL_PATTERN,
11+
CVESync,
12+
)
913

1014

1115
def test_set_json_url_pattern():
@@ -36,6 +40,15 @@ def test_default_url_concurrency():
3640
assert config.url_concurrency == 10
3741

3842

43+
def test_default_cve_authoritative_source():
44+
"""Test the default CVE authoritative source."""
45+
config = CVESync(
46+
db_auth_uri="mongodb://localhost:27017",
47+
db_name="test_db",
48+
)
49+
assert config.cve_authoritative_source == DEFAULT_CVE_AUTHORITATIVE_SOURCE
50+
51+
3952
def test_invalid_db_auth_uri():
4053
"""Test an invalid database authentication URI."""
4154
with pytest.raises(ValidationError):

0 commit comments

Comments
 (0)