2020ALLOWED_URL_SCHEMES = ["http" , "https" ]
2121CVE_URL_RETRY_WAIT_SEC = 5
2222MAX_CVE_URL_RETRIES = 10
23+ # Preferred CVSS metrics listed in order of preference
24+ PREFERRED_CVSS_METRICS = ["cvssMetricV31" , "cvssMetricV30" , "cvssMetricV2" ]
2325
2426# Map to track existing CVE documents that were not updated
2527cve_map : Dict [str , CVEDoc ] = {}
2830logger = logging .getLogger (f"{ CYHY_ROOT_LOGGER } .{ __name__ } " )
2931
3032
31- async def process_cve_json (cve_json : dict ) -> Tuple [int , int ]:
33+ async def process_cve_json (
34+ cve_json : dict , cve_authoritative_source : str
35+ ) -> Tuple [int , int ]:
3236 """
3337 Process the provided CVEs JSON and update the database with their contents.
3438
3539 Args:
3640 cve_json (dict): The JSON data containing information about CVEs.
41+ cve_authoritative_source (str): The value a CVSS metric's "source" field must equal for that metric to be used.
3742
3843 Returns:
3944 Tuple[int, int]: A tuple containing the counts of created and updated
@@ -45,19 +50,21 @@ async def process_cve_json(cve_json: dict) -> Tuple[int, int]:
4550 created_cve_docs_count = 0
4651 updated_cve_docs_count = 0
4752
48- if cve_json .get ("CVE_data_type " ) != "CVE " :
53+ if cve_json .get ("format " ) != "NVD_CVE " :
4954 raise ValueError ("JSON does not look like valid CVE data." )
5055
51- cve_items = cve_json .get ("CVE_Items " , [])
56+ cve_items = cve_json .get ("vulnerabilities " , [])
5257
5358 logger .info (
5459 "Async task %d: Starting to process %d CVEs" ,
5560 id (asyncio .current_task ()),
5661 len (cve_items ),
5762 )
63+ # Create a set of preferred CVSS metrics for quick lookup
64+ preferred_cvss_metrics_set = set (PREFERRED_CVSS_METRICS )
5865 for cve in cve_items :
5966 try :
60- cve_id = cve ["cve" ]["CVE_data_meta" ][ "ID " ]
67+ cve_id = cve ["cve" ]["id " ]
6168 except KeyError :
6269 # JSON might be malformed, so we'll log what the CVE object looks like
6370 # and then raise an error
@@ -67,21 +74,36 @@ async def process_cve_json(cve_json: dict) -> Tuple[int, int]:
6774 if not cve_id :
6875 raise ValueError ("CVE ID is empty." )
6976
70- # Only process CVEs that have CVSS V2 or V3 data
71- if any (k in cve ["impact" ] for k in ["baseMetricV2" , "baseMetricV3" ]):
77+ # Only process CVEs that have our preferred CVSS metrics
78+ metrics = cve .get ("cve" , {}).get ("metrics" , {}).keys ()
79+ if metrics & preferred_cvss_metrics_set :
7280 # Check if the CVE document already exists in the database
7381 global cve_map
7482 async with cve_map_lock :
7583 cve_doc = cve_map .pop (cve_id , None )
7684
77- version = "V3" if "baseMetricV3" in cve ["impact" ] else "V2"
85+ # Take the first metric from the authoritative source, trying CVSS versions in order of preference
86+ cvss_base_score = None
87+ cvss_version_temp = None
7888 try :
79- cvss_base_score = cve ["impact" ]["baseMetric" + version ][
80- "cvss" + version
81- ]["baseScore" ]
82- cvss_version_temp = cve ["impact" ]["baseMetric" + version ][
83- "cvss" + version
84- ]["version" ]
89+ for v in PREFERRED_CVSS_METRICS :
90+ if v in cve ["cve" ].get ("metrics" , {}):
91+ for metric in cve ["cve" ]["metrics" ][v ]:
92+ if metric .get ("source" ) == cve_authoritative_source :
93+ cvss_base_score = metric ["cvssData" ]["baseScore" ]
94+ cvss_version_temp = metric ["cvssData" ]["version" ]
95+ break
96+ if cvss_base_score is not None :
97+ # Break out of outer loop
98+ break
99+
100+ if cvss_base_score is None or cvss_version_temp is None :
101+ logger .debug (
102+ "Skipping %s; no preferred CVSS metrics found from authoritative source (%s)." ,
103+ cve_id ,
104+ cve_authoritative_source ,
105+ )
106+ continue
85107 except KeyError :
86108 logger .error ("CVE object: %s" , cve )
87109 raise ValueError ("JSON does not look like valid CVE data." )
@@ -168,6 +190,7 @@ async def process_urls(
168190 cve_urls : List [str ],
169191 cve_data_gzipped : bool ,
170192 concurrency : int ,
193+ cve_authoritative_source : str ,
171194) -> Tuple [int , int , int ]:
172195 """
173196 Process URLs containing CVE data.
@@ -180,6 +203,7 @@ async def process_urls(
180203 cve_urls (List[str]): A list of URLs containing CVE data.
181204 cve_data_gzipped (bool): A flag indicating whether the CVE data is gzipped.
182205 concurrency (int): The number of concurrent URL requests to make and process.
206+ cve_authoritative_source (str): The authoritative source for CVE data; passed through to process_cve_json.
183207
184208 Returns:
185209 Tuple[int, int, int]: A tuple containing the counts of created, updated,
@@ -201,7 +225,9 @@ async def process_single_url(
201225 async with semaphore :
202226 logging .info ("Processing URL: %s" , cve_url )
203227 cve_json = await fetch_cve_data (session , cve_url , cve_data_gzipped )
204- created_count , updated_count = await process_cve_json (cve_json )
228+ created_count , updated_count = await process_cve_json (
229+ cve_json , cve_authoritative_source
230+ )
205231 async with cve_docs_count_lock :
206232 created_cve_docs_count += created_count
207233 updated_cve_docs_count += updated_count
0 commit comments