# https://www.rapid7.com/db/
# Standard library
import re
from concurrent.futures import ThreadPoolExecutor, as_completed
from typing import List, Optional
from urllib.parse import quote_plus

# Third-party
import httpx
from bs4 import BeautifulSoup
from dateutil import parser as dateutil_parser

# Local
from models.vulnerability import Vulnerability
from services.api.source import Source
from services.vulnerabilities.factories.vulnerability_factory import VulnerabilityFactory, DEFAULT_VALUES
11+
12+
class RAPID7(Source):
    """Vulnerability source backed by the public Rapid7 vulnerability database
    (https://www.rapid7.com/db/).

    Results are scraped from the site's HTML search and detail pages, so this
    source is sensitive to markup changes on rapid7.com.
    """

    def __init__(self):
        self.base_url = "https://www.rapid7.com"
        self.search_url = f"{self.base_url}/db/"
        # One client reused for all requests; closed at the end of search().
        self.session = httpx.Client()

    def search(self, keywords: List[str], max_results: int = 100) -> List[Vulnerability]:
        """Search the Rapid7 DB for *keywords* and return up to *max_results* hits.

        Detail pages behind each search result are fetched concurrently
        (up to 10 workers per results page).  The HTTP session is closed when
        the search finishes, so an instance supports a single search() call.

        :param keywords: search terms; joined with '+' into one query string.
        :param max_results: cap on returned vulnerabilities (falsy -> 100).
        """
        vulnerabilities: List[Vulnerability] = []
        if not max_results:
            max_results = 100

        # URL-encode each keyword so spaces/special characters survive the
        # query string (the original interpolated them raw).
        search_query = "+".join(quote_plus(k) for k in keywords)
        page = 1
        results_count = 0

        try:
            while results_count < max_results:
                url = f"{self.search_url}?q={search_query}&type=nexpose&page={page}"
                response = self.session.get(url)
                if response.status_code != 200:
                    break

                soup = BeautifulSoup(response.text, 'html.parser')
                results_section = soup.find('section', class_='vulndb__results')
                if not results_section:
                    break

                result_links = results_section.find_all('a', class_='vulndb__result resultblock')
                if not result_links:
                    break

                # Only submit as many detail fetches as we still need;
                # results_count cannot change while we are submitting.
                remaining = max_results - results_count
                with ThreadPoolExecutor(max_workers=10) as executor:
                    futures = [
                        executor.submit(self.process_vulnerability_link, link)
                        for link in result_links[:remaining]
                    ]
                    for future in as_completed(futures):
                        result = future.result()
                        if result:
                            vulnerabilities.append(result)
                            results_count += 1
                            if results_count >= max_results:
                                break

                # Advance only if the pagination widget links to the next page.
                pagination = soup.find('ul', class_='pagination')
                next_page = pagination.find('a', text=str(page + 1)) if pagination else None
                if not next_page:
                    break
                page += 1
        finally:
            # Release the HTTP client even when a request raises mid-search.
            self.session.close()

        return vulnerabilities

    def process_vulnerability_link(self, result_link) -> Optional[Vulnerability]:
        """Fetch and parse the detail page behind one search-result anchor.

        Returns a Vulnerability, or None when the result carries no CVE id,
        the detail page cannot be fetched, or parsing fails.  Failures are
        swallowed on purpose: one malformed result must not abort the search.

        :param result_link: a BeautifulSoup ``<a class="vulndb__result ...">`` tag.
        """
        try:
            title = result_link.find('div', class_='resultblock__info-title').text.strip()
            detail_url = f"{self.base_url}{result_link['href']}"

            cve_id = self.extract_cve_id_from_title(title)
            if not cve_id:
                return None

            # Meta line looks like "Published: <date> | Severity: <score>";
            # either part may be absent, in which case defaults are kept.
            meta_info = result_link.find('div', class_='resultblock__info-meta').text.strip()
            published_date = DEFAULT_VALUES['date']
            base_score = DEFAULT_VALUES['base_score']

            if "Published:" in meta_info:
                date_part = meta_info.split("Published:")[1].split("|")[0].strip()
                published_date = dateutil_parser.parse(date_part).strftime('%Y-%m-%d')

            if "Severity:" in meta_info:
                score_part = meta_info.split("Severity:")[1].strip()
                try:
                    base_score = float(score_part)
                except ValueError:
                    pass  # keep the default when the site shows a non-numeric severity

            base_severity = self.calculate_severity_from_score(base_score)

            detail_response = self.session.get(detail_url)
            if detail_response.status_code != 200:
                return None

            detail_soup = BeautifulSoup(detail_response.text, 'html.parser')

            description = ""
            description_div = detail_soup.find('div', class_='vulndb__detail-content bottom-border')
            if description_div:
                paragraphs = description_div.find_all('p')
                description = " ".join(p.text.strip() for p in paragraphs if p.text.strip())

            vulnerable_components: List[str] = []
            components_section = detail_soup.find('section', class_='vulndb__references bottom-border')
            if components_section:
                vulnerable_components = [li.text.strip() for li in components_section.find_all('li')]

            # Deduplicate reference links via a set, then hand back a list.
            reference_urls = set()
            references_div = detail_soup.find('div', class_='vulndb__related-content')
            if references_div:
                reference_urls = {link['href'] for link in references_div.find_all('a', href=True)}

            return VulnerabilityFactory.make(
                id=cve_id,
                source=self.__class__.__name__,
                url=detail_url,
                date=published_date,
                title=title,
                reference_urls=list(reference_urls),
                base_score=str(base_score),
                base_severity=base_severity,
                description=description,
                vulnerable_components=vulnerable_components,
                weaknesses=[],
            )
        except Exception:
            # Best-effort scraping: missing divs, unparsable dates, or
            # network errors skip this single result rather than crash.
            return None

    @staticmethod
    def extract_cve_id_from_title(title: str) -> Optional[str]:
        """Return the first CVE id (e.g. 'CVE-2021-44228') in *title*, or None.

        The return annotation is Optional[str]: the original claimed ``str``
        although None is returned on a miss.
        """
        match = re.search(r'CVE-\d{4}-\d{4,7}', title, re.IGNORECASE)
        return match.group(0) if match else None

    @staticmethod
    def calculate_severity_from_score(score: float) -> str:
        """Map a CVSS base score to a coarse severity label.

        Thresholds follow CVSS v3 ratings: >=9.0 Critical, >=7.0 High,
        >=5.0 Medium, otherwise Low.
        """
        if score >= 9.0:
            return "Critical"
        if score >= 7.0:
            return "High"
        if score >= 5.0:
            return "Medium"
        return "Low"