Skip to content
This repository was archived by the owner on Nov 12, 2025. It is now read-only.

Commit 4c86765

Browse files
Merge pull request #23 from krystianbajno/feature/trickest-cve
feature/trickest-cve
2 parents de848fe + a0269fd commit 4c86765

File tree

4 files changed

+274
-146
lines changed

4 files changed

+274
-146
lines changed
Lines changed: 121 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -1,76 +1,143 @@
11
import httpx
22
from typing import Dict
3-
3+
import re
44

55
def github_fetch_cve_data(cve: str) -> Dict:
    """
    Fetch the trickest/cve markdown entry for ``cve`` and parse it into a dict.

    Args:
        cve: CVE identifier. Its second dash-separated component is used as
            the year directory of the URL (e.g. "CVE-2021-44228" -> "2021").

    Returns:
        A dict with the keys initialised in ``data`` below. Every field keeps
        its default (``None`` or an empty list) when the HTTP status is not
        200 or when an error is caught while fetching or parsing.
    """
    year = cve.split('-')[1]
    url = f"https://raw.githubusercontent.com/trickest/cve/main/{year}/{cve}.md"

    data = {
        'cve_id': cve,
        'cve_link': None,
        'github_related_urls': [],
        'github_description': None,
        'github_products': [],
        'github_versions': [],
        'github_cwe': [],
        'github_vulnerability_type': [],
        'github_pocs': [],
        'github_mitigation': None,
        'github_tags': [],
        'github_references': [],
    }

    try:
        response = httpx.get(url, timeout=15)
        if response.status_code != 200:
            print(f"[!] CVE {cve} not found on GitHub.")
            return data

        description_lines = []
        mitigation_lines = []
        # Single source of truth for "which heading are we under"; one of
        # 'description', 'mitigation', 'poc', 'reference', 'github' or None.
        current_section = None

        for raw_line in response.text.splitlines():
            line = raw_line.strip()
            if not line:
                # Blank lines carry no data; skipping them keeps the joined
                # description/mitigation free of stray spaces.
                continue

            if line.startswith('### [' + cve + ']('):
                match = re.search(r'\((.*?)\)', line)
                if match:
                    data['cve_link'] = match.group(1)

            elif line.startswith('![](https://img.shields.io/static/v1?label='):
                badge_info = extract_badge_info(line)
                label = badge_info['label']
                message = badge_info['message']
                if message.lower() == 'n/a':
                    continue
                if label == 'Product':
                    data['github_products'].append(message)
                elif label == 'Version':
                    data['github_versions'].append(message)
                elif label == 'CWE' or (label == 'Vulnerability' and 'CWE-' in message):
                    # One badge may list several CWEs separated by ',' or ';'.
                    pieces = (piece.strip() for piece in re.split(',|;', message))
                    data['github_cwe'].extend(piece for piece in pieces if piece)
                elif label == 'Vulnerability':
                    data['github_vulnerability_type'].append(message)

            elif line.startswith('### Description'):
                current_section = 'description'
            elif line.startswith(('### Mitigation', '### Workarounds')):
                current_section = 'mitigation'
            elif line.startswith('### POC'):
                current_section = 'poc'
            elif line.startswith('#### Reference'):
                current_section = 'reference'
            elif line.startswith('#### Github'):
                current_section = 'github'
            elif line.startswith('###'):
                # Any other heading closes the section being collected.
                current_section = None

            elif current_section == 'description':
                description_lines.append(line)
            elif current_section == 'mitigation':
                mitigation_lines.append(line)
            elif line.startswith('- '):
                item = line[2:].strip()
                if current_section == 'reference':
                    # Placeholder bullet used when an entry has no references.
                    if not item.lower().startswith('no pocs from references'):
                        data['github_references'].append(item)
                elif current_section == 'github':
                    data['github_related_urls'].append(item)
                elif current_section == 'poc':
                    data['github_pocs'].append(item)

        if description_lines:
            data['github_description'] = ' '.join(description_lines)

        if mitigation_lines:
            data['github_mitigation'] = ' '.join(mitigation_lines)

        # Remove duplicates while keeping first-seen order (a set would
        # return the items in arbitrary order).
        for key in (
            'github_products',
            'github_versions',
            'github_cwe',
            'github_vulnerability_type',
            'github_related_urls',
            'github_references',
            'github_pocs',
        ):
            data[key] = list(dict.fromkeys(data[key]))

    except Exception as e:
        # Best-effort lookup: report the failure and return what we have.
        print(f"[!] Error fetching data for CVE {cve} from GitHub: {e}")

    return data
127+
128+
def extract_badge_info(line: str) -> Dict[str, str]:
    """
    Extract the label and message from a Shields.io badge in markdown format.

    Args:
        line: A markdown line containing a badge URL with
            ``label=...&message=...`` query parameters.

    Returns:
        ``{'label': ..., 'message': ...}`` with both values percent-decoded
        and stripped, or empty strings for both when the line does not
        contain a ``label=...&message=...`` pair.
    """
    # Local import so this function does not depend on a file-level import.
    from urllib.parse import unquote

    # The message ends at the next '&' or, when it is the last parameter, at
    # the end of the line minus the markdown's closing ')'.
    match = re.search(r'label=(.*?)&message=(.*?)(?:&|\)?\s*$)', line)
    if not match:
        return {'label': '', 'message': ''}

    # unquote() decodes every percent-escape (%2F, %3C, ...), not just %20.
    label = unquote(match.group(1)).strip()
    message = unquote(match.group(2)).strip()
    return {'label': label, 'message': message}
Lines changed: 112 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -1,80 +1,139 @@
11
from typing import Dict
22
from services.cache.cache_manager import CacheManager
3+
import re
34

45
def github_fetch_cve_data_cached(cve: str, cache_manager: CacheManager) -> Dict:
    """
    Parse the cached trickest/cve markdown entry for ``cve`` into a dict.

    Args:
        cve: CVE identifier used as the lookup key in the cached data.
        cache_manager: Provides ``wait_for_data`` and ``get_data`` for the
            'trickest_cve_github_cached' entry. The cached object is looked
            up with ``.get(cve)`` and the result is parsed as markdown text
            (presumably a mapping of CVE id -> markdown; verify against the
            cache producer).

    Returns:
        ``{}`` when the cache entry is empty or unavailable. Otherwise a dict
        with the keys initialised in ``data`` below; fields keep their
        defaults when the CVE is not cached or a parsing error is caught.
    """
    cache_manager.wait_for_data('trickest_cve_github_cached')

    cve_data_cache = cache_manager.get_data('trickest_cve_github_cached')

    if not cve_data_cache:
        print("[!] GitHub CVE data is unavailable.")
        return {}

    data = {
        'cve_id': cve,
        'cve_link': None,
        'github_related_urls': [],
        'github_description': None,
        'github_products': [],
        'github_versions': [],
        'github_cwe': [],
        'github_vulnerability_type': [],
        'github_pocs': [],
        'github_mitigation': None,
        'github_tags': [],
        'github_references': [],
    }

    cve_content = cve_data_cache.get(cve)
    if not cve_content:
        return data

    try:
        description_lines = []
        mitigation_lines = []
        # Single source of truth for "which heading are we under"; one of
        # 'description', 'mitigation', 'poc', 'reference', 'github' or None.
        current_section = None

        for raw_line in cve_content.splitlines():
            line = raw_line.strip()
            if not line:
                # Blank lines carry no data; skipping them keeps the joined
                # description/mitigation free of stray spaces.
                continue

            if line.startswith('### [' + cve + ']('):
                match = re.search(r'\((.*?)\)', line)
                if match:
                    data['cve_link'] = match.group(1)

            elif line.startswith('![](https://img.shields.io/static/v1?label='):
                badge_info = extract_badge_info(line)
                label = badge_info['label']
                message = badge_info['message']
                if message.lower() == 'n/a':
                    continue  # Skip 'n/a' values
                if label == 'Product':
                    data['github_products'].append(message)
                elif label == 'Version':
                    data['github_versions'].append(message)
                elif label == 'CWE' or (label == 'Vulnerability' and 'CWE-' in message):
                    # One badge may list several CWEs separated by ',' or ';';
                    # split them the same way the non-cached parser does.
                    pieces = (piece.strip() for piece in re.split(',|;', message))
                    data['github_cwe'].extend(piece for piece in pieces if piece)
                elif label == 'Vulnerability':
                    data['github_vulnerability_type'].append(message)

            elif line.startswith('### Description'):
                current_section = 'description'
            elif line.startswith(('### Mitigation', '### Workarounds')):
                current_section = 'mitigation'
            elif line.startswith('### POC'):
                current_section = 'poc'
            elif line.startswith('#### Reference'):
                current_section = 'reference'
            elif line.startswith('#### Github'):
                current_section = 'github'
            elif line.startswith('###'):
                # Any other heading closes the section being collected.
                current_section = None

            elif current_section == 'description':
                description_lines.append(line)
            elif current_section == 'mitigation':
                mitigation_lines.append(line)
            elif line.startswith('- '):
                item = line[2:].strip()
                if current_section == 'reference':
                    # Placeholder bullet used when an entry has no references.
                    if not item.lower().startswith('no pocs from references'):
                        data['github_references'].append(item)
                elif current_section == 'github':
                    data['github_related_urls'].append(item)
                elif current_section == 'poc':
                    data['github_pocs'].append(item)

        if description_lines:
            data['github_description'] = ' '.join(description_lines)

        if mitigation_lines:
            data['github_mitigation'] = ' '.join(mitigation_lines)

        # Remove duplicates while keeping first-seen order (a set would
        # return the items in arbitrary order).
        for key in (
            'github_products',
            'github_versions',
            'github_cwe',
            'github_vulnerability_type',
            'github_related_urls',
            'github_references',
            'github_pocs',
        ):
            data[key] = list(dict.fromkeys(data[key]))

    except Exception as e:
        # Best-effort parse: report the failure and return what we have.
        print(f"[!] Error parsing data for CVE {cve}: {e}")

    return data
126+
127+
def extract_badge_info(line: str) -> Dict[str, str]:
    """
    Extract the label and message from a Shields.io badge in markdown format.

    Args:
        line: A markdown line containing a badge URL with
            ``label=...&message=...`` query parameters.

    Returns:
        ``{'label': ..., 'message': ...}`` with both values percent-decoded
        and stripped, or empty strings for both when the line does not
        contain a ``label=...&message=...`` pair.
    """
    # Local import so this function does not depend on a file-level import.
    from urllib.parse import unquote

    # The message ends at the next '&' or, when it is the last parameter, at
    # the end of the line minus the markdown's closing ')'.
    match = re.search(r'label=(.*?)&message=(.*?)(?:&|\)?\s*$)', line)
    if not match:
        return {'label': '', 'message': ''}

    # unquote() decodes every percent-escape (%2F, %3C, ...), not just %20.
    label = unquote(match.group(1)).strip()
    message = unquote(match.group(2)).strip()
    return {'label': label, 'message': message}

0 commit comments

Comments
 (0)