-
Notifications
You must be signed in to change notification settings - Fork 14
/
Copy pathgrcAssist.py
112 lines (93 loc) · 4.18 KB
/
grcAssist.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# grcAssist.py
# GRC Relevant News Assist
#This program is designed to pull relevant current news articles for keywords defined in a keywords.csv fie.
#GRC professionals can use this to build a bank of quic-to-access relevant cyber news stories or for a Just-in-Time news story to educate end users.
#The python file is called grcAssist.py.
#The keywords.csv file should have keyword(s) per row.
#Multiple keywords should be placed on same row with a %20 separating keywords
#e.g. cybersecurity OR cybersecurity%20healthcare
#IF you have row 1 say cybersecurity and row 2 say healthcare, the script will run two separate queries, one for cybersecurity and one for healthcare. This will likely result in less helpful stories.
#The grcdata.csv file is the output file that the script appends to. It has 5 columns. date,keyword,title,desc,url
#all 3 files should be in the same directory.
#run "python grcAssist.py"
#Python 3
#You need to register a free tier API key from newsdata.io.
#You get 200 API pulls a day and each story counts as 10 pulls (per newsdata.io site)
#I'd suggest checking API terms when creating your free API key.
# https://newsdata.io/
# Replace your api key in script with the one issued you by Newsdata.io
#Gerald Auger, 7/22/24, SimplyCyber.io
import csv
import requests
import datetime
from openpyxl import Workbook
import urllib.parse
def search_news(keyword, api_key, category="technology", language="en"):
"""
Searches NewsData.io API for a keyword and returns relevant articles.
Args:
keyword: The keyword to search for (e.g., "cybersecurity").
api_key: Your NewsData.io API key.
category: Optional category to filter results (defaults to "technology").
language: Optional language parameter (defaults to "en").
Returns:
A list of dictionaries, each containing headline, description, and url.
"""
url = f"https://newsdata.io/api/1/news?apikey={api_key}&q={keyword}&language={language}&category={category}"
response = requests.get(url)
print(url)
try:
data = response.json()
articles = []
if data["status"] == "success":
for article in data["results"]:
articles.append({
"headline": article["title"],
"description": article["description"],
"url": article["link"],
})
# Write articles to spreadsheet
filename = "grcdata.csv"
write_to_spreadsheet(articles, filename, keyword)
return articles
except (requests.exceptions.RequestException, KeyError):
print(f"Error: An error occurred while fetching data from the API.")
return []
def write_to_spreadsheet(articles, filename, keyword):
"""
Writes a list of articles to a spreadsheet file.
Args:
articles: A list of dictionaries containing article data.
filename: The filename for the spreadsheet.
"""
today = datetime.date.today().strftime("%Y-%m-%d")
with open(filename, 'a', newline='', encoding='utf-8') as csvfile:
writer = csv.writer(csvfile)
for article in articles:
writer.writerow([today, keyword, article["headline"], article["description"], article["url"]])
def main():
"""
Reads keywords from a CSV file or user input and searches for cybersecurity news.
"""
# Get keywords (modify to read from CSV or get user input)
keywords_file = "keywords.csv" # Replace with your filename
keywords = []
with open(keywords_file, 'r') as file:
reader = csv.reader(file)
for row in reader:
keywords.append(urllib.parse.unquote(row[0])) #URL-decode each keyword
# Alternatively, get keywords from user input
# keywords = input("Enter keywords separated by commas: ").split(",")
api_key = "YOUR_API_KEY_HERE" # Replace with your actual NewsData.io API key
for keyword in keywords:
articles = search_news(keyword.strip(), api_key)
if articles:
print(f"\nSearch results for '{keyword}':")
for article in articles:
print(f"\t- {article['headline']}")
print(f"\t\t{article['description']}")
print(f"\t\t{article['url']}\n")
else:
print(f"No articles found for '{keyword}'.")
if __name__ == "__main__":
main()