Skip to content

Commit c2dc9fc

Browse files
authored
Merge pull request #899 from cisagov/crasm-1939
Adds exceptions and example implementation in vulnScanningSync.py
2 parents 4b838c2 + 5ac10c0 commit c2dc9fc

File tree

3 files changed

+277
-45
lines changed

3 files changed

+277
-45
lines changed

backend/src/xfd_django/xfd_api/api_methods/sync.py

Lines changed: 47 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from django.db import transaction
1616
from fastapi import HTTPException, Request
1717
from xfd_api.tasks.vulnScanningSync import save_organization_to_mdl
18+
from xfd_api.utils.scan_utils.alerting import SyncError
1819
from xfd_mini_dl.models import Organization, Sector
1920

2021
from ..auth import is_global_view_admin
@@ -38,43 +39,58 @@ async def sync_post(sync_body, request: Request, current_user):
3839
raise HTTPException(status_code=500, detail="Checksum doesn't match error.")
3940

4041
# Use MinIO client to save CSV data to S3
41-
s3_client = S3Client()
42-
start_bound, end_bound = parse_cursor(headers.get("x-cursor"))
43-
file_name = generate_s3_filename(start_bound, end_bound)
44-
45-
s3_url = s3_client.save_csv(sync_body.data, file_name)
46-
if not s3_url:
47-
raise HTTPException(status_code=500, detail="No S3 URL.")
48-
49-
parsed_data = json.loads(sync_body.data)
50-
51-
for item in parsed_data:
52-
try:
53-
org = save_organization_to_mdl(
54-
org_dict=item,
55-
network_list=item["cidrs"],
56-
location=item["location"],
57-
db_name="mini_data_lake",
58-
)
59-
60-
if org:
61-
link_parent_organization(
62-
org, item.get("parent"), db_name="mini_data_lake"
63-
)
64-
link_sectors_to_organization(
65-
org, item.get("sectors", []), db_name="mini_data_lake"
66-
)
67-
68-
except Exception as e:
69-
raise HTTPException(status_code=500, detail=str(e))
70-
71-
return SyncResponse(status="success")
42+
try:
43+
process_request(headers, sync_body)
44+
except Exception as e:
45+
raise SyncError(
46+
"VulnScanningSync Endpoint", str(e), "Error syncing VS data"
47+
)
7248

7349
except HTTPException as http_exc:
7450
raise http_exc
7551
except Exception as e:
7652
print(e)
7753
raise HTTPException(status_code=500, detail=str(e))
54+
except SyncError as sync_exc:
55+
raise HTTPException(
56+
status_code=500,
57+
detail=f"SyncError: {sync_exc.message} - {sync_exc.error_message}",
58+
)
59+
60+
61+
def process_request(headers, sync_body):
    """Persist the synced payload to S3 and upsert each organization into the MDL.

    Saves the raw payload as a CSV object in S3 (named from the ``x-cursor``
    bounds), then parses it as JSON and saves every organization record to the
    mini data lake, linking parents and sectors where present.

    Args:
        headers: Incoming request headers; ``x-cursor`` carries the
            start/end bounds used to name the S3 object.
        sync_body: Request body object whose ``data`` attribute holds the
            serialized payload.

    Returns:
        SyncResponse: status "success" once every record is processed.

    Raises:
        HTTPException: 500 when the S3 upload yields no URL, or when saving
            or linking an organization record fails.
    """
    s3_client = S3Client()
    start_bound, end_bound = parse_cursor(headers.get("x-cursor"))
    file_name = generate_s3_filename(start_bound, end_bound)

    s3_url = s3_client.save_csv(sync_body.data, file_name)
    if not s3_url:
        raise HTTPException(status_code=500, detail="No S3 URL.")

    parsed_data = json.loads(sync_body.data)

    for item in parsed_data:
        try:
            org = save_organization_to_mdl(
                org_dict=item,
                network_list=item["cidrs"],
                location=item["location"],
                db_name="mini_data_lake",
            )

            if org:
                link_parent_organization(
                    org, item.get("parent"), db_name="mini_data_lake"
                )
                link_sectors_to_organization(
                    org, item.get("sectors", []), db_name="mini_data_lake"
                )
        except Exception as e:
            # Identify which record failed and chain the original exception
            # so the root-cause traceback is preserved for debugging.
            raise HTTPException(
                status_code=500,
                detail="Error processing organization {}: {}".format(
                    item.get("_id", "<unknown>"), e
                ),
            ) from e

    return SyncResponse(status="success")
7894

7995

8096
def parse_cursor(cursor_header):

backend/src/xfd_django/xfd_api/tasks/vulnScanningSync.py

Lines changed: 77 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@
3333
from xfd_api.utils.chunk import chunk_list_by_bytes
3434
from xfd_api.utils.csv_utils import create_checksum
3535
from xfd_api.utils.hash import hash_ip
36+
from xfd_api.utils.scan_utils.alerting import (
37+
IngestionError,
38+
QueryError,
39+
ScanExecutionError,
40+
SyncError,
41+
)
3642
from xfd_api.utils.scan_utils.vuln_scanning_sync_utils import (
3743
enforce_latest_flag_port_scan,
3844
fetch_orgs_and_relations,
@@ -67,6 +73,7 @@
6773
)
6874
LOGGER = logging.getLogger(__name__)
6975
IS_LOCAL = os.getenv("IS_LOCAL")
76+
SCAN_NAME = "VulnScanningSync"
7077

7178
VS_PULL_DATE_RANGE = os.getenv("VS_PULL_DATE_RANGE", "2")
7279

@@ -89,8 +96,9 @@ def handler(event):
8996
main()
9097
return {"status_code": 200, "body": "VS Sync completed successfully"}
9198
except Exception as e:
92-
LOGGER.info("Error occurred: %s", e)
93-
return {"status_code": 500, "body": str(e)}
99+
raise ScanExecutionError(SCAN_NAME, str(e), event) from e
100+
# LOGGER.info("Error occurred: %s", e)
101+
# return {"status_code": 500, "body": str(e)}
94102

95103

96104
def query_redshift(query, params=None):
@@ -111,6 +119,8 @@ def query_redshift(query, params=None):
111119
cursor.execute(query) # <-- this avoids the IndexError
112120
results = cursor.fetchall()
113121
return [dict(row) for row in results]
122+
except Exception as e:
123+
raise QueryError(SCAN_NAME, str(e)) from e
114124
finally:
115125
cursor.close()
116126
conn.close()
@@ -217,12 +227,33 @@ def main():
217227
chunk_number - 1,
218228
)
219229
LOGGER.info("Finished processing tickets")
220-
create_vuln_scan_summary()
230+
try:
231+
create_vuln_scan_summary()
232+
except Exception as e:
233+
raise QueryError(
234+
SCAN_NAME, str(e), "Error creating vulnerability scan summary"
235+
) from e
221236

222-
create_domain_view("mini_data_lake")
223-
create_service_view("mini_data_lake")
224-
create_vuln_normal_views("mini_data_lake")
225-
create_vuln_materialized_views("mini_data_lake")
237+
try:
238+
create_domain_view("mini_data_lake")
239+
except Exception as e:
240+
raise QueryError(SCAN_NAME, str(e), "Error creating domain view") from e
241+
try:
242+
create_service_view("mini_data_lake")
243+
except Exception as e:
244+
raise QueryError(SCAN_NAME, str(e), "Error creating service view") from e
245+
try:
246+
create_vuln_normal_views("mini_data_lake")
247+
except Exception as e:
248+
raise QueryError(
249+
SCAN_NAME, str(e), "Error creating vulnerability normal views"
250+
) from e
251+
try:
252+
create_vuln_materialized_views("mini_data_lake")
253+
except Exception as e:
254+
raise QueryError(
255+
SCAN_NAME, str(e), "Error creating vulnerability materialized views"
256+
) from e
226257

227258

228259
def detect_data_set(query):
@@ -291,6 +322,7 @@ def send_organizations_to_dmz():
291322
traceback.format_exc(),
292323
)
293324
print(e)
325+
raise SyncError(SCAN_NAME, str(e), "Error sending organizations to dmz") from e
294326

295327

296328
def send_csv_to_sync(csv_data, bounds):
@@ -330,6 +362,10 @@ def send_csv_to_sync(csv_data, bounds):
330362
)
331363
except Exception as e:
332364
LOGGER.error("Unexpected error sending chunk: %s", str(e))
365+
raise SyncError(
366+
SCAN_NAME,
367+
str(e),
368+
) from e
333369

334370

335371
def process_vulnerability_scans(vuln_scans, org_id_dict):
@@ -357,9 +393,14 @@ def process_vulnerability_scans(vuln_scans, org_id_dict):
357393
except Exception as e:
358394
LOGGER.error("Error saving vulnerability scan: %s", e)
359395
print(traceback.format_exc())
396+
# Raise to catch in the outer block
397+
raise e
360398
except Exception as e:
361399
LOGGER.error("Error processing Vulnerability Scan: %s", e)
362400
print(traceback.format_exc())
401+
raise IngestionError(
402+
SCAN_NAME, str(e), "Failed processing vulnerability scans"
403+
) from e
363404

364405

365406
def safe_fromisoformat(date_input) -> datetime.datetime | None:
@@ -518,6 +559,9 @@ def create_daily_host_summary(org_id_dict, summary_date=None):
518559
owner_id,
519560
e,
520561
)
562+
raise QueryError(
563+
SCAN_NAME, str(e), "Error creating daily host summary"
564+
) from e
521565

522566
LOGGER.info("Completed host summary creation from Redshift.")
523567

@@ -581,6 +625,7 @@ def create_port_scan_summary(summary_date=None):
581625

582626
except Exception as e:
583627
print("Error creating port scan summary: {}".format(e))
628+
raise QueryError(SCAN_NAME, str(e), "Error creating port scan summary") from e
584629

585630

586631
def create_port_scan_service_summaries(summary_date=None):
@@ -633,6 +678,9 @@ def create_port_scan_service_summaries(summary_date=None):
633678
)
634679
except Exception as e:
635680
print("Error creating port scan service summary: {}".format(e))
681+
raise QueryError(
682+
SCAN_NAME, str(e), "Error creating port scan service summary"
683+
) from e
636684

637685

638686
def process_tickets(tickets, org_id_dict):
@@ -706,6 +754,7 @@ def process_tickets(tickets, org_id_dict):
706754
print(
707755
f"Error processing ticket data: {e} - {owner_id} - {ticket.get('owner')}"
708756
)
757+
raise IngestionError(SCAN_NAME, str(e), "Failed processing tickets") from e
709758

710759

711760
def get_asset_owned_count(org):
@@ -1034,6 +1083,9 @@ def process_port_scans(port_scans, org_id_dict):
10341083
save_port_scan_to_datalake(port_scan_dict)
10351084
except Exception as e:
10361085
print(f"Error processing port scan data: {e}")
1086+
raise IngestionError(
1087+
SCAN_NAME, str(e), "Failed processing port scans"
1088+
) from e
10371089

10381090

10391091
def process_orgs(request_list):
@@ -1044,16 +1096,23 @@ def process_orgs(request_list):
10441096
parent_child_dict = {}
10451097

10461098
# Process the request data
1047-
if request_list and isinstance(request_list, list):
1048-
process_request(request_list, sector_child_dict, parent_child_dict, org_id_dict)
1099+
try:
1100+
if request_list and isinstance(request_list, list):
1101+
process_request(
1102+
request_list, sector_child_dict, parent_child_dict, org_id_dict
1103+
)
10491104

1050-
# Link parent-child organizations
1051-
link_parent_child_organizations(parent_child_dict, org_id_dict)
1105+
# Link parent-child organizations
1106+
link_parent_child_organizations(parent_child_dict, org_id_dict)
10521107

1053-
# Assign organizations to sectors
1054-
assign_organizations_to_sectors(sector_child_dict, org_id_dict)
1108+
# Assign organizations to sectors
1109+
assign_organizations_to_sectors(sector_child_dict, org_id_dict)
10551110

1056-
return org_id_dict
1111+
return org_id_dict
1112+
except Exception as e:
1113+
raise IngestionError(
1114+
SCAN_NAME, str(e), "Failed processing organizations"
1115+
) from e
10571116

10581117

10591118
def link_parent_child_organizations(
@@ -1108,6 +1167,7 @@ def assign_organizations_to_sectors(
11081167
except Exception as e:
11091168
print("Error assigning organization to sectors:")
11101169
print(e)
1170+
raise e
11111171

11121172

11131173
def process_request(request_list, sector_child_dict, parent_child_dict, org_id_dict):
@@ -1264,6 +1324,9 @@ def process_organization(request, network_list, location_dict, org_id_dict):
12641324
org_id_dict[request["_id"]] = org_record.id
12651325
except Exception as e:
12661326
LOGGER.info("Error saving organization: %s - %s", e, request["_id"])
1327+
raise IngestionError(
1328+
SCAN_NAME, str(e), "Failed processing organizations"
1329+
) from e
12671330

12681331

12691332
if __name__ == "__main__":

0 commit comments

Comments (0)