
Commit c49bdcc

ablakley-r7 and rbowden-r7 authored and committed
[SOAR-18657] mimecast v2 (#3068)
* Update threads, error handling, custom config, rate limiting
* Add connection test
* Fix lint
* Add unit tests
* Add unit tests
* Fix requirements
* Fix requirements
1 parent e45b9db commit c49bdcc

13 files changed: +541 −66 lines changed

plugins/mimecast_v2/icon_mimecast_v2/connection/connection.py

Lines changed: 25 additions & 0 deletions
@@ -4,6 +4,7 @@
 from icon_mimecast_v2.util.api import API
 
 # Custom imports below
+from datetime import datetime, timezone
 
 
 class Connection(insightconnect_plugin_runtime.Connection):
@@ -23,3 +24,27 @@ def test(self):
             return {"success": True}
         except PluginException as error:
             raise ConnectionTestException(cause=error.cause, assistance=error.assistance, data=error.data)
+
+    def test_task(self):
+        try:
+            now_date = datetime.now(tz=timezone.utc).date()
+            self.api.get_siem_logs(log_type="receipt", query_date=now_date, page_size=1, max_threads=1, next_page=None)
+            self.logger.info("The connection test to Mimecast was successful.")
+            return {"success": True}
+        except PluginException as error:
+            return_message = ""
+            failed_message = "The connection test to Mimecast has failed."
+            self.logger.info(failed_message)
+            return_message += f"{failed_message}\n"
+
+            cause_message = f"This failure was caused by: '{error.cause}'"
+            self.logger.info(cause_message)
+            return_message += f"{cause_message}\n"
+
+            self.logger.info(error.assistance)
+            return_message += f"{error.assistance}\n"
+            raise ConnectionTestException(
+                cause="Configured credentials do not have permission for this API endpoint.",
+                assistance="Please ensure credentials have required permissions.",
+                data=return_message,
+            )
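The new test_task probes the SIEM endpoint with the cheapest possible query (one receipt page, one thread) and, on failure, folds every logged line into the message handed back through ConnectionTestException's data field. A minimal sketch of that aggregation, with FakePluginError standing in for PluginException (an illustration only, not the plugin runtime's class):

class FakePluginError(Exception):
    # Stand-in for PluginException; illustration only.
    def __init__(self, cause: str, assistance: str):
        super().__init__(cause)
        self.cause = cause
        self.assistance = assistance


def build_failure_message(error: FakePluginError) -> str:
    # Mirror test_task: each line is logged and also appended to the
    # message surfaced to the user via ConnectionTestException(data=...).
    lines = [
        "The connection test to Mimecast has failed.",
        f"This failure was caused by: '{error.cause}'",
        error.assistance,
    ]
    return "".join(f"{line}\n" for line in lines)


print(build_failure_message(FakePluginError("Invalid API key", "Verify the client ID and secret.")))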
Lines changed: 85 additions & 52 deletions
@@ -1,16 +1,32 @@
 import insightconnect_plugin_runtime
 from insightconnect_plugin_runtime.exceptions import APIException, PluginException
+from insightconnect_plugin_runtime.helper import compare_and_dedupe_hashes, hash_sha1
 from .schema import MonitorSiemLogsInput, MonitorSiemLogsOutput, MonitorSiemLogsState, Input, Output, Component, State
 from typing import Dict, List, Tuple
 from datetime import datetime, timezone, timedelta
 import copy
 
+# Date format for conversion
+DATE_FORMAT = "%Y-%m-%d"
+# Default and max values
 LOG_TYPES = ["receipt", "url protect", "attachment protect"]
+DEFAULT_THREAD_COUNT = 10
+DEFAULT_PAGE_SIZE = 100
 MAX_LOOKBACK_DAYS = 7
 INITIAL_MAX_LOOKBACK_DAYS = 1
+# Run type
 INITIAL_RUN = "initial_run"
 SUBSEQUENT_RUN = "subsequent_run"
 PAGINATION_RUN = "pagination_run"
+# Access keys for state and custom config
+LOG_HASHES = "log_hashes"
+QUERY_CONFIG = "query_config"
+QUERY_DATE = "query_date"
+CAUGHT_UP = "caught_up"
+NEXT_PAGE = "next_page"
+# Access keys for custom config
+THREAD_COUNT = "thread_count"
+PAGE_SIZE = "page_size"
 
 
 class MonitorSiemLogs(insightconnect_plugin_runtime.Task):
@@ -24,21 +40,21 @@ def __init__(self):
         )
 
     def run(self, params={}, state={}, custom_config={}):  # pylint: disable=unused-argument
-        self.logger.info(f"TASK: Received State: {state}")
+        self.logger.info(f"TASK: Received State: {state.get(QUERY_CONFIG)}")
         existing_state = state.copy()
         try:
-            # TODO: Additional error handling
-            run_condition = self.detect_run_condition(state.get("query_config", {}))
-            self.logger.info(f"TASK: Current run state is {run_condition}")
-            state = self.update_state(state, custom_config)
-            self.logger.info(f"NEW STATE: {state}")
             now_date = datetime.now(tz=timezone.utc).date()
+            run_condition = self.detect_run_condition(state.get(QUERY_CONFIG, {}), now_date)
+            self.logger.info(f"TASK: Run state is {run_condition}")
+            state = self.update_state(state)
+            page_size, thead_count = self.apply_custom_config(state, custom_config)
             max_run_lookback_date = self.get_max_lookback_date(now_date, run_condition, bool(custom_config))
-            query_config = self.prepare_query_params(state.get("query_config", {}), max_run_lookback_date, now_date)
-            logs, query_config = self.get_all_logs(run_condition, query_config)
-            # TODO: Dedupe
+            query_config = self.prepare_query_params(state.get(QUERY_CONFIG, {}), max_run_lookback_date, now_date)
+            logs, query_config = self.get_all_logs(run_condition, query_config, page_size, thead_count)
             self.logger.info(f"TASK: Total logs collected this run {len(logs)}")
-            exit_state, has_more_pages = self.prepare_exit_state(state, query_config, now_date)
+            logs, log_hashes = compare_and_dedupe_hashes(state.get(LOG_HASHES, []), logs)
+            self.logger.info(f"TASK: Total logs after deduplication {len(logs)}")
+            exit_state, has_more_pages = self.prepare_exit_state(state, query_config, now_date, log_hashes)
             return logs, exit_state, has_more_pages, 200, None
         except APIException as error:
             self.logger.info(
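compare_and_dedupe_hashes and hash_sha1 come from insightconnect_plugin_runtime.helper; the wiring above implies the helper takes the hash list persisted from the previous run plus the newly collected logs, and returns only the unseen logs together with the hashes to store for next time. A rough functional equivalent under that assumption (dedupe_against_state is hypothetical, not the SDK helper):

import hashlib
import json
from typing import Dict, List, Tuple


def dedupe_against_state(previous_hashes: List[str], logs: List[Dict]) -> Tuple[List[Dict], List[str]]:
    # Hypothetical equivalent of compare_and_dedupe_hashes: drop any log whose
    # content hash was already emitted, and return the hashes to persist in state.
    new_logs, new_hashes = [], []
    seen = set(previous_hashes)
    for log in logs:
        digest = hashlib.sha1(json.dumps(log, sort_keys=True).encode()).hexdigest()
        if digest not in seen:
            seen.add(digest)
            new_logs.append(log)
            new_hashes.append(digest)
    return new_logs, new_hashes


logs, hashes = dedupe_against_state([], [{"id": 1}, {"id": 1}, {"id": 2}])
assert len(logs) == 2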
@@ -47,39 +63,40 @@ def run(self, params={}, state={}, custom_config={}): # pylint: disable=unused-
             return [], existing_state, False, error.status_code, error
         except PluginException as error:
             self.logger.info(f"Error: A Plugin exception has occurred. Cause: {error.cause} Error data: {error.data}.")
-            return [], existing_state, False, error.status_code, error
+            return [], existing_state, False, 500, error
         except Exception as error:
             self.logger.info(f"Error: Unknown exception has occurred. No results returned. Error Data: {error}")
             return [], existing_state, False, 500, PluginException(preset=PluginException.Preset.UNKNOWN, data=error)
 
-    def detect_run_condition(self, query_config: Dict) -> str:
+    def detect_run_condition(self, query_config: Dict, now_date: datetime) -> str:
         """
         Return runtype based on query configuration
         :param query_config:
+        :param now_date:
         :return: runtype string
         """
         if not query_config:
             return INITIAL_RUN
         for log_type_config in query_config.values():
-            if not log_type_config.get("caught_up"):
+            if not log_type_config.get(CAUGHT_UP) or log_type_config.get(QUERY_DATE) not in str(now_date):
                 return PAGINATION_RUN
         return SUBSEQUENT_RUN
 
-    def update_state(self, state: Dict, custom_config: Dict) -> Dict:
+    def update_state(self, state: Dict) -> Dict:
         """
         Initialise state, validate state, apply custom config
         :param state:
-        :param custom_config:
-        :return:
+        :return: State
         """
-        initial_log_type_config = {"caught_up": False}
+        initial_log_type_config = {CAUGHT_UP: False}
         if not state:
-            state = {"query_config": {log_type: copy.deepcopy(initial_log_type_config) for log_type in LOG_TYPES}}
-            self.apply_custom_config(state, custom_config)
+            self.logger.info("TASK: Initializing first state...")
+            state = {QUERY_CONFIG: {log_type: copy.deepcopy(initial_log_type_config) for log_type in LOG_TYPES}}
         else:
             for log_type in LOG_TYPES:
-                if log_type not in state.get("query_config", {}).keys():
-                    state["query_config"][log_type] = copy.deepcopy(initial_log_type_config)
+                if log_type not in state.get(QUERY_CONFIG, {}).keys():
+                    self.logger.info(f"TASK: {log_type} missing from state. Initializing...")
+                    state[QUERY_CONFIG][log_type] = copy.deepcopy(initial_log_type_config)
         return state
 
     def get_max_lookback_date(self, now_date: datetime, run_condition: str, custom_config: bool) -> datetime:
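detect_run_condition is effectively a three-state machine: an empty query_config means an initial run; any log type that has not caught up, or whose stored date is no longer today, forces a pagination run; everything else is a subsequent run. A standalone mirror of that decision (the example dates are arbitrary):

from datetime import date

INITIAL_RUN, SUBSEQUENT_RUN, PAGINATION_RUN = "initial_run", "subsequent_run", "pagination_run"


def detect_run_condition(query_config: dict, now_date: date) -> str:
    # Mirrors the task's logic: an empty config is a first run; a log type
    # that has not caught up, or whose stored date is not today, forces pagination.
    if not query_config:
        return INITIAL_RUN
    for cfg in query_config.values():
        if not cfg.get("caught_up") or cfg.get("query_date") not in str(now_date):
            return PAGINATION_RUN
    return SUBSEQUENT_RUN


today = date(2025, 1, 15)
assert detect_run_condition({}, today) == INITIAL_RUN
assert detect_run_condition({"receipt": {"caught_up": True, "query_date": "2025-01-15"}}, today) == SUBSEQUENT_RUN
assert detect_run_condition({"receipt": {"caught_up": False, "query_date": "2025-01-15"}}, today) == PAGINATION_RUN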
@@ -97,18 +114,23 @@ def get_max_lookback_date(self, now_date: datetime, run_condition: str, custom_c
         max_run_lookback_date = now_date - timedelta(days=max_run_lookback_days)
         return max_run_lookback_date
 
-    def apply_custom_config(self, state: Dict, custom_config: Dict) -> None:
+    def apply_custom_config(self, state: Dict, custom_config: Dict = {}) -> Tuple[int, int]:
         """
         Apply custom configuration for lookback, query date applies to start and end time of query
         :param current_query_config:
         :param custom_config:
-        :return: N/A
+        :return:
         """
-        # TODO: Additional custom config for page size, thread size, limit
-        current_query_config = state.get("query_config")
-        for log_type, lookback_date_string in custom_config.items():
-            self.logger.info(f"TASK: Supplied lookback date of {lookback_date_string} for {log_type} log type")
-            current_query_config[log_type] = {"query_date": lookback_date_string}
+        if custom_config:
+            self.logger.info("TASK: Custom config detected")
+            if not state:
+                current_query_config = state.get(QUERY_CONFIG)
+                for log_type, query_date_string in custom_config.items():
+                    self.logger.info(f"TASK: Supplied lookback date of {query_date_string} for log type {log_type}")
+                    current_query_config[log_type] = {QUERY_DATE: query_date_string}
+        page_size = max(1, min(custom_config.get(PAGE_SIZE, DEFAULT_PAGE_SIZE), DEFAULT_PAGE_SIZE))
+        thread_count = max(1, custom_config.get(THREAD_COUNT, DEFAULT_THREAD_COUNT))
+        return page_size, thread_count
 
     def prepare_query_params(self, query_config: Dict, max_lookback_date: Dict, now_date: datetime) -> Dict:
         """
@@ -119,18 +141,19 @@ def prepare_query_params(self, query_config: Dict, max_lookback_date: Dict, now_
         :return:
         """
         for log_type, log_type_config in query_config.items():
-            query_date_str = log_type_config.get("query_date")
-            self.logger.info(f"PREPPING {log_type_config}")
-            self.logger.info(f"{log_type}, {query_date_str}")
+            query_date_str = log_type_config.get(QUERY_DATE)
             if query_date_str:
-                query_date = datetime.strptime(query_date_str, "%Y-%m-%d").date()
+                query_date = datetime.strptime(query_date_str, DATE_FORMAT).date()
             if not query_date_str:
-                log_type_config["query_date"] = max_lookback_date
-            elif query_date < now_date and log_type_config.get("caught_up") is True:
+                self.logger.info(
+                    f"TASK: Query date for {log_type} log type is not present. Initializing to {max_lookback_date}"
+                )
+                log_type_config[QUERY_DATE] = max_lookback_date
+            elif query_date < now_date and log_type_config.get(CAUGHT_UP) is True:
                 self.logger.info(f"TASK: Log type {log_type} has caught up for {query_date}")
-                log_type_config["query_date"] = query_date + timedelta(days=1)
-                log_type_config["caught_up"] = False
-                log_type_config.pop("next_page")
+                log_type_config[QUERY_DATE] = query_date + timedelta(days=1)
+                log_type_config[CAUGHT_UP] = False
+                log_type_config.pop(NEXT_PAGE)
             query_config[log_type] = self.validate_config_lookback(log_type_config, max_lookback_date, now_date)
         return query_config
 
@@ -142,51 +165,61 @@ def validate_config_lookback(self, log_type_config: Dict, max_lookback_date: dat
         :param now_date:
         :return: log_type_config
         """
-        query_date = log_type_config.get("query_date")
+        query_date = log_type_config.get(QUERY_DATE)
         if isinstance(query_date, str):
-            query_date = datetime.strptime(query_date, "%Y-%m-%d").date()
+            query_date = datetime.strptime(query_date, DATE_FORMAT).date()
         if query_date < max_lookback_date:
-            return {"query_date": max_lookback_date}
+            return {QUERY_DATE: max_lookback_date}
         if query_date > now_date:
-            log_type_config["query_date"] = now_date
+            log_type_config[QUERY_DATE] = now_date
         return log_type_config
 
-    def get_all_logs(self, run_condition: str, query_config: Dict) -> Tuple[List, Dict]:
+    def get_all_logs(
+        self, run_condition: str, query_config: Dict, page_size: int, thead_count: int
+    ) -> Tuple[List, Dict]:
         """
         Gets all logs of provided log type. First retrieves batch URLs. Then downloads and reads batches, pooling logs.
         :param run_condition:
         :param query_config:
+        :param page_size:
+        :param thead_count:
         :return: Logs, updated query configuration (state)
         """
         complete_logs = []
         for log_type, log_type_config in query_config.items():
-            if (not log_type_config.get("caught_up")) or (run_condition != PAGINATION_RUN):
+            if (not log_type_config.get(CAUGHT_UP)) or (run_condition != PAGINATION_RUN):
                 logs, results_next_page, caught_up = self.connection.api.get_siem_logs(
                     log_type=log_type,
-                    query_date=log_type_config.get("query_date"),
-                    next_page=log_type_config.get("next_page"),
+                    query_date=log_type_config.get(QUERY_DATE),
+                    next_page=log_type_config.get(NEXT_PAGE),
+                    page_size=page_size,
+                    max_threads=thead_count,
                 )
                 complete_logs.extend(logs)
-                log_type_config.update({"next_page": results_next_page, "caught_up": caught_up})
+                log_type_config.update({NEXT_PAGE: results_next_page, CAUGHT_UP: caught_up})
             else:
                 self.logger.info(f"TASK: Query for {log_type} is caught up. Skipping as we are currently paginating")
         return complete_logs, query_config
 
-    def prepare_exit_state(self, state: dict, query_config: dict, now_date: datetime) -> Tuple[Dict, bool]:
+    def prepare_exit_state(
+        self, state: dict, query_config: dict, now_date: datetime, log_hashes: List[str]
+    ) -> Tuple[Dict, bool]:
         """
         Prepare state and pagination for task completion. Format date time.
         :param state:
         :param query_config:
         :param now_date:
+        :param log_hashes:
        :return: state, has_more_pages
         """
         has_more_pages = False
         for log_type_config in query_config.values():
-            query_date = log_type_config.get("query_date")
+            query_date = log_type_config.get(QUERY_DATE)
             if isinstance(query_date, str):
-                query_date = datetime.strptime(query_date, "%Y-%m-%d").date()
-            if (not log_type_config.get("caught_up")) or query_date < now_date:
+                query_date = datetime.strptime(query_date, DATE_FORMAT).date()
+            if (not log_type_config.get(CAUGHT_UP)) or query_date < now_date:
                 has_more_pages = True
-            log_type_config["query_date"] = query_date.strftime("%Y-%m-%d")
-        state["query_config"] = query_config
+            log_type_config[QUERY_DATE] = query_date.strftime(DATE_FORMAT)
+        state[QUERY_CONFIG] = query_config
+        state[LOG_HASHES] = log_hashes
         return state, has_more_pages
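validate_config_lookback clamps each stored date into the [max_lookback_date, now_date] window. Falling behind the window resets the whole per-log-type config, which deliberately discards any stale next_page cursor, while a future date is simply pulled back to today. A standalone mirror of that clamp (example dates are arbitrary, and string parsing is omitted):

from datetime import date, timedelta


def validate_lookback(config: dict, max_lookback_date: date, now_date: date) -> dict:
    # Mirrors validate_config_lookback: reset configs that fell out of the
    # lookback window (dropping pagination tokens), cap future dates at today.
    query_date = config.get("query_date")
    if query_date < max_lookback_date:
        return {"query_date": max_lookback_date}
    if query_date > now_date:
        config["query_date"] = now_date
    return config


today = date(2025, 1, 15)
window_start = today - timedelta(days=7)
stale = {"query_date": today - timedelta(days=30), "next_page": "abc"}
assert validate_lookback(stale, window_start, today) == {"query_date": window_start}
future = {"query_date": today + timedelta(days=3)}
assert validate_lookback(future, window_start, today) == {"query_date": today}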

plugins/mimecast_v2/icon_mimecast_v2/util/api.py

Lines changed: 21 additions & 14 deletions
@@ -5,12 +5,13 @@
     HTTPStatusCodes,
     ResponseExceptionData,
 )
-from insightconnect_plugin_runtime.helper import make_request, extract_json
+from insightconnect_plugin_runtime.helper import extract_json, make_request, rate_limiting
 from logging import Logger
 from requests import Response, Request
 from io import BytesIO
-from icon_mimecast_v2.util.endpoints import Endpoints
+from icon_mimecast_v2.util.constants import Endpoints
 from typing import Dict, List, Tuple
+from multiprocessing.dummy import Pool
 import gzip
 import json
 
@@ -39,17 +40,19 @@ def authenticate(self) -> None:
         self.logger.info("API: Authenticated")
 
     def get_siem_logs(
-        self, log_type: str, query_date: str, next_page: str, page_size: int = 100
+        self, log_type: str, query_date: str, next_page: str, page_size: int = 100, max_threads: int = 10
     ) -> Tuple[List[str], str, bool]:
         batch_download_urls, result_next_page, caught_up = self.get_siem_batches(
             log_type, query_date, next_page, page_size
         )
         logs = []
         self.logger.info(f"API: Getting SIEM logs from batches for log type {log_type}...")
-        for url in batch_download_urls:
-            batch_logs = self.get_siem_logs_from_batch(url=url)
-            if isinstance(batch_logs, (List, Dict)):
-                logs.extend(batch_logs)
+        self.logger.info(f"API: Applying page size limit of {page_size}")
+        with Pool(max_threads) as pool:
+            batch_logs = pool.imap(self.get_siem_logs_from_batch, batch_download_urls)
+            for result in batch_logs:
+                if isinstance(result, (List, Dict)):
+                    logs.extend(result)
         self.logger.info(f"API: Discovered {len(logs)} logs for log type {log_type}")
         return logs, result_next_page, caught_up
 
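multiprocessing.dummy.Pool is a thread pool behind the multiprocessing API, a reasonable fit here since batch downloads are I/O-bound; imap yields results lazily in submission order, so memory stays bounded. A self-contained sketch of the same pattern with a stubbed download function (fetch_batch and the URL list are illustrative, not part of the plugin):

from multiprocessing.dummy import Pool  # threads, not processes


def fetch_batch(url: str) -> list:
    # Stand-in for get_siem_logs_from_batch: download, gunzip, and parse
    # one batch file, returning its log entries.
    return [f"log from {url}"]


urls = [f"https://example.com/batch/{i}" for i in range(5)]
logs = []
with Pool(3) as pool:
    # imap yields each batch's result in order as soon as it is ready.
    for result in pool.imap(fetch_batch, urls):
        logs.extend(result)
print(len(logs))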
@@ -77,7 +80,6 @@ def get_siem_batches(
         return urls, batch_response.get("@nextPage"), caught_up
 
     def get_siem_logs_from_batch(self, url: str):
-        # TODO: Threading
        response = requests.request(method=GET, url=url, stream=False)
         with gzip.GzipFile(fileobj=BytesIO(response.content), mode="rb") as file_:
             logs = []
@@ -87,6 +89,7 @@ def get_siem_logs_from_batch(self, url: str):
                 logs.append(json.loads(decoded_line))
         return logs
 
+    @rate_limiting(5)
     def make_api_request(
         self,
         url: str,
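rate_limiting comes from insightconnect_plugin_runtime.helper; its exact behavior is not shown in this diff, but decorators of this shape typically retry the wrapped call with backoff when the API signals throttling. A generic sketch of the pattern (not the SDK's implementation; max_tries and the 429 check are assumptions):

import time
from functools import wraps


def retry_on_throttle(max_tries: int):
    # Illustrative decorator: retry with exponential backoff while the
    # wrapped call reports HTTP 429. Not the SDK's rate_limiting helper.
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            for attempt in range(max_tries):
                response = func(*args, **kwargs)
                if getattr(response, "status_code", None) != 429:
                    return response
                time.sleep(2**attempt)  # 1s, 2s, 4s, ...
            return response
        return wrapper
    return decorator


@retry_on_throttle(5)
def call_api():
    class FakeResponse:
        status_code = 200
    return FakeResponse()


print(call_api().status_code)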
@@ -101,7 +104,6 @@ def make_api_request(
         if auth:
             headers["Authorization"] = f"Bearer {self.access_token}"
         request = Request(url=url, method=method, headers=headers, params=params, data=data, json=json)
-        # TODO: Handle rate limit, handle retry backoff
         try:
             response = make_request(
                 _request=request,
@@ -117,11 +119,16 @@
                     status_code=exception.data.status_code,
                 )
             raise exception
-        if (
-            response.status_code == HTTPStatusCodes.UNAUTHORIZED
-            and extract_json(response).get("fail", [{}])[0].get("code") == "token_expired"
-        ):
-            self.authenticate()
+        if response.status_code == HTTPStatusCodes.UNAUTHORIZED:
+            json_data = extract_json(response)
+            if json_data.get("fail", [{}])[0].get("code") == "token_expired":
+                self.authenticate()
+                self.logger.info("API: Token has expired, attempting re-authentication...")
+                return self.make_api_request(url, method, headers, json, data, params, return_json, auth)
+            if response.status_code == HTTPStatusCodes.UNAUTHORIZED:
+                raise APIException(
+                    preset=PluginException.Preset.API_KEY, data=response.text, status_code=response.status_code
+                )
         if return_json:
             json_data = extract_json(response)
             return json_data
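The 401 handling now distinguishes an expired bearer token (re-authenticate, then replay the same request via recursion) from a genuinely rejected credential (raise APIException). A minimal standalone sketch of that control flow, with stubbed auth and transport (all names here are illustrative):

class AuthError(Exception):
    pass


class Client:
    def __init__(self):
        self.token = "expired"

    def authenticate(self):
        self.token = "fresh"

    def send(self, path: str) -> str:
        # Stub transport: only a fresh token succeeds.
        return "200 OK" if self.token == "fresh" else "401 token_expired"

    def request(self, path: str) -> str:
        response = self.send(path)
        if response.startswith("401"):
            if "token_expired" in response:
                self.authenticate()
                return self.request(path)  # replay with the new token
            raise AuthError("credentials rejected")
        return response


print(Client().request("/example/endpoint"))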

plugins/mimecast_v2/requirements.txt

Lines changed: 2 additions & 0 deletions
@@ -1,3 +1,5 @@
 # List third-party dependencies here, separated by newlines.
 # All dependencies must be version-pinned, eg. requests==1.2.0
 # See: https://pip.pypa.io/en/stable/user_guide/#requirements-files
+parameterized==0.8.1
+freezegun==1.5.1
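Both new pins are test dependencies, which lines up with the "Add unit tests" entries in the commit message: parameterized expands one test method into many cases, and freezegun pins datetime.now() so the date-window logic is deterministic. A hedged sketch of how they combine (the test subject below is illustrative, not the plugin's actual suite):

import unittest
from datetime import date, datetime, timezone

from freezegun import freeze_time
from parameterized import parameterized


def days_behind(query_date: date) -> int:
    # Illustrative subject: how far a stored query date lags today (UTC).
    return (datetime.now(tz=timezone.utc).date() - query_date).days


class TestDaysBehind(unittest.TestCase):
    @parameterized.expand([
        ("caught_up", date(2025, 1, 15), 0),
        ("one_day", date(2025, 1, 14), 1),
        ("window_edge", date(2025, 1, 8), 7),
    ])
    @freeze_time("2025-01-15 12:00:00")
    def test_days_behind(self, _name, query_date, expected):
        self.assertEqual(days_behind(query_date), expected)


if __name__ == "__main__":
    unittest.main()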
