@@ -2,13 +2,15 @@

import os
import time
+from collections import deque
from datetime import datetime, timezone
+from math import ceil
+
+import boto3
import pytz

from tap_cloudwatch.exception import InvalidQueryException

-import boto3
-from math import ceil

class CloudwatchAPI:
    """Cloudwatch class for interacting with the API."""
@@ -17,6 +19,10 @@ def __init__(self, logger):
        """Initialize CloudwatchAPI."""
        self._client = None
        self.logger = logger
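+        # CloudWatch Logs Insights returns at most 10,000 rows per query and
+        # limits how many queries can run concurrently per account.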
+        self.limit = 10000
+        self.max_concurrent_queries = 20

    @property
    def client(self):
@@ -64,7 +68,7 @@ def _create_client(self, config):
    def _request_more_records():
        return True

-    def split_batch_into_windows(self, start_time, end_time, batch_increment_s):
+    def _split_batch_into_windows(self, start_time, end_time, batch_increment_s):
        diff_s = end_time - start_time
        total_batches = ceil(diff_s / batch_increment_s)
        batch_windows = []
@@ -79,71 +83,136 @@ def split_batch_into_windows(self, start_time, end_time, batch_increment_s):
            batch_windows.append((query_start, query_end))
        return batch_windows

-    def validate_query(self, query):
+    def _validate_query(self, query):
        if "|sort" in query.replace(" ", ""):
            raise InvalidQueryException("sort not allowed")
        if "|limit" in query.replace(" ", ""):
            raise InvalidQueryException("limit not allowed")
        if "stats" in query:
            raise InvalidQueryException("stats not allowed")
        if "@timestamp" not in query.split("|")[0]:
-            raise InvalidQueryException("@timestamp field is used as the replication key so it must be selected")
+            raise InvalidQueryException(
+                "@timestamp field is used as the replication key so it must be selected"
+            )

    def get_records_iterator(self, bookmark, log_group, query, batch_increment_s):
        """Retrieve records from Cloudwatch."""
        end_time = datetime.now(timezone.utc).timestamp()
        start_time = bookmark.timestamp()
-        self.validate_query(query)
-        batch_windows = self.split_batch_into_windows(start_time, end_time, batch_increment_s)
+        self._validate_query(query)
+        batch_windows = self._split_batch_into_windows(
+            start_time, end_time, batch_increment_s
+        )

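+        # Pipeline the windows: keep up to max_concurrent_queries Logs Insights
+        # queries in flight, yielding each window's results in submission order
+        # instead of running the batches strictly one at a time.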
+        queue = deque()
        for window in batch_windows:
-            yield self.handle_batch_window(window[0], window[1], log_group, query)
-
-    def handle_limit_exceeded(self, response, log_group, query_start, query_end, query):
+            if len(queue) < (self.max_concurrent_queries - 1):
+                # Below the concurrency cap: submit this window and move on.
+                queue.append(
+                    (
+                        self._start_query(window[0], window[1], log_group, query),
+                        window[0],
+                        window[1],
+                    )
+                )
+            else:
+                # At the cap: drain the oldest query before submitting the next.
+                query_id, start, end = queue.popleft()
+                queue.append(
+                    (
+                        self._start_query(window[0], window[1], log_group, query),
+                        window[0],
+                        window[1],
+                    )
+                )
+                results = self._get_results(log_group, start, end, query, query_id)
+                yield results
+
+        while len(queue) > 0:
+            query_id, start, end = queue.popleft()
+            results = self._get_results(log_group, start, end, query, query_id)
+            yield results
+
+
134
+ def _handle_limit_exceeded (
135
+ self , response , log_group , query_start , query_end , query
136
+ ):
103
137
results = response .get ("results" )
104
138
last_record = results [- 1 ]
105
139
106
- latest_ts_str = [i ["value" ] for i in last_record if i ["field" ] == "@timestamp" ][0 ]
140
+ latest_ts_str = [i ["value" ] for i in last_record if i ["field" ] == "@timestamp" ][
141
+ 0
142
+ ]
        # Include latest ts in query, this could cause duplicates but
        # without it we might miss ties
-        query_start = int(datetime.fromisoformat(latest_ts_str).replace(tzinfo=pytz.UTC).timestamp())
-        self.handle_batch_window(query_start, query_end, log_group, query, prev_start=query_start)
+        new_query_start = int(
+            datetime.fromisoformat(latest_ts_str).replace(tzinfo=pytz.UTC).timestamp()
+        )
+        new_query_id = self._start_query(new_query_start, query_end, log_group, query)
+        # Pass prev_start so the sub-batch can detect when it makes no progress.
+        return self._get_results(
+            log_group, new_query_start, query_end, query, new_query_id,
+            prev_start=new_query_start,
+        )

-    def alter_query(self, query):
+    def _alter_query(self, query):
        query += " | sort @timestamp asc"
        return query

-    def handle_batch_window(self, query_start, query_end, log_group, query, prev_start=None):
+    def _start_query(self, query_start, query_end, log_group, query):
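+        """Submit an asynchronous Logs Insights query and return its query id."""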
        self.logger.info(
            (
-                "Retrieving batch from:"
+                "Submitting query for batch from:"
                f" `{datetime.utcfromtimestamp(query_start).isoformat()} UTC` -"
                f" `{datetime.utcfromtimestamp(query_end).isoformat()} UTC`"
            )
        )
-        limit = 10000
-        query = self.alter_query(query)
+        query = self._alter_query(query)
        start_query_response = self.client.start_query(
            logGroupName=log_group,
            startTime=query_start,
            endTime=query_end,
            queryString=query,
-            limit=limit,
+            limit=self.limit,
        )
+        return start_query_response["queryId"]

-        query_id = start_query_response["queryId"]
-        response = None
+    def _get_results(
+        self, log_group, query_start, query_end, query, query_id, prev_start=None
+    ):
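+        """Poll for a query's results, re-running the sub-batch if truncated."""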
+        self.logger.info(
+            (
+                "Retrieving results for batch from:"
+                f" `{datetime.utcfromtimestamp(query_start).isoformat()} UTC` -"
+                f" `{datetime.utcfromtimestamp(query_end).isoformat()} UTC`"
+            )
+        )
+        response = self.client.get_query_results(queryId=query_id)
-        while response is None or response["status"] == "Running":
-            time.sleep(1)
+        # "Scheduled" precedes "Running"; treat both as still in progress.
+        while response is None or response["status"] in ("Scheduled", "Running"):
+            time.sleep(0.5)
            response = self.client.get_query_results(queryId=query_id)
        if response.get("ResponseMetadata", {}).get("HTTPStatusCode") != 200:
            raise Exception(f"Failed: {response}")
        result_size = response.get("statistics", {}).get("recordsMatched")
-        if result_size > limit:
+        results = response["results"]
+        self.logger.info(f"Result set size '{int(result_size)}' received.")
+        if result_size > self.limit:
            if prev_start == query_start:
-                raise Exception("Stuck in a loop, smaller batch still exceeds limit. Reduce batch window.")
+                raise Exception(
+                    "Stuck in a loop, smaller batch still exceeds limit. "
+                    "Reduce batch window."
+                )
            self.logger.info(
-                f"Result set size '{int(result_size)}' exceeded limit '{limit}'. Re-running sub-batch..."
+                f"Result set size '{int(result_size)}' exceeded limit "
+                f"'{self.limit}'. Re-running sub-batch..."
+            )
+            results += self._handle_limit_exceeded(
+                response, log_group, query_start, query_end, query
            )
-        self.handle_limit_exceeded(response, log_group, query_start, query_end, query)
-        return response
+        return results