1- from __future__ import absolute_import
2- from __future__ import division , print_function , unicode_literals
1+ # Copy this file over pull_alerts.py in a clone of https://github.com/lyft/opsreview
2+ # To include the low-urgency detailed view, run: python pull_alerts.py --include-low
3+
4+ from __future__ import (absolute_import , division , print_function ,
5+ unicode_literals )
36
47import argparse
58import logging
6- import urllib
7- from collections import Counter , OrderedDict , defaultdict , namedtuple
9+ from collections import defaultdict
810from datetime import datetime , timedelta
9- from dateutil import tz
10- import dateutil .parser
1111
12+ import dateutil .parser
1213import pygerduty .v2
13- from prettytable import PrettyTable
14-
15- try :
16- import settings
17- except ImportError :
18- print ("*** Error: Follow setup instructions in README.md to create settings.py" )
19- raise SystemExit (1 )
14+ from dateutil import relativedelta , tz
2015
16+ import settings
2117
2218logger = logging .getLogger (__name__ )
2319
2420pagerduty_service = pygerduty .v2 .PagerDuty (settings .PAGERDUTY_API_TOKEN )
2521LOCAL_TZ = tz .tzlocal ()
26- Tag = namedtuple ("Tag" , ["tag" , "display_name" ])
27- TAGS = [
28- Tag (tag = "#a" , display_name = "Actionable (#a)" ),
29- Tag (tag = "#na" , display_name = "Non Actionable (#na)" ),
30- Tag (tag = "#t" , display_name = "Transient (#t)" ),
31- Tag (tag = "#s" , display_name = "Seasonal (#s)" ),
32- Tag (tag = "#abot" , display_name = "Actionable By Other Team (#abot)" ),
33- ]
3422
3523
class FormattedIncident(object):
    """Display-oriented holder for the fields of one incident.

    Instances start empty; ``get_formatted_incidents`` in this file assigns
    the attributes afterwards.  The members below read ``created_on``,
    ``service``, ``description``, ``url``, ``notes`` and ``urgency``.
    """

    def pretty_output(self):
        """Return a multi-line text summary of the incident.

        ``notes`` is interpolated as-is, so whatever object the caller stored
        there is rendered through ``str.format``.
        """
        return u'Time: {}\n Service: {}\n Description: {}\n URL: {}\n Notes:\n {}\n '.format(
            self.formatted_created_at,
            self.service,
            self.description,
            self.url,
            self.notes,
        )

    @property
    def is_high_urgency(self):
        """True unless the incident is low urgency.

        An incident is treated as low urgency when ``urgency`` equals
        ``'low'`` or when the service name contains ``'-low-'``.
        """
        return not (self.urgency == 'low' or '-low-' in self.service)

    @property
    def formatted_created_at(self):
        """Return ``created_on`` as e.g. ``'Wed, May 8 - 9:05 PM'``.

        The original format string used ``%-d`` and ``%-I`` to drop leading
        zeros.  Those flags are an extension of some C libraries and are
        rejected on platforms whose ``strftime`` lacks them, so the
        unpadded day and 12-hour clock value are computed from the datetime
        fields instead.  The resulting text is the same.
        """
        created = self.created_on
        # 0 and 12 o'clock both display as 12 on a 12-hour clock.
        hour_12 = created.hour % 12 or 12
        return '{} {} - {}:{}'.format(
            created.strftime('%a, %b'),
            created.day,
            hour_12,
            created.strftime('%M %p'),
        )
4641
def recent_incidents_for_services(services):
    """Fetch the incidents raised on ``services`` during the on-call window.

    The window opens at ``get_oncall_start()`` and closes eight days later.

    :param services: iterable of objects exposing an ``id`` attribute.
    :return: list built from ``pagerduty_service.incidents.list(...)``.
    """
    ids = [svc.id for svc in services]
    window_start = get_oncall_start()
    # NOTE(review): print_stats labels its window as 7 days while this query
    # spans 8 -- confirm the extra day is intentional.
    query = {
        'service_ids': ids,
        'since': window_start,
        'until': window_start + timedelta(days=8),
    }
    return list(pagerduty_service.incidents.list(**query))
53+
54+
def get_oncall_start(now=None):
    """Return 12:00 on the most recent Wednesday strictly before today.

    The on-call rotation starts on Wednesday at noon.  When the reference
    day is itself a Wednesday, the Wednesday one week earlier is returned,
    so that the rotation that is just ending is the one reported.

    :param now: reference moment; defaults to the current local time.
        Supplying it makes the calculation reproducible.
    :return: ``now`` moved to 12:00:00.000000 and shifted back 1-7 days so
        that it lands on a Wednesday.  ``tzinfo`` is carried over unchanged.
    """
    if now is None:
        now = datetime.now(tz=tz.tzlocal())
    noon = now.replace(hour=12, minute=0, second=0, microsecond=0)
    # weekday(): Monday is 0, so Wednesday is 2.  The modulo yields the
    # distance back to the latest Wednesday; a distance of 0 means "today is
    # Wednesday", which must map to a full week back.
    days_back = (noon.weekday() - 2) % 7 or 7
    return noon - timedelta(days=days_back)
6766
6867
def print_all_incidents(
    include_low
):
    """Print the page report for every configured escalation policy.

    Services are collected from each policy in
    ``settings.ESCALATION_POLICIES``, their incidents are fetched and
    formatted, and then the high urgency pages, optionally the low urgency
    pages, the statistics table and the overall total are printed.

    :param include_low: when truthy, also print the per-description listing
        of low urgency pages.
    """
    services = [
        service
        for policy in settings.ESCALATION_POLICIES
        for service in pagerduty_service.escalation_policies.show(policy).services
    ]

    all_incidents = get_formatted_incidents(recent_incidents_for_services(services))

    # Split once into the two urgency buckets, keeping the original order.
    high_urg_incidents, low_urg_incidents = [], []
    for incident in all_incidents:
        bucket = high_urg_incidents if incident.is_high_urgency else low_urg_incidents
        bucket.append(incident)

    print('\n ########## High Urgency Pages ##########')
    print_pages_by_description(high_urg_incidents)
    if include_low:
        print('\n ########## Low Urgency Pages ##########')
        print_pages_by_description(low_urg_incidents)

    print_stats(high_urg_incidents, low_urg_incidents)

    print('Total Pages: {}'.format(len(all_incidents)))
88+
89+
def print_pages_by_notes(incidents):
    """Print the incidents grouped by their ``last_note`` value.

    Each group gets a header line with the note and the group size, followed
    by one line per incident showing its description and URL.

    :param incidents: iterable of objects exposing ``last_note``,
        ``description`` and ``url``.
    """
    grouped = defaultdict(list)
    for page in incidents:
        grouped[page.last_note].append(page)

    for note in grouped:
        members = grouped[note]
        print('\n {} generated {} incidents:'.format(note, len(members)))
        for page in members:
            print('\t - {} ({})'.format(page.description, page.url))
99+
100+
def print_pages_by_description(incidents):
    """Print the incidents grouped by description.

    Each group gets a bold header with the description and how many times it
    paged, followed by one bullet per incident linking to its URL and showing
    its last note.  The ``'NO NOTE'`` placeholder is rendered as "no note".

    :param incidents: iterable of objects exposing ``description``, ``url``
        and ``last_note``.
    """
    grouped = defaultdict(list)
    for page in incidents:
        grouped[page.description].append(page)

    for desc in grouped:
        members = grouped[desc]
        print('\n **{}** [Paged {} times]:'.format(desc, len(members)))
        for page in members:
            line = (
                '- [alarm paged]({}) - no note'.format(page.url)
                if page.last_note == 'NO NOTE'
                else '- [alarm paged]({}) - {}'.format(page.url, page.last_note)
            )
            print(line)
111113
112114
113115def get_formatted_incidents (recent_incidents ):
@@ -116,6 +118,7 @@ def get_formatted_incidents(recent_incidents):
116118 formatted_incident = FormattedIncident ()
117119 formatted_incident .service = incident .service .summary
118120 formatted_incident .url = incident .html_url
121+ formatted_incident .urgency = incident .urgency
119122 if hasattr (incident , 'title' ):
120123 formatted_incident .description = incident .title
121124 elif hasattr (incident , 'summary' ):
@@ -131,107 +134,70 @@ def get_formatted_incidents(recent_incidents):
131134 for note in notes :
132135 formatted_notes .append (u'{}: {}' .format (note .user .summary , note .content ))
133136 formatted_incident .notes = formatted_notes
137+ formatted_incident .last_note = formatted_notes [- 1 ] if formatted_notes else 'NO NOTE'
134138 formatted_incidents .append (formatted_incident )
135139
136140 return formatted_incidents
137141
138142
139- def _tag_incident (incident , tag_stats ):
140- tagged = False
141- for tag in TAGS :
142- found_tag = any (tag .tag in note for note in incident .notes )
143- if not found_tag :
144- continue
145- tagged = True
146- tag_stats [tag ] += 1
147- return tagged
def print_stats(high_urg_incidents, low_urg_incidents):
    """Print a pipe-delimited statistics table for both urgency groups.

    The heading shows the window from ``get_oncall_start()`` to seven days
    later.  The rows show the counts returned by ``get_breakdown`` for each
    group, followed by the group totals.

    :param high_urg_incidents: sized collection of high urgency incidents.
    :param low_urg_incidents: sized collection of low urgency incidents.
    """
    template = """\n # Statistics from {} to {}
| Incidents | High Urgency | Low Urgency |
| -------------------- | ------------ | ----------- |
| Actionable (#a) | {:12} | {:11} |
| Non Actionable (#na) | {:12} | {:11} |
| Transient (#t) | {:12} | {:11} |
| Not Tagged | {:12} | {:11} |
| TOTAL | {:12} | {:11} |
"""
    high_counts = get_breakdown(high_urg_incidents)
    low_counts = get_breakdown(low_urg_incidents)

    window_start = get_oncall_start()
    window_end = window_start + timedelta(days=7)
    stamp = '%m/%d %H:%M'

    cells = [window_start.strftime(stamp), window_end.strftime(stamp)]
    # Table rows alternate high / low for each category, in breakdown order.
    for high_value, low_value in zip(high_counts, low_counts):
        cells.extend((high_value, low_value))
    cells.extend((len(high_urg_incidents), len(low_urg_incidents)))

    print(template.format(*cells))
148162
149163
150- def print_stats (all_incidents , include_stats ):
151- if not include_stats :
152- return
def get_breakdown(incidents):
    """Count incidents per tag category.

    Each incident is counted exactly once, in the first category that
    matches, checked in the order actionable, non actionable, transient.
    Incidents matching none of them are counted as not tagged.

    :param incidents: iterable of incidents accepted by ``is_actionable``,
        ``is_non_actionable`` and ``is_transient``.
    :return: tuple ``(actionable, non_actionable, transient, not_tagged)``.
    """
    checks = (is_actionable, is_non_actionable, is_transient)
    # One slot per check, plus a trailing slot for "not tagged".
    tally = [0] * (len(checks) + 1)
    for incident in incidents:
        for slot, matches in enumerate(checks):
            if matches(incident):
                break
        else:
            slot = len(checks)
        tally[slot] += 1
    return tuple(tally)
153179
154- stats_table = PrettyTable ()
155- stats_table .field_names = ["Incidents" , "Number" ]
156- stats_table .align ["Incidents" ] = "l"
157- stats_table .align ["Number" ] = "r"
158180
159- tag_stats = Counter ()
def is_actionable(incident):
    """Return True when any note on ``incident`` carries the ``#a`` tag.

    The tag must appear as a whole token.  A plain substring test would also
    accept longer tags that merely start with the same characters (for
    example ``#abot``) and URL fragments such as ``page#anchor``.

    :param incident: object whose ``notes`` attribute is an iterable of
        strings.
    """
    # Local import: ``re`` is not among the module-level imports visible in
    # this file.
    import re

    # Not preceded by a word character or '#', not followed by a word character.
    tag = re.compile(r'(?<![\w#])#a(?!\w)')
    return any(tag.search(note) for note in incident.notes)
160183
161- not_tagged = 0
162- for i in all_incidents :
163- tagged = _tag_incident (i , tag_stats )
164- not_tagged += not tagged
165-
166- for tag in TAGS :
167- stats_table .add_row ([tag .display_name , tag_stats [tag ]])
168- stats_table .add_row (["Not Tagged" , not_tagged ])
169- stats_table .add_row (["Total" , len (all_incidents )])
170-
171- print (stats_table )
172-
173-
174- def sort_incidents (all_incidents , group_by_description , group_by_service ):
175- description_to_incident_list = defaultdict (list )
176- service_to_incident_list = defaultdict (list )
177- for incident in all_incidents :
178- description_to_incident_list [incident .description ].append (incident )
179- for incident in all_incidents :
180- service_to_incident_list [incident .service ].append (incident )
181- # Sort by desc count
182- sorted_description_to_incident_list = OrderedDict (sorted (
183- description_to_incident_list .items (),
184- key = lambda x : len (x [1 ]),
185- reverse = True
186- ))
187- sorted_service_to_incident_list = OrderedDict (sorted (
188- service_to_incident_list .items (),
189- key = lambda x : len (x [1 ]),
190- reverse = True
191- ))
192184
193- if group_by_description :
194- all_incidents = []
195- for incident_list in sorted_description_to_incident_list .values ():
196- all_incidents += incident_list
197- else :
198- all_incidents = sorted (all_incidents , key = lambda i : i .created_on )
199- return all_incidents , sorted_description_to_incident_list , sorted_service_to_incident_list
def is_non_actionable(incident):
    """Return True when any note on ``incident`` carries the ``#na`` tag.

    The tag must appear as a whole token.  A plain substring test would also
    accept longer tags that merely start with the same characters (for
    example ``#name``) and URL fragments such as ``page#nav``.

    :param incident: object whose ``notes`` attribute is an iterable of
        strings.
    """
    # Local import: ``re`` is not among the module-level imports visible in
    # this file.
    import re

    # Not preceded by a word character or '#', not followed by a word character.
    tag = re.compile(r'(?<![\w#])#na(?!\w)')
    return any(tag.search(note) for note in incident.notes)
187+
188+
def is_transient(incident):
    """Return True when any note on ``incident`` carries the ``#t`` tag.

    The tag must appear as a whole token.  A plain substring test would also
    accept longer tags that merely start with the same characters (for
    example ``#todo``) and URL fragments such as ``page#top``.

    :param incident: object whose ``notes`` attribute is an iterable of
        strings.
    """
    # Local import: ``re`` is not among the module-level imports visible in
    # this file.
    import re

    # Not preceded by a word character or '#', not followed by a word character.
    tag = re.compile(r'(?<![\w#])#t(?!\w)')
    return any(tag.search(note) for note in incident.notes)
200191
201192
if __name__ == '__main__':
    # Script entry point: set up default logging, read the single CLI flag
    # and print the report.
    logging.basicConfig()

    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--include-low",
        dest="include_low",
        action="store_true",
        default=False,
        help="Include low urgency detailed view",
    )
    options = cli.parse_args()

    print_all_incidents(include_low=options.include_low)
0 commit comments