-
Notifications
You must be signed in to change notification settings - Fork 65
/
Copy pathchef_node_checkin.py
executable file
·64 lines (52 loc) · 2.11 KB
/
chef_node_checkin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
#!/usr/bin/python
from datetime import datetime
import os
def PopulateMetrics(log):
    """Collect chef-client check-in metrics and print a status line.

    Scans ``log`` for lines containing 'INFO: Chef Run complete in', counts
    the processes under /proc whose command line mentions 'chef-client',
    and prints one "status ..." line graded by the age of the newest run.

    Args:
        log: Path of the chef client log file to scan.

    Returns:
        A new dict with three integer entries:
        'timeSinceCheckIn' (seconds between now and the newest completed
        run), 'checkInDuration' (run time, truncated to whole seconds,
        taken from the last matching line in file order) and
        'processesAmount' (number of matching processes).

    Raises:
        IOError: If ``log`` cannot be opened.
        IndexError: If ``log`` holds no completed-run line, or a matching
            line has too few fields.
        ValueError: If a timestamp or duration field is not numeric.
    """
    results = {
        'timeSinceCheckIn': 0,
        'checkInDuration': 0,
        'processesAmount': 0,
    }

    clientRuns = []
    # "with" guarantees the log handle is closed even if parsing fails.
    with open(log, 'r') as logFile:
        for line in logFile:
            if 'INFO: Chef Run complete in' not in line:
                continue
            # Characters 1-10 are read as YYYY-MM-DD and 12-19 as HH:MM:SS;
            # positions 0 and 11 are skipped (presumably the '[' and 'T' of
            # an ISO-8601 stamp - confirm against the chef log format).
            # Anything after position 19, such as a UTC offset, is ignored.
            date = line[1:11].split('-')
            clock = line[12:20].split(':')
            clientRuns.append(datetime(
                int(date[0]), int(date[1]), int(date[2]),
                int(clock[0]), int(clock[1]), int(clock[2])))
            # Seventh whitespace-separated field is the run time in seconds.
            results['checkInDuration'] = int(float(line.split()[6]))

    if not clientRuns:
        # Callers distinguish "nothing parsable yet" by catching IndexError.
        raise IndexError('no completed chef run found in {0}'.format(log))

    # strftime('%s') gives epoch seconds where the C library supports it
    # (e.g. glibc); it is not a documented Python directive, but this
    # function already requires Linux because it reads /proc.
    results['timeSinceCheckIn'] = (
        int(datetime.now().strftime('%s'))
        - int(max(clientRuns).strftime('%s'))
    )

    for pid in os.listdir('/proc'):
        if not pid.isdigit():
            continue
        try:
            with open(os.path.join('/proc', pid, 'cmdline'), 'rb') as cmdline:
                # Bytes literal keeps the test valid on Python 2 and 3.
                if b'chef-client' in cmdline.read():
                    results['processesAmount'] += 1
        except (IOError, OSError):
            # The process exited (or became unreadable) between the
            # directory listing and the open; it is simply not counted.
            continue

    # print(...) with one argument behaves identically as a Python 2
    # statement and as a Python 3 function call.
    elapsed = results['timeSinceCheckIn']
    if elapsed > 86400:  # more than 24 hours
        # NOTE(review): "Critcal" is misspelled; left unchanged in case
        # alarm criteria match on the emitted text - confirm before fixing.
        print("status Critcal node has not checked in for {0} seconds".format(
            elapsed))
    elif elapsed > 3600:  # more than 1 hour
        print("status Warning node has not checked in for {0} seconds".format(
            elapsed))
    else:
        print("status OK node checked in {0} seconds ago".format(elapsed))
    return results
# Path of the chef client log handed to PopulateMetrics.
logfile = '/var/log/chef/client.log'
# Defaults that are reported whenever PopulateMetrics does not return
# normally, so the three metric lines below are always emitted.
metrics = {'timeSinceCheckIn': 0, 'checkInDuration': 0, 'processesAmount': 0}
try:
    metrics = PopulateMetrics(logfile)
# Anything OS related with file handling should warn and exit
except IOError as err:
    print("status Warning {0}".format(err))
# Raised when the log yields no parsable completed-run entry
except IndexError:
    print("status OK node has not generated a parsable log")
# Exception (not a bare except) so SystemExit and KeyboardInterrupt still
# propagate once the metrics have been printed.
except Exception:
    print("status Warning unhandled error executing script")
# Always print out metrics to allow REACH to report metrics
finally:
    # Single-argument print(...) emits the same text on Python 2 and 3.
    print("metric timeSinceCheckIn int32 {0}".format(
        metrics['timeSinceCheckIn']))
    print("metric checkInDuration int32 {0}".format(
        metrics['checkInDuration']))
    print("metric numberOfClients int32 {0}".format(
        metrics['processesAmount']))