Skip to content

Commit b6e6dae

Browse files
committed
Add slabinfo checks
1 parent 35b7ad6 commit b6e6dae

File tree

1 file changed

+160
-0
lines changed

1 file changed

+160
-0
lines changed

scripts/osc_slabinfo.nhc

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
# OSC NHC - slabinfo checks
2+
#
3+
# Trey Dockendorf <[email protected]>
4+
# 21 September 2018
5+
#
6+
7+
# Parallel arrays holding one entry per slab cache; index N of every array
# describes the same cache.  They start out empty and are filled in by
# nhc_slabinfo_gather_data.
SLAB_NAME=() SLAB_ACTIVE_OBJS=() SLAB_NUM_OBJS=() SLAB_ACTIVE_SLABS=() SLAB_NUM_SLABS=()
export SLAB_NAME
export SLAB_ACTIVE_OBJS
export SLAB_NUM_OBJS
export SLAB_ACTIVE_SLABS
export SLAB_NUM_SLABS
13+
14+
# Load the per-cache counters from a slabinfo file into the SLAB_* arrays.
#
# Usage:  nhc_slabinfo_gather_data [<file>]
#
#   <file>  Optional slabinfo-format file to parse instead of /proc/slabinfo.
#
# Empties and then refills SLAB_NAME, SLAB_ACTIVE_OBJS, SLAB_NUM_OBJS,
# SLAB_ACTIVE_SLABS and SLAB_NUM_SLABS, one element per data line.
#
# Returns 0 after parsing the file, or 1 (arrays left empty) when the file
# cannot be read.
function nhc_slabinfo_gather_data() {
    local SLABINFO_FILE="${1:-/proc/slabinfo}"
    local LINE_CNT=0
    local -a FIELD

    SLAB_NAME=() SLAB_ACTIVE_OBJS=() SLAB_NUM_OBJS=() SLAB_ACTIVE_SLABS=() SLAB_NUM_SLABS=()

    if [[ ! -r "$SLABINFO_FILE" ]]; then
        log "nhc_slabinfo_gather_data:  Unable to read $SLABINFO_FILE"
        export SLAB_NAME SLAB_ACTIVE_OBJS SLAB_NUM_OBJS SLAB_ACTIVE_SLABS SLAB_NUM_SLABS
        return 1
    fi

    # -r keeps backslashes literal.  The "|| [[ ... ]]" clause still processes
    # a final line that is not terminated by a newline.
    while read -r -a FIELD || [[ ${#FIELD[@]} -gt 0 ]]; do
        # Skip the two header lines ("slabinfo - version: ..." and "# name ...").
        if [[ "${FIELD[0]}" == "#" || "${FIELD[0]}" == "slabinfo" ]]; then
            continue
        fi
        # The slab counters are read from fields 13 and 14, so a line with
        # fewer than 15 fields cannot be a complete record; ignore it rather
        # than store empty counters.
        if [[ ${#FIELD[@]} -lt 15 ]]; then
            continue
        fi
        SLAB_NAME[$LINE_CNT]="${FIELD[0]}"
        SLAB_ACTIVE_OBJS[$LINE_CNT]="${FIELD[1]}"
        SLAB_NUM_OBJS[$LINE_CNT]="${FIELD[2]}"
        SLAB_ACTIVE_SLABS[$LINE_CNT]="${FIELD[13]}"
        SLAB_NUM_SLABS[$LINE_CNT]="${FIELD[14]}"
        dbg "Got slabinfo ${SLAB_NAME[$LINE_CNT]} ${SLAB_ACTIVE_OBJS[$LINE_CNT]} ${SLAB_NUM_OBJS[$LINE_CNT]} ${SLAB_ACTIVE_SLABS[$LINE_CNT]} ${SLAB_NUM_SLABS[$LINE_CNT]}"
        ((LINE_CNT++))
    done < "$SLABINFO_FILE"

    export SLAB_NAME SLAB_ACTIVE_OBJS SLAB_NUM_OBJS SLAB_ACTIVE_SLABS SLAB_NUM_SLABS
    return 0
}
39+
40+
# Do the actual work of comparing one slabinfo counter against a threshold.
# Used by check_slabinfo_{active_objs,num_objs,active_slabs,num_slabs}.
#
# Usage:  nhc_slabinfo_check <check> <column> [-0] [-a] [-l] [-s] [-m <match>] [-e <action>] <threshold>
#
#   <check>      Name of the calling check; prefixed to every message.
#   <column>     Counter to test:  1=active_objs  2=num_objs
#                                  3=active_slabs 4=num_slabs
#   -0           Non-fatal:  log the problem and return 0 instead of failing.
#   -a           Examine all caches instead of stopping at the first offender.
#   -l           Pass each message to log.
#   -s           Pass each message to syslog.
#   -m <match>   Only consider caches whose name passes mcheck against <match>
#                (default "*").
#   -e <action>  Run <action> in a background shell for each offending cache.
#   <threshold>  Non-negative integer; a cache is flagged when its counter is
#                greater than or equal to this value.
#
# Returns 0 when nothing was flagged (or -0 was given), 1 on a usage error or
# on the first offender without -a, and with -a the number of offenders
# (capped at 255, the largest status a function can return).
#
# NOTE: die is defined outside this file.  Every die call is followed by an
# explicit return so the function stops even if die returns to its caller.
function nhc_slabinfo_check() {
    local NONFATAL=0 ALL=0 LOG=0 SYSLOG=0 MATCH="" ACTION=""
    local CHECK="$1" COL="$2" THRESHOLD
    local THIS_NAME MSG i OPTION LABEL VALUE RC

    # Resolve the counter name once, and reject an unknown column up front
    # rather than flagging every cache with an empty message.
    case "$COL" in
        1) LABEL="active_objs" ;;
        2) LABEL="num_objs" ;;
        3) LABEL="active_slabs" ;;
        4) LABEL="num_slabs" ;;
        *) die 1 "$CHECK: Internal error: unknown slabinfo column \"$COL\"" ; return 1 ;;
    esac

    if [[ ${#SLAB_NAME[*]} -eq 0 ]]; then
        nhc_slabinfo_gather_data
    fi

    shift 2
    OPTIND=1
    while getopts ":0alsm:e:" OPTION ; do
        case "$OPTION" in
            0) NONFATAL=1 ;;
            a) ALL=1 ;;
            l) LOG=1 ;;
            s) SYSLOG=1 ;;
            m) MATCH="$OPTARG" ;;
            e) ACTION="$OPTARG" ;;
            :) die 1 "$CHECK: Option -$OPTARG requires an argument." ; return 1 ;;
            \?) die 1 "$CHECK: Invalid option: -$OPTARG" ; return 1 ;;
        esac
    done
    shift $((OPTIND-1))
    THRESHOLD="$1"
    if [[ -z "$THRESHOLD" || "${THRESHOLD//[^0-9]}" != "$THRESHOLD" ]]; then
        die 1 "$CHECK: Syntax error: threshold must be an integer."
        # Without this return a non-numeric threshold would reach the
        # arithmetic comparisons below whenever die returns.
        return 1
    fi
    if [[ -z "$MATCH" ]]; then
        MATCH="*"
    fi
    dbg "Looking for slabinfo matching \"$MATCH\""
    for ((i=0; i < ${#SLAB_NAME[*]}; i++)); do
        THIS_NAME="${SLAB_NAME[$i]}"
        dbg "CHECKING \"$THIS_NAME\" vs. \"$MATCH\""
        if ! mcheck "$THIS_NAME" "$MATCH"; then
            continue
        fi
        dbg "Matching slabinfo found: $THIS_NAME: active_objs=${SLAB_ACTIVE_OBJS[$i]} num_objs=${SLAB_NUM_OBJS[$i]} active_slabs=${SLAB_ACTIVE_SLABS[$i]} num_slabs=${SLAB_NUM_SLABS[$i]}"
        case "$COL" in
            1) VALUE="${SLAB_ACTIVE_OBJS[$i]}" ;;
            2) VALUE="${SLAB_NUM_OBJS[$i]}" ;;
            3) VALUE="${SLAB_ACTIVE_SLABS[$i]}" ;;
            4) VALUE="${SLAB_NUM_SLABS[$i]}" ;;
        esac
        if [[ $VALUE -lt $THRESHOLD ]]; then
            continue
        fi
        MSG="$CHECK: slabinfo \"$THIS_NAME\" using $VALUE $LABEL (limit $THRESHOLD)"
        # We have a winner.  Or loser, as the case may be.
        # The message is quoted so it reaches log/syslog as one argument and
        # is never word-split or glob-expanded.
        if [[ "$LOG" == "1" ]]; then
            log "$MSG"
        fi
        if [[ "$SYSLOG" == "1" ]]; then
            syslog "$MSG"
        fi
        if [[ "$ACTION" != "" ]]; then
            "${SHELL:-/bin/bash}" -c "$ACTION" &
        fi
        if [[ $ALL -ge 1 ]]; then
            if [[ -n "$MSG" ]]; then
                log "$MSG ($ALL)"
            fi
            ((ALL++))
            continue
        elif [[ $NONFATAL == 1 ]]; then
            if [[ -n "$MSG" ]]; then
                log "$MSG (non-fatal)"
            fi
            return 0
        fi
        die 1 "$MSG"
        return 1
    done
    # -a (all) does not necessarily imply -0 (non-fatal).  A value of 1 for $ALL
    # means -a was passed in but no errors were found.  2 or above is an error.
    if [[ $ALL -gt 1 ]]; then
        # We had at least 1 flagged slab cache.  Fail unless we're also non-fatal.
        if [[ $NONFATAL == 1 ]]; then
            if [[ -n "$MSG" ]]; then
                log "$MSG (non-fatal)"
            fi
            return 0
        fi
        ((ALL--))
        # Return statuses are taken modulo 256, so exactly 256 offenders would
        # otherwise be reported as success.  Cap the status; the message still
        # carries the true count.
        RC=$ALL
        if [[ $RC -gt 255 ]]; then
            RC=255
        fi
        die "$RC" "$MSG (last of $ALL)"
        return "$RC"
    fi
    return 0
}
137+
138+
# Flags slab caches whose active_objs count has reached <threshold>.
# check_slabinfo_active_objs [-0] [-a] [-l] [-s] [-m <match>] [-e <action>] <threshold>
function check_slabinfo_active_objs() {
    # Column 1 selects the active_objs counter in nhc_slabinfo_check.
    nhc_slabinfo_check "${FUNCNAME[0]}" 1 "$@"
}
143+
144+
# Flags slab caches whose num_objs count has reached <threshold>.
# check_slabinfo_num_objs [-0] [-a] [-l] [-s] [-m <match>] [-e <action>] <threshold>
function check_slabinfo_num_objs() {
    # Column 2 selects the num_objs counter in nhc_slabinfo_check.
    nhc_slabinfo_check "${FUNCNAME[0]}" 2 "$@"
}
149+
150+
# Flags slab caches whose active_slabs count has reached <threshold>.
# check_slabinfo_active_slabs [-0] [-a] [-l] [-s] [-m <match>] [-e <action>] <threshold>
function check_slabinfo_active_slabs() {
    # Column 3 selects the active_slabs counter in nhc_slabinfo_check.
    nhc_slabinfo_check "${FUNCNAME[0]}" 3 "$@"
}
155+
156+
# Flags slab caches whose num_slabs count has reached <threshold>.
# check_slabinfo_num_slabs [-0] [-a] [-l] [-s] [-m <match>] [-e <action>] <threshold>
function check_slabinfo_num_slabs() {
    # Column 4 selects the num_slabs counter in nhc_slabinfo_check.
    nhc_slabinfo_check "${FUNCNAME[0]}" 4 "$@"
}

0 commit comments

Comments
 (0)