@@ -19,11 +19,13 @@ shopt -s nullglob nocaseglob extglob
# JSON body:
# {
#   "disable": false,
#   "waagent_full": false,
#   "files": [ "/etc/skel/.bashrc", "/etc/skel/.bash_profile" ],
#   "pod_log_namespaces": [ "default", "pahealy" ],
#   "iptables": false,
#   "nftables": false,
#   "netns": true,
#   "sysinfo": false
# }
# Query the instance metadata endpoint for the compute document and keep only
# the value of the "aks-log-collector" tag, decoded from its JSON string form.
CONFIG=$(
  curl -s -H Metadata:true --noproxy '*' \
    'http://169.254.169.254/metadata/instance/compute?api-version=2021-02-01' \
    | jq '.tagsList[] | select(.name=="aks-log-collector") | .value | fromjson'
)
2931
@@ -36,6 +38,8 @@ CONFIG=$(curl -s -H Metadata:true --noproxy '*' 'http://169.254.169.254/metadata
# Optional collectors are toggled by boolean keys in CONFIG. The whole CONFIG
# text is slurped as one raw string; if it does not parse as JSON, or the key
# is absent, the flag evaluates to "false".
COLLECT_IPTABLES=$(jq -esRr 'try fromjson catch null | .iptables? // false' <<< "$CONFIG")
COLLECT_NFTABLES=$(jq -esRr 'try fromjson catch null | .nftables? // false' <<< "$CONFIG")
COLLECT_NETNS=$(jq -esRr 'try fromjson catch null | .netns? // false' <<< "$CONFIG")
COLLECT_SYSINFO=$(jq -esRr 'try fromjson catch null | .sysinfo? // false' <<< "$CONFIG")
COLLECT_WAAGENT_FULL=$(jq -esRr 'try fromjson catch null | .waagent_full? // false' <<< "$CONFIG")
3943
4044# ## START CONFIGURATION
4145ZIP=" aks_logs.zip"
@@ -75,63 +79,71 @@ GLOBS+=(/var/log/nvidia*.log)
GLOBS+=(
  /var/log/azure/nvidia*.log
  /var/log/fabricmanager*.log
)

# Small configuration files that are critical for diagnosis
GLOBS+=(
  /etc/*-release
  /etc/HOSTNAME
  /etc/hostname
  /etc/waagent.conf
  /etc/resolv.conf
  /run/systemd/resolve/stub-resolv.conf
)

# Key log files
GLOBS+=(
  /var/log/dmesg*
  /var/log/syslog*
  /var/log/messages*
  /var/log/secure*
  /var/log/auth*
  /var/log/cloud-init*
  /var/log/azure/*/*
  /var/log/azure/*/*/*
  /var/log/azure/custom-script/handler.log
  /var/log/azure/run-command/handler.log
)

# Extension state
GLOBS+=(
  /var/lib/waagent/ovf-env.xml
  /var/lib/waagent/waagent_status.json
  /var/lib/waagent/*/status/*.status
  /var/lib/waagent/*/config/*.settings
  /var/lib/waagent/*/config/HandlerState
  /var/lib/waagent/*/config/HandlerStatus
  /var/lib/waagent/*/error.json
)

# The remaining entries follow MANIFEST_FULL from the Azure Linux Agent's log
# collector and are only gathered when "waagent_full" is enabled:
# https://github.com/Azure/WALinuxAgent/blob/master/azurelinuxagent/ga/logcollector_manifests.py
if [ "$COLLECT_WAAGENT_FULL" = "true" ]; then
  GLOBS+=(
    /var/lib/waagent/provisioned
    /etc/fstab
    /etc/ssh/sshd_config
    /boot/grub*/grub.c*
    /boot/grub*/menu.lst
    /etc/apt/sources.list
    /etc/apt/sources.list.d/*
    /etc/network/interfaces
    /etc/network/interfaces.d/*.cfg
    /etc/netplan/*.yaml
    /etc/nsswitch.conf
    /run/resolvconf/resolv.conf
    /etc/sysconfig/iptables
    /etc/sysconfig/network
    /etc/sysconfig/network/ifcfg-eth*
    /etc/sysconfig/network/routes
    /etc/sysconfig/network-scripts/ifcfg-eth*
    /etc/sysconfig/network-scripts/route-eth*
    /etc/ufw/ufw.conf
    /var/lib/hyperv/.kvp_pool_*
    /var/lib/dhcp/dhclient.eth0.leases
    /var/lib/dhclient/dhclient-eth0.leases
    /var/lib/wicked/lease-eth0-dhcp-ipv4.xml
    /var/lib/waagent/SharedConfig.xml
    /var/lib/waagent/ManagedIdentity-*.json
    # Rotated and additional log files
    /var/log/rsyslog*
    /var/log/kern*
    /var/log/dpkg*
    /var/log/yum*
    /var/log/boot*
    /var/log/journal*
  )
fi
135147# ## END CONFIGURATION
136148
137149command -v zip > /dev/null || {
@@ -187,25 +199,30 @@ echo "Collecting system information..."
mkdir collect

# Seed the archive with a handful of /proc entries; this first zip call is the
# one that creates the ZIP file.
zip -DZ deflate "$ZIP" /proc/@(cmdline|loadavg|meminfo|mounts|uptime|version*)

# Everything else in this section is opt-in through the "sysinfo" setting.
if [[ "$COLLECT_SYSINFO" == "true" ]]; then
  # Extensive proc info, appended to the existing archive
  zip -gDZ deflate "$ZIP" /proc/@(cpuinfo|filesystems|interrupts|modules|slabinfo|stat|vmstat) /proc/net/*

  # Directory listings
  collectToZip collect/file_listings.txt find /dev /etc /var/lib/waagent /var/log -ls

  # Block devices, disk usage, hardware and kernel state.
  # The find output is left unquoted so each device path becomes its own argument.
  collectToZip collect/blkid.txt blkid $(find /dev -type b ! -name 'sr*')
  collectToZip collect/du_bytes.txt df -al
  collectToZip collect/du_inodes.txt df -ail
  collectToZip collect/diskinfo.txt lsblk
  collectToZip collect/lscpu.txt lscpu
  collectToZip collect/lscpu.json lscpu -J
  collectToZip collect/lsipc.txt lsipc
  collectToZip collect/lsns.json lsns -J --output-all
  collectToZip collect/lspci.txt lspci -vkPP
  collectToZip collect/lsscsi.txt lsscsi -vv
  collectToZip collect/lsvmbus.txt lsvmbus -vv
  collectToZip collect/sysctl.txt sysctl -a
  collectToZip collect/systemctl-status.txt systemctl status --all -fr
fi
209226
210227# Collect logs of the Nvidia services if present
211228collectToZip collect/journalctl_nvidia-dcgm.txt journalctl -u nvidia-dcgm --no-pager
@@ -223,12 +240,16 @@ collectToZip collect/crictl_images.json crictl images -o json
# Always-collected container runtime views: one JSON file per crictl subcommand.
for crictl_view in imagefsinfo pods ps; do
  collectToZip "collect/crictl_${crictl_view}.json" crictl "$crictl_view" -o json
done
# Container and pod stats are only gathered when "sysinfo" is enabled.
if [[ "$COLLECT_SYSINFO" == "true" ]]; then
  for crictl_view in stats statsp; do
    collectToZip "collect/crictl_${crictl_view}.json" crictl "$crictl_view" -o json
  done
fi
228247
# Collect network information
# The conntrack table and its statistics are only gathered when "sysinfo" is enabled.
if [[ "$COLLECT_SYSINFO" == "true" ]]; then
  collectToZip collect/conntrack.txt conntrack -L
  collectToZip collect/conntrack_stats.txt conntrack -S
fi
# IPv4 addresses, neighbors and routes as detailed JSON
for ip_object in addr neighbor route; do
  collectToZip "collect/ip_4_${ip_object}.json" ip -4 -d -j "$ip_object" show
done
@@ -251,15 +272,19 @@ if [ "${COLLECT_NFTABLES}" = "true" ]; then
251272 collectToZip collect/nftables.txt nft -n list ruleset 2>&1
252273fi
253274
# Socket listings and summary statistics are part of the opt-in "sysinfo" set.
if [[ "$COLLECT_SYSINFO" == "true" ]]; then
  collectToZip collect/ss.txt ss -anoempiO --cgroup
  collectToZip collect/ss_stats.txt ss -s
fi
256279
257280# Collect network information from network namespaces
258281if [ " ${COLLECT_NETNS} " = " true" ]; then
259282 for NETNS in $( ip -j netns list | jq -r ' .[].name' ) ; do
260283 mkdir -p " collect/ip_netns_${NETNS} /"
261- collectToZip collect/ip_netns_${NETNS} /conntrack.txt ip netns exec " ${NETNS} " conntrack -L
262- collectToZip collect/ip_netns_${NETNS} /conntrack_stats.txt ip netns exec " ${NETNS} " conntrack -S
284+ if [ " $COLLECT_SYSINFO " = " true" ]; then
285+ collectToZip collect/ip_netns_${NETNS} /conntrack.txt ip netns exec " ${NETNS} " conntrack -L
286+ collectToZip collect/ip_netns_${NETNS} /conntrack_stats.txt ip netns exec " ${NETNS} " conntrack -S
287+ fi
263288 collectToZip collect/ip_netns_${NETNS} /ip_4_addr.json ip -n " ${NETNS} " -4 -d -j addr show
264289 collectToZip collect/ip_netns_${NETNS} /ip_4_neighbor.json ip -n " ${NETNS} " -4 -d -j neighbor show
265290 collectToZip collect/ip_netns_${NETNS} /ip_4_route.json ip -n " ${NETNS} " -4 -d -j route show
@@ -279,27 +304,42 @@ if [ "${COLLECT_NETNS}" = "true" ]; then
279304 if [ " ${COLLECT_NFTABLES} " = " true" ]; then
280305 collectToZip collect/ip_netns_${NETNS} /nftables.txt nft -n list ruleset
281306 fi
282- collectToZip collect/ip_netns_${NETNS} /ss.txt ip netns exec " ${NETNS} " ss -anoempiO --cgroup
283- collectToZip collect/ip_netns_${NETNS} /ss_stats.txt ip netns exec " ${NETNS} " ss -s
307+ if [ " $COLLECT_SYSINFO " = " true" ]; then
308+ collectToZip collect/ip_netns_${NETNS} /ss.txt ip netns exec " ${NETNS} " ss -anoempiO --cgroup
309+ collectToZip collect/ip_netns_${NETNS} /ss_stats.txt ip netns exec " ${NETNS} " ss -s
310+ fi
284311 done
285312fi
286313
# Add each file sequentially to the zip archive. This is slightly less efficient than adding them
# all at once, but allows us to easily check when we've exceeded the maximum file size and stop
# adding things to the archive.
MAX_FILE_SIZE=$((10 * 1024 * 1024))

#######################################
# Append only the last MAX_BYTES bytes of FILE to the archive.
# The data is streamed through a FIFO created at ".<FILE>" (relative to the
# current directory), mirroring the original directory structure, so no
# truncated copy is written to disk.
# Globals:   ZIP (read)
# Arguments: $1 - path of the oversized file
#            $2 - number of trailing bytes to keep
# Returns:   zip's exit status, or non-zero if the FIFO could not be set up
#######################################
addTailToZip() {
  local src=$1
  local max_bytes=$2
  local fifo=".${src}"
  local writer_pid
  local rc

  mkdir -p ".${src%/*}" || return
  # Without a real FIFO, tail would write (and truncate) a regular file at
  # this path, so give up on this file instead of carrying on.
  mkfifo "$fifo" || return

  tail -c "$max_bytes" "$src" > "$fifo" &
  writer_pid=$!
  zip -gDZ deflate --fifo "${ZIP}" "$fifo"
  rc=$?

  # If zip failed before opening the FIFO, the writer is still blocked waiting
  # for a reader; terminate it so the wait below cannot hang forever. After a
  # normal run the writer has already exited and the kill is a no-op.
  kill "$writer_pid" 2> /dev/null
  wait "$writer_pid" 2> /dev/null
  rm -f "$fifo"
  return "$rc"
}

echo "Adding log files to zip archive with max file size: $MAX_FILE_SIZE bytes..."
for file in "${GLOBS[@]}"; do
  # Skip gzip-compressed files
  # shellcheck disable=SC3010
  [[ "$file" == *.gz ]] && continue
  test -e "$file" || continue

  # The file may vanish between the existence check and stat; skip it then.
  fsize=$(stat --printf "%s" "$file") || continue
  if [ "$fsize" -gt "$MAX_FILE_SIZE" ]; then
    # Oversized file: keep only its tail
    addTailToZip "$file" "$MAX_FILE_SIZE"
  else
    zip -g -DZ deflate -u "${ZIP}" "$file" -x '*.sock'
  fi

  # Stop once the archive reaches the overall size limit.
  # NOTE(review): the entry that pushed the archive over the limit is left in
  # place, so the final ZIP can be >= MAX_SIZE - confirm the consumer accepts that.
  FILE_SIZE=$(stat --printf "%s" "${ZIP}")
  if [ "$FILE_SIZE" -ge "$MAX_SIZE" ]; then
    echo "WARNING: ZIP file size $FILE_SIZE >= $MAX_SIZE; stopping."
    break
  fi
done
305345
0 commit comments