Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
97 changes: 97 additions & 0 deletions scripts/skupper-gather.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,102 @@ get_log_collection_args() {
fi
}

function getPodResources() {
local ns="${1}"
local podName="${2}"
local container="${3}"
local podDirName="${podName##"pod/"}"

local resourcePath=${BASE_COLLECTION_PATH}/namespaces/${ns}/pods/${podDirName}/${container}/${container}/resources
mkdir -p "${resourcePath}"

echo "Collecting resource usage for pod ${podName} in ${ns}"

# Memory Info
oc exec -n "${ns}" "${podName}" -c "${container}" -- bash -c '
curr=$(cat /sys/fs/cgroup/memory.current)
max=$(cat /sys/fs/cgroup/memory.max)
curr_kb=$((curr / 1024))
curr_mb=$((curr / 1024 / 1024))

if [ "$max" = "max" ]; then
limit_str="max (uncapped)"
else
max_kb=$((max / 1024))
max_mb=$((max / 1024 / 1024))
limit_str="$max bytes ($max_kb KiB / $max_mb MiB)"
fi

echo "Usage: $curr bytes ($curr_kb KiB / $curr_mb MiB) - Limit: $limit_str" > /tmp/memory.info
' 2>/dev/null
oc exec -n "${ns}" "${podName}" -c "${container}" -- cat /tmp/memory.info > "${resourcePath}/memory.info" 2>&1

# Get previous container termination info
oc get pod "${podName##"pod/"}" -n "${ns}" -o jsonpath="{.status.containerStatuses[?(@.name=='${container}')].lastState}" > "${resourcePath}/last.terminated" 2>/dev/null
if [ ! -s "${resourcePath}/last.terminated" ]; then
echo "No previous termination recorded" > "${resourcePath}/last.terminated"
fi

# Memory pressure
oc exec -n "${ns}" "${podName}" -c "${container}" -- bash -c '
if [ -f /sys/fs/cgroup/memory.pressure ]; then
cat /sys/fs/cgroup/memory.pressure
else
echo "PSI not available on this node (kernel psi=1 not enabled)"
fi
' > "${resourcePath}/memory.pressure" 2>&1

# CPU usage
oc exec -n "${ns}" "${podName}" -c "${container}" -- bash -c "
declare -A cpu_descriptions
cpu_descriptions=(
[usage_usec]='Total CPU time consumed (user + system)'
[user_usec]='Time spent in user-space (app logic)'
[system_usec]='Time spent in kernel-space (syscalls, I/O)'
[core_sched.force_idle_usec]='Time CPU forced idle for security (cross-HT side-channel protection)'
[nr_periods]='Number of CPU quota enforcement periods (0 = no quota set)'
[nr_throttled]='Times the container was throttled for exceeding CPU quota'
[throttled_usec]='Total time spent throttled/waiting for CPU quota'
[nr_bursts]='Times container used burst CPU capacity beyond quota'
[burst_usec]='Total time spent using burst CPU capacity'
)

cat /sys/fs/cgroup/cpu.stat | while read line; do
val=\$(echo \$line | awk '{print \$2}')
key=\$(echo \$line | awk '{print \$1}')
desc=\${cpu_descriptions[\$key]}
if [[ \$key == *_usec ]] && [ \$val -ge 1000000 ]; then
sec=\$(awk \"BEGIN {printf \\\"%.2f\\\", \$val/1000000}\")
echo \"\$line (\$sec s) | \$desc\"
else
echo \"\$line | \$desc\"
fi
done
" > "${resourcePath}/cpu.stat" 2>&1

# CPU limit
oc exec -n "${ns}" "${podName}" -c "${container}" -- bash -c '
cpu_max=$(cat /sys/fs/cgroup/cpu.max)
quota=$(echo $cpu_max | awk "{print \$1}")
period=$(echo $cpu_max | awk "{print \$2}")
if [ "$quota" = "max" ]; then
echo "CPU Limit: uncapped (no quota set)"
else
cores=$(awk "BEGIN {printf \"%.2f\", $quota/$period}")
echo "CPU Limit: $quota/$period us = $cores cores"
fi
' > "${resourcePath}/cpu.max" 2>&1

# CPU pressure
oc exec -n "${ns}" "${podName}" -c "${container}" -- bash -c '
if [ -f /sys/fs/cgroup/cpu.pressure ]; then
cat /sys/fs/cgroup/cpu.pressure
else
echo "PSI not available on this node (kernel psi=1 not enabled)"
fi
' > "${resourcePath}/cpu.pressure" 2>&1
}

function getSkupperRouterSkstatInNamespace() {
local routerNamespace="${1}"
local flags=("g" "c" "l" "n" "e" "a" "m" "p")
Expand All @@ -28,6 +124,7 @@ function getSkupperRouterSkstatInNamespace() {
for flag in "${flags[@]}"; do
oc exec -n "${sitens}" "${routerName}" -c router -- skstat -"${flag}" > "${logPath}/skstat.${flag}" 2>&1
done
getPodResources "${sitens}" "${routerName}" "router"
done
}

Expand Down
Loading