Skip to content

Commit 900531f

Browse files
committed
fix: improve podman stop/rm timeout workaround with accurate cgroup detection
Instead of checking PidMode=host, now check if crun's cgroup-path is actually empty.

This is more accurate because:
- With systemd cgroup manager (e.g., Fedora), cgroup delegation works even with --pid host, so the workaround is not needed
- With cgroupfs manager, cgroup-path may be empty regardless of PidMode

Changes:
- Check crun status file for empty cgroup-path instead of PidMode
- Move warning from distrobox-create to distrobox-stop/rm (more accurate)
- Show warning only when workaround is actually applied
- Simplify sed command (single sed instead of grep|sed)

The workaround is applied proactively (before stop/rm) rather than as a fallback, to avoid masking other potential failures.

Signed-off-by: xz-dev <xiangzhedev@gmail.com>
1 parent dc98f7a commit 900531f

File tree

3 files changed

+51
-25
lines changed

3 files changed

+51
-25
lines changed

distrobox-create

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -692,13 +692,6 @@ generate_create_command()
692692
if [ "${unshare_process}" -eq 0 ]; then
693693
result_command="${result_command}
694694
--pid host"
695-
# Warn about --pid host limitation in rootless podman mode.
696-
# See: https://github.com/containers/podman/issues/11888
697-
if [ "${rootful}" -eq 0 ] && echo "${container_manager}" | grep -q "podman"; then
698-
printf >&2 "Warning: using --pid host with rootless podman.\n"
699-
printf >&2 "Warning: orphaned child processes may remain after container stop.\n"
700-
printf >&2 "Warning: consider using --unshare-process for full process cleanup.\n"
701-
fi
702695
fi
703696
# Mount useful stuff inside the container.
704697
# We also mount host's root filesystem to /run/host, to be able to syphon

distrobox-rm

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -389,26 +389,41 @@ delete_container()
389389

390390
# Remove the container
391391
printf "Removing container...\n"
392-
# Workaround for podman rm --force timeout issue with --pid host in rootless mode.
392+
# Workaround for podman rm --force timeout issue when cgroup-path is empty.
393393
# See: https://github.com/chimera-linux/cports/issues/1718
394394
#
395-
# Root cause: In rootless mode with --pid host (distrobox default), the container's
396-
# cgroup-path is empty. When podman rm --force tries to stop the container, it uses
397-
# "crun kill --all" which relies on cgroup to find processes. With empty cgroup-path,
398-
# no processes are found and killed, causing the stop to timeout.
395+
# Root cause: In rootless mode, crun may fail to create a cgroup for the
396+
# container (e.g., with cgroupfs manager or --pid host). When cgroup-path
397+
# is empty, "crun kill --all" cannot enumerate processes via cgroup, causing
398+
# podman rm --force to timeout. With systemd cgroup manager (e.g., Fedora),
399+
# cgroup delegation works and this issue doesn't occur.
399400
#
400-
# Solution: Use "podman kill" first, which sends signal directly to the
401-
# container's init process PID, bypassing the cgroup lookup issue.
401+
# Solution: Check if crun's cgroup-path is empty. If so, use "podman kill"
402+
# first, which sends signal directly to the container's init process PID,
403+
# bypassing the cgroup lookup issue.
404+
#
405+
# Why not try rm --force first then fallback to kill? Because that could mask
406+
# other failures - we want to apply the workaround only when we know the
407+
# specific condition (empty cgroup-path) that causes the timeout.
402408
#
403409
# Note: This only kills the init process. Processes started via "podman exec"
404410
# (e.g., distrobox-enter) run in separate process groups and will become
405411
# orphaned. Daemonized processes (setsid/double-fork) also cannot be tracked.
406412
# Use --unshare-process when creating the container for full process cleanup.
407413
# See: https://github.com/containers/podman/issues/11888
408414
# distrobox-rm does not call distrobox-stop by design; a similar fix exists there.
409-
if [ "${container_status}" = "running" ] && [ "${rootful}" -eq 0 ] && echo "${container_manager}" | grep -q "podman" &&
410-
[ "$(${container_manager} inspect --format '{{.HostConfig.PidMode}}' "${container_name}" 2> /dev/null)" = "host" ]; then
411-
${container_manager} kill "${container_name}" > /dev/null 2>&1 || :
415+
if [ "${container_status}" = "running" ] && [ "${rootful}" -eq 0 ] && echo "${container_manager}" | grep -q "podman"; then
416+
container_id=$(${container_manager} inspect --format '{{.Id}}' "${container_name}" 2> /dev/null)
417+
crun_status="/run/user/$(id -u)/crun/${container_id}/status"
418+
if [ -f "${crun_status}" ]; then
419+
cgroup_path=$(sed -n 's/.*"cgroup-path": "\([^"]*\)".*/\1/p' "${crun_status}" 2>/dev/null)
420+
if [ -z "${cgroup_path}" ]; then
421+
${container_manager} kill "${container_name}" > /dev/null 2>&1 || :
422+
printf >&2 "Warning: container was created with --pid host and cgroup is not available.\n"
423+
printf >&2 "Warning: some child processes may remain running (orphaned).\n"
424+
printf >&2 "Warning: use --unshare-process when creating containers for full cleanup.\n"
425+
fi
426+
fi
412427
fi
413428
# shellcheck disable=SC2086,SC2248
414429
${container_manager} rm ${force_flag} --volumes "${container_name}"

distrobox-stop

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -290,22 +290,40 @@ case "${response}" in
290290
y | Y | Yes | yes | YES)
291291
# Stop the container
292292
for container_name in ${container_name_list}; do
293-
# Workaround for podman stop timeout issue with --pid host in rootless mode.
293+
# Workaround for podman stop timeout issue when cgroup-path is empty.
294294
# See: https://github.com/chimera-linux/cports/issues/1718
295-
# In rootless mode, podman stop uses "crun kill --all" which fails when
296-
# cgroup-path is empty (which happens with --pid host, the distrobox default).
297295
#
298-
# Solution: Use "podman kill" first, which sends signal directly to the
299-
# container's init process PID, bypassing the cgroup lookup issue.
296+
# Root cause: In rootless mode, crun may fail to create a cgroup for the
297+
# container (e.g., with cgroupfs manager or --pid host). When cgroup-path
298+
# is empty, "crun kill --all" cannot enumerate processes via cgroup, causing
299+
# podman stop to timeout. With systemd cgroup manager (e.g., Fedora), cgroup
300+
# delegation works and this issue doesn't occur.
301+
#
302+
# Solution: Check if crun's cgroup-path is empty. If so, use "podman kill"
303+
# first, which sends signal directly to the container's init process PID,
304+
# bypassing the cgroup lookup issue.
305+
#
306+
# Why not try stop first then fallback to kill? Because that could mask
307+
# other failures - we want to apply the workaround only when we know the
308+
# specific condition (empty cgroup-path) that causes the timeout.
300309
#
301310
# Note: This only kills the init process. Processes started via "podman exec"
302311
# (e.g., distrobox-enter) run in separate process groups and will become
303312
# orphaned. Daemonized processes (setsid/double-fork) also cannot be tracked.
304313
# Use --unshare-process when creating the container for full process cleanup.
305314
# See: https://github.com/containers/podman/issues/11888
306-
if [ "${rootful}" -eq 0 ] && echo "${container_manager}" | grep -q "podman" &&
307-
[ "$(${container_manager} inspect --format '{{.HostConfig.PidMode}}' "${container_name}" 2> /dev/null)" = "host" ]; then
308-
${container_manager} kill "${container_name}" 2> /dev/null || :
315+
if [ "${rootful}" -eq 0 ] && echo "${container_manager}" | grep -q "podman"; then
316+
container_id=$(${container_manager} inspect --format '{{.Id}}' "${container_name}" 2> /dev/null)
317+
crun_status="/run/user/$(id -u)/crun/${container_id}/status"
318+
if [ -f "${crun_status}" ]; then
319+
cgroup_path=$(sed -n 's/.*"cgroup-path": "\([^"]*\)".*/\1/p' "${crun_status}" 2>/dev/null)
320+
if [ -z "${cgroup_path}" ]; then
321+
${container_manager} kill "${container_name}" 2> /dev/null || :
322+
printf >&2 "Warning: container was created with --pid host and cgroup is not available.\n"
323+
printf >&2 "Warning: some child processes may remain running (orphaned).\n"
324+
printf >&2 "Warning: use --unshare-process when creating containers for full cleanup.\n"
325+
fi
326+
fi
309327
fi
310328
${container_manager} stop "${container_name}" 2> /dev/null || :
311329
done

0 commit comments

Comments
 (0)