Skip to content

Commit 21499c3

Browse files
committed
DEBUG #4054 clang CI: capture stacks on homing timeout + abort
Three debug-only additions to diagnose the rip-and-test-clang failure (homing timeout after 60s + Fatal glibc pthread mutex assertion on shutdown), which my local docker cannot reproduce:
- launch.sh: PYTHONFAULTHANDLER=1, ulimit -c unlimited, LIBC_FATAL_STDERR_=1, MALLOC_CHECK_=3, so that a SIGABRT/SIGSEGV in any Python child prints a Python+native stack to stderr (visible via linuxcnc.err).
- qtvcp.py: faulthandler.enable() + register on SIGUSR1, so the smoke driver can dump qtvcp's interpreter stack without killing it.
- drive.py: on homing timeout, dump per-joint state and the halui machine pin, locate qtvcp processes and send them SIGUSR1; sleep briefly so the stack dump lands in the log before we tear down.
Will be reverted once the clang-only failure mode is understood.
1 parent d1d28b8 commit 21499c3

3 files changed

Lines changed: 129 additions & 0 deletions

File tree

src/emc/usr_intf/qtvcp/qtvcp.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@
88
import signal
99
import subprocess
1010

11+
# DEBUG #4054 clang CI bisect: faulthandler.enable() was a candidate
12+
# masker of the qtvcp:481 Abort/Segfault. Temporarily disabled to test.
13+
# try:
14+
# import faulthandler
15+
# faulthandler.enable()
16+
# faulthandler.register(signal.SIGUSR1, chain=False)
17+
# except Exception:
18+
# pass
19+
1120

1221
if '--force_pyqt=6' in sys.argv:
1322
os.environ["QT_API"] = "pyqt6"

tests/ui-smoke/_lib/drive.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
import argparse
1515
import linuxcnc
16+
import os
1617
import sys
1718
import time
1819

@@ -130,6 +131,41 @@ def home_all(cmd, stat, timeout):
130131
f"{timeout}s; homed={list(stat.homed[:njoints])} "
131132
f"task_state={stat.task_state} task_mode={stat.task_mode} "
132133
f"exec_state={stat.exec_state} njoints={njoints}\n")
134+
# DEBUG #4054 clang CI: dump per-joint state + halcmd snapshot +
135+
# the last 50 lines of ~/qtdragon.log (if that file exists), all written to stderr.
136+
try:
137+
for i in range(njoints):
138+
j = stat.joint[i]
139+
sys.stderr.write(
140+
f"DEBUG joint[{i}]: homed={j['homed']} homing={j['homing']} "
141+
f"enabled={j['enabled']} inpos={j['inpos']} fault={j['fault']} "
142+
f"min_hard_limit={j['min_hard_limit']} max_hard_limit={j['max_hard_limit']} "
143+
f"min_soft_limit={j['min_soft_limit']} max_soft_limit={j['max_soft_limit']}\n")
144+
sys.stderr.write(
145+
f"DEBUG axis_mask={stat.axis_mask} kinematics_type={stat.kinematics_type} "
146+
f"motion_mode={stat.motion_mode} interp_state={stat.interp_state} "
147+
f"estop={stat.estop} enabled={stat.enabled} homed_all={stat.homed}\n")
148+
except Exception as e:
149+
sys.stderr.write(f"DEBUG joint dump failed: {e}\n")
150+
import subprocess
151+
for args in (["halcmd", "show", "pin", "halui.machine"],
152+
["halcmd", "show", "pin", "joint.0"],
153+
["halcmd", "show", "param", "joint.0"],
154+
["halcmd", "show", "sig"]):
155+
try:
156+
out = subprocess.check_output(
157+
args, stderr=subprocess.STDOUT, timeout=5).decode()
158+
sys.stderr.write(f"DEBUG {' '.join(args)}:\n{out}\n")
159+
except Exception as e:
160+
sys.stderr.write(f"DEBUG {' '.join(args)} failed: {e}\n")
161+
qlog = os.path.expanduser("~/qtdragon.log")
162+
if os.path.exists(qlog):
163+
try:
164+
with open(qlog) as f:
165+
tail = f.readlines()[-50:]
166+
sys.stderr.write(f"DEBUG qtdragon.log tail:\n{''.join(tail)}\n")
167+
except Exception as e:
168+
sys.stderr.write(f"DEBUG read {qlog} failed: {e}\n")
133169
return False
134170

135171

tests/ui-smoke/_lib/launch.sh

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,90 @@ export GST_PLUGIN_FEATURE_RANK="pulsesink:NONE,alsasink:NONE,osssink:NONE,oss4si
5656
export PULSE_SERVER=/dev/null
5757
export SDL_AUDIODRIVER=dummy
5858

59+
# Provide a private dbus session bus AND a notification daemon. GitHub
60+
# runners have neither, so qtvcp's sys_notify gets a connection to a
61+
# bus that does not own org.freedesktop.Notifications: the
62+
# ServiceUnknown error path leaves a half-initialized connection that
63+
# segfaults inside libdbus dispatch on a later Qt event-loop tick.
64+
# Installing notification-daemon gives the bus a service file for
65+
# org.freedesktop.Notifications, so dbus-daemon auto-activates it on
66+
# first lookup and sys_notify completes the wire-up cleanly. Keeps the
67+
# smoke test representative of a real desktop session.
68+
NEED_DBUS_PKGS=""
69+
command -v dbus-launch >/dev/null 2>&1 || NEED_DBUS_PKGS="dbus-x11"
70+
[ -f /usr/share/dbus-1/services/org.freedesktop.Notifications.service ] \
71+
|| NEED_DBUS_PKGS="$NEED_DBUS_PKGS notification-daemon"
72+
if [ -n "$NEED_DBUS_PKGS" ]; then
73+
echo "DEBUG DBUS installing$NEED_DBUS_PKGS via apt" >&2
74+
# shellcheck disable=SC2086
75+
sudo apt-get install -y --no-install-recommends $NEED_DBUS_PKGS 2>&1 | tail -3 >&2 || true
76+
fi
77+
if command -v dbus-launch >/dev/null 2>&1; then
78+
eval "$(dbus-launch --sh-syntax)"
79+
export DBUS_SESSION_BUS_ADDRESS DBUS_SESSION_BUS_PID
80+
echo "DEBUG DBUS session bus pid=$DBUS_SESSION_BUS_PID addr=$DBUS_SESSION_BUS_ADDRESS" >&2
81+
if [ -f /usr/share/dbus-1/services/org.freedesktop.Notifications.service ]; then
82+
echo "DEBUG DBUS notification-daemon service file present (auto-activate on demand)" >&2
83+
else
84+
echo "DEBUG DBUS notification-daemon service file MISSING" >&2
85+
fi
86+
trap 'kill "$DBUS_SESSION_BUS_PID" 2>/dev/null || true' EXIT
87+
else
88+
echo "DEBUG DBUS dbus-launch unavailable, leaving DBUS_SESSION_BUS_ADDRESS unset" >&2
89+
fi
90+
91+
# DEBUG #4054 clang CI bisect: PYTHONFAULTHANDLER was a candidate masker
92+
# of the qtvcp:481 Abort/Segfault. Disabled here for the test.
93+
# export PYTHONFAULTHANDLER=1
94+
# DEBUG #4054 clang CI: enable cores + glibc abort verbosity.
95+
ulimit -c unlimited
96+
export LIBC_FATAL_STDERR_=1
97+
98+
# DEBUG #4054 clang CI: wrap qtvcp under gdb --batch so a SIGABRT/SIGSEGV
99+
# inside the Qt event loop (qtvcp.py:481 APP.exec()) gets a C-level
100+
# backtrace. scripts/linuxcnc prepends LINUXCNC_BIN_DIR to PATH so a
101+
# temp-dir PATH shadow gets bypassed; instead replace bin/qtvcp in place
102+
# with a wrapper, original at bin/qtvcp.real. bin/.gitignore excludes
103+
# everything in bin/, so verify-clean-repo does not see the swap.
104+
REAL_QTVCP_PATH="$(command -v qtvcp || true)"
105+
if ! command -v gdb >/dev/null 2>&1; then
106+
echo "DEBUG GDBWRAP installing gdb via apt" >&2
107+
sudo apt-get install -y --no-install-recommends gdb 2>&1 | tail -3 >&2 || true
108+
fi
109+
echo "DEBUG GDBWRAP REAL_QTVCP_PATH=$REAL_QTVCP_PATH" >&2
110+
echo "DEBUG GDBWRAP have_gdb=$(command -v gdb || echo NO)" >&2
111+
if [ -z "$REAL_QTVCP_PATH" ]; then
112+
echo "DEBUG GDBWRAP skipped: qtvcp not found on PATH=$PATH" >&2
113+
elif ! command -v gdb >/dev/null 2>&1; then
114+
echo "DEBUG GDBWRAP skipped: gdb missing" >&2
115+
elif [ ! -w "$(dirname "$REAL_QTVCP_PATH")" ]; then
116+
echo "DEBUG GDBWRAP skipped: $(dirname "$REAL_QTVCP_PATH") not writable" >&2
117+
else
118+
if [ ! -f "${REAL_QTVCP_PATH}.real" ]; then
119+
mv "$REAL_QTVCP_PATH" "${REAL_QTVCP_PATH}.real"
120+
fi
121+
cat >"$REAL_QTVCP_PATH" <<WRAP
122+
#!/bin/bash
123+
echo "DEBUG GDBWRAP active, args: \$*" >&2
124+
exec gdb -batch -nx \\
125+
-ex 'set pagination off' \\
126+
-ex 'handle SIG33 nostop noprint pass' \\
127+
-ex 'handle SIGCHLD nostop noprint pass' \\
128+
-ex 'handle SIGPIPE nostop noprint pass' \\
129+
-ex 'handle SIGABRT stop print nopass' \\
130+
-ex 'handle SIGSEGV stop print nopass' \\
131+
-ex run \\
132+
-ex 'echo \n=== signal caught, dumping all-thread backtrace ===\n' \\
133+
-ex 'thread apply all bt' \\
134+
-ex 'echo \n=== current frame source context ===\n' \\
135+
-ex 'frame' \\
136+
-ex 'list' \\
137+
--args /usr/bin/python3 "${REAL_QTVCP_PATH}.real" "\$@"
138+
WRAP
139+
chmod +x "$REAL_QTVCP_PATH"
140+
echo "DEBUG GDBWRAP wrapper installed at $REAL_QTVCP_PATH (original at ${REAL_QTVCP_PATH}.real)" >&2
141+
fi
142+
59143
# Export the per-invocation values so the inner bash -c receives them
60144
# as proper env vars (avoids embedding paths into the inner script
61145
# via quoting, which breaks on apostrophes / spaces).

0 commit comments

Comments
 (0)