Skip to content

Commit 41455e3

Browse files
godlygeekpablogsal
authored andcommitted
Reintroduce debuginfod deadlock workaround
While switching from referencing the system debuginfod client library to vendoring our own copy of it, I removed a hack that turns out to still be required for some libc versions, because I incorrectly thought it only applied to the case when debuginfod was being loaded with `dlopen`. Signed-off-by: Matt Wozniski <[email protected]>
1 parent 3c5a8c7 commit 41455e3

File tree

3 files changed

+60
-5
lines changed

3 files changed

+60
-5
lines changed

news/634.bugfix.rst

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Fix a deadlock that could occur on some Linux systems when resolving debug information using debuginfod.

src/vendor/libbacktrace/elf.c

+24-1
Original file line numberDiff line numberDiff line change
@@ -853,6 +853,8 @@ elf_readlink (struct backtrace_state *state, const char *filename,
853853

854854
#define SYSTEM_BUILD_ID_DIR "/usr/lib/debug/.build-id/"
855855

856+
static int debuginfod_guard = 0;
857+
856858
static int
857859
elf_open_debugfile_by_debuginfod (const char *buildid_data,
858860
size_t buildid_size,
@@ -4455,7 +4457,7 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor,
44554457

44564458
d = elf_open_debugfile_by_buildid (state, buildid_data, buildid_size,
44574459
error_callback, data);
4458-
if (d < 0) {
4460+
if (d < 0 && !debuginfod_guard) {
44594461
char* env = getenv(DEBUGINFOD_PROGRESS_ENV_VAR);
44604462
if (env) {
44614463
fprintf(stderr, "Trying to download debuginfo for %s\n", filename);
@@ -4930,7 +4932,28 @@ backtrace_initialize (struct backtrace_state *state, const char *filename,
49304932
pd.exe_filename = filename;
49314933
pd.exe_descriptor = ret < 0 ? descriptor : -1;
49324934

4935+
/* Here, There Be Dragons: we are about to call dl_iterate_phdr,
4936+
which is a glibc-internal function that holds a libc internal
4937+
lock. As this function needs to iterate over all the loaded
4938+
modules, this lock is shared by dlopen so no new modules can be
4939+
loaded while is iterating. This is a problem for us, as the
4940+
debuginfod client will use libcurl to spawn threads to download
4941+
debuginfo files, and libcurl uses dlopen to load a bunch of stuff
4942+
for its backend in some versions. This can cause a deadlock because
4943+
the debuginfod client will wait until the libcurl threads finish but
4944+
they will never finish because they are waiting for the dlopen lock
4945+
to be released, which will not happen until the call to dl_iterate_phdr
4946+
finishes.
4947+
4948+
To avoid this, we use a global variable to detect if we are already
4949+
iterating over the modules, and if so, we skip the query to debuginfod
4950+
and just try with the other default methods.
4951+
4952+
Note this ONLY affects the symbol resolution when retrieving a backtrace,
4953+
and it doesn't affect offline symbolication. */
4954+
debuginfod_guard++;
49334955
dl_iterate_phdr (phdr_callback, (void *) &pd);
4956+
debuginfod_guard--;
49344957

49354958
if (!state->threaded)
49364959
{

src/vendor/libbacktrace_2446c66076480ce07a6bd868badcbceb3eeecc2e_debuginfod_patch.diff

+35-4
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,7 @@ index 0000000..78f4d8d
180180
+
181181
+#endif /* _DEBUGINFOD_CLIENT_H */
182182
diff --git a/elf.c b/elf.c
183-
index fb54165..0fcce38 100644
183+
index fb54165..2eb2546 100644
184184
--- a/elf.c
185185
+++ b/elf.c
186186
@@ -38,6 +38,7 @@ POSSIBILITY OF SUCH DAMAGE. */
@@ -199,10 +199,12 @@ index fb54165..0fcce38 100644
199199

200200
#ifndef S_ISLNK
201201
#ifndef S_IFLNK
202-
@@ -851,6 +853,37 @@ elf_readlink (struct backtrace_state *state, const char *filename,
202+
@@ -851,6 +853,39 @@ elf_readlink (struct backtrace_state *state, const char *filename,
203203

204204
#define SYSTEM_BUILD_ID_DIR "/usr/lib/debug/.build-id/"
205205

206+
+static int debuginfod_guard = 0;
207+
+
206208
+static int
207209
+elf_open_debugfile_by_debuginfod (const char *buildid_data,
208210
+ size_t buildid_size,
@@ -237,11 +239,11 @@ index fb54165..0fcce38 100644
237239
/* Open a separate debug info file, using the build ID to find it.
238240
Returns an open file descriptor, or -1.
239241

240-
@@ -4422,6 +4455,14 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor,
242+
@@ -4422,6 +4457,14 @@ elf_add (struct backtrace_state *state, const char *filename, int descriptor,
241243

242244
d = elf_open_debugfile_by_buildid (state, buildid_data, buildid_size,
243245
error_callback, data);
244-
+ if (d < 0) {
246+
+ if (d < 0 && !debuginfod_guard) {
245247
+ char* env = getenv(DEBUGINFOD_PROGRESS_ENV_VAR);
246248
+ if (env) {
247249
+ fprintf(stderr, "Trying to download debuginfo for %s\n", filename);
@@ -252,3 +254,32 @@ index fb54165..0fcce38 100644
252254
if (d >= 0)
253255
{
254256
int ret;
257+
@@ -4889,7 +4932,28 @@ backtrace_initialize (struct backtrace_state *state, const char *filename,
258+
pd.exe_filename = filename;
259+
pd.exe_descriptor = ret < 0 ? descriptor : -1;
260+
261+
+ /* Here, There Be Dragons: we are about to call dl_iterate_phdr,
262+
+ which is a glibc-internal function that holds a libc internal
263+
+ lock. As this function needs to iterate over all the loaded
264+
+ modules, this lock is shared by dlopen so no new modules can be
265+
+ loaded while is iterating. This is a problem for us, as the
266+
+ debuginfod client will use libcurl to spawn threads to download
267+
+ debuginfo files, and libcurl uses dlopen to load a bunch of stuff
268+
+ for its backend in some versions. This can cause a deadlock because
269+
+ the debuginfod client will wait until the libcurl threads finish but
270+
+ they will never finish because they are waiting for the dlopen lock
271+
+ to be released, which will not happen until the call to dl_iterate_phdr
272+
+ finishes.
273+
+
274+
+ To avoid this, we use a global variable to detect if we are already
275+
+ iterating over the modules, and if so, we skip the query to debuginfod
276+
+ and just try with the other default methods.
277+
+
278+
+ Note this ONLY affects the symbol resolution when retrieving a backtrace,
279+
+ and it doesn't affect offline symbolication. */
280+
+ debuginfod_guard++;
281+
dl_iterate_phdr (phdr_callback, (void *) &pd);
282+
+ debuginfod_guard--;
283+
284+
if (!state->threaded)
285+
{

0 commit comments

Comments
 (0)