-
Notifications
You must be signed in to change notification settings - Fork 162
Expand file tree
/
Copy pathlddtree.py
More file actions
622 lines (540 loc) · 20.2 KB
/
lddtree.py
File metadata and controls
622 lines (540 loc) · 20.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
# Copyright 2016 Robert McGibbon
# Copyright 2012-2014 Gentoo Foundation
# Copyright 2012-2014 Mike Frysinger <vapier@gentoo.org>
# Copyright 2012-2014 The Chromium OS Authors
# Use of this source code is governed by a BSD-style license (BSD-3)
# Original version available from:
# https://sources.gentoo.org/cgi-bin/viewvc.cgi/gentoo-projects/pax-utils/lddtree.py
"""Read the ELF dependency tree
This does not work like `ldd` in that we do not execute/load code (only read
files on disk).
"""
from __future__ import annotations
import errno
import functools
import glob
import logging
import os
import re
from dataclasses import dataclass
from fnmatch import fnmatch
from pathlib import Path
from elftools.elf.constants import E_FLAGS
from elftools.elf.elffile import ELFFile
from elftools.elf.sections import NoteSection
from auditwheel.architecture import Architecture
from auditwheel.error import InvalidLibcError
from auditwheel.libc import Libc
# Module-level logger, named after this module.
log = logging.getLogger(__name__)
# Names exported by a star-import of this module.
__all__ = ["LIBPYTHON_RE", "DynamicExecutable", "DynamicLibrary", "ldd"]
# Regex to match libpython shared library names, e.g. ``libpython3.11.so.1.0``
# or ``libpython3.7m.so``. The dot in front of ``so`` is escaped so that only a
# literal "." is accepted there (an unescaped dot would match any character).
LIBPYTHON_RE = re.compile(r"^libpython\d+\.\d+m?\.so(\.\d)*$")
@dataclass(frozen=True)
class Platform:
    """ELF identity of a binary together with the architecture derived from it.

    The four ``_elf_*`` fields are what :meth:`is_compatible` compares. The
    architecture fields are only reachable through the two properties below,
    which raise ``ValueError`` instead of handing out an unusable value.
    """

    _elf_osabi: str
    _elf_class: int
    _elf_little_endian: bool
    _elf_machine: str
    _base_arch: Architecture | None
    _ext_arch: Architecture | None
    _error_msg: str | None

    def is_compatible(self, other: Platform) -> bool:
        """Return True when ``other`` has a matching ELF identity.

        Class, endianness and machine must be equal. The OS ABI must either be
        equal or both values must belong to the NONE/SYSV/GNU/LINUX group,
        whose members are treated as interchangeable.
        """
        mine = (self._elf_class, self._elf_little_endian, self._elf_machine)
        theirs = (other._elf_class, other._elf_little_endian, other._elf_machine)
        if mine != theirs:
            return False
        if self._elf_osabi == other._elf_osabi:
            return True
        interchangeable = {
            "ELFOSABI_NONE",
            "ELFOSABI_SYSV",
            "ELFOSABI_GNU",
            "ELFOSABI_LINUX",
        }
        return {self._elf_osabi, other._elf_osabi} <= interchangeable

    @property
    def baseline_architecture(self) -> Architecture:
        """The baseline architecture; raises ``ValueError`` if there is none."""
        if self._base_arch is None:
            raise ValueError(self._error_msg)
        return self._base_arch

    @property
    def extended_architecture(self) -> Architecture | None:
        """The extended architecture (may be None).

        Raises ``ValueError`` when an error message was recorded.
        """
        if self._error_msg is None:
            return self._ext_arch
        raise ValueError(self._error_msg)
@dataclass(frozen=True)
class DynamicLibrary:
    """One shared-library dependency recorded while walking an ELF tree."""

    # Name of the dependency as it appears in the DT_NEEDED entry.
    soname: str
    # Path at which the library was found in the search paths; None when the
    # library was not located (or its resolution was skipped).
    path: str | None
    # ``path`` with symlinks resolved; None under the same conditions.
    realpath: Path | None
    # ELF platform of the library; left as None when it was not analyzed.
    platform: Platform | None = None
    # DT_NEEDED entries of the library itself.
    needed: tuple[str, ...] = ()
@dataclass(frozen=True)
class DynamicExecutable:
    """Result of ``ldd()``: the scanned ELF and everything it depends on."""

    # Normalized, root-prefixed PT_INTERP path; None when no interpreter
    # segment was read.
    interpreter: str | None
    # libc flavour detected for the ELF, or None when it could not be told.
    libc: Libc | None
    # Path to report for the ELF (the ``display`` value when one was given).
    path: str
    # Path of the file that was actually opened and parsed.
    realpath: Path
    # ELF platform of the file.
    platform: Platform
    # DT_NEEDED entries, minus the ones that were excluded.
    needed: tuple[str, ...]
    # Search paths parsed from DT_RPATH (emptied when DT_RUNPATH is set).
    rpath: tuple[str, ...]
    # Search paths parsed from DT_RUNPATH.
    runpath: tuple[str, ...]
    # Every library seen in the dependency tree, keyed by soname.
    libraries: dict[str, DynamicLibrary]
def _get_platform(elf: ELFFile) -> Platform:
    """Build the ``Platform`` description of an already opened ELF file.

    The OS ABI, class, endianness and machine are read from the ELF header and
    mapped to a baseline ``Architecture`` when the combination is known. For
    x86_64 the GNU property notes are scanned for a required ISA level; for
    armv7l the ELF flags must announce EABI version 5 and hard-float. Any
    problem is recorded as an error message on the returned ``Platform``
    (and the baseline architecture is then cleared) rather than raised here.
    """
    elf_osabi = elf.header["e_ident"]["EI_OSABI"]
    elf_class = elf.elfclass
    elf_little_endian = elf.little_endian
    elf_machine = elf["e_machine"]
    # (machine, class, little-endian) -> baseline architecture; any other
    # combination yields None.
    base_arch = {
        ("EM_386", 32, True): Architecture.i686,
        ("EM_X86_64", 64, True): Architecture.x86_64,
        ("EM_PPC64", 64, True): Architecture.ppc64le,
        ("EM_PPC64", 64, False): Architecture.ppc64,
        ("EM_RISCV", 64, True): Architecture.riscv64,
        ("EM_AARCH64", 64, True): Architecture.aarch64,
        ("EM_S390", 64, False): Architecture.s390x,
        ("EM_ARM", 32, True): Architecture.armv7l,
        ("EM_LOONGARCH", 64, True): Architecture.loongarch64,
    }.get((elf_machine, elf_class, elf_little_endian), None)
    ext_arch: Architecture | None = None
    error_msg: str | None = None
    flags = elf["e_flags"]
    # Sanity check: every architecture in the table must be its own baseline.
    assert base_arch is None or base_arch.baseline == base_arch  # noqa: S101
    if base_arch is None:
        error_msg = "Unknown architecture"
    elif base_arch == Architecture.x86_64:
        # Look for a GNU property note announcing the needed x86 ISA level.
        for section in elf.iter_sections():
            if not isinstance(section, NoteSection):
                continue
            for note in section.iter_notes():
                if note["n_type"] != "NT_GNU_PROPERTY_TYPE_0":
                    continue
                if note["n_name"] != "GNU":
                    continue
                for prop in note["n_desc"]:
                    if prop.pr_type != "GNU_PROPERTY_X86_ISA_1_NEEDED":
                        continue
                    # Only a 4-byte payload is interpreted.
                    if prop.pr_datasz != 4:
                        continue
                    data = prop.pr_data
                    data -= data & 1  # clear baseline
                    # Highest requested level wins: bit 8 -> v4, 4 -> v3, 2 -> v2.
                    if data & 8 == 8:
                        ext_arch = Architecture.x86_64_v4
                        break
                    if data & 4 == 4:
                        ext_arch = Architecture.x86_64_v3
                        break
                    if data & 2 == 2:
                        ext_arch = Architecture.x86_64_v2
                        break
                    # Any bit left over is a level this code does not know.
                    if data != 0:
                        error_msg = "unknown x86_64 ISA"
                    # The first matching property ends this property loop only;
                    # the enclosing note and section loops keep iterating.
                    break
    elif base_arch == Architecture.armv7l:
        if (flags & E_FLAGS.EF_ARM_EABIMASK) != E_FLAGS.EF_ARM_EABI_VER5:
            error_msg = "Invalid ARM EABI version for armv7l"
        elif (flags & E_FLAGS.EF_ARM_ABI_FLOAT_HARD) != E_FLAGS.EF_ARM_ABI_FLOAT_HARD:
            error_msg = "armv7l shall use hard-float"
    # An error invalidates the baseline so that the Platform property raises.
    if error_msg is not None:
        base_arch = None
    return Platform(
        elf_osabi,
        elf_class,
        elf_little_endian,
        elf_machine,
        base_arch,
        ext_arch,
        error_msg,
    )
def normpath(path: str) -> str:
    """Normalize ``path`` and collapse any double slash that survives.

    ``os.path.normpath()`` keeps a leading double slash, e.g. both ``//`` and
    ``//..//`` come back as ``//``. Those are squashed to a single ``/`` here.
    """
    normalized = os.path.normpath(path)
    return normalized.replace("//", "/")
def readlink(path: str, root: str, *, prefixed: bool = False) -> str:
    """Like os.readlink(), but relative to a ``root``

    Symlinks are followed one hop at a time, with absolute link targets
    interpreted relative to ``root``. At most 40 hops are followed, so a
    cyclic chain of symlinks (``a -> b -> a``) ends instead of looping
    forever; in that case the last path reached is returned, which is still a
    symlink.

    This does not currently handle the pathological case:
        /lib/foo.so -> ../../../../../../../foo.so
    This relies on the .. entries in / to point to itself.

    Parameters
    ----------
    path
        The symlink to read
    root
        The path to use for resolving absolute symlinks
    prefixed
        When False, the ``path`` must not have ``root`` prefixed to it, nor
        will the return value have ``root`` prefixed.  When True, ``path``
        must have ``root`` prefixed, and the return value will have ``root``
        added.

    Returns
    -------
    A fully resolved symlink path
    """
    # Same order of magnitude as the limit the Linux kernel applies while
    # resolving a path; far more than any real library chain needs.
    max_hops = 40

    root = root.rstrip("/")
    if prefixed:
        # Work on the part of the path that lives below ``root``.
        path = path[len(root) :]

    for _ in range(max_hops):
        if not os.path.islink(root + path):
            break
        # A relative target is resolved against the link's directory; an
        # absolute target replaces the path (os.path.join drops the first arg).
        path = os.path.join(os.path.dirname(path), os.readlink(root + path))

    return normpath((root + path) if prefixed else path)
def dedupe(items: list[str]) -> list[str]:
    """Return ``items`` without duplicates, keeping first occurrences in order"""
    # dict keys are unique and remember insertion order.
    return list(dict.fromkeys(items))
def parse_ld_paths(
    str_ldpaths: str,
    path: str,
    root: str = "",
    keep_non_exist: bool = False,  # noqa: FBT001, FBT002
) -> list[str]:
    """Parse the colon-delimited list of paths and apply ldso rules to each

    Note the special handling as dictated by the ldso:
     - Empty paths are equivalent to $PWD
     - $ORIGIN (also spelled ${ORIGIN}) is expanded to the directory of the
       given file
     - (TODO) $LIB and friends

    Parameters
    ----------
    str_ldpaths
        A colon-delimited string of paths
    path
        The object actively being parsed (used for $ORIGIN)
    root
        The path to prepend to all paths found; it is not added to entries
        that use $ORIGIN nor to the $PWD stand-in for empty entries
    keep_non_exist
        Do not eliminate non-exist rpath from result

    Returns
    -------
    list of processed paths, normalized and without duplicates
    """
    ldpaths: list[str] = []
    for ldpath in str_ldpaths.split(":"):
        if ldpath == "":
            # The ldso treats "" paths as $PWD.
            ldpath_ = os.getcwd()
        elif "$ORIGIN" in ldpath or "${ORIGIN}" in ldpath:
            origin = os.path.dirname(os.path.abspath(path))
            # Both spellings of the token are equivalent for the ldso. A
            # callable replacement keeps ``origin`` from being read as a
            # regex template.
            ldpath_ = re.sub(r"\$\{ORIGIN\}|\$ORIGIN", lambda _m: origin, ldpath)
        else:
            ldpath_ = root + ldpath
        ldpaths.append(normpath(ldpath_))
    return [p for p in dedupe(ldpaths) if keep_non_exist or os.path.isdir(p)]
@functools.lru_cache
def parse_ld_so_conf(ldso_conf: str, *, root: str = "/", _first: bool = True) -> list[str]:
    """Load all the paths from a given ldso config file

    This should handle comments, whitespace, and "include" statements. An
    ``include`` line may carry several whitespace-separated glob patterns;
    the files matched by each pattern are read in sorted order so the
    resulting search order is deterministic.

    The result is memoized by ``functools.lru_cache``: callers receive the
    cached list object itself and must not modify it in place.

    Parameters
    ----------
    ldso_conf
        The file to scan
    root
        The path to prepend to all paths found
    _first
        Recursive use only; is this the top-level config file?

    Returns
    -------
    list of paths found; for the top-level call duplicates and entries that
    are not directories are removed
    """
    paths: list[str] = []
    dbg_pfx = "" if _first else " "
    try:
        log.debug("%sparse_ld_so_conf(%s)", dbg_pfx, ldso_conf)
        with open(ldso_conf) as f:
            for input_line in f:
                # Drop trailing comments and surrounding whitespace.
                line = input_line.split("#", 1)[0].strip()
                if not line:
                    continue
                if line.startswith(("include ", "include\t")):
                    for pattern in line[8:].split():
                        if pattern.startswith("/"):
                            # os.path.join copes with ``root`` given with or
                            # without a trailing slash.
                            glob_pattern = os.path.join(root, pattern.lstrip("/"))
                        else:
                            # Relative patterns are relative to the including file.
                            glob_pattern = os.path.dirname(ldso_conf) + "/" + pattern
                        log.debug("%s glob: %s", dbg_pfx, glob_pattern)
                        for path in sorted(glob.glob(glob_pattern)):
                            paths += parse_ld_so_conf(path, root=root, _first=False)
                else:
                    paths += [normpath(root + line)]
    except OSError as e:
        # A missing config file is expected; anything else is worth reporting.
        if e.errno != errno.ENOENT:
            log.warning(e)
    if _first:
        # TODO: Load paths from ldso itself.
        # Remove duplicate entries to speed things up.
        paths = [p for p in dedupe(paths) if os.path.isdir(p)]
    return paths
@functools.lru_cache
def load_ld_paths(
    libc: Libc | None,
    root: str = "/",
    prefix: str = "",
) -> dict[str, list[str]]:
    """Load linker paths from common locations

    This parses the ld.so.conf (or, for musl, the ``ld-musl-*.path`` file)
    and the LD_LIBRARY_PATH env var.

    The result is memoized by ``functools.lru_cache``; callers that want to
    add keys should work on a copy of the returned dict.

    Parameters
    ----------
    libc
        The libc flavour whose configuration should be read; anything other
        than ``Libc.MUSL`` is handled through ld.so.conf
    root
        The root tree to prepend to paths
    prefix
        The path under ``root`` to search

    Returns
    -------
    dict containing library paths to search, with the keys ``conf``, ``env``
    and ``interp``
    """
    ldpaths: dict[str, list[str]] = {"conf": [], "env": [], "interp": []}

    # Load up $LD_LIBRARY_PATH.
    env_ldpath = os.environ.get("LD_LIBRARY_PATH")
    if env_ldpath is not None:
        if root != "/":
            log.warning("ignoring LD_LIBRARY_PATH due to ROOT usage")
        else:
            # TODO: If this contains $ORIGIN, we probably have to parse this
            # on a per-ELF basis so it can get turned into the right thing.
            ldpaths["env"] = parse_ld_paths(env_ldpath, path="")

    if libc == Libc.MUSL:
        # from https://git.musl-libc.org/cgit/musl/tree/ldso
        # /dynlink.c?id=3f701faace7addc75d16dea8a6cd769fa5b3f260#n1063
        root_prefix = Path(root) / prefix
        ld_musl = list((root_prefix / "etc").glob("ld-musl-*.path"))
        assert len(ld_musl) <= 1  # noqa: S101
        if len(ld_musl) == 0:
            # No path file: fall back to the built-in default directories.
            ldpaths["conf"] = [
                root + "/lib",
                root + "/usr/local/lib",
                root + "/usr/lib",
            ]
        else:
            ldpaths["conf"] = []
            # musl separates the entries of its path file with either a
            # colon or a newline.
            for ldpath in re.split(r"[:\n]", ld_musl[0].read_text()):
                ldpath_stripped = ldpath.strip()
                if ldpath_stripped == "":
                    continue
                ldpaths["conf"].append(root + ldpath_stripped)
    else:
        # Load up /etc/ld.so.conf.
        # parse_ld_so_conf() hands out its cached list, so build a new list
        # rather than extending that one in place.
        ldpaths["conf"] = [
            *parse_ld_so_conf(root + prefix + "/etc/ld.so.conf", root=root),
            # the trusted directories are not necessarily in ld.so.conf
            "/lib",
            "/lib64/",
            "/usr/lib",
            "/usr/lib64",
        ]

    log.debug("linker ldpaths: %s", ldpaths)
    return ldpaths
def find_lib(
    platform: Platform,
    lib: str,
    ldpaths: list[str],
    root: str = "/",
) -> tuple[Path | None, str | None]:
    """Try to locate a ``lib`` that is compatible with ``platform`` in the
    given ``ldpaths``

    The search paths are tried in order; the first candidate that is a
    regular file and whose ELF platform is compatible wins.

    Parameters
    ----------
    platform : Platform
        The platform which the library should be compatible with (ELF wise)
    lib : str
        The library (basename) to search for
    ldpaths : list[str]
        A list of paths to search
    root : str
        The root path to resolve symlinks

    Returns
    -------
    Tuple ``(realpath, fullpath)``: the symlink-resolved path of the library
    followed by the path at which it was found in ``ldpaths``. Both are None
    when no compatible library was found.
    """
    for ldpath in ldpaths:
        path = os.path.join(ldpath, lib)
        target = Path(readlink(path, root, prefixed=True))
        # Skip missing entries as well as anything that cannot be opened as a
        # file, such as a directory carrying the library's name.
        if not target.is_file():
            continue
        with open(target, "rb") as f:
            libelf = ELFFile(f)
            if platform.is_compatible(_get_platform(libelf)):
                return target, path
    return None, None
def ldd(
    path: Path,
    root: str = "/",
    prefix: str = "",
    ldpaths: dict[str, list[str]] | None = None,
    display: str | None = None,
    exclude: frozenset[str] = frozenset(),
    _all_libs: dict[str, DynamicLibrary] | None = None,
) -> DynamicExecutable:
    """Parse the ELF dependency tree of the specified file

    Parameters
    ----------
    path
        The ELF to scan
    root
        The root tree to prepend to paths; this applies to interp and rpaths
        only as ``path`` and ``ldpaths`` are expected to be prefixed already
    prefix
        The path under ``root`` to search
    ldpaths
        dict containing library paths to search; should have the keys:
        conf, env, interp. If not supplied, the function ``load_ld_paths``
        will be called.
    display
        The path to show rather than ``path``
    exclude
        List of soname (DT_NEEDED) to exclude from the tree; entries are
        ``fnmatch`` patterns and are also matched against the resolved path
        of each library
    _all_libs
        Recursive use only; dict of all libs we've seen

    Returns
    -------
    A ``DynamicExecutable`` describing the scanned ELF: its interpreter and
    libc (only determined for the top-level call), its platform, its
    DT_NEEDED entries minus the excluded ones, its rpath/runpath, and
    ``libraries``, a dict mapping the soname of every library seen in the
    whole tree to a ``DynamicLibrary``.

    Raises
    ------
    InvalidLibcError
        When the dependencies point at a libc that contradicts the one
        already determined
    """
    _first = _all_libs is None
    if _all_libs is None:
        _all_libs = {}

    log.debug("ldd(%s)", path)

    interpreter: str | None = None
    libc: Libc | None = None
    needed: list[str] = []
    rpaths: list[str] = []
    runpaths: list[str] = []

    with open(path, "rb") as f:
        elf = ELFFile(f)

        # get the platform
        platform = _get_platform(elf)

        # If this is the first ELF, extract the interpreter.
        if _first:
            for segment in elf.iter_segments():
                if segment.header.p_type != "PT_INTERP":
                    continue

                interp = segment.get_interp_name()
                log.debug(" interp = %s", interp)
                interpreter = normpath(root + interp)
                soname = os.path.basename(interpreter)
                _all_libs[soname] = DynamicLibrary(
                    soname,
                    interpreter,
                    Path(readlink(interpreter, root, prefixed=True)),
                    platform,
                )
                # if we have an interpreter and it's not MUSL, assume GLIBC
                libc = Libc.MUSL if soname.startswith("ld-musl-") else Libc.GLIBC
                if ldpaths is None:
                    ldpaths = load_ld_paths(libc, root, prefix).copy()
                # TODO: Should read it and scan for /lib paths.
                interp_dir = os.path.dirname(interp)
                # Remove ``prefix`` as a leading substring. str.lstrip() would
                # instead strip any leading characters found in ``prefix``.
                unprefixed_dir = interp_dir
                if prefix and interp_dir.startswith(prefix):
                    unprefixed_dir = interp_dir[len(prefix) :]
                ldpaths["interp"] = [
                    normpath(root + interp_dir),
                    normpath(root + prefix + "/usr" + unprefixed_dir),
                ]
                log.debug(" ldpaths[interp] = %s", ldpaths["interp"])
                break

        # Parse the ELF's dynamic tags.
        for segment in elf.iter_segments():
            if segment.header.p_type != "PT_DYNAMIC":
                continue

            for t in segment.iter_tags():
                if t.entry.d_tag == "DT_RPATH":
                    rpaths = parse_ld_paths(t.rpath, path=str(path), root=root)
                elif t.entry.d_tag == "DT_RUNPATH":
                    runpaths = parse_ld_paths(t.runpath, path=str(path), root=root)
                elif t.entry.d_tag == "DT_NEEDED":
                    needed.append(t.needed)
            if runpaths:
                # If both RPATH and RUNPATH are set, only the latter is used.
                rpaths = []

            # We assume there is only one PT_DYNAMIC. This is
            # probably fine since the runtime ldso does the same.
            break

        del elf

    if _first:
        # get the libc based on dependencies
        for soname in needed:
            if soname.startswith(("libc.musl-", "ld-musl-")):
                if libc is None:
                    libc = Libc.MUSL
                if libc != Libc.MUSL:
                    msg = f"found a dependency on MUSL but the libc is already set to {libc}"
                    raise InvalidLibcError(msg)
            elif soname == "libc.so.6" or soname.startswith(("ld-linux-", "ld64.so.")):
                if libc is None:
                    libc = Libc.GLIBC
                if libc != Libc.GLIBC:
                    msg = f"found a dependency on GLIBC but the libc is already set to {libc}"
                    raise InvalidLibcError(msg)
        if libc is None:
            # try the filename as a last resort
            if path.name.endswith(("-arm-linux-musleabihf.so", "-linux-musl.so")):
                libc = Libc.MUSL
            elif path.name.endswith(("-arm-linux-gnueabihf.so", "-linux-gnu.so")):
                # before python 3.11, musl was also using gnu
                soabi = path.stem.split(".")[-1].split("-")
                valid_python = tuple(f"3{minor}" for minor in range(11, 100))
                if soabi[0] == "cpython" and soabi[1].startswith(valid_python):
                    libc = Libc.GLIBC

        if ldpaths is None:
            ldpaths = load_ld_paths(libc, root, prefix).copy()
        # Propagate the rpaths used by the main ELF since those will be
        # used at runtime to locate things.
        ldpaths["rpath"] = rpaths
        ldpaths["runpath"] = runpaths
        log.debug(" ldpaths[rpath] = %s", rpaths)
        log.debug(" ldpaths[runpath] = %s", runpaths)

    # Search for the libs this ELF uses. The order of the list is the order
    # in which directories are tried.
    assert ldpaths is not None  # noqa: S101
    all_ldpaths = (
        ldpaths["rpath"]
        + rpaths
        + runpaths
        + ldpaths["env"]
        + ldpaths["runpath"]
        + ldpaths["conf"]
        + ldpaths["interp"]
    )
    _excluded_libs: set[str] = set()
    for soname in needed:
        if soname in _all_libs:
            continue
        if soname in _excluded_libs:
            continue
        if any(fnmatch(soname, e) for e in exclude):
            log.info("Excluding %s", soname)
            _excluded_libs.add(soname)
            continue
        # special case for libpython, see https://github.com/pypa/auditwheel/issues/589
        # we want to return the dependency to be able to remove it later on but
        # we don't want to analyze it for symbol versions nor do we want to analyze its
        # dependencies as it will be removed.
        if LIBPYTHON_RE.match(soname):
            log.info("Skip %s resolution", soname)
            _all_libs[soname] = DynamicLibrary(soname, None, None)
            continue
        realpath, fullpath = find_lib(platform, soname, all_ldpaths, root)
        if realpath is not None and any(fnmatch(str(realpath), e) for e in exclude):
            log.info("Excluding %s", realpath)
            _excluded_libs.add(soname)
            continue
        # Register the library before recursing so that circular dependencies
        # hit the "already seen" check above.
        _all_libs[soname] = DynamicLibrary(soname, fullpath, realpath)
        if realpath is None or fullpath is None:
            continue
        dependency = ldd(realpath, root, prefix, ldpaths, fullpath, exclude, _all_libs)
        # Replace the placeholder with the fully analyzed entry.
        _all_libs[soname] = DynamicLibrary(
            soname,
            fullpath,
            realpath,
            dependency.platform,
            dependency.needed,
        )

    return DynamicExecutable(
        interpreter,
        libc,
        str(path) if display is None else display,
        path,
        platform,
        tuple(soname for soname in needed if soname not in _excluded_libs),
        tuple(rpaths),
        tuple(runpaths),
        _all_libs,
    )