From a31408f3cc29f76a82416e37552ec12aa2965945 Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Fri, 7 Mar 2025 17:38:18 +0800 Subject: [PATCH 01/20] Init pool impl grouped by file path --- src/auditwheel/pool.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 src/auditwheel/pool.py diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py new file mode 100644 index 00000000..1f4faaa5 --- /dev/null +++ b/src/auditwheel/pool.py @@ -0,0 +1,34 @@ +from concurrent.futures import Future, ThreadPoolExecutor +from pathlib import Path +from typing import Any, Callable, Optional + + +class FileTaskExecutor: + def __init__(self, concurrent: bool = False): + self.executor = ThreadPoolExecutor() if concurrent else None + self.working_map: dict[Path, Future[tuple[str, str]]] = {} + + def submit( + self, path: Path, fn: Callable[[Any], Any], /, *args: Any, **kwargs: Any + ) -> Future[Any]: + future: Future[Any] + if self.executor is None: + future = Future() + future.set_result(fn(*args, **kwargs)) + return future + assert path not in self.working_map + future = self.executor.submit(fn, *args, **kwargs) + future.add_done_callback(lambda f: self.working_map.pop(path)) + self.working_map[path] = future + return future + + def wait(self, path: Optional[Path] = None) -> None: + if self.executor is None: + return + if path is not None: + if path in self.working_map: + self.working_map.pop(path).result() + return + + for path in self.working_map: + self.wait(path) From 019e074387e79b29b0608bb50f9f3e586d17ec02 Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Fri, 7 Mar 2025 18:41:23 +0800 Subject: [PATCH 02/20] update --- src/auditwheel/pool.py | 95 +++++++++++++++++++++++++++++++++++------- 1 file changed, 80 insertions(+), 15 deletions(-) diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py index 1f4faaa5..e80c4cc7 100644 --- a/src/auditwheel/pool.py +++ b/src/auditwheel/pool.py @@ -1,34 +1,99 @@ +import time from concurrent.futures 
import Future, ThreadPoolExecutor from pathlib import Path from typing import Any, Callable, Optional class FileTaskExecutor: - def __init__(self, concurrent: bool = False): - self.executor = ThreadPoolExecutor() if concurrent else None + """A task executor that manages concurrent file operations with deduplication. + + This executor ensures that only one task per file path runs at a time, even if + multiple tasks are submitted for the same file. It executes tasks with `concurrent` + threads when `concurrent` >= 1, specially when `concurrent` is 1, it will execute + tasks sequentially. When `concurrent` < 1, it will use the default setting of + ThreadPoolExecutor. + + Args: + concurrent (int): Number of concurrent threads to use. Defaults to 1. + Example: + >>> executor = FileTaskExecutor(concurrent=2) + >>> future = executor.submit(Path("file.txt"), process_file, "file.txt") + >>> executor.wait() # Wait for all tasks to complete + """ + + def __init__(self, concurrent: int = 1): + self.executor = ( + None + if concurrent == 1 + else ThreadPoolExecutor(concurrent if concurrent > 1 else None) + ) self.working_map: dict[Path, Future[tuple[str, str]]] = {} def submit( self, path: Path, fn: Callable[[Any], Any], /, *args: Any, **kwargs: Any - ) -> Future[Any]: + ) -> None: + if not path.is_absolute(): + path = path.absolute() + future: Future[Any] if self.executor is None: future = Future() future.set_result(fn(*args, **kwargs)) - return future - assert path not in self.working_map - future = self.executor.submit(fn, *args, **kwargs) - future.add_done_callback(lambda f: self.working_map.pop(path)) - self.working_map[path] = future - return future + return + + if path not in self.working_map: + future = self.executor.submit(fn, *args, **kwargs) + self.working_map[path] = future + else: + future = self.working_map[path] + future.add_done_callback(lambda _: self.working_map.pop(path, None)) + future.add_done_callback(lambda _: self.submit(path, fn, *args, **kwargs)) def 
wait(self, path: Optional[Path] = None) -> None: + """Wait for tasks to complete. + + If a path is specified, waits only for that specific file's task to complete. + Otherwise, waits for all tasks to complete. + + Args: + path (Optional[Path]): The specific file path to wait for. If None, + waits for all tasks to complete. + """ if self.executor is None: return - if path is not None: - if path in self.working_map: - self.working_map.pop(path).result() - return - - for path in self.working_map: + if path is not None and path in self.working_map: + self.working_map.pop(path, None).result() + # may have chained callback, so we need to wait again self.wait(path) + + while self.working_map: + # Process one task for each for-loop + # for map might be changed during the loop + for path in self.working_map: + self.wait(path) + break + + +def fake_job(i: int) -> int: + print(f"start {i}") + time.sleep(i) + print(f"end {i}") + + +if __name__ == "__main__": + executor = FileTaskExecutor(concurrent=0) + for i in range(10): + executor.submit(Path(f"test{i}.txt"), fake_job, i) + for i in range(10): + executor.submit(Path(f"test{i}.txt"), fake_job, i) + for i in range(10): + executor.submit(Path(f"test{i}.txt"), fake_job, i) + for i in range(10): + executor.submit(Path(f"test{i}.txt"), fake_job, i) + for i in range(10): + executor.submit(Path(f"test{i}.txt"), fake_job, i) + for i in range(10): + executor.submit(Path(f"test{i}.txt"), fake_job, i) + for i in range(10): + executor.submit(Path(f"test{i}.txt"), fake_job, i) + executor.wait() From b5867390722e75296a9212b43e27e74568f4b025 Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Fri, 7 Mar 2025 18:53:32 +0800 Subject: [PATCH 03/20] simplify --- src/auditwheel/pool.py | 42 +++++++++++------------------------------- 1 file changed, 11 insertions(+), 31 deletions(-) diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py index e80c4cc7..2cd1f3d9 100644 --- a/src/auditwheel/pool.py +++ b/src/auditwheel/pool.py @@ -39,15 +39,12 
@@ def submit( if self.executor is None: future = Future() future.set_result(fn(*args, **kwargs)) - return + return None - if path not in self.working_map: - future = self.executor.submit(fn, *args, **kwargs) - self.working_map[path] = future - else: - future = self.working_map[path] - future.add_done_callback(lambda _: self.working_map.pop(path, None)) - future.add_done_callback(lambda _: self.submit(path, fn, *args, **kwargs)) + assert path not in self.working_map, "path already in working_map" + future = self.executor.submit(fn, *args, **kwargs) + self.working_map[path] = future + return future def wait(self, path: Optional[Path] = None) -> None: """Wait for tasks to complete. @@ -61,17 +58,12 @@ def wait(self, path: Optional[Path] = None) -> None: """ if self.executor is None: return - if path is not None and path in self.working_map: - self.working_map.pop(path, None).result() - # may have chained callback, so we need to wait again - self.wait(path) - - while self.working_map: - # Process one task for each for-loop - # for map might be changed during the loop - for path in self.working_map: - self.wait(path) - break + if path is None: + for future in self.working_map.values(): + future.result() + self.working_map.clear() + elif future := self.working_map.pop(path, None): + future.result() def fake_job(i: int) -> int: @@ -82,18 +74,6 @@ def fake_job(i: int) -> int: if __name__ == "__main__": executor = FileTaskExecutor(concurrent=0) - for i in range(10): - executor.submit(Path(f"test{i}.txt"), fake_job, i) - for i in range(10): - executor.submit(Path(f"test{i}.txt"), fake_job, i) - for i in range(10): - executor.submit(Path(f"test{i}.txt"), fake_job, i) - for i in range(10): - executor.submit(Path(f"test{i}.txt"), fake_job, i) - for i in range(10): - executor.submit(Path(f"test{i}.txt"), fake_job, i) - for i in range(10): - executor.submit(Path(f"test{i}.txt"), fake_job, i) for i in range(10): executor.submit(Path(f"test{i}.txt"), fake_job, i) executor.wait() 
From 70dddf0f5cbf4dad80cd11232d1b54bc4e63c0c9 Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Fri, 7 Mar 2025 21:55:16 +0800 Subject: [PATCH 04/20] parallel get_wheel_elfdata --- src/auditwheel/pool.py | 29 +++++++---------------- src/auditwheel/wheel_abi.py | 46 ++++++++++++++++++++++++++----------- 2 files changed, 41 insertions(+), 34 deletions(-) diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py index 2cd1f3d9..7ca9fd30 100644 --- a/src/auditwheel/pool.py +++ b/src/auditwheel/pool.py @@ -1,4 +1,3 @@ -import time from concurrent.futures import Future, ThreadPoolExecutor from pathlib import Path from typing import Any, Callable, Optional @@ -21,30 +20,28 @@ class FileTaskExecutor: >>> executor.wait() # Wait for all tasks to complete """ - def __init__(self, concurrent: int = 1): + def __init__(self, concurrent: int = 0): self.executor = ( None if concurrent == 1 else ThreadPoolExecutor(concurrent if concurrent > 1 else None) ) - self.working_map: dict[Path, Future[tuple[str, str]]] = {} + self.working_map: dict[Path, Future[Any]] = {} def submit( - self, path: Path, fn: Callable[[Any], Any], /, *args: Any, **kwargs: Any + self, path: Path, fn: Callable[..., Any], /, *args: Any, **kwargs: Any ) -> None: if not path.is_absolute(): path = path.absolute() future: Future[Any] if self.executor is None: - future = Future() - future.set_result(fn(*args, **kwargs)) - return None + fn(*args, **kwargs) + return assert path not in self.working_map, "path already in working_map" future = self.executor.submit(fn, *args, **kwargs) self.working_map[path] = future - return future def wait(self, path: Optional[Path] = None) -> None: """Wait for tasks to complete. 
@@ -62,18 +59,8 @@ def wait(self, path: Optional[Path] = None) -> None: for future in self.working_map.values(): future.result() self.working_map.clear() - elif future := self.working_map.pop(path, None): - future.result() - - -def fake_job(i: int) -> int: - print(f"start {i}") - time.sleep(i) - print(f"end {i}") + elif path in self.working_map: + self.working_map.pop(path).result() -if __name__ == "__main__": - executor = FileTaskExecutor(concurrent=0) - for i in range(10): - executor.submit(Path(f"test{i}.txt"), fake_job, i) - executor.wait() +POOL = FileTaskExecutor(2) diff --git a/src/auditwheel/wheel_abi.py b/src/auditwheel/wheel_abi.py index 57539233..5effc69f 100644 --- a/src/auditwheel/wheel_abi.py +++ b/src/auditwheel/wheel_abi.py @@ -10,6 +10,10 @@ from pathlib import Path from typing import Optional, TypeVar +from elftools.elf.elffile import ELFFile + +from auditwheel.pool import POOL + from . import json from .architecture import Architecture from .elfutils import ( @@ -94,19 +98,19 @@ def get_wheel_elfdata( shared_libraries_with_invalid_machine = [] platform_wheel = False - for fn, elf in elf_file_filter(ctx.iter_files()): - # Check for invalid binary wheel format: no shared library should - # be found in purelib - so_name = fn.name - # If this is in purelib, add it to the list of shared libraries in - # purelib - if any(p.name == "purelib" for p in fn.parents): - shared_libraries_in_purelib.append(so_name) + def inner(fn: Path) -> None: + nonlocal \ + platform_wheel, \ + shared_libraries_in_purelib, \ + uses_ucs2_symbols, \ + uses_PyFPE_jbuf + + with open(fn, "rb") as f: + elf = ELFFile(f) + + so_name = fn.name - # If at least one shared library exists in purelib, this is going - # to fail and there's no need to do further checks - if not shared_libraries_in_purelib: log.debug("processing: %s", fn) elftree = ldd(fn, exclude=exclude) @@ -115,11 +119,11 @@ def get_wheel_elfdata( if arch != wheel_policy.architecture.baseline: 
shared_libraries_with_invalid_machine.append(so_name) log.warning("ignoring: %s with %s architecture", so_name, arch) - continue + return except ValueError: shared_libraries_with_invalid_machine.append(so_name) log.warning("ignoring: %s with unknown architecture", so_name) - continue + return platform_wheel = True @@ -148,6 +152,20 @@ def get_wheel_elfdata( # its internal references later. nonpy_elftree[fn] = elftree + # Create new ELFFile object to avoid use-after-free + for fn, _elf in elf_file_filter(ctx.iter_files()): + # Check for invalid binary wheel format: no shared library should + # be found in purelib + so_name = fn.name + + # If this is in purelib, add it to the list of shared libraries in + # purelib + if any(p.name == "purelib" for p in fn.parents): + shared_libraries_in_purelib.append(so_name) + + if not shared_libraries_in_purelib: + POOL.submit(fn, inner, fn) + # If at least one shared library exists in purelib, raise an error if shared_libraries_in_purelib: libraries = "\n\t".join(shared_libraries_in_purelib) @@ -159,6 +177,8 @@ def get_wheel_elfdata( ) raise RuntimeError(msg) + POOL.wait() + if not platform_wheel: raise NonPlatformWheel( wheel_policy.architecture, shared_libraries_with_invalid_machine From 0cfd26811e04599eb96c665e49bd5c8416c1523b Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Fri, 7 Mar 2025 22:12:07 +0800 Subject: [PATCH 05/20] 1/2 repair_wheel --- src/auditwheel/pool.py | 3 +++ src/auditwheel/repair.py | 14 +++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py index 7ca9fd30..6ae08fda 100644 --- a/src/auditwheel/pool.py +++ b/src/auditwheel/pool.py @@ -62,5 +62,8 @@ def wait(self, path: Optional[Path] = None) -> None: elif path in self.working_map: self.working_map.pop(path).result() + def __contains__(self, fn: Path) -> bool: + return self.executor is not None and fn in self.working_map + POOL = FileTaskExecutor(2) diff --git a/src/auditwheel/repair.py 
b/src/auditwheel/repair.py index 964676cc..97fd9cb9 100644 --- a/src/auditwheel/repair.py +++ b/src/auditwheel/repair.py @@ -14,6 +14,7 @@ from subprocess import check_call from auditwheel.patcher import ElfPatcher +from auditwheel.pool import POOL from .elfutils import elf_read_dt_needed, elf_read_rpaths, is_subdir from .hashfile import hashfile @@ -82,9 +83,14 @@ def repair_wheel( if not dest_dir.exists(): dest_dir.mkdir() - new_soname, new_path = copylib(src_path, dest_dir, patcher) + new_soname, new_path = copylib(src_path, dest_dir, patcher, dry=True) + if new_path not in POOL: + POOL.submit(new_path, copylib, src_path, dest_dir, patcher) soname_map[soname] = (new_soname, new_path) replacements.append((soname, new_soname)) + + POOL.wait() + if replacements: patcher.replace_needed(fn, *replacements) @@ -127,7 +133,9 @@ def strip_symbols(libraries: Iterable[Path]) -> None: check_call(["strip", "-s", lib]) -def copylib(src_path: Path, dest_dir: Path, patcher: ElfPatcher) -> tuple[str, Path]: +def copylib( + src_path: Path, dest_dir: Path, patcher: ElfPatcher, dry: bool = False +) -> tuple[str, Path]: """Graft a shared library from the system into the wheel and update the relevant links. 
@@ -151,7 +159,7 @@ def copylib(src_path: Path, dest_dir: Path, patcher: ElfPatcher) -> tuple[str, P new_soname = src_name dest_path = dest_dir / new_soname - if dest_path.exists(): + if dry or dest_path.exists(): return new_soname, dest_path logger.debug("Grafting: %s -> %s", src_path, dest_path) From ee0184db5745ba2761b08a3daebbf4697d91364a Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Sat, 8 Mar 2025 15:14:30 +0800 Subject: [PATCH 06/20] 2/3 repair_wheel --- src/auditwheel/pool.py | 70 +++++++++++++++++++++++++++++++++------- src/auditwheel/repair.py | 70 ++++++++++++++++++++++------------------ 2 files changed, 97 insertions(+), 43 deletions(-) diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py index 6ae08fda..3bb53823 100644 --- a/src/auditwheel/pool.py +++ b/src/auditwheel/pool.py @@ -1,8 +1,14 @@ +import functools from concurrent.futures import Future, ThreadPoolExecutor from pathlib import Path +from time import sleep from typing import Any, Callable, Optional +def yield_thread() -> None: + sleep(0) + + class FileTaskExecutor: """A task executor that manages concurrent file operations with deduplication. 
@@ -28,20 +34,56 @@ def __init__(self, concurrent: int = 0): ) self.working_map: dict[Path, Future[Any]] = {} + def submit_chain( + self, path: Path, fn: Callable[..., Any], /, *args: Any, **kwargs: Any + ) -> Future[Any]: + return self._submit(path, fn, True, *args, **kwargs) + def submit( self, path: Path, fn: Callable[..., Any], /, *args: Any, **kwargs: Any - ) -> None: + ) -> Future[Any]: + return self._submit(path, fn, False, *args, **kwargs) + + def _submit( + self, + path: Path, + fn: Callable[..., Any], + chain: bool, + /, + *args: Any, + **kwargs: Any, + ) -> Future[Any]: if not path.is_absolute(): path = path.absolute() future: Future[Any] if self.executor is None: - fn(*args, **kwargs) - return + future = Future() + future.set_result(fn(*args, **kwargs)) + elif not chain: + assert path not in self.working_map, "path already in working_map" + future = self.executor.submit(fn, *args, **kwargs) + self.working_map[path] = future + else: + current = self.working_map[path] + future = Future() + + @functools.wraps(fn) + def new_fn(_current: Future[Any]) -> None: + nonlocal future, current + + assert _current == current + + self.working_map.pop(path) + self.working_map[path] = future + try: + future.set_result(fn(*args, **kwargs)) + except Exception as e: + future.set_exception(e) + + current.add_done_callback(new_fn) - assert path not in self.working_map, "path already in working_map" - future = self.executor.submit(fn, *args, **kwargs) - self.working_map[path] = future + return future def wait(self, path: Optional[Path] = None) -> None: """Wait for tasks to complete. 
@@ -55,12 +97,16 @@ def wait(self, path: Optional[Path] = None) -> None: """ if self.executor is None: return - if path is None: - for future in self.working_map.values(): - future.result() - self.working_map.clear() - elif path in self.working_map: - self.working_map.pop(path).result() + if path is not None: + while True: + yield_thread() + if path not in self.working_map: + return + self.working_map.pop(path).result() + else: + while self.working_map: + path = next(iter(self.working_map)) + self.wait(path) def __contains__(self, fn: Path) -> bool: return self.executor is not None and fn in self.working_map diff --git a/src/auditwheel/repair.py b/src/auditwheel/repair.py index 97fd9cb9..d67405db 100644 --- a/src/auditwheel/repair.py +++ b/src/auditwheel/repair.py @@ -14,11 +14,11 @@ from subprocess import check_call from auditwheel.patcher import ElfPatcher -from auditwheel.pool import POOL from .elfutils import elf_read_dt_needed, elf_read_rpaths, is_subdir from .hashfile import hashfile from .policy import WheelPolicies, get_replace_platforms +from .pool import POOL from .wheel_abi import get_wheel_elfdata from .wheeltools import InWheelCtx, add_platforms @@ -83,25 +83,32 @@ def repair_wheel( if not dest_dir.exists(): dest_dir.mkdir() - new_soname, new_path = copylib(src_path, dest_dir, patcher, dry=True) - if new_path not in POOL: - POOL.submit(new_path, copylib, src_path, dest_dir, patcher) - soname_map[soname] = (new_soname, new_path) - replacements.append((soname, new_soname)) + new_soname, new_path = get_new_soname(src_path, dest_dir) + if soname not in soname_map: + soname_map[soname] = (new_soname, new_path) + replacements.append((soname, new_soname)) - POOL.wait() + POOL.submit(new_path, copylib, src_path, dest_dir, patcher) if replacements: - patcher.replace_needed(fn, *replacements) + POOL.submit(fn, patcher.replace_needed, fn, *replacements) if len(ext_libs) > 0: - new_fn = fn - if _path_is_script(fn): - new_fn = 
_replace_elf_script_with_shim(match.group("name"), fn) - new_rpath = os.path.relpath(dest_dir, new_fn.parent) - new_rpath = os.path.join("$ORIGIN", new_rpath) - append_rpath_within_wheel(new_fn, new_rpath, ctx.name, patcher) + def _patch_fn(fn: Path) -> None: + new_fn = fn + if _path_is_script(fn): + POOL.wait(fn) + new_fn = _replace_elf_script_with_shim(match.group("name"), fn) + + new_rpath = os.path.relpath(dest_dir, new_fn.parent) + new_rpath = os.path.join("$ORIGIN", new_rpath) + + append_rpath_within_wheel(new_fn, new_rpath, ctx.name, patcher) + + POOL.submit_chain(fn, _patch_fn) + + POOL.wait() # we grafted in a bunch of libraries and modified their sonames, but # they may have internal dependencies (DT_NEEDED) on one another, so @@ -133,9 +140,21 @@ def strip_symbols(libraries: Iterable[Path]) -> None: check_call(["strip", "-s", lib]) -def copylib( - src_path: Path, dest_dir: Path, patcher: ElfPatcher, dry: bool = False -) -> tuple[str, Path]: +def get_new_soname(src_path: Path, dest_dir: Path) -> tuple[str, Path]: + with open(src_path, "rb") as f: + shorthash = hashfile(f)[:8] + src_name = src_path.name + base, ext = src_name.split(".", 1) + if not base.endswith(f"-{shorthash}"): + new_soname = f"{base}-{shorthash}.{ext}" + else: + new_soname = src_name + + dest_path = dest_dir / new_soname + return new_soname, dest_path + + +def copylib(src_path: Path, dest_dir: Path, patcher: ElfPatcher) -> None: """Graft a shared library from the system into the wheel and update the relevant links. @@ -148,19 +167,10 @@ def copylib( # if the library has a RUNPATH/RPATH we clear it and set RPATH to point to # its new location. 
- with open(src_path, "rb") as f: - shorthash = hashfile(f)[:8] + new_soname, dest_path = get_new_soname(src_path, dest_dir) - src_name = src_path.name - base, ext = src_name.split(".", 1) - if not base.endswith(f"-{shorthash}"): - new_soname = f"{base}-{shorthash}.{ext}" - else: - new_soname = src_name - - dest_path = dest_dir / new_soname - if dry or dest_path.exists(): - return new_soname, dest_path + if dest_path.exists(): + return logger.debug("Grafting: %s -> %s", src_path, dest_path) rpaths = elf_read_rpaths(src_path) @@ -174,8 +184,6 @@ def copylib( if any(itertools.chain(rpaths["rpaths"], rpaths["runpaths"])): patcher.set_rpath(dest_path, "$ORIGIN") - return new_soname, dest_path - def append_rpath_within_wheel( lib_name: Path, rpath: str, wheel_base_dir: Path, patcher: ElfPatcher From 06c89e7e8b60a71bf24e123fc1203e1693866837 Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Sat, 8 Mar 2025 15:26:27 +0800 Subject: [PATCH 07/20] 3/3 repair_wheel --- src/auditwheel/repair.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/auditwheel/repair.py b/src/auditwheel/repair.py index d67405db..c10187fb 100644 --- a/src/auditwheel/repair.py +++ b/src/auditwheel/repair.py @@ -121,7 +121,7 @@ def _patch_fn(fn: Path) -> None: if n in soname_map: replacements.append((n, soname_map[n][0])) if replacements: - patcher.replace_needed(path, *replacements) + POOL.submit(path, patcher.replace_needed, path, *replacements) if update_tags: ctx.out_wheel = add_platforms(ctx, abis, get_replace_platforms(abis[0])) @@ -131,13 +131,15 @@ def _patch_fn(fn: Path) -> None: extensions = external_refs_by_fn.keys() strip_symbols(itertools.chain(libs_to_strip, extensions)) + POOL.wait() return ctx.out_wheel def strip_symbols(libraries: Iterable[Path]) -> None: for lib in libraries: logger.info("Stripping symbols from %s", lib) - check_call(["strip", "-s", lib]) + POOL.submit_chain(lib, check_call, ["strip", "-s", lib]) + POOL.wait() def get_new_soname(src_path: Path, 
dest_dir: Path) -> tuple[str, Path]: From 85a5e0b14012172c981c75c89256f98a1c6094ef Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Sat, 8 Mar 2025 15:48:33 +0800 Subject: [PATCH 08/20] make mypy happy --- src/auditwheel/repair.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/auditwheel/repair.py b/src/auditwheel/repair.py index c10187fb..92449c72 100644 --- a/src/auditwheel/repair.py +++ b/src/auditwheel/repair.py @@ -1,5 +1,6 @@ from __future__ import annotations +import functools import itertools import logging import os @@ -96,6 +97,7 @@ def repair_wheel( if len(ext_libs) > 0: def _patch_fn(fn: Path) -> None: + assert match is not None new_fn = fn if _path_is_script(fn): POOL.wait(fn) @@ -142,6 +144,7 @@ def strip_symbols(libraries: Iterable[Path]) -> None: POOL.wait() +@functools.lru_cache(maxsize=1) def get_new_soname(src_path: Path, dest_dir: Path) -> tuple[str, Path]: with open(src_path, "rb") as f: shorthash = hashfile(f)[:8] From d5894d0d95dc268bb0b71c35648949f3d3d05e99 Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Sat, 8 Mar 2025 16:07:43 +0800 Subject: [PATCH 09/20] fix --- src/auditwheel/repair.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/auditwheel/repair.py b/src/auditwheel/repair.py index 92449c72..9ef48d7a 100644 --- a/src/auditwheel/repair.py +++ b/src/auditwheel/repair.py @@ -108,7 +108,7 @@ def _patch_fn(fn: Path) -> None: append_rpath_within_wheel(new_fn, new_rpath, ctx.name, patcher) - POOL.submit_chain(fn, _patch_fn) + POOL.submit_chain(fn, _patch_fn, fn) POOL.wait() From 609bb4df30084b02791c8c979f100fa2f511414a Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Sun, 9 Mar 2025 10:16:59 +0800 Subject: [PATCH 10/20] always set future to latest one --- src/auditwheel/pool.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py index 3bb53823..e2f5ebe4 100644 --- a/src/auditwheel/pool.py +++ b/src/auditwheel/pool.py @@ -65,7 +65,7 @@ def 
_submit( future = self.executor.submit(fn, *args, **kwargs) self.working_map[path] = future else: - current = self.working_map[path] + current = self.working_map.pop(path) future = Future() @functools.wraps(fn) @@ -74,14 +74,13 @@ def new_fn(_current: Future[Any]) -> None: assert _current == current - self.working_map.pop(path) - self.working_map[path] = future try: future.set_result(fn(*args, **kwargs)) except Exception as e: future.set_exception(e) current.add_done_callback(new_fn) + self.working_map[path] = future return future @@ -98,11 +97,7 @@ def wait(self, path: Optional[Path] = None) -> None: if self.executor is None: return if path is not None: - while True: - yield_thread() - if path not in self.working_map: - return - self.working_map.pop(path).result() + self.working_map.pop(path).result() else: while self.working_map: path = next(iter(self.working_map)) From 3e556efee7224a6966d07e4b44df6f6440897aac Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Sun, 9 Mar 2025 10:18:36 +0800 Subject: [PATCH 11/20] tmp --- src/auditwheel/pool.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py index e2f5ebe4..35a54a87 100644 --- a/src/auditwheel/pool.py +++ b/src/auditwheel/pool.py @@ -66,6 +66,8 @@ def _submit( self.working_map[path] = future else: current = self.working_map.pop(path) + if not current.done(): + pass future = Future() @functools.wraps(fn) From 4232c766ad3844c4f211d5af9baf6c7369464e29 Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Sun, 9 Mar 2025 15:29:18 +0800 Subject: [PATCH 12/20] make map store the last future --- src/auditwheel/pool.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py index 3bb53823..064a8c3e 100644 --- a/src/auditwheel/pool.py +++ b/src/auditwheel/pool.py @@ -66,7 +66,7 @@ def _submit( self.working_map[path] = future else: current = self.working_map[path] - future = Future() + future = 
self.working_map[path] = Future() @functools.wraps(fn) def new_fn(_current: Future[Any]) -> None: @@ -74,8 +74,6 @@ def new_fn(_current: Future[Any]) -> None: assert _current == current - self.working_map.pop(path) - self.working_map[path] = future try: future.set_result(fn(*args, **kwargs)) except Exception as e: @@ -98,14 +96,9 @@ def wait(self, path: Optional[Path] = None) -> None: if self.executor is None: return if path is not None: - while True: - yield_thread() - if path not in self.working_map: - return - self.working_map.pop(path).result() + self.working_map.pop(path).result() else: - while self.working_map: - path = next(iter(self.working_map)) + for path in list(self.working_map): self.wait(path) def __contains__(self, fn: Path) -> bool: From f8071d2e1de89f410dc7dabe49fd6a48272fa459 Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Sun, 9 Mar 2025 18:50:09 +0800 Subject: [PATCH 13/20] fix --- src/auditwheel/pool.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py index 064a8c3e..f4cb796b 100644 --- a/src/auditwheel/pool.py +++ b/src/auditwheel/pool.py @@ -60,11 +60,7 @@ def _submit( if self.executor is None: future = Future() future.set_result(fn(*args, **kwargs)) - elif not chain: - assert path not in self.working_map, "path already in working_map" - future = self.executor.submit(fn, *args, **kwargs) - self.working_map[path] = future - else: + elif chain and path in self.working_map: current = self.working_map[path] future = self.working_map[path] = Future() @@ -80,6 +76,13 @@ def new_fn(_current: Future[Any]) -> None: future.set_exception(e) current.add_done_callback(new_fn) + else: + if not chain: + assert path not in self.working_map, ( + "task assiciated with path is already running" + ) + future = self.executor.submit(fn, *args, **kwargs) + self.working_map[path] = future return future From 804961fa7ab97eb56323fe9360752f3060b1ef9a Mon Sep 17 00:00:00 2001 From: Yichen Yan 
Date: Sun, 9 Mar 2025 18:59:40 +0800 Subject: [PATCH 14/20] fix --- src/auditwheel/pool.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py index f4cb796b..b9785631 100644 --- a/src/auditwheel/pool.py +++ b/src/auditwheel/pool.py @@ -1,3 +1,4 @@ +import contextlib import functools from concurrent.futures import Future, ThreadPoolExecutor from pathlib import Path @@ -96,10 +97,11 @@ def wait(self, path: Optional[Path] = None) -> None: path (Optional[Path]): The specific file path to wait for. If None, waits for all tasks to complete. """ - if self.executor is None: + if self.executor is None or (path is not None and path not in self.working_map): return if path is not None: - self.working_map.pop(path).result() + with contextlib.suppress(Exception): + self.working_map.pop(path).result() else: for path in list(self.working_map): self.wait(path) From 6416a2790458a8040bf3b5850f5b3ec18f0b7b21 Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Tue, 11 Mar 2025 11:22:32 +0800 Subject: [PATCH 15/20] fix test failure --- src/auditwheel/repair.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/auditwheel/repair.py b/src/auditwheel/repair.py index 9ef48d7a..2cb566e4 100644 --- a/src/auditwheel/repair.py +++ b/src/auditwheel/repair.py @@ -85,11 +85,10 @@ def repair_wheel( if not dest_dir.exists(): dest_dir.mkdir() new_soname, new_path = get_new_soname(src_path, dest_dir) + replacements.append((soname, new_soname)) if soname not in soname_map: - soname_map[soname] = (new_soname, new_path) - replacements.append((soname, new_soname)) - POOL.submit(new_path, copylib, src_path, dest_dir, patcher) + soname_map[soname] = (new_soname, new_path) if replacements: POOL.submit(fn, patcher.replace_needed, fn, *replacements) From f56f6c2b473ef60810edbb7e365dbab993e8457c Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Tue, 11 Mar 2025 12:14:10 +0800 Subject: [PATCH 16/20] update doc of pool.py --- 
src/auditwheel/pool.py | 33 +++++++++++++++++++++++---------- src/auditwheel/repair.py | 1 - 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py index b9785631..ce99f3c8 100644 --- a/src/auditwheel/pool.py +++ b/src/auditwheel/pool.py @@ -1,22 +1,28 @@ +""" +Concurrent support of auditwheel. +This can sppedup `auditwheel show` and `auditwheel repair` +where they have external shell invocation/io operations +that do no depends on each other and can be parallelized. + +If `j=1`, there'll be no concurrency at all and each call is synchronous, +which is same as not using this pool. +""" + import contextlib import functools from concurrent.futures import Future, ThreadPoolExecutor from pathlib import Path -from time import sleep from typing import Any, Callable, Optional -def yield_thread() -> None: - sleep(0) - - class FileTaskExecutor: - """A task executor that manages concurrent file operations with deduplication. + """A task executor that manages parallel jobs assiciated with a file. - This executor ensures that only one task per file path runs at a time, even if - multiple tasks are submitted for the same file. It executes tasks with `concurrent` - threads when `concurrent` >= 1, specially when `concurrent` is 1, it will execute - tasks sequentially. When `concurrent` < 1, it will use the default setting of + This executor ensures that only one task per file path runs at a time. + Multiple tasks submitted for the same file will be executed in order. + It executes tasks with `concurrent` threads when `concurrent` >= 1. + Specially when `concurrent` is 1, it will execute tasks sequentially. + When `concurrent` < 1, it will use the default setting of ThreadPoolExecutor. 
Args: @@ -38,11 +44,18 @@ def __init__(self, concurrent: int = 0): def submit_chain( self, path: Path, fn: Callable[..., Any], /, *args: Any, **kwargs: Any ) -> Future[Any]: + """ + Submit a task to be executed (after any existing task) for the file. + """ return self._submit(path, fn, True, *args, **kwargs) def submit( self, path: Path, fn: Callable[..., Any], /, *args: Any, **kwargs: Any ) -> Future[Any]: + """ + Submit a task to be executed when no task is running for the file, + otherwise raise an error. + """ return self._submit(path, fn, False, *args, **kwargs) def _submit( diff --git a/src/auditwheel/repair.py b/src/auditwheel/repair.py index 2cb566e4..c1a502bc 100644 --- a/src/auditwheel/repair.py +++ b/src/auditwheel/repair.py @@ -99,7 +99,6 @@ def _patch_fn(fn: Path) -> None: assert match is not None new_fn = fn if _path_is_script(fn): - POOL.wait(fn) new_fn = _replace_elf_script_with_shim(match.group("name"), fn) new_rpath = os.path.relpath(dest_dir, new_fn.parent) From bc932d85908e47dc51ff1dfc33bc3255104abe2f Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Tue, 11 Mar 2025 13:00:17 +0800 Subject: [PATCH 17/20] add cli arg --- src/auditwheel/main.py | 11 +++++++++ src/auditwheel/main_repair.py | 4 ++- src/auditwheel/pool.py | 5 +--- src/auditwheel/repair.py | 29 +++++++++++----------- src/auditwheel/tools.py | 46 +++++++++++++++++------------------ src/auditwheel/wheel_abi.py | 12 +++++---- tests/unit/test_tools.py | 2 +- 7 files changed, 59 insertions(+), 50 deletions(-) diff --git a/src/auditwheel/main.py b/src/auditwheel/main.py index 758b8970..b703551b 100644 --- a/src/auditwheel/main.py +++ b/src/auditwheel/main.py @@ -10,6 +10,7 @@ import auditwheel from . import main_lddtree, main_repair, main_show +from .tools import EnvironmentDefault def main() -> int | None: @@ -33,6 +34,16 @@ def main() -> int | None: default=0, help="Give more output. 
Option is additive", ) + p.add_argument( + "-j", + "--max-jobs", + dest="max_jobs", + action=EnvironmentDefault, + env="AUDITWHEEL_MAX_JOBS", + type=int, + default=1, + help="Maximum number of jobs to run in parallel", + ) sub_parsers = p.add_subparsers(metavar="command", dest="cmd") main_show.configure_parser(sub_parsers) diff --git a/src/auditwheel/main_repair.py b/src/auditwheel/main_repair.py index 47c09157..3745e81d 100644 --- a/src/auditwheel/main_repair.py +++ b/src/auditwheel/main_repair.py @@ -8,6 +8,7 @@ from auditwheel.patcher import Patchelf from .policy import WheelPolicies +from .pool import FileTaskExecutor from .tools import EnvironmentDefault logger = logging.getLogger(__name__) @@ -50,7 +51,7 @@ def configure_parser(sub_parsers) -> None: # type: ignore[no-untyped-def] dest="ZIP_COMPRESSION_LEVEL", type=int, help="Compress level to be used to create zip file.", - choices=list(range(zlib.Z_NO_COMPRESSION, zlib.Z_BEST_COMPRESSION + 1)), + choices=list(range(zlib.Z_DEFAULT_COMPRESSION, zlib.Z_BEST_COMPRESSION + 1)), default=zlib.Z_DEFAULT_COMPRESSION, ) parser.add_argument( @@ -211,6 +212,7 @@ def execute(args: argparse.Namespace, parser: argparse.ArgumentParser) -> int: exclude=exclude, strip=args.STRIP, zip_compression_level=args.ZIP_COMPRESSION_LEVEL, + pool=FileTaskExecutor(args.max_jobs), ) if out_wheel is not None: diff --git a/src/auditwheel/pool.py b/src/auditwheel/pool.py index ce99f3c8..98fc2739 100644 --- a/src/auditwheel/pool.py +++ b/src/auditwheel/pool.py @@ -119,8 +119,5 @@ def wait(self, path: Optional[Path] = None) -> None: for path in list(self.working_map): self.wait(path) - def __contains__(self, fn: Path) -> bool: - return self.executor is not None and fn in self.working_map - -POOL = FileTaskExecutor(2) +DEFAULT_POOL = FileTaskExecutor(1) diff --git a/src/auditwheel/repair.py b/src/auditwheel/repair.py index f1f0adfa..c77925ef 100644 --- a/src/auditwheel/repair.py +++ b/src/auditwheel/repair.py @@ -8,7 +8,6 @@ import re import 
shutil import stat -from collections.abc import Iterable from fnmatch import fnmatch from os.path import isabs from pathlib import Path @@ -19,7 +18,7 @@ from .elfutils import elf_read_dt_needed, elf_read_rpaths, is_subdir from .hashfile import hashfile from .policy import WheelPolicies, get_replace_platforms -from .pool import POOL +from .pool import DEFAULT_POOL, FileTaskExecutor from .wheel_abi import get_wheel_elfdata from .wheeltools import InWheelCtx, add_platforms @@ -45,8 +44,9 @@ def repair_wheel( exclude: frozenset[str], strip: bool, zip_compression_level: int, + pool: FileTaskExecutor = DEFAULT_POOL, ) -> Path | None: - elf_data = get_wheel_elfdata(wheel_policy, wheel_path, exclude) + elf_data = get_wheel_elfdata(wheel_policy, wheel_path, exclude, pool) external_refs_by_fn = elf_data.full_external_refs # Do not repair a pure wheel, i.e. has no external refs @@ -89,11 +89,11 @@ def repair_wheel( new_soname, new_path = get_new_soname(src_path, dest_dir) replacements.append((soname, new_soname)) if soname not in soname_map: - POOL.submit(new_path, copylib, src_path, dest_dir, patcher) + pool.submit(new_path, copylib, src_path, dest_dir, patcher) soname_map[soname] = (new_soname, new_path) if replacements: - POOL.submit(fn, patcher.replace_needed, fn, *replacements) + pool.submit(fn, patcher.replace_needed, fn, *replacements) if len(ext_libs) > 0: @@ -108,9 +108,9 @@ def _patch_fn(fn: Path) -> None: append_rpath_within_wheel(new_fn, new_rpath, ctx.name, patcher) - POOL.submit_chain(fn, _patch_fn, fn) + pool.submit_chain(fn, _patch_fn, fn) - POOL.wait() + pool.wait() # we grafted in a bunch of libraries and modified their sonames, but # they may have internal dependencies (DT_NEEDED) on one another, so @@ -123,7 +123,7 @@ def _patch_fn(fn: Path) -> None: if n in soname_map: replacements.append((n, soname_map[n][0])) if replacements: - POOL.submit(path, patcher.replace_needed, path, *replacements) + pool.submit(path, patcher.replace_needed, path, 
*replacements) if update_tags: ctx.out_wheel = add_platforms(ctx, abis, get_replace_platforms(abis[0])) @@ -131,17 +131,16 @@ def _patch_fn(fn: Path) -> None: if strip: libs_to_strip = [path for (_, path) in soname_map.values()] extensions = external_refs_by_fn.keys() - strip_symbols(itertools.chain(libs_to_strip, extensions)) + for lib in itertools.chain(libs_to_strip, extensions): + pool.submit(lib, strip_symbols, lib) - POOL.wait() + pool.wait() return ctx.out_wheel -def strip_symbols(libraries: Iterable[Path]) -> None: - for lib in libraries: - logger.info("Stripping symbols from %s", lib) - POOL.submit_chain(lib, check_call, ["strip", "-s", lib]) - POOL.wait() +def strip_symbols(lib: Path) -> None: + logger.info("Stripping symbols from %s", lib) + check_call(["strip", "-s", lib]) @functools.lru_cache(maxsize=1) diff --git a/src/auditwheel/tools.py b/src/auditwheel/tools.py index 70d30ab0..d5a764e9 100644 --- a/src/auditwheel/tools.py +++ b/src/auditwheel/tools.py @@ -182,37 +182,35 @@ def __init__( ) -> None: self.env_default = os.environ.get(env) self.env = env - if self.env_default: - if type: - try: - self.env_default = type(self.env_default) - except Exception: - self.option_strings = kwargs["option_strings"] - args = { - "value": self.env_default, - "type": type, - "env": self.env, - } - msg = ( - "invalid type: %(value)r from environment variable " - "%(env)r cannot be converted to %(type)r" - ) - raise argparse.ArgumentError(self, msg % args) from None + error_msg_value_meta = "%(value)r" + if self.env_default is not None: default = self.env_default - if ( - self.env_default is not None - and choices is not None - and self.env_default not in choices - ): + error_msg_value_meta = "%(value)r from environment variable %(env)r" + if type: + try: + default = type(default) + except Exception: + self.option_strings = kwargs["option_strings"] + args = { + "value": self.env_default, + "type": type, + "env": self.env, + } + msg = ( + "invalid type: " + + 
error_msg_value_meta + + " cannot be converted to %(type)r" + ) + raise argparse.ArgumentError(self, msg % args) from None + if default is not None and choices is not None and default not in choices: self.option_strings = kwargs["option_strings"] args = { - "value": self.env_default, + "value": default, "choices": ", ".join(map(repr, choices)), "env": self.env, } msg = ( - "invalid choice: %(value)r from environment variable " - "%(env)r (choose from %(choices)s)" + "invalid choice: " + error_msg_value_meta + " (choose from %(choices)s)" ) raise argparse.ArgumentError(self, msg % args) diff --git a/src/auditwheel/wheel_abi.py b/src/auditwheel/wheel_abi.py index 5effc69f..0cb720e1 100644 --- a/src/auditwheel/wheel_abi.py +++ b/src/auditwheel/wheel_abi.py @@ -12,8 +12,6 @@ from elftools.elf.elffile import ELFFile -from auditwheel.pool import POOL - from . import json from .architecture import Architecture from .elfutils import ( @@ -26,6 +24,7 @@ from .genericpkgctx import InGenericPkgCtx from .lddtree import DynamicExecutable, ldd from .policy import ExternalReference, Policy, WheelPolicies +from .pool import DEFAULT_POOL, FileTaskExecutor log = logging.getLogger(__name__) @@ -84,7 +83,10 @@ def message(self) -> str: @functools.lru_cache def get_wheel_elfdata( - wheel_policy: WheelPolicies, wheel_fn: Path, exclude: frozenset[str] + wheel_policy: WheelPolicies, + wheel_fn: Path, + exclude: frozenset[str], + pool: FileTaskExecutor = DEFAULT_POOL, ) -> WheelElfData: full_elftree = {} nonpy_elftree = {} @@ -164,7 +166,7 @@ def inner(fn: Path) -> None: shared_libraries_in_purelib.append(so_name) if not shared_libraries_in_purelib: - POOL.submit(fn, inner, fn) + pool.submit(fn, inner, fn) # If at least one shared library exists in purelib, raise an error if shared_libraries_in_purelib: @@ -177,7 +179,7 @@ def inner(fn: Path) -> None: ) raise RuntimeError(msg) - POOL.wait() + pool.wait() if not platform_wheel: raise NonPlatformWheel( diff --git a/tests/unit/test_tools.py 
b/tests/unit/test_tools.py index d5c022b9..953972c1 100644 --- a/tests/unit/test_tools.py +++ b/tests/unit/test_tools.py @@ -46,7 +46,7 @@ def test_plat_environment_action( _all_zip_level: list[int] = list( - range(zlib.Z_NO_COMPRESSION, zlib.Z_BEST_COMPRESSION + 1) + range(zlib.Z_DEFAULT_COMPRESSION, zlib.Z_BEST_COMPRESSION + 1) ) From ddb5e11f22ca4c744468a5bad0ecb61576a70298 Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Tue, 11 Mar 2025 13:03:24 +0800 Subject: [PATCH 18/20] fix test --- tests/unit/test_main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/test_main.py b/tests/unit/test_main.py index 2c7792b0..4bba8f27 100644 --- a/tests/unit/test_main.py +++ b/tests/unit/test_main.py @@ -33,4 +33,4 @@ def test_help(monkeypatch, capsys): # THEN assert retval is None captured = capsys.readouterr() - assert "usage: auditwheel [-h] [-V] [-v] command ..." in captured.out + assert "usage: auditwheel [-h] [-V] [-v] [-j MAX_JOBS] command ..." in captured.out From 1d99454103725eb9d85877505e48ec7e28a8d830 Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Tue, 11 Mar 2025 13:32:27 +0800 Subject: [PATCH 19/20] fix test --- tests/integration/test_bundled_wheels.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/integration/test_bundled_wheels.py b/tests/integration/test_bundled_wheels.py index 4605ec54..0650e6ce 100644 --- a/tests/integration/test_bundled_wheels.py +++ b/tests/integration/test_bundled_wheels.py @@ -135,6 +135,7 @@ def test_wheel_source_date_epoch(tmp_path, monkeypatch): EXCLUDE=[], DISABLE_ISA_EXT_CHECK=False, ZIP_COMPRESSION_LEVEL=6, + max_jobs=1, cmd="repair", func=Mock(), prog="auditwheel", From 1da1db7a314ad7eb71b67bdf5e08843866f65b6e Mon Sep 17 00:00:00 2001 From: Yichen Yan Date: Thu, 13 Mar 2025 11:29:32 +0800 Subject: [PATCH 20/20] update some doc --- src/auditwheel/patcher.py | 4 ++++ src/auditwheel/repair.py | 2 ++ src/auditwheel/wheel_abi.py | 8 ++++++-- 3 files changed, 12 insertions(+), 2 deletions(-) diff 
--git a/src/auditwheel/patcher.py b/src/auditwheel/patcher.py index 60c3157f..84d5b590 100644 --- a/src/auditwheel/patcher.py +++ b/src/auditwheel/patcher.py @@ -47,6 +47,10 @@ def __init__(self) -> None: _verify_patchelf() def replace_needed(self, file_name: Path, *old_new_pairs: tuple[str, str]) -> None: + """ + Patching one ELF file does not need its dependencies to be ready, + so this function can be parallelized. + """ check_call( [ "patchelf", diff --git a/src/auditwheel/repair.py b/src/auditwheel/repair.py index c77925ef..65aadbdd 100644 --- a/src/auditwheel/repair.py +++ b/src/auditwheel/repair.py @@ -93,6 +93,8 @@ def repair_wheel( soname_map[soname] = (new_soname, new_path) if replacements: + # patching one ELF file does not need its dependencies to be ready + # so we can submit this task without waiting for dependencies pool.submit(fn, patcher.replace_needed, fn, *replacements) if len(ext_libs) > 0: diff --git a/src/auditwheel/wheel_abi.py b/src/auditwheel/wheel_abi.py index 0cb720e1..5e5f2305 100644 --- a/src/auditwheel/wheel_abi.py +++ b/src/auditwheel/wheel_abi.py @@ -101,7 +101,11 @@ def get_wheel_elfdata( platform_wheel = False - def inner(fn: Path) -> None: + def _get_fn_data_inner(fn: Path) -> None: + """ + This function reads one elf file per call, + so it can be safely parallelized. + """ nonlocal \ platform_wheel, \ shared_libraries_in_purelib, \ @@ -166,7 +170,7 @@ def inner(fn: Path) -> None: shared_libraries_in_purelib.append(so_name) if not shared_libraries_in_purelib: - pool.submit(fn, inner, fn) + pool.submit(fn, _get_fn_data_inner, fn) # If at least one shared library exists in purelib, raise an error if shared_libraries_in_purelib: