-
-
Notifications
You must be signed in to change notification settings - Fork 418
Expand file tree
/
Copy pathrepair.rs
More file actions
353 lines (321 loc) · 13.6 KB
/
Copy pathrepair.rs
File metadata and controls
353 lines (321 loc) · 13.6 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
//! Shared wheel repair infrastructure.
//!
//! This module contains the [`WheelRepairer`] trait and the shared utilities
//! for preparing external libraries for grafting into wheels.
//!
//! Platform-specific implementations live in:
//! - [`super::linux::ElfRepairer`]
//! - [`super::macos::MacOSRepairer`]
use crate::compile::BuildArtifact;
use crate::util::hash_file;
use anyhow::{Context, Result};
use std::borrow::Borrow;
use std::collections::{HashMap, HashSet};
use std::path::{Path, PathBuf};
use fs_err as fs;
/// A build artifact bundled with the external shared libraries it depends on.
///
/// Keeps the artifact and its per-artifact dependency list together so they
/// cannot accidentally get out of sync when passed through the wheel-writing
/// pipeline.
pub struct AuditedArtifact {
/// The build artifact.
pub artifact: BuildArtifact,
/// External shared libraries this artifact depends on that must be
/// bundled into the wheel.
pub external_libs: Vec<lddtree::Library>,
/// **Universal2 only**: CPU architectures that require each library.
///
/// Maps library realpath to the set of architectures (e.g., "arm64", "x86_64")
/// that depend on it. Used during wheel repair to verify grafted dylibs
/// contain all necessary architecture slices.
///
/// Empty for single-arch builds and Linux builds.
///
/// Note: Universal2 support may be removed when Apple drops x86_64 support
pub arch_requirements: HashMap<PathBuf, HashSet<String>>,
}
impl Borrow<BuildArtifact> for AuditedArtifact {
fn borrow(&self) -> &BuildArtifact {
&self.artifact
}
}
/// A library prepared for grafting into a wheel.
///
/// Created by [`prepare_grafted_libs`] with a hash-suffixed filename and a
/// writable temporary copy ready for platform-specific patching.
pub struct GraftedLib {
/// Original library name as it appears in dependency records.
/// For ELF this is a leaf name like `libfoo.so.1`.
/// For Mach-O this can be a full install name like `/usr/local/lib/libfoo.dylib`
/// or `@rpath/libfoo.dylib`.
pub original_name: String,
/// Additional install names that resolve to the same file on disk.
/// These need the same rewriting as `original_name` → `new_name`.
pub aliases: Vec<String>,
/// New filename with hash suffix (e.g., `libfoo-ab12cd34.so.1`)
pub new_name: String,
/// Path to the writable temporary copy (ready for patching).
pub dest_path: PathBuf,
/// Libraries this one depends on (from lddtree's `needed` field).
#[cfg_attr(not(feature = "auditwheel"), allow(dead_code))]
pub needed: Vec<String>,
/// Runtime library search paths from the original library.
#[cfg_attr(not(feature = "auditwheel"), allow(dead_code))]
pub rpath: Vec<String>,
/// **Universal2 only**: CPU architectures that require this library.
///
/// For universal2 macOS wheels, each architecture (arm64, x86_64) may have
/// different dependencies. This field tracks which architectures actually
/// need this library, so we can verify the grafted dylib contains (at least)
/// those architectures.
///
/// Empty for single-arch builds and Linux builds.
///
/// Note: Universal2 support may be removed when Apple drops x86_64 support
/// (expected ~2025-2026).
#[cfg_attr(not(feature = "auditwheel"), allow(dead_code))]
pub required_archs: HashSet<String>,
}
/// Result of auditing a build artifact for external dependencies.
///
/// Contains the platform policy, discovered external libraries, and
/// (for universal2 macOS builds) architecture requirements.
pub struct AuditResult {
/// The determined platform policy (e.g., manylinux tag).
pub policy: super::Policy,
/// External shared libraries that need to be bundled.
pub external_libs: Vec<lddtree::Library>,
/// **Universal2 only**: CPU architectures that require each library.
///
/// Maps library realpath to the set of architectures (e.g., "arm64", "x86_64")
/// that depend on it. Empty for single-arch builds and Linux builds.
///
/// Note: Universal2 support may be removed when Apple drops x86_64 support
pub arch_requirements: HashMap<PathBuf, HashSet<String>>,
}
impl AuditResult {
/// Create a new AuditResult with no arch requirements (for single-arch/Linux).
pub fn new(policy: super::Policy, external_libs: Vec<lddtree::Library>) -> Self {
Self {
policy,
external_libs,
arch_requirements: HashMap::new(),
}
}
}
/// Platform-specific wheel repair operations.
///
/// Each platform (Linux/ELF, macOS/Mach-O) implements this trait to provide
/// its own dependency discovery and binary patching logic.
pub trait WheelRepairer {
/// Audit an artifact for platform compliance and find external libraries
/// that need to be bundled.
///
/// Returns an [`AuditResult`] containing the platform policy, external
/// library dependencies, and (for universal2) architecture requirements.
fn audit(&self, artifact: &BuildArtifact, ld_paths: Vec<PathBuf>) -> Result<AuditResult>;
/// Patch binary references after libraries have been grafted.
///
/// This is called after [`prepare_grafted_libs`] has copied and
/// hash-renamed all external libraries. Implementations should:
///
/// 1. Rewrite references in each artifact to point to the new names
/// 2. Set appropriate metadata on grafted libraries (soname, install ID, etc.)
/// 3. Update cross-references between grafted libraries
/// 4. Perform any final steps (e.g., code signing on macOS)
fn patch(
&self,
audited: &[AuditedArtifact],
grafted: &[GraftedLib],
libs_dir: &Path,
artifact_dir: &Path,
) -> Result<()>;
/// Patch artifacts for editable installs (e.g., set RPATH to Cargo target dir).
///
/// Implementations must clear `cargo_output_path` on every artifact whose
/// bytes they rewrite in place, so that `finalize_staged_artifacts` does
/// not move the patched bytes back to the cargo output path (see #2969).
///
/// The default implementation is a no-op. Platform-specific repairers can
/// override this to add runtime library search paths for editable mode.
fn patch_editable(&self, _audited: &mut [AuditedArtifact]) -> Result<()> {
Ok(())
}
/// Return a Python code snippet to prepend to `__init__.py` for runtime
/// shared library discovery.
///
/// `libs_dir_name` is the leaf directory name for bundled libraries (e.g.
/// `"mypackage.libs"`). `depth` is the number of parent directories from
/// the package's `__init__.py` to the site-packages root where `.libs/`
/// lives.
///
/// Returns `None` on platforms that don't need runtime patching:
/// - Linux/ELF uses RPATH (`$ORIGIN`)
/// - macOS/Mach-O uses `@loader_path`
/// - Windows/PE needs `os.add_dll_directory()` injected into `__init__.py`
fn init_py_patch(&self, _libs_dir_name: &str, _depth: usize) -> Option<String> {
None
}
/// Return the wheel-internal directory name for grafted libraries.
///
/// macOS uses `.dylibs` (matching delocate convention),
/// Linux and Windows use `.libs` (matching auditwheel/delvewheel convention).
fn libs_dir(&self, dist_name: &str) -> PathBuf {
PathBuf::from(format!("{dist_name}.libs"))
}
}
/// Prepare external libraries for grafting into a wheel.
///
/// For each library:
/// 1. Resolves the real path on disk (fails if not found)
/// 2. Generates a hash-suffixed filename to avoid DLL hell
/// 3. Copies to `temp_dir` and makes the copy writable
///
/// Returns the prepared libraries and the set of original paths that were copied.
///
/// Deduplication is by `realpath` (the actual file on disk). When the same
/// file is referenced via multiple install names (common on macOS), only one
/// copy is made, but all original names are recorded as aliases.
///
/// The optional `arch_requirements` parameter is used for universal2 macOS builds
/// to track which CPU architectures require each library (by realpath). This
/// enables verification that grafted dylibs contain all necessary architecture
/// slices. For single-arch builds or Linux, pass `None`.
pub fn prepare_grafted_libs(
audited: &[AuditedArtifact],
temp_dir: &Path,
arch_requirements: Option<&HashMap<PathBuf, HashSet<String>>>,
) -> Result<(Vec<GraftedLib>, HashSet<PathBuf>)> {
let mut grafted = Vec::new();
let mut libs_copied = HashSet::new();
let mut realpath_to_idx: HashMap<PathBuf, usize> = HashMap::new();
for lib in audited.iter().flat_map(|a| &a.external_libs) {
let source_path = lib.realpath.clone().with_context(|| {
format!(
"Cannot repair wheel, because required library {} could not be located.",
lib.path.display()
)
})?;
// Check if we've already copied this exact file (by realpath).
if let Some(&idx) = realpath_to_idx.get(&source_path) {
let existing: &mut GraftedLib = &mut grafted[idx];
if lib.name != existing.original_name && !existing.aliases.contains(&lib.name) {
existing.aliases.push(lib.name.clone());
}
continue;
}
let new_name = hashed_lib_name(&lib.name, &source_path)?;
let dest_path = temp_dir.join(&new_name);
fs::copy(&source_path, &dest_path)?;
// Make the copy writable so platform-specific tools can modify it
let mut perms = fs::metadata(&dest_path)?.permissions();
#[allow(clippy::permissions_set_readonly_false)]
perms.set_readonly(false);
fs::set_permissions(&dest_path, perms)?;
let idx = grafted.len();
realpath_to_idx.insert(source_path.clone(), idx);
libs_copied.insert(source_path.clone());
// Get required architectures for this library (universal2 only).
let required_archs = arch_requirements
.and_then(|reqs| reqs.get(&source_path))
.cloned()
.unwrap_or_default();
grafted.push(GraftedLib {
original_name: lib.name.clone(),
aliases: Vec::new(),
new_name,
dest_path,
needed: lib.needed.clone(),
rpath: lib.rpath.clone(),
required_archs,
});
}
Ok((grafted, libs_copied))
}
/// Extract the leaf filename from a library name.
///
/// Library names can be full paths on macOS (e.g., `/usr/local/lib/libfoo.dylib`
/// or `@rpath/libfoo.dylib`). This returns just the filename component.
pub(crate) fn leaf_filename(lib_name: &str) -> &str {
Path::new(lib_name)
.file_name()
.and_then(|n| n.to_str())
.unwrap_or(lib_name)
}
/// Generate a hash-suffixed filename for a library to avoid collisions.
///
/// Takes the leaf filename from `lib_name` (which may be a full path on macOS),
/// computes a short hash of the file content, and inserts it before the first
/// extension dot.
///
/// Examples:
/// - `libfoo.so.1` + hash `ab12cd34` → `libfoo-ab12cd34.so.1`
/// - `/usr/local/lib/libbar.dylib` + hash `ef56gh78` → `libbar-ef56gh78.dylib`
pub(crate) fn hashed_lib_name(lib_name: &str, lib_path: &Path) -> Result<String> {
let short_hash = &hash_file(lib_path)
.with_context(|| format!("Failed to hash library {}", lib_path.display()))?[..8];
let leaf = leaf_filename(lib_name);
Ok(if let Some(pos) = leaf.find('.') {
let (stem, ext) = leaf.split_at(pos);
if stem.ends_with(&format!("-{short_hash}")) {
leaf.to_string()
} else {
format!("{stem}-{short_hash}{ext}")
}
} else {
format!("{leaf}-{short_hash}")
})
}
/// Log which libraries were grafted into the wheel.
pub fn log_grafted_libs(libs_copied: &HashSet<PathBuf>, libs_dir: &Path) {
let mut grafted_paths: Vec<&PathBuf> = libs_copied.iter().collect();
grafted_paths.sort();
eprintln!(
"🖨 Copied external shared libraries to package {} directory:",
libs_dir.display()
);
for lib_path in &grafted_paths {
eprintln!(" {}", lib_path.display());
}
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
#[test]
fn test_leaf_filename() {
assert_eq!(leaf_filename("libfoo.so.1"), "libfoo.so.1");
assert_eq!(leaf_filename("/usr/local/lib/libfoo.dylib"), "libfoo.dylib");
assert_eq!(leaf_filename("@rpath/libfoo.dylib"), "libfoo.dylib");
}
#[test]
fn test_hashed_lib_name() {
let tmp_dir = tempfile::tempdir().unwrap();
let lib_path = tmp_dir.path().join("libfoo.so.1");
{
let mut f = fs_err::File::create(&lib_path).unwrap();
f.write_all(b"fake library content").unwrap();
}
let name = hashed_lib_name("libfoo.so.1", &lib_path).unwrap();
// Should have format: libfoo-XXXXXXXX.so.1
assert!(name.starts_with("libfoo-"));
assert!(name.ends_with(".so.1"));
assert_eq!(name.len(), "libfoo-".len() + 8 + ".so.1".len());
// Idempotent: calling with already-hashed name should not double-hash
let name2 = hashed_lib_name(&name, &lib_path).unwrap();
assert_eq!(name, name2);
}
#[test]
fn test_hashed_lib_name_macos_path() {
let tmp_dir = tempfile::tempdir().unwrap();
let lib_path = tmp_dir.path().join("libbar.dylib");
{
let mut f = fs_err::File::create(&lib_path).unwrap();
f.write_all(b"fake dylib content").unwrap();
}
let name = hashed_lib_name("/usr/local/lib/libbar.dylib", &lib_path).unwrap();
assert!(name.starts_with("libbar-"));
assert!(name.ends_with(".dylib"));
}
}