Skip to content

Commit dfff34a

Browse files
fix: NOP register_chrdev instead of remapping, direct sysfs_write for PCI disable
- Host nvidia-580 owns both majors 185 and 195, so any chrdev remap still conflicts. Switch the catalyst patch from PatchByteAt(0x7b, 0xC3→0xB9) to NopCallAt(0x7f) to skip register_chrdev entirely — the catalyst pattern doesn't need the chardev; the PCI match triggers probe.
- Use direct sysfs_write for the PCI `enable` attribute instead of a guarded child process — the enable attribute is non-blocking and the shell child was failing with I/O errors.

Status: insmod chrdev conflict resolved, but request_mem_region still fails on Titan V because PCI BAR resources remain claimed by the kernel resource tree even after unbind+disable. Needs a PCI remove/rescan approach or kernel-level BAR release.

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent fb5ce20 commit dfff34a

2 files changed

Lines changed: 13 additions & 21 deletions

File tree

crates/core/cylinder/src/vfio/module_patch.rs

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -389,20 +389,14 @@ impl PatchSet {
389389
symbol: "nv_cap_destroy_entry".into(),
390390
strategy: PatchStrategy::RetAtEntry,
391391
},
392-
// Remap chrdev major 195→185 in the __register_chrdev
393-
// call inside init_module. Host nvidia owns major 195;
394-
// changing the immediate avoids conflict without NOP-ing
395-
// the call, so RM's chardev is created (major 185) and
396-
// userspace can trigger GPU init via device open.
397-
// Layout: `bf c3 00 00 00` = `mov $0xc3, %edi` at fn+0x7a;
398-
// the immediate 0xC3 is at fn+0x7b.
392+
// NOP the __register_chrdev call inside init_module.
393+
// Host nvidia owns majors 185 and 195; any remap still
394+
// conflicts. For the catalyst pattern we don't need the
395+
// chardev — the PCI match triggers probe during insmod.
396+
// Layout: `call __register_chrdev` at fn+0x7f (5 bytes).
399397
PatchTarget {
400398
symbol: "init_module".into(),
401-
strategy: PatchStrategy::PatchByteAt {
402-
fn_offset: 0x7b,
403-
expected: 0xC3,
404-
replacement: 0xB9,
405-
},
399+
strategy: PatchStrategy::NopCallAt(0x7f),
406400
},
407401
],
408402
min_applied: 1,

crates/core/cylinder/src/vfio/sovereign_handoff.rs

Lines changed: 7 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -796,18 +796,16 @@ pub fn execute_handoff(
796796
// request_mem_region call on BAR0 fails because the
797797
// PCI subsystem still has the region reserved from the
798798
// previous driver's pci_enable_device.
799+
// Direct sysfs_write is safe here: the device is unbound
800+
// and the `enable` attribute is a non-blocking kernel op.
799801
let enable_path = crate::linux_paths::sysfs_pci_device_file(
800802
&config.bdf, "enable",
801803
);
802-
if let Err(e) = guarded_sysfs::sysfs_write_guarded(
803-
&enable_path, "0",
804-
guarded_sysfs::UNBIND_TIMEOUT,
805-
) {
806-
tracing::warn!(bdf = config.bdf.as_str(), error = %e,
807-
"pci disable failed (continuing — request_mem_region may fail)");
808-
} else {
809-
tracing::info!(bdf = config.bdf.as_str(),
810-
"pci device disabled — BAR resources released for driver takeover");
804+
match guarded_sysfs::sysfs_write(&enable_path, "0") {
805+
Ok(()) => tracing::info!(bdf = config.bdf.as_str(),
806+
"pci device disabled — BAR resources released for driver takeover"),
807+
Err(e) => tracing::warn!(bdf = config.bdf.as_str(), error = %e,
808+
"pci disable failed (continuing — request_mem_region may fail)"),
811809
}
812810

813811
let override_path = crate::linux_paths::sysfs_pci_device_file(

0 commit comments

Comments
 (0)