diff --git a/src/hugifyr.c b/src/hugifyr.c index 771c77d..4e27e03 100644 --- a/src/hugifyr.c +++ b/src/hugifyr.c @@ -129,6 +129,11 @@ typedef struct { offset_entry *loclist_offsets; } ElfInfo; +typedef struct { + GElf_Addr old_lo; // inclusive + GElf_Addr old_hi; // exclusive +} AddrRange; + typedef struct { GElf_Addr old_exec_vaddr; GElf_Addr vaddr_delta; @@ -138,6 +143,18 @@ typedef struct { bool adjust_offsets; bool sections_adjusted; bool adjust_debug; + + // lld-style support: vaddr ranges in seg0 that aren't in the + // relocatable_section_types whitelist. They sit below old_exec_vaddr but + // are RIP-referenced from .text (.rodata, .eh_frame*, .gcc_except_table), + // so they must shift by vaddr_delta along with the rest of the binary. + // Empty (n_movable_seg0 == 0) for non-lld-style binaries; in that case + // calc_adjusted_addr falls through to the legacy "shift if addr >= + // old_exec_vaddr" logic. + AddrRange *movable_seg0_ranges; + size_t n_movable_seg0; + GElf_Addr seg0_end_after_shift; // post-shift highest end of any seg0 section + GElf_Addr exec_p_vaddr_clamped; // LOAD RE's final p_vaddr (max(round_down, seg0_end)) } AdjInfo; typedef struct { @@ -208,6 +225,10 @@ static void cleanup_free_uchar(unsigned char **ptr) { cleanup_free((void **)ptr); } +static void cleanup_free_addrrange(AddrRange **ptr) { + cleanup_free((void **)ptr); +} + static void cleanup_close(int *fd) { if (*fd != -1) { close(*fd); @@ -408,11 +429,27 @@ static void write_address(void *p, uint64_t value, uint8_t address_size) { *(uint32_t *)p = value; } +static bool addr_in_movable_seg0(AdjInfo *adj_info, GElf_Addr addr) { + for (size_t i = 0; i < adj_info->n_movable_seg0; i++) { + if (addr >= adj_info->movable_seg0_ranges[i].old_lo && + addr < adj_info->movable_seg0_ranges[i].old_hi) { + return true; + } + } + return false; +} + static GElf_Addr calc_adjusted_addr(AdjInfo *adj_info, GElf_Addr addr) { if (addr >= adj_info->old_exec_vaddr) { return addr + adj_info->vaddr_delta; } 
+ // lld-style: addresses in movable seg0 sections shift by the same delta + // (relocation addends, symbol values, dynamic-section pointers, DWARF + // references that target .rodata / .eh_frame* etc.). + if (addr_in_movable_seg0(adj_info, addr)) { + return addr + adj_info->vaddr_delta; + } return addr; } @@ -1385,7 +1422,23 @@ static void adjust_program_headers(ElfInfo *info, AdjInfo *adj_info) { if ( i == (size_t)info->first_load_index) { phdr->p_align = HUGE_PAGE_SIZE; } - + + // lld-style: seg0 LOAD covers movable sections that shift forward. + // Extend its filesz/memsz to cover the post-shift end. Don't change + // its p_vaddr/p_offset (still 0). + if (i == (size_t)info->first_load_index && + info->first_load_index != info->exec_index && + adj_info->n_movable_seg0 > 0) { + GElf_Addr new_end = adj_info->seg0_end_after_shift; + if (new_end > phdr->p_vaddr + phdr->p_memsz) { + GElf_Xword new_size = new_end - phdr->p_vaddr; + phdr->p_memsz = new_size; + if (adj_info->adjust_offsets) { + phdr->p_filesz = new_size; + } + } + } + if (phdr->p_vaddr >= (size_t)adj_info->old_exec_vaddr) { uint64_t old_end_addr = phdr->p_vaddr + phdr->p_memsz; @@ -1396,18 +1449,29 @@ static void adjust_program_headers(ElfInfo *info, AdjInfo *adj_info) { if (i == (size_t)info->exec_index) { uint64_t old_end_offset_in_page = old_end_addr % PAGE_SIZE; uint64_t offset_in_page = phdr->p_vaddr % PAGE_SIZE; - uint64_t memsz = round_up(phdr->p_memsz, HUGE_PAGE_SIZE) - - offset_in_page - + uint64_t memsz = round_up(phdr->p_memsz, HUGE_PAGE_SIZE) - + offset_in_page - (PAGE_SIZE - old_end_offset_in_page); - // calculate the + // calculate the phdr->p_memsz = memsz; if (adj_info->adjust_offsets) { phdr->p_filesz = phdr->p_memsz; phdr->p_align = HUGE_PAGE_SIZE; } - } + } + } else if (addr_in_movable_seg0(adj_info, phdr->p_vaddr)) { + // lld-style: phdr (e.g. PT_GNU_EH_FRAME) targets a section + // that's been remapped into the new vaddr range. Shift in + // lockstep. 
seg0 has p_vaddr == p_offset, so the file delta + // matches vaddr_delta. + phdr->p_vaddr += adj_info->vaddr_delta; + phdr->p_paddr += adj_info->vaddr_delta; + if (adj_info->adjust_offsets) { + phdr->p_offset += adj_info->vaddr_delta; + } + continue; } if (adj_info->adjust_offsets) { @@ -1416,8 +1480,21 @@ static void adjust_program_headers(ElfInfo *info, AdjInfo *adj_info) { // that is then added at the beginning of the executable segment. if (phdr->p_offset == adj_info->old_exec_offset) { phdr->p_offset += adj_info->segment_offset_delta; - phdr->p_vaddr = round_down(phdr->p_vaddr, HUGE_PAGE_SIZE); - phdr->p_paddr = round_down(phdr->p_paddr, HUGE_PAGE_SIZE); + // adj_info->exec_p_vaddr_clamped == max(round_down(p_vaddr, + // 2MB), seg0_end_after_shift). For non-lld binaries this + // equals round_down (a no-op, since new_exec_sec_vaddr was + // chosen so the LOAD RE rounds down cleanly). For lld-style + // it pushes LOAD RE's start up to seg0_end_after_shift so + // seg0 LOAD R and LOAD RE don't overlap in vaddr space. 
+ GElf_Addr rounded = round_down(phdr->p_vaddr, HUGE_PAGE_SIZE); + GElf_Addr new_pvaddr = adj_info->exec_p_vaddr_clamped; + GElf_Xword shrink = new_pvaddr - rounded; + phdr->p_vaddr = new_pvaddr; + phdr->p_paddr = new_pvaddr; + if (shrink > 0 && i == (size_t)info->exec_index) { + if (phdr->p_memsz > shrink) phdr->p_memsz -= shrink; + if (phdr->p_filesz > shrink) phdr->p_filesz -= shrink; + } } else if (phdr->p_offset > adj_info->old_exec_offset) { // Note that section_offset_delta is also for next sections phdr->p_offset += adj_info->section_offset_delta; @@ -1443,6 +1520,8 @@ static void adjust_section_headers(ElfInfo *info, AdjInfo *adj_info) { struct elf_section *sec = &info->sections[i]; uint64_t old_addr = sec->shdr.sh_addr; uint64_t new_addr = calc_adjusted_addr(adj_info, old_addr); + bool is_movable_seg0 = old_addr < adj_info->old_exec_vaddr && + addr_in_movable_seg0(adj_info, old_addr); if (old_addr != new_addr) { n_updated++; @@ -1450,8 +1529,15 @@ static void adjust_section_headers(ElfInfo *info, AdjInfo *adj_info) { sec->shdr.sh_addr = new_addr; } - if (adj_info->adjust_offsets && sec->shdr.sh_offset >= adj_info->old_exec_offset) { - sec->shdr.sh_offset += adj_info->section_offset_delta; + if (adj_info->adjust_offsets) { + if (sec->shdr.sh_offset >= adj_info->old_exec_offset) { + sec->shdr.sh_offset += adj_info->section_offset_delta; + } else if (is_movable_seg0) { + // lld-style: this section sits in seg0 (file offset below + // exec) but shifts forward in vaddr. seg0 has p_vaddr == + // p_offset == 0, so its file delta equals vaddr_delta. 
+ sec->shdr.sh_offset += adj_info->vaddr_delta; + } } } pr_info("Adjusted %zu section headers\n", n_updated); @@ -1465,7 +1551,7 @@ static void adjust_section_headers(ElfInfo *info, AdjInfo *adj_info) { if (adj_info->adjust_offsets) { if (info->ehdr.e_shoff > adj_info->old_exec_offset) { info->ehdr.e_shoff = round_up(info->ehdr.e_shoff + adj_info->section_offset_delta, 8); - } + } } } @@ -2686,40 +2772,53 @@ static int pad_segment_start(ElfInfo *info, AdjInfo *adj_info) { } GElf_Phdr *exec_phdr = &info->phdrs[info->exec_index]; - - // Find the first section in the executable segment - struct elf_section *first_sec = NULL; - GElf_Addr lowest_addr = UINT64_MAX; - + + // Pad the gap between the last non-executable byte and the first + // executable section in LOAD RE. "Last non-executable byte" = max end + // of any non-SHF_EXECINSTR section in the segment, or the segment's + // p_vaddr if there are no such sections. This gives: + // - modern path (LOAD RE has only code): pad [p_vaddr, .text). Zero + // bytes when p_vaddr is 2MB-aligned and .text starts there. + // - 2-LOAD R+E first / "combined": LOAD RE holds metadata (.interp, + // .note, .dynsym, ...) at low vaddrs; the gap from last metadata + // to first code is small or zero; we don't clobber metadata or + // ELF/PHDR which sit in the same range. + // - lld-style: seg0 movable sections live in seg0 LOAD R; LOAD RE + // starts at seg0_end_after_shift. We pad [LOAD_RE.p_vaddr, + // .text) — typically a sub-page sliver. 
+ GElf_Addr seg_lo = exec_phdr->p_vaddr; + GElf_Addr seg_hi = exec_phdr->p_vaddr + exec_phdr->p_memsz; + GElf_Addr first_exec_addr = UINT64_MAX; + GElf_Addr non_exec_end = seg_lo; + for (size_t i = 0; i < info->shnum; i++) { struct elf_section *sec = &info->sections[i]; - - // Check if section is in this segment - if (sec->shdr.sh_addr >= exec_phdr->p_vaddr && - sec->shdr.sh_addr < exec_phdr->p_vaddr + exec_phdr->p_memsz) { - - if (sec->shdr.sh_addr < lowest_addr) { - first_sec = sec; - lowest_addr = sec->shdr.sh_addr; + if (sec->shdr.sh_addr < seg_lo || sec->shdr.sh_addr >= seg_hi) + continue; + if (sec->shdr.sh_flags & SHF_EXECINSTR) { + if (sec->shdr.sh_addr < first_exec_addr) { + first_exec_addr = sec->shdr.sh_addr; + } + } else { + GElf_Addr end = sec->shdr.sh_addr + sec->shdr.sh_size; + if (end > non_exec_end) { + non_exec_end = end; } } } - if (!first_sec) { - pr_error("Could not find first section in executable segment\n"); + if (first_exec_addr == UINT64_MAX) { + pr_error("Could not find first executable section in executable segment\n"); return -1; } - // Calculate padding needed between segment start and first section - GElf_Addr segment_start_addr = round_up(exec_phdr->p_vaddr, HUGE_PAGE_SIZE); - size_t padding_size = first_sec->shdr.sh_addr - segment_start_addr; - - if (padding_size == 0) { - return 0; // No padding needed + if (first_exec_addr <= non_exec_end) { + return 0; // metadata abuts code — nothing to pad } - // Calculate file offset where padding should go - GElf_Off padding_offset = exec_phdr->p_offset + (segment_start_addr - exec_phdr->p_vaddr); + GElf_Addr pad_start_addr = non_exec_end; + size_t padding_size = first_exec_addr - pad_start_addr; + GElf_Off padding_offset = exec_phdr->p_offset + (pad_start_addr - exec_phdr->p_vaddr); // Create a buffer for the int3 instructions __attribute__((cleanup(cleanup_free))) void *padding = malloc(padding_size); @@ -2931,13 +3030,62 @@ static int handle_fallback_hugepage_alignment(ElfInfo *info) { 
return ERR_SUCCESS; } +// Returns 1 if any section in the first read-only LOAD ("seg0") is +// SHF_ALLOC, not SHT_NOBITS, sits at vaddr below the first PF_X PT_LOAD's +// p_vaddr, AND is NOT in the relocatable_section_types whitelist. The +// whitelist catches metadata that's only consumed by ld.so (.dynsym, +// .gnu.hash, .rela.*, .dynamic, .interp, .note.*, ...). Anything else +// in seg0 — typically .rodata, .eh_frame, .eh_frame_hdr, .gcc_except_table +// — is RIP-referenced from .text, making the binary "lld-style" and +// unsafe for the main shifting path unless those sections shift too. +static int seg0_has_movable_sections(ElfInfo *info) { + GElf_Addr exec_vaddr = 0; + bool found = false; + for (size_t i = 0; i < info->phnum; i++) { + if (info->phdrs[i].p_type == PT_LOAD && + (info->phdrs[i].p_flags & PF_X)) { + exec_vaddr = info->phdrs[i].p_vaddr; + found = true; + break; + } + } + if (!found) { + return 0; + } + + for (size_t i = 0; i < info->shnum; i++) { + struct elf_section *sec = &info->sections[i]; + if (sec->shdr.sh_addr == 0 || sec->shdr.sh_addr >= exec_vaddr) { + continue; + } + if ((sec->shdr.sh_flags & SHF_ALLOC) == 0) continue; + if (sec->shdr.sh_type == SHT_NOBITS) continue; + + bool whitelisted = false; + for (size_t j = 0; j < array_size(relocatable_section_types); j++) { + if (sec->shdr.sh_type == relocatable_section_types[j].type && + (relocatable_section_types[j].name == NULL || + strcmp(sec->name, relocatable_section_types[j].name) == 0)) { + whitelisted = true; + break; + } + } + if (!whitelisted) { + pr_debug("seg0 contains non-whitelisted section '%s' (type=%u, addr=0x%lx) — lld-style\n", + sec->name, sec->shdr.sh_type, sec->shdr.sh_addr); + return 1; + } + } + return 0; +} + static int process_elf(const char *filename, uint32_t flags) { __attribute__((cleanup(cleanup_free_elf_info))) ElfInfo *elf_info = init_elf(filename); if (elf_info == NULL) { return ERR_FATAL; } - + if (read_elf(elf_info) != 0) { pr_error("Failed to read ELF file\n"); return ERR_FATAL; @@ -3010,12 
+3158,95 @@ static int process_elf(const char *filename, uint32_t flags) { uint64_t new_exec_sec_vaddr = round_down(new_p_vaddr_end - old_aligned_p_memsz, exec_phdr.p_align); uint64_t vaddr_delta = new_exec_sec_vaddr - exec_phdr.p_vaddr; - uint64_t segment_offset_delta = round_up_delta(exec_phdr.p_offset, HUGE_PAGE_SIZE); - uint64_t section_offset_delta = segment_offset_delta + (new_exec_sec_vaddr % HUGE_PAGE_SIZE); + // lld-style PIE detection: seg0 carries .rodata / .eh_frame* / + // .gcc_except_table that are RIP-referenced from .text. We can't leave + // them at their original vaddrs (the shift would break the RIP-relative + // LEAs), so collect their ranges here so calc_adjusted_addr can shift + // them by the same vaddr_delta as everything at-or-after the exec + // segment. Whitelisted seg0 metadata (.dynsym, .gnu.hash, .rela.*, ...) + // stays put. seg0_end_after_shift is the post-shift highest end of any + // seg0 section — used as the lower clamp on LOAD RE's p_vaddr so seg0 + // LOAD R and LOAD RE never overlap in vaddr space. 
+ AddrRange *movable_seg0_ranges __attribute__((cleanup(cleanup_free_addrrange))) = NULL; + size_t n_movable_seg0 = 0; + GElf_Addr seg0_end_after_shift = 0; + + if (seg0_has_movable_sections(elf_info)) { + movable_seg0_ranges = malloc(elf_info->shnum * sizeof(*movable_seg0_ranges)); + if (movable_seg0_ranges == NULL) { + pr_error("Failed to allocate movable seg0 ranges\n"); + return ERR_FATAL; + } + + for (size_t i = 0; i < elf_info->shnum; i++) { + struct elf_section *sec = &elf_info->sections[i]; + if (sec->shdr.sh_addr == 0 || sec->shdr.sh_addr >= exec_phdr.p_vaddr) continue; + if ((sec->shdr.sh_flags & SHF_ALLOC) == 0) continue; + if (sec->shdr.sh_type == SHT_NOBITS) continue; + + bool whitelisted = false; + for (size_t j = 0; j < array_size(relocatable_section_types); j++) { + if (sec->shdr.sh_type == relocatable_section_types[j].type && + (relocatable_section_types[j].name == NULL || + strcmp(sec->name, relocatable_section_types[j].name) == 0)) { + whitelisted = true; + break; + } + } + + GElf_Addr sec_end = sec->shdr.sh_addr + sec->shdr.sh_size; + GElf_Addr post_end = whitelisted ? sec_end : sec_end + vaddr_delta; + if (post_end > seg0_end_after_shift) { + seg0_end_after_shift = post_end; + } + if (!whitelisted) { + movable_seg0_ranges[n_movable_seg0].old_lo = sec->shdr.sh_addr; + movable_seg0_ranges[n_movable_seg0].old_hi = sec_end; + n_movable_seg0++; + pr_debug(" movable seg0: '%s' [0x%lx, 0x%lx) -> [0x%lx, 0x%lx)\n", + sec->name, (uint64_t)sec->shdr.sh_addr, (uint64_t)sec_end, + (uint64_t)(sec->shdr.sh_addr + vaddr_delta), + (uint64_t)(sec_end + vaddr_delta)); + } + } + pr_info("lld-style seg0: %zu movable section(s); seg0_end_after_shift=0x%lx\n", + n_movable_seg0, (uint64_t)seg0_end_after_shift); + } + + // Compute the LOAD RE p_vaddr that adjust_program_headers will end up + // setting (max(round_down(new_exec_sec_vaddr, 2MB), seg0_end_after_shift)). 
+ // For modern paths this equals round_down(new_exec_sec_vaddr, 2MB); + // for lld-style with seg0_end_after_shift > round_down it equals + // seg0_end_after_shift. section_offset_delta must be derived from this + // clamped p_vaddr so that for every section in LOAD RE, + // sh_offset_new - sh_addr_new == p_offset_new - p_vaddr_clamped. + // Without the clamp factored in, .text's sh_offset would point to a + // file location outside LOAD RE's file region, and the kernel would + // load garbage at .text's vaddr. + GElf_Addr exec_p_vaddr_round_down = round_down(new_exec_sec_vaddr, HUGE_PAGE_SIZE); + GElf_Addr exec_p_vaddr_clamped = exec_p_vaddr_round_down > seg0_end_after_shift + ? exec_p_vaddr_round_down + : seg0_end_after_shift; + // segment_offset_delta brings p_offset up to a place where + // (a) it doesn't overlap the extended seg0 LOAD R file region, and + // (b) p_offset_new % 2MB == p_vaddr_clamped % 2MB (kernel constraint + // for placing LOAD RE on a 2MB-aligned VMA). + // For lld-style we extended seg0 LOAD R's filesz to exec_p_vaddr_clamped + // (since seg0 has p_offset == p_vaddr == 0); LOAD RE's file region must + // start at exec_p_vaddr_clamped, which automatically gives us the + // matching modulo. For modern paths where seg0 wasn't extended, the + // existing round-up-to-2MB formula already aligns things. 
+ uint64_t segment_offset_delta; + if (n_movable_seg0 > 0) { + segment_offset_delta = exec_p_vaddr_clamped - exec_phdr.p_offset; + } else { + segment_offset_delta = round_up_delta(exec_phdr.p_offset, HUGE_PAGE_SIZE); + } + uint64_t section_offset_delta = segment_offset_delta + (new_exec_sec_vaddr - exec_p_vaddr_clamped); pr_info("Original vaddr: 0x%lx, size: 0x%lx\n", exec_phdr.p_vaddr, exec_phdr.p_memsz); pr_info("New vaddr: 0x%lx, new size: 0x%lx\n", new_exec_sec_vaddr, huge_aligned_size); - pr_info("vaddr delta: 0x%lx\n", vaddr_delta); + pr_info("vaddr delta: 0x%lx, exec p_vaddr clamped: 0x%lx\n", vaddr_delta, (uint64_t)exec_p_vaddr_clamped); if (flags & FLAG_FILE_PADDING) { pr_info("Segment offset delta: 0x%lx, section offset delta: 0x%lx\n", segment_offset_delta, section_offset_delta); pr_info("New offset: 0x%lx\n", new_exec_p_offset); @@ -3029,6 +3260,10 @@ static int process_elf(const char *filename, uint32_t flags) { .section_offset_delta = section_offset_delta, .adjust_offsets = flags & FLAG_FILE_PADDING, .adjust_debug = flags & FLAG_DEBUG_UPDATE, + .movable_seg0_ranges = movable_seg0_ranges, + .n_movable_seg0 = n_movable_seg0, + .seg0_end_after_shift = seg0_end_after_shift, + .exec_p_vaddr_clamped = exec_p_vaddr_clamped, }; // Adjust the ELF structure diff --git a/tests/test.py b/tests/test.py index 173accb..cd420ff 100644 --- a/tests/test.py +++ b/tests/test.py @@ -11,11 +11,119 @@ def run_command(cmd): raise RuntimeError(f"Command {cmd} failed: {result.stderr}") return result.stdout +def parse_load_segments(filename): + """Parse LOAD program-header entries from readelf -lW output. 
+ Returns a list of dicts with offset/vaddr/filesz/memsz/flags.""" + output = run_command(['readelf', '-lW', filename]) + loads = [] + for line in output.splitlines(): + s = line.strip() + if not s.startswith('LOAD'): + continue + parts = s.split() + # Format with R+E (flags split): LOAD off vaddr paddr filesz memsz R E align + # Format with R/RW/RWE: LOAD off vaddr paddr filesz memsz Flg align + if len(parts) == 9: + flags = parts[6] + parts[7] + else: + flags = parts[6] + loads.append({ + 'offset': int(parts[1], 16), + 'vaddr': int(parts[2], 16), + 'filesz': int(parts[4], 16), + 'memsz': int(parts[5], 16), + 'flags': flags, + }) + return loads + + +def check_offset_vaddr_mod_2mb_match(filename, huge=0x200000): + """Verify that for the executable LOAD, p_offset and p_vaddr have the + same residue modulo 2MB. Without this property the kernel cannot place + the segment on a 2MB-aligned VMA backed by the file (THP / file-backed + huge pages need both vaddr and file offset 2MB-aligned in lockstep). + Linkers normally produce binaries where this isn't true (e.g. lld + leaves a 0x1000 vaddr/offset gap on the executable LOAD), so hugifyr's + transformation must establish it.""" + loads = parse_load_segments(filename) + exec_load = next((l for l in loads if 'E' in l['flags']), None) + if not exec_load: + raise RuntimeError(f"{filename}: no executable LOAD") + o_mod = exec_load['offset'] % huge + v_mod = exec_load['vaddr'] % huge + if o_mod != v_mod: + raise RuntimeError( + f"{filename}: exec LOAD p_offset%2MB=0x{o_mod:x} != p_vaddr%2MB=0x{v_mod:x} " + f"(p_offset=0x{exec_load['offset']:x}, p_vaddr=0x{exec_load['vaddr']:x}). 
" + f"Kernel can't place on a 2MB-aligned file-backed VMA.") + print(f"offset%2MB == vaddr%2MB OK in {filename} " + f"(both 0x{v_mod:x}; p_offset=0x{exec_load['offset']:x}, " + f"p_vaddr=0x{exec_load['vaddr']:x})") + + +def check_re_chunk_isolation(filename, huge=0x200000): + """Verify that every 2MB chunk *fully covered* by LOAD RE is not + touched by any other LOAD's vaddr range. The fully-covered chunks + are the ones the kernel can promote to a code huge page; partial + chunks at the start/end of LOAD RE can legitimately share their + range with the adjacent LOAD R / LOAD RW (e.g. lld-style binaries + place the first partial RE chunk inside seg0's vaddr range, which + is fine because a partial chunk is never huge-page eligible).""" + loads = parse_load_segments(filename) + exec_load = next((l for l in loads if 'E' in l['flags']), None) + if not exec_load: + raise RuntimeError(f"{filename}: no executable LOAD") + + re_lo = exec_load['vaddr'] + re_hi = re_lo + exec_load['memsz'] + full_lo = (re_lo + huge - 1) & ~(huge - 1) # first fully-covered chunk start + full_hi = re_hi & ~(huge - 1) # one-past last fully-covered chunk start + + if full_hi <= full_lo: + print(f"RE chunk isolation: no fully-covered 2MB chunks in {filename} " + f"(RE [0x{re_lo:x}, 0x{re_hi:x}); too small)") + return + + for l in loads: + if l is exec_load: + continue + l_lo = l['vaddr'] + l_hi = l_lo + l['memsz'] + if l_lo < full_hi and l_hi > full_lo: + raise RuntimeError( + f"{filename}: LOAD vaddr 0x{l_lo:x}-0x{l_hi:x} (flags '{l['flags']}') " + f"intersects a fully-covered RE 2MB chunk in [0x{full_lo:x}, 0x{full_hi:x}). 
" + f"RE [0x{re_lo:x}, 0x{re_hi:x}); mmap-order will mix protections in shared chunks " + f"and the kernel can't grant code huge pages.") + print(f"RE chunk isolation OK in {filename} " + f"(RE [0x{re_lo:x}, 0x{re_hi:x}), full chunks [0x{full_lo:x}, 0x{full_hi:x}) clear)") + + +def check_exec_load_end_aligned(filename, huge=0x200000): + """Verify that for the executable LOAD, p_vaddr + p_memsz lands on a + 2MB boundary AND p_align is 2MB. The END being aligned is what makes + the last code huge page eligible — it's required regardless of + whether the START is also 2MB-aligned. (lld-style transformed + binaries have a non-aligned p_vaddr clamped to seg0_end_after_shift, + but the end is still extended to a 2MB boundary.)""" + loads = parse_load_segments(filename) + exec_load = next((l for l in loads if 'E' in l['flags']), None) + if not exec_load: + raise RuntimeError(f"{filename}: no executable LOAD") + end = exec_load['vaddr'] + exec_load['memsz'] + if end % huge != 0: + raise RuntimeError( + f"{filename}: exec LOAD end 0x{end:x} (vaddr=0x{exec_load['vaddr']:x} + " + f"memsz=0x{exec_load['memsz']:x}) is not 2MB-aligned") + print(f"exec LOAD end 2MB-aligned OK in {filename} " + f"(end=0x{end:x}, vaddr=0x{exec_load['vaddr']:x}, memsz=0x{exec_load['memsz']:x})") + + def check_segment_alignment(filename): """Check if executable segment is properly 2MB aligned""" ALIGN_SIZE = 0x200000 # 2MB output = run_command(['readelf', '-Wl', filename]) - + # Parse readelf output looking for LOAD segments with execute permission lines = output.splitlines() i = 0 @@ -25,7 +133,7 @@ def check_segment_alignment(filename): # Check if this line has flags (ARM64 format) or next line has flags (x86_64 format) has_exec = False flags_line = line - + # Check current line for flags if ' R E ' in line or ' RWE ' in line: has_exec = True @@ -34,26 +142,26 @@ def check_segment_alignment(filename): elif i + 1 < len(lines) and (' R E ' in lines[i + 1] or ' RWE ' in lines[i + 1]): has_exec = True 
flags_line = lines[i + 1] - + if has_exec: # Parse LOAD segment values load_match = re.search(r'LOAD\s+0x([0-9a-f]+)\s+0x([0-9a-f]+)', line) if not load_match: raise RuntimeError(f"Failed to parse LOAD segment in {filename}") - + offset = int(load_match.group(1), 16) vaddr = int(load_match.group(2), 16) - + # Parse alignment - could be on same line or flags line align_match = re.search(r'0x([0-9a-f]+)\s*$', flags_line) if not align_match: align_match = re.search(r'0x([0-9a-f]+)\s*$', line) - + if not align_match: print(f"Line: {line}") print(f"Flags line: {flags_line}") raise RuntimeError(f"Failed to parse alignment in {filename}") - + align = int(align_match.group(1), 16) if align != ALIGN_SIZE: @@ -65,7 +173,7 @@ def check_segment_alignment(filename): print(f"Segment alignment OK in {filename}") return True i += 1 - + raise RuntimeError(f"No executable segment found in {filename}") def test_basic(): @@ -104,6 +212,17 @@ def test_basic(): check_segment_alignment('test1_huge.exe') check_segment_alignment('libtest1_huge.so') + # Regression: the 2MB chunk where RE ends must not be intersected by + # any subsequent LOAD's vaddr — that's what makes the last code huge + # page eligible. This is what end-aligning vaddr_delta gives us; a + # start-aligning delta puts subsequent LOADs back into RE's last chunk. + check_re_chunk_isolation('test1_huge.exe') + + # Regression: p_offset and p_vaddr of the exec LOAD must agree + # modulo 2MB. The transformation should establish this property + # whether the input had it or not. + check_offset_vaddr_mod_2mb_match('test1_huge.exe') + # Check debug info print("Checking debug info...") gdb_output = run_command(['gdb', '-batch', '-ex', 'file test1_huge.exe', @@ -197,6 +316,62 @@ def test_tls_relocations(): print("TLS relocation tests passed!") +def test_load_layouts(): + """Regression: hugifyr must work across the LOAD-segment layouts. + + 1. default (modern ld -z separate-code): 4-LOAD with metadata-only first + R segment. 
.rodata sits in a separate (third) R LOAD. Goes through + the main shifting path (huge-page enablement). + 2. -z noseparate-code: 2-LOAD with code+rodata combined + in the first R+E segment (Oracle JDK / libjvm.so shape). Main path. + 3. lld: 4-LOAD with .rodata + .eh_frame* + in the first R segment. .text RIP-references seg0 content, so the + main shifting path alone would break this binary. hugifyr shifts the + movable seg0 sections by the same vaddr_delta, clamps the exec LOAD's + p_vaddr to seg0_end_after_shift, and pads the file so that + p_offset%2MB == p_vaddr%2MB. Output runs identically to the original.""" + print("\n=== Testing across LOAD-segment layouts ===") + + variants = [ + ('default', ['gcc', '-pie', '-O2']), + ('combined', ['gcc', '-pie', '-O2', '-Wl,-z,noseparate-code']), + ] + if subprocess.run(['gcc', '-fuse-ld=lld', '-x', 'c', '-', '-o', '/dev/null'], + input='int main(){}', capture_output=True, text=True).returncode == 0: + variants.append(('lld', ['gcc', '-pie', '-O2', '-fuse-ld=lld'])) + else: + print(" (lld not available — skipping rodata-in-seg0 variant)") + + for name, cmd in variants: + src_exe = f'test1_{name}.exe' + huge_exe = f'test1_{name}_huge.exe' + run_command(cmd + ['test1.c', '-o', src_exe]) + + original = run_command([f'./{src_exe}']).strip() + run_command(['../bin/hugifyr', src_exe, huge_exe]) + os.chmod(huge_exe, 0o755) + modified = run_command([f'./{huge_exe}']).strip() + + if original != modified: + raise RuntimeError(f"{name}: output mismatch (orig={original!r}, new={modified!r})") + + # Every variant must establish p_offset%2MB == p_vaddr%2MB after + # the transform. modern paths shift to a 2MB-aligned p_vaddr; + # lld-style clamps p_vaddr to seg0_end_after_shift and pads the + # file to match. + check_offset_vaddr_mod_2mb_match(huge_exe) + # Every variant must end-align LOAD RE so the kernel can grant a + # huge page on the last 2MB chunk of code. 
+ check_exec_load_end_aligned(huge_exe) + # Every variant must keep LOAD RE's last 2MB chunk free of any + # other LOAD's vaddr range. This is the central goal: code chunks + # must be exclusively code. + check_re_chunk_isolation(huge_exe) + print(f" {name}: {original}") + + print("Layout tests passed!") + + def test_stripped(): """Regression: hugifyr must work on binaries with no DWARF debug info. @@ -229,6 +404,9 @@ def main(): # Run basic tests test_basic() + # Layout-coverage regression + test_load_layouts() + # Regression: stripped binaries (no DWARF) test_stripped()