Commit 294c9e4

executor: use dynamic page table allocation for guest
Use a pool of 32 pages to allocate PT and PD pages for the guest page tables. This eliminates the need for manually assigned page table addresses, which are brittle and may break when someone changes the memory layout.
1 parent 8271fdf commit 294c9e4

2 files changed: +38 -63 lines changed
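
The rewritten map_4k_page() below derives all four table indices directly from the GPA and allocates intermediate tables from the pool on demand. As a quick illustration of that index arithmetic, here is a standalone sketch (not part of the commit) splitting the IOAPIC base address, which the old code had to special-case because it lands in PDPT slot 3 rather than 0:

// Standalone sketch (not from the commit): how a GPA splits into the four
// page-table indices used by map_4k_page(). 0xfec00000 is the IOAPIC base
// per the comment in the old code.
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t gpa = 0xfec00000ULL;
	printf("pml4=%llu pdpt=%llu pd=%llu pt=%llu\n",
	       (unsigned long long)((gpa >> 39) & 0x1FF),  // 0
	       (unsigned long long)((gpa >> 30) & 0x1FF),  // 3
	       (unsigned long long)((gpa >> 21) & 0x1FF),  // 502
	       (unsigned long long)((gpa >> 12) & 0x1FF)); // 0
	return 0;
}

With fixed table addresses, each such special case needed its own hand-assigned PD/PT page; with the pool, the walk below simply calls pg_alloc() whenever an entry is still zero.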

executor/common_kvm_amd64.h

Lines changed: 35 additions & 52 deletions
@@ -220,8 +220,8 @@ struct mem_region {
 
 // SYZOS guest virtual memory layout (must be in sync with executor/kvm.h):
 static const struct mem_region syzos_mem_regions[] = {
-	// AMD64 data structures (20 pages starting at GPA 0x0, see kvm.h).
-	{X86_SYZOS_ADDR_ZERO, 20, MEM_REGION_FLAG_GPA0},
+	// AMD64 data structures (48 pages starting at GPA 0x0, see kvm.h).
+	{X86_SYZOS_ADDR_ZERO, 48, MEM_REGION_FLAG_GPA0},
 	// SMRAM memory.
 	{X86_SYZOS_ADDR_SMRAM, 10, 0},
 	// Unmapped region to trigger a page faults for uexits etc.
@@ -309,56 +309,41 @@ struct kvm_opt {
 #if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu || __NR_syz_kvm_add_vcpu
 #define PAGE_MASK GENMASK_ULL(51, 12)
 
-static void map_4k_page(uint64 host_mem, uint64 gpa)
+typedef struct {
+	uint64 next_page;
+	uint64 last_page;
+} page_alloc_t;
+
+static uint64 pg_alloc(page_alloc_t* alloc)
+{
+	if (alloc->next_page >= alloc->last_page)
+		fail("page table allocation failed");
+	uint64 page = alloc->next_page;
+	alloc->next_page += KVM_PAGE_SIZE;
+	return page;
+}
+
+static void map_4k_page(uint64 host_mem, page_alloc_t* alloc, uint64 gpa)
 {
 	uint64* pml4 = (uint64*)(host_mem + X86_SYZOS_ADDR_PML4);
 
 	// PML4 Entry (Level 4).
 	uint64 pml4_idx = (gpa >> 39) & 0x1FF;
-	// We assume all GPAs are < 512GB, so pml4_idx is always 0 - link it to the PDPT.
 	if (pml4[pml4_idx] == 0)
-		pml4[pml4_idx] = X86_PDE64_PRESENT | X86_PDE64_RW | (X86_SYZOS_ADDR_PDP & PAGE_MASK);
-	uint64* pdpt = (uint64*)(host_mem + (pml4[0] & PAGE_MASK));
+		pml4[pml4_idx] = X86_PDE64_PRESENT | X86_PDE64_RW | pg_alloc(alloc);
+	uint64* pdpt = (uint64*)(host_mem + (pml4[pml4_idx] & PAGE_MASK));
 
 	// PDPT Entry (Level 3).
 	uint64 pdpt_idx = (gpa >> 30) & 0x1FF;
-	uint64* pd_addr_ptr = &pdpt[pdpt_idx];
-	uint64 pd_phys_addr = 0;
-
-	// Determine which Page Directory (PD) to use based on the address
-	if (gpa >= X86_SYZOS_ADDR_IOAPIC) {
-		// High-memory IOAPIC region (0xfec00000).
-		// PDPT index will be 3 (for 0xC0000000 - 0xFFFFFFFFF).
-		pd_phys_addr = X86_SYZOS_ADDR_PD_IOAPIC;
-	} else {
-		// Low-memory region (< 1GB).
-		// PDPT index will be 0 (for 0x0 - 0x3FFFFFFF).
-		pd_phys_addr = X86_SYZOS_ADDR_PD;
-	}
-	if (*pd_addr_ptr == 0)
-		*pd_addr_ptr = X86_PDE64_PRESENT | X86_PDE64_RW | (pd_phys_addr & PAGE_MASK);
-
-	uint64* pd = (uint64*)(host_mem + (*pd_addr_ptr & PAGE_MASK));
+	if (pdpt[pdpt_idx] == 0)
+		pdpt[pdpt_idx] = X86_PDE64_PRESENT | X86_PDE64_RW | pg_alloc(alloc);
+	uint64* pd = (uint64*)(host_mem + (pdpt[pdpt_idx] & PAGE_MASK));
 
 	// PD Entry (Level 2).
 	uint64 pd_idx = (gpa >> 21) & 0x1FF;
-	uint64* pt_addr_ptr = &pd[pd_idx];
-	uint64 pt_phys_addr = 0;
-
-	// Determine which Page Table (PT) to use.
-	if (gpa >= X86_SYZOS_ADDR_IOAPIC) {
-		pt_phys_addr = X86_SYZOS_ADDR_PT_IOAPIC;
-	} else if (gpa >= X86_SYZOS_ADDR_UNUSED) {
-		const uint64 unused_base_pd_idx = (X86_SYZOS_ADDR_UNUSED >> 21) & 0x1FF;
-		const uint64 gpa_pd_idx = (gpa >> 21) & 0x1FF;
-		pt_phys_addr = X86_SYZOS_ADDR_PT_UNUSED_MEM + (gpa_pd_idx - unused_base_pd_idx) * KVM_PAGE_SIZE;
-	} else {
-		pt_phys_addr = X86_SYZOS_ADDR_PT_LOW_MEM;
-	}
-	if (*pt_addr_ptr == 0)
-		*pt_addr_ptr = X86_PDE64_PRESENT | X86_PDE64_RW | (pt_phys_addr & PAGE_MASK);
-
-	uint64* pt = (uint64*)(host_mem + (*pt_addr_ptr & PAGE_MASK));
+	if (pd[pd_idx] == 0)
+		pd[pd_idx] = X86_PDE64_PRESENT | X86_PDE64_RW | pg_alloc(alloc);
+	uint64* pt = (uint64*)(host_mem + (pd[pd_idx] & PAGE_MASK));
 
 	// PT Entry (Level 1).
 	uint64 pt_idx = (gpa >> 12) & 0x1FF;
@@ -368,10 +353,10 @@ static void map_4k_page(uint64 host_mem, uint64 gpa)
 	pt[pt_idx] = (gpa & PAGE_MASK) | X86_PDE64_PRESENT | X86_PDE64_RW;
 }
 
-static int map_4k_region(uint64 host_mem, uint64 gpa_start, int num_pages)
+static int map_4k_region(uint64 host_mem, page_alloc_t* alloc, uint64 gpa_start, int num_pages)
 {
 	for (int i = 0; i < num_pages; i++)
-		map_4k_page(host_mem, gpa_start + (i * KVM_PAGE_SIZE));
+		map_4k_page(host_mem, alloc, gpa_start + (i * KVM_PAGE_SIZE));
 	return num_pages;
 }
 
@@ -382,20 +367,18 @@ static void setup_pg_table(struct kvm_syz_vm* vm)
 	int total = vm->total_pages;
 	// Page tables are located in the first memory region starting at 0x0.
 	uint64 host_mem = (uint64)vm->gpa0_mem;
+
+	page_alloc_t alloc = {.next_page = X86_SYZOS_ADDR_PT_POOL,
+			      .last_page = X86_SYZOS_ADDR_PT_POOL + 32 * KVM_PAGE_SIZE};
+
 	// Zero-out all page table memory.
-	// This includes the 4 levels (PML4, PDPT, PDs) and 3 PTs
-	memset((void*)(host_mem + X86_SYZOS_ADDR_PML4), 0, KVM_PAGE_SIZE);
-	memset((void*)(host_mem + X86_SYZOS_ADDR_PDP), 0, KVM_PAGE_SIZE);
-	memset((void*)(host_mem + X86_SYZOS_ADDR_PD), 0, KVM_PAGE_SIZE);
-	memset((void*)(host_mem + X86_SYZOS_ADDR_PD_IOAPIC), 0, KVM_PAGE_SIZE);
-	memset((void*)(host_mem + X86_SYZOS_ADDR_PT_LOW_MEM), 0, KVM_PAGE_SIZE);
-	memset((void*)(host_mem + X86_SYZOS_ADDR_PT_UNUSED_MEM), 0, 2 * KVM_PAGE_SIZE);
-	memset((void*)(host_mem + X86_SYZOS_ADDR_PT_IOAPIC), 0, KVM_PAGE_SIZE);
+	for (uint64 i = 0; i < (alloc.last_page - alloc.next_page); i += KVM_PAGE_SIZE)
+		memset((void*)(host_mem + alloc.next_page + i), 0, KVM_PAGE_SIZE);
 
 	// Map all the regions defined in setup_vm()
 	for (size_t i = 0; i < sizeof(syzos_mem_regions) / sizeof(syzos_mem_regions[0]); i++)
-		total -= map_4k_region(host_mem, syzos_mem_regions[i].gpa, syzos_mem_regions[i].pages);
-	map_4k_region(host_mem, X86_SYZOS_ADDR_UNUSED, total);
+		total -= map_4k_region(host_mem, &alloc, syzos_mem_regions[i].gpa, syzos_mem_regions[i].pages);
+	map_4k_region(host_mem, &alloc, X86_SYZOS_ADDR_UNUSED, total);
 }
 
 // A 64-bit GDT entry for a code or data segment.
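
For intuition about the pool size (an estimate, not something the commit asserts): a contiguous mapping consumes at most one PT per 2MB block, one PD per 1GB block, and one PDPT per 512GB block that its GPA range touches, and later mappings reuse entries that are already populated. A standalone sketch of that upper bound, with a hypothetical helper name:

// Sketch (assumption, not part of the commit): worst-case number of pool
// pages one contiguous mapping can consume. pool_pages_needed() is a
// hypothetical helper, not an executor function.
#include <stdint.h>
#include <stdio.h>

#define KVM_PAGE_SIZE 0x1000ULL

// Number of distinct (1 << shift)-aligned blocks that [start, end] touches.
static uint64_t blocks(uint64_t start, uint64_t end, int shift)
{
	return (end >> shift) - (start >> shift) + 1;
}

static uint64_t pool_pages_needed(uint64_t gpa, uint64_t num_pages)
{
	uint64_t last = gpa + num_pages * KVM_PAGE_SIZE - 1;
	return blocks(gpa, last, 39) + // PDPT pages, one per 512GB block
	       blocks(gpa, last, 30) + // PD pages, one per 1GB block
	       blocks(gpa, last, 21);  // PT pages, one per 2MB block
}

int main(void)
{
	// The 48-page region at GPA 0x0 needs at most 1 PDPT + 1 PD + 1 PT.
	printf("%llu\n", (unsigned long long)pool_pages_needed(0x0, 48)); // 3
	return 0;
}

Regions that fall into the same 1GB block share PDPT and PD pages, so the real total across all syzos regions stays well below 32 pages, and pg_alloc() fails loudly if the pool is ever exhausted.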

executor/kvm.h

Lines changed: 3 additions & 11 deletions
@@ -41,17 +41,9 @@
 #define X86_SYZOS_ADDR_PML4 0x2000
 // PDP for GPAs 0x0 - 0x7fffffffff.
 #define X86_SYZOS_ADDR_PDP 0x3000
-// Lowmem PD for GPAs 0x0 - 0x3fffffff.
-#define X86_SYZOS_ADDR_PD 0x4000
-// IOAPIC PD for GPAs 0xc0000000 - 0xffffffff.
-#define X86_SYZOS_ADDR_PD_IOAPIC 0x5000
-// Lowmem PT for GPAs 0x000000 - 0x1fffff.
-#define X86_SYZOS_ADDR_PT_LOW_MEM 0x6000
-// Two PTs for unused memory for GPAs 0x200000 - 0x3fffff.
-#define X86_SYZOS_ADDR_PT_UNUSED_MEM 0x7000
-// IOAPIC PT for GPAs 0xfed00000 - 0xfedfffff.
-#define X86_SYZOS_ADDR_PT_IOAPIC 0x9000
-#define X86_SYZOS_ADDR_VAR_IDT 0x10000
+// Pool of 32 pages for dynamic PT/PD allocations.
+#define X86_SYZOS_ADDR_PT_POOL 0x5000
+#define X86_SYZOS_ADDR_VAR_IDT 0x25000
 
 #define X86_SYZOS_ADDR_SMRAM 0x30000
 // Write to this page to trigger a page fault and stop KVM_RUN.
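
The relocated constants are self-consistent: the 32-page pool starting at 0x5000 spans 0x20000 bytes and ends exactly at the new X86_SYZOS_ADDR_VAR_IDT, while the enlarged 48-page GPA0 region ends exactly at X86_SYZOS_ADDR_SMRAM. A minimal compile-time check (a sketch, not in the commit; the constants are restated locally):

// Sketch (not in the commit): compile-time consistency check of the new layout.
#define KVM_PAGE_SIZE 0x1000
#define X86_SYZOS_ADDR_PT_POOL 0x5000
#define X86_SYZOS_ADDR_VAR_IDT 0x25000
#define X86_SYZOS_ADDR_SMRAM 0x30000

_Static_assert(X86_SYZOS_ADDR_PT_POOL + 32 * KVM_PAGE_SIZE == X86_SYZOS_ADDR_VAR_IDT,
	       "32-page PT pool ends where the IDT area begins");
_Static_assert(48 * KVM_PAGE_SIZE == X86_SYZOS_ADDR_SMRAM,
	       "48-page GPA0 region ends where SMRAM begins");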
