Skip to content

Commit c0707a0

Browse files
executor: isolate x86 page table pool to high memory in SYZOS
Refactor the SYZOS guest memory layout to decouple the dynamic page table allocator from the fixed system data structures (GDT, IDT, initial PML4). Previously, the page table pool was located at 0x5000, tightly packed with the initial system pages. This rigid structure made it difficult to expand the pool or inject configuration data without shifting fixed offsets. Move X86_SYZOS_ADDR_PT_POOL to 0x180000, creating a distinct high-memory region well above the L2 VCPU data, and increase the pool size to 64 pages (256KB) to support deeper nested hierarchies. Update the syz_kvm_setup_syzos_vm logic to handle non-contiguous Guest-to-Host address translation via a new get_host_pte_ptr() helper. This is necessary because the executor's host memory allocation remains strictly linear while the guest physical address space now contains significant gaps. This layout change is a prerequisite for enabling "SYZOS inside SYZOS" (L2 nesting), allowing the future injection of boot arguments into the gap created between fixed data and dynamic regions.
1 parent 53d8280 commit c0707a0

File tree

2 files changed

+43
-20
lines changed

2 files changed

+43
-20
lines changed

executor/common_kvm_amd64.h

Lines changed: 38 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -222,8 +222,12 @@ struct mem_region {
222222

223223
// SYZOS guest virtual memory layout (must be in sync with executor/kvm.h):
224224
static const struct mem_region syzos_mem_regions[] = {
225-
// AMD64 data structures (48 pages starting at GPA 0x0, see kvm.h).
226-
{X86_SYZOS_ADDR_ZERO, 48, MEM_REGION_FLAG_GPA0},
225+
// AMD64 fixed data structures (5 pages: Zero, GDT, PML4, PDP, PD).
226+
{X86_SYZOS_ADDR_ZERO, 5, MEM_REGION_FLAG_GPA0},
227+
// High fixed data (IDT, TSS).
228+
{X86_SYZOS_ADDR_VAR_IDT, 11, 0},
229+
// Dynamic Page Table Pool.
230+
{X86_SYZOS_ADDR_PT_POOL, X86_SYZOS_PT_POOL_SIZE, 0},
227231
// SMRAM memory.
228232
{X86_SYZOS_ADDR_SMRAM, 10, 0},
229233
// Unmapped region to trigger page faults for uexits etc.
@@ -253,6 +257,7 @@ struct kvm_syz_vm {
253257
size_t total_pages;
254258
void* user_text;
255259
void* gpa0_mem;
260+
void* pt_pool_mem;
256261
};
257262
#endif
258263

@@ -311,40 +316,52 @@ static uint64 pg_alloc(page_alloc_t* alloc)
311316
return page;
312317
}
313318

314-
static void map_4k_page(uint64 host_mem, page_alloc_t* alloc, uint64 gpa)
319+
// Helper to translate GPA to Host Pointer handling the memory gap.
320+
static uint64* get_host_pte_ptr(struct kvm_syz_vm* vm, uint64 gpa)
315321
{
316-
uint64* pml4 = (uint64*)(host_mem + X86_SYZOS_ADDR_PML4);
322+
// Case 1: GPA is in the PT Pool (High Memory).
323+
if (gpa >= X86_SYZOS_ADDR_PT_POOL &&
324+
gpa < X86_SYZOS_ADDR_PT_POOL + (X86_SYZOS_PT_POOL_SIZE * KVM_PAGE_SIZE)) {
325+
uint64 offset = gpa - X86_SYZOS_ADDR_PT_POOL;
326+
return (uint64*)((char*)vm->pt_pool_mem + offset);
327+
}
328+
// Case 2: GPA is in the Low Fixed Data (0x0 based).
329+
return (uint64*)((char*)vm->gpa0_mem + gpa);
330+
}
331+
332+
static void map_4k_page(struct kvm_syz_vm* vm, page_alloc_t* alloc, uint64 gpa)
333+
{
334+
uint64* pml4 = (uint64*)((char*)vm->gpa0_mem + X86_SYZOS_ADDR_PML4);
317335

318336
// PML4 Entry (Level 4).
319337
uint64 pml4_idx = (gpa >> 39) & 0x1FF;
320338
if (pml4[pml4_idx] == 0)
321339
pml4[pml4_idx] = X86_PDE64_PRESENT | X86_PDE64_RW | pg_alloc(alloc);
322-
uint64* pdpt = (uint64*)(host_mem + (pml4[pml4_idx] & PAGE_MASK));
340+
uint64* pdpt = get_host_pte_ptr(vm, pml4[pml4_idx] & PAGE_MASK);
323341

324342
// PDPT Entry (Level 3).
325343
uint64 pdpt_idx = (gpa >> 30) & 0x1FF;
326344
if (pdpt[pdpt_idx] == 0)
327345
pdpt[pdpt_idx] = X86_PDE64_PRESENT | X86_PDE64_RW | pg_alloc(alloc);
328-
uint64* pd = (uint64*)(host_mem + (pdpt[pdpt_idx] & PAGE_MASK));
346+
uint64* pd = get_host_pte_ptr(vm, pdpt[pdpt_idx] & PAGE_MASK);
329347

330348
// PD Entry (Level 2).
331349
uint64 pd_idx = (gpa >> 21) & 0x1FF;
332350
if (pd[pd_idx] == 0)
333351
pd[pd_idx] = X86_PDE64_PRESENT | X86_PDE64_RW | pg_alloc(alloc);
334-
uint64* pt = (uint64*)(host_mem + (pd[pd_idx] & PAGE_MASK));
352+
uint64* pt = get_host_pte_ptr(vm, pd[pd_idx] & PAGE_MASK);
335353

336354
// PT Entry (Level 1).
337355
uint64 pt_idx = (gpa >> 12) & 0x1FF;
338356

339357
// Set the final 4KB page table entry to map the GPA.
340-
// This is an identity map: GPA -> GPA
341358
pt[pt_idx] = (gpa & PAGE_MASK) | X86_PDE64_PRESENT | X86_PDE64_RW;
342359
}
343360

344-
static int map_4k_region(uint64 host_mem, page_alloc_t* alloc, uint64 gpa_start, int num_pages)
361+
static int map_4k_region(struct kvm_syz_vm* vm, page_alloc_t* alloc, uint64 gpa_start, int num_pages)
345362
{
346363
for (int i = 0; i < num_pages; i++)
347-
map_4k_page(host_mem, alloc, gpa_start + (i * KVM_PAGE_SIZE));
364+
map_4k_page(vm, alloc, gpa_start + (i * KVM_PAGE_SIZE));
348365
return num_pages;
349366
}
350367

@@ -353,20 +370,21 @@ static int map_4k_region(uint64 host_mem, page_alloc_t* alloc, uint64 gpa_start,
353370
static void setup_pg_table(struct kvm_syz_vm* vm)
354371
{
355372
int total = vm->total_pages;
356-
// Page tables are located in the first memory region starting at 0x0.
357-
uint64 host_mem = (uint64)vm->gpa0_mem;
358373

359374
page_alloc_t alloc = {.next_page = X86_SYZOS_ADDR_PT_POOL,
360-
.last_page = X86_SYZOS_ADDR_PT_POOL + 32 * KVM_PAGE_SIZE};
375+
.last_page = X86_SYZOS_ADDR_PT_POOL + X86_SYZOS_PT_POOL_SIZE * KVM_PAGE_SIZE};
376+
377+
// Zero-out the PT Pool memory.
378+
memset(vm->pt_pool_mem, 0, X86_SYZOS_PT_POOL_SIZE * KVM_PAGE_SIZE);
361379

362-
// Zero-out all page table memory.
363-
for (uint64 i = 0; i < (alloc.last_page - alloc.next_page); i += KVM_PAGE_SIZE)
364-
memset((void*)(host_mem + alloc.next_page + i), 0, KVM_PAGE_SIZE);
380+
// Zero-out the fixed system pages (PML4/PDP/PD).
381+
// They are in the first 5 pages of gpa0_mem.
382+
memset(vm->gpa0_mem, 0, 5 * KVM_PAGE_SIZE);
365383

366384
// Map all the regions defined in setup_vm()
367385
for (size_t i = 0; i < sizeof(syzos_mem_regions) / sizeof(syzos_mem_regions[0]); i++)
368-
total -= map_4k_region(host_mem, &alloc, syzos_mem_regions[i].gpa, syzos_mem_regions[i].pages);
369-
map_4k_region(host_mem, &alloc, X86_SYZOS_ADDR_UNUSED, total);
386+
total -= map_4k_region(vm, &alloc, syzos_mem_regions[i].gpa, syzos_mem_regions[i].pages);
387+
map_4k_region(vm, &alloc, X86_SYZOS_ADDR_UNUSED, total);
370388
}
371389

372390
// A 64-bit GDT entry for a code or data segment.
@@ -1149,6 +1167,8 @@ static void setup_vm(int vmfd, struct kvm_syz_vm* vm)
11491167
vm->user_text = next.addr;
11501168
if (r->flags & MEM_REGION_FLAG_GPA0)
11511169
vm->gpa0_mem = next.addr;
1170+
if (r->gpa == X86_SYZOS_ADDR_PT_POOL)
1171+
vm->pt_pool_mem = next.addr;
11521172
if (r->flags & MEM_REGION_FLAG_EXECUTOR_CODE)
11531173
install_syzos_code(next.addr, next.size);
11541174
vm_set_user_memory_region(vmfd, slot++, flags, r->gpa, next.size, (uintptr_t)next.addr);

executor/kvm.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,6 @@
4545
#define X86_SYZOS_ADDR_PML4 0x2000
4646
// PDP for GPAs 0x0 - 0x7fffffffff.
4747
#define X86_SYZOS_ADDR_PDP 0x3000
48-
// Pool of 32 pages for dynamic PT/PD allocations.
49-
#define X86_SYZOS_ADDR_PT_POOL 0x5000
5048
#define X86_SYZOS_ADDR_VAR_IDT 0x25000
5149
#define X86_SYZOS_ADDR_VAR_TSS 0x26000
5250

@@ -76,6 +74,11 @@
7674
// Base offset for the area containing the 4 L2 VM slots.
7775
#define X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA 0x1000
7876

77+
// Separated Page Table Pool in high memory.
78+
// Located above L2 VCPU regions.
79+
#define X86_SYZOS_ADDR_PT_POOL 0x180000
80+
#define X86_SYZOS_PT_POOL_SIZE 64
81+
7982
// Layout of a single L2 VM's data block.
8083

8184
// Size of the memory block for a single L2 VM.

0 commit comments

Comments
 (0)