
Commit 4466bd4

executor: implement dynamic L2 page table allocation in SYZOS
Enable the SYZOS guest (L1) to dynamically allocate memory for nested L2 page tables, replacing the previous rigid static layout.

Move the mem_region and syzos_boot_args struct definitions to the guest header (common_kvm_amd64_syzos.h) so that the guest can parse the memory map injected by the host.

Introduce a bump allocator, guest_alloc_page(), which allocates from the X86_SYZOS_ADDR_UNUSED heap. The allocator relies on a new struct syzos_globals, located at X86_SYZOS_ADDR_GLOBALS, to track the allocation offset.

Refactor setup_l2_page_tables() to allocate intermediate paging levels (PDPT, PD, PT) via guest_alloc_page() instead of using fixed contiguous offsets relative to the PML4. This allows for disjoint memory usage and supports future recursion requirements.
1 parent 275cded commit 4466bd4
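
For context, a minimal sketch of the host side that this scheme presupposes: before launching the guest, the host writes a struct syzos_boot_args blob into the boot-args page, which the guest's get_unused_memory_size() later walks. The inject_boot_args() helper, the host page argument, and the region size are illustrative assumptions, not part of this commit; only the struct layouts come from the diff below.

	static void inject_boot_args(void* host_boot_args_page)
	{
		struct syzos_boot_args* args = (struct syzos_boot_args*)host_boot_args_page;
		args->region_count = 1;
		args->reserved = 0;
		// Advertise the heap region that guest_alloc_page() will carve pages from.
		args->regions[0].gpa = X86_SYZOS_ADDR_UNUSED;
		args->regions[0].pages = 64; // illustrative region size
		args->regions[0].flags = 0;
	}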

File tree

2 files changed: +66 -23 lines

executor/common_kvm_amd64.h

Lines changed: 0 additions & 12 deletions
@@ -215,18 +215,6 @@ static void setup_64bit_idt(struct kvm_sregs* sregs, char* host_mem, uintptr_t g
 #define MEM_REGION_FLAG_NO_HOST_MEM (1 << 6)
 #define MEM_REGION_FLAG_REMAINING (1 << 7)
 
-struct mem_region {
-	uint64 gpa;
-	int pages;
-	uint32 flags;
-};
-
-struct syzos_boot_args {
-	uint32 region_count;
-	uint32 reserved;
-	struct mem_region regions[];
-};
-
 // SYZOS guest virtual memory layout (must be in sync with executor/kvm.h):
 static const struct mem_region syzos_mem_regions[] = {
 	// AMD64 fixed data structures (5 pages: Zero, GDT, PML4, PDP, PD).

executor/common_kvm_amd64_syzos.h

Lines changed: 66 additions & 11 deletions
@@ -96,6 +96,23 @@ struct l2_guest_regs {
 	uint64 r8, r9, r10, r11, r12, r13, r14, r15;
 };
 
+struct mem_region {
+	uint64 gpa;
+	int pages;
+	uint32 flags;
+};
+
+struct syzos_boot_args {
+	uint32 region_count;
+	uint32 reserved;
+	struct mem_region regions[];
+};
+
+struct syzos_globals {
+	uint64 alloc_offset;
+	uint64 total_size;
+};
+
 #ifdef __cplusplus
 extern "C" {
 #endif
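
One detail worth noting about the layout added above: struct syzos_boot_args ends in a flexible array member, so sizeof() alone does not cover the regions. A hedged sketch of how a buffer size would be computed (boot_args_size() is a hypothetical helper, not in the diff):

	static size_t boot_args_size(uint32 region_count)
	{
		// The flexible array member contributes nothing to sizeof(struct syzos_boot_args).
		return sizeof(struct syzos_boot_args) + region_count * sizeof(struct mem_region);
	}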
@@ -672,35 +689,73 @@ guest_handle_enable_nested(struct api_call_1* cmd, uint64 cpu_id)
 	}
 }
 
+// Calculate the size of the unused memory region from the boot arguments.
+GUEST_CODE static uint64 get_unused_memory_size()
+{
+	volatile struct syzos_boot_args* args = (volatile struct syzos_boot_args*)X86_SYZOS_ADDR_BOOT_ARGS;
+	for (uint32 i = 0; i < args->region_count; i++) {
+		if (args->regions[i].gpa == X86_SYZOS_ADDR_UNUSED)
+			return args->regions[i].pages * KVM_PAGE_SIZE;
+	}
+	return 0;
+}
+
+// Allocate a page from the X86_SYZOS_ADDR_UNUSED region using a non-reclaiming bump allocator.
+GUEST_CODE static uint64 guest_alloc_page()
+{
+	volatile struct syzos_globals* globals = (volatile struct syzos_globals*)X86_SYZOS_ADDR_GLOBALS;
+
+	// Lazy initialization of total_size using CAS to prevent races.
+	if (globals->total_size == 0) {
+		uint64 size = get_unused_memory_size();
+		// Attempt to swap 0 with the calculated size.
+		// If another CPU beat us to it, this does nothing (which is fine).
+		__sync_val_compare_and_swap(&globals->total_size, 0, size);
+	}
+
+	// Atomic fetch-and-add to reserve space.
+	uint64 offset = __sync_fetch_and_add(&globals->alloc_offset, KVM_PAGE_SIZE);
+
+	if (offset >= globals->total_size)
+		guest_uexit(UEXIT_ASSERT);
+
+	uint64 ptr = X86_SYZOS_ADDR_UNUSED + offset;
+	guest_memset((void*)ptr, 0, KVM_PAGE_SIZE);
+	return ptr;
+}
+
 GUEST_CODE static noinline void setup_l2_page_tables(cpu_vendor_id vendor, uint64 cpu_id, uint64 vm_id)
 {
+	// The root PML4 remains at the fixed address assigned to this VM.
 	uint64 l2_pml4_addr = X86_SYZOS_ADDR_VM_PGTABLE(cpu_id, vm_id);
-	uint64 l2_pdpt_addr = l2_pml4_addr + KVM_PAGE_SIZE;
-	uint64 l2_pd_addr = l2_pml4_addr + 2 * KVM_PAGE_SIZE;
-	uint64 l2_pt_addr = l2_pml4_addr + 3 * KVM_PAGE_SIZE;
+
+	// Allocate subsequent levels dynamically.
+	uint64 l2_pdpt_addr = guest_alloc_page();
+	uint64 l2_pd_addr = guest_alloc_page();
+	uint64 l2_pt_addr = guest_alloc_page();
 
 	volatile uint64* pml4 = (volatile uint64*)l2_pml4_addr;
 	volatile uint64* pdpt = (volatile uint64*)l2_pdpt_addr;
 	volatile uint64* pd = (volatile uint64*)l2_pd_addr;
 	volatile uint64* pt = (volatile uint64*)l2_pt_addr;
 
+	// Clear the root table (the others are cleared by guest_alloc_page).
 	guest_memset((void*)l2_pml4_addr, 0, KVM_PAGE_SIZE);
-	guest_memset((void*)l2_pdpt_addr, 0, KVM_PAGE_SIZE);
-	guest_memset((void*)l2_pd_addr, 0, KVM_PAGE_SIZE);
-	guest_memset((void*)l2_pt_addr, 0, KVM_PAGE_SIZE);
 	guest_memset((void*)X86_SYZOS_ADDR_MSR_BITMAP(cpu_id, vm_id), 0, KVM_PAGE_SIZE);
 
 	// Intel EPT: set Read, Write, Execute.
 	// AMD NPT: set Present, Write, User.
 	uint64 flags = X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_USER;
-	// Create the 4-level page table entries using 4KB pages:
-	// PML4[0] -> points to PDPT
+
+	// Set up the hierarchy:
+	// PML4[0] -> PDPT
 	pml4[0] = l2_pdpt_addr | flags;
-	// PDPT[0] -> points to Page Directory (PD)
+	// PDPT[0] -> PD
 	pdpt[0] = l2_pd_addr | flags;
-	// PD[0] -> points to Page Table (PT) (NO X86_PDE64_PS)
+	// PD[0] -> PT
 	pd[0] = l2_pt_addr | flags;
-	// PT[0..511] -> maps 512 4KB pages (2MB total) identity
+
+	// PT[0..511] -> identity-maps 2MB
 	uint64 pt_flags = flags;
 	if (vendor == CPU_VENDOR_INTEL) {
 		pt_flags |= EPT_MEMTYPE_WB | EPT_ACCESSED | EPT_DIRTY;
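
To illustrate the flexibility the allocator buys over the old contiguous layout, here is a hypothetical follow-up that maps a second 2MB window by allocating one more page table and wiring it into PD[1]. map_second_2mb() is not part of this commit; it assumes flags and pt_flags are built exactly as in setup_l2_page_tables() above.

	GUEST_CODE static void map_second_2mb(volatile uint64* pd, uint64 flags, uint64 pt_flags)
	{
		uint64 pt_addr = guest_alloc_page(); // already zeroed by the allocator
		volatile uint64* pt = (volatile uint64*)pt_addr;
		// Identity-map GPAs 2MB..4MB with 512 4KB entries.
		for (uint64 i = 0; i < 512; i++)
			pt[i] = ((512 + i) * KVM_PAGE_SIZE) | pt_flags;
		pd[1] = pt_addr | flags;
	}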
