8 changes: 8 additions & 0 deletions executor/common_kvm.h
@@ -44,11 +44,19 @@ static long syz_kvm_assert_syzos_kvm_exit(volatile long a0, volatile long a1)
uint64 expect = a1;

if (!run) {
#if !SYZ_EXECUTOR
fprintf(stderr, "[SYZOS-DEBUG] Assertion Triggered: run is NULL\n");
#endif
errno = EINVAL;
return -1;
}

if (run->exit_reason != expect) {
#if !SYZ_EXECUTOR
fprintf(stderr, "[SYZOS-DEBUG] KVM Exit Reason Mismatch\n");
fprintf(stderr, " Expected: 0x%lx\n", (unsigned long)expect);
fprintf(stderr, " Actual: 0x%lx\n", (unsigned long)run->exit_reason);
#endif
errno = EDOM;
return -1;
}
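
Note: these prints are compiled only into standalone reproducers, where SYZ_EXECUTOR is not set, so the executor's output stays clean. Assuming a test that expects KVM_EXIT_HLT (5) while the vCPU actually stops with KVM_EXIT_MMIO (6), the new output would read:

[SYZOS-DEBUG] KVM Exit Reason Mismatch
  Expected: 0x5
  Actual:   0x6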
109 changes: 78 additions & 31 deletions executor/common_kvm_amd64.h
@@ -213,17 +213,20 @@ static void setup_64bit_idt(struct kvm_sregs* sregs, char* host_mem, uintptr_t g
#define MEM_REGION_FLAG_EXECUTOR_CODE (1 << 3)
#define MEM_REGION_FLAG_GPA0 (1 << 5)
#define MEM_REGION_FLAG_NO_HOST_MEM (1 << 6)

struct mem_region {
uint64 gpa;
int pages;
uint32 flags;
};
#define MEM_REGION_FLAG_REMAINING (1 << 7)

// SYZOS guest virtual memory layout (must be in sync with executor/kvm.h):
static const struct mem_region syzos_mem_regions[] = {
// AMD64 data structures (48 pages starting at GPA 0x0, see kvm.h).
{X86_SYZOS_ADDR_ZERO, 48, MEM_REGION_FLAG_GPA0},
// AMD64 fixed data structures (5 pages: Zero, GDT, PML4, PDP, PD).
{X86_SYZOS_ADDR_ZERO, 5, MEM_REGION_FLAG_GPA0},
// High fixed data (IDT, TSS). Reduced to 10 pages to make room for Boot Args.
{X86_SYZOS_ADDR_VAR_IDT, 10, 0},
// Boot Configuration Page.
{X86_SYZOS_ADDR_BOOT_ARGS, 1, 0},
// Dynamic Page Table Pool.
{X86_SYZOS_ADDR_PT_POOL, X86_SYZOS_PT_POOL_SIZE, 0},
// Global State Page.
{X86_SYZOS_ADDR_GLOBALS, 1, 0},
// SMRAM memory.
{X86_SYZOS_ADDR_SMRAM, 10, 0},
// Unmapped region to trigger page faults for uexits etc.
@@ -242,6 +245,8 @@ static const struct mem_region syzos_mem_regions[] = {
{X86_SYZOS_PER_VCPU_REGIONS_BASE, (KVM_MAX_VCPU * X86_SYZOS_L1_VCPU_REGION_SIZE) / KVM_PAGE_SIZE, 0},
// IOAPIC memory.
{X86_SYZOS_ADDR_IOAPIC, 1, 0},
// Remainder of memory (Unused Heap). Must be last.
{X86_SYZOS_ADDR_UNUSED, 0, MEM_REGION_FLAG_REMAINING},
};
#endif
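
As a review aid, here is a sketch (not part of the patch) of how the page count for the MEM_REGION_FLAG_REMAINING entry falls out of the table above; total_pages is whatever the VM was created with:

// Sketch: mirror the remainder accounting in setup_pg_table()/setup_vm().
static int remaining_pages(const struct mem_region* regions, size_t n, int total_pages)
{
	int total = total_pages;
	for (size_t i = 0; i < n; i++) {
		if (regions[i].flags & MEM_REGION_FLAG_REMAINING)
			return total; // Whatever has not been consumed yet.
		// Regions without host backing do not consume guest RAM.
		if (!(regions[i].flags & MEM_REGION_FLAG_NO_HOST_MEM))
			total -= regions[i].pages;
	}
	return 0; // No remainder entry in the table.
}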

@@ -253,6 +258,7 @@ struct kvm_syz_vm {
size_t total_pages;
void* user_text;
void* gpa0_mem;
void* pt_pool_mem;
};
#endif

@@ -311,40 +317,52 @@ static uint64 pg_alloc(page_alloc_t* alloc)
return page;
}

static void map_4k_page(uint64 host_mem, page_alloc_t* alloc, uint64 gpa)
// Helper to translate a GPA to a host pointer, handling the memory gap.
static uint64* get_host_pte_ptr(struct kvm_syz_vm* vm, uint64 gpa)
{
// Case 1: GPA is in the PT Pool (High Memory).
if (gpa >= X86_SYZOS_ADDR_PT_POOL &&
gpa < X86_SYZOS_ADDR_PT_POOL + (X86_SYZOS_PT_POOL_SIZE * KVM_PAGE_SIZE)) {
uint64 offset = gpa - X86_SYZOS_ADDR_PT_POOL;
return (uint64*)((char*)vm->pt_pool_mem + offset);
}
// Case 2: GPA is in the Low Fixed Data (0x0 based).
return (uint64*)((char*)vm->gpa0_mem + gpa);
}

static void map_4k_page(struct kvm_syz_vm* vm, page_alloc_t* alloc, uint64 gpa)
{
uint64* pml4 = (uint64*)(host_mem + X86_SYZOS_ADDR_PML4);
uint64* pml4 = (uint64*)((char*)vm->gpa0_mem + X86_SYZOS_ADDR_PML4);

// PML4 Entry (Level 4).
uint64 pml4_idx = (gpa >> 39) & 0x1FF;
if (pml4[pml4_idx] == 0)
pml4[pml4_idx] = X86_PDE64_PRESENT | X86_PDE64_RW | pg_alloc(alloc);
uint64* pdpt = (uint64*)(host_mem + (pml4[pml4_idx] & PAGE_MASK));
uint64* pdpt = get_host_pte_ptr(vm, pml4[pml4_idx] & PAGE_MASK);

// PDPT Entry (Level 3).
uint64 pdpt_idx = (gpa >> 30) & 0x1FF;
if (pdpt[pdpt_idx] == 0)
pdpt[pdpt_idx] = X86_PDE64_PRESENT | X86_PDE64_RW | pg_alloc(alloc);
uint64* pd = (uint64*)(host_mem + (pdpt[pdpt_idx] & PAGE_MASK));
uint64* pd = get_host_pte_ptr(vm, pdpt[pdpt_idx] & PAGE_MASK);

// PD Entry (Level 2).
uint64 pd_idx = (gpa >> 21) & 0x1FF;
if (pd[pd_idx] == 0)
pd[pd_idx] = X86_PDE64_PRESENT | X86_PDE64_RW | pg_alloc(alloc);
uint64* pt = (uint64*)(host_mem + (pd[pd_idx] & PAGE_MASK));
uint64* pt = get_host_pte_ptr(vm, pd[pd_idx] & PAGE_MASK);

// PT Entry (Level 1).
uint64 pt_idx = (gpa >> 12) & 0x1FF;

// Set the final 4KB page table entry to map the GPA.
// This is an identity map: GPA -> GPA.
pt[pt_idx] = (gpa & PAGE_MASK) | X86_PDE64_PRESENT | X86_PDE64_RW;
}

static int map_4k_region(uint64 host_mem, page_alloc_t* alloc, uint64 gpa_start, int num_pages)
static int map_4k_region(struct kvm_syz_vm* vm, page_alloc_t* alloc, uint64 gpa_start, int num_pages)
{
for (int i = 0; i < num_pages; i++)
map_4k_page(host_mem, alloc, gpa_start + (i * KVM_PAGE_SIZE));
map_4k_page(vm, alloc, gpa_start + (i * KVM_PAGE_SIZE));
return num_pages;
}
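
For readers new to the 4-level walk: the shifts in map_4k_page() are the standard x86-64 9-9-9-9-12 decomposition. A small illustrative helper (not part of the patch):

// Sketch: split a GPA into the four table indices used by map_4k_page().
// Example: gpa = 0x180000 gives pml4 = 0, pdpt = 0, pd = 0, pt = 0x180.
static void gpa_indices(uint64 gpa, uint64 idx[4])
{
	idx[0] = (gpa >> 39) & 0x1FF; // PML4 index (bits 47:39).
	idx[1] = (gpa >> 30) & 0x1FF; // PDPT index (bits 38:30).
	idx[2] = (gpa >> 21) & 0x1FF; // PD index (bits 29:21).
	idx[3] = (gpa >> 12) & 0x1FF; // PT index (bits 20:12).
}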

@@ -353,20 +371,31 @@ static int map_4k_region(uint64 host_mem, page_alloc_t* alloc, uint64 gpa_start,
static void setup_pg_table(struct kvm_syz_vm* vm)
{
int total = vm->total_pages;
// Page tables are located in the first memory region starting at 0x0.
uint64 host_mem = (uint64)vm->gpa0_mem;

page_alloc_t alloc = {.next_page = X86_SYZOS_ADDR_PT_POOL,
.last_page = X86_SYZOS_ADDR_PT_POOL + 32 * KVM_PAGE_SIZE};
.last_page = X86_SYZOS_ADDR_PT_POOL + X86_SYZOS_PT_POOL_SIZE * KVM_PAGE_SIZE};

// Zero-out all page table memory.
for (uint64 i = 0; i < (alloc.last_page - alloc.next_page); i += KVM_PAGE_SIZE)
memset((void*)(host_mem + alloc.next_page + i), 0, KVM_PAGE_SIZE);
// Zero-out the PT Pool memory.
memset(vm->pt_pool_mem, 0, X86_SYZOS_PT_POOL_SIZE * KVM_PAGE_SIZE);
// Zero-out the fixed system pages (PML4/PDP/PD).
memset(vm->gpa0_mem, 0, 5 * KVM_PAGE_SIZE);

// Map all the regions defined in setup_vm()
for (size_t i = 0; i < sizeof(syzos_mem_regions) / sizeof(syzos_mem_regions[0]); i++)
total -= map_4k_region(host_mem, &alloc, syzos_mem_regions[i].gpa, syzos_mem_regions[i].pages);
map_4k_region(host_mem, &alloc, X86_SYZOS_ADDR_UNUSED, total);
for (size_t i = 0; i < sizeof(syzos_mem_regions) / sizeof(syzos_mem_regions[0]); i++) {
int pages = syzos_mem_regions[i].pages;
if (syzos_mem_regions[i].flags & MEM_REGION_FLAG_REMAINING) {
if (total < 0)
fail("Guest memory accounting underflow");
pages = total;
}
map_4k_region(vm, &alloc, syzos_mem_regions[i].gpa, pages);

// Only consume 'total' if the region is actually backed by host RAM.
if (!(syzos_mem_regions[i].flags & MEM_REGION_FLAG_NO_HOST_MEM))
total -= pages;
if (syzos_mem_regions[i].flags & MEM_REGION_FLAG_REMAINING)
break;
}
}

// A 64-bit GDT entry for a code or data segment.
@@ -1134,12 +1163,18 @@ static void setup_vm(int vmfd, struct kvm_syz_vm* vm)
{
struct addr_size allocator = {.addr = vm->host_mem, .size = vm->total_pages * KVM_PAGE_SIZE};
int slot = 0; // Slot numbers do not matter, they just have to be different.
struct syzos_boot_args* boot_args = NULL;

for (size_t i = 0; i < sizeof(syzos_mem_regions) / sizeof(syzos_mem_regions[0]); i++) {
const struct mem_region* r = &syzos_mem_regions[i];
if (r->flags & MEM_REGION_FLAG_NO_HOST_MEM)
continue;
struct addr_size next = alloc_guest_mem(&allocator, r->pages * KVM_PAGE_SIZE);

size_t pages = r->pages;
if (r->flags & MEM_REGION_FLAG_REMAINING)
pages = allocator.size / KVM_PAGE_SIZE;

struct addr_size next = alloc_guest_mem(&allocator, pages * KVM_PAGE_SIZE);
uint32 flags = 0;
if (r->flags & MEM_REGION_FLAG_DIRTY_LOG)
flags |= KVM_MEM_LOG_DIRTY_PAGES;
@@ -1149,14 +1184,26 @@ static void setup_vm(int vmfd, struct kvm_syz_vm* vm)
vm->user_text = next.addr;
if (r->flags & MEM_REGION_FLAG_GPA0)
vm->gpa0_mem = next.addr;
if (r->gpa == X86_SYZOS_ADDR_PT_POOL)
vm->pt_pool_mem = next.addr;

if (r->gpa == X86_SYZOS_ADDR_BOOT_ARGS) {
boot_args = (struct syzos_boot_args*)next.addr;
boot_args->region_count = sizeof(syzos_mem_regions) / sizeof(syzos_mem_regions[0]);
for (size_t k = 0; k < boot_args->region_count; k++)
boot_args->regions[k] = syzos_mem_regions[k];
}

if ((r->flags & MEM_REGION_FLAG_REMAINING) && boot_args)
boot_args->regions[i].pages = pages;

if (r->flags & MEM_REGION_FLAG_EXECUTOR_CODE)
install_syzos_code(next.addr, next.size);
vm_set_user_memory_region(vmfd, slot++, flags, r->gpa, next.size, (uintptr_t)next.addr);
}

// Map the remaining pages at an unused address.
struct addr_size next = alloc_guest_mem(&allocator, allocator.size);
vm_set_user_memory_region(vmfd, slot++, 0, X86_SYZOS_ADDR_UNUSED, next.size, (uintptr_t)next.addr);
if (r->flags & MEM_REGION_FLAG_REMAINING)
break;
}
}
#endif
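
A hedged sanity check (illustrative only, not in the patch) that the region table copied into the boot-args page fits in one 4KB page; with roughly a dozen 16-byte mem_region entries plus the 8-byte syzos_boot_args header this holds comfortably:

// Sketch: the table written to X86_SYZOS_ADDR_BOOT_ARGS must fit in one page.
static void check_boot_args_fit(void)
{
	size_t n = sizeof(syzos_mem_regions) / sizeof(syzos_mem_regions[0]);
	size_t need = sizeof(struct syzos_boot_args) + n * sizeof(struct mem_region);
	if (need > KVM_PAGE_SIZE)
		fail("boot args do not fit in one page");
}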

@@ -1269,4 +1316,4 @@ static long syz_kvm_assert_syzos_uexit(volatile long a0, volatile long a1, volat
}
#endif

#endif // EXECUTOR_COMMON_KVM_AMD64_H
#endif // EXECUTOR_COMMON_KVM_AMD64_H
77 changes: 66 additions & 11 deletions executor/common_kvm_amd64_syzos.h
@@ -96,6 +96,23 @@ struct l2_guest_regs {
uint64 r8, r9, r10, r11, r12, r13, r14, r15;
};

struct mem_region {
uint64 gpa;
int pages;
uint32 flags;
};

struct syzos_boot_args {
uint32 region_count;
uint32 reserved;
struct mem_region regions[];
};

struct syzos_globals {
uint64 alloc_offset;
uint64 total_size;
};

#ifdef __cplusplus
extern "C" {
#endif
@@ -672,35 +689,73 @@ guest_handle_enable_nested(struct api_call_1* cmd, uint64 cpu_id)
}
}

// Calculate the size of the unused memory region from the boot arguments.
GUEST_CODE static uint64 get_unused_memory_size()
{
volatile struct syzos_boot_args* args = (volatile struct syzos_boot_args*)X86_SYZOS_ADDR_BOOT_ARGS;
for (uint32 i = 0; i < args->region_count; i++) {
if (args->regions[i].gpa == X86_SYZOS_ADDR_UNUSED)
return args->regions[i].pages * KVM_PAGE_SIZE;
}
return 0;
}

// Allocate a page from the X86_SYZOS_ADDR_UNUSED region using a non-reclaiming bump allocator.
GUEST_CODE static uint64 guest_alloc_page()
{
volatile struct syzos_globals* globals = (volatile struct syzos_globals*)X86_SYZOS_ADDR_GLOBALS;

// Lazy initialization of total_size using CAS to prevent races.
if (globals->total_size == 0) {
uint64 size = get_unused_memory_size();
// Attempt to swap 0 with the calculated size.
// If another CPU beat us to it, this does nothing (which is fine).
__sync_val_compare_and_swap(&globals->total_size, 0, size);
}

// Atomic fetch-and-add to reserve space.
uint64 offset = __sync_fetch_and_add(&globals->alloc_offset, KVM_PAGE_SIZE);

if (offset >= globals->total_size)
guest_uexit(UEXIT_ASSERT);

uint64 ptr = X86_SYZOS_ADDR_UNUSED + offset;
guest_memset((void*)ptr, 0, KVM_PAGE_SIZE);
return ptr;
}
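
Usage sketch (illustrative, not in the patch): callers simply grab zeroed, identity-mapped pages; the first call on any vCPU also initializes total_size via the CAS above.

// Sketch: two consecutive allocations from the unused-heap region.
GUEST_CODE static void alloc_example(void)
{
	uint64 a = guest_alloc_page(); // Zeroed page inside X86_SYZOS_ADDR_UNUSED.
	// Typically a + KVM_PAGE_SIZE, unless another vCPU allocated in between.
	uint64 b = guest_alloc_page();
	(void)a;
	(void)b;
}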

GUEST_CODE static noinline void setup_l2_page_tables(cpu_vendor_id vendor, uint64 cpu_id, uint64 vm_id)
{
// The Root PML4 remains at the fixed address assigned to this VM.
uint64 l2_pml4_addr = X86_SYZOS_ADDR_VM_PGTABLE(cpu_id, vm_id);
uint64 l2_pdpt_addr = l2_pml4_addr + KVM_PAGE_SIZE;
uint64 l2_pd_addr = l2_pml4_addr + 2 * KVM_PAGE_SIZE;
uint64 l2_pt_addr = l2_pml4_addr + 3 * KVM_PAGE_SIZE;

// Allocate subsequent levels dynamically.
uint64 l2_pdpt_addr = guest_alloc_page();
uint64 l2_pd_addr = guest_alloc_page();
uint64 l2_pt_addr = guest_alloc_page();

volatile uint64* pml4 = (volatile uint64*)l2_pml4_addr;
volatile uint64* pdpt = (volatile uint64*)l2_pdpt_addr;
volatile uint64* pd = (volatile uint64*)l2_pd_addr;
volatile uint64* pt = (volatile uint64*)l2_pt_addr;

// Clear the root table (the others are cleared by guest_alloc_page).
guest_memset((void*)l2_pml4_addr, 0, KVM_PAGE_SIZE);
guest_memset((void*)l2_pdpt_addr, 0, KVM_PAGE_SIZE);
guest_memset((void*)l2_pd_addr, 0, KVM_PAGE_SIZE);
guest_memset((void*)l2_pt_addr, 0, KVM_PAGE_SIZE);
guest_memset((void*)X86_SYZOS_ADDR_MSR_BITMAP(cpu_id, vm_id), 0, KVM_PAGE_SIZE);

// Intel EPT: set Read, Write, Execute.
// AMD NPT: set Present, Write, User.
uint64 flags = X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_USER;
// Create the 4-level page table entries using 4KB pages:
// PML4[0] -> points to PDPT

// Setup Hierarchy:
// PML4[0] -> PDPT
pml4[0] = l2_pdpt_addr | flags;
// PDPT[0] -> points to Page Directory (PD)
// PDPT[0] -> PD
pdpt[0] = l2_pd_addr | flags;
// PD[0] -> points to Page Table (PT) (NO X86_PDE64_PS)
// PD[0] -> PT
pd[0] = l2_pt_addr | flags;
// PT[0..511] -> maps 512 4KB pages (2MB total) identity

// PT[0..511] -> Maps 2MB identity
uint64 pt_flags = flags;
if (vendor == CPU_VENDOR_INTEL) {
pt_flags |= EPT_MEMTYPE_WB | EPT_ACCESSED | EPT_DIRTY;
12 changes: 10 additions & 2 deletions executor/kvm.h
@@ -45,10 +45,10 @@
#define X86_SYZOS_ADDR_PML4 0x2000
// PDP for GPAs 0x0 - 0x7fffffffff.
#define X86_SYZOS_ADDR_PDP 0x3000
// Pool of 32 pages for dynamic PT/PD allocations.
#define X86_SYZOS_ADDR_PT_POOL 0x5000
#define X86_SYZOS_ADDR_VAR_IDT 0x25000
#define X86_SYZOS_ADDR_VAR_TSS 0x26000
// Dedicated page for passing configuration (memory map) to L1.
#define X86_SYZOS_ADDR_BOOT_ARGS 0x2F000

#define X86_SYZOS_ADDR_SMRAM 0x30000
// Write to this page to trigger a page fault and stop KVM_RUN.
@@ -76,6 +76,14 @@
// Base offset for the area containing the 4 L2 VM slots.
#define X86_SYZOS_L1_VCPU_OFFSET_L2_VMS_AREA 0x1000

// Global state page (Allocator offsets, etc).
#define X86_SYZOS_ADDR_GLOBALS 0x17F000

// Separated Page Table Pool in high memory.
// Located above L2 VCPU regions.
#define X86_SYZOS_ADDR_PT_POOL 0x180000
#define X86_SYZOS_PT_POOL_SIZE 64
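
The arithmetic behind these constants: the globals page at 0x17F000 sits directly below the pool, and 64 pages starting at 0x180000 end at 0x1C0000. A compile-time sketch of those invariants (assertions are illustrative and assume KVM_PAGE_SIZE is 0x1000, not part of the patch):

// Sketch: layout invariants implied by the constants above.
typedef char assert_globals_below_pool[
    (X86_SYZOS_ADDR_GLOBALS + KVM_PAGE_SIZE == X86_SYZOS_ADDR_PT_POOL) ? 1 : -1];
typedef char assert_pool_end[
    (X86_SYZOS_ADDR_PT_POOL + X86_SYZOS_PT_POOL_SIZE * KVM_PAGE_SIZE == 0x1C0000) ? 1 : -1];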

// Layout of a single L2 VM's data block.

// Size of the memory block for a single L2 VM.
2 changes: 1 addition & 1 deletion sys/linux/test/amd64-syz_kvm_nested_amd_inject
@@ -34,4 +34,4 @@ syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xface)
# Cleanup
#
ioctl$KVM_RUN(r3, AUTO, 0x0)
syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xffffffff)
syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xffffffffffffffff)
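
The constant widened because guest_uexit(-1) is observed as a full 64-bit value, so the assertion must compare against the sign-extended form; the same fix repeats in the test files below. A one-line illustration (not part of the patch):

// Sketch: -1 sign-extends to the 64-bit all-ones pattern.
#include <stdint.h>
_Static_assert((uint64_t)(int64_t)-1 == 0xffffffffffffffffULL, "sign extension");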
2 changes: 1 addition & 1 deletion sys/linux/test/amd64-syz_kvm_nested_amd_invlpga
@@ -32,4 +32,4 @@ syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xface)
# guest_main should finish with guest_uexit(-1).
#
ioctl$KVM_RUN(r3, AUTO, 0x0)
syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xffffffff)
syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xffffffffffffffff)
2 changes: 1 addition & 1 deletion sys/linux/test/amd64-syz_kvm_nested_amd_stgi
@@ -44,4 +44,4 @@ syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xface)
# Cleanup.
#
ioctl$KVM_RUN(r3, AUTO, 0x0)
syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xffffffff)
syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xffffffffffffffff)
2 changes: 1 addition & 1 deletion sys/linux/test/amd64-syz_kvm_nested_amd_vmcb_write_mask
@@ -40,4 +40,4 @@ syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xe2e20002)
# guest_main should finish with guest_uexit(-1).
#
ioctl$KVM_RUN(r3, AUTO, 0x0)
syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xffffffff)
syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xffffffffffffffff)
2 changes: 1 addition & 1 deletion sys/linux/test/amd64-syz_kvm_nested_create_vm-hlt
@@ -20,4 +20,4 @@ syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xe2e20001)
# KVM_RUN should exit with KVM_EXIT_MMIO due to guest_uexit(-1).
#
ioctl$KVM_RUN(r3, AUTO, 0x0)
syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xffffffff)
syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xffffffffffffffff)
2 changes: 1 addition & 1 deletion sys/linux/test/amd64-syz_kvm_nested_intel_vmwrite_mask
@@ -37,4 +37,4 @@ syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xe2e20002)
# guest_main should finish with guest_uexit(-1).
#
ioctl$KVM_RUN(r3, AUTO, 0x0)
syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xffffffff)
syz_kvm_assert_syzos_uexit$x86(r3, r5, 0xffffffffffffffff)