77// See Intel Software Developer’s Manual Volume 3: System Programming Guide
88// for details on what happens here.
99
10+ #include "common_kvm_amd64_syzos.h"
1011#include "kvm.h"
1112#include "kvm_amd64.S.h"
1213
@@ -69,7 +70,9 @@ struct tss32 {
6970 uint16 trace ;
7071 uint16 io_bitmap ;
7172} __attribute__((packed ));
73+ #endif
7274
75+ #if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu || __NR_syz_kvm_add_vcpu
7376struct tss64 {
7477 uint32 reserved0 ;
7578 uint64 rsp [3 ];
@@ -79,9 +82,7 @@ struct tss64 {
7982 uint32 reserved3 ;
8083 uint32 io_bitmap ;
8184} __attribute__((packed ));
82- #endif
8385
84- #if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu
8586static void fill_segment_descriptor (uint64 * dt , uint64 * lt , struct kvm_segment * seg )
8687{
8788 uint16 index = seg -> selector >> 3 ;
@@ -200,7 +201,7 @@ static void setup_64bit_idt(struct kvm_sregs* sregs, char* host_mem, uintptr_t g
200201}
201202#endif
202203
203- #if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu
204+ #if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu || __NR_syz_kvm_add_vcpu
204205struct kvm_text {
205206 uintptr_t typ ;
206207 const void * text ;
@@ -215,6 +216,122 @@ struct kvm_opt {
215216};
216217#endif
217218
219+ #if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu || __NR_syz_kvm_add_vcpu
220+ #define PAGE_MASK GENMASK_ULL(51, 12)
221+
222+ // We assume a 4-level page table, in the future we could add support for
223+ // n-level if needed.
224+ static void setup_pg_table (void * host_mem )
225+ {
226+ uint64 * pml4 = (uint64 * )((uint64 )host_mem + X86_ADDR_PML4 );
227+ uint64 * pdp = (uint64 * )((uint64 )host_mem + X86_ADDR_PDP );
228+ uint64 * pd = (uint64 * )((uint64 )host_mem + X86_ADDR_PD );
229+ uint64 * pd_ioapic = (uint64 * )((uint64 )host_mem + X86_ADDR_PD_IOAPIC );
230+
231+ pml4 [0 ] = X86_PDE64_PRESENT | X86_PDE64_RW | (X86_ADDR_PDP & PAGE_MASK );
232+ pdp [0 ] = X86_PDE64_PRESENT | X86_PDE64_RW | (X86_ADDR_PD & PAGE_MASK );
233+ pdp [3 ] = X86_PDE64_PRESENT | X86_PDE64_RW | (X86_ADDR_PD_IOAPIC & PAGE_MASK );
234+
235+ pd [0 ] = X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_PS ;
236+ pd_ioapic [502 ] = X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_PS ;
237+ }
238+
// Put the VCPU into 64-bit long mode: install a GDT/LDT, a 64-bit code and
// data segment, a TSS, and the 4-level page table built by setup_pg_table().
// host_mem is the host mapping of guest physical address 0, where all the
// descriptor tables live (at the X86_ADDR_* offsets from kvm.h).
// This only sets up a 64-bit VCPU.
// TODO: Should add support for other modes.
static void setup_gdt_ldt_pg(int cpufd, void* host_mem)
{
	struct kvm_sregs sregs;
	ioctl(cpufd, KVM_GET_SREGS, &sregs);

	// GDT: 256 8-byte descriptors at X86_ADDR_GDT.
	sregs.gdt.base = X86_ADDR_GDT;
	sregs.gdt.limit = 256 * sizeof(uint64) - 1;
	uint64* gdt = (uint64*)((uint64)host_mem + sregs.gdt.base);

	// LDT descriptor: system segment (s=0), type 2 = LDT.
	struct kvm_segment seg_ldt;
	memset(&seg_ldt, 0, sizeof(seg_ldt));
	seg_ldt.selector = X86_SEL_LDT;
	seg_ldt.type = 2;
	seg_ldt.base = X86_ADDR_LDT;
	seg_ldt.limit = 256 * sizeof(uint64) - 1;
	seg_ldt.present = 1;
	seg_ldt.dpl = 0;
	seg_ldt.s = 0;
	seg_ldt.g = 0;
	seg_ldt.db = 1;
	seg_ldt.l = 0;
	sregs.ldt = seg_ldt;
	uint64* ldt = (uint64*)((uint64)host_mem + sregs.ldt.base);

	// 64-bit code segment: type 11 = execute/read, accessed; l=1 selects
	// long mode, base/limit are ignored by hardware in 64-bit mode.
	struct kvm_segment seg_cs64;
	memset(&seg_cs64, 0, sizeof(seg_cs64));
	seg_cs64.selector = X86_SEL_CS64;
	seg_cs64.type = 11;
	seg_cs64.base = 0;
	seg_cs64.limit = 0xFFFFFFFFu;
	seg_cs64.present = 1;
	seg_cs64.s = 1;
	seg_cs64.g = 1;
	seg_cs64.l = 1;

	sregs.cs = seg_cs64;

	// Data segment: type 3 = read/write, accessed. Used for both DS and ES.
	struct kvm_segment seg_ds64;
	memset(&seg_ds64, 0, sizeof(struct kvm_segment));
	seg_ds64.selector = X86_SEL_DS64;
	seg_ds64.type = 3;
	seg_ds64.limit = 0xFFFFFFFFu;
	seg_ds64.present = 1;
	seg_ds64.s = 1;
	seg_ds64.g = 1;

	sregs.ds = seg_ds64;
	sregs.es = seg_ds64;

	// 64-bit TSS descriptor: system segment, type 9 = available 64-bit TSS.
	// Limit 0x1ff covers the TSS plus a small IO bitmap stub.
	struct kvm_segment seg_tss64;
	memset(&seg_tss64, 0, sizeof(seg_tss64));
	seg_tss64.selector = X86_SEL_TSS64;
	seg_tss64.base = X86_ADDR_VAR_TSS64;
	seg_tss64.limit = 0x1ff;
	seg_tss64.type = 9;
	seg_tss64.present = 1;

	// TSS contents: all three privilege-level stacks point at the same
	// guest stack; io_bitmap offset equal to the struct end means
	// "no IO bitmap".
	struct tss64 tss64;
	memset(&tss64, 0, sizeof(tss64));
	tss64.rsp[0] = X86_ADDR_STACK0;
	tss64.rsp[1] = X86_ADDR_STACK0;
	tss64.rsp[2] = X86_ADDR_STACK0;
	tss64.io_bitmap = offsetof(struct tss64, io_bitmap);
	struct tss64* tss64_addr = (struct tss64*)((uint64)host_mem + seg_tss64.base);
	memcpy(tss64_addr, &tss64, sizeof(tss64));

	// Write the in-memory descriptors backing the kvm_segment values above.
	// The TSS descriptor occupies two GDT slots in 64-bit mode.
	fill_segment_descriptor(gdt, ldt, &seg_ldt);
	fill_segment_descriptor(gdt, ldt, &seg_cs64);
	fill_segment_descriptor(gdt, ldt, &seg_ds64);
	fill_segment_descriptor_dword(gdt, ldt, &seg_tss64);

	setup_pg_table(host_mem);

	// Note: cr0 is overwritten (not OR-ed) — protected mode + paging + NE.
	// EFER.LME/LMA enable long mode; cr3 points at the PML4 set up above.
	sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
	sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
	sregs.efer |= (X86_EFER_LME | X86_EFER_LMA | X86_EFER_NXE);
	sregs.cr3 = X86_ADDR_PML4;

	ioctl(cpufd, KVM_SET_SREGS, &sregs);
}
321+
322+ static void setup_cpuid (int cpufd )
323+ {
324+ int kvmfd = open ("/dev/kvm" , O_RDWR );
325+ char buf [sizeof (struct kvm_cpuid2 ) + 128 * sizeof (struct kvm_cpuid_entry2 )];
326+ memset (buf , 0 , sizeof (buf ));
327+ struct kvm_cpuid2 * cpuid = (struct kvm_cpuid2 * )buf ;
328+ cpuid -> nent = 128 ;
329+ ioctl (kvmfd , KVM_GET_SUPPORTED_CPUID , cpuid );
330+ ioctl (cpufd , KVM_SET_CPUID2 , cpuid );
331+ close (kvmfd );
332+ }
333+ #endif
334+
218335#if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu
219336#define KVM_SETUP_PAGING (1 << 0)
220337#define KVM_SETUP_PAE (1 << 1)
@@ -764,18 +881,192 @@ static volatile long syz_kvm_setup_cpu(volatile long a0, volatile long a1, volat
764881}
765882#endif
766883
884+ #if SYZ_EXECUTOR || __NR_syz_kvm_add_vcpu
885+ static void reset_cpu_regs (int cpufd , int cpu_id , size_t text_size )
886+ {
887+ struct kvm_regs regs ;
888+ memset (& regs , 0 , sizeof (regs ));
889+
890+ regs .rflags |= 2 ; // bit 1 is always set
891+ // PC points to the relative offset of guest_main() within the guest code.
892+ regs .rip = X86_ADDR_EXECUTOR_CODE + ((uint64 )guest_main - (uint64 )& __start_guest );
893+ regs .rsp = X86_ADDR_STACK0 ;
894+ // Pass parameters to guest_main().
895+ regs .rdi = text_size ;
896+ regs .rsi = cpu_id ;
897+ ioctl (cpufd , KVM_SET_REGS , & regs );
898+ }
899+
900+ static void install_user_code (int cpufd , void * user_text_slot , int cpu_id , const void * text , size_t text_size , void * host_mem )
901+ {
902+ if ((cpu_id < 0 ) || (cpu_id >= KVM_MAX_VCPU ))
903+ return ;
904+ if (!user_text_slot )
905+ return ;
906+ if (text_size > KVM_PAGE_SIZE )
907+ text_size = KVM_PAGE_SIZE ;
908+ void * target = (void * )((uint64 )user_text_slot + (KVM_PAGE_SIZE * cpu_id ));
909+ memcpy (target , text , text_size );
910+ setup_gdt_ldt_pg (cpufd , host_mem );
911+ setup_cpuid (cpufd );
912+ reset_cpu_regs (cpufd , cpu_id , text_size );
913+ }
914+ #endif
915+
916+ #if SYZ_EXECUTOR || __NR_syz_kvm_setup_syzos_vm
// A contiguous span of host memory, used to carve up the region backing the
// guest physical address space.
struct addr_size {
	void* addr;
	size_t size;
};

// Carve `size` bytes off the front of the free region `pool`.
// Returns the allocated span, or {NULL, 0} if the pool is too small
// (in which case `pool` is left untouched). On success `pool` is advanced
// past the returned bytes.
// Note: the parameter was renamed from `free`, which shadowed the C library
// function free().
static struct addr_size alloc_guest_mem(struct addr_size* pool, size_t size)
{
	struct addr_size ret = {.addr = NULL, .size = 0};

	if (pool->size < size)
		return ret;
	ret.addr = pool->addr;
	ret.size = size;
	pool->addr = (void*)((char*)pool->addr + size);
	pool->size -= size;
	return ret;
}
934+
935+ // Call KVM_SET_USER_MEMORY_REGION for the given pages.
936+ static void vm_set_user_memory_region (int vmfd , uint32 slot , uint32 flags , uint64 guest_phys_addr , uint64 memory_size , uint64 userspace_addr )
937+ {
938+ struct kvm_userspace_memory_region memreg ;
939+ memreg .slot = slot ;
940+ memreg .flags = flags ;
941+ memreg .guest_phys_addr = guest_phys_addr ;
942+ memreg .memory_size = memory_size ;
943+ memreg .userspace_addr = userspace_addr ;
944+ ioctl (vmfd , KVM_SET_USER_MEMORY_REGION , & memreg );
945+ }
946+
947+ static void install_syzos_code (void * host_mem , size_t mem_size )
948+ {
949+ size_t size = (char * )& __stop_guest - (char * )& __start_guest ;
950+ if (size > mem_size )
951+ fail ("SyzOS size exceeds guest memory" );
952+ memcpy (host_mem , & __start_guest , size );
953+ }
954+
// Populate the guest physical address space of a freshly created VM:
// carve host_mem into regions and register each one with KVM.
// On return *text_slot (if non-NULL) points to the host mapping of the
// per-VCPU user code region.
static void setup_vm(int vmfd, void* host_mem, void** text_slot)
{
	// Guest physical memory layout (must be in sync with executor/kvm.h):
	// 0x00000000 - AMD64 data structures (10 pages, see kvm.h)
	// 0x00030000 - SMRAM (10 pages)
	// 0x00040000 - unmapped region to trigger page faults for uexits etc. (1 page)
	// 0x00041000 - writable region with KVM_MEM_LOG_DIRTY_PAGES to fuzz dirty ring (2 pages)
	// 0x00050000 - user code (4 pages)
	// 0x00054000 - executor guest code (4 pages)
	// 0x00058000 - scratch memory for code generated at runtime (1 page)
	// 0xfec00000 - IOAPIC (1 page)
	struct addr_size allocator = {.addr = host_mem, .size = KVM_GUEST_MEM_SIZE};
	int slot = 0; // Slot numbers do not matter, they just have to be different.

	// This *needs* to be the first allocation to avoid passing pointers
	// around for the gdt/ldt/page table setup.
	struct addr_size next = alloc_guest_mem(&allocator, 10 * KVM_PAGE_SIZE);
	vm_set_user_memory_region(vmfd, slot++, 0, 0, next.size, (uintptr_t)next.addr);

	// SMRAM.
	next = alloc_guest_mem(&allocator, 10 * KVM_PAGE_SIZE);
	vm_set_user_memory_region(vmfd, slot++, 0, X86_ADDR_SMRAM, next.size, (uintptr_t)next.addr);

	// Dirty-logged pages (note: 0x00040000 itself is deliberately left unmapped).
	next = alloc_guest_mem(&allocator, 2 * KVM_PAGE_SIZE);
	vm_set_user_memory_region(vmfd, slot++, KVM_MEM_LOG_DIRTY_PAGES, X86_ADDR_DIRTY_PAGES, next.size, (uintptr_t)next.addr);

	// Read-only user code, one page per possible VCPU.
	next = alloc_guest_mem(&allocator, KVM_MAX_VCPU * KVM_PAGE_SIZE);
	vm_set_user_memory_region(vmfd, slot++, KVM_MEM_READONLY, X86_ADDR_USER_CODE, next.size, (uintptr_t)next.addr);
	if (text_slot)
		*text_slot = next.addr;

	// Read-only executor (SyzOS) code, copied from the host binary.
	struct addr_size host_text = alloc_guest_mem(&allocator, 4 * KVM_PAGE_SIZE);
	install_syzos_code(host_text.addr, host_text.size);
	vm_set_user_memory_region(vmfd, slot++, KVM_MEM_READONLY, X86_ADDR_EXECUTOR_CODE, host_text.size, (uintptr_t)host_text.addr);

	// Writable scratch page for runtime-generated code.
	next = alloc_guest_mem(&allocator, KVM_PAGE_SIZE);
	vm_set_user_memory_region(vmfd, slot++, 0, X86_ADDR_SCRATCH_CODE, next.size, (uintptr_t)next.addr);

	// Backing page for the IOAPIC MMIO range.
	next = alloc_guest_mem(&allocator, KVM_PAGE_SIZE);
	vm_set_user_memory_region(vmfd, slot++, 0, X86_ADDR_IOAPIC, next.size, (uintptr_t)next.addr);

	// Map the remaining pages at an unused address.
	next = alloc_guest_mem(&allocator, allocator.size);
	vm_set_user_memory_region(vmfd, slot++, 0, X86_ADDR_UNUSED, next.size, (uintptr_t)next.addr);
}
999+ #endif
1000+
1001+ #if SYZ_EXECUTOR || __NR_syz_kvm_setup_syzos_vm || __NR_syz_kvm_add_vcpu_amd64
// Per-VM bookkeeping shared between syz_kvm_setup_syzos_vm() and
// syz_kvm_add_vcpu(). Lives in the first page of the host mapping passed
// to syz_kvm_setup_syzos_vm().
struct kvm_syz_vm {
	int vmfd; // KVM VM file descriptor
	int next_cpu_id; // id for the next KVM_CREATE_VCPU call, in [0, KVM_MAX_VCPU)
	void* user_text; // host mapping of the per-VCPU user code region
	void* host_mem; // host mapping of guest physical address 0
};
1008+ #endif
1009+
7671010#if SYZ_EXECUTOR || __NR_syz_kvm_setup_syzos_vm
7681011static long syz_kvm_setup_syzos_vm (volatile long a0 , volatile long a1 )
7691012{
770- // Placeholder.
771- return 0 ;
1013+ const int vmfd = a0 ;
1014+ void * host_mem = (void * )a1 ;
1015+
1016+ void * user_text_slot = NULL ;
1017+ struct kvm_syz_vm * ret = (struct kvm_syz_vm * )host_mem ;
1018+ host_mem = (void * )((uint64 )host_mem + KVM_PAGE_SIZE );
1019+ setup_vm (vmfd , host_mem , & user_text_slot );
1020+ ret -> vmfd = vmfd ;
1021+ ret -> next_cpu_id = 0 ;
1022+ ret -> user_text = user_text_slot ;
1023+ ret -> host_mem = host_mem ;
1024+ return (long )ret ;
7721025}
7731026#endif
7741027
7751028#if SYZ_EXECUTOR || __NR_syz_kvm_add_vcpu
7761029static long syz_kvm_add_vcpu (volatile long a0 , volatile long a1 )
7771030{
778- // Placeholder.
779- return 0 ;
1031+ struct kvm_syz_vm * vm = (struct kvm_syz_vm * )a0 ;
1032+ struct kvm_text * utext = (struct kvm_text * )a1 ;
1033+ const void * text = utext -> text ;
1034+ size_t text_size = utext -> size ;
1035+
1036+ if (!vm ) {
1037+ errno = EINVAL ;
1038+ return -1 ;
1039+ }
1040+ if (vm -> next_cpu_id == KVM_MAX_VCPU ) {
1041+ errno = ENOMEM ;
1042+ return -1 ;
1043+ }
1044+ int cpu_id = vm -> next_cpu_id ;
1045+ int cpufd = ioctl (vm -> vmfd , KVM_CREATE_VCPU , cpu_id );
1046+ if (cpufd == -1 )
1047+ return -1 ;
1048+ // Only increment next_cpu_id if CPU creation succeeded.
1049+ vm -> next_cpu_id ++ ;
1050+ install_user_code (cpufd , vm -> user_text , cpu_id , text , text_size , vm -> host_mem );
1051+ return cpufd ;
1052+ }
1053+ #endif
1054+
1055+ #if SYZ_EXECUTOR || __NR_syz_kvm_assert_syzos_uexit
1056+ static long syz_kvm_assert_syzos_uexit (volatile long a0 , volatile long a1 )
1057+ {
1058+ struct kvm_run * run = (struct kvm_run * )a0 ;
1059+ uint64 expect = a1 ;
1060+
1061+ if (!run || (run -> exit_reason != KVM_EXIT_MMIO ) || (run -> mmio .phys_addr != X86_ADDR_UEXIT )) {
1062+ errno = EINVAL ;
1063+ return -1 ;
1064+ }
1065+
1066+ if ((((uint64 * )(run -> mmio .data ))[0 ]) != expect ) {
1067+ errno = EDOM ;
1068+ return -1 ;
1069+ }
1070+ return 0 ;
7801071}
7811072#endif