 // See Intel Software Developer’s Manual Volume 3: System Programming Guide
 // for details on what happens here.
 
+#include "common_kvm_amd64_syzos.h"
 #include "kvm.h"
 #include "kvm_amd64.S.h"
 
@@ -69,7 +70,9 @@ struct tss32 {
 	uint16 trace;
 	uint16 io_bitmap;
 } __attribute__((packed));
+#endif
 
+#if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu || __NR_syz_kvm_add_vcpu
 struct tss64 {
 	uint32 reserved0;
 	uint64 rsp[3];
@@ -79,9 +82,7 @@ struct tss64 {
 	uint32 reserved3;
 	uint32 io_bitmap;
 } __attribute__((packed));
-#endif
 
-#if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu
 static void fill_segment_descriptor(uint64* dt, uint64* lt, struct kvm_segment* seg)
 {
 	uint16 index = seg->selector >> 3;
@@ -200,7 +201,7 @@ static void setup_64bit_idt(struct kvm_sregs* sregs, char* host_mem, uintptr_t g
 }
 #endif
 
-#if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu
+#if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu || __NR_syz_kvm_add_vcpu
 struct kvm_text {
 	uintptr_t typ;
 	const void* text;
@@ -215,6 +216,123 @@ struct kvm_opt {
 };
 #endif
 
+#if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu || __NR_syz_kvm_add_vcpu
+#define PAGE_MASK GENMASK_ULL(51, 12)
+
+// We assume a 4-level page table; support for a different number of
+// levels can be added later if needed.
+static void setup_pg_table(void* host_mem)
+{
+	uint64* pml4 = (uint64*)((uint64)host_mem + X86_ADDR_PML4);
+	uint64* pdp = (uint64*)((uint64)host_mem + X86_ADDR_PDP);
+	uint64* pd = (uint64*)((uint64)host_mem + X86_ADDR_PD);
+	uint64* pd_ioapic = (uint64*)((uint64)host_mem + X86_ADDR_PD_IOAPIC);
+
+	// Point the top-level entries at the next-level tables; the leaf
+	// PDEs below map 2MB large pages (X86_PDE64_PS).
+	pml4[0] = X86_PDE64_PRESENT | X86_PDE64_RW | (X86_ADDR_PDP & PAGE_MASK);
+	pdp[0] = X86_PDE64_PRESENT | X86_PDE64_RW | (X86_ADDR_PD & PAGE_MASK);
+	pdp[3] = X86_PDE64_PRESENT | X86_PDE64_RW | (X86_ADDR_PD_IOAPIC & PAGE_MASK);
+
+	pd[0] = X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_PS;
+	pd_ioapic[502] = X86_PDE64_PRESENT | X86_PDE64_RW | X86_PDE64_PS;
+}
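
The two X86_PDE64_PS entries map 2MB large pages: pd[0] identity-maps the first 2MB of guest memory, and pd_ioapic[502] maps the 2MB page containing the IOAPIC at 0xfec00000. The magic indices fall out of the usual 9-bits-per-level split of the address; this standalone sketch (not part of the change) double-checks them:

#include <stdint.h>
#include <stdio.h>

// Split a 48-bit virtual address into 4-level page-table indices:
// 9 bits per level, 21-bit offset within a 2MB large page.
static void split_va(uint64_t va)
{
	printf("pml4=%llu pdpt=%llu pd=%llu off=0x%llx\n",
	       (unsigned long long)((va >> 39) & 0x1ff),
	       (unsigned long long)((va >> 30) & 0x1ff),
	       (unsigned long long)((va >> 21) & 0x1ff),
	       (unsigned long long)(va & 0x1fffff));
}

int main(void)
{
	split_va(0x0);        // pml4=0 pdpt=0 pd=0 -> pml4[0], pdp[0], pd[0]
	split_va(0xfec00000); // pml4=0 pdpt=3 pd=502 -> pdp[3], pd_ioapic[502]
	return 0;
}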
+
+// This only sets up a 64-bit VCPU.
+// TODO: add support for other modes.
+static void setup_gdt_ldt_pg(int cpufd, void* host_mem)
+{
+	struct kvm_sregs sregs;
+	ioctl(cpufd, KVM_GET_SREGS, &sregs);
+
+	sregs.gdt.base = X86_ADDR_GDT;
+	sregs.gdt.limit = 256 * sizeof(uint64) - 1;
+	uint64* gdt = (uint64*)((uint64)host_mem + sregs.gdt.base);
+
+	struct kvm_segment seg_ldt;
+	memset(&seg_ldt, 0, sizeof(seg_ldt));
+	seg_ldt.selector = X86_SEL_LDT;
+	seg_ldt.type = 2; // LDT system segment
+	seg_ldt.base = X86_ADDR_LDT;
+	seg_ldt.limit = 256 * sizeof(uint64) - 1;
+	seg_ldt.present = 1;
+	seg_ldt.dpl = 0;
+	seg_ldt.s = 0;
+	seg_ldt.g = 0;
+	seg_ldt.db = 1;
+	seg_ldt.l = 0;
+	sregs.ldt = seg_ldt;
+	uint64* ldt = (uint64*)((uint64)host_mem + sregs.ldt.base);
+
+	struct kvm_segment seg_cs64;
+	memset(&seg_cs64, 0, sizeof(seg_cs64));
+	seg_cs64.selector = X86_SEL_CS64;
+	seg_cs64.type = 11; // execute/read, accessed
+	seg_cs64.base = 0;
+	seg_cs64.limit = 0xFFFFFFFFu;
+	seg_cs64.present = 1;
+	seg_cs64.s = 1;
+	seg_cs64.g = 1;
+	seg_cs64.l = 1; // 64-bit code segment
+	sregs.cs = seg_cs64;
+
+	struct kvm_segment seg_ds64;
+	memset(&seg_ds64, 0, sizeof(struct kvm_segment));
+	seg_ds64.selector = X86_SEL_DS64;
+	seg_ds64.type = 3; // read/write, accessed
+	seg_ds64.limit = 0xFFFFFFFFu;
+	seg_ds64.present = 1;
+	seg_ds64.s = 1;
+	seg_ds64.g = 1;
+	sregs.ds = seg_ds64;
+	sregs.es = seg_ds64;
+
+	struct kvm_segment seg_tss64;
+	memset(&seg_tss64, 0, sizeof(seg_tss64));
+	seg_tss64.selector = X86_SEL_TSS64;
+	seg_tss64.base = X86_ADDR_VAR_TSS64;
+	seg_tss64.limit = 0x1ff;
+	seg_tss64.type = 9; // available 64-bit TSS
+	seg_tss64.present = 1;
+
+	struct tss64 tss64;
+	memset(&tss64, 0, sizeof(tss64));
+	tss64.rsp[0] = X86_ADDR_STACK0;
+	tss64.rsp[1] = X86_ADDR_STACK0;
+	tss64.rsp[2] = X86_ADDR_STACK0;
+	tss64.io_bitmap = offsetof(struct tss64, io_bitmap);
+	struct tss64* tss64_addr = (struct tss64*)((uint64)host_mem + seg_tss64.base);
+	memcpy(tss64_addr, &tss64, sizeof(tss64));
+
+	fill_segment_descriptor(gdt, ldt, &seg_ldt);
+	fill_segment_descriptor(gdt, ldt, &seg_cs64);
+	fill_segment_descriptor(gdt, ldt, &seg_ds64);
+	// The 64-bit TSS descriptor is 16 bytes wide, hence the _dword variant.
+	fill_segment_descriptor_dword(gdt, ldt, &seg_tss64);
+
+	setup_pg_table(host_mem);
+
+	// Enter long mode: protected mode with paging, PAE, and LME/LMA set.
+	sregs.cr0 = X86_CR0_PE | X86_CR0_NE | X86_CR0_PG;
+	sregs.cr4 |= X86_CR4_PAE | X86_CR4_OSFXSR;
+	sregs.efer |= (X86_EFER_LME | X86_EFER_LMA | X86_EFER_NXE);
+	sregs.cr3 = X86_ADDR_PML4;
+
+	ioctl(cpufd, KVM_SET_SREGS, &sregs);
+}
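
fill_segment_descriptor() (defined earlier in this file) turns each kvm_segment above into an 8-byte GDT/LDT descriptor. As an illustration of the architectural encoding (a sketch of the layout from the SDM, not a copy of what fill_segment_descriptor does), the flat 64-bit code segment above packs like this:

#include <stdint.h>

// Pack a flat 64-bit code segment descriptor. Field values mirror
// seg_cs64 above: type=11, s=1, dpl=0, present=1, l=1, g=1, base=0.
static uint64_t pack_cs64_descriptor(void)
{
	uint64_t limit = 0xFFFFF; // 20-bit limit; with g=1 it covers 4GB
	uint64_t d = 0;
	d |= limit & 0xffff;      // limit[15:0] -> bits 0..15
	d |= (limit >> 16) << 48; // limit[19:16] -> bits 48..51
	d |= (uint64_t)11 << 40;  // type: execute/read, accessed
	d |= (uint64_t)1 << 44;   // s: code/data (non-system) segment
	d |= (uint64_t)1 << 47;   // p: present
	d |= (uint64_t)1 << 53;   // l: 64-bit code
	d |= (uint64_t)1 << 55;   // g: 4KB granularity
	return d;                 // base[31:0] stays 0 for a flat segment
}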
+
+static void setup_cpuid(int cpufd)
+{
+	int kvmfd = open("/dev/kvm", O_RDWR);
+	char buf[sizeof(struct kvm_cpuid2) + 128 * sizeof(struct kvm_cpuid_entry2)];
+	memset(buf, 0, sizeof(buf));
+	struct kvm_cpuid2* cpuid = (struct kvm_cpuid2*)buf;
+	cpuid->nent = 128;
+	ioctl(kvmfd, KVM_GET_SUPPORTED_CPUID, cpuid);
+	ioctl(cpufd, KVM_SET_CPUID2, cpuid);
+	close(kvmfd);
+}
+#endif
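
setup_cpuid() deliberately ignores errors and assumes 128 entries are enough, which is fine for fuzzing. For reference, KVM_GET_SUPPORTED_CPUID fails with E2BIG when nent is too small, so a defensive caller could size the buffer dynamically; a sketch (not part of the diff):

#include <errno.h>
#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/kvm.h>

static struct kvm_cpuid2* get_supported_cpuid(void)
{
	int kvmfd = open("/dev/kvm", O_RDWR);
	if (kvmfd < 0)
		return NULL;
	struct kvm_cpuid2* cpuid = NULL;
	for (int nent = 128;; nent *= 2) {
		size_t size = sizeof(*cpuid) + nent * sizeof(struct kvm_cpuid_entry2);
		struct kvm_cpuid2* p = realloc(cpuid, size);
		if (!p)
			break;
		cpuid = p;
		memset(cpuid, 0, size);
		cpuid->nent = nent;
		if (ioctl(kvmfd, KVM_GET_SUPPORTED_CPUID, cpuid) == 0) {
			close(kvmfd);
			return cpuid; // caller frees
		}
		if (errno != E2BIG)
			break; // real error, give up
	}
	free(cpuid);
	close(kvmfd);
	return NULL;
}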
+
 #if SYZ_EXECUTOR || __NR_syz_kvm_setup_cpu
 #define KVM_SETUP_PAGING (1 << 0)
 #define KVM_SETUP_PAE (1 << 1)
@@ -764,18 +882,193 @@ static volatile long syz_kvm_setup_cpu(volatile long a0, volatile long a1, volat
 }
 #endif
 
+#if SYZ_EXECUTOR || __NR_syz_kvm_add_vcpu
+static void reset_cpu_regs(int cpufd, int cpu_id, size_t text_size)
+{
+	struct kvm_regs regs;
+	memset(&regs, 0, sizeof(regs));
+
+	regs.rflags |= 2; // RFLAGS bit 1 is reserved and must be set
+	// RIP points at guest_main(), at its relative offset within the guest code.
+	regs.rip = X86_ADDR_EXECUTOR_CODE + ((uint64)guest_main - (uint64)&__start_guest);
+	regs.rsp = X86_ADDR_STACK0;
+	// Pass parameters to guest_main().
+	regs.rdi = text_size;
+	regs.rsi = cpu_id;
+	ioctl(cpufd, KVM_SET_REGS, &regs);
+}
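
The guest_main/__start_guest arithmetic works because the guest code is linked into a dedicated section whose bounds the linker exports. A minimal sketch of that arrangement, assuming a section named "guest" (the real macro and entry point live in common_kvm_amd64_syzos.h, included above):

// Code placed in a section with a C-identifier name gets automatic
// __start_<section>/__stop_<section> bounds from the linker.
#define GUEST_CODE __attribute__((section("guest")))
extern char __start_guest, __stop_guest;

// Entry point; the section is loaded at X86_ADDR_EXECUTOR_CODE inside the
// VM, so its guest address is that base plus (guest_main - __start_guest).
GUEST_CODE static void guest_main(uint64 text_size, uint64 cpu_id)
{
}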
+
+static void install_user_code(int cpufd, void* user_text_slot, int cpu_id, const void* text, size_t text_size, void* host_mem)
+{
+	if ((cpu_id < 0) || (cpu_id >= KVM_MAX_VCPU))
+		return;
+	if (!user_text_slot)
+		return;
+	// Each vcpu gets one page of user code; truncate anything longer.
+	if (text_size > KVM_PAGE_SIZE)
+		text_size = KVM_PAGE_SIZE;
+	void* target = (void*)((uint64)user_text_slot + (KVM_PAGE_SIZE * cpu_id));
+	memcpy(target, text, text_size);
+	setup_gdt_ldt_pg(cpufd, host_mem);
+	setup_cpuid(cpufd);
+	reset_cpu_regs(cpufd, cpu_id, text_size);
+}
+#endif
+
+#if SYZ_EXECUTOR || __NR_syz_kvm_setup_syzos_vm
+struct addr_size {
+	void* addr;
+	size_t size;
+};
+
+static struct addr_size alloc_guest_mem(struct addr_size* free, size_t size)
+{
+	struct addr_size ret = {.addr = NULL, .size = 0};
+
+	if (free->size < size)
+		return ret;
+	ret.addr = free->addr;
+	ret.size = size;
+	free->addr = (void*)((char*)free->addr + size);
+	free->size -= size;
+	return ret;
+}
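
alloc_guest_mem() is a plain bump allocator: it either carves size bytes off the front of the free block or, on failure, returns {NULL, 0} and leaves the block untouched. For illustration:

// Illustrative only: carve two regions out of a 16KB block.
char backing[4 * 4096];
struct addr_size free_block = {.addr = backing, .size = sizeof(backing)};
struct addr_size a = alloc_guest_mem(&free_block, 4096); // a.addr == backing
struct addr_size b = alloc_guest_mem(&free_block, 8192); // b.addr == backing + 4096
struct addr_size c = alloc_guest_mem(&free_block, 8192); // fails: only 4096 left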
+
+// Call KVM_SET_USER_MEMORY_REGION for the given pages.
+static void vm_set_user_memory_region(int vmfd, uint32 slot, uint32 flags, uint64 guest_phys_addr, uint64 memory_size, uint64 userspace_addr)
+{
+	struct kvm_userspace_memory_region memreg;
+	memreg.slot = slot;
+	memreg.flags = flags;
+	memreg.guest_phys_addr = guest_phys_addr;
+	memreg.memory_size = memory_size;
+	memreg.userspace_addr = userspace_addr;
+	ioctl(vmfd, KVM_SET_USER_MEMORY_REGION, &memreg);
+}
+
+static void install_syzos_code(void* host_mem, size_t mem_size)
+{
+	size_t size = (char*)&__stop_guest - (char*)&__start_guest;
+	if (size > mem_size)
+		fail("SyzOS size exceeds guest memory");
+	memcpy(host_mem, &__start_guest, size);
+}
+
+static void setup_vm(int vmfd, void* host_mem, void** text_slot)
+{
+	// Guest physical memory layout (must be in sync with executor/kvm.h):
+	// 0x00000000 - AMD64 data structures (10 pages, see kvm.h)
+	// 0x00030000 - SMRAM (10 pages)
+	// 0x00040000 - unmapped region that triggers page faults for uexits etc. (1 page)
+	// 0x00041000 - writable region with KVM_MEM_LOG_DIRTY_PAGES to fuzz the dirty ring (2 pages)
+	// 0x00050000 - user code (4 pages)
+	// 0x00054000 - executor guest code (4 pages)
+	// 0x00058000 - scratch memory for code generated at runtime (1 page)
+	// 0xfec00000 - IOAPIC (1 page)
+	struct addr_size allocator = {.addr = host_mem, .size = KVM_GUEST_MEM_SIZE};
+	int slot = 0; // Slot numbers do not matter; they just have to be distinct.
+
+	// This *needs* to be the first allocation to avoid passing pointers
+	// around for the gdt/ldt/page table setup.
+	struct addr_size next = alloc_guest_mem(&allocator, 10 * KVM_PAGE_SIZE);
+	vm_set_user_memory_region(vmfd, slot++, 0, 0, next.size, (uintptr_t)next.addr);
+
+	next = alloc_guest_mem(&allocator, 10 * KVM_PAGE_SIZE);
+	vm_set_user_memory_region(vmfd, slot++, 0, X86_ADDR_SMRAM, next.size, (uintptr_t)next.addr);
+
+	next = alloc_guest_mem(&allocator, 2 * KVM_PAGE_SIZE);
+	vm_set_user_memory_region(vmfd, slot++, KVM_MEM_LOG_DIRTY_PAGES, X86_ADDR_DIRTY_PAGES, next.size, (uintptr_t)next.addr);
+
+	next = alloc_guest_mem(&allocator, KVM_MAX_VCPU * KVM_PAGE_SIZE);
+	vm_set_user_memory_region(vmfd, slot++, KVM_MEM_READONLY, X86_ADDR_USER_CODE, next.size, (uintptr_t)next.addr);
+	if (text_slot)
+		*text_slot = next.addr;
+
+	struct addr_size host_text = alloc_guest_mem(&allocator, 4 * KVM_PAGE_SIZE);
+	install_syzos_code(host_text.addr, host_text.size);
+	vm_set_user_memory_region(vmfd, slot++, KVM_MEM_READONLY, X86_ADDR_EXECUTOR_CODE, host_text.size, (uintptr_t)host_text.addr);
+
+	next = alloc_guest_mem(&allocator, KVM_PAGE_SIZE);
+	vm_set_user_memory_region(vmfd, slot++, 0, X86_ADDR_SCRATCH_CODE, next.size, (uintptr_t)next.addr);
+
+	next = alloc_guest_mem(&allocator, KVM_PAGE_SIZE);
+	vm_set_user_memory_region(vmfd, slot++, 0, X86_ADDR_IOAPIC, next.size, (uintptr_t)next.addr);
+
+	// Map the remaining pages at an unused address.
+	next = alloc_guest_mem(&allocator, allocator.size);
+	vm_set_user_memory_region(vmfd, slot++, 0, X86_ADDR_UNUSED, next.size, (uintptr_t)next.addr);
+}
+#endif
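
The unmapped page at 0x00040000 is what makes uexits work: no memory slot covers it, so a guest store there returns to the host as KVM_EXIT_MMIO, carrying the stored value in run->mmio.data (see syz_kvm_assert_syzos_uexit below). On the guest side that presumably boils down to a sketch like this, assuming X86_ADDR_UEXIT points into that page:

// Guest-side sketch: report a value to the host and exit to userspace.
static inline void guest_uexit(uint64 code)
{
	*(volatile uint64*)X86_ADDR_UEXIT = code; // faults out to the host as MMIO
}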
+
+#if SYZ_EXECUTOR || __NR_syz_kvm_setup_syzos_vm || __NR_syz_kvm_add_vcpu
+struct kvm_syz_vm {
+	int vmfd;
+	int next_cpu_id;
+	void* user_text;
+	void* host_mem;
+};
+#endif
+
 #if SYZ_EXECUTOR || __NR_syz_kvm_setup_syzos_vm
 static long syz_kvm_setup_syzos_vm(volatile long a0, volatile long a1)
 {
-	// Placeholder.
-	return 0;
+	const int vmfd = a0;
+	void* host_mem = (void*)a1;
+
+	void* user_text_slot = NULL;
+	// The first page of the mapping holds the VM bookkeeping structure;
+	// the guest memory proper starts one page in.
+	struct kvm_syz_vm* ret = (struct kvm_syz_vm*)host_mem;
+	host_mem = (void*)((uint64)host_mem + KVM_PAGE_SIZE);
+	setup_vm(vmfd, host_mem, &user_text_slot);
+	ret->vmfd = vmfd;
+	ret->next_cpu_id = 0;
+	ret->user_text = user_text_slot;
+	ret->host_mem = host_mem;
+	return (long)ret;
 }
 #endif
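
Here a0 is expected to be a KVM VM fd and a1 a host mapping of at least KVM_PAGE_SIZE + KVM_GUEST_MEM_SIZE bytes (one bookkeeping page plus the guest memory). In the executor this plumbing comes from the syscall descriptions; a hand-rolled equivalent would look roughly like:

// Sketch only: create a VM and hand it to syz_kvm_setup_syzos_vm().
static struct kvm_syz_vm* create_syzos_vm(void)
{
	int kvmfd = open("/dev/kvm", O_RDWR);
	int vmfd = ioctl(kvmfd, KVM_CREATE_VM, 0);
	// One bookkeeping page in front of the guest memory proper.
	void* mem = mmap(NULL, KVM_PAGE_SIZE + KVM_GUEST_MEM_SIZE,
			 PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	return (struct kvm_syz_vm*)syz_kvm_setup_syzos_vm(vmfd, (long)mem);
}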
 
 #if SYZ_EXECUTOR || __NR_syz_kvm_add_vcpu
 static long syz_kvm_add_vcpu(volatile long a0, volatile long a1)
 {
-	// Placeholder.
-	return 0;
+	struct kvm_syz_vm* vm = (struct kvm_syz_vm*)a0;
+	struct kvm_text* utext = (struct kvm_text*)a1;
+	const void* text = utext->text;
+	size_t text_size = utext->size;
+
+	if (!vm) {
+		errno = EINVAL;
+		return -1;
+	}
+	if (vm->next_cpu_id == KVM_MAX_VCPU) {
+		errno = ENOMEM;
+		return -1;
+	}
+	int cpu_id = vm->next_cpu_id;
+	int cpufd = ioctl(vm->vmfd, KVM_CREATE_VCPU, cpu_id);
+	if (cpufd == -1)
+		return -1;
+	// Only increment next_cpu_id if vcpu creation succeeded.
+	vm->next_cpu_id++;
+	install_user_code(cpufd, vm->user_text, cpu_id, text, text_size, vm->host_mem);
+	return cpufd;
 }
 #endif
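
The returned fd is an ordinary KVM vcpu fd, so the usual run sequence applies: mmap the kvm_run area (sized via KVM_GET_VCPU_MMAP_SIZE on the /dev/kvm fd) and issue KVM_RUN. A sketch, reusing vm and kvmfd from the previous example; guest_text and guest_text_size are hypothetical fuzzer-provided code:

struct kvm_text text = {.typ = 0, .text = guest_text, .size = guest_text_size};
int cpufd = syz_kvm_add_vcpu((long)vm, (long)&text);
if (cpufd != -1) {
	int run_size = ioctl(kvmfd, KVM_GET_VCPU_MMAP_SIZE, 0);
	struct kvm_run* run = (struct kvm_run*)mmap(
	    NULL, run_size, PROT_READ | PROT_WRITE, MAP_SHARED, cpufd, 0);
	ioctl(cpufd, KVM_RUN, 0);
}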
+
+#if SYZ_EXECUTOR || __NR_syz_kvm_assert_syzos_uexit
+static long syz_kvm_assert_syzos_uexit(volatile long a0, volatile long a1)
+{
+	struct kvm_run* run = (struct kvm_run*)a0;
+	uint64 expect = a1;
+
+	if (!run || (run->exit_reason != KVM_EXIT_MMIO) || (run->mmio.phys_addr != X86_ADDR_UEXIT)) {
+		errno = EINVAL;
+		return -1;
+	}
+
+	if (((uint64*)run->mmio.data)[0] != expect) {
+		errno = EDOM;
+		return -1;
+	}
+	return 0;
+}
+#endif
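
Continuing the run sketch above: after KVM_RUN returns, the host asserts that the guest reached a uexit with the expected value. EINVAL then means the wrong kind of exit, EDOM a value mismatch; 0xdeadbeef is an arbitrary example value:

ioctl(cpufd, KVM_RUN, 0);
if (syz_kvm_assert_syzos_uexit((long)run, 0xdeadbeef))
	fail("expected a SyzOS uexit with value 0xdeadbeef");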
+