@@ -29,6 +29,7 @@ typedef enum {
 	SYZOS_API_ENABLE_NESTED = 300,
 	SYZOS_API_NESTED_CREATE_VM = 301,
 	SYZOS_API_NESTED_LOAD_CODE = 302,
+	SYZOS_API_NESTED_VMLAUNCH = 303,
 	SYZOS_API_STOP, // Must be the last one
 } syzos_api_id;
 
@@ -74,10 +75,17 @@ struct api_call_3 {
 	uint64 args[3];
 };
 
+// This struct must match the push/pop order in nested_vm_exit_handler_intel_asm().
+struct l2_guest_regs {
+	uint64 rax, rbx, rcx, rdx, rsi, rdi, rbp;
+	uint64 r8, r9, r10, r11, r12, r13, r14, r15;
+};
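+// nested_vm_exit_handler_intel_asm() pushes r15 first and rax last, so after the
+// final push RSP points at rax and can be handed to the C handler directly as a
+// struct l2_guest_regs*.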
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 GUEST_CODE static void guest_uexit(uint64 exit_code);
+GUEST_CODE static void nested_vm_exit_handler_intel(uint64 exit_reason, struct l2_guest_regs* regs);
 #ifdef __cplusplus
 }
 #endif
@@ -93,11 +101,13 @@ GUEST_CODE static void guest_handle_set_irq_handler(struct api_call_2* cmd);
 GUEST_CODE static void guest_handle_enable_nested(struct api_call_1* cmd, uint64 cpu_id);
 GUEST_CODE static void guest_handle_nested_create_vm(struct api_call_1* cmd, uint64 cpu_id);
 GUEST_CODE static void guest_handle_nested_load_code(struct api_call_nested_load_code* cmd, uint64 cpu_id);
+GUEST_CODE static void guest_handle_nested_vmlaunch(struct api_call_1* cmd, uint64 cpu_id);
 
 typedef enum {
 	UEXIT_END = (uint64)-1,
 	UEXIT_IRQ = (uint64)-2,
 	UEXIT_ASSERT = (uint64)-3,
+	UEXIT_STOP_L2 = (uint64)-4,
 } uexit_code;
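+// UEXIT_STOP_L2 is reported to the host once an L2 guest has run and exited
+// (see guest_handle_nested_vmentry_intel() and guest_run_amd_vm() below).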
 
 typedef enum {
@@ -196,6 +206,9 @@ guest_main(uint64 size, uint64 cpu)
 		} else if (call == SYZOS_API_NESTED_LOAD_CODE) {
 			// Load code into the nested VM.
 			guest_handle_nested_load_code((struct api_call_nested_load_code*)cmd, cpu);
+		} else if (call == SYZOS_API_NESTED_VMLAUNCH) {
+			// Launch the nested VM.
+			guest_handle_nested_vmlaunch((struct api_call_1*)cmd, cpu);
 		}
 		addr += cmd->size;
 		size -= cmd->size;
@@ -538,6 +551,11 @@ GUEST_CODE static noinline void vmcb_write64(uint64 vmcb, uint16 offset, uint64
 	*((volatile uint64*)(vmcb + offset)) = val;
 }
 
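+// Counterpart to vmcb_write64(): reads a 64-bit field from the VMCB.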
+GUEST_CODE static noinline uint64 vmcb_read64(volatile uint8* vmcb, uint16 offset)
+{
+	return *((volatile uint64*)(vmcb + offset));
+}
+
 GUEST_CODE static void guest_memset(void* s, uint8 c, int size)
 {
 	volatile uint8* p = (volatile uint8*)s;
@@ -713,9 +731,100 @@ GUEST_CODE static noinline void init_vmcs_control_fields(uint64 cpu_id, uint64 v
 	vmwrite(VMCS_TPR_THRESHOLD, 0);
 }
 
-// Empty for now.
+// Common L2 exit reasons for Intel and AMD.
+typedef enum {
+	SYZ_NESTED_EXIT_REASON_HLT = 1,
+	SYZ_NESTED_EXIT_REASON_UNKNOWN = 0xFF,
+} syz_nested_exit_reason;
+
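+// Reports an L2 exit to the host: 0xe2e2xxxx carries a vendor-neutral mapped
+// reason, 0xe211xxxx a raw Intel exit reason, and 0xe2aaxxxx a raw AMD exit code.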
+GUEST_CODE static void guest_uexit_l2(uint64 exit_reason, syz_nested_exit_reason mapped_reason,
+				      cpu_vendor_id vendor)
+{
+	if (mapped_reason != SYZ_NESTED_EXIT_REASON_UNKNOWN) {
+		guest_uexit(0xe2e20000 | mapped_reason);
+	} else if (vendor == CPU_VENDOR_INTEL) {
+		guest_uexit(0xe2110000 | exit_reason);
+	} else {
+		guest_uexit(0xe2aa0000 | exit_reason);
+	}
+}
+
+GUEST_CODE static syz_nested_exit_reason map_intel_exit_reason(uint64 reason)
+{
+	volatile uint64 basic_reason = reason & 0xFFFF;
+	// EXIT_REASON_HLT.
+	if (basic_reason == 0xc)
+		return SYZ_NESTED_EXIT_REASON_HLT;
+	return SYZ_NESTED_EXIT_REASON_UNKNOWN;
+}
+
+// This function is called from inline assembly.
+__attribute__((used))
+GUEST_CODE static void
+nested_vm_exit_handler_intel(uint64 exit_reason, struct l2_guest_regs* regs)
+{
+	syz_nested_exit_reason mapped_reason = map_intel_exit_reason(exit_reason);
+	guest_uexit_l2(exit_reason, mapped_reason, CPU_VENDOR_INTEL);
+}
+
+extern char after_vmentry_label;
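+
+// VM-exit entry point for L2 guests, presumably installed as the VMCS host RIP
+// in init_vmcs_host_state(). The function is naked, runs on the stack designated
+// by the VMCS host state, and never returns to a caller.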
 __attribute__((naked)) GUEST_CODE static void nested_vm_exit_handler_intel_asm(void)
 {
+	asm volatile(R"(
+		// Save L2's GPRs. This creates the 'struct l2_guest_regs' on the stack.
+		// Registers are pushed in reverse field order so that the lowest
+		// address (the final RSP) holds rax, matching the struct layout.
+		push %%r15
+		push %%r14
+		push %%r13
+		push %%r12
+		push %%r11
+		push %%r10
+		push %%r9
+		push %%r8
+		push %%rbp
+		push %%rdi
+		push %%rsi
+		push %%rdx
+		push %%rcx
+		push %%rbx
+		push %%rax
+
+		// Prepare arguments for the C handler:
+		// arg1 (RDI) = exit_reason
+		// arg2 (RSI) = pointer to the saved registers
+		mov %%rsp, %%rsi
+		mov %[vm_exit_reason], %%rbx
+		vmread %%rbx, %%rdi
+
+		// Call the C handler.
+		call nested_vm_exit_handler_intel
+
+		// The C handler has processed the exit. Now, return to the L1 command
+		// processing loop. VMX remains enabled.
+		add %[stack_cleanup_size], %%rsp
+
+		// Jump to L1 main flow.
+		jmp after_vmentry_label
+	)"
+		     :
+		     : [stack_cleanup_size] "i"(sizeof(struct l2_guest_regs)),
+		       [vm_exit_reason] "i"(VMCS_VM_EXIT_REASON)
+		     : "memory", "cc", "rbx", "rdi", "rsi");
+}
+
+GUEST_CODE static syz_nested_exit_reason map_amd_exit_reason(uint64 reason)
+{
+	volatile uint64 basic_reason = reason & 0xFFFF;
+	// #VMEXIT_HLT.
+	if (basic_reason == 0x78)
+		return SYZ_NESTED_EXIT_REASON_HLT;
+	return SYZ_NESTED_EXIT_REASON_UNKNOWN;
+}
+
+__attribute__((used)) GUEST_CODE static void
+nested_vm_exit_handler_amd(uint64 exit_reason, uint64 cpu_id, uint64 vm_id)
+{
+	syz_nested_exit_reason mapped_reason = map_amd_exit_reason(exit_reason);
+	guest_uexit_l2(exit_reason, mapped_reason, CPU_VENDOR_AMD);
719828}
720829
721830GUEST_CODE static noinline void init_vmcs_host_state (void )
@@ -969,4 +1078,94 @@ guest_handle_nested_load_code(struct api_call_nested_load_code* cmd, uint64 cpu_
 	}
 }
 
+GUEST_CODE static noinline void
+guest_handle_nested_vmentry_intel(struct api_call_1* cmd, uint64 cpu_id, bool is_launch)
+{
+	uint64 vm_id = cmd->arg;
+	uint64 vmx_error_code = 0;
+	uint8 fail_flag = 0; // Will be 1 if either CF or ZF is set.
+
+	nested_vmptrld(cpu_id, vm_id);
+
+	if (is_launch) {
+		asm volatile(R"(
+			// Attempt to launch the L2 guest.
+			vmlaunch
+			// Set AL to 1 if CF=1 (VMfailInvalid).
+			setc %%al
+			// Set BL to 1 if ZF=1 (VMfailValid).
+			setz %%bl
+			or %%bl, %%al)"
+			     : "=a"(fail_flag)
+			     :
+			     : "rbx", "cc", "memory");
+	} else {
+		asm volatile(R"(
+			// Attempt to resume the L2 guest.
+			vmresume
+			// Set AL to 1 if CF=1 (VMfailInvalid).
+			setc %%al
+			// Set BL to 1 if ZF=1 (VMfailValid).
+			setz %%bl
+			or %%bl, %%al)"
+			     : "=a"(fail_flag)
+			     :
+			     : "rbx", "cc", "memory");
+	}
+	// The Intel VM-exit handler jumps back here once it has processed an L2 exit.
+	asm volatile(".globl after_vmentry_label\nafter_vmentry_label:");
+	if (fail_flag) {
+		// VMLAUNCH/VMRESUME failed. On VMfailValid the current VMCS is still
+		// loaded, and its VM-instruction error field identifies the failure.
+		vmx_error_code = vmread(VMCS_VM_INSTRUCTION_ERROR);
+		guest_uexit(0xe2e10000 | (uint32)vmx_error_code);
+	} else {
+		// This path is only taken if VMLAUNCH/VMRESUME truly succeeded (CF=0 and ZF=0)
+		// and the L2 guest has run and exited.
+		guest_uexit(UEXIT_STOP_L2);
+	}
+}
+
+GUEST_CODE static noinline void
+guest_run_amd_vm(uint64 cpu_id, uint64 vm_id)
+{
+	uint64 vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
+	volatile uint8* vmcb_ptr = (volatile uint8*)vmcb_addr;
+	uint8 fail_flag = 0;
+
+	asm volatile(
+		"mov %1, %%rax\n\t" // Load the VMCB physical address into RAX.
+		"vmrun\n\t" // Launch or resume the L2 guest.
+		"setc %0\n\t"
+		: "=q"(fail_flag)
+		: "m"(vmcb_addr)
+		: "rax", "cc", "memory");
+	if (fail_flag) {
+		// VMRUN failed.
+		guest_uexit(0xe2e10000 | 0xFFFF);
+		return;
+	}
+
+	// VMRUN succeeded and we have a VM-exit.
+	uint64 exit_reason = vmcb_read64(vmcb_ptr, VMCB_EXIT_CODE);
+	nested_vm_exit_handler_amd(exit_reason, cpu_id, vm_id);
+	guest_uexit(UEXIT_STOP_L2);
+}
+
+GUEST_CODE static noinline void
+guest_handle_nested_vmlaunch_amd(struct api_call_1* cmd, uint64 cpu_id, uint64 vm_id)
+{
+	guest_run_amd_vm(cpu_id, vm_id);
+}
+
+GUEST_CODE static noinline void
+guest_handle_nested_vmlaunch(struct api_call_1* cmd, uint64 cpu_id)
+{
+	uint64 vm_id = cmd->arg;
+	if (get_cpu_vendor() == CPU_VENDOR_INTEL) {
+		guest_handle_nested_vmentry_intel(cmd, cpu_id, true);
+	} else {
+		guest_handle_nested_vmlaunch_amd(cmd, cpu_id, vm_id);
+	}
+}
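+
+// A minimal sketch (not part of the API definition) of how the host side might
+// encode this command, assuming the same header layout as the other SYZOS calls;
+// the field names below are illustrative:
+//
+//	struct api_call_1 cmd = {
+//		.header = {.call = SYZOS_API_NESTED_VMLAUNCH, .size = sizeof(cmd)},
+//		.arg = 0, // vm_id of a VM created via SYZOS_API_NESTED_CREATE_VM
+//			  // and populated via SYZOS_API_NESTED_LOAD_CODE.
+//	};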
+
 #endif // EXECUTOR_COMMON_KVM_AMD64_SYZOS_H