Skip to content

Commit 5c74d2f

Browse files
executor: sys/linux: implement SYZOS_API_NESTED_VMRESUME
Provide the SYZOS API command to resume L2 execution after a VM exit, using VMRESUME on Intel and VMRUN on AMD. For testing purposes, implement basic handling of the INVD instruction:
- enable INVD interception on AMD (set all bits in VMCB offset 00Ch);
- map EXIT_REASON_INVD and VMEXIT_INVD into SYZOS_NESTED_EXIT_REASON_INVD;
- advance the L2 RIP to skip to the next instruction.
While at it, perform minor refactorings of L2 exit-reason handling. sys/linux/test/amd64-syz_kvm_nested_vmresume tests the new command by executing two instructions, INVD and HLT, in the nested VM.
1 parent ec988b2 commit 5c74d2f

File tree

4 files changed

+90
-19
lines changed

4 files changed

+90
-19
lines changed

executor/common_kvm_amd64_syzos.h

Lines changed: 59 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ typedef enum {
3030
SYZOS_API_NESTED_CREATE_VM = 301,
3131
SYZOS_API_NESTED_LOAD_CODE = 302,
3232
SYZOS_API_NESTED_VMLAUNCH = 303,
33+
SYZOS_API_NESTED_VMRESUME = 304,
3334
SYZOS_API_STOP, // Must be the last one
3435
} syzos_api_id;
3536

@@ -102,6 +103,7 @@ GUEST_CODE static void guest_handle_enable_nested(struct api_call_1* cmd, uint64
102103
GUEST_CODE static void guest_handle_nested_create_vm(struct api_call_1* cmd, uint64 cpu_id);
103104
GUEST_CODE static void guest_handle_nested_load_code(struct api_call_nested_load_code* cmd, uint64 cpu_id);
104105
GUEST_CODE static void guest_handle_nested_vmlaunch(struct api_call_1* cmd, uint64 cpu_id);
106+
GUEST_CODE static void guest_handle_nested_vmresume(struct api_call_1* cmd, uint64 cpu_id);
105107

106108
typedef enum {
107109
UEXIT_END = (uint64)-1,
@@ -208,6 +210,9 @@ guest_main(uint64 size, uint64 cpu)
208210
} else if (call == SYZOS_API_NESTED_VMLAUNCH) {
209211
// Launch the nested VM.
210212
guest_handle_nested_vmlaunch((struct api_call_1*)cmd, cpu);
213+
} else if (call == SYZOS_API_NESTED_VMRESUME) {
214+
// Resume a nested VM.
215+
guest_handle_nested_vmresume((struct api_call_1*)cmd, cpu);
211216
}
212217
addr += cmd->size;
213218
size -= cmd->size;
@@ -733,6 +738,7 @@ GUEST_CODE static noinline void init_vmcs_control_fields(uint64 cpu_id, uint64 v
733738
// Common L2 exit reasons for Intel and AMD.
734739
typedef enum {
735740
SYZOS_NESTED_EXIT_REASON_HLT = 1,
741+
SYZOS_NESTED_EXIT_REASON_INVD = 2,
736742
SYZOS_NESTED_EXIT_REASON_UNKNOWN = 0xFF,
737743
} syz_nested_exit_reason;
738744

@@ -748,22 +754,37 @@ GUEST_CODE static void guest_uexit_l2(uint64 exit_reason, syz_nested_exit_reason
748754
}
749755
}
750756

751-
GUEST_CODE static syz_nested_exit_reason map_intel_exit_reason(uint64 reason)
757+
#define EXIT_REASON_HLT 0xc
758+
#define EXIT_REASON_INVD 0xd
759+
760+
GUEST_CODE static syz_nested_exit_reason map_intel_exit_reason(uint64 basic_reason)
752761
{
753-
volatile uint64 basic_reason = reason & 0xFFFF;
754-
// EXIT_REASON_HLT.
755-
if (basic_reason == 0xc)
762+
// Disable optimizations.
763+
volatile uint64 reason = basic_reason;
764+
if (reason == EXIT_REASON_HLT)
756765
return SYZOS_NESTED_EXIT_REASON_HLT;
766+
if (reason == EXIT_REASON_INVD)
767+
return SYZOS_NESTED_EXIT_REASON_INVD;
757768
return SYZOS_NESTED_EXIT_REASON_UNKNOWN;
758769
}
759770

771+
GUEST_CODE static void advance_l2_rip_intel(uint64 basic_reason)
772+
{
773+
if (basic_reason == EXIT_REASON_INVD) {
774+
uint64 rip = vmread(VMCS_GUEST_RIP);
775+
vmwrite(VMCS_GUEST_RIP, rip + 2);
776+
}
777+
}
778+
760779
// This function is called from inline assembly.
761780
__attribute__((used))
762781
GUEST_CODE static void
763782
nested_vm_exit_handler_intel(uint64 exit_reason, struct l2_guest_regs* regs)
764783
{
765-
syz_nested_exit_reason mapped_reason = map_intel_exit_reason(exit_reason);
784+
uint64 basic_reason = exit_reason & 0xFFFF;
785+
syz_nested_exit_reason mapped_reason = map_intel_exit_reason(basic_reason);
766786
guest_uexit_l2(exit_reason, mapped_reason, CPU_VENDOR_INTEL);
787+
advance_l2_rip_intel(basic_reason);
767788
}
768789

769790
extern char after_vmentry_label;
@@ -810,20 +831,36 @@ __attribute__((naked)) GUEST_CODE static void nested_vm_exit_handler_intel_asm(v
810831
[vm_exit_reason] "i"(VMCS_VM_EXIT_REASON) : "memory", "cc", "rbx", "rdi", "rsi");
811832
}
812833

813-
GUEST_CODE static syz_nested_exit_reason map_amd_exit_reason(uint64 reason)
834+
#define VMEXIT_INVD 0x76
835+
#define VMEXIT_HLT 0x78
836+
837+
GUEST_CODE static syz_nested_exit_reason map_amd_exit_reason(uint64 basic_reason)
814838
{
815-
volatile uint64 basic_reason = reason & 0xFFFF;
816-
// #VMEXIT_HLT.
817-
if (basic_reason == 0x78)
839+
// Disable optimizations.
840+
volatile uint64 reason = basic_reason;
841+
if (reason == VMEXIT_HLT)
818842
return SYZOS_NESTED_EXIT_REASON_HLT;
843+
if (reason == VMEXIT_INVD)
844+
return SYZOS_NESTED_EXIT_REASON_INVD;
819845
return SYZOS_NESTED_EXIT_REASON_UNKNOWN;
820846
}
821847

848+
GUEST_CODE static void advance_l2_rip_amd(uint64 basic_reason, uint64 cpu_id, uint64 vm_id)
849+
{
850+
if (basic_reason == VMEXIT_INVD) {
851+
uint64 vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
852+
uint64 rip = vmcb_read64((volatile uint8*)vmcb_addr, VMCB_GUEST_RIP);
853+
vmcb_write64(vmcb_addr, VMCB_GUEST_RIP, rip + 2);
854+
}
855+
}
856+
822857
__attribute__((used)) GUEST_CODE static void
823858
nested_vm_exit_handler_amd(uint64 exit_reason, uint64 cpu_id, uint64 vm_id)
824859
{
825-
syz_nested_exit_reason mapped_reason = map_amd_exit_reason(exit_reason);
860+
volatile uint64 basic_reason = exit_reason & 0xFFFF;
861+
syz_nested_exit_reason mapped_reason = map_amd_exit_reason(basic_reason);
826862
guest_uexit_l2(exit_reason, mapped_reason, CPU_VENDOR_AMD);
863+
advance_l2_rip_amd(basic_reason, cpu_id, vm_id);
827864
}
828865

829866
GUEST_CODE static noinline void init_vmcs_host_state(void)
@@ -1011,7 +1048,7 @@ GUEST_CODE static noinline void init_vmcb_guest_state(uint64 cpu_id, uint64 vm_i
10111048
vmcb_write32(vmcb_addr, VMCB_GUEST_IDTR_LIM, idtr.limit);
10121049

10131050
// Setup VMCB Control Fields.
1014-
vmcb_write32(vmcb_addr, VMCB_CTRL_INTERCEPT_VEC3, VMCB_CTRL_INTERCEPT_HLT);
1051+
vmcb_write32(vmcb_addr, VMCB_CTRL_INTERCEPT_VEC3, VMCB_CTRL_INTERCEPT_VEC3_ALL);
10151052
vmcb_write32(vmcb_addr, VMCB_CTRL_INTERCEPT_VEC4, VMCB_CTRL_INTERCEPT_VEC4_ALL);
10161053

10171054
// Enable Nested Paging (NPT):
@@ -1078,9 +1115,8 @@ guest_handle_nested_load_code(struct api_call_nested_load_code* cmd, uint64 cpu_
10781115
}
10791116

10801117
GUEST_CODE static noinline void
1081-
guest_handle_nested_vmentry_intel(struct api_call_1* cmd, uint64 cpu_id, bool is_launch)
1118+
guest_handle_nested_vmentry_intel(uint64 vm_id, uint64 cpu_id, bool is_launch)
10821119
{
1083-
uint64 vm_id = cmd->arg;
10841120
uint64 vmx_error_code = 0;
10851121
uint8 fail_flag = 0; // Will be 1 if EITHER CF or ZF is set
10861122

@@ -1149,19 +1185,24 @@ guest_run_amd_vm(uint64 cpu_id, uint64 vm_id)
11491185
}
11501186

11511187
GUEST_CODE static noinline void
1152-
guest_handle_nested_vmlaunch_amd(struct api_call_1* cmd, uint64 cpu_id, uint64 vm_id)
1188+
guest_handle_nested_vmlaunch(struct api_call_1* cmd, uint64 cpu_id)
11531189
{
1154-
guest_run_amd_vm(cpu_id, vm_id);
1190+
uint64 vm_id = cmd->arg;
1191+
if (get_cpu_vendor() == CPU_VENDOR_INTEL) {
1192+
guest_handle_nested_vmentry_intel(vm_id, cpu_id, true);
1193+
} else {
1194+
guest_run_amd_vm(cpu_id, vm_id);
1195+
}
11551196
}
11561197

11571198
GUEST_CODE static noinline void
1158-
guest_handle_nested_vmlaunch(struct api_call_1* cmd, uint64 cpu_id)
1199+
guest_handle_nested_vmresume(struct api_call_1* cmd, uint64 cpu_id)
11591200
{
11601201
uint64 vm_id = cmd->arg;
11611202
if (get_cpu_vendor() == CPU_VENDOR_INTEL) {
1162-
guest_handle_nested_vmentry_intel(cmd, cpu_id, true);
1203+
guest_handle_nested_vmentry_intel(vm_id, cpu_id, false);
11631204
} else {
1164-
guest_handle_nested_vmlaunch_amd(cmd, cpu_id, vm_id);
1205+
guest_run_amd_vm(cpu_id, vm_id);
11651206
}
11661207
}
11671208

executor/kvm.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -393,7 +393,7 @@
393393

394394
// Control Area
395395
#define VMCB_CTRL_INTERCEPT_VEC3 0x0c
396-
#define VMCB_CTRL_INTERCEPT_HLT (1 << 24) // Bit 24 in VEC3
396+
#define VMCB_CTRL_INTERCEPT_VEC3_ALL (0xffffffff)
397397
#define VMCB_CTRL_INTERCEPT_VEC4 0x10
398398
// Bits 0-9: intercept VMRUN, VMMCALL, VMLOAD, VMSAVE, STGI, CLGI, SKINIT, RDTSCP, ICEBP, WBINVD.
399399
#define VMCB_CTRL_INTERCEPT_VEC4_ALL (0x3ff)

sys/linux/dev_kvm_amd64.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ syzos_api_call$x86 [
117117
nested_create_vm syzos_api$x86[301, syzos_api_vm_id]
118118
nested_load_code syzos_api$x86[302, syzos_api_nested_load_code]
119119
nested_vmlaunch syzos_api$x86[303, syzos_api_vm_id]
120+
nested_vmresume syzos_api$x86[304, syzos_api_vm_id]
120121
] [varlen]
121122

122123
kvm_text_x86 [
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
#
2+
# requires: arch=amd64 -threaded
3+
#
4+
r0 = openat$kvm(0, &AUTO='/dev/kvm\x00', 0x0, 0x0)
5+
r1 = ioctl$KVM_CREATE_VM(r0, AUTO, 0x0)
6+
r2 = syz_kvm_setup_syzos_vm$x86(r1, &(0x7f0000c00000/0x400000)=nil)
7+
8+
# Create a nested VM that performs INVD (0f 08) and HLT (f4) to test vmresume.
9+
# INVD is one of the few instructions that cause unconditional VM exit on Intel.
10+
# On AMD, SYZOS also turns on INVD interception.
11+
#
12+
r3 = syz_kvm_add_vcpu$x86(r2, &AUTO={0x0, &AUTO=[@enable_nested={AUTO, AUTO, 0x0}, @nested_create_vm={AUTO, AUTO, 0x0}, @nested_load_code={AUTO, AUTO, {0x0, "0f08f4"}}, @nested_vmlaunch={AUTO, AUTO, 0x0}, @nested_vmresume={AUTO, AUTO, 0x0}], AUTO})
13+
r4 = ioctl$KVM_GET_VCPU_MMAP_SIZE(r0, AUTO)
14+
r5 = mmap$KVM_VCPU(&(0x7f0000009000/0x1000)=nil, r4, 0x3, 0x1, r3, 0x0)
15+
16+
# L2 VM executes INVD. Exit reason is mapped to 0xe2e20002.
17+
#
18+
ioctl$KVM_RUN(r3, AUTO, 0x0)
19+
syz_kvm_assert_syzos_uexit$x86(r5, 0xe2e20002)
20+
21+
# L1 resumes L2, which executes HLT. Exit reason is mapped to 0xe2e20001.
22+
#
23+
ioctl$KVM_RUN(r3, AUTO, 0x0)
24+
syz_kvm_assert_syzos_uexit$x86(r5, 0xe2e20001)
25+
26+
# guest_main should finish with guest_uexit(-1).
27+
#
28+
ioctl$KVM_RUN(r3, AUTO, 0x0)
29+
syz_kvm_assert_syzos_uexit$x86(r5, 0xffffffff)

0 commit comments

Comments (0)