diff --git a/executor/common_kvm_amd64_syzos.h b/executor/common_kvm_amd64_syzos.h
index 27f4be400922..a691bebdef14 100644
--- a/executor/common_kvm_amd64_syzos.h
+++ b/executor/common_kvm_amd64_syzos.h
@@ -249,11 +249,8 @@ GUEST_CODE static noinline void guest_handle_cpuid(uint32 eax, uint32 ecx)
 	    : "rbx", "rdx");
 }
 
-// Write val into an MSR register reg.
-GUEST_CODE static noinline void guest_handle_wrmsr(uint64 reg, uint64 val)
+GUEST_CODE static noinline void wrmsr(uint64 reg, uint64 val)
 {
-	// The wrmsr instruction takes its arguments in specific registers:
-	// edx:eax contains the 64-bit value to write, ecx contains the MSR address.
 	asm volatile(
 	    "wrmsr"
 	    :
@@ -263,20 +260,26 @@ GUEST_CODE static noinline void guest_handle_wrmsr(uint64 reg, uint64 val)
 	    : "memory");
 }
 
-// Read an MSR register, ignore the result.
-GUEST_CODE static noinline void guest_handle_rdmsr(uint64 reg)
+// Write val into an MSR register reg.
+GUEST_CODE static noinline void guest_handle_wrmsr(uint64 reg, uint64 val)
+{
+	wrmsr(reg, val);
+}
+
+GUEST_CODE static noinline uint64 rdmsr(uint64 msr_id)
 {
-	uint32 low = 0, high = 0;
-	// The rdmsr instruction takes the MSR address in ecx.
+	uint32 low = 0, high = 0; // nolint
+	// The RDMSR instruction takes the MSR address in ecx.
 	// It puts the lower 32 bits of the MSR value into eax, and the upper.
 	// 32 bits of the MSR value into edx.
-	asm volatile(
-	    "rdmsr"
-	    : "=a"(low),
-	      "=d"(high)
-	    : "c"(reg)
-	    : // No explicit clobbers.
-	);
+	asm volatile("rdmsr" : "=a"(low), "=d"(high) : "c"(msr_id));
+	return ((uint64)high << 32) | low;
+}
+
+// Read an MSR register, ignore the result.
+GUEST_CODE static noinline void guest_handle_rdmsr(uint64 reg)
+{
+	(void)rdmsr(reg);
 }
 
 // Write to CRn control register.
@@ -488,24 +491,6 @@ GUEST_CODE static inline void write_cr4(uint64 val)
 	asm volatile("mov %0, %%cr4" : : "r"(val));
 }
 
-GUEST_CODE static noinline void wrmsr(uint64 reg, uint64 val)
-{
-	asm volatile(
-	    "wrmsr"
-	    :
-	    : "c"(reg),
-	      "a"((uint32)val),
-	      "d"((uint32)(val >> 32))
-	    : "memory");
-}
-
-GUEST_CODE static noinline uint64 rdmsr(uint32 msr_id)
-{
-	uint64 msr_value;
-	asm volatile("rdmsr" : "=A"(msr_value) : "c"(msr_id));
-	return msr_value;
-}
-
 GUEST_CODE static noinline void vmwrite(uint64 field, uint64 value)
 {
 	uint8 error = 0; // nolint
@@ -678,15 +663,17 @@ GUEST_CODE static noinline void init_vmcs_control_fields(uint64 cpu_id, uint64 v
 	vmwrite(VMCS_PIN_BASED_VM_EXEC_CONTROL, (uint32)vmx_msr);
 
 	// Setup Secondary Processor-Based controls: enable EPT.
-	vmx_msr = rdmsr(X86_MSR_IA32_VMX_PROCBASED_CTLS2);
-	uint32 sec_exec_ctl = (uint32)(vmx_msr >> 32); // Must-be-1 bits.
-	sec_exec_ctl |= ((uint32)vmx_msr & SECONDARY_EXEC_ENABLE_EPT); // Allowed bits.
-	vmwrite(VMCS_SECONDARY_VM_EXEC_CONTROL, sec_exec_ctl);
+	vmx_msr = (uint32)rdmsr(X86_MSR_IA32_VMX_PROCBASED_CTLS2);
+	vmx_msr |= SECONDARY_EXEC_ENABLE_EPT | SECONDARY_EXEC_ENABLE_RDTSCP;
+	vmwrite(VMCS_SECONDARY_VM_EXEC_CONTROL, vmx_msr);
 
 	// Read and write Primary Processor-Based controls from TRUE MSR.
 	// We also add the bit to enable the secondary controls.
 	vmx_msr = rdmsr(X86_MSR_IA32_VMX_TRUE_PROCBASED_CTLS);
-	vmwrite(VMCS_CPU_BASED_VM_EXEC_CONTROL, (uint32)vmx_msr | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS | CPU_BASED_HLT_EXITING);
+	vmx_msr |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+	// Exit on HLT and RDTSC.
+	vmx_msr |= CPU_BASED_HLT_EXITING | CPU_BASED_RDTSC_EXITING;
+	vmwrite(VMCS_CPU_BASED_VM_EXEC_CONTROL, (uint32)vmx_msr);
 
 	// Set up VM-Exit controls via TRUE MSR: indicate a 64-bit host.
 	vmx_msr = rdmsr(X86_MSR_IA32_VMX_TRUE_EXIT_CTLS);
@@ -739,6 +726,9 @@ GUEST_CODE static noinline void init_vmcs_control_fields(uint64 cpu_id, uint64 v
 typedef enum {
 	SYZOS_NESTED_EXIT_REASON_HLT = 1,
 	SYZOS_NESTED_EXIT_REASON_INVD = 2,
+	SYZOS_NESTED_EXIT_REASON_CPUID = 3,
+	SYZOS_NESTED_EXIT_REASON_RDTSC = 4,
+	SYZOS_NESTED_EXIT_REASON_RDTSCP = 5,
 	SYZOS_NESTED_EXIT_REASON_UNKNOWN = 0xFF,
 } syz_nested_exit_reason;
 
@@ -754,8 +744,11 @@ GUEST_CODE static void guest_uexit_l2(uint64 exit_reason, syz_nested_exit_reason
 	}
 }
 
+#define EXIT_REASON_CPUID 0xa
 #define EXIT_REASON_HLT 0xc
 #define EXIT_REASON_INVD 0xd
+#define EXIT_REASON_RDTSC 0x10
+#define EXIT_REASON_RDTSCP 0x33
 
 GUEST_CODE static syz_nested_exit_reason map_intel_exit_reason(uint64 basic_reason)
 {
@@ -765,15 +758,28 @@ GUEST_CODE static syz_nested_exit_reason map_intel_exit_reason(uint64 basic_reas
 		return SYZOS_NESTED_EXIT_REASON_HLT;
 	if (reason == EXIT_REASON_INVD)
 		return SYZOS_NESTED_EXIT_REASON_INVD;
+	if (reason == EXIT_REASON_CPUID)
+		return SYZOS_NESTED_EXIT_REASON_CPUID;
+	if (reason == EXIT_REASON_RDTSC)
+		return SYZOS_NESTED_EXIT_REASON_RDTSC;
+	if (reason == EXIT_REASON_RDTSCP)
+		return SYZOS_NESTED_EXIT_REASON_RDTSCP;
 	return SYZOS_NESTED_EXIT_REASON_UNKNOWN;
 }
 
 GUEST_CODE static void advance_l2_rip_intel(uint64 basic_reason)
 {
-	if (basic_reason == EXIT_REASON_INVD) {
-		uint64 rip = vmread(VMCS_GUEST_RIP);
-		vmwrite(VMCS_GUEST_RIP, rip + 2);
+	// Disable optimizations.
+	volatile uint64 reason = basic_reason;
+	uint64 rip = vmread(VMCS_GUEST_RIP);
+	if ((reason == EXIT_REASON_INVD) || (reason == EXIT_REASON_CPUID) ||
+	    (reason == EXIT_REASON_RDTSC)) {
+		rip += 2;
+	} else if (reason == EXIT_REASON_RDTSCP) {
+		// We insist on a single-line compound statement for else-if.
+		rip += 3;
 	}
+	vmwrite(VMCS_GUEST_RIP, rip);
 }
 
 // This function is called from inline assembly.
@@ -831,8 +837,11 @@ __attribute__((naked)) GUEST_CODE static void nested_vm_exit_handler_intel_asm(v
 	    [vm_exit_reason] "i"(VMCS_VM_EXIT_REASON)
 	    : "memory", "cc", "rbx", "rdi", "rsi");
 }
+#define VMEXIT_RDTSC 0x6e
+#define VMEXIT_CPUID 0x72
 #define VMEXIT_INVD 0x76
 #define VMEXIT_HLT 0x78
+#define VMEXIT_RDTSCP 0x87
 
 GUEST_CODE static syz_nested_exit_reason map_amd_exit_reason(uint64 basic_reason)
 {
@@ -842,16 +851,29 @@ GUEST_CODE static syz_nested_exit_reason map_amd_exit_reason(uint64 basic_reason
 		return SYZOS_NESTED_EXIT_REASON_HLT;
 	if (reason == VMEXIT_INVD)
 		return SYZOS_NESTED_EXIT_REASON_INVD;
+	if (reason == VMEXIT_CPUID)
+		return SYZOS_NESTED_EXIT_REASON_CPUID;
+	if (reason == VMEXIT_RDTSC)
+		return SYZOS_NESTED_EXIT_REASON_RDTSC;
+	if (reason == VMEXIT_RDTSCP)
+		return SYZOS_NESTED_EXIT_REASON_RDTSCP;
 	return SYZOS_NESTED_EXIT_REASON_UNKNOWN;
 }
 
 GUEST_CODE static void advance_l2_rip_amd(uint64 basic_reason, uint64 cpu_id, uint64 vm_id)
 {
-	if (basic_reason == VMEXIT_INVD) {
-		uint64 vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
-		uint64 rip = vmcb_read64((volatile uint8*)vmcb_addr, VMCB_GUEST_RIP);
-		vmcb_write64(vmcb_addr, VMCB_GUEST_RIP, rip + 2);
+	// Disable optimizations.
+	volatile uint64 reason = basic_reason;
+	uint64 vmcb_addr = X86_SYZOS_ADDR_VMCS_VMCB(cpu_id, vm_id);
+	uint64 rip = vmcb_read64((volatile uint8*)vmcb_addr, VMCB_GUEST_RIP);
+	if ((reason == VMEXIT_INVD) || (reason == VMEXIT_CPUID) ||
+	    (reason == VMEXIT_RDTSC)) {
+		rip += 2;
+	} else if (reason == VMEXIT_RDTSCP) {
+		// We insist on a single-line compound statement for else-if.
+		rip += 3;
 	}
+	vmcb_write64(vmcb_addr, VMCB_GUEST_RIP, rip);
 }
 
 __attribute__((used)) GUEST_CODE static void
diff --git a/executor/kvm.h b/executor/kvm.h
index d7c708b34e3a..a390becbb26b 100644
--- a/executor/kvm.h
+++ b/executor/kvm.h
@@ -244,6 +244,7 @@
 // VMX control bits
 #define RFLAGS_1_BIT (1ULL << 1)
 #define CPU_BASED_HLT_EXITING (1U << 7)
+#define CPU_BASED_RDTSC_EXITING (1U << 12)
 #define AR_TSS_AVAILABLE 0x0089
 #define SVM_ATTR_LDTR_UNUSABLE 0x0000
 #define VMX_AR_TSS_BUSY 0x008b
@@ -251,6 +252,7 @@
 #define VMX_AR_LDTR_UNUSABLE 0x10000
 #define VM_ENTRY_IA32E_MODE (1U << 9)
 #define SECONDARY_EXEC_ENABLE_EPT (1U << 1)
+#define SECONDARY_EXEC_ENABLE_RDTSCP (1U << 3)
 #define VM_EXIT_HOST_ADDR_SPACE_SIZE (1U << 9)
 #define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS (1U << 31)
 
diff --git a/sys/linux/test/amd64-syz_kvm_nested_vmresume-cpuid b/sys/linux/test/amd64-syz_kvm_nested_vmresume-cpuid
new file mode 100644
index 000000000000..43abd7cc37c6
--- /dev/null
+++ b/sys/linux/test/amd64-syz_kvm_nested_vmresume-cpuid
@@ -0,0 +1,27 @@
+#
+# requires: arch=amd64 -threaded
+#
+r0 = openat$kvm(0, &AUTO='/dev/kvm\x00', 0x0, 0x0)
+r1 = ioctl$KVM_CREATE_VM(r0, AUTO, 0x0)
+r2 = syz_kvm_setup_syzos_vm$x86(r1, &(0x7f0000c00000/0x400000)=nil)
+
+# Create a nested VM that performs CPUID (0f a2) and HLT (f4).
+#
+r3 = syz_kvm_add_vcpu$x86(r2, &AUTO={0x0, &AUTO=[@enable_nested={AUTO, AUTO, 0x0}, @nested_create_vm={AUTO, AUTO, 0x0}, @nested_load_code={AUTO, AUTO, {0x0, "0fa2f4"}}, @nested_vmlaunch={AUTO, AUTO, 0x0}, @nested_vmresume={AUTO, AUTO, 0x0}], AUTO})
+r4 = ioctl$KVM_GET_VCPU_MMAP_SIZE(r0, AUTO)
+r5 = mmap$KVM_VCPU(&(0x7f0000009000/0x1000)=nil, r4, 0x3, 0x1, r3, 0x0)
+
+# L2 VM executes CPUID. Exit reason is mapped to 0xe2e20003.
+#
+ioctl$KVM_RUN(r3, AUTO, 0x0)
+syz_kvm_assert_syzos_uexit$x86(r5, 0xe2e20003)
+
+# L1 resumes L2, which executes HLT. Exit reason is mapped to 0xe2e20001.
+#
+ioctl$KVM_RUN(r3, AUTO, 0x0)
+syz_kvm_assert_syzos_uexit$x86(r5, 0xe2e20001)
+
+# guest_main should finish with guest_uexit(-1).
+#
+ioctl$KVM_RUN(r3, AUTO, 0x0)
+syz_kvm_assert_syzos_uexit$x86(r5, 0xffffffff)
diff --git a/sys/linux/test/amd64-syz_kvm_nested_vmresume-rdtsc b/sys/linux/test/amd64-syz_kvm_nested_vmresume-rdtsc
new file mode 100644
index 000000000000..3c0e25635817
--- /dev/null
+++ b/sys/linux/test/amd64-syz_kvm_nested_vmresume-rdtsc
@@ -0,0 +1,27 @@
+#
+# requires: arch=amd64 -threaded
+#
+r0 = openat$kvm(0, &AUTO='/dev/kvm\x00', 0x0, 0x0)
+r1 = ioctl$KVM_CREATE_VM(r0, AUTO, 0x0)
+r2 = syz_kvm_setup_syzos_vm$x86(r1, &(0x7f0000c00000/0x400000)=nil)
+
+# Create a nested VM that performs RDTSC (0f 31) and HLT (f4).
+#
+r3 = syz_kvm_add_vcpu$x86(r2, &AUTO={0x0, &AUTO=[@enable_nested={AUTO, AUTO, 0x0}, @nested_create_vm={AUTO, AUTO, 0x0}, @nested_load_code={AUTO, AUTO, {0x0, "0f31f4"}}, @nested_vmlaunch={AUTO, AUTO, 0x0}, @nested_vmresume={AUTO, AUTO, 0x0}], AUTO})
+r4 = ioctl$KVM_GET_VCPU_MMAP_SIZE(r0, AUTO)
+r5 = mmap$KVM_VCPU(&(0x7f0000009000/0x1000)=nil, r4, 0x3, 0x1, r3, 0x0)
+
+# L2 VM executes RDTSC. Exit reason is mapped to 0xe2e20004.
+#
+ioctl$KVM_RUN(r3, AUTO, 0x0)
+syz_kvm_assert_syzos_uexit$x86(r5, 0xe2e20004)
+
+# L1 resumes L2, which executes HLT. Exit reason is mapped to 0xe2e20001.
+#
+ioctl$KVM_RUN(r3, AUTO, 0x0)
+syz_kvm_assert_syzos_uexit$x86(r5, 0xe2e20001)
+
+# guest_main should finish with guest_uexit(-1).
+#
+ioctl$KVM_RUN(r3, AUTO, 0x0)
+syz_kvm_assert_syzos_uexit$x86(r5, 0xffffffff)
diff --git a/sys/linux/test/amd64-syz_kvm_nested_vmresume-rdtscp b/sys/linux/test/amd64-syz_kvm_nested_vmresume-rdtscp
new file mode 100644
index 000000000000..0ccff3425397
--- /dev/null
+++ b/sys/linux/test/amd64-syz_kvm_nested_vmresume-rdtscp
@@ -0,0 +1,27 @@
+#
+# requires: arch=amd64 -threaded
+#
+r0 = openat$kvm(0, &AUTO='/dev/kvm\x00', 0x0, 0x0)
+r1 = ioctl$KVM_CREATE_VM(r0, AUTO, 0x0)
+r2 = syz_kvm_setup_syzos_vm$x86(r1, &(0x7f0000c00000/0x400000)=nil)
+
+# Create a nested VM that performs RDTSCP (0f 01 f9) and HLT (f4).
+#
+r3 = syz_kvm_add_vcpu$x86(r2, &AUTO={0x0, &AUTO=[@enable_nested={AUTO, AUTO, 0x0}, @nested_create_vm={AUTO, AUTO, 0x0}, @nested_load_code={AUTO, AUTO, {0x0, "0f01f9f4"}}, @nested_vmlaunch={AUTO, AUTO, 0x0}, @nested_vmresume={AUTO, AUTO, 0x0}], AUTO})
+r4 = ioctl$KVM_GET_VCPU_MMAP_SIZE(r0, AUTO)
+r5 = mmap$KVM_VCPU(&(0x7f0000009000/0x1000)=nil, r4, 0x3, 0x1, r3, 0x0)
+
+# L2 VM executes RDTSCP. Exit reason is mapped to 0xe2e20005.
+#
+ioctl$KVM_RUN(r3, AUTO, 0x0)
+syz_kvm_assert_syzos_uexit$x86(r5, 0xe2e20005)
+
+# L1 resumes L2, which executes HLT. Exit reason is mapped to 0xe2e20001.
+#
+ioctl$KVM_RUN(r3, AUTO, 0x0)
+syz_kvm_assert_syzos_uexit$x86(r5, 0xe2e20001)
+
+# guest_main should finish with guest_uexit(-1).
+#
+ioctl$KVM_RUN(r3, AUTO, 0x0)
+syz_kvm_assert_syzos_uexit$x86(r5, 0xffffffff)