
Commit 50b6806

core: fix bug that INVEPT is not invoked on Mac
There is a coexistence issue on macOS when several VMMs are running at the same time: because of it, INVEPT might not be invoked. This change fixes the issue so that INVEPT is actually invoked, and also refactors the existing VMXON/VMXOFF logic into two functions, cpu_vmxroot_enter() and cpu_vmxroot_leave(). The function names come from PR #49.
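After this change, every per-CPU VMX root transition goes through cpu_vmxroot_enter() / cpu_vmxroot_leave(), which record the result in cpu_data->vmxon_err / vmxoff_err and, on macOS, tolerate another VMM that already put the processor in VMX operation. A minimal sketch of the resulting call pattern, mirroring the updated invept_smpfunc() in core/ept.c below (bundle is that function's argument):

    struct per_cpu_data *cpu_data = current_cpu_data();

    cpu_data->invept_err = VMX_SUCCEED;

    // Enter VMX root mode; on macOS this tolerates another VMM already
    // being in VMX operation and records the result in cpu_data->vmxon_err.
    cpu_vmxroot_enter();

    if (!(cpu_data->vmxon_err & VMX_FAIL_MASK)) {
        cpu_data->invept_err = __invept(bundle->type, bundle->desc);
        // VMXOFF only happens if we performed VMXON ourselves (VMXON_HAX set).
        cpu_vmxroot_leave();
    }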
1 parent 3228f29 commit 50b6806

File tree

3 files changed (+126 / -119 lines)


core/cpu.c

Lines changed: 121 additions & 107 deletions
@@ -567,8 +567,6 @@ uint32 load_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)
     struct per_cpu_data *cpu_data;
     paddr_t vmcs_phy;
     paddr_t curr_vmcs = VMCS_NONE;
-    vmx_error_t err = 0;
-    uint64 fc_msr;

     hax_disable_preemption(flags);

@@ -584,87 +582,9 @@ uint32 load_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)
         return 0;
     }

-    cpu_data->host_cr4_vmxe = (get_cr4() & CR4_VMXE);
-    if(cpu_data->host_cr4_vmxe) {
-        if (debug_vmcs_count % 100000 == 0) {
-            hax_debug("host VT has enabled!\n");
-            hax_debug("Cr4 value = 0x%lx\n", get_cr4());
-            log_host_cr4_vmxe = 1;
-            log_host_cr4 = get_cr4();
-        }
-        debug_vmcs_count++;
-    }
-    set_cr4(get_cr4() | CR4_VMXE);
-    /* HP systems & Mac systems workaround
-     * When resuming from S3, some HP/Mac set the IA32_FEATURE_CONTROL MSR to
-     * zero. Setting the lock bit to zero & then doing 'vmxon' would cause a GP.
-     * As a workaround, when we see this condition, we enable the bits so that
-     * we can launch vmxon & thereby hax.
-     * bit 0 - Lock bit
-     * bit 2 - Enable VMX outside SMX operation
-     *
-     * ********* To Do **************************************
-     * This is the workground to fix BSOD when resume from S3
-     * The best way is to add one power management handler, and set
-     * IA32_FEATURE_CONTROL MSR in that PM S3 handler
-     * *****************************************************
-     */
-    fc_msr = ia32_rdmsr(IA32_FEATURE_CONTROL);
-    if (!(fc_msr & FC_LOCKED))
-        ia32_wrmsr(IA32_FEATURE_CONTROL,
-                   fc_msr | FC_LOCKED | FC_VMXON_OUTSMX);
-
-    err = __vmxon(hax_page_pa(cpu_data->vmxon_page));
-
-    log_vmxon_err = err;
-    log_vmxon_addr = hax_page_pa(cpu_data->vmxon_page);
-
-    if (!(err & VMX_FAIL_MASK))
-        cpu_data->vmm_flag |= VMXON_HAX;
-    else {
-        bool fatal = true;
-
-#ifdef __MACH__
-        if ((err & VMX_FAIL_INVALID) && cpu_data->host_cr4_vmxe) {
-            // On macOS, if VMXON fails with VMX_FAIL_INVALID and host CR4.VMXE
-            // was already set, it is very likely that another VMM (VirtualBox
-            // or any VMM based on macOS Hypervisor Framework, e.g. Docker) is
-            // running and did not call VMXOFF. In that case, the current host
-            // logical processor is already in VMX operation, and we can use an
-            // innocuous VMX instruction (VMPTRST) to confirm that.
-            // However, if the above assumption is wrong and the host processor
-            // is not actually in VMX operation, VMPTRST will probably cause a
-            // host reboot. But we don't have a better choice, and it is worth
-            // taking the risk.
-            curr_vmcs = __vmptrst();
-            if (curr_vmcs == VMCS_NONE) {
-                hax_debug("Already in VMX operation, courtesy of another"
-                          " VMM (VirtualBox or macOS Hypervisor Framework)\n");
-                fatal = false;
-                // Indicate that it is not necessary to call VMXOFF later
-                cpu_data->vmm_flag &= ~VMXON_HAX;
-            } else {
-                // Should never happen
-                hax_error("VMXON failed with VMX_FAIL_INVALID, but there is a"
-                          " current VMCS at 0x%llx\n", curr_vmcs);
-            }
-        }
-#endif
-
-        if (fatal) {
-            hax_error("VMXON failed for region 0x%llx (err=0x%x)\n",
-                      hax_page_pa(cpu_data->vmxon_page), (uint32) err);
-            restore_host_cr4_vmxe(cpu_data);
-            if (err & VMX_FAIL_INVALID) {
-                log_vmxon_err_type1 = 1;
-            } else {
-                // TODO: Should VMX_FAIL_VALID be ignored? The current VMCS can
-                // be cleared (deactivated and saved to memory) using VMCLEAR
-                log_vmxon_err_type2 = 1;
-            }
-            hax_enable_preemption(flags);
-            return VMXON_FAIL;
-        }
+    if (cpu_vmxroot_enter() != VMX_SUCCEED) {
+        hax_enable_preemption(flags);
+        return VMXON_FAIL;
     }

     if (vcpu)
@@ -679,9 +599,7 @@ uint32 load_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)

     if (__vmptrld(vmcs_phy) != VMX_SUCCEED) {
         hax_error("HAX: vmptrld failed (%08llx)\n", vmcs_phy);
-        cpu_data->vmm_flag = 0;
-        __vmxoff();
-        restore_host_cr4_vmxe(cpu_data);
+        cpu_vmxroot_leave();
         log_vmxon_err_type3 = 1;
         hax_enable_preemption(flags);
         return VMPTRLD_FAIL;
@@ -716,7 +634,6 @@ uint32 put_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)
     int cpu_id = hax_cpuid();
     struct per_cpu_data *cpu_data = hax_cpu_data[cpu_id];
     paddr_t vmcs_phy;
-    vmx_error_t err = 0;
     vmx_error_t vmxoff_err = 0;
     if (vcpu && cpu_data->nested > 0) {
         cpu_data->nested--;
@@ -735,27 +652,8 @@ uint32 put_vmcs(struct vcpu_t *vcpu, preempt_flag *flags)

     cpu_data->current_vcpu = NULL;

-    if (cpu_data->vmm_flag & VMXON_HAX) {
-        err = __vmxoff();
-        if (!(err & VMX_FAIL_MASK)) {
-            restore_host_cr4_vmxe(cpu_data);
-        } else {
-            hax_error("VMXOFF Failed..........\n");
-            vmxoff_err = err;
-            log_vmxoff_err = err;
-        }
-    } else {
-        log_vmxoff_no = 1;
-#ifdef __MACH__
-        hax_debug("Skipping VMXOFF because another VMM (VirtualBox or macOS"
-                  " Hypervisor Framework) is running\n");
-#else
-        vmxoff_err = 0x1;
-        hax_error("NO VMXOFF.......\n");
-#endif
-    }
+    vmxoff_err = cpu_vmxroot_leave();
     cpu_data->other_vmcs = VMCS_NONE;
-    cpu_data->vmm_flag = 0;
     if (vcpu && vcpu->is_vmcs_loaded)
         vcpu->is_vmcs_loaded = 0;
 out:
@@ -817,3 +715,119 @@ static vmx_error_t cpu_vmentry_failed(struct vcpu_t *vcpu, vmx_error_t err)
     hax_log("end of cpu_vmentry_failed\n");
     return err;
 }
+
+vmx_error_t cpu_vmxroot_leave(void)
+{
+    struct per_cpu_data *cpu_data = current_cpu_data();
+    vmx_error_t err = VMX_SUCCEED;
+
+    if (cpu_data->vmm_flag & VMXON_HAX) {
+        err = __vmxoff();
+        if (!(err & VMX_FAIL_MASK)) {
+            cpu_data->vmm_flag &= ~VMXON_HAX;
+            restore_host_cr4_vmxe(cpu_data);
+        } else {
+            hax_error("VMXOFF Failed..........\n");
+        }
+    } else {
+        log_vmxoff_no = 1;
+#ifdef __MACH__
+        hax_debug("Skipping VMXOFF because another VMM (VirtualBox or macOS"
+                  " Hypervisor Framework) is running\n");
+#else
+        // It should not go here in Win64/win32
+        err = VMX_FAIL_VALID;
+        hax_error("NO VMXOFF.......\n");
+#endif
+    }
+    cpu_data->vmxoff_err = err;
+
+    return err;
+}
+
+vmx_error_t cpu_vmxroot_enter(void)
+{
+    struct per_cpu_data *cpu_data = current_cpu_data();
+    uint64 fc_msr;
+    vmx_error_t err = VMX_SUCCEED;
+
+    cpu_data->host_cr4_vmxe = (get_cr4() & CR4_VMXE);
+    if (cpu_data->host_cr4_vmxe) {
+        if (debug_vmcs_count % 100000 == 0) {
+            hax_debug("host VT has enabled!\n");
+            hax_debug("Cr4 value = 0x%lx\n", get_cr4());
+            log_host_cr4_vmxe = 1;
+            log_host_cr4 = get_cr4();
+        }
+        debug_vmcs_count++;
+    }
+
+    set_cr4(get_cr4() | CR4_VMXE);
+    /* HP systems & Mac systems workaround
+     * When resuming from S3, some HP/Mac set the IA32_FEATURE_CONTROL MSR to
+     * zero. Setting the lock bit to zero & then doing 'vmxon' would cause a GP.
+     * As a workaround, when we see this condition, we enable the bits so that
+     * we can launch vmxon & thereby hax.
+     * bit 0 - Lock bit
+     * bit 2 - Enable VMX outside SMX operation
+     *
+     * ********* To Do **************************************
+     * This is the workground to fix BSOD when resume from S3
+     * The best way is to add one power management handler, and set
+     * IA32_FEATURE_CONTROL MSR in that PM S3 handler
+     * *****************************************************
+     */
+    fc_msr = ia32_rdmsr(IA32_FEATURE_CONTROL);
+    if (!(fc_msr & FC_LOCKED))
+        ia32_wrmsr(IA32_FEATURE_CONTROL,
+                   fc_msr | FC_LOCKED | FC_VMXON_OUTSMX);
+
+    err = __vmxon(hax_page_pa(cpu_data->vmxon_page));
+
+    log_vmxon_err = err;
+    log_vmxon_addr = hax_page_pa(cpu_data->vmxon_page);
+
+    if (!(err & VMX_FAIL_MASK)) {
+        cpu_data->vmm_flag |= VMXON_HAX;
+    } else {
+        bool fatal = true;
+
+#ifdef __MACH__
+        if ((err & VMX_FAIL_INVALID) && cpu_data->host_cr4_vmxe) {
+            // On macOS, if VMXON fails with VMX_FAIL_INVALID and host CR4.VMXE
+            // was already set, it is very likely that another VMM (VirtualBox
+            // or any VMM based on macOS Hypervisor Framework, e.g. Docker) is
+            // running and did not call VMXOFF. In that case, the current host
+            // logical processor is already in VMX operation, and we can use an
+            // innocuous VMX instruction (VMPTRST) to confirm that.
+            // However, if the above assumption is wrong and the host processor
+            // is not actually in VMX operation, VMPTRST will probably cause a
+            // host reboot. But we don't have a better choice, and it is worth
+            // taking the risk.
+            __vmptrst();
+
+            // It is still alive - Just assumption is right.
+            fatal = false;
+            err = VMX_SUCCEED;
+            // Indicate that it is not necessary to call VMXOFF later
+            cpu_data->vmm_flag &= ~VMXON_HAX;
+        }
+#endif
+
+        if (fatal) {
+            hax_error("VMXON failed for region 0x%llx (err=0x%x, vmxe=%x)\n",
+                      hax_page_pa(cpu_data->vmxon_page), (uint32)err,
+                      (uint32)cpu_data->host_cr4_vmxe);
+            restore_host_cr4_vmxe(cpu_data);
+            if (err & VMX_FAIL_INVALID) {
+                log_vmxon_err_type1 = 1;
+            } else {
+                // TODO: Should VMX_FAIL_VALID be ignored? The current VMCS can
+                // be cleared (deactivated and saved to memory) using VMCLEAR
+                log_vmxon_err_type2 = 1;
+            }
+        }
+    }
+    cpu_data->vmxon_err = err;
+    return err;
+}

core/ept.c

Lines changed: 2 additions & 11 deletions
@@ -330,22 +330,13 @@ static void invept_smpfunc(struct invept_bundle *bundle)

     smp_mb();
     cpu_data = current_cpu_data();
-    cpu_data->vmxon_err = VMX_SUCCEED;
-    cpu_data->vmxoff_err = VMX_SUCCEED;
     cpu_data->invept_err = VMX_SUCCEED;

-    cpu_data->host_cr4_vmxe = get_cr4() & CR4_VMXE;
-    set_cr4(get_cr4() | CR4_VMXE);
-    cpu_data->vmxon_err = __vmxon(hax_page_pa(cpu_data->vmxon_page));
+    cpu_vmxroot_enter();

     if (!(cpu_data->vmxon_err & VMX_FAIL_MASK)) {
         cpu_data->invept_err = __invept(bundle->type, bundle->desc);
-        cpu_data->vmxoff_err = __vmxoff();
-        if (cpu_data->host_cr4_vmxe) {
-            set_cr4(get_cr4() | CR4_VMXE);
-        } else {
-            set_cr4(get_cr4() & ~CR4_VMXE);
-        }
+        cpu_vmxroot_leave();
     }
 }

core/include/cpu.h

Lines changed: 3 additions & 1 deletion
@@ -165,7 +165,6 @@ static vmcs_t * current_cpu_vmcs(void)

 void cpu_init_vmx(void *arg);
 void cpu_exit_vmx(void *arg);
-void cpu_enter_vmx(void *arg);

 void cpu_pmu_init(void *arg);

@@ -182,6 +181,9 @@ vmx_error_t vmptrld(paddr_t vmcs, struct vcpu_t *vcpu);
 vmx_error_t resume(paddr_t vmcs, struct vcpu_t *vcpu);
 vmx_error_t launch(paddr_t vmcs, struct vcpu_t *vcpu);

+vmx_error_t cpu_vmxroot_leave(void);
+vmx_error_t cpu_vmxroot_enter(void);
+
 extern struct hax_page *io_bitmap_page_a;
 extern struct hax_page *io_bitmap_page_b;
 extern struct hax_page *msr_bitmap_page;

0 commit comments
