diff options
Diffstat (limited to 'target/i386/hvf/hvf.c')
-rw-r--r-- | target/i386/hvf/hvf.c | 721 |
1 files changed, 230 insertions, 491 deletions
diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index e193022c03..11ffdd4c69 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -13,10 +13,10 @@ * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. + * General Public License for more details. * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * This file contain code under public domain from the hvdos project: * https://github.com/mist64/hvdos @@ -45,11 +45,15 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. */ + #include "qemu/osdep.h" -#include "qemu-common.h" #include "qemu/error-report.h" +#include "qemu/memalign.h" #include "sysemu/hvf.h" +#include "sysemu/hvf_int.h" +#include "sysemu/runstate.h" +#include "sysemu/cpus.h" #include "hvf-i386.h" #include "vmcs.h" #include "vmx.h" @@ -63,165 +67,13 @@ #include <Hypervisor/hv.h> #include <Hypervisor/hv_vmx.h> +#include <sys/sysctl.h> -#include "exec/address-spaces.h" #include "hw/i386/apic_internal.h" -#include "hw/boards.h" #include "qemu/main-loop.h" -#include "sysemu/accel.h" -#include "sysemu/sysemu.h" +#include "qemu/accel.h" #include "target/i386/cpu.h" -HVFState *hvf_state; - -static void assert_hvf_ok(hv_return_t ret) -{ - if (ret == HV_SUCCESS) { - return; - } - - switch (ret) { - case HV_ERROR: - error_report("Error: HV_ERROR"); - break; - case HV_BUSY: - error_report("Error: HV_BUSY"); - break; - case HV_BAD_ARGUMENT: - error_report("Error: HV_BAD_ARGUMENT"); - break; - case HV_NO_RESOURCES: - error_report("Error: HV_NO_RESOURCES"); - break; - case HV_NO_DEVICE: - error_report("Error: HV_NO_DEVICE"); - break; - case HV_UNSUPPORTED: - error_report("Error: HV_UNSUPPORTED"); - break; - default: - error_report("Unknown Error"); - } - - abort(); -} - -/* Memory slots */ -hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end) -{ - hvf_slot *slot; - int x; - for (x = 0; x < hvf_state->num_slots; ++x) { - slot = &hvf_state->slots[x]; - if (slot->size && start < (slot->start + slot->size) && - end > slot->start) { - return slot; - } - } - return NULL; -} - -struct mac_slot { - int present; - uint64_t size; - uint64_t gpa_start; - uint64_t gva; -}; - -struct mac_slot mac_slots[32]; -#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1)) - -static int do_hvf_set_memory(hvf_slot *slot) -{ - struct mac_slot *macslot; - hv_memory_flags_t flags; - hv_return_t ret; - - macslot = &mac_slots[slot->slot_id]; - - if (macslot->present) { - if (macslot->size != slot->size) { - macslot->present = 0; - ret = hv_vm_unmap(macslot->gpa_start, macslot->size); - assert_hvf_ok(ret); - } - } - - if (!slot->size) { - return 0; - } - - flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; - - macslot->present = 1; - macslot->gpa_start = slot->start; - macslot->size = slot->size; - ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags); - assert_hvf_ok(ret); - return 0; -} - -void hvf_set_phys_mem(MemoryRegionSection *section, bool add) -{ - hvf_slot *mem; - MemoryRegion *area = section->mr; - - if (!memory_region_is_ram(area)) { - return; - } - - mem = hvf_find_overlap_slot( - section->offset_within_address_space, - section->offset_within_address_space + int128_get64(section->size)); - - if (mem && add) { - if (mem->size == int128_get64(section->size) && - mem->start == section->offset_within_address_space && - mem->mem == (memory_region_get_ram_ptr(area) + - section->offset_within_region)) { - return; /* Same region was attempted to register, go away. */ - } - } - - /* Region needs to be reset. set the size to 0 and remap it. */ - if (mem) { - mem->size = 0; - if (do_hvf_set_memory(mem)) { - error_report("Failed to reset overlapping slot"); - abort(); - } - } - - if (!add) { - return; - } - - /* Now make a new slot. */ - int x; - - for (x = 0; x < hvf_state->num_slots; ++x) { - mem = &hvf_state->slots[x]; - if (!mem->size) { - break; - } - } - - if (x == hvf_state->num_slots) { - error_report("No free slots"); - abort(); - } - - mem->size = int128_get64(section->size); - mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region; - mem->start = section->offset_within_address_space; - mem->region = area; - - if (do_hvf_set_memory(mem)) { - error_report("Error registering new memory slot"); - abort(); - } -} - void vmx_update_tpr(CPUState *cpu) { /* TODO: need integrate APIC handling */ @@ -229,32 +81,24 @@ void vmx_update_tpr(CPUState *cpu) int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4; int irr = apic_get_highest_priority_irr(x86_cpu->apic_state); - wreg(cpu->hvf_fd, HV_X86_TPR, tpr); + wreg(cpu->accel->fd, HV_X86_TPR, tpr); if (irr == -1) { - wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0); + wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, 0); } else { - wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 : + wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 : irr >> 4); } } -void update_apic_tpr(CPUState *cpu) +static void update_apic_tpr(CPUState *cpu) { X86CPU *x86_cpu = X86_CPU(cpu); - int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4; + int tpr = rreg(cpu->accel->fd, HV_X86_TPR) >> 4; cpu_set_apic_tpr(x86_cpu->apic_state, tpr); } #define VECTORING_INFO_VECTOR_MASK 0xff -static void hvf_handle_interrupt(CPUState * cpu, int mask) -{ - cpu->interrupt_request |= mask; - if (!qemu_cpu_is_self(cpu)) { - qemu_cpu_kick(cpu); - } -} - void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer, int direction, int size, int count) { @@ -269,48 +113,6 @@ void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer, } } -/* TODO: synchronize vcpu state */ -static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) -{ - CPUState *cpu_state = cpu; - if (cpu_state->vcpu_dirty == 0) { - hvf_get_registers(cpu_state); - } - - cpu_state->vcpu_dirty = 1; -} - -void hvf_cpu_synchronize_state(CPUState *cpu_state) -{ - if (cpu_state->vcpu_dirty == 0) { - run_on_cpu(cpu_state, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL); - } -} - -static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) -{ - CPUState *cpu_state = cpu; - hvf_put_registers(cpu_state); - cpu_state->vcpu_dirty = false; -} - -void hvf_cpu_synchronize_post_reset(CPUState *cpu_state) -{ - run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); -} - -void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) -{ - CPUState *cpu_state = cpu; - hvf_put_registers(cpu_state); - cpu_state->vcpu_dirty = false; -} - -void hvf_cpu_synchronize_post_init(CPUState *cpu_state) -{ - run_on_cpu(cpu_state, _hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL); -} - static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual) { int read, write; @@ -345,202 +147,96 @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual) return false; } - return !slot; + if (!slot) { + return true; + } + if (!memory_region_is_ram(slot->region) && + !(read && memory_region_is_romd(slot->region))) { + return true; + } + return false; } -static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) +void hvf_arch_vcpu_destroy(CPUState *cpu) { - hvf_slot *slot; - - slot = hvf_find_overlap_slot( - section->offset_within_address_space, - section->offset_within_address_space + int128_get64(section->size)); - - /* protect region against writes; begin tracking it */ - if (on) { - slot->flags |= HVF_SLOT_LOG; - hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size, - HV_MEMORY_READ); - /* stop tracking region*/ - } else { - slot->flags &= ~HVF_SLOT_LOG; - hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size, - HV_MEMORY_READ | HV_MEMORY_WRITE); - } + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + + g_free(env->hvf_mmio_buf); } -static void hvf_log_start(MemoryListener *listener, - MemoryRegionSection *section, int old, int new) +static void init_tsc_freq(CPUX86State *env) { - if (old != 0) { + size_t length; + uint64_t tsc_freq; + + if (env->tsc_khz != 0) { return; } - hvf_set_dirty_tracking(section, 1); + length = sizeof(uint64_t); + if (sysctlbyname("machdep.tsc.frequency", &tsc_freq, &length, NULL, 0)) { + return; + } + env->tsc_khz = tsc_freq / 1000; /* Hz to KHz */ } -static void hvf_log_stop(MemoryListener *listener, - MemoryRegionSection *section, int old, int new) +static void init_apic_bus_freq(CPUX86State *env) { - if (new != 0) { + size_t length; + uint64_t bus_freq; + + if (env->apic_bus_freq != 0) { return; } - hvf_set_dirty_tracking(section, 0); -} - -static void hvf_log_sync(MemoryListener *listener, - MemoryRegionSection *section) -{ - /* - * sync of dirty pages is handled elsewhere; just make sure we keep - * tracking the region. - */ - hvf_set_dirty_tracking(section, 1); + length = sizeof(uint64_t); + if (sysctlbyname("hw.busfrequency", &bus_freq, &length, NULL, 0)) { + return; + } + env->apic_bus_freq = bus_freq; } -static void hvf_region_add(MemoryListener *listener, - MemoryRegionSection *section) +static inline bool tsc_is_known(CPUX86State *env) { - hvf_set_phys_mem(section, true); + return env->tsc_khz != 0; } -static void hvf_region_del(MemoryListener *listener, - MemoryRegionSection *section) +static inline bool apic_bus_freq_is_known(CPUX86State *env) { - hvf_set_phys_mem(section, false); -} - -static MemoryListener hvf_memory_listener = { - .priority = 10, - .region_add = hvf_region_add, - .region_del = hvf_region_del, - .log_start = hvf_log_start, - .log_stop = hvf_log_stop, - .log_sync = hvf_log_sync, -}; - -void hvf_reset_vcpu(CPUState *cpu) { - - /* TODO: this shouldn't be needed; there is already a call to - * cpu_synchronize_all_post_reset in vl.c - */ - wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0); - macvm_set_cr0(cpu->hvf_fd, 0x60000010); - - wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK); - wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_CR4, CR4_VMXE_MASK); - - /* set VMCS guest state fields */ - wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_SELECTOR, 0xf000); - wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_LIMIT, 0xffff); - wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_ACCESS_RIGHTS, 0x9b); - wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_BASE, 0xffff0000); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_LIMIT, 0xffff); - wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_ACCESS_RIGHTS, 0x93); - wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_LIMIT, 0xffff); - wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_ACCESS_RIGHTS, 0x93); - wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_LIMIT, 0xffff); - wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_ACCESS_RIGHTS, 0x93); - wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_LIMIT, 0xffff); - wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_ACCESS_RIGHTS, 0x93); - wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_LIMIT, 0xffff); - wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_ACCESS_RIGHTS, 0x93); - wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x10000); - wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_LIMIT, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_ACCESS_RIGHTS, 0x83); - wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE, 0); - - /*wvmcs(cpu->hvf_fd, VMCS_GUEST_CR2, 0x0);*/ - wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, 0x0); - - wreg(cpu->hvf_fd, HV_X86_RIP, 0xfff0); - wreg(cpu->hvf_fd, HV_X86_RDX, 0x623); - wreg(cpu->hvf_fd, HV_X86_RFLAGS, 0x2); - wreg(cpu->hvf_fd, HV_X86_RSP, 0x0); - wreg(cpu->hvf_fd, HV_X86_RAX, 0x0); - wreg(cpu->hvf_fd, HV_X86_RBX, 0x0); - wreg(cpu->hvf_fd, HV_X86_RCX, 0x0); - wreg(cpu->hvf_fd, HV_X86_RSI, 0x0); - wreg(cpu->hvf_fd, HV_X86_RDI, 0x0); - wreg(cpu->hvf_fd, HV_X86_RBP, 0x0); - - for (int i = 0; i < 8; i++) { - wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0); - } - - hv_vm_sync_tsc(0); - cpu->halted = 0; - hv_vcpu_invalidate_tlb(cpu->hvf_fd); - hv_vcpu_flush(cpu->hvf_fd); + return env->apic_bus_freq != 0; } -void hvf_vcpu_destroy(CPUState *cpu) +void hvf_kick_vcpu_thread(CPUState *cpu) { - hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd); - assert_hvf_ok(ret); + cpus_kick_thread(cpu); } -static void dummy_signal(int sig) +int hvf_arch_init(void) { + return 0; } -int hvf_init_vcpu(CPUState *cpu) +int hvf_arch_init_vcpu(CPUState *cpu) { - X86CPU *x86cpu = X86_CPU(cpu); CPUX86State *env = &x86cpu->env; - int r; - - /* init cpu signals */ - sigset_t set; - struct sigaction sigact; - - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_handler = dummy_signal; - sigaction(SIG_IPI, &sigact, NULL); - - pthread_sigmask(SIG_BLOCK, NULL, &set); - sigdelset(&set, SIG_IPI); + uint64_t reqCap; init_emu(); init_decoder(); hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1); - env->hvf_emul = g_new0(HVFX86EmulatorState, 1); + env->hvf_mmio_buf = g_new(char, 4096); - r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT); - cpu->vcpu_dirty = 1; - assert_hvf_ok(r); + if (x86cpu->vmware_cpuid_freq) { + init_tsc_freq(env); + init_apic_bus_freq(env); + + if (!tsc_is_known(env) || !apic_bus_freq_is_known(env)) { + error_report("vmware-cpuid-freq: feature couldn't be enabled"); + } + } if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED, &hvf_state->hvf_caps->vmx_cap_pinbased)) { @@ -560,45 +256,57 @@ int hvf_init_vcpu(CPUState *cpu) } /* set VMCS control fields */ - wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS, + wvmcs(cpu->accel->fd, VMCS_PIN_BASED_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased, - VMCS_PIN_BASED_CTLS_EXTINT | - VMCS_PIN_BASED_CTLS_NMI | - VMCS_PIN_BASED_CTLS_VNMI)); - wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, + VMCS_PIN_BASED_CTLS_EXTINT | + VMCS_PIN_BASED_CTLS_NMI | + VMCS_PIN_BASED_CTLS_VNMI)); + wvmcs(cpu->accel->fd, VMCS_PRI_PROC_BASED_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased, - VMCS_PRI_PROC_BASED_CTLS_HLT | - VMCS_PRI_PROC_BASED_CTLS_MWAIT | - VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET | - VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) | + VMCS_PRI_PROC_BASED_CTLS_HLT | + VMCS_PRI_PROC_BASED_CTLS_MWAIT | + VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET | + VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) | VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL); - wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS, - cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2, - VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES)); - wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, - 0)); - wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */ + reqCap = VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES; + + /* Is RDTSCP support in CPUID? If so, enable it in the VMCS. */ + if (hvf_get_supported_cpuid(0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { + reqCap |= VMCS_PRI_PROC_BASED2_CTLS_RDTSCP; + } - wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0); + wvmcs(cpu->accel->fd, VMCS_SEC_PROC_BASED_CTLS, + cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2, reqCap)); - hvf_reset_vcpu(cpu); + wvmcs(cpu->accel->fd, VMCS_ENTRY_CTLS, + cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0)); + wvmcs(cpu->accel->fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */ + + wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, 0); x86cpu = X86_CPU(cpu); - x86cpu->env.xsave_buf = qemu_memalign(4096, 4096); - - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1); - /*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/ - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1); + x86cpu->env.xsave_buf_len = 4096; + x86cpu->env.xsave_buf = qemu_memalign(4096, x86cpu->env.xsave_buf_len); + + /* + * The allocated storage must be large enough for all of the + * possible XSAVE state components. + */ + assert(hvf_get_supported_cpuid(0xd, 0, R_ECX) <= x86cpu->env.xsave_buf_len); + + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_STAR, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_LSTAR, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_CSTAR, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_FMASK, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_FSBASE, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_GSBASE, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_KERNELGSBASE, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_TSC_AUX, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_TSC, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_CS, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_EIP, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_ESP, 1); return 0; } @@ -608,9 +316,13 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; - env->exception_injected = -1; + env->exception_nr = -1; + env->exception_pending = 0; + env->exception_injected = 0; env->interrupt_injected = -1; env->nmi_injected = false; + env->ins_len = 0; + env->has_error_code = false; if (idtvec_info & VMCS_IDT_VEC_VALID) { switch (idtvec_info & VMCS_IDT_VEC_TYPE) { case VMCS_IDT_VEC_HWINTR: @@ -622,7 +334,8 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in break; case VMCS_IDT_VEC_HWEXCEPTION: case VMCS_IDT_VEC_SWEXCEPTION: - env->exception_injected = idtvec_info & VMCS_IDT_VEC_VECNUM; + env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM; + env->exception_injected = 1; break; case VMCS_IDT_VEC_PRIV_SWEXCEPTION: default: @@ -632,18 +345,18 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) { env->ins_len = ins_len; } - if (idtvec_info & VMCS_INTR_DEL_ERRCODE) { + if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { env->has_error_code = true; - env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR); + env->error_code = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_ERROR); } } - if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) & + if ((rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY) & VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) { env->hflags2 |= HF2_NMI_MASK; } else { env->hflags2 &= ~HF2_NMI_MASK; } - if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) & + if (rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY) & (VMCS_INTERRUPTIBILITY_STI_BLOCKING | VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) { env->hflags |= HF_INHIBIT_IRQ_MASK; @@ -652,6 +365,48 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in } } +static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + /* + * A wrapper extends cpu_x86_cpuid with 0x40000000 and 0x40000010 leafs, + * leafs 0x40000001-0x4000000F are filled with zeros + * Provides vmware-cpuid-freq support to hvf + * + * Note: leaf 0x40000000 not exposes HVF, + * leaving hypervisor signature empty + */ + + if (index < 0x40000000 || index > 0x40000010 || + !tsc_is_known(env) || !apic_bus_freq_is_known(env)) { + + cpu_x86_cpuid(env, index, count, eax, ebx, ecx, edx); + return; + } + + switch (index) { + case 0x40000000: + *eax = 0x40000010; /* Max available cpuid leaf */ + *ebx = 0; /* Leave signature empty */ + *ecx = 0; + *edx = 0; + break; + case 0x40000010: + *eax = env->tsc_khz; + *ebx = env->apic_bus_freq / 1000; /* Hz to KHz */ + *ecx = 0; + *edx = 0; + break; + default: + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + break; + } +} + int hvf_vcpu_exec(CPUState *cpu) { X86CPU *x86_cpu = X86_CPU(cpu); @@ -659,8 +414,6 @@ int hvf_vcpu_exec(CPUState *cpu) int ret = 0; uint64_t rip = 0; - cpu->halted = 0; - if (hvf_process_events(cpu)) { return EXCP_HLT; } @@ -676,29 +429,28 @@ int hvf_vcpu_exec(CPUState *cpu) } vmx_update_tpr(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) { - qemu_mutex_lock_iothread(); + bql_lock(); return EXCP_HLT; } - hv_return_t r = hv_vcpu_run(cpu->hvf_fd); + hv_return_t r = hv_vcpu_run(cpu->accel->fd); assert_hvf_ok(r); /* handle VMEXIT */ - uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON); - uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION); - uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd, + uint64_t exit_reason = rvmcs(cpu->accel->fd, VMCS_EXIT_REASON); + uint64_t exit_qual = rvmcs(cpu->accel->fd, VMCS_EXIT_QUALIFICATION); + uint32_t ins_len = (uint32_t)rvmcs(cpu->accel->fd, VMCS_EXIT_INSTRUCTION_LENGTH); - uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO); + uint64_t idtvec_info = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_INFO); hvf_store_events(cpu, ins_len, idtvec_info); - rip = rreg(cpu->hvf_fd, HV_X86_RIP); - RFLAGS(env) = rreg(cpu->hvf_fd, HV_X86_RFLAGS); - env->eflags = RFLAGS(env); + rip = rreg(cpu->accel->fd, HV_X86_RIP); + env->eflags = rreg(cpu->accel->fd, HV_X86_RFLAGS); - qemu_mutex_lock_iothread(); + bql_lock(); update_apic_tpr(cpu); current_cpu = cpu; @@ -708,11 +460,12 @@ int hvf_vcpu_exec(CPUState *cpu) case EXIT_REASON_HLT: { macvm_set_rip(cpu, rip + ins_len); if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && - (EFLAGS(env) & IF_MASK)) + (env->eflags & IF_MASK)) && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) && !(idtvec_info & VMCS_IDT_VEC_VALID)) { cpu->halted = 1; ret = EXCP_HLT; + break; } ret = EXCP_INTERRUPT; break; @@ -721,25 +474,23 @@ int hvf_vcpu_exec(CPUState *cpu) ret = EXCP_INTERRUPT; break; } - /* Need to check if MMIO or unmmaped fault */ + /* Need to check if MMIO or unmapped fault */ case EXIT_REASON_EPT_FAULT: { hvf_slot *slot; - uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS); + uint64_t gpa = rvmcs(cpu->accel->fd, VMCS_GUEST_PHYSICAL_ADDRESS); if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) && ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) { vmx_set_nmi_blocking(cpu); } - slot = hvf_find_overlap_slot(gpa, gpa); + slot = hvf_find_overlap_slot(gpa, 1); /* mmio */ if (ept_emulation_fault(slot, gpa, exit_qual)) { struct x86_decode decode; load_regs(cpu); - env->hvf_emul->fetch_rip = rip; - decode_instruction(env, &decode); exec_instruction(env, &decode); store_regs(cpu); @@ -768,11 +519,11 @@ int hvf_vcpu_exec(CPUState *cpu) } else { RAX(env) = (uint64_t)val; } - RIP(env) += ins_len; + env->eip += ins_len; store_regs(cpu); break; } else if (!string && !in) { - RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX); + RAX(env) = rreg(cpu->accel->fd, HV_X86_RAX); hvf_handle_io(env, port, &RAX(env), 1, size, 1); macvm_set_rip(cpu, rip + ins_len); break; @@ -780,8 +531,6 @@ int hvf_vcpu_exec(CPUState *cpu) struct x86_decode decode; load_regs(cpu); - env->hvf_emul->fetch_rip = rip; - decode_instruction(env, &decode); assert(ins_len == decode.len); exec_instruction(env, &decode); @@ -790,17 +539,21 @@ int hvf_vcpu_exec(CPUState *cpu) break; } case EXIT_REASON_CPUID: { - uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX); - uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX); - uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX); - uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX); - - cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx); + uint32_t rax = (uint32_t)rreg(cpu->accel->fd, HV_X86_RAX); + uint32_t rbx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RBX); + uint32_t rcx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RCX); + uint32_t rdx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RDX); + + if (rax == 1) { + /* CPUID1.ecx.OSXSAVE needs to know CR4 */ + env->cr[4] = rvmcs(cpu->accel->fd, VMCS_GUEST_CR4); + } + hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx); - wreg(cpu->hvf_fd, HV_X86_RAX, rax); - wreg(cpu->hvf_fd, HV_X86_RBX, rbx); - wreg(cpu->hvf_fd, HV_X86_RCX, rcx); - wreg(cpu->hvf_fd, HV_X86_RDX, rdx); + wreg(cpu->accel->fd, HV_X86_RAX, rax); + wreg(cpu->accel->fd, HV_X86_RBX, rbx); + wreg(cpu->accel->fd, HV_X86_RCX, rcx); + wreg(cpu->accel->fd, HV_X86_RDX, rdx); macvm_set_rip(cpu, rip + ins_len); break; @@ -808,16 +561,16 @@ int hvf_vcpu_exec(CPUState *cpu) case EXIT_REASON_XSETBV: { X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; - uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX); - uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX); - uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX); + uint32_t eax = (uint32_t)rreg(cpu->accel->fd, HV_X86_RAX); + uint32_t ecx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RCX); + uint32_t edx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RDX); if (ecx) { macvm_set_rip(cpu, rip + ins_len); break; } env->xcr0 = ((uint64_t)edx << 32) | eax; - wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1); + wreg(cpu->accel->fd, HV_X86_XCR0, env->xcr0 | 1); macvm_set_rip(cpu, rip + ins_len); break; } @@ -838,11 +591,11 @@ int hvf_vcpu_exec(CPUState *cpu) { load_regs(cpu); if (exit_reason == EXIT_REASON_RDMSR) { - simulate_rdmsr(cpu); + simulate_rdmsr(env); } else { - simulate_wrmsr(cpu); + simulate_wrmsr(env); } - RIP(env) += rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH); + env->eip += ins_len; store_regs(cpu); break; } @@ -856,11 +609,11 @@ int hvf_vcpu_exec(CPUState *cpu) switch (cr) { case 0x0: { - macvm_set_cr0(cpu->hvf_fd, RRX(env, reg)); + macvm_set_cr0(cpu->accel->fd, RRX(env, reg)); break; } case 4: { - macvm_set_cr4(cpu->hvf_fd, RRX(env, reg)); + macvm_set_cr4(cpu->accel->fd, RRX(env, reg)); break; } case 8: { @@ -878,7 +631,7 @@ int hvf_vcpu_exec(CPUState *cpu) error_report("Unrecognized CR %d", cr); abort(); } - RIP(env) += ins_len; + env->eip += ins_len; store_regs(cpu); break; } @@ -886,8 +639,6 @@ int hvf_vcpu_exec(CPUState *cpu) struct x86_decode decode; load_regs(cpu); - env->hvf_emul->fetch_rip = rip; - decode_instruction(env, &decode); exec_instruction(env, &decode); store_regs(cpu); @@ -898,7 +649,7 @@ int hvf_vcpu_exec(CPUState *cpu) break; } case EXIT_REASON_TASK_SWITCH: { - uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO); + uint64_t vinfo = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_INFO); x68_segment_selector sel = {.sel = exit_qual & 0xffff}; vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3, vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo @@ -911,12 +662,13 @@ int hvf_vcpu_exec(CPUState *cpu) break; } case EXIT_REASON_RDPMC: - wreg(cpu->hvf_fd, HV_X86_RAX, 0); - wreg(cpu->hvf_fd, HV_X86_RDX, 0); + wreg(cpu->accel->fd, HV_X86_RAX, 0); + wreg(cpu->accel->fd, HV_X86_RDX, 0); macvm_set_rip(cpu, rip + ins_len); break; case VMX_REASON_VMCALL: - env->exception_injected = EXCP0D_GPF; + env->exception_nr = EXCP0D_GPF; + env->exception_injected = 1; env->has_error_code = true; env->error_code = 0; break; @@ -928,48 +680,35 @@ int hvf_vcpu_exec(CPUState *cpu) return ret; } -bool hvf_allowed; +int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp) +{ + return -ENOSYS; +} + +int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp) +{ + return -ENOSYS; +} -static int hvf_accel_init(MachineState *ms) +int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type) { - int x; - hv_return_t ret; - HVFState *s; - - ret = hv_vm_create(HV_VM_DEFAULT); - assert_hvf_ok(ret); - - s = g_new0(HVFState, 1); - - s->num_slots = 32; - for (x = 0; x < s->num_slots; ++x) { - s->slots[x].size = 0; - s->slots[x].slot_id = x; - } - - hvf_state = s; - cpu_interrupt_handler = hvf_handle_interrupt; - memory_listener_register(&hvf_memory_listener, &address_space_memory); - return 0; + return -ENOSYS; } -static void hvf_accel_class_init(ObjectClass *oc, void *data) +int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type) { - AccelClass *ac = ACCEL_CLASS(oc); - ac->name = "HVF"; - ac->init_machine = hvf_accel_init; - ac->allowed = &hvf_allowed; + return -ENOSYS; } -static const TypeInfo hvf_accel_type = { - .name = TYPE_HVF_ACCEL, - .parent = TYPE_ACCEL, - .class_init = hvf_accel_class_init, -}; +void hvf_arch_remove_all_hw_breakpoints(void) +{ +} -static void hvf_type_init(void) +void hvf_arch_update_guest_debug(CPUState *cpu) { - type_register_static(&hvf_accel_type); } -type_init(hvf_type_init); +inline bool hvf_arch_supports_guest_debug(void) +{ + return false; +} |