diff options
Diffstat (limited to 'target/i386')
133 files changed, 38292 insertions, 22551 deletions
diff --git a/target/i386/Kconfig b/target/i386/Kconfig new file mode 100644 index 0000000000..ce6968906e --- /dev/null +++ b/target/i386/Kconfig @@ -0,0 +1,5 @@ +config I386 + bool + +config X86_64 + bool diff --git a/target/i386/Makefile.objs b/target/i386/Makefile.objs deleted file mode 100644 index 04678f5503..0000000000 --- a/target/i386/Makefile.objs +++ /dev/null @@ -1,19 +0,0 @@ -obj-y += helper.o cpu.o gdbstub.o xsave_helper.o -obj-$(CONFIG_TCG) += translate.o -obj-$(CONFIG_TCG) += bpt_helper.o cc_helper.o excp_helper.o fpu_helper.o -obj-$(CONFIG_TCG) += int_helper.o mem_helper.o misc_helper.o mpx_helper.o -obj-$(CONFIG_TCG) += seg_helper.o smm_helper.o svm_helper.o -obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o monitor.o -obj-$(CONFIG_KVM) += kvm.o hyperv.o -obj-$(CONFIG_SEV) += sev.o -obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o -obj-$(call lnot,$(CONFIG_SEV)) += sev-stub.o -# HAX support -ifdef CONFIG_WIN32 -obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-windows.o -endif -ifdef CONFIG_DARWIN -obj-$(CONFIG_HAX) += hax-all.o hax-mem.o hax-darwin.o -obj-$(CONFIG_HVF) += hvf/ -endif -obj-$(CONFIG_WHPX) += whpx-all.o diff --git a/target/i386/TODO b/target/i386/TODO deleted file mode 100644 index a8d69cf87f..0000000000 --- a/target/i386/TODO +++ /dev/null @@ -1,31 +0,0 @@ -Correctness issues: - -- some eflags manipulation incorrectly reset the bit 0x2. -- SVM: test, cpu save/restore, SMM save/restore. -- x86_64: lcall/ljmp intel/amd differences ? -- better code fetch (different exception handling + CS.limit support) -- user/kernel PUSHL/POPL in helper.c -- add missing cpuid tests -- return UD exception if LOCK prefix incorrectly used -- test ldt limit < 7 ? -- fix some 16 bit sp push/pop overflow (pusha/popa, lcall lret) -- full support of segment limit/rights -- full x87 exception support -- improve x87 bit exactness (use bochs code ?) 
-- DRx register support -- CR0.AC emulation -- SSE alignment checks - -Optimizations/Features: - -- add SVM nested paging support -- add VMX support -- add AVX support -- add SSE5 support -- fxsave/fxrstor AMD extensions -- improve monitor/mwait support -- faster EFLAGS update: consider SZAP, C, O can be updated separately - with a bit field in CC_OP and more state variables. -- evaluate x87 stack pointer statically -- find a way to avoid translating several time the same TB if CR0.TS - is set or not. diff --git a/target/i386/arch_dump.c b/target/i386/arch_dump.c index 004141fc04..c290910a04 100644 --- a/target/i386/arch_dump.c +++ b/target/i386/arch_dump.c @@ -42,7 +42,7 @@ typedef struct { static int x86_64_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, int id, - void *opaque) + DumpState *s) { x86_64_user_regs_struct regs; Elf64_Nhdr *note; @@ -94,7 +94,7 @@ static int x86_64_write_elf64_note(WriteCoreDumpFunction f, buf += descsz - sizeof(x86_64_user_regs_struct)-sizeof(target_ulong); memcpy(buf, ®s, sizeof(x86_64_user_regs_struct)); - ret = f(note, note_size, opaque); + ret = f(note, note_size, s); g_free(note); if (ret < 0) { return -1; @@ -148,7 +148,7 @@ static void x86_fill_elf_prstatus(x86_elf_prstatus *prstatus, CPUX86State *env, } static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, - int id, void *opaque) + int id, DumpState *s) { x86_elf_prstatus prstatus; Elf64_Nhdr *note; @@ -170,7 +170,7 @@ static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, buf += ROUND_UP(name_size, 4); memcpy(buf, &prstatus, sizeof(prstatus)); - ret = f(note, note_size, opaque); + ret = f(note, note_size, s); g_free(note); if (ret < 0) { return -1; @@ -180,7 +180,7 @@ static int x86_write_elf64_note(WriteCoreDumpFunction f, CPUX86State *env, } int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) + int cpuid, DumpState *s) { X86CPU *cpu = X86_CPU(cs); int ret; @@ -189,10 +189,10 @@ 
int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, bool lma = !!(first_x86_cpu->env.hflags & HF_LMA_MASK); if (lma) { - ret = x86_64_write_elf64_note(f, &cpu->env, cpuid, opaque); + ret = x86_64_write_elf64_note(f, &cpu->env, cpuid, s); } else { #endif - ret = x86_write_elf64_note(f, &cpu->env, cpuid, opaque); + ret = x86_write_elf64_note(f, &cpu->env, cpuid, s); #ifdef TARGET_X86_64 } #endif @@ -201,7 +201,7 @@ int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cs, } int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, - int cpuid, void *opaque) + int cpuid, DumpState *s) { X86CPU *cpu = X86_CPU(cs); x86_elf_prstatus prstatus; @@ -224,7 +224,7 @@ int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cs, buf += ROUND_UP(name_size, 4); memcpy(buf, &prstatus, sizeof(prstatus)); - ret = f(note, note_size, opaque); + ret = f(note, note_size, s); g_free(note); if (ret < 0) { return -1; @@ -329,7 +329,7 @@ static void qemu_get_cpustate(QEMUCPUState *s, CPUX86State *env) static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, CPUX86State *env, - void *opaque, + DumpState *s, int type) { QEMUCPUState state; @@ -369,7 +369,7 @@ static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, buf += ROUND_UP(name_size, 4); memcpy(buf, &state, sizeof(state)); - ret = f(note, note_size, opaque); + ret = f(note, note_size, s); g_free(note); if (ret < 0) { return -1; @@ -379,19 +379,19 @@ static inline int cpu_write_qemu_note(WriteCoreDumpFunction f, } int x86_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cs, - void *opaque) + DumpState *s) { X86CPU *cpu = X86_CPU(cs); - return cpu_write_qemu_note(f, &cpu->env, opaque, 1); + return cpu_write_qemu_note(f, &cpu->env, s, 1); } int x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cs, - void *opaque) + DumpState *s) { X86CPU *cpu = X86_CPU(cs); - return cpu_write_qemu_note(f, &cpu->env, opaque, 0); + return cpu_write_qemu_note(f, &cpu->env, s, 
0); } int cpu_get_dump_info(ArchDumpInfo *info, diff --git a/target/i386/arch_memory_mapping.c b/target/i386/arch_memory_mapping.c index 271cb5e41b..d1ff659128 100644 --- a/target/i386/arch_memory_mapping.c +++ b/target/i386/arch_memory_mapping.c @@ -266,7 +266,7 @@ static void walk_pml5e(MemoryMappingList *list, AddressSpace *as, } #endif -void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, +bool x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, Error **errp) { X86CPU *cpu = X86_CPU(cs); @@ -275,7 +275,7 @@ void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, if (!cpu_paging_enabled(cs)) { /* paging is disabled */ - return; + return true; } a20_mask = x86_get_a20_mask(env); @@ -310,5 +310,7 @@ void x86_cpu_get_memory_mapping(CPUState *cs, MemoryMappingList *list, pse = !!(env->cr[4] & CR4_PSE_MASK); walk_pde2(list, cs->as, pde_addr, a20_mask, pse); } + + return true; } diff --git a/target/i386/cpu-dump.c b/target/i386/cpu-dump.c new file mode 100644 index 0000000000..40697064d9 --- /dev/null +++ b/target/i386/cpu-dump.c @@ -0,0 +1,567 @@ +/* + * i386 CPU dump to FILE + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "qemu/qemu-print.h" +#ifndef CONFIG_USER_ONLY +#include "hw/i386/apic_internal.h" +#endif + +/***********************************************************/ +/* x86 debug */ + +static const char *cc_op_str[CC_OP_NB] = { + "DYNAMIC", + "EFLAGS", + + "MULB", + "MULW", + "MULL", + "MULQ", + + "ADDB", + "ADDW", + "ADDL", + "ADDQ", + + "ADCB", + "ADCW", + "ADCL", + "ADCQ", + + "SUBB", + "SUBW", + "SUBL", + "SUBQ", + + "SBBB", + "SBBW", + "SBBL", + "SBBQ", + + "LOGICB", + "LOGICW", + "LOGICL", + "LOGICQ", + + "INCB", + "INCW", + "INCL", + "INCQ", + + "DECB", + "DECW", + "DECL", + "DECQ", + + "SHLB", + "SHLW", + "SHLL", + "SHLQ", + + "SARB", + "SARW", + "SARL", + "SARQ", + + "BMILGB", + "BMILGW", + "BMILGL", + "BMILGQ", + + "ADCX", + "ADOX", + "ADCOX", + + "CLR", +}; + +static void +cpu_x86_dump_seg_cache(CPUX86State *env, FILE *f, + const char *name, struct SegmentCache *sc) +{ +#ifdef TARGET_X86_64 + if (env->hflags & HF_CS64_MASK) { + qemu_fprintf(f, "%-3s=%04x %016" PRIx64 " %08x %08x", name, + sc->selector, sc->base, sc->limit, + sc->flags & 0x00ffff00); + } else +#endif + { + qemu_fprintf(f, "%-3s=%04x %08x %08x %08x", name, sc->selector, + (uint32_t)sc->base, sc->limit, + sc->flags & 0x00ffff00); + } + + if (!(env->hflags & HF_PE_MASK) || !(sc->flags & DESC_P_MASK)) + goto done; + + qemu_fprintf(f, " DPL=%d ", + (sc->flags & DESC_DPL_MASK) >> DESC_DPL_SHIFT); + if (sc->flags & DESC_S_MASK) { + if (sc->flags & DESC_CS_MASK) { + qemu_fprintf(f, (sc->flags & DESC_L_MASK) ? "CS64" : + ((sc->flags & DESC_B_MASK) ? "CS32" : "CS16")); + qemu_fprintf(f, " [%c%c", (sc->flags & DESC_C_MASK) ? 'C' : '-', + (sc->flags & DESC_R_MASK) ? 'R' : '-'); + } else { + qemu_fprintf(f, (sc->flags & DESC_B_MASK + || env->hflags & HF_LMA_MASK) + ? "DS " : "DS16"); + qemu_fprintf(f, " [%c%c", (sc->flags & DESC_E_MASK) ? 'E' : '-', + (sc->flags & DESC_W_MASK) ? 'W' : '-'); + } + qemu_fprintf(f, "%c]", (sc->flags & DESC_A_MASK) ? 
'A' : '-'); + } else { + static const char *sys_type_name[2][16] = { + { /* 32 bit mode */ + "Reserved", "TSS16-avl", "LDT", "TSS16-busy", + "CallGate16", "TaskGate", "IntGate16", "TrapGate16", + "Reserved", "TSS32-avl", "Reserved", "TSS32-busy", + "CallGate32", "Reserved", "IntGate32", "TrapGate32" + }, + { /* 64 bit mode */ + "<hiword>", "Reserved", "LDT", "Reserved", "Reserved", + "Reserved", "Reserved", "Reserved", "Reserved", + "TSS64-avl", "Reserved", "TSS64-busy", "CallGate64", + "Reserved", "IntGate64", "TrapGate64" + } + }; + qemu_fprintf(f, "%s", + sys_type_name[(env->hflags & HF_LMA_MASK) ? 1 : 0] + [(sc->flags & DESC_TYPE_MASK) >> DESC_TYPE_SHIFT]); + } +done: + qemu_fprintf(f, "\n"); +} + +#ifndef CONFIG_USER_ONLY + +/* ARRAY_SIZE check is not required because + * DeliveryMode(dm) has a size of 3 bit. + */ +static inline const char *dm2str(uint32_t dm) +{ + static const char *str[] = { + "Fixed", + "...", + "SMI", + "...", + "NMI", + "INIT", + "...", + "ExtINT" + }; + return str[dm]; +} + +static void dump_apic_lvt(const char *name, uint32_t lvt, bool is_timer) +{ + uint32_t dm = (lvt & APIC_LVT_DELIV_MOD) >> APIC_LVT_DELIV_MOD_SHIFT; + qemu_printf("%s\t 0x%08x %s %-5s %-6s %-7s %-12s %-6s", + name, lvt, + lvt & APIC_LVT_INT_POLARITY ? "active-lo" : "active-hi", + lvt & APIC_LVT_LEVEL_TRIGGER ? "level" : "edge", + lvt & APIC_LVT_MASKED ? "masked" : "", + lvt & APIC_LVT_DELIV_STS ? "pending" : "", + !is_timer ? + "" : lvt & APIC_LVT_TIMER_PERIODIC ? + "periodic" : lvt & APIC_LVT_TIMER_TSCDEADLINE ? + "tsc-deadline" : "one-shot", + dm2str(dm)); + if (dm != APIC_DM_NMI) { + qemu_printf(" (vec %u)\n", lvt & APIC_VECTOR_MASK); + } else { + qemu_printf("\n"); + } +} + +/* ARRAY_SIZE check is not required because + * destination shorthand has a size of 2 bit. 
+ */ +static inline const char *shorthand2str(uint32_t shorthand) +{ + const char *str[] = { + "no-shorthand", "self", "all-self", "all" + }; + return str[shorthand]; +} + +static inline uint8_t divider_conf(uint32_t divide_conf) +{ + uint8_t divide_val = ((divide_conf & 0x8) >> 1) | (divide_conf & 0x3); + + return divide_val == 7 ? 1 : 2 << divide_val; +} + +static inline void mask2str(char *str, uint32_t val, uint8_t size) +{ + while (size--) { + *str++ = (val >> size) & 1 ? '1' : '0'; + } + *str = 0; +} + +#define MAX_LOGICAL_APIC_ID_MASK_SIZE 16 + +static void dump_apic_icr(APICCommonState *s, CPUX86State *env) +{ + uint32_t icr = s->icr[0], icr2 = s->icr[1]; + uint8_t dest_shorthand = \ + (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT; + bool logical_mod = icr & APIC_ICR_DEST_MOD; + char apic_id_str[MAX_LOGICAL_APIC_ID_MASK_SIZE + 1]; + uint32_t dest_field; + bool x2apic; + + qemu_printf("ICR\t 0x%08x %s %s %s %s\n", + icr, + logical_mod ? "logical" : "physical", + icr & APIC_ICR_TRIGGER_MOD ? "level" : "edge", + icr & APIC_ICR_LEVEL ? "assert" : "de-assert", + shorthand2str(dest_shorthand)); + + qemu_printf("ICR2\t 0x%08x", icr2); + if (dest_shorthand != 0) { + qemu_printf("\n"); + return; + } + x2apic = env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC; + dest_field = x2apic ? 
icr2 : icr2 >> APIC_ICR_DEST_SHIFT; + + if (!logical_mod) { + if (x2apic) { + qemu_printf(" cpu %u (X2APIC ID)\n", dest_field); + } else { + qemu_printf(" cpu %u (APIC ID)\n", + dest_field & APIC_LOGDEST_XAPIC_ID); + } + return; + } + + if (s->dest_mode == 0xf) { /* flat mode */ + mask2str(apic_id_str, icr2 >> APIC_ICR_DEST_SHIFT, 8); + qemu_printf(" mask %s (APIC ID)\n", apic_id_str); + } else if (s->dest_mode == 0) { /* cluster mode */ + if (x2apic) { + mask2str(apic_id_str, dest_field & APIC_LOGDEST_X2APIC_ID, 16); + qemu_printf(" cluster %u mask %s (X2APIC ID)\n", + dest_field >> APIC_LOGDEST_X2APIC_SHIFT, apic_id_str); + } else { + mask2str(apic_id_str, dest_field & APIC_LOGDEST_XAPIC_ID, 4); + qemu_printf(" cluster %u mask %s (APIC ID)\n", + dest_field >> APIC_LOGDEST_XAPIC_SHIFT, apic_id_str); + } + } +} + +static void dump_apic_interrupt(const char *name, uint32_t *ireg_tab, + uint32_t *tmr_tab) +{ + int i, empty = true; + + qemu_printf("%s\t ", name); + for (i = 0; i < 256; i++) { + if (apic_get_bit(ireg_tab, i)) { + qemu_printf("%u%s ", i, + apic_get_bit(tmr_tab, i) ? "(level)" : ""); + empty = false; + } + } + qemu_printf("%s\n", empty ? 
"(none)" : ""); +} + +void x86_cpu_dump_local_apic_state(CPUState *cs, int flags) +{ + X86CPU *cpu = X86_CPU(cs); + APICCommonState *s = APIC_COMMON(cpu->apic_state); + if (!s) { + qemu_printf("local apic state not available\n"); + return; + } + uint32_t *lvt = s->lvt; + + qemu_printf("dumping local APIC state for CPU %-2u\n\n", + CPU(cpu)->cpu_index); + dump_apic_lvt("LVT0", lvt[APIC_LVT_LINT0], false); + dump_apic_lvt("LVT1", lvt[APIC_LVT_LINT1], false); + dump_apic_lvt("LVTPC", lvt[APIC_LVT_PERFORM], false); + dump_apic_lvt("LVTERR", lvt[APIC_LVT_ERROR], false); + dump_apic_lvt("LVTTHMR", lvt[APIC_LVT_THERMAL], false); + dump_apic_lvt("LVTT", lvt[APIC_LVT_TIMER], true); + + qemu_printf("Timer\t DCR=0x%x (divide by %u) initial_count = %u" + " current_count = %u\n", + s->divide_conf & APIC_DCR_MASK, + divider_conf(s->divide_conf), + s->initial_count, apic_get_current_count(s)); + + qemu_printf("SPIV\t 0x%08x APIC %s, focus=%s, spurious vec %u\n", + s->spurious_vec, + s->spurious_vec & APIC_SPURIO_ENABLED ? "enabled" : "disabled", + s->spurious_vec & APIC_SPURIO_FOCUS ? 
"on" : "off", + s->spurious_vec & APIC_VECTOR_MASK); + + dump_apic_icr(s, &cpu->env); + + qemu_printf("ESR\t 0x%08x\n", s->esr); + + dump_apic_interrupt("ISR", s->isr, s->tmr); + dump_apic_interrupt("IRR", s->irr, s->tmr); + + qemu_printf("\nAPR 0x%02x TPR 0x%02x DFR 0x%02x LDR 0x%02x", + s->arb_id, s->tpr, s->dest_mode, s->log_dest); + if (s->dest_mode == 0) { + qemu_printf("(cluster %u: id %u)", + s->log_dest >> APIC_LOGDEST_XAPIC_SHIFT, + s->log_dest & APIC_LOGDEST_XAPIC_ID); + } + qemu_printf(" PPR 0x%02x\n", apic_get_ppr(s)); +} + +#endif /* !CONFIG_USER_ONLY */ + +#define DUMP_CODE_BYTES_TOTAL 50 +#define DUMP_CODE_BYTES_BACKWARD 20 + +void x86_cpu_dump_state(CPUState *cs, FILE *f, int flags) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + int eflags, i, nb; + char cc_op_name[32]; + static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" }; + + eflags = cpu_compute_eflags(env); +#ifdef TARGET_X86_64 + if (env->hflags & HF_CS64_MASK) { + qemu_fprintf(f, "RAX=%016" PRIx64 " RBX=%016" PRIx64 " RCX=%016" PRIx64 " RDX=%016" PRIx64 "\n" + "RSI=%016" PRIx64 " RDI=%016" PRIx64 " RBP=%016" PRIx64 " RSP=%016" PRIx64 "\n" + "R8 =%016" PRIx64 " R9 =%016" PRIx64 " R10=%016" PRIx64 " R11=%016" PRIx64 "\n" + "R12=%016" PRIx64 " R13=%016" PRIx64 " R14=%016" PRIx64 " R15=%016" PRIx64 "\n" + "RIP=%016" PRIx64 " RFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n", + env->regs[R_EAX], + env->regs[R_EBX], + env->regs[R_ECX], + env->regs[R_EDX], + env->regs[R_ESI], + env->regs[R_EDI], + env->regs[R_EBP], + env->regs[R_ESP], + env->regs[8], + env->regs[9], + env->regs[10], + env->regs[11], + env->regs[12], + env->regs[13], + env->regs[14], + env->regs[15], + env->eip, eflags, + eflags & DF_MASK ? 'D' : '-', + eflags & CC_O ? 'O' : '-', + eflags & CC_S ? 'S' : '-', + eflags & CC_Z ? 'Z' : '-', + eflags & CC_A ? 'A' : '-', + eflags & CC_P ? 'P' : '-', + eflags & CC_C ? 
'C' : '-', + env->hflags & HF_CPL_MASK, + (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1, + (env->a20_mask >> 20) & 1, + (env->hflags >> HF_SMM_SHIFT) & 1, + cs->halted); + } else +#endif + { + qemu_fprintf(f, "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n" + "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n" + "EIP=%08x EFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n", + (uint32_t)env->regs[R_EAX], + (uint32_t)env->regs[R_EBX], + (uint32_t)env->regs[R_ECX], + (uint32_t)env->regs[R_EDX], + (uint32_t)env->regs[R_ESI], + (uint32_t)env->regs[R_EDI], + (uint32_t)env->regs[R_EBP], + (uint32_t)env->regs[R_ESP], + (uint32_t)env->eip, eflags, + eflags & DF_MASK ? 'D' : '-', + eflags & CC_O ? 'O' : '-', + eflags & CC_S ? 'S' : '-', + eflags & CC_Z ? 'Z' : '-', + eflags & CC_A ? 'A' : '-', + eflags & CC_P ? 'P' : '-', + eflags & CC_C ? 'C' : '-', + env->hflags & HF_CPL_MASK, + (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1, + (env->a20_mask >> 20) & 1, + (env->hflags >> HF_SMM_SHIFT) & 1, + cs->halted); + } + + for(i = 0; i < 6; i++) { + cpu_x86_dump_seg_cache(env, f, seg_name[i], &env->segs[i]); + } + cpu_x86_dump_seg_cache(env, f, "LDT", &env->ldt); + cpu_x86_dump_seg_cache(env, f, "TR", &env->tr); + +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + qemu_fprintf(f, "GDT= %016" PRIx64 " %08x\n", + env->gdt.base, env->gdt.limit); + qemu_fprintf(f, "IDT= %016" PRIx64 " %08x\n", + env->idt.base, env->idt.limit); + qemu_fprintf(f, "CR0=%08x CR2=%016" PRIx64 " CR3=%016" PRIx64 " CR4=%08x\n", + (uint32_t)env->cr[0], + env->cr[2], + env->cr[3], + (uint32_t)env->cr[4]); + for(i = 0; i < 4; i++) + qemu_fprintf(f, "DR%d=%016" PRIx64 " ", i, env->dr[i]); + qemu_fprintf(f, "\nDR6=%016" PRIx64 " DR7=%016" PRIx64 "\n", + env->dr[6], env->dr[7]); + } else +#endif + { + qemu_fprintf(f, "GDT= %08x %08x\n", + (uint32_t)env->gdt.base, env->gdt.limit); + qemu_fprintf(f, "IDT= %08x %08x\n", + (uint32_t)env->idt.base, env->idt.limit); + qemu_fprintf(f, "CR0=%08x CR2=%08x CR3=%08x CR4=%08x\n", + 
(uint32_t)env->cr[0], + (uint32_t)env->cr[2], + (uint32_t)env->cr[3], + (uint32_t)env->cr[4]); + for(i = 0; i < 4; i++) { + qemu_fprintf(f, "DR%d=" TARGET_FMT_lx " ", i, env->dr[i]); + } + qemu_fprintf(f, "\nDR6=" TARGET_FMT_lx " DR7=" TARGET_FMT_lx "\n", + env->dr[6], env->dr[7]); + } + if (flags & CPU_DUMP_CCOP) { + if ((unsigned)env->cc_op < CC_OP_NB) + snprintf(cc_op_name, sizeof(cc_op_name), "%s", cc_op_str[env->cc_op]); + else + snprintf(cc_op_name, sizeof(cc_op_name), "[%d]", env->cc_op); +#ifdef TARGET_X86_64 + if (env->hflags & HF_CS64_MASK) { + qemu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " CCO=%s\n", + env->cc_src, env->cc_dst, + cc_op_name); + } else +#endif + { + qemu_fprintf(f, "CCS=%08x CCD=%08x CCO=%s\n", + (uint32_t)env->cc_src, (uint32_t)env->cc_dst, + cc_op_name); + } + } + qemu_fprintf(f, "EFER=%016" PRIx64 "\n", env->efer); + if (flags & CPU_DUMP_FPU) { + int fptag; + const uint64_t avx512_mask = XSTATE_OPMASK_MASK | \ + XSTATE_ZMM_Hi256_MASK | \ + XSTATE_Hi16_ZMM_MASK | \ + XSTATE_YMM_MASK | XSTATE_SSE_MASK, + avx_mask = XSTATE_YMM_MASK | XSTATE_SSE_MASK; + fptag = 0; + for(i = 0; i < 8; i++) { + fptag |= ((!env->fptags[i]) << i); + } + update_mxcsr_from_sse_status(env); + qemu_fprintf(f, "FCW=%04x FSW=%04x [ST=%d] FTW=%02x MXCSR=%08x\n", + env->fpuc, + (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11, + env->fpstt, + fptag, + env->mxcsr); + for(i=0;i<8;i++) { + CPU_LDoubleU u; + u.d = env->fpregs[i].d; + qemu_fprintf(f, "FPR%d=%016" PRIx64 " %04x", + i, u.l.lower, u.l.upper); + if ((i & 1) == 1) + qemu_fprintf(f, "\n"); + else + qemu_fprintf(f, " "); + } + + if ((env->xcr0 & avx512_mask) == avx512_mask) { + /* XSAVE enabled AVX512 */ + for (i = 0; i < NB_OPMASK_REGS; i++) { + qemu_fprintf(f, "Opmask%02d=%016"PRIx64"%s", i, + env->opmask_regs[i], ((i & 3) == 3) ? "\n" : " "); + } + + nb = (env->hflags & HF_CS64_MASK) ? 
32 : 8; + for (i = 0; i < nb; i++) { + qemu_fprintf(f, "ZMM%02d=%016"PRIx64" %016"PRIx64" %016"PRIx64 + " %016"PRIx64" %016"PRIx64" %016"PRIx64 + " %016"PRIx64" %016"PRIx64"\n", + i, + env->xmm_regs[i].ZMM_Q(7), + env->xmm_regs[i].ZMM_Q(6), + env->xmm_regs[i].ZMM_Q(5), + env->xmm_regs[i].ZMM_Q(4), + env->xmm_regs[i].ZMM_Q(3), + env->xmm_regs[i].ZMM_Q(2), + env->xmm_regs[i].ZMM_Q(1), + env->xmm_regs[i].ZMM_Q(0)); + } + } else if ((env->xcr0 & avx_mask) == avx_mask) { + /* XSAVE enabled AVX */ + nb = env->hflags & HF_CS64_MASK ? 16 : 8; + for (i = 0; i < nb; i++) { + qemu_fprintf(f, "YMM%02d=%016"PRIx64" %016"PRIx64" %016"PRIx64 + " %016"PRIx64"\n", i, + env->xmm_regs[i].ZMM_Q(3), + env->xmm_regs[i].ZMM_Q(2), + env->xmm_regs[i].ZMM_Q(1), + env->xmm_regs[i].ZMM_Q(0)); + } + } else { /* SSE and below cases */ + nb = env->hflags & HF_CS64_MASK ? 16 : 8; + for (i = 0; i < nb; i++) { + qemu_fprintf(f, "XMM%02d=%016"PRIx64" %016"PRIx64"%s", + i, + env->xmm_regs[i].ZMM_Q(1), + env->xmm_regs[i].ZMM_Q(0), + (i & 1) ? "\n" : " "); + } + } + } + if (flags & CPU_DUMP_CODE) { + target_ulong base = env->segs[R_CS].base + env->eip; + target_ulong offs = MIN(env->eip, DUMP_CODE_BYTES_BACKWARD); + uint8_t code; + char codestr[3]; + + qemu_fprintf(f, "Code="); + for (i = 0; i < DUMP_CODE_BYTES_TOTAL; i++) { + if (cpu_memory_rw_debug(cs, base - offs + i, &code, 1, 0) == 0) { + snprintf(codestr, sizeof(codestr), "%02x", code); + } else { + snprintf(codestr, sizeof(codestr), "??"); + } + qemu_fprintf(f, "%s%s%s%s", i > 0 ? " " : "", + i == offs ? "<" : "", codestr, i == offs ? 
">" : ""); + } + qemu_fprintf(f, "\n"); + } +} diff --git a/target/i386/cpu-internal.h b/target/i386/cpu-internal.h new file mode 100644 index 0000000000..9baac5c0b4 --- /dev/null +++ b/target/i386/cpu-internal.h @@ -0,0 +1,70 @@ +/* + * i386 CPU internal definitions to be shared between cpu.c and cpu-sysemu.c + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef I386_CPU_INTERNAL_H +#define I386_CPU_INTERNAL_H + +typedef enum FeatureWordType { + CPUID_FEATURE_WORD, + MSR_FEATURE_WORD, +} FeatureWordType; + +typedef struct FeatureWordInfo { + FeatureWordType type; + /* feature flags names are taken from "Intel Processor Identification and + * the CPUID Instruction" and AMD's "CPUID Specification". + * In cases of disagreement between feature naming conventions, + * aliases may be added. 
+ */ + const char *feat_names[64]; + union { + /* If type==CPUID_FEATURE_WORD */ + struct { + uint32_t eax; /* Input EAX for CPUID */ + bool needs_ecx; /* CPUID instruction uses ECX as input */ + uint32_t ecx; /* Input ECX value for CPUID */ + int reg; /* output register (R_* constant) */ + } cpuid; + /* If type==MSR_FEATURE_WORD */ + struct { + uint32_t index; + } msr; + }; + uint64_t tcg_features; /* Feature flags supported by TCG */ + uint64_t unmigratable_flags; /* Feature flags known to be unmigratable */ + uint64_t migratable_flags; /* Feature flags known to be migratable */ + /* Features that shouldn't be auto-enabled by "-cpu host" */ + uint64_t no_autoenable_flags; +} FeatureWordInfo; + +extern FeatureWordInfo feature_word_info[]; + +void x86_cpu_expand_features(X86CPU *cpu, Error **errp); + +#ifndef CONFIG_USER_ONLY +GuestPanicInformation *x86_cpu_get_crash_info(CPUState *cs); +void x86_cpu_get_crash_info_qom(Object *obj, Visitor *v, + const char *name, void *opaque, Error **errp); + +void x86_cpu_apic_create(X86CPU *cpu, Error **errp); +void x86_cpu_apic_realize(X86CPU *cpu, Error **errp); +void x86_cpu_machine_reset_cb(void *opaque); +#endif /* !CONFIG_USER_ONLY */ + +#endif /* I386_CPU_INTERNAL_H */ diff --git a/target/i386/cpu-param.h b/target/i386/cpu-param.h new file mode 100644 index 0000000000..911b4cd51b --- /dev/null +++ b/target/i386/cpu-param.h @@ -0,0 +1,27 @@ +/* + * i386 cpu parameters for qemu. + * + * Copyright (c) 2003 Fabrice Bellard + * SPDX-License-Identifier: LGPL-2.0+ + */ + +#ifndef I386_CPU_PARAM_H +#define I386_CPU_PARAM_H + +#ifdef TARGET_X86_64 +# define TARGET_LONG_BITS 64 +# define TARGET_PHYS_ADDR_SPACE_BITS 52 +/* + * ??? This is really 48 bits, sign-extended, but the only thing + * accessible to userland with bit 48 set is the VSYSCALL, and that + * is handled via other mechanisms. 
+ */ +# define TARGET_VIRT_ADDR_SPACE_BITS 47 +#else +# define TARGET_LONG_BITS 32 +# define TARGET_PHYS_ADDR_SPACE_BITS 36 +# define TARGET_VIRT_ADDR_SPACE_BITS 32 +#endif +#define TARGET_PAGE_BITS 12 + +#endif diff --git a/target/i386/cpu-qom.h b/target/i386/cpu-qom.h index 22f95eb3a4..d4e216d000 100644 --- a/target/i386/cpu-qom.h +++ b/target/i386/cpu-qom.h @@ -20,8 +20,7 @@ #ifndef QEMU_I386_CPU_QOM_H #define QEMU_I386_CPU_QOM_H -#include "qom/cpu.h" -#include "qemu/notify.h" +#include "hw/core/cpu.h" #ifdef TARGET_X86_64 #define TYPE_X86_CPU "x86_64-cpu" @@ -29,57 +28,9 @@ #define TYPE_X86_CPU "i386-cpu" #endif -#define X86_CPU_CLASS(klass) \ - OBJECT_CLASS_CHECK(X86CPUClass, (klass), TYPE_X86_CPU) -#define X86_CPU(obj) \ - OBJECT_CHECK(X86CPU, (obj), TYPE_X86_CPU) -#define X86_CPU_GET_CLASS(obj) \ - OBJECT_GET_CLASS(X86CPUClass, (obj), TYPE_X86_CPU) +OBJECT_DECLARE_CPU_TYPE(X86CPU, X86CPUClass, X86_CPU) -/** - * X86CPUDefinition: - * - * CPU model definition data that was not converted to QOM per-subclass - * property defaults yet. - */ -typedef struct X86CPUDefinition X86CPUDefinition; - -/** - * X86CPUClass: - * @cpu_def: CPU model definition - * @host_cpuid_required: Whether CPU model requires cpuid from host. - * @ordering: Ordering on the "-cpu help" CPU model list. - * @migration_safe: See CpuDefinitionInfo::migration_safe - * @static_model: See CpuDefinitionInfo::static - * @parent_realize: The parent class' realize handler. - * @parent_reset: The parent class' reset handler. - * - * An x86 CPU model or family. - */ -typedef struct X86CPUClass { - /*< private >*/ - CPUClass parent_class; - /*< public >*/ - - /* CPU definition, automatically loaded by instance_init if not NULL. - * Should be eventually replaced by subclass-specific property defaults. - */ - X86CPUDefinition *cpu_def; - - bool host_cpuid_required; - int ordering; - bool migration_safe; - bool static_model; - - /* Optional description of CPU model. 
- * If unavailable, cpu_def->model_id is used */ - const char *model_description; - - DeviceRealize parent_realize; - DeviceUnrealize parent_unrealize; - void (*parent_reset)(CPUState *cpu); -} X86CPUClass; - -typedef struct X86CPU X86CPU; +#define X86_CPU_TYPE_SUFFIX "-" TYPE_X86_CPU +#define X86_CPU_TYPE_NAME(name) (name X86_CPU_TYPE_SUFFIX) #endif diff --git a/target/i386/cpu-sysemu.c b/target/i386/cpu-sysemu.c new file mode 100644 index 0000000000..3f9093d285 --- /dev/null +++ b/target/i386/cpu-sysemu.c @@ -0,0 +1,388 @@ +/* + * i386 CPUID, CPU class, definitions, models: sysemu-only code + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "sysemu/kvm.h" +#include "sysemu/xen.h" +#include "sysemu/whpx.h" +#include "qapi/error.h" +#include "qapi/qapi-visit-run-state.h" +#include "qapi/qmp/qdict.h" +#include "qapi/qobject-input-visitor.h" +#include "qom/qom-qobject.h" +#include "qapi/qapi-commands-machine-target.h" +#include "hw/qdev-properties.h" + +#include "exec/address-spaces.h" +#include "hw/i386/apic_internal.h" + +#include "cpu-internal.h" + +/* Return a QDict containing keys for all properties that can be included + * in static expansion of CPU models. All properties set by x86_cpu_load_model() + * must be included in the dictionary. 
+ */ +static QDict *x86_cpu_static_props(void) +{ + FeatureWord w; + int i; + static const char *props[] = { + "min-level", + "min-xlevel", + "family", + "model", + "stepping", + "model-id", + "vendor", + "lmce", + NULL, + }; + static QDict *d; + + if (d) { + return d; + } + + d = qdict_new(); + for (i = 0; props[i]; i++) { + qdict_put_null(d, props[i]); + } + + for (w = 0; w < FEATURE_WORDS; w++) { + FeatureWordInfo *fi = &feature_word_info[w]; + int bit; + for (bit = 0; bit < 64; bit++) { + if (!fi->feat_names[bit]) { + continue; + } + qdict_put_null(d, fi->feat_names[bit]); + } + } + + return d; +} + +/* Add an entry to @props dict, with the value for property. */ +static void x86_cpu_expand_prop(X86CPU *cpu, QDict *props, const char *prop) +{ + QObject *value = object_property_get_qobject(OBJECT(cpu), prop, + &error_abort); + + qdict_put_obj(props, prop, value); +} + +/* Convert CPU model data from X86CPU object to a property dictionary + * that can recreate exactly the same CPU model. + */ +static void x86_cpu_to_dict(X86CPU *cpu, QDict *props) +{ + QDict *sprops = x86_cpu_static_props(); + const QDictEntry *e; + + for (e = qdict_first(sprops); e; e = qdict_next(sprops, e)) { + const char *prop = qdict_entry_key(e); + x86_cpu_expand_prop(cpu, props, prop); + } +} + +/* Convert CPU model data from X86CPU object to a property dictionary + * that can recreate exactly the same CPU model, including every + * writable QOM property. + */ +static void x86_cpu_to_dict_full(X86CPU *cpu, QDict *props) +{ + ObjectPropertyIterator iter; + ObjectProperty *prop; + + object_property_iter_init(&iter, OBJECT(cpu)); + while ((prop = object_property_iter_next(&iter))) { + /* skip read-only or write-only properties */ + if (!prop->get || !prop->set) { + continue; + } + + /* "hotplugged" is the only property that is configurable + * on the command-line but will be set differently on CPUs + * created using "-cpu ... -smp ..." 
and by CPUs created + * on the fly by x86_cpu_from_model() for querying. Skip it. + */ + if (!strcmp(prop->name, "hotplugged")) { + continue; + } + x86_cpu_expand_prop(cpu, props, prop->name); + } +} + +static void object_apply_props(Object *obj, QObject *props, + const char *props_arg_name, Error **errp) +{ + Visitor *visitor; + QDict *qdict; + const QDictEntry *prop; + + visitor = qobject_input_visitor_new(props); + if (!visit_start_struct(visitor, props_arg_name, NULL, 0, errp)) { + visit_free(visitor); + return; + } + + qdict = qobject_to(QDict, props); + for (prop = qdict_first(qdict); prop; prop = qdict_next(qdict, prop)) { + if (!object_property_set(obj, qdict_entry_key(prop), + visitor, errp)) { + goto out; + } + } + + visit_check_struct(visitor, errp); +out: + visit_end_struct(visitor, NULL); + visit_free(visitor); +} + +/* Create X86CPU object according to model+props specification */ +static X86CPU *x86_cpu_from_model(const char *model, QObject *props, + const char *props_arg_name, Error **errp) +{ + X86CPU *xc = NULL; + X86CPUClass *xcc; + Error *err = NULL; + + xcc = X86_CPU_CLASS(cpu_class_by_name(TYPE_X86_CPU, model)); + if (xcc == NULL) { + error_setg(&err, "CPU model '%s' not found", model); + goto out; + } + + xc = X86_CPU(object_new_with_class(OBJECT_CLASS(xcc))); + if (props) { + object_apply_props(OBJECT(xc), props, props_arg_name, &err); + if (err) { + goto out; + } + } + + x86_cpu_expand_features(xc, &err); + if (err) { + goto out; + } + +out: + if (err) { + error_propagate(errp, err); + object_unref(OBJECT(xc)); + xc = NULL; + } + return xc; +} + +CpuModelExpansionInfo * +qmp_query_cpu_model_expansion(CpuModelExpansionType type, + CpuModelInfo *model, + Error **errp) +{ + X86CPU *xc = NULL; + Error *err = NULL; + CpuModelExpansionInfo *ret = g_new0(CpuModelExpansionInfo, 1); + QDict *props = NULL; + const char *base_name; + + xc = x86_cpu_from_model(model->name, model->props, "model.props", &err); + if (err) { + goto out; + } + + props = 
qdict_new(); + ret->model = g_new0(CpuModelInfo, 1); + ret->model->props = QOBJECT(props); + + switch (type) { + case CPU_MODEL_EXPANSION_TYPE_STATIC: + /* Static expansion will be based on "base" only */ + base_name = "base"; + x86_cpu_to_dict(xc, props); + break; + case CPU_MODEL_EXPANSION_TYPE_FULL: + /* As we don't return every single property, full expansion needs + * to keep the original model name+props, and add extra + * properties on top of that. + */ + base_name = model->name; + x86_cpu_to_dict_full(xc, props); + break; + default: + error_setg(&err, "Unsupported expansion type"); + goto out; + } + + x86_cpu_to_dict(xc, props); + + ret->model->name = g_strdup(base_name); + +out: + object_unref(OBJECT(xc)); + if (err) { + error_propagate(errp, err); + qapi_free_CpuModelExpansionInfo(ret); + ret = NULL; + } + return ret; +} + +void cpu_clear_apic_feature(CPUX86State *env) +{ + env->features[FEAT_1_EDX] &= ~CPUID_APIC; +} + +void cpu_set_apic_feature(CPUX86State *env) +{ + env->features[FEAT_1_EDX] |= CPUID_APIC; +} + +bool cpu_has_x2apic_feature(CPUX86State *env) +{ + return env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC; +} + +bool cpu_is_bsp(X86CPU *cpu) +{ + return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP; +} + +/* TODO: remove me, when reset over QOM tree is implemented */ +void x86_cpu_machine_reset_cb(void *opaque) +{ + X86CPU *cpu = opaque; + cpu_reset(CPU(cpu)); +} + +APICCommonClass *apic_get_class(Error **errp) +{ + const char *apic_type = "apic"; + + /* TODO: in-kernel irqchip for hvf */ + if (kvm_enabled()) { + if (!kvm_irqchip_in_kernel()) { + error_setg(errp, "KVM does not support userspace APIC"); + return NULL; + } + apic_type = "kvm-apic"; + } else if (xen_enabled()) { + apic_type = "xen-apic"; + } else if (whpx_apic_in_platform()) { + apic_type = "whpx-apic"; + } + + return APIC_COMMON_CLASS(object_class_by_name(apic_type)); +} + +void x86_cpu_apic_create(X86CPU *cpu, Error **errp) +{ + APICCommonState *apic; + 
APICCommonClass *apic_class = apic_get_class(errp); + + if (!apic_class) { + return; + } + + cpu->apic_state = DEVICE(object_new_with_class(OBJECT_CLASS(apic_class))); + object_property_add_child(OBJECT(cpu), "lapic", + OBJECT(cpu->apic_state)); + object_unref(OBJECT(cpu->apic_state)); + + /* TODO: convert to link<> */ + apic = APIC_COMMON(cpu->apic_state); + apic->cpu = cpu; + apic->apicbase = APIC_DEFAULT_ADDRESS | MSR_IA32_APICBASE_ENABLE; + + /* + * apic_common_set_id needs to check if the CPU has x2APIC + * feature in case APIC ID >= 255, so we need to set apic->cpu + * before setting APIC ID + */ + qdev_prop_set_uint32(cpu->apic_state, "id", cpu->apic_id); +} + +void x86_cpu_apic_realize(X86CPU *cpu, Error **errp) +{ + APICCommonState *apic; + static bool apic_mmio_map_once; + + if (cpu->apic_state == NULL) { + return; + } + qdev_realize(DEVICE(cpu->apic_state), NULL, errp); + + /* Map APIC MMIO area */ + apic = APIC_COMMON(cpu->apic_state); + if (!apic_mmio_map_once) { + memory_region_add_subregion_overlap(get_system_memory(), + apic->apicbase & + MSR_IA32_APICBASE_BASE, + &apic->io_memory, + 0x1000); + apic_mmio_map_once = true; + } +} + +GuestPanicInformation *x86_cpu_get_crash_info(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + GuestPanicInformation *panic_info = NULL; + + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_CRASH)) { + panic_info = g_new0(GuestPanicInformation, 1); + + panic_info->type = GUEST_PANIC_INFORMATION_TYPE_HYPER_V; + + assert(HV_CRASH_PARAMS >= 5); + panic_info->u.hyper_v.arg1 = env->msr_hv_crash_params[0]; + panic_info->u.hyper_v.arg2 = env->msr_hv_crash_params[1]; + panic_info->u.hyper_v.arg3 = env->msr_hv_crash_params[2]; + panic_info->u.hyper_v.arg4 = env->msr_hv_crash_params[3]; + panic_info->u.hyper_v.arg5 = env->msr_hv_crash_params[4]; + } + + return panic_info; +} +void x86_cpu_get_crash_info_qom(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + CPUState *cs = CPU(obj); 
+ GuestPanicInformation *panic_info; + + if (!cs->crash_occurred) { + error_setg(errp, "No crash occurred"); + return; + } + + panic_info = x86_cpu_get_crash_info(cs); + if (panic_info == NULL) { + error_setg(errp, "No crash information"); + return; + } + + visit_type_GuestPanicInformation(v, "crash-information", &panic_info, + errp); + qapi_free_GuestPanicInformation(panic_info); +} + diff --git a/target/i386/cpu.c b/target/i386/cpu.c index c88876dfe3..33760a2ee1 100644 --- a/target/i386/cpu.c +++ b/target/i386/cpu.c @@ -1,12 +1,12 @@ /* - * i386 CPUID helper functions + * i386 CPUID, CPU class, definitions, models * * Copyright (c) 2003 Fabrice Bellard * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -20,41 +20,33 @@ #include "qemu/osdep.h" #include "qemu/units.h" #include "qemu/cutils.h" -#include "qemu/bitops.h" - +#include "qemu/qemu-print.h" +#include "qemu/hw-version.h" #include "cpu.h" -#include "exec/exec-all.h" -#include "sysemu/kvm.h" +#include "tcg/helper-tcg.h" #include "sysemu/hvf.h" -#include "sysemu/cpus.h" -#include "kvm_i386.h" -#include "sev_i386.h" - -#include "qemu/error-report.h" -#include "qemu/option.h" -#include "qemu/config-file.h" +#include "hvf/hvf-i386.h" +#include "kvm/kvm_i386.h" +#include "sev.h" #include "qapi/error.h" -#include "qapi/qapi-visit-misc.h" -#include "qapi/qapi-visit-run-state.h" -#include "qapi/qmp/qdict.h" +#include "qemu/error-report.h" +#include "qapi/qapi-visit-machine.h" #include "qapi/qmp/qerror.h" -#include "qapi/visitor.h" -#include "qom/qom-qobject.h" -#include "sysemu/arch_init.h" - #include 
"standard-headers/asm-x86/kvm_para.h" - -#include "sysemu/sysemu.h" #include "hw/qdev-properties.h" #include "hw/i386/topology.h" #ifndef CONFIG_USER_ONLY +#include "sysemu/reset.h" +#include "qapi/qapi-commands-machine-target.h" #include "exec/address-spaces.h" -#include "hw/hw.h" -#include "hw/xen/xen.h" -#include "hw/i386/apic_internal.h" +#include "hw/boards.h" +#include "hw/i386/sgx-epc.h" #endif #include "disas/capstone.h" +#include "cpu-internal.h" + +static void x86_cpu_realizefn(DeviceState *dev, Error **errp); /* Helpers for building CPUID[2] descriptors: */ @@ -333,68 +325,13 @@ static void encode_cache_cpuid80000006(CPUCacheInfo *l2, } } -/* - * Definitions used for building CPUID Leaf 0x8000001D and 0x8000001E - * Please refer to the AMD64 Architecture Programmer’s Manual Volume 3. - * Define the constants to build the cpu topology. Right now, TOPOEXT - * feature is enabled only on EPYC. So, these constants are based on - * EPYC supported configurations. We may need to handle the cases if - * these values change in future. - */ -/* Maximum core complexes in a node */ -#define MAX_CCX 2 -/* Maximum cores in a core complex */ -#define MAX_CORES_IN_CCX 4 -/* Maximum cores in a node */ -#define MAX_CORES_IN_NODE 8 -/* Maximum nodes in a socket */ -#define MAX_NODES_PER_SOCKET 4 - -/* - * Figure out the number of nodes required to build this config. - * Max cores in a node is 8 - */ -static int nodes_in_socket(int nr_cores) -{ - int nodes; - - nodes = DIV_ROUND_UP(nr_cores, MAX_CORES_IN_NODE); - - /* Hardware does not support config with 3 nodes, return 4 in that case */ - return (nodes == 3) ? 4 : nodes; -} - -/* - * Decide the number of cores in a core complex with the given nr_cores using - * following set constants MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE and - * MAX_NODES_PER_SOCKET. Maintain symmetry as much as possible - * L3 cache is shared across all cores in a core complex. 
So, this will also - * tell us how many cores are sharing the L3 cache. - */ -static int cores_in_core_complex(int nr_cores) -{ - int nodes; - - /* Check if we can fit all the cores in one core complex */ - if (nr_cores <= MAX_CORES_IN_CCX) { - return nr_cores; - } - /* Get the number of nodes required to build this config */ - nodes = nodes_in_socket(nr_cores); - - /* - * Divide the cores accros all the core complexes - * Return rounded up value - */ - return DIV_ROUND_UP(nr_cores, nodes * MAX_CCX); -} - /* Encode cache info for CPUID[8000001D] */ -static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, CPUState *cs, - uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) +static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, + X86CPUTopoInfo *topo_info, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) { - uint32_t l3_cores; + uint32_t l3_threads; assert(cache->size == cache->line_size * cache->associativity * cache->partitions * cache->sets); @@ -403,10 +340,10 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, CPUState *cs, /* L3 is shared among multiple cores */ if (cache->level == 3) { - l3_cores = cores_in_core_complex(cs->nr_cores); - *eax |= ((l3_cores * cs->nr_threads) - 1) << 14; + l3_threads = topo_info->cores_per_die * topo_info->threads_per_core; + *eax |= (l3_threads - 1) << 14; } else { - *eax |= ((cs->nr_threads - 1) << 14); + *eax |= ((topo_info->threads_per_core - 1) << 14); } assert(cache->line_size > 0); @@ -426,107 +363,58 @@ static void encode_cache_cpuid8000001d(CPUCacheInfo *cache, CPUState *cs, (cache->complex_indexing ? 
CACHE_COMPLEX_IDX : 0); } -/* Data structure to hold the configuration info for a given core index */ -struct core_topology { - /* core complex id of the current core index */ - int ccx_id; - /* - * Adjusted core index for this core in the topology - * This can be 0,1,2,3 with max 4 cores in a core complex - */ - int core_id; - /* Node id for this core index */ - int node_id; - /* Number of nodes in this config */ - int num_nodes; -}; - -/* - * Build the configuration closely match the EPYC hardware. Using the EPYC - * hardware configuration values (MAX_CCX, MAX_CORES_IN_CCX, MAX_CORES_IN_NODE) - * right now. This could change in future. - * nr_cores : Total number of cores in the config - * core_id : Core index of the current CPU - * topo : Data structure to hold all the config info for this core index - */ -static void build_core_topology(int nr_cores, int core_id, - struct core_topology *topo) -{ - int nodes, cores_in_ccx; - - /* First get the number of nodes required */ - nodes = nodes_in_socket(nr_cores); - - cores_in_ccx = cores_in_core_complex(nr_cores); - - topo->node_id = core_id / (cores_in_ccx * MAX_CCX); - topo->ccx_id = (core_id % (cores_in_ccx * MAX_CCX)) / cores_in_ccx; - topo->core_id = core_id % cores_in_ccx; - topo->num_nodes = nodes; -} - /* Encode cache info for CPUID[8000001E] */ -static void encode_topo_cpuid8000001e(CPUState *cs, X86CPU *cpu, - uint32_t *eax, uint32_t *ebx, - uint32_t *ecx, uint32_t *edx) +static void encode_topo_cpuid8000001e(X86CPU *cpu, X86CPUTopoInfo *topo_info, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) { - struct core_topology topo = {0}; - unsigned long nodes; - int shift; + X86CPUTopoIDs topo_ids; + + x86_topo_ids_from_apicid(cpu->apic_id, topo_info, &topo_ids); - build_core_topology(cs->nr_cores, cpu->core_id, &topo); *eax = cpu->apic_id; + /* - * CPUID_Fn8000001E_EBX - * 31:16 Reserved - * 15:8 Threads per core (The number of threads per core is - * Threads per core + 1) - * 7:0 Core id (see bit 
decoding below) - * SMT: - * 4:3 node id - * 2 Core complex id - * 1:0 Core id - * Non SMT: - * 5:4 node id - * 3 Core complex id - * 1:0 Core id + * CPUID_Fn8000001E_EBX [Core Identifiers] (CoreId) + * Read-only. Reset: 0000_XXXXh. + * See Core::X86::Cpuid::ExtApicId. + * Core::X86::Cpuid::CoreId_lthree[1:0]_core[3:0]_thread[1:0]; + * Bits Description + * 31:16 Reserved. + * 15:8 ThreadsPerCore: threads per core. Read-only. Reset: XXh. + * The number of threads per core is ThreadsPerCore+1. + * 7:0 CoreId: core ID. Read-only. Reset: XXh. + * + * NOTE: CoreId is already part of apic_id. Just use it. We can + * use all the 8 bits to represent the core_id here. */ - if (cs->nr_threads - 1) { - *ebx = ((cs->nr_threads - 1) << 8) | (topo.node_id << 3) | - (topo.ccx_id << 2) | topo.core_id; - } else { - *ebx = (topo.node_id << 4) | (topo.ccx_id << 3) | topo.core_id; - } + *ebx = ((topo_info->threads_per_core - 1) << 8) | (topo_ids.core_id & 0xFF); + /* - * CPUID_Fn8000001E_ECX - * 31:11 Reserved - * 10:8 Nodes per processor (Nodes per processor is number of nodes + 1) - * 7:0 Node id (see bit decoding below) - * 2 Socket id - * 1:0 Node id + * CPUID_Fn8000001E_ECX [Node Identifiers] (NodeId) + * Read-only. Reset: 0000_0XXXh. + * Core::X86::Cpuid::NodeId_lthree[1:0]_core[3:0]_thread[1:0]; + * Bits Description + * 31:11 Reserved. + * 10:8 NodesPerProcessor: Node per processor. Read-only. Reset: XXXb. + * ValidValues: + * Value Description + * 000b 1 node per processor. + * 001b 2 nodes per processor. + * 010b Reserved. + * 011b 4 nodes per processor. + * 111b-100b Reserved. + * 7:0 NodeId: Node ID. Read-only. Reset: XXh. + * + * NOTE: Hardware reserves 3 bits for number of nodes per processor. + * But users can create more nodes than the actual hardware can + * support. To generalize we can use all the upper 8 bits for nodes. + * NodeId is combination of node and socket_id which is already decoded + * in apic_id. Just use it by shifting.
*/ - if (topo.num_nodes <= 4) { - *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << 2) | - topo.node_id; - } else { - /* - * Node id fix up. Actual hardware supports up to 4 nodes. But with - * more than 32 cores, we may end up with more than 4 nodes. - * Node id is a combination of socket id and node id. Only requirement - * here is that this number should be unique accross the system. - * Shift the socket id to accommodate more nodes. We dont expect both - * socket id and node id to be big number at the same time. This is not - * an ideal config but we need to to support it. Max nodes we can have - * is 32 (255/8) with 8 cores per node and 255 max cores. We only need - * 5 bits for nodes. Find the left most set bit to represent the total - * number of nodes. find_last_bit returns last set bit(0 based). Left - * shift(+1) the socket id to represent all the nodes. - */ - nodes = topo.num_nodes - 1; - shift = find_last_bit(&nodes, 8); - *ecx = ((topo.num_nodes - 1) << 8) | (cpu->socket_id << (shift + 1)) | - topo.node_id; - } + *ecx = ((topo_info->dies_per_pkg - 1) << 8) | + ((cpu->apic_id >> apicid_die_offset(topo_info)) & 0xFF); + *edx = 0; } @@ -691,8 +579,20 @@ static CPUCacheInfo legacy_l3_cache = { #define INTEL_PT_CYCLE_BITMAP 0x1fff /* Support 0,2^(0~11) */ #define INTEL_PT_PSB_BITMAP (0x003f << 16) /* Support 2K,4K,8K,16K,32K,64K */ -static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, - uint32_t vendor2, uint32_t vendor3) +/* CPUID Leaf 0x1D constants: */ +#define INTEL_AMX_TILE_MAX_SUBLEAF 0x1 +#define INTEL_AMX_TOTAL_TILE_BYTES 0x2000 +#define INTEL_AMX_BYTES_PER_TILE 0x400 +#define INTEL_AMX_BYTES_PER_ROW 0x40 +#define INTEL_AMX_TILE_MAX_NAMES 0x8 +#define INTEL_AMX_TILE_MAX_ROWS 0x10 + +/* CPUID Leaf 0x1E constants: */ +#define INTEL_AMX_TMUL_MAX_K 0x10 +#define INTEL_AMX_TMUL_MAX_N 0x40 + +void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, + uint32_t vendor2, uint32_t vendor3) { int i; for (i = 0; i < 4; i++) { @@ -724,72 
+624,149 @@ static void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, CPUID_MTRR, CPUID_MCA, CPUID_CLFLUSH (needed for Win64) */ /* missing: CPUID_VME, CPUID_DTS, CPUID_SS, CPUID_HT, CPUID_TM, CPUID_PBE */ + +/* + * Kernel-only features that can be shown to usermode programs even if + * they aren't actually supported by TCG, because qemu-user only runs + * in CPL=3; remove them if they are ever implemented for system emulation. + */ +#if defined CONFIG_USER_ONLY +#define CPUID_EXT_KERNEL_FEATURES \ + (CPUID_EXT_PCID | CPUID_EXT_TSC_DEADLINE_TIMER) +#else +#define CPUID_EXT_KERNEL_FEATURES 0 +#endif #define TCG_EXT_FEATURES (CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | \ CPUID_EXT_MONITOR | CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | \ CPUID_EXT_SSE41 | CPUID_EXT_SSE42 | CPUID_EXT_POPCNT | \ CPUID_EXT_XSAVE | /* CPUID_EXT_OSXSAVE is dynamic */ \ - CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR) + CPUID_EXT_MOVBE | CPUID_EXT_AES | CPUID_EXT_HYPERVISOR | \ + CPUID_EXT_RDRAND | CPUID_EXT_AVX | CPUID_EXT_F16C | \ + CPUID_EXT_FMA | CPUID_EXT_X2APIC | CPUID_EXT_KERNEL_FEATURES) /* missing: CPUID_EXT_DTES64, CPUID_EXT_DSCPL, CPUID_EXT_VMX, CPUID_EXT_SMX, - CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_FMA, + CPUID_EXT_EST, CPUID_EXT_TM2, CPUID_EXT_CID, CPUID_EXT_XTPR, CPUID_EXT_PDCM, CPUID_EXT_PCID, CPUID_EXT_DCA, - CPUID_EXT_X2APIC, CPUID_EXT_TSC_DEADLINE_TIMER, CPUID_EXT_AVX, - CPUID_EXT_F16C, CPUID_EXT_RDRAND */ + CPUID_EXT_TSC_DEADLINE_TIMER + */ #ifdef TARGET_X86_64 -#define TCG_EXT2_X86_64_FEATURES (CPUID_EXT2_SYSCALL | CPUID_EXT2_LM) +#define TCG_EXT2_X86_64_FEATURES CPUID_EXT2_LM #else #define TCG_EXT2_X86_64_FEATURES 0 #endif +/* + * CPUID_*_KERNEL_FEATURES denotes bits and features that are not usable + * in usermode or by 32-bit programs. Those are added to supported + * TCG features unconditionally in user-mode emulation mode. 
This may + * indeed seem strange or incorrect, but it works because code running + * under usermode emulation cannot access them. + * + * Even for long mode, qemu-i386 is not running "a userspace program on a + * 32-bit CPU"; it's running "a userspace program with a 32-bit code segment" + * and therefore using the 32-bit ABI; the CPU itself might be 64-bit + * but again the difference is only visible in kernel mode. + */ +#if defined CONFIG_LINUX_USER +#define CPUID_EXT2_KERNEL_FEATURES (CPUID_EXT2_LM | CPUID_EXT2_FFXSR) +#elif defined CONFIG_USER_ONLY +/* FIXME: Long mode not yet supported for i386 bsd-user */ +#define CPUID_EXT2_KERNEL_FEATURES CPUID_EXT2_FFXSR +#else +#define CPUID_EXT2_KERNEL_FEATURES 0 +#endif + #define TCG_EXT2_FEATURES ((TCG_FEATURES & CPUID_EXT2_AMD_ALIASES) | \ CPUID_EXT2_NX | CPUID_EXT2_MMXEXT | CPUID_EXT2_RDTSCP | \ CPUID_EXT2_3DNOW | CPUID_EXT2_3DNOWEXT | CPUID_EXT2_PDPE1GB | \ - TCG_EXT2_X86_64_FEATURES) + CPUID_EXT2_SYSCALL | TCG_EXT2_X86_64_FEATURES | \ + CPUID_EXT2_KERNEL_FEATURES) + +#if defined CONFIG_USER_ONLY +#define CPUID_EXT3_KERNEL_FEATURES CPUID_EXT3_OSVW +#else +#define CPUID_EXT3_KERNEL_FEATURES 0 +#endif + #define TCG_EXT3_FEATURES (CPUID_EXT3_LAHF_LM | CPUID_EXT3_SVM | \ - CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A) + CPUID_EXT3_CR8LEG | CPUID_EXT3_ABM | CPUID_EXT3_SSE4A | \ + CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_KERNEL_FEATURES) + #define TCG_EXT4_FEATURES 0 -#define TCG_SVM_FEATURES CPUID_SVM_NPT + +#if defined CONFIG_USER_ONLY +#define CPUID_SVM_KERNEL_FEATURES (CPUID_SVM_NRIPSAVE | CPUID_SVM_VNMI) +#else +#define CPUID_SVM_KERNEL_FEATURES 0 +#endif +#define TCG_SVM_FEATURES (CPUID_SVM_NPT | CPUID_SVM_VGIF | \ + CPUID_SVM_SVME_ADDR_CHK | CPUID_SVM_KERNEL_FEATURES) + #define TCG_KVM_FEATURES 0 + +#if defined CONFIG_USER_ONLY +#define CPUID_7_0_EBX_KERNEL_FEATURES CPUID_7_0_EBX_INVPCID +#else +#define CPUID_7_0_EBX_KERNEL_FEATURES 0 +#endif #define TCG_7_0_EBX_FEATURES (CPUID_7_0_EBX_SMEP | 
CPUID_7_0_EBX_SMAP | \ CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ADX | \ CPUID_7_0_EBX_PCOMMIT | CPUID_7_0_EBX_CLFLUSHOPT | \ CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_FSGSBASE | \ - CPUID_7_0_EBX_ERMS) + CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_RDSEED | \ + CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_KERNEL_FEATURES) /* missing: - CPUID_7_0_EBX_HLE, CPUID_7_0_EBX_AVX2, - CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM, - CPUID_7_0_EBX_RDSEED */ -#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_PKU | \ + CPUID_7_0_EBX_HLE + CPUID_7_0_EBX_INVPCID, CPUID_7_0_EBX_RTM */ + +#if !defined CONFIG_USER_ONLY || defined CONFIG_LINUX +#define TCG_7_0_ECX_RDPID CPUID_7_0_ECX_RDPID +#else +#define TCG_7_0_ECX_RDPID 0 +#endif +#define TCG_7_0_ECX_FEATURES (CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | \ /* CPUID_7_0_ECX_OSPKE is dynamic */ \ - CPUID_7_0_ECX_LA57) -#define TCG_7_0_EDX_FEATURES 0 + CPUID_7_0_ECX_LA57 | CPUID_7_0_ECX_PKS | CPUID_7_0_ECX_VAES | \ + TCG_7_0_ECX_RDPID) + +#if defined CONFIG_USER_ONLY +#define CPUID_7_0_EDX_KERNEL_FEATURES (CPUID_7_0_EDX_SPEC_CTRL | \ + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD) +#else +#define CPUID_7_0_EDX_KERNEL_FEATURES 0 +#endif +#define TCG_7_0_EDX_FEATURES (CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_KERNEL_FEATURES) + +#define TCG_7_1_EAX_FEATURES (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | \ + CPUID_7_1_EAX_FSRC | CPUID_7_1_EAX_CMPCCXADD) +#define TCG_7_1_EDX_FEATURES 0 +#define TCG_7_2_EDX_FEATURES 0 #define TCG_APM_FEATURES 0 #define TCG_6_EAX_FEATURES CPUID_6_EAX_ARAT #define TCG_XSAVE_FEATURES (CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XGETBV1) /* missing: CPUID_XSAVE_XSAVEC, CPUID_XSAVE_XSAVES */ +#define TCG_14_0_ECX_FEATURES 0 +#define TCG_SGX_12_0_EAX_FEATURES 0 +#define TCG_SGX_12_0_EBX_FEATURES 0 +#define TCG_SGX_12_1_EAX_FEATURES 0 + +#if defined CONFIG_USER_ONLY +#define CPUID_8000_0008_EBX_KERNEL_FEATURES (CPUID_8000_0008_EBX_IBPB | \ + CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | \ 
+ CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | CPUID_8000_0008_EBX_AMD_SSBD | \ + CPUID_8000_0008_EBX_AMD_PSFD) +#else +#define CPUID_8000_0008_EBX_KERNEL_FEATURES 0 +#endif -typedef struct FeatureWordInfo { - /* feature flags names are taken from "Intel Processor Identification and - * the CPUID Instruction" and AMD's "CPUID Specification". - * In cases of disagreement between feature naming conventions, - * aliases may be added. - */ - const char *feat_names[32]; - uint32_t cpuid_eax; /* Input EAX for CPUID */ - bool cpuid_needs_ecx; /* CPUID instruction uses ECX as input */ - uint32_t cpuid_ecx; /* Input ECX value for CPUID */ - int cpuid_reg; /* output register (R_* constant) */ - uint32_t tcg_features; /* Feature flags supported by TCG */ - uint32_t unmigratable_flags; /* Feature flags known to be unmigratable */ - uint32_t migratable_flags; /* Feature flags known to be migratable */ - /* Features that shouldn't be auto-enabled by "-cpu host" */ - uint32_t no_autoenable_flags; -} FeatureWordInfo; - -static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { +#define TCG_8000_0008_EBX (CPUID_8000_0008_EBX_XSAVEERPTR | \ + CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_KERNEL_FEATURES) + +FeatureWordInfo feature_word_info[FEATURE_WORDS] = { [FEAT_1_EDX] = { + .type = CPUID_FEATURE_WORD, .feat_names = { "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce", @@ -800,10 +777,12 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "fxsr", "sse", "sse2", "ss", "ht" /* Intel htt */, "tm", "ia64", "pbe", }, - .cpuid_eax = 1, .cpuid_reg = R_EDX, + .cpuid = {.eax = 1, .reg = R_EDX, }, .tcg_features = TCG_FEATURES, + .no_autoenable_flags = CPUID_HT, }, [FEAT_1_ECX] = { + .type = CPUID_FEATURE_WORD, .feat_names = { "pni" /* Intel,AMD sse3 */, "pclmulqdq", "dtes64", "monitor", "ds-cpl", "vmx", "smx", "est", @@ -814,7 +793,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "tsc-deadline", "aes", "xsave", NULL /* osxsave */, "avx", "f16c", "rdrand", 
"hypervisor", }, - .cpuid_eax = 1, .cpuid_reg = R_ECX, + .cpuid = { .eax = 1, .reg = R_ECX, }, .tcg_features = TCG_EXT_FEATURES, }, /* Feature names that are already defined on feature_name[] but @@ -823,6 +802,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { * to features[FEAT_8000_0001_EDX] if and only if CPU vendor is AMD. */ [FEAT_8000_0001_EDX] = { + .type = CPUID_FEATURE_WORD, .feat_names = { NULL /* fpu */, NULL /* vme */, NULL /* de */, NULL /* pse */, NULL /* tsc */, NULL /* msr */, NULL /* pae */, NULL /* mce */, @@ -833,10 +813,11 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL /* fxsr */, "fxsr-opt", "pdpe1gb", "rdtscp", NULL, "lm", "3dnowext", "3dnow", }, - .cpuid_eax = 0x80000001, .cpuid_reg = R_EDX, + .cpuid = { .eax = 0x80000001, .reg = R_EDX, }, .tcg_features = TCG_EXT2_FEATURES, }, [FEAT_8000_0001_ECX] = { + .type = CPUID_FEATURE_WORD, .feat_names = { "lahf-lm", "cmp-legacy", "svm", "extapic", "cr8legacy", "abm", "sse4a", "misalignsse", @@ -847,7 +828,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "perfctr-nb", NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, - .cpuid_eax = 0x80000001, .cpuid_reg = R_ECX, + .cpuid = { .eax = 0x80000001, .reg = R_ECX, }, .tcg_features = TCG_EXT3_FEATURES, /* * TOPOEXT is always allowed but can't be enabled blindly by @@ -857,6 +838,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { .no_autoenable_flags = CPUID_EXT3_TOPOEXT, }, [FEAT_C000_0001_EDX] = { + .type = CPUID_FEATURE_WORD, .feat_names = { NULL, NULL, "xstore", "xstore-en", NULL, NULL, "xcrypt", "xcrypt-en", @@ -867,24 +849,26 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, - .cpuid_eax = 0xC0000001, .cpuid_reg = R_EDX, + .cpuid = { .eax = 0xC0000001, .reg = R_EDX, }, .tcg_features = TCG_EXT4_FEATURES, }, [FEAT_KVM] = { + .type = CPUID_FEATURE_WORD, .feat_names = { "kvmclock", "kvm-nopiodelay", "kvm-mmu", "kvmclock", 
"kvm-asyncpf", "kvm-steal-time", "kvm-pv-eoi", "kvm-pv-unhalt", - NULL, "kvm-pv-tlb-flush", NULL, "kvm-pv-ipi", - NULL, NULL, NULL, NULL, + NULL, "kvm-pv-tlb-flush", "kvm-asyncpf-vmexit", "kvm-pv-ipi", + "kvm-poll-control", "kvm-pv-sched-yield", "kvm-asyncpf-int", "kvm-msi-ext-dest-id", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, "kvmclock-stable-bit", NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, - .cpuid_eax = KVM_CPUID_FEATURES, .cpuid_reg = R_EAX, + .cpuid = { .eax = KVM_CPUID_FEATURES, .reg = R_EAX, }, .tcg_features = TCG_KVM_FEATURES, }, [FEAT_KVM_HINTS] = { + .type = CPUID_FEATURE_WORD, .feat_names = { "kvm-hint-dedicated", NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -895,7 +879,7 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, - .cpuid_eax = KVM_CPUID_FEATURES, .cpuid_reg = R_EDX, + .cpuid = { .eax = KVM_CPUID_FEATURES, .reg = R_EDX, }, .tcg_features = TCG_KVM_FEATURES, /* * KVM hints aren't auto-enabled by -cpu host, they need to be @@ -903,71 +887,25 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { */ .no_autoenable_flags = ~0U, }, - [FEAT_HYPERV_EAX] = { - .feat_names = { - NULL /* hv_msr_vp_runtime_access */, NULL /* hv_msr_time_refcount_access */, - NULL /* hv_msr_synic_access */, NULL /* hv_msr_stimer_access */, - NULL /* hv_msr_apic_access */, NULL /* hv_msr_hypercall_access */, - NULL /* hv_vpindex_access */, NULL /* hv_msr_reset_access */, - NULL /* hv_msr_stats_access */, NULL /* hv_reftsc_access */, - NULL /* hv_msr_idle_access */, NULL /* hv_msr_frequency_access */, - NULL /* hv_msr_debug_access */, NULL /* hv_msr_reenlightenment_access */, - NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - }, - .cpuid_eax = 0x40000003, .cpuid_reg = R_EAX, - }, - [FEAT_HYPERV_EBX] = { - .feat_names = { - NULL /* hv_create_partitions */, NULL /* hv_access_partition_id */, - NULL /* hv_access_memory_pool */, NULL 
/* hv_adjust_message_buffers */, - NULL /* hv_post_messages */, NULL /* hv_signal_events */, - NULL /* hv_create_port */, NULL /* hv_connect_port */, - NULL /* hv_access_stats */, NULL, NULL, NULL /* hv_debugging */, - NULL /* hv_cpu_power_management */, NULL /* hv_configure_profiler */, - NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - }, - .cpuid_eax = 0x40000003, .cpuid_reg = R_EBX, - }, - [FEAT_HYPERV_EDX] = { - .feat_names = { - NULL /* hv_mwait */, NULL /* hv_guest_debugging */, - NULL /* hv_perf_monitor */, NULL /* hv_cpu_dynamic_part */, - NULL /* hv_hypercall_params_xmm */, NULL /* hv_guest_idle_state */, - NULL, NULL, - NULL, NULL, NULL /* hv_guest_crash_msr */, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - }, - .cpuid_eax = 0x40000003, .cpuid_reg = R_EDX, - }, [FEAT_SVM] = { + .type = CPUID_FEATURE_WORD, .feat_names = { "npt", "lbrv", "svm-lock", "nrip-save", "tsc-scale", "vmcb-clean", "flushbyasid", "decodeassists", NULL, NULL, "pause-filter", NULL, - "pfthreshold", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + "pfthreshold", "avic", NULL, "v-vmsave-vmload", + "vgif", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, "vnmi", NULL, NULL, + "svme-addr-chk", NULL, NULL, NULL, }, - .cpuid_eax = 0x8000000A, .cpuid_reg = R_EDX, + .cpuid = { .eax = 0x8000000A, .reg = R_EDX, }, .tcg_features = TCG_SVM_FEATURES, }, [FEAT_7_0_EBX] = { + .type = CPUID_FEATURE_WORD, .feat_names = { - "fsgsbase", "tsc-adjust", NULL, "bmi1", + "fsgsbase", "tsc-adjust", "sgx", "bmi1", "hle", "avx2", NULL, "smep", "bmi2", "erms", "invpcid", "rtm", NULL, NULL, "mpx", NULL, @@ -976,45 +914,110 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { "clwb", "intel-pt", "avx512pf", "avx512er", "avx512cd", "sha-ni", "avx512bw", "avx512vl", }, - .cpuid_eax = 7, - 
.cpuid_needs_ecx = true, .cpuid_ecx = 0, - .cpuid_reg = R_EBX, + .cpuid = { + .eax = 7, + .needs_ecx = true, .ecx = 0, + .reg = R_EBX, + }, .tcg_features = TCG_7_0_EBX_FEATURES, }, [FEAT_7_0_ECX] = { + .type = CPUID_FEATURE_WORD, .feat_names = { NULL, "avx512vbmi", "umip", "pku", - NULL /* ospke */, NULL, "avx512vbmi2", NULL, + NULL /* ospke */, "waitpkg", "avx512vbmi2", NULL, "gfni", "vaes", "vpclmulqdq", "avx512vnni", "avx512bitalg", NULL, "avx512-vpopcntdq", NULL, "la57", NULL, NULL, NULL, NULL, NULL, "rdpid", NULL, - NULL, "cldemote", NULL, NULL, - NULL, NULL, NULL, NULL, + "bus-lock-detect", "cldemote", NULL, "movdiri", + "movdir64b", NULL, "sgxlc", "pks", + }, + .cpuid = { + .eax = 7, + .needs_ecx = true, .ecx = 0, + .reg = R_ECX, }, - .cpuid_eax = 7, - .cpuid_needs_ecx = true, .cpuid_ecx = 0, - .cpuid_reg = R_ECX, .tcg_features = TCG_7_0_ECX_FEATURES, }, [FEAT_7_0_EDX] = { + .type = CPUID_FEATURE_WORD, .feat_names = { NULL, NULL, "avx512-4vnniw", "avx512-4fmaps", + "fsrm", NULL, NULL, NULL, + "avx512-vp2intersect", NULL, "md-clear", NULL, + NULL, NULL, "serialize", NULL, + "tsx-ldtrk", NULL, NULL /* pconfig */, "arch-lbr", + NULL, NULL, "amx-bf16", "avx512-fp16", + "amx-tile", "amx-int8", "spec-ctrl", "stibp", + "flush-l1d", "arch-capabilities", "core-capability", "ssbd", + }, + .cpuid = { + .eax = 7, + .needs_ecx = true, .ecx = 0, + .reg = R_EDX, + }, + .tcg_features = TCG_7_0_EDX_FEATURES, + }, + [FEAT_7_1_EAX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { NULL, NULL, NULL, NULL, + "avx-vnni", "avx512-bf16", NULL, "cmpccxadd", + NULL, NULL, "fzrm", "fsrs", + "fsrc", NULL, NULL, NULL, NULL, NULL, NULL, NULL, + NULL, "amx-fp16", NULL, "avx-ifma", NULL, NULL, NULL, NULL, - NULL, NULL, "pconfig", NULL, NULL, NULL, NULL, NULL, - NULL, NULL, "spec-ctrl", NULL, - NULL, "arch-capabilities", NULL, "ssbd", }, - .cpuid_eax = 7, - .cpuid_needs_ecx = true, .cpuid_ecx = 0, - .cpuid_reg = R_EDX, - .tcg_features = TCG_7_0_EDX_FEATURES, - .unmigratable_flags = 
CPUID_7_0_EDX_ARCH_CAPABILITIES, + .cpuid = { + .eax = 7, + .needs_ecx = true, .ecx = 1, + .reg = R_EAX, + }, + .tcg_features = TCG_7_1_EAX_FEATURES, + }, + [FEAT_7_1_EDX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + "avx-vnni-int8", "avx-ne-convert", NULL, NULL, + "amx-complex", NULL, NULL, NULL, + NULL, NULL, "prefetchiti", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 7, + .needs_ecx = true, .ecx = 1, + .reg = R_EDX, + }, + .tcg_features = TCG_7_1_EDX_FEATURES, + }, + [FEAT_7_2_EDX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, "mcdt-no", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 7, + .needs_ecx = true, .ecx = 2, + .reg = R_EDX, + }, + .tcg_features = TCG_7_2_EDX_FEATURES, }, [FEAT_8000_0007_EDX] = { + .type = CPUID_FEATURE_WORD, .feat_names = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1025,31 +1028,47 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, - .cpuid_eax = 0x80000007, - .cpuid_reg = R_EDX, + .cpuid = { .eax = 0x80000007, .reg = R_EDX, }, .tcg_features = TCG_APM_FEATURES, .unmigratable_flags = CPUID_APM_INVTSC, }, [FEAT_8000_0008_EBX] = { + .type = CPUID_FEATURE_WORD, .feat_names = { - NULL, NULL, NULL, NULL, + "clzero", NULL, "xsaveerptr", NULL, NULL, NULL, NULL, NULL, NULL, "wbnoinvd", NULL, NULL, - "ibpb", NULL, NULL, NULL, - NULL, NULL, NULL, NULL, + "ibpb", NULL, "ibrs", "amd-stibp", + NULL, "stibp-always-on", NULL, NULL, NULL, NULL, NULL, NULL, "amd-ssbd", "virt-ssbd", "amd-no-ssb", NULL, + "amd-psfd", NULL, NULL, NULL, + }, + .cpuid = { .eax = 0x80000008, .reg = R_EBX, }, + .tcg_features = TCG_8000_0008_EBX, + .unmigratable_flags = 0, + }, + 
[FEAT_8000_0021_EAX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + "no-nested-data-bp", NULL, "lfence-always-serializing", NULL, + NULL, NULL, "null-sel-clr-base", NULL, + "auto-ibrs", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, - .cpuid_eax = 0x80000008, - .cpuid_reg = R_EBX, + .cpuid = { .eax = 0x80000021, .reg = R_EAX, }, .tcg_features = 0, .unmigratable_flags = 0, }, [FEAT_XSAVE] = { + .type = CPUID_FEATURE_WORD, .feat_names = { "xsaveopt", "xsavec", "xgetbv1", "xsaves", - NULL, NULL, NULL, NULL, + "xfd", NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, @@ -1057,12 +1076,43 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, - .cpuid_eax = 0xd, - .cpuid_needs_ecx = true, .cpuid_ecx = 1, - .cpuid_reg = R_EAX, + .cpuid = { + .eax = 0xd, + .needs_ecx = true, .ecx = 1, + .reg = R_EAX, + }, .tcg_features = TCG_XSAVE_FEATURES, }, + [FEAT_XSAVE_XSS_LO] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 0xD, + .needs_ecx = true, + .ecx = 1, + .reg = R_ECX, + }, + }, + [FEAT_XSAVE_XSS_HI] = { + .type = CPUID_FEATURE_WORD, + .cpuid = { + .eax = 0xD, + .needs_ecx = true, + .ecx = 1, + .reg = R_EDX + }, + }, [FEAT_6_EAX] = { + .type = CPUID_FEATURE_WORD, .feat_names = { NULL, NULL, "arat", NULL, NULL, NULL, NULL, NULL, @@ -1073,25 +1123,436 @@ static FeatureWordInfo feature_word_info[FEATURE_WORDS] = { NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, - .cpuid_eax = 6, .cpuid_reg = R_EAX, + .cpuid = { .eax = 6, .reg = R_EAX, }, .tcg_features = TCG_6_EAX_FEATURES, }, - [FEAT_XSAVE_COMP_LO] = { - .cpuid_eax = 0xD, - 
.cpuid_needs_ecx = true, .cpuid_ecx = 0, - .cpuid_reg = R_EAX, + [FEAT_XSAVE_XCR0_LO] = { + .type = CPUID_FEATURE_WORD, + .cpuid = { + .eax = 0xD, + .needs_ecx = true, .ecx = 0, + .reg = R_EAX, + }, .tcg_features = ~0U, .migratable_flags = XSTATE_FP_MASK | XSTATE_SSE_MASK | XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK | XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK | XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK, }, - [FEAT_XSAVE_COMP_HI] = { - .cpuid_eax = 0xD, - .cpuid_needs_ecx = true, .cpuid_ecx = 0, - .cpuid_reg = R_EDX, + [FEAT_XSAVE_XCR0_HI] = { + .type = CPUID_FEATURE_WORD, + .cpuid = { + .eax = 0xD, + .needs_ecx = true, .ecx = 0, + .reg = R_EDX, + }, .tcg_features = ~0U, }, + /*Below are MSR exposed features*/ + [FEAT_ARCH_CAPABILITIES] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + "rdctl-no", "ibrs-all", "rsba", "skip-l1dfl-vmentry", + "ssb-no", "mds-no", "pschange-mc-no", "tsx-ctrl", + "taa-no", NULL, NULL, NULL, + NULL, "sbdr-ssdp-no", "fbsdp-no", "psdp-no", + NULL, "fb-clear", NULL, NULL, + NULL, NULL, NULL, NULL, + "pbrsb-no", NULL, "gds-no", NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_ARCH_CAPABILITIES, + }, + /* + * FEAT_ARCH_CAPABILITIES only affects a read-only MSR, which + * cannot be read from user mode. Therefore, it has no impact + * on any user-mode operation, and warnings about unsupported + * features do not matter.
+ */ + .tcg_features = ~0U, + }, + [FEAT_CORE_CAPABILITY] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, "split-lock-detect", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_CORE_CAPABILITY, + }, + }, + [FEAT_PERF_CAPABILITIES] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, "full-width-write", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_PERF_CAPABILITIES, + }, + }, + + [FEAT_VMX_PROCBASED_CTLS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + NULL, NULL, "vmx-vintr-pending", "vmx-tsc-offset", + NULL, NULL, NULL, "vmx-hlt-exit", + NULL, "vmx-invlpg-exit", "vmx-mwait-exit", "vmx-rdpmc-exit", + "vmx-rdtsc-exit", NULL, NULL, "vmx-cr3-load-noexit", + "vmx-cr3-store-noexit", NULL, NULL, "vmx-cr8-load-exit", + "vmx-cr8-store-exit", "vmx-flexpriority", "vmx-vnmi-pending", "vmx-movdr-exit", + "vmx-io-exit", "vmx-io-bitmap", NULL, "vmx-mtf", + "vmx-msr-bitmap", "vmx-monitor-exit", "vmx-pause-exit", "vmx-secondary-ctls", + }, + .msr = { + .index = MSR_IA32_VMX_TRUE_PROCBASED_CTLS, + } + }, + + [FEAT_VMX_SECONDARY_CTLS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + "vmx-apicv-xapic", "vmx-ept", "vmx-desc-exit", "vmx-rdtscp-exit", + "vmx-apicv-x2apic", "vmx-vpid", "vmx-wbinvd-exit", "vmx-unrestricted-guest", + "vmx-apicv-register", "vmx-apicv-vid", "vmx-ple", "vmx-rdrand-exit", + "vmx-invpcid-exit", "vmx-vmfunc", "vmx-shadow-vmcs", "vmx-encls-exit", + "vmx-rdseed-exit", "vmx-pml", NULL, NULL, + "vmx-xsaves", NULL, NULL, NULL, + NULL, "vmx-tsc-scaling", "vmx-enable-user-wait-pause", NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_VMX_PROCBASED_CTLS2, + } + }, + + 
[FEAT_VMX_PINBASED_CTLS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + "vmx-intr-exit", NULL, NULL, "vmx-nmi-exit", + NULL, "vmx-vnmi", "vmx-preemption-timer", "vmx-posted-intr", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_VMX_TRUE_PINBASED_CTLS, + } + }, + + [FEAT_VMX_EXIT_CTLS] = { + .type = MSR_FEATURE_WORD, + /* + * VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE is copied from + * the LM CPUID bit. + */ + .feat_names = { + NULL, NULL, "vmx-exit-nosave-debugctl", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL /* vmx-exit-host-addr-space-size */, NULL, NULL, + "vmx-exit-load-perf-global-ctrl", NULL, NULL, "vmx-exit-ack-intr", + NULL, NULL, "vmx-exit-save-pat", "vmx-exit-load-pat", + "vmx-exit-save-efer", "vmx-exit-load-efer", + "vmx-exit-save-preemption-timer", "vmx-exit-clear-bndcfgs", + NULL, "vmx-exit-clear-rtit-ctl", NULL, NULL, + NULL, "vmx-exit-load-pkrs", NULL, NULL, + }, + .msr = { + .index = MSR_IA32_VMX_TRUE_EXIT_CTLS, + } + }, + + [FEAT_VMX_ENTRY_CTLS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + NULL, NULL, "vmx-entry-noload-debugctl", NULL, + NULL, NULL, NULL, NULL, + NULL, "vmx-entry-ia32e-mode", NULL, NULL, + NULL, "vmx-entry-load-perf-global-ctrl", "vmx-entry-load-pat", "vmx-entry-load-efer", + "vmx-entry-load-bndcfgs", NULL, "vmx-entry-load-rtit-ctl", NULL, + NULL, NULL, "vmx-entry-load-pkrs", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_VMX_TRUE_ENTRY_CTLS, + } + }, + + [FEAT_VMX_MISC] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, "vmx-store-lma", "vmx-activity-hlt", "vmx-activity-shutdown", + "vmx-activity-wait-sipi", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, "vmx-vmwrite-vmexit-fields", "vmx-zero-len-inject", NULL, + }, + .msr = 
{ + .index = MSR_IA32_VMX_MISC, + } + }, + + [FEAT_VMX_EPT_VPID_CAPS] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + "vmx-ept-execonly", NULL, NULL, NULL, + NULL, NULL, "vmx-page-walk-4", "vmx-page-walk-5", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "vmx-ept-2mb", "vmx-ept-1gb", NULL, NULL, + "vmx-invept", "vmx-eptad", "vmx-ept-advanced-exitinfo", NULL, + NULL, "vmx-invept-single-context", "vmx-invept-all-context", NULL, + NULL, NULL, NULL, NULL, + "vmx-invvpid", NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + "vmx-invvpid-single-addr", "vmx-invept-single-context", + "vmx-invvpid-all-context", "vmx-invept-single-context-noglobals", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .msr = { + .index = MSR_IA32_VMX_EPT_VPID_CAP, + } + }, + + [FEAT_VMX_BASIC] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + [54] = "vmx-ins-outs", + [55] = "vmx-true-ctls", + [56] = "vmx-any-errcode", + }, + .msr = { + .index = MSR_IA32_VMX_BASIC, + }, + /* Just to be safe - we don't support setting the MSEG version field. 
*/ + .no_autoenable_flags = MSR_VMX_BASIC_DUAL_MONITOR, + }, + + [FEAT_VMX_VMFUNC] = { + .type = MSR_FEATURE_WORD, + .feat_names = { + [0] = "vmx-eptp-switching", + }, + .msr = { + .index = MSR_IA32_VMX_VMFUNC, + } + }, + + [FEAT_14_0_ECX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "intel-pt-lip", + }, + .cpuid = { + .eax = 0x14, + .needs_ecx = true, .ecx = 0, + .reg = R_ECX, + }, + .tcg_features = TCG_14_0_ECX_FEATURES, + }, + + [FEAT_SGX_12_0_EAX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + "sgx1", "sgx2", NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, "sgx-edeccssa", + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 0x12, + .needs_ecx = true, .ecx = 0, + .reg = R_EAX, + }, + .tcg_features = TCG_SGX_12_0_EAX_FEATURES, + }, + + [FEAT_SGX_12_0_EBX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + "sgx-exinfo" , NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 0x12, + .needs_ecx = true, .ecx = 0, + .reg = R_EBX, + }, + .tcg_features = TCG_SGX_12_0_EBX_FEATURES, + }, + + [FEAT_SGX_12_1_EAX] = { + .type = CPUID_FEATURE_WORD, + .feat_names = { + NULL, "sgx-debug", "sgx-mode64", NULL, + "sgx-provisionkey", "sgx-tokenkey", NULL, "sgx-kss", + NULL, NULL, "sgx-aex-notify", NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + NULL, NULL, NULL, NULL, + }, + .cpuid = { + .eax = 0x12, + .needs_ecx = true, .ecx = 1, + .reg = R_EAX, + }, + .tcg_features = TCG_SGX_12_1_EAX_FEATURES, + }, +}; + +typedef struct 
FeatureMask { + FeatureWord index; + uint64_t mask; +} FeatureMask; + +typedef struct FeatureDep { + FeatureMask from, to; +} FeatureDep; + +static FeatureDep feature_dependencies[] = { + { + .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_ARCH_CAPABILITIES }, + .to = { FEAT_ARCH_CAPABILITIES, ~0ull }, + }, + { + .from = { FEAT_7_0_EDX, CPUID_7_0_EDX_CORE_CAPABILITY }, + .to = { FEAT_CORE_CAPABILITY, ~0ull }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_PDCM }, + .to = { FEAT_PERF_CAPABILITIES, ~0ull }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_VMX }, + .to = { FEAT_VMX_PROCBASED_CTLS, ~0ull }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_VMX }, + .to = { FEAT_VMX_PINBASED_CTLS, ~0ull }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_VMX }, + .to = { FEAT_VMX_EXIT_CTLS, ~0ull }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_VMX }, + .to = { FEAT_VMX_ENTRY_CTLS, ~0ull }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_VMX }, + .to = { FEAT_VMX_MISC, ~0ull }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_VMX }, + .to = { FEAT_VMX_BASIC, ~0ull }, + }, + { + .from = { FEAT_8000_0001_EDX, CPUID_EXT2_LM }, + .to = { FEAT_VMX_ENTRY_CTLS, VMX_VM_ENTRY_IA32E_MODE }, + }, + { + .from = { FEAT_VMX_PROCBASED_CTLS, VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS }, + .to = { FEAT_VMX_SECONDARY_CTLS, ~0ull }, + }, + { + .from = { FEAT_XSAVE, CPUID_XSAVE_XSAVES }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_XSAVES }, + }, + { + .from = { FEAT_1_ECX, CPUID_EXT_RDRAND }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDRAND_EXITING }, + }, + { + .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_INVPCID }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_INVPCID }, + }, + { + .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_MPX }, + .to = { FEAT_VMX_EXIT_CTLS, VMX_VM_EXIT_CLEAR_BNDCFGS }, + }, + { + .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_MPX }, + .to = { FEAT_VMX_ENTRY_CTLS, VMX_VM_ENTRY_LOAD_BNDCFGS }, + }, + { + .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_RDSEED }, + .to = { FEAT_VMX_SECONDARY_CTLS, 
VMX_SECONDARY_EXEC_RDSEED_EXITING }, + }, + { + .from = { FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT }, + .to = { FEAT_14_0_ECX, ~0ull }, + }, + { + .from = { FEAT_8000_0001_EDX, CPUID_EXT2_RDTSCP }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_RDTSCP }, + }, + { + .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_EPT }, + .to = { FEAT_VMX_EPT_VPID_CAPS, 0xffffffffull }, + }, + { + .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_EPT }, + .to = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST }, + }, + { + .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_VPID }, + .to = { FEAT_VMX_EPT_VPID_CAPS, 0xffffffffull << 32 }, + }, + { + .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_VMFUNC }, + .to = { FEAT_VMX_VMFUNC, ~0ull }, + }, + { + .from = { FEAT_8000_0001_ECX, CPUID_EXT3_SVM }, + .to = { FEAT_SVM, ~0ull }, + }, + { + .from = { FEAT_VMX_SECONDARY_CTLS, VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE }, + .to = { FEAT_7_0_ECX, CPUID_7_0_ECX_WAITPKG }, + }, }; typedef struct X86RegisterInfo32 { @@ -1115,65 +1576,67 @@ static const X86RegisterInfo32 x86_reg_info_32[CPU_NB_REGS32] = { }; #undef REGISTER -typedef struct ExtSaveArea { - uint32_t feature, bits; - uint32_t offset, size; -} ExtSaveArea; +/* CPUID feature bits available in XSS */ +#define CPUID_XSTATE_XSS_MASK (XSTATE_ARCH_LBR_MASK) -static const ExtSaveArea x86_ext_save_areas[] = { +ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT] = { [XSTATE_FP_BIT] = { /* x87 FP state component is always enabled if XSAVE is supported */ .feature = FEAT_1_ECX, .bits = CPUID_EXT_XSAVE, - /* x87 state is in the legacy region of the XSAVE area */ - .offset = 0, .size = sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader), }, [XSTATE_SSE_BIT] = { /* SSE state component is always enabled if XSAVE is supported */ .feature = FEAT_1_ECX, .bits = CPUID_EXT_XSAVE, - /* SSE state is in the legacy region of the XSAVE area */ - .offset = 0, .size = 
sizeof(X86LegacyXSaveArea) + sizeof(X86XSaveHeader), }, [XSTATE_YMM_BIT] = { .feature = FEAT_1_ECX, .bits = CPUID_EXT_AVX, - .offset = offsetof(X86XSaveArea, avx_state), .size = sizeof(XSaveAVX) }, [XSTATE_BNDREGS_BIT] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, - .offset = offsetof(X86XSaveArea, bndreg_state), .size = sizeof(XSaveBNDREG) }, [XSTATE_BNDCSR_BIT] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_MPX, - .offset = offsetof(X86XSaveArea, bndcsr_state), .size = sizeof(XSaveBNDCSR) }, [XSTATE_OPMASK_BIT] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, - .offset = offsetof(X86XSaveArea, opmask_state), .size = sizeof(XSaveOpmask) }, [XSTATE_ZMM_Hi256_BIT] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, - .offset = offsetof(X86XSaveArea, zmm_hi256_state), .size = sizeof(XSaveZMM_Hi256) }, [XSTATE_Hi16_ZMM_BIT] = { .feature = FEAT_7_0_EBX, .bits = CPUID_7_0_EBX_AVX512F, - .offset = offsetof(X86XSaveArea, hi16_zmm_state), .size = sizeof(XSaveHi16_ZMM) }, [XSTATE_PKRU_BIT] = { .feature = FEAT_7_0_ECX, .bits = CPUID_7_0_ECX_PKU, - .offset = offsetof(X86XSaveArea, pkru_state), .size = sizeof(XSavePKRU) }, + [XSTATE_ARCH_LBR_BIT] = { + .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_ARCH_LBR, + .offset = 0 /*supervisor mode component, offset = 0 */, + .size = sizeof(XSavesArchLBR) }, + [XSTATE_XTILE_CFG_BIT] = { + .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, + .size = sizeof(XSaveXTILECFG), + }, + [XSTATE_XTILE_DATA_BIT] = { + .feature = FEAT_7_0_EDX, .bits = CPUID_7_0_EDX_AMX_TILE, + .size = sizeof(XSaveXTILEDATA) + }, }; -static uint32_t xsave_area_size(uint64_t mask) +uint32_t xsave_area_size(uint64_t mask, bool compacted) { + uint64_t ret = x86_ext_save_areas[0].size; + const ExtSaveArea *esa; + uint32_t offset = 0; int i; - uint64_t ret = 0; - for (i = 0; i < ARRAY_SIZE(x86_ext_save_areas); i++) { - const ExtSaveArea *esa = &x86_ext_save_areas[i]; + for (i = 2; i < ARRAY_SIZE(x86_ext_save_areas); i++) { + esa = 
&x86_ext_save_areas[i]; if ((mask >> i) & 1) { - ret = MAX(ret, esa->offset + esa->size); + offset = compacted ? ret : esa->offset; + ret = MAX(ret, offset + esa->size); } } return ret; @@ -1184,13 +1647,14 @@ static inline bool accel_uses_host_cpuid(void) return kvm_enabled() || hvf_enabled(); } -static inline uint64_t x86_cpu_xsave_components(X86CPU *cpu) +static inline uint64_t x86_cpu_xsave_xcr0_components(X86CPU *cpu) { - return ((uint64_t)cpu->env.features[FEAT_XSAVE_COMP_HI]) << 32 | - cpu->env.features[FEAT_XSAVE_COMP_LO]; + return ((uint64_t)cpu->env.features[FEAT_XSAVE_XCR0_HI]) << 32 | + cpu->env.features[FEAT_XSAVE_XCR0_LO]; } -const char *get_register_name_32(unsigned int reg) +/* Return name of 32-bit register, from a R_* constant */ +static const char *get_register_name_32(unsigned int reg) { if (reg >= CPU_NB_REGS32) { return NULL; @@ -1198,18 +1662,24 @@ const char *get_register_name_32(unsigned int reg) return x86_reg_info_32[reg].name; } +static inline uint64_t x86_cpu_xsave_xss_components(X86CPU *cpu) +{ + return ((uint64_t)cpu->env.features[FEAT_XSAVE_XSS_HI]) << 32 | + cpu->env.features[FEAT_XSAVE_XSS_LO]; +} + /* * Returns the set of feature flags that are supported and migratable by * QEMU, for a given FeatureWord. 
*/ -static uint32_t x86_cpu_get_migratable_flags(FeatureWord w) +static uint64_t x86_cpu_get_migratable_flags(FeatureWord w) { FeatureWordInfo *wi = &feature_word_info[w]; - uint32_t r = 0; + uint64_t r = 0; int i; - for (i = 0; i < 32; i++) { - uint32_t f = 1U << i; + for (i = 0; i < 64; i++) { + uint64_t f = 1ULL << i; /* If the feature name is known, it is implicitly considered migratable, * unless it is explicitly set in unmigratable_flags */ @@ -1255,25 +1725,6 @@ void host_cpuid(uint32_t function, uint32_t count, *edx = vec[3]; } -void host_vendor_fms(char *vendor, int *family, int *model, int *stepping) -{ - uint32_t eax, ebx, ecx, edx; - - host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx); - x86_cpu_vendor_words2str(vendor, ebx, edx, ecx); - - host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); - if (family) { - *family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); - } - if (model) { - *model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12); - } - if (stepping) { - *stepping = eax & 0x0F; - } -} - /* CPU class name definitions: */ /* Return type name for a given CPU model name @@ -1286,22 +1737,27 @@ static char *x86_cpu_type_name(const char *model_name) static ObjectClass *x86_cpu_class_by_name(const char *cpu_model) { - ObjectClass *oc; - char *typename = x86_cpu_type_name(cpu_model); - oc = object_class_by_name(typename); - g_free(typename); - return oc; + g_autofree char *typename = x86_cpu_type_name(cpu_model); + return object_class_by_name(typename); } static char *x86_cpu_class_get_model_name(X86CPUClass *cc) { const char *class_name = object_class_get_name(OBJECT_CLASS(cc)); assert(g_str_has_suffix(class_name, X86_CPU_TYPE_SUFFIX)); - return g_strndup(class_name, - strlen(class_name) - strlen(X86_CPU_TYPE_SUFFIX)); + return cpu_model_from_type(class_name); } -struct X86CPUDefinition { +typedef struct X86CPUVersionDefinition { + X86CPUVersion version; + const char *alias; + const char *note; + PropValue *props; + const CPUCaches *const cache_info; +} 
X86CPUVersionDefinition; + +/* Base definition for a CPU model */ +typedef struct X86CPUDefinition { const char *name; uint32_t level; uint32_t xlevel; @@ -1312,10 +1768,101 @@ struct X86CPUDefinition { int stepping; FeatureWordArray features; const char *model_id; - CPUCaches *cache_info; + const CPUCaches *const cache_info; + /* + * Definitions for alternative versions of CPU model. + * List is terminated by item with version == 0. + * If NULL, version 1 will be registered automatically. + */ + const X86CPUVersionDefinition *versions; + const char *deprecation_note; +} X86CPUDefinition; + +/* Reference to a specific CPU model version */ +struct X86CPUModel { + /* Base CPU definition */ + const X86CPUDefinition *cpudef; + /* CPU model version */ + X86CPUVersion version; + const char *note; + /* + * If true, this is an alias CPU model. + * This matters only for "-cpu help" and query-cpu-definitions + */ + bool is_alias; +}; + +/* Get full model name for CPU version */ +static char *x86_cpu_versioned_model_name(const X86CPUDefinition *cpudef, + X86CPUVersion version) +{ + assert(version > 0); + return g_strdup_printf("%s-v%d", cpudef->name, (int)version); +} + +static const X86CPUVersionDefinition * +x86_cpu_def_get_versions(const X86CPUDefinition *def) +{ + /* When X86CPUDefinition::versions is NULL, we register only v1 */ + static const X86CPUVersionDefinition default_version_list[] = { + { 1 }, + { /* end of list */ } + }; + + return def->versions ?: default_version_list; +} + +static const CPUCaches epyc_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 64 * KiB, + .line_size = 64, + .associativity = 4, + .partitions = 1, + .sets = 256, + .lines_per_tag = 1, + .self_init = 1, + 
.no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 8 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 8192, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + }, }; -static CPUCaches epyc_cache_info = { +static CPUCaches epyc_v4_cache_info = { .l1d_cache = &(CPUCacheInfo) { .type = DATA_CACHE, .level = 1, @@ -1361,18 +1908,296 @@ static CPUCaches epyc_cache_info = { .lines_per_tag = 1, .self_init = true, .inclusive = true, + .complex_indexing = false, + }, +}; + +static const CPUCaches epyc_rome_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 16384, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = true, + }, +}; + +static const CPUCaches epyc_rome_v3_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 
64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 16 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 16384, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + +static const CPUCaches epyc_milan_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, .complex_indexing = true, }, }; -static X86CPUDefinition builtin_x86_defs[] = { +static const CPUCaches epyc_milan_v2_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity 
= 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 512 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 1024, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + +static const CPUCaches epyc_genoa_cache_info = { + .l1d_cache = &(CPUCacheInfo) { + .type = DATA_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l1i_cache = &(CPUCacheInfo) { + .type = INSTRUCTION_CACHE, + .level = 1, + .size = 32 * KiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 64, + .lines_per_tag = 1, + .self_init = 1, + .no_invd_sharing = true, + }, + .l2_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 2, + .size = 1 * MiB, + .line_size = 64, + .associativity = 8, + .partitions = 1, + .sets = 2048, + .lines_per_tag = 1, + }, + .l3_cache = &(CPUCacheInfo) { + .type = UNIFIED_CACHE, + .level = 3, + .size = 32 * MiB, + .line_size = 64, + .associativity = 16, + .partitions = 1, + .sets = 32768, + .lines_per_tag = 1, + .self_init = true, + .inclusive = true, + .complex_indexing = false, + }, +}; + +/* The following VMX features are not supported by KVM and are left out in the + * CPU definitions: + * + * Dual-monitor support (all processors) + * Entry to SMM + * Deactivate 
dual-monitor treatment + * Number of CR3-target values + * Shutdown activity state + * Wait-for-SIPI activity state + * PAUSE-loop exiting (Westmere and newer) + * EPT-violation #VE (Broadwell and newer) + * Inject event with insn length=0 (Skylake and newer) + * Conceal non-root operation from PT + * Conceal VM exits from PT + * Conceal VM entries from PT + * Enable ENCLS exiting + * Mode-based execute control (XS/XU) + * TSC scaling (Skylake Server and newer) + * GPA translation for PT (IceLake and newer) + * User wait and pause + * ENCLV exiting + * Load IA32_RTIT_CTL + * Clear IA32_RTIT_CTL + * Advanced VM-exit information for EPT violations + * Sub-page write permissions + * PT in VMX operation + */ + +static const X86CPUDefinition builtin_x86_defs[] = { { .name = "qemu64", .level = 0xd, .vendor = CPUID_VENDOR_AMD, - .family = 6, - .model = 6, - .stepping = 3, + .family = 15, + .model = 107, + .stepping = 1, .features[FEAT_1_EDX] = PPRO_FEATURES | CPUID_MTRR | CPUID_CLFLUSH | CPUID_MCA | @@ -1439,6 +2264,24 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM, + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, + .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | 
VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES, .xlevel = 0x80000008, .model_id = "Intel(R) Core(TM)2 Duo CPU T7700 @ 2.40GHz", }, @@ -1466,6 +2309,20 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT3_OSVW, CPUID_EXT3_IBS, CPUID_EXT3_SVM */ .features[FEAT_8000_0001_ECX] = 0, + /* VMX features from Cedar Mill/Prescott */ + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, + .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING, .xlevel = 0x80000008, .model_id = "Common KVM processor" }, @@ -1497,6 +2354,19 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_SSE3, .features[FEAT_8000_0001_ECX] = 0, + /* VMX features from Yonah */ + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, + .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | 
VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | + VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | + VMX_CPU_BASED_PAUSE_EXITING | VMX_CPU_BASED_USE_MSR_BITMAPS, .xlevel = 0x80000008, .model_id = "Common 32-bit KVM processor" }, @@ -1518,6 +2388,18 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_SSE3 | CPUID_EXT_MONITOR, .features[FEAT_8000_0001_EDX] = CPUID_EXT2_NX, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, + .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | + VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | + VMX_CPU_BASED_PAUSE_EXITING | VMX_CPU_BASED_USE_MSR_BITMAPS, .xlevel = 0x80000008, .model_id = "Genuine Intel(R) CPU T2600 @ 2.16GHz", }, @@ -1628,6 +2510,24 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM, + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE, + .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + 
VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES, .xlevel = 0x80000008, .model_id = "Intel Celeron_4x0 (Conroe/Merom Class Core 2)", }, @@ -1651,6 +2551,27 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM, + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL, + .features[FEAT_VMX_EXIT_CTLS] = VMX_VM_EXIT_ACK_INTR_ON_EXIT | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + 
.features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING, .xlevel = 0x80000008, .model_id = "Intel Core 2 Duo P9xxx (Penryn Class Core 2)", }, @@ -1674,33 +2595,62 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_LAHF_LM, + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | + MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | + MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | + VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | + VMX_PIN_BASED_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | 
VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | + VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID, .xlevel = 0x80000008, .model_id = "Intel Core i7 9xx (Nehalem Class Core i7)", - }, - { - .name = "Nehalem-IBRS", - .level = 11, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, - .model = 26, - .stepping = 3, - .features[FEAT_1_EDX] = - CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | - CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | - CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | - CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | - CPUID_DE | CPUID_FP87, - .features[FEAT_1_ECX] = - CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | - CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_SSE3, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL, - .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM, - .xlevel = 0x80000008, - .model_id = "Intel Core i7 9xx (Nehalem Core i7, IBRS update)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .alias = "Nehalem-IBRS", + .props = (PropValue[]) { + { "spec-ctrl", "on" }, + { "model-id", + "Intel Core i7 9xx (Nehalem Core i7, IBRS update)" }, + { /* end of list */ } + } + }, + { /* end of list */ } + } }, { .name = "Westmere", @@ -1725,36 +2675,63 @@ static X86CPUDefinition 
builtin_x86_defs[] = { CPUID_EXT3_LAHF_LM, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | + MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | + MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | + VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_STORE_LMA, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | + VMX_PIN_BASED_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_CR3_LOAD_EXITING | 
VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | + VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST, .xlevel = 0x80000008, .model_id = "Westmere E56xx/L56xx/X56xx (Nehalem-C)", - }, - { - .name = "Westmere-IBRS", - .level = 11, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, - .model = 44, - .stepping = 1, - .features[FEAT_1_EDX] = - CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | - CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | - CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | - CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | - CPUID_DE | CPUID_FP87, - .features[FEAT_1_ECX] = - CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | - CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | - CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, - .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_SYSCALL | CPUID_EXT2_NX, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, - .xlevel = 0x80000008, - .model_id = "Westmere E56xx/L56xx/X56xx (IBRS update)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .alias = "Westmere-IBRS", + .props = (PropValue[]) { + { "spec-ctrl", "on" }, + { "model-id", + "Westmere E56xx/L56xx/X56xx (IBRS update)" }, + { /* end of list */ } + } + }, + { /* end of list */ } + } }, { .name = "SandyBridge", @@ -1784,41 +2761,63 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_XSAVE_XSAVEOPT, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | + 
MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | + MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | + VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_STORE_LMA, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | + VMX_PIN_BASED_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + 
VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | + VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST, .xlevel = 0x80000008, .model_id = "Intel Xeon E312xx (Sandy Bridge)", - }, - { - .name = "SandyBridge-IBRS", - .level = 0xd, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, - .model = 42, - .stepping = 1, - .features[FEAT_1_EDX] = - CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | - CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | - CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | - CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | - CPUID_DE | CPUID_FP87, - .features[FEAT_1_ECX] = - CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | - CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_POPCNT | - CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | - CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | - CPUID_EXT_SSE3, - .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL, - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, - .xlevel = 0x80000008, - .model_id = "Intel Xeon E312xx (Sandy Bridge, IBRS update)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .alias = "SandyBridge-IBRS", + .props = (PropValue[]) { + { "spec-ctrl", "on" }, + { "model-id", + "Intel Xeon E312xx (Sandy Bridge, IBRS update)" }, + { /* end of list */ } + } + }, + { /* end of list */ } + } }, { .name = "IvyBridge", @@ -1851,118 +2850,66 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_XSAVE_XSAVEOPT, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | + 
MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | + MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | + VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_STORE_LMA, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | + VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + 
.features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | + VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING, .xlevel = 0x80000008, .model_id = "Intel Xeon E3-12xx v2 (Ivy Bridge)", - }, - { - .name = "IvyBridge-IBRS", - .level = 0xd, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, - .model = 58, - .stepping = 9, - .features[FEAT_1_EDX] = - CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | - CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | - CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | - CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | - CPUID_DE | CPUID_FP87, - .features[FEAT_1_ECX] = - CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | - CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_POPCNT | - CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | - CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | - CPUID_EXT_SSE3 | CPUID_EXT_F16C | CPUID_EXT_RDRAND, - .features[FEAT_7_0_EBX] = - CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_ERMS, - .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_LAHF_LM, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL, - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, - .xlevel = 0x80000008, - .model_id = "Intel Xeon E3-12xx v2 (Ivy Bridge, IBRS)", - }, - { - .name = "Haswell-noTSX", - .level = 0xd, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, - .model = 60, - .stepping = 1, - .features[FEAT_1_EDX] = - CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | - 
CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | - CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | - CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | - CPUID_DE | CPUID_FP87, - .features[FEAT_1_ECX] = - CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | - CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | - CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | - CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | - CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | - CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, - .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM, - .features[FEAT_7_0_EBX] = - CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | - CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID, - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, - .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Haswell, no TSX)", - }, - { - .name = "Haswell-noTSX-IBRS", - .level = 0xd, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, - .model = 60, - .stepping = 1, - .features[FEAT_1_EDX] = - CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | - CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | - CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | - CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | - CPUID_DE | CPUID_FP87, - .features[FEAT_1_ECX] = - CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | - CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | - CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | - CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | - CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | - CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, - .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | 
CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL, - .features[FEAT_7_0_EBX] = - CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | - CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID, - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, - .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Haswell, no TSX, IBRS)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .alias = "IvyBridge-IBRS", + .props = (PropValue[]) { + { "spec-ctrl", "on" }, + { "model-id", + "Intel Xeon E3-12xx v2 (Ivy Bridge, IBRS)" }, + { /* end of list */ } + } + }, + { /* end of list */ } + } }, { .name = "Haswell", @@ -1998,125 +2945,100 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_XSAVE_XSAVEOPT, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | + MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | + MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | + VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + 
VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | + VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | + VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, + .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, .xlevel = 0x80000008, .model_id = "Intel Core Processor (Haswell)", - }, - { - .name = "Haswell-IBRS", - .level = 0xd, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, - .model = 60, - .stepping = 4, - .features[FEAT_1_EDX] = - CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | 
CPUID_MMX | - CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | - CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | - CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | - CPUID_DE | CPUID_FP87, - .features[FEAT_1_ECX] = - CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | - CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | - CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | - CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | - CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | - CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, - .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL, - .features[FEAT_7_0_EBX] = - CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | - CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RTM, - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, - .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Haswell, IBRS)", - }, - { - .name = "Broadwell-noTSX", - .level = 0xd, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, - .model = 61, - .stepping = 2, - .features[FEAT_1_EDX] = - CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | - CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | - CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | - CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | - CPUID_DE | CPUID_FP87, - .features[FEAT_1_ECX] = - CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | - CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | - CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | - CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | - CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | - CPUID_EXT_PCID | 
CPUID_EXT_F16C | CPUID_EXT_RDRAND, - .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, - .features[FEAT_7_0_EBX] = - CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | - CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP, - .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, - .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Broadwell, no TSX)", - }, - { - .name = "Broadwell-noTSX-IBRS", - .level = 0xd, - .vendor = CPUID_VENDOR_INTEL, - .family = 6, - .model = 61, - .stepping = 2, - .features[FEAT_1_EDX] = - CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | - CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | - CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | - CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | - CPUID_DE | CPUID_FP87, - .features[FEAT_1_ECX] = - CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | - CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | - CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | - CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | - CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | - CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, - .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, - .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL, - .features[FEAT_7_0_EBX] = - CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | - CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP, 
- .features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT, - .features[FEAT_6_EAX] = - CPUID_6_EAX_ARAT, - .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Broadwell, no TSX, IBRS)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .alias = "Haswell-noTSX", + .props = (PropValue[]) { + { "hle", "off" }, + { "rtm", "off" }, + { "stepping", "1" }, + { "model-id", "Intel Core Processor (Haswell, no TSX)", }, + { /* end of list */ } + }, + }, + { + .version = 3, + .alias = "Haswell-IBRS", + .props = (PropValue[]) { + /* Restore TSX features removed by -v2 above */ + { "hle", "on" }, + { "rtm", "on" }, + /* + * Haswell and Haswell-IBRS had stepping=4 in + * QEMU 4.0 and older + */ + { "stepping", "4" }, + { "spec-ctrl", "on" }, + { "model-id", + "Intel Core Processor (Haswell, IBRS)" }, + { /* end of list */ } + } + }, + { + .version = 4, + .alias = "Haswell-noTSX-IBRS", + .props = (PropValue[]) { + { "hle", "off" }, + { "rtm", "off" }, + /* spec-ctrl was already enabled by -v3 above */ + { "stepping", "1" }, + { "model-id", + "Intel Core Processor (Haswell, no TSX, IBRS)" }, + { /* end of list */ } + } + }, + { /* end of list */ } + } }, { .name = "Broadwell", @@ -2153,16 +3075,102 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_XSAVE_XSAVEOPT, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | + MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | + MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | 
MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | + VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | + VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | + VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | 
VMX_SECONDARY_EXEC_SHADOW_VMCS | + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, + .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, .xlevel = 0x80000008, .model_id = "Intel Core Processor (Broadwell)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .alias = "Broadwell-noTSX", + .props = (PropValue[]) { + { "hle", "off" }, + { "rtm", "off" }, + { "model-id", "Intel Core Processor (Broadwell, no TSX)", }, + { /* end of list */ } + }, + }, + { + .version = 3, + .alias = "Broadwell-IBRS", + .props = (PropValue[]) { + /* Restore TSX features removed by -v2 above */ + { "hle", "on" }, + { "rtm", "on" }, + { "spec-ctrl", "on" }, + { "model-id", + "Intel Core Processor (Broadwell, IBRS)" }, + { /* end of list */ } + } + }, + { + .version = 4, + .alias = "Broadwell-noTSX-IBRS", + .props = (PropValue[]) { + { "hle", "off" }, + { "rtm", "off" }, + /* spec-ctrl was already enabled by -v3 above */ + { "model-id", + "Intel Core Processor (Broadwell, no TSX, IBRS)" }, + { /* end of list */ } + } + }, + { /* end of list */ } + } }, { - .name = "Broadwell-IBRS", + .name = "Skylake-Client", .level = 0xd, .vendor = CPUID_VENDOR_INTEL, .family = 6, - .model = 61, - .stepping = 2, + .model = 94, + .stepping = 3, .features[FEAT_1_EDX] = CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | @@ -2181,28 +3189,107 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL, .features[FEAT_7_0_EBX] = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP, + /* XSAVES is added in version 4 */ 
.features[FEAT_XSAVE] = - CPUID_XSAVE_XSAVEOPT, + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | + MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | + MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | + VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | + VMX_PIN_BASED_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | 
VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | + VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, + .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Broadwell, IBRS)", + .model_id = "Intel Core Processor (Skylake)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .alias = "Skylake-Client-IBRS", + .props = (PropValue[]) { + { "spec-ctrl", "on" }, + { "model-id", + "Intel Core Processor (Skylake, IBRS)" }, + { /* end of list */ } + } + }, + { + .version = 3, + .alias = "Skylake-Client-noTSX-IBRS", + .props = (PropValue[]) { + { "hle", "off" }, + { "rtm", "off" }, + { "model-id", + "Intel Core Processor (Skylake, IBRS, no TSX)" }, + { /* end of list */ } + } + }, + { + .version = 4, + .note = "IBRS, XSAVES, no TSX", + .props = (PropValue[]) { + { "xsaves", "on" }, + { "vmx-xsaves", "on" }, + { /* end of list */ } + } + }, + { /* end of list */ } + } }, { - .name = "Skylake-Client", + .name = "Skylake-Server", .level = 0xd, .vendor = CPUID_VENDOR_INTEL, .family = 6, - .model = 94, - .stepping = 3, + .model = 85, + .stepping = 4, .features[FEAT_1_EDX] = CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | 
CPUID_CMOV | CPUID_MCA | @@ -2217,8 +3304,8 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, + CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP | + CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, .features[FEAT_7_0_EBX] = @@ -2226,28 +3313,119 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX, - /* Missing: XSAVES (not supported by some Linux versions, - * including v4.1 to v4.12). - * KVM doesn't yet expose any XSAVES state save component, - * and the only one defined in Skylake (processor tracing) - * probably will block migration anyway. 
- */ + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | + CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_PKU, + /* XSAVES is added in version 5 */ .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | CPUID_XSAVE_XGETBV1, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | + MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | + MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | + VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | + VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + 
VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | + VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Skylake)", + .model_id = "Intel Xeon Processor (Skylake)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .alias = "Skylake-Server-IBRS", + .props = (PropValue[]) { + /* clflushopt was not added to Skylake-Server-IBRS */ + /* TODO: add -v3 including clflushopt */ + { "clflushopt", "off" }, + { "spec-ctrl", "on" }, + { "model-id", + "Intel Xeon Processor (Skylake, IBRS)" }, + { /* end of list */ } + } + }, + { + .version = 3, + .alias = "Skylake-Server-noTSX-IBRS", + .props = (PropValue[]) { + { "hle", "off" }, + { "rtm", "off" }, + { "model-id", + "Intel Xeon Processor (Skylake, IBRS, no TSX)" }, + { /* end of list */ } + } + }, + { + .version = 4, + 
.props = (PropValue[]) { + { "vmx-eptp-switching", "on" }, + { /* end of list */ } + } + }, + { + .version = 5, + .note = "IBRS, XSAVES, EPT switching, no TSX", + .props = (PropValue[]) { + { "xsaves", "on" }, + { "vmx-xsaves", "on" }, + { /* end of list */ } + } + }, + { /* end of list */ } + } }, { - .name = "Skylake-Client-IBRS", + .name = "Cascadelake-Server", .level = 0xd, .vendor = CPUID_VENDOR_INTEL, .family = 6, - .model = 94, - .stepping = 3, + .model = 85, + .stepping = 6, .features[FEAT_1_EDX] = CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | @@ -2262,39 +3440,126 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, + CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP | + CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL, .features[FEAT_7_0_EBX] = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX, - /* Missing: XSAVES (not supported by some Linux versions, - * including v4.1 to v4.12). - * KVM doesn't yet expose any XSAVES state save component, - * and the only one defined in Skylake (processor tracing) - * probably will block migration anyway. 
- */ + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | + CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512VNNI, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, + /* XSAVES is added in version 5 */ .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | CPUID_XSAVE_XGETBV1, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | + MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | + MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | + VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | + VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = 
VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | + VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Skylake, IBRS)", + .model_id = "Intel Xeon Processor (Cascadelake)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { .version = 2, + .note = "ARCH_CAPABILITIES", + .props = (PropValue[]) { + { "arch-capabilities", "on" }, + { "rdctl-no", "on" }, + { "ibrs-all", "on" }, + { "skip-l1dfl-vmentry", "on" }, + { "mds-no", "on" }, + { /* end of list */ } + }, + }, + { .version = 3, + .alias = "Cascadelake-Server-noTSX", + .note = "ARCH_CAPABILITIES, no TSX", + .props = (PropValue[]) { + { "hle", "off" }, + { "rtm", "off" }, + { /* end of list */ } + }, + }, + { .version = 4, + 
.note = "ARCH_CAPABILITIES, no TSX", + .props = (PropValue[]) { + { "vmx-eptp-switching", "on" }, + { /* end of list */ } + }, + }, + { .version = 5, + .note = "ARCH_CAPABILITIES, EPT switching, XSAVES, no TSX", + .props = (PropValue[]) { + { "xsaves", "on" }, + { "vmx-xsaves", "on" }, + { /* end of list */ } + }, + }, + { /* end of list */ } + } }, { - .name = "Skylake-Server", + .name = "Cooperlake", .level = 0xd, .vendor = CPUID_VENDOR_INTEL, .family = 6, .model = 85, - .stepping = 4, + .stepping = 10, .features[FEAT_1_EDX] = CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | @@ -2318,31 +3583,98 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, - /* Missing: XSAVES (not supported by some Linux versions, - * including v4.1 to v4.12). - * KVM doesn't yet expose any XSAVES state save component, - * and the only one defined in Skylake (processor tracing) - * probably will block migration anyway. 
- */ + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512VNNI, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_STIBP | + CPUID_7_0_EDX_SPEC_CTRL_SSBD | CPUID_7_0_EDX_ARCH_CAPABILITIES, + .features[FEAT_ARCH_CAPABILITIES] = + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | + MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | + MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX512_BF16, + /* XSAVES is added in version 2 */ .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | CPUID_XSAVE_XGETBV1, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | + MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | + MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | + VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | 
VMX_PIN_BASED_VIRTUAL_NMIS | + VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | + VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, + .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, .xlevel = 0x80000008, - .model_id = "Intel Xeon Processor (Skylake)", + .model_id = "Intel Xeon Processor (Cooperlake)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { .version = 2, + .note = "XSAVES", + .props = (PropValue[]) { + { "xsaves", "on" }, + { "vmx-xsaves", "on" }, + { /* end of list */ } + }, + }, + { /* end of list */ } + } }, { - .name = "Skylake-Server-IBRS", + .name = "Icelake-Server", .level = 0xd, .vendor = 
CPUID_VENDOR_INTEL, .family = 6, - .model = 85, - .stepping = 4, + .model = 134, + .stepping = 0, .features[FEAT_1_EDX] = CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | @@ -2361,145 +3693,662 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, - .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_WBNOINVD, .features[FEAT_7_0_EBX] = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | - CPUID_7_0_EBX_AVX512VL, - /* Missing: XSAVES (not supported by some Linux versions, - * including v4.1 to v4.12). - * KVM doesn't yet expose any XSAVES state save component, - * and the only one defined in Skylake (processor tracing) - * probably will block migration anyway. 
- */ + CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, + /* XSAVES is added in version 5 */ .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | CPUID_XSAVE_XGETBV1, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + /* Missing: Mode-based execute control (XS/XU), processor tracing, TSC scaling */ + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | + MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | + MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | + VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | + 
VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | + VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS, .xlevel = 0x80000008, - .model_id = "Intel Xeon Processor (Skylake, IBRS)", + .model_id = "Intel Xeon Processor (Icelake)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .note = "no TSX", + .alias = "Icelake-Server-noTSX", + .props = (PropValue[]) { + { "hle", "off" }, + { "rtm", "off" }, + { /* end of list */ } + }, + }, + { + .version = 3, + .props = (PropValue[]) { + { "arch-capabilities", "on" }, + { "rdctl-no", "on" }, + { "ibrs-all", "on" }, + { "skip-l1dfl-vmentry", "on" }, + { "mds-no", "on" }, + { "pschange-mc-no", "on" }, + { "taa-no", "on" }, + { /* end of list */ } + 
}, + }, + { + .version = 4, + .props = (PropValue[]) { + { "sha-ni", "on" }, + { "avx512ifma", "on" }, + { "rdpid", "on" }, + { "fsrm", "on" }, + { "vmx-rdseed-exit", "on" }, + { "vmx-pml", "on" }, + { "vmx-eptp-switching", "on" }, + { "model", "106" }, + { /* end of list */ } + }, + }, + { + .version = 5, + .note = "XSAVES", + .props = (PropValue[]) { + { "xsaves", "on" }, + { "vmx-xsaves", "on" }, + { /* end of list */ } + }, + }, + { + .version = 6, + .note = "5-level EPT", + .props = (PropValue[]) { + { "vmx-page-walk-5", "on" }, + { /* end of list */ } + }, + }, + { /* end of list */ } + } }, { - .name = "Icelake-Client", - .level = 0xd, + .name = "SapphireRapids", + .level = 0x20, .vendor = CPUID_VENDOR_INTEL, .family = 6, - .model = 126, - .stepping = 0, + .model = 143, + .stepping = 4, + /* + * please keep the ascending order so that we can have a clear view of + * bit position of each feature. + */ .features[FEAT_1_EDX] = - CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | - CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | - CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | - CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | - CPUID_DE | CPUID_FP87, + CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | + CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | + CPUID_SSE | CPUID_SSE2, .features[FEAT_1_ECX] = - CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | - CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | - CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | - CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | - CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | - CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, + CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | + CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | + 
CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | + CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | + CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND, .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, + CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, + CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, .features[FEAT_8000_0008_EBX] = CPUID_8000_0008_EBX_WBNOINVD, .features[FEAT_7_0_EBX] = - CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | - CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_INTEL_PT, + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE | + CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | + CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | + CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | + CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | + CPUID_7_0_EBX_AVX512IFMA | CPUID_7_0_EBX_CLFLUSHOPT | + CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, .features[FEAT_7_0_ECX] = - CPUID_7_0_ECX_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | - CPUID_7_0_ECX_OSPKE | CPUID_7_0_ECX_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | - CPUID_7_0_ECX_AVX512_VPOPCNTDQ, + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT, 
.features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_SPEC_CTRL_SSBD, - /* Missing: XSAVES (not supported by some Linux versions, - * including v4.1 to v4.12). - * KVM doesn't yet expose any XSAVES state save component, - * and the only one defined in Skylake (processor tracing) - * probably will block migration anyway. - */ + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE | + CPUID_7_0_EDX_TSX_LDTRK | CPUID_7_0_EDX_AMX_BF16 | + CPUID_7_0_EDX_AVX512_FP16 | CPUID_7_0_EDX_AMX_TILE | + CPUID_7_0_EDX_AMX_INT8 | CPUID_7_0_EDX_SPEC_CTRL | + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD, + .features[FEAT_ARCH_CAPABILITIES] = + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | + MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | + MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO, .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | - CPUID_XSAVE_XGETBV1, + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES | CPUID_D_1_EAX_XFD, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16 | + CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_FSRC, + .features[FEAT_VMX_BASIC] = + MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = + MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_PAGE_WALK_LENGTH_5 | + MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB | + MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | + MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + 
VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_IA32_PAT | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = + VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING | + VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER | + VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = + VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING | + VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | + VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | + VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC | + VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING | + VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | + VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML | + VMX_SECONDARY_EXEC_XSAVES, + .features[FEAT_VMX_VMFUNC] = + 
MSR_VMX_VMFUNC_EPT_SWITCHING, .xlevel = 0x80000008, - .model_id = "Intel Core Processor (Icelake)", + .model_id = "Intel Xeon Processor (SapphireRapids)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "sbdr-ssdp-no", "on" }, + { "fbsdp-no", "on" }, + { "psdp-no", "on" }, + { /* end of list */ } + } + }, + { /* end of list */ } + } }, { - .name = "Icelake-Server", - .level = 0xd, + .name = "GraniteRapids", + .level = 0x20, .vendor = CPUID_VENDOR_INTEL, .family = 6, - .model = 134, + .model = 173, .stepping = 0, + /* + * please keep the ascending order so that we can have a clear view of + * bit position of each feature. + */ .features[FEAT_1_EDX] = - CPUID_VME | CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | - CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | - CPUID_PGE | CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | - CPUID_MCE | CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | - CPUID_DE | CPUID_FP87, + CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | + CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | + CPUID_SSE | CPUID_SSE2, .features[FEAT_1_ECX] = - CPUID_EXT_AVX | CPUID_EXT_XSAVE | CPUID_EXT_AES | - CPUID_EXT_POPCNT | CPUID_EXT_X2APIC | CPUID_EXT_SSE42 | - CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | - CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3 | - CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_FMA | CPUID_EXT_MOVBE | - CPUID_EXT_PCID | CPUID_EXT_F16C | CPUID_EXT_RDRAND, + CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSSE3 | + CPUID_EXT_FMA | CPUID_EXT_CX16 | CPUID_EXT_PCID | CPUID_EXT_SSE41 | + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | + CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | + CPUID_EXT_XSAVE | CPUID_EXT_AVX | CPUID_EXT_F16C | CPUID_EXT_RDRAND, .features[FEAT_8000_0001_EDX] 
= - CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP | - CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, + CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, .features[FEAT_8000_0001_ECX] = - CPUID_EXT3_ABM | CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, + CPUID_EXT3_LAHF_LM | CPUID_EXT3_ABM | CPUID_EXT3_3DNOWPREFETCH, .features[FEAT_8000_0008_EBX] = CPUID_8000_0008_EBX_WBNOINVD, .features[FEAT_7_0_EBX] = - CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | - CPUID_7_0_EBX_HLE | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | - CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | - CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | - CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_HLE | + CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | + CPUID_7_0_EBX_ERMS | CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_AVX512F | CPUID_7_0_EBX_AVX512DQ | - CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512CD | - CPUID_7_0_EBX_AVX512VL | CPUID_7_0_EBX_CLFLUSHOPT | - CPUID_7_0_EBX_INTEL_PT, + CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | + CPUID_7_0_EBX_AVX512IFMA | CPUID_7_0_EBX_CLFLUSHOPT | + CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, .features[FEAT_7_0_ECX] = - CPUID_7_0_ECX_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | - CPUID_7_0_ECX_OSPKE | CPUID_7_0_ECX_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | - CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57, + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | + CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_BUS_LOCK_DETECT, .features[FEAT_7_0_EDX] = - CPUID_7_0_EDX_PCONFIG | 
CPUID_7_0_EDX_SPEC_CTRL | + CPUID_7_0_EDX_FSRM | CPUID_7_0_EDX_SERIALIZE | + CPUID_7_0_EDX_TSX_LDTRK | CPUID_7_0_EDX_AMX_BF16 | + CPUID_7_0_EDX_AVX512_FP16 | CPUID_7_0_EDX_AMX_TILE | + CPUID_7_0_EDX_AMX_INT8 | CPUID_7_0_EDX_SPEC_CTRL | + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD, + .features[FEAT_ARCH_CAPABILITIES] = + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_IBRS_ALL | + MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY | MSR_ARCH_CAP_MDS_NO | + MSR_ARCH_CAP_PSCHANGE_MC_NO | MSR_ARCH_CAP_TAA_NO | + MSR_ARCH_CAP_SBDR_SSDP_NO | MSR_ARCH_CAP_FBSDP_NO | + MSR_ARCH_CAP_PSDP_NO | MSR_ARCH_CAP_PBRSB_NO, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES | CPUID_D_1_EAX_XFD, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX_VNNI | CPUID_7_1_EAX_AVX512_BF16 | + CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_FSRC | + CPUID_7_1_EAX_AMX_FP16, + .features[FEAT_7_1_EDX] = + CPUID_7_1_EDX_PREFETCHITI, + .features[FEAT_7_2_EDX] = + CPUID_7_2_EDX_MCDT_NO, + .features[FEAT_VMX_BASIC] = + MSR_VMX_BASIC_INS_OUTS | MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_ENTRY_LOAD_IA32_PAT | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = + MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_PAGE_WALK_LENGTH_5 | + MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | MSR_VMX_EPT_1GB | + MSR_VMX_EPT_INVEPT | MSR_VMX_EPT_AD_BITS | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | + MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_ACK_INTR_ON_EXIT | 
VMX_VM_EXIT_SAVE_IA32_PAT | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_LOAD_IA32_EFER | VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = + VMX_PIN_BASED_EXT_INTR_MASK | VMX_PIN_BASED_NMI_EXITING | + VMX_PIN_BASED_VIRTUAL_NMIS | VMX_PIN_BASED_VMX_PREEMPTION_TIMER | + VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = + VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_VIRTUAL_NMI_PENDING | + VMX_CPU_BASED_MOV_DR_EXITING | VMX_CPU_BASED_UNCOND_IO_EXITING | + VMX_CPU_BASED_USE_IO_BITMAPS | VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_USE_MSR_BITMAPS | VMX_CPU_BASED_MONITOR_EXITING | + VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_ENABLE_EPT | VMX_SECONDARY_EXEC_DESC | + VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_WBINVD_EXITING | + VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | + VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML | + VMX_SECONDARY_EXEC_XSAVES, + .features[FEAT_VMX_VMFUNC] = + MSR_VMX_VMFUNC_EPT_SWITCHING, + .xlevel = 0x80000008, + .model_id = "Intel Xeon 
Processor (GraniteRapids)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { /* end of list */ }, + }, + }, + { + .name = "Denverton", + .level = 21, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, + .model = 95, + .stepping = 1, + .features[FEAT_1_EDX] = + CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | CPUID_TSC | + CPUID_MSR | CPUID_PAE | CPUID_MCE | CPUID_CX8 | CPUID_APIC | + CPUID_SEP | CPUID_MTRR | CPUID_PGE | CPUID_MCA | CPUID_CMOV | + CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | CPUID_MMX | CPUID_FXSR | + CPUID_SSE | CPUID_SSE2, + .features[FEAT_1_ECX] = + CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_MONITOR | + CPUID_EXT_SSSE3 | CPUID_EXT_CX16 | CPUID_EXT_SSE41 | + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | + CPUID_EXT_POPCNT | CPUID_EXT_TSC_DEADLINE_TIMER | + CPUID_EXT_AES | CPUID_EXT_XSAVE | CPUID_EXT_RDRAND, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_SYSCALL | CPUID_EXT2_NX | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_RDTSCP | CPUID_EXT2_LM, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_LAHF_LM | CPUID_EXT3_3DNOWPREFETCH, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_MPX | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_SMAP | + CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_SHA_NI, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_SPEC_CTRL | CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD, - /* Missing: XSAVES (not supported by some Linux versions, - * including v4.1 to v4.12). - * KVM doesn't yet expose any XSAVES state save component, - * and the only one defined in Skylake (processor tracing) - * probably will block migration anyway. 
- */ + /* XSAVES is added in version 3 */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | CPUID_XSAVE_XGETBV1, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_ARCH_CAPABILITIES] = + MSR_ARCH_CAP_RDCL_NO | MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY, + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | + MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | + MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | + VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | + VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | 
VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | + VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, + .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, + .xlevel = 0x80000008, + .model_id = "Intel Atom Processor (Denverton)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .note = "no MPX, no MONITOR", + .props = (PropValue[]) { + { "monitor", "off" }, + { "mpx", "off" }, + { /* end of list */ }, + }, + }, + { + .version = 3, + .note = "XSAVES, no MPX, no MONITOR", + .props = (PropValue[]) { + { "xsaves", "on" }, + { "vmx-xsaves", "on" }, + { /* end of list */ }, + }, + }, + { /* end of list */ }, + }, + }, + { + .name = "Snowridge", + .level = 27, + .vendor = CPUID_VENDOR_INTEL, + .family = 6, + .model = 134, + .stepping = 1, + .features[FEAT_1_EDX] = + /* missing: CPUID_PN CPUID_IA64 */ + /* missing: CPUID_DTS, CPUID_HT, CPUID_TM, CPUID_PBE */ + CPUID_FP87 | CPUID_VME | CPUID_DE | CPUID_PSE | + CPUID_TSC | CPUID_MSR | CPUID_PAE | CPUID_MCE | + CPUID_CX8 | CPUID_APIC | CPUID_SEP | + CPUID_MTRR | CPUID_PGE | CPUID_MCA | 
CPUID_CMOV | + CPUID_PAT | CPUID_PSE36 | CPUID_CLFLUSH | + CPUID_MMX | + CPUID_FXSR | CPUID_SSE | CPUID_SSE2, + .features[FEAT_1_ECX] = + CPUID_EXT_SSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_MONITOR | + CPUID_EXT_SSSE3 | + CPUID_EXT_CX16 | + CPUID_EXT_SSE41 | + CPUID_EXT_SSE42 | CPUID_EXT_X2APIC | CPUID_EXT_MOVBE | + CPUID_EXT_POPCNT | + CPUID_EXT_TSC_DEADLINE_TIMER | CPUID_EXT_AES | CPUID_EXT_XSAVE | + CPUID_EXT_RDRAND, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_SYSCALL | + CPUID_EXT2_NX | + CPUID_EXT2_PDPE1GB | CPUID_EXT2_RDTSCP | + CPUID_EXT2_LM, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_LAHF_LM | + CPUID_EXT3_3DNOWPREFETCH, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | + CPUID_7_0_EBX_SMEP | + CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_MPX | /* missing bits 13, 15 */ + CPUID_7_0_EBX_RDSEED | + CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | + CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_SHA_NI, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_UMIP | + /* missing bit 5 */ + CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_MOVDIRI | CPUID_7_0_ECX_CLDEMOTE | + CPUID_7_0_ECX_MOVDIR64B, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_SPEC_CTRL | + CPUID_7_0_EDX_ARCH_CAPABILITIES | CPUID_7_0_EDX_SPEC_CTRL_SSBD | + CPUID_7_0_EDX_CORE_CAPABILITY, + .features[FEAT_CORE_CAPABILITY] = + MSR_CORE_CAP_SPLIT_LOCK_DETECT, + /* XSAVES is added in version 3 */ .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | CPUID_XSAVE_XGETBV1, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + .features[FEAT_VMX_BASIC] = MSR_VMX_BASIC_INS_OUTS | + MSR_VMX_BASIC_TRUE_CTLS, + .features[FEAT_VMX_ENTRY_CTLS] = VMX_VM_ENTRY_IA32E_MODE | + VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL | VMX_VM_ENTRY_LOAD_IA32_PAT | + VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS | VMX_VM_ENTRY_LOAD_IA32_EFER, + .features[FEAT_VMX_EPT_VPID_CAPS] = MSR_VMX_EPT_EXECONLY | + MSR_VMX_EPT_PAGE_WALK_LENGTH_4 | MSR_VMX_EPT_WB | MSR_VMX_EPT_2MB | + MSR_VMX_EPT_1GB | MSR_VMX_EPT_INVEPT | + MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT | 
MSR_VMX_EPT_INVEPT_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID | MSR_VMX_EPT_INVVPID_SINGLE_ADDR | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT | MSR_VMX_EPT_INVVPID_ALL_CONTEXT | + MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS | MSR_VMX_EPT_AD_BITS, + .features[FEAT_VMX_EXIT_CTLS] = + VMX_VM_EXIT_ACK_INTR_ON_EXIT | VMX_VM_EXIT_SAVE_DEBUG_CONTROLS | + VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL | + VMX_VM_EXIT_LOAD_IA32_PAT | VMX_VM_EXIT_LOAD_IA32_EFER | + VMX_VM_EXIT_SAVE_IA32_PAT | VMX_VM_EXIT_SAVE_IA32_EFER | + VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER, + .features[FEAT_VMX_MISC] = MSR_VMX_MISC_ACTIVITY_HLT | + MSR_VMX_MISC_STORE_LMA | MSR_VMX_MISC_VMWRITE_VMEXIT, + .features[FEAT_VMX_PINBASED_CTLS] = VMX_PIN_BASED_EXT_INTR_MASK | + VMX_PIN_BASED_NMI_EXITING | VMX_PIN_BASED_VIRTUAL_NMIS | + VMX_PIN_BASED_VMX_PREEMPTION_TIMER | VMX_PIN_BASED_POSTED_INTR, + .features[FEAT_VMX_PROCBASED_CTLS] = VMX_CPU_BASED_VIRTUAL_INTR_PENDING | + VMX_CPU_BASED_USE_TSC_OFFSETING | VMX_CPU_BASED_HLT_EXITING | + VMX_CPU_BASED_INVLPG_EXITING | VMX_CPU_BASED_MWAIT_EXITING | + VMX_CPU_BASED_RDPMC_EXITING | VMX_CPU_BASED_RDTSC_EXITING | + VMX_CPU_BASED_CR8_LOAD_EXITING | VMX_CPU_BASED_CR8_STORE_EXITING | + VMX_CPU_BASED_TPR_SHADOW | VMX_CPU_BASED_MOV_DR_EXITING | + VMX_CPU_BASED_UNCOND_IO_EXITING | VMX_CPU_BASED_USE_IO_BITMAPS | + VMX_CPU_BASED_MONITOR_EXITING | VMX_CPU_BASED_PAUSE_EXITING | + VMX_CPU_BASED_VIRTUAL_NMI_PENDING | VMX_CPU_BASED_USE_MSR_BITMAPS | + VMX_CPU_BASED_CR3_LOAD_EXITING | VMX_CPU_BASED_CR3_STORE_EXITING | + VMX_CPU_BASED_MONITOR_TRAP_FLAG | + VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS, + .features[FEAT_VMX_SECONDARY_CTLS] = + VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES | + VMX_SECONDARY_EXEC_WBINVD_EXITING | VMX_SECONDARY_EXEC_ENABLE_EPT | + VMX_SECONDARY_EXEC_DESC | VMX_SECONDARY_EXEC_RDTSCP | + VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | + VMX_SECONDARY_EXEC_ENABLE_VPID | VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST | + VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT | + 
VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | + VMX_SECONDARY_EXEC_RDRAND_EXITING | VMX_SECONDARY_EXEC_ENABLE_INVPCID | + VMX_SECONDARY_EXEC_ENABLE_VMFUNC | VMX_SECONDARY_EXEC_SHADOW_VMCS | + VMX_SECONDARY_EXEC_RDSEED_EXITING | VMX_SECONDARY_EXEC_ENABLE_PML, + .features[FEAT_VMX_VMFUNC] = MSR_VMX_VMFUNC_EPT_SWITCHING, .xlevel = 0x80000008, - .model_id = "Intel Xeon Processor (Icelake)", + .model_id = "Intel Atom Processor (SnowRidge)", + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "mpx", "off" }, + { "model-id", "Intel Atom Processor (Snowridge, no MPX)" }, + { /* end of list */ }, + }, + }, + { + .version = 3, + .note = "XSAVES, no MPX", + .props = (PropValue[]) { + { "xsaves", "on" }, + { "vmx-xsaves", "on" }, + { /* end of list */ }, + }, + }, + { + .version = 4, + .note = "no split lock detect, no core-capability", + .props = (PropValue[]) { + { "split-lock-detect", "off" }, + { "core-capability", "off" }, + { /* end of list */ }, + }, + }, + { /* end of list */ }, + }, }, { .name = "KnightsMill", @@ -2578,7 +4427,6 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_DE | CPUID_FP87, .features[FEAT_1_ECX] = CPUID_EXT_CX16 | CPUID_EXT_SSE3, - /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, .features[FEAT_8000_0001_ECX] = @@ -2602,9 +4450,9 @@ static X86CPUDefinition builtin_x86_defs[] = { .features[FEAT_1_ECX] = CPUID_EXT_POPCNT | CPUID_EXT_CX16 | CPUID_EXT_MONITOR | CPUID_EXT_SSE3, - /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = - CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL, + CPUID_EXT2_LM | CPUID_EXT2_NX | CPUID_EXT2_SYSCALL | + CPUID_EXT2_RDTSCP, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, @@ -2629,15 +4477,16 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 
| CPUID_EXT_CX16 | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, - /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, + CPUID_EXT2_SYSCALL | CPUID_EXT2_RDTSCP, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_FMA4 | CPUID_EXT3_XOP | CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, /* no xsaveopt! */ .xlevel = 0x8000001A, .model_id = "AMD Opteron 62xx class CPU", @@ -2660,15 +4509,16 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT_AES | CPUID_EXT_POPCNT | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | CPUID_EXT_PCLMULQDQ | CPUID_EXT_SSE3, - /* Missing: CPUID_EXT2_RDTSCP */ .features[FEAT_8000_0001_EDX] = CPUID_EXT2_LM | CPUID_EXT2_PDPE1GB | CPUID_EXT2_NX | - CPUID_EXT2_SYSCALL, + CPUID_EXT2_SYSCALL | CPUID_EXT2_RDTSCP, .features[FEAT_8000_0001_ECX] = CPUID_EXT3_TBM | CPUID_EXT3_FMA4 | CPUID_EXT3_XOP | CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, /* no xsaveopt! */ .xlevel = 0x8000001A, .model_id = "AMD Opteron 63xx class CPU", @@ -2706,26 +4556,117 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_SHA_NI, - /* Missing: XSAVES (not supported by some Linux versions, - * including v4.1 to v4.12). - * KVM doesn't yet expose any XSAVES state save component. 
- */ .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | CPUID_XSAVE_XGETBV1, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, .xlevel = 0x8000001E, .model_id = "AMD EPYC Processor", .cache_info = &epyc_cache_info, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .alias = "EPYC-IBPB", + .props = (PropValue[]) { + { "ibpb", "on" }, + { "model-id", + "AMD EPYC Processor (with IBPB)" }, + { /* end of list */ } + } + }, + { + .version = 3, + .props = (PropValue[]) { + { "ibpb", "on" }, + { "perfctr-core", "on" }, + { "clzero", "on" }, + { "xsaveerptr", "on" }, + { "xsaves", "on" }, + { "model-id", + "AMD EPYC Processor" }, + { /* end of list */ } + } + }, + { + .version = 4, + .props = (PropValue[]) { + { "model-id", + "AMD EPYC-v4 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_v4_cache_info + }, + { /* end of list */ } + } + }, + { + .name = "Dhyana", + .level = 0xd, + .vendor = CPUID_VENDOR_HYGON, + .family = 24, + .model = 0, + .stepping = 1, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + .features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | + CPUID_EXT_MONITOR | CPUID_EXT_SSE3, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | + CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | + CPUID_EXT3_CR8LEG | 
CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | + CPUID_EXT3_TOPOEXT, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_IBPB, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | + CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT, + /* XSAVES is added in version 2 */ + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, + .xlevel = 0x8000001E, + .model_id = "Hygon Dhyana Processor", + .cache_info = &epyc_cache_info, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { .version = 2, + .note = "XSAVES", + .props = (PropValue[]) { + { "xsaves", "on" }, + { /* end of list */ } + }, + }, + { /* end of list */ } + } }, { - .name = "EPYC-IBPB", + .name = "EPYC-Rome", .level = 0xd, .vendor = CPUID_VENDOR_AMD, .family = 23, - .model = 1, - .stepping = 2, + .model = 49, + .stepping = 0, .features[FEAT_1_EDX] = CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | @@ -2746,114 +4687,248 @@ static X86CPUDefinition builtin_x86_defs[] = { CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | - CPUID_EXT3_TOPOEXT, + CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, .features[FEAT_8000_0008_EBX] = - CPUID_8000_0008_EBX_IBPB, + CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | + CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | + CPUID_8000_0008_EBX_STIBP, .features[FEAT_7_0_EBX] = CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | - CPUID_7_0_EBX_SHA_NI, - /* 
Missing: XSAVES (not supported by some Linux versions, - * including v4.1 to v4.12). - * KVM doesn't yet expose any XSAVES state save component. - */ + CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_CLWB, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_RDPID, .features[FEAT_XSAVE] = CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | - CPUID_XSAVE_XGETBV1, + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, .features[FEAT_6_EAX] = CPUID_6_EAX_ARAT, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE, .xlevel = 0x8000001E, - .model_id = "AMD EPYC Processor (with IBPB)", - .cache_info = &epyc_cache_info, + .model_id = "AMD EPYC-Rome Processor", + .cache_info = &epyc_rome_cache_info, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "ibrs", "on" }, + { "amd-ssbd", "on" }, + { /* end of list */ } + } + }, + { + .version = 3, + .props = (PropValue[]) { + { "model-id", + "AMD EPYC-Rome-v3 Processor" }, + { /* end of list */ } + }, + .cache_info = &epyc_rome_v3_cache_info + }, + { + .version = 4, + .props = (PropValue[]) { + /* Erratum 1386 */ + { "model-id", + "AMD EPYC-Rome-v4 Processor (no XSAVES)" }, + { "xsaves", "off" }, + { /* end of list */ } + }, + }, + { /* end of list */ } + } + }, + { + .name = "EPYC-Milan", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 25, + .model = 1, + .stepping = 1, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + .features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_CX16 | CPUID_EXT_FMA | CPUID_EXT_SSSE3 | + CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | 
CPUID_EXT_SSE3 | + CPUID_EXT_PCID, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | + CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | + CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | + CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | + CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | + CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | + CPUID_8000_0008_EBX_AMD_SSBD, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_RDSEED | + CPUID_7_0_EBX_ADX | CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_CLFLUSHOPT | + CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_INVPCID, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_RDPID | CPUID_7_0_ECX_PKU, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_SVME_ADDR_CHK, + .xlevel = 0x8000001E, + .model_id = "AMD EPYC-Milan Processor", + .cache_info = &epyc_milan_cache_info, + .versions = (X86CPUVersionDefinition[]) { + { .version = 1 }, + { + .version = 2, + .props = (PropValue[]) { + { "model-id", + "AMD EPYC-Milan-v2 Processor" }, + { "vaes", "on" }, + { "vpclmulqdq", "on" }, + { "stibp-always-on", "on" }, + { "amd-psfd", "on" }, + { "no-nested-data-bp", "on" }, + { "lfence-always-serializing", "on" }, + { "null-sel-clr-base", "on" }, + { /* end of list */ } + }, + .cache_info = &epyc_milan_v2_cache_info + }, + { /* end of list */ } + } 
+ }, + { + .name = "EPYC-Genoa", + .level = 0xd, + .vendor = CPUID_VENDOR_AMD, + .family = 25, + .model = 17, + .stepping = 0, + .features[FEAT_1_EDX] = + CPUID_SSE2 | CPUID_SSE | CPUID_FXSR | CPUID_MMX | CPUID_CLFLUSH | + CPUID_PSE36 | CPUID_PAT | CPUID_CMOV | CPUID_MCA | CPUID_PGE | + CPUID_MTRR | CPUID_SEP | CPUID_APIC | CPUID_CX8 | CPUID_MCE | + CPUID_PAE | CPUID_MSR | CPUID_TSC | CPUID_PSE | CPUID_DE | + CPUID_VME | CPUID_FP87, + .features[FEAT_1_ECX] = + CPUID_EXT_RDRAND | CPUID_EXT_F16C | CPUID_EXT_AVX | + CPUID_EXT_XSAVE | CPUID_EXT_AES | CPUID_EXT_POPCNT | + CPUID_EXT_MOVBE | CPUID_EXT_SSE42 | CPUID_EXT_SSE41 | + CPUID_EXT_PCID | CPUID_EXT_CX16 | CPUID_EXT_FMA | + CPUID_EXT_SSSE3 | CPUID_EXT_MONITOR | CPUID_EXT_PCLMULQDQ | + CPUID_EXT_SSE3, + .features[FEAT_8000_0001_EDX] = + CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_PDPE1GB | + CPUID_EXT2_FFXSR | CPUID_EXT2_MMXEXT | CPUID_EXT2_NX | + CPUID_EXT2_SYSCALL, + .features[FEAT_8000_0001_ECX] = + CPUID_EXT3_OSVW | CPUID_EXT3_3DNOWPREFETCH | + CPUID_EXT3_MISALIGNSSE | CPUID_EXT3_SSE4A | CPUID_EXT3_ABM | + CPUID_EXT3_CR8LEG | CPUID_EXT3_SVM | CPUID_EXT3_LAHF_LM | + CPUID_EXT3_TOPOEXT | CPUID_EXT3_PERFCORE, + .features[FEAT_8000_0008_EBX] = + CPUID_8000_0008_EBX_CLZERO | CPUID_8000_0008_EBX_XSAVEERPTR | + CPUID_8000_0008_EBX_WBNOINVD | CPUID_8000_0008_EBX_IBPB | + CPUID_8000_0008_EBX_IBRS | CPUID_8000_0008_EBX_STIBP | + CPUID_8000_0008_EBX_STIBP_ALWAYS_ON | + CPUID_8000_0008_EBX_AMD_SSBD | CPUID_8000_0008_EBX_AMD_PSFD, + .features[FEAT_8000_0021_EAX] = + CPUID_8000_0021_EAX_No_NESTED_DATA_BP | + CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING | + CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE | + CPUID_8000_0021_EAX_AUTO_IBRS, + .features[FEAT_7_0_EBX] = + CPUID_7_0_EBX_FSGSBASE | CPUID_7_0_EBX_BMI1 | CPUID_7_0_EBX_AVX2 | + CPUID_7_0_EBX_SMEP | CPUID_7_0_EBX_BMI2 | CPUID_7_0_EBX_ERMS | + CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_AVX512F | + CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_RDSEED | CPUID_7_0_EBX_ADX | + 
CPUID_7_0_EBX_SMAP | CPUID_7_0_EBX_AVX512IFMA | + CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | + CPUID_7_0_EBX_AVX512CD | CPUID_7_0_EBX_SHA_NI | + CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL, + .features[FEAT_7_0_ECX] = + CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_UMIP | CPUID_7_0_ECX_PKU | + CPUID_7_0_ECX_AVX512_VBMI2 | CPUID_7_0_ECX_GFNI | + CPUID_7_0_ECX_VAES | CPUID_7_0_ECX_VPCLMULQDQ | + CPUID_7_0_ECX_AVX512VNNI | CPUID_7_0_ECX_AVX512BITALG | + CPUID_7_0_ECX_AVX512_VPOPCNTDQ | CPUID_7_0_ECX_LA57 | + CPUID_7_0_ECX_RDPID, + .features[FEAT_7_0_EDX] = + CPUID_7_0_EDX_FSRM, + .features[FEAT_7_1_EAX] = + CPUID_7_1_EAX_AVX512_BF16, + .features[FEAT_XSAVE] = + CPUID_XSAVE_XSAVEOPT | CPUID_XSAVE_XSAVEC | + CPUID_XSAVE_XGETBV1 | CPUID_XSAVE_XSAVES, + .features[FEAT_6_EAX] = + CPUID_6_EAX_ARAT, + .features[FEAT_SVM] = + CPUID_SVM_NPT | CPUID_SVM_NRIPSAVE | CPUID_SVM_VNMI | + CPUID_SVM_SVME_ADDR_CHK, + .xlevel = 0x80000022, + .model_id = "AMD EPYC-Genoa Processor", + .cache_info = &epyc_genoa_cache_info, }, }; -typedef struct PropValue { - const char *prop, *value; -} PropValue; - -/* KVM-specific features that are automatically added/removed - * from all CPU models when KVM is enabled. - */ -static PropValue kvm_default_props[] = { - { "kvmclock", "on" }, - { "kvm-nopiodelay", "on" }, - { "kvm-asyncpf", "on" }, - { "kvm-steal-time", "on" }, - { "kvm-pv-eoi", "on" }, - { "kvmclock-stable-bit", "on" }, - { "x2apic", "on" }, - { "acpi", "off" }, - { "monitor", "off" }, - { "svm", "off" }, - { NULL, NULL }, -}; - -/* TCG-specific defaults that override all CPU models when using TCG +/* + * We resolve CPU model aliases using -v1 when using "-machine + * none", but this is just for compatibility while libvirt isn't + * adapted to resolve CPU model versions before creating VMs. + * See "Runnability guarantee of CPU models" at + * docs/about/deprecated.rst. 
*/ -static PropValue tcg_default_props[] = { - { "vme", "off" }, - { NULL, NULL }, -}; - +X86CPUVersion default_cpu_version = 1; -void x86_cpu_change_kvm_default(const char *prop, const char *value) +void x86_cpu_set_default_version(X86CPUVersion version) { - PropValue *pv; - for (pv = kvm_default_props; pv->prop; pv++) { - if (!strcmp(pv->prop, prop)) { - pv->value = value; - break; - } - } - - /* It is valid to call this function only for properties that - * are already present in the kvm_default_props table. - */ - assert(pv->prop); + /* Translating CPU_VERSION_AUTO to CPU_VERSION_AUTO doesn't make sense */ + assert(version != CPU_VERSION_AUTO); + default_cpu_version = version; } -static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, - bool migratable_only); - -static bool lmce_supported(void) +static X86CPUVersion x86_cpu_model_last_version(const X86CPUModel *model) { - uint64_t mce_cap = 0; - -#ifdef CONFIG_KVM - if (kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, &mce_cap) < 0) { - return false; - } -#endif - - return !!(mce_cap & MCG_LMCE_P); + int v = 0; + const X86CPUVersionDefinition *vdef = + x86_cpu_def_get_versions(model->cpudef); + while (vdef->version) { + v = vdef->version; + vdef++; + } + return v; } -#define CPUID_MODEL_ID_SZ 48 - -/** - * cpu_x86_fill_model_id: - * Get CPUID model ID string from host CPU. - * - * @str should have at least CPUID_MODEL_ID_SZ bytes - * - * The function does NOT add a null terminator to the string - * automatically. 
- */ -static int cpu_x86_fill_model_id(char *str) +/* Return the actual version being used for a specific CPU model */ +static X86CPUVersion x86_cpu_model_resolve_version(const X86CPUModel *model) { - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - int i; - - for (i = 0; i < 3; i++) { - host_cpuid(0x80000002 + i, 0, &eax, &ebx, &ecx, &edx); - memcpy(str + i * 16 + 0, &eax, 4); - memcpy(str + i * 16 + 4, &ebx, 4); - memcpy(str + i * 16 + 8, &ecx, 4); - memcpy(str + i * 16 + 12, &edx, 4); + X86CPUVersion v = model->version; + if (v == CPU_VERSION_AUTO) { + v = default_cpu_version; } - return 0; + if (v == CPU_VERSION_LATEST) { + return x86_cpu_model_last_version(model); + } + return v; } static Property max_x86_cpu_properties[] = { @@ -2862,6 +4937,25 @@ static Property max_x86_cpu_properties[] = { DEFINE_PROP_END_OF_LIST() }; +static void max_x86_cpu_realize(DeviceState *dev, Error **errp) +{ + Object *obj = OBJECT(dev); + + if (!object_property_get_int(obj, "family", &error_abort)) { + if (X86_CPU(obj)->env.features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { + object_property_set_int(obj, "family", 15, &error_abort); + object_property_set_int(obj, "model", 107, &error_abort); + object_property_set_int(obj, "stepping", 1, &error_abort); + } else { + object_property_set_int(obj, "family", 6, &error_abort); + object_property_set_int(obj, "model", 6, &error_abort); + object_property_set_int(obj, "stepping", 3, &error_abort); + } + } + + x86_cpu_realizefn(dev, errp); +} + static void max_x86_cpu_class_init(ObjectClass *oc, void *data) { DeviceClass *dc = DEVICE_CLASS(oc); @@ -2872,75 +4966,29 @@ static void max_x86_cpu_class_init(ObjectClass *oc, void *data) xcc->model_description = "Enables all features supported by the accelerator in the current host"; - dc->props = max_x86_cpu_properties; + device_class_set_props(dc, max_x86_cpu_properties); + dc->realize = max_x86_cpu_realize; } -static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp); - static 
void max_x86_cpu_initfn(Object *obj) { X86CPU *cpu = X86_CPU(obj); - CPUX86State *env = &cpu->env; - KVMState *s = kvm_state; /* We can't fill the features array here because we don't know yet if * "migratable" is true or false. */ cpu->max_features = true; + object_property_set_bool(OBJECT(cpu), "pmu", true, &error_abort); - if (accel_uses_host_cpuid()) { - char vendor[CPUID_VENDOR_SZ + 1] = { 0 }; - char model_id[CPUID_MODEL_ID_SZ + 1] = { 0 }; - int family, model, stepping; - X86CPUDefinition host_cpudef = { }; - uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; - - host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx); - x86_cpu_vendor_words2str(host_cpudef.vendor, ebx, edx, ecx); - - host_vendor_fms(vendor, &family, &model, &stepping); - - cpu_x86_fill_model_id(model_id); - - object_property_set_str(OBJECT(cpu), vendor, "vendor", &error_abort); - object_property_set_int(OBJECT(cpu), family, "family", &error_abort); - object_property_set_int(OBJECT(cpu), model, "model", &error_abort); - object_property_set_int(OBJECT(cpu), stepping, "stepping", - &error_abort); - object_property_set_str(OBJECT(cpu), model_id, "model-id", - &error_abort); - - if (kvm_enabled()) { - env->cpuid_min_level = - kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX); - env->cpuid_min_xlevel = - kvm_arch_get_supported_cpuid(s, 0x80000000, 0, R_EAX); - env->cpuid_min_xlevel2 = - kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX); - } else { - env->cpuid_min_level = - hvf_get_supported_cpuid(0x0, 0, R_EAX); - env->cpuid_min_xlevel = - hvf_get_supported_cpuid(0x80000000, 0, R_EAX); - env->cpuid_min_xlevel2 = - hvf_get_supported_cpuid(0xC0000000, 0, R_EAX); - } - - if (lmce_supported()) { - object_property_set_bool(OBJECT(cpu), true, "lmce", &error_abort); - } - } else { - object_property_set_str(OBJECT(cpu), CPUID_VENDOR_AMD, - "vendor", &error_abort); - object_property_set_int(OBJECT(cpu), 6, "family", &error_abort); - object_property_set_int(OBJECT(cpu), 6, "model", &error_abort); - 
object_property_set_int(OBJECT(cpu), 3, "stepping", &error_abort); - object_property_set_str(OBJECT(cpu), - "QEMU TCG CPU version " QEMU_HW_VERSION, - "model-id", &error_abort); - } - - object_property_set_bool(OBJECT(cpu), true, "pmu", &error_abort); + /* + * these defaults are used for TCG and all other accelerators + * besides KVM and HVF, which overwrite these values + */ + object_property_set_str(OBJECT(cpu), "vendor", CPUID_VENDOR_AMD, + &error_abort); + object_property_set_str(OBJECT(cpu), "model-id", + "QEMU TCG CPU version " QEMU_HW_VERSION, + &error_abort); } static const TypeInfo max_x86_cpu_type_info = { @@ -2950,44 +4998,61 @@ static const TypeInfo max_x86_cpu_type_info = { .class_init = max_x86_cpu_class_init, }; -#if defined(CONFIG_KVM) || defined(CONFIG_HVF) -static void host_x86_cpu_class_init(ObjectClass *oc, void *data) +static char *feature_word_description(FeatureWordInfo *f, uint32_t bit) { - X86CPUClass *xcc = X86_CPU_CLASS(oc); + assert(f->type == CPUID_FEATURE_WORD || f->type == MSR_FEATURE_WORD); - xcc->host_cpuid_required = true; - xcc->ordering = 8; + switch (f->type) { + case CPUID_FEATURE_WORD: + { + const char *reg = get_register_name_32(f->cpuid.reg); + assert(reg); + return g_strdup_printf("CPUID.%02XH:%s", + f->cpuid.eax, reg); + } + case MSR_FEATURE_WORD: + return g_strdup_printf("MSR(%02XH)", + f->msr.index); + } -#if defined(CONFIG_KVM) - xcc->model_description = - "KVM processor with all supported host features "; -#elif defined(CONFIG_HVF) - xcc->model_description = - "HVF processor with all supported host features "; -#endif + return NULL; } -static const TypeInfo host_x86_cpu_type_info = { - .name = X86_CPU_TYPE_NAME("host"), - .parent = X86_CPU_TYPE_NAME("max"), - .class_init = host_x86_cpu_class_init, -}; +static bool x86_cpu_have_filtered_features(X86CPU *cpu) +{ + FeatureWord w; -#endif + for (w = 0; w < FEATURE_WORDS; w++) { + if (cpu->filtered_features[w]) { + return true; + } + } + + return false; +} -static void 
report_unavailable_features(FeatureWord w, uint32_t mask) +static void mark_unavailable_features(X86CPU *cpu, FeatureWord w, uint64_t mask, + const char *verbose_prefix) { + CPUX86State *env = &cpu->env; FeatureWordInfo *f = &feature_word_info[w]; int i; - for (i = 0; i < 32; ++i) { - if ((1UL << i) & mask) { - const char *reg = get_register_name_32(f->cpuid_reg); - assert(reg); - warn_report("%s doesn't support requested feature: " - "CPUID.%02XH:%s%s%s [bit %d]", - accel_uses_host_cpuid() ? "host" : "TCG", - f->cpuid_eax, reg, + if (!cpu->force_features) { + env->features[w] &= ~mask; + } + cpu->filtered_features[w] |= mask; + + if (!verbose_prefix) { + return; + } + + for (i = 0; i < 64; ++i) { + if ((1ULL << i) & mask) { + g_autofree char *feat_word_str = feature_word_description(f, i); + warn_report("%s: %s%s%s [bit %d]", + verbose_prefix, + feat_word_str, f->feat_names[i] ? "." : "", f->feat_names[i] ? f->feat_names[i] : "", i); } @@ -3017,12 +5082,9 @@ static void x86_cpuid_version_set_family(Object *obj, Visitor *v, CPUX86State *env = &cpu->env; const int64_t min = 0; const int64_t max = 0xff + 0xf; - Error *local_err = NULL; int64_t value; - visit_type_int(v, name, &value, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!visit_type_int(v, name, &value, errp)) { return; } if (value < min || value > max) { @@ -3060,12 +5122,9 @@ static void x86_cpuid_version_set_model(Object *obj, Visitor *v, CPUX86State *env = &cpu->env; const int64_t min = 0; const int64_t max = 0xff; - Error *local_err = NULL; int64_t value; - visit_type_int(v, name, &value, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!visit_type_int(v, name, &value, errp)) { return; } if (value < min || value > max) { @@ -3098,12 +5157,9 @@ static void x86_cpuid_version_set_stepping(Object *obj, Visitor *v, CPUX86State *env = &cpu->env; const int64_t min = 0; const int64_t max = 0xf; - Error *local_err = NULL; int64_t value; - visit_type_int(v, 
name, &value, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!visit_type_int(v, name, &value, errp)) { return; } if (value < min || value > max) { @@ -3136,7 +5192,8 @@ static void x86_cpuid_set_vendor(Object *obj, const char *value, int i; if (strlen(value) != CPUID_VENDOR_SZ) { - error_setg(errp, QERR_PROPERTY_VALUE_BAD, "", "vendor", value); + error_setg(errp, "value of property 'vendor' must consist of" + " exactly " stringify(CPUID_VENDOR_SZ) " characters"); return; } @@ -3203,12 +5260,9 @@ static void x86_cpuid_set_tsc_freq(Object *obj, Visitor *v, const char *name, X86CPU *cpu = X86_CPU(obj); const int64_t min = 0; const int64_t max = INT64_MAX; - Error *local_err = NULL; int64_t value; - visit_type_int(v, name, &value, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!visit_type_int(v, name, &value, errp)) { return; } if (value < min || value > max) { @@ -3225,7 +5279,7 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, const char *name, void *opaque, Error **errp) { - uint32_t *array = (uint32_t *)opaque; + uint64_t *array = (uint64_t *)opaque; FeatureWord w; X86CPUFeatureWordInfo word_infos[FEATURE_WORDS] = { }; X86CPUFeatureWordInfoList list_entries[FEATURE_WORDS] = { }; @@ -3233,11 +5287,18 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, for (w = 0; w < FEATURE_WORDS; w++) { FeatureWordInfo *wi = &feature_word_info[w]; + /* + * We didn't have MSR features when "feature-words" was + * introduced. Therefore skipped other type entries. 
+ */ + if (wi->type != CPUID_FEATURE_WORD) { + continue; + } X86CPUFeatureWordInfo *qwi = &word_infos[w]; - qwi->cpuid_input_eax = wi->cpuid_eax; - qwi->has_cpuid_input_ecx = wi->cpuid_needs_ecx; - qwi->cpuid_input_ecx = wi->cpuid_ecx; - qwi->cpuid_register = x86_reg_info_32[wi->cpuid_reg].qapi_enum; + qwi->cpuid_input_eax = wi->cpuid.eax; + qwi->has_cpuid_input_ecx = wi->cpuid.needs_ecx; + qwi->cpuid_input_ecx = wi->cpuid.ecx; + qwi->cpuid_register = x86_reg_info_32[wi->cpuid.reg].qapi_enum; qwi->features = array[w]; /* List will be in reverse order, but order shouldn't matter */ @@ -3249,46 +5310,6 @@ static void x86_cpu_get_feature_words(Object *obj, Visitor *v, visit_type_X86CPUFeatureWordInfoList(v, "feature-words", &list, errp); } -static void x86_get_hv_spinlocks(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - X86CPU *cpu = X86_CPU(obj); - int64_t value = cpu->hyperv_spinlock_attempts; - - visit_type_int(v, name, &value, errp); -} - -static void x86_set_hv_spinlocks(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - const int64_t min = 0xFFF; - const int64_t max = UINT_MAX; - X86CPU *cpu = X86_CPU(obj); - Error *err = NULL; - int64_t value; - - visit_type_int(v, name, &value, &err); - if (err) { - error_propagate(errp, err); - return; - } - - if (value < min || value > max) { - error_setg(errp, "Property %s.%s doesn't take value %" PRId64 - " (minimum: %" PRId64 ", maximum: %" PRId64 ")", - object_get_typename(obj), name ? name : "null", - value, min, max); - return; - } - cpu->hyperv_spinlock_attempts = value; -} - -static const PropertyInfo qdev_prop_spinlocks = { - .name = "int", - .get = x86_get_hv_spinlocks, - .set = x86_set_hv_spinlocks, -}; - /* Convert all '_' in a feature string option name to '-', to make feature * name conform to QOM property naming rule, which uses '-' instead of '_'. 
*/ @@ -3302,11 +5323,12 @@ static inline void feat2prop(char *s) /* Return the feature property name for a feature flag bit */ static const char *x86_cpu_feature_name(FeatureWord w, int bitnr) { + const char *name; /* XSAVE components are automatically enabled by other features, * so return the original feature name instead */ - if (w == FEAT_XSAVE_COMP_LO || w == FEAT_XSAVE_COMP_HI) { - int comp = (w == FEAT_XSAVE_COMP_HI) ? bitnr + 32 : bitnr; + if (w == FEAT_XSAVE_XCR0_LO || w == FEAT_XSAVE_XCR0_HI) { + int comp = (w == FEAT_XSAVE_XCR0_HI) ? bitnr + 32 : bitnr; if (comp < ARRAY_SIZE(x86_ext_save_areas) && x86_ext_save_areas[comp].bits) { @@ -3315,12 +5337,14 @@ static const char *x86_cpu_feature_name(FeatureWord w, int bitnr) } } - assert(bitnr < 32); + assert(bitnr < 64); assert(w < FEATURE_WORDS); - return feature_word_info[w].feat_names[bitnr]; + name = feature_word_info[w].feat_names[bitnr]; + assert(bitnr < 32 || !(name && feature_word_info[w].type == CPUID_FEATURE_WORD)); + return name; } -/* Compatibily hack to maintain legacy +-feat semantic, +/* Compatibility hack to maintain legacy +-feat semantic, * where +-feat overwrites any feature set by * feat=on|feat even if the later is parsed after +-feat * (i.e. "-x2apic,x2apic=on" will result in x2apic disabled) @@ -3413,7 +5437,6 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, prop->driver = typename; prop->property = g_strdup(name); prop->value = g_strdup(val); - prop->errp = &error_fatal; qdev_prop_register_global(prop); } @@ -3423,62 +5446,40 @@ static void x86_cpu_parse_featurestr(const char *typename, char *features, } } -static void x86_cpu_expand_features(X86CPU *cpu, Error **errp); -static int x86_cpu_filter_features(X86CPU *cpu); +static void x86_cpu_filter_features(X86CPU *cpu, bool verbose); -/* Check for missing features that may prevent the CPU class from - * running using the current machine and accelerator. 
- */ -static void x86_cpu_class_check_missing_features(X86CPUClass *xcc, - strList **missing_feats) +/* Build a list with the name of all features on a feature word array */ +static void x86_cpu_list_feature_names(FeatureWordArray features, + strList **list) { - X86CPU *xc; + strList **tail = list; FeatureWord w; - Error *err = NULL; - strList **next = missing_feats; - - if (xcc->host_cpuid_required && !accel_uses_host_cpuid()) { - strList *new = g_new0(strList, 1); - new->value = g_strdup("kvm"); - *missing_feats = new; - return; - } - - xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc)))); - - x86_cpu_expand_features(xc, &err); - if (err) { - /* Errors at x86_cpu_expand_features should never happen, - * but in case it does, just report the model as not - * runnable at all using the "type" property. - */ - strList *new = g_new0(strList, 1); - new->value = g_strdup("type"); - *next = new; - next = &new->next; - } - - x86_cpu_filter_features(xc); for (w = 0; w < FEATURE_WORDS; w++) { - uint32_t filtered = xc->filtered_features[w]; + uint64_t filtered = features[w]; int i; - for (i = 0; i < 32; i++) { - if (filtered & (1UL << i)) { - strList *new = g_new0(strList, 1); - new->value = g_strdup(x86_cpu_feature_name(w, i)); - *next = new; - next = &new->next; + for (i = 0; i < 64; i++) { + if (filtered & (1ULL << i)) { + QAPI_LIST_APPEND(tail, g_strdup(x86_cpu_feature_name(w, i))); } } } +} - object_unref(OBJECT(xc)); +static void x86_cpu_get_unavailable_features(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + X86CPU *xc = X86_CPU(obj); + strList *result = NULL; + + x86_cpu_list_feature_names(xc->filtered_features, &result); + visit_type_strList(v, "unavailable-features", &result, errp); } /* Print all cpuid feature names in featureset */ -static void listflags(FILE *f, fprintf_function print, GList *features) +static void listflags(GList *features) { size_t len = 0; GList *tmp; @@ -3486,13 +5487,13 @@ static void 
listflags(FILE *f, fprintf_function print, GList *features) for (tmp = features; tmp; tmp = tmp->next) { const char *name = tmp->data; if ((len + strlen(name) + 1) >= 75) { - print(f, "\n"); + qemu_printf("\n"); len = 0; } - print(f, "%s%s", len == 0 ? " " : " ", name); + qemu_printf("%s%s", len == 0 ? " " : " ", name); len += strlen(name) + 1; } - print(f, "\n"); + qemu_printf("\n"); } /* Sort alphabetically by type name, respecting X86CPUClass::ordering. */ @@ -3502,17 +5503,14 @@ static gint x86_cpu_list_compare(gconstpointer a, gconstpointer b) ObjectClass *class_b = (ObjectClass *)b; X86CPUClass *cc_a = X86_CPU_CLASS(class_a); X86CPUClass *cc_b = X86_CPU_CLASS(class_b); - char *name_a, *name_b; int ret; if (cc_a->ordering != cc_b->ordering) { ret = cc_a->ordering - cc_b->ordering; } else { - name_a = x86_cpu_class_get_model_name(cc_a); - name_b = x86_cpu_class_get_model_name(cc_b); + g_autofree char *name_a = x86_cpu_class_get_model_name(cc_a); + g_autofree char *name_b = x86_cpu_class_get_model_name(cc_b); ret = strcmp(name_a, name_b); - g_free(name_a); - g_free(name_b); } return ret; } @@ -3524,42 +5522,75 @@ static GSList *get_sorted_cpu_model_list(void) return list; } +static char *x86_cpu_class_get_model_id(X86CPUClass *xc) +{ + Object *obj = object_new_with_class(OBJECT_CLASS(xc)); + char *r = object_property_get_str(obj, "model-id", &error_abort); + object_unref(obj); + return r; +} + +static char *x86_cpu_class_get_alias_of(X86CPUClass *cc) +{ + X86CPUVersion version; + + if (!cc->model || !cc->model->is_alias) { + return NULL; + } + version = x86_cpu_model_resolve_version(cc->model); + if (version <= 0) { + return NULL; + } + return x86_cpu_versioned_model_name(cc->model->cpudef, version); +} + static void x86_cpu_list_entry(gpointer data, gpointer user_data) { ObjectClass *oc = data; X86CPUClass *cc = X86_CPU_CLASS(oc); - CPUListState *s = user_data; - char *name = x86_cpu_class_get_model_name(cc); - const char *desc = cc->model_description; - if 
(!desc && cc->cpu_def) { - desc = cc->cpu_def->model_id; + g_autofree char *name = x86_cpu_class_get_model_name(cc); + g_autofree char *desc = g_strdup(cc->model_description); + g_autofree char *alias_of = x86_cpu_class_get_alias_of(cc); + g_autofree char *model_id = x86_cpu_class_get_model_id(cc); + + if (!desc && alias_of) { + if (cc->model && cc->model->version == CPU_VERSION_AUTO) { + desc = g_strdup("(alias configured by machine type)"); + } else { + desc = g_strdup_printf("(alias of %s)", alias_of); + } + } + if (!desc && cc->model && cc->model->note) { + desc = g_strdup_printf("%s [%s]", model_id, cc->model->note); + } + if (!desc) { + desc = g_strdup_printf("%s", model_id); + } + + if (cc->model && cc->model->cpudef->deprecation_note) { + g_autofree char *olddesc = desc; + desc = g_strdup_printf("%s (deprecated)", olddesc); } - (*s->cpu_fprintf)(s->file, "x86 %-20s %-48s\n", - name, desc); - g_free(name); + qemu_printf("x86 %-20s %s\n", name, desc); } /* list available CPU models and flags */ -void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf) +void x86_cpu_list(void) { int i, j; - CPUListState s = { - .file = f, - .cpu_fprintf = cpu_fprintf, - }; GSList *list; GList *names = NULL; - (*cpu_fprintf)(f, "Available CPUs:\n"); + qemu_printf("Available CPUs:\n"); list = get_sorted_cpu_model_list(); - g_slist_foreach(list, x86_cpu_list_entry, &s); + g_slist_foreach(list, x86_cpu_list_entry, NULL); g_slist_free(list); names = NULL; for (i = 0; i < ARRAY_SIZE(feature_word_info); i++) { FeatureWordInfo *fw = &feature_word_info[i]; - for (j = 0; j < 32; j++) { + for (j = 0; j < 64; j++) { if (fw->feat_names[j]) { names = g_list_append(names, (gpointer)fw->feat_names[j]); } @@ -3568,18 +5599,53 @@ void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf) names = g_list_sort(names, (GCompareFunc)strcmp); - (*cpu_fprintf)(f, "\nRecognized CPUID flags:\n"); - listflags(f, cpu_fprintf, names); - (*cpu_fprintf)(f, "\n"); + qemu_printf("\nRecognized CPUID flags:\n"); + 
listflags(names); + qemu_printf("\n"); g_list_free(names); } +#ifndef CONFIG_USER_ONLY + +/* Check for missing features that may prevent the CPU class from + * running using the current machine and accelerator. + */ +static void x86_cpu_class_check_missing_features(X86CPUClass *xcc, + strList **list) +{ + strList **tail = list; + X86CPU *xc; + Error *err = NULL; + + if (xcc->host_cpuid_required && !accel_uses_host_cpuid()) { + QAPI_LIST_APPEND(tail, g_strdup("kvm")); + return; + } + + xc = X86_CPU(object_new_with_class(OBJECT_CLASS(xcc))); + + x86_cpu_expand_features(xc, &err); + if (err) { + /* Errors at x86_cpu_expand_features should never happen, + * but in case it does, just report the model as not + * runnable at all using the "type" property. + */ + QAPI_LIST_APPEND(tail, g_strdup("type")); + error_free(err); + } + + x86_cpu_filter_features(xc, false); + + x86_cpu_list_feature_names(xc->filtered_features, tail); + + object_unref(OBJECT(xc)); +} + static void x86_cpu_definition_entry(gpointer data, gpointer user_data) { ObjectClass *oc = data; X86CPUClass *cc = X86_CPU_CLASS(oc); CpuDefinitionInfoList **cpu_list = user_data; - CpuDefinitionInfoList *entry; CpuDefinitionInfo *info; info = g_malloc0(sizeof(*info)); @@ -3590,14 +5656,23 @@ static void x86_cpu_definition_entry(gpointer data, gpointer user_data) info->migration_safe = cc->migration_safe; info->has_migration_safe = true; info->q_static = cc->static_model; + if (cc->model && cc->model->cpudef->deprecation_note) { + info->deprecated = true; + } else { + info->deprecated = false; + } + /* + * Old machine types won't report aliases, so that alias translation + * doesn't break compatibility with previous QEMU versions. 
+ */ + if (default_cpu_version != CPU_VERSION_LEGACY) { + info->alias_of = x86_cpu_class_get_alias_of(cc); + } - entry = g_malloc0(sizeof(*entry)); - entry->value = info; - entry->next = *cpu_list; - *cpu_list = entry; + QAPI_LIST_PREPEND(*cpu_list, info); } -CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp) +CpuDefinitionInfoList *qmp_query_cpu_definitions(Error **errp) { CpuDefinitionInfoList *cpu_list = NULL; GSList *list = get_sorted_cpu_model_list(); @@ -3606,345 +5681,287 @@ CpuDefinitionInfoList *arch_query_cpu_definitions(Error **errp) return cpu_list; } -static uint32_t x86_cpu_get_supported_feature_word(FeatureWord w, - bool migratable_only) +#endif /* !CONFIG_USER_ONLY */ + +uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, + bool migratable_only) { FeatureWordInfo *wi = &feature_word_info[w]; - uint32_t r; + uint64_t r = 0; if (kvm_enabled()) { - r = kvm_arch_get_supported_cpuid(kvm_state, wi->cpuid_eax, - wi->cpuid_ecx, - wi->cpuid_reg); + switch (wi->type) { + case CPUID_FEATURE_WORD: + r = kvm_arch_get_supported_cpuid(kvm_state, wi->cpuid.eax, + wi->cpuid.ecx, + wi->cpuid.reg); + break; + case MSR_FEATURE_WORD: + r = kvm_arch_get_supported_msr_feature(kvm_state, + wi->msr.index); + break; + } } else if (hvf_enabled()) { - r = hvf_get_supported_cpuid(wi->cpuid_eax, - wi->cpuid_ecx, - wi->cpuid_reg); + if (wi->type != CPUID_FEATURE_WORD) { + return 0; + } + r = hvf_get_supported_cpuid(wi->cpuid.eax, + wi->cpuid.ecx, + wi->cpuid.reg); } else if (tcg_enabled()) { r = wi->tcg_features; } else { return ~0; } +#ifndef TARGET_X86_64 + if (w == FEAT_8000_0001_EDX) { + /* + * 32-bit TCG can emulate 64-bit compatibility mode. If there is no + * way for userspace to get out of its 32-bit jail, we can leave + * the LM bit set. + */ + uint32_t unavail = tcg_enabled() + ? 
CPUID_EXT2_LM & ~CPUID_EXT2_KERNEL_FEATURES + : CPUID_EXT2_LM; + r &= ~unavail; + } +#endif if (migratable_only) { r &= x86_cpu_get_migratable_flags(w); } return r; } -static void x86_cpu_report_filtered_features(X86CPU *cpu) +static void x86_cpu_get_supported_cpuid(uint32_t func, uint32_t index, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) { - FeatureWord w; + if (kvm_enabled()) { + *eax = kvm_arch_get_supported_cpuid(kvm_state, func, index, R_EAX); + *ebx = kvm_arch_get_supported_cpuid(kvm_state, func, index, R_EBX); + *ecx = kvm_arch_get_supported_cpuid(kvm_state, func, index, R_ECX); + *edx = kvm_arch_get_supported_cpuid(kvm_state, func, index, R_EDX); + } else if (hvf_enabled()) { + *eax = hvf_get_supported_cpuid(func, index, R_EAX); + *ebx = hvf_get_supported_cpuid(func, index, R_EBX); + *ecx = hvf_get_supported_cpuid(func, index, R_ECX); + *edx = hvf_get_supported_cpuid(func, index, R_EDX); + } else { + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + } +} - for (w = 0; w < FEATURE_WORDS; w++) { - report_unavailable_features(w, cpu->filtered_features[w]); +static void x86_cpu_get_cache_cpuid(uint32_t func, uint32_t index, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + uint32_t level, unused; + + /* Only return valid host leaves. */ + switch (func) { + case 2: + case 4: + host_cpuid(0, 0, &level, &unused, &unused, &unused); + break; + case 0x80000005: + case 0x80000006: + case 0x8000001d: + host_cpuid(0x80000000, 0, &level, &unused, &unused, &unused); + break; + default: + return; + } + + if (func > level) { + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + } else { + host_cpuid(func, index, eax, ebx, ecx, edx); } } -static void x86_cpu_apply_props(X86CPU *cpu, PropValue *props) +/* + * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. 
+ */ +void x86_cpu_apply_props(X86CPU *cpu, PropValue *props) { PropValue *pv; for (pv = props; pv->prop; pv++) { if (!pv->value) { continue; } - object_property_parse(OBJECT(cpu), pv->value, pv->prop, + object_property_parse(OBJECT(cpu), pv->prop, pv->value, &error_abort); } } -/* Load data from X86CPUDefinition into a X86CPU object +/* + * Apply properties for the CPU model version specified in model. + * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. */ -static void x86_cpu_load_def(X86CPU *cpu, X86CPUDefinition *def, Error **errp) -{ - CPUX86State *env = &cpu->env; - const char *vendor; - char host_vendor[CPUID_VENDOR_SZ + 1]; - FeatureWord w; - - /*NOTE: any property set by this function should be returned by - * x86_cpu_static_props(), so static expansion of - * query-cpu-model-expansion is always complete. - */ - /* CPU models only set _minimum_ values for level/xlevel: */ - object_property_set_uint(OBJECT(cpu), def->level, "min-level", errp); - object_property_set_uint(OBJECT(cpu), def->xlevel, "min-xlevel", errp); +static void x86_cpu_apply_version_props(X86CPU *cpu, X86CPUModel *model) +{ + const X86CPUVersionDefinition *vdef; + X86CPUVersion version = x86_cpu_model_resolve_version(model); - object_property_set_int(OBJECT(cpu), def->family, "family", errp); - object_property_set_int(OBJECT(cpu), def->model, "model", errp); - object_property_set_int(OBJECT(cpu), def->stepping, "stepping", errp); - object_property_set_str(OBJECT(cpu), def->model_id, "model-id", errp); - for (w = 0; w < FEATURE_WORDS; w++) { - env->features[w] = def->features[w]; + if (version == CPU_VERSION_LEGACY) { + return; } - /* legacy-cache defaults to 'off' if CPU model provides cache info */ - cpu->legacy_cache = !def->cache_info; + for (vdef = x86_cpu_def_get_versions(model->cpudef); vdef->version; vdef++) { + PropValue *p; - /* Special cases not set in the X86CPUDefinition structs: */ - /* TODO: in-kernel irqchip for hvf */ - if (kvm_enabled()) { - if 
(!kvm_irqchip_in_kernel()) { - x86_cpu_change_kvm_default("x2apic", "off"); + for (p = vdef->props; p && p->prop; p++) { + object_property_parse(OBJECT(cpu), p->prop, p->value, + &error_abort); } - x86_cpu_apply_props(cpu, kvm_default_props); - } else if (tcg_enabled()) { - x86_cpu_apply_props(cpu, tcg_default_props); - } - - env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR; - - /* sysenter isn't supported in compatibility mode on AMD, - * syscall isn't supported in compatibility mode on Intel. - * Normally we advertise the actual CPU vendor, but you can - * override this using the 'vendor' property if you want to use - * KVM's sysenter/syscall emulation in compatibility mode and - * when doing cross vendor migration - */ - vendor = def->vendor; - if (accel_uses_host_cpuid()) { - uint32_t ebx = 0, ecx = 0, edx = 0; - host_cpuid(0, 0, NULL, &ebx, &ecx, &edx); - x86_cpu_vendor_words2str(host_vendor, ebx, edx, ecx); - vendor = host_vendor; - } - - object_property_set_str(OBJECT(cpu), vendor, "vendor", errp); - -} - -/* Return a QDict containing keys for all properties that can be included - * in static expansion of CPU models. All properties set by x86_cpu_load_def() - * must be included in the dictionary. 
- */ -static QDict *x86_cpu_static_props(void) -{ - FeatureWord w; - int i; - static const char *props[] = { - "min-level", - "min-xlevel", - "family", - "model", - "stepping", - "model-id", - "vendor", - "lmce", - NULL, - }; - static QDict *d; - - if (d) { - return d; - } - - d = qdict_new(); - for (i = 0; props[i]; i++) { - qdict_put_null(d, props[i]); - } - - for (w = 0; w < FEATURE_WORDS; w++) { - FeatureWordInfo *fi = &feature_word_info[w]; - int bit; - for (bit = 0; bit < 32; bit++) { - if (!fi->feat_names[bit]) { - continue; - } - qdict_put_null(d, fi->feat_names[bit]); + if (vdef->version == version) { + break; } } - return d; + /* + * If we reached the end of the list, version number was invalid + */ + assert(vdef->version == version); } -/* Add an entry to @props dict, with the value for property. */ -static void x86_cpu_expand_prop(X86CPU *cpu, QDict *props, const char *prop) +static const CPUCaches *x86_cpu_get_versioned_cache_info(X86CPU *cpu, + X86CPUModel *model) { - QObject *value = object_property_get_qobject(OBJECT(cpu), prop, - &error_abort); + const X86CPUVersionDefinition *vdef; + X86CPUVersion version = x86_cpu_model_resolve_version(model); + const CPUCaches *cache_info = model->cpudef->cache_info; - qdict_put_obj(props, prop, value); -} - -/* Convert CPU model data from X86CPU object to a property dictionary - * that can recreate exactly the same CPU model. - */ -static void x86_cpu_to_dict(X86CPU *cpu, QDict *props) -{ - QDict *sprops = x86_cpu_static_props(); - const QDictEntry *e; - - for (e = qdict_first(sprops); e; e = qdict_next(sprops, e)) { - const char *prop = qdict_entry_key(e); - x86_cpu_expand_prop(cpu, props, prop); + if (version == CPU_VERSION_LEGACY) { + return cache_info; } -} - -/* Convert CPU model data from X86CPU object to a property dictionary - * that can recreate exactly the same CPU model, including every - * writeable QOM property. 
- */ -static void x86_cpu_to_dict_full(X86CPU *cpu, QDict *props) -{ - ObjectPropertyIterator iter; - ObjectProperty *prop; - object_property_iter_init(&iter, OBJECT(cpu)); - while ((prop = object_property_iter_next(&iter))) { - /* skip read-only or write-only properties */ - if (!prop->get || !prop->set) { - continue; + for (vdef = x86_cpu_def_get_versions(model->cpudef); vdef->version; vdef++) { + if (vdef->cache_info) { + cache_info = vdef->cache_info; } - /* "hotplugged" is the only property that is configurable - * on the command-line but will be set differently on CPUs - * created using "-cpu ... -smp ..." and by CPUs created - * on the fly by x86_cpu_from_model() for querying. Skip it. - */ - if (!strcmp(prop->name, "hotplugged")) { - continue; - } - x86_cpu_expand_prop(cpu, props, prop->name); - } -} - -static void object_apply_props(Object *obj, QDict *props, Error **errp) -{ - const QDictEntry *prop; - Error *err = NULL; - - for (prop = qdict_first(props); prop; prop = qdict_next(props, prop)) { - object_property_set_qobject(obj, qdict_entry_value(prop), - qdict_entry_key(prop), &err); - if (err) { + if (vdef->version == version) { break; } } - error_propagate(errp, err); + assert(vdef->version == version); + return cache_info; } -/* Create X86CPU object according to model+props specification */ -static X86CPU *x86_cpu_from_model(const char *model, QDict *props, Error **errp) +/* + * Load data from X86CPUDefinition into a X86CPU object. + * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. 
+ */ +static void x86_cpu_load_model(X86CPU *cpu, X86CPUModel *model) { - X86CPU *xc = NULL; - X86CPUClass *xcc; - Error *err = NULL; - - xcc = X86_CPU_CLASS(cpu_class_by_name(TYPE_X86_CPU, model)); - if (xcc == NULL) { - error_setg(&err, "CPU model '%s' not found", model); - goto out; - } - - xc = X86_CPU(object_new(object_class_get_name(OBJECT_CLASS(xcc)))); - if (props) { - object_apply_props(OBJECT(xc), props, &err); - if (err) { - goto out; - } - } + const X86CPUDefinition *def = model->cpudef; + CPUX86State *env = &cpu->env; + FeatureWord w; - x86_cpu_expand_features(xc, &err); - if (err) { - goto out; - } + /*NOTE: any property set by this function should be returned by + * x86_cpu_static_props(), so static expansion of + * query-cpu-model-expansion is always complete. + */ -out: - if (err) { - error_propagate(errp, err); - object_unref(OBJECT(xc)); - xc = NULL; + /* CPU models only set _minimum_ values for level/xlevel: */ + object_property_set_uint(OBJECT(cpu), "min-level", def->level, + &error_abort); + object_property_set_uint(OBJECT(cpu), "min-xlevel", def->xlevel, + &error_abort); + + object_property_set_int(OBJECT(cpu), "family", def->family, &error_abort); + object_property_set_int(OBJECT(cpu), "model", def->model, &error_abort); + object_property_set_int(OBJECT(cpu), "stepping", def->stepping, + &error_abort); + object_property_set_str(OBJECT(cpu), "model-id", def->model_id, + &error_abort); + for (w = 0; w < FEATURE_WORDS; w++) { + env->features[w] = def->features[w]; } - return xc; -} -CpuModelExpansionInfo * -arch_query_cpu_model_expansion(CpuModelExpansionType type, - CpuModelInfo *model, - Error **errp) -{ - X86CPU *xc = NULL; - Error *err = NULL; - CpuModelExpansionInfo *ret = g_new0(CpuModelExpansionInfo, 1); - QDict *props = NULL; - const char *base_name; - - xc = x86_cpu_from_model(model->name, - model->has_props ? 
- qobject_to(QDict, model->props) : - NULL, &err); - if (err) { - goto out; - } + /* legacy-cache defaults to 'off' if CPU model provides cache info */ + cpu->legacy_cache = !x86_cpu_get_versioned_cache_info(cpu, model); - props = qdict_new(); - ret->model = g_new0(CpuModelInfo, 1); - ret->model->props = QOBJECT(props); - ret->model->has_props = true; + env->features[FEAT_1_ECX] |= CPUID_EXT_HYPERVISOR; - switch (type) { - case CPU_MODEL_EXPANSION_TYPE_STATIC: - /* Static expansion will be based on "base" only */ - base_name = "base"; - x86_cpu_to_dict(xc, props); - break; - case CPU_MODEL_EXPANSION_TYPE_FULL: - /* As we don't return every single property, full expansion needs - * to keep the original model name+props, and add extra - * properties on top of that. - */ - base_name = model->name; - x86_cpu_to_dict_full(xc, props); - break; - default: - error_setg(&err, "Unsupportted expansion type"); - goto out; - } + /* sysenter isn't supported in compatibility mode on AMD, + * syscall isn't supported in compatibility mode on Intel. + * Normally we advertise the actual CPU vendor, but you can + * override this using the 'vendor' property if you want to use + * KVM's sysenter/syscall emulation in compatibility mode and + * when doing cross vendor migration + */ - x86_cpu_to_dict(xc, props); + /* + * vendor property is set here but then overloaded with the + * host cpu vendor for KVM and HVF. + */ + object_property_set_str(OBJECT(cpu), "vendor", def->vendor, &error_abort); - ret->model->name = g_strdup(base_name); + x86_cpu_apply_version_props(cpu, model); -out: - object_unref(OBJECT(xc)); - if (err) { - error_propagate(errp, err); - qapi_free_CpuModelExpansionInfo(ret); - ret = NULL; - } - return ret; + /* + * Properties in versioned CPU model are not user specified features. + * We can simply clear env->user_features here since it will be filled later + * in x86_cpu_expand_features() based on plus_features and minus_features. 
+ */ + memset(&env->user_features, 0, sizeof(env->user_features)); } -static gchar *x86_gdb_arch_name(CPUState *cs) +static const gchar *x86_gdb_arch_name(CPUState *cs) { #ifdef TARGET_X86_64 - return g_strdup("i386:x86-64"); + return "i386:x86-64"; #else - return g_strdup("i386"); + return "i386"; #endif } static void x86_cpu_cpudef_class_init(ObjectClass *oc, void *data) { - X86CPUDefinition *cpudef = data; + X86CPUModel *model = data; X86CPUClass *xcc = X86_CPU_CLASS(oc); + CPUClass *cc = CPU_CLASS(oc); - xcc->cpu_def = cpudef; + xcc->model = model; xcc->migration_safe = true; + cc->deprecation_note = model->cpudef->deprecation_note; } -static void x86_register_cpudef_type(X86CPUDefinition *def) +static void x86_register_cpu_model_type(const char *name, X86CPUModel *model) { - char *typename = x86_cpu_type_name(def->name); + g_autofree char *typename = x86_cpu_type_name(name); TypeInfo ti = { .name = typename, .parent = TYPE_X86_CPU, .class_init = x86_cpu_cpudef_class_init, - .class_data = def, + .class_data = model, }; + type_register(&ti); +} + + +/* + * register builtin_x86_defs; + * "max", "base" and subclasses ("host") are not registered here. + * See x86_cpu_register_types for all model registrations. + */ +static void x86_register_cpudef_types(const X86CPUDefinition *def) +{ + X86CPUModel *m; + const X86CPUVersionDefinition *vdef; + /* AMD aliases are handled at runtime based on CPUID vendor, so * they shouldn't be set on the CPU model table. 
*/ @@ -3952,29 +5969,59 @@ static void x86_register_cpudef_type(X86CPUDefinition *def) /* catch mistakes instead of silently truncating model_id when too long */ assert(def->model_id && strlen(def->model_id) <= 48); + /* Unversioned model: */ + m = g_new0(X86CPUModel, 1); + m->cpudef = def; + m->version = CPU_VERSION_AUTO; + m->is_alias = true; + x86_register_cpu_model_type(def->name, m); + + /* Versioned models: */ + + for (vdef = x86_cpu_def_get_versions(def); vdef->version; vdef++) { + g_autofree char *name = + x86_cpu_versioned_model_name(def, vdef->version); + + m = g_new0(X86CPUModel, 1); + m->cpudef = def; + m->version = vdef->version; + m->note = vdef->note; + x86_register_cpu_model_type(name, m); + + if (vdef->alias) { + X86CPUModel *am = g_new0(X86CPUModel, 1); + am->cpudef = def; + am->version = vdef->version; + am->is_alias = true; + x86_register_cpu_model_type(vdef->alias, am); + } + } - type_register(&ti); - g_free(typename); } -#if !defined(CONFIG_USER_ONLY) - -void cpu_clear_apic_feature(CPUX86State *env) +uint32_t cpu_x86_virtual_addr_width(CPUX86State *env) { - env->features[FEAT_1_EDX] &= ~CPUID_APIC; + if (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_LA57) { + return 57; /* 57 bits virtual */ + } else { + return 48; /* 48 bits virtual */ + } } -#endif /* !CONFIG_USER_ONLY */ - void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { - X86CPU *cpu = x86_env_get_cpu(env); - CPUState *cs = CPU(cpu); - uint32_t pkg_offset; + X86CPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); + uint32_t die_offset; uint32_t limit; uint32_t signature[3]; + X86CPUTopoInfo topo_info; + + topo_info.dies_per_pkg = env->nr_dies; + topo_info.cores_per_die = cs->nr_cores / env->nr_dies; + topo_info.threads_per_core = cs->nr_threads; /* Calculate & apply limits for different index ranges */ if (index >= 0xC0000000) { @@ -4015,11 +6062,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, 
uint32_t count, *ebx |= (cs->nr_cores * cs->nr_threads) << 16; *edx |= CPUID_HT; } + if (!cpu->enable_pmu) { + *ecx &= ~CPUID_EXT_PDCM; + } break; case 2: /* cache info: needed for Pentium Pro compatibility */ if (cpu->cache_info_passthrough) { - host_cpuid(index, 0, eax, ebx, ecx, edx); + x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); + break; + } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { + *eax = *ebx = *ecx = *edx = 0; break; } *eax = 1; /* Number of CPUID[EAX=2] calls required */ @@ -4036,12 +6089,25 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, case 4: /* cache info: needed for Core compatibility */ if (cpu->cache_info_passthrough) { - host_cpuid(index, count, eax, ebx, ecx, edx); - /* QEMU gives out its own APIC IDs, never pass down bits 31..26. */ - *eax &= ~0xFC000000; - if ((*eax & 31) && cs->nr_cores > 1) { - *eax |= (cs->nr_cores - 1) << 26; + x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); + /* + * QEMU has its own number of cores/logical cpus, + * set 24..14, 31..26 bit to configured values + */ + if (*eax & 31) { + int host_vcpus_per_cache = 1 + ((*eax & 0x3FFC000) >> 14); + int vcpus_per_socket = cs->nr_cores * cs->nr_threads; + if (cs->nr_cores > 1) { + *eax &= ~0xFC000000; + *eax |= (pow2ceil(cs->nr_cores) - 1) << 26; + } + if (host_vcpus_per_cache > vcpus_per_socket) { + *eax &= ~0x3FFC000; + *eax |= (pow2ceil(vcpus_per_socket) - 1) << 14; + } } + } else if (cpu->vendor_cpuid_only && IS_AMD_CPU(env)) { + *eax = *ebx = *ecx = *edx = 0; } else { *eax = 0; switch (count) { @@ -4061,10 +6127,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ - pkg_offset = apicid_pkg_offset(cs->nr_cores, cs->nr_threads); + die_offset = apicid_die_offset(&topo_info); if (cpu->enable_l3_cache) { encode_cache_cpuid4(env->cache_info_cpuid4.l3_cache, - (1 << pkg_offset), cs->nr_cores, + (1 << die_offset), cs->nr_cores, eax, ebx, ecx, 
edx); break; } @@ -4092,13 +6158,43 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, case 7: /* Structured Extended Feature Flags Enumeration Leaf */ if (count == 0) { - *eax = 0; /* Maximum ECX value for sub-leaves */ + uint32_t eax_0_unused, ebx_0, ecx_0, edx_0_unused; + + /* Maximum ECX value for sub-leaves */ + *eax = env->cpuid_level_func7; *ebx = env->features[FEAT_7_0_EBX]; /* Feature flags */ *ecx = env->features[FEAT_7_0_ECX]; /* Feature flags */ if ((*ecx & CPUID_7_0_ECX_PKU) && env->cr[4] & CR4_PKE_MASK) { *ecx |= CPUID_7_0_ECX_OSPKE; } *edx = env->features[FEAT_7_0_EDX]; /* Feature flags */ + + /* + * SGX cannot be emulated in software. If hardware does not + * support enabling SGX and/or SGX flexible launch control, + * then we need to update the VM's CPUID values accordingly. + */ + x86_cpu_get_supported_cpuid(0x7, 0, + &eax_0_unused, &ebx_0, + &ecx_0, &edx_0_unused); + if ((*ebx & CPUID_7_0_EBX_SGX) && !(ebx_0 & CPUID_7_0_EBX_SGX)) { + *ebx &= ~CPUID_7_0_EBX_SGX; + } + + if ((*ecx & CPUID_7_0_ECX_SGX_LC) + && (!(*ebx & CPUID_7_0_EBX_SGX) || !(ecx_0 & CPUID_7_0_ECX_SGX_LC))) { + *ecx &= ~CPUID_7_0_ECX_SGX_LC; + } + } else if (count == 1) { + *eax = env->features[FEAT_7_1_EAX]; + *edx = env->features[FEAT_7_1_EDX]; + *ebx = 0; + *ecx = 0; + } else if (count == 2) { + *edx = env->features[FEAT_7_2_EDX]; + *eax = 0; + *ebx = 0; + *ecx = 0; } else { *eax = 0; *ebx = 0; @@ -4115,18 +6211,8 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0xA: /* Architectural Performance Monitoring Leaf */ - if (kvm_enabled() && cpu->enable_pmu) { - KVMState *s = cs->kvm_state; - - *eax = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EAX); - *ebx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EBX); - *ecx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_ECX); - *edx = kvm_arch_get_supported_cpuid(s, 0xA, count, R_EDX); - } else if (hvf_enabled() && cpu->enable_pmu) { - *eax = hvf_get_supported_cpuid(0xA, count, 
R_EAX); - *ebx = hvf_get_supported_cpuid(0xA, count, R_EBX); - *ecx = hvf_get_supported_cpuid(0xA, count, R_ECX); - *edx = hvf_get_supported_cpuid(0xA, count, R_EDX); + if (cpu->enable_pmu) { + x86_cpu_get_supported_cpuid(0xA, count, eax, ebx, ecx, edx); } else { *eax = 0; *ebx = 0; @@ -4146,12 +6232,12 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, switch (count) { case 0: - *eax = apicid_core_offset(cs->nr_cores, cs->nr_threads); + *eax = apicid_core_offset(&topo_info); *ebx = cs->nr_threads; *ecx |= CPUID_TOPOLOGY_LEVEL_SMT; break; case 1: - *eax = apicid_pkg_offset(cs->nr_cores, cs->nr_threads); + *eax = apicid_pkg_offset(&topo_info); *ebx = cs->nr_cores * cs->nr_threads; *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; break; @@ -4164,6 +6250,45 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, assert(!(*eax & ~0x1f)); *ebx &= 0xffff; /* The count doesn't need to be reliable. */ break; + case 0x1C: + if (cpu->enable_pmu && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { + x86_cpu_get_supported_cpuid(0x1C, 0, eax, ebx, ecx, edx); + *edx = 0; + } + break; + case 0x1F: + /* V2 Extended Topology Enumeration Leaf */ + if (env->nr_dies < 2) { + *eax = *ebx = *ecx = *edx = 0; + break; + } + + *ecx = count & 0xff; + *edx = cpu->apic_id; + switch (count) { + case 0: + *eax = apicid_core_offset(&topo_info); + *ebx = cs->nr_threads; + *ecx |= CPUID_TOPOLOGY_LEVEL_SMT; + break; + case 1: + *eax = apicid_die_offset(&topo_info); + *ebx = topo_info.cores_per_die * topo_info.threads_per_core; + *ecx |= CPUID_TOPOLOGY_LEVEL_CORE; + break; + case 2: + *eax = apicid_pkg_offset(&topo_info); + *ebx = cs->nr_cores * cs->nr_threads; + *ecx |= CPUID_TOPOLOGY_LEVEL_DIE; + break; + default: + *eax = 0; + *ebx = 0; + *ecx |= CPUID_TOPOLOGY_LEVEL_INVALID; + } + assert(!(*eax & ~0x1f)); + *ebx &= 0xffff; /* The count doesn't need to be reliable. 
*/ + break; case 0xD: { /* Processor Extended State */ *eax = 0; @@ -4175,21 +6300,107 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } if (count == 0) { - *ecx = xsave_area_size(x86_cpu_xsave_components(cpu)); - *eax = env->features[FEAT_XSAVE_COMP_LO]; - *edx = env->features[FEAT_XSAVE_COMP_HI]; - *ebx = *ecx; + *ecx = xsave_area_size(x86_cpu_xsave_xcr0_components(cpu), false); + *eax = env->features[FEAT_XSAVE_XCR0_LO]; + *edx = env->features[FEAT_XSAVE_XCR0_HI]; + /* + * The initial value of xcr0 and ebx == 0, On host without kvm + * commit 412a3c41(e.g., CentOS 6), the ebx's value always == 0 + * even through guest update xcr0, this will crash some legacy guest + * (e.g., CentOS 6), So set ebx == ecx to workaround it. + */ + *ebx = kvm_enabled() ? *ecx : xsave_area_size(env->xcr0, false); } else if (count == 1) { + uint64_t xstate = x86_cpu_xsave_xcr0_components(cpu) | + x86_cpu_xsave_xss_components(cpu); + *eax = env->features[FEAT_XSAVE]; + *ebx = xsave_area_size(xstate, true); + *ecx = env->features[FEAT_XSAVE_XSS_LO]; + *edx = env->features[FEAT_XSAVE_XSS_HI]; + if (kvm_enabled() && cpu->enable_pmu && + (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR) && + (*eax & CPUID_XSAVE_XSAVES)) { + *ecx |= XSTATE_ARCH_LBR_MASK; + } else { + *ecx &= ~XSTATE_ARCH_LBR_MASK; + } + } else if (count == 0xf && cpu->enable_pmu + && (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { + x86_cpu_get_supported_cpuid(0xD, count, eax, ebx, ecx, edx); } else if (count < ARRAY_SIZE(x86_ext_save_areas)) { - if ((x86_cpu_xsave_components(cpu) >> count) & 1) { - const ExtSaveArea *esa = &x86_ext_save_areas[count]; + const ExtSaveArea *esa = &x86_ext_save_areas[count]; + + if (x86_cpu_xsave_xcr0_components(cpu) & (1ULL << count)) { *eax = esa->size; *ebx = esa->offset; + *ecx = esa->ecx & + (ESA_FEATURE_ALIGN64_MASK | ESA_FEATURE_XFD_MASK); + } else if (x86_cpu_xsave_xss_components(cpu) & (1ULL << count)) { + *eax = esa->size; + *ebx = 0; + *ecx = 
1; } } break; } + case 0x12: +#ifndef CONFIG_USER_ONLY + if (!kvm_enabled() || + !(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SGX)) { + *eax = *ebx = *ecx = *edx = 0; + break; + } + + /* + * SGX sub-leafs CPUID.0x12.{0x2..N} enumerate EPC sections. Retrieve + * the EPC properties, e.g. confidentiality and integrity, from the + * host's first EPC section, i.e. assume there is one EPC section or + * that all EPC sections have the same security properties. + */ + if (count > 1) { + uint64_t epc_addr, epc_size; + + if (sgx_epc_get_section(count - 2, &epc_addr, &epc_size)) { + *eax = *ebx = *ecx = *edx = 0; + break; + } + host_cpuid(index, 2, eax, ebx, ecx, edx); + *eax = (uint32_t)(epc_addr & 0xfffff000) | 0x1; + *ebx = (uint32_t)(epc_addr >> 32); + *ecx = (uint32_t)(epc_size & 0xfffff000) | (*ecx & 0xf); + *edx = (uint32_t)(epc_size >> 32); + break; + } + + /* + * SGX sub-leafs CPUID.0x12.{0x0,0x1} are heavily dependent on hardware + * and KVM, i.e. QEMU cannot emulate features to override what KVM + * supports. Features can be further restricted by userspace, but not + * made more permissive. + */ + x86_cpu_get_supported_cpuid(0x12, count, eax, ebx, ecx, edx); + + if (count == 0) { + *eax &= env->features[FEAT_SGX_12_0_EAX]; + *ebx &= env->features[FEAT_SGX_12_0_EBX]; + } else { + *eax &= env->features[FEAT_SGX_12_1_EAX]; + *ebx &= 0; /* ebx reserve */ + *ecx &= env->features[FEAT_XSAVE_XCR0_LO]; + *edx &= env->features[FEAT_XSAVE_XCR0_HI]; + + /* FP and SSE are always allowed regardless of XSAVE/XCR0. */ + *ecx |= XSTATE_FP_MASK | XSTATE_SSE_MASK; + + /* Access to PROVISIONKEY requires additional credentials. 
*/ + if ((*eax & (1U << 4)) && + !kvm_enable_sgx_provisioning(cs->kvm_state)) { + *eax &= ~(1U << 4); + } + } +#endif + break; case 0x14: { /* Intel Processor Trace Enumeration */ *eax = 0; @@ -4201,16 +6412,60 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; } + /* + * If these are changed, they should stay in sync with + * x86_cpu_filter_features(). + */ if (count == 0) { *eax = INTEL_PT_MAX_SUBLEAF; *ebx = INTEL_PT_MINIMAL_EBX; *ecx = INTEL_PT_MINIMAL_ECX; + if (env->features[FEAT_14_0_ECX] & CPUID_14_0_ECX_LIP) { + *ecx |= CPUID_14_0_ECX_LIP; + } } else if (count == 1) { *eax = INTEL_PT_MTC_BITMAP | INTEL_PT_ADDR_RANGES_NUM; *ebx = INTEL_PT_PSB_BITMAP | INTEL_PT_CYCLE_BITMAP; } break; } + case 0x1D: { + /* AMX TILE, for now hardcoded for Sapphire Rapids*/ + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { + break; + } + + if (count == 0) { + /* Highest numbered palette subleaf */ + *eax = INTEL_AMX_TILE_MAX_SUBLEAF; + } else if (count == 1) { + *eax = INTEL_AMX_TOTAL_TILE_BYTES | + (INTEL_AMX_BYTES_PER_TILE << 16); + *ebx = INTEL_AMX_BYTES_PER_ROW | (INTEL_AMX_TILE_MAX_NAMES << 16); + *ecx = INTEL_AMX_TILE_MAX_ROWS; + } + break; + } + case 0x1E: { + /* AMX TMUL, for now hardcoded for Sapphire Rapids */ + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + if (!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE)) { + break; + } + + if (count == 0) { + /* Highest numbered palette subleaf */ + *ebx = INTEL_AMX_TMUL_MAX_K | (INTEL_AMX_TMUL_MAX_N << 8); + } + break; + } case 0x40000000: /* * CPUID code in kvm_arch_init_vcpu() ignores stuff @@ -4258,6 +6513,10 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *ecx |= 1 << 1; /* CmpLegacy bit */ } } + if (tcg_enabled() && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && + !(env->hflags & HF_LMA_MASK)) { + *edx &= ~CPUID_EXT2_SYSCALL; + } break; case 0x80000002: case 0x80000003: @@ -4270,12 +6529,12 @@ void 
cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, case 0x80000005: /* cache info (L1 cache) */ if (cpu->cache_info_passthrough) { - host_cpuid(index, 0, eax, ebx, ecx, edx); + x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; } - *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | \ + *eax = (L1_DTLB_2M_ASSOC << 24) | (L1_DTLB_2M_ENTRIES << 16) | (L1_ITLB_2M_ASSOC << 8) | (L1_ITLB_2M_ENTRIES); - *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | \ + *ebx = (L1_DTLB_4K_ASSOC << 24) | (L1_DTLB_4K_ENTRIES << 16) | (L1_ITLB_4K_ASSOC << 8) | (L1_ITLB_4K_ENTRIES); *ecx = encode_cache_cpuid80000005(env->cache_info_amd.l1d_cache); *edx = encode_cache_cpuid80000005(env->cache_info_amd.l1i_cache); @@ -4283,16 +6542,16 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, case 0x80000006: /* cache info (L2 cache) */ if (cpu->cache_info_passthrough) { - host_cpuid(index, 0, eax, ebx, ecx, edx); + x86_cpu_get_cache_cpuid(index, 0, eax, ebx, ecx, edx); break; } - *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | \ - (L2_DTLB_2M_ENTRIES << 16) | \ - (AMD_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | \ + *eax = (AMD_ENC_ASSOC(L2_DTLB_2M_ASSOC) << 28) | + (L2_DTLB_2M_ENTRIES << 16) | + (AMD_ENC_ASSOC(L2_ITLB_2M_ASSOC) << 12) | (L2_ITLB_2M_ENTRIES); - *ebx = (AMD_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | \ - (L2_DTLB_4K_ENTRIES << 16) | \ - (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | \ + *ebx = (AMD_ENC_ASSOC(L2_DTLB_4K_ASSOC) << 28) | + (L2_DTLB_4K_ENTRIES << 16) | + (AMD_ENC_ASSOC(L2_ITLB_4K_ASSOC) << 12) | (L2_ITLB_4K_ENTRIES); encode_cache_cpuid80000006(env->cache_info_amd.l2_cache, cpu->enable_l3_cache ? @@ -4307,23 +6566,25 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x80000008: /* virtual & phys address size in low 2 bytes. 
*/ + *eax = cpu->phys_bits; if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { /* 64 bit processor */ - *eax = cpu->phys_bits; /* configurable physical bits */ - if (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_LA57) { - *eax |= 0x00003900; /* 57 bits virtual */ - } else { - *eax |= 0x00003000; /* 48 bits virtual */ - } - } else { - *eax = cpu->phys_bits; + *eax |= (cpu_x86_virtual_addr_width(env) << 8); } *ebx = env->features[FEAT_8000_0008_EBX]; - *ecx = 0; - *edx = 0; if (cs->nr_cores * cs->nr_threads > 1) { - *ecx |= (cs->nr_cores * cs->nr_threads) - 1; + /* + * Bits 15:12 is "The number of bits in the initial + * Core::X86::Apic::ApicId[ApicId] value that indicate + * thread ID within a package". + * Bits 7:0 is "The number of threads in the package is NC+1" + */ + *ecx = (apicid_pkg_offset(&topo_info) << 12) | + ((cs->nr_cores * cs->nr_threads) - 1); + } else { + *ecx = 0; } + *edx = 0; break; case 0x8000000A: if (env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM) { @@ -4340,22 +6601,26 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, break; case 0x8000001D: *eax = 0; + if (cpu->cache_info_passthrough) { + x86_cpu_get_cache_cpuid(index, count, eax, ebx, ecx, edx); + break; + } switch (count) { case 0: /* L1 dcache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, cs, - eax, ebx, ecx, edx); + encode_cache_cpuid8000001d(env->cache_info_amd.l1d_cache, + &topo_info, eax, ebx, ecx, edx); break; case 1: /* L1 icache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, cs, - eax, ebx, ecx, edx); + encode_cache_cpuid8000001d(env->cache_info_amd.l1i_cache, + &topo_info, eax, ebx, ecx, edx); break; case 2: /* L2 cache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, cs, - eax, ebx, ecx, edx); + encode_cache_cpuid8000001d(env->cache_info_amd.l2_cache, + &topo_info, eax, ebx, ecx, edx); break; case 3: /* L3 cache info */ - encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, cs, - eax, 
ebx, ecx, edx); + encode_cache_cpuid8000001d(env->cache_info_amd.l3_cache, + &topo_info, eax, ebx, ecx, edx); break; default: /* end of info */ *eax = *ebx = *ecx = *edx = 0; @@ -4363,9 +6628,14 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } break; case 0x8000001E: - assert(cpu->core_id <= 255); - encode_topo_cpuid8000001e(cs, cpu, - eax, ebx, ecx, edx); + if (cpu->core_id <= 255) { + encode_topo_cpuid8000001e(cpu, &topo_info, eax, ebx, ecx, edx); + } else { + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + } break; case 0xC0000000: *eax = env->cpuid_xlevel2; @@ -4390,11 +6660,17 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, *edx = 0; break; case 0x8000001F: - *eax = sev_enabled() ? 0x2 : 0; - *ebx = sev_get_cbit_position(); - *ebx |= sev_get_reduced_phys_bits() << 6; - *ecx = 0; - *edx = 0; + *eax = *ebx = *ecx = *edx = 0; + if (sev_enabled()) { + *eax = 0x2; + *eax |= sev_es_enabled() ? 0x8 : 0; + *ebx = sev_get_cbit_position() & 0x3f; /* EBX[5:0] */ + *ebx |= (sev_get_reduced_phys_bits() & 0x3f) << 6; /* EBX[11:6] */ + } + break; + case 0x80000021: + *eax = env->features[FEAT_8000_0021_EAX]; + *ebx = *ecx = *edx = 0; break; default: /* reserved values: zero */ @@ -4406,25 +6682,40 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, } } -/* CPUClass::reset() */ -static void x86_cpu_reset(CPUState *s) +static void x86_cpu_set_sgxlepubkeyhash(CPUX86State *env) { - X86CPU *cpu = X86_CPU(s); - X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); +#ifndef CONFIG_USER_ONLY + /* Those default values are defined in Skylake HW */ + env->msr_ia32_sgxlepubkeyhash[0] = 0xa6053e051270b7acULL; + env->msr_ia32_sgxlepubkeyhash[1] = 0x6cfbe8ba8b3b413dULL; + env->msr_ia32_sgxlepubkeyhash[2] = 0xc4916d99f2b3735dULL; + env->msr_ia32_sgxlepubkeyhash[3] = 0xd4f8c05909f9bb3bULL; +#endif +} + +static void x86_cpu_reset_hold(Object *obj) +{ + CPUState *cs = CPU(obj); + X86CPU *cpu = X86_CPU(cs); + X86CPUClass *xcc = 
X86_CPU_GET_CLASS(obj); CPUX86State *env = &cpu->env; target_ulong cr4; uint64_t xcr0; int i; - xcc->parent_reset(s); + if (xcc->parent_phases.hold) { + xcc->parent_phases.hold(obj); + } memset(env, 0, offsetof(CPUX86State, end_reset_fields)); env->old_exception = -1; /* init to reset state */ - + env->int_ctl = 0; env->hflags2 |= HF2_GIF_MASK; + env->hflags2 |= HF2_VGIF_MASK; + env->hflags &= ~HF_GUEST_MASK; cpu_x86_update_cr0(env, 0x60000010); env->a20_mask = ~0x0; @@ -4473,13 +6764,29 @@ static void x86_cpu_reset(CPUState *s) env->xstate_bv = 0; env->pat = 0x0007040600070406ULL; + + if (kvm_enabled()) { + /* + * KVM handles TSC = 0 specially and thinks we are hot-plugging + * a new CPU, use 1 instead to force a reset. + */ + if (env->tsc != 0) { + env->tsc = 1; + } + } else { + env->tsc = 0; + } + env->msr_ia32_misc_enable = MSR_IA32_MISC_ENABLE_DEFAULT; + if (env->features[FEAT_1_ECX] & CPUID_EXT_MONITOR) { + env->msr_ia32_misc_enable |= MSR_IA32_MISC_ENABLE_MWAIT; + } memset(env->dr, 0, sizeof(env->dr)); env->dr[6] = DR6_FIXED_1; env->dr[7] = DR7_FIXED_1; - cpu_breakpoint_remove_all(s, BP_CPU); - cpu_watchpoint_remove_all(s, BP_CPU); + cpu_breakpoint_remove_all(cs, BP_CPU); + cpu_watchpoint_remove_all(cs, BP_CPU); cr4 = 0; xcr0 = XSTATE_FP_MASK; @@ -4491,6 +6798,9 @@ static void x86_cpu_reset(CPUState *s) } for (i = 2; i < ARRAY_SIZE(x86_ext_save_areas); i++) { const ExtSaveArea *esa = &x86_ext_save_areas[i]; + if (!((1 << i) & CPUID_XSTATE_XCR0_MASK)) { + continue; + } if (env->features[esa->feature] & esa->bits) { xcr0 |= 1ull << i; } @@ -4518,36 +6828,42 @@ static void x86_cpu_reset(CPUState *s) memset(env->mtrr_fixed, 0, sizeof(env->mtrr_fixed)); env->interrupt_injected = -1; - env->exception_injected = -1; + env->exception_nr = -1; + env->exception_pending = 0; + env->exception_injected = 0; + env->exception_has_payload = false; + env->exception_payload = 0; env->nmi_injected = false; + env->triple_fault_pending = false; #if !defined(CONFIG_USER_ONLY) /* 
We hard-wire the BSP to the first CPU. */ - apic_designate_bsp(cpu->apic_state, s->cpu_index == 0); + apic_designate_bsp(cpu->apic_state, cs->cpu_index == 0); - s->halted = !cpu_is_bsp(cpu); + cs->halted = !cpu_is_bsp(cpu); if (kvm_enabled()) { kvm_arch_reset_vcpu(cpu); } - else if (hvf_enabled()) { - hvf_reset_vcpu(s); - } + + x86_cpu_set_sgxlepubkeyhash(env); + + env->amd_tsc_scale_msr = MSR_AMD64_TSC_RATIO_DEFAULT; + #endif } -#ifndef CONFIG_USER_ONLY -bool cpu_is_bsp(X86CPU *cpu) +void x86_cpu_after_reset(X86CPU *cpu) { - return cpu_get_apic_base(cpu->apic_state) & MSR_IA32_APICBASE_BSP; -} +#ifndef CONFIG_USER_ONLY + if (kvm_enabled()) { + kvm_arch_after_reset_vcpu(cpu); + } -/* TODO: remove me, when reset over QOM tree is implemented */ -static void x86_cpu_machine_reset_cb(void *opaque) -{ - X86CPU *cpu = opaque; - cpu_reset(CPU(cpu)); -} + if (cpu->apic_state) { + device_cold_reset(cpu->apic_state); + } #endif +} static void mce_init(X86CPU *cpu) { @@ -4566,108 +6882,6 @@ static void mce_init(X86CPU *cpu) } } -#ifndef CONFIG_USER_ONLY -APICCommonClass *apic_get_class(void) -{ - const char *apic_type = "apic"; - - /* TODO: in-kernel irqchip for hvf */ - if (kvm_apic_in_kernel()) { - apic_type = "kvm-apic"; - } else if (xen_enabled()) { - apic_type = "xen-apic"; - } - - return APIC_COMMON_CLASS(object_class_by_name(apic_type)); -} - -static void x86_cpu_apic_create(X86CPU *cpu, Error **errp) -{ - APICCommonState *apic; - ObjectClass *apic_class = OBJECT_CLASS(apic_get_class()); - - cpu->apic_state = DEVICE(object_new(object_class_get_name(apic_class))); - - object_property_add_child(OBJECT(cpu), "lapic", - OBJECT(cpu->apic_state), &error_abort); - object_unref(OBJECT(cpu->apic_state)); - - qdev_prop_set_uint32(cpu->apic_state, "id", cpu->apic_id); - /* TODO: convert to link<> */ - apic = APIC_COMMON(cpu->apic_state); - apic->cpu = cpu; - apic->apicbase = APIC_DEFAULT_ADDRESS | MSR_IA32_APICBASE_ENABLE; -} - -static void x86_cpu_apic_realize(X86CPU *cpu, Error 
**errp) -{ - APICCommonState *apic; - static bool apic_mmio_map_once; - - if (cpu->apic_state == NULL) { - return; - } - object_property_set_bool(OBJECT(cpu->apic_state), true, "realized", - errp); - - /* Map APIC MMIO area */ - apic = APIC_COMMON(cpu->apic_state); - if (!apic_mmio_map_once) { - memory_region_add_subregion_overlap(get_system_memory(), - apic->apicbase & - MSR_IA32_APICBASE_BASE, - &apic->io_memory, - 0x1000); - apic_mmio_map_once = true; - } -} - -static void x86_cpu_machine_done(Notifier *n, void *unused) -{ - X86CPU *cpu = container_of(n, X86CPU, machine_done); - MemoryRegion *smram = - (MemoryRegion *) object_resolve_path("/machine/smram", NULL); - - if (smram) { - cpu->smram = g_new(MemoryRegion, 1); - memory_region_init_alias(cpu->smram, OBJECT(cpu), "smram", - smram, 0, 1ull << 32); - memory_region_set_enabled(cpu->smram, true); - memory_region_add_subregion_overlap(cpu->cpu_as_root, 0, cpu->smram, 1); - } -} -#else -static void x86_cpu_apic_realize(X86CPU *cpu, Error **errp) -{ -} -#endif - -/* Note: Only safe for use on x86(-64) hosts */ -static uint32_t x86_host_phys_bits(void) -{ - uint32_t eax; - uint32_t host_phys_bits; - - host_cpuid(0x80000000, 0, &eax, NULL, NULL, NULL); - if (eax >= 0x80000008) { - host_cpuid(0x80000008, 0, &eax, NULL, NULL, NULL); - /* Note: According to AMD doc 25481 rev 2.34 they have a field - * at 23:16 that can specify a maximum physical address bits for - * the guest that can override this value; but I've not seen - * anything with that set. - */ - host_phys_bits = eax & 0xff; - } else { - /* It's an odd 64 bit machine that doesn't have the leaf for - * physical address bits; fall back to 36 that's most older - * Intel. 
- */ - host_phys_bits = 36; - } - - return host_phys_bits; -} - static void x86_cpu_adjust_level(X86CPU *cpu, uint32_t *min, uint32_t value) { if (*min < value) { @@ -4680,9 +6894,10 @@ static void x86_cpu_adjust_feat_level(X86CPU *cpu, FeatureWord w) { CPUX86State *env = &cpu->env; FeatureWordInfo *fi = &feature_word_info[w]; - uint32_t eax = fi->cpuid_eax; + uint32_t eax = fi->cpuid.eax; uint32_t region = eax & 0xF0000000; + assert(feature_word_info[w].type == CPUID_FEATURE_WORD); if (!env->features[w]) { return; } @@ -4698,6 +6913,11 @@ static void x86_cpu_adjust_feat_level(X86CPU *cpu, FeatureWord w) x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel2, eax); break; } + + if (eax == 7) { + x86_cpu_adjust_level(cpu, &env->cpuid_min_level_func7, + fi->cpuid.ecx); + } } /* Calculate XSAVE components based on the configured CPU feature flags */ @@ -4706,8 +6926,13 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) CPUX86State *env = &cpu->env; int i; uint64_t mask; + static bool request_perm; if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) { + env->features[FEAT_XSAVE_XCR0_LO] = 0; + env->features[FEAT_XSAVE_XCR0_HI] = 0; + env->features[FEAT_XSAVE_XSS_LO] = 0; + env->features[FEAT_XSAVE_XSS_HI] = 0; return; } @@ -4719,8 +6944,16 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) } } - env->features[FEAT_XSAVE_COMP_LO] = mask; - env->features[FEAT_XSAVE_COMP_HI] = mask >> 32; + /* Only request permission for first vcpu */ + if (kvm_enabled() && !request_perm) { + kvm_request_xsave_components(cpu, mask); + request_perm = true; + } + + env->features[FEAT_XSAVE_XCR0_LO] = mask & CPUID_XSTATE_XCR0_MASK; + env->features[FEAT_XSAVE_XCR0_HI] = (mask & CPUID_XSTATE_XCR0_MASK) >> 32; + env->features[FEAT_XSAVE_XSS_LO] = mask & CPUID_XSTATE_XSS_MASK; + env->features[FEAT_XSAVE_XSS_HI] = (mask & CPUID_XSTATE_XSS_MASK) >> 32; } /***** Steps involved on loading and filtering CPUID data @@ -4729,7 +6962,7 @@ static void x86_cpu_enable_xsave_components(X86CPU 
*cpu) * involved in setting up CPUID data are: * * 1) Loading CPU model definition (X86CPUDefinition). This is - * implemented by x86_cpu_load_def() and should be completely + * implemented by x86_cpu_load_model() and should be completely * transparent, as it is done automatically by instance_init. * No code should need to look at X86CPUDefinition structs * outside instance_init. @@ -4763,12 +6996,26 @@ static void x86_cpu_enable_xsave_components(X86CPU *cpu) /* Expand CPU configuration data, based on configured features * and host/accelerator capabilities when appropriate. */ -static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) +void x86_cpu_expand_features(X86CPU *cpu, Error **errp) { CPUX86State *env = &cpu->env; FeatureWord w; + int i; GList *l; - Error *local_err = NULL; + + for (l = plus_features; l; l = l->next) { + const char *prop = l->data; + if (!object_property_set_bool(OBJECT(cpu), prop, true, errp)) { + return; + } + } + + for (l = minus_features; l; l = l->next) { + const char *prop = l->data; + if (!object_property_set_bool(OBJECT(cpu), prop, false, errp)) { + return; + } + } /*TODO: Now cpu->max_features doesn't overwrite features * set using QOM properties, and we can convert @@ -4782,24 +7029,22 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) */ env->features[w] |= x86_cpu_get_supported_feature_word(w, cpu->migratable) & - ~env->user_features[w] & \ + ~env->user_features[w] & ~feature_word_info[w].no_autoenable_flags; } } - for (l = plus_features; l; l = l->next) { - const char *prop = l->data; - object_property_set_bool(OBJECT(cpu), true, prop, &local_err); - if (local_err) { - goto out; - } - } + for (i = 0; i < ARRAY_SIZE(feature_dependencies); i++) { + FeatureDep *d = &feature_dependencies[i]; + if (!(env->features[d->from.index] & d->from.mask)) { + uint64_t unavailable_features = env->features[d->to.index] & d->to.mask; - for (l = minus_features; l; l = l->next) { - const char *prop = l->data; - 
object_property_set_bool(OBJECT(cpu), false, prop, &local_err); - if (local_err) { - goto out; + /* Not an error unless the dependent feature was added explicitly. */ + mark_unavailable_features(cpu, d->to.index, + unavailable_features & env->user_features[d->to.index], + "This feature depends on other features that were not requested"); + + env->features[d->to.index] &= ~unavailable_features; } } @@ -4816,6 +7061,9 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_feat_level(cpu, FEAT_1_ECX); x86_cpu_adjust_feat_level(cpu, FEAT_6_EAX); x86_cpu_adjust_feat_level(cpu, FEAT_7_0_ECX); + x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EAX); + x86_cpu_adjust_feat_level(cpu, FEAT_7_1_EDX); + x86_cpu_adjust_feat_level(cpu, FEAT_7_2_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_8000_0001_ECX); x86_cpu_adjust_feat_level(cpu, FEAT_8000_0007_EDX); @@ -4823,6 +7071,30 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) x86_cpu_adjust_feat_level(cpu, FEAT_C000_0001_EDX); x86_cpu_adjust_feat_level(cpu, FEAT_SVM); x86_cpu_adjust_feat_level(cpu, FEAT_XSAVE); + + /* Intel Processor Trace requires CPUID[0x14] */ + if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT)) { + if (cpu->intel_pt_auto_level) { + x86_cpu_adjust_level(cpu, &cpu->env.cpuid_min_level, 0x14); + } else if (cpu->env.cpuid_min_level < 0x14) { + mark_unavailable_features(cpu, FEAT_7_0_EBX, + CPUID_7_0_EBX_INTEL_PT, + "Intel PT need CPUID leaf 0x14, please set by \"-cpu ...,intel-pt=on,min-level=0x14\""); + } + } + + /* + * Intel CPU topology with multi-dies support requires CPUID[0x1F]. + * For AMD Rome/Milan, cpuid level is 0x10, and guest OS should detect + * extended toplogy by leaf 0xB. Only adjust it for Intel CPU, unless + * cpu->vendor_cpuid_only has been unset for compatibility with older + * machine types. 
+ */ + if ((env->nr_dies > 1) && + (IS_INTEL_CPU(env) || !cpu->vendor_cpuid_only)) { + x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x1F); + } + /* SVM requires CPUID[0x8000000A] */ if (env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM) { x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000000A); @@ -4832,9 +7104,21 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) if (sev_enabled()) { x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x8000001F); } + + if (env->features[FEAT_8000_0021_EAX]) { + x86_cpu_adjust_level(cpu, &env->cpuid_min_xlevel, 0x80000021); + } + + /* SGX requires CPUID[0x12] for EPC enumeration */ + if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SGX) { + x86_cpu_adjust_level(cpu, &env->cpuid_min_level, 0x12); + } } /* Set cpuid_*level* based on cpuid_min_*level, if not explicitly set */ + if (env->cpuid_level_func7 == UINT32_MAX) { + env->cpuid_level_func7 = env->cpuid_min_level_func7; + } if (env->cpuid_level == UINT32_MAX) { env->cpuid_level = env->cpuid_min_level; } @@ -4845,9 +7129,8 @@ static void x86_cpu_expand_features(X86CPU *cpu, Error **errp) env->cpuid_xlevel2 = env->cpuid_min_xlevel2; } -out: - if (local_err != NULL) { - error_propagate(errp, local_err); + if (kvm_enabled() && !kvm_hyperv_expand_features(cpu, errp)) { + return; } } @@ -4857,31 +7140,39 @@ out: * * Returns: 0 if all flags are supported by the host, non-zero otherwise. */ -static int x86_cpu_filter_features(X86CPU *cpu) +static void x86_cpu_filter_features(X86CPU *cpu, bool verbose) { CPUX86State *env = &cpu->env; FeatureWord w; - int rv = 0; + const char *prefix = NULL; + + if (verbose) { + prefix = accel_uses_host_cpuid() + ? 
"host doesn't support requested feature" + : "TCG doesn't support requested feature"; + } for (w = 0; w < FEATURE_WORDS; w++) { - uint32_t host_feat = + uint64_t host_feat = x86_cpu_get_supported_feature_word(w, false); - uint32_t requested_features = env->features[w]; - env->features[w] &= host_feat; - cpu->filtered_features[w] = requested_features & ~env->features[w]; - if (cpu->filtered_features[w]) { - rv = 1; - } + uint64_t requested_features = env->features[w]; + uint64_t unavailable_features = requested_features & ~host_feat; + mark_unavailable_features(cpu, w, unavailable_features, prefix); } + /* + * Check that KVM actually allows the processor tracing features that + * are advertised by cpu_x86_cpuid(). Keep these two in sync. + */ if ((env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) && kvm_enabled()) { - KVMState *s = CPU(cpu)->kvm_state; - uint32_t eax_0 = kvm_arch_get_supported_cpuid(s, 0x14, 0, R_EAX); - uint32_t ebx_0 = kvm_arch_get_supported_cpuid(s, 0x14, 0, R_EBX); - uint32_t ecx_0 = kvm_arch_get_supported_cpuid(s, 0x14, 0, R_ECX); - uint32_t eax_1 = kvm_arch_get_supported_cpuid(s, 0x14, 1, R_EAX); - uint32_t ebx_1 = kvm_arch_get_supported_cpuid(s, 0x14, 1, R_EBX); + uint32_t eax_0, ebx_0, ecx_0, edx_0_unused; + uint32_t eax_1, ebx_1, ecx_1_unused, edx_1_unused; + + x86_cpu_get_supported_cpuid(0x14, 0, + &eax_0, &ebx_0, &ecx_0, &edx_0_unused); + x86_cpu_get_supported_cpuid(0x14, 1, + &eax_1, &ebx_1, &ecx_1_unused, &edx_1_unused); if (!eax_0 || ((ebx_0 & INTEL_PT_MINIMAL_EBX) != INTEL_PT_MINIMAL_EBX) || @@ -4891,27 +7182,47 @@ static int x86_cpu_filter_features(X86CPU *cpu) INTEL_PT_ADDR_RANGES_NUM) || ((ebx_1 & (INTEL_PT_PSB_BITMAP | INTEL_PT_CYCLE_BITMAP)) != (INTEL_PT_PSB_BITMAP | INTEL_PT_CYCLE_BITMAP)) || - (ecx_0 & INTEL_PT_IP_LIP)) { + ((ecx_0 & CPUID_14_0_ECX_LIP) != + (env->features[FEAT_14_0_ECX] & CPUID_14_0_ECX_LIP))) { /* * Processor Trace capabilities aren't configurable, so if the * host can't emulate the capabilities we report 
on * cpu_x86_cpuid(), intel-pt can't be enabled on the current host. */ - env->features[FEAT_7_0_EBX] &= ~CPUID_7_0_EBX_INTEL_PT; - cpu->filtered_features[FEAT_7_0_EBX] |= CPUID_7_0_EBX_INTEL_PT; - rv = 1; + mark_unavailable_features(cpu, FEAT_7_0_EBX, CPUID_7_0_EBX_INTEL_PT, prefix); } } +} + +static void x86_cpu_hyperv_realize(X86CPU *cpu) +{ + size_t len; + + /* Hyper-V vendor id */ + if (!cpu->hyperv_vendor) { + object_property_set_str(OBJECT(cpu), "hv-vendor-id", "Microsoft Hv", + &error_abort); + } + len = strlen(cpu->hyperv_vendor); + if (len > 12) { + warn_report("hv-vendor-id truncated to 12 characters"); + len = 12; + } + memset(cpu->hyperv_vendor_id, 0, 12); + memcpy(cpu->hyperv_vendor_id, cpu->hyperv_vendor, len); + + /* 'Hv#1' interface identification*/ + cpu->hyperv_interface_id[0] = 0x31237648; + cpu->hyperv_interface_id[1] = 0; + cpu->hyperv_interface_id[2] = 0; + cpu->hyperv_interface_id[3] = 0; - return rv; + /* Hypervisor implementation limits */ + cpu->hyperv_limits[0] = 64; + cpu->hyperv_limits[1] = 0; + cpu->hyperv_limits[2] = 0; } -#define IS_INTEL_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && \ - (env)->cpuid_vendor2 == CPUID_VENDOR_INTEL_2 && \ - (env)->cpuid_vendor3 == CPUID_VENDOR_INTEL_3) -#define IS_AMD_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_AMD_1 && \ - (env)->cpuid_vendor2 == CPUID_VENDOR_AMD_2 && \ - (env)->cpuid_vendor3 == CPUID_VENDOR_AMD_3) static void x86_cpu_realizefn(DeviceState *dev, Error **errp) { CPUState *cs = CPU(dev); @@ -4920,48 +7231,75 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) CPUX86State *env = &cpu->env; Error *local_err = NULL; static bool ht_warned; + unsigned requested_lbr_fmt; - if (xcc->host_cpuid_required) { - if (!accel_uses_host_cpuid()) { - char *name = x86_cpu_class_get_model_name(xcc); - error_setg(&local_err, "CPU model '%s' requires KVM", name); - g_free(name); - goto out; - } - - if (enable_cpu_pm) { - host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, - 
&cpu->mwait.ecx, &cpu->mwait.edx); - env->features[FEAT_1_ECX] |= CPUID_EXT_MONITOR; - } - } - - /* mwait extended info: needed for Core compatibility */ - /* We always wake on interrupt even if host does not have the capability */ - cpu->mwait.ecx |= CPUID_MWAIT_EMX | CPUID_MWAIT_IBE; +#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) + /* Use pc-relative instructions in system-mode */ + cs->tcg_cflags |= CF_PCREL; +#endif if (cpu->apic_id == UNASSIGNED_APIC_ID) { error_setg(errp, "apic-id property was not initialized properly"); return; } + /* + * Process Hyper-V enlightenments. + * Note: this currently has to happen before the expansion of CPU features. + */ + x86_cpu_hyperv_realize(cpu); + x86_cpu_expand_features(cpu, &local_err); if (local_err) { goto out; } - if (x86_cpu_filter_features(cpu) && - (cpu->check_cpuid || cpu->enforce_cpuid)) { - x86_cpu_report_filtered_features(cpu); - if (cpu->enforce_cpuid) { - error_setg(&local_err, - accel_uses_host_cpuid() ? - "Host doesn't support requested features" : - "TCG doesn't support requested features"); - goto out; + /* + * Override env->features[FEAT_PERF_CAPABILITIES].LBR_FMT + * with user-provided setting. + */ + if (cpu->lbr_fmt != ~PERF_CAP_LBR_FMT) { + if ((cpu->lbr_fmt & PERF_CAP_LBR_FMT) != cpu->lbr_fmt) { + error_setg(errp, "invalid lbr-fmt"); + return; + } + env->features[FEAT_PERF_CAPABILITIES] &= ~PERF_CAP_LBR_FMT; + env->features[FEAT_PERF_CAPABILITIES] |= cpu->lbr_fmt; + } + + /* + * vPMU LBR is supported when 1) KVM is enabled 2) Option pmu=on and + * 3)vPMU LBR format matches that of host setting. 
+ */ + requested_lbr_fmt = + env->features[FEAT_PERF_CAPABILITIES] & PERF_CAP_LBR_FMT; + if (requested_lbr_fmt && kvm_enabled()) { + uint64_t host_perf_cap = + x86_cpu_get_supported_feature_word(FEAT_PERF_CAPABILITIES, false); + unsigned host_lbr_fmt = host_perf_cap & PERF_CAP_LBR_FMT; + + if (!cpu->enable_pmu) { + error_setg(errp, "vPMU: LBR is unsupported without pmu=on"); + return; + } + if (requested_lbr_fmt != host_lbr_fmt) { + error_setg(errp, "vPMU: the lbr-fmt value (0x%x) does not match " + "the host value (0x%x).", + requested_lbr_fmt, host_lbr_fmt); + return; } } + x86_cpu_filter_features(cpu, cpu->check_cpuid || cpu->enforce_cpuid); + + if (cpu->enforce_cpuid && x86_cpu_have_filtered_features(cpu)) { + error_setg(&local_err, + accel_uses_host_cpuid() ? + "Host doesn't support requested features" : + "TCG doesn't support requested features"); + goto out; + } + /* On AMD CPUs, some CPUID[8000_0001].EDX bits must match the bits on * CPUID[1].EDX. */ @@ -4971,51 +7309,72 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) & CPUID_EXT2_AMD_ALIASES); } + x86_cpu_set_sgxlepubkeyhash(env); + + /* + * note: the call to the framework needs to happen after feature expansion, + * but before the checks/modifications to ucode_rev, mwait, phys_bits. + * These may be set by the accel-specific code, + * and the results are subsequently checked / assumed in this function. + */ + cpu_exec_realizefn(cs, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } + + if (xcc->host_cpuid_required && !accel_uses_host_cpuid()) { + g_autofree char *name = x86_cpu_class_get_model_name(xcc); + error_setg(&local_err, "CPU model '%s' requires KVM or HVF", name); + goto out; + } + + if (cpu->ucode_rev == 0) { + /* + * The default is the same as KVM's. Note that this check + * needs to happen after the evenual setting of ucode_rev in + * accel-specific code in cpu_exec_realizefn. 
+ */ + if (IS_AMD_CPU(env)) { + cpu->ucode_rev = 0x01000065; + } else { + cpu->ucode_rev = 0x100000000ULL; + } + } + + /* + * mwait extended info: needed for Core compatibility + * We always wake on interrupt even if host does not have the capability. + * + * requires the accel-specific code in cpu_exec_realizefn to + * have already acquired the CPUID data into cpu->mwait. + */ + cpu->mwait.ecx |= CPUID_MWAIT_EMX | CPUID_MWAIT_IBE; + /* For 64bit systems think about the number of physical bits to present. * ideally this should be the same as the host; anything other than matching * the host can cause incorrect guest behaviour. * QEMU used to pick the magic value of 40 bits that corresponds to * consumer AMD devices but nothing else. + * + * Note that this code assumes features expansion has already been done + * (as it checks for CPUID_EXT2_LM), and also assumes that potential + * phys_bits adjustments to match the host have been already done in + * accel-specific code in cpu_exec_realizefn. */ if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { - if (accel_uses_host_cpuid()) { - uint32_t host_phys_bits = x86_host_phys_bits(); - static bool warned; - - if (cpu->host_phys_bits) { - /* The user asked for us to use the host physical bits */ - cpu->phys_bits = host_phys_bits; - } - - /* Print a warning if the user set it to a value that's not the - * host value. 
- */ - if (cpu->phys_bits != host_phys_bits && cpu->phys_bits != 0 && - !warned) { - warn_report("Host physical bits (%u)" - " does not match phys-bits property (%u)", - host_phys_bits, cpu->phys_bits); - warned = true; - } - - if (cpu->phys_bits && - (cpu->phys_bits > TARGET_PHYS_ADDR_SPACE_BITS || - cpu->phys_bits < 32)) { - error_setg(errp, "phys-bits should be between 32 and %u " - " (but is %u)", - TARGET_PHYS_ADDR_SPACE_BITS, cpu->phys_bits); - return; - } - } else { - if (cpu->phys_bits && cpu->phys_bits != TCG_PHYS_ADDR_BITS) { - error_setg(errp, "TCG only supports phys-bits=%u", - TCG_PHYS_ADDR_BITS); - return; - } + if (cpu->phys_bits && + (cpu->phys_bits > TARGET_PHYS_ADDR_SPACE_BITS || + cpu->phys_bits < 32)) { + error_setg(errp, "phys-bits should be between 32 and %u " + " (but is %u)", + TARGET_PHYS_ADDR_SPACE_BITS, cpu->phys_bits); + return; } - /* 0 means it was not explicitly set by the user (or by machine - * compat_props or by the host code above). In this case, the default - * is the value used by TCG (40). + /* + * 0 means it was not explicitly set by the user (or by machine + * compat_props or by the host code in host-cpu.c). + * In this case, the default is the value used by TCG (40). 
*/ if (cpu->phys_bits == 0) { cpu->phys_bits = TCG_PHYS_ADDR_BITS; @@ -5029,7 +7388,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) return; } - if (env->features[FEAT_1_EDX] & CPUID_PSE36) { + if (env->features[FEAT_1_EDX] & (CPUID_PSE36 | CPUID_PAE)) { cpu->phys_bits = 36; } else { cpu->phys_bits = 32; @@ -5038,15 +7397,17 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) /* Cache information initialization */ if (!cpu->legacy_cache) { - if (!xcc->cpu_def || !xcc->cpu_def->cache_info) { - char *name = x86_cpu_class_get_model_name(xcc); + const CPUCaches *cache_info = + x86_cpu_get_versioned_cache_info(cpu, xcc->model); + + if (!xcc->model || !cache_info) { + g_autofree char *name = x86_cpu_class_get_model_name(xcc); error_setg(errp, "CPU model '%s' doesn't support legacy-cache=off", name); - g_free(name); return; } env->cache_info_cpuid2 = env->cache_info_cpuid4 = env->cache_info_amd = - *xcc->cpu_def->cache_info; + *cache_info; } else { /* Build legacy cache information */ env->cache_info_cpuid2.l1d_cache = &legacy_l1d_cache; @@ -5065,17 +7426,11 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) env->cache_info_amd.l3_cache = &legacy_l3_cache; } - - cpu_exec_realizefn(cs, &local_err); - if (local_err != NULL) { - error_propagate(errp, local_err); - return; - } - #ifndef CONFIG_USER_ONLY + MachineState *ms = MACHINE(qdev_get_machine()); qemu_register_reset(x86_cpu_machine_reset_cb, cpu); - if (cpu->env.features[FEAT_1_EDX] & CPUID_APIC || smp_cpus > 1) { + if (cpu->env.features[FEAT_1_EDX] & CPUID_APIC || ms->smp.cpus > 1) { x86_cpu_apic_create(cpu, &local_err); if (local_err != NULL) { goto out; @@ -5085,33 +7440,6 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) mce_init(cpu); -#ifndef CONFIG_USER_ONLY - if (tcg_enabled()) { - cpu->cpu_as_mem = g_new(MemoryRegion, 1); - cpu->cpu_as_root = g_new(MemoryRegion, 1); - - /* Outer container... 
*/ - memory_region_init(cpu->cpu_as_root, OBJECT(cpu), "memory", ~0ull); - memory_region_set_enabled(cpu->cpu_as_root, true); - - /* ... with two regions inside: normal system memory with low - * priority, and... - */ - memory_region_init_alias(cpu->cpu_as_mem, OBJECT(cpu), "memory", - get_system_memory(), 0, ~0ull); - memory_region_add_subregion_overlap(cpu->cpu_as_root, 0, cpu->cpu_as_mem, 0); - memory_region_set_enabled(cpu->cpu_as_mem, true); - - cs->num_ases = 2; - cpu_address_space_init(cs, 0, "cpu-memory", cs->memory); - cpu_address_space_init(cs, 1, "cpu-smm", cpu->cpu_as_root); - - /* ... SMRAM with higher priority, linked from /machine/smram. */ - cpu->machine_done.notify = x86_cpu_machine_done; - qemu_add_machine_init_done_notifier(&cpu->machine_done); - } -#endif - qemu_init_vcpu(cs); /* @@ -5123,20 +7451,23 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) * NOTE: the following code has to follow qemu_init_vcpu(). Otherwise * cs->nr_threads hasn't be populated yet and the checking is incorrect. */ - if (IS_AMD_CPU(env) && - !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) && - cs->nr_threads > 1 && !ht_warned) { - error_report("This family of AMD CPU doesn't support " - "hyperthreading(%d). 
Please configure -smp " - "options properly or try enabling topoext feature.", - cs->nr_threads); - ht_warned = true; + if (IS_AMD_CPU(env) && + !(env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_TOPOEXT) && + cs->nr_threads > 1 && !ht_warned) { + warn_report("This family of AMD CPU doesn't support " + "hyperthreading(%d)", + cs->nr_threads); + error_printf("Please configure -smp options properly" + " or try enabling topoext feature.\n"); + ht_warned = true; } +#ifndef CONFIG_USER_ONLY x86_cpu_apic_realize(cpu, &local_err); if (local_err != NULL) { goto out; } +#endif /* !CONFIG_USER_ONLY */ cpu_reset(cs); xcc->parent_realize(dev, &local_err); @@ -5148,11 +7479,10 @@ out: } } -static void x86_cpu_unrealizefn(DeviceState *dev, Error **errp) +static void x86_cpu_unrealizefn(DeviceState *dev) { X86CPU *cpu = X86_CPU(dev); X86CPUClass *xcc = X86_CPU_GET_CLASS(dev); - Error *local_err = NULL; #ifndef CONFIG_USER_ONLY cpu_remove_sync(CPU(dev)); @@ -5164,16 +7494,12 @@ static void x86_cpu_unrealizefn(DeviceState *dev, Error **errp) cpu->apic_state = NULL; } - xcc->parent_unrealize(dev, &local_err); - if (local_err != NULL) { - error_propagate(errp, local_err); - return; - } + xcc->parent_unrealize(dev); } typedef struct BitProperty { FeatureWord w; - uint32_t mask; + uint64_t mask; } BitProperty; static void x86_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, @@ -5181,7 +7507,7 @@ static void x86_cpu_get_bit_prop(Object *obj, Visitor *v, const char *name, { X86CPU *cpu = X86_CPU(obj); BitProperty *fp = opaque; - uint32_t f = cpu->env.features[fp->w]; + uint64_t f = cpu->env.features[fp->w]; bool value = (f & fp->mask) == fp->mask; visit_type_bool(v, name, &value, errp); } @@ -5192,7 +7518,6 @@ static void x86_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name, DeviceState *dev = DEVICE(obj); X86CPU *cpu = X86_CPU(obj); BitProperty *fp = opaque; - Error *local_err = NULL; bool value; if (dev->realized) { @@ -5200,9 +7525,7 @@ static void 
x86_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name, return; } - visit_type_bool(v, name, &value, &local_err); - if (local_err) { - error_propagate(errp, local_err); + if (!visit_type_bool(v, name, &value, errp)) { return; } @@ -5214,29 +7537,23 @@ static void x86_cpu_set_bit_prop(Object *obj, Visitor *v, const char *name, cpu->env.user_features[fp->w] |= fp->mask; } -static void x86_cpu_release_bit_prop(Object *obj, const char *name, - void *opaque) -{ - BitProperty *prop = opaque; - g_free(prop); -} - /* Register a boolean property to get/set a single bit in a uint32_t field. * * The same property name can be registered multiple times to make it affect * multiple bits in the same FeatureWord. In that case, the getter will return * true only if all bits are set. */ -static void x86_cpu_register_bit_prop(X86CPU *cpu, +static void x86_cpu_register_bit_prop(X86CPUClass *xcc, const char *prop_name, FeatureWord w, int bitnr) { + ObjectClass *oc = OBJECT_CLASS(xcc); BitProperty *fp; ObjectProperty *op; - uint32_t mask = (1UL << bitnr); + uint64_t mask = (1ULL << bitnr); - op = object_property_find(OBJECT(cpu), prop_name, NULL); + op = object_class_property_find(oc, prop_name); if (op) { fp = op->opaque; assert(fp->w == w); @@ -5245,14 +7562,14 @@ static void x86_cpu_register_bit_prop(X86CPU *cpu, fp = g_new0(BitProperty, 1); fp->w = w; fp->mask = mask; - object_property_add(OBJECT(cpu), prop_name, "bool", - x86_cpu_get_bit_prop, - x86_cpu_set_bit_prop, - x86_cpu_release_bit_prop, fp, &error_abort); + object_class_property_add(oc, prop_name, "bool", + x86_cpu_get_bit_prop, + x86_cpu_set_bit_prop, + NULL, fp); } } -static void x86_cpu_register_feature_bit_props(X86CPU *cpu, +static void x86_cpu_register_feature_bit_props(X86CPUClass *xcc, FeatureWord w, int bitnr) { @@ -5271,133 +7588,67 @@ static void x86_cpu_register_feature_bit_props(X86CPU *cpu, /* aliases don't use "|" delimiters anymore, they are registered * manually using object_property_add_alias() */ 
assert(!strchr(name, '|')); - x86_cpu_register_bit_prop(cpu, name, w, bitnr); + x86_cpu_register_bit_prop(xcc, name, w, bitnr); } -static GuestPanicInformation *x86_cpu_get_crash_info(CPUState *cs) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - GuestPanicInformation *panic_info = NULL; - - if (env->features[FEAT_HYPERV_EDX] & HV_GUEST_CRASH_MSR_AVAILABLE) { - panic_info = g_malloc0(sizeof(GuestPanicInformation)); - - panic_info->type = GUEST_PANIC_INFORMATION_TYPE_HYPER_V; - - assert(HV_CRASH_PARAMS >= 5); - panic_info->u.hyper_v.arg1 = env->msr_hv_crash_params[0]; - panic_info->u.hyper_v.arg2 = env->msr_hv_crash_params[1]; - panic_info->u.hyper_v.arg3 = env->msr_hv_crash_params[2]; - panic_info->u.hyper_v.arg4 = env->msr_hv_crash_params[3]; - panic_info->u.hyper_v.arg5 = env->msr_hv_crash_params[4]; - } - - return panic_info; -} -static void x86_cpu_get_crash_info_qom(Object *obj, Visitor *v, - const char *name, void *opaque, - Error **errp) +static void x86_cpu_post_initfn(Object *obj) { - CPUState *cs = CPU(obj); - GuestPanicInformation *panic_info; - - if (!cs->crash_occurred) { - error_setg(errp, "No crash occured"); - return; - } - - panic_info = x86_cpu_get_crash_info(cs); - if (panic_info == NULL) { - error_setg(errp, "No crash information"); - return; - } - - visit_type_GuestPanicInformation(v, "crash-information", &panic_info, - errp); - qapi_free_GuestPanicInformation(panic_info); + accel_cpu_instance_init(CPU(obj)); } static void x86_cpu_initfn(Object *obj) { - CPUState *cs = CPU(obj); X86CPU *cpu = X86_CPU(obj); X86CPUClass *xcc = X86_CPU_GET_CLASS(obj); CPUX86State *env = &cpu->env; - FeatureWord w; - cs->env_ptr = env; - - object_property_add(obj, "family", "int", - x86_cpuid_version_get_family, - x86_cpuid_version_set_family, NULL, NULL, NULL); - object_property_add(obj, "model", "int", - x86_cpuid_version_get_model, - x86_cpuid_version_set_model, NULL, NULL, NULL); - object_property_add(obj, "stepping", "int", - 
x86_cpuid_version_get_stepping, - x86_cpuid_version_set_stepping, NULL, NULL, NULL); - object_property_add_str(obj, "vendor", - x86_cpuid_get_vendor, - x86_cpuid_set_vendor, NULL); - object_property_add_str(obj, "model-id", - x86_cpuid_get_model_id, - x86_cpuid_set_model_id, NULL); - object_property_add(obj, "tsc-frequency", "int", - x86_cpuid_get_tsc_freq, - x86_cpuid_set_tsc_freq, NULL, NULL, NULL); + env->nr_dies = 1; + object_property_add(obj, "feature-words", "X86CPUFeatureWordInfo", x86_cpu_get_feature_words, - NULL, NULL, (void *)env->features, NULL); + NULL, NULL, (void *)env->features); object_property_add(obj, "filtered-features", "X86CPUFeatureWordInfo", x86_cpu_get_feature_words, - NULL, NULL, (void *)cpu->filtered_features, NULL); - - object_property_add(obj, "crash-information", "GuestPanicInformation", - x86_cpu_get_crash_info_qom, NULL, NULL, NULL, NULL); - - cpu->hyperv_spinlock_attempts = HYPERV_SPINLOCK_NEVER_RETRY; - - for (w = 0; w < FEATURE_WORDS; w++) { - int bitnr; - - for (bitnr = 0; bitnr < 32; bitnr++) { - x86_cpu_register_feature_bit_props(cpu, w, bitnr); - } - } - - object_property_add_alias(obj, "sse3", obj, "pni", &error_abort); - object_property_add_alias(obj, "pclmuldq", obj, "pclmulqdq", &error_abort); - object_property_add_alias(obj, "sse4-1", obj, "sse4.1", &error_abort); - object_property_add_alias(obj, "sse4-2", obj, "sse4.2", &error_abort); - object_property_add_alias(obj, "xd", obj, "nx", &error_abort); - object_property_add_alias(obj, "ffxsr", obj, "fxsr-opt", &error_abort); - object_property_add_alias(obj, "i64", obj, "lm", &error_abort); - - object_property_add_alias(obj, "ds_cpl", obj, "ds-cpl", &error_abort); - object_property_add_alias(obj, "tsc_adjust", obj, "tsc-adjust", &error_abort); - object_property_add_alias(obj, "fxsr_opt", obj, "fxsr-opt", &error_abort); - object_property_add_alias(obj, "lahf_lm", obj, "lahf-lm", &error_abort); - object_property_add_alias(obj, "cmp_legacy", obj, "cmp-legacy", &error_abort); - 
object_property_add_alias(obj, "nodeid_msr", obj, "nodeid-msr", &error_abort); - object_property_add_alias(obj, "perfctr_core", obj, "perfctr-core", &error_abort); - object_property_add_alias(obj, "perfctr_nb", obj, "perfctr-nb", &error_abort); - object_property_add_alias(obj, "kvm_nopiodelay", obj, "kvm-nopiodelay", &error_abort); - object_property_add_alias(obj, "kvm_mmu", obj, "kvm-mmu", &error_abort); - object_property_add_alias(obj, "kvm_asyncpf", obj, "kvm-asyncpf", &error_abort); - object_property_add_alias(obj, "kvm_steal_time", obj, "kvm-steal-time", &error_abort); - object_property_add_alias(obj, "kvm_pv_eoi", obj, "kvm-pv-eoi", &error_abort); - object_property_add_alias(obj, "kvm_pv_unhalt", obj, "kvm-pv-unhalt", &error_abort); - object_property_add_alias(obj, "svm_lock", obj, "svm-lock", &error_abort); - object_property_add_alias(obj, "nrip_save", obj, "nrip-save", &error_abort); - object_property_add_alias(obj, "tsc_scale", obj, "tsc-scale", &error_abort); - object_property_add_alias(obj, "vmcb_clean", obj, "vmcb-clean", &error_abort); - object_property_add_alias(obj, "pause_filter", obj, "pause-filter", &error_abort); - object_property_add_alias(obj, "sse4_1", obj, "sse4.1", &error_abort); - object_property_add_alias(obj, "sse4_2", obj, "sse4.2", &error_abort); - - if (xcc->cpu_def) { - x86_cpu_load_def(cpu, xcc->cpu_def, &error_abort); + NULL, NULL, (void *)cpu->filtered_features); + + object_property_add_alias(obj, "sse3", obj, "pni"); + object_property_add_alias(obj, "pclmuldq", obj, "pclmulqdq"); + object_property_add_alias(obj, "sse4-1", obj, "sse4.1"); + object_property_add_alias(obj, "sse4-2", obj, "sse4.2"); + object_property_add_alias(obj, "xd", obj, "nx"); + object_property_add_alias(obj, "ffxsr", obj, "fxsr-opt"); + object_property_add_alias(obj, "i64", obj, "lm"); + + object_property_add_alias(obj, "ds_cpl", obj, "ds-cpl"); + object_property_add_alias(obj, "tsc_adjust", obj, "tsc-adjust"); + object_property_add_alias(obj, "fxsr_opt", obj, 
"fxsr-opt"); + object_property_add_alias(obj, "lahf_lm", obj, "lahf-lm"); + object_property_add_alias(obj, "cmp_legacy", obj, "cmp-legacy"); + object_property_add_alias(obj, "nodeid_msr", obj, "nodeid-msr"); + object_property_add_alias(obj, "perfctr_core", obj, "perfctr-core"); + object_property_add_alias(obj, "perfctr_nb", obj, "perfctr-nb"); + object_property_add_alias(obj, "kvm_nopiodelay", obj, "kvm-nopiodelay"); + object_property_add_alias(obj, "kvm_mmu", obj, "kvm-mmu"); + object_property_add_alias(obj, "kvm_asyncpf", obj, "kvm-asyncpf"); + object_property_add_alias(obj, "kvm_asyncpf_int", obj, "kvm-asyncpf-int"); + object_property_add_alias(obj, "kvm_steal_time", obj, "kvm-steal-time"); + object_property_add_alias(obj, "kvm_pv_eoi", obj, "kvm-pv-eoi"); + object_property_add_alias(obj, "kvm_pv_unhalt", obj, "kvm-pv-unhalt"); + object_property_add_alias(obj, "kvm_poll_control", obj, "kvm-poll-control"); + object_property_add_alias(obj, "svm_lock", obj, "svm-lock"); + object_property_add_alias(obj, "nrip_save", obj, "nrip-save"); + object_property_add_alias(obj, "tsc_scale", obj, "tsc-scale"); + object_property_add_alias(obj, "vmcb_clean", obj, "vmcb-clean"); + object_property_add_alias(obj, "pause_filter", obj, "pause-filter"); + object_property_add_alias(obj, "sse4_1", obj, "sse4.1"); + object_property_add_alias(obj, "sse4_2", obj, "sse4.2"); + + object_property_add_alias(obj, "hv-apicv", obj, "hv-avic"); + cpu->lbr_fmt = ~PERF_CAP_LBR_FMT; + object_property_add_alias(obj, "lbr_fmt", obj, "lbr-fmt"); + + if (xcc->model) { + x86_cpu_load_model(cpu, xcc->model); } } @@ -5408,12 +7659,14 @@ static int64_t x86_cpu_get_arch_id(CPUState *cs) return cpu->apic_id; } +#if !defined(CONFIG_USER_ONLY) static bool x86_cpu_get_paging_enabled(const CPUState *cs) { X86CPU *cpu = X86_CPU(cs); return cpu->env.cr[0] & CR0_PG_MASK; } +#endif /* !CONFIG_USER_ONLY */ static void x86_cpu_set_pc(CPUState *cs, vaddr value) { @@ -5422,11 +7675,12 @@ static void x86_cpu_set_pc(CPUState 
*cs, vaddr value) cpu->env.eip = value; } -static void x86_cpu_synchronize_from_tb(CPUState *cs, TranslationBlock *tb) +static vaddr x86_cpu_get_pc(CPUState *cs) { X86CPU *cpu = X86_CPU(cs); - cpu->env.eip = tb->pc - tb->cs_base; + /* Match cpu_get_tb_cpu_state. */ + return cpu->env.eip + cpu->env.segs[R_CS].base; } int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request) @@ -5460,10 +7714,12 @@ int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request) !(env->hflags & HF_INHIBIT_IRQ_MASK))))) { return CPU_INTERRUPT_HARD; #if !defined(CONFIG_USER_ONLY) - } else if ((interrupt_request & CPU_INTERRUPT_VIRQ) && + } else if (env->hflags2 & HF2_VGIF_MASK) { + if((interrupt_request & CPU_INTERRUPT_VIRQ) && (env->eflags & IF_MASK) && !(env->hflags & HF_INHIBIT_IRQ_MASK)) { - return CPU_INTERRUPT_VIRQ; + return CPU_INTERRUPT_VIRQ; + } #endif } } @@ -5476,6 +7732,18 @@ static bool x86_cpu_has_work(CPUState *cs) return x86_cpu_pending_interrupt(cs, cs->interrupt_request) != 0; } +static int x86_cpu_mmu_index(CPUState *cs, bool ifetch) +{ + CPUX86State *env = cpu_env(cs); + int mmu_index_32 = (env->hflags & HF_CS64_MASK) ? 0 : 1; + int mmu_index_base = + (env->hflags & HF_CPL_MASK) == 3 ? MMU_USER64_IDX : + !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX : + (env->eflags & AC_MASK) ? MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX; + + return mmu_index_base + mmu_index_32; +} + static void x86_disas_set_info(CPUState *cs, disassemble_info *info) { X86CPU *cpu = X86_CPU(cs); @@ -5484,7 +7752,6 @@ static void x86_disas_set_info(CPUState *cs, disassemble_info *info) info->mach = (env->hflags & HF_CS64_MASK ? bfd_mach_x86_64 : env->hflags & HF_CS32_MASK ? bfd_mach_i386_i386 : bfd_mach_i386_i8086); - info->print_insn = print_insn_i386; info->cap_arch = CS_ARCH_X86; info->cap_mode = (env->hflags & HF_CS64_MASK ? 
CS_MODE_64 @@ -5542,47 +7809,106 @@ static Property x86_cpu_properties[] = { DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, 0), DEFINE_PROP_INT32("thread-id", X86CPU, thread_id, 0), DEFINE_PROP_INT32("core-id", X86CPU, core_id, 0), + DEFINE_PROP_INT32("die-id", X86CPU, die_id, 0), DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, 0), #else DEFINE_PROP_UINT32("apic-id", X86CPU, apic_id, UNASSIGNED_APIC_ID), DEFINE_PROP_INT32("thread-id", X86CPU, thread_id, -1), DEFINE_PROP_INT32("core-id", X86CPU, core_id, -1), + DEFINE_PROP_INT32("die-id", X86CPU, die_id, -1), DEFINE_PROP_INT32("socket-id", X86CPU, socket_id, -1), #endif DEFINE_PROP_INT32("node-id", X86CPU, node_id, CPU_UNSET_NUMA_NODE_ID), DEFINE_PROP_BOOL("pmu", X86CPU, enable_pmu, false), - { .name = "hv-spinlocks", .info = &qdev_prop_spinlocks }, - DEFINE_PROP_BOOL("hv-relaxed", X86CPU, hyperv_relaxed_timing, false), - DEFINE_PROP_BOOL("hv-vapic", X86CPU, hyperv_vapic, false), - DEFINE_PROP_BOOL("hv-time", X86CPU, hyperv_time, false), - DEFINE_PROP_BOOL("hv-crash", X86CPU, hyperv_crash, false), - DEFINE_PROP_BOOL("hv-reset", X86CPU, hyperv_reset, false), - DEFINE_PROP_BOOL("hv-vpindex", X86CPU, hyperv_vpindex, false), - DEFINE_PROP_BOOL("hv-runtime", X86CPU, hyperv_runtime, false), - DEFINE_PROP_BOOL("hv-synic", X86CPU, hyperv_synic, false), - DEFINE_PROP_BOOL("hv-stimer", X86CPU, hyperv_stimer, false), - DEFINE_PROP_BOOL("hv-frequencies", X86CPU, hyperv_frequencies, false), - DEFINE_PROP_BOOL("hv-reenlightenment", X86CPU, hyperv_reenlightenment, false), - DEFINE_PROP_BOOL("hv-tlbflush", X86CPU, hyperv_tlbflush, false), + DEFINE_PROP_UINT64_CHECKMASK("lbr-fmt", X86CPU, lbr_fmt, PERF_CAP_LBR_FMT), + + DEFINE_PROP_UINT32("hv-spinlocks", X86CPU, hyperv_spinlock_attempts, + HYPERV_SPINLOCK_NEVER_NOTIFY), + DEFINE_PROP_BIT64("hv-relaxed", X86CPU, hyperv_features, + HYPERV_FEAT_RELAXED, 0), + DEFINE_PROP_BIT64("hv-vapic", X86CPU, hyperv_features, + HYPERV_FEAT_VAPIC, 0), + DEFINE_PROP_BIT64("hv-time", X86CPU, 
hyperv_features, + HYPERV_FEAT_TIME, 0), + DEFINE_PROP_BIT64("hv-crash", X86CPU, hyperv_features, + HYPERV_FEAT_CRASH, 0), + DEFINE_PROP_BIT64("hv-reset", X86CPU, hyperv_features, + HYPERV_FEAT_RESET, 0), + DEFINE_PROP_BIT64("hv-vpindex", X86CPU, hyperv_features, + HYPERV_FEAT_VPINDEX, 0), + DEFINE_PROP_BIT64("hv-runtime", X86CPU, hyperv_features, + HYPERV_FEAT_RUNTIME, 0), + DEFINE_PROP_BIT64("hv-synic", X86CPU, hyperv_features, + HYPERV_FEAT_SYNIC, 0), + DEFINE_PROP_BIT64("hv-stimer", X86CPU, hyperv_features, + HYPERV_FEAT_STIMER, 0), + DEFINE_PROP_BIT64("hv-frequencies", X86CPU, hyperv_features, + HYPERV_FEAT_FREQUENCIES, 0), + DEFINE_PROP_BIT64("hv-reenlightenment", X86CPU, hyperv_features, + HYPERV_FEAT_REENLIGHTENMENT, 0), + DEFINE_PROP_BIT64("hv-tlbflush", X86CPU, hyperv_features, + HYPERV_FEAT_TLBFLUSH, 0), + DEFINE_PROP_BIT64("hv-evmcs", X86CPU, hyperv_features, + HYPERV_FEAT_EVMCS, 0), + DEFINE_PROP_BIT64("hv-ipi", X86CPU, hyperv_features, + HYPERV_FEAT_IPI, 0), + DEFINE_PROP_BIT64("hv-stimer-direct", X86CPU, hyperv_features, + HYPERV_FEAT_STIMER_DIRECT, 0), + DEFINE_PROP_BIT64("hv-avic", X86CPU, hyperv_features, + HYPERV_FEAT_AVIC, 0), + DEFINE_PROP_BIT64("hv-emsr-bitmap", X86CPU, hyperv_features, + HYPERV_FEAT_MSR_BITMAP, 0), + DEFINE_PROP_BIT64("hv-xmm-input", X86CPU, hyperv_features, + HYPERV_FEAT_XMM_INPUT, 0), + DEFINE_PROP_BIT64("hv-tlbflush-ext", X86CPU, hyperv_features, + HYPERV_FEAT_TLBFLUSH_EXT, 0), + DEFINE_PROP_BIT64("hv-tlbflush-direct", X86CPU, hyperv_features, + HYPERV_FEAT_TLBFLUSH_DIRECT, 0), + DEFINE_PROP_ON_OFF_AUTO("hv-no-nonarch-coresharing", X86CPU, + hyperv_no_nonarch_cs, ON_OFF_AUTO_OFF), + DEFINE_PROP_BIT64("hv-syndbg", X86CPU, hyperv_features, + HYPERV_FEAT_SYNDBG, 0), + DEFINE_PROP_BOOL("hv-passthrough", X86CPU, hyperv_passthrough, false), + DEFINE_PROP_BOOL("hv-enforce-cpuid", X86CPU, hyperv_enforce_cpuid, false), + + /* WS2008R2 identify by default */ + DEFINE_PROP_UINT32("hv-version-id-build", X86CPU, hyperv_ver_id_build, + 
0x3839), + DEFINE_PROP_UINT16("hv-version-id-major", X86CPU, hyperv_ver_id_major, + 0x000A), + DEFINE_PROP_UINT16("hv-version-id-minor", X86CPU, hyperv_ver_id_minor, + 0x0000), + DEFINE_PROP_UINT32("hv-version-id-spack", X86CPU, hyperv_ver_id_sp, 0), + DEFINE_PROP_UINT8("hv-version-id-sbranch", X86CPU, hyperv_ver_id_sb, 0), + DEFINE_PROP_UINT32("hv-version-id-snumber", X86CPU, hyperv_ver_id_sn, 0), + DEFINE_PROP_BOOL("check", X86CPU, check_cpuid, true), DEFINE_PROP_BOOL("enforce", X86CPU, enforce_cpuid, false), + DEFINE_PROP_BOOL("x-force-features", X86CPU, force_features, false), DEFINE_PROP_BOOL("kvm", X86CPU, expose_kvm, true), DEFINE_PROP_UINT32("phys-bits", X86CPU, phys_bits, 0), DEFINE_PROP_BOOL("host-phys-bits", X86CPU, host_phys_bits, false), + DEFINE_PROP_UINT8("host-phys-bits-limit", X86CPU, host_phys_bits_limit, 0), DEFINE_PROP_BOOL("fill-mtrr-mask", X86CPU, fill_mtrr_mask, true), + DEFINE_PROP_UINT32("level-func7", X86CPU, env.cpuid_level_func7, + UINT32_MAX), DEFINE_PROP_UINT32("level", X86CPU, env.cpuid_level, UINT32_MAX), DEFINE_PROP_UINT32("xlevel", X86CPU, env.cpuid_xlevel, UINT32_MAX), DEFINE_PROP_UINT32("xlevel2", X86CPU, env.cpuid_xlevel2, UINT32_MAX), DEFINE_PROP_UINT32("min-level", X86CPU, env.cpuid_min_level, 0), DEFINE_PROP_UINT32("min-xlevel", X86CPU, env.cpuid_min_xlevel, 0), DEFINE_PROP_UINT32("min-xlevel2", X86CPU, env.cpuid_min_xlevel2, 0), + DEFINE_PROP_UINT64("ucode-rev", X86CPU, ucode_rev, 0), DEFINE_PROP_BOOL("full-cpuid-auto-level", X86CPU, full_cpuid_auto_level, true), - DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor_id), + DEFINE_PROP_STRING("hv-vendor-id", X86CPU, hyperv_vendor), DEFINE_PROP_BOOL("cpuid-0xb", X86CPU, enable_cpuid_0xb, true), + DEFINE_PROP_BOOL("x-vendor-cpuid-only", X86CPU, vendor_cpuid_only, true), DEFINE_PROP_BOOL("lmce", X86CPU, enable_lmce, false), DEFINE_PROP_BOOL("l3-cache", X86CPU, enable_l3_cache, true), DEFINE_PROP_BOOL("kvm-no-smi-migration", X86CPU, kvm_no_smi_migration, false), + 
DEFINE_PROP_BOOL("kvm-pv-enforce-cpuid", X86CPU, kvm_pv_enforce_cpuid, + false), DEFINE_PROP_BOOL("vmware-cpuid-freq", X86CPU, vmware_cpuid_freq, true), DEFINE_PROP_BOOL("tcg-cpuid", X86CPU, expose_tcg, true), DEFINE_PROP_BOOL("x-migrate-smi-count", X86CPU, migrate_smi_count, @@ -5592,6 +7918,7 @@ static Property x86_cpu_properties[] = { * own cache information (see x86_cpu_load_def()). */ DEFINE_PROP_BOOL("legacy-cache", X86CPU, legacy_cache, true), + DEFINE_PROP_BOOL("xen-vapic", X86CPU, xen_vapic, false), /* * From "Requirements for Implementing the Microsoft @@ -5606,84 +7933,127 @@ static Property x86_cpu_properties[] = { * to the specific Windows version being used." */ DEFINE_PROP_INT32("x-hv-max-vps", X86CPU, hv_max_vps, -1), + DEFINE_PROP_BOOL("x-hv-synic-kvm-only", X86CPU, hyperv_synic_kvm_only, + false), + DEFINE_PROP_BOOL("x-intel-pt-auto-level", X86CPU, intel_pt_auto_level, + true), DEFINE_PROP_END_OF_LIST() }; +#ifndef CONFIG_USER_ONLY +#include "hw/core/sysemu-cpu-ops.h" + +static const struct SysemuCPUOps i386_sysemu_ops = { + .get_memory_mapping = x86_cpu_get_memory_mapping, + .get_paging_enabled = x86_cpu_get_paging_enabled, + .get_phys_page_attrs_debug = x86_cpu_get_phys_page_attrs_debug, + .asidx_from_attrs = x86_asidx_from_attrs, + .get_crash_info = x86_cpu_get_crash_info, + .write_elf32_note = x86_cpu_write_elf32_note, + .write_elf64_note = x86_cpu_write_elf64_note, + .write_elf32_qemunote = x86_cpu_write_elf32_qemunote, + .write_elf64_qemunote = x86_cpu_write_elf64_qemunote, + .legacy_vmsd = &vmstate_x86_cpu, +}; +#endif + static void x86_cpu_common_class_init(ObjectClass *oc, void *data) { X86CPUClass *xcc = X86_CPU_CLASS(oc); CPUClass *cc = CPU_CLASS(oc); DeviceClass *dc = DEVICE_CLASS(oc); + ResettableClass *rc = RESETTABLE_CLASS(oc); + FeatureWord w; device_class_set_parent_realize(dc, x86_cpu_realizefn, &xcc->parent_realize); device_class_set_parent_unrealize(dc, x86_cpu_unrealizefn, &xcc->parent_unrealize); - dc->props = 
x86_cpu_properties; + device_class_set_props(dc, x86_cpu_properties); - xcc->parent_reset = cc->reset; - cc->reset = x86_cpu_reset; + resettable_class_set_parent_phases(rc, NULL, x86_cpu_reset_hold, NULL, + &xcc->parent_phases); cc->reset_dump_flags = CPU_DUMP_FPU | CPU_DUMP_CCOP; cc->class_by_name = x86_cpu_class_by_name; cc->parse_features = x86_cpu_parse_featurestr; cc->has_work = x86_cpu_has_work; -#ifdef CONFIG_TCG - cc->do_interrupt = x86_cpu_do_interrupt; - cc->cpu_exec_interrupt = x86_cpu_exec_interrupt; -#endif + cc->mmu_index = x86_cpu_mmu_index; cc->dump_state = x86_cpu_dump_state; - cc->get_crash_info = x86_cpu_get_crash_info; cc->set_pc = x86_cpu_set_pc; - cc->synchronize_from_tb = x86_cpu_synchronize_from_tb; + cc->get_pc = x86_cpu_get_pc; cc->gdb_read_register = x86_cpu_gdb_read_register; cc->gdb_write_register = x86_cpu_gdb_write_register; cc->get_arch_id = x86_cpu_get_arch_id; - cc->get_paging_enabled = x86_cpu_get_paging_enabled; -#ifdef CONFIG_USER_ONLY - cc->handle_mmu_fault = x86_cpu_handle_mmu_fault; -#else - cc->asidx_from_attrs = x86_asidx_from_attrs; - cc->get_memory_mapping = x86_cpu_get_memory_mapping; - cc->get_phys_page_debug = x86_cpu_get_phys_page_debug; - cc->write_elf64_note = x86_cpu_write_elf64_note; - cc->write_elf64_qemunote = x86_cpu_write_elf64_qemunote; - cc->write_elf32_note = x86_cpu_write_elf32_note; - cc->write_elf32_qemunote = x86_cpu_write_elf32_qemunote; - cc->vmsd = &vmstate_x86_cpu; -#endif + +#ifndef CONFIG_USER_ONLY + cc->sysemu_ops = &i386_sysemu_ops; +#endif /* !CONFIG_USER_ONLY */ + cc->gdb_arch_name = x86_gdb_arch_name; #ifdef TARGET_X86_64 cc->gdb_core_xml_file = "i386-64bit.xml"; - cc->gdb_num_core_regs = 57; #else cc->gdb_core_xml_file = "i386-32bit.xml"; - cc->gdb_num_core_regs = 41; -#endif -#if defined(CONFIG_TCG) && !defined(CONFIG_USER_ONLY) - cc->debug_excp_handler = breakpoint_handler; -#endif - cc->cpu_exec_enter = x86_cpu_exec_enter; - cc->cpu_exec_exit = x86_cpu_exec_exit; -#ifdef CONFIG_TCG - 
cc->tcg_initialize = tcg_x86_init; #endif cc->disas_set_info = x86_disas_set_info; dc->user_creatable = true; + + object_class_property_add(oc, "family", "int", + x86_cpuid_version_get_family, + x86_cpuid_version_set_family, NULL, NULL); + object_class_property_add(oc, "model", "int", + x86_cpuid_version_get_model, + x86_cpuid_version_set_model, NULL, NULL); + object_class_property_add(oc, "stepping", "int", + x86_cpuid_version_get_stepping, + x86_cpuid_version_set_stepping, NULL, NULL); + object_class_property_add_str(oc, "vendor", + x86_cpuid_get_vendor, + x86_cpuid_set_vendor); + object_class_property_add_str(oc, "model-id", + x86_cpuid_get_model_id, + x86_cpuid_set_model_id); + object_class_property_add(oc, "tsc-frequency", "int", + x86_cpuid_get_tsc_freq, + x86_cpuid_set_tsc_freq, NULL, NULL); + /* + * The "unavailable-features" property has the same semantics as + * CpuDefinitionInfo.unavailable-features on the "query-cpu-definitions" + * QMP command: they list the features that would have prevented the + * CPU from running if the "enforce" flag was set. 
+ */ + object_class_property_add(oc, "unavailable-features", "strList", + x86_cpu_get_unavailable_features, + NULL, NULL, NULL); + +#if !defined(CONFIG_USER_ONLY) + object_class_property_add(oc, "crash-information", "GuestPanicInformation", + x86_cpu_get_crash_info_qom, NULL, NULL, NULL); +#endif + + for (w = 0; w < FEATURE_WORDS; w++) { + int bitnr; + for (bitnr = 0; bitnr < 64; bitnr++) { + x86_cpu_register_feature_bit_props(xcc, w, bitnr); + } + } } static const TypeInfo x86_cpu_type_info = { .name = TYPE_X86_CPU, .parent = TYPE_CPU, .instance_size = sizeof(X86CPU), + .instance_align = __alignof(X86CPU), .instance_init = x86_cpu_initfn, + .instance_post_init = x86_cpu_post_initfn, + .abstract = true, .class_size = sizeof(X86CPUClass), .class_init = x86_cpu_common_class_init, }; - /* "base" CPU model, used by query-cpu-model-expansion */ static void x86_cpu_base_class_init(ObjectClass *oc, void *data) { @@ -5707,13 +8077,10 @@ static void x86_cpu_register_types(void) type_register_static(&x86_cpu_type_info); for (i = 0; i < ARRAY_SIZE(builtin_x86_defs); i++) { - x86_register_cpudef_type(&builtin_x86_defs[i]); + x86_register_cpudef_types(&builtin_x86_defs[i]); } type_register_static(&max_x86_cpu_type_info); type_register_static(&x86_base_cpu_type_info); -#if defined(CONFIG_KVM) || defined(CONFIG_HVF) - type_register_static(&host_x86_cpu_type_info); -#endif } type_init(x86_cpu_register_types) diff --git a/target/i386/cpu.h b/target/i386/cpu.h index 730c06f80a..6b05738079 100644 --- a/target/i386/cpu.h +++ b/target/i386/cpu.h @@ -1,4 +1,3 @@ - /* * i386 virtual CPU header * @@ -7,7 +6,7 @@ * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -21,23 +20,20 @@ #ifndef I386_CPU_H #define I386_CPU_H -#include "qemu-common.h" +#include "sysemu/tcg.h" #include "cpu-qom.h" -#include "hyperv-proto.h" - -#ifdef TARGET_X86_64 -#define TARGET_LONG_BITS 64 -#else -#define TARGET_LONG_BITS 32 -#endif - +#include "kvm/hyperv-proto.h" #include "exec/cpu-defs.h" +#include "qapi/qapi-types-common.h" +#include "qemu/cpu-float.h" +#include "qemu/timer.h" + +#define XEN_NR_VIRQS 24 /* The x86 has a strong memory model with some store-after-load re-ordering */ #define TCG_GUEST_DEFAULT_MO (TCG_MO_ALL & ~TCG_MO_ST_LD) -/* Maximum instruction code size */ -#define TARGET_MAX_INSN_SIZE 16 +#define KVM_HAVE_MCE_INJECTION 1 /* support for self modifying code even if the modified instruction is close to the modifying instruction */ @@ -51,8 +47,6 @@ #define ELF_MACHINE_UNAME "i686" #endif -#define CPUArchState struct CPUX86State - enum { R_EAX = 0, R_ECX = 1, @@ -177,6 +171,8 @@ typedef enum X86Seg { #define HF_IOBPT_SHIFT 24 /* an io breakpoint enabled */ #define HF_MPX_EN_SHIFT 25 /* MPX Enabled (CR4+XCR0+BNDCFGx) */ #define HF_MPX_IU_SHIFT 26 /* BND registers in-use */ +#define HF_UMIP_SHIFT 27 /* CR4.UMIP */ +#define HF_AVX_EN_SHIFT 28 /* AVX Enabled (CR4+XCR0) */ #define HF_CPL_MASK (3 << HF_CPL_SHIFT) #define HF_INHIBIT_IRQ_MASK (1 << HF_INHIBIT_IRQ_SHIFT) @@ -202,6 +198,8 @@ typedef enum X86Seg { #define HF_IOBPT_MASK (1 << HF_IOBPT_SHIFT) #define HF_MPX_EN_MASK (1 << HF_MPX_EN_SHIFT) #define HF_MPX_IU_MASK (1 << HF_MPX_IU_SHIFT) +#define HF_UMIP_MASK (1 << HF_UMIP_SHIFT) +#define HF_AVX_EN_MASK (1 << HF_AVX_EN_SHIFT) /* hflags2 */ @@ -212,6 +210,8 @@ typedef enum X86Seg { #define HF2_SMM_INSIDE_NMI_SHIFT 4 /* CPU serving SMI nested inside NMI */ #define HF2_MPX_PR_SHIFT 5 /* BNDCFGx.BNDPRESERVE */ #define HF2_NPT_SHIFT 6 /* Nested Paging enabled */ +#define HF2_IGNNE_SHIFT 7 /* Ignore 
CR0.NE=0 */ +#define HF2_VGIF_SHIFT 8 /* Can take VIRQ*/ #define HF2_GIF_MASK (1 << HF2_GIF_SHIFT) #define HF2_HIF_MASK (1 << HF2_HIF_SHIFT) @@ -220,6 +220,8 @@ typedef enum X86Seg { #define HF2_SMM_INSIDE_NMI_MASK (1 << HF2_SMM_INSIDE_NMI_SHIFT) #define HF2_MPX_PR_MASK (1 << HF2_MPX_PR_SHIFT) #define HF2_NPT_MASK (1 << HF2_NPT_SHIFT) +#define HF2_IGNNE_MASK (1 << HF2_IGNNE_SHIFT) +#define HF2_VGIF_MASK (1 << HF2_VGIF_SHIFT) #define CR0_PE_SHIFT 0 #define CR0_MP_SHIFT 1 @@ -232,6 +234,8 @@ typedef enum X86Seg { #define CR0_NE_MASK (1U << 5) #define CR0_WP_MASK (1U << 16) #define CR0_AM_MASK (1U << 18) +#define CR0_NW_MASK (1U << 29) +#define CR0_CD_MASK (1U << 30) #define CR0_PG_MASK (1U << 31) #define CR4_VME_MASK (1U << 0) @@ -246,6 +250,7 @@ typedef enum X86Seg { #define CR4_OSFXSR_SHIFT 9 #define CR4_OSFXSR_MASK (1U << CR4_OSFXSR_SHIFT) #define CR4_OSXMMEXCPT_MASK (1U << 10) +#define CR4_UMIP_MASK (1U << 11) #define CR4_LA57_MASK (1U << 12) #define CR4_VMXE_MASK (1U << 13) #define CR4_SMXE_MASK (1U << 14) @@ -255,6 +260,16 @@ typedef enum X86Seg { #define CR4_SMEP_MASK (1U << 20) #define CR4_SMAP_MASK (1U << 21) #define CR4_PKE_MASK (1U << 22) +#define CR4_PKS_MASK (1U << 24) + +#define CR4_RESERVED_MASK \ +(~(target_ulong)(CR4_VME_MASK | CR4_PVI_MASK | CR4_TSD_MASK \ + | CR4_DE_MASK | CR4_PSE_MASK | CR4_PAE_MASK \ + | CR4_MCE_MASK | CR4_PGE_MASK | CR4_PCE_MASK \ + | CR4_OSFXSR_MASK | CR4_OSXMMEXCPT_MASK | CR4_UMIP_MASK \ + | CR4_LA57_MASK \ + | CR4_FSGSBASE_MASK | CR4_PCIDE_MASK | CR4_OSXSAVE_MASK \ + | CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_PKE_MASK | CR4_PKS_MASK)) #define DR6_BD (1 << 13) #define DR6_BS (1 << 14) @@ -273,6 +288,8 @@ typedef enum X86Seg { #define DR7_TYPE_IO_RW 0x2 #define DR7_TYPE_DATA_RW 0x3 +#define DR_RESERVED_MASK 0xffffffff00000000ULL + #define PG_PRESENT_BIT 0 #define PG_RW_BIT 1 #define PG_USER_BIT 2 @@ -297,7 +314,6 @@ typedef enum X86Seg { #define PG_GLOBAL_MASK (1 << PG_GLOBAL_BIT) #define PG_PSE_PAT_MASK (1 << PG_PSE_PAT_BIT) 
#define PG_ADDRESS_MASK 0x000ffffffffff000LL -#define PG_HI_RSVD_MASK (PG_ADDRESS_MASK & ~PHYS_ADDR_MASK) #define PG_HI_USER_MASK 0x7ff0000000000000LL #define PG_PKRU_MASK (15ULL << PG_PKRU_BIT) #define PG_NX_MASK (1ULL << PG_NX_BIT) @@ -311,6 +327,19 @@ typedef enum X86Seg { #define PG_ERROR_I_D_MASK 0x10 #define PG_ERROR_PK_MASK 0x20 +#define PG_MODE_PAE (1 << 0) +#define PG_MODE_LMA (1 << 1) +#define PG_MODE_NXE (1 << 2) +#define PG_MODE_PSE (1 << 3) +#define PG_MODE_LA57 (1 << 4) +#define PG_MODE_SVM_MASK MAKE_64BIT_MASK(0, 15) + +/* Bits of CR4 that do not affect the NPT page format. */ +#define PG_MODE_WP (1 << 16) +#define PG_MODE_PKE (1 << 17) +#define PG_MODE_PKS (1 << 18) +#define PG_MODE_SMEP (1 << 19) + #define MCG_CTL_P (1ULL<<8) /* MCG_CAP register available */ #define MCG_SER_P (1ULL<<24) /* MCA recovery/new status bits */ #define MCG_LMCE_P (1ULL<<27) /* Local Machine Check Supported */ @@ -350,22 +379,50 @@ typedef enum X86Seg { #define MSR_IA32_APICBASE_ENABLE (1<<11) #define MSR_IA32_APICBASE_EXTD (1 << 10) #define MSR_IA32_APICBASE_BASE (0xfffffU<<12) +#define MSR_IA32_APICBASE_RESERVED \ + (~(uint64_t)(MSR_IA32_APICBASE_BSP | MSR_IA32_APICBASE_ENABLE \ + | MSR_IA32_APICBASE_EXTD | MSR_IA32_APICBASE_BASE)) + #define MSR_IA32_FEATURE_CONTROL 0x0000003a #define MSR_TSC_ADJUST 0x0000003b #define MSR_IA32_SPEC_CTRL 0x48 #define MSR_VIRT_SSBD 0xc001011f #define MSR_IA32_PRED_CMD 0x49 +#define MSR_IA32_UCODE_REV 0x8b +#define MSR_IA32_CORE_CAPABILITY 0xcf + #define MSR_IA32_ARCH_CAPABILITIES 0x10a +#define ARCH_CAP_TSX_CTRL_MSR (1<<7) + +#define MSR_IA32_PERF_CAPABILITIES 0x345 +#define PERF_CAP_LBR_FMT 0x3f + +#define MSR_IA32_TSX_CTRL 0x122 #define MSR_IA32_TSCDEADLINE 0x6e0 +#define MSR_IA32_PKRS 0x6e1 +#define MSR_ARCH_LBR_CTL 0x000014ce +#define MSR_ARCH_LBR_DEPTH 0x000014cf +#define MSR_ARCH_LBR_FROM_0 0x00001500 +#define MSR_ARCH_LBR_TO_0 0x00001600 +#define MSR_ARCH_LBR_INFO_0 0x00001200 #define FEATURE_CONTROL_LOCKED (1<<0) +#define 
FEATURE_CONTROL_VMXON_ENABLED_INSIDE_SMX (1ULL << 1) #define FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX (1<<2) +#define FEATURE_CONTROL_SGX_LC (1ULL << 17) +#define FEATURE_CONTROL_SGX (1ULL << 18) #define FEATURE_CONTROL_LMCE (1<<20) +#define MSR_IA32_SGXLEPUBKEYHASH0 0x8c +#define MSR_IA32_SGXLEPUBKEYHASH1 0x8d +#define MSR_IA32_SGXLEPUBKEYHASH2 0x8e +#define MSR_IA32_SGXLEPUBKEYHASH3 0x8f + #define MSR_P6_PERFCTR0 0xc1 #define MSR_IA32_SMBASE 0x9e #define MSR_SMI_COUNT 0x34 +#define MSR_CORE_THREAD_COUNT 0x35 #define MSR_MTRRcap 0xfe #define MSR_MTRRcap_VCNT 8 #define MSR_MTRRcap_FIXRANGE_SUPPORT (1 << 8) @@ -387,6 +444,7 @@ typedef enum X86Seg { #define MSR_IA32_MISC_ENABLE 0x1a0 /* Indicates good rep/movs microcode on some processors: */ #define MSR_IA32_MISC_ENABLE_DEFAULT 1 +#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << 18) #define MSR_MTRRphysBase(reg) (0x200 + 2 * (reg)) #define MSR_MTRRphysMask(reg) (0x200 + 2 * (reg) + 1) @@ -446,6 +504,11 @@ typedef enum X86Seg { #define MSR_EFER_SVME (1 << 12) #define MSR_EFER_FFXSR (1 << 14) +#define MSR_EFER_RESERVED\ + (~(target_ulong)(MSR_EFER_SCE | MSR_EFER_LME\ + | MSR_EFER_LMA | MSR_EFER_NXE | MSR_EFER_SVME\ + | MSR_EFER_FFXSR)) + #define MSR_STAR 0xc0000081 #define MSR_LSTAR 0xc0000082 #define MSR_CSTAR 0xc0000083 @@ -454,11 +517,40 @@ typedef enum X86Seg { #define MSR_GSBASE 0xc0000101 #define MSR_KERNELGSBASE 0xc0000102 #define MSR_TSC_AUX 0xc0000103 +#define MSR_AMD64_TSC_RATIO 0xc0000104 + +#define MSR_AMD64_TSC_RATIO_DEFAULT 0x100000000ULL #define MSR_VM_HSAVE_PA 0xc0010117 +#define MSR_IA32_XFD 0x000001c4 +#define MSR_IA32_XFD_ERR 0x000001c5 + #define MSR_IA32_BNDCFGS 0x00000d90 #define MSR_IA32_XSS 0x00000da0 +#define MSR_IA32_UMWAIT_CONTROL 0xe1 + +#define MSR_IA32_VMX_BASIC 0x00000480 +#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481 +#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482 +#define MSR_IA32_VMX_EXIT_CTLS 0x00000483 +#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484 +#define MSR_IA32_VMX_MISC 0x00000485 
+#define MSR_IA32_VMX_CR0_FIXED0 0x00000486 +#define MSR_IA32_VMX_CR0_FIXED1 0x00000487 +#define MSR_IA32_VMX_CR4_FIXED0 0x00000488 +#define MSR_IA32_VMX_CR4_FIXED1 0x00000489 +#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a +#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b +#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c +#define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x0000048d +#define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x0000048e +#define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x0000048f +#define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x00000490 +#define MSR_IA32_VMX_VMFUNC 0x00000491 + +#define MSR_APIC_START 0x00000800 +#define MSR_APIC_END 0x000008ff #define XSTATE_FP_BIT 0 #define XSTATE_SSE_BIT 1 @@ -469,6 +561,9 @@ typedef enum X86Seg { #define XSTATE_ZMM_Hi256_BIT 6 #define XSTATE_Hi16_ZMM_BIT 7 #define XSTATE_PKRU_BIT 9 +#define XSTATE_ARCH_LBR_BIT 15 +#define XSTATE_XTILE_CFG_BIT 17 +#define XSTATE_XTILE_DATA_BIT 18 #define XSTATE_FP_MASK (1ULL << XSTATE_FP_BIT) #define XSTATE_SSE_MASK (1ULL << XSTATE_SSE_BIT) @@ -479,6 +574,26 @@ typedef enum X86Seg { #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT) #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT) #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT) +#define XSTATE_ARCH_LBR_MASK (1ULL << XSTATE_ARCH_LBR_BIT) +#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT) +#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT) + +#define XSTATE_DYNAMIC_MASK (XSTATE_XTILE_DATA_MASK) + +#define ESA_FEATURE_ALIGN64_BIT 1 +#define ESA_FEATURE_XFD_BIT 2 + +#define ESA_FEATURE_ALIGN64_MASK (1U << ESA_FEATURE_ALIGN64_BIT) +#define ESA_FEATURE_XFD_MASK (1U << ESA_FEATURE_XFD_BIT) + + +/* CPUID feature bits available in XCR0 */ +#define CPUID_XSTATE_XCR0_MASK (XSTATE_FP_MASK | XSTATE_SSE_MASK | \ + XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | \ + XSTATE_BNDCSR_MASK | XSTATE_OPMASK_MASK | \ + XSTATE_ZMM_Hi256_MASK | \ + XSTATE_Hi16_ZMM_MASK | XSTATE_PKRU_MASK | \ + XSTATE_XTILE_CFG_MASK | XSTATE_XTILE_DATA_MASK) /* CPUID feature 
words */ typedef enum FeatureWord { @@ -487,25 +602,46 @@ typedef enum FeatureWord { FEAT_7_0_EBX, /* CPUID[EAX=7,ECX=0].EBX */ FEAT_7_0_ECX, /* CPUID[EAX=7,ECX=0].ECX */ FEAT_7_0_EDX, /* CPUID[EAX=7,ECX=0].EDX */ + FEAT_7_1_EAX, /* CPUID[EAX=7,ECX=1].EAX */ FEAT_8000_0001_EDX, /* CPUID[8000_0001].EDX */ FEAT_8000_0001_ECX, /* CPUID[8000_0001].ECX */ FEAT_8000_0007_EDX, /* CPUID[8000_0007].EDX */ FEAT_8000_0008_EBX, /* CPUID[8000_0008].EBX */ + FEAT_8000_0021_EAX, /* CPUID[8000_0021].EAX */ FEAT_C000_0001_EDX, /* CPUID[C000_0001].EDX */ FEAT_KVM, /* CPUID[4000_0001].EAX (KVM_CPUID_FEATURES) */ FEAT_KVM_HINTS, /* CPUID[4000_0001].EDX */ - FEAT_HYPERV_EAX, /* CPUID[4000_0003].EAX */ - FEAT_HYPERV_EBX, /* CPUID[4000_0003].EBX */ - FEAT_HYPERV_EDX, /* CPUID[4000_0003].EDX */ FEAT_SVM, /* CPUID[8000_000A].EDX */ FEAT_XSAVE, /* CPUID[EAX=0xd,ECX=1].EAX */ FEAT_6_EAX, /* CPUID[6].EAX */ - FEAT_XSAVE_COMP_LO, /* CPUID[EAX=0xd,ECX=0].EAX */ - FEAT_XSAVE_COMP_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ + FEAT_XSAVE_XCR0_LO, /* CPUID[EAX=0xd,ECX=0].EAX */ + FEAT_XSAVE_XCR0_HI, /* CPUID[EAX=0xd,ECX=0].EDX */ + FEAT_ARCH_CAPABILITIES, + FEAT_CORE_CAPABILITY, + FEAT_PERF_CAPABILITIES, + FEAT_VMX_PROCBASED_CTLS, + FEAT_VMX_SECONDARY_CTLS, + FEAT_VMX_PINBASED_CTLS, + FEAT_VMX_EXIT_CTLS, + FEAT_VMX_ENTRY_CTLS, + FEAT_VMX_MISC, + FEAT_VMX_EPT_VPID_CAPS, + FEAT_VMX_BASIC, + FEAT_VMX_VMFUNC, + FEAT_14_0_ECX, + FEAT_SGX_12_0_EAX, /* CPUID[EAX=0x12,ECX=0].EAX (SGX) */ + FEAT_SGX_12_0_EBX, /* CPUID[EAX=0x12,ECX=0].EBX (SGX MISCSELECT[31:0]) */ + FEAT_SGX_12_1_EAX, /* CPUID[EAX=0x12,ECX=1].EAX (SGX ATTRIBUTES[31:0]) */ + FEAT_XSAVE_XSS_LO, /* CPUID[EAX=0xd,ECX=1].ECX */ + FEAT_XSAVE_XSS_HI, /* CPUID[EAX=0xd,ECX=1].EDX */ + FEAT_7_1_EDX, /* CPUID[EAX=7,ECX=1].EDX */ + FEAT_7_2_EDX, /* CPUID[EAX=7,ECX=2].EDX */ FEATURE_WORDS, } FeatureWord; -typedef uint32_t FeatureWordArray[FEATURE_WORDS]; +typedef uint64_t FeatureWordArray[FEATURE_WORDS]; +uint64_t x86_cpu_get_supported_feature_word(FeatureWord w, 
+ bool migratable_only); /* cpuid_features bits */ #define CPUID_FP87 (1U << 0) @@ -599,7 +735,7 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_EXT2_3DNOWEXT (1U << 30) #define CPUID_EXT2_3DNOW (1U << 31) -/* CPUID[8000_0001].EDX bits that are aliase of CPUID[1].EDX bits on AMD CPUs */ +/* CPUID[8000_0001].EDX bits that are aliases of CPUID[1].EDX bits on AMD CPUs */ #define CPUID_EXT2_AMD_ALIASES (CPUID_EXT2_FPU | CPUID_EXT2_VME | \ CPUID_EXT2_DE | CPUID_EXT2_PSE | \ CPUID_EXT2_TSC | CPUID_EXT2_MSR | \ @@ -633,70 +769,215 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_EXT3_PERFCORE (1U << 23) #define CPUID_EXT3_PERFNB (1U << 24) -#define CPUID_SVM_NPT (1U << 0) -#define CPUID_SVM_LBRV (1U << 1) -#define CPUID_SVM_SVMLOCK (1U << 2) -#define CPUID_SVM_NRIPSAVE (1U << 3) -#define CPUID_SVM_TSCSCALE (1U << 4) -#define CPUID_SVM_VMCBCLEAN (1U << 5) -#define CPUID_SVM_FLUSHASID (1U << 6) -#define CPUID_SVM_DECODEASSIST (1U << 7) -#define CPUID_SVM_PAUSEFILTER (1U << 10) -#define CPUID_SVM_PFTHRESHOLD (1U << 12) - -#define CPUID_7_0_EBX_FSGSBASE (1U << 0) -#define CPUID_7_0_EBX_BMI1 (1U << 3) -#define CPUID_7_0_EBX_HLE (1U << 4) -#define CPUID_7_0_EBX_AVX2 (1U << 5) -#define CPUID_7_0_EBX_SMEP (1U << 7) -#define CPUID_7_0_EBX_BMI2 (1U << 8) -#define CPUID_7_0_EBX_ERMS (1U << 9) -#define CPUID_7_0_EBX_INVPCID (1U << 10) -#define CPUID_7_0_EBX_RTM (1U << 11) -#define CPUID_7_0_EBX_MPX (1U << 14) -#define CPUID_7_0_EBX_AVX512F (1U << 16) /* AVX-512 Foundation */ -#define CPUID_7_0_EBX_AVX512DQ (1U << 17) /* AVX-512 Doubleword & Quadword Instrs */ -#define CPUID_7_0_EBX_RDSEED (1U << 18) -#define CPUID_7_0_EBX_ADX (1U << 19) -#define CPUID_7_0_EBX_SMAP (1U << 20) -#define CPUID_7_0_EBX_AVX512IFMA (1U << 21) /* AVX-512 Integer Fused Multiply Add */ -#define CPUID_7_0_EBX_PCOMMIT (1U << 22) /* Persistent Commit */ -#define CPUID_7_0_EBX_CLFLUSHOPT (1U << 23) /* Flush a Cache Line Optimized */ -#define CPUID_7_0_EBX_CLWB (1U << 24) /* 
Cache Line Write Back */ -#define CPUID_7_0_EBX_INTEL_PT (1U << 25) /* Intel Processor Trace */ -#define CPUID_7_0_EBX_AVX512PF (1U << 26) /* AVX-512 Prefetch */ -#define CPUID_7_0_EBX_AVX512ER (1U << 27) /* AVX-512 Exponential and Reciprocal */ -#define CPUID_7_0_EBX_AVX512CD (1U << 28) /* AVX-512 Conflict Detection */ -#define CPUID_7_0_EBX_SHA_NI (1U << 29) /* SHA1/SHA256 Instruction Extensions */ -#define CPUID_7_0_EBX_AVX512BW (1U << 30) /* AVX-512 Byte and Word Instructions */ -#define CPUID_7_0_EBX_AVX512VL (1U << 31) /* AVX-512 Vector Length Extensions */ - -#define CPUID_7_0_ECX_AVX512BMI (1U << 1) -#define CPUID_7_0_ECX_VBMI (1U << 1) /* AVX-512 Vector Byte Manipulation Instrs */ -#define CPUID_7_0_ECX_UMIP (1U << 2) -#define CPUID_7_0_ECX_PKU (1U << 3) -#define CPUID_7_0_ECX_OSPKE (1U << 4) -#define CPUID_7_0_ECX_VBMI2 (1U << 6) /* Additional VBMI Instrs */ -#define CPUID_7_0_ECX_GFNI (1U << 8) -#define CPUID_7_0_ECX_VAES (1U << 9) -#define CPUID_7_0_ECX_VPCLMULQDQ (1U << 10) -#define CPUID_7_0_ECX_AVX512VNNI (1U << 11) -#define CPUID_7_0_ECX_AVX512BITALG (1U << 12) -#define CPUID_7_0_ECX_AVX512_VPOPCNTDQ (1U << 14) /* POPCNT for vectors of DW/QW */ -#define CPUID_7_0_ECX_LA57 (1U << 16) -#define CPUID_7_0_ECX_RDPID (1U << 22) -#define CPUID_7_0_ECX_CLDEMOTE (1U << 25) /* CLDEMOTE Instruction */ - -#define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) /* AVX512 Neural Network Instructions */ -#define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) /* AVX512 Multiply Accumulation Single Precision */ -#define CPUID_7_0_EDX_PCONFIG (1U << 18) /* Platform Configuration */ -#define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) /* Speculation Control */ -#define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) /*Arch Capabilities*/ -#define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) /* Speculative Store Bypass Disable */ - -#define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) /* Write back and - do not invalidate cache */ -#define CPUID_8000_0008_EBX_IBPB (1U << 12) /* Indirect Branch Prediction Barrier 
*/ +#define CPUID_SVM_NPT (1U << 0) +#define CPUID_SVM_LBRV (1U << 1) +#define CPUID_SVM_SVMLOCK (1U << 2) +#define CPUID_SVM_NRIPSAVE (1U << 3) +#define CPUID_SVM_TSCSCALE (1U << 4) +#define CPUID_SVM_VMCBCLEAN (1U << 5) +#define CPUID_SVM_FLUSHASID (1U << 6) +#define CPUID_SVM_DECODEASSIST (1U << 7) +#define CPUID_SVM_PAUSEFILTER (1U << 10) +#define CPUID_SVM_PFTHRESHOLD (1U << 12) +#define CPUID_SVM_AVIC (1U << 13) +#define CPUID_SVM_V_VMSAVE_VMLOAD (1U << 15) +#define CPUID_SVM_VGIF (1U << 16) +#define CPUID_SVM_VNMI (1U << 25) +#define CPUID_SVM_SVME_ADDR_CHK (1U << 28) + +/* Support RDFSBASE/RDGSBASE/WRFSBASE/WRGSBASE */ +#define CPUID_7_0_EBX_FSGSBASE (1U << 0) +/* Support SGX */ +#define CPUID_7_0_EBX_SGX (1U << 2) +/* 1st Group of Advanced Bit Manipulation Extensions */ +#define CPUID_7_0_EBX_BMI1 (1U << 3) +/* Hardware Lock Elision */ +#define CPUID_7_0_EBX_HLE (1U << 4) +/* Intel Advanced Vector Extensions 2 */ +#define CPUID_7_0_EBX_AVX2 (1U << 5) +/* Supervisor-mode Execution Prevention */ +#define CPUID_7_0_EBX_SMEP (1U << 7) +/* 2nd Group of Advanced Bit Manipulation Extensions */ +#define CPUID_7_0_EBX_BMI2 (1U << 8) +/* Enhanced REP MOVSB/STOSB */ +#define CPUID_7_0_EBX_ERMS (1U << 9) +/* Invalidate Process-Context Identifier */ +#define CPUID_7_0_EBX_INVPCID (1U << 10) +/* Restricted Transactional Memory */ +#define CPUID_7_0_EBX_RTM (1U << 11) +/* Memory Protection Extension */ +#define CPUID_7_0_EBX_MPX (1U << 14) +/* AVX-512 Foundation */ +#define CPUID_7_0_EBX_AVX512F (1U << 16) +/* AVX-512 Doubleword & Quadword Instruction */ +#define CPUID_7_0_EBX_AVX512DQ (1U << 17) +/* Read Random SEED */ +#define CPUID_7_0_EBX_RDSEED (1U << 18) +/* ADCX and ADOX instructions */ +#define CPUID_7_0_EBX_ADX (1U << 19) +/* Supervisor Mode Access Prevention */ +#define CPUID_7_0_EBX_SMAP (1U << 20) +/* AVX-512 Integer Fused Multiply Add */ +#define CPUID_7_0_EBX_AVX512IFMA (1U << 21) +/* Persistent Commit */ +#define CPUID_7_0_EBX_PCOMMIT (1U << 22) +/* Flush 
a Cache Line Optimized */ +#define CPUID_7_0_EBX_CLFLUSHOPT (1U << 23) +/* Cache Line Write Back */ +#define CPUID_7_0_EBX_CLWB (1U << 24) +/* Intel Processor Trace */ +#define CPUID_7_0_EBX_INTEL_PT (1U << 25) +/* AVX-512 Prefetch */ +#define CPUID_7_0_EBX_AVX512PF (1U << 26) +/* AVX-512 Exponential and Reciprocal */ +#define CPUID_7_0_EBX_AVX512ER (1U << 27) +/* AVX-512 Conflict Detection */ +#define CPUID_7_0_EBX_AVX512CD (1U << 28) +/* SHA1/SHA256 Instruction Extensions */ +#define CPUID_7_0_EBX_SHA_NI (1U << 29) +/* AVX-512 Byte and Word Instructions */ +#define CPUID_7_0_EBX_AVX512BW (1U << 30) +/* AVX-512 Vector Length Extensions */ +#define CPUID_7_0_EBX_AVX512VL (1U << 31) + +/* AVX-512 Vector Byte Manipulation Instruction */ +#define CPUID_7_0_ECX_AVX512_VBMI (1U << 1) +/* User-Mode Instruction Prevention */ +#define CPUID_7_0_ECX_UMIP (1U << 2) +/* Protection Keys for User-mode Pages */ +#define CPUID_7_0_ECX_PKU (1U << 3) +/* OS Enable Protection Keys */ +#define CPUID_7_0_ECX_OSPKE (1U << 4) +/* UMONITOR/UMWAIT/TPAUSE Instructions */ +#define CPUID_7_0_ECX_WAITPKG (1U << 5) +/* Additional AVX-512 Vector Byte Manipulation Instruction */ +#define CPUID_7_0_ECX_AVX512_VBMI2 (1U << 6) +/* Galois Field New Instructions */ +#define CPUID_7_0_ECX_GFNI (1U << 8) +/* Vector AES Instructions */ +#define CPUID_7_0_ECX_VAES (1U << 9) +/* Carry-Less Multiplication Quadword */ +#define CPUID_7_0_ECX_VPCLMULQDQ (1U << 10) +/* Vector Neural Network Instructions */ +#define CPUID_7_0_ECX_AVX512VNNI (1U << 11) +/* Support for VPOPCNT[B,W] and VPSHUFBITQMB */ +#define CPUID_7_0_ECX_AVX512BITALG (1U << 12) +/* POPCNT for vectors of DW/QW */ +#define CPUID_7_0_ECX_AVX512_VPOPCNTDQ (1U << 14) +/* 5-level Page Tables */ +#define CPUID_7_0_ECX_LA57 (1U << 16) +/* Read Processor ID */ +#define CPUID_7_0_ECX_RDPID (1U << 22) +/* Bus Lock Debug Exception */ +#define CPUID_7_0_ECX_BUS_LOCK_DETECT (1U << 24) +/* Cache Line Demote Instruction */ +#define CPUID_7_0_ECX_CLDEMOTE (1U 
<< 25) +/* Move Doubleword as Direct Store Instruction */ +#define CPUID_7_0_ECX_MOVDIRI (1U << 27) +/* Move 64 Bytes as Direct Store Instruction */ +#define CPUID_7_0_ECX_MOVDIR64B (1U << 28) +/* Support SGX Launch Control */ +#define CPUID_7_0_ECX_SGX_LC (1U << 30) +/* Protection Keys for Supervisor-mode Pages */ +#define CPUID_7_0_ECX_PKS (1U << 31) + +/* AVX512 Neural Network Instructions */ +#define CPUID_7_0_EDX_AVX512_4VNNIW (1U << 2) +/* AVX512 Multiply Accumulation Single Precision */ +#define CPUID_7_0_EDX_AVX512_4FMAPS (1U << 3) +/* Fast Short Rep Mov */ +#define CPUID_7_0_EDX_FSRM (1U << 4) +/* AVX512 Vector Pair Intersection to a Pair of Mask Registers */ +#define CPUID_7_0_EDX_AVX512_VP2INTERSECT (1U << 8) +/* SERIALIZE instruction */ +#define CPUID_7_0_EDX_SERIALIZE (1U << 14) +/* TSX Suspend Load Address Tracking instruction */ +#define CPUID_7_0_EDX_TSX_LDTRK (1U << 16) +/* Architectural LBRs */ +#define CPUID_7_0_EDX_ARCH_LBR (1U << 19) +/* AMX_BF16 instruction */ +#define CPUID_7_0_EDX_AMX_BF16 (1U << 22) +/* AVX512_FP16 instruction */ +#define CPUID_7_0_EDX_AVX512_FP16 (1U << 23) +/* AMX tile (two-dimensional register) */ +#define CPUID_7_0_EDX_AMX_TILE (1U << 24) +/* AMX_INT8 instruction */ +#define CPUID_7_0_EDX_AMX_INT8 (1U << 25) +/* Speculation Control */ +#define CPUID_7_0_EDX_SPEC_CTRL (1U << 26) +/* Single Thread Indirect Branch Predictors */ +#define CPUID_7_0_EDX_STIBP (1U << 27) +/* Flush L1D cache */ +#define CPUID_7_0_EDX_FLUSH_L1D (1U << 28) +/* Arch Capabilities */ +#define CPUID_7_0_EDX_ARCH_CAPABILITIES (1U << 29) +/* Core Capability */ +#define CPUID_7_0_EDX_CORE_CAPABILITY (1U << 30) +/* Speculative Store Bypass Disable */ +#define CPUID_7_0_EDX_SPEC_CTRL_SSBD (1U << 31) + +/* AVX VNNI Instruction */ +#define CPUID_7_1_EAX_AVX_VNNI (1U << 4) +/* AVX512 BFloat16 Instruction */ +#define CPUID_7_1_EAX_AVX512_BF16 (1U << 5) +/* CMPCCXADD Instructions */ +#define CPUID_7_1_EAX_CMPCCXADD (1U << 7) +/* Fast Zero REP MOVS */ +#define 
CPUID_7_1_EAX_FZRM (1U << 10) +/* Fast Short REP STOS */ +#define CPUID_7_1_EAX_FSRS (1U << 11) +/* Fast Short REP CMPS/SCAS */ +#define CPUID_7_1_EAX_FSRC (1U << 12) +/* Support Tile Computational Operations on FP16 Numbers */ +#define CPUID_7_1_EAX_AMX_FP16 (1U << 21) +/* Support for VPMADD52[H,L]UQ */ +#define CPUID_7_1_EAX_AVX_IFMA (1U << 23) + +/* Support for VPDPB[SU,UU,SS]D[,S] */ +#define CPUID_7_1_EDX_AVX_VNNI_INT8 (1U << 4) +/* AVX NE CONVERT Instructions */ +#define CPUID_7_1_EDX_AVX_NE_CONVERT (1U << 5) +/* AMX COMPLEX Instructions */ +#define CPUID_7_1_EDX_AMX_COMPLEX (1U << 8) +/* PREFETCHIT0/1 Instructions */ +#define CPUID_7_1_EDX_PREFETCHITI (1U << 14) + +/* Do not exhibit MXCSR Configuration Dependent Timing (MCDT) behavior */ +#define CPUID_7_2_EDX_MCDT_NO (1U << 5) + +/* XFD Extend Feature Disabled */ +#define CPUID_D_1_EAX_XFD (1U << 4) + +/* Packets which contain IP payload have LIP values */ +#define CPUID_14_0_ECX_LIP (1U << 31) + +/* CLZERO instruction */ +#define CPUID_8000_0008_EBX_CLZERO (1U << 0) +/* Always save/restore FP error pointers */ +#define CPUID_8000_0008_EBX_XSAVEERPTR (1U << 2) +/* Write back and do not invalidate cache */ +#define CPUID_8000_0008_EBX_WBNOINVD (1U << 9) +/* Indirect Branch Prediction Barrier */ +#define CPUID_8000_0008_EBX_IBPB (1U << 12) +/* Indirect Branch Restricted Speculation */ +#define CPUID_8000_0008_EBX_IBRS (1U << 14) +/* Single Thread Indirect Branch Predictors */ +#define CPUID_8000_0008_EBX_STIBP (1U << 15) +/* STIBP mode has enhanced performance and may be left always on */ +#define CPUID_8000_0008_EBX_STIBP_ALWAYS_ON (1U << 17) +/* Speculative Store Bypass Disable */ +#define CPUID_8000_0008_EBX_AMD_SSBD (1U << 24) +/* Predictive Store Forwarding Disable */ +#define CPUID_8000_0008_EBX_AMD_PSFD (1U << 28) + +/* Processor ignores nested data breakpoints */ +#define CPUID_8000_0021_EAX_No_NESTED_DATA_BP (1U << 0) +/* LFENCE is always serializing */ +#define 
CPUID_8000_0021_EAX_LFENCE_ALWAYS_SERIALIZING (1U << 2) +/* Null Selector Clears Base */ +#define CPUID_8000_0021_EAX_NULL_SEL_CLR_BASE (1U << 6) +/* Automatic IBRS */ +#define CPUID_8000_0021_EAX_AUTO_IBRS (1U << 8) #define CPUID_XSAVE_XSAVEOPT (1U << 0) #define CPUID_XSAVE_XSAVEC (1U << 1) @@ -722,6 +1003,15 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_VENDOR_VIA "CentaurHauls" +#define CPUID_VENDOR_HYGON "HygonGenuine" + +#define IS_INTEL_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_INTEL_1 && \ + (env)->cpuid_vendor2 == CPUID_VENDOR_INTEL_2 && \ + (env)->cpuid_vendor3 == CPUID_VENDOR_INTEL_3) +#define IS_AMD_CPU(env) ((env)->cpuid_vendor1 == CPUID_VENDOR_AMD_1 && \ + (env)->cpuid_vendor2 == CPUID_VENDOR_AMD_2 && \ + (env)->cpuid_vendor3 == CPUID_VENDOR_AMD_3) + #define CPUID_MWAIT_IBE (1U << 1) /* Interrupts can exit capability */ #define CPUID_MWAIT_EMX (1U << 0) /* enumeration supported */ @@ -729,9 +1019,167 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPUID_TOPOLOGY_LEVEL_INVALID (0U << 8) #define CPUID_TOPOLOGY_LEVEL_SMT (1U << 8) #define CPUID_TOPOLOGY_LEVEL_CORE (2U << 8) - -#ifndef HYPERV_SPINLOCK_NEVER_RETRY -#define HYPERV_SPINLOCK_NEVER_RETRY 0xFFFFFFFF +#define CPUID_TOPOLOGY_LEVEL_DIE (5U << 8) + +/* MSR Feature Bits */ +#define MSR_ARCH_CAP_RDCL_NO (1U << 0) +#define MSR_ARCH_CAP_IBRS_ALL (1U << 1) +#define MSR_ARCH_CAP_RSBA (1U << 2) +#define MSR_ARCH_CAP_SKIP_L1DFL_VMENTRY (1U << 3) +#define MSR_ARCH_CAP_SSB_NO (1U << 4) +#define MSR_ARCH_CAP_MDS_NO (1U << 5) +#define MSR_ARCH_CAP_PSCHANGE_MC_NO (1U << 6) +#define MSR_ARCH_CAP_TSX_CTRL_MSR (1U << 7) +#define MSR_ARCH_CAP_TAA_NO (1U << 8) +#define MSR_ARCH_CAP_SBDR_SSDP_NO (1U << 13) +#define MSR_ARCH_CAP_FBSDP_NO (1U << 14) +#define MSR_ARCH_CAP_PSDP_NO (1U << 15) +#define MSR_ARCH_CAP_FB_CLEAR (1U << 17) +#define MSR_ARCH_CAP_PBRSB_NO (1U << 24) + +#define MSR_CORE_CAP_SPLIT_LOCK_DETECT (1U << 5) + +/* VMX MSR features */ +#define 
MSR_VMX_BASIC_VMCS_REVISION_MASK 0x7FFFFFFFull +#define MSR_VMX_BASIC_VMXON_REGION_SIZE_MASK (0x00001FFFull << 32) +#define MSR_VMX_BASIC_VMCS_MEM_TYPE_MASK (0x003C0000ull << 32) +#define MSR_VMX_BASIC_DUAL_MONITOR (1ULL << 49) +#define MSR_VMX_BASIC_INS_OUTS (1ULL << 54) +#define MSR_VMX_BASIC_TRUE_CTLS (1ULL << 55) +#define MSR_VMX_BASIC_ANY_ERRCODE (1ULL << 56) + +#define MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK 0x1Full +#define MSR_VMX_MISC_STORE_LMA (1ULL << 5) +#define MSR_VMX_MISC_ACTIVITY_HLT (1ULL << 6) +#define MSR_VMX_MISC_ACTIVITY_SHUTDOWN (1ULL << 7) +#define MSR_VMX_MISC_ACTIVITY_WAIT_SIPI (1ULL << 8) +#define MSR_VMX_MISC_MAX_MSR_LIST_SIZE_MASK 0x0E000000ull +#define MSR_VMX_MISC_VMWRITE_VMEXIT (1ULL << 29) +#define MSR_VMX_MISC_ZERO_LEN_INJECT (1ULL << 30) + +#define MSR_VMX_EPT_EXECONLY (1ULL << 0) +#define MSR_VMX_EPT_PAGE_WALK_LENGTH_4 (1ULL << 6) +#define MSR_VMX_EPT_PAGE_WALK_LENGTH_5 (1ULL << 7) +#define MSR_VMX_EPT_UC (1ULL << 8) +#define MSR_VMX_EPT_WB (1ULL << 14) +#define MSR_VMX_EPT_2MB (1ULL << 16) +#define MSR_VMX_EPT_1GB (1ULL << 17) +#define MSR_VMX_EPT_INVEPT (1ULL << 20) +#define MSR_VMX_EPT_AD_BITS (1ULL << 21) +#define MSR_VMX_EPT_ADVANCED_VMEXIT_INFO (1ULL << 22) +#define MSR_VMX_EPT_INVEPT_SINGLE_CONTEXT (1ULL << 25) +#define MSR_VMX_EPT_INVEPT_ALL_CONTEXT (1ULL << 26) +#define MSR_VMX_EPT_INVVPID (1ULL << 32) +#define MSR_VMX_EPT_INVVPID_SINGLE_ADDR (1ULL << 40) +#define MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT (1ULL << 41) +#define MSR_VMX_EPT_INVVPID_ALL_CONTEXT (1ULL << 42) +#define MSR_VMX_EPT_INVVPID_SINGLE_CONTEXT_NOGLOBALS (1ULL << 43) + +#define MSR_VMX_VMFUNC_EPT_SWITCHING (1ULL << 0) + + +/* VMX controls */ +#define VMX_CPU_BASED_VIRTUAL_INTR_PENDING 0x00000004 +#define VMX_CPU_BASED_USE_TSC_OFFSETING 0x00000008 +#define VMX_CPU_BASED_HLT_EXITING 0x00000080 +#define VMX_CPU_BASED_INVLPG_EXITING 0x00000200 +#define VMX_CPU_BASED_MWAIT_EXITING 0x00000400 +#define VMX_CPU_BASED_RDPMC_EXITING 0x00000800 +#define 
VMX_CPU_BASED_RDTSC_EXITING 0x00001000 +#define VMX_CPU_BASED_CR3_LOAD_EXITING 0x00008000 +#define VMX_CPU_BASED_CR3_STORE_EXITING 0x00010000 +#define VMX_CPU_BASED_CR8_LOAD_EXITING 0x00080000 +#define VMX_CPU_BASED_CR8_STORE_EXITING 0x00100000 +#define VMX_CPU_BASED_TPR_SHADOW 0x00200000 +#define VMX_CPU_BASED_VIRTUAL_NMI_PENDING 0x00400000 +#define VMX_CPU_BASED_MOV_DR_EXITING 0x00800000 +#define VMX_CPU_BASED_UNCOND_IO_EXITING 0x01000000 +#define VMX_CPU_BASED_USE_IO_BITMAPS 0x02000000 +#define VMX_CPU_BASED_MONITOR_TRAP_FLAG 0x08000000 +#define VMX_CPU_BASED_USE_MSR_BITMAPS 0x10000000 +#define VMX_CPU_BASED_MONITOR_EXITING 0x20000000 +#define VMX_CPU_BASED_PAUSE_EXITING 0x40000000 +#define VMX_CPU_BASED_ACTIVATE_SECONDARY_CONTROLS 0x80000000 + +#define VMX_SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001 +#define VMX_SECONDARY_EXEC_ENABLE_EPT 0x00000002 +#define VMX_SECONDARY_EXEC_DESC 0x00000004 +#define VMX_SECONDARY_EXEC_RDTSCP 0x00000008 +#define VMX_SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010 +#define VMX_SECONDARY_EXEC_ENABLE_VPID 0x00000020 +#define VMX_SECONDARY_EXEC_WBINVD_EXITING 0x00000040 +#define VMX_SECONDARY_EXEC_UNRESTRICTED_GUEST 0x00000080 +#define VMX_SECONDARY_EXEC_APIC_REGISTER_VIRT 0x00000100 +#define VMX_SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY 0x00000200 +#define VMX_SECONDARY_EXEC_PAUSE_LOOP_EXITING 0x00000400 +#define VMX_SECONDARY_EXEC_RDRAND_EXITING 0x00000800 +#define VMX_SECONDARY_EXEC_ENABLE_INVPCID 0x00001000 +#define VMX_SECONDARY_EXEC_ENABLE_VMFUNC 0x00002000 +#define VMX_SECONDARY_EXEC_SHADOW_VMCS 0x00004000 +#define VMX_SECONDARY_EXEC_ENCLS_EXITING 0x00008000 +#define VMX_SECONDARY_EXEC_RDSEED_EXITING 0x00010000 +#define VMX_SECONDARY_EXEC_ENABLE_PML 0x00020000 +#define VMX_SECONDARY_EXEC_XSAVES 0x00100000 +#define VMX_SECONDARY_EXEC_TSC_SCALING 0x02000000 +#define VMX_SECONDARY_EXEC_ENABLE_USER_WAIT_PAUSE 0x04000000 + +#define VMX_PIN_BASED_EXT_INTR_MASK 0x00000001 +#define VMX_PIN_BASED_NMI_EXITING 0x00000008 +#define 
VMX_PIN_BASED_VIRTUAL_NMIS 0x00000020 +#define VMX_PIN_BASED_VMX_PREEMPTION_TIMER 0x00000040 +#define VMX_PIN_BASED_POSTED_INTR 0x00000080 + +#define VMX_VM_EXIT_SAVE_DEBUG_CONTROLS 0x00000004 +#define VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE 0x00000200 +#define VMX_VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL 0x00001000 +#define VMX_VM_EXIT_ACK_INTR_ON_EXIT 0x00008000 +#define VMX_VM_EXIT_SAVE_IA32_PAT 0x00040000 +#define VMX_VM_EXIT_LOAD_IA32_PAT 0x00080000 +#define VMX_VM_EXIT_SAVE_IA32_EFER 0x00100000 +#define VMX_VM_EXIT_LOAD_IA32_EFER 0x00200000 +#define VMX_VM_EXIT_SAVE_VMX_PREEMPTION_TIMER 0x00400000 +#define VMX_VM_EXIT_CLEAR_BNDCFGS 0x00800000 +#define VMX_VM_EXIT_PT_CONCEAL_PIP 0x01000000 +#define VMX_VM_EXIT_CLEAR_IA32_RTIT_CTL 0x02000000 +#define VMX_VM_EXIT_LOAD_IA32_PKRS 0x20000000 + +#define VMX_VM_ENTRY_LOAD_DEBUG_CONTROLS 0x00000004 +#define VMX_VM_ENTRY_IA32E_MODE 0x00000200 +#define VMX_VM_ENTRY_SMM 0x00000400 +#define VMX_VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800 +#define VMX_VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL 0x00002000 +#define VMX_VM_ENTRY_LOAD_IA32_PAT 0x00004000 +#define VMX_VM_ENTRY_LOAD_IA32_EFER 0x00008000 +#define VMX_VM_ENTRY_LOAD_BNDCFGS 0x00010000 +#define VMX_VM_ENTRY_PT_CONCEAL_PIP 0x00020000 +#define VMX_VM_ENTRY_LOAD_IA32_RTIT_CTL 0x00040000 +#define VMX_VM_ENTRY_LOAD_IA32_PKRS 0x00400000 + +/* Supported Hyper-V Enlightenments */ +#define HYPERV_FEAT_RELAXED 0 +#define HYPERV_FEAT_VAPIC 1 +#define HYPERV_FEAT_TIME 2 +#define HYPERV_FEAT_CRASH 3 +#define HYPERV_FEAT_RESET 4 +#define HYPERV_FEAT_VPINDEX 5 +#define HYPERV_FEAT_RUNTIME 6 +#define HYPERV_FEAT_SYNIC 7 +#define HYPERV_FEAT_STIMER 8 +#define HYPERV_FEAT_FREQUENCIES 9 +#define HYPERV_FEAT_REENLIGHTENMENT 10 +#define HYPERV_FEAT_TLBFLUSH 11 +#define HYPERV_FEAT_EVMCS 12 +#define HYPERV_FEAT_IPI 13 +#define HYPERV_FEAT_STIMER_DIRECT 14 +#define HYPERV_FEAT_AVIC 15 +#define HYPERV_FEAT_SYNDBG 16 +#define HYPERV_FEAT_MSR_BITMAP 17 +#define HYPERV_FEAT_XMM_INPUT 18 +#define 
HYPERV_FEAT_TLBFLUSH_EXT 19 +#define HYPERV_FEAT_TLBFLUSH_DIRECT 20 + +#ifndef HYPERV_SPINLOCK_NEVER_NOTIFY +#define HYPERV_SPINLOCK_NEVER_NOTIFY 0xFFFFFFFF #endif #define EXCP00_DIVZ 0 @@ -753,9 +1201,9 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define EXCP11_ALGN 17 #define EXCP12_MCHK 18 -#define EXCP_SYSCALL 0x100 /* only happens in user only emulation - for syscall instruction */ -#define EXCP_VMEXIT 0x100 +#define EXCP_VMEXIT 0x100 /* only for system emulation */ +#define EXCP_SYSCALL 0x101 /* only for user emulation */ +#define EXCP_VSYSCALL 0x102 /* only for user emulation */ /* i386-specific interrupt pending bits. */ #define CPU_INTERRUPT_POLL CPU_INTERRUPT_TGT_EXT_1 @@ -844,6 +1292,7 @@ typedef enum { CC_OP_NB, } CCOp; +QEMU_BUILD_BUG_ON(CC_OP_NB >= 128); typedef struct SegmentCache { uint32_t selector; @@ -852,32 +1301,35 @@ typedef struct SegmentCache { uint32_t flags; } SegmentCache; -#define MMREG_UNION(n, bits) \ - union n { \ - uint8_t _b_##n[(bits)/8]; \ - uint16_t _w_##n[(bits)/16]; \ - uint32_t _l_##n[(bits)/32]; \ - uint64_t _q_##n[(bits)/64]; \ - float32 _s_##n[(bits)/32]; \ - float64 _d_##n[(bits)/64]; \ - } - -typedef union { - uint8_t _b[16]; - uint16_t _w[8]; - uint32_t _l[4]; - uint64_t _q[2]; +typedef union MMXReg { + uint8_t _b_MMXReg[64 / 8]; + uint16_t _w_MMXReg[64 / 16]; + uint32_t _l_MMXReg[64 / 32]; + uint64_t _q_MMXReg[64 / 64]; + float32 _s_MMXReg[64 / 32]; + float64 _d_MMXReg[64 / 64]; +} MMXReg; + +typedef union XMMReg { + uint64_t _q_XMMReg[128 / 64]; } XMMReg; -typedef union { - uint8_t _b[32]; - uint16_t _w[16]; - uint32_t _l[8]; - uint64_t _q[4]; +typedef union YMMReg { + uint64_t _q_YMMReg[256 / 64]; + XMMReg _x_YMMReg[256 / 128]; } YMMReg; -typedef MMREG_UNION(ZMMReg, 512) ZMMReg; -typedef MMREG_UNION(MMXReg, 64) MMXReg; +typedef union ZMMReg { + uint8_t _b_ZMMReg[512 / 8]; + uint16_t _w_ZMMReg[512 / 16]; + uint32_t _l_ZMMReg[512 / 32]; + uint64_t _q_ZMMReg[512 / 64]; + float16 _h_ZMMReg[512 / 16]; + float32 
_s_ZMMReg[512 / 32]; + float64 _d_ZMMReg[512 / 64]; + XMMReg _x_ZMMReg[512 / 128]; + YMMReg _y_ZMMReg[512 / 256]; +} ZMMReg; typedef struct BNDReg { uint64_t lb; @@ -893,13 +1345,21 @@ typedef struct BNDCSReg { #define BNDCFG_BNDPRESERVE 2ULL #define BNDCFG_BDIR_MASK TARGET_PAGE_MASK -#ifdef HOST_WORDS_BIGENDIAN +#if HOST_BIG_ENDIAN #define ZMM_B(n) _b_ZMMReg[63 - (n)] #define ZMM_W(n) _w_ZMMReg[31 - (n)] #define ZMM_L(n) _l_ZMMReg[15 - (n)] +#define ZMM_H(n) _h_ZMMReg[31 - (n)] #define ZMM_S(n) _s_ZMMReg[15 - (n)] #define ZMM_Q(n) _q_ZMMReg[7 - (n)] #define ZMM_D(n) _d_ZMMReg[7 - (n)] +#define ZMM_X(n) _x_ZMMReg[3 - (n)] +#define ZMM_Y(n) _y_ZMMReg[1 - (n)] + +#define XMM_Q(n) _q_XMMReg[1 - (n)] + +#define YMM_Q(n) _q_YMMReg[3 - (n)] +#define YMM_X(n) _x_YMMReg[1 - (n)] #define MMX_B(n) _b_MMXReg[7 - (n)] #define MMX_W(n) _w_MMXReg[3 - (n)] @@ -909,9 +1369,17 @@ typedef struct BNDCSReg { #define ZMM_B(n) _b_ZMMReg[n] #define ZMM_W(n) _w_ZMMReg[n] #define ZMM_L(n) _l_ZMMReg[n] +#define ZMM_H(n) _h_ZMMReg[n] #define ZMM_S(n) _s_ZMMReg[n] #define ZMM_Q(n) _q_ZMMReg[n] #define ZMM_D(n) _d_ZMMReg[n] +#define ZMM_X(n) _x_ZMMReg[n] +#define ZMM_Y(n) _y_ZMMReg[n] + +#define XMM_Q(n) _q_XMMReg[n] + +#define YMM_Q(n) _q_YMMReg[n] +#define YMM_X(n) _x_YMMReg[n] #define MMX_B(n) _b_MMXReg[n] #define MMX_W(n) _w_MMXReg[n] @@ -942,7 +1410,6 @@ typedef struct { #define MAX_FIXED_COUNTERS 3 #define MAX_GP_COUNTERS (MSR_IA32_PERF_STATUS - MSR_P6_EVNTSEL0) -#define NB_MMU_MODES 3 #define TARGET_INSN_START_EXTRA_WORDS 1 #define NB_OPMASK_REGS 8 @@ -1013,40 +1480,54 @@ typedef struct XSavePKRU { uint32_t padding; } XSavePKRU; -typedef struct X86XSaveArea { - X86LegacyXSaveArea legacy; - X86XSaveHeader header; - - /* Extended save areas: */ - - /* AVX State: */ - XSaveAVX avx_state; - uint8_t padding[960 - 576 - sizeof(XSaveAVX)]; - /* MPX State: */ - XSaveBNDREG bndreg_state; - XSaveBNDCSR bndcsr_state; - /* AVX-512 State: */ - XSaveOpmask opmask_state; - XSaveZMM_Hi256 
zmm_hi256_state; - XSaveHi16_ZMM hi16_zmm_state; - /* PKRU State: */ - XSavePKRU pkru_state; -} X86XSaveArea; - -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, avx_state) != 0x240); +/* Ext. save area 17: AMX XTILECFG state */ +typedef struct XSaveXTILECFG { + uint8_t xtilecfg[64]; +} XSaveXTILECFG; + +/* Ext. save area 18: AMX XTILEDATA state */ +typedef struct XSaveXTILEDATA { + uint8_t xtiledata[8][1024]; +} XSaveXTILEDATA; + +typedef struct { + uint64_t from; + uint64_t to; + uint64_t info; +} LBREntry; + +#define ARCH_LBR_NR_ENTRIES 32 + +/* Ext. save area 19: Supervisor mode Arch LBR state */ +typedef struct XSavesArchLBR { + uint64_t lbr_ctl; + uint64_t lbr_depth; + uint64_t ler_from; + uint64_t ler_to; + uint64_t ler_info; + LBREntry lbr_records[ARCH_LBR_NR_ENTRIES]; +} XSavesArchLBR; + QEMU_BUILD_BUG_ON(sizeof(XSaveAVX) != 0x100); -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, bndreg_state) != 0x3c0); QEMU_BUILD_BUG_ON(sizeof(XSaveBNDREG) != 0x40); -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, bndcsr_state) != 0x400); QEMU_BUILD_BUG_ON(sizeof(XSaveBNDCSR) != 0x40); -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, opmask_state) != 0x440); QEMU_BUILD_BUG_ON(sizeof(XSaveOpmask) != 0x40); -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, zmm_hi256_state) != 0x480); QEMU_BUILD_BUG_ON(sizeof(XSaveZMM_Hi256) != 0x200); -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, hi16_zmm_state) != 0x680); QEMU_BUILD_BUG_ON(sizeof(XSaveHi16_ZMM) != 0x400); -QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, pkru_state) != 0xA80); QEMU_BUILD_BUG_ON(sizeof(XSavePKRU) != 0x8); +QEMU_BUILD_BUG_ON(sizeof(XSaveXTILECFG) != 0x40); +QEMU_BUILD_BUG_ON(sizeof(XSaveXTILEDATA) != 0x2000); +QEMU_BUILD_BUG_ON(sizeof(XSavesArchLBR) != 0x328); + +typedef struct ExtSaveArea { + uint32_t feature, bits; + uint32_t offset, size; + uint32_t ecx; +} ExtSaveArea; + +#define XSAVE_STATE_AREA_COUNT (XSTATE_XTILE_DATA_BIT + 1) + +extern ExtSaveArea x86_ext_save_areas[XSAVE_STATE_AREA_COUNT]; typedef enum TPRAccess { TPR_ACCESS_READ, @@ -1112,7 
+1593,12 @@ typedef struct CPUCaches { CPUCacheInfo *l3_cache; } CPUCaches; -typedef struct CPUX86State { +typedef struct HVFX86LazyFlags { + target_ulong result; + target_ulong auxbits; +} HVFX86LazyFlags; + +typedef struct CPUArchState { /* standard registers */ target_ulong regs[CPU_NB_REGS]; target_ulong eip; @@ -1138,6 +1624,9 @@ typedef struct CPUX86State { SegmentCache idt; /* only base and limit are used */ target_ulong cr[5]; /* NOTE: cr1 is unused */ + + bool pdptrs_valid; + uint64_t pdptrs[4]; int32_t a20_mask; BNDReg bnd_regs[4]; @@ -1156,6 +1645,8 @@ typedef struct CPUX86State { FPReg fpregs[8]; /* KVM-only so far */ uint16_t fpop; + uint16_t fpcs; + uint16_t fpds; uint64_t fpip; uint64_t fpdp; @@ -1166,15 +1657,15 @@ typedef struct CPUX86State { float_status mmx_status; /* for 3DNow! float ops */ float_status sse_status; uint32_t mxcsr; - ZMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32]; - ZMMReg xmm_t0; + ZMMReg xmm_regs[CPU_NB_REGS == 8 ? 8 : 32] QEMU_ALIGNED(16); + ZMMReg xmm_t0 QEMU_ALIGNED(16); MMXReg mmx_t0; - XMMReg ymmh_regs[CPU_NB_REGS]; - uint64_t opmask_regs[NB_OPMASK_REGS]; - YMMReg zmmh_regs[CPU_NB_REGS]; - ZMMReg hi16_zmm_regs[CPU_NB_REGS]; +#ifdef TARGET_X86_64 + uint8_t xtilecfg[64]; + uint8_t xtiledata[8192]; +#endif /* sysenter registers */ uint32_t sysenter_cs; @@ -1191,7 +1682,6 @@ typedef struct CPUX86State { target_ulong kernelgsbase; #endif - uint64_t tsc; uint64_t tsc_adjust; uint64_t tsc_deadline; uint64_t tsc_aux; @@ -1201,6 +1691,7 @@ typedef struct CPUX86State { uint64_t mcg_status; uint64_t msr_ia32_misc_enable; uint64_t msr_ia32_feature_control; + uint64_t msr_ia32_sgxlepubkeyhash[4]; uint64_t msr_fixed_ctr_ctrl; uint64_t msr_global_ctrl; @@ -1215,8 +1706,11 @@ typedef struct CPUX86State { uint64_t msr_smi_count; uint32_t pkru; + uint32_t pkrs; + uint32_t tsx_ctrl; uint64_t spec_ctrl; + uint64_t amd_tsc_scale_msr; uint64_t virt_ssbd; /* End of state preserved by INIT (dummy marker). 
*/ @@ -1226,12 +1720,20 @@ typedef struct CPUX86State { uint64_t wall_clock_msr; uint64_t steal_time_msr; uint64_t async_pf_en_msr; + uint64_t async_pf_int_msr; uint64_t pv_eoi_en_msr; + uint64_t poll_control_msr; /* Partition-wide HV MSRs, will be updated only on the first vcpu */ uint64_t msr_hv_hypercall; uint64_t msr_hv_guest_os_id; uint64_t msr_hv_tsc; + uint64_t msr_hv_syndbg_control; + uint64_t msr_hv_syndbg_status; + uint64_t msr_hv_syndbg_send_page; + uint64_t msr_hv_syndbg_recv_page; + uint64_t msr_hv_syndbg_pending_page; + uint64_t msr_hv_syndbg_options; /* Per-VCPU HV MSRs */ uint64_t msr_hv_vapic; @@ -1254,6 +1756,15 @@ typedef struct CPUX86State { uint64_t msr_rtit_cr3_match; uint64_t msr_rtit_addrs[MAX_RTIT_ADDRS]; + /* Per-VCPU XFD MSRs */ + uint64_t msr_xfd; + uint64_t msr_xfd_err; + + /* Per-VCPU Arch LBR MSRs */ + uint64_t msr_lbr_ctl; + uint64_t msr_lbr_depth; + LBREntry lbr_records[ARCH_LBR_NR_ENTRIES]; + /* exception/interrupt handling */ int error_code; int exception_is_int; @@ -1276,6 +1787,7 @@ typedef struct CPUX86State { uint64_t nested_cr3; uint32_t nested_pg_mode; uint8_t v_tpr; + uint32_t int_ctl; /* KVM states, automatically cleared on reset */ uint8_t nmi_injected; @@ -1286,11 +1798,13 @@ typedef struct CPUX86State { /* Fields up to this point are cleared by a CPU reset */ struct {} end_reset_fields; - CPU_COMMON - - /* Fields after CPU_COMMON are preserved across CPU reset. */ + /* Fields after this point are preserved across CPU reset. */ /* processor features (e.g. 
for CPUID insn) */ + /* Actual cpuid leaf 7 value */ + uint32_t cpuid_level_func7; + /* Minimum cpuid leaf 7 value */ + uint32_t cpuid_min_level_func7; /* Minimum level/xlevel/xlevel2, based on CPU model + features */ uint32_t cpuid_min_level, cpuid_min_xlevel, cpuid_min_xlevel2; /* Maximum level/xlevel/xlevel2 value for auto-assignment: */ @@ -1318,20 +1832,46 @@ typedef struct CPUX86State { /* For KVM */ uint32_t mp_state; - int32_t exception_injected; + int32_t exception_nr; int32_t interrupt_injected; uint8_t soft_interrupt; + uint8_t exception_pending; + uint8_t exception_injected; uint8_t has_error_code; + uint8_t exception_has_payload; + uint64_t exception_payload; + uint8_t triple_fault_pending; uint32_t ins_len; uint32_t sipi_vector; bool tsc_valid; int64_t tsc_khz; int64_t user_tsc_khz; /* for sanity check only */ + uint64_t apic_bus_freq; + uint64_t tsc; #if defined(CONFIG_KVM) || defined(CONFIG_HVF) void *xsave_buf; + uint32_t xsave_buf_len; +#endif +#if defined(CONFIG_KVM) + struct kvm_nested_state *nested_state; + MemoryRegion *xen_vcpu_info_mr; + void *xen_vcpu_info_hva; + uint64_t xen_vcpu_info_gpa; + uint64_t xen_vcpu_info_default_gpa; + uint64_t xen_vcpu_time_info_gpa; + uint64_t xen_vcpu_runstate_gpa; + uint8_t xen_vcpu_callback_vector; + bool xen_callback_asserted; + uint16_t xen_virq[XEN_NR_VIRQS]; + uint64_t xen_singleshot_timer_ns; + QEMUTimer *xen_singleshot_timer; + uint64_t xen_periodic_timer_period; + QEMUTimer *xen_periodic_timer; + QemuMutex xen_timers_lock; #endif #if defined(CONFIG_HVF) - HVFX86EmulatorState *hvf_emul; + HVFX86LazyFlags hvf_lflags; + void *hvf_mmio_buf; #endif uint64_t mcg_cap; @@ -1346,8 +1886,12 @@ typedef struct CPUX86State { uint16_t fpregs_format_vmstate; uint64_t xss; + uint32_t umwait; TPRAccess tpr_access_type; + + /* Number of dies within this CPU package. */ + unsigned nr_dies; } CPUX86State; struct kvm_msrs; @@ -1361,29 +1905,39 @@ struct kvm_msrs; * * An x86 CPU.
*/ -struct X86CPU { - /*< private >*/ +struct ArchCPU { CPUState parent_obj; - /*< public >*/ CPUX86State env; + VMChangeStateEntry *vmsentry; + + uint64_t ucode_rev; + + uint32_t hyperv_spinlock_attempts; + char *hyperv_vendor; + bool hyperv_synic_kvm_only; + uint64_t hyperv_features; + bool hyperv_passthrough; + OnOffAuto hyperv_no_nonarch_cs; + uint32_t hyperv_vendor_id[3]; + uint32_t hyperv_interface_id[4]; + uint32_t hyperv_limits[3]; + bool hyperv_enforce_cpuid; + uint32_t hyperv_ver_id_build; + uint16_t hyperv_ver_id_major; + uint16_t hyperv_ver_id_minor; + uint32_t hyperv_ver_id_sp; + uint8_t hyperv_ver_id_sb; + uint32_t hyperv_ver_id_sn; - bool hyperv_vapic; - bool hyperv_relaxed_timing; - int hyperv_spinlock_attempts; - char *hyperv_vendor_id; - bool hyperv_time; - bool hyperv_crash; - bool hyperv_reset; - bool hyperv_vpindex; - bool hyperv_runtime; - bool hyperv_synic; - bool hyperv_stimer; - bool hyperv_frequencies; - bool hyperv_reenlightenment; - bool hyperv_tlbflush; bool check_cpuid; bool enforce_cpuid; + /* + * Force features to be enabled even if the host doesn't support them. + * This is dangerous and should be done only for testing CPUID + * compatibility. + */ + bool force_features; bool expose_kvm; bool expose_tcg; bool migratable; @@ -1408,7 +1962,7 @@ struct X86CPU { } mwait; /* Features that were filtered out because of missing host capabilities */ - uint32_t filtered_features[FEATURE_WORDS]; + FeatureWordArray filtered_features; /* Enable PMU CPUID bits. This can't be enabled by default yet because * it doesn't have ABI stability guarantees, as it passes all PMU CPUID @@ -1417,6 +1971,15 @@ struct X86CPU { */ bool enable_pmu; + /* + * Enable LBR_FMT bits of IA32_PERF_CAPABILITIES MSR. + * This can't be initialized with a default because it doesn't have + * stable ABI support yet. 
It is only allowed to pass all LBR_FMT bits + * returned by kvm_arch_get_supported_msr_feature()(which depends on both + * host CPU and kernel capabilities) to the guest. + */ + uint64_t lbr_fmt; + /* LMCE support can be enabled/disabled via cpu option 'lmce=on/off'. It is * disabled by default to avoid breaking migration between QEMU with * different LMCE configurations. @@ -1440,15 +2003,27 @@ struct X86CPU { /* Enable auto level-increase for all CPUID leaves */ bool full_cpuid_auto_level; + /* Only advertise CPUID leaves defined by the vendor */ + bool vendor_cpuid_only; + + /* Enable auto level-increase for Intel Processor Trace leaf */ + bool intel_pt_auto_level; + /* if true fill the top bits of the MTRR_PHYSMASKn variable range */ bool fill_mtrr_mask; /* if true override the phys_bits value with a value read from the host */ bool host_phys_bits; + /* if set, limit maximum value for phys_bits when host_phys_bits is true */ + uint8_t host_phys_bits_limit; + /* Stop SMI delivery for migration compatibility with old machines */ bool kvm_no_smi_migration; + /* Forcefully disable KVM PV features not exposed in guest CPUIDs */ + bool kvm_pv_enforce_cpuid; + /* Number of physical address bits supported */ uint32_t phys_bits; @@ -1462,69 +2037,99 @@ struct X86CPU { int32_t node_id; /* NUMA node this CPU belongs to */ int32_t socket_id; + int32_t die_id; int32_t core_id; int32_t thread_id; int32_t hv_max_vps; + + bool xen_vapic; }; -static inline X86CPU *x86_env_get_cpu(CPUX86State *env) -{ - return container_of(env, X86CPU, env); -} +typedef struct X86CPUModel X86CPUModel; -#define ENV_GET_CPU(e) CPU(x86_env_get_cpu(e)) +/** + * X86CPUClass: + * @cpu_def: CPU model definition + * @host_cpuid_required: Whether CPU model requires cpuid from host. + * @ordering: Ordering on the "-cpu help" CPU model list. + * @migration_safe: See CpuDefinitionInfo::migration_safe + * @static_model: See CpuDefinitionInfo::static + * @parent_realize: The parent class' realize handler.
+ * @parent_phases: The parent class' reset phase handlers. + * + * An x86 CPU model or family. + */ +struct X86CPUClass { + CPUClass parent_class; -#define ENV_OFFSET offsetof(X86CPU, env) + /* + * CPU definition, automatically loaded by instance_init if not NULL. + * Should be eventually replaced by subclass-specific property defaults. + */ + X86CPUModel *model; + + bool host_cpuid_required; + int ordering; + bool migration_safe; + bool static_model; + + /* + * Optional description of CPU model. + * If unavailable, cpu_def->model_id is used. + */ + const char *model_description; + + DeviceRealize parent_realize; + DeviceUnrealize parent_unrealize; + ResettablePhases parent_phases; +}; #ifndef CONFIG_USER_ONLY -extern struct VMStateDescription vmstate_x86_cpu; +extern const VMStateDescription vmstate_x86_cpu; #endif -/** - * x86_cpu_do_interrupt: - * @cpu: vCPU the interrupt is to be handled by. - */ -void x86_cpu_do_interrupt(CPUState *cpu); -bool x86_cpu_exec_interrupt(CPUState *cpu, int int_req); int x86_cpu_pending_interrupt(CPUState *cs, int interrupt_request); int x86_cpu_write_elf64_note(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque); + int cpuid, DumpState *s); int x86_cpu_write_elf32_note(WriteCoreDumpFunction f, CPUState *cpu, - int cpuid, void *opaque); + int cpuid, DumpState *s); int x86_cpu_write_elf64_qemunote(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque); + DumpState *s); int x86_cpu_write_elf32_qemunote(WriteCoreDumpFunction f, CPUState *cpu, - void *opaque); + DumpState *s); -void x86_cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, +bool x86_cpu_get_memory_mapping(CPUState *cpu, MemoryMappingList *list, Error **errp); -void x86_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, - int flags); +void x86_cpu_dump_state(CPUState *cs, FILE *f, int flags); -hwaddr x86_cpu_get_phys_page_debug(CPUState *cpu, vaddr addr); - -int x86_cpu_gdb_read_register(CPUState *cpu, uint8_t *buf, int reg); 
+int x86_cpu_gdb_read_register(CPUState *cpu, GByteArray *buf, int reg); int x86_cpu_gdb_write_register(CPUState *cpu, uint8_t *buf, int reg); -void x86_cpu_exec_enter(CPUState *cpu); -void x86_cpu_exec_exit(CPUState *cpu); - -void x86_cpu_list(FILE *f, fprintf_function cpu_fprintf); +void x86_cpu_list(void); int cpu_x86_support_mca_broadcast(CPUX86State *env); +#ifndef CONFIG_USER_ONLY +hwaddr x86_cpu_get_phys_page_attrs_debug(CPUState *cpu, vaddr addr, + MemTxAttrs *attrs); int cpu_get_pic_interrupt(CPUX86State *s); -/* MSDOS compatibility mode FPU exception support */ -void cpu_set_ferr(CPUX86State *s); + +/* MS-DOS compatibility mode FPU exception support */ +void x86_register_ferr_irq(qemu_irq irq); +void fpu_check_raise_ferr_irq(CPUX86State *s); +void cpu_set_ignne(void); +void cpu_clear_ignne(void); +#endif + /* mpx_helper.c */ void cpu_sync_bndcs_hflags(CPUX86State *env); /* this function must always be used to load data in the segment cache: it synchronizes the hflags with the segment cache values */ static inline void cpu_x86_load_seg_cache(CPUX86State *env, - int seg_reg, unsigned int selector, + X86Seg seg_reg, unsigned int selector, target_ulong base, unsigned int limit, unsigned int flags) @@ -1613,31 +2218,39 @@ int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector, /* cpu-exec.c */ /* the following helpers are only usable in user mode simulation as they can trigger unexpected exceptions */ -void cpu_x86_load_seg(CPUX86State *s, int seg_reg, int selector); +void cpu_x86_load_seg(CPUX86State *s, X86Seg seg_reg, int selector); void cpu_x86_fsave(CPUX86State *s, target_ulong ptr, int data32); void cpu_x86_frstor(CPUX86State *s, target_ulong ptr, int data32); void cpu_x86_fxsave(CPUX86State *s, target_ulong ptr); void cpu_x86_fxrstor(CPUX86State *s, target_ulong ptr); - -/* you can call this signal handler from your SIGBUS and SIGSEGV - signal handlers to inform the virtual CPU of exceptions. 
non zero - is returned if the signal was handled by the virtual CPU. */ -int cpu_x86_signal_handler(int host_signum, void *pinfo, - void *puc); +void cpu_x86_xsave(CPUX86State *s, target_ulong ptr); +void cpu_x86_xrstor(CPUX86State *s, target_ulong ptr); /* cpu.c */ +void x86_cpu_vendor_words2str(char *dst, uint32_t vendor1, + uint32_t vendor2, uint32_t vendor3); +typedef struct PropValue { + const char *prop, *value; +} PropValue; +void x86_cpu_apply_props(X86CPU *cpu, PropValue *props); + +void x86_cpu_after_reset(X86CPU *cpu); + +uint32_t cpu_x86_virtual_addr_width(CPUX86State *env); + +/* cpu.c other functions (cpuid) */ void cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); void cpu_clear_apic_feature(CPUX86State *env); +void cpu_set_apic_feature(CPUX86State *env); void host_cpuid(uint32_t function, uint32_t count, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx); -void host_vendor_fms(char *vendor, int *family, int *model, int *stepping); +bool cpu_has_x2apic_feature(CPUX86State *env); /* helper.c */ -int x86_cpu_handle_mmu_fault(CPUState *cpu, vaddr addr, int size, - int is_write, int mmu_idx); void x86_cpu_set_a20(X86CPU *cpu, int a20_state); +void cpu_sync_avx_hflag(CPUX86State *env); #ifndef CONFIG_USER_ONLY static inline int x86_asidx_from_attrs(CPUState *cs, MemTxAttrs attrs) @@ -1650,6 +2263,11 @@ static inline AddressSpace *cpu_addressspace(CPUState *cs, MemTxAttrs attrs) return cpu_get_address_space(cs, cpu_asidx_from_attrs(cs, attrs)); } +/* + * load efer and update the corresponding hflags. XXX: do consistency + * checks with cpuid bits? 
+ */ +void cpu_load_efer(CPUX86State *env, uint64_t val); uint8_t x86_ldub_phys(CPUState *cs, hwaddr addr); uint32_t x86_lduw_phys(CPUState *cs, hwaddr addr); uint32_t x86_ldl_phys(CPUState *cs, hwaddr addr); @@ -1661,8 +2279,6 @@ void x86_stl_phys(CPUState *cs, hwaddr addr, uint32_t val); void x86_stq_phys(CPUState *cs, hwaddr addr, uint64_t val); #endif -void breakpoint_handler(CPUState *cs); - /* will be suppressed */ void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0); void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3); @@ -1672,31 +2288,6 @@ void cpu_x86_update_dr7(CPUX86State *env, uint32_t new_dr7); /* hw/pc.c */ uint64_t cpu_get_tsc(CPUX86State *env); -#define TARGET_PAGE_BITS 12 - -#ifdef TARGET_X86_64 -#define TARGET_PHYS_ADDR_SPACE_BITS 52 -/* ??? This is really 48 bits, sign-extended, but the only thing - accessible to userland with bit 48 set is the VSYSCALL, and that - is handled via other mechanisms. */ -#define TARGET_VIRT_ADDR_SPACE_BITS 47 -#else -#define TARGET_PHYS_ADDR_SPACE_BITS 36 -#define TARGET_VIRT_ADDR_SPACE_BITS 32 -#endif - -/* XXX: This value should match the one returned by CPUID - * and in exec.c */ -# if defined(TARGET_X86_64) -# define TCG_PHYS_ADDR_BITS 40 -# else -# define TCG_PHYS_ADDR_BITS 36 -# endif - -#define PHYS_ADDR_MASK MAKE_64BIT_MASK(0, TCG_PHYS_ADDR_BITS) - -#define X86_CPU_TYPE_SUFFIX "-" TYPE_X86_CPU -#define X86_CPU_TYPE_NAME(name) (name X86_CPU_TYPE_SUFFIX) #define CPU_RESOLVING_TYPE TYPE_X86_CPU #ifdef TARGET_X86_64 @@ -1705,28 +2296,50 @@ uint64_t cpu_get_tsc(CPUX86State *env); #define TARGET_DEFAULT_CPU_TYPE X86_CPU_TYPE_NAME("qemu32") #endif -#define cpu_signal_handler cpu_x86_signal_handler #define cpu_list x86_cpu_list /* MMU modes definitions */ -#define MMU_MODE0_SUFFIX _ksmap -#define MMU_MODE1_SUFFIX _user -#define MMU_MODE2_SUFFIX _knosmap /* SMAP disabled or CPL<3 && AC=1 */ -#define MMU_KSMAP_IDX 0 -#define MMU_USER_IDX 1 -#define MMU_KNOSMAP_IDX 2 -static inline int 
cpu_mmu_index(CPUX86State *env, bool ifetch) +#define MMU_KSMAP64_IDX 0 +#define MMU_KSMAP32_IDX 1 +#define MMU_USER64_IDX 2 +#define MMU_USER32_IDX 3 +#define MMU_KNOSMAP64_IDX 4 +#define MMU_KNOSMAP32_IDX 5 +#define MMU_PHYS_IDX 6 +#define MMU_NESTED_IDX 7 + +#ifdef CONFIG_USER_ONLY +#ifdef TARGET_X86_64 +#define MMU_USER_IDX MMU_USER64_IDX +#else +#define MMU_USER_IDX MMU_USER32_IDX +#endif +#endif + +static inline bool is_mmu_index_smap(int mmu_index) +{ + return (mmu_index & ~1) == MMU_KSMAP64_IDX; +} + +static inline bool is_mmu_index_user(int mmu_index) +{ + return (mmu_index & ~1) == MMU_USER64_IDX; +} + +static inline bool is_mmu_index_32(int mmu_index) { - return (env->hflags & HF_CPL_MASK) == 3 ? MMU_USER_IDX : - (!(env->hflags & HF_SMAP_MASK) || (env->eflags & AC_MASK)) - ? MMU_KNOSMAP_IDX : MMU_KSMAP_IDX; + assert(mmu_index < MMU_PHYS_IDX); + return mmu_index & 1; } static inline int cpu_mmu_index_kernel(CPUX86State *env) { - return !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP_IDX : - ((env->hflags & HF_CPL_MASK) < 3 && (env->eflags & AC_MASK)) - ? MMU_KNOSMAP_IDX : MMU_KSMAP_IDX; + int mmu_index_32 = (env->hflags & HF_LMA_MASK) ? 0 : 1; + int mmu_index_base = + !(env->hflags & HF_SMAP_MASK) ? MMU_KNOSMAP64_IDX : + ((env->hflags & HF_CPL_MASK) < 3 && (env->eflags & AC_MASK)) ? 
MMU_KNOSMAP64_IDX : MMU_KSMAP64_IDX; + + return mmu_index_base + mmu_index_32; } #define CC_DST (env->cc_dst) @@ -1734,25 +2347,6 @@ static inline int cpu_mmu_index_kernel(CPUX86State *env) #define CC_SRC2 (env->cc_src2) #define CC_OP (env->cc_op) -/* n must be a constant to be efficient */ -static inline target_long lshift(target_long x, int n) -{ - if (n >= 0) { - return x << n; - } else { - return x >> (-n); - } -} - -/* float macros */ -#define FT0 (env->ft0) -#define ST0 (env->fpregs[env->fpstt].d) -#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) -#define ST1 ST(1) - -/* translate.c */ -void tcg_x86_init(void); - #include "exec/cpu-all.h" #include "svm.h" @@ -1760,17 +2354,21 @@ void tcg_x86_init(void); #include "hw/i386/apic.h" #endif -static inline void cpu_get_tb_cpu_state(CPUX86State *env, target_ulong *pc, - target_ulong *cs_base, uint32_t *flags) +static inline void cpu_get_tb_cpu_state(CPUX86State *env, vaddr *pc, + uint64_t *cs_base, uint32_t *flags) { - *cs_base = env->segs[R_CS].base; - *pc = *cs_base + env->eip; *flags = env->hflags | (env->eflags & (IOPL_MASK | TF_MASK | RF_MASK | VM_MASK | AC_MASK)); + if (env->hflags & HF_CS64_MASK) { + *cs_base = 0; + *pc = env->eip; + } else { + *cs_base = env->segs[R_CS].base; + *pc = (uint32_t)(*cs_base + env->eip); + } } void do_cpu_init(X86CPU *cpu); -void do_cpu_sipi(X86CPU *cpu); #define MCE_INJECT_BROADCAST 1 #define MCE_INJECT_UNCOND_AO 2 @@ -1779,57 +2377,17 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, uint64_t status, uint64_t mcg_status, uint64_t addr, uint64_t misc, int flags); -/* excp_helper.c */ -void QEMU_NORETURN raise_exception(CPUX86State *env, int exception_index); -void QEMU_NORETURN raise_exception_ra(CPUX86State *env, int exception_index, - uintptr_t retaddr); -void QEMU_NORETURN raise_exception_err(CPUX86State *env, int exception_index, - int error_code); -void QEMU_NORETURN raise_exception_err_ra(CPUX86State *env, int exception_index, - int error_code, uintptr_t 
retaddr); -void QEMU_NORETURN raise_interrupt(CPUX86State *nenv, int intno, int is_int, - int error_code, int next_eip_addend); - -/* cc_helper.c */ -extern const uint8_t parity_table[256]; -uint32_t cpu_cc_compute_all(CPUX86State *env1, int op); +uint32_t cpu_cc_compute_all(CPUX86State *env1); static inline uint32_t cpu_compute_eflags(CPUX86State *env) { uint32_t eflags = env->eflags; if (tcg_enabled()) { - eflags |= cpu_cc_compute_all(env, CC_OP) | (env->df & DF_MASK); + eflags |= cpu_cc_compute_all(env) | (env->df & DF_MASK); } return eflags; } -/* NOTE: the translator must set DisasContext.cc_op to CC_OP_EFLAGS - * after generating a call to a helper that uses this. - */ -static inline void cpu_load_eflags(CPUX86State *env, int eflags, - int update_mask) -{ - CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); - CC_OP = CC_OP_EFLAGS; - env->df = 1 - (2 * ((eflags >> 10) & 1)); - env->eflags = (env->eflags & ~update_mask) | - (eflags & update_mask) | 0x2; -} - -/* load efer and update the corresponding hflags. XXX: do consistency - checks with cpuid bits? */ -static inline void cpu_load_efer(CPUX86State *env, uint64_t val) -{ - env->efer = val; - env->hflags &= ~(HF_LMA_MASK | HF_SVME_MASK); - if (env->efer & MSR_EFER_LMA) { - env->hflags |= HF_LMA_MASK; - } - if (env->efer & MSR_EFER_SVME) { - env->hflags |= HF_SVME_MASK; - } -} - static inline MemTxAttrs cpu_get_mem_attrs(CPUX86State *env) { return ((MemTxAttrs) { .secure = (env->hflags & HF_SMM_MASK) != 0 }); @@ -1844,9 +2402,45 @@ static inline int32_t x86_get_a20_mask(CPUX86State *env) } } +static inline bool cpu_has_vmx(CPUX86State *env) +{ + return env->features[FEAT_1_ECX] & CPUID_EXT_VMX; +} + +static inline bool cpu_has_svm(CPUX86State *env) +{ + return env->features[FEAT_8000_0001_ECX] & CPUID_EXT3_SVM; +} + +/* + * In order for a vCPU to enter VMX operation it must have CR4.VMXE set. + * Since it was set, CR4.VMXE must remain set as long as vCPU is in + * VMX operation. 
This is because CR4.VMXE is one of the bits set + * in MSR_IA32_VMX_CR4_FIXED1. + * + * There is one exception to above statement when vCPU enters SMM mode. + * When a vCPU enters SMM mode, it temporarily exit VMX operation and + * may also reset CR4.VMXE during execution in SMM mode. + * When vCPU exits SMM mode, vCPU state is restored to be in VMX operation + * and CR4.VMXE is restored to it's original value of being set. + * + * Therefore, when vCPU is not in SMM mode, we can infer whether + * VMX is being used by examining CR4.VMXE. Otherwise, we cannot + * know for certain. + */ +static inline bool cpu_vmx_maybe_enabled(CPUX86State *env) +{ + return cpu_has_vmx(env) && + ((env->cr[4] & CR4_VMXE_MASK) || (env->hflags & HF_SMM_MASK)); +} + +/* excp_helper.c */ +int get_pg_mode(CPUX86State *env); + /* fpu_helper.c */ void update_fp_status(CPUX86State *env); void update_mxcsr_status(CPUX86State *env); +void update_mxcsr_from_sse_status(CPUX86State *env); static inline void cpu_set_mxcsr(CPUX86State *env, uint32_t mxcsr) { @@ -1864,54 +2458,122 @@ static inline void cpu_set_fpuc(CPUX86State *env, uint16_t fpuc) } } -/* mem_helper.c */ -void helper_lock_init(void); - /* svm_helper.c */ +#ifdef CONFIG_USER_ONLY +static inline void +cpu_svm_check_intercept_param(CPUX86State *env1, uint32_t type, + uint64_t param, uintptr_t retaddr) +{ /* no-op */ } +static inline bool +cpu_svm_has_intercept(CPUX86State *env, uint32_t type) +{ return false; } +#else void cpu_svm_check_intercept_param(CPUX86State *env1, uint32_t type, uint64_t param, uintptr_t retaddr); -void QEMU_NORETURN cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, - uint64_t exit_info_1, uintptr_t retaddr); -void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1); - -/* seg_helper.c */ -void do_interrupt_x86_hardirq(CPUX86State *env, int intno, int is_hw); - -/* smm_helper.c */ -void do_smm_enter(X86CPU *cpu); +bool cpu_svm_has_intercept(CPUX86State *env, uint32_t type); +#endif /* apic.c */ 
void cpu_report_tpr_access(CPUX86State *env, TPRAccess access); void apic_handle_tpr_access_report(DeviceState *d, target_ulong ip, TPRAccess access); +/* Special values for X86CPUVersion: */ -/* Change the value of a KVM-specific default - * - * If value is NULL, no default will be set and the original - * value from the CPU model table will be kept. - * - * It is valid to call this function only for properties that - * are already present in the kvm_default_props table. +/* Resolve to latest CPU version */ +#define CPU_VERSION_LATEST -1 + +/* + * Resolve to version defined by current machine type. + * See x86_cpu_set_default_version() + */ +#define CPU_VERSION_AUTO -2 + +/* Don't resolve to any versioned CPU models, like old QEMU versions */ +#define CPU_VERSION_LEGACY 0 + +typedef int X86CPUVersion; + +/* + * Set default CPU model version for CPU models having + * version == CPU_VERSION_AUTO. */ -void x86_cpu_change_kvm_default(const char *prop, const char *value); +void x86_cpu_set_default_version(X86CPUVersion version); -/* Return name of 32-bit register, from a R_* constant */ -const char *get_register_name_32(unsigned int reg); +#ifndef CONFIG_USER_ONLY -void enable_compat_apic_id_mode(void); +void do_cpu_sipi(X86CPU *cpu); #define APIC_DEFAULT_ADDRESS 0xfee00000 #define APIC_SPACE_SIZE 0x100000 -void x86_cpu_dump_local_apic_state(CPUState *cs, FILE *f, - fprintf_function cpu_fprintf, int flags); +/* cpu-dump.c */ +void x86_cpu_dump_local_apic_state(CPUState *cs, int flags); + +#endif /* cpu.c */ bool cpu_is_bsp(X86CPU *cpu); -void x86_cpu_xrstor_all_areas(X86CPU *cpu, const X86XSaveArea *buf); -void x86_cpu_xsave_all_areas(X86CPU *cpu, X86XSaveArea *buf); +void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen); +void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen); +uint32_t xsave_area_size(uint64_t mask, bool compacted); void x86_update_hflags(CPUX86State* env); +static inline bool hyperv_feat_enabled(X86CPU *cpu, int 
feat) +{ + return !!(cpu->hyperv_features & BIT(feat)); +} + +static inline uint64_t cr4_reserved_bits(CPUX86State *env) +{ + uint64_t reserved_bits = CR4_RESERVED_MASK; + if (!env->features[FEAT_XSAVE]) { + reserved_bits |= CR4_OSXSAVE_MASK; + } + if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SMEP)) { + reserved_bits |= CR4_SMEP_MASK; + } + if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_SMAP)) { + reserved_bits |= CR4_SMAP_MASK; + } + if (!(env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_FSGSBASE)) { + reserved_bits |= CR4_FSGSBASE_MASK; + } + if (!(env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKU)) { + reserved_bits |= CR4_PKE_MASK; + } + if (!(env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_LA57)) { + reserved_bits |= CR4_LA57_MASK; + } + if (!(env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_UMIP)) { + reserved_bits |= CR4_UMIP_MASK; + } + if (!(env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS)) { + reserved_bits |= CR4_PKS_MASK; + } + return reserved_bits; +} + +static inline bool ctl_has_irq(CPUX86State *env) +{ + uint32_t int_prio; + uint32_t tpr; + + int_prio = (env->int_ctl & V_INTR_PRIO_MASK) >> V_INTR_PRIO_SHIFT; + tpr = env->int_ctl & V_TPR_MASK; + + if (env->int_ctl & V_IGN_TPR_MASK) { + return (env->int_ctl & V_IRQ_MASK); + } + + return (env->int_ctl & V_IRQ_MASK) && (int_prio >= tpr); +} + +#if defined(TARGET_X86_64) && \ + defined(CONFIG_USER_ONLY) && \ + defined(CONFIG_LINUX) +# define TARGET_VSYSCALL_PAGE (UINT64_C(-10) << 20) +#endif + #endif /* I386_CPU_H */ diff --git a/target/i386/excp_helper.c b/target/i386/excp_helper.c deleted file mode 100644 index 49231f6b69..0000000000 --- a/target/i386/excp_helper.c +++ /dev/null @@ -1,693 +0,0 @@ -/* - * x86 exception helpers - * - * Copyright (c) 2003 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your 
option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ - -#include "qemu/osdep.h" -#include "cpu.h" -#include "exec/exec-all.h" -#include "qemu/log.h" -#include "sysemu/sysemu.h" -#include "exec/helper-proto.h" - -void helper_raise_interrupt(CPUX86State *env, int intno, int next_eip_addend) -{ - raise_interrupt(env, intno, 1, 0, next_eip_addend); -} - -void helper_raise_exception(CPUX86State *env, int exception_index) -{ - raise_exception(env, exception_index); -} - -/* - * Check nested exceptions and change to double or triple fault if - * needed. It should only be called, if this is not an interrupt. - * Returns the new exception number. 
- */ -static int check_exception(CPUX86State *env, int intno, int *error_code, - uintptr_t retaddr) -{ - int first_contributory = env->old_exception == 0 || - (env->old_exception >= 10 && - env->old_exception <= 13); - int second_contributory = intno == 0 || - (intno >= 10 && intno <= 13); - - qemu_log_mask(CPU_LOG_INT, "check_exception old: 0x%x new 0x%x\n", - env->old_exception, intno); - -#if !defined(CONFIG_USER_ONLY) - if (env->old_exception == EXCP08_DBLE) { - if (env->hflags & HF_GUEST_MASK) { - cpu_vmexit(env, SVM_EXIT_SHUTDOWN, 0, retaddr); /* does not return */ - } - - qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); - - qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); - return EXCP_HLT; - } -#endif - - if ((first_contributory && second_contributory) - || (env->old_exception == EXCP0E_PAGE && - (second_contributory || (intno == EXCP0E_PAGE)))) { - intno = EXCP08_DBLE; - *error_code = 0; - } - - if (second_contributory || (intno == EXCP0E_PAGE) || - (intno == EXCP08_DBLE)) { - env->old_exception = intno; - } - - return intno; -} - -/* - * Signal an interruption. It is executed in the main CPU loop. - * is_int is TRUE if coming from the int instruction. next_eip is the - * env->eip value AFTER the interrupt instruction. It is only relevant if - * is_int is TRUE. 
- */ -static void QEMU_NORETURN raise_interrupt2(CPUX86State *env, int intno, - int is_int, int error_code, - int next_eip_addend, - uintptr_t retaddr) -{ - CPUState *cs = CPU(x86_env_get_cpu(env)); - - if (!is_int) { - cpu_svm_check_intercept_param(env, SVM_EXIT_EXCP_BASE + intno, - error_code, retaddr); - intno = check_exception(env, intno, &error_code, retaddr); - } else { - cpu_svm_check_intercept_param(env, SVM_EXIT_SWINT, 0, retaddr); - } - - cs->exception_index = intno; - env->error_code = error_code; - env->exception_is_int = is_int; - env->exception_next_eip = env->eip + next_eip_addend; - cpu_loop_exit_restore(cs, retaddr); -} - -/* shortcuts to generate exceptions */ - -void QEMU_NORETURN raise_interrupt(CPUX86State *env, int intno, int is_int, - int error_code, int next_eip_addend) -{ - raise_interrupt2(env, intno, is_int, error_code, next_eip_addend, 0); -} - -void raise_exception_err(CPUX86State *env, int exception_index, - int error_code) -{ - raise_interrupt2(env, exception_index, 0, error_code, 0, 0); -} - -void raise_exception_err_ra(CPUX86State *env, int exception_index, - int error_code, uintptr_t retaddr) -{ - raise_interrupt2(env, exception_index, 0, error_code, 0, retaddr); -} - -void raise_exception(CPUX86State *env, int exception_index) -{ - raise_interrupt2(env, exception_index, 0, 0, 0, 0); -} - -void raise_exception_ra(CPUX86State *env, int exception_index, uintptr_t retaddr) -{ - raise_interrupt2(env, exception_index, 0, 0, 0, retaddr); -} - -#if defined(CONFIG_USER_ONLY) -int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size, - int is_write, int mmu_idx) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - - /* user mode only emulation */ - is_write &= 1; - env->cr[2] = addr; - env->error_code = (is_write << PG_ERROR_W_BIT); - env->error_code |= PG_ERROR_U_MASK; - cs->exception_index = EXCP0E_PAGE; - env->exception_is_int = 0; - env->exception_next_eip = -1; - return 1; -} - -#else - -static hwaddr 
get_hphys(CPUState *cs, hwaddr gphys, MMUAccessType access_type, - int *prot) -{ - CPUX86State *env = &X86_CPU(cs)->env; - uint64_t rsvd_mask = PG_HI_RSVD_MASK; - uint64_t ptep, pte; - uint64_t exit_info_1 = 0; - target_ulong pde_addr, pte_addr; - uint32_t page_offset; - int page_size; - - if (likely(!(env->hflags2 & HF2_NPT_MASK))) { - return gphys; - } - - if (!(env->nested_pg_mode & SVM_NPT_NXE)) { - rsvd_mask |= PG_NX_MASK; - } - - if (env->nested_pg_mode & SVM_NPT_PAE) { - uint64_t pde, pdpe; - target_ulong pdpe_addr; - -#ifdef TARGET_X86_64 - if (env->nested_pg_mode & SVM_NPT_LMA) { - uint64_t pml5e; - uint64_t pml4e_addr, pml4e; - - pml5e = env->nested_cr3; - ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK; - - pml4e_addr = (pml5e & PG_ADDRESS_MASK) + - (((gphys >> 39) & 0x1ff) << 3); - pml4e = x86_ldq_phys(cs, pml4e_addr); - if (!(pml4e & PG_PRESENT_MASK)) { - goto do_fault; - } - if (pml4e & (rsvd_mask | PG_PSE_MASK)) { - goto do_fault_rsvd; - } - if (!(pml4e & PG_ACCESSED_MASK)) { - pml4e |= PG_ACCESSED_MASK; - x86_stl_phys_notdirty(cs, pml4e_addr, pml4e); - } - ptep &= pml4e ^ PG_NX_MASK; - pdpe_addr = (pml4e & PG_ADDRESS_MASK) + - (((gphys >> 30) & 0x1ff) << 3); - pdpe = x86_ldq_phys(cs, pdpe_addr); - if (!(pdpe & PG_PRESENT_MASK)) { - goto do_fault; - } - if (pdpe & rsvd_mask) { - goto do_fault_rsvd; - } - ptep &= pdpe ^ PG_NX_MASK; - if (!(pdpe & PG_ACCESSED_MASK)) { - pdpe |= PG_ACCESSED_MASK; - x86_stl_phys_notdirty(cs, pdpe_addr, pdpe); - } - if (pdpe & PG_PSE_MASK) { - /* 1 GB page */ - page_size = 1024 * 1024 * 1024; - pte_addr = pdpe_addr; - pte = pdpe; - goto do_check_protect; - } - } else -#endif - { - pdpe_addr = (env->nested_cr3 & ~0x1f) + ((gphys >> 27) & 0x18); - pdpe = x86_ldq_phys(cs, pdpe_addr); - if (!(pdpe & PG_PRESENT_MASK)) { - goto do_fault; - } - rsvd_mask |= PG_HI_USER_MASK; - if (pdpe & (rsvd_mask | PG_NX_MASK)) { - goto do_fault_rsvd; - } - ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK; - } - - pde_addr = (pdpe & PG_ADDRESS_MASK) 
+ (((gphys >> 21) & 0x1ff) << 3); - pde = x86_ldq_phys(cs, pde_addr); - if (!(pde & PG_PRESENT_MASK)) { - goto do_fault; - } - if (pde & rsvd_mask) { - goto do_fault_rsvd; - } - ptep &= pde ^ PG_NX_MASK; - if (pde & PG_PSE_MASK) { - /* 2 MB page */ - page_size = 2048 * 1024; - pte_addr = pde_addr; - pte = pde; - goto do_check_protect; - } - /* 4 KB page */ - if (!(pde & PG_ACCESSED_MASK)) { - pde |= PG_ACCESSED_MASK; - x86_stl_phys_notdirty(cs, pde_addr, pde); - } - pte_addr = (pde & PG_ADDRESS_MASK) + (((gphys >> 12) & 0x1ff) << 3); - pte = x86_ldq_phys(cs, pte_addr); - if (!(pte & PG_PRESENT_MASK)) { - goto do_fault; - } - if (pte & rsvd_mask) { - goto do_fault_rsvd; - } - /* combine pde and pte nx, user and rw protections */ - ptep &= pte ^ PG_NX_MASK; - page_size = 4096; - } else { - uint32_t pde; - - /* page directory entry */ - pde_addr = (env->nested_cr3 & ~0xfff) + ((gphys >> 20) & 0xffc); - pde = x86_ldl_phys(cs, pde_addr); - if (!(pde & PG_PRESENT_MASK)) { - goto do_fault; - } - ptep = pde | PG_NX_MASK; - - /* if PSE bit is set, then we use a 4MB page */ - if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { - page_size = 4096 * 1024; - pte_addr = pde_addr; - - /* Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved. - * Leave bits 20-13 in place for setting accessed/dirty bits below. 
- */ - pte = pde | ((pde & 0x1fe000LL) << (32 - 13)); - rsvd_mask = 0x200000; - goto do_check_protect_pse36; - } - - if (!(pde & PG_ACCESSED_MASK)) { - pde |= PG_ACCESSED_MASK; - x86_stl_phys_notdirty(cs, pde_addr, pde); - } - - /* page directory entry */ - pte_addr = (pde & ~0xfff) + ((gphys >> 10) & 0xffc); - pte = x86_ldl_phys(cs, pte_addr); - if (!(pte & PG_PRESENT_MASK)) { - goto do_fault; - } - /* combine pde and pte user and rw protections */ - ptep &= pte | PG_NX_MASK; - page_size = 4096; - rsvd_mask = 0; - } - - do_check_protect: - rsvd_mask |= (page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK; - do_check_protect_pse36: - if (pte & rsvd_mask) { - goto do_fault_rsvd; - } - ptep ^= PG_NX_MASK; - - if (!(ptep & PG_USER_MASK)) { - goto do_fault_protect; - } - if (ptep & PG_NX_MASK) { - if (access_type == MMU_INST_FETCH) { - goto do_fault_protect; - } - *prot &= ~PAGE_EXEC; - } - if (!(ptep & PG_RW_MASK)) { - if (access_type == MMU_DATA_STORE) { - goto do_fault_protect; - } - *prot &= ~PAGE_WRITE; - } - - pte &= PG_ADDRESS_MASK & ~(page_size - 1); - page_offset = gphys & (page_size - 1); - return pte + page_offset; - - do_fault_rsvd: - exit_info_1 |= SVM_NPTEXIT_RSVD; - do_fault_protect: - exit_info_1 |= SVM_NPTEXIT_P; - do_fault: - x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), - gphys); - exit_info_1 |= SVM_NPTEXIT_US; - if (access_type == MMU_DATA_STORE) { - exit_info_1 |= SVM_NPTEXIT_RW; - } else if (access_type == MMU_INST_FETCH) { - exit_info_1 |= SVM_NPTEXIT_ID; - } - if (prot) { - exit_info_1 |= SVM_NPTEXIT_GPA; - } else { /* page table access */ - exit_info_1 |= SVM_NPTEXIT_GPT; - } - cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, env->retaddr); -} - -/* return value: - * -1 = cannot handle fault - * 0 = nothing more to do - * 1 = generate PF fault - */ -int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, int size, - int is_write1, int mmu_idx) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - uint64_t 
ptep, pte; - int32_t a20_mask; - target_ulong pde_addr, pte_addr; - int error_code = 0; - int is_dirty, prot, page_size, is_write, is_user; - hwaddr paddr; - uint64_t rsvd_mask = PG_HI_RSVD_MASK; - uint32_t page_offset; - target_ulong vaddr; - - is_user = mmu_idx == MMU_USER_IDX; -#if defined(DEBUG_MMU) - printf("MMU fault: addr=%" VADDR_PRIx " w=%d u=%d eip=" TARGET_FMT_lx "\n", - addr, is_write1, is_user, env->eip); -#endif - is_write = is_write1 & 1; - - a20_mask = x86_get_a20_mask(env); - if (!(env->cr[0] & CR0_PG_MASK)) { - pte = addr; -#ifdef TARGET_X86_64 - if (!(env->hflags & HF_LMA_MASK)) { - /* Without long mode we can only address 32bits in real mode */ - pte = (uint32_t)pte; - } -#endif - prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - page_size = 4096; - goto do_mapping; - } - - if (!(env->efer & MSR_EFER_NXE)) { - rsvd_mask |= PG_NX_MASK; - } - - if (env->cr[4] & CR4_PAE_MASK) { - uint64_t pde, pdpe; - target_ulong pdpe_addr; - -#ifdef TARGET_X86_64 - if (env->hflags & HF_LMA_MASK) { - bool la57 = env->cr[4] & CR4_LA57_MASK; - uint64_t pml5e_addr, pml5e; - uint64_t pml4e_addr, pml4e; - int32_t sext; - - /* test virtual address sign extension */ - sext = la57 ? 
(int64_t)addr >> 56 : (int64_t)addr >> 47; - if (sext != 0 && sext != -1) { - env->error_code = 0; - cs->exception_index = EXCP0D_GPF; - return 1; - } - - if (la57) { - pml5e_addr = ((env->cr[3] & ~0xfff) + - (((addr >> 48) & 0x1ff) << 3)) & a20_mask; - pml5e_addr = get_hphys(cs, pml5e_addr, MMU_DATA_STORE, NULL); - pml5e = x86_ldq_phys(cs, pml5e_addr); - if (!(pml5e & PG_PRESENT_MASK)) { - goto do_fault; - } - if (pml5e & (rsvd_mask | PG_PSE_MASK)) { - goto do_fault_rsvd; - } - if (!(pml5e & PG_ACCESSED_MASK)) { - pml5e |= PG_ACCESSED_MASK; - x86_stl_phys_notdirty(cs, pml5e_addr, pml5e); - } - ptep = pml5e ^ PG_NX_MASK; - } else { - pml5e = env->cr[3]; - ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK; - } - - pml4e_addr = ((pml5e & PG_ADDRESS_MASK) + - (((addr >> 39) & 0x1ff) << 3)) & a20_mask; - pml4e_addr = get_hphys(cs, pml4e_addr, MMU_DATA_STORE, false); - pml4e = x86_ldq_phys(cs, pml4e_addr); - if (!(pml4e & PG_PRESENT_MASK)) { - goto do_fault; - } - if (pml4e & (rsvd_mask | PG_PSE_MASK)) { - goto do_fault_rsvd; - } - if (!(pml4e & PG_ACCESSED_MASK)) { - pml4e |= PG_ACCESSED_MASK; - x86_stl_phys_notdirty(cs, pml4e_addr, pml4e); - } - ptep &= pml4e ^ PG_NX_MASK; - pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) & - a20_mask; - pdpe_addr = get_hphys(cs, pdpe_addr, MMU_DATA_STORE, NULL); - pdpe = x86_ldq_phys(cs, pdpe_addr); - if (!(pdpe & PG_PRESENT_MASK)) { - goto do_fault; - } - if (pdpe & rsvd_mask) { - goto do_fault_rsvd; - } - ptep &= pdpe ^ PG_NX_MASK; - if (!(pdpe & PG_ACCESSED_MASK)) { - pdpe |= PG_ACCESSED_MASK; - x86_stl_phys_notdirty(cs, pdpe_addr, pdpe); - } - if (pdpe & PG_PSE_MASK) { - /* 1 GB page */ - page_size = 1024 * 1024 * 1024; - pte_addr = pdpe_addr; - pte = pdpe; - goto do_check_protect; - } - } else -#endif - { - /* XXX: load them when cr3 is loaded ? 
*/ - pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) & - a20_mask; - pdpe_addr = get_hphys(cs, pdpe_addr, MMU_DATA_STORE, false); - pdpe = x86_ldq_phys(cs, pdpe_addr); - if (!(pdpe & PG_PRESENT_MASK)) { - goto do_fault; - } - rsvd_mask |= PG_HI_USER_MASK; - if (pdpe & (rsvd_mask | PG_NX_MASK)) { - goto do_fault_rsvd; - } - ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK; - } - - pde_addr = ((pdpe & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3)) & - a20_mask; - pde_addr = get_hphys(cs, pde_addr, MMU_DATA_STORE, NULL); - pde = x86_ldq_phys(cs, pde_addr); - if (!(pde & PG_PRESENT_MASK)) { - goto do_fault; - } - if (pde & rsvd_mask) { - goto do_fault_rsvd; - } - ptep &= pde ^ PG_NX_MASK; - if (pde & PG_PSE_MASK) { - /* 2 MB page */ - page_size = 2048 * 1024; - pte_addr = pde_addr; - pte = pde; - goto do_check_protect; - } - /* 4 KB page */ - if (!(pde & PG_ACCESSED_MASK)) { - pde |= PG_ACCESSED_MASK; - x86_stl_phys_notdirty(cs, pde_addr, pde); - } - pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) & - a20_mask; - pte_addr = get_hphys(cs, pte_addr, MMU_DATA_STORE, NULL); - pte = x86_ldq_phys(cs, pte_addr); - if (!(pte & PG_PRESENT_MASK)) { - goto do_fault; - } - if (pte & rsvd_mask) { - goto do_fault_rsvd; - } - /* combine pde and pte nx, user and rw protections */ - ptep &= pte ^ PG_NX_MASK; - page_size = 4096; - } else { - uint32_t pde; - - /* page directory entry */ - pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & - a20_mask; - pde_addr = get_hphys(cs, pde_addr, MMU_DATA_STORE, NULL); - pde = x86_ldl_phys(cs, pde_addr); - if (!(pde & PG_PRESENT_MASK)) { - goto do_fault; - } - ptep = pde | PG_NX_MASK; - - /* if PSE bit is set, then we use a 4MB page */ - if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { - page_size = 4096 * 1024; - pte_addr = pde_addr; - - /* Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved. - * Leave bits 20-13 in place for setting accessed/dirty bits below. 
- */ - pte = pde | ((pde & 0x1fe000LL) << (32 - 13)); - rsvd_mask = 0x200000; - goto do_check_protect_pse36; - } - - if (!(pde & PG_ACCESSED_MASK)) { - pde |= PG_ACCESSED_MASK; - x86_stl_phys_notdirty(cs, pde_addr, pde); - } - - /* page directory entry */ - pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & - a20_mask; - pte_addr = get_hphys(cs, pte_addr, MMU_DATA_STORE, NULL); - pte = x86_ldl_phys(cs, pte_addr); - if (!(pte & PG_PRESENT_MASK)) { - goto do_fault; - } - /* combine pde and pte user and rw protections */ - ptep &= pte | PG_NX_MASK; - page_size = 4096; - rsvd_mask = 0; - } - -do_check_protect: - rsvd_mask |= (page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK; -do_check_protect_pse36: - if (pte & rsvd_mask) { - goto do_fault_rsvd; - } - ptep ^= PG_NX_MASK; - - /* can the page can be put in the TLB? prot will tell us */ - if (is_user && !(ptep & PG_USER_MASK)) { - goto do_fault_protect; - } - - prot = 0; - if (mmu_idx != MMU_KSMAP_IDX || !(ptep & PG_USER_MASK)) { - prot |= PAGE_READ; - if ((ptep & PG_RW_MASK) || (!is_user && !(env->cr[0] & CR0_WP_MASK))) { - prot |= PAGE_WRITE; - } - } - if (!(ptep & PG_NX_MASK) && - (mmu_idx == MMU_USER_IDX || - !((env->cr[4] & CR4_SMEP_MASK) && (ptep & PG_USER_MASK)))) { - prot |= PAGE_EXEC; - } - if ((env->cr[4] & CR4_PKE_MASK) && (env->hflags & HF_LMA_MASK) && - (ptep & PG_USER_MASK) && env->pkru) { - uint32_t pk = (pte & PG_PKRU_MASK) >> PG_PKRU_BIT; - uint32_t pkru_ad = (env->pkru >> pk * 2) & 1; - uint32_t pkru_wd = (env->pkru >> pk * 2) & 2; - uint32_t pkru_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; - - if (pkru_ad) { - pkru_prot &= ~(PAGE_READ | PAGE_WRITE); - } else if (pkru_wd && (is_user || env->cr[0] & CR0_WP_MASK)) { - pkru_prot &= ~PAGE_WRITE; - } - - prot &= pkru_prot; - if ((pkru_prot & (1 << is_write1)) == 0) { - assert(is_write1 != 2); - error_code |= PG_ERROR_PK_MASK; - goto do_fault_protect; - } - } - - if ((prot & (1 << is_write1)) == 0) { - goto do_fault_protect; - } - - /* yes, it can! 
*/ - is_dirty = is_write && !(pte & PG_DIRTY_MASK); - if (!(pte & PG_ACCESSED_MASK) || is_dirty) { - pte |= PG_ACCESSED_MASK; - if (is_dirty) { - pte |= PG_DIRTY_MASK; - } - x86_stl_phys_notdirty(cs, pte_addr, pte); - } - - if (!(pte & PG_DIRTY_MASK)) { - /* only set write access if already dirty... otherwise wait - for dirty access */ - assert(!is_write); - prot &= ~PAGE_WRITE; - } - - do_mapping: - pte = pte & a20_mask; - - /* align to page_size */ - pte &= PG_ADDRESS_MASK & ~(page_size - 1); - page_offset = addr & (page_size - 1); - paddr = get_hphys(cs, pte + page_offset, is_write1, &prot); - - /* Even if 4MB pages, we map only one 4KB page in the cache to - avoid filling it too fast */ - vaddr = addr & TARGET_PAGE_MASK; - paddr &= TARGET_PAGE_MASK; - - assert(prot & (1 << is_write1)); - tlb_set_page_with_attrs(cs, vaddr, paddr, cpu_get_mem_attrs(env), - prot, mmu_idx, page_size); - return 0; - do_fault_rsvd: - error_code |= PG_ERROR_RSVD_MASK; - do_fault_protect: - error_code |= PG_ERROR_P_MASK; - do_fault: - error_code |= (is_write << PG_ERROR_W_BIT); - if (is_user) - error_code |= PG_ERROR_U_MASK; - if (is_write1 == 2 && - (((env->efer & MSR_EFER_NXE) && - (env->cr[4] & CR4_PAE_MASK)) || - (env->cr[4] & CR4_SMEP_MASK))) - error_code |= PG_ERROR_I_D_MASK; - if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) { - /* cr2 is not modified in case of exceptions */ - x86_stq_phys(cs, - env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), - addr); - } else { - env->cr[2] = addr; - } - env->error_code = error_code; - cs->exception_index = EXCP0E_PAGE; - return 1; -} -#endif diff --git a/target/i386/fpu_helper.c b/target/i386/fpu_helper.c deleted file mode 100644 index ea5a0c4861..0000000000 --- a/target/i386/fpu_helper.c +++ /dev/null @@ -1,1613 +0,0 @@ -/* - * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers - * - * Copyright (c) 2003 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of 
the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ - -#include "qemu/osdep.h" -#include <math.h> -#include "cpu.h" -#include "exec/helper-proto.h" -#include "qemu/host-utils.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" -#include "fpu/softfloat.h" - -#define FPU_RC_MASK 0xc00 -#define FPU_RC_NEAR 0x000 -#define FPU_RC_DOWN 0x400 -#define FPU_RC_UP 0x800 -#define FPU_RC_CHOP 0xc00 - -#define MAXTAN 9223372036854775808.0 - -/* the following deal with x86 long double-precision numbers */ -#define MAXEXPD 0x7fff -#define EXPBIAS 16383 -#define EXPD(fp) (fp.l.upper & 0x7fff) -#define SIGND(fp) ((fp.l.upper) & 0x8000) -#define MANTD(fp) (fp.l.lower) -#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS - -#define FPUS_IE (1 << 0) -#define FPUS_DE (1 << 1) -#define FPUS_ZE (1 << 2) -#define FPUS_OE (1 << 3) -#define FPUS_UE (1 << 4) -#define FPUS_PE (1 << 5) -#define FPUS_SF (1 << 6) -#define FPUS_SE (1 << 7) -#define FPUS_B (1 << 15) - -#define FPUC_EM 0x3f - -#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) -#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) -#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) - -static inline void fpush(CPUX86State *env) -{ - env->fpstt = (env->fpstt - 1) & 7; - env->fptags[env->fpstt] = 0; /* validate stack entry */ -} - -static inline void fpop(CPUX86State *env) -{ - env->fptags[env->fpstt] = 1; /* invalidate stack entry */ - env->fpstt 
= (env->fpstt + 1) & 7; -} - -static inline floatx80 helper_fldt(CPUX86State *env, target_ulong ptr, - uintptr_t retaddr) -{ - CPU_LDoubleU temp; - - temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); - temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); - return temp.d; -} - -static inline void helper_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, - uintptr_t retaddr) -{ - CPU_LDoubleU temp; - - temp.d = f; - cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); - cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); -} - -/* x87 FPU helpers */ - -static inline double floatx80_to_double(CPUX86State *env, floatx80 a) -{ - union { - float64 f64; - double d; - } u; - - u.f64 = floatx80_to_float64(a, &env->fp_status); - return u.d; -} - -static inline floatx80 double_to_floatx80(CPUX86State *env, double a) -{ - union { - float64 f64; - double d; - } u; - - u.d = a; - return float64_to_floatx80(u.f64, &env->fp_status); -} - -static void fpu_set_exception(CPUX86State *env, int mask) -{ - env->fpus |= mask; - if (env->fpus & (~env->fpuc & FPUC_EM)) { - env->fpus |= FPUS_SE | FPUS_B; - } -} - -static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) -{ - if (floatx80_is_zero(b)) { - fpu_set_exception(env, FPUS_ZE); - } - return floatx80_div(a, b, &env->fp_status); -} - -static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) -{ - if (env->cr[0] & CR0_NE_MASK) { - raise_exception_ra(env, EXCP10_COPR, retaddr); - } -#if !defined(CONFIG_USER_ONLY) - else { - cpu_set_ferr(env); - } -#endif -} - -void helper_flds_FT0(CPUX86State *env, uint32_t val) -{ - union { - float32 f; - uint32_t i; - } u; - - u.i = val; - FT0 = float32_to_floatx80(u.f, &env->fp_status); -} - -void helper_fldl_FT0(CPUX86State *env, uint64_t val) -{ - union { - float64 f; - uint64_t i; - } u; - - u.i = val; - FT0 = float64_to_floatx80(u.f, &env->fp_status); -} - -void helper_fildl_FT0(CPUX86State *env, int32_t val) -{ - FT0 = int32_to_floatx80(val, &env->fp_status); 
-} - -void helper_flds_ST0(CPUX86State *env, uint32_t val) -{ - int new_fpstt; - union { - float32 f; - uint32_t i; - } u; - - new_fpstt = (env->fpstt - 1) & 7; - u.i = val; - env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); - env->fpstt = new_fpstt; - env->fptags[new_fpstt] = 0; /* validate stack entry */ -} - -void helper_fldl_ST0(CPUX86State *env, uint64_t val) -{ - int new_fpstt; - union { - float64 f; - uint64_t i; - } u; - - new_fpstt = (env->fpstt - 1) & 7; - u.i = val; - env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); - env->fpstt = new_fpstt; - env->fptags[new_fpstt] = 0; /* validate stack entry */ -} - -void helper_fildl_ST0(CPUX86State *env, int32_t val) -{ - int new_fpstt; - - new_fpstt = (env->fpstt - 1) & 7; - env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); - env->fpstt = new_fpstt; - env->fptags[new_fpstt] = 0; /* validate stack entry */ -} - -void helper_fildll_ST0(CPUX86State *env, int64_t val) -{ - int new_fpstt; - - new_fpstt = (env->fpstt - 1) & 7; - env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); - env->fpstt = new_fpstt; - env->fptags[new_fpstt] = 0; /* validate stack entry */ -} - -uint32_t helper_fsts_ST0(CPUX86State *env) -{ - union { - float32 f; - uint32_t i; - } u; - - u.f = floatx80_to_float32(ST0, &env->fp_status); - return u.i; -} - -uint64_t helper_fstl_ST0(CPUX86State *env) -{ - union { - float64 f; - uint64_t i; - } u; - - u.f = floatx80_to_float64(ST0, &env->fp_status); - return u.i; -} - -int32_t helper_fist_ST0(CPUX86State *env) -{ - int32_t val; - - val = floatx80_to_int32(ST0, &env->fp_status); - if (val != (int16_t)val) { - val = -32768; - } - return val; -} - -int32_t helper_fistl_ST0(CPUX86State *env) -{ - int32_t val; - signed char old_exp_flags; - - old_exp_flags = get_float_exception_flags(&env->fp_status); - set_float_exception_flags(0, &env->fp_status); - - val = floatx80_to_int32(ST0, &env->fp_status); - if 
(get_float_exception_flags(&env->fp_status) & float_flag_invalid) { - val = 0x80000000; - } - set_float_exception_flags(get_float_exception_flags(&env->fp_status) - | old_exp_flags, &env->fp_status); - return val; -} - -int64_t helper_fistll_ST0(CPUX86State *env) -{ - int64_t val; - signed char old_exp_flags; - - old_exp_flags = get_float_exception_flags(&env->fp_status); - set_float_exception_flags(0, &env->fp_status); - - val = floatx80_to_int64(ST0, &env->fp_status); - if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { - val = 0x8000000000000000ULL; - } - set_float_exception_flags(get_float_exception_flags(&env->fp_status) - | old_exp_flags, &env->fp_status); - return val; -} - -int32_t helper_fistt_ST0(CPUX86State *env) -{ - int32_t val; - - val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); - if (val != (int16_t)val) { - val = -32768; - } - return val; -} - -int32_t helper_fisttl_ST0(CPUX86State *env) -{ - return floatx80_to_int32_round_to_zero(ST0, &env->fp_status); -} - -int64_t helper_fisttll_ST0(CPUX86State *env) -{ - return floatx80_to_int64_round_to_zero(ST0, &env->fp_status); -} - -void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) -{ - int new_fpstt; - - new_fpstt = (env->fpstt - 1) & 7; - env->fpregs[new_fpstt].d = helper_fldt(env, ptr, GETPC()); - env->fpstt = new_fpstt; - env->fptags[new_fpstt] = 0; /* validate stack entry */ -} - -void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) -{ - helper_fstt(env, ST0, ptr, GETPC()); -} - -void helper_fpush(CPUX86State *env) -{ - fpush(env); -} - -void helper_fpop(CPUX86State *env) -{ - fpop(env); -} - -void helper_fdecstp(CPUX86State *env) -{ - env->fpstt = (env->fpstt - 1) & 7; - env->fpus &= ~0x4700; -} - -void helper_fincstp(CPUX86State *env) -{ - env->fpstt = (env->fpstt + 1) & 7; - env->fpus &= ~0x4700; -} - -/* FPU move */ - -void helper_ffree_STN(CPUX86State *env, int st_index) -{ - env->fptags[(env->fpstt + st_index) & 7] = 1; -} - -void 
helper_fmov_ST0_FT0(CPUX86State *env) -{ - ST0 = FT0; -} - -void helper_fmov_FT0_STN(CPUX86State *env, int st_index) -{ - FT0 = ST(st_index); -} - -void helper_fmov_ST0_STN(CPUX86State *env, int st_index) -{ - ST0 = ST(st_index); -} - -void helper_fmov_STN_ST0(CPUX86State *env, int st_index) -{ - ST(st_index) = ST0; -} - -void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) -{ - floatx80 tmp; - - tmp = ST(st_index); - ST(st_index) = ST0; - ST0 = tmp; -} - -/* FPU operations */ - -static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; - -void helper_fcom_ST0_FT0(CPUX86State *env) -{ - int ret; - - ret = floatx80_compare(ST0, FT0, &env->fp_status); - env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; -} - -void helper_fucom_ST0_FT0(CPUX86State *env) -{ - int ret; - - ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); - env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; -} - -static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; - -void helper_fcomi_ST0_FT0(CPUX86State *env) -{ - int eflags; - int ret; - - ret = floatx80_compare(ST0, FT0, &env->fp_status); - eflags = cpu_cc_compute_all(env, CC_OP); - eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; - CC_SRC = eflags; -} - -void helper_fucomi_ST0_FT0(CPUX86State *env) -{ - int eflags; - int ret; - - ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); - eflags = cpu_cc_compute_all(env, CC_OP); - eflags = (eflags & ~(CC_Z | CC_P | CC_C)) | fcomi_ccval[ret + 1]; - CC_SRC = eflags; -} - -void helper_fadd_ST0_FT0(CPUX86State *env) -{ - ST0 = floatx80_add(ST0, FT0, &env->fp_status); -} - -void helper_fmul_ST0_FT0(CPUX86State *env) -{ - ST0 = floatx80_mul(ST0, FT0, &env->fp_status); -} - -void helper_fsub_ST0_FT0(CPUX86State *env) -{ - ST0 = floatx80_sub(ST0, FT0, &env->fp_status); -} - -void helper_fsubr_ST0_FT0(CPUX86State *env) -{ - ST0 = floatx80_sub(FT0, ST0, &env->fp_status); -} - -void helper_fdiv_ST0_FT0(CPUX86State *env) -{ - ST0 = 
helper_fdiv(env, ST0, FT0); -} - -void helper_fdivr_ST0_FT0(CPUX86State *env) -{ - ST0 = helper_fdiv(env, FT0, ST0); -} - -/* fp operations between STN and ST0 */ - -void helper_fadd_STN_ST0(CPUX86State *env, int st_index) -{ - ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); -} - -void helper_fmul_STN_ST0(CPUX86State *env, int st_index) -{ - ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); -} - -void helper_fsub_STN_ST0(CPUX86State *env, int st_index) -{ - ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); -} - -void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) -{ - ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); -} - -void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) -{ - floatx80 *p; - - p = &ST(st_index); - *p = helper_fdiv(env, *p, ST0); -} - -void helper_fdivr_STN_ST0(CPUX86State *env, int st_index) -{ - floatx80 *p; - - p = &ST(st_index); - *p = helper_fdiv(env, ST0, *p); -} - -/* misc FPU operations */ -void helper_fchs_ST0(CPUX86State *env) -{ - ST0 = floatx80_chs(ST0); -} - -void helper_fabs_ST0(CPUX86State *env) -{ - ST0 = floatx80_abs(ST0); -} - -void helper_fld1_ST0(CPUX86State *env) -{ - ST0 = floatx80_one; -} - -void helper_fldl2t_ST0(CPUX86State *env) -{ - ST0 = floatx80_l2t; -} - -void helper_fldl2e_ST0(CPUX86State *env) -{ - ST0 = floatx80_l2e; -} - -void helper_fldpi_ST0(CPUX86State *env) -{ - ST0 = floatx80_pi; -} - -void helper_fldlg2_ST0(CPUX86State *env) -{ - ST0 = floatx80_lg2; -} - -void helper_fldln2_ST0(CPUX86State *env) -{ - ST0 = floatx80_ln2; -} - -void helper_fldz_ST0(CPUX86State *env) -{ - ST0 = floatx80_zero; -} - -void helper_fldz_FT0(CPUX86State *env) -{ - FT0 = floatx80_zero; -} - -uint32_t helper_fnstsw(CPUX86State *env) -{ - return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; -} - -uint32_t helper_fnstcw(CPUX86State *env) -{ - return env->fpuc; -} - -void update_fp_status(CPUX86State *env) -{ - int rnd_type; - - /* set rounding mode */ - switch 
(env->fpuc & FPU_RC_MASK) { - default: - case FPU_RC_NEAR: - rnd_type = float_round_nearest_even; - break; - case FPU_RC_DOWN: - rnd_type = float_round_down; - break; - case FPU_RC_UP: - rnd_type = float_round_up; - break; - case FPU_RC_CHOP: - rnd_type = float_round_to_zero; - break; - } - set_float_rounding_mode(rnd_type, &env->fp_status); - switch ((env->fpuc >> 8) & 3) { - case 0: - rnd_type = 32; - break; - case 2: - rnd_type = 64; - break; - case 3: - default: - rnd_type = 80; - break; - } - set_floatx80_rounding_precision(rnd_type, &env->fp_status); -} - -void helper_fldcw(CPUX86State *env, uint32_t val) -{ - cpu_set_fpuc(env, val); -} - -void helper_fclex(CPUX86State *env) -{ - env->fpus &= 0x7f00; -} - -void helper_fwait(CPUX86State *env) -{ - if (env->fpus & FPUS_SE) { - fpu_raise_exception(env, GETPC()); - } -} - -void helper_fninit(CPUX86State *env) -{ - env->fpus = 0; - env->fpstt = 0; - cpu_set_fpuc(env, 0x37f); - env->fptags[0] = 1; - env->fptags[1] = 1; - env->fptags[2] = 1; - env->fptags[3] = 1; - env->fptags[4] = 1; - env->fptags[5] = 1; - env->fptags[6] = 1; - env->fptags[7] = 1; -} - -/* BCD ops */ - -void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) -{ - floatx80 tmp; - uint64_t val; - unsigned int v; - int i; - - val = 0; - for (i = 8; i >= 0; i--) { - v = cpu_ldub_data_ra(env, ptr + i, GETPC()); - val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); - } - tmp = int64_to_floatx80(val, &env->fp_status); - if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { - tmp = floatx80_chs(tmp); - } - fpush(env); - ST0 = tmp; -} - -void helper_fbst_ST0(CPUX86State *env, target_ulong ptr) -{ - int v; - target_ulong mem_ref, mem_end; - int64_t val; - - val = floatx80_to_int64(ST0, &env->fp_status); - mem_ref = ptr; - mem_end = mem_ref + 9; - if (val < 0) { - cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); - val = -val; - } else { - cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); - } - while (mem_ref < mem_end) { - if (val == 0) { - break; - } - v = val % 
100; - val = val / 100; - v = ((v / 10) << 4) | (v % 10); - cpu_stb_data_ra(env, mem_ref++, v, GETPC()); - } - while (mem_ref < mem_end) { - cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); - } -} - -void helper_f2xm1(CPUX86State *env) -{ - double val = floatx80_to_double(env, ST0); - - val = pow(2.0, val) - 1.0; - ST0 = double_to_floatx80(env, val); -} - -void helper_fyl2x(CPUX86State *env) -{ - double fptemp = floatx80_to_double(env, ST0); - - if (fptemp > 0.0) { - fptemp = log(fptemp) / log(2.0); /* log2(ST) */ - fptemp *= floatx80_to_double(env, ST1); - ST1 = double_to_floatx80(env, fptemp); - fpop(env); - } else { - env->fpus &= ~0x4700; - env->fpus |= 0x400; - } -} - -void helper_fptan(CPUX86State *env) -{ - double fptemp = floatx80_to_double(env, ST0); - - if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { - env->fpus |= 0x400; - } else { - fptemp = tan(fptemp); - ST0 = double_to_floatx80(env, fptemp); - fpush(env); - ST0 = floatx80_one; - env->fpus &= ~0x400; /* C2 <-- 0 */ - /* the above code is for |arg| < 2**52 only */ - } -} - -void helper_fpatan(CPUX86State *env) -{ - double fptemp, fpsrcop; - - fpsrcop = floatx80_to_double(env, ST1); - fptemp = floatx80_to_double(env, ST0); - ST1 = double_to_floatx80(env, atan2(fpsrcop, fptemp)); - fpop(env); -} - -void helper_fxtract(CPUX86State *env) -{ - CPU_LDoubleU temp; - - temp.d = ST0; - - if (floatx80_is_zero(ST0)) { - /* Easy way to generate -inf and raising division by 0 exception */ - ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, - &env->fp_status); - fpush(env); - ST0 = temp.d; - } else { - int expdif; - - expdif = EXPD(temp) - EXPBIAS; - /* DP exponent bias */ - ST0 = int32_to_floatx80(expdif, &env->fp_status); - fpush(env); - BIASEXPONENT(temp); - ST0 = temp.d; - } -} - -void helper_fprem1(CPUX86State *env) -{ - double st0, st1, dblq, fpsrcop, fptemp; - CPU_LDoubleU fpsrcop1, fptemp1; - int expdif; - signed long long int q; - - st0 = floatx80_to_double(env, ST0); - st1 = floatx80_to_double(env, 
ST1); - - if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { - ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - return; - } - - fpsrcop = st0; - fptemp = st1; - fpsrcop1.d = ST0; - fptemp1.d = ST1; - expdif = EXPD(fpsrcop1) - EXPD(fptemp1); - - if (expdif < 0) { - /* optimisation? taken from the AMD docs */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* ST0 is unchanged */ - return; - } - - if (expdif < 53) { - dblq = fpsrcop / fptemp; - /* round dblq towards nearest integer */ - dblq = rint(dblq); - st0 = fpsrcop - fptemp * dblq; - - /* convert dblq to q by truncating towards zero */ - if (dblq < 0.0) { - q = (signed long long int)(-dblq); - } else { - q = (signed long long int)dblq; - } - - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* (C0,C3,C1) <-- (q2,q1,q0) */ - env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ - env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ - env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ - } else { - env->fpus |= 0x400; /* C2 <-- 1 */ - fptemp = pow(2.0, expdif - 50); - fpsrcop = (st0 / st1) / fptemp; - /* fpsrcop = integer obtained by chopping */ - fpsrcop = (fpsrcop < 0.0) ? - -(floor(fabs(fpsrcop))) : floor(fpsrcop); - st0 -= (st1 * fpsrcop * fptemp); - } - ST0 = double_to_floatx80(env, st0); -} - -void helper_fprem(CPUX86State *env) -{ - double st0, st1, dblq, fpsrcop, fptemp; - CPU_LDoubleU fpsrcop1, fptemp1; - int expdif; - signed long long int q; - - st0 = floatx80_to_double(env, ST0); - st1 = floatx80_to_double(env, ST1); - - if (isinf(st0) || isnan(st0) || isnan(st1) || (st1 == 0.0)) { - ST0 = double_to_floatx80(env, 0.0 / 0.0); /* NaN */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - return; - } - - fpsrcop = st0; - fptemp = st1; - fpsrcop1.d = ST0; - fptemp1.d = ST1; - expdif = EXPD(fpsrcop1) - EXPD(fptemp1); - - if (expdif < 0) { - /* optimisation? 
taken from the AMD docs */ - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* ST0 is unchanged */ - return; - } - - if (expdif < 53) { - dblq = fpsrcop / fptemp; /* ST0 / ST1 */ - /* round dblq towards zero */ - dblq = (dblq < 0.0) ? ceil(dblq) : floor(dblq); - st0 = fpsrcop - fptemp * dblq; /* fpsrcop is ST0 */ - - /* convert dblq to q by truncating towards zero */ - if (dblq < 0.0) { - q = (signed long long int)(-dblq); - } else { - q = (signed long long int)dblq; - } - - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - /* (C0,C3,C1) <-- (q2,q1,q0) */ - env->fpus |= (q & 0x4) << (8 - 2); /* (C0) <-- q2 */ - env->fpus |= (q & 0x2) << (14 - 1); /* (C3) <-- q1 */ - env->fpus |= (q & 0x1) << (9 - 0); /* (C1) <-- q0 */ - } else { - int N = 32 + (expdif % 32); /* as per AMD docs */ - - env->fpus |= 0x400; /* C2 <-- 1 */ - fptemp = pow(2.0, (double)(expdif - N)); - fpsrcop = (st0 / st1) / fptemp; - /* fpsrcop = integer obtained by chopping */ - fpsrcop = (fpsrcop < 0.0) ? - -(floor(fabs(fpsrcop))) : floor(fpsrcop); - st0 -= (st1 * fpsrcop * fptemp); - } - ST0 = double_to_floatx80(env, st0); -} - -void helper_fyl2xp1(CPUX86State *env) -{ - double fptemp = floatx80_to_double(env, ST0); - - if ((fptemp + 1.0) > 0.0) { - fptemp = log(fptemp + 1.0) / log(2.0); /* log2(ST + 1.0) */ - fptemp *= floatx80_to_double(env, ST1); - ST1 = double_to_floatx80(env, fptemp); - fpop(env); - } else { - env->fpus &= ~0x4700; - env->fpus |= 0x400; - } -} - -void helper_fsqrt(CPUX86State *env) -{ - if (floatx80_is_neg(ST0)) { - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - env->fpus |= 0x400; - } - ST0 = floatx80_sqrt(ST0, &env->fp_status); -} - -void helper_fsincos(CPUX86State *env) -{ - double fptemp = floatx80_to_double(env, ST0); - - if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { - env->fpus |= 0x400; - } else { - ST0 = double_to_floatx80(env, sin(fptemp)); - fpush(env); - ST0 = double_to_floatx80(env, cos(fptemp)); - env->fpus &= ~0x400; /* C2 <-- 0 */ - /* the above 
code is for |arg| < 2**63 only */ - } -} - -void helper_frndint(CPUX86State *env) -{ - ST0 = floatx80_round_to_int(ST0, &env->fp_status); -} - -void helper_fscale(CPUX86State *env) -{ - if (floatx80_is_any_nan(ST1)) { - ST0 = ST1; - } else { - int n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); - ST0 = floatx80_scalbn(ST0, n, &env->fp_status); - } -} - -void helper_fsin(CPUX86State *env) -{ - double fptemp = floatx80_to_double(env, ST0); - - if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { - env->fpus |= 0x400; - } else { - ST0 = double_to_floatx80(env, sin(fptemp)); - env->fpus &= ~0x400; /* C2 <-- 0 */ - /* the above code is for |arg| < 2**53 only */ - } -} - -void helper_fcos(CPUX86State *env) -{ - double fptemp = floatx80_to_double(env, ST0); - - if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { - env->fpus |= 0x400; - } else { - ST0 = double_to_floatx80(env, cos(fptemp)); - env->fpus &= ~0x400; /* C2 <-- 0 */ - /* the above code is for |arg| < 2**63 only */ - } -} - -void helper_fxam_ST0(CPUX86State *env) -{ - CPU_LDoubleU temp; - int expdif; - - temp.d = ST0; - - env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ - if (SIGND(temp)) { - env->fpus |= 0x200; /* C1 <-- 1 */ - } - - /* XXX: test fptags too */ - expdif = EXPD(temp); - if (expdif == MAXEXPD) { - if (MANTD(temp) == 0x8000000000000000ULL) { - env->fpus |= 0x500; /* Infinity */ - } else { - env->fpus |= 0x100; /* NaN */ - } - } else if (expdif == 0) { - if (MANTD(temp) == 0) { - env->fpus |= 0x4000; /* Zero */ - } else { - env->fpus |= 0x4400; /* Denormal */ - } - } else { - env->fpus |= 0x400; - } -} - -static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) -{ - int fpus, fptag, exp, i; - uint64_t mant; - CPU_LDoubleU tmp; - - fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; - fptag = 0; - for (i = 7; i >= 0; i--) { - fptag <<= 2; - if (env->fptags[i]) { - fptag |= 3; - } else { - tmp.d = env->fpregs[i].d; - exp = EXPD(tmp); - mant = MANTD(tmp); - if 
(exp == 0 && mant == 0) { - /* zero */ - fptag |= 1; - } else if (exp == 0 || exp == MAXEXPD - || (mant & (1LL << 63)) == 0) { - /* NaNs, infinity, denormal */ - fptag |= 2; - } - } - } - if (data32) { - /* 32 bit */ - cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); - cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); - cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); - cpu_stl_data_ra(env, ptr + 12, 0, retaddr); /* fpip */ - cpu_stl_data_ra(env, ptr + 16, 0, retaddr); /* fpcs */ - cpu_stl_data_ra(env, ptr + 20, 0, retaddr); /* fpoo */ - cpu_stl_data_ra(env, ptr + 24, 0, retaddr); /* fpos */ - } else { - /* 16 bit */ - cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); - cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); - cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); - cpu_stw_data_ra(env, ptr + 6, 0, retaddr); - cpu_stw_data_ra(env, ptr + 8, 0, retaddr); - cpu_stw_data_ra(env, ptr + 10, 0, retaddr); - cpu_stw_data_ra(env, ptr + 12, 0, retaddr); - } -} - -void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) -{ - do_fstenv(env, ptr, data32, GETPC()); -} - -static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, - uintptr_t retaddr) -{ - int i, fpus, fptag; - - if (data32) { - cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); - fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); - fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); - } else { - cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); - fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); - fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); - } - env->fpstt = (fpus >> 11) & 7; - env->fpus = fpus & ~0x3800; - for (i = 0; i < 8; i++) { - env->fptags[i] = ((fptag & 3) == 3); - fptag >>= 2; - } -} - -void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) -{ - do_fldenv(env, ptr, data32, GETPC()); -} - -void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) -{ - floatx80 tmp; - int i; - - do_fstenv(env, ptr, data32, GETPC()); - - ptr += (14 << data32); - for (i = 0; i < 8; i++) { 
- tmp = ST(i); - helper_fstt(env, tmp, ptr, GETPC()); - ptr += 10; - } - - /* fninit */ - env->fpus = 0; - env->fpstt = 0; - cpu_set_fpuc(env, 0x37f); - env->fptags[0] = 1; - env->fptags[1] = 1; - env->fptags[2] = 1; - env->fptags[3] = 1; - env->fptags[4] = 1; - env->fptags[5] = 1; - env->fptags[6] = 1; - env->fptags[7] = 1; -} - -void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) -{ - floatx80 tmp; - int i; - - do_fldenv(env, ptr, data32, GETPC()); - ptr += (14 << data32); - - for (i = 0; i < 8; i++) { - tmp = helper_fldt(env, ptr, GETPC()); - ST(i) = tmp; - ptr += 10; - } -} - -#if defined(CONFIG_USER_ONLY) -void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) -{ - helper_fsave(env, ptr, data32); -} - -void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) -{ - helper_frstor(env, ptr, data32); -} -#endif - -#define XO(X) offsetof(X86XSaveArea, X) - -static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) -{ - int fpus, fptag, i; - target_ulong addr; - - fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; - fptag = 0; - for (i = 0; i < 8; i++) { - fptag |= (env->fptags[i] << i); - } - - cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); - cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); - cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); - - /* In 32-bit mode this is eip, sel, dp, sel. - In 64-bit mode this is rip, rdp. - But in either case we don't write actual data, just zeros. 
*/ - cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ - cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ - - addr = ptr + XO(legacy.fpregs); - for (i = 0; i < 8; i++) { - floatx80 tmp = ST(i); - helper_fstt(env, tmp, addr, ra); - addr += 16; - } -} - -static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) -{ - cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); - cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); -} - -static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) -{ - int i, nb_xmm_regs; - target_ulong addr; - - if (env->hflags & HF_CS64_MASK) { - nb_xmm_regs = 16; - } else { - nb_xmm_regs = 8; - } - - addr = ptr + XO(legacy.xmm_regs); - for (i = 0; i < nb_xmm_regs; i++) { - cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); - cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); - addr += 16; - } -} - -static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) -{ - target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); - int i; - - for (i = 0; i < 4; i++, addr += 16) { - cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); - cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); - } -} - -static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) -{ - cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), - env->bndcs_regs.cfgu, ra); - cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), - env->bndcs_regs.sts, ra); -} - -static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) -{ - cpu_stq_data_ra(env, ptr, env->pkru, ra); -} - -void helper_fxsave(CPUX86State *env, target_ulong ptr) -{ - uintptr_t ra = GETPC(); - - /* The operand must be 16 byte aligned */ - if (ptr & 0xf) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - do_xsave_fpu(env, ptr, ra); - - if (env->cr[4] & CR4_OSFXSR_MASK) { - do_xsave_mxcsr(env, ptr, ra); - /* Fast FXSAVE leaves 
out the XMM registers */ - if (!(env->efer & MSR_EFER_FFXSR) - || (env->hflags & HF_CPL_MASK) - || !(env->hflags & HF_LMA_MASK)) { - do_xsave_sse(env, ptr, ra); - } - } -} - -static uint64_t get_xinuse(CPUX86State *env) -{ - uint64_t inuse = -1; - - /* For the most part, we don't track XINUSE. We could calculate it - here for all components, but it's probably less work to simply - indicate in use. That said, the state of BNDREGS is important - enough to track in HFLAGS, so we might as well use that here. */ - if ((env->hflags & HF_MPX_IU_MASK) == 0) { - inuse &= ~XSTATE_BNDREGS_MASK; - } - return inuse; -} - -static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, - uint64_t inuse, uint64_t opt, uintptr_t ra) -{ - uint64_t old_bv, new_bv; - - /* The OS must have enabled XSAVE. */ - if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { - raise_exception_ra(env, EXCP06_ILLOP, ra); - } - - /* The operand must be 64 byte aligned. */ - if (ptr & 63) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - /* Never save anything not enabled by XCR0. */ - rfbm &= env->xcr0; - opt &= rfbm; - - if (opt & XSTATE_FP_MASK) { - do_xsave_fpu(env, ptr, ra); - } - if (rfbm & XSTATE_SSE_MASK) { - /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ - do_xsave_mxcsr(env, ptr, ra); - } - if (opt & XSTATE_SSE_MASK) { - do_xsave_sse(env, ptr, ra); - } - if (opt & XSTATE_BNDREGS_MASK) { - do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); - } - if (opt & XSTATE_BNDCSR_MASK) { - do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); - } - if (opt & XSTATE_PKRU_MASK) { - do_xsave_pkru(env, ptr + XO(pkru_state), ra); - } - - /* Update the XSTATE_BV field. 
*/ - old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); - new_bv = (old_bv & ~rfbm) | (inuse & rfbm); - cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); -} - -void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) -{ - do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); -} - -void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) -{ - uint64_t inuse = get_xinuse(env); - do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); -} - -static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) -{ - int i, fpuc, fpus, fptag; - target_ulong addr; - - fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); - fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); - fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); - cpu_set_fpuc(env, fpuc); - env->fpstt = (fpus >> 11) & 7; - env->fpus = fpus & ~0x3800; - fptag ^= 0xff; - for (i = 0; i < 8; i++) { - env->fptags[i] = ((fptag >> i) & 1); - } - - addr = ptr + XO(legacy.fpregs); - for (i = 0; i < 8; i++) { - floatx80 tmp = helper_fldt(env, addr, ra); - ST(i) = tmp; - addr += 16; - } -} - -static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) -{ - cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); -} - -static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) -{ - int i, nb_xmm_regs; - target_ulong addr; - - if (env->hflags & HF_CS64_MASK) { - nb_xmm_regs = 16; - } else { - nb_xmm_regs = 8; - } - - addr = ptr + XO(legacy.xmm_regs); - for (i = 0; i < nb_xmm_regs; i++) { - env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); - env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); - addr += 16; - } -} - -static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) -{ - target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); - int i; - - for (i = 0; i < 4; i++, addr += 16) { - env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); - env->bnd_regs[i].ub = 
cpu_ldq_data_ra(env, addr + 8, ra); - } -} - -static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) -{ - /* FIXME: Extend highest implemented bit of linear address. */ - env->bndcs_regs.cfgu - = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); - env->bndcs_regs.sts - = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); -} - -static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) -{ - env->pkru = cpu_ldq_data_ra(env, ptr, ra); -} - -void helper_fxrstor(CPUX86State *env, target_ulong ptr) -{ - uintptr_t ra = GETPC(); - - /* The operand must be 16 byte aligned */ - if (ptr & 0xf) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - do_xrstor_fpu(env, ptr, ra); - - if (env->cr[4] & CR4_OSFXSR_MASK) { - do_xrstor_mxcsr(env, ptr, ra); - /* Fast FXRSTOR leaves out the XMM registers */ - if (!(env->efer & MSR_EFER_FFXSR) - || (env->hflags & HF_CPL_MASK) - || !(env->hflags & HF_LMA_MASK)) { - do_xrstor_sse(env, ptr, ra); - } - } -} - -#if defined(CONFIG_USER_ONLY) -void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) -{ - helper_fxsave(env, ptr); -} - -void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) -{ - helper_fxrstor(env, ptr); -} -#endif - -void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) -{ - uintptr_t ra = GETPC(); - uint64_t xstate_bv, xcomp_bv, reserve0; - - rfbm &= env->xcr0; - - /* The OS must have enabled XSAVE. */ - if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { - raise_exception_ra(env, EXCP06_ILLOP, ra); - } - - /* The operand must be 64 byte aligned. */ - if (ptr & 63) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); - - if ((int64_t)xstate_bv < 0) { - /* FIXME: Compact form. */ - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - /* Standard form. */ - - /* The XSTATE_BV field must not set bits not present in XCR0. 
*/ - if (xstate_bv & ~env->xcr0) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - /* The XCOMP_BV field must be zero. Note that, as of the April 2016 - revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) - describes only XCOMP_BV, but the description of the standard form - of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which - includes the next 64-bit field. */ - xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); - reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); - if (xcomp_bv || reserve0) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } - - if (rfbm & XSTATE_FP_MASK) { - if (xstate_bv & XSTATE_FP_MASK) { - do_xrstor_fpu(env, ptr, ra); - } else { - helper_fninit(env); - memset(env->fpregs, 0, sizeof(env->fpregs)); - } - } - if (rfbm & XSTATE_SSE_MASK) { - /* Note that the standard form of XRSTOR loads MXCSR from memory - whether or not the XSTATE_BV bit is set. */ - do_xrstor_mxcsr(env, ptr, ra); - if (xstate_bv & XSTATE_SSE_MASK) { - do_xrstor_sse(env, ptr, ra); - } else { - /* ??? When AVX is implemented, we may have to be more - selective in the clearing. 
*/ - memset(env->xmm_regs, 0, sizeof(env->xmm_regs)); - } - } - if (rfbm & XSTATE_BNDREGS_MASK) { - if (xstate_bv & XSTATE_BNDREGS_MASK) { - do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); - env->hflags |= HF_MPX_IU_MASK; - } else { - memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); - env->hflags &= ~HF_MPX_IU_MASK; - } - } - if (rfbm & XSTATE_BNDCSR_MASK) { - if (xstate_bv & XSTATE_BNDCSR_MASK) { - do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); - } else { - memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); - } - cpu_sync_bndcs_hflags(env); - } - if (rfbm & XSTATE_PKRU_MASK) { - uint64_t old_pkru = env->pkru; - if (xstate_bv & XSTATE_PKRU_MASK) { - do_xrstor_pkru(env, ptr + XO(pkru_state), ra); - } else { - env->pkru = 0; - } - if (env->pkru != old_pkru) { - CPUState *cs = CPU(x86_env_get_cpu(env)); - tlb_flush(cs); - } - } -} - -#undef XO - -uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) -{ - /* The OS must have enabled XSAVE. */ - if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { - raise_exception_ra(env, EXCP06_ILLOP, GETPC()); - } - - switch (ecx) { - case 0: - return env->xcr0; - case 1: - if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { - return env->xcr0 & get_xinuse(env); - } - break; - } - raise_exception_ra(env, EXCP0D_GPF, GETPC()); -} - -void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) -{ - uint32_t dummy, ena_lo, ena_hi; - uint64_t ena; - - /* The OS must have enabled XSAVE. */ - if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { - raise_exception_ra(env, EXCP06_ILLOP, GETPC()); - } - - /* Only XCR0 is defined at present; the FPU may not be disabled. */ - if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { - goto do_gpf; - } - - /* Disallow enabling unimplemented features. */ - cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); - ena = ((uint64_t)ena_hi << 32) | ena_lo; - if (mask & ~ena) { - goto do_gpf; - } - - /* Disallow enabling only half of MPX. 
*/ - if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) - & XSTATE_BNDCSR_MASK) { - goto do_gpf; - } - - env->xcr0 = mask; - cpu_sync_bndcs_hflags(env); - return; - - do_gpf: - raise_exception_ra(env, EXCP0D_GPF, GETPC()); -} - -/* MMX/SSE */ -/* XXX: optimize by storing fptt and fptags in the static cpu state */ - -#define SSE_DAZ 0x0040 -#define SSE_RC_MASK 0x6000 -#define SSE_RC_NEAR 0x0000 -#define SSE_RC_DOWN 0x2000 -#define SSE_RC_UP 0x4000 -#define SSE_RC_CHOP 0x6000 -#define SSE_FZ 0x8000 - -void update_mxcsr_status(CPUX86State *env) -{ - uint32_t mxcsr = env->mxcsr; - int rnd_type; - - /* set rounding mode */ - switch (mxcsr & SSE_RC_MASK) { - default: - case SSE_RC_NEAR: - rnd_type = float_round_nearest_even; - break; - case SSE_RC_DOWN: - rnd_type = float_round_down; - break; - case SSE_RC_UP: - rnd_type = float_round_up; - break; - case SSE_RC_CHOP: - rnd_type = float_round_to_zero; - break; - } - set_float_rounding_mode(rnd_type, &env->sse_status); - - /* set denormals are zero */ - set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); - - /* set flush to zero */ - set_flush_to_zero((mxcsr & SSE_FZ) ? 
1 : 0, &env->fp_status); -} - -void helper_ldmxcsr(CPUX86State *env, uint32_t val) -{ - cpu_set_mxcsr(env, val); -} - -void helper_enter_mmx(CPUX86State *env) -{ - env->fpstt = 0; - *(uint32_t *)(env->fptags) = 0; - *(uint32_t *)(env->fptags + 4) = 0; -} - -void helper_emms(CPUX86State *env) -{ - /* set to empty state */ - *(uint32_t *)(env->fptags) = 0x01010101; - *(uint32_t *)(env->fptags + 4) = 0x01010101; -} - -/* XXX: suppress */ -void helper_movq(CPUX86State *env, void *d, void *s) -{ - *(uint64_t *)d = *(uint64_t *)s; -} - -#define SHIFT 0 -#include "ops_sse.h" - -#define SHIFT 1 -#include "ops_sse.h" diff --git a/target/i386/gdbstub.c b/target/i386/gdbstub.c index 9b94ab852c..ebb000df6a 100644 --- a/target/i386/gdbstub.c +++ b/target/i386/gdbstub.c @@ -7,7 +7,7 @@ * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -18,9 +18,8 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
*/ #include "qemu/osdep.h" -#include "qemu-common.h" #include "cpu.h" -#include "exec/gdbstub.h" +#include "include/gdbstub/helpers.h" #ifdef TARGET_X86_64 static const int gpr_map[16] = { @@ -32,18 +31,78 @@ static const int gpr_map[16] = { #endif static const int gpr_map32[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; +/* + * Keep these in sync with assignment to + * gdb_num_core_regs in target/i386/cpu.c + * and with the machine description + */ + +/* + * SEG: 6 segments, plus fs_base, gs_base, kernel_gs_base + */ + +/* + * general regs -----> 8 or 16 + */ +#define IDX_NB_IP 1 +#define IDX_NB_FLAGS 1 +#define IDX_NB_SEG (6 + 3) +#define IDX_NB_CTL 6 +#define IDX_NB_FP 16 +/* + * fpu regs ----------> 8 or 16 + */ +#define IDX_NB_MXCSR 1 +/* + * total ----> 8+1+1+9+6+16+8+1=50 or 16+1+1+9+6+16+16+1=66 + */ + #define IDX_IP_REG CPU_NB_REGS -#define IDX_FLAGS_REG (IDX_IP_REG + 1) -#define IDX_SEG_REGS (IDX_FLAGS_REG + 1) -#define IDX_FP_REGS (IDX_SEG_REGS + 6) -#define IDX_XMM_REGS (IDX_FP_REGS + 16) +#define IDX_FLAGS_REG (IDX_IP_REG + IDX_NB_IP) +#define IDX_SEG_REGS (IDX_FLAGS_REG + IDX_NB_FLAGS) +#define IDX_CTL_REGS (IDX_SEG_REGS + IDX_NB_SEG) +#define IDX_FP_REGS (IDX_CTL_REGS + IDX_NB_CTL) +#define IDX_XMM_REGS (IDX_FP_REGS + IDX_NB_FP) #define IDX_MXCSR_REG (IDX_XMM_REGS + CPU_NB_REGS) -int x86_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) +#define IDX_CTL_CR0_REG (IDX_CTL_REGS + 0) +#define IDX_CTL_CR2_REG (IDX_CTL_REGS + 1) +#define IDX_CTL_CR3_REG (IDX_CTL_REGS + 2) +#define IDX_CTL_CR4_REG (IDX_CTL_REGS + 3) +#define IDX_CTL_CR8_REG (IDX_CTL_REGS + 4) +#define IDX_CTL_EFER_REG (IDX_CTL_REGS + 5) + +#ifdef TARGET_X86_64 +#define GDB_FORCE_64 1 +#else +#define GDB_FORCE_64 0 +#endif + +static int gdb_read_reg_cs64(uint32_t hflags, GByteArray *buf, target_ulong val) +{ + if ((hflags & HF_CS64_MASK) || GDB_FORCE_64) { + return gdb_get_reg64(buf, val); + } + return gdb_get_reg32(buf, val); +} + +static int gdb_write_reg_cs64(uint32_t hflags, uint8_t *buf, 
target_ulong *val) +{ + if (hflags & HF_CS64_MASK) { + *val = ldq_p(buf); + return 8; + } + *val = ldl_p(buf); + return 4; +} + +int x86_cpu_gdb_read_register(CPUState *cs, GByteArray *mem_buf, int n) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; + uint64_t tpr; + /* N.B. GDB can't deal with changes in registers or sizes in the middle of a session. So if we're in 32-bit mode on a 64-bit cpu, still act as if we're on a 64-bit cpu. */ @@ -56,26 +115,24 @@ int x86_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) return gdb_get_reg64(mem_buf, env->regs[gpr_map[n]] & 0xffffffffUL); } else { - memset(mem_buf, 0, sizeof(target_ulong)); - return sizeof(target_ulong); + return gdb_get_regl(mem_buf, 0); } } else { return gdb_get_reg32(mem_buf, env->regs[gpr_map32[n]]); } } else if (n >= IDX_FP_REGS && n < IDX_FP_REGS + 8) { -#ifdef USE_X86LDOUBLE - /* FIXME: byteswap float values - after fixing fpregs layout. */ - memcpy(mem_buf, &env->fpregs[n - IDX_FP_REGS], 10); -#else - memset(mem_buf, 0, 10); -#endif - return 10; + int st_index = n - IDX_FP_REGS; + int r_index = (st_index + env->fpstt) % 8; + floatx80 *fp = &env->fpregs[r_index].d; + int len = gdb_get_reg64(mem_buf, cpu_to_le64(fp->low)); + len += gdb_get_reg16(mem_buf, cpu_to_le16(fp->high)); + return len; } else if (n >= IDX_XMM_REGS && n < IDX_XMM_REGS + CPU_NB_REGS) { n -= IDX_XMM_REGS; if (n < CPU_NB_REGS32 || TARGET_LONG_BITS == 64) { - stq_p(mem_buf, env->xmm_regs[n].ZMM_Q(0)); - stq_p(mem_buf + 8, env->xmm_regs[n].ZMM_Q(1)); - return 16; + return gdb_get_reg128(mem_buf, + env->xmm_regs[n].ZMM_Q(1), + env->xmm_regs[n].ZMM_Q(0)); } } else { switch (n) { @@ -104,6 +161,17 @@ int x86_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) return gdb_get_reg32(mem_buf, env->segs[R_FS].selector); case IDX_SEG_REGS + 5: return gdb_get_reg32(mem_buf, env->segs[R_GS].selector); + case IDX_SEG_REGS + 6: + return gdb_read_reg_cs64(env->hflags, mem_buf, env->segs[R_FS].base); + case 
IDX_SEG_REGS + 7: + return gdb_read_reg_cs64(env->hflags, mem_buf, env->segs[R_GS].base); + + case IDX_SEG_REGS + 8: +#ifdef TARGET_X86_64 + return gdb_read_reg_cs64(env->hflags, mem_buf, env->kernelgsbase); +#else + return gdb_get_reg32(mem_buf, 0); +#endif case IDX_FP_REGS + 8: return gdb_get_reg32(mem_buf, env->fpuc); @@ -124,13 +192,33 @@ int x86_cpu_gdb_read_register(CPUState *cs, uint8_t *mem_buf, int n) return gdb_get_reg32(mem_buf, 0); /* fop */ case IDX_MXCSR_REG: + update_mxcsr_from_sse_status(env); return gdb_get_reg32(mem_buf, env->mxcsr); + + case IDX_CTL_CR0_REG: + return gdb_read_reg_cs64(env->hflags, mem_buf, env->cr[0]); + case IDX_CTL_CR2_REG: + return gdb_read_reg_cs64(env->hflags, mem_buf, env->cr[2]); + case IDX_CTL_CR3_REG: + return gdb_read_reg_cs64(env->hflags, mem_buf, env->cr[3]); + case IDX_CTL_CR4_REG: + return gdb_read_reg_cs64(env->hflags, mem_buf, env->cr[4]); + case IDX_CTL_CR8_REG: +#ifndef CONFIG_USER_ONLY + tpr = cpu_get_apic_tpr(cpu->apic_state); +#else + tpr = 0; +#endif + return gdb_read_reg_cs64(env->hflags, mem_buf, tpr); + + case IDX_CTL_EFER_REG: + return gdb_read_reg_cs64(env->hflags, mem_buf, env->efer); } } return 0; } -static int x86_cpu_gdb_load_seg(X86CPU *cpu, int sreg, uint8_t *mem_buf) +static int x86_cpu_gdb_load_seg(X86CPU *cpu, X86Seg sreg, uint8_t *mem_buf) { CPUX86State *env = &cpu->env; uint16_t selector = ldl_p(mem_buf); @@ -164,7 +252,8 @@ int x86_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - uint32_t tmp; + target_ulong tmp; + int len; /* N.B. GDB can't deal with changes in registers or sizes in the middle of a session. So if we're in 32-bit mode on a 64-bit cpu, still act @@ -185,10 +274,9 @@ int x86_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) return 4; } } else if (n >= IDX_FP_REGS && n < IDX_FP_REGS + 8) { -#ifdef USE_X86LDOUBLE - /* FIXME: byteswap float values - after fixing fpregs layout. 
*/ - memcpy(&env->fpregs[n - IDX_FP_REGS], mem_buf, 10); -#endif + floatx80 *fp = (floatx80 *) &env->fpregs[n - IDX_FP_REGS]; + fp->low = le64_to_cpu(* (uint64_t *) mem_buf); + fp->high = le16_to_cpu(* (uint16_t *) (mem_buf + 8)); return 10; } else if (n >= IDX_XMM_REGS && n < IDX_XMM_REGS + CPU_NB_REGS) { n -= IDX_XMM_REGS; @@ -228,6 +316,15 @@ int x86_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) return x86_cpu_gdb_load_seg(cpu, R_FS, mem_buf); case IDX_SEG_REGS + 5: return x86_cpu_gdb_load_seg(cpu, R_GS, mem_buf); + case IDX_SEG_REGS + 6: + return gdb_write_reg_cs64(env->hflags, mem_buf, &env->segs[R_FS].base); + case IDX_SEG_REGS + 7: + return gdb_write_reg_cs64(env->hflags, mem_buf, &env->segs[R_GS].base); + case IDX_SEG_REGS + 8: +#ifdef TARGET_X86_64 + return gdb_write_reg_cs64(env->hflags, mem_buf, &env->kernelgsbase); +#endif + return 4; case IDX_FP_REGS + 8: cpu_set_fpuc(env, ldl_p(mem_buf)); @@ -253,6 +350,48 @@ int x86_cpu_gdb_write_register(CPUState *cs, uint8_t *mem_buf, int n) case IDX_MXCSR_REG: cpu_set_mxcsr(env, ldl_p(mem_buf)); return 4; + + case IDX_CTL_CR0_REG: + len = gdb_write_reg_cs64(env->hflags, mem_buf, &tmp); +#ifndef CONFIG_USER_ONLY + cpu_x86_update_cr0(env, tmp); +#endif + return len; + + case IDX_CTL_CR2_REG: + len = gdb_write_reg_cs64(env->hflags, mem_buf, &tmp); +#ifndef CONFIG_USER_ONLY + env->cr[2] = tmp; +#endif + return len; + + case IDX_CTL_CR3_REG: + len = gdb_write_reg_cs64(env->hflags, mem_buf, &tmp); +#ifndef CONFIG_USER_ONLY + cpu_x86_update_cr3(env, tmp); +#endif + return len; + + case IDX_CTL_CR4_REG: + len = gdb_write_reg_cs64(env->hflags, mem_buf, &tmp); +#ifndef CONFIG_USER_ONLY + cpu_x86_update_cr4(env, tmp); +#endif + return len; + + case IDX_CTL_CR8_REG: + len = gdb_write_reg_cs64(env->hflags, mem_buf, &tmp); +#ifndef CONFIG_USER_ONLY + cpu_set_apic_tpr(cpu->apic_state, tmp); +#endif + return len; + + case IDX_CTL_EFER_REG: + len = gdb_write_reg_cs64(env->hflags, mem_buf, &tmp); +#ifndef 
CONFIG_USER_ONLY + cpu_load_efer(env, tmp); +#endif + return len; } } /* Unrecognised register. */ diff --git a/target/i386/hax-all.c b/target/i386/hax-all.c deleted file mode 100644 index d2e512856b..0000000000 --- a/target/i386/hax-all.c +++ /dev/null @@ -1,1113 +0,0 @@ -/* - * QEMU HAX support - * - * Copyright IBM, Corp. 2008 - * Red Hat, Inc. 2008 - * - * Authors: - * Anthony Liguori <aliguori@us.ibm.com> - * Glauber Costa <gcosta@redhat.com> - * - * Copyright (c) 2011 Intel Corporation - * Written by: - * Jiang Yunhong<yunhong.jiang@intel.com> - * Xin Xiaohui<xiaohui.xin@intel.com> - * Zhang Xiantao<xiantao.zhang@intel.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -/* - * HAX common code for both windows and darwin - */ - -#include "qemu/osdep.h" -#include "cpu.h" -#include "exec/address-spaces.h" - -#include "qemu-common.h" -#include "hax-i386.h" -#include "sysemu/accel.h" -#include "sysemu/sysemu.h" -#include "qemu/main-loop.h" -#include "hw/boards.h" - -#define DEBUG_HAX 0 - -#define DPRINTF(fmt, ...) 
\ - do { \ - if (DEBUG_HAX) { \ - fprintf(stdout, fmt, ## __VA_ARGS__); \ - } \ - } while (0) - -/* Current version */ -const uint32_t hax_cur_version = 0x4; /* API v4: unmapping and MMIO moves */ -/* Minimum HAX kernel version */ -const uint32_t hax_min_version = 0x4; /* API v4: supports unmapping */ - -static bool hax_allowed; - -struct hax_state hax_global; - -static void hax_vcpu_sync_state(CPUArchState *env, int modified); -static int hax_arch_get_registers(CPUArchState *env); - -int hax_enabled(void) -{ - return hax_allowed; -} - -int valid_hax_tunnel_size(uint16_t size) -{ - return size >= sizeof(struct hax_tunnel); -} - -hax_fd hax_vcpu_get_fd(CPUArchState *env) -{ - struct hax_vcpu_state *vcpu = ENV_GET_CPU(env)->hax_vcpu; - if (!vcpu) { - return HAX_INVALID_FD; - } - return vcpu->fd; -} - -static int hax_get_capability(struct hax_state *hax) -{ - int ret; - struct hax_capabilityinfo capinfo, *cap = &capinfo; - - ret = hax_capability(hax, cap); - if (ret) { - return ret; - } - - if ((cap->wstatus & HAX_CAP_WORKSTATUS_MASK) == HAX_CAP_STATUS_NOTWORKING) { - if (cap->winfo & HAX_CAP_FAILREASON_VT) { - DPRINTF - ("VTX feature is not enabled, HAX driver will not work.\n"); - } else if (cap->winfo & HAX_CAP_FAILREASON_NX) { - DPRINTF - ("NX feature is not enabled, HAX driver will not work.\n"); - } - return -ENXIO; - - } - - if (!(cap->winfo & HAX_CAP_UG)) { - fprintf(stderr, "UG mode is not supported by the hardware.\n"); - return -ENOTSUP; - } - - hax->supports_64bit_ramblock = !!(cap->winfo & HAX_CAP_64BIT_RAMBLOCK); - - if (cap->wstatus & HAX_CAP_MEMQUOTA) { - if (cap->mem_quota < hax->mem_quota) { - fprintf(stderr, "The VM memory needed exceeds the driver limit.\n"); - return -ENOSPC; - } - } - return 0; -} - -static int hax_version_support(struct hax_state *hax) -{ - int ret; - struct hax_module_version version; - - ret = hax_mod_version(hax, &version); - if (ret < 0) { - return 0; - } - - if (hax_min_version > version.cur_version) { - fprintf(stderr, 
"Incompatible HAX module version %d,", - version.cur_version); - fprintf(stderr, "requires minimum version %d\n", hax_min_version); - return 0; - } - if (hax_cur_version < version.compat_version) { - fprintf(stderr, "Incompatible QEMU HAX API version %x,", - hax_cur_version); - fprintf(stderr, "requires minimum HAX API version %x\n", - version.compat_version); - return 0; - } - - return 1; -} - -int hax_vcpu_create(int id) -{ - struct hax_vcpu_state *vcpu = NULL; - int ret; - - if (!hax_global.vm) { - fprintf(stderr, "vcpu %x created failed, vm is null\n", id); - return -1; - } - - if (hax_global.vm->vcpus[id]) { - fprintf(stderr, "vcpu %x allocated already\n", id); - return 0; - } - - vcpu = g_malloc(sizeof(struct hax_vcpu_state)); - if (!vcpu) { - fprintf(stderr, "Failed to alloc vcpu state\n"); - return -ENOMEM; - } - - memset(vcpu, 0, sizeof(struct hax_vcpu_state)); - - ret = hax_host_create_vcpu(hax_global.vm->fd, id); - if (ret) { - fprintf(stderr, "Failed to create vcpu %x\n", id); - goto error; - } - - vcpu->vcpu_id = id; - vcpu->fd = hax_host_open_vcpu(hax_global.vm->id, id); - if (hax_invalid_fd(vcpu->fd)) { - fprintf(stderr, "Failed to open the vcpu\n"); - ret = -ENODEV; - goto error; - } - - hax_global.vm->vcpus[id] = vcpu; - - ret = hax_host_setup_vcpu_channel(vcpu); - if (ret) { - fprintf(stderr, "Invalid hax tunnel size\n"); - ret = -EINVAL; - goto error; - } - return 0; - - error: - /* vcpu and tunnel will be closed automatically */ - if (vcpu && !hax_invalid_fd(vcpu->fd)) { - hax_close_fd(vcpu->fd); - } - - hax_global.vm->vcpus[id] = NULL; - g_free(vcpu); - return -1; -} - -int hax_vcpu_destroy(CPUState *cpu) -{ - struct hax_vcpu_state *vcpu = cpu->hax_vcpu; - - if (!hax_global.vm) { - fprintf(stderr, "vcpu %x destroy failed, vm is null\n", vcpu->vcpu_id); - return -1; - } - - if (!vcpu) { - return 0; - } - - /* - * 1. The hax_tunnel is also destroied when vcpu destroy - * 2. 
close fd will cause hax module vcpu be cleaned - */ - hax_close_fd(vcpu->fd); - hax_global.vm->vcpus[vcpu->vcpu_id] = NULL; - g_free(vcpu); - return 0; -} - -int hax_init_vcpu(CPUState *cpu) -{ - int ret; - - ret = hax_vcpu_create(cpu->cpu_index); - if (ret < 0) { - fprintf(stderr, "Failed to create HAX vcpu\n"); - exit(-1); - } - - cpu->hax_vcpu = hax_global.vm->vcpus[cpu->cpu_index]; - cpu->vcpu_dirty = true; - qemu_register_reset(hax_reset_vcpu_state, (CPUArchState *) (cpu->env_ptr)); - - return ret; -} - -struct hax_vm *hax_vm_create(struct hax_state *hax) -{ - struct hax_vm *vm; - int vm_id = 0, ret; - - if (hax_invalid_fd(hax->fd)) { - return NULL; - } - - if (hax->vm) { - return hax->vm; - } - - vm = g_malloc(sizeof(struct hax_vm)); - if (!vm) { - return NULL; - } - memset(vm, 0, sizeof(struct hax_vm)); - ret = hax_host_create_vm(hax, &vm_id); - if (ret) { - fprintf(stderr, "Failed to create vm %x\n", ret); - goto error; - } - vm->id = vm_id; - vm->fd = hax_host_open_vm(hax, vm_id); - if (hax_invalid_fd(vm->fd)) { - fprintf(stderr, "Failed to open vm %d\n", vm_id); - goto error; - } - - hax->vm = vm; - return vm; - - error: - g_free(vm); - hax->vm = NULL; - return NULL; -} - -int hax_vm_destroy(struct hax_vm *vm) -{ - int i; - - for (i = 0; i < HAX_MAX_VCPU; i++) - if (vm->vcpus[i]) { - fprintf(stderr, "VCPU should be cleaned before vm clean\n"); - return -1; - } - hax_close_fd(vm->fd); - g_free(vm); - hax_global.vm = NULL; - return 0; -} - -static void hax_handle_interrupt(CPUState *cpu, int mask) -{ - cpu->interrupt_request |= mask; - - if (!qemu_cpu_is_self(cpu)) { - qemu_cpu_kick(cpu); - } -} - -static int hax_init(ram_addr_t ram_size) -{ - struct hax_state *hax = NULL; - struct hax_qemu_version qversion; - int ret; - - hax = &hax_global; - - memset(hax, 0, sizeof(struct hax_state)); - hax->mem_quota = ram_size; - - hax->fd = hax_mod_open(); - if (hax_invalid_fd(hax->fd)) { - hax->fd = 0; - ret = -ENODEV; - goto error; - } - - ret = 
hax_get_capability(hax); - - if (ret) { - if (ret != -ENOSPC) { - ret = -EINVAL; - } - goto error; - } - - if (!hax_version_support(hax)) { - ret = -EINVAL; - goto error; - } - - hax->vm = hax_vm_create(hax); - if (!hax->vm) { - fprintf(stderr, "Failed to create HAX VM\n"); - ret = -EINVAL; - goto error; - } - - hax_memory_init(); - - qversion.cur_version = hax_cur_version; - qversion.min_version = hax_min_version; - hax_notify_qemu_version(hax->vm->fd, &qversion); - cpu_interrupt_handler = hax_handle_interrupt; - - return ret; - error: - if (hax->vm) { - hax_vm_destroy(hax->vm); - } - if (hax->fd) { - hax_mod_close(hax); - } - - return ret; -} - -static int hax_accel_init(MachineState *ms) -{ - int ret = hax_init(ms->ram_size); - - if (ret && (ret != -ENOSPC)) { - fprintf(stderr, "No accelerator found.\n"); - } else { - fprintf(stdout, "HAX is %s and emulator runs in %s mode.\n", - !ret ? "working" : "not working", - !ret ? "fast virt" : "emulation"); - } - return ret; -} - -static int hax_handle_fastmmio(CPUArchState *env, struct hax_fastmmio *hft) -{ - if (hft->direction < 2) { - cpu_physical_memory_rw(hft->gpa, (uint8_t *) &hft->value, hft->size, - hft->direction); - } else { - /* - * HAX API v4 supports transferring data between two MMIO addresses, - * hft->gpa and hft->gpa2 (instructions such as MOVS require this): - * hft->direction == 2: gpa ==> gpa2 - */ - uint64_t value; - cpu_physical_memory_rw(hft->gpa, (uint8_t *) &value, hft->size, 0); - cpu_physical_memory_rw(hft->gpa2, (uint8_t *) &value, hft->size, 1); - } - - return 0; -} - -static int hax_handle_io(CPUArchState *env, uint32_t df, uint16_t port, - int direction, int size, int count, void *buffer) -{ - uint8_t *ptr; - int i; - MemTxAttrs attrs = { 0 }; - - if (!df) { - ptr = (uint8_t *) buffer; - } else { - ptr = buffer + size * count - size; - } - for (i = 0; i < count; i++) { - address_space_rw(&address_space_io, port, attrs, - ptr, size, direction == HAX_EXIT_IO_OUT); - if (!df) { - ptr += size; 
- } else { - ptr -= size; - } - } - - return 0; -} - -static int hax_vcpu_interrupt(CPUArchState *env) -{ - CPUState *cpu = ENV_GET_CPU(env); - struct hax_vcpu_state *vcpu = cpu->hax_vcpu; - struct hax_tunnel *ht = vcpu->tunnel; - - /* - * Try to inject an interrupt if the guest can accept it - * Unlike KVM, HAX kernel check for the eflags, instead of qemu - */ - if (ht->ready_for_interrupt_injection && - (cpu->interrupt_request & CPU_INTERRUPT_HARD)) { - int irq; - - irq = cpu_get_pic_interrupt(env); - if (irq >= 0) { - hax_inject_interrupt(env, irq); - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; - } - } - - /* If we have an interrupt but the guest is not ready to receive an - * interrupt, request an interrupt window exit. This will - * cause a return to userspace as soon as the guest is ready to - * receive interrupts. */ - if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) { - ht->request_interrupt_window = 1; - } else { - ht->request_interrupt_window = 0; - } - return 0; -} - -void hax_raise_event(CPUState *cpu) -{ - struct hax_vcpu_state *vcpu = cpu->hax_vcpu; - - if (!vcpu) { - return; - } - vcpu->tunnel->user_event_pending = 1; -} - -/* - * Ask hax kernel module to run the CPU for us till: - * 1. Guest crash or shutdown - * 2. Need QEMU's emulation like guest execute MMIO instruction - * 3. Guest execute HLT - * 4. QEMU have Signal/event pending - * 5. 
An unknown VMX exit happens - */ -static int hax_vcpu_hax_exec(CPUArchState *env) -{ - int ret = 0; - CPUState *cpu = ENV_GET_CPU(env); - X86CPU *x86_cpu = X86_CPU(cpu); - struct hax_vcpu_state *vcpu = cpu->hax_vcpu; - struct hax_tunnel *ht = vcpu->tunnel; - - if (!hax_enabled()) { - DPRINTF("Trying to vcpu execute at eip:" TARGET_FMT_lx "\n", env->eip); - return 0; - } - - cpu->halted = 0; - - if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { - cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; - apic_poll_irq(x86_cpu->apic_state); - } - - if (cpu->interrupt_request & CPU_INTERRUPT_INIT) { - DPRINTF("\nhax_vcpu_hax_exec: handling INIT for %d\n", - cpu->cpu_index); - do_cpu_init(x86_cpu); - hax_vcpu_sync_state(env, 1); - } - - if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) { - DPRINTF("hax_vcpu_hax_exec: handling SIPI for %d\n", - cpu->cpu_index); - hax_vcpu_sync_state(env, 0); - do_cpu_sipi(x86_cpu); - hax_vcpu_sync_state(env, 1); - } - - do { - int hax_ret; - - if (cpu->exit_request) { - ret = 1; - break; - } - - hax_vcpu_interrupt(env); - - qemu_mutex_unlock_iothread(); - cpu_exec_start(cpu); - hax_ret = hax_vcpu_run(vcpu); - cpu_exec_end(cpu); - qemu_mutex_lock_iothread(); - - /* Simply continue the vcpu_run if system call interrupted */ - if (hax_ret == -EINTR || hax_ret == -EAGAIN) { - DPRINTF("io window interrupted\n"); - continue; - } - - if (hax_ret < 0) { - fprintf(stderr, "vcpu run failed for vcpu %x\n", vcpu->vcpu_id); - abort(); - } - switch (ht->_exit_status) { - case HAX_EXIT_IO: - ret = hax_handle_io(env, ht->pio._df, ht->pio._port, - ht->pio._direction, - ht->pio._size, ht->pio._count, vcpu->iobuf); - break; - case HAX_EXIT_FAST_MMIO: - ret = hax_handle_fastmmio(env, (struct hax_fastmmio *) vcpu->iobuf); - break; - /* Guest state changed, currently only for shutdown */ - case HAX_EXIT_STATECHANGE: - fprintf(stdout, "VCPU shutdown request\n"); - qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); - hax_vcpu_sync_state(env, 0); - ret = 1; - 
break; - case HAX_EXIT_UNKNOWN_VMEXIT: - fprintf(stderr, "Unknown VMX exit %x from guest\n", - ht->_exit_reason); - qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); - hax_vcpu_sync_state(env, 0); - cpu_dump_state(cpu, stderr, fprintf, 0); - ret = -1; - break; - case HAX_EXIT_HLT: - if (!(cpu->interrupt_request & CPU_INTERRUPT_HARD) && - !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { - /* hlt instruction with interrupt disabled is shutdown */ - env->eflags |= IF_MASK; - cpu->halted = 1; - cpu->exception_index = EXCP_HLT; - ret = 1; - } - break; - /* these situations will continue to hax module */ - case HAX_EXIT_INTERRUPT: - case HAX_EXIT_PAUSED: - break; - case HAX_EXIT_MMIO: - /* Should not happen on UG system */ - fprintf(stderr, "HAX: unsupported MMIO emulation\n"); - ret = -1; - break; - case HAX_EXIT_REAL: - /* Should not happen on UG system */ - fprintf(stderr, "HAX: unimplemented real mode emulation\n"); - ret = -1; - break; - default: - fprintf(stderr, "Unknown exit %x from HAX\n", ht->_exit_status); - qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); - hax_vcpu_sync_state(env, 0); - cpu_dump_state(cpu, stderr, fprintf, 0); - ret = 1; - break; - } - } while (!ret); - - if (cpu->exit_request) { - cpu->exit_request = 0; - cpu->exception_index = EXCP_INTERRUPT; - } - return ret < 0; -} - -static void do_hax_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) -{ - CPUArchState *env = cpu->env_ptr; - - hax_arch_get_registers(env); - cpu->vcpu_dirty = true; -} - -void hax_cpu_synchronize_state(CPUState *cpu) -{ - if (!cpu->vcpu_dirty) { - run_on_cpu(cpu, do_hax_cpu_synchronize_state, RUN_ON_CPU_NULL); - } -} - -static void do_hax_cpu_synchronize_post_reset(CPUState *cpu, - run_on_cpu_data arg) -{ - CPUArchState *env = cpu->env_ptr; - - hax_vcpu_sync_state(env, 1); - cpu->vcpu_dirty = false; -} - -void hax_cpu_synchronize_post_reset(CPUState *cpu) -{ - run_on_cpu(cpu, do_hax_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); -} - -static void 
do_hax_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) -{ - CPUArchState *env = cpu->env_ptr; - - hax_vcpu_sync_state(env, 1); - cpu->vcpu_dirty = false; -} - -void hax_cpu_synchronize_post_init(CPUState *cpu) -{ - run_on_cpu(cpu, do_hax_cpu_synchronize_post_init, RUN_ON_CPU_NULL); -} - -static void do_hax_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) -{ - cpu->vcpu_dirty = true; -} - -void hax_cpu_synchronize_pre_loadvm(CPUState *cpu) -{ - run_on_cpu(cpu, do_hax_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); -} - -int hax_smp_cpu_exec(CPUState *cpu) -{ - CPUArchState *env = (CPUArchState *) (cpu->env_ptr); - int fatal; - int ret; - - while (1) { - if (cpu->exception_index >= EXCP_INTERRUPT) { - ret = cpu->exception_index; - cpu->exception_index = -1; - break; - } - - fatal = hax_vcpu_hax_exec(env); - - if (fatal) { - fprintf(stderr, "Unsupported HAX vcpu return\n"); - abort(); - } - } - - return ret; -} - -static void set_v8086_seg(struct segment_desc_t *lhs, const SegmentCache *rhs) -{ - memset(lhs, 0, sizeof(struct segment_desc_t)); - lhs->selector = rhs->selector; - lhs->base = rhs->base; - lhs->limit = rhs->limit; - lhs->type = 3; - lhs->present = 1; - lhs->dpl = 3; - lhs->operand_size = 0; - lhs->desc = 1; - lhs->long_mode = 0; - lhs->granularity = 0; - lhs->available = 0; -} - -static void get_seg(SegmentCache *lhs, const struct segment_desc_t *rhs) -{ - lhs->selector = rhs->selector; - lhs->base = rhs->base; - lhs->limit = rhs->limit; - lhs->flags = (rhs->type << DESC_TYPE_SHIFT) - | (rhs->present * DESC_P_MASK) - | (rhs->dpl << DESC_DPL_SHIFT) - | (rhs->operand_size << DESC_B_SHIFT) - | (rhs->desc * DESC_S_MASK) - | (rhs->long_mode << DESC_L_SHIFT) - | (rhs->granularity * DESC_G_MASK) | (rhs->available * DESC_AVL_MASK); -} - -static void set_seg(struct segment_desc_t *lhs, const SegmentCache *rhs) -{ - unsigned flags = rhs->flags; - - memset(lhs, 0, sizeof(struct segment_desc_t)); - lhs->selector = rhs->selector; - lhs->base = 
rhs->base; - lhs->limit = rhs->limit; - lhs->type = (flags >> DESC_TYPE_SHIFT) & 15; - lhs->present = (flags & DESC_P_MASK) != 0; - lhs->dpl = rhs->selector & 3; - lhs->operand_size = (flags >> DESC_B_SHIFT) & 1; - lhs->desc = (flags & DESC_S_MASK) != 0; - lhs->long_mode = (flags >> DESC_L_SHIFT) & 1; - lhs->granularity = (flags & DESC_G_MASK) != 0; - lhs->available = (flags & DESC_AVL_MASK) != 0; -} - -static void hax_getput_reg(uint64_t *hax_reg, target_ulong *qemu_reg, int set) -{ - target_ulong reg = *hax_reg; - - if (set) { - *hax_reg = *qemu_reg; - } else { - *qemu_reg = reg; - } -} - -/* The sregs has been synced with HAX kernel already before this call */ -static int hax_get_segments(CPUArchState *env, struct vcpu_state_t *sregs) -{ - get_seg(&env->segs[R_CS], &sregs->_cs); - get_seg(&env->segs[R_DS], &sregs->_ds); - get_seg(&env->segs[R_ES], &sregs->_es); - get_seg(&env->segs[R_FS], &sregs->_fs); - get_seg(&env->segs[R_GS], &sregs->_gs); - get_seg(&env->segs[R_SS], &sregs->_ss); - - get_seg(&env->tr, &sregs->_tr); - get_seg(&env->ldt, &sregs->_ldt); - env->idt.limit = sregs->_idt.limit; - env->idt.base = sregs->_idt.base; - env->gdt.limit = sregs->_gdt.limit; - env->gdt.base = sregs->_gdt.base; - return 0; -} - -static int hax_set_segments(CPUArchState *env, struct vcpu_state_t *sregs) -{ - if ((env->eflags & VM_MASK)) { - set_v8086_seg(&sregs->_cs, &env->segs[R_CS]); - set_v8086_seg(&sregs->_ds, &env->segs[R_DS]); - set_v8086_seg(&sregs->_es, &env->segs[R_ES]); - set_v8086_seg(&sregs->_fs, &env->segs[R_FS]); - set_v8086_seg(&sregs->_gs, &env->segs[R_GS]); - set_v8086_seg(&sregs->_ss, &env->segs[R_SS]); - } else { - set_seg(&sregs->_cs, &env->segs[R_CS]); - set_seg(&sregs->_ds, &env->segs[R_DS]); - set_seg(&sregs->_es, &env->segs[R_ES]); - set_seg(&sregs->_fs, &env->segs[R_FS]); - set_seg(&sregs->_gs, &env->segs[R_GS]); - set_seg(&sregs->_ss, &env->segs[R_SS]); - - if (env->cr[0] & CR0_PE_MASK) { - /* force ss cpl to cs cpl */ - sregs->_ss.selector = 
(sregs->_ss.selector & ~3) | - (sregs->_cs.selector & 3); - sregs->_ss.dpl = sregs->_ss.selector & 3; - } - } - - set_seg(&sregs->_tr, &env->tr); - set_seg(&sregs->_ldt, &env->ldt); - sregs->_idt.limit = env->idt.limit; - sregs->_idt.base = env->idt.base; - sregs->_gdt.limit = env->gdt.limit; - sregs->_gdt.base = env->gdt.base; - return 0; -} - -static int hax_sync_vcpu_register(CPUArchState *env, int set) -{ - struct vcpu_state_t regs; - int ret; - memset(®s, 0, sizeof(struct vcpu_state_t)); - - if (!set) { - ret = hax_sync_vcpu_state(env, ®s, 0); - if (ret < 0) { - return -1; - } - } - - /* generic register */ - hax_getput_reg(®s._rax, &env->regs[R_EAX], set); - hax_getput_reg(®s._rbx, &env->regs[R_EBX], set); - hax_getput_reg(®s._rcx, &env->regs[R_ECX], set); - hax_getput_reg(®s._rdx, &env->regs[R_EDX], set); - hax_getput_reg(®s._rsi, &env->regs[R_ESI], set); - hax_getput_reg(®s._rdi, &env->regs[R_EDI], set); - hax_getput_reg(®s._rsp, &env->regs[R_ESP], set); - hax_getput_reg(®s._rbp, &env->regs[R_EBP], set); -#ifdef TARGET_X86_64 - hax_getput_reg(®s._r8, &env->regs[8], set); - hax_getput_reg(®s._r9, &env->regs[9], set); - hax_getput_reg(®s._r10, &env->regs[10], set); - hax_getput_reg(®s._r11, &env->regs[11], set); - hax_getput_reg(®s._r12, &env->regs[12], set); - hax_getput_reg(®s._r13, &env->regs[13], set); - hax_getput_reg(®s._r14, &env->regs[14], set); - hax_getput_reg(®s._r15, &env->regs[15], set); -#endif - hax_getput_reg(®s._rflags, &env->eflags, set); - hax_getput_reg(®s._rip, &env->eip, set); - - if (set) { - regs._cr0 = env->cr[0]; - regs._cr2 = env->cr[2]; - regs._cr3 = env->cr[3]; - regs._cr4 = env->cr[4]; - hax_set_segments(env, ®s); - } else { - env->cr[0] = regs._cr0; - env->cr[2] = regs._cr2; - env->cr[3] = regs._cr3; - env->cr[4] = regs._cr4; - hax_get_segments(env, ®s); - } - - if (set) { - ret = hax_sync_vcpu_state(env, ®s, 1); - if (ret < 0) { - return -1; - } - } - return 0; -} - -static void hax_msr_entry_set(struct vmx_msr *item, uint32_t 
index, - uint64_t value) -{ - item->entry = index; - item->value = value; -} - -static int hax_get_msrs(CPUArchState *env) -{ - struct hax_msr_data md; - struct vmx_msr *msrs = md.entries; - int ret, i, n; - - n = 0; - msrs[n++].entry = MSR_IA32_SYSENTER_CS; - msrs[n++].entry = MSR_IA32_SYSENTER_ESP; - msrs[n++].entry = MSR_IA32_SYSENTER_EIP; - msrs[n++].entry = MSR_IA32_TSC; -#ifdef TARGET_X86_64 - msrs[n++].entry = MSR_EFER; - msrs[n++].entry = MSR_STAR; - msrs[n++].entry = MSR_LSTAR; - msrs[n++].entry = MSR_CSTAR; - msrs[n++].entry = MSR_FMASK; - msrs[n++].entry = MSR_KERNELGSBASE; -#endif - md.nr_msr = n; - ret = hax_sync_msr(env, &md, 0); - if (ret < 0) { - return ret; - } - - for (i = 0; i < md.done; i++) { - switch (msrs[i].entry) { - case MSR_IA32_SYSENTER_CS: - env->sysenter_cs = msrs[i].value; - break; - case MSR_IA32_SYSENTER_ESP: - env->sysenter_esp = msrs[i].value; - break; - case MSR_IA32_SYSENTER_EIP: - env->sysenter_eip = msrs[i].value; - break; - case MSR_IA32_TSC: - env->tsc = msrs[i].value; - break; -#ifdef TARGET_X86_64 - case MSR_EFER: - env->efer = msrs[i].value; - break; - case MSR_STAR: - env->star = msrs[i].value; - break; - case MSR_LSTAR: - env->lstar = msrs[i].value; - break; - case MSR_CSTAR: - env->cstar = msrs[i].value; - break; - case MSR_FMASK: - env->fmask = msrs[i].value; - break; - case MSR_KERNELGSBASE: - env->kernelgsbase = msrs[i].value; - break; -#endif - } - } - - return 0; -} - -static int hax_set_msrs(CPUArchState *env) -{ - struct hax_msr_data md; - struct vmx_msr *msrs; - msrs = md.entries; - int n = 0; - - memset(&md, 0, sizeof(struct hax_msr_data)); - hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs); - hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp); - hax_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); - hax_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); -#ifdef TARGET_X86_64 - hax_msr_entry_set(&msrs[n++], MSR_EFER, env->efer); - 
hax_msr_entry_set(&msrs[n++], MSR_STAR, env->star); - hax_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); - hax_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); - hax_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask); - hax_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase); -#endif - md.nr_msr = n; - md.done = 0; - - return hax_sync_msr(env, &md, 1); -} - -static int hax_get_fpu(CPUArchState *env) -{ - struct fx_layout fpu; - int i, ret; - - ret = hax_sync_fpu(env, &fpu, 0); - if (ret < 0) { - return ret; - } - - env->fpstt = (fpu.fsw >> 11) & 7; - env->fpus = fpu.fsw; - env->fpuc = fpu.fcw; - for (i = 0; i < 8; ++i) { - env->fptags[i] = !((fpu.ftw >> i) & 1); - } - memcpy(env->fpregs, fpu.st_mm, sizeof(env->fpregs)); - - for (i = 0; i < 8; i++) { - env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.mmx_1[i][0]); - env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.mmx_1[i][8]); - if (CPU_NB_REGS > 8) { - env->xmm_regs[i + 8].ZMM_Q(0) = ldq_p(&fpu.mmx_2[i][0]); - env->xmm_regs[i + 8].ZMM_Q(1) = ldq_p(&fpu.mmx_2[i][8]); - } - } - env->mxcsr = fpu.mxcsr; - - return 0; -} - -static int hax_set_fpu(CPUArchState *env) -{ - struct fx_layout fpu; - int i; - - memset(&fpu, 0, sizeof(fpu)); - fpu.fsw = env->fpus & ~(7 << 11); - fpu.fsw |= (env->fpstt & 7) << 11; - fpu.fcw = env->fpuc; - - for (i = 0; i < 8; ++i) { - fpu.ftw |= (!env->fptags[i]) << i; - } - - memcpy(fpu.st_mm, env->fpregs, sizeof(env->fpregs)); - for (i = 0; i < 8; i++) { - stq_p(&fpu.mmx_1[i][0], env->xmm_regs[i].ZMM_Q(0)); - stq_p(&fpu.mmx_1[i][8], env->xmm_regs[i].ZMM_Q(1)); - if (CPU_NB_REGS > 8) { - stq_p(&fpu.mmx_2[i][0], env->xmm_regs[i + 8].ZMM_Q(0)); - stq_p(&fpu.mmx_2[i][8], env->xmm_regs[i + 8].ZMM_Q(1)); - } - } - - fpu.mxcsr = env->mxcsr; - - return hax_sync_fpu(env, &fpu, 1); -} - -static int hax_arch_get_registers(CPUArchState *env) -{ - int ret; - - ret = hax_sync_vcpu_register(env, 0); - if (ret < 0) { - return ret; - } - - ret = hax_get_fpu(env); - if (ret < 0) { - return ret; - } - - ret = 
hax_get_msrs(env); - if (ret < 0) { - return ret; - } - - x86_update_hflags(env); - return 0; -} - -static int hax_arch_set_registers(CPUArchState *env) -{ - int ret; - ret = hax_sync_vcpu_register(env, 1); - - if (ret < 0) { - fprintf(stderr, "Failed to sync vcpu reg\n"); - return ret; - } - ret = hax_set_fpu(env); - if (ret < 0) { - fprintf(stderr, "FPU failed\n"); - return ret; - } - ret = hax_set_msrs(env); - if (ret < 0) { - fprintf(stderr, "MSR failed\n"); - return ret; - } - - return 0; -} - -static void hax_vcpu_sync_state(CPUArchState *env, int modified) -{ - if (hax_enabled()) { - if (modified) { - hax_arch_set_registers(env); - } else { - hax_arch_get_registers(env); - } - } -} - -/* - * much simpler than kvm, at least in first stage because: - * We don't need consider the device pass-through, we don't need - * consider the framebuffer, and we may even remove the bios at all - */ -int hax_sync_vcpus(void) -{ - if (hax_enabled()) { - CPUState *cpu; - - cpu = first_cpu; - if (!cpu) { - return 0; - } - - for (; cpu != NULL; cpu = CPU_NEXT(cpu)) { - int ret; - - ret = hax_arch_set_registers(cpu->env_ptr); - if (ret < 0) { - return ret; - } - } - } - - return 0; -} - -void hax_reset_vcpu_state(void *opaque) -{ - CPUState *cpu; - for (cpu = first_cpu; cpu != NULL; cpu = CPU_NEXT(cpu)) { - cpu->hax_vcpu->tunnel->user_event_pending = 0; - cpu->hax_vcpu->tunnel->ready_for_interrupt_injection = 0; - } -} - -static void hax_accel_class_init(ObjectClass *oc, void *data) -{ - AccelClass *ac = ACCEL_CLASS(oc); - ac->name = "HAX"; - ac->init_machine = hax_accel_init; - ac->allowed = &hax_allowed; -} - -static const TypeInfo hax_accel_type = { - .name = ACCEL_CLASS_NAME("hax"), - .parent = TYPE_ACCEL, - .class_init = hax_accel_class_init, -}; - -static void hax_type_init(void) -{ - type_register_static(&hax_accel_type); -} - -type_init(hax_type_init); diff --git a/target/i386/hax-darwin.c b/target/i386/hax-darwin.c deleted file mode 100644 index a5426a6dac..0000000000 
--- a/target/i386/hax-darwin.c +++ /dev/null @@ -1,323 +0,0 @@ -/* - * QEMU HAXM support - * - * Copyright (c) 2011 Intel Corporation - * Written by: - * Jiang Yunhong<yunhong.jiang@intel.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -/* HAX module interface - darwin version */ -#include "qemu/osdep.h" -#include <sys/ioctl.h> - -#include "target/i386/hax-i386.h" - -hax_fd hax_mod_open(void) -{ - int fd = open("/dev/HAX", O_RDWR); - if (fd == -1) { - fprintf(stderr, "Failed to open the hax module\n"); - } - - fcntl(fd, F_SETFD, FD_CLOEXEC); - - return fd; -} - -int hax_populate_ram(uint64_t va, uint64_t size) -{ - int ret; - - if (!hax_global.vm || !hax_global.vm->fd) { - fprintf(stderr, "Allocate memory before vm create?\n"); - return -EINVAL; - } - - if (hax_global.supports_64bit_ramblock) { - struct hax_ramblock_info ramblock = { - .start_va = va, - .size = size, - .reserved = 0 - }; - - ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_ADD_RAMBLOCK, &ramblock); - } else { - struct hax_alloc_ram_info info = { - .size = (uint32_t)size, - .pad = 0, - .va = va - }; - - ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_ALLOC_RAM, &info); - } - if (ret < 0) { - fprintf(stderr, "Failed to register RAM block: ret=%d, va=0x%" PRIx64 - ", size=0x%" PRIx64 ", method=%s\n", ret, va, size, - hax_global.supports_64bit_ramblock ? 
"new" : "legacy"); - return ret; - } - return 0; -} - -int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags) -{ - struct hax_set_ram_info info; - int ret; - - info.pa_start = start_pa; - info.size = size; - info.va = host_va; - info.flags = (uint8_t) flags; - - ret = ioctl(hax_global.vm->fd, HAX_VM_IOCTL_SET_RAM, &info); - if (ret < 0) { - return -errno; - } - return 0; -} - -int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap) -{ - int ret; - - ret = ioctl(hax->fd, HAX_IOCTL_CAPABILITY, cap); - if (ret == -1) { - fprintf(stderr, "Failed to get HAX capability\n"); - return -errno; - } - - return 0; -} - -int hax_mod_version(struct hax_state *hax, struct hax_module_version *version) -{ - int ret; - - ret = ioctl(hax->fd, HAX_IOCTL_VERSION, version); - if (ret == -1) { - fprintf(stderr, "Failed to get HAX version\n"); - return -errno; - } - - return 0; -} - -static char *hax_vm_devfs_string(int vm_id) -{ - char *name; - - if (vm_id > MAX_VM_ID) { - fprintf(stderr, "Too big VM id\n"); - return NULL; - } - -#define HAX_VM_DEVFS "/dev/hax_vm/vmxx" - name = g_strdup(HAX_VM_DEVFS); - if (!name) { - return NULL; - } - - snprintf(name, sizeof HAX_VM_DEVFS, "/dev/hax_vm/vm%02d", vm_id); - return name; -} - -static char *hax_vcpu_devfs_string(int vm_id, int vcpu_id) -{ - char *name; - - if (vm_id > MAX_VM_ID || vcpu_id > MAX_VCPU_ID) { - fprintf(stderr, "Too big vm id %x or vcpu id %x\n", vm_id, vcpu_id); - return NULL; - } - -#define HAX_VCPU_DEVFS "/dev/hax_vmxx/vcpuxx" - name = g_strdup(HAX_VCPU_DEVFS); - if (!name) { - return NULL; - } - - snprintf(name, sizeof HAX_VCPU_DEVFS, "/dev/hax_vm%02d/vcpu%02d", - vm_id, vcpu_id); - return name; -} - -int hax_host_create_vm(struct hax_state *hax, int *vmid) -{ - int ret; - int vm_id = 0; - - if (hax_invalid_fd(hax->fd)) { - return -EINVAL; - } - - if (hax->vm) { - return 0; - } - - ret = ioctl(hax->fd, HAX_IOCTL_CREATE_VM, &vm_id); - *vmid = vm_id; - return ret; -} - -hax_fd 
hax_host_open_vm(struct hax_state *hax, int vm_id) -{ - hax_fd fd; - char *vm_name = NULL; - - vm_name = hax_vm_devfs_string(vm_id); - if (!vm_name) { - return -1; - } - - fd = open(vm_name, O_RDWR); - g_free(vm_name); - - fcntl(fd, F_SETFD, FD_CLOEXEC); - - return fd; -} - -int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion) -{ - int ret; - - if (hax_invalid_fd(vm_fd)) { - return -EINVAL; - } - - ret = ioctl(vm_fd, HAX_VM_IOCTL_NOTIFY_QEMU_VERSION, qversion); - - if (ret < 0) { - fprintf(stderr, "Failed to notify qemu API version\n"); - return ret; - } - return 0; -} - -/* Simply assume the size should be bigger than the hax_tunnel, - * since the hax_tunnel can be extended later with compatibility considered - */ -int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid) -{ - int ret; - - ret = ioctl(vm_fd, HAX_VM_IOCTL_VCPU_CREATE, &vcpuid); - if (ret < 0) { - fprintf(stderr, "Failed to create vcpu %x\n", vcpuid); - } - - return ret; -} - -hax_fd hax_host_open_vcpu(int vmid, int vcpuid) -{ - char *devfs_path = NULL; - hax_fd fd; - - devfs_path = hax_vcpu_devfs_string(vmid, vcpuid); - if (!devfs_path) { - fprintf(stderr, "Failed to get the devfs\n"); - return -EINVAL; - } - - fd = open(devfs_path, O_RDWR); - g_free(devfs_path); - if (fd < 0) { - fprintf(stderr, "Failed to open the vcpu devfs\n"); - } - fcntl(fd, F_SETFD, FD_CLOEXEC); - return fd; -} - -int hax_host_setup_vcpu_channel(struct hax_vcpu_state *vcpu) -{ - int ret; - struct hax_tunnel_info info; - - ret = ioctl(vcpu->fd, HAX_VCPU_IOCTL_SETUP_TUNNEL, &info); - if (ret) { - fprintf(stderr, "Failed to setup the hax tunnel\n"); - return ret; - } - - if (!valid_hax_tunnel_size(info.size)) { - fprintf(stderr, "Invalid hax tunnel size %x\n", info.size); - ret = -EINVAL; - return ret; - } - - vcpu->tunnel = (struct hax_tunnel *) (intptr_t) (info.va); - vcpu->iobuf = (unsigned char *) (intptr_t) (info.io_va); - return 0; -} - -int hax_vcpu_run(struct hax_vcpu_state *vcpu) -{ - return 
ioctl(vcpu->fd, HAX_VCPU_IOCTL_RUN, NULL); -} - -int hax_sync_fpu(CPUArchState *env, struct fx_layout *fl, int set) -{ - int ret, fd; - - fd = hax_vcpu_get_fd(env); - if (fd <= 0) { - return -1; - } - - if (set) { - ret = ioctl(fd, HAX_VCPU_IOCTL_SET_FPU, fl); - } else { - ret = ioctl(fd, HAX_VCPU_IOCTL_GET_FPU, fl); - } - return ret; -} - -int hax_sync_msr(CPUArchState *env, struct hax_msr_data *msrs, int set) -{ - int ret, fd; - - fd = hax_vcpu_get_fd(env); - if (fd <= 0) { - return -1; - } - if (set) { - ret = ioctl(fd, HAX_VCPU_IOCTL_SET_MSRS, msrs); - } else { - ret = ioctl(fd, HAX_VCPU_IOCTL_GET_MSRS, msrs); - } - return ret; -} - -int hax_sync_vcpu_state(CPUArchState *env, struct vcpu_state_t *state, int set) -{ - int ret, fd; - - fd = hax_vcpu_get_fd(env); - if (fd <= 0) { - return -1; - } - - if (set) { - ret = ioctl(fd, HAX_VCPU_SET_REGS, state); - } else { - ret = ioctl(fd, HAX_VCPU_GET_REGS, state); - } - return ret; -} - -int hax_inject_interrupt(CPUArchState *env, int vector) -{ - int fd; - - fd = hax_vcpu_get_fd(env); - if (fd <= 0) { - return -1; - } - - return ioctl(fd, HAX_VCPU_IOCTL_INTERRUPT, &vector); -} diff --git a/target/i386/hax-darwin.h b/target/i386/hax-darwin.h deleted file mode 100644 index 51af0e8c88..0000000000 --- a/target/i386/hax-darwin.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * QEMU HAXM support - * - * Copyright (c) 2011 Intel Corporation - * Written by: - * Jiang Yunhong<yunhong.jiang@intel.com> - * Xin Xiaohui<xiaohui.xin@intel.com> - * Zhang Xiantao<xiantao.zhang@intel.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - -#ifndef TARGET_I386_HAX_DARWIN_H -#define TARGET_I386_HAX_DARWIN_H - -#include <sys/ioctl.h> - -#define HAX_INVALID_FD (-1) -static inline int hax_invalid_fd(hax_fd fd) -{ - return fd <= 0; -} - -static inline void hax_mod_close(struct hax_state *hax) -{ - close(hax->fd); -} - -static inline void hax_close_fd(hax_fd fd) -{ - close(fd); -} - -/* HAX model level ioctl */ -#define HAX_IOCTL_VERSION _IOWR(0, 0x20, struct hax_module_version) -#define HAX_IOCTL_CREATE_VM _IOWR(0, 0x21, uint32_t) -#define HAX_IOCTL_DESTROY_VM _IOW(0, 0x22, uint32_t) -#define HAX_IOCTL_CAPABILITY _IOR(0, 0x23, struct hax_capabilityinfo) - -#define HAX_VM_IOCTL_VCPU_CREATE _IOWR(0, 0x80, uint32_t) -#define HAX_VM_IOCTL_ALLOC_RAM _IOWR(0, 0x81, struct hax_alloc_ram_info) -#define HAX_VM_IOCTL_SET_RAM _IOWR(0, 0x82, struct hax_set_ram_info) -#define HAX_VM_IOCTL_VCPU_DESTROY _IOW(0, 0x83, uint32_t) -#define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION _IOW(0, 0x84, struct hax_qemu_version) -#define HAX_VM_IOCTL_ADD_RAMBLOCK _IOW(0, 0x85, struct hax_ramblock_info) - -#define HAX_VCPU_IOCTL_RUN _IO(0, 0xc0) -#define HAX_VCPU_IOCTL_SET_MSRS _IOWR(0, 0xc1, struct hax_msr_data) -#define HAX_VCPU_IOCTL_GET_MSRS _IOWR(0, 0xc2, struct hax_msr_data) - -#define HAX_VCPU_IOCTL_SET_FPU _IOW(0, 0xc3, struct fx_layout) -#define HAX_VCPU_IOCTL_GET_FPU _IOR(0, 0xc4, struct fx_layout) - -#define HAX_VCPU_IOCTL_SETUP_TUNNEL _IOWR(0, 0xc5, struct hax_tunnel_info) -#define HAX_VCPU_IOCTL_INTERRUPT _IOWR(0, 0xc6, uint32_t) -#define HAX_VCPU_SET_REGS _IOWR(0, 0xc7, struct vcpu_state_t) -#define HAX_VCPU_GET_REGS _IOWR(0, 0xc8, struct vcpu_state_t) - -#endif /* TARGET_I386_HAX_DARWIN_H */ diff --git a/target/i386/hax-i386.h b/target/i386/hax-i386.h deleted file mode 100644 index 6abc156f88..0000000000 --- a/target/i386/hax-i386.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * QEMU HAXM support - * - * Copyright (c) 2011 Intel Corporation - * Written by: - * Jiang Yunhong<yunhong.jiang@intel.com> - * - * This work is 
licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef _HAX_I386_H -#define _HAX_I386_H - -#include "cpu.h" -#include "sysemu/hax.h" - -#ifdef CONFIG_DARWIN -typedef int hax_fd; -#endif - -#ifdef CONFIG_WIN32 -typedef HANDLE hax_fd; -#endif - -extern struct hax_state hax_global; -struct hax_vcpu_state { - hax_fd fd; - int vcpu_id; - struct hax_tunnel *tunnel; - unsigned char *iobuf; -}; - -struct hax_state { - hax_fd fd; /* the global hax device interface */ - uint32_t version; - struct hax_vm *vm; - uint64_t mem_quota; - bool supports_64bit_ramblock; -}; - -#define HAX_MAX_VCPU 0x10 -#define MAX_VM_ID 0x40 -#define MAX_VCPU_ID 0x40 - -struct hax_vm { - hax_fd fd; - int id; - struct hax_vcpu_state *vcpus[HAX_MAX_VCPU]; -}; - -#ifdef NEED_CPU_H -/* Functions exported to host specific mode */ -hax_fd hax_vcpu_get_fd(CPUArchState *env); -int valid_hax_tunnel_size(uint16_t size); - -/* Host specific functions */ -int hax_mod_version(struct hax_state *hax, struct hax_module_version *version); -int hax_inject_interrupt(CPUArchState *env, int vector); -struct hax_vm *hax_vm_create(struct hax_state *hax); -int hax_vcpu_run(struct hax_vcpu_state *vcpu); -int hax_vcpu_create(int id); -int hax_sync_vcpu_state(CPUArchState *env, struct vcpu_state_t *state, - int set); -int hax_sync_msr(CPUArchState *env, struct hax_msr_data *msrs, int set); -int hax_sync_fpu(CPUArchState *env, struct fx_layout *fl, int set); -#endif - -int hax_vm_destroy(struct hax_vm *vm); -int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap); -int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion); -int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags); - -/* Common host function */ -int hax_host_create_vm(struct hax_state *hax, int *vm_id); -hax_fd hax_host_open_vm(struct hax_state *hax, int vm_id); -int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid); -hax_fd 
hax_host_open_vcpu(int vmid, int vcpuid); -int hax_host_setup_vcpu_channel(struct hax_vcpu_state *vcpu); -hax_fd hax_mod_open(void); -void hax_memory_init(void); - - -#ifdef CONFIG_DARWIN -#include "target/i386/hax-darwin.h" -#endif - -#ifdef CONFIG_WIN32 -#include "target/i386/hax-windows.h" -#endif - -#include "target/i386/hax-interface.h" - -#endif diff --git a/target/i386/hax-interface.h b/target/i386/hax-interface.h deleted file mode 100644 index 93d5fcb1dc..0000000000 --- a/target/i386/hax-interface.h +++ /dev/null @@ -1,369 +0,0 @@ -/* - * QEMU HAXM support - * - * Copyright (c) 2011 Intel Corporation - * Written by: - * Jiang Yunhong<yunhong.jiang@intel.com> - * Xin Xiaohui<xiaohui.xin@intel.com> - * Zhang Xiantao<xiantao.zhang@intel.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -/* Interface with HAX kernel module */ - -#ifndef _HAX_INTERFACE_H -#define _HAX_INTERFACE_H - -/* fx_layout has 3 formats table 3-56, 512bytes */ -struct fx_layout { - uint16_t fcw; - uint16_t fsw; - uint8_t ftw; - uint8_t res1; - uint16_t fop; - union { - struct { - uint32_t fip; - uint16_t fcs; - uint16_t res2; - }; - uint64_t fpu_ip; - }; - union { - struct { - uint32_t fdp; - uint16_t fds; - uint16_t res3; - }; - uint64_t fpu_dp; - }; - uint32_t mxcsr; - uint32_t mxcsr_mask; - uint8_t st_mm[8][16]; - uint8_t mmx_1[8][16]; - uint8_t mmx_2[8][16]; - uint8_t pad[96]; -} __attribute__ ((aligned(8))); - -struct vmx_msr { - uint64_t entry; - uint64_t value; -} __attribute__ ((__packed__)); - -/* - * Fixed array is not good, but it makes Mac support a bit easier by avoiding - * memory map or copyin staff. 
- */ -#define HAX_MAX_MSR_ARRAY 0x20 -struct hax_msr_data { - uint16_t nr_msr; - uint16_t done; - uint16_t pad[2]; - struct vmx_msr entries[HAX_MAX_MSR_ARRAY]; -} __attribute__ ((__packed__)); - -union interruptibility_state_t { - uint32_t raw; - struct { - uint32_t sti_blocking:1; - uint32_t movss_blocking:1; - uint32_t smi_blocking:1; - uint32_t nmi_blocking:1; - uint32_t reserved:28; - }; - uint64_t pad; -}; - -typedef union interruptibility_state_t interruptibility_state_t; - -/* Segment descriptor */ -struct segment_desc_t { - uint16_t selector; - uint16_t _dummy; - uint32_t limit; - uint64_t base; - union { - struct { - uint32_t type:4; - uint32_t desc:1; - uint32_t dpl:2; - uint32_t present:1; - uint32_t:4; - uint32_t available:1; - uint32_t long_mode:1; - uint32_t operand_size:1; - uint32_t granularity:1; - uint32_t null:1; - uint32_t:15; - }; - uint32_t ar; - }; - uint32_t ipad; -}; - -typedef struct segment_desc_t segment_desc_t; - -struct vcpu_state_t { - union { - uint64_t _regs[16]; - struct { - union { - struct { - uint8_t _al, _ah; - }; - uint16_t _ax; - uint32_t _eax; - uint64_t _rax; - }; - union { - struct { - uint8_t _cl, _ch; - }; - uint16_t _cx; - uint32_t _ecx; - uint64_t _rcx; - }; - union { - struct { - uint8_t _dl, _dh; - }; - uint16_t _dx; - uint32_t _edx; - uint64_t _rdx; - }; - union { - struct { - uint8_t _bl, _bh; - }; - uint16_t _bx; - uint32_t _ebx; - uint64_t _rbx; - }; - union { - uint16_t _sp; - uint32_t _esp; - uint64_t _rsp; - }; - union { - uint16_t _bp; - uint32_t _ebp; - uint64_t _rbp; - }; - union { - uint16_t _si; - uint32_t _esi; - uint64_t _rsi; - }; - union { - uint16_t _di; - uint32_t _edi; - uint64_t _rdi; - }; - - uint64_t _r8; - uint64_t _r9; - uint64_t _r10; - uint64_t _r11; - uint64_t _r12; - uint64_t _r13; - uint64_t _r14; - uint64_t _r15; - }; - }; - - union { - uint32_t _eip; - uint64_t _rip; - }; - - union { - uint32_t _eflags; - uint64_t _rflags; - }; - - segment_desc_t _cs; - segment_desc_t _ss; - 
segment_desc_t _ds; - segment_desc_t _es; - segment_desc_t _fs; - segment_desc_t _gs; - segment_desc_t _ldt; - segment_desc_t _tr; - - segment_desc_t _gdt; - segment_desc_t _idt; - - uint64_t _cr0; - uint64_t _cr2; - uint64_t _cr3; - uint64_t _cr4; - - uint64_t _dr0; - uint64_t _dr1; - uint64_t _dr2; - uint64_t _dr3; - uint64_t _dr6; - uint64_t _dr7; - uint64_t _pde; - - uint32_t _efer; - - uint32_t _sysenter_cs; - uint64_t _sysenter_eip; - uint64_t _sysenter_esp; - - uint32_t _activity_state; - uint32_t pad; - interruptibility_state_t _interruptibility_state; -}; - -/* HAX exit status */ -enum exit_status { - /* IO port request */ - HAX_EXIT_IO = 1, - /* MMIO instruction emulation */ - HAX_EXIT_MMIO, - /* QEMU emulation mode request, currently means guest enter non-PG mode */ - HAX_EXIT_REAL, - /* - * Interrupt window open, qemu can inject interrupt now - * Also used when signal pending since at that time qemu usually need - * check interrupt - */ - HAX_EXIT_INTERRUPT, - /* Unknown vmexit, mostly trigger reboot */ - HAX_EXIT_UNKNOWN_VMEXIT, - /* HALT from guest */ - HAX_EXIT_HLT, - /* Reboot request, like because of tripple fault in guest */ - HAX_EXIT_STATECHANGE, - /* the vcpu is now only paused when destroy, so simply return to hax */ - HAX_EXIT_PAUSED, - HAX_EXIT_FAST_MMIO, -}; - -/* - * The interface definition: - * 1. vcpu_run execute will return 0 on success, otherwise mean failed - * 2. exit_status return the exit reason, as stated in enum exit_status - * 3. 
exit_reason is the vmx exit reason - */ -struct hax_tunnel { - uint32_t _exit_reason; - uint32_t _exit_flag; - uint32_t _exit_status; - uint32_t user_event_pending; - int ready_for_interrupt_injection; - int request_interrupt_window; - union { - struct { - /* 0: read, 1: write */ -#define HAX_EXIT_IO_IN 1 -#define HAX_EXIT_IO_OUT 0 - uint8_t _direction; - uint8_t _df; - uint16_t _size; - uint16_t _port; - uint16_t _count; - uint8_t _flags; - uint8_t _pad0; - uint16_t _pad1; - uint32_t _pad2; - uint64_t _vaddr; - } pio; - struct { - uint64_t gla; - } mmio; - struct { - } state; - }; -} __attribute__ ((__packed__)); - -struct hax_module_version { - uint32_t compat_version; - uint32_t cur_version; -} __attribute__ ((__packed__)); - -/* This interface is support only after API version 2 */ -struct hax_qemu_version { - /* Current API version in QEMU */ - uint32_t cur_version; - /* The minimum API version supported by QEMU */ - uint32_t min_version; -} __attribute__ ((__packed__)); - -/* The mac specfic interface to qemu, mostly is ioctl related */ -struct hax_tunnel_info { - uint64_t va; - uint64_t io_va; - uint16_t size; - uint16_t pad[3]; -} __attribute__ ((__packed__)); - -struct hax_alloc_ram_info { - uint32_t size; - uint32_t pad; - uint64_t va; -} __attribute__ ((__packed__)); - -struct hax_ramblock_info { - uint64_t start_va; - uint64_t size; - uint64_t reserved; -} __attribute__ ((__packed__)); - -#define HAX_RAM_INFO_ROM 0x01 /* Read-Only */ -#define HAX_RAM_INFO_INVALID 0x80 /* Unmapped, usually used for MMIO */ -struct hax_set_ram_info { - uint64_t pa_start; - uint32_t size; - uint8_t flags; - uint8_t pad[3]; - uint64_t va; -} __attribute__ ((__packed__)); - -#define HAX_CAP_STATUS_WORKING 0x1 -#define HAX_CAP_STATUS_NOTWORKING 0x0 -#define HAX_CAP_WORKSTATUS_MASK 0x1 - -#define HAX_CAP_FAILREASON_VT 0x1 -#define HAX_CAP_FAILREASON_NX 0x2 - -#define HAX_CAP_MEMQUOTA 0x2 -#define HAX_CAP_UG 0x4 -#define HAX_CAP_64BIT_RAMBLOCK 0x8 - -struct hax_capabilityinfo { 
- /* bit 0: 1 - working - * 0 - not working, possibly because NT/NX disabled - * bit 1: 1 - memory limitation working - * 0 - no memory limitation - */ - uint16_t wstatus; - /* valid when not working - * bit 0: VT not enabeld - * bit 1: NX not enabled*/ - uint16_t winfo; - uint32_t pad; - uint64_t mem_quota; -} __attribute__ ((__packed__)); - -struct hax_fastmmio { - uint64_t gpa; - union { - uint64_t value; - uint64_t gpa2; /* since HAX API v4 */ - }; - uint8_t size; - uint8_t direction; - uint16_t reg_index; - uint32_t pad0; - uint64_t _cr0; - uint64_t _cr2; - uint64_t _cr3; - uint64_t _cr4; -} __attribute__ ((__packed__)); -#endif diff --git a/target/i386/hax-mem.c b/target/i386/hax-mem.c deleted file mode 100644 index 5c37e94caa..0000000000 --- a/target/i386/hax-mem.c +++ /dev/null @@ -1,321 +0,0 @@ -/* - * HAX memory mapping operations - * - * Copyright (c) 2015-16 Intel Corporation - * Copyright 2016 Google, Inc. - * - * This work is licensed under the terms of the GNU GPL, version 2. See - * the COPYING file in the top-level directory. - */ - -#include "qemu/osdep.h" -#include "cpu.h" -#include "exec/address-spaces.h" -#include "qemu/error-report.h" - -#include "target/i386/hax-i386.h" -#include "qemu/queue.h" - -#define DEBUG_HAX_MEM 0 - -#define DPRINTF(fmt, ...) \ - do { \ - if (DEBUG_HAX_MEM) { \ - fprintf(stdout, fmt, ## __VA_ARGS__); \ - } \ - } while (0) - -/** - * HAXMapping: describes a pending guest physical memory mapping - * - * @start_pa: a guest physical address marking the start of the region; must be - * page-aligned - * @size: a guest physical address marking the end of the region; must be - * page-aligned - * @host_va: the host virtual address of the start of the mapping - * @flags: mapping parameters e.g. 
HAX_RAM_INFO_ROM or HAX_RAM_INFO_INVALID - * @entry: additional fields for linking #HAXMapping instances together - */ -typedef struct HAXMapping { - uint64_t start_pa; - uint32_t size; - uint64_t host_va; - int flags; - QTAILQ_ENTRY(HAXMapping) entry; -} HAXMapping; - -/* - * A doubly-linked list (actually a tail queue) of the pending page mappings - * for the ongoing memory transaction. - * - * It is used to optimize the number of page mapping updates done through the - * kernel module. For example, it's effective when a driver is digging an MMIO - * hole inside an existing memory mapping. It will get a deletion of the whole - * region, then the addition of the 2 remaining RAM areas around the hole and - * finally the memory transaction commit. During the commit, it will effectively - * send to the kernel only the removal of the pages from the MMIO hole after - * having computed locally the result of the deletion and additions. - */ -static QTAILQ_HEAD(HAXMappingListHead, HAXMapping) mappings = - QTAILQ_HEAD_INITIALIZER(mappings); - -/** - * hax_mapping_dump_list: dumps @mappings to stdout (for debugging) - */ -static void hax_mapping_dump_list(void) -{ - HAXMapping *entry; - - DPRINTF("%s updates:\n", __func__); - QTAILQ_FOREACH(entry, &mappings, entry) { - DPRINTF("\t%c 0x%016" PRIx64 "->0x%016" PRIx64 " VA 0x%016" PRIx64 - "%s\n", entry->flags & HAX_RAM_INFO_INVALID ? '-' : '+', - entry->start_pa, entry->start_pa + entry->size, entry->host_va, - entry->flags & HAX_RAM_INFO_ROM ? 
" ROM" : ""); - } -} - -static void hax_insert_mapping_before(HAXMapping *next, uint64_t start_pa, - uint32_t size, uint64_t host_va, - uint8_t flags) -{ - HAXMapping *entry; - - entry = g_malloc0(sizeof(*entry)); - entry->start_pa = start_pa; - entry->size = size; - entry->host_va = host_va; - entry->flags = flags; - if (!next) { - QTAILQ_INSERT_TAIL(&mappings, entry, entry); - } else { - QTAILQ_INSERT_BEFORE(next, entry, entry); - } -} - -static bool hax_mapping_is_opposite(HAXMapping *entry, uint64_t host_va, - uint8_t flags) -{ - /* removed then added without change for the read-only flag */ - bool nop_flags = (entry->flags ^ flags) == HAX_RAM_INFO_INVALID; - - return (entry->host_va == host_va) && nop_flags; -} - -static void hax_update_mapping(uint64_t start_pa, uint32_t size, - uint64_t host_va, uint8_t flags) -{ - uint64_t end_pa = start_pa + size; - HAXMapping *entry, *next; - - QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) { - uint32_t chunk_sz; - if (start_pa >= entry->start_pa + entry->size) { - continue; - } - if (start_pa < entry->start_pa) { - chunk_sz = end_pa <= entry->start_pa ? 
size - : entry->start_pa - start_pa; - hax_insert_mapping_before(entry, start_pa, chunk_sz, - host_va, flags); - start_pa += chunk_sz; - host_va += chunk_sz; - size -= chunk_sz; - } else if (start_pa > entry->start_pa) { - /* split the existing chunk at start_pa */ - chunk_sz = start_pa - entry->start_pa; - hax_insert_mapping_before(entry, entry->start_pa, chunk_sz, - entry->host_va, entry->flags); - entry->start_pa += chunk_sz; - entry->host_va += chunk_sz; - entry->size -= chunk_sz; - } - /* now start_pa == entry->start_pa */ - chunk_sz = MIN(size, entry->size); - if (chunk_sz) { - bool nop = hax_mapping_is_opposite(entry, host_va, flags); - bool partial = chunk_sz < entry->size; - if (partial) { - /* remove the beginning of the existing chunk */ - entry->start_pa += chunk_sz; - entry->host_va += chunk_sz; - entry->size -= chunk_sz; - if (!nop) { - hax_insert_mapping_before(entry, start_pa, chunk_sz, - host_va, flags); - } - } else { /* affects the full mapping entry */ - if (nop) { /* no change to this mapping, remove it */ - QTAILQ_REMOVE(&mappings, entry, entry); - g_free(entry); - } else { /* update mapping properties */ - entry->host_va = host_va; - entry->flags = flags; - } - } - start_pa += chunk_sz; - host_va += chunk_sz; - size -= chunk_sz; - } - if (!size) { /* we are done */ - break; - } - } - if (size) { /* add the leftover */ - hax_insert_mapping_before(NULL, start_pa, size, host_va, flags); - } -} - -static void hax_process_section(MemoryRegionSection *section, uint8_t flags) -{ - MemoryRegion *mr = section->mr; - hwaddr start_pa = section->offset_within_address_space; - ram_addr_t size = int128_get64(section->size); - unsigned int delta; - uint64_t host_va; - uint32_t max_mapping_size; - - /* We only care about RAM and ROM regions */ - if (!memory_region_is_ram(mr)) { - if (memory_region_is_romd(mr)) { - /* HAXM kernel module does not support ROMD yet */ - warn_report("Ignoring ROMD region 0x%016" PRIx64 "->0x%016" PRIx64, - start_pa, start_pa + 
size); - } - return; - } - - /* Adjust start_pa and size so that they are page-aligned. (Cf - * kvm_set_phys_mem() in kvm-all.c). - */ - delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask); - delta &= ~qemu_real_host_page_mask; - if (delta > size) { - return; - } - start_pa += delta; - size -= delta; - size &= qemu_real_host_page_mask; - if (!size || (start_pa & ~qemu_real_host_page_mask)) { - return; - } - - host_va = (uintptr_t)memory_region_get_ram_ptr(mr) - + section->offset_within_region + delta; - if (memory_region_is_rom(section->mr)) { - flags |= HAX_RAM_INFO_ROM; - } - - /* - * The kernel module interface uses 32-bit sizes: - * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_set_ram - * - * If the mapping size is longer than 32 bits, we can't process it in one - * call into the kernel. Instead, we split the mapping into smaller ones, - * and call hax_update_mapping() on each. - */ - max_mapping_size = UINT32_MAX & qemu_real_host_page_mask; - while (size > max_mapping_size) { - hax_update_mapping(start_pa, max_mapping_size, host_va, flags); - start_pa += max_mapping_size; - size -= max_mapping_size; - host_va += max_mapping_size; - } - /* Now size <= max_mapping_size */ - hax_update_mapping(start_pa, (uint32_t)size, host_va, flags); -} - -static void hax_region_add(MemoryListener *listener, - MemoryRegionSection *section) -{ - memory_region_ref(section->mr); - hax_process_section(section, 0); -} - -static void hax_region_del(MemoryListener *listener, - MemoryRegionSection *section) -{ - hax_process_section(section, HAX_RAM_INFO_INVALID); - memory_region_unref(section->mr); -} - -static void hax_transaction_begin(MemoryListener *listener) -{ - g_assert(QTAILQ_EMPTY(&mappings)); -} - -static void hax_transaction_commit(MemoryListener *listener) -{ - if (!QTAILQ_EMPTY(&mappings)) { - HAXMapping *entry, *next; - - if (DEBUG_HAX_MEM) { - hax_mapping_dump_list(); - } - QTAILQ_FOREACH_SAFE(entry, &mappings, entry, next) { - if 
(entry->flags & HAX_RAM_INFO_INVALID) { - /* for unmapping, put the values expected by the kernel */ - entry->flags = HAX_RAM_INFO_INVALID; - entry->host_va = 0; - } - if (hax_set_ram(entry->start_pa, entry->size, - entry->host_va, entry->flags)) { - fprintf(stderr, "%s: Failed mapping @0x%016" PRIx64 "+0x%" - PRIx32 " flags %02x\n", __func__, entry->start_pa, - entry->size, entry->flags); - } - QTAILQ_REMOVE(&mappings, entry, entry); - g_free(entry); - } - } -} - -/* currently we fake the dirty bitmap sync, always dirty */ -static void hax_log_sync(MemoryListener *listener, - MemoryRegionSection *section) -{ - MemoryRegion *mr = section->mr; - - if (!memory_region_is_ram(mr)) { - /* Skip MMIO regions */ - return; - } - - memory_region_set_dirty(mr, 0, int128_get64(section->size)); -} - -static MemoryListener hax_memory_listener = { - .begin = hax_transaction_begin, - .commit = hax_transaction_commit, - .region_add = hax_region_add, - .region_del = hax_region_del, - .log_sync = hax_log_sync, - .priority = 10, -}; - -static void hax_ram_block_added(RAMBlockNotifier *n, void *host, size_t size) -{ - /* - * We must register each RAM block with the HAXM kernel module, or - * hax_set_ram() will fail for any mapping into the RAM block: - * https://github.com/intel/haxm/blob/master/API.md#hax_vm_ioctl_alloc_ram - * - * Old versions of the HAXM kernel module (< 6.2.0) used to preallocate all - * host physical pages for the RAM block as part of this registration - * process, hence the name hax_populate_ram(). 
- */ - if (hax_populate_ram((uint64_t)(uintptr_t)host, size) < 0) { - fprintf(stderr, "HAX failed to populate RAM\n"); - abort(); - } -} - -static struct RAMBlockNotifier hax_ram_notifier = { - .ram_block_added = hax_ram_block_added, -}; - -void hax_memory_init(void) -{ - ram_block_notifier_add(&hax_ram_notifier); - memory_listener_register(&hax_memory_listener, &address_space_memory); -} diff --git a/target/i386/hax-windows.c b/target/i386/hax-windows.c deleted file mode 100644 index 5729ad9b48..0000000000 --- a/target/i386/hax-windows.c +++ /dev/null @@ -1,494 +0,0 @@ -/* - * QEMU HAXM support - * - * Copyright (c) 2011 Intel Corporation - * Written by: - * Jiang Yunhong<yunhong.jiang@intel.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "cpu.h" -#include "hax-i386.h" - -/* - * return 0 when success, -1 when driver not loaded, - * other negative value for other failure - */ -static int hax_open_device(hax_fd *fd) -{ - uint32_t errNum = 0; - HANDLE hDevice; - - if (!fd) { - return -2; - } - - hDevice = CreateFile("\\\\.\\HAX", - GENERIC_READ | GENERIC_WRITE, - 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); - - if (hDevice == INVALID_HANDLE_VALUE) { - fprintf(stderr, "Failed to open the HAX device!\n"); - errNum = GetLastError(); - if (errNum == ERROR_FILE_NOT_FOUND) { - return -1; - } - return -2; - } - *fd = hDevice; - return 0; -} - -/* hax_fd hax_mod_open */ - hax_fd hax_mod_open(void) -{ - int ret; - hax_fd fd = NULL; - - ret = hax_open_device(&fd); - if (ret != 0) { - fprintf(stderr, "Open HAX device failed\n"); - } - - return fd; -} - -int hax_populate_ram(uint64_t va, uint64_t size) -{ - int ret; - HANDLE hDeviceVM; - DWORD dSize = 0; - - if (!hax_global.vm || !hax_global.vm->fd) { - fprintf(stderr, "Allocate memory before vm create?\n"); - return -EINVAL; - } - - hDeviceVM = hax_global.vm->fd; - if 
(hax_global.supports_64bit_ramblock) { - struct hax_ramblock_info ramblock = { - .start_va = va, - .size = size, - .reserved = 0 - }; - - ret = DeviceIoControl(hDeviceVM, - HAX_VM_IOCTL_ADD_RAMBLOCK, - &ramblock, sizeof(ramblock), NULL, 0, &dSize, - (LPOVERLAPPED) NULL); - } else { - struct hax_alloc_ram_info info = { - .size = (uint32_t) size, - .pad = 0, - .va = va - }; - - ret = DeviceIoControl(hDeviceVM, - HAX_VM_IOCTL_ALLOC_RAM, - &info, sizeof(info), NULL, 0, &dSize, - (LPOVERLAPPED) NULL); - } - - if (!ret) { - fprintf(stderr, "Failed to register RAM block: va=0x%" PRIx64 - ", size=0x%" PRIx64 ", method=%s\n", va, size, - hax_global.supports_64bit_ramblock ? "new" : "legacy"); - return ret; - } - - return 0; -} - -int hax_set_ram(uint64_t start_pa, uint32_t size, uint64_t host_va, int flags) -{ - struct hax_set_ram_info info; - HANDLE hDeviceVM = hax_global.vm->fd; - DWORD dSize = 0; - int ret; - - info.pa_start = start_pa; - info.size = size; - info.va = host_va; - info.flags = (uint8_t) flags; - - ret = DeviceIoControl(hDeviceVM, HAX_VM_IOCTL_SET_RAM, - &info, sizeof(info), NULL, 0, &dSize, - (LPOVERLAPPED) NULL); - - if (!ret) { - return -EFAULT; - } else { - return 0; - } -} - -int hax_capability(struct hax_state *hax, struct hax_capabilityinfo *cap) -{ - int ret; - HANDLE hDevice = hax->fd; /* handle to hax module */ - DWORD dSize = 0; - DWORD err = 0; - - if (hax_invalid_fd(hDevice)) { - fprintf(stderr, "Invalid fd for hax device!\n"); - return -ENODEV; - } - - ret = DeviceIoControl(hDevice, HAX_IOCTL_CAPABILITY, NULL, 0, cap, - sizeof(*cap), &dSize, (LPOVERLAPPED) NULL); - - if (!ret) { - err = GetLastError(); - if (err == ERROR_INSUFFICIENT_BUFFER || err == ERROR_MORE_DATA) { - fprintf(stderr, "hax capability is too long to hold.\n"); - } - fprintf(stderr, "Failed to get Hax capability:%luu\n", err); - return -EFAULT; - } else { - return 0; - } -} - -int hax_mod_version(struct hax_state *hax, struct hax_module_version *version) -{ - int ret; - HANDLE 
hDevice = hax->fd; /* handle to hax module */ - DWORD dSize = 0; - DWORD err = 0; - - if (hax_invalid_fd(hDevice)) { - fprintf(stderr, "Invalid fd for hax device!\n"); - return -ENODEV; - } - - ret = DeviceIoControl(hDevice, - HAX_IOCTL_VERSION, - NULL, 0, - version, sizeof(*version), &dSize, - (LPOVERLAPPED) NULL); - - if (!ret) { - err = GetLastError(); - if (err == ERROR_INSUFFICIENT_BUFFER || err == ERROR_MORE_DATA) { - fprintf(stderr, "hax module verion is too long to hold.\n"); - } - fprintf(stderr, "Failed to get Hax module version:%lu\n", err); - return -EFAULT; - } else { - return 0; - } -} - -static char *hax_vm_devfs_string(int vm_id) -{ - char *name; - - if (vm_id > MAX_VM_ID) { - fprintf(stderr, "Too big VM id\n"); - return NULL; - } - -#define HAX_VM_DEVFS "\\\\.\\hax_vmxx" - name = g_strdup(HAX_VM_DEVFS); - if (!name) { - return NULL; - } - - snprintf(name, sizeof HAX_VM_DEVFS, "\\\\.\\hax_vm%02d", vm_id); - return name; -} - -static char *hax_vcpu_devfs_string(int vm_id, int vcpu_id) -{ - char *name; - - if (vm_id > MAX_VM_ID || vcpu_id > MAX_VCPU_ID) { - fprintf(stderr, "Too big vm id %x or vcpu id %x\n", vm_id, vcpu_id); - return NULL; - } - -#define HAX_VCPU_DEVFS "\\\\.\\hax_vmxx_vcpuxx" - name = g_strdup(HAX_VCPU_DEVFS); - if (!name) { - return NULL; - } - - snprintf(name, sizeof HAX_VCPU_DEVFS, "\\\\.\\hax_vm%02d_vcpu%02d", - vm_id, vcpu_id); - return name; -} - -int hax_host_create_vm(struct hax_state *hax, int *vmid) -{ - int ret; - int vm_id = 0; - DWORD dSize = 0; - - if (hax_invalid_fd(hax->fd)) { - return -EINVAL; - } - - if (hax->vm) { - return 0; - } - - ret = DeviceIoControl(hax->fd, - HAX_IOCTL_CREATE_VM, - NULL, 0, &vm_id, sizeof(vm_id), &dSize, - (LPOVERLAPPED) NULL); - if (!ret) { - fprintf(stderr, "Failed to create VM. 
Error code: %lu\n", - GetLastError()); - return -1; - } - *vmid = vm_id; - return 0; -} - -hax_fd hax_host_open_vm(struct hax_state *hax, int vm_id) -{ - char *vm_name = NULL; - hax_fd hDeviceVM; - - vm_name = hax_vm_devfs_string(vm_id); - if (!vm_name) { - fprintf(stderr, "Failed to open VM. VM name is null\n"); - return INVALID_HANDLE_VALUE; - } - - hDeviceVM = CreateFile(vm_name, - GENERIC_READ | GENERIC_WRITE, - 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL); - if (hDeviceVM == INVALID_HANDLE_VALUE) { - fprintf(stderr, "Open the vm device error:%s, ec:%lu\n", - vm_name, GetLastError()); - } - - g_free(vm_name); - return hDeviceVM; -} - -int hax_notify_qemu_version(hax_fd vm_fd, struct hax_qemu_version *qversion) -{ - int ret; - DWORD dSize = 0; - if (hax_invalid_fd(vm_fd)) { - return -EINVAL; - } - ret = DeviceIoControl(vm_fd, - HAX_VM_IOCTL_NOTIFY_QEMU_VERSION, - qversion, sizeof(struct hax_qemu_version), - NULL, 0, &dSize, (LPOVERLAPPED) NULL); - if (!ret) { - fprintf(stderr, "Failed to notify qemu API version\n"); - return -1; - } - return 0; -} - -int hax_host_create_vcpu(hax_fd vm_fd, int vcpuid) -{ - int ret; - DWORD dSize = 0; - - ret = DeviceIoControl(vm_fd, - HAX_VM_IOCTL_VCPU_CREATE, - &vcpuid, sizeof(vcpuid), NULL, 0, &dSize, - (LPOVERLAPPED) NULL); - if (!ret) { - fprintf(stderr, "Failed to create vcpu %x\n", vcpuid); - return -1; - } - - return 0; -} - -hax_fd hax_host_open_vcpu(int vmid, int vcpuid) -{ - char *devfs_path = NULL; - hax_fd hDeviceVCPU; - - devfs_path = hax_vcpu_devfs_string(vmid, vcpuid); - if (!devfs_path) { - fprintf(stderr, "Failed to get the devfs\n"); - return INVALID_HANDLE_VALUE; - } - - hDeviceVCPU = CreateFile(devfs_path, - GENERIC_READ | GENERIC_WRITE, - 0, NULL, CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, - NULL); - - if (hDeviceVCPU == INVALID_HANDLE_VALUE) { - fprintf(stderr, "Failed to open the vcpu devfs\n"); - } - g_free(devfs_path); - return hDeviceVCPU; -} - -int hax_host_setup_vcpu_channel(struct hax_vcpu_state 
*vcpu) -{ - hax_fd hDeviceVCPU = vcpu->fd; - int ret; - struct hax_tunnel_info info; - DWORD dSize = 0; - - ret = DeviceIoControl(hDeviceVCPU, - HAX_VCPU_IOCTL_SETUP_TUNNEL, - NULL, 0, &info, sizeof(info), &dSize, - (LPOVERLAPPED) NULL); - if (!ret) { - fprintf(stderr, "Failed to setup the hax tunnel\n"); - return -1; - } - - if (!valid_hax_tunnel_size(info.size)) { - fprintf(stderr, "Invalid hax tunnel size %x\n", info.size); - ret = -EINVAL; - return ret; - } - vcpu->tunnel = (struct hax_tunnel *) (intptr_t) (info.va); - vcpu->iobuf = (unsigned char *) (intptr_t) (info.io_va); - return 0; -} - -int hax_vcpu_run(struct hax_vcpu_state *vcpu) -{ - int ret; - HANDLE hDeviceVCPU = vcpu->fd; - DWORD dSize = 0; - - ret = DeviceIoControl(hDeviceVCPU, - HAX_VCPU_IOCTL_RUN, - NULL, 0, NULL, 0, &dSize, (LPOVERLAPPED) NULL); - if (!ret) { - return -EFAULT; - } else { - return 0; - } -} - -int hax_sync_fpu(CPUArchState *env, struct fx_layout *fl, int set) -{ - int ret; - hax_fd fd; - HANDLE hDeviceVCPU; - DWORD dSize = 0; - - fd = hax_vcpu_get_fd(env); - if (hax_invalid_fd(fd)) { - return -1; - } - - hDeviceVCPU = fd; - - if (set) { - ret = DeviceIoControl(hDeviceVCPU, - HAX_VCPU_IOCTL_SET_FPU, - fl, sizeof(*fl), NULL, 0, &dSize, - (LPOVERLAPPED) NULL); - } else { - ret = DeviceIoControl(hDeviceVCPU, - HAX_VCPU_IOCTL_GET_FPU, - NULL, 0, fl, sizeof(*fl), &dSize, - (LPOVERLAPPED) NULL); - } - if (!ret) { - return -EFAULT; - } else { - return 0; - } -} - -int hax_sync_msr(CPUArchState *env, struct hax_msr_data *msrs, int set) -{ - int ret; - hax_fd fd; - HANDLE hDeviceVCPU; - DWORD dSize = 0; - - fd = hax_vcpu_get_fd(env); - if (hax_invalid_fd(fd)) { - return -1; - } - hDeviceVCPU = fd; - - if (set) { - ret = DeviceIoControl(hDeviceVCPU, - HAX_VCPU_IOCTL_SET_MSRS, - msrs, sizeof(*msrs), - msrs, sizeof(*msrs), &dSize, (LPOVERLAPPED) NULL); - } else { - ret = DeviceIoControl(hDeviceVCPU, - HAX_VCPU_IOCTL_GET_MSRS, - msrs, sizeof(*msrs), - msrs, sizeof(*msrs), &dSize, 
(LPOVERLAPPED) NULL); - } - if (!ret) { - return -EFAULT; - } else { - return 0; - } -} - -int hax_sync_vcpu_state(CPUArchState *env, struct vcpu_state_t *state, int set) -{ - int ret; - hax_fd fd; - HANDLE hDeviceVCPU; - DWORD dSize; - - fd = hax_vcpu_get_fd(env); - if (hax_invalid_fd(fd)) { - return -1; - } - - hDeviceVCPU = fd; - - if (set) { - ret = DeviceIoControl(hDeviceVCPU, - HAX_VCPU_SET_REGS, - state, sizeof(*state), - NULL, 0, &dSize, (LPOVERLAPPED) NULL); - } else { - ret = DeviceIoControl(hDeviceVCPU, - HAX_VCPU_GET_REGS, - NULL, 0, - state, sizeof(*state), &dSize, - (LPOVERLAPPED) NULL); - } - if (!ret) { - return -EFAULT; - } else { - return 0; - } -} - -int hax_inject_interrupt(CPUArchState *env, int vector) -{ - int ret; - hax_fd fd; - HANDLE hDeviceVCPU; - DWORD dSize; - - fd = hax_vcpu_get_fd(env); - if (hax_invalid_fd(fd)) { - return -1; - } - - hDeviceVCPU = fd; - - ret = DeviceIoControl(hDeviceVCPU, - HAX_VCPU_IOCTL_INTERRUPT, - &vector, sizeof(vector), NULL, 0, &dSize, - (LPOVERLAPPED) NULL); - if (!ret) { - return -EFAULT; - } else { - return 0; - } -} diff --git a/target/i386/hax-windows.h b/target/i386/hax-windows.h deleted file mode 100644 index 12cbd813dc..0000000000 --- a/target/i386/hax-windows.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * QEMU HAXM support - * - * Copyright IBM, Corp. 2008 - * - * Authors: - * Anthony Liguori <aliguori@us.ibm.com> - * - * Copyright (c) 2011 Intel Corporation - * Written by: - * Jiang Yunhong<yunhong.jiang@intel.com> - * Xin Xiaohui<xiaohui.xin@intel.com> - * Zhang Xiantao<xiantao.zhang@intel.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - -#ifndef TARGET_I386_HAX_WINDOWS_H -#define TARGET_I386_HAX_WINDOWS_H - -#include <winioctl.h> -#include <windef.h> - -#define HAX_INVALID_FD INVALID_HANDLE_VALUE - -static inline void hax_mod_close(struct hax_state *hax) -{ - CloseHandle(hax->fd); -} - -static inline void hax_close_fd(hax_fd fd) -{ - CloseHandle(fd); -} - -static inline int hax_invalid_fd(hax_fd fd) -{ - return (fd == INVALID_HANDLE_VALUE); -} - -#define HAX_DEVICE_TYPE 0x4000 - -#define HAX_IOCTL_VERSION CTL_CODE(HAX_DEVICE_TYPE, 0x900, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) -#define HAX_IOCTL_CREATE_VM CTL_CODE(HAX_DEVICE_TYPE, 0x901, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) -#define HAX_IOCTL_CAPABILITY CTL_CODE(HAX_DEVICE_TYPE, 0x910, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) - -#define HAX_VM_IOCTL_VCPU_CREATE CTL_CODE(HAX_DEVICE_TYPE, 0x902, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) -#define HAX_VM_IOCTL_ALLOC_RAM CTL_CODE(HAX_DEVICE_TYPE, 0x903, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) -#define HAX_VM_IOCTL_SET_RAM CTL_CODE(HAX_DEVICE_TYPE, 0x904, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) -#define HAX_VM_IOCTL_VCPU_DESTROY CTL_CODE(HAX_DEVICE_TYPE, 0x905, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) -#define HAX_VM_IOCTL_ADD_RAMBLOCK CTL_CODE(HAX_DEVICE_TYPE, 0x913, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) - -#define HAX_VCPU_IOCTL_RUN CTL_CODE(HAX_DEVICE_TYPE, 0x906, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) -#define HAX_VCPU_IOCTL_SET_MSRS CTL_CODE(HAX_DEVICE_TYPE, 0x907, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) -#define HAX_VCPU_IOCTL_GET_MSRS CTL_CODE(HAX_DEVICE_TYPE, 0x908, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) -#define HAX_VCPU_IOCTL_SET_FPU CTL_CODE(HAX_DEVICE_TYPE, 0x909, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) -#define HAX_VCPU_IOCTL_GET_FPU CTL_CODE(HAX_DEVICE_TYPE, 0x90a, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) - -#define HAX_VCPU_IOCTL_SETUP_TUNNEL CTL_CODE(HAX_DEVICE_TYPE, 0x90b, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) -#define HAX_VCPU_IOCTL_INTERRUPT CTL_CODE(HAX_DEVICE_TYPE, 0x90c, \ - METHOD_BUFFERED, 
FILE_ANY_ACCESS) -#define HAX_VCPU_SET_REGS CTL_CODE(HAX_DEVICE_TYPE, 0x90d, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) -#define HAX_VCPU_GET_REGS CTL_CODE(HAX_DEVICE_TYPE, 0x90e, \ - METHOD_BUFFERED, FILE_ANY_ACCESS) - -#define HAX_VM_IOCTL_NOTIFY_QEMU_VERSION CTL_CODE(HAX_DEVICE_TYPE, 0x910, \ - METHOD_BUFFERED, \ - FILE_ANY_ACCESS) -#endif /* TARGET_I386_HAX_WINDOWS_H */ diff --git a/target/i386/helper.c b/target/i386/helper.c index e695f8ba7a..23ccb23a5b 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -6,7 +6,7 @@ * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -18,16 +18,30 @@ */ #include "qemu/osdep.h" +#include "qapi/qapi-events-run-state.h" #include "cpu.h" #include "exec/exec-all.h" -#include "sysemu/kvm.h" -#include "kvm_i386.h" +#include "sysemu/runstate.h" #ifndef CONFIG_USER_ONLY -#include "sysemu/sysemu.h" #include "sysemu/hw_accel.h" #include "monitor/monitor.h" -#include "hw/i386/apic_internal.h" +#include "kvm/kvm_i386.h" #endif +#include "qemu/log.h" +#ifdef CONFIG_TCG +#include "tcg/insn-start-words.h" +#endif + +void cpu_sync_avx_hflag(CPUX86State *env) +{ + if ((env->cr[4] & CR4_OSXSAVE_MASK) + && (env->xcr0 & (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) + == (XSTATE_SSE_MASK | XSTATE_YMM_MASK)) { + env->hflags |= HF_AVX_EN_MASK; + } else{ + env->hflags &= ~HF_AVX_EN_MASK; + } +} void cpu_sync_bndcs_hflags(CPUX86State *env) { @@ -86,523 +100,6 @@ int cpu_x86_support_mca_broadcast(CPUX86State *env) } /***********************************************************/ -/* x86 debug */ - -static const char *cc_op_str[CC_OP_NB] = { - 
"DYNAMIC", - "EFLAGS", - - "MULB", - "MULW", - "MULL", - "MULQ", - - "ADDB", - "ADDW", - "ADDL", - "ADDQ", - - "ADCB", - "ADCW", - "ADCL", - "ADCQ", - - "SUBB", - "SUBW", - "SUBL", - "SUBQ", - - "SBBB", - "SBBW", - "SBBL", - "SBBQ", - - "LOGICB", - "LOGICW", - "LOGICL", - "LOGICQ", - - "INCB", - "INCW", - "INCL", - "INCQ", - - "DECB", - "DECW", - "DECL", - "DECQ", - - "SHLB", - "SHLW", - "SHLL", - "SHLQ", - - "SARB", - "SARW", - "SARL", - "SARQ", - - "BMILGB", - "BMILGW", - "BMILGL", - "BMILGQ", - - "ADCX", - "ADOX", - "ADCOX", - - "CLR", -}; - -static void -cpu_x86_dump_seg_cache(CPUX86State *env, FILE *f, fprintf_function cpu_fprintf, - const char *name, struct SegmentCache *sc) -{ -#ifdef TARGET_X86_64 - if (env->hflags & HF_CS64_MASK) { - cpu_fprintf(f, "%-3s=%04x %016" PRIx64 " %08x %08x", name, - sc->selector, sc->base, sc->limit, sc->flags & 0x00ffff00); - } else -#endif - { - cpu_fprintf(f, "%-3s=%04x %08x %08x %08x", name, sc->selector, - (uint32_t)sc->base, sc->limit, sc->flags & 0x00ffff00); - } - - if (!(env->hflags & HF_PE_MASK) || !(sc->flags & DESC_P_MASK)) - goto done; - - cpu_fprintf(f, " DPL=%d ", (sc->flags & DESC_DPL_MASK) >> DESC_DPL_SHIFT); - if (sc->flags & DESC_S_MASK) { - if (sc->flags & DESC_CS_MASK) { - cpu_fprintf(f, (sc->flags & DESC_L_MASK) ? "CS64" : - ((sc->flags & DESC_B_MASK) ? "CS32" : "CS16")); - cpu_fprintf(f, " [%c%c", (sc->flags & DESC_C_MASK) ? 'C' : '-', - (sc->flags & DESC_R_MASK) ? 'R' : '-'); - } else { - cpu_fprintf(f, - (sc->flags & DESC_B_MASK || env->hflags & HF_LMA_MASK) - ? "DS " : "DS16"); - cpu_fprintf(f, " [%c%c", (sc->flags & DESC_E_MASK) ? 'E' : '-', - (sc->flags & DESC_W_MASK) ? 'W' : '-'); - } - cpu_fprintf(f, "%c]", (sc->flags & DESC_A_MASK) ? 
'A' : '-'); - } else { - static const char *sys_type_name[2][16] = { - { /* 32 bit mode */ - "Reserved", "TSS16-avl", "LDT", "TSS16-busy", - "CallGate16", "TaskGate", "IntGate16", "TrapGate16", - "Reserved", "TSS32-avl", "Reserved", "TSS32-busy", - "CallGate32", "Reserved", "IntGate32", "TrapGate32" - }, - { /* 64 bit mode */ - "<hiword>", "Reserved", "LDT", "Reserved", "Reserved", - "Reserved", "Reserved", "Reserved", "Reserved", - "TSS64-avl", "Reserved", "TSS64-busy", "CallGate64", - "Reserved", "IntGate64", "TrapGate64" - } - }; - cpu_fprintf(f, "%s", - sys_type_name[(env->hflags & HF_LMA_MASK) ? 1 : 0] - [(sc->flags & DESC_TYPE_MASK) - >> DESC_TYPE_SHIFT]); - } -done: - cpu_fprintf(f, "\n"); -} - -#ifndef CONFIG_USER_ONLY - -/* ARRAY_SIZE check is not required because - * DeliveryMode(dm) has a size of 3 bit. - */ -static inline const char *dm2str(uint32_t dm) -{ - static const char *str[] = { - "Fixed", - "...", - "SMI", - "...", - "NMI", - "INIT", - "...", - "ExtINT" - }; - return str[dm]; -} - -static void dump_apic_lvt(FILE *f, fprintf_function cpu_fprintf, - const char *name, uint32_t lvt, bool is_timer) -{ - uint32_t dm = (lvt & APIC_LVT_DELIV_MOD) >> APIC_LVT_DELIV_MOD_SHIFT; - cpu_fprintf(f, - "%s\t 0x%08x %s %-5s %-6s %-7s %-12s %-6s", - name, lvt, - lvt & APIC_LVT_INT_POLARITY ? "active-lo" : "active-hi", - lvt & APIC_LVT_LEVEL_TRIGGER ? "level" : "edge", - lvt & APIC_LVT_MASKED ? "masked" : "", - lvt & APIC_LVT_DELIV_STS ? "pending" : "", - !is_timer ? - "" : lvt & APIC_LVT_TIMER_PERIODIC ? - "periodic" : lvt & APIC_LVT_TIMER_TSCDEADLINE ? - "tsc-deadline" : "one-shot", - dm2str(dm)); - if (dm != APIC_DM_NMI) { - cpu_fprintf(f, " (vec %u)\n", lvt & APIC_VECTOR_MASK); - } else { - cpu_fprintf(f, "\n"); - } -} - -/* ARRAY_SIZE check is not required because - * destination shorthand has a size of 2 bit. 
- */ -static inline const char *shorthand2str(uint32_t shorthand) -{ - const char *str[] = { - "no-shorthand", "self", "all-self", "all" - }; - return str[shorthand]; -} - -static inline uint8_t divider_conf(uint32_t divide_conf) -{ - uint8_t divide_val = ((divide_conf & 0x8) >> 1) | (divide_conf & 0x3); - - return divide_val == 7 ? 1 : 2 << divide_val; -} - -static inline void mask2str(char *str, uint32_t val, uint8_t size) -{ - while (size--) { - *str++ = (val >> size) & 1 ? '1' : '0'; - } - *str = 0; -} - -#define MAX_LOGICAL_APIC_ID_MASK_SIZE 16 - -static void dump_apic_icr(FILE *f, fprintf_function cpu_fprintf, - APICCommonState *s, CPUX86State *env) -{ - uint32_t icr = s->icr[0], icr2 = s->icr[1]; - uint8_t dest_shorthand = \ - (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT; - bool logical_mod = icr & APIC_ICR_DEST_MOD; - char apic_id_str[MAX_LOGICAL_APIC_ID_MASK_SIZE + 1]; - uint32_t dest_field; - bool x2apic; - - cpu_fprintf(f, "ICR\t 0x%08x %s %s %s %s\n", - icr, - logical_mod ? "logical" : "physical", - icr & APIC_ICR_TRIGGER_MOD ? "level" : "edge", - icr & APIC_ICR_LEVEL ? "assert" : "de-assert", - shorthand2str(dest_shorthand)); - - cpu_fprintf(f, "ICR2\t 0x%08x", icr2); - if (dest_shorthand != 0) { - cpu_fprintf(f, "\n"); - return; - } - x2apic = env->features[FEAT_1_ECX] & CPUID_EXT_X2APIC; - dest_field = x2apic ? 
icr2 : icr2 >> APIC_ICR_DEST_SHIFT; - - if (!logical_mod) { - if (x2apic) { - cpu_fprintf(f, " cpu %u (X2APIC ID)\n", dest_field); - } else { - cpu_fprintf(f, " cpu %u (APIC ID)\n", - dest_field & APIC_LOGDEST_XAPIC_ID); - } - return; - } - - if (s->dest_mode == 0xf) { /* flat mode */ - mask2str(apic_id_str, icr2 >> APIC_ICR_DEST_SHIFT, 8); - cpu_fprintf(f, " mask %s (APIC ID)\n", apic_id_str); - } else if (s->dest_mode == 0) { /* cluster mode */ - if (x2apic) { - mask2str(apic_id_str, dest_field & APIC_LOGDEST_X2APIC_ID, 16); - cpu_fprintf(f, " cluster %u mask %s (X2APIC ID)\n", - dest_field >> APIC_LOGDEST_X2APIC_SHIFT, apic_id_str); - } else { - mask2str(apic_id_str, dest_field & APIC_LOGDEST_XAPIC_ID, 4); - cpu_fprintf(f, " cluster %u mask %s (APIC ID)\n", - dest_field >> APIC_LOGDEST_XAPIC_SHIFT, apic_id_str); - } - } -} - -static void dump_apic_interrupt(FILE *f, fprintf_function cpu_fprintf, - const char *name, uint32_t *ireg_tab, - uint32_t *tmr_tab) -{ - int i, empty = true; - - cpu_fprintf(f, "%s\t ", name); - for (i = 0; i < 256; i++) { - if (apic_get_bit(ireg_tab, i)) { - cpu_fprintf(f, "%u%s ", i, - apic_get_bit(tmr_tab, i) ? "(level)" : ""); - empty = false; - } - } - cpu_fprintf(f, "%s\n", empty ? 
"(none)" : ""); -} - -void x86_cpu_dump_local_apic_state(CPUState *cs, FILE *f, - fprintf_function cpu_fprintf, int flags) -{ - X86CPU *cpu = X86_CPU(cs); - APICCommonState *s = APIC_COMMON(cpu->apic_state); - if (!s) { - cpu_fprintf(f, "local apic state not available\n"); - return; - } - uint32_t *lvt = s->lvt; - - cpu_fprintf(f, "dumping local APIC state for CPU %-2u\n\n", - CPU(cpu)->cpu_index); - dump_apic_lvt(f, cpu_fprintf, "LVT0", lvt[APIC_LVT_LINT0], false); - dump_apic_lvt(f, cpu_fprintf, "LVT1", lvt[APIC_LVT_LINT1], false); - dump_apic_lvt(f, cpu_fprintf, "LVTPC", lvt[APIC_LVT_PERFORM], false); - dump_apic_lvt(f, cpu_fprintf, "LVTERR", lvt[APIC_LVT_ERROR], false); - dump_apic_lvt(f, cpu_fprintf, "LVTTHMR", lvt[APIC_LVT_THERMAL], false); - dump_apic_lvt(f, cpu_fprintf, "LVTT", lvt[APIC_LVT_TIMER], true); - - cpu_fprintf(f, "Timer\t DCR=0x%x (divide by %u) initial_count = %u\n", - s->divide_conf & APIC_DCR_MASK, - divider_conf(s->divide_conf), - s->initial_count); - - cpu_fprintf(f, "SPIV\t 0x%08x APIC %s, focus=%s, spurious vec %u\n", - s->spurious_vec, - s->spurious_vec & APIC_SPURIO_ENABLED ? "enabled" : "disabled", - s->spurious_vec & APIC_SPURIO_FOCUS ? 
"on" : "off", - s->spurious_vec & APIC_VECTOR_MASK); - - dump_apic_icr(f, cpu_fprintf, s, &cpu->env); - - cpu_fprintf(f, "ESR\t 0x%08x\n", s->esr); - - dump_apic_interrupt(f, cpu_fprintf, "ISR", s->isr, s->tmr); - dump_apic_interrupt(f, cpu_fprintf, "IRR", s->irr, s->tmr); - - cpu_fprintf(f, "\nAPR 0x%02x TPR 0x%02x DFR 0x%02x LDR 0x%02x", - s->arb_id, s->tpr, s->dest_mode, s->log_dest); - if (s->dest_mode == 0) { - cpu_fprintf(f, "(cluster %u: id %u)", - s->log_dest >> APIC_LOGDEST_XAPIC_SHIFT, - s->log_dest & APIC_LOGDEST_XAPIC_ID); - } - cpu_fprintf(f, " PPR 0x%02x\n", apic_get_ppr(s)); -} -#else -void x86_cpu_dump_local_apic_state(CPUState *cs, FILE *f, - fprintf_function cpu_fprintf, int flags) -{ -} -#endif /* !CONFIG_USER_ONLY */ - -#define DUMP_CODE_BYTES_TOTAL 50 -#define DUMP_CODE_BYTES_BACKWARD 20 - -void x86_cpu_dump_state(CPUState *cs, FILE *f, fprintf_function cpu_fprintf, - int flags) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - int eflags, i, nb; - char cc_op_name[32]; - static const char *seg_name[6] = { "ES", "CS", "SS", "DS", "FS", "GS" }; - - eflags = cpu_compute_eflags(env); -#ifdef TARGET_X86_64 - if (env->hflags & HF_CS64_MASK) { - cpu_fprintf(f, - "RAX=%016" PRIx64 " RBX=%016" PRIx64 " RCX=%016" PRIx64 " RDX=%016" PRIx64 "\n" - "RSI=%016" PRIx64 " RDI=%016" PRIx64 " RBP=%016" PRIx64 " RSP=%016" PRIx64 "\n" - "R8 =%016" PRIx64 " R9 =%016" PRIx64 " R10=%016" PRIx64 " R11=%016" PRIx64 "\n" - "R12=%016" PRIx64 " R13=%016" PRIx64 " R14=%016" PRIx64 " R15=%016" PRIx64 "\n" - "RIP=%016" PRIx64 " RFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n", - env->regs[R_EAX], - env->regs[R_EBX], - env->regs[R_ECX], - env->regs[R_EDX], - env->regs[R_ESI], - env->regs[R_EDI], - env->regs[R_EBP], - env->regs[R_ESP], - env->regs[8], - env->regs[9], - env->regs[10], - env->regs[11], - env->regs[12], - env->regs[13], - env->regs[14], - env->regs[15], - env->eip, eflags, - eflags & DF_MASK ? 'D' : '-', - eflags & CC_O ? 
'O' : '-', - eflags & CC_S ? 'S' : '-', - eflags & CC_Z ? 'Z' : '-', - eflags & CC_A ? 'A' : '-', - eflags & CC_P ? 'P' : '-', - eflags & CC_C ? 'C' : '-', - env->hflags & HF_CPL_MASK, - (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1, - (env->a20_mask >> 20) & 1, - (env->hflags >> HF_SMM_SHIFT) & 1, - cs->halted); - } else -#endif - { - cpu_fprintf(f, "EAX=%08x EBX=%08x ECX=%08x EDX=%08x\n" - "ESI=%08x EDI=%08x EBP=%08x ESP=%08x\n" - "EIP=%08x EFL=%08x [%c%c%c%c%c%c%c] CPL=%d II=%d A20=%d SMM=%d HLT=%d\n", - (uint32_t)env->regs[R_EAX], - (uint32_t)env->regs[R_EBX], - (uint32_t)env->regs[R_ECX], - (uint32_t)env->regs[R_EDX], - (uint32_t)env->regs[R_ESI], - (uint32_t)env->regs[R_EDI], - (uint32_t)env->regs[R_EBP], - (uint32_t)env->regs[R_ESP], - (uint32_t)env->eip, eflags, - eflags & DF_MASK ? 'D' : '-', - eflags & CC_O ? 'O' : '-', - eflags & CC_S ? 'S' : '-', - eflags & CC_Z ? 'Z' : '-', - eflags & CC_A ? 'A' : '-', - eflags & CC_P ? 'P' : '-', - eflags & CC_C ? 'C' : '-', - env->hflags & HF_CPL_MASK, - (env->hflags >> HF_INHIBIT_IRQ_SHIFT) & 1, - (env->a20_mask >> 20) & 1, - (env->hflags >> HF_SMM_SHIFT) & 1, - cs->halted); - } - - for(i = 0; i < 6; i++) { - cpu_x86_dump_seg_cache(env, f, cpu_fprintf, seg_name[i], - &env->segs[i]); - } - cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "LDT", &env->ldt); - cpu_x86_dump_seg_cache(env, f, cpu_fprintf, "TR", &env->tr); - -#ifdef TARGET_X86_64 - if (env->hflags & HF_LMA_MASK) { - cpu_fprintf(f, "GDT= %016" PRIx64 " %08x\n", - env->gdt.base, env->gdt.limit); - cpu_fprintf(f, "IDT= %016" PRIx64 " %08x\n", - env->idt.base, env->idt.limit); - cpu_fprintf(f, "CR0=%08x CR2=%016" PRIx64 " CR3=%016" PRIx64 " CR4=%08x\n", - (uint32_t)env->cr[0], - env->cr[2], - env->cr[3], - (uint32_t)env->cr[4]); - for(i = 0; i < 4; i++) - cpu_fprintf(f, "DR%d=%016" PRIx64 " ", i, env->dr[i]); - cpu_fprintf(f, "\nDR6=%016" PRIx64 " DR7=%016" PRIx64 "\n", - env->dr[6], env->dr[7]); - } else -#endif - { - cpu_fprintf(f, "GDT= %08x %08x\n", - 
(uint32_t)env->gdt.base, env->gdt.limit); - cpu_fprintf(f, "IDT= %08x %08x\n", - (uint32_t)env->idt.base, env->idt.limit); - cpu_fprintf(f, "CR0=%08x CR2=%08x CR3=%08x CR4=%08x\n", - (uint32_t)env->cr[0], - (uint32_t)env->cr[2], - (uint32_t)env->cr[3], - (uint32_t)env->cr[4]); - for(i = 0; i < 4; i++) { - cpu_fprintf(f, "DR%d=" TARGET_FMT_lx " ", i, env->dr[i]); - } - cpu_fprintf(f, "\nDR6=" TARGET_FMT_lx " DR7=" TARGET_FMT_lx "\n", - env->dr[6], env->dr[7]); - } - if (flags & CPU_DUMP_CCOP) { - if ((unsigned)env->cc_op < CC_OP_NB) - snprintf(cc_op_name, sizeof(cc_op_name), "%s", cc_op_str[env->cc_op]); - else - snprintf(cc_op_name, sizeof(cc_op_name), "[%d]", env->cc_op); -#ifdef TARGET_X86_64 - if (env->hflags & HF_CS64_MASK) { - cpu_fprintf(f, "CCS=%016" PRIx64 " CCD=%016" PRIx64 " CCO=%-8s\n", - env->cc_src, env->cc_dst, - cc_op_name); - } else -#endif - { - cpu_fprintf(f, "CCS=%08x CCD=%08x CCO=%-8s\n", - (uint32_t)env->cc_src, (uint32_t)env->cc_dst, - cc_op_name); - } - } - cpu_fprintf(f, "EFER=%016" PRIx64 "\n", env->efer); - if (flags & CPU_DUMP_FPU) { - int fptag; - fptag = 0; - for(i = 0; i < 8; i++) { - fptag |= ((!env->fptags[i]) << i); - } - cpu_fprintf(f, "FCW=%04x FSW=%04x [ST=%d] FTW=%02x MXCSR=%08x\n", - env->fpuc, - (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11, - env->fpstt, - fptag, - env->mxcsr); - for(i=0;i<8;i++) { - CPU_LDoubleU u; - u.d = env->fpregs[i].d; - cpu_fprintf(f, "FPR%d=%016" PRIx64 " %04x", - i, u.l.lower, u.l.upper); - if ((i & 1) == 1) - cpu_fprintf(f, "\n"); - else - cpu_fprintf(f, " "); - } - if (env->hflags & HF_CS64_MASK) - nb = 16; - else - nb = 8; - for(i=0;i<nb;i++) { - cpu_fprintf(f, "XMM%02d=%08x%08x%08x%08x", - i, - env->xmm_regs[i].ZMM_L(3), - env->xmm_regs[i].ZMM_L(2), - env->xmm_regs[i].ZMM_L(1), - env->xmm_regs[i].ZMM_L(0)); - if ((i & 1) == 1) - cpu_fprintf(f, "\n"); - else - cpu_fprintf(f, " "); - } - } - if (flags & CPU_DUMP_CODE) { - target_ulong base = env->segs[R_CS].base + env->eip; - target_ulong offs = 
MIN(env->eip, DUMP_CODE_BYTES_BACKWARD); - uint8_t code; - char codestr[3]; - - cpu_fprintf(f, "Code="); - for (i = 0; i < DUMP_CODE_BYTES_TOTAL; i++) { - if (cpu_memory_rw_debug(cs, base - offs + i, &code, 1, 0) == 0) { - snprintf(codestr, sizeof(codestr), "%02x", code); - } else { - snprintf(codestr, sizeof(codestr), "??"); - } - cpu_fprintf(f, "%s%s%s%s", i > 0 ? " " : "", - i == offs ? "<" : "", codestr, i == offs ? ">" : ""); - } - cpu_fprintf(f, "\n"); - } -} - -/***********************************************************/ /* x86 mmu */ /* XXX: add PGE support */ @@ -628,7 +125,7 @@ void x86_cpu_set_a20(X86CPU *cpu, int a20_state) void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0) { - X86CPU *cpu = x86_env_get_cpu(env); + X86CPU *cpu = env_archcpu(env); int pe_state; qemu_log_mask(CPU_LOG_MMU, "CR0 update: CR0=0x%08x\n", new_cr0); @@ -670,19 +167,16 @@ void cpu_x86_update_cr0(CPUX86State *env, uint32_t new_cr0) the PDPT */ void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3) { - X86CPU *cpu = x86_env_get_cpu(env); - env->cr[3] = new_cr3; if (env->cr[0] & CR0_PG_MASK) { qemu_log_mask(CPU_LOG_MMU, "CR3 update: CR3=" TARGET_FMT_lx "\n", new_cr3); - tlb_flush(CPU(cpu)); + tlb_flush(env_cpu(env)); } } void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) { - X86CPU *cpu = x86_env_get_cpu(env); uint32_t hflags; #if defined(DEBUG_MMU) @@ -691,11 +185,11 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) if ((new_cr4 ^ env->cr[4]) & (CR4_PGE_MASK | CR4_PAE_MASK | CR4_PSE_MASK | CR4_SMEP_MASK | CR4_SMAP_MASK | CR4_LA57_MASK)) { - tlb_flush(CPU(cpu)); + tlb_flush(env_cpu(env)); } /* Clear bits we're going to recompute. 
*/ - hflags = env->hflags & ~(HF_OSFXSR_MASK | HF_SMAP_MASK); + hflags = env->hflags & ~(HF_OSFXSR_MASK | HF_SMAP_MASK | HF_UMIP_MASK); /* SSE handling */ if (!(env->features[FEAT_1_EDX] & CPUID_SSE)) { @@ -711,19 +205,30 @@ void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4) if (new_cr4 & CR4_SMAP_MASK) { hflags |= HF_SMAP_MASK; } + if (!(env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_UMIP)) { + new_cr4 &= ~CR4_UMIP_MASK; + } + if (new_cr4 & CR4_UMIP_MASK) { + hflags |= HF_UMIP_MASK; + } if (!(env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKU)) { new_cr4 &= ~CR4_PKE_MASK; } + if (!(env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_PKS)) { + new_cr4 &= ~CR4_PKS_MASK; + } env->cr[4] = new_cr4; env->hflags = hflags; cpu_sync_bndcs_hflags(env); + cpu_sync_avx_hflag(env); } #if !defined(CONFIG_USER_ONLY) -hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) +hwaddr x86_cpu_get_phys_page_attrs_debug(CPUState *cs, vaddr addr, + MemTxAttrs *attrs) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; @@ -733,6 +238,8 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) uint32_t page_offset; int page_size; + *attrs = cpu_get_mem_attrs(env); + a20_mask = x86_get_a20_mask(env); if (!(env->cr[0] & CR0_PG_MASK)) { pte = addr & a20_mask; @@ -854,22 +361,35 @@ typedef struct MCEInjectionParams { int flags; } MCEInjectionParams; +static void emit_guest_memory_failure(MemoryFailureAction action, bool ar, + bool recursive) +{ + MemoryFailureFlags mff = {.action_required = ar, .recursive = recursive}; + + qapi_event_send_memory_failure(MEMORY_FAILURE_RECIPIENT_GUEST, action, + &mff); +} + static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data) { MCEInjectionParams *params = data.host_ptr; X86CPU *cpu = X86_CPU(cs); CPUX86State *cenv = &cpu->env; uint64_t *banks = cenv->mce_banks + 4 * params->bank; + g_autofree char *msg = NULL; + bool need_reset = false; + bool recursive; + bool ar = !!(params->status & MCI_STATUS_AR); cpu_synchronize_state(cs); + 
recursive = !!(cenv->mcg_status & MCG_STATUS_MCIP); /* * If there is an MCE exception being processed, ignore this SRAO MCE * unless unconditional injection was requested. */ - if (!(params->flags & MCE_INJECT_UNCOND_AO) - && !(params->status & MCI_STATUS_AR) - && (cenv->mcg_status & MCG_STATUS_MCIP)) { + if (!(params->flags & MCE_INJECT_UNCOND_AO) && !ar && recursive) { + emit_guest_memory_failure(MEMORY_FAILURE_ACTION_IGNORE, ar, recursive); return; } @@ -897,16 +417,25 @@ static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data) return; } - if ((cenv->mcg_status & MCG_STATUS_MCIP) || - !(cenv->cr[4] & CR4_MCE_MASK)) { - monitor_printf(params->mon, - "CPU %d: Previous MCE still in progress, raising" - " triple fault\n", - cs->cpu_index); - qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); + if (!(cenv->cr[4] & CR4_MCE_MASK)) { + need_reset = true; + msg = g_strdup_printf("CPU %d: MCE capability is not enabled, " + "raising triple fault", cs->cpu_index); + } else if (recursive) { + need_reset = true; + msg = g_strdup_printf("CPU %d: Previous MCE still in progress, " + "raising triple fault", cs->cpu_index); + } + + if (need_reset) { + emit_guest_memory_failure(MEMORY_FAILURE_ACTION_RESET, ar, + recursive); + monitor_printf(params->mon, "%s", msg); + qemu_log_mask(CPU_LOG_RESET, "%s\n", msg); qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); return; } + if (banks[1] & MCI_STATUS_VAL) { params->status |= MCI_STATUS_OVER; } @@ -926,6 +455,8 @@ static void do_inject_x86_mce(CPUState *cs, run_on_cpu_data data) } else { banks[1] |= MCI_STATUS_OVER; } + + emit_guest_memory_failure(MEMORY_FAILURE_ACTION_INJECT, ar, recursive); } void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, @@ -981,19 +512,40 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, } } +static inline target_ulong get_memio_eip(CPUX86State *env) +{ +#ifdef CONFIG_TCG + uint64_t data[TARGET_INSN_START_WORDS]; + CPUState *cs = env_cpu(env); + + if (!cpu_unwind_state_data(cs, 
cs->mem_io_pc, data)) { + return env->eip; + } + + /* Per x86_restore_state_to_opc. */ + if (cs->tcg_cflags & CF_PCREL) { + return (env->eip & TARGET_PAGE_MASK) | data[0]; + } else { + return data[0] - env->segs[R_CS].base; + } +#else + qemu_build_not_reached(); +#endif +} + void cpu_report_tpr_access(CPUX86State *env, TPRAccess access) { - X86CPU *cpu = x86_env_get_cpu(env); - CPUState *cs = CPU(cpu); + X86CPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); - if (kvm_enabled() || whpx_enabled()) { + if (kvm_enabled() || whpx_enabled() || nvmm_enabled()) { env->tpr_access_type = access; cpu_interrupt(cs, CPU_INTERRUPT_TPR); } else if (tcg_enabled()) { - cpu_restore_state(cs, cs->mem_io_pc, false); + target_ulong eip = get_memio_eip(env); - apic_handle_tpr_access_report(cpu->apic_state, env->eip, access); + apic_handle_tpr_access_report(cpu->apic_state, eip, access); } } #endif /* !CONFIG_USER_ONLY */ @@ -1002,8 +554,7 @@ int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector, target_ulong *base, unsigned int *limit, unsigned int *flags) { - X86CPU *cpu = x86_env_get_cpu(env); - CPUState *cs = CPU(cpu); + CPUState *cs = env_cpu(env); SegmentCache *dt; target_ulong ptr; uint32_t e1, e2; @@ -1029,9 +580,9 @@ int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector, return 1; } -#if !defined(CONFIG_USER_ONLY) void do_cpu_init(X86CPU *cpu) { +#if !defined(CONFIG_USER_ONLY) CPUState *cs = CPU(cpu); CPUX86State *env = &cpu->env; CPUX86State *save = g_new(CPUX86State, 1); @@ -1050,43 +601,28 @@ void do_cpu_init(X86CPU *cpu) kvm_arch_do_init_vcpu(cpu); } apic_init_reset(cpu->apic_state); +#endif /* CONFIG_USER_ONLY */ } +#ifndef CONFIG_USER_ONLY + void do_cpu_sipi(X86CPU *cpu) { apic_sipi(cpu->apic_state); } -#else -void do_cpu_init(X86CPU *cpu) -{ -} -void do_cpu_sipi(X86CPU *cpu) -{ -} -#endif - -/* Frob eflags into and out of the CPU temporary format. 
*/ - -void x86_cpu_exec_enter(CPUState *cs) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - - CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); - env->df = 1 - (2 * ((env->eflags >> 10) & 1)); - CC_OP = CC_OP_EFLAGS; - env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); -} -void x86_cpu_exec_exit(CPUState *cs) +void cpu_load_efer(CPUX86State *env, uint64_t val) { - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - - env->eflags = cpu_compute_eflags(env); + env->efer = val; + env->hflags &= ~(HF_LMA_MASK | HF_SVME_MASK); + if (env->efer & MSR_EFER_LMA) { + env->hflags |= HF_LMA_MASK; + } + if (env->efer & MSR_EFER_SVME) { + env->hflags |= HF_SVME_MASK; + } } -#ifndef CONFIG_USER_ONLY uint8_t x86_ldub_phys(CPUState *cs, hwaddr addr) { X86CPU *cpu = X86_CPU(cs); diff --git a/target/i386/helper.h b/target/i386/helper.h index 6fb8fb9b74..ac2b04abd6 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -37,68 +37,50 @@ DEF_HELPER_2(lldt, void, env, int) DEF_HELPER_2(ltr, void, env, int) DEF_HELPER_3(load_seg, void, env, int, int) DEF_HELPER_4(ljmp_protected, void, env, int, tl, tl) -DEF_HELPER_5(lcall_real, void, env, int, tl, int, int) +DEF_HELPER_5(lcall_real, void, env, i32, i32, int, i32) DEF_HELPER_5(lcall_protected, void, env, int, tl, int, tl) DEF_HELPER_2(iret_real, void, env, int) DEF_HELPER_3(iret_protected, void, env, int, int) DEF_HELPER_3(lret_protected, void, env, int, int) -DEF_HELPER_2(read_crN, tl, env, int) -DEF_HELPER_3(write_crN, void, env, int, tl) -DEF_HELPER_2(lmsw, void, env, tl) DEF_HELPER_1(clts, void, env) + +#ifndef CONFIG_USER_ONLY DEF_HELPER_FLAGS_3(set_dr, TCG_CALL_NO_WG, void, env, int, tl) DEF_HELPER_FLAGS_2(get_dr, TCG_CALL_NO_WG, tl, env, int) -DEF_HELPER_2(invlpg, void, env, tl) +#endif /* !CONFIG_USER_ONLY */ DEF_HELPER_1(sysenter, void, env) DEF_HELPER_2(sysexit, void, env, int) -#ifdef TARGET_X86_64 DEF_HELPER_2(syscall, void, env, int) DEF_HELPER_2(sysret, 
void, env, int) -#endif -DEF_HELPER_2(hlt, void, env, int) -DEF_HELPER_2(monitor, void, env, tl) -DEF_HELPER_2(mwait, void, env, int) -DEF_HELPER_2(pause, void, env, int) -DEF_HELPER_1(debug, void, env) -DEF_HELPER_1(reset_rf, void, env) -DEF_HELPER_3(raise_interrupt, void, env, int, int) -DEF_HELPER_2(raise_exception, void, env, int) -DEF_HELPER_1(cli, void, env) -DEF_HELPER_1(sti, void, env) -DEF_HELPER_1(clac, void, env) -DEF_HELPER_1(stac, void, env) +DEF_HELPER_FLAGS_2(pause, TCG_CALL_NO_WG, noreturn, env, int) +DEF_HELPER_FLAGS_3(raise_interrupt, TCG_CALL_NO_WG, noreturn, env, int, int) +DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, int) DEF_HELPER_3(boundw, void, env, tl, int) DEF_HELPER_3(boundl, void, env, tl, int) + +#ifndef CONFIG_USER_ONLY DEF_HELPER_1(rsm, void, env) +#endif /* !CONFIG_USER_ONLY */ + DEF_HELPER_2(into, void, env, int) -DEF_HELPER_2(cmpxchg8b_unlocked, void, env, tl) -DEF_HELPER_2(cmpxchg8b, void, env, tl) -#ifdef TARGET_X86_64 -DEF_HELPER_2(cmpxchg16b_unlocked, void, env, tl) -DEF_HELPER_2(cmpxchg16b, void, env, tl) -#endif -DEF_HELPER_1(single_step, void, env) +DEF_HELPER_FLAGS_1(single_step, TCG_CALL_NO_WG, noreturn, env) DEF_HELPER_1(rechecking_single_step, void, env) DEF_HELPER_1(cpuid, void, env) +DEF_HELPER_FLAGS_1(rdpid, TCG_CALL_NO_WG, tl, env) DEF_HELPER_1(rdtsc, void, env) -DEF_HELPER_1(rdtscp, void, env) -DEF_HELPER_1(rdpmc, void, env) -DEF_HELPER_1(rdmsr, void, env) -DEF_HELPER_1(wrmsr, void, env) +DEF_HELPER_FLAGS_1(rdpmc, TCG_CALL_NO_WG, noreturn, env) -DEF_HELPER_2(check_iob, void, env, i32) -DEF_HELPER_2(check_iow, void, env, i32) -DEF_HELPER_2(check_iol, void, env, i32) +#ifndef CONFIG_USER_ONLY DEF_HELPER_3(outb, void, env, i32, i32) DEF_HELPER_2(inb, tl, env, i32) DEF_HELPER_3(outw, void, env, i32, i32) DEF_HELPER_2(inw, tl, env, i32) DEF_HELPER_3(outl, void, env, i32, i32) DEF_HELPER_2(inl, tl, env, i32) +DEF_HELPER_FLAGS_3(check_io, TCG_CALL_NO_WG, void, env, i32, i32) 
DEF_HELPER_FLAGS_4(bpt_io, TCG_CALL_NO_WG, void, env, i32, i32, tl) - -DEF_HELPER_3(svm_check_intercept_param, void, env, i32, i64) +DEF_HELPER_2(svm_check_intercept, void, env, i32) DEF_HELPER_4(svm_check_io, void, env, i32, i32, i32) DEF_HELPER_3(vmrun, void, env, int, int) DEF_HELPER_1(vmmcall, void, env) @@ -106,8 +88,15 @@ DEF_HELPER_2(vmload, void, env, int) DEF_HELPER_2(vmsave, void, env, int) DEF_HELPER_1(stgi, void, env) DEF_HELPER_1(clgi, void, env) -DEF_HELPER_1(skinit, void, env) -DEF_HELPER_2(invlpga, void, env, int) +DEF_HELPER_FLAGS_2(flush_page, TCG_CALL_NO_RWG, void, env, tl) +DEF_HELPER_FLAGS_2(hlt, TCG_CALL_NO_WG, noreturn, env, int) +DEF_HELPER_FLAGS_2(monitor, TCG_CALL_NO_WG, void, env, tl) +DEF_HELPER_FLAGS_2(mwait, TCG_CALL_NO_WG, noreturn, env, int) +DEF_HELPER_1(rdmsr, void, env) +DEF_HELPER_1(wrmsr, void, env) +DEF_HELPER_FLAGS_2(read_crN, TCG_CALL_NO_RWG, tl, env, int) +DEF_HELPER_FLAGS_3(write_crN, TCG_CALL_NO_RWG, void, env, int, tl) +#endif /* !CONFIG_USER_ONLY */ /* x86 FPU */ @@ -207,14 +196,16 @@ DEF_HELPER_FLAGS_2(pext, TCG_CALL_NO_RWG_SE, tl, tl, tl) /* MMX/SSE */ DEF_HELPER_2(ldmxcsr, void, env, i32) +DEF_HELPER_1(update_mxcsr, void, env) DEF_HELPER_1(enter_mmx, void, env) DEF_HELPER_1(emms, void, env) -DEF_HELPER_3(movq, void, env, ptr, ptr) #define SHIFT 0 -#include "ops_sse_header.h" +#include "tcg/ops_sse_header.h.inc" #define SHIFT 1 -#include "ops_sse_header.h" +#include "tcg/ops_sse_header.h.inc" +#define SHIFT 2 +#include "tcg/ops_sse_header.h.inc" DEF_HELPER_3(rclb, tl, env, tl, tl) DEF_HELPER_3(rclw, tl, env, tl, tl) @@ -226,3 +217,5 @@ DEF_HELPER_3(rcrl, tl, env, tl, tl) DEF_HELPER_3(rclq, tl, env, tl, tl) DEF_HELPER_3(rcrq, tl, env, tl, tl) #endif + +DEF_HELPER_1(rdrand, tl, env) diff --git a/target/i386/host-cpu.c b/target/i386/host-cpu.c new file mode 100644 index 0000000000..92ecb7254b --- /dev/null +++ b/target/i386/host-cpu.c @@ -0,0 +1,208 @@ +/* + * x86 host CPU functions, and "host" cpu type initialization + * 
+ * Copyright 2021 SUSE LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "host-cpu.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "sysemu/sysemu.h" + +/* Note: Only safe for use on x86(-64) hosts */ +static uint32_t host_cpu_phys_bits(void) +{ + uint32_t eax; + uint32_t host_phys_bits; + + host_cpuid(0x80000000, 0, &eax, NULL, NULL, NULL); + if (eax >= 0x80000008) { + host_cpuid(0x80000008, 0, &eax, NULL, NULL, NULL); + /* + * Note: According to AMD doc 25481 rev 2.34 they have a field + * at 23:16 that can specify a maximum physical address bits for + * the guest that can override this value; but I've not seen + * anything with that set. + */ + host_phys_bits = eax & 0xff; + } else { + /* + * It's an odd 64 bit machine that doesn't have the leaf for + * physical address bits; fall back to 36 that's most older + * Intel. + */ + host_phys_bits = 36; + } + + return host_phys_bits; +} + +static void host_cpu_enable_cpu_pm(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + + host_cpuid(5, 0, &cpu->mwait.eax, &cpu->mwait.ebx, + &cpu->mwait.ecx, &cpu->mwait.edx); + env->features[FEAT_1_ECX] |= CPUID_EXT_MONITOR; +} + +static uint32_t host_cpu_adjust_phys_bits(X86CPU *cpu) +{ + uint32_t host_phys_bits = host_cpu_phys_bits(); + uint32_t phys_bits = cpu->phys_bits; + static bool warned; + + /* + * Print a warning if the user set it to a value that's not the + * host value. 
+ */ + if (phys_bits != host_phys_bits && phys_bits != 0 && + !warned) { + warn_report("Host physical bits (%u)" + " does not match phys-bits property (%u)", + host_phys_bits, phys_bits); + warned = true; + } + + if (cpu->host_phys_bits) { + /* The user asked for us to use the host physical bits */ + phys_bits = host_phys_bits; + if (cpu->host_phys_bits_limit && + phys_bits > cpu->host_phys_bits_limit) { + phys_bits = cpu->host_phys_bits_limit; + } + } + + return phys_bits; +} + +bool host_cpu_realizefn(CPUState *cs, Error **errp) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + if (cpu->max_features && enable_cpu_pm) { + host_cpu_enable_cpu_pm(cpu); + } + if (env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM) { + uint32_t phys_bits = host_cpu_adjust_phys_bits(cpu); + + if (phys_bits && + (phys_bits > TARGET_PHYS_ADDR_SPACE_BITS || + phys_bits < 32)) { + error_setg(errp, "phys-bits should be between 32 and %u " + " (but is %u)", + TARGET_PHYS_ADDR_SPACE_BITS, phys_bits); + return false; + } + cpu->phys_bits = phys_bits; + } + return true; +} + +#define CPUID_MODEL_ID_SZ 48 +/** + * cpu_x86_fill_model_id: + * Get CPUID model ID string from host CPU. + * + * @str should have at least CPUID_MODEL_ID_SZ bytes + * + * The function does NOT add a null terminator to the string + * automatically. 
+ */ +static int host_cpu_fill_model_id(char *str) +{ + uint32_t eax = 0, ebx = 0, ecx = 0, edx = 0; + int i; + + for (i = 0; i < 3; i++) { + host_cpuid(0x80000002 + i, 0, &eax, &ebx, &ecx, &edx); + memcpy(str + i * 16 + 0, &eax, 4); + memcpy(str + i * 16 + 4, &ebx, 4); + memcpy(str + i * 16 + 8, &ecx, 4); + memcpy(str + i * 16 + 12, &edx, 4); + } + return 0; +} + +void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping) +{ + uint32_t eax, ebx, ecx, edx; + + host_cpuid(0x0, 0, &eax, &ebx, &ecx, &edx); + x86_cpu_vendor_words2str(vendor, ebx, edx, ecx); + + host_cpuid(0x1, 0, &eax, &ebx, &ecx, &edx); + if (family) { + *family = ((eax >> 8) & 0x0F) + ((eax >> 20) & 0xFF); + } + if (model) { + *model = ((eax >> 4) & 0x0F) | ((eax & 0xF0000) >> 12); + } + if (stepping) { + *stepping = eax & 0x0F; + } +} + +void host_cpu_instance_init(X86CPU *cpu) +{ + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); + + if (xcc->model) { + uint32_t ebx = 0, ecx = 0, edx = 0; + char vendor[CPUID_VENDOR_SZ + 1]; + + host_cpuid(0, 0, NULL, &ebx, &ecx, &edx); + x86_cpu_vendor_words2str(vendor, ebx, edx, ecx); + object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); + } +} + +void host_cpu_max_instance_init(X86CPU *cpu) +{ + char vendor[CPUID_VENDOR_SZ + 1] = { 0 }; + char model_id[CPUID_MODEL_ID_SZ + 1] = { 0 }; + int family, model, stepping; + + /* Use max host physical address bits if -cpu max option is applied */ + object_property_set_bool(OBJECT(cpu), "host-phys-bits", true, &error_abort); + + host_cpu_vendor_fms(vendor, &family, &model, &stepping); + host_cpu_fill_model_id(model_id); + + object_property_set_str(OBJECT(cpu), "vendor", vendor, &error_abort); + object_property_set_int(OBJECT(cpu), "family", family, &error_abort); + object_property_set_int(OBJECT(cpu), "model", model, &error_abort); + object_property_set_int(OBJECT(cpu), "stepping", stepping, + &error_abort); + object_property_set_str(OBJECT(cpu), "model-id", model_id, + &error_abort); +} + 
+static void host_cpu_class_init(ObjectClass *oc, void *data) +{ + X86CPUClass *xcc = X86_CPU_CLASS(oc); + + xcc->host_cpuid_required = true; + xcc->ordering = 8; + xcc->model_description = + g_strdup_printf("processor with all supported host features "); +} + +static const TypeInfo host_cpu_type_info = { + .name = X86_CPU_TYPE_NAME("host"), + .parent = X86_CPU_TYPE_NAME("max"), + .class_init = host_cpu_class_init, +}; + +static void host_cpu_type_init(void) +{ + type_register_static(&host_cpu_type_info); +} + +type_init(host_cpu_type_init); diff --git a/target/i386/host-cpu.h b/target/i386/host-cpu.h new file mode 100644 index 0000000000..6a9bc918ba --- /dev/null +++ b/target/i386/host-cpu.h @@ -0,0 +1,19 @@ +/* + * x86 host CPU type initialization and host CPU functions + * + * Copyright 2021 SUSE LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#ifndef HOST_CPU_H +#define HOST_CPU_H + +void host_cpu_instance_init(X86CPU *cpu); +void host_cpu_max_instance_init(X86CPU *cpu); +bool host_cpu_realizefn(CPUState *cs, Error **errp); + +void host_cpu_vendor_fms(char *vendor, int *family, int *model, int *stepping); + +#endif /* HOST_CPU_H */ diff --git a/target/i386/hvf/Makefile.objs b/target/i386/hvf/Makefile.objs deleted file mode 100644 index 927b86bc67..0000000000 --- a/target/i386/hvf/Makefile.objs +++ /dev/null @@ -1,2 +0,0 @@ -obj-y += hvf.o -obj-y += x86.o x86_cpuid.o x86_decode.o x86_descr.o x86_emu.o x86_flags.o x86_mmu.o x86hvf.o x86_task.o diff --git a/target/i386/hvf/README.md b/target/i386/hvf/README.md index 2d33477aca..64a8935237 100644 --- a/target/i386/hvf/README.md +++ b/target/i386/hvf/README.md @@ -4,4 +4,4 @@ These sources (and ../hvf-all.c) are adapted from Veertu Inc's vdhh (Veertu Desk 1. 
Adapt to our current QEMU's `CPUState` structure and `address_space_rw` API; many struct members have been moved around (emulated x86 state, xsave_buf) due to historical differences + QEMU needing to handle more emulation targets. 2. Removal of `apic_page` and hyperv-related functionality. -3. More relaxed use of `qemu_mutex_lock_iothread`. +3. More relaxed use of `bql_lock`. diff --git a/target/i386/hvf/hvf-cpu.c b/target/i386/hvf/hvf-cpu.c new file mode 100644 index 0000000000..ac617f17e7 --- /dev/null +++ b/target/i386/hvf/hvf-cpu.c @@ -0,0 +1,98 @@ +/* + * x86 HVF CPU type initialization + * + * Copyright 2021 SUSE LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "host-cpu.h" +#include "qapi/error.h" +#include "sysemu/sysemu.h" +#include "hw/boards.h" +#include "sysemu/hvf.h" +#include "hw/core/accel-cpu.h" +#include "hvf-i386.h" + +static void hvf_cpu_max_instance_init(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + + host_cpu_max_instance_init(cpu); + + env->cpuid_min_level = + hvf_get_supported_cpuid(0x0, 0, R_EAX); + env->cpuid_min_xlevel = + hvf_get_supported_cpuid(0x80000000, 0, R_EAX); + env->cpuid_min_xlevel2 = + hvf_get_supported_cpuid(0xC0000000, 0, R_EAX); +} + +static void hvf_cpu_xsave_init(void) +{ + static bool first = true; + int i; + + if (!first) { + return; + } + first = false; + + /* x87 and SSE states are in the legacy region of the XSAVE area. 
*/ + x86_ext_save_areas[XSTATE_FP_BIT].offset = 0; + x86_ext_save_areas[XSTATE_SSE_BIT].offset = 0; + + for (i = XSTATE_SSE_BIT + 1; i < XSAVE_STATE_AREA_COUNT; i++) { + ExtSaveArea *esa = &x86_ext_save_areas[i]; + + if (esa->size) { + int sz = hvf_get_supported_cpuid(0xd, i, R_EAX); + if (sz != 0) { + assert(esa->size == sz); + esa->offset = hvf_get_supported_cpuid(0xd, i, R_EBX); + } + } + } +} + +static void hvf_cpu_instance_init(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + + host_cpu_instance_init(cpu); + + /* Special cases not set in the X86CPUDefinition structs: */ + /* TODO: in-kernel irqchip for hvf */ + + if (cpu->max_features) { + hvf_cpu_max_instance_init(cpu); + } + + hvf_cpu_xsave_init(); +} + +static void hvf_cpu_accel_class_init(ObjectClass *oc, void *data) +{ + AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); + + acc->cpu_target_realize = host_cpu_realizefn; + acc->cpu_instance_init = hvf_cpu_instance_init; +} + +static const TypeInfo hvf_cpu_accel_type_info = { + .name = ACCEL_CPU_NAME("hvf"), + + .parent = TYPE_ACCEL_CPU, + .class_init = hvf_cpu_accel_class_init, + .abstract = true, +}; + +static void hvf_cpu_accel_register_types(void) +{ + type_register_static(&hvf_cpu_accel_type_info); +} + +type_init(hvf_cpu_accel_register_types); diff --git a/target/i386/hvf/hvf-i386.h b/target/i386/hvf/hvf-i386.h index 2232501552..e99c02cd4b 100644 --- a/target/i386/hvf/hvf-i386.h +++ b/target/i386/hvf/hvf-i386.h @@ -13,36 +13,14 @@ * */ -#ifndef _HVF_I386_H -#define _HVF_I386_H +#ifndef HVF_I386_H +#define HVF_I386_H -#include "sysemu/hvf.h" -#include "cpu.h" -#include "x86.h" +uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, int reg); -#define HVF_MAX_VCPU 0x10 -#define MAX_VM_ID 0x40 -#define MAX_VCPU_ID 0x40 - -extern struct hvf_state hvf_global; - -struct hvf_vm { - int id; - struct hvf_vcpu_state *vcpus[HVF_MAX_VCPU]; -}; - -struct hvf_state { - uint32_t version; - struct hvf_vm *vm; - uint64_t mem_quota; -}; - -#ifdef NEED_CPU_H -/* Functions 
exported to host specific mode */ +void hvf_handle_io(CPUArchState *, uint16_t, void *, int, int, int); /* Host specific functions */ int hvf_inject_interrupt(CPUArchState *env, int vector); -int hvf_vcpu_run(struct hvf_vcpu_state *vcpu); -#endif #endif diff --git a/target/i386/hvf/hvf.c b/target/i386/hvf/hvf.c index 9f52bc413a..1ed8ed5154 100644 --- a/target/i386/hvf/hvf.c +++ b/target/i386/hvf/hvf.c @@ -13,10 +13,10 @@ * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. + * General Public License for more details. * - * You should have received a copy of the GNU Lesser General Public - * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. * * This file contain code under public domain from the hvdos project: * https://github.com/mist64/hvdos @@ -45,11 +45,15 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
*/ + #include "qemu/osdep.h" -#include "qemu-common.h" #include "qemu/error-report.h" +#include "qemu/memalign.h" #include "sysemu/hvf.h" +#include "sysemu/hvf_int.h" +#include "sysemu/runstate.h" +#include "sysemu/cpus.h" #include "hvf-i386.h" #include "vmcs.h" #include "vmx.h" @@ -63,166 +67,13 @@ #include <Hypervisor/hv.h> #include <Hypervisor/hv_vmx.h> +#include <sys/sysctl.h> -#include "exec/address-spaces.h" #include "hw/i386/apic_internal.h" -#include "hw/boards.h" #include "qemu/main-loop.h" -#include "sysemu/accel.h" -#include "sysemu/sysemu.h" +#include "qemu/accel.h" #include "target/i386/cpu.h" -HVFState *hvf_state; -int hvf_disabled = 1; - -static void assert_hvf_ok(hv_return_t ret) -{ - if (ret == HV_SUCCESS) { - return; - } - - switch (ret) { - case HV_ERROR: - error_report("Error: HV_ERROR"); - break; - case HV_BUSY: - error_report("Error: HV_BUSY"); - break; - case HV_BAD_ARGUMENT: - error_report("Error: HV_BAD_ARGUMENT"); - break; - case HV_NO_RESOURCES: - error_report("Error: HV_NO_RESOURCES"); - break; - case HV_NO_DEVICE: - error_report("Error: HV_NO_DEVICE"); - break; - case HV_UNSUPPORTED: - error_report("Error: HV_UNSUPPORTED"); - break; - default: - error_report("Unknown Error"); - } - - abort(); -} - -/* Memory slots */ -hvf_slot *hvf_find_overlap_slot(uint64_t start, uint64_t end) -{ - hvf_slot *slot; - int x; - for (x = 0; x < hvf_state->num_slots; ++x) { - slot = &hvf_state->slots[x]; - if (slot->size && start < (slot->start + slot->size) && - end > slot->start) { - return slot; - } - } - return NULL; -} - -struct mac_slot { - int present; - uint64_t size; - uint64_t gpa_start; - uint64_t gva; -}; - -struct mac_slot mac_slots[32]; -#define ALIGN(x, y) (((x) + (y) - 1) & ~((y) - 1)) - -static int do_hvf_set_memory(hvf_slot *slot) -{ - struct mac_slot *macslot; - hv_memory_flags_t flags; - hv_return_t ret; - - macslot = &mac_slots[slot->slot_id]; - - if (macslot->present) { - if (macslot->size != slot->size) { - macslot->present = 0; - 
ret = hv_vm_unmap(macslot->gpa_start, macslot->size); - assert_hvf_ok(ret); - } - } - - if (!slot->size) { - return 0; - } - - flags = HV_MEMORY_READ | HV_MEMORY_WRITE | HV_MEMORY_EXEC; - - macslot->present = 1; - macslot->gpa_start = slot->start; - macslot->size = slot->size; - ret = hv_vm_map((hv_uvaddr_t)slot->mem, slot->start, slot->size, flags); - assert_hvf_ok(ret); - return 0; -} - -void hvf_set_phys_mem(MemoryRegionSection *section, bool add) -{ - hvf_slot *mem; - MemoryRegion *area = section->mr; - - if (!memory_region_is_ram(area)) { - return; - } - - mem = hvf_find_overlap_slot( - section->offset_within_address_space, - section->offset_within_address_space + int128_get64(section->size)); - - if (mem && add) { - if (mem->size == int128_get64(section->size) && - mem->start == section->offset_within_address_space && - mem->mem == (memory_region_get_ram_ptr(area) + - section->offset_within_region)) { - return; /* Same region was attempted to register, go away. */ - } - } - - /* Region needs to be reset. set the size to 0 and remap it. */ - if (mem) { - mem->size = 0; - if (do_hvf_set_memory(mem)) { - error_report("Failed to reset overlapping slot"); - abort(); - } - } - - if (!add) { - return; - } - - /* Now make a new slot. 
*/ - int x; - - for (x = 0; x < hvf_state->num_slots; ++x) { - mem = &hvf_state->slots[x]; - if (!mem->size) { - break; - } - } - - if (x == hvf_state->num_slots) { - error_report("No free slots"); - abort(); - } - - mem->size = int128_get64(section->size); - mem->mem = memory_region_get_ram_ptr(area) + section->offset_within_region; - mem->start = section->offset_within_address_space; - mem->region = area; - - if (do_hvf_set_memory(mem)) { - error_report("Error registering new memory slot"); - abort(); - } -} - void vmx_update_tpr(CPUState *cpu) { /* TODO: need integrate APIC handling */ @@ -230,32 +81,24 @@ void vmx_update_tpr(CPUState *cpu) int tpr = cpu_get_apic_tpr(x86_cpu->apic_state) << 4; int irr = apic_get_highest_priority_irr(x86_cpu->apic_state); - wreg(cpu->hvf_fd, HV_X86_TPR, tpr); + wreg(cpu->accel->fd, HV_X86_TPR, tpr); if (irr == -1) { - wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0); + wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, 0); } else { - wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? tpr >> 4 : + wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, (irr > tpr) ? 
tpr >> 4 : irr >> 4); } } -void update_apic_tpr(CPUState *cpu) +static void update_apic_tpr(CPUState *cpu) { X86CPU *x86_cpu = X86_CPU(cpu); - int tpr = rreg(cpu->hvf_fd, HV_X86_TPR) >> 4; + int tpr = rreg(cpu->accel->fd, HV_X86_TPR) >> 4; cpu_set_apic_tpr(x86_cpu->apic_state, tpr); } #define VECTORING_INFO_VECTOR_MASK 0xff -static void hvf_handle_interrupt(CPUState * cpu, int mask) -{ - cpu->interrupt_request |= mask; - if (!qemu_cpu_is_self(cpu)) { - qemu_cpu_kick(cpu); - } -} - void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer, int direction, int size, int count) { @@ -270,48 +113,6 @@ void hvf_handle_io(CPUArchState *env, uint16_t port, void *buffer, } } -/* TODO: synchronize vcpu state */ -static void do_hvf_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) -{ - CPUState *cpu_state = cpu; - if (cpu_state->vcpu_dirty == 0) { - hvf_get_registers(cpu_state); - } - - cpu_state->vcpu_dirty = 1; -} - -void hvf_cpu_synchronize_state(CPUState *cpu_state) -{ - if (cpu_state->vcpu_dirty == 0) { - run_on_cpu(cpu_state, do_hvf_cpu_synchronize_state, RUN_ON_CPU_NULL); - } -} - -static void do_hvf_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) -{ - CPUState *cpu_state = cpu; - hvf_put_registers(cpu_state); - cpu_state->vcpu_dirty = false; -} - -void hvf_cpu_synchronize_post_reset(CPUState *cpu_state) -{ - run_on_cpu(cpu_state, do_hvf_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); -} - -void _hvf_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) -{ - CPUState *cpu_state = cpu; - hvf_put_registers(cpu_state); - cpu_state->vcpu_dirty = false; -} - -void hvf_cpu_synchronize_post_init(CPUState *cpu_state) -{ - run_on_cpu(cpu_state, _hvf_cpu_synchronize_post_init, RUN_ON_CPU_NULL); -} - static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual) { int read, write; @@ -346,202 +147,96 @@ static bool ept_emulation_fault(hvf_slot *slot, uint64_t gpa, uint64_t ept_qual) return false; } - return !slot; + if 
(!slot) { + return true; + } + if (!memory_region_is_ram(slot->region) && + !(read && memory_region_is_romd(slot->region))) { + return true; + } + return false; } -static void hvf_set_dirty_tracking(MemoryRegionSection *section, bool on) +void hvf_arch_vcpu_destroy(CPUState *cpu) { - hvf_slot *slot; - - slot = hvf_find_overlap_slot( - section->offset_within_address_space, - section->offset_within_address_space + int128_get64(section->size)); - - /* protect region against writes; begin tracking it */ - if (on) { - slot->flags |= HVF_SLOT_LOG; - hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size, - HV_MEMORY_READ); - /* stop tracking region*/ - } else { - slot->flags &= ~HVF_SLOT_LOG; - hv_vm_protect((hv_gpaddr_t)slot->start, (size_t)slot->size, - HV_MEMORY_READ | HV_MEMORY_WRITE); - } + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + + g_free(env->hvf_mmio_buf); } -static void hvf_log_start(MemoryListener *listener, - MemoryRegionSection *section, int old, int new) +static void init_tsc_freq(CPUX86State *env) { - if (old != 0) { + size_t length; + uint64_t tsc_freq; + + if (env->tsc_khz != 0) { return; } - hvf_set_dirty_tracking(section, 1); + length = sizeof(uint64_t); + if (sysctlbyname("machdep.tsc.frequency", &tsc_freq, &length, NULL, 0)) { + return; + } + env->tsc_khz = tsc_freq / 1000; /* Hz to KHz */ } -static void hvf_log_stop(MemoryListener *listener, - MemoryRegionSection *section, int old, int new) +static void init_apic_bus_freq(CPUX86State *env) { - if (new != 0) { + size_t length; + uint64_t bus_freq; + + if (env->apic_bus_freq != 0) { return; } - hvf_set_dirty_tracking(section, 0); -} - -static void hvf_log_sync(MemoryListener *listener, - MemoryRegionSection *section) -{ - /* - * sync of dirty pages is handled elsewhere; just make sure we keep - * tracking the region. 
- */ - hvf_set_dirty_tracking(section, 1); + length = sizeof(uint64_t); + if (sysctlbyname("hw.busfrequency", &bus_freq, &length, NULL, 0)) { + return; + } + env->apic_bus_freq = bus_freq; } -static void hvf_region_add(MemoryListener *listener, - MemoryRegionSection *section) +static inline bool tsc_is_known(CPUX86State *env) { - hvf_set_phys_mem(section, true); + return env->tsc_khz != 0; } -static void hvf_region_del(MemoryListener *listener, - MemoryRegionSection *section) +static inline bool apic_bus_freq_is_known(CPUX86State *env) { - hvf_set_phys_mem(section, false); -} - -static MemoryListener hvf_memory_listener = { - .priority = 10, - .region_add = hvf_region_add, - .region_del = hvf_region_del, - .log_start = hvf_log_start, - .log_stop = hvf_log_stop, - .log_sync = hvf_log_sync, -}; - -void hvf_reset_vcpu(CPUState *cpu) { - - /* TODO: this shouldn't be needed; there is already a call to - * cpu_synchronize_all_post_reset in vl.c - */ - wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, 0); - macvm_set_cr0(cpu->hvf_fd, 0x60000010); - - wvmcs(cpu->hvf_fd, VMCS_CR4_MASK, CR4_VMXE_MASK); - wvmcs(cpu->hvf_fd, VMCS_CR4_SHADOW, 0x0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_CR4, CR4_VMXE_MASK); - - /* set VMCS guest state fields */ - wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_SELECTOR, 0xf000); - wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_LIMIT, 0xffff); - wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_ACCESS_RIGHTS, 0x9b); - wvmcs(cpu->hvf_fd, VMCS_GUEST_CS_BASE, 0xffff0000); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_LIMIT, 0xffff); - wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_ACCESS_RIGHTS, 0x93); - wvmcs(cpu->hvf_fd, VMCS_GUEST_DS_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_LIMIT, 0xffff); - wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_ACCESS_RIGHTS, 0x93); - wvmcs(cpu->hvf_fd, VMCS_GUEST_ES_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_LIMIT, 
0xffff); - wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_ACCESS_RIGHTS, 0x93); - wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_LIMIT, 0xffff); - wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_ACCESS_RIGHTS, 0x93); - wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_LIMIT, 0xffff); - wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_ACCESS_RIGHTS, 0x93); - wvmcs(cpu->hvf_fd, VMCS_GUEST_SS_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_ACCESS_RIGHTS, 0x10000); - wvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_SELECTOR, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_LIMIT, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_ACCESS_RIGHTS, 0x83); - wvmcs(cpu->hvf_fd, VMCS_GUEST_TR_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE, 0); - - wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT, 0); - wvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE, 0); - - /*wvmcs(cpu->hvf_fd, VMCS_GUEST_CR2, 0x0);*/ - wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, 0x0); - - wreg(cpu->hvf_fd, HV_X86_RIP, 0xfff0); - wreg(cpu->hvf_fd, HV_X86_RDX, 0x623); - wreg(cpu->hvf_fd, HV_X86_RFLAGS, 0x2); - wreg(cpu->hvf_fd, HV_X86_RSP, 0x0); - wreg(cpu->hvf_fd, HV_X86_RAX, 0x0); - wreg(cpu->hvf_fd, HV_X86_RBX, 0x0); - wreg(cpu->hvf_fd, HV_X86_RCX, 0x0); - wreg(cpu->hvf_fd, HV_X86_RSI, 0x0); - wreg(cpu->hvf_fd, HV_X86_RDI, 0x0); - wreg(cpu->hvf_fd, HV_X86_RBP, 0x0); - - for (int i = 0; i < 8; i++) { - wreg(cpu->hvf_fd, HV_X86_R8 + i, 0x0); - } - - hv_vm_sync_tsc(0); - cpu->halted = 0; - hv_vcpu_invalidate_tlb(cpu->hvf_fd); - hv_vcpu_flush(cpu->hvf_fd); + return env->apic_bus_freq != 0; } -void hvf_vcpu_destroy(CPUState *cpu) +void hvf_kick_vcpu_thread(CPUState *cpu) { - hv_return_t ret = hv_vcpu_destroy((hv_vcpuid_t)cpu->hvf_fd); - assert_hvf_ok(ret); + 
cpus_kick_thread(cpu); } -static void dummy_signal(int sig) +int hvf_arch_init(void) { + return 0; } -int hvf_init_vcpu(CPUState *cpu) +int hvf_arch_init_vcpu(CPUState *cpu) { - X86CPU *x86cpu = X86_CPU(cpu); CPUX86State *env = &x86cpu->env; - int r; - - /* init cpu signals */ - sigset_t set; - struct sigaction sigact; - - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_handler = dummy_signal; - sigaction(SIG_IPI, &sigact, NULL); - - pthread_sigmask(SIG_BLOCK, NULL, &set); - sigdelset(&set, SIG_IPI); + uint64_t reqCap; init_emu(); init_decoder(); hvf_state->hvf_caps = g_new0(struct hvf_vcpu_caps, 1); - env->hvf_emul = g_new0(HVFX86EmulatorState, 1); + env->hvf_mmio_buf = g_new(char, 4096); - r = hv_vcpu_create((hv_vcpuid_t *)&cpu->hvf_fd, HV_VCPU_DEFAULT); - cpu->vcpu_dirty = 1; - assert_hvf_ok(r); + if (x86cpu->vmware_cpuid_freq) { + init_tsc_freq(env); + init_apic_bus_freq(env); + + if (!tsc_is_known(env) || !apic_bus_freq_is_known(env)) { + error_report("vmware-cpuid-freq: feature couldn't be enabled"); + } + } if (hv_vmx_read_capability(HV_VMX_CAP_PINBASED, &hvf_state->hvf_caps->vmx_cap_pinbased)) { @@ -561,52 +256,59 @@ int hvf_init_vcpu(CPUState *cpu) } /* set VMCS control fields */ - wvmcs(cpu->hvf_fd, VMCS_PIN_BASED_CTLS, + wvmcs(cpu->accel->fd, VMCS_PIN_BASED_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_pinbased, - VMCS_PIN_BASED_CTLS_EXTINT | - VMCS_PIN_BASED_CTLS_NMI | - VMCS_PIN_BASED_CTLS_VNMI)); - wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, + VMCS_PIN_BASED_CTLS_EXTINT | + VMCS_PIN_BASED_CTLS_NMI | + VMCS_PIN_BASED_CTLS_VNMI)); + wvmcs(cpu->accel->fd, VMCS_PRI_PROC_BASED_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased, - VMCS_PRI_PROC_BASED_CTLS_HLT | - VMCS_PRI_PROC_BASED_CTLS_MWAIT | - VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET | - VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) | + VMCS_PRI_PROC_BASED_CTLS_HLT | + VMCS_PRI_PROC_BASED_CTLS_MWAIT | + VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET | + VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW) | 
VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL); - wvmcs(cpu->hvf_fd, VMCS_SEC_PROC_BASED_CTLS, - cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2, - VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES)); - wvmcs(cpu->hvf_fd, VMCS_ENTRY_CTLS, cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, - 0)); - wvmcs(cpu->hvf_fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */ + reqCap = VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES; + + /* Is RDTSCP support in CPUID? If so, enable it in the VMCS. */ + if (hvf_get_supported_cpuid(0x80000001, 0, R_EDX) & CPUID_EXT2_RDTSCP) { + reqCap |= VMCS_PRI_PROC_BASED2_CTLS_RDTSCP; + } + + wvmcs(cpu->accel->fd, VMCS_SEC_PROC_BASED_CTLS, + cap2ctrl(hvf_state->hvf_caps->vmx_cap_procbased2, reqCap)); - wvmcs(cpu->hvf_fd, VMCS_TPR_THRESHOLD, 0); + wvmcs(cpu->accel->fd, VMCS_ENTRY_CTLS, + cap2ctrl(hvf_state->hvf_caps->vmx_cap_entry, 0)); + wvmcs(cpu->accel->fd, VMCS_EXCEPTION_BITMAP, 0); /* Double fault */ - hvf_reset_vcpu(cpu); + wvmcs(cpu->accel->fd, VMCS_TPR_THRESHOLD, 0); x86cpu = X86_CPU(cpu); - x86cpu->env.xsave_buf = qemu_memalign(4096, 4096); - - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_STAR, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_LSTAR, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_CSTAR, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FMASK, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_FSBASE, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_GSBASE, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_KERNELGSBASE, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_TSC_AUX, 1); - /*hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_TSC, 1);*/ - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_CS, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_EIP, 1); - hv_vcpu_enable_native_msr(cpu->hvf_fd, MSR_IA32_SYSENTER_ESP, 1); + x86cpu->env.xsave_buf_len = 4096; + x86cpu->env.xsave_buf = qemu_memalign(4096, x86cpu->env.xsave_buf_len); - return 0; -} + /* + * The allocated storage must be large enough for all of the + * possible XSAVE state 
components. + */ + assert(hvf_get_supported_cpuid(0xd, 0, R_ECX) <= x86cpu->env.xsave_buf_len); + + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_STAR, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_LSTAR, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_CSTAR, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_FMASK, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_FSBASE, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_GSBASE, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_KERNELGSBASE, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_TSC_AUX, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_TSC, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_CS, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_EIP, 1); + hv_vcpu_enable_native_msr(cpu->accel->fd, MSR_IA32_SYSENTER_ESP, 1); -void hvf_disable(int shouldDisable) -{ - hvf_disabled = shouldDisable; + return 0; } static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_info) @@ -614,9 +316,13 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; - env->exception_injected = -1; + env->exception_nr = -1; + env->exception_pending = 0; + env->exception_injected = 0; env->interrupt_injected = -1; env->nmi_injected = false; + env->ins_len = 0; + env->has_error_code = false; if (idtvec_info & VMCS_IDT_VEC_VALID) { switch (idtvec_info & VMCS_IDT_VEC_TYPE) { case VMCS_IDT_VEC_HWINTR: @@ -628,7 +334,8 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in break; case VMCS_IDT_VEC_HWEXCEPTION: case VMCS_IDT_VEC_SWEXCEPTION: - env->exception_injected = idtvec_info & VMCS_IDT_VEC_VECNUM; + env->exception_nr = idtvec_info & VMCS_IDT_VEC_VECNUM; + env->exception_injected = 1; break; case VMCS_IDT_VEC_PRIV_SWEXCEPTION: default: @@ -638,18 +345,18 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t 
idtvec_in (idtvec_info & VMCS_IDT_VEC_TYPE) == VMCS_IDT_VEC_SWINTR) { env->ins_len = ins_len; } - if (idtvec_info & VMCS_INTR_DEL_ERRCODE) { + if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) { env->has_error_code = true; - env->error_code = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_ERROR); + env->error_code = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_ERROR); } } - if ((rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) & + if ((rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY) & VMCS_INTERRUPTIBILITY_NMI_BLOCKING)) { env->hflags2 |= HF2_NMI_MASK; } else { env->hflags2 &= ~HF2_NMI_MASK; } - if (rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY) & + if (rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY) & (VMCS_INTERRUPTIBILITY_STI_BLOCKING | VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) { env->hflags |= HF_INHIBIT_IRQ_MASK; @@ -658,6 +365,48 @@ static void hvf_store_events(CPUState *cpu, uint32_t ins_len, uint64_t idtvec_in } } +static void hvf_cpu_x86_cpuid(CPUX86State *env, uint32_t index, uint32_t count, + uint32_t *eax, uint32_t *ebx, + uint32_t *ecx, uint32_t *edx) +{ + /* + * A wrapper extends cpu_x86_cpuid with 0x40000000 and 0x40000010 leafs, + * leafs 0x40000001-0x4000000F are filled with zeros + * Provides vmware-cpuid-freq support to hvf + * + * Note: leaf 0x40000000 not exposes HVF, + * leaving hypervisor signature empty + */ + + if (index < 0x40000000 || index > 0x40000010 || + !tsc_is_known(env) || !apic_bus_freq_is_known(env)) { + + cpu_x86_cpuid(env, index, count, eax, ebx, ecx, edx); + return; + } + + switch (index) { + case 0x40000000: + *eax = 0x40000010; /* Max available cpuid leaf */ + *ebx = 0; /* Leave signature empty */ + *ecx = 0; + *edx = 0; + break; + case 0x40000010: + *eax = env->tsc_khz; + *ebx = env->apic_bus_freq / 1000; /* Hz to KHz */ + *ecx = 0; + *edx = 0; + break; + default: + *eax = 0; + *ebx = 0; + *ecx = 0; + *edx = 0; + break; + } +} + int hvf_vcpu_exec(CPUState *cpu) { X86CPU *x86_cpu = X86_CPU(cpu); @@ -665,8 +414,6 @@ int 
hvf_vcpu_exec(CPUState *cpu) int ret = 0; uint64_t rip = 0; - cpu->halted = 0; - if (hvf_process_events(cpu)) { return EXCP_HLT; } @@ -682,29 +429,28 @@ int hvf_vcpu_exec(CPUState *cpu) } vmx_update_tpr(cpu); - qemu_mutex_unlock_iothread(); + bql_unlock(); if (!cpu_is_bsp(X86_CPU(cpu)) && cpu->halted) { - qemu_mutex_lock_iothread(); + bql_lock(); return EXCP_HLT; } - hv_return_t r = hv_vcpu_run(cpu->hvf_fd); + hv_return_t r = hv_vcpu_run(cpu->accel->fd); assert_hvf_ok(r); /* handle VMEXIT */ - uint64_t exit_reason = rvmcs(cpu->hvf_fd, VMCS_EXIT_REASON); - uint64_t exit_qual = rvmcs(cpu->hvf_fd, VMCS_EXIT_QUALIFICATION); - uint32_t ins_len = (uint32_t)rvmcs(cpu->hvf_fd, + uint64_t exit_reason = rvmcs(cpu->accel->fd, VMCS_EXIT_REASON); + uint64_t exit_qual = rvmcs(cpu->accel->fd, VMCS_EXIT_QUALIFICATION); + uint32_t ins_len = (uint32_t)rvmcs(cpu->accel->fd, VMCS_EXIT_INSTRUCTION_LENGTH); - uint64_t idtvec_info = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO); + uint64_t idtvec_info = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_INFO); hvf_store_events(cpu, ins_len, idtvec_info); - rip = rreg(cpu->hvf_fd, HV_X86_RIP); - RFLAGS(env) = rreg(cpu->hvf_fd, HV_X86_RFLAGS); - env->eflags = RFLAGS(env); + rip = rreg(cpu->accel->fd, HV_X86_RIP); + env->eflags = rreg(cpu->accel->fd, HV_X86_RFLAGS); - qemu_mutex_lock_iothread(); + bql_lock(); update_apic_tpr(cpu); current_cpu = cpu; @@ -714,11 +460,12 @@ int hvf_vcpu_exec(CPUState *cpu) case EXIT_REASON_HLT: { macvm_set_rip(cpu, rip + ins_len); if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && - (EFLAGS(env) & IF_MASK)) + (env->eflags & IF_MASK)) && !(cpu->interrupt_request & CPU_INTERRUPT_NMI) && !(idtvec_info & VMCS_IDT_VEC_VALID)) { cpu->halted = 1; ret = EXCP_HLT; + break; } ret = EXCP_INTERRUPT; break; @@ -727,25 +474,23 @@ int hvf_vcpu_exec(CPUState *cpu) ret = EXCP_INTERRUPT; break; } - /* Need to check if MMIO or unmmaped fault */ + /* Need to check if MMIO or unmapped fault */ case EXIT_REASON_EPT_FAULT: { hvf_slot *slot; 
- uint64_t gpa = rvmcs(cpu->hvf_fd, VMCS_GUEST_PHYSICAL_ADDRESS); + uint64_t gpa = rvmcs(cpu->accel->fd, VMCS_GUEST_PHYSICAL_ADDRESS); if (((idtvec_info & VMCS_IDT_VEC_VALID) == 0) && ((exit_qual & EXIT_QUAL_NMIUDTI) != 0)) { vmx_set_nmi_blocking(cpu); } - slot = hvf_find_overlap_slot(gpa, gpa); + slot = hvf_find_overlap_slot(gpa, 1); /* mmio */ if (ept_emulation_fault(slot, gpa, exit_qual)) { struct x86_decode decode; load_regs(cpu); - env->hvf_emul->fetch_rip = rip; - decode_instruction(env, &decode); exec_instruction(env, &decode); store_regs(cpu); @@ -774,11 +519,11 @@ int hvf_vcpu_exec(CPUState *cpu) } else { RAX(env) = (uint64_t)val; } - RIP(env) += ins_len; + env->eip += ins_len; store_regs(cpu); break; } else if (!string && !in) { - RAX(env) = rreg(cpu->hvf_fd, HV_X86_RAX); + RAX(env) = rreg(cpu->accel->fd, HV_X86_RAX); hvf_handle_io(env, port, &RAX(env), 1, size, 1); macvm_set_rip(cpu, rip + ins_len); break; @@ -786,8 +531,6 @@ int hvf_vcpu_exec(CPUState *cpu) struct x86_decode decode; load_regs(cpu); - env->hvf_emul->fetch_rip = rip; - decode_instruction(env, &decode); assert(ins_len == decode.len); exec_instruction(env, &decode); @@ -796,17 +539,21 @@ int hvf_vcpu_exec(CPUState *cpu) break; } case EXIT_REASON_CPUID: { - uint32_t rax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX); - uint32_t rbx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RBX); - uint32_t rcx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX); - uint32_t rdx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX); - - cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx); + uint32_t rax = (uint32_t)rreg(cpu->accel->fd, HV_X86_RAX); + uint32_t rbx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RBX); + uint32_t rcx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RCX); + uint32_t rdx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RDX); + + if (rax == 1) { + /* CPUID1.ecx.OSXSAVE needs to know CR4 */ + env->cr[4] = rvmcs(cpu->accel->fd, VMCS_GUEST_CR4); + } + hvf_cpu_x86_cpuid(env, rax, rcx, &rax, &rbx, &rcx, &rdx); - wreg(cpu->hvf_fd, HV_X86_RAX, 
rax); - wreg(cpu->hvf_fd, HV_X86_RBX, rbx); - wreg(cpu->hvf_fd, HV_X86_RCX, rcx); - wreg(cpu->hvf_fd, HV_X86_RDX, rdx); + wreg(cpu->accel->fd, HV_X86_RAX, rax); + wreg(cpu->accel->fd, HV_X86_RBX, rbx); + wreg(cpu->accel->fd, HV_X86_RCX, rcx); + wreg(cpu->accel->fd, HV_X86_RDX, rdx); macvm_set_rip(cpu, rip + ins_len); break; @@ -814,16 +561,16 @@ int hvf_vcpu_exec(CPUState *cpu) case EXIT_REASON_XSETBV: { X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; - uint32_t eax = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RAX); - uint32_t ecx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RCX); - uint32_t edx = (uint32_t)rreg(cpu->hvf_fd, HV_X86_RDX); + uint32_t eax = (uint32_t)rreg(cpu->accel->fd, HV_X86_RAX); + uint32_t ecx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RCX); + uint32_t edx = (uint32_t)rreg(cpu->accel->fd, HV_X86_RDX); if (ecx) { macvm_set_rip(cpu, rip + ins_len); break; } env->xcr0 = ((uint64_t)edx << 32) | eax; - wreg(cpu->hvf_fd, HV_X86_XCR0, env->xcr0 | 1); + wreg(cpu->accel->fd, HV_X86_XCR0, env->xcr0 | 1); macvm_set_rip(cpu, rip + ins_len); break; } @@ -844,11 +591,11 @@ int hvf_vcpu_exec(CPUState *cpu) { load_regs(cpu); if (exit_reason == EXIT_REASON_RDMSR) { - simulate_rdmsr(cpu); + simulate_rdmsr(env); } else { - simulate_wrmsr(cpu); + simulate_wrmsr(env); } - RIP(env) += rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH); + env->eip += ins_len; store_regs(cpu); break; } @@ -862,11 +609,11 @@ int hvf_vcpu_exec(CPUState *cpu) switch (cr) { case 0x0: { - macvm_set_cr0(cpu->hvf_fd, RRX(env, reg)); + macvm_set_cr0(cpu->accel->fd, RRX(env, reg)); break; } case 4: { - macvm_set_cr4(cpu->hvf_fd, RRX(env, reg)); + macvm_set_cr4(cpu->accel->fd, RRX(env, reg)); break; } case 8: { @@ -884,7 +631,7 @@ int hvf_vcpu_exec(CPUState *cpu) error_report("Unrecognized CR %d", cr); abort(); } - RIP(env) += ins_len; + env->eip += ins_len; store_regs(cpu); break; } @@ -892,8 +639,6 @@ int hvf_vcpu_exec(CPUState *cpu) struct x86_decode decode; load_regs(cpu); - 
env->hvf_emul->fetch_rip = rip; - decode_instruction(env, &decode); exec_instruction(env, &decode); store_regs(cpu); @@ -904,7 +649,7 @@ int hvf_vcpu_exec(CPUState *cpu) break; } case EXIT_REASON_TASK_SWITCH: { - uint64_t vinfo = rvmcs(cpu->hvf_fd, VMCS_IDT_VECTORING_INFO); + uint64_t vinfo = rvmcs(cpu->accel->fd, VMCS_IDT_VECTORING_INFO); x68_segment_selector sel = {.sel = exit_qual & 0xffff}; vmx_handle_task_switch(cpu, sel, (exit_qual >> 30) & 0x3, vinfo & VMCS_INTR_VALID, vinfo & VECTORING_INFO_VECTOR_MASK, vinfo @@ -917,12 +662,13 @@ int hvf_vcpu_exec(CPUState *cpu) break; } case EXIT_REASON_RDPMC: - wreg(cpu->hvf_fd, HV_X86_RAX, 0); - wreg(cpu->hvf_fd, HV_X86_RDX, 0); + wreg(cpu->accel->fd, HV_X86_RAX, 0); + wreg(cpu->accel->fd, HV_X86_RDX, 0); macvm_set_rip(cpu, rip + ins_len); break; case VMX_REASON_VMCALL: - env->exception_injected = EXCP0D_GPF; + env->exception_nr = EXCP0D_GPF; + env->exception_injected = 1; env->has_error_code = true; env->error_code = 0; break; @@ -934,49 +680,35 @@ int hvf_vcpu_exec(CPUState *cpu) return ret; } -static bool hvf_allowed; +int hvf_arch_insert_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp) +{ + return -ENOSYS; +} + +int hvf_arch_remove_sw_breakpoint(CPUState *cpu, struct hvf_sw_breakpoint *bp) +{ + return -ENOSYS; +} -static int hvf_accel_init(MachineState *ms) +int hvf_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type) { - int x; - hv_return_t ret; - HVFState *s; - - hvf_disable(0); - ret = hv_vm_create(HV_VM_DEFAULT); - assert_hvf_ok(ret); - - s = g_new0(HVFState, 1); - - s->num_slots = 32; - for (x = 0; x < s->num_slots; ++x) { - s->slots[x].size = 0; - s->slots[x].slot_id = x; - } - - hvf_state = s; - cpu_interrupt_handler = hvf_handle_interrupt; - memory_listener_register(&hvf_memory_listener, &address_space_memory); - return 0; + return -ENOSYS; } -static void hvf_accel_class_init(ObjectClass *oc, void *data) +int hvf_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type) { - AccelClass *ac = 
ACCEL_CLASS(oc); - ac->name = "HVF"; - ac->init_machine = hvf_accel_init; - ac->allowed = &hvf_allowed; + return -ENOSYS; } -static const TypeInfo hvf_accel_type = { - .name = TYPE_HVF_ACCEL, - .parent = TYPE_ACCEL, - .class_init = hvf_accel_class_init, -}; +void hvf_arch_remove_all_hw_breakpoints(void) +{ +} -static void hvf_type_init(void) +void hvf_arch_update_guest_debug(CPUState *cpu) { - type_register_static(&hvf_accel_type); } -type_init(hvf_type_init); +bool hvf_arch_supports_guest_debug(void) +{ + return false; +} diff --git a/target/i386/hvf/meson.build b/target/i386/hvf/meson.build new file mode 100644 index 0000000000..05c3c8cf18 --- /dev/null +++ b/target/i386/hvf/meson.build @@ -0,0 +1,13 @@ +i386_system_ss.add(when: [hvf, 'CONFIG_HVF'], if_true: files( + 'hvf.c', + 'x86.c', + 'x86_cpuid.c', + 'x86_decode.c', + 'x86_descr.c', + 'x86_emu.c', + 'x86_flags.c', + 'x86_mmu.c', + 'x86_task.c', + 'x86hvf.c', + 'hvf-cpu.c', +)) diff --git a/target/i386/hvf/panic.h b/target/i386/hvf/panic.h index 411ef43a5b..a3eabebbb4 100644 --- a/target/i386/hvf/panic.h +++ b/target/i386/hvf/panic.h @@ -5,7 +5,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/target/i386/hvf/vmcs.h b/target/i386/hvf/vmcs.h index 2a8c0424a5..aee6f75dfd 100644 --- a/target/i386/hvf/vmcs.h +++ b/target/i386/hvf/vmcs.h @@ -26,8 +26,8 @@ * $FreeBSD$ */ -#ifndef _VMCS_H_ -#define _VMCS_H_ +#ifndef VMCS_H +#define VMCS_H #include <Hypervisor/hv.h> #include <Hypervisor/hv_vmx.h> @@ -330,7 +330,7 @@ #define EPT_VIOLATION_DATA_WRITE (1UL << 1) #define EPT_VIOLATION_INST_FETCH (1UL << 2) #define EPT_VIOLATION_GPA_READABLE (1UL << 3) -#define EPT_VIOLATION_GPA_WRITEABLE (1UL << 4) +#define EPT_VIOLATION_GPA_WRITABLE (1UL << 4) #define EPT_VIOLATION_GPA_EXECUTABLE (1UL << 5) #define EPT_VIOLATION_GLA_VALID (1UL << 7) #define EPT_VIOLATION_XLAT_VALID (1UL << 8) @@ -354,7 +354,7 @@ #define VMCS_PRI_PROC_BASED_CTLS_TSC_OFFSET (1 << 3) #define VMCS_PRI_PROC_BASED_CTLS_HLT (1 << 7) #define VMCS_PRI_PROC_BASED_CTLS_MWAIT (1 << 10) -#define VMCS_PRI_PROC_BASED_CTLS_TSC (1 << 12) +#define VMCS_PRI_PROC_BASED_CTLS_RDTSC (1 << 12) #define VMCS_PRI_PROC_BASED_CTLS_CR8_LOAD (1 << 19) #define VMCS_PRI_PROC_BASED_CTLS_CR8_STORE (1 << 20) #define VMCS_PRI_PROC_BASED_CTLS_TPR_SHADOW (1 << 21) @@ -362,6 +362,7 @@ #define VMCS_PRI_PROC_BASED_CTLS_SEC_CONTROL (1 << 31) #define VMCS_PRI_PROC_BASED2_CTLS_APIC_ACCESSES (1 << 0) +#define VMCS_PRI_PROC_BASED2_CTLS_RDTSCP (1 << 3) #define VMCS_PRI_PROC_BASED2_CTLS_X2APIC (1 << 4) enum task_switch_reason { diff --git a/target/i386/hvf/vmx.h b/target/i386/hvf/vmx.h index 5dc52ecad6..0fffcfa46c 100644 --- a/target/i386/hvf/vmx.h +++ b/target/i386/hvf/vmx.h @@ -8,7 +8,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. 
+ * version 2.1 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -30,6 +30,8 @@ #include "vmcs.h" #include "cpu.h" #include "x86.h" +#include "sysemu/hvf.h" +#include "sysemu/hvf_int.h" #include "exec/address-spaces.h" @@ -78,7 +80,7 @@ static inline uint64_t cap2ctrl(uint64_t cap, uint64_t ctrl) #define AR_TYPE_ACCESSES_MASK 1 #define AR_TYPE_READABLE_MASK (1 << 1) -#define AR_TYPE_WRITEABLE_MASK (1 << 2) +#define AR_TYPE_WRITABLE_MASK (1 << 2) #define AR_TYPE_CODE_MASK (1 << 3) #define AR_TYPE_MASK 0x0f #define AR_TYPE_BUSY_64_TSS 11 @@ -121,61 +123,72 @@ static inline void macvm_set_cr0(hv_vcpuid_t vcpu, uint64_t cr0) uint64_t pdpte[4] = {0, 0, 0, 0}; uint64_t efer = rvmcs(vcpu, VMCS_GUEST_IA32_EFER); uint64_t old_cr0 = rvmcs(vcpu, VMCS_GUEST_CR0); + uint64_t changed_cr0 = old_cr0 ^ cr0; + uint64_t mask = CR0_PG_MASK | CR0_CD_MASK | CR0_NW_MASK | + CR0_NE_MASK | CR0_ET_MASK; + uint64_t entry_ctls; - if ((cr0 & CR0_PG) && (rvmcs(vcpu, VMCS_GUEST_CR4) & CR4_PAE) && + if ((cr0 & CR0_PG_MASK) && (rvmcs(vcpu, VMCS_GUEST_CR4) & CR4_PAE_MASK) && !(efer & MSR_EFER_LME)) { - address_space_rw(&address_space_memory, - rvmcs(vcpu, VMCS_GUEST_CR3) & ~0x1f, - MEMTXATTRS_UNSPECIFIED, - (uint8_t *)pdpte, 32, 0); - } - - for (i = 0; i < 4; i++) { - wvmcs(vcpu, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); + address_space_read(&address_space_memory, + rvmcs(vcpu, VMCS_GUEST_CR3) & ~0x1f, + MEMTXATTRS_UNSPECIFIED, pdpte, 32); + /* Only set PDPTE when appropriate. 
*/ + for (i = 0; i < 4; i++) { + wvmcs(vcpu, VMCS_GUEST_PDPTE0 + i * 2, pdpte[i]); + } } - wvmcs(vcpu, VMCS_CR0_MASK, CR0_CD | CR0_NE | CR0_PG); + wvmcs(vcpu, VMCS_CR0_MASK, mask); wvmcs(vcpu, VMCS_CR0_SHADOW, cr0); - cr0 &= ~CR0_CD; - wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE | CR0_ET); - if (efer & MSR_EFER_LME) { - if (!(old_cr0 & CR0_PG) && (cr0 & CR0_PG)) { - enter_long_mode(vcpu, cr0, efer); - } - if (/*(old_cr0 & CR0_PG) &&*/ !(cr0 & CR0_PG)) { - exit_long_mode(vcpu, cr0, efer); + if (changed_cr0 & CR0_PG_MASK) { + if (cr0 & CR0_PG_MASK) { + enter_long_mode(vcpu, cr0, efer); + } else { + exit_long_mode(vcpu, cr0, efer); + } } + } else { + entry_ctls = rvmcs(vcpu, VMCS_ENTRY_CTLS); + wvmcs(vcpu, VMCS_ENTRY_CTLS, entry_ctls & ~VM_ENTRY_GUEST_LMA); } + /* Filter new CR0 after we are finished examining it above. */ + cr0 = (cr0 & ~(mask & ~CR0_PG_MASK)); + wvmcs(vcpu, VMCS_GUEST_CR0, cr0 | CR0_NE_MASK | CR0_ET_MASK); + hv_vcpu_invalidate_tlb(vcpu); - hv_vcpu_flush(vcpu); } static inline void macvm_set_cr4(hv_vcpuid_t vcpu, uint64_t cr4) { - uint64_t guest_cr4 = cr4 | CR4_VMXE; + uint64_t guest_cr4 = cr4 | CR4_VMXE_MASK; wvmcs(vcpu, VMCS_GUEST_CR4, guest_cr4); wvmcs(vcpu, VMCS_CR4_SHADOW, cr4); + wvmcs(vcpu, VMCS_CR4_MASK, CR4_VMXE_MASK); hv_vcpu_invalidate_tlb(vcpu); - hv_vcpu_flush(vcpu); } static inline void macvm_set_rip(CPUState *cpu, uint64_t rip) { + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; uint64_t val; /* BUG, should take considering overlap.. 
*/ - wreg(cpu->hvf_fd, HV_X86_RIP, rip); + wreg(cpu->accel->fd, HV_X86_RIP, rip); + env->eip = rip; /* after moving forward in rip, we need to clean INTERRUPTABILITY */ - val = rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY); + val = rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY); if (val & (VMCS_INTERRUPTIBILITY_STI_BLOCKING | VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)) { - wvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY, + env->hflags &= ~HF_INHIBIT_IRQ_MASK; + wvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY, val & ~(VMCS_INTERRUPTIBILITY_STI_BLOCKING | VMCS_INTERRUPTIBILITY_MOVSS_BLOCKING)); } @@ -187,9 +200,9 @@ static inline void vmx_clear_nmi_blocking(CPUState *cpu) CPUX86State *env = &x86_cpu->env; env->hflags2 &= ~HF2_NMI_MASK; - uint32_t gi = (uint32_t) rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY); + uint32_t gi = (uint32_t) rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY); gi &= ~VMCS_INTERRUPTIBILITY_NMI_BLOCKING; - wvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY, gi); + wvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY, gi); } static inline void vmx_set_nmi_blocking(CPUState *cpu) @@ -198,16 +211,16 @@ static inline void vmx_set_nmi_blocking(CPUState *cpu) CPUX86State *env = &x86_cpu->env; env->hflags2 |= HF2_NMI_MASK; - uint32_t gi = (uint32_t)rvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY); + uint32_t gi = (uint32_t)rvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY); gi |= VMCS_INTERRUPTIBILITY_NMI_BLOCKING; - wvmcs(cpu->hvf_fd, VMCS_GUEST_INTERRUPTIBILITY, gi); + wvmcs(cpu->accel->fd, VMCS_GUEST_INTERRUPTIBILITY, gi); } static inline void vmx_set_nmi_window_exiting(CPUState *cpu) { uint64_t val; - val = rvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS); - wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, val | + val = rvmcs(cpu->accel->fd, VMCS_PRI_PROC_BASED_CTLS); + wvmcs(cpu->accel->fd, VMCS_PRI_PROC_BASED_CTLS, val | VMCS_PRI_PROC_BASED_CTLS_NMI_WINDOW_EXITING); } @@ -216,8 +229,8 @@ static inline void vmx_clear_nmi_window_exiting(CPUState *cpu) { 
uint64_t val; - val = rvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS); - wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, val & + val = rvmcs(cpu->accel->fd, VMCS_PRI_PROC_BASED_CTLS); + wvmcs(cpu->accel->fd, VMCS_PRI_PROC_BASED_CTLS, val & ~VMCS_PRI_PROC_BASED_CTLS_NMI_WINDOW_EXITING); } diff --git a/target/i386/hvf/x86.c b/target/i386/hvf/x86.c index 3afcedc7fc..80e36136d0 100644 --- a/target/i386/hvf/x86.c +++ b/target/i386/hvf/x86.c @@ -5,7 +5,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -19,7 +19,6 @@ #include "qemu/osdep.h" #include "cpu.h" -#include "qemu-common.h" #include "x86_decode.h" #include "x86_emu.h" #include "vmcs.h" @@ -47,7 +46,7 @@ return ar; }*/ -bool x86_read_segment_descriptor(struct CPUState *cpu, +bool x86_read_segment_descriptor(CPUState *cpu, struct x86_segment_descriptor *desc, x68_segment_selector sel) { @@ -62,11 +61,11 @@ bool x86_read_segment_descriptor(struct CPUState *cpu, } if (GDT_SEL == sel.ti) { - base = rvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE); - limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT); + base = rvmcs(cpu->accel->fd, VMCS_GUEST_GDTR_BASE); + limit = rvmcs(cpu->accel->fd, VMCS_GUEST_GDTR_LIMIT); } else { - base = rvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE); - limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT); + base = rvmcs(cpu->accel->fd, VMCS_GUEST_LDTR_BASE); + limit = rvmcs(cpu->accel->fd, VMCS_GUEST_LDTR_LIMIT); } if (sel.index * 8 >= limit) { @@ -77,7 +76,7 @@ bool x86_read_segment_descriptor(struct CPUState *cpu, return true; } -bool x86_write_segment_descriptor(struct CPUState *cpu, +bool 
x86_write_segment_descriptor(CPUState *cpu, struct x86_segment_descriptor *desc, x68_segment_selector sel) { @@ -85,11 +84,11 @@ bool x86_write_segment_descriptor(struct CPUState *cpu, uint32_t limit; if (GDT_SEL == sel.ti) { - base = rvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_BASE); - limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_GDTR_LIMIT); + base = rvmcs(cpu->accel->fd, VMCS_GUEST_GDTR_BASE); + limit = rvmcs(cpu->accel->fd, VMCS_GUEST_GDTR_LIMIT); } else { - base = rvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_BASE); - limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_LDTR_LIMIT); + base = rvmcs(cpu->accel->fd, VMCS_GUEST_LDTR_BASE); + limit = rvmcs(cpu->accel->fd, VMCS_GUEST_LDTR_LIMIT); } if (sel.index * 8 >= limit) { @@ -100,11 +99,11 @@ bool x86_write_segment_descriptor(struct CPUState *cpu, return true; } -bool x86_read_call_gate(struct CPUState *cpu, struct x86_call_gate *idt_desc, +bool x86_read_call_gate(CPUState *cpu, struct x86_call_gate *idt_desc, int gate) { - target_ulong base = rvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_BASE); - uint32_t limit = rvmcs(cpu->hvf_fd, VMCS_GUEST_IDTR_LIMIT); + target_ulong base = rvmcs(cpu->accel->fd, VMCS_GUEST_IDTR_BASE); + uint32_t limit = rvmcs(cpu->accel->fd, VMCS_GUEST_IDTR_LIMIT); memset(idt_desc, 0, sizeof(*idt_desc)); if (gate * 8 >= limit) { @@ -116,30 +115,30 @@ bool x86_read_call_gate(struct CPUState *cpu, struct x86_call_gate *idt_desc, return true; } -bool x86_is_protected(struct CPUState *cpu) +bool x86_is_protected(CPUState *cpu) { - uint64_t cr0 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0); - return cr0 & CR0_PE; + uint64_t cr0 = rvmcs(cpu->accel->fd, VMCS_GUEST_CR0); + return cr0 & CR0_PE_MASK; } -bool x86_is_real(struct CPUState *cpu) +bool x86_is_real(CPUState *cpu) { return !x86_is_protected(cpu); } -bool x86_is_v8086(struct CPUState *cpu) +bool x86_is_v8086(CPUState *cpu) { X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; - return x86_is_protected(cpu) && (RFLAGS(env) & RFLAGS_VM); + return x86_is_protected(cpu) && (env->eflags & 
VM_MASK); } -bool x86_is_long_mode(struct CPUState *cpu) +bool x86_is_long_mode(CPUState *cpu) { - return rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER) & MSR_EFER_LMA; + return rvmcs(cpu->accel->fd, VMCS_GUEST_IA32_EFER) & MSR_EFER_LMA; } -bool x86_is_long64_mode(struct CPUState *cpu) +bool x86_is_long64_mode(CPUState *cpu) { struct vmx_segment desc; vmx_read_segment_descriptor(cpu, &desc, R_CS); @@ -147,24 +146,24 @@ bool x86_is_long64_mode(struct CPUState *cpu) return x86_is_long_mode(cpu) && ((desc.ar >> 13) & 1); } -bool x86_is_paging_mode(struct CPUState *cpu) +bool x86_is_paging_mode(CPUState *cpu) { - uint64_t cr0 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0); - return cr0 & CR0_PG; + uint64_t cr0 = rvmcs(cpu->accel->fd, VMCS_GUEST_CR0); + return cr0 & CR0_PG_MASK; } -bool x86_is_pae_enabled(struct CPUState *cpu) +bool x86_is_pae_enabled(CPUState *cpu) { - uint64_t cr4 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR4); - return cr4 & CR4_PAE; + uint64_t cr4 = rvmcs(cpu->accel->fd, VMCS_GUEST_CR4); + return cr4 & CR4_PAE_MASK; } -target_ulong linear_addr(struct CPUState *cpu, target_ulong addr, X86Seg seg) +target_ulong linear_addr(CPUState *cpu, target_ulong addr, X86Seg seg) { return vmx_read_segment_base(cpu, seg) + addr; } -target_ulong linear_addr_size(struct CPUState *cpu, target_ulong addr, int size, +target_ulong linear_addr_size(CPUState *cpu, target_ulong addr, int size, X86Seg seg) { switch (size) { @@ -180,7 +179,7 @@ target_ulong linear_addr_size(struct CPUState *cpu, target_ulong addr, int size, return linear_addr(cpu, addr, seg); } -target_ulong linear_rip(struct CPUState *cpu, target_ulong rip) +target_ulong linear_rip(CPUState *cpu, target_ulong rip) { return linear_addr(cpu, rip, R_CS); } diff --git a/target/i386/hvf/x86.h b/target/i386/hvf/x86.h index 103ec0976c..3570f29aa9 100644 --- a/target/i386/hvf/x86.h +++ b/target/i386/hvf/x86.h @@ -5,7 +5,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General 
Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,7 +17,7 @@ */ #ifndef HVF_X86_H -#define HVF_X86_H 1 +#define HVF_X86_H typedef struct x86_register { union { @@ -42,98 +42,6 @@ typedef struct x86_register { }; } __attribute__ ((__packed__)) x86_register; -typedef enum x86_rflags { - RFLAGS_CF = (1L << 0), - RFLAGS_PF = (1L << 2), - RFLAGS_AF = (1L << 4), - RFLAGS_ZF = (1L << 6), - RFLAGS_SF = (1L << 7), - RFLAGS_TF = (1L << 8), - RFLAGS_IF = (1L << 9), - RFLAGS_DF = (1L << 10), - RFLAGS_OF = (1L << 11), - RFLAGS_IOPL = (3L << 12), - RFLAGS_NT = (1L << 14), - RFLAGS_RF = (1L << 16), - RFLAGS_VM = (1L << 17), - RFLAGS_AC = (1L << 18), - RFLAGS_VIF = (1L << 19), - RFLAGS_VIP = (1L << 20), - RFLAGS_ID = (1L << 21), -} x86_rflags; - -/* rflags register */ -typedef struct x86_reg_flags { - union { - struct { - uint64_t rflags; - }; - struct { - uint32_t eflags; - uint32_t hi32_unused1; - }; - struct { - uint32_t cf:1; - uint32_t unused1:1; - uint32_t pf:1; - uint32_t unused2:1; - uint32_t af:1; - uint32_t unused3:1; - uint32_t zf:1; - uint32_t sf:1; - uint32_t tf:1; - uint32_t ief:1; - uint32_t df:1; - uint32_t of:1; - uint32_t iopl:2; - uint32_t nt:1; - uint32_t unused4:1; - uint32_t rf:1; - uint32_t vm:1; - uint32_t ac:1; - uint32_t vif:1; - uint32_t vip:1; - uint32_t id:1; - uint32_t unused5:10; - uint32_t hi32_unused2; - }; - }; -} __attribute__ ((__packed__)) x86_reg_flags; - -typedef enum x86_reg_cr0 { - CR0_PE = (1L << 0), - CR0_MP = (1L << 1), - CR0_EM = (1L << 2), - CR0_TS = (1L << 3), - CR0_ET = (1L << 4), - CR0_NE = (1L << 5), - CR0_WP = (1L << 16), - CR0_AM = (1L << 18), - CR0_NW = (1L << 29), - CR0_CD = (1L << 30), - CR0_PG = (1L << 31), -} x86_reg_cr0; - 
-typedef enum x86_reg_cr4 { - CR4_VME = (1L << 0), - CR4_PVI = (1L << 1), - CR4_TSD = (1L << 2), - CR4_DE = (1L << 3), - CR4_PSE = (1L << 4), - CR4_PAE = (1L << 5), - CR4_MSE = (1L << 6), - CR4_PGE = (1L << 7), - CR4_PCE = (1L << 8), - CR4_OSFXSR = (1L << 9), - CR4_OSXMMEXCPT = (1L << 10), - CR4_VMXE = (1L << 13), - CR4_SMXE = (1L << 14), - CR4_FSGSBASE = (1L << 16), - CR4_PCIDE = (1L << 17), - CR4_OSXSAVE = (1L << 18), - CR4_SMEP = (1L << 20), -} x86_reg_cr4; - /* 16 bit Task State Segment */ typedef struct x86_tss_segment16 { uint16_t link; @@ -272,43 +180,24 @@ static inline uint32_t x86_call_gate_offset(x86_call_gate *gate) return (uint32_t)((gate->offset1 << 16) | gate->offset0); } -#define LDT_SEL 0 -#define GDT_SEL 1 +#define GDT_SEL 0 +#define LDT_SEL 1 typedef struct x68_segment_selector { union { uint16_t sel; struct { - uint16_t rpl:3; + uint16_t rpl:2; uint16_t ti:1; - uint16_t index:12; + uint16_t index:13; }; }; } __attribute__ ((__packed__)) x68_segment_selector; -typedef struct lazy_flags { - target_ulong result; - target_ulong auxbits; -} lazy_flags; - -/* Definition of hvf_x86_state is here */ -struct HVFX86EmulatorState { - int interruptable; - uint64_t fetch_rip; - uint64_t rip; - struct x86_register regs[16]; - struct x86_reg_flags rflags; - struct lazy_flags lflags; - uint8_t mmio_buf[4096]; -}; - /* useful register access macros */ -#define RIP(cpu) (cpu->hvf_emul->rip) -#define EIP(cpu) ((uint32_t)cpu->hvf_emul->rip) -#define RFLAGS(cpu) (cpu->hvf_emul->rflags.rflags) -#define EFLAGS(cpu) (cpu->hvf_emul->rflags.eflags) +#define x86_reg(cpu, reg) ((x86_register *) &cpu->regs[reg]) -#define RRX(cpu, reg) (cpu->hvf_emul->regs[reg].rrx) +#define RRX(cpu, reg) (x86_reg(cpu, reg)->rrx) #define RAX(cpu) RRX(cpu, R_EAX) #define RCX(cpu) RRX(cpu, R_ECX) #define RDX(cpu) RRX(cpu, R_EDX) @@ -326,7 +215,7 @@ struct HVFX86EmulatorState { #define R14(cpu) RRX(cpu, R_R14) #define R15(cpu) RRX(cpu, R_R15) -#define ERX(cpu, reg) 
(cpu->hvf_emul->regs[reg].erx) +#define ERX(cpu, reg) (x86_reg(cpu, reg)->erx) #define EAX(cpu) ERX(cpu, R_EAX) #define ECX(cpu) ERX(cpu, R_ECX) #define EDX(cpu) ERX(cpu, R_EDX) @@ -336,7 +225,7 @@ struct HVFX86EmulatorState { #define ESI(cpu) ERX(cpu, R_ESI) #define EDI(cpu) ERX(cpu, R_EDI) -#define RX(cpu, reg) (cpu->hvf_emul->regs[reg].rx) +#define RX(cpu, reg) (x86_reg(cpu, reg)->rx) #define AX(cpu) RX(cpu, R_EAX) #define CX(cpu) RX(cpu, R_ECX) #define DX(cpu) RX(cpu, R_EDX) @@ -346,43 +235,43 @@ struct HVFX86EmulatorState { #define SI(cpu) RX(cpu, R_ESI) #define DI(cpu) RX(cpu, R_EDI) -#define RL(cpu, reg) (cpu->hvf_emul->regs[reg].lx) +#define RL(cpu, reg) (x86_reg(cpu, reg)->lx) #define AL(cpu) RL(cpu, R_EAX) #define CL(cpu) RL(cpu, R_ECX) #define DL(cpu) RL(cpu, R_EDX) #define BL(cpu) RL(cpu, R_EBX) -#define RH(cpu, reg) (cpu->hvf_emul->regs[reg].hx) +#define RH(cpu, reg) (x86_reg(cpu, reg)->hx) #define AH(cpu) RH(cpu, R_EAX) #define CH(cpu) RH(cpu, R_ECX) #define DH(cpu) RH(cpu, R_EDX) #define BH(cpu) RH(cpu, R_EBX) /* deal with GDT/LDT descriptors in memory */ -bool x86_read_segment_descriptor(struct CPUState *cpu, +bool x86_read_segment_descriptor(CPUState *cpu, struct x86_segment_descriptor *desc, x68_segment_selector sel); -bool x86_write_segment_descriptor(struct CPUState *cpu, +bool x86_write_segment_descriptor(CPUState *cpu, struct x86_segment_descriptor *desc, x68_segment_selector sel); -bool x86_read_call_gate(struct CPUState *cpu, struct x86_call_gate *idt_desc, +bool x86_read_call_gate(CPUState *cpu, struct x86_call_gate *idt_desc, int gate); /* helpers */ -bool x86_is_protected(struct CPUState *cpu); -bool x86_is_real(struct CPUState *cpu); -bool x86_is_v8086(struct CPUState *cpu); -bool x86_is_long_mode(struct CPUState *cpu); -bool x86_is_long64_mode(struct CPUState *cpu); -bool x86_is_paging_mode(struct CPUState *cpu); -bool x86_is_pae_enabled(struct CPUState *cpu); +bool x86_is_protected(CPUState *cpu); +bool x86_is_real(CPUState *cpu); 
+bool x86_is_v8086(CPUState *cpu); +bool x86_is_long_mode(CPUState *cpu); +bool x86_is_long64_mode(CPUState *cpu); +bool x86_is_paging_mode(CPUState *cpu); +bool x86_is_pae_enabled(CPUState *cpu); enum X86Seg; -target_ulong linear_addr(struct CPUState *cpu, target_ulong addr, enum X86Seg seg); -target_ulong linear_addr_size(struct CPUState *cpu, target_ulong addr, int size, +target_ulong linear_addr(CPUState *cpu, target_ulong addr, enum X86Seg seg); +target_ulong linear_addr_size(CPUState *cpu, target_ulong addr, int size, enum X86Seg seg); -target_ulong linear_rip(struct CPUState *cpu, target_ulong rip); +target_ulong linear_rip(CPUState *cpu, target_ulong rip); static inline uint64_t rdtscp(void) { diff --git a/target/i386/hvf/x86_cpuid.c b/target/i386/hvf/x86_cpuid.c index 9874a46e92..9380b90496 100644 --- a/target/i386/hvf/x86_cpuid.c +++ b/target/i386/hvf/x86_cpuid.c @@ -7,7 +7,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -21,31 +21,28 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "cpu.h" #include "x86.h" #include "vmx.h" #include "sysemu/hvf.h" +#include "hvf-i386.h" -static uint64_t xgetbv(uint32_t xcr) +static bool xgetbv(uint32_t cpuid_ecx, uint32_t idx, uint64_t *xcr) { - uint32_t eax, edx; + uint32_t xcrl, xcrh; - __asm__ volatile ("xgetbv" - : "=a" (eax), "=d" (edx) - : "c" (xcr)); + if (cpuid_ecx & CPUID_EXT_OSXSAVE) { + /* + * The xgetbv instruction is not available to older versions of + * the assembler, so we encode the instruction manually. 
+ */ + asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (idx)); - return (((uint64_t)edx) << 32) | eax; -} - -static bool vmx_mpx_supported() -{ - uint64_t cap_exit, cap_entry; - - hv_vmx_read_capability(HV_VMX_CAP_ENTRY, &cap_entry); - hv_vmx_read_capability(HV_VMX_CAP_EXIT, &cap_exit); + *xcr = (((uint64_t)xcrh) << 32) | xcrl; + return true; + } - return ((cap_exit & (1 << 23)) && (cap_entry & (1 << 16))); + return false; } uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, @@ -92,17 +89,15 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, CPUID_7_0_EBX_CLFLUSHOPT | CPUID_7_0_EBX_CLWB | CPUID_7_0_EBX_AVX512DQ | CPUID_7_0_EBX_SHA_NI | CPUID_7_0_EBX_AVX512BW | CPUID_7_0_EBX_AVX512VL | - CPUID_7_0_EBX_INVPCID | CPUID_7_0_EBX_MPX; + CPUID_7_0_EBX_INVPCID; - if (!vmx_mpx_supported()) { - ebx &= ~CPUID_7_0_EBX_MPX; - } hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2, &cap); if (!(cap & CPU_BASED2_INVPCID)) { ebx &= ~CPUID_7_0_EBX_INVPCID; } - ecx &= CPUID_7_0_ECX_AVX512BMI | CPUID_7_0_ECX_AVX512_VPOPCNTDQ; + ecx &= CPUID_7_0_ECX_AVX512_VBMI | CPUID_7_0_ECX_AVX512_VPOPCNTDQ | + CPUID_7_0_ECX_RDPID; edx &= CPUID_7_0_EDX_AVX512_4VNNIW | CPUID_7_0_EDX_AVX512_4FMAPS; } else { ebx = 0; @@ -113,14 +108,14 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, break; case 0xD: if (idx == 0) { - uint64_t host_xcr0 = xgetbv(0); - uint64_t supp_xcr0 = host_xcr0 & (XSTATE_FP_MASK | XSTATE_SSE_MASK | - XSTATE_YMM_MASK | XSTATE_BNDREGS_MASK | - XSTATE_BNDCSR_MASK | XSTATE_OPMASK_MASK | - XSTATE_ZMM_Hi256_MASK | XSTATE_Hi16_ZMM_MASK); - eax &= supp_xcr0; - if (!vmx_mpx_supported()) { - eax &= ~(XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK); + uint64_t host_xcr0; + if (xgetbv(ecx, 0, &host_xcr0)) { + uint64_t supp_xcr0 = host_xcr0 & (XSTATE_FP_MASK | + XSTATE_SSE_MASK | XSTATE_YMM_MASK | + XSTATE_BNDREGS_MASK | XSTATE_BNDCSR_MASK | + XSTATE_OPMASK_MASK | XSTATE_ZMM_Hi256_MASK | + XSTATE_Hi16_ZMM_MASK); + eax &= supp_xcr0; } } else if (idx == 
1) { hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2, &cap); @@ -138,8 +133,12 @@ uint32_t hvf_get_supported_cpuid(uint32_t func, uint32_t idx, CPUID_PAT | CPUID_PSE36 | CPUID_EXT2_MMXEXT | CPUID_MMX | CPUID_FXSR | CPUID_EXT2_FXSR | CPUID_EXT2_PDPE1GB | CPUID_EXT2_3DNOWEXT | CPUID_EXT2_3DNOW | CPUID_EXT2_LM | CPUID_EXT2_RDTSCP | CPUID_EXT2_NX; + hv_vmx_read_capability(HV_VMX_CAP_PROCBASED2, &cap); + if (!(cap2ctrl(cap, CPU_BASED2_RDTSCP) & CPU_BASED2_RDTSCP)) { + edx &= ~CPUID_EXT2_RDTSCP; + } hv_vmx_read_capability(HV_VMX_CAP_PROCBASED, &cap); - if (!(cap & CPU_BASED_TSC_OFFSET)) { + if (!(cap2ctrl(cap, CPU_BASED_TSC_OFFSET) & CPU_BASED_TSC_OFFSET)) { edx &= ~CPUID_EXT2_RDTSCP; } ecx &= CPUID_EXT3_LAHF_LM | CPUID_EXT3_CMP_LEG | CPUID_EXT3_CR8LEG | diff --git a/target/i386/hvf/x86_decode.c b/target/i386/hvf/x86_decode.c index 2d7540fe7c..3728d7705e 100644 --- a/target/i386/hvf/x86_decode.c +++ b/target/i386/hvf/x86_decode.c @@ -5,7 +5,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -18,7 +18,6 @@ #include "qemu/osdep.h" -#include "qemu-common.h" #include "panic.h" #include "x86_decode.h" #include "vmx.h" @@ -29,8 +28,7 @@ static void decode_invalid(CPUX86State *env, struct x86_decode *decode) { - printf("%llx: failed to decode instruction ", env->hvf_emul->fetch_rip - - decode->len); + printf("%llx: failed to decode instruction ", env->eip); for (int i = 0; i < decode->opcode_len; i++) { printf("%x ", decode->opcode[i]); } @@ -75,8 +73,8 @@ static inline uint64_t decode_bytes(CPUX86State *env, struct x86_decode *decode, VM_PANIC_EX("%s invalid size %d\n", __func__, size); break; } - target_ulong va = linear_rip(ENV_GET_CPU(env), RIP(env)) + decode->len; - vmx_read_mem(ENV_GET_CPU(env), &val, va, size); + target_ulong va = linear_rip(env_cpu(env), env->eip) + decode->len; + vmx_read_mem(env_cpu(env), &val, va, size); decode->len += size; return val; @@ -113,7 +111,8 @@ static void decode_modrm_reg(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = decode->modrm.reg; - op->ptr = get_reg_ref(env, op->reg, decode->rex.r, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.r, + decode->operand_size); } static void decode_rax(CPUX86State *env, struct x86_decode *decode, @@ -121,7 +120,9 @@ static void decode_rax(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_EAX; - op->ptr = get_reg_ref(env, op->reg, 0, decode->operand_size); + /* Since reg is always AX, REX prefix has no impact. 
*/ + op->ptr = get_reg_ref(env, op->reg, false, 0, + decode->operand_size); } static inline void decode_immediate(CPUX86State *env, struct x86_decode *decode, @@ -263,16 +264,16 @@ static void decode_incgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x40; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_decgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x48; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_incgroup2(CPUX86State *env, struct x86_decode *decode) @@ -288,16 +289,16 @@ static void decode_pushgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x50; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_popgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0x58; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_jxx(CPUX86State *env, struct x86_decode *decode) @@ -378,16 +379,16 @@ static void decode_xchgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 
0x90; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_movgroup(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0xb8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, &decode->op[1], decode->operand_size); } @@ -402,8 +403,8 @@ static void decode_movgroup8(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[0] - 0xb0; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); decode_immediate(env, decode, &decode->op[1], decode->operand_size); } @@ -412,7 +413,8 @@ static void decode_rcx(CPUX86State *env, struct x86_decode *decode, { op->type = X86_VAR_REG; op->reg = R_ECX; - op->ptr = get_reg_ref(env, op->reg, decode->rex.b, decode->operand_size); + op->ptr = get_reg_ref(env, op->reg, decode->rex.rex, decode->rex.b, + decode->operand_size); } struct decode_tbl { @@ -451,9 +453,9 @@ struct decode_x87_tbl { struct decode_tbl invl_inst = {0x0, 0, 0, false, NULL, NULL, NULL, NULL, decode_invalid}; -struct decode_tbl _decode_tbl1[255]; -struct decode_tbl _decode_tbl2[255]; -struct decode_x87_tbl _decode_tbl3[255]; +struct decode_tbl _decode_tbl1[256]; +struct decode_tbl _decode_tbl2[256]; +struct decode_x87_tbl _decode_tbl3[256]; static void decode_x87_ins(CPUX86State *env, struct x86_decode *decode) { @@ -522,8 +524,6 @@ static void decode_sldtgroup(CPUX86State *env, struct x86_decode *decode) 
X86_DECODE_CMD_INVL }; decode->cmd = group[decode->modrm.reg]; - printf("%llx: decode_sldtgroup: %d\n", env->hvf_emul->fetch_rip, - decode->modrm.reg); } static void decode_lidtgroup(CPUX86State *env, struct x86_decode *decode) @@ -639,8 +639,8 @@ static void decode_bswap(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = decode->opcode[1] - 0xc8; - decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.b, - decode->operand_size); + decode->op[0].ptr = get_reg_ref(env, decode->op[0].reg, decode->rex.rex, + decode->rex.b, decode->operand_size); } static void decode_d9_4(CPUX86State *env, struct x86_decode *decode) @@ -696,15 +696,13 @@ static void decode_db_4(CPUX86State *env, struct x86_decode *decode) #define RFLAGS_MASK_NONE 0 -#define RFLAGS_MASK_OSZAPC (RFLAGS_OF | RFLAGS_SF | RFLAGS_ZF | RFLAGS_AF | \ - RFLAGS_PF | RFLAGS_CF) -#define RFLAGS_MASK_LAHF (RFLAGS_SF | RFLAGS_ZF | RFLAGS_AF | RFLAGS_PF | \ - RFLAGS_CF) -#define RFLAGS_MASK_CF (RFLAGS_CF) -#define RFLAGS_MASK_IF (RFLAGS_IF) -#define RFLAGS_MASK_TF (RFLAGS_TF) -#define RFLAGS_MASK_DF (RFLAGS_DF) -#define RFLAGS_MASK_ZF (RFLAGS_ZF) +#define RFLAGS_MASK_OSZAPC (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C) +#define RFLAGS_MASK_LAHF (CC_S | CC_Z | CC_A | CC_P | CC_C) +#define RFLAGS_MASK_CF (CC_C) +#define RFLAGS_MASK_IF (IF_MASK) +#define RFLAGS_MASK_TF (TF_MASK) +#define RFLAGS_MASK_DF (DF_MASK) +#define RFLAGS_MASK_ZF (CC_Z) struct decode_tbl _1op_inst[] = { {0x0, X86_DECODE_CMD_ADD, 1, true, decode_modrm_rm, decode_modrm_reg, NULL, @@ -1641,7 +1639,7 @@ void calc_modrm_operand16(CPUX86State *env, struct x86_decode *decode, X86Seg seg = R_DS; if (!decode->modrm.mod && 6 == decode->modrm.rm) { - op->ptr = (uint16_t)decode->displacement; + ptr = decode->displacement; goto calc_addr; } @@ -1686,38 +1684,37 @@ calc_addr: } } -target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size) +target_ulong get_reg_ref(CPUX86State *env, 
int reg, int rex_present, + int is_extended, int size) { target_ulong ptr = 0; - int which = 0; if (is_extended) { reg |= R_R8; } - switch (size) { case 1: - if (is_extended || reg < 4) { - which = 1; + if (is_extended || reg < 4 || rex_present) { ptr = (target_ulong)&RL(env, reg); } else { - which = 2; ptr = (target_ulong)&RH(env, reg - 4); } break; default: - which = 3; ptr = (target_ulong)&RRX(env, reg); break; } return ptr; } -target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size) +target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present, + int is_extended, int size) { target_ulong val = 0; - memcpy(&val, (void *)get_reg_ref(env, reg, is_extended, size), size); + memcpy(&val, + (void *)get_reg_ref(env, reg, rex_present, is_extended, size), + size); return val; } @@ -1739,7 +1736,8 @@ static target_ulong get_sib_val(CPUX86State *env, struct x86_decode *decode, if (base_reg == R_ESP || base_reg == R_EBP) { *sel = R_SS; } - base = get_reg_val(env, decode->sib.base, decode->rex.b, addr_size); + base = get_reg_val(env, decode->sib.base, decode->rex.rex, + decode->rex.b, addr_size); } if (decode->rex.x) { @@ -1747,7 +1745,8 @@ static target_ulong get_sib_val(CPUX86State *env, struct x86_decode *decode, } if (index_reg != R_ESP) { - scaled_index = get_reg_val(env, index_reg, decode->rex.x, addr_size) << + scaled_index = get_reg_val(env, index_reg, decode->rex.rex, + decode->rex.x, addr_size) << decode->sib.scale; } return base + scaled_index; @@ -1767,8 +1766,8 @@ void calc_modrm_operand32(CPUX86State *env, struct x86_decode *decode, if (4 == decode->modrm.rm) { ptr += get_sib_val(env, decode, &seg); } else if (!decode->modrm.mod && 5 == decode->modrm.rm) { - if (x86_is_long_mode(ENV_GET_CPU(env))) { - ptr += RIP(env) + decode->len; + if (x86_is_long_mode(env_cpu(env))) { + ptr += env->eip + decode->len; } else { ptr = decode->displacement; } @@ -1776,7 +1775,8 @@ void calc_modrm_operand32(CPUX86State *env, struct x86_decode *decode, 
if (decode->modrm.rm == R_EBP || decode->modrm.rm == R_ESP) { seg = R_SS; } - ptr += get_reg_val(env, decode->modrm.rm, decode->rex.b, addr_size); + ptr += get_reg_val(env, decode->modrm.rm, decode->rex.rex, + decode->rex.b, addr_size); } if (X86_DECODE_CMD_LEA == decode->cmd) { @@ -1803,9 +1803,10 @@ void calc_modrm_operand64(CPUX86State *env, struct x86_decode *decode, if (4 == rm) { ptr = get_sib_val(env, decode, &seg) + offset; } else if (0 == mod && 5 == rm) { - ptr = RIP(env) + decode->len + (int32_t) offset; + ptr = env->eip + decode->len + (int32_t) offset; } else { - ptr = get_reg_val(env, src, decode->rex.b, 8) + (int64_t) offset; + ptr = get_reg_val(env, src, decode->rex.rex, decode->rex.b, 8) + + (int64_t) offset; } if (X86_DECODE_CMD_LEA == decode->cmd) { @@ -1822,8 +1823,8 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, if (3 == decode->modrm.mod) { op->reg = decode->modrm.reg; op->type = X86_VAR_REG; - op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.b, - decode->operand_size); + op->ptr = get_reg_ref(env, decode->modrm.rm, decode->rex.rex, + decode->rex.b, decode->operand_size); return; } @@ -1846,31 +1847,41 @@ void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, static void decode_prefix(CPUX86State *env, struct x86_decode *decode) { while (1) { + /* + * REX prefix must come after legacy prefixes. + * REX before legacy is ignored. + * Clear rex to simulate this. 
+ */ uint8_t byte = decode_byte(env, decode); switch (byte) { case PREFIX_LOCK: decode->lock = byte; + decode->rex.rex = 0; break; case PREFIX_REPN: case PREFIX_REP: decode->rep = byte; + decode->rex.rex = 0; break; - case PREFIX_CS_SEG_OVEERIDE: - case PREFIX_SS_SEG_OVEERIDE: - case PREFIX_DS_SEG_OVEERIDE: - case PREFIX_ES_SEG_OVEERIDE: - case PREFIX_FS_SEG_OVEERIDE: - case PREFIX_GS_SEG_OVEERIDE: + case PREFIX_CS_SEG_OVERRIDE: + case PREFIX_SS_SEG_OVERRIDE: + case PREFIX_DS_SEG_OVERRIDE: + case PREFIX_ES_SEG_OVERRIDE: + case PREFIX_FS_SEG_OVERRIDE: + case PREFIX_GS_SEG_OVERRIDE: decode->segment_override = byte; + decode->rex.rex = 0; break; case PREFIX_OP_SIZE_OVERRIDE: decode->op_size_override = byte; + decode->rex.rex = 0; break; case PREFIX_ADDR_SIZE_OVERRIDE: decode->addr_size_override = byte; + decode->rex.rex = 0; break; case PREFIX_REX ... (PREFIX_REX + 0xf): - if (x86_is_long_mode(ENV_GET_CPU(env))) { + if (x86_is_long_mode(env_cpu(env))) { decode->rex.rex = byte; break; } @@ -1885,16 +1896,16 @@ static void decode_prefix(CPUX86State *env, struct x86_decode *decode) void set_addressing_size(CPUX86State *env, struct x86_decode *decode) { decode->addressing_size = -1; - if (x86_is_real(ENV_GET_CPU(env)) || x86_is_v8086(ENV_GET_CPU(env))) { + if (x86_is_real(env_cpu(env)) || x86_is_v8086(env_cpu(env))) { if (decode->addr_size_override) { decode->addressing_size = 4; } else { decode->addressing_size = 2; } - } else if (!x86_is_long_mode(ENV_GET_CPU(env))) { + } else if (!x86_is_long_mode(env_cpu(env))) { /* protected */ struct vmx_segment cs; - vmx_read_segment_descriptor(ENV_GET_CPU(env), &cs, R_CS); + vmx_read_segment_descriptor(env_cpu(env), &cs, R_CS); /* check db */ if ((cs.ar >> 14) & 1) { if (decode->addr_size_override) { @@ -1922,16 +1933,16 @@ void set_addressing_size(CPUX86State *env, struct x86_decode *decode) void set_operand_size(CPUX86State *env, struct x86_decode *decode) { decode->operand_size = -1; - if (x86_is_real(ENV_GET_CPU(env)) || 
x86_is_v8086(ENV_GET_CPU(env))) { + if (x86_is_real(env_cpu(env)) || x86_is_v8086(env_cpu(env))) { if (decode->op_size_override) { decode->operand_size = 4; } else { decode->operand_size = 2; } - } else if (!x86_is_long_mode(ENV_GET_CPU(env))) { + } else if (!x86_is_long_mode(env_cpu(env))) { /* protected */ struct vmx_segment cs; - vmx_read_segment_descriptor(ENV_GET_CPU(env), &cs, R_CS); + vmx_read_segment_descriptor(env_cpu(env), &cs, R_CS); /* check db */ if ((cs.ar >> 14) & 1) { if (decode->op_size_override) { @@ -2104,14 +2115,14 @@ void init_decoder() { int i; - for (i = 0; i < ARRAY_SIZE(_decode_tbl2); i++) { - memcpy(_decode_tbl1, &invl_inst, sizeof(invl_inst)); + for (i = 0; i < ARRAY_SIZE(_decode_tbl1); i++) { + memcpy(&_decode_tbl1[i], &invl_inst, sizeof(invl_inst)); } for (i = 0; i < ARRAY_SIZE(_decode_tbl2); i++) { - memcpy(_decode_tbl2, &invl_inst, sizeof(invl_inst)); + memcpy(&_decode_tbl2[i], &invl_inst, sizeof(invl_inst)); } for (i = 0; i < ARRAY_SIZE(_decode_tbl3); i++) { - memcpy(_decode_tbl3, &invl_inst, sizeof(invl_inst_x87)); + memcpy(&_decode_tbl3[i], &invl_inst_x87, sizeof(invl_inst_x87)); } for (i = 0; i < ARRAY_SIZE(_1op_inst); i++) { @@ -2160,26 +2171,26 @@ target_ulong decode_linear_addr(CPUX86State *env, struct x86_decode *decode, target_ulong addr, X86Seg seg) { switch (decode->segment_override) { - case PREFIX_CS_SEG_OVEERIDE: + case PREFIX_CS_SEG_OVERRIDE: seg = R_CS; break; - case PREFIX_SS_SEG_OVEERIDE: + case PREFIX_SS_SEG_OVERRIDE: seg = R_SS; break; - case PREFIX_DS_SEG_OVEERIDE: + case PREFIX_DS_SEG_OVERRIDE: seg = R_DS; break; - case PREFIX_ES_SEG_OVEERIDE: + case PREFIX_ES_SEG_OVERRIDE: seg = R_ES; break; - case PREFIX_FS_SEG_OVEERIDE: + case PREFIX_FS_SEG_OVERRIDE: seg = R_FS; break; - case PREFIX_GS_SEG_OVEERIDE: + case PREFIX_GS_SEG_OVERRIDE: seg = R_GS; break; default: break; } - return linear_addr_size(ENV_GET_CPU(env), addr, decode->addressing_size, seg); + return linear_addr_size(env_cpu(env), addr, 
decode->addressing_size, seg); } diff --git a/target/i386/hvf/x86_decode.h b/target/i386/hvf/x86_decode.h index 5ab6f31fa5..a2d7a2a27b 100644 --- a/target/i386/hvf/x86_decode.h +++ b/target/i386/hvf/x86_decode.h @@ -4,7 +4,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -16,7 +16,7 @@ */ #ifndef HVF_X86_DECODE_H -#define HVF_X86_DECODE_H 1 +#define HVF_X86_DECODE_H #include "cpu.h" #include "x86.h" @@ -27,12 +27,12 @@ typedef enum x86_prefix { PREFIX_REPN = 0xf2, PREFIX_REP = 0xf3, /* group 2 */ - PREFIX_CS_SEG_OVEERIDE = 0x2e, - PREFIX_SS_SEG_OVEERIDE = 0x36, - PREFIX_DS_SEG_OVEERIDE = 0x3e, - PREFIX_ES_SEG_OVEERIDE = 0x26, - PREFIX_FS_SEG_OVEERIDE = 0x64, - PREFIX_GS_SEG_OVEERIDE = 0x65, + PREFIX_CS_SEG_OVERRIDE = 0x2e, + PREFIX_SS_SEG_OVERRIDE = 0x36, + PREFIX_DS_SEG_OVERRIDE = 0x3e, + PREFIX_ES_SEG_OVERRIDE = 0x26, + PREFIX_FS_SEG_OVERRIDE = 0x64, + PREFIX_GS_SEG_OVERRIDE = 0x65, /* group 3 */ PREFIX_OP_SIZE_OVERRIDE = 0x66, /* group 4 */ @@ -303,8 +303,10 @@ uint64_t sign(uint64_t val, int size); uint32_t decode_instruction(CPUX86State *env, struct x86_decode *decode); -target_ulong get_reg_ref(CPUX86State *env, int reg, int is_extended, int size); -target_ulong get_reg_val(CPUX86State *env, int reg, int is_extended, int size); +target_ulong get_reg_ref(CPUX86State *env, int reg, int rex_present, + int is_extended, int size); +target_ulong get_reg_val(CPUX86State *env, int reg, int rex_present, + int is_extended, int size); void calc_modrm_operand(CPUX86State *env, struct x86_decode *decode, struct x86_decode_op *op); target_ulong 
decode_linear_addr(CPUX86State *env, struct x86_decode *decode, diff --git a/target/i386/hvf/x86_descr.c b/target/i386/hvf/x86_descr.c index 8c05c34f33..f33836d6cb 100644 --- a/target/i386/hvf/x86_descr.c +++ b/target/i386/hvf/x86_descr.c @@ -5,7 +5,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -18,7 +18,6 @@ #include "qemu/osdep.h" -#include "qemu-common.h" #include "vmx.h" #include "x86_descr.h" @@ -48,50 +47,52 @@ static const struct vmx_segment_field { uint32_t vmx_read_segment_limit(CPUState *cpu, X86Seg seg) { - return (uint32_t)rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].limit); + return (uint32_t)rvmcs(cpu->accel->fd, vmx_segment_fields[seg].limit); } uint32_t vmx_read_segment_ar(CPUState *cpu, X86Seg seg) { - return (uint32_t)rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].ar_bytes); + return (uint32_t)rvmcs(cpu->accel->fd, vmx_segment_fields[seg].ar_bytes); } uint64_t vmx_read_segment_base(CPUState *cpu, X86Seg seg) { - return rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].base); + return rvmcs(cpu->accel->fd, vmx_segment_fields[seg].base); } x68_segment_selector vmx_read_segment_selector(CPUState *cpu, X86Seg seg) { x68_segment_selector sel; - sel.sel = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].selector); + sel.sel = rvmcs(cpu->accel->fd, vmx_segment_fields[seg].selector); return sel; } -void vmx_write_segment_selector(struct CPUState *cpu, x68_segment_selector selector, X86Seg seg) +void vmx_write_segment_selector(CPUState *cpu, x68_segment_selector selector, X86Seg seg) { - wvmcs(cpu->hvf_fd, vmx_segment_fields[seg].selector, selector.sel); + 
wvmcs(cpu->accel->fd, vmx_segment_fields[seg].selector, selector.sel); } -void vmx_read_segment_descriptor(struct CPUState *cpu, struct vmx_segment *desc, X86Seg seg) +void vmx_read_segment_descriptor(CPUState *cpu, struct vmx_segment *desc, X86Seg seg) { - desc->sel = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].selector); - desc->base = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].base); - desc->limit = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].limit); - desc->ar = rvmcs(cpu->hvf_fd, vmx_segment_fields[seg].ar_bytes); + desc->sel = rvmcs(cpu->accel->fd, vmx_segment_fields[seg].selector); + desc->base = rvmcs(cpu->accel->fd, vmx_segment_fields[seg].base); + desc->limit = rvmcs(cpu->accel->fd, vmx_segment_fields[seg].limit); + desc->ar = rvmcs(cpu->accel->fd, vmx_segment_fields[seg].ar_bytes); } void vmx_write_segment_descriptor(CPUState *cpu, struct vmx_segment *desc, X86Seg seg) { const struct vmx_segment_field *sf = &vmx_segment_fields[seg]; - wvmcs(cpu->hvf_fd, sf->base, desc->base); - wvmcs(cpu->hvf_fd, sf->limit, desc->limit); - wvmcs(cpu->hvf_fd, sf->selector, desc->sel); - wvmcs(cpu->hvf_fd, sf->ar_bytes, desc->ar); + wvmcs(cpu->accel->fd, sf->base, desc->base); + wvmcs(cpu->accel->fd, sf->limit, desc->limit); + wvmcs(cpu->accel->fd, sf->selector, desc->sel); + wvmcs(cpu->accel->fd, sf->ar_bytes, desc->ar); } -void x86_segment_descriptor_to_vmx(struct CPUState *cpu, x68_segment_selector selector, struct x86_segment_descriptor *desc, struct vmx_segment *vmx_desc) +void x86_segment_descriptor_to_vmx(CPUState *cpu, x68_segment_selector selector, + struct x86_segment_descriptor *desc, + struct vmx_segment *vmx_desc) { vmx_desc->sel = selector.sel; vmx_desc->base = x86_segment_base(desc); @@ -108,7 +109,8 @@ void x86_segment_descriptor_to_vmx(struct CPUState *cpu, x68_segment_selector se desc->type; } -void vmx_segment_to_x86_descriptor(struct CPUState *cpu, struct vmx_segment *vmx_desc, struct x86_segment_descriptor *desc) +void 
vmx_segment_to_x86_descriptor(CPUState *cpu, struct vmx_segment *vmx_desc, + struct x86_segment_descriptor *desc) { x86_set_segment_limit(desc, vmx_desc->limit); x86_set_segment_base(desc, vmx_desc->base); diff --git a/target/i386/hvf/x86_descr.h b/target/i386/hvf/x86_descr.h index 25a2b1731c..9f06014b56 100644 --- a/target/i386/hvf/x86_descr.h +++ b/target/i386/hvf/x86_descr.h @@ -5,7 +5,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -17,7 +17,7 @@ */ #ifndef HVF_X86_DESCR_H -#define HVF_X86_DESCR_H 1 +#define HVF_X86_DESCR_H #include "x86.h" @@ -29,29 +29,29 @@ typedef struct vmx_segment { } vmx_segment; /* deal with vmstate descriptors */ -void vmx_read_segment_descriptor(struct CPUState *cpu, +void vmx_read_segment_descriptor(CPUState *cpu, struct vmx_segment *desc, enum X86Seg seg); void vmx_write_segment_descriptor(CPUState *cpu, struct vmx_segment *desc, enum X86Seg seg); -x68_segment_selector vmx_read_segment_selector(struct CPUState *cpu, +x68_segment_selector vmx_read_segment_selector(CPUState *cpu, enum X86Seg seg); -void vmx_write_segment_selector(struct CPUState *cpu, +void vmx_write_segment_selector(CPUState *cpu, x68_segment_selector selector, enum X86Seg seg); -uint64_t vmx_read_segment_base(struct CPUState *cpu, enum X86Seg seg); -void vmx_write_segment_base(struct CPUState *cpu, enum X86Seg seg, +uint64_t vmx_read_segment_base(CPUState *cpu, enum X86Seg seg); +void vmx_write_segment_base(CPUState *cpu, enum X86Seg seg, uint64_t base); -void x86_segment_descriptor_to_vmx(struct CPUState *cpu, +void 
x86_segment_descriptor_to_vmx(CPUState *cpu, x68_segment_selector selector, struct x86_segment_descriptor *desc, struct vmx_segment *vmx_desc); uint32_t vmx_read_segment_limit(CPUState *cpu, enum X86Seg seg); uint32_t vmx_read_segment_ar(CPUState *cpu, enum X86Seg seg); -void vmx_segment_to_x86_descriptor(struct CPUState *cpu, +void vmx_segment_to_x86_descriptor(CPUState *cpu, struct vmx_segment *vmx_desc, struct x86_segment_descriptor *desc); diff --git a/target/i386/hvf/x86_emu.c b/target/i386/hvf/x86_emu.c index 3ea18edc68..3a3f0a50d0 100644 --- a/target/i386/hvf/x86_emu.c +++ b/target/i386/hvf/x86_emu.c @@ -5,7 +5,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -23,7 +23,7 @@ // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either -// version 2 of the License, or (at your option) any later version. +// version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -37,7 +37,6 @@ #include "qemu/osdep.h" #include "panic.h" -#include "qemu-common.h" #include "x86_decode.h" #include "x86.h" #include "x86_emu.h" @@ -46,7 +45,7 @@ #include "vmcs.h" #include "vmx.h" -void hvf_handle_io(struct CPUState *cpu, uint16_t port, void *data, +void hvf_handle_io(CPUState *cs, uint16_t port, void *data, int direction, int size, uint32_t count); #define EXEC_2OP_FLAGS_CMD(env, decode, cmd, FLAGS_FUNC, save_res) \ @@ -95,13 +94,13 @@ target_ulong read_reg(CPUX86State *env, int reg, int size) { switch (size) { case 1: - return env->hvf_emul->regs[reg].lx; + return x86_reg(env, reg)->lx; case 2: - return env->hvf_emul->regs[reg].rx; + return x86_reg(env, reg)->rx; case 4: - return env->hvf_emul->regs[reg].erx; + return x86_reg(env, reg)->erx; case 8: - return env->hvf_emul->regs[reg].rrx; + return x86_reg(env, reg)->rrx; default: abort(); } @@ -112,16 +111,16 @@ void write_reg(CPUX86State *env, int reg, target_ulong val, int size) { switch (size) { case 1: - env->hvf_emul->regs[reg].lx = val; + x86_reg(env, reg)->lx = val; break; case 2: - env->hvf_emul->regs[reg].rx = val; + x86_reg(env, reg)->rx = val; break; case 4: - env->hvf_emul->regs[reg].rrx = (uint32_t)val; + x86_reg(env, reg)->rrx = (uint32_t)val; break; case 8: - env->hvf_emul->regs[reg].rrx = val; + x86_reg(env, reg)->rrx = val; break; default: abort(); @@ -171,28 +170,28 @@ void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size) } } -static bool is_host_reg(struct CPUX86State *env, target_ulong ptr) +static bool is_host_reg(CPUX86State *env, target_ulong ptr) { - return (ptr - (target_ulong)&env->hvf_emul->regs[0]) < sizeof(env->hvf_emul->regs); + return (ptr - (target_ulong)&env->regs[0]) < sizeof(env->regs); } -void write_val_ext(struct CPUX86State *env, target_ulong ptr, target_ulong val, int size) +void 
write_val_ext(CPUX86State *env, target_ulong ptr, target_ulong val, int size) { if (is_host_reg(env, ptr)) { write_val_to_reg(ptr, val, size); return; } - vmx_write_mem(ENV_GET_CPU(env), ptr, &val, size); + vmx_write_mem(env_cpu(env), ptr, &val, size); } -uint8_t *read_mmio(struct CPUX86State *env, target_ulong ptr, int bytes) +uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes) { - vmx_read_mem(ENV_GET_CPU(env), env->hvf_emul->mmio_buf, ptr, bytes); - return env->hvf_emul->mmio_buf; + vmx_read_mem(env_cpu(env), env->hvf_mmio_buf, ptr, bytes); + return env->hvf_mmio_buf; } -target_ulong read_val_ext(struct CPUX86State *env, target_ulong ptr, int size) +target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size) { target_ulong val; uint8_t *mmio_ptr; @@ -222,7 +221,7 @@ target_ulong read_val_ext(struct CPUX86State *env, target_ulong ptr, int size) return val; } -static void fetch_operands(struct CPUX86State *env, struct x86_decode *decode, +static void fetch_operands(CPUX86State *env, struct x86_decode *decode, int n, bool val_op0, bool val_op1, bool val_op2) { int i; @@ -261,58 +260,58 @@ static void fetch_operands(struct CPUX86State *env, struct x86_decode *decode, } } -static void exec_mov(struct CPUX86State *env, struct x86_decode *decode) +static void exec_mov(CPUX86State *env, struct x86_decode *decode) { fetch_operands(env, decode, 2, false, true, false); write_val_ext(env, decode->op[0].ptr, decode->op[1].val, decode->operand_size); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_add(struct CPUX86State *env, struct x86_decode *decode) +static void exec_add(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, +, SET_FLAGS_OSZAPC_ADD, true); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_or(struct CPUX86State *env, struct x86_decode *decode) +static void exec_or(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, |, 
SET_FLAGS_OSZAPC_LOGIC, true); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_adc(struct CPUX86State *env, struct x86_decode *decode) +static void exec_adc(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, +get_CF(env)+, SET_FLAGS_OSZAPC_ADD, true); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_sbb(struct CPUX86State *env, struct x86_decode *decode) +static void exec_sbb(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, -get_CF(env)-, SET_FLAGS_OSZAPC_SUB, true); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_and(struct CPUX86State *env, struct x86_decode *decode) +static void exec_and(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, &, SET_FLAGS_OSZAPC_LOGIC, true); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_sub(struct CPUX86State *env, struct x86_decode *decode) +static void exec_sub(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, true); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_xor(struct CPUX86State *env, struct x86_decode *decode) +static void exec_xor(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, ^, SET_FLAGS_OSZAPC_LOGIC, true); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_neg(struct CPUX86State *env, struct x86_decode *decode) +static void exec_neg(CPUX86State *env, struct x86_decode *decode) { /*EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, false);*/ int32_t val; @@ -332,50 +331,50 @@ static void exec_neg(struct CPUX86State *env, struct x86_decode *decode) } /*lflags_to_rflags(env);*/ - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_cmp(struct CPUX86State *env, struct x86_decode *decode) +static void exec_cmp(CPUX86State *env, struct x86_decode *decode) { 
EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, false); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_inc(struct CPUX86State *env, struct x86_decode *decode) +static void exec_inc(CPUX86State *env, struct x86_decode *decode) { decode->op[1].type = X86_VAR_IMMEDIATE; decode->op[1].val = 0; EXEC_2OP_FLAGS_CMD(env, decode, +1+, SET_FLAGS_OSZAP_ADD, true); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_dec(struct CPUX86State *env, struct x86_decode *decode) +static void exec_dec(CPUX86State *env, struct x86_decode *decode) { decode->op[1].type = X86_VAR_IMMEDIATE; decode->op[1].val = 0; EXEC_2OP_FLAGS_CMD(env, decode, -1-, SET_FLAGS_OSZAP_SUB, true); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_tst(struct CPUX86State *env, struct x86_decode *decode) +static void exec_tst(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, &, SET_FLAGS_OSZAPC_LOGIC, false); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_not(struct CPUX86State *env, struct x86_decode *decode) +static void exec_not(CPUX86State *env, struct x86_decode *decode) { fetch_operands(env, decode, 1, true, false, false); write_val_ext(env, decode->op[0].ptr, ~decode->op[0].val, decode->operand_size); - RIP(env) += decode->len; + env->eip += decode->len; } -void exec_movzx(struct CPUX86State *env, struct x86_decode *decode) +void exec_movzx(CPUX86State *env, struct x86_decode *decode) { int src_op_size; int op_size = decode->operand_size; @@ -392,41 +391,43 @@ void exec_movzx(struct CPUX86State *env, struct x86_decode *decode) decode->op[1].val = read_val_ext(env, decode->op[1].ptr, src_op_size); write_val_ext(env, decode->op[0].ptr, decode->op[1].val, op_size); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_out(struct CPUX86State *env, struct x86_decode *decode) +static void exec_out(CPUX86State *env, struct x86_decode *decode) { switch 
(decode->opcode[0]) { case 0xe6: - hvf_handle_io(ENV_GET_CPU(env), decode->op[0].val, &AL(env), 1, 1, 1); + hvf_handle_io(env_cpu(env), decode->op[0].val, &AL(env), 1, 1, 1); break; case 0xe7: - hvf_handle_io(ENV_GET_CPU(env), decode->op[0].val, &RAX(env), 1, + hvf_handle_io(env_cpu(env), decode->op[0].val, &RAX(env), 1, decode->operand_size, 1); break; case 0xee: - hvf_handle_io(ENV_GET_CPU(env), DX(env), &AL(env), 1, 1, 1); + hvf_handle_io(env_cpu(env), DX(env), &AL(env), 1, 1, 1); break; case 0xef: - hvf_handle_io(ENV_GET_CPU(env), DX(env), &RAX(env), 1, decode->operand_size, 1); + hvf_handle_io(env_cpu(env), DX(env), &RAX(env), 1, + decode->operand_size, 1); break; default: VM_PANIC("Bad out opcode\n"); break; } - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_in(struct CPUX86State *env, struct x86_decode *decode) +static void exec_in(CPUX86State *env, struct x86_decode *decode) { target_ulong val = 0; switch (decode->opcode[0]) { case 0xe4: - hvf_handle_io(ENV_GET_CPU(env), decode->op[0].val, &AL(env), 0, 1, 1); + hvf_handle_io(env_cpu(env), decode->op[0].val, &AL(env), 0, 1, 1); break; case 0xe5: - hvf_handle_io(ENV_GET_CPU(env), decode->op[0].val, &val, 0, decode->operand_size, 1); + hvf_handle_io(env_cpu(env), decode->op[0].val, &val, 0, + decode->operand_size, 1); if (decode->operand_size == 2) { AX(env) = val; } else { @@ -434,10 +435,10 @@ static void exec_in(struct CPUX86State *env, struct x86_decode *decode) } break; case 0xec: - hvf_handle_io(ENV_GET_CPU(env), DX(env), &AL(env), 0, 1, 1); + hvf_handle_io(env_cpu(env), DX(env), &AL(env), 0, 1, 1); break; case 0xed: - hvf_handle_io(ENV_GET_CPU(env), DX(env), &val, 0, decode->operand_size, 1); + hvf_handle_io(env_cpu(env), DX(env), &val, 0, decode->operand_size, 1); if (decode->operand_size == 2) { AX(env) = val; } else { @@ -450,14 +451,14 @@ static void exec_in(struct CPUX86State *env, struct x86_decode *decode) break; } - RIP(env) += decode->len; + env->eip += decode->len; } 
-static inline void string_increment_reg(struct CPUX86State *env, int reg, +static inline void string_increment_reg(CPUX86State *env, int reg, struct x86_decode *decode) { target_ulong val = read_reg(env, reg, decode->addressing_size); - if (env->hvf_emul->rflags.df) { + if (env->eflags & DF_MASK) { val -= decode->operand_size; } else { val += decode->operand_size; @@ -465,8 +466,8 @@ static inline void string_increment_reg(struct CPUX86State *env, int reg, write_reg(env, reg, val, decode->addressing_size); } -static inline void string_rep(struct CPUX86State *env, struct x86_decode *decode, - void (*func)(struct CPUX86State *env, +static inline void string_rep(CPUX86State *env, struct x86_decode *decode, + void (*func)(CPUX86State *env, struct x86_decode *ins), int rep) { target_ulong rcx = read_reg(env, R_ECX, decode->addressing_size); @@ -482,19 +483,20 @@ static inline void string_rep(struct CPUX86State *env, struct x86_decode *decode } } -static void exec_ins_single(struct CPUX86State *env, struct x86_decode *decode) +static void exec_ins_single(CPUX86State *env, struct x86_decode *decode) { - target_ulong addr = linear_addr_size(ENV_GET_CPU(env), RDI(env), decode->addressing_size, - R_ES); + target_ulong addr = linear_addr_size(env_cpu(env), RDI(env), + decode->addressing_size, R_ES); - hvf_handle_io(ENV_GET_CPU(env), DX(env), env->hvf_emul->mmio_buf, 0, + hvf_handle_io(env_cpu(env), DX(env), env->hvf_mmio_buf, 0, decode->operand_size, 1); - vmx_write_mem(ENV_GET_CPU(env), addr, env->hvf_emul->mmio_buf, decode->operand_size); + vmx_write_mem(env_cpu(env), addr, env->hvf_mmio_buf, + decode->operand_size); string_increment_reg(env, R_EDI, decode); } -static void exec_ins(struct CPUX86State *env, struct x86_decode *decode) +static void exec_ins(CPUX86State *env, struct x86_decode *decode) { if (decode->rep) { string_rep(env, decode, exec_ins_single, 0); @@ -502,21 +504,22 @@ static void exec_ins(struct CPUX86State *env, struct x86_decode *decode) 
exec_ins_single(env, decode); } - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_outs_single(struct CPUX86State *env, struct x86_decode *decode) +static void exec_outs_single(CPUX86State *env, struct x86_decode *decode) { target_ulong addr = decode_linear_addr(env, decode, RSI(env), R_DS); - vmx_read_mem(ENV_GET_CPU(env), env->hvf_emul->mmio_buf, addr, decode->operand_size); - hvf_handle_io(ENV_GET_CPU(env), DX(env), env->hvf_emul->mmio_buf, 1, + vmx_read_mem(env_cpu(env), env->hvf_mmio_buf, addr, + decode->operand_size); + hvf_handle_io(env_cpu(env), DX(env), env->hvf_mmio_buf, 1, decode->operand_size, 1); string_increment_reg(env, R_ESI, decode); } -static void exec_outs(struct CPUX86State *env, struct x86_decode *decode) +static void exec_outs(CPUX86State *env, struct x86_decode *decode) { if (decode->rep) { string_rep(env, decode, exec_outs_single, 0); @@ -524,18 +527,18 @@ static void exec_outs(struct CPUX86State *env, struct x86_decode *decode) exec_outs_single(env, decode); } - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_movs_single(struct CPUX86State *env, struct x86_decode *decode) +static void exec_movs_single(CPUX86State *env, struct x86_decode *decode) { target_ulong src_addr; target_ulong dst_addr; target_ulong val; src_addr = decode_linear_addr(env, decode, RSI(env), R_DS); - dst_addr = linear_addr_size(ENV_GET_CPU(env), RDI(env), decode->addressing_size, - R_ES); + dst_addr = linear_addr_size(env_cpu(env), RDI(env), + decode->addressing_size, R_ES); val = read_val_ext(env, src_addr, decode->operand_size); write_val_ext(env, dst_addr, val, decode->operand_size); @@ -544,7 +547,7 @@ static void exec_movs_single(struct CPUX86State *env, struct x86_decode *decode) string_increment_reg(env, R_EDI, decode); } -static void exec_movs(struct CPUX86State *env, struct x86_decode *decode) +static void exec_movs(CPUX86State *env, struct x86_decode *decode) { if (decode->rep) { string_rep(env, decode, 
exec_movs_single, 0); @@ -552,17 +555,17 @@ static void exec_movs(struct CPUX86State *env, struct x86_decode *decode) exec_movs_single(env, decode); } - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_cmps_single(struct CPUX86State *env, struct x86_decode *decode) +static void exec_cmps_single(CPUX86State *env, struct x86_decode *decode) { target_ulong src_addr; target_ulong dst_addr; src_addr = decode_linear_addr(env, decode, RSI(env), R_DS); - dst_addr = linear_addr_size(ENV_GET_CPU(env), RDI(env), decode->addressing_size, - R_ES); + dst_addr = linear_addr_size(env_cpu(env), RDI(env), + decode->addressing_size, R_ES); decode->op[0].type = X86_VAR_IMMEDIATE; decode->op[0].val = read_val_ext(env, src_addr, decode->operand_size); @@ -575,31 +578,32 @@ static void exec_cmps_single(struct CPUX86State *env, struct x86_decode *decode) string_increment_reg(env, R_EDI, decode); } -static void exec_cmps(struct CPUX86State *env, struct x86_decode *decode) +static void exec_cmps(CPUX86State *env, struct x86_decode *decode) { if (decode->rep) { string_rep(env, decode, exec_cmps_single, decode->rep); } else { exec_cmps_single(env, decode); } - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_stos_single(struct CPUX86State *env, struct x86_decode *decode) +static void exec_stos_single(CPUX86State *env, struct x86_decode *decode) { target_ulong addr; target_ulong val; - addr = linear_addr_size(ENV_GET_CPU(env), RDI(env), decode->addressing_size, R_ES); + addr = linear_addr_size(env_cpu(env), RDI(env), + decode->addressing_size, R_ES); val = read_reg(env, R_EAX, decode->operand_size); - vmx_write_mem(ENV_GET_CPU(env), addr, &val, decode->operand_size); + vmx_write_mem(env_cpu(env), addr, &val, decode->operand_size); string_increment_reg(env, R_EDI, decode); } -static void exec_stos(struct CPUX86State *env, struct x86_decode *decode) +static void exec_stos(CPUX86State *env, struct x86_decode *decode) { if (decode->rep) { 
string_rep(env, decode, exec_stos_single, 0); @@ -607,22 +611,23 @@ static void exec_stos(struct CPUX86State *env, struct x86_decode *decode) exec_stos_single(env, decode); } - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_scas_single(struct CPUX86State *env, struct x86_decode *decode) +static void exec_scas_single(CPUX86State *env, struct x86_decode *decode) { target_ulong addr; - addr = linear_addr_size(ENV_GET_CPU(env), RDI(env), decode->addressing_size, R_ES); + addr = linear_addr_size(env_cpu(env), RDI(env), + decode->addressing_size, R_ES); decode->op[1].type = X86_VAR_IMMEDIATE; - vmx_read_mem(ENV_GET_CPU(env), &decode->op[1].val, addr, decode->operand_size); + vmx_read_mem(env_cpu(env), &decode->op[1].val, addr, decode->operand_size); EXEC_2OP_FLAGS_CMD(env, decode, -, SET_FLAGS_OSZAPC_SUB, false); string_increment_reg(env, R_EDI, decode); } -static void exec_scas(struct CPUX86State *env, struct x86_decode *decode) +static void exec_scas(CPUX86State *env, struct x86_decode *decode) { decode->op[0].type = X86_VAR_REG; decode->op[0].reg = R_EAX; @@ -632,22 +637,22 @@ static void exec_scas(struct CPUX86State *env, struct x86_decode *decode) exec_scas_single(env, decode); } - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_lods_single(struct CPUX86State *env, struct x86_decode *decode) +static void exec_lods_single(CPUX86State *env, struct x86_decode *decode) { target_ulong addr; target_ulong val = 0; addr = decode_linear_addr(env, decode, RSI(env), R_DS); - vmx_read_mem(ENV_GET_CPU(env), &val, addr, decode->operand_size); + vmx_read_mem(env_cpu(env), &val, addr, decode->operand_size); write_reg(env, R_EAX, val, decode->operand_size); string_increment_reg(env, R_ESI, decode); } -static void exec_lods(struct CPUX86State *env, struct x86_decode *decode) +static void exec_lods(CPUX86State *env, struct x86_decode *decode) { if (decode->rep) { string_rep(env, decode, exec_lods_single, 0); @@ -655,39 +660,37 @@ static 
void exec_lods(struct CPUX86State *env, struct x86_decode *decode) exec_lods_single(env, decode); } - RIP(env) += decode->len; + env->eip += decode->len; } -#define MSR_IA32_UCODE_REV 0x00000017 - -void simulate_rdmsr(struct CPUState *cpu) +void simulate_rdmsr(CPUX86State *env) { - X86CPU *x86_cpu = X86_CPU(cpu); - CPUX86State *env = &x86_cpu->env; + X86CPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); uint32_t msr = ECX(env); uint64_t val = 0; switch (msr) { case MSR_IA32_TSC: - val = rdtscp() + rvmcs(cpu->hvf_fd, VMCS_TSC_OFFSET); + val = rdtscp() + rvmcs(cs->accel->fd, VMCS_TSC_OFFSET); break; case MSR_IA32_APICBASE: - val = cpu_get_apic_base(X86_CPU(cpu)->apic_state); + val = cpu_get_apic_base(cpu->apic_state); break; case MSR_IA32_UCODE_REV: - val = (0x100000000ULL << 32) | 0x100000000ULL; + val = cpu->ucode_rev; break; case MSR_EFER: - val = rvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER); + val = rvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER); break; case MSR_FSBASE: - val = rvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE); + val = rvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE); break; case MSR_GSBASE: - val = rvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE); + val = rvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE); break; case MSR_KERNELGSBASE: - val = rvmcs(cpu->hvf_fd, VMCS_HOST_FS_BASE); + val = rvmcs(cs->accel->fd, VMCS_HOST_FS_BASE); break; case MSR_STAR: abort(); @@ -741,6 +744,10 @@ void simulate_rdmsr(struct CPUState *cpu) case MSR_MTRRdefType: val = env->mtrr_deftype; break; + case MSR_CORE_THREAD_COUNT: + val = cs->nr_threads * cs->nr_cores; /* thread count, bits 15..0 */ + val |= ((uint32_t)cs->nr_cores << 16); /* core count, bits 31..16 */ + break; default: /* fprintf(stderr, "%s: unknown msr 0x%x\n", __func__, msr); */ val = 0; @@ -751,36 +758,33 @@ void simulate_rdmsr(struct CPUState *cpu) RDX(env) = (uint32_t)(val >> 32); } -static void exec_rdmsr(struct CPUX86State *env, struct x86_decode *decode) +static void exec_rdmsr(CPUX86State *env, struct x86_decode *decode) { - 
simulate_rdmsr(ENV_GET_CPU(env)); - RIP(env) += decode->len; + simulate_rdmsr(env); + env->eip += decode->len; } -void simulate_wrmsr(struct CPUState *cpu) +void simulate_wrmsr(CPUX86State *env) { - X86CPU *x86_cpu = X86_CPU(cpu); - CPUX86State *env = &x86_cpu->env; + X86CPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); uint32_t msr = ECX(env); uint64_t data = ((uint64_t)EDX(env) << 32) | EAX(env); switch (msr) { case MSR_IA32_TSC: - /* if (!osx_is_sierra()) - wvmcs(cpu->hvf_fd, VMCS_TSC_OFFSET, data - rdtscp()); - hv_vm_sync_tsc(data);*/ break; case MSR_IA32_APICBASE: - cpu_set_apic_base(X86_CPU(cpu)->apic_state, data); + cpu_set_apic_base(cpu->apic_state, data); break; case MSR_FSBASE: - wvmcs(cpu->hvf_fd, VMCS_GUEST_FS_BASE, data); + wvmcs(cs->accel->fd, VMCS_GUEST_FS_BASE, data); break; case MSR_GSBASE: - wvmcs(cpu->hvf_fd, VMCS_GUEST_GS_BASE, data); + wvmcs(cs->accel->fd, VMCS_GUEST_GS_BASE, data); break; case MSR_KERNELGSBASE: - wvmcs(cpu->hvf_fd, VMCS_HOST_FS_BASE, data); + wvmcs(cs->accel->fd, VMCS_HOST_FS_BASE, data); break; case MSR_STAR: abort(); @@ -792,10 +796,10 @@ void simulate_wrmsr(struct CPUState *cpu) abort(); break; case MSR_EFER: - /*printf("new efer %llx\n", EFER(cpu));*/ - wvmcs(cpu->hvf_fd, VMCS_GUEST_IA32_EFER, data); + /*printf("new efer %llx\n", EFER(cs));*/ + wvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER, data); if (data & MSR_EFER_NXE) { - hv_vcpu_invalidate_tlb(cpu->hvf_fd); + hv_vcpu_invalidate_tlb(cs->accel->fd); } break; case MSR_MTRRphysBase(0): @@ -844,22 +848,22 @@ void simulate_wrmsr(struct CPUState *cpu) /* Related to support known hypervisor interface */ /* if (g_hypervisor_iface) - g_hypervisor_iface->wrmsr_handler(cpu, msr, data); + g_hypervisor_iface->wrmsr_handler(cs, msr, data); - printf("write msr %llx\n", RCX(cpu));*/ + printf("write msr %llx\n", RCX(cs));*/ } -static void exec_wrmsr(struct CPUX86State *env, struct x86_decode *decode) +static void exec_wrmsr(CPUX86State *env, struct x86_decode *decode) { - 
simulate_wrmsr(ENV_GET_CPU(env)); - RIP(env) += decode->len; + simulate_wrmsr(env); + env->eip += decode->len; } /* * flag: * 0 - bt, 1 - btc, 2 - bts, 3 - btr */ -static void do_bt(struct CPUX86State *env, struct x86_decode *decode, int flag) +static void do_bt(CPUX86State *env, struct x86_decode *decode, int flag) { int32_t displacement; uint8_t index; @@ -905,31 +909,31 @@ static void do_bt(struct CPUX86State *env, struct x86_decode *decode, int flag) set_CF(env, cf); } -static void exec_bt(struct CPUX86State *env, struct x86_decode *decode) +static void exec_bt(CPUX86State *env, struct x86_decode *decode) { do_bt(env, decode, 0); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_btc(struct CPUX86State *env, struct x86_decode *decode) +static void exec_btc(CPUX86State *env, struct x86_decode *decode) { do_bt(env, decode, 1); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_btr(struct CPUX86State *env, struct x86_decode *decode) +static void exec_btr(CPUX86State *env, struct x86_decode *decode) { do_bt(env, decode, 3); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_bts(struct CPUX86State *env, struct x86_decode *decode) +static void exec_bts(CPUX86State *env, struct x86_decode *decode) { do_bt(env, decode, 2); - RIP(env) += decode->len; + env->eip += decode->len; } -void exec_shl(struct CPUX86State *env, struct x86_decode *decode) +void exec_shl(CPUX86State *env, struct x86_decode *decode) { uint8_t count; int of = 0, cf = 0; @@ -990,7 +994,7 @@ void exec_shl(struct CPUX86State *env, struct x86_decode *decode) exit: /* lflags_to_rflags(env); */ - RIP(env) += decode->len; + env->eip += decode->len; } void exec_movsx(CPUX86State *env, struct x86_decode *decode) @@ -1013,10 +1017,10 @@ void exec_movsx(CPUX86State *env, struct x86_decode *decode) write_val_ext(env, decode->op[0].ptr, decode->op[1].val, op_size); - RIP(env) += decode->len; + env->eip += decode->len; } -void exec_ror(struct 
CPUX86State *env, struct x86_decode *decode) +void exec_ror(CPUX86State *env, struct x86_decode *decode) { uint8_t count; @@ -1091,10 +1095,10 @@ void exec_ror(struct CPUX86State *env, struct x86_decode *decode) break; } } - RIP(env) += decode->len; + env->eip += decode->len; } -void exec_rol(struct CPUX86State *env, struct x86_decode *decode) +void exec_rol(CPUX86State *env, struct x86_decode *decode) { uint8_t count; @@ -1172,11 +1176,11 @@ void exec_rol(struct CPUX86State *env, struct x86_decode *decode) break; } } - RIP(env) += decode->len; + env->eip += decode->len; } -void exec_rcl(struct CPUX86State *env, struct x86_decode *decode) +void exec_rcl(CPUX86State *env, struct x86_decode *decode) { uint8_t count; int of = 0, cf = 0; @@ -1258,10 +1262,10 @@ void exec_rcl(struct CPUX86State *env, struct x86_decode *decode) break; } } - RIP(env) += decode->len; + env->eip += decode->len; } -void exec_rcr(struct CPUX86State *env, struct x86_decode *decode) +void exec_rcr(CPUX86State *env, struct x86_decode *decode) { uint8_t count; int of = 0, cf = 0; @@ -1333,10 +1337,10 @@ void exec_rcr(struct CPUX86State *env, struct x86_decode *decode) break; } } - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_xchg(struct CPUX86State *env, struct x86_decode *decode) +static void exec_xchg(CPUX86State *env, struct x86_decode *decode) { fetch_operands(env, decode, 2, true, true, false); @@ -1345,21 +1349,21 @@ static void exec_xchg(struct CPUX86State *env, struct x86_decode *decode) write_val_ext(env, decode->op[1].ptr, decode->op[0].val, decode->operand_size); - RIP(env) += decode->len; + env->eip += decode->len; } -static void exec_xadd(struct CPUX86State *env, struct x86_decode *decode) +static void exec_xadd(CPUX86State *env, struct x86_decode *decode) { EXEC_2OP_FLAGS_CMD(env, decode, +, SET_FLAGS_OSZAPC_ADD, true); write_val_ext(env, decode->op[1].ptr, decode->op[0].val, decode->operand_size); - RIP(env) += decode->len; + env->eip += decode->len; } 
static struct cmd_handler { enum x86_decode_cmd cmd; - void (*handler)(struct CPUX86State *env, struct x86_decode *ins); + void (*handler)(CPUX86State *env, struct x86_decode *ins); } handlers[] = { {X86_DECODE_CMD_INVL, NULL,}, {X86_DECODE_CMD_MOV, exec_mov}, @@ -1413,63 +1417,63 @@ static void init_cmd_handler() } } -void load_regs(struct CPUState *cpu) +void load_regs(CPUState *cs) { - X86CPU *x86_cpu = X86_CPU(cpu); - CPUX86State *env = &x86_cpu->env; + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; int i = 0; - RRX(env, R_EAX) = rreg(cpu->hvf_fd, HV_X86_RAX); - RRX(env, R_EBX) = rreg(cpu->hvf_fd, HV_X86_RBX); - RRX(env, R_ECX) = rreg(cpu->hvf_fd, HV_X86_RCX); - RRX(env, R_EDX) = rreg(cpu->hvf_fd, HV_X86_RDX); - RRX(env, R_ESI) = rreg(cpu->hvf_fd, HV_X86_RSI); - RRX(env, R_EDI) = rreg(cpu->hvf_fd, HV_X86_RDI); - RRX(env, R_ESP) = rreg(cpu->hvf_fd, HV_X86_RSP); - RRX(env, R_EBP) = rreg(cpu->hvf_fd, HV_X86_RBP); + RRX(env, R_EAX) = rreg(cs->accel->fd, HV_X86_RAX); + RRX(env, R_EBX) = rreg(cs->accel->fd, HV_X86_RBX); + RRX(env, R_ECX) = rreg(cs->accel->fd, HV_X86_RCX); + RRX(env, R_EDX) = rreg(cs->accel->fd, HV_X86_RDX); + RRX(env, R_ESI) = rreg(cs->accel->fd, HV_X86_RSI); + RRX(env, R_EDI) = rreg(cs->accel->fd, HV_X86_RDI); + RRX(env, R_ESP) = rreg(cs->accel->fd, HV_X86_RSP); + RRX(env, R_EBP) = rreg(cs->accel->fd, HV_X86_RBP); for (i = 8; i < 16; i++) { - RRX(env, i) = rreg(cpu->hvf_fd, HV_X86_RAX + i); + RRX(env, i) = rreg(cs->accel->fd, HV_X86_RAX + i); } - RFLAGS(env) = rreg(cpu->hvf_fd, HV_X86_RFLAGS); + env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS); rflags_to_lflags(env); - RIP(env) = rreg(cpu->hvf_fd, HV_X86_RIP); + env->eip = rreg(cs->accel->fd, HV_X86_RIP); } -void store_regs(struct CPUState *cpu) +void store_regs(CPUState *cs) { - X86CPU *x86_cpu = X86_CPU(cpu); - CPUX86State *env = &x86_cpu->env; + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; int i = 0; - wreg(cpu->hvf_fd, HV_X86_RAX, RAX(env)); - wreg(cpu->hvf_fd, 
HV_X86_RBX, RBX(env)); - wreg(cpu->hvf_fd, HV_X86_RCX, RCX(env)); - wreg(cpu->hvf_fd, HV_X86_RDX, RDX(env)); - wreg(cpu->hvf_fd, HV_X86_RSI, RSI(env)); - wreg(cpu->hvf_fd, HV_X86_RDI, RDI(env)); - wreg(cpu->hvf_fd, HV_X86_RBP, RBP(env)); - wreg(cpu->hvf_fd, HV_X86_RSP, RSP(env)); + wreg(cs->accel->fd, HV_X86_RAX, RAX(env)); + wreg(cs->accel->fd, HV_X86_RBX, RBX(env)); + wreg(cs->accel->fd, HV_X86_RCX, RCX(env)); + wreg(cs->accel->fd, HV_X86_RDX, RDX(env)); + wreg(cs->accel->fd, HV_X86_RSI, RSI(env)); + wreg(cs->accel->fd, HV_X86_RDI, RDI(env)); + wreg(cs->accel->fd, HV_X86_RBP, RBP(env)); + wreg(cs->accel->fd, HV_X86_RSP, RSP(env)); for (i = 8; i < 16; i++) { - wreg(cpu->hvf_fd, HV_X86_RAX + i, RRX(env, i)); + wreg(cs->accel->fd, HV_X86_RAX + i, RRX(env, i)); } lflags_to_rflags(env); - wreg(cpu->hvf_fd, HV_X86_RFLAGS, RFLAGS(env)); - macvm_set_rip(cpu, RIP(env)); + wreg(cs->accel->fd, HV_X86_RFLAGS, env->eflags); + macvm_set_rip(cs, env->eip); } -bool exec_instruction(struct CPUX86State *env, struct x86_decode *ins) +bool exec_instruction(CPUX86State *env, struct x86_decode *ins) { - /*if (hvf_vcpu_id(cpu)) - printf("%d, %llx: exec_instruction %s\n", hvf_vcpu_id(cpu), RIP(cpu), + /*if (hvf_vcpu_id(cs)) + printf("%d, %llx: exec_instruction %s\n", hvf_vcpu_id(cs), env->eip, decode_cmd_to_string(ins->cmd));*/ if (!_cmd_handler[ins->cmd].handler) { - printf("Unimplemented handler (%llx) for %d (%x %x) \n", RIP(env), + printf("Unimplemented handler (%llx) for %d (%x %x) \n", env->eip, ins->cmd, ins->opcode[0], ins->opcode_len > 1 ? 
ins->opcode[1] : 0); - RIP(env) += ins->len; + env->eip += ins->len; return true; } diff --git a/target/i386/hvf/x86_emu.h b/target/i386/hvf/x86_emu.h index fbb4832576..8bd97608c4 100644 --- a/target/i386/hvf/x86_emu.h +++ b/target/i386/hvf/x86_emu.h @@ -5,7 +5,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,35 +15,36 @@ * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef __X86_EMU_H__ -#define __X86_EMU_H__ + +#ifndef X86_EMU_H +#define X86_EMU_H #include "x86.h" #include "x86_decode.h" #include "cpu.h" void init_emu(void); -bool exec_instruction(struct CPUX86State *env, struct x86_decode *ins); +bool exec_instruction(CPUX86State *env, struct x86_decode *ins); -void load_regs(struct CPUState *cpu); -void store_regs(struct CPUState *cpu); +void load_regs(CPUState *cpu); +void store_regs(CPUState *cpu); -void simulate_rdmsr(struct CPUState *cpu); -void simulate_wrmsr(struct CPUState *cpu); +void simulate_rdmsr(CPUX86State *env); +void simulate_wrmsr(CPUX86State *env); target_ulong read_reg(CPUX86State *env, int reg, int size); void write_reg(CPUX86State *env, int reg, target_ulong val, int size); target_ulong read_val_from_reg(target_ulong reg_ptr, int size); void write_val_to_reg(target_ulong reg_ptr, target_ulong val, int size); -void write_val_ext(struct CPUX86State *env, target_ulong ptr, target_ulong val, int size); -uint8_t *read_mmio(struct CPUX86State *env, target_ulong ptr, int bytes); -target_ulong read_val_ext(struct CPUX86State *env, 
target_ulong ptr, int size); +void write_val_ext(CPUX86State *env, target_ulong ptr, target_ulong val, int size); +uint8_t *read_mmio(CPUX86State *env, target_ulong ptr, int bytes); +target_ulong read_val_ext(CPUX86State *env, target_ulong ptr, int size); -void exec_movzx(struct CPUX86State *env, struct x86_decode *decode); -void exec_shl(struct CPUX86State *env, struct x86_decode *decode); -void exec_movsx(struct CPUX86State *env, struct x86_decode *decode); -void exec_ror(struct CPUX86State *env, struct x86_decode *decode); -void exec_rol(struct CPUX86State *env, struct x86_decode *decode); -void exec_rcl(struct CPUX86State *env, struct x86_decode *decode); -void exec_rcr(struct CPUX86State *env, struct x86_decode *decode); +void exec_movzx(CPUX86State *env, struct x86_decode *decode); +void exec_shl(CPUX86State *env, struct x86_decode *decode); +void exec_movsx(CPUX86State *env, struct x86_decode *decode); +void exec_ror(CPUX86State *env, struct x86_decode *decode); +void exec_rol(CPUX86State *env, struct x86_decode *decode); +void exec_rcl(CPUX86State *env, struct x86_decode *decode); +void exec_rcr(CPUX86State *env, struct x86_decode *decode); #endif diff --git a/target/i386/hvf/x86_flags.c b/target/i386/hvf/x86_flags.c index ee6d33f861..03d6de5efc 100644 --- a/target/i386/hvf/x86_flags.c +++ b/target/i386/hvf/x86_flags.c @@ -6,7 +6,7 @@ // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either -// version 2 of the License, or (at your option) any later version. +// version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -23,7 +23,6 @@ #include "qemu/osdep.h" -#include "qemu-common.h" #include "panic.h" #include "cpu.h" #include "x86_flags.h" @@ -63,7 +62,7 @@ #define SET_FLAGS_OSZAPC_SIZE(size, lf_carries, lf_result) { \ target_ulong temp = ((lf_carries) & (LF_MASK_AF)) | \ (((lf_carries) >> (size - 2)) << LF_BIT_PO); \ - env->hvf_emul->lflags.result = (target_ulong)(int##size##_t)(lf_result); \ + env->hvf_lflags.result = (target_ulong)(int##size##_t)(lf_result); \ if ((size) == 32) { \ temp = ((lf_carries) & ~(LF_MASK_PDB | LF_MASK_SD)); \ } else if ((size) == 16) { \ @@ -73,7 +72,7 @@ } else { \ VM_PANIC("unimplemented"); \ } \ - env->hvf_emul->lflags.auxbits = (target_ulong)(uint32_t)temp; \ + env->hvf_lflags.auxbits = (target_ulong)(uint32_t)temp; \ } /* carries, result */ @@ -100,10 +99,10 @@ } else { \ VM_PANIC("unimplemented"); \ } \ - env->hvf_emul->lflags.result = (target_ulong)(int##size##_t)(lf_result); \ - target_ulong delta_c = (env->hvf_emul->lflags.auxbits ^ temp) & LF_MASK_CF; \ + env->hvf_lflags.result = (target_ulong)(int##size##_t)(lf_result); \ + target_ulong delta_c = (env->hvf_lflags.auxbits ^ temp) & LF_MASK_CF; \ delta_c ^= (delta_c >> 1); \ - env->hvf_emul->lflags.auxbits = (target_ulong)(uint32_t)(temp ^ delta_c); \ + env->hvf_lflags.auxbits = (target_ulong)(uint32_t)(temp ^ delta_c); \ } /* carries, result */ @@ -117,9 +116,8 @@ void SET_FLAGS_OxxxxC(CPUX86State *env, uint32_t new_of, uint32_t new_cf) { uint32_t temp_po = new_of ^ new_cf; - env->hvf_emul->lflags.auxbits &= ~(LF_MASK_PO | LF_MASK_CF); - env->hvf_emul->lflags.auxbits |= (temp_po << LF_BIT_PO) | - (new_cf << LF_BIT_CF); + env->hvf_lflags.auxbits &= ~(LF_MASK_PO | LF_MASK_CF); + env->hvf_lflags.auxbits |= (temp_po << LF_BIT_PO) | (new_cf << LF_BIT_CF); } void SET_FLAGS_OSZAPC_SUB32(CPUX86State *env, uint32_t v1, uint32_t v2, @@ -215,27 +213,27 @@ void 
SET_FLAGS_OSZAPC_LOGIC8(CPUX86State *env, uint8_t v1, uint8_t v2, bool get_PF(CPUX86State *env) { - uint32_t temp = (255 & env->hvf_emul->lflags.result); - temp = temp ^ (255 & (env->hvf_emul->lflags.auxbits >> LF_BIT_PDB)); + uint32_t temp = (255 & env->hvf_lflags.result); + temp = temp ^ (255 & (env->hvf_lflags.auxbits >> LF_BIT_PDB)); temp = (temp ^ (temp >> 4)) & 0x0F; return (0x9669U >> temp) & 1; } void set_PF(CPUX86State *env, bool val) { - uint32_t temp = (255 & env->hvf_emul->lflags.result) ^ (!val); - env->hvf_emul->lflags.auxbits &= ~(LF_MASK_PDB); - env->hvf_emul->lflags.auxbits |= (temp << LF_BIT_PDB); + uint32_t temp = (255 & env->hvf_lflags.result) ^ (!val); + env->hvf_lflags.auxbits &= ~(LF_MASK_PDB); + env->hvf_lflags.auxbits |= (temp << LF_BIT_PDB); } bool get_OF(CPUX86State *env) { - return ((env->hvf_emul->lflags.auxbits + (1U << LF_BIT_PO)) >> LF_BIT_CF) & 1; + return ((env->hvf_lflags.auxbits + (1U << LF_BIT_PO)) >> LF_BIT_CF) & 1; } bool get_CF(CPUX86State *env) { - return (env->hvf_emul->lflags.auxbits >> LF_BIT_CF) & 1; + return (env->hvf_lflags.auxbits >> LF_BIT_CF) & 1; } void set_OF(CPUX86State *env, bool val) @@ -252,64 +250,64 @@ void set_CF(CPUX86State *env, bool val) bool get_AF(CPUX86State *env) { - return (env->hvf_emul->lflags.auxbits >> LF_BIT_AF) & 1; + return (env->hvf_lflags.auxbits >> LF_BIT_AF) & 1; } void set_AF(CPUX86State *env, bool val) { - env->hvf_emul->lflags.auxbits &= ~(LF_MASK_AF); - env->hvf_emul->lflags.auxbits |= val << LF_BIT_AF; + env->hvf_lflags.auxbits &= ~(LF_MASK_AF); + env->hvf_lflags.auxbits |= val << LF_BIT_AF; } bool get_ZF(CPUX86State *env) { - return !env->hvf_emul->lflags.result; + return !env->hvf_lflags.result; } void set_ZF(CPUX86State *env, bool val) { if (val) { - env->hvf_emul->lflags.auxbits ^= - (((env->hvf_emul->lflags.result >> LF_SIGN_BIT) & 1) << LF_BIT_SD); + env->hvf_lflags.auxbits ^= + (((env->hvf_lflags.result >> LF_SIGN_BIT) & 1) << LF_BIT_SD); /* merge the parity bits into the 
Parity Delta Byte */ - uint32_t temp_pdb = (255 & env->hvf_emul->lflags.result); - env->hvf_emul->lflags.auxbits ^= (temp_pdb << LF_BIT_PDB); + uint32_t temp_pdb = (255 & env->hvf_lflags.result); + env->hvf_lflags.auxbits ^= (temp_pdb << LF_BIT_PDB); /* now zero the .result value */ - env->hvf_emul->lflags.result = 0; + env->hvf_lflags.result = 0; } else { - env->hvf_emul->lflags.result |= (1 << 8); + env->hvf_lflags.result |= (1 << 8); } } bool get_SF(CPUX86State *env) { - return ((env->hvf_emul->lflags.result >> LF_SIGN_BIT) ^ - (env->hvf_emul->lflags.auxbits >> LF_BIT_SD)) & 1; + return ((env->hvf_lflags.result >> LF_SIGN_BIT) ^ + (env->hvf_lflags.auxbits >> LF_BIT_SD)) & 1; } void set_SF(CPUX86State *env, bool val) { bool temp_sf = get_SF(env); - env->hvf_emul->lflags.auxbits ^= (temp_sf ^ val) << LF_BIT_SD; + env->hvf_lflags.auxbits ^= (temp_sf ^ val) << LF_BIT_SD; } void lflags_to_rflags(CPUX86State *env) { - env->hvf_emul->rflags.cf = get_CF(env); - env->hvf_emul->rflags.pf = get_PF(env); - env->hvf_emul->rflags.af = get_AF(env); - env->hvf_emul->rflags.zf = get_ZF(env); - env->hvf_emul->rflags.sf = get_SF(env); - env->hvf_emul->rflags.of = get_OF(env); + env->eflags |= get_CF(env) ? CC_C : 0; + env->eflags |= get_PF(env) ? CC_P : 0; + env->eflags |= get_AF(env) ? CC_A : 0; + env->eflags |= get_ZF(env) ? CC_Z : 0; + env->eflags |= get_SF(env) ? CC_S : 0; + env->eflags |= get_OF(env) ? 
CC_O : 0; } void rflags_to_lflags(CPUX86State *env) { - env->hvf_emul->lflags.auxbits = env->hvf_emul->lflags.result = 0; - set_OF(env, env->hvf_emul->rflags.of); - set_SF(env, env->hvf_emul->rflags.sf); - set_ZF(env, env->hvf_emul->rflags.zf); - set_AF(env, env->hvf_emul->rflags.af); - set_PF(env, env->hvf_emul->rflags.pf); - set_CF(env, env->hvf_emul->rflags.cf); + env->hvf_lflags.auxbits = env->hvf_lflags.result = 0; + set_OF(env, env->eflags & CC_O); + set_SF(env, env->eflags & CC_S); + set_ZF(env, env->eflags & CC_Z); + set_AF(env, env->eflags & CC_A); + set_PF(env, env->eflags & CC_P); + set_CF(env, env->eflags & CC_C); } diff --git a/target/i386/hvf/x86_flags.h b/target/i386/hvf/x86_flags.h index 8942745988..75c2a7feab 100644 --- a/target/i386/hvf/x86_flags.h +++ b/target/i386/hvf/x86_flags.h @@ -6,7 +6,7 @@ // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either -// version 2 of the License, or (at your option) any later version. +// version 2.1 of the License, or (at your option) any later version. 
// // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -20,8 +20,9 @@ /* * x86 eflags functions */ -#ifndef __X86_FLAGS_H__ -#define __X86_FLAGS_H__ + +#ifndef X86_FLAGS_H +#define X86_FLAGS_H #include "cpu.h" void lflags_to_rflags(CPUX86State *env); @@ -77,4 +78,4 @@ void SET_FLAGS_OSZAPC_LOGIC16(CPUX86State *env, uint16_t v1, uint16_t v2, void SET_FLAGS_OSZAPC_LOGIC8(CPUX86State *env, uint8_t v1, uint8_t v2, uint8_t diff); -#endif /* __X86_FLAGS_H__ */ +#endif /* X86_FLAGS_H */ diff --git a/target/i386/hvf/x86_mmu.c b/target/i386/hvf/x86_mmu.c index d5a0efe718..649074a7d2 100644 --- a/target/i386/hvf/x86_mmu.c +++ b/target/i386/hvf/x86_mmu.c @@ -5,7 +5,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -18,13 +18,11 @@ #include "qemu/osdep.h" #include "panic.h" -#include "qemu-common.h" #include "cpu.h" #include "x86.h" #include "x86_mmu.h" #include "vmcs.h" #include "vmx.h" -#include "exec/address-spaces.h" #define pte_present(pte) (pte & PT_PRESENT) #define pte_write_access(pte) (pte & PT_WRITE) @@ -51,7 +49,7 @@ struct gpt_translation { bool exec_access; }; -static int gpt_top_level(struct CPUState *cpu, bool pae) +static int gpt_top_level(CPUState *cpu, bool pae) { if (!pae) { return 2; @@ -75,7 +73,7 @@ static inline int pte_size(bool pae) } -static bool get_pt_entry(struct CPUState *cpu, struct gpt_translation *pt, +static bool get_pt_entry(CPUState *cpu, struct gpt_translation *pt, int level, bool pae) { int index; @@ -88,8 +86,8 @@ static bool get_pt_entry(struct CPUState *cpu, struct gpt_translation *pt, } index = gpt_entry(pt->gva, level, pae); - address_space_rw(&address_space_memory, gpa + index * pte_size(pae), - MEMTXATTRS_UNSPECIFIED, (uint8_t *)&pte, pte_size(pae), 0); + address_space_read(&address_space_memory, gpa + index * pte_size(pae), + MEMTXATTRS_UNSPECIFIED, &pte, pte_size(pae)); pt->pte[level - 1] = pte; @@ -97,7 +95,7 @@ static bool get_pt_entry(struct CPUState *cpu, struct gpt_translation *pt, } /* test page table entry */ -static bool test_pt_entry(struct CPUState *cpu, struct gpt_translation *pt, +static bool test_pt_entry(CPUState *cpu, struct gpt_translation *pt, int level, bool *is_large, bool pae) { uint64_t pte = pt->pte[level]; @@ -128,9 +126,9 @@ static bool test_pt_entry(struct CPUState *cpu, struct gpt_translation *pt, pt->err_code |= MMU_PAGE_PT; } - uint32_t cr0 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0); + uint32_t cr0 = rvmcs(cpu->accel->fd, VMCS_GUEST_CR0); /* check protection */ - if (cr0 & CR0_WP) { + if (cr0 & CR0_WP_MASK) { if (pt->write_access && !pte_write_access(pte)) { return false; } 
@@ -168,12 +166,12 @@ static inline uint64_t large_page_gpa(struct gpt_translation *pt, bool pae) -static bool walk_gpt(struct CPUState *cpu, target_ulong addr, int err_code, +static bool walk_gpt(CPUState *cpu, target_ulong addr, int err_code, struct gpt_translation *pt, bool pae) { int top_level, level; bool is_large = false; - target_ulong cr3 = rvmcs(cpu->hvf_fd, VMCS_GUEST_CR3); + target_ulong cr3 = rvmcs(cpu->accel->fd, VMCS_GUEST_CR3); uint64_t page_mask = pae ? PAE_PTE_PAGE_MASK : LEGACY_PTE_PAGE_MASK; memset(pt, 0, sizeof(*pt)); @@ -207,7 +205,7 @@ static bool walk_gpt(struct CPUState *cpu, target_ulong addr, int err_code, } -bool mmu_gva_to_gpa(struct CPUState *cpu, target_ulong gva, uint64_t *gpa) +bool mmu_gva_to_gpa(CPUState *cpu, target_ulong gva, uint64_t *gpa) { bool res; struct gpt_translation pt; @@ -227,7 +225,7 @@ bool mmu_gva_to_gpa(struct CPUState *cpu, target_ulong gva, uint64_t *gpa) return false; } -void vmx_write_mem(struct CPUState *cpu, target_ulong gva, void *data, int bytes) +void vmx_write_mem(CPUState *cpu, target_ulong gva, void *data, int bytes) { uint64_t gpa; @@ -238,8 +236,8 @@ void vmx_write_mem(struct CPUState *cpu, target_ulong gva, void *data, int bytes if (!mmu_gva_to_gpa(cpu, gva, &gpa)) { VM_PANIC_EX("%s: mmu_gva_to_gpa %llx failed\n", __func__, gva); } else { - address_space_rw(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED, - data, copy, 1); + address_space_write(&address_space_memory, gpa, + MEMTXATTRS_UNSPECIFIED, data, copy); } bytes -= copy; @@ -248,7 +246,7 @@ void vmx_write_mem(struct CPUState *cpu, target_ulong gva, void *data, int bytes } } -void vmx_read_mem(struct CPUState *cpu, void *data, target_ulong gva, int bytes) +void vmx_read_mem(CPUState *cpu, void *data, target_ulong gva, int bytes) { uint64_t gpa; @@ -259,8 +257,8 @@ void vmx_read_mem(struct CPUState *cpu, void *data, target_ulong gva, int bytes) if (!mmu_gva_to_gpa(cpu, gva, &gpa)) { VM_PANIC_EX("%s: mmu_gva_to_gpa %llx failed\n", __func__, 
gva); } - address_space_rw(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED, - data, copy, 0); + address_space_read(&address_space_memory, gpa, MEMTXATTRS_UNSPECIFIED, + data, copy); bytes -= copy; gva += copy; diff --git a/target/i386/hvf/x86_mmu.h b/target/i386/hvf/x86_mmu.h index 0bd1acc94f..9447ae072c 100644 --- a/target/i386/hvf/x86_mmu.h +++ b/target/i386/hvf/x86_mmu.h @@ -5,7 +5,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -15,8 +15,9 @@ * You should have received a copy of the GNU Lesser General Public * License along with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef __X86_MMU_H__ -#define __X86_MMU_H__ + +#ifndef X86_MMU_H +#define X86_MMU_H #define PT_PRESENT (1 << 0) #define PT_WRITE (1 << 1) @@ -35,9 +36,9 @@ #define MMU_PAGE_US (1 << 2) #define MMU_PAGE_NX (1 << 3) -bool mmu_gva_to_gpa(struct CPUState *cpu, target_ulong gva, uint64_t *gpa); +bool mmu_gva_to_gpa(CPUState *cpu, target_ulong gva, uint64_t *gpa); -void vmx_write_mem(struct CPUState *cpu, target_ulong gva, void *data, int bytes); -void vmx_read_mem(struct CPUState *cpu, void *data, target_ulong gva, int bytes); +void vmx_write_mem(CPUState *cpu, target_ulong gva, void *data, int bytes); +void vmx_read_mem(CPUState *cpu, void *data, target_ulong gva, int bytes); -#endif /* __X86_MMU_H__ */ +#endif /* X86_MMU_H */ diff --git a/target/i386/hvf/x86_task.c b/target/i386/hvf/x86_task.c index 7099335e89..f09bfbdda5 100644 --- a/target/i386/hvf/x86_task.c +++ b/target/i386/hvf/x86_task.c @@ -8,7 +8,6 @@ // GNU General Public License for more details. 
#include "qemu/osdep.h" #include "panic.h" -#include "qemu-common.h" #include "qemu/error-report.h" #include "sysemu/hvf.h" @@ -27,10 +26,8 @@ #include <Hypervisor/hv_vmx.h> #include "hw/i386/apic_internal.h" -#include "hw/boards.h" #include "qemu/main-loop.h" -#include "sysemu/accel.h" -#include "sysemu/sysemu.h" +#include "qemu/accel.h" #include "target/i386/cpu.h" // TODO: taskswitch handling @@ -40,8 +37,8 @@ static void save_state_to_tss32(CPUState *cpu, struct x86_tss_segment32 *tss) CPUX86State *env = &x86_cpu->env; /* CR3 and ldt selector are not saved intentionally */ - tss->eip = EIP(env); - tss->eflags = EFLAGS(env); + tss->eip = (uint32_t)env->eip; + tss->eflags = (uint32_t)env->eflags; tss->eax = EAX(env); tss->ecx = ECX(env); tss->edx = EDX(env); @@ -64,10 +61,10 @@ static void load_state_from_tss32(CPUState *cpu, struct x86_tss_segment32 *tss) X86CPU *x86_cpu = X86_CPU(cpu); CPUX86State *env = &x86_cpu->env; - wvmcs(cpu->hvf_fd, VMCS_GUEST_CR3, tss->cr3); + wvmcs(cpu->accel->fd, VMCS_GUEST_CR3, tss->cr3); - RIP(env) = tss->eip; - EFLAGS(env) = tss->eflags | 2; + env->eip = tss->eip; + env->eflags = tss->eflags | 2; /* General purpose registers */ RAX(env) = tss->eax; @@ -113,11 +110,11 @@ static int task_switch_32(CPUState *cpu, x68_segment_selector tss_sel, x68_segme void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int reason, bool gate_valid, uint8_t gate, uint64_t gate_type) { - uint64_t rip = rreg(cpu->hvf_fd, HV_X86_RIP); + uint64_t rip = rreg(cpu->accel->fd, HV_X86_RIP); if (!gate_valid || (gate_type != VMCS_INTR_T_HWEXCEPTION && gate_type != VMCS_INTR_T_HWINTR && gate_type != VMCS_INTR_T_NMI)) { - int ins_len = rvmcs(cpu->hvf_fd, VMCS_EXIT_INSTRUCTION_LENGTH); + int ins_len = rvmcs(cpu->accel->fd, VMCS_EXIT_INSTRUCTION_LENGTH); macvm_set_rip(cpu, rip + ins_len); return; } @@ -160,7 +157,7 @@ void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int rea } if (reason == TSR_IRET) - EFLAGS(env) &= 
~RFLAGS_NT; + env->eflags &= ~NT_MASK; if (reason != TSR_CALL && reason != TSR_IDT_GATE) old_tss_sel.sel = 0xffff; @@ -176,12 +173,12 @@ void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int rea //ret = task_switch_16(cpu, tss_sel, old_tss_sel, old_tss_base, &next_tss_desc); VM_PANIC("task_switch_16"); - macvm_set_cr0(cpu->hvf_fd, rvmcs(cpu->hvf_fd, VMCS_GUEST_CR0) | CR0_TS); + macvm_set_cr0(cpu->accel->fd, rvmcs(cpu->accel->fd, VMCS_GUEST_CR0) | + CR0_TS_MASK); x86_segment_descriptor_to_vmx(cpu, tss_sel, &next_tss_desc, &vmx_seg); vmx_write_segment_descriptor(cpu, &vmx_seg, R_TR); store_regs(cpu); - hv_vcpu_invalidate_tlb(cpu->hvf_fd); - hv_vcpu_flush(cpu->hvf_fd); + hv_vcpu_invalidate_tlb(cpu->accel->fd); } diff --git a/target/i386/hvf/x86_task.h b/target/i386/hvf/x86_task.h index 4f1b188d2e..4eaa61a7de 100644 --- a/target/i386/hvf/x86_task.h +++ b/target/i386/hvf/x86_task.h @@ -11,8 +11,10 @@ * You should have received a copy of the GNU General Public License along * with this program; if not, see <http://www.gnu.org/licenses/>. */ -#ifndef HVF_TASK -#define HVF_TASK + +#ifndef HVF_X86_TASK_H +#define HVF_X86_TASK_H + void vmx_handle_task_switch(CPUState *cpu, x68_segment_selector tss_sel, int reason, bool gate_valid, uint8_t gate, uint64_t gate_type); #endif diff --git a/target/i386/hvf/x86hvf.c b/target/i386/hvf/x86hvf.c index df8e946fbc..be2c46246e 100644 --- a/target/i386/hvf/x86hvf.c +++ b/target/i386/hvf/x86hvf.c @@ -6,7 +6,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -19,27 +19,27 @@ #include "qemu/osdep.h" -#include "qemu-common.h" #include "x86hvf.h" #include "vmx.h" #include "vmcs.h" #include "cpu.h" #include "x86_descr.h" #include "x86_decode.h" +#include "sysemu/hw_accel.h" #include "hw/i386/apic_internal.h" #include <Hypervisor/hv.h> #include <Hypervisor/hv_vmx.h> -void hvf_set_segment(struct CPUState *cpu, struct vmx_segment *vmx_seg, +void hvf_set_segment(CPUState *cs, struct vmx_segment *vmx_seg, SegmentCache *qseg, bool is_tr) { vmx_seg->sel = qseg->selector; vmx_seg->base = qseg->base; vmx_seg->limit = qseg->limit; - if (!qseg->selector && !x86_is_real(cpu) && !is_tr) { + if (!qseg->selector && !x86_is_real(cs) && !is_tr) { /* the TR register is usable after processor reset despite * having a null selector */ vmx_seg->ar = 1 << 16; @@ -70,290 +70,279 @@ void hvf_get_segment(SegmentCache *qseg, struct vmx_segment *vmx_seg) (((vmx_seg->ar >> 15) & 1) << DESC_G_SHIFT); } -void hvf_put_xsave(CPUState *cpu_state) +void hvf_put_xsave(CPUState *cs) { + void *xsave = X86_CPU(cs)->env.xsave_buf; + uint32_t xsave_len = X86_CPU(cs)->env.xsave_buf_len; - struct X86XSaveArea *xsave; + x86_cpu_xsave_all_areas(X86_CPU(cs), xsave, xsave_len); - xsave = X86_CPU(cpu_state)->env.xsave_buf; - - x86_cpu_xsave_all_areas(X86_CPU(cpu_state), xsave); - - if (hv_vcpu_write_fpstate(cpu_state->hvf_fd, (void*)xsave, 4096)) { + if (hv_vcpu_write_fpstate(cs->accel->fd, xsave, xsave_len)) { abort(); } } -void hvf_put_segments(CPUState *cpu_state) +static void hvf_put_segments(CPUState *cs) { - CPUX86State *env = &X86_CPU(cpu_state)->env; + CPUX86State *env = &X86_CPU(cs)->env; struct vmx_segment seg; - wvmcs(cpu_state->hvf_fd, VMCS_GUEST_IDTR_LIMIT, env->idt.limit); - wvmcs(cpu_state->hvf_fd, VMCS_GUEST_IDTR_BASE, env->idt.base); + wvmcs(cs->accel->fd, VMCS_GUEST_IDTR_LIMIT, env->idt.limit); + wvmcs(cs->accel->fd, 
VMCS_GUEST_IDTR_BASE, env->idt.base); - wvmcs(cpu_state->hvf_fd, VMCS_GUEST_GDTR_LIMIT, env->gdt.limit); - wvmcs(cpu_state->hvf_fd, VMCS_GUEST_GDTR_BASE, env->gdt.base); + wvmcs(cs->accel->fd, VMCS_GUEST_GDTR_LIMIT, env->gdt.limit); + wvmcs(cs->accel->fd, VMCS_GUEST_GDTR_BASE, env->gdt.base); - /* wvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR2, env->cr[2]); */ - wvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR3, env->cr[3]); - vmx_update_tpr(cpu_state); - wvmcs(cpu_state->hvf_fd, VMCS_GUEST_IA32_EFER, env->efer); + /* wvmcs(cs->accel->fd, VMCS_GUEST_CR2, env->cr[2]); */ + wvmcs(cs->accel->fd, VMCS_GUEST_CR3, env->cr[3]); + vmx_update_tpr(cs); + wvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER, env->efer); - macvm_set_cr4(cpu_state->hvf_fd, env->cr[4]); - macvm_set_cr0(cpu_state->hvf_fd, env->cr[0]); + macvm_set_cr4(cs->accel->fd, env->cr[4]); + macvm_set_cr0(cs->accel->fd, env->cr[0]); - hvf_set_segment(cpu_state, &seg, &env->segs[R_CS], false); - vmx_write_segment_descriptor(cpu_state, &seg, R_CS); + hvf_set_segment(cs, &seg, &env->segs[R_CS], false); + vmx_write_segment_descriptor(cs, &seg, R_CS); - hvf_set_segment(cpu_state, &seg, &env->segs[R_DS], false); - vmx_write_segment_descriptor(cpu_state, &seg, R_DS); + hvf_set_segment(cs, &seg, &env->segs[R_DS], false); + vmx_write_segment_descriptor(cs, &seg, R_DS); - hvf_set_segment(cpu_state, &seg, &env->segs[R_ES], false); - vmx_write_segment_descriptor(cpu_state, &seg, R_ES); + hvf_set_segment(cs, &seg, &env->segs[R_ES], false); + vmx_write_segment_descriptor(cs, &seg, R_ES); - hvf_set_segment(cpu_state, &seg, &env->segs[R_SS], false); - vmx_write_segment_descriptor(cpu_state, &seg, R_SS); + hvf_set_segment(cs, &seg, &env->segs[R_SS], false); + vmx_write_segment_descriptor(cs, &seg, R_SS); - hvf_set_segment(cpu_state, &seg, &env->segs[R_FS], false); - vmx_write_segment_descriptor(cpu_state, &seg, R_FS); + hvf_set_segment(cs, &seg, &env->segs[R_FS], false); + vmx_write_segment_descriptor(cs, &seg, R_FS); - hvf_set_segment(cpu_state, &seg, 
&env->segs[R_GS], false); - vmx_write_segment_descriptor(cpu_state, &seg, R_GS); + hvf_set_segment(cs, &seg, &env->segs[R_GS], false); + vmx_write_segment_descriptor(cs, &seg, R_GS); - hvf_set_segment(cpu_state, &seg, &env->tr, true); - vmx_write_segment_descriptor(cpu_state, &seg, R_TR); + hvf_set_segment(cs, &seg, &env->tr, true); + vmx_write_segment_descriptor(cs, &seg, R_TR); - hvf_set_segment(cpu_state, &seg, &env->ldt, false); - vmx_write_segment_descriptor(cpu_state, &seg, R_LDTR); - - hv_vcpu_flush(cpu_state->hvf_fd); + hvf_set_segment(cs, &seg, &env->ldt, false); + vmx_write_segment_descriptor(cs, &seg, R_LDTR); } -void hvf_put_msrs(CPUState *cpu_state) +void hvf_put_msrs(CPUState *cs) { - CPUX86State *env = &X86_CPU(cpu_state)->env; + CPUX86State *env = &X86_CPU(cs)->env; - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_CS, + hv_vcpu_write_msr(cs->accel->fd, MSR_IA32_SYSENTER_CS, env->sysenter_cs); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_ESP, + hv_vcpu_write_msr(cs->accel->fd, MSR_IA32_SYSENTER_ESP, env->sysenter_esp); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_EIP, + hv_vcpu_write_msr(cs->accel->fd, MSR_IA32_SYSENTER_EIP, env->sysenter_eip); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_STAR, env->star); + hv_vcpu_write_msr(cs->accel->fd, MSR_STAR, env->star); #ifdef TARGET_X86_64 - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_CSTAR, env->cstar); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_KERNELGSBASE, env->kernelgsbase); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_FMASK, env->fmask); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_LSTAR, env->lstar); + hv_vcpu_write_msr(cs->accel->fd, MSR_CSTAR, env->cstar); + hv_vcpu_write_msr(cs->accel->fd, MSR_KERNELGSBASE, env->kernelgsbase); + hv_vcpu_write_msr(cs->accel->fd, MSR_FMASK, env->fmask); + hv_vcpu_write_msr(cs->accel->fd, MSR_LSTAR, env->lstar); #endif - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_GSBASE, env->segs[R_GS].base); - hv_vcpu_write_msr(cpu_state->hvf_fd, MSR_FSBASE, 
env->segs[R_FS].base); - - /* if (!osx_is_sierra()) - wvmcs(cpu_state->hvf_fd, VMCS_TSC_OFFSET, env->tsc - rdtscp());*/ - hv_vm_sync_tsc(env->tsc); + hv_vcpu_write_msr(cs->accel->fd, MSR_GSBASE, env->segs[R_GS].base); + hv_vcpu_write_msr(cs->accel->fd, MSR_FSBASE, env->segs[R_FS].base); } -void hvf_get_xsave(CPUState *cpu_state) +void hvf_get_xsave(CPUState *cs) { - struct X86XSaveArea *xsave; - - xsave = X86_CPU(cpu_state)->env.xsave_buf; + void *xsave = X86_CPU(cs)->env.xsave_buf; + uint32_t xsave_len = X86_CPU(cs)->env.xsave_buf_len; - if (hv_vcpu_read_fpstate(cpu_state->hvf_fd, (void*)xsave, 4096)) { + if (hv_vcpu_read_fpstate(cs->accel->fd, xsave, xsave_len)) { abort(); } - x86_cpu_xrstor_all_areas(X86_CPU(cpu_state), xsave); + x86_cpu_xrstor_all_areas(X86_CPU(cs), xsave, xsave_len); } -void hvf_get_segments(CPUState *cpu_state) +static void hvf_get_segments(CPUState *cs) { - CPUX86State *env = &X86_CPU(cpu_state)->env; + CPUX86State *env = &X86_CPU(cs)->env; struct vmx_segment seg; env->interrupt_injected = -1; - vmx_read_segment_descriptor(cpu_state, &seg, R_CS); + vmx_read_segment_descriptor(cs, &seg, R_CS); hvf_get_segment(&env->segs[R_CS], &seg); - vmx_read_segment_descriptor(cpu_state, &seg, R_DS); + vmx_read_segment_descriptor(cs, &seg, R_DS); hvf_get_segment(&env->segs[R_DS], &seg); - vmx_read_segment_descriptor(cpu_state, &seg, R_ES); + vmx_read_segment_descriptor(cs, &seg, R_ES); hvf_get_segment(&env->segs[R_ES], &seg); - vmx_read_segment_descriptor(cpu_state, &seg, R_FS); + vmx_read_segment_descriptor(cs, &seg, R_FS); hvf_get_segment(&env->segs[R_FS], &seg); - vmx_read_segment_descriptor(cpu_state, &seg, R_GS); + vmx_read_segment_descriptor(cs, &seg, R_GS); hvf_get_segment(&env->segs[R_GS], &seg); - vmx_read_segment_descriptor(cpu_state, &seg, R_SS); + vmx_read_segment_descriptor(cs, &seg, R_SS); hvf_get_segment(&env->segs[R_SS], &seg); - vmx_read_segment_descriptor(cpu_state, &seg, R_TR); + vmx_read_segment_descriptor(cs, &seg, R_TR); 
hvf_get_segment(&env->tr, &seg); - vmx_read_segment_descriptor(cpu_state, &seg, R_LDTR); + vmx_read_segment_descriptor(cs, &seg, R_LDTR); hvf_get_segment(&env->ldt, &seg); - env->idt.limit = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_IDTR_LIMIT); - env->idt.base = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_IDTR_BASE); - env->gdt.limit = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_GDTR_LIMIT); - env->gdt.base = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_GDTR_BASE); + env->idt.limit = rvmcs(cs->accel->fd, VMCS_GUEST_IDTR_LIMIT); + env->idt.base = rvmcs(cs->accel->fd, VMCS_GUEST_IDTR_BASE); + env->gdt.limit = rvmcs(cs->accel->fd, VMCS_GUEST_GDTR_LIMIT); + env->gdt.base = rvmcs(cs->accel->fd, VMCS_GUEST_GDTR_BASE); - env->cr[0] = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR0); + env->cr[0] = rvmcs(cs->accel->fd, VMCS_GUEST_CR0); env->cr[2] = 0; - env->cr[3] = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR3); - env->cr[4] = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_CR4); + env->cr[3] = rvmcs(cs->accel->fd, VMCS_GUEST_CR3); + env->cr[4] = rvmcs(cs->accel->fd, VMCS_GUEST_CR4); - env->efer = rvmcs(cpu_state->hvf_fd, VMCS_GUEST_IA32_EFER); + env->efer = rvmcs(cs->accel->fd, VMCS_GUEST_IA32_EFER); } -void hvf_get_msrs(CPUState *cpu_state) +void hvf_get_msrs(CPUState *cs) { - CPUX86State *env = &X86_CPU(cpu_state)->env; + CPUX86State *env = &X86_CPU(cs)->env; uint64_t tmp; - hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_CS, &tmp); + hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_SYSENTER_CS, &tmp); env->sysenter_cs = tmp; - hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_ESP, &tmp); + hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_SYSENTER_ESP, &tmp); env->sysenter_esp = tmp; - hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_IA32_SYSENTER_EIP, &tmp); + hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_SYSENTER_EIP, &tmp); env->sysenter_eip = tmp; - hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_STAR, &env->star); + hv_vcpu_read_msr(cs->accel->fd, MSR_STAR, &env->star); #ifdef TARGET_X86_64 - hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_CSTAR, 
&env->cstar); - hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_KERNELGSBASE, &env->kernelgsbase); - hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_FMASK, &env->fmask); - hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_LSTAR, &env->lstar); + hv_vcpu_read_msr(cs->accel->fd, MSR_CSTAR, &env->cstar); + hv_vcpu_read_msr(cs->accel->fd, MSR_KERNELGSBASE, &env->kernelgsbase); + hv_vcpu_read_msr(cs->accel->fd, MSR_FMASK, &env->fmask); + hv_vcpu_read_msr(cs->accel->fd, MSR_LSTAR, &env->lstar); #endif - hv_vcpu_read_msr(cpu_state->hvf_fd, MSR_IA32_APICBASE, &tmp); + hv_vcpu_read_msr(cs->accel->fd, MSR_IA32_APICBASE, &tmp); - env->tsc = rdtscp() + rvmcs(cpu_state->hvf_fd, VMCS_TSC_OFFSET); + env->tsc = rdtscp() + rvmcs(cs->accel->fd, VMCS_TSC_OFFSET); } -int hvf_put_registers(CPUState *cpu_state) +int hvf_put_registers(CPUState *cs) { - X86CPU *x86cpu = X86_CPU(cpu_state); + X86CPU *x86cpu = X86_CPU(cs); CPUX86State *env = &x86cpu->env; - wreg(cpu_state->hvf_fd, HV_X86_RAX, env->regs[R_EAX]); - wreg(cpu_state->hvf_fd, HV_X86_RBX, env->regs[R_EBX]); - wreg(cpu_state->hvf_fd, HV_X86_RCX, env->regs[R_ECX]); - wreg(cpu_state->hvf_fd, HV_X86_RDX, env->regs[R_EDX]); - wreg(cpu_state->hvf_fd, HV_X86_RBP, env->regs[R_EBP]); - wreg(cpu_state->hvf_fd, HV_X86_RSP, env->regs[R_ESP]); - wreg(cpu_state->hvf_fd, HV_X86_RSI, env->regs[R_ESI]); - wreg(cpu_state->hvf_fd, HV_X86_RDI, env->regs[R_EDI]); - wreg(cpu_state->hvf_fd, HV_X86_R8, env->regs[8]); - wreg(cpu_state->hvf_fd, HV_X86_R9, env->regs[9]); - wreg(cpu_state->hvf_fd, HV_X86_R10, env->regs[10]); - wreg(cpu_state->hvf_fd, HV_X86_R11, env->regs[11]); - wreg(cpu_state->hvf_fd, HV_X86_R12, env->regs[12]); - wreg(cpu_state->hvf_fd, HV_X86_R13, env->regs[13]); - wreg(cpu_state->hvf_fd, HV_X86_R14, env->regs[14]); - wreg(cpu_state->hvf_fd, HV_X86_R15, env->regs[15]); - wreg(cpu_state->hvf_fd, HV_X86_RFLAGS, env->eflags); - wreg(cpu_state->hvf_fd, HV_X86_RIP, env->eip); + wreg(cs->accel->fd, HV_X86_RAX, env->regs[R_EAX]); + wreg(cs->accel->fd, HV_X86_RBX, 
env->regs[R_EBX]); + wreg(cs->accel->fd, HV_X86_RCX, env->regs[R_ECX]); + wreg(cs->accel->fd, HV_X86_RDX, env->regs[R_EDX]); + wreg(cs->accel->fd, HV_X86_RBP, env->regs[R_EBP]); + wreg(cs->accel->fd, HV_X86_RSP, env->regs[R_ESP]); + wreg(cs->accel->fd, HV_X86_RSI, env->regs[R_ESI]); + wreg(cs->accel->fd, HV_X86_RDI, env->regs[R_EDI]); + wreg(cs->accel->fd, HV_X86_R8, env->regs[8]); + wreg(cs->accel->fd, HV_X86_R9, env->regs[9]); + wreg(cs->accel->fd, HV_X86_R10, env->regs[10]); + wreg(cs->accel->fd, HV_X86_R11, env->regs[11]); + wreg(cs->accel->fd, HV_X86_R12, env->regs[12]); + wreg(cs->accel->fd, HV_X86_R13, env->regs[13]); + wreg(cs->accel->fd, HV_X86_R14, env->regs[14]); + wreg(cs->accel->fd, HV_X86_R15, env->regs[15]); + wreg(cs->accel->fd, HV_X86_RFLAGS, env->eflags); + wreg(cs->accel->fd, HV_X86_RIP, env->eip); - wreg(cpu_state->hvf_fd, HV_X86_XCR0, env->xcr0); + wreg(cs->accel->fd, HV_X86_XCR0, env->xcr0); - hvf_put_xsave(cpu_state); + hvf_put_xsave(cs); - hvf_put_segments(cpu_state); + hvf_put_segments(cs); - hvf_put_msrs(cpu_state); + hvf_put_msrs(cs); - wreg(cpu_state->hvf_fd, HV_X86_DR0, env->dr[0]); - wreg(cpu_state->hvf_fd, HV_X86_DR1, env->dr[1]); - wreg(cpu_state->hvf_fd, HV_X86_DR2, env->dr[2]); - wreg(cpu_state->hvf_fd, HV_X86_DR3, env->dr[3]); - wreg(cpu_state->hvf_fd, HV_X86_DR4, env->dr[4]); - wreg(cpu_state->hvf_fd, HV_X86_DR5, env->dr[5]); - wreg(cpu_state->hvf_fd, HV_X86_DR6, env->dr[6]); - wreg(cpu_state->hvf_fd, HV_X86_DR7, env->dr[7]); + wreg(cs->accel->fd, HV_X86_DR0, env->dr[0]); + wreg(cs->accel->fd, HV_X86_DR1, env->dr[1]); + wreg(cs->accel->fd, HV_X86_DR2, env->dr[2]); + wreg(cs->accel->fd, HV_X86_DR3, env->dr[3]); + wreg(cs->accel->fd, HV_X86_DR4, env->dr[4]); + wreg(cs->accel->fd, HV_X86_DR5, env->dr[5]); + wreg(cs->accel->fd, HV_X86_DR6, env->dr[6]); + wreg(cs->accel->fd, HV_X86_DR7, env->dr[7]); return 0; } -int hvf_get_registers(CPUState *cpu_state) +int hvf_get_registers(CPUState *cs) { - X86CPU *x86cpu = X86_CPU(cpu_state); + 
X86CPU *x86cpu = X86_CPU(cs); CPUX86State *env = &x86cpu->env; - env->regs[R_EAX] = rreg(cpu_state->hvf_fd, HV_X86_RAX); - env->regs[R_EBX] = rreg(cpu_state->hvf_fd, HV_X86_RBX); - env->regs[R_ECX] = rreg(cpu_state->hvf_fd, HV_X86_RCX); - env->regs[R_EDX] = rreg(cpu_state->hvf_fd, HV_X86_RDX); - env->regs[R_EBP] = rreg(cpu_state->hvf_fd, HV_X86_RBP); - env->regs[R_ESP] = rreg(cpu_state->hvf_fd, HV_X86_RSP); - env->regs[R_ESI] = rreg(cpu_state->hvf_fd, HV_X86_RSI); - env->regs[R_EDI] = rreg(cpu_state->hvf_fd, HV_X86_RDI); - env->regs[8] = rreg(cpu_state->hvf_fd, HV_X86_R8); - env->regs[9] = rreg(cpu_state->hvf_fd, HV_X86_R9); - env->regs[10] = rreg(cpu_state->hvf_fd, HV_X86_R10); - env->regs[11] = rreg(cpu_state->hvf_fd, HV_X86_R11); - env->regs[12] = rreg(cpu_state->hvf_fd, HV_X86_R12); - env->regs[13] = rreg(cpu_state->hvf_fd, HV_X86_R13); - env->regs[14] = rreg(cpu_state->hvf_fd, HV_X86_R14); - env->regs[15] = rreg(cpu_state->hvf_fd, HV_X86_R15); + env->regs[R_EAX] = rreg(cs->accel->fd, HV_X86_RAX); + env->regs[R_EBX] = rreg(cs->accel->fd, HV_X86_RBX); + env->regs[R_ECX] = rreg(cs->accel->fd, HV_X86_RCX); + env->regs[R_EDX] = rreg(cs->accel->fd, HV_X86_RDX); + env->regs[R_EBP] = rreg(cs->accel->fd, HV_X86_RBP); + env->regs[R_ESP] = rreg(cs->accel->fd, HV_X86_RSP); + env->regs[R_ESI] = rreg(cs->accel->fd, HV_X86_RSI); + env->regs[R_EDI] = rreg(cs->accel->fd, HV_X86_RDI); + env->regs[8] = rreg(cs->accel->fd, HV_X86_R8); + env->regs[9] = rreg(cs->accel->fd, HV_X86_R9); + env->regs[10] = rreg(cs->accel->fd, HV_X86_R10); + env->regs[11] = rreg(cs->accel->fd, HV_X86_R11); + env->regs[12] = rreg(cs->accel->fd, HV_X86_R12); + env->regs[13] = rreg(cs->accel->fd, HV_X86_R13); + env->regs[14] = rreg(cs->accel->fd, HV_X86_R14); + env->regs[15] = rreg(cs->accel->fd, HV_X86_R15); - env->eflags = rreg(cpu_state->hvf_fd, HV_X86_RFLAGS); - env->eip = rreg(cpu_state->hvf_fd, HV_X86_RIP); + env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS); + env->eip = rreg(cs->accel->fd, 
HV_X86_RIP); - hvf_get_xsave(cpu_state); - env->xcr0 = rreg(cpu_state->hvf_fd, HV_X86_XCR0); + hvf_get_xsave(cs); + env->xcr0 = rreg(cs->accel->fd, HV_X86_XCR0); - hvf_get_segments(cpu_state); - hvf_get_msrs(cpu_state); + hvf_get_segments(cs); + hvf_get_msrs(cs); - env->dr[0] = rreg(cpu_state->hvf_fd, HV_X86_DR0); - env->dr[1] = rreg(cpu_state->hvf_fd, HV_X86_DR1); - env->dr[2] = rreg(cpu_state->hvf_fd, HV_X86_DR2); - env->dr[3] = rreg(cpu_state->hvf_fd, HV_X86_DR3); - env->dr[4] = rreg(cpu_state->hvf_fd, HV_X86_DR4); - env->dr[5] = rreg(cpu_state->hvf_fd, HV_X86_DR5); - env->dr[6] = rreg(cpu_state->hvf_fd, HV_X86_DR6); - env->dr[7] = rreg(cpu_state->hvf_fd, HV_X86_DR7); + env->dr[0] = rreg(cs->accel->fd, HV_X86_DR0); + env->dr[1] = rreg(cs->accel->fd, HV_X86_DR1); + env->dr[2] = rreg(cs->accel->fd, HV_X86_DR2); + env->dr[3] = rreg(cs->accel->fd, HV_X86_DR3); + env->dr[4] = rreg(cs->accel->fd, HV_X86_DR4); + env->dr[5] = rreg(cs->accel->fd, HV_X86_DR5); + env->dr[6] = rreg(cs->accel->fd, HV_X86_DR6); + env->dr[7] = rreg(cs->accel->fd, HV_X86_DR7); x86_update_hflags(env); return 0; } -static void vmx_set_int_window_exiting(CPUState *cpu) +static void vmx_set_int_window_exiting(CPUState *cs) { uint64_t val; - val = rvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS); - wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, val | + val = rvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS); + wvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS, val | VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING); } -void vmx_clear_int_window_exiting(CPUState *cpu) +void vmx_clear_int_window_exiting(CPUState *cs) { uint64_t val; - val = rvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS); - wvmcs(cpu->hvf_fd, VMCS_PRI_PROC_BASED_CTLS, val & + val = rvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS); + wvmcs(cs->accel->fd, VMCS_PRI_PROC_BASED_CTLS, val & ~VMCS_PRI_PROC_BASED_CTLS_INT_WINDOW_EXITING); } -#define NMI_VEC 2 - -bool hvf_inject_interrupts(CPUState *cpu_state) +bool hvf_inject_interrupts(CPUState *cs) { - X86CPU 
*x86cpu = X86_CPU(cpu_state); + X86CPU *x86cpu = X86_CPU(cs); CPUX86State *env = &x86cpu->env; uint8_t vector; @@ -361,16 +350,20 @@ bool hvf_inject_interrupts(CPUState *cpu_state) bool have_event = true; if (env->interrupt_injected != -1) { vector = env->interrupt_injected; - intr_type = VMCS_INTR_T_SWINTR; - } else if (env->exception_injected != -1) { - vector = env->exception_injected; + if (env->ins_len) { + intr_type = VMCS_INTR_T_SWINTR; + } else { + intr_type = VMCS_INTR_T_HWINTR; + } + } else if (env->exception_nr != -1) { + vector = env->exception_nr; if (vector == EXCP03_INT3 || vector == EXCP04_INTO) { intr_type = VMCS_INTR_T_SWEXCEPTION; } else { intr_type = VMCS_INTR_T_HWEXCEPTION; } } else if (env->nmi_injected) { - vector = NMI_VEC; + vector = EXCP02_NMI; intr_type = VMCS_INTR_T_NMI; } else { have_event = false; @@ -379,84 +372,89 @@ bool hvf_inject_interrupts(CPUState *cpu_state) uint64_t info = 0; if (have_event) { info = vector | intr_type | VMCS_INTR_VALID; - uint64_t reason = rvmcs(cpu_state->hvf_fd, VMCS_EXIT_REASON); + uint64_t reason = rvmcs(cs->accel->fd, VMCS_EXIT_REASON); if (env->nmi_injected && reason != EXIT_REASON_TASK_SWITCH) { - vmx_clear_nmi_blocking(cpu_state); + vmx_clear_nmi_blocking(cs); } if (!(env->hflags2 & HF2_NMI_MASK) || intr_type != VMCS_INTR_T_NMI) { info &= ~(1 << 12); /* clear undefined bit */ if (intr_type == VMCS_INTR_T_SWINTR || intr_type == VMCS_INTR_T_SWEXCEPTION) { - wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INST_LENGTH, env->ins_len); + wvmcs(cs->accel->fd, VMCS_ENTRY_INST_LENGTH, env->ins_len); } if (env->has_error_code) { - wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_EXCEPTION_ERROR, + wvmcs(cs->accel->fd, VMCS_ENTRY_EXCEPTION_ERROR, env->error_code); + /* Indicate that VMCS_ENTRY_EXCEPTION_ERROR is valid */ + info |= VMCS_INTR_DEL_ERRCODE; } /*printf("reinject %lx err %d\n", info, err);*/ - wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, info); + wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, info); }; } - if 
(cpu_state->interrupt_request & CPU_INTERRUPT_NMI) { + if (cs->interrupt_request & CPU_INTERRUPT_NMI) { if (!(env->hflags2 & HF2_NMI_MASK) && !(info & VMCS_INTR_VALID)) { - cpu_state->interrupt_request &= ~CPU_INTERRUPT_NMI; - info = VMCS_INTR_VALID | VMCS_INTR_T_NMI | NMI_VEC; - wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, info); + cs->interrupt_request &= ~CPU_INTERRUPT_NMI; + info = VMCS_INTR_VALID | VMCS_INTR_T_NMI | EXCP02_NMI; + wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, info); } else { - vmx_set_nmi_window_exiting(cpu_state); + vmx_set_nmi_window_exiting(cs); } } if (!(env->hflags & HF_INHIBIT_IRQ_MASK) && - (cpu_state->interrupt_request & CPU_INTERRUPT_HARD) && - (EFLAGS(env) & IF_MASK) && !(info & VMCS_INTR_VALID)) { - int line = cpu_get_pic_interrupt(&x86cpu->env); - cpu_state->interrupt_request &= ~CPU_INTERRUPT_HARD; + (cs->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK) && !(info & VMCS_INTR_VALID)) { + int line = cpu_get_pic_interrupt(env); + cs->interrupt_request &= ~CPU_INTERRUPT_HARD; if (line >= 0) { - wvmcs(cpu_state->hvf_fd, VMCS_ENTRY_INTR_INFO, line | + wvmcs(cs->accel->fd, VMCS_ENTRY_INTR_INFO, line | VMCS_INTR_VALID | VMCS_INTR_T_HWINTR); } } - if (cpu_state->interrupt_request & CPU_INTERRUPT_HARD) { - vmx_set_int_window_exiting(cpu_state); + if (cs->interrupt_request & CPU_INTERRUPT_HARD) { + vmx_set_int_window_exiting(cs); } - return (cpu_state->interrupt_request + return (cs->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)); } -int hvf_process_events(CPUState *cpu_state) +int hvf_process_events(CPUState *cs) { - X86CPU *cpu = X86_CPU(cpu_state); + X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - EFLAGS(env) = rreg(cpu_state->hvf_fd, HV_X86_RFLAGS); + if (!cs->vcpu_dirty) { + /* light weight sync for CPU_INTERRUPT_HARD and IF_MASK */ + env->eflags = rreg(cs->accel->fd, HV_X86_RFLAGS); + } - if (cpu_state->interrupt_request & CPU_INTERRUPT_INIT) { - hvf_cpu_synchronize_state(cpu_state); + if 
(cs->interrupt_request & CPU_INTERRUPT_INIT) { + cpu_synchronize_state(cs); do_cpu_init(cpu); } - if (cpu_state->interrupt_request & CPU_INTERRUPT_POLL) { - cpu_state->interrupt_request &= ~CPU_INTERRUPT_POLL; + if (cs->interrupt_request & CPU_INTERRUPT_POLL) { + cs->interrupt_request &= ~CPU_INTERRUPT_POLL; apic_poll_irq(cpu->apic_state); } - if (((cpu_state->interrupt_request & CPU_INTERRUPT_HARD) && - (EFLAGS(env) & IF_MASK)) || - (cpu_state->interrupt_request & CPU_INTERRUPT_NMI)) { - cpu_state->halted = 0; + if (((cs->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK)) || + (cs->interrupt_request & CPU_INTERRUPT_NMI)) { + cs->halted = 0; } - if (cpu_state->interrupt_request & CPU_INTERRUPT_SIPI) { - hvf_cpu_synchronize_state(cpu_state); + if (cs->interrupt_request & CPU_INTERRUPT_SIPI) { + cpu_synchronize_state(cs); do_cpu_sipi(cpu); } - if (cpu_state->interrupt_request & CPU_INTERRUPT_TPR) { - cpu_state->interrupt_request &= ~CPU_INTERRUPT_TPR; - hvf_cpu_synchronize_state(cpu_state); + if (cs->interrupt_request & CPU_INTERRUPT_TPR) { + cs->interrupt_request &= ~CPU_INTERRUPT_TPR; + cpu_synchronize_state(cs); apic_handle_tpr_access_report(cpu->apic_state, env->eip, env->tpr_access_type); } - return cpu_state->halted; + return cs->halted; } diff --git a/target/i386/hvf/x86hvf.h b/target/i386/hvf/x86hvf.h index 79539f7282..423a89b6ad 100644 --- a/target/i386/hvf/x86hvf.h +++ b/target/i386/hvf/x86hvf.h @@ -5,7 +5,7 @@ * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. 
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -20,20 +20,15 @@ #include "cpu.h" #include "x86_descr.h" -int hvf_process_events(CPUState *); -int hvf_put_registers(CPUState *); -int hvf_get_registers(CPUState *); -bool hvf_inject_interrupts(CPUState *); -void hvf_set_segment(struct CPUState *cpu, struct vmx_segment *vmx_seg, +int hvf_process_events(CPUState *cs); +bool hvf_inject_interrupts(CPUState *cs); +void hvf_set_segment(CPUState *cs, struct vmx_segment *vmx_seg, SegmentCache *qseg, bool is_tr); void hvf_get_segment(SegmentCache *qseg, struct vmx_segment *vmx_seg); -void hvf_put_xsave(CPUState *cpu_state); -void hvf_put_segments(CPUState *cpu_state); -void hvf_put_msrs(CPUState *cpu_state); -void hvf_get_xsave(CPUState *cpu_state); -void hvf_get_msrs(CPUState *cpu_state); -void vmx_clear_int_window_exiting(CPUState *cpu); -void hvf_get_segments(CPUState *cpu_state); -void vmx_update_tpr(CPUState *cpu); -void hvf_cpu_synchronize_state(CPUState *cpu_state); +void hvf_put_xsave(CPUState *cs); +void hvf_put_msrs(CPUState *cs); +void hvf_get_xsave(CPUState *cs); +void hvf_get_msrs(CPUState *cs); +void vmx_clear_int_window_exiting(CPUState *cs); +void vmx_update_tpr(CPUState *cs); #endif diff --git a/target/i386/hyperv.c b/target/i386/hyperv.c deleted file mode 100644 index 3065d765ed..0000000000 --- a/target/i386/hyperv.c +++ /dev/null @@ -1,151 +0,0 @@ -/* - * QEMU KVM Hyper-V support - * - * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com> - * - * Authors: - * Andrey Smetanin <asmetanin@virtuozzo.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - -#include "qemu/osdep.h" -#include "qemu/main-loop.h" -#include "hyperv.h" -#include "hyperv-proto.h" - -uint32_t hyperv_vp_index(X86CPU *cpu) -{ - return CPU(cpu)->cpu_index; -} - -X86CPU *hyperv_find_vcpu(uint32_t vp_index) -{ - return X86_CPU(qemu_get_cpu(vp_index)); -} - -int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) -{ - CPUX86State *env = &cpu->env; - - switch (exit->type) { - case KVM_EXIT_HYPERV_SYNIC: - if (!cpu->hyperv_synic) { - return -1; - } - - /* - * For now just track changes in SynIC control and msg/evt pages msr's. - * When SynIC messaging/events processing will be added in future - * here we will do messages queues flushing and pages remapping. - */ - switch (exit->u.synic.msr) { - case HV_X64_MSR_SCONTROL: - env->msr_hv_synic_control = exit->u.synic.control; - break; - case HV_X64_MSR_SIMP: - env->msr_hv_synic_msg_page = exit->u.synic.msg_page; - break; - case HV_X64_MSR_SIEFP: - env->msr_hv_synic_evt_page = exit->u.synic.evt_page; - break; - default: - return -1; - } - return 0; - case KVM_EXIT_HYPERV_HCALL: { - uint16_t code; - - code = exit->u.hcall.input & 0xffff; - switch (code) { - case HV_POST_MESSAGE: - case HV_SIGNAL_EVENT: - default: - exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE; - return 0; - } - } - default: - return -1; - } -} - -static void kvm_hv_sint_ack_handler(EventNotifier *notifier) -{ - HvSintRoute *sint_route = container_of(notifier, HvSintRoute, - sint_ack_notifier); - event_notifier_test_and_clear(notifier); - if (sint_route->sint_ack_clb) { - sint_route->sint_ack_clb(sint_route); - } -} - -HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb) -{ - HvSintRoute *sint_route; - int r, gsi; - - sint_route = g_malloc0(sizeof(*sint_route)); - r = event_notifier_init(&sint_route->sint_set_notifier, false); - if (r) { - goto err; - } - - r = event_notifier_init(&sint_route->sint_ack_notifier, false); - if (r) { - goto err_sint_set_notifier; 
- } - - event_notifier_set_handler(&sint_route->sint_ack_notifier, - kvm_hv_sint_ack_handler); - - gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint); - if (gsi < 0) { - goto err_gsi; - } - - r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, - &sint_route->sint_set_notifier, - &sint_route->sint_ack_notifier, gsi); - if (r) { - goto err_irqfd; - } - sint_route->gsi = gsi; - sint_route->sint_ack_clb = sint_ack_clb; - sint_route->vp_index = vp_index; - sint_route->sint = sint; - - return sint_route; - -err_irqfd: - kvm_irqchip_release_virq(kvm_state, gsi); -err_gsi: - event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); - event_notifier_cleanup(&sint_route->sint_ack_notifier); -err_sint_set_notifier: - event_notifier_cleanup(&sint_route->sint_set_notifier); -err: - g_free(sint_route); - - return NULL; -} - -void kvm_hv_sint_route_destroy(HvSintRoute *sint_route) -{ - kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state, - &sint_route->sint_set_notifier, - sint_route->gsi); - kvm_irqchip_release_virq(kvm_state, sint_route->gsi); - event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL); - event_notifier_cleanup(&sint_route->sint_ack_notifier); - event_notifier_cleanup(&sint_route->sint_set_notifier); - g_free(sint_route); -} - -int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route) -{ - return event_notifier_set(&sint_route->sint_set_notifier); -} diff --git a/target/i386/hyperv.h b/target/i386/hyperv.h deleted file mode 100644 index 00c9b454bb..0000000000 --- a/target/i386/hyperv.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * QEMU KVM Hyper-V support - * - * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com> - * - * Authors: - * Andrey Smetanin <asmetanin@virtuozzo.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - -#ifndef TARGET_I386_HYPERV_H -#define TARGET_I386_HYPERV_H - -#include "cpu.h" -#include "sysemu/kvm.h" -#include "qemu/event_notifier.h" - -typedef struct HvSintRoute HvSintRoute; -typedef void (*HvSintAckClb)(HvSintRoute *sint_route); - -struct HvSintRoute { - uint32_t sint; - uint32_t vp_index; - int gsi; - EventNotifier sint_set_notifier; - EventNotifier sint_ack_notifier; - HvSintAckClb sint_ack_clb; -}; - -int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit); - -HvSintRoute *kvm_hv_sint_route_create(uint32_t vp_index, uint32_t sint, - HvSintAckClb sint_ack_clb); - -void kvm_hv_sint_route_destroy(HvSintRoute *sint_route); - -int kvm_hv_sint_route_set_sint(HvSintRoute *sint_route); - -uint32_t hyperv_vp_index(X86CPU *cpu); -X86CPU *hyperv_find_vcpu(uint32_t vp_index); - -#endif diff --git a/target/i386/kvm-stub.c b/target/i386/kvm-stub.c deleted file mode 100644 index e7a673e5db..0000000000 --- a/target/i386/kvm-stub.c +++ /dev/null @@ -1,47 +0,0 @@ -/* - * QEMU KVM x86 specific function stubs - * - * Copyright Linaro Limited 2012 - * - * Author: Peter Maydell <peter.maydell@linaro.org> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "cpu.h" -#include "kvm_i386.h" - -bool kvm_allows_irq0_override(void) -{ - return 1; -} - -#ifndef __OPTIMIZE__ -bool kvm_has_smm(void) -{ - return 1; -} - -bool kvm_enable_x2apic(void) -{ - return false; -} - -/* This function is only called inside conditionals which we - * rely on the compiler to optimize out when CONFIG_KVM is not - * defined. 
- */ -uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function, - uint32_t index, int reg) -{ - abort(); -} -#endif - -bool kvm_hv_vpindex_settable(void) -{ - return false; -} diff --git a/target/i386/kvm.c b/target/i386/kvm.c deleted file mode 100644 index dc4047b02f..0000000000 --- a/target/i386/kvm.c +++ /dev/null @@ -1,3784 +0,0 @@ -/* - * QEMU KVM support - * - * Copyright (C) 2006-2008 Qumranet Technologies - * Copyright IBM, Corp. 2008 - * - * Authors: - * Anthony Liguori <aliguori@us.ibm.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#include "qemu/osdep.h" -#include "qapi/error.h" -#include <sys/ioctl.h> -#include <sys/utsname.h> - -#include <linux/kvm.h> -#include "standard-headers/asm-x86/kvm_para.h" - -#include "qemu-common.h" -#include "cpu.h" -#include "sysemu/sysemu.h" -#include "sysemu/hw_accel.h" -#include "sysemu/kvm_int.h" -#include "kvm_i386.h" -#include "hyperv.h" -#include "hyperv-proto.h" - -#include "exec/gdbstub.h" -#include "qemu/host-utils.h" -#include "qemu/config-file.h" -#include "qemu/error-report.h" -#include "hw/i386/pc.h" -#include "hw/i386/apic.h" -#include "hw/i386/apic_internal.h" -#include "hw/i386/apic-msidef.h" -#include "hw/i386/intel_iommu.h" -#include "hw/i386/x86-iommu.h" - -#include "hw/pci/pci.h" -#include "hw/pci/msi.h" -#include "hw/pci/msix.h" -#include "migration/blocker.h" -#include "exec/memattrs.h" -#include "trace.h" - -//#define DEBUG_KVM - -#ifdef DEBUG_KVM -#define DPRINTF(fmt, ...) \ - do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) -#else -#define DPRINTF(fmt, ...) 
\ - do { } while (0) -#endif - -#define MSR_KVM_WALL_CLOCK 0x11 -#define MSR_KVM_SYSTEM_TIME 0x12 - -/* A 4096-byte buffer can hold the 8-byte kvm_msrs header, plus - * 255 kvm_msr_entry structs */ -#define MSR_BUF_SIZE 4096 - -const KVMCapabilityInfo kvm_arch_required_capabilities[] = { - KVM_CAP_INFO(SET_TSS_ADDR), - KVM_CAP_INFO(EXT_CPUID), - KVM_CAP_INFO(MP_STATE), - KVM_CAP_LAST_INFO -}; - -static bool has_msr_star; -static bool has_msr_hsave_pa; -static bool has_msr_tsc_aux; -static bool has_msr_tsc_adjust; -static bool has_msr_tsc_deadline; -static bool has_msr_feature_control; -static bool has_msr_misc_enable; -static bool has_msr_smbase; -static bool has_msr_bndcfgs; -static int lm_capable_kernel; -static bool has_msr_hv_hypercall; -static bool has_msr_hv_crash; -static bool has_msr_hv_reset; -static bool has_msr_hv_vpindex; -static bool hv_vpindex_settable; -static bool has_msr_hv_runtime; -static bool has_msr_hv_synic; -static bool has_msr_hv_stimer; -static bool has_msr_hv_frequencies; -static bool has_msr_hv_reenlightenment; -static bool has_msr_xss; -static bool has_msr_spec_ctrl; -static bool has_msr_virt_ssbd; -static bool has_msr_smi_count; - -static uint32_t has_architectural_pmu_version; -static uint32_t num_architectural_pmu_gp_counters; -static uint32_t num_architectural_pmu_fixed_counters; - -static int has_xsave; -static int has_xcrs; -static int has_pit_state2; - -static bool has_msr_mcg_ext_ctl; - -static struct kvm_cpuid2 *cpuid_cache; - -int kvm_has_pit_state2(void) -{ - return has_pit_state2; -} - -bool kvm_has_smm(void) -{ - return kvm_check_extension(kvm_state, KVM_CAP_X86_SMM); -} - -bool kvm_has_adjust_clock_stable(void) -{ - int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); - - return (ret == KVM_CLOCK_TSC_STABLE); -} - -bool kvm_allows_irq0_override(void) -{ - return !kvm_irqchip_in_kernel() || kvm_has_gsi_routing(); -} - -static bool kvm_x2apic_api_set_flags(uint64_t flags) -{ - KVMState *s = 
KVM_STATE(current_machine->accelerator); - - return !kvm_vm_enable_cap(s, KVM_CAP_X2APIC_API, 0, flags); -} - -#define MEMORIZE(fn, _result) \ - ({ \ - static bool _memorized; \ - \ - if (_memorized) { \ - return _result; \ - } \ - _memorized = true; \ - _result = fn; \ - }) - -static bool has_x2apic_api; - -bool kvm_has_x2apic_api(void) -{ - return has_x2apic_api; -} - -bool kvm_enable_x2apic(void) -{ - return MEMORIZE( - kvm_x2apic_api_set_flags(KVM_X2APIC_API_USE_32BIT_IDS | - KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK), - has_x2apic_api); -} - -bool kvm_hv_vpindex_settable(void) -{ - return hv_vpindex_settable; -} - -static int kvm_get_tsc(CPUState *cs) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - struct { - struct kvm_msrs info; - struct kvm_msr_entry entries[1]; - } msr_data; - int ret; - - if (env->tsc_valid) { - return 0; - } - - msr_data.info.nmsrs = 1; - msr_data.entries[0].index = MSR_IA32_TSC; - env->tsc_valid = !runstate_is_running(); - - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); - if (ret < 0) { - return ret; - } - - assert(ret == 1); - env->tsc = msr_data.entries[0].data; - return 0; -} - -static inline void do_kvm_synchronize_tsc(CPUState *cpu, run_on_cpu_data arg) -{ - kvm_get_tsc(cpu); -} - -void kvm_synchronize_all_tsc(void) -{ - CPUState *cpu; - - if (kvm_enabled()) { - CPU_FOREACH(cpu) { - run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL); - } - } -} - -static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) -{ - struct kvm_cpuid2 *cpuid; - int r, size; - - size = sizeof(*cpuid) + max * sizeof(*cpuid->entries); - cpuid = g_malloc0(size); - cpuid->nent = max; - r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid); - if (r == 0 && cpuid->nent >= max) { - r = -E2BIG; - } - if (r < 0) { - if (r == -E2BIG) { - g_free(cpuid); - return NULL; - } else { - fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n", - strerror(-r)); - exit(1); - } - } - return cpuid; -} - -/* Run KVM_GET_SUPPORTED_CPUID ioctl(), 
allocating a buffer large enough - * for all entries. - */ -static struct kvm_cpuid2 *get_supported_cpuid(KVMState *s) -{ - struct kvm_cpuid2 *cpuid; - int max = 1; - - if (cpuid_cache != NULL) { - return cpuid_cache; - } - while ((cpuid = try_get_cpuid(s, max)) == NULL) { - max *= 2; - } - cpuid_cache = cpuid; - return cpuid; -} - -static const struct kvm_para_features { - int cap; - int feature; -} para_features[] = { - { KVM_CAP_CLOCKSOURCE, KVM_FEATURE_CLOCKSOURCE }, - { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY }, - { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP }, - { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF }, -}; - -static int get_para_features(KVMState *s) -{ - int i, features = 0; - - for (i = 0; i < ARRAY_SIZE(para_features); i++) { - if (kvm_check_extension(s, para_features[i].cap)) { - features |= (1 << para_features[i].feature); - } - } - - return features; -} - -static bool host_tsx_blacklisted(void) -{ - int family, model, stepping;\ - char vendor[CPUID_VENDOR_SZ + 1]; - - host_vendor_fms(vendor, &family, &model, &stepping); - - /* Check if we are running on a Haswell host known to have broken TSX */ - return !strcmp(vendor, CPUID_VENDOR_INTEL) && - (family == 6) && - ((model == 63 && stepping < 4) || - model == 60 || model == 69 || model == 70); -} - -/* Returns the value for a specific register on the cpuid entry - */ -static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) -{ - uint32_t ret = 0; - switch (reg) { - case R_EAX: - ret = entry->eax; - break; - case R_EBX: - ret = entry->ebx; - break; - case R_ECX: - ret = entry->ecx; - break; - case R_EDX: - ret = entry->edx; - break; - } - return ret; -} - -/* Find matching entry for function/index on kvm_cpuid2 struct - */ -static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, - uint32_t function, - uint32_t index) -{ - int i; - for (i = 0; i < cpuid->nent; ++i) { - if (cpuid->entries[i].function == function && - cpuid->entries[i].index == index) { - return 
&cpuid->entries[i]; - } - } - /* not found: */ - return NULL; -} - -uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, - uint32_t index, int reg) -{ - struct kvm_cpuid2 *cpuid; - uint32_t ret = 0; - uint32_t cpuid_1_edx; - bool found = false; - - cpuid = get_supported_cpuid(s); - - struct kvm_cpuid_entry2 *entry = cpuid_find_entry(cpuid, function, index); - if (entry) { - found = true; - ret = cpuid_entry_get_reg(entry, reg); - } - - /* Fixups for the data returned by KVM, below */ - - if (function == 1 && reg == R_EDX) { - /* KVM before 2.6.30 misreports the following features */ - ret |= CPUID_MTRR | CPUID_PAT | CPUID_MCE | CPUID_MCA; - } else if (function == 1 && reg == R_ECX) { - /* We can set the hypervisor flag, even if KVM does not return it on - * GET_SUPPORTED_CPUID - */ - ret |= CPUID_EXT_HYPERVISOR; - /* tsc-deadline flag is not returned by GET_SUPPORTED_CPUID, but it - * can be enabled if the kernel has KVM_CAP_TSC_DEADLINE_TIMER, - * and the irqchip is in the kernel. - */ - if (kvm_irqchip_in_kernel() && - kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) { - ret |= CPUID_EXT_TSC_DEADLINE_TIMER; - } - - /* x2apic is reported by GET_SUPPORTED_CPUID, but it can't be enabled - * without the in-kernel irqchip - */ - if (!kvm_irqchip_in_kernel()) { - ret &= ~CPUID_EXT_X2APIC; - } - - if (enable_cpu_pm) { - int disable_exits = kvm_check_extension(s, - KVM_CAP_X86_DISABLE_EXITS); - - if (disable_exits & KVM_X86_DISABLE_EXITS_MWAIT) { - ret |= CPUID_EXT_MONITOR; - } - } - } else if (function == 6 && reg == R_EAX) { - ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */ - } else if (function == 7 && index == 0 && reg == R_EBX) { - if (host_tsx_blacklisted()) { - ret &= ~(CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_HLE); - } - } else if (function == 0x80000001 && reg == R_ECX) { - /* - * It's safe to enable TOPOEXT even if it's not returned by - * GET_SUPPORTED_CPUID. 
Unconditionally enabling TOPOEXT here allows - * us to keep CPU models including TOPOEXT runnable on older kernels. - */ - ret |= CPUID_EXT3_TOPOEXT; - } else if (function == 0x80000001 && reg == R_EDX) { - /* On Intel, kvm returns cpuid according to the Intel spec, - * so add missing bits according to the AMD spec: - */ - cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX); - ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES; - } else if (function == KVM_CPUID_FEATURES && reg == R_EAX) { - /* kvm_pv_unhalt is reported by GET_SUPPORTED_CPUID, but it can't - * be enabled without the in-kernel irqchip - */ - if (!kvm_irqchip_in_kernel()) { - ret &= ~(1U << KVM_FEATURE_PV_UNHALT); - } - } else if (function == KVM_CPUID_FEATURES && reg == R_EDX) { - ret |= 1U << KVM_HINTS_REALTIME; - found = 1; - } - - /* fallback for older kernels */ - if ((function == KVM_CPUID_FEATURES) && !found) { - ret = get_para_features(s); - } - - return ret; -} - -typedef struct HWPoisonPage { - ram_addr_t ram_addr; - QLIST_ENTRY(HWPoisonPage) list; -} HWPoisonPage; - -static QLIST_HEAD(, HWPoisonPage) hwpoison_page_list = - QLIST_HEAD_INITIALIZER(hwpoison_page_list); - -static void kvm_unpoison_all(void *param) -{ - HWPoisonPage *page, *next_page; - - QLIST_FOREACH_SAFE(page, &hwpoison_page_list, list, next_page) { - QLIST_REMOVE(page, list); - qemu_ram_remap(page->ram_addr, TARGET_PAGE_SIZE); - g_free(page); - } -} - -static void kvm_hwpoison_page_add(ram_addr_t ram_addr) -{ - HWPoisonPage *page; - - QLIST_FOREACH(page, &hwpoison_page_list, list) { - if (page->ram_addr == ram_addr) { - return; - } - } - page = g_new(HWPoisonPage, 1); - page->ram_addr = ram_addr; - QLIST_INSERT_HEAD(&hwpoison_page_list, page, list); -} - -static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap, - int *max_banks) -{ - int r; - - r = kvm_check_extension(s, KVM_CAP_MCE); - if (r > 0) { - *max_banks = r; - return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap); - } - return -ENOSYS; -} - 
-static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code) -{ - CPUState *cs = CPU(cpu); - CPUX86State *env = &cpu->env; - uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | - MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S; - uint64_t mcg_status = MCG_STATUS_MCIP; - int flags = 0; - - if (code == BUS_MCEERR_AR) { - status |= MCI_STATUS_AR | 0x134; - mcg_status |= MCG_STATUS_EIPV; - } else { - status |= 0xc0; - mcg_status |= MCG_STATUS_RIPV; - } - - flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0; - /* We need to read back the value of MSR_EXT_MCG_CTL that was set by the - * guest kernel back into env->mcg_ext_ctl. - */ - cpu_synchronize_state(cs); - if (env->mcg_ext_ctl & MCG_EXT_CTL_LMCE_EN) { - mcg_status |= MCG_STATUS_LMCE; - flags = 0; - } - - cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr, - (MCM_ADDR_PHYS << 6) | 0xc, flags); -} - -static void hardware_memory_error(void) -{ - fprintf(stderr, "Hardware memory error!\n"); - exit(1); -} - -void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) -{ - X86CPU *cpu = X86_CPU(c); - CPUX86State *env = &cpu->env; - ram_addr_t ram_addr; - hwaddr paddr; - - /* If we get an action required MCE, it has been injected by KVM - * while the VM was running. An action optional MCE instead should - * be coming from the main thread, which qemu_init_sigbus identifies - * as the "early kill" thread. 
- */ - assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); - - if ((env->mcg_cap & MCG_SER_P) && addr) { - ram_addr = qemu_ram_addr_from_host(addr); - if (ram_addr != RAM_ADDR_INVALID && - kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { - kvm_hwpoison_page_add(ram_addr); - kvm_mce_inject(cpu, paddr, code); - return; - } - - fprintf(stderr, "Hardware memory error for memory used by " - "QEMU itself instead of guest system!\n"); - } - - if (code == BUS_MCEERR_AR) { - hardware_memory_error(); - } - - /* Hope we are lucky for AO MCE */ -} - -static int kvm_inject_mce_oldstyle(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - - if (!kvm_has_vcpu_events() && env->exception_injected == EXCP12_MCHK) { - unsigned int bank, bank_num = env->mcg_cap & 0xff; - struct kvm_x86_mce mce; - - env->exception_injected = -1; - - /* - * There must be at least one bank in use if an MCE is pending. - * Find it and use its values for the event injection. - */ - for (bank = 0; bank < bank_num; bank++) { - if (env->mce_banks[bank * 4 + 1] & MCI_STATUS_VAL) { - break; - } - } - assert(bank < bank_num); - - mce.bank = bank; - mce.status = env->mce_banks[bank * 4 + 1]; - mce.mcg_status = env->mcg_status; - mce.addr = env->mce_banks[bank * 4 + 2]; - mce.misc = env->mce_banks[bank * 4 + 3]; - - return kvm_vcpu_ioctl(CPU(cpu), KVM_X86_SET_MCE, &mce); - } - return 0; -} - -static void cpu_update_state(void *opaque, int running, RunState state) -{ - CPUX86State *env = opaque; - - if (running) { - env->tsc_valid = false; - } -} - -unsigned long kvm_arch_vcpu_id(CPUState *cs) -{ - X86CPU *cpu = X86_CPU(cs); - return cpu->apic_id; -} - -#ifndef KVM_CPUID_SIGNATURE_NEXT -#define KVM_CPUID_SIGNATURE_NEXT 0x40000100 -#endif - -static bool hyperv_hypercall_available(X86CPU *cpu) -{ - return cpu->hyperv_vapic || - (cpu->hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_RETRY); -} - -static bool hyperv_enabled(X86CPU *cpu) -{ - CPUState *cs = CPU(cpu); - return 
kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV) > 0 && - (hyperv_hypercall_available(cpu) || - cpu->hyperv_time || - cpu->hyperv_relaxed_timing || - cpu->hyperv_crash || - cpu->hyperv_reset || - cpu->hyperv_vpindex || - cpu->hyperv_runtime || - cpu->hyperv_synic || - cpu->hyperv_stimer || - cpu->hyperv_reenlightenment || - cpu->hyperv_tlbflush); -} - -static int kvm_arch_set_tsc_khz(CPUState *cs) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - int r; - - if (!env->tsc_khz) { - return 0; - } - - r = kvm_check_extension(cs->kvm_state, KVM_CAP_TSC_CONTROL) ? - kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz) : - -ENOTSUP; - if (r < 0) { - /* When KVM_SET_TSC_KHZ fails, it's an error only if the current - * TSC frequency doesn't match the one we want. - */ - int cur_freq = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? - kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : - -ENOTSUP; - if (cur_freq <= 0 || cur_freq != env->tsc_khz) { - warn_report("TSC frequency mismatch between " - "VM (%" PRId64 " kHz) and host (%d kHz), " - "and TSC scaling unavailable", - env->tsc_khz, cur_freq); - return r; - } - } - - return 0; -} - -static bool tsc_is_stable_and_known(CPUX86State *env) -{ - if (!env->tsc_khz) { - return false; - } - return (env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) - || env->user_tsc_khz; -} - -static int hyperv_handle_properties(CPUState *cs) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - - if (cpu->hyperv_relaxed_timing) { - env->features[FEAT_HYPERV_EAX] |= HV_HYPERCALL_AVAILABLE; - } - if (cpu->hyperv_vapic) { - env->features[FEAT_HYPERV_EAX] |= HV_HYPERCALL_AVAILABLE; - env->features[FEAT_HYPERV_EAX] |= HV_APIC_ACCESS_AVAILABLE; - } - if (cpu->hyperv_time) { - if (kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV_TIME) <= 0) { - fprintf(stderr, "Hyper-V clocksources " - "(requested by 'hv-time' cpu flag) " - "are not supported by kernel\n"); - return -ENOSYS; - } - env->features[FEAT_HYPERV_EAX] |= 
HV_HYPERCALL_AVAILABLE; - env->features[FEAT_HYPERV_EAX] |= HV_TIME_REF_COUNT_AVAILABLE; - env->features[FEAT_HYPERV_EAX] |= HV_REFERENCE_TSC_AVAILABLE; - } - if (cpu->hyperv_frequencies) { - if (!has_msr_hv_frequencies) { - fprintf(stderr, "Hyper-V frequency MSRs " - "(requested by 'hv-frequencies' cpu flag) " - "are not supported by kernel\n"); - return -ENOSYS; - } - env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_FREQUENCY_MSRS; - env->features[FEAT_HYPERV_EDX] |= HV_FREQUENCY_MSRS_AVAILABLE; - } - if (cpu->hyperv_crash) { - if (!has_msr_hv_crash) { - fprintf(stderr, "Hyper-V crash MSRs " - "(requested by 'hv-crash' cpu flag) " - "are not supported by kernel\n"); - return -ENOSYS; - } - env->features[FEAT_HYPERV_EDX] |= HV_GUEST_CRASH_MSR_AVAILABLE; - } - if (cpu->hyperv_reenlightenment) { - if (!has_msr_hv_reenlightenment) { - fprintf(stderr, - "Hyper-V Reenlightenment MSRs " - "(requested by 'hv-reenlightenment' cpu flag) " - "are not supported by kernel\n"); - return -ENOSYS; - } - env->features[FEAT_HYPERV_EAX] |= HV_ACCESS_REENLIGHTENMENTS_CONTROL; - } - env->features[FEAT_HYPERV_EDX] |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; - if (cpu->hyperv_reset) { - if (!has_msr_hv_reset) { - fprintf(stderr, "Hyper-V reset MSR " - "(requested by 'hv-reset' cpu flag) " - "is not supported by kernel\n"); - return -ENOSYS; - } - env->features[FEAT_HYPERV_EAX] |= HV_RESET_AVAILABLE; - } - if (cpu->hyperv_vpindex) { - if (!has_msr_hv_vpindex) { - fprintf(stderr, "Hyper-V VP_INDEX MSR " - "(requested by 'hv-vpindex' cpu flag) " - "is not supported by kernel\n"); - return -ENOSYS; - } - env->features[FEAT_HYPERV_EAX] |= HV_VP_INDEX_AVAILABLE; - } - if (cpu->hyperv_runtime) { - if (!has_msr_hv_runtime) { - fprintf(stderr, "Hyper-V VP_RUNTIME MSR " - "(requested by 'hv-runtime' cpu flag) " - "is not supported by kernel\n"); - return -ENOSYS; - } - env->features[FEAT_HYPERV_EAX] |= HV_VP_RUNTIME_AVAILABLE; - } - if (cpu->hyperv_synic) { - if (!has_msr_hv_synic || - 
kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_SYNIC, 0)) { - fprintf(stderr, "Hyper-V SynIC is not supported by kernel\n"); - return -ENOSYS; - } - - env->features[FEAT_HYPERV_EAX] |= HV_SYNIC_AVAILABLE; - } - if (cpu->hyperv_stimer) { - if (!has_msr_hv_stimer) { - fprintf(stderr, "Hyper-V timers aren't supported by kernel\n"); - return -ENOSYS; - } - env->features[FEAT_HYPERV_EAX] |= HV_SYNTIMERS_AVAILABLE; - } - return 0; -} - -static int hyperv_init_vcpu(X86CPU *cpu) -{ - if (cpu->hyperv_vpindex && !hv_vpindex_settable) { - /* - * the kernel doesn't support setting vp_index; assert that its value - * is in sync - */ - int ret; - struct { - struct kvm_msrs info; - struct kvm_msr_entry entries[1]; - } msr_data = { - .info.nmsrs = 1, - .entries[0].index = HV_X64_MSR_VP_INDEX, - }; - - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); - if (ret < 0) { - return ret; - } - assert(ret == 1); - - if (msr_data.entries[0].data != hyperv_vp_index(cpu)) { - error_report("kernel's vp_index != QEMU's vp_index"); - return -ENXIO; - } - } - - return 0; -} - -static Error *invtsc_mig_blocker; - -#define KVM_MAX_CPUID_ENTRIES 100 - -int kvm_arch_init_vcpu(CPUState *cs) -{ - struct { - struct kvm_cpuid2 cpuid; - struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; - } QEMU_PACKED cpuid_data; - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - uint32_t limit, i, j, cpuid_i; - uint32_t unused; - struct kvm_cpuid_entry2 *c; - uint32_t signature[3]; - int kvm_base = KVM_CPUID_SIGNATURE; - int r; - Error *local_err = NULL; - - memset(&cpuid_data, 0, sizeof(cpuid_data)); - - cpuid_i = 0; - - r = kvm_arch_set_tsc_khz(cs); - if (r < 0) { - goto fail; - } - - /* vcpu's TSC frequency is either specified by user, or following - * the value used by KVM if the former is not present. In the - * latter case, we query it from KVM and record in env->tsc_khz, - * so that vcpu's TSC frequency can be migrated later via this field. 
- */ - if (!env->tsc_khz) { - r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? - kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : - -ENOTSUP; - if (r > 0) { - env->tsc_khz = r; - } - } - - /* Paravirtualization CPUIDs */ - if (hyperv_enabled(cpu)) { - c = &cpuid_data.entries[cpuid_i++]; - c->function = HV_CPUID_VENDOR_AND_MAX_FUNCTIONS; - if (!cpu->hyperv_vendor_id) { - memcpy(signature, "Microsoft Hv", 12); - } else { - size_t len = strlen(cpu->hyperv_vendor_id); - - if (len > 12) { - error_report("hv-vendor-id truncated to 12 characters"); - len = 12; - } - memset(signature, 0, 12); - memcpy(signature, cpu->hyperv_vendor_id, len); - } - c->eax = HV_CPUID_MIN; - c->ebx = signature[0]; - c->ecx = signature[1]; - c->edx = signature[2]; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = HV_CPUID_INTERFACE; - memcpy(signature, "Hv#1\0\0\0\0\0\0\0\0", 12); - c->eax = signature[0]; - c->ebx = 0; - c->ecx = 0; - c->edx = 0; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = HV_CPUID_VERSION; - c->eax = 0x00001bbc; - c->ebx = 0x00060001; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = HV_CPUID_FEATURES; - r = hyperv_handle_properties(cs); - if (r) { - return r; - } - c->eax = env->features[FEAT_HYPERV_EAX]; - c->ebx = env->features[FEAT_HYPERV_EBX]; - c->edx = env->features[FEAT_HYPERV_EDX]; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = HV_CPUID_ENLIGHTMENT_INFO; - if (cpu->hyperv_relaxed_timing) { - c->eax |= HV_RELAXED_TIMING_RECOMMENDED; - } - if (cpu->hyperv_vapic) { - c->eax |= HV_APIC_ACCESS_RECOMMENDED; - } - if (cpu->hyperv_tlbflush) { - if (kvm_check_extension(cs->kvm_state, - KVM_CAP_HYPERV_TLBFLUSH) <= 0) { - fprintf(stderr, "Hyper-V TLB flush support " - "(requested by 'hv-tlbflush' cpu flag) " - " is not supported by kernel\n"); - return -ENOSYS; - } - c->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; - c->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; - } - - c->ebx = cpu->hyperv_spinlock_attempts; - - c = &cpuid_data.entries[cpuid_i++]; - 
c->function = HV_CPUID_IMPLEMENT_LIMITS; - - c->eax = cpu->hv_max_vps; - c->ebx = 0x40; - - kvm_base = KVM_CPUID_SIGNATURE_NEXT; - has_msr_hv_hypercall = true; - } - - if (cpu->expose_kvm) { - memcpy(signature, "KVMKVMKVM\0\0\0", 12); - c = &cpuid_data.entries[cpuid_i++]; - c->function = KVM_CPUID_SIGNATURE | kvm_base; - c->eax = KVM_CPUID_FEATURES | kvm_base; - c->ebx = signature[0]; - c->ecx = signature[1]; - c->edx = signature[2]; - - c = &cpuid_data.entries[cpuid_i++]; - c->function = KVM_CPUID_FEATURES | kvm_base; - c->eax = env->features[FEAT_KVM]; - c->edx = env->features[FEAT_KVM_HINTS]; - } - - cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); - - for (i = 0; i <= limit; i++) { - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "unsupported level value: 0x%x\n", limit); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - - switch (i) { - case 2: { - /* Keep reading function 2 till all the input is received */ - int times; - - c->function = i; - c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC | - KVM_CPUID_FLAG_STATE_READ_NEXT; - cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); - times = c->eax & 0xff; - - for (j = 1; j < times; ++j) { - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:2):eax & 0xf = 0x%x\n", times); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - c->function = i; - c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; - cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); - } - break; - } - case 4: - case 0xb: - case 0xd: - for (j = 0; ; j++) { - if (i == 0xd && j == 64) { - break; - } - c->function = i; - c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - c->index = j; - cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); - - if (i == 4 && c->eax == 0) { - break; - } - if (i == 0xb && !(c->ecx & 0xff00)) { - break; - } - if (i == 0xd && c->eax == 0) { - continue; - } - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, 
no space for " - "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - } - break; - case 0x14: { - uint32_t times; - - c->function = i; - c->index = 0; - c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); - times = c->eax; - - for (j = 1; j <= times; ++j) { - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x14,ecx:0x%x)\n", j); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - c->function = i; - c->index = j; - c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); - } - break; - } - default: - c->function = i; - c->flags = 0; - cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); - break; - } - } - - if (limit >= 0x0a) { - uint32_t eax, edx; - - cpu_x86_cpuid(env, 0x0a, 0, &eax, &unused, &unused, &edx); - - has_architectural_pmu_version = eax & 0xff; - if (has_architectural_pmu_version > 0) { - num_architectural_pmu_gp_counters = (eax & 0xff00) >> 8; - - /* Shouldn't be more than 32, since that's the number of bits - * available in EBX to tell us _which_ counters are available. - * Play it safe. 
- */ - if (num_architectural_pmu_gp_counters > MAX_GP_COUNTERS) { - num_architectural_pmu_gp_counters = MAX_GP_COUNTERS; - } - - if (has_architectural_pmu_version > 1) { - num_architectural_pmu_fixed_counters = edx & 0x1f; - - if (num_architectural_pmu_fixed_counters > MAX_FIXED_COUNTERS) { - num_architectural_pmu_fixed_counters = MAX_FIXED_COUNTERS; - } - } - } - } - - cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); - - for (i = 0x80000000; i <= limit; i++) { - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - - switch (i) { - case 0x8000001d: - /* Query for all AMD cache information leaves */ - for (j = 0; ; j++) { - c->function = i; - c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; - c->index = j; - cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); - - if (c->eax == 0) { - break; - } - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "cpuid_data is full, no space for " - "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - } - break; - default: - c->function = i; - c->flags = 0; - cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); - break; - } - } - - /* Call Centaur's CPUID instructions they are supported. 
*/ - if (env->cpuid_xlevel2 > 0) { - cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); - - for (i = 0xC0000000; i <= limit; i++) { - if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { - fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit); - abort(); - } - c = &cpuid_data.entries[cpuid_i++]; - - c->function = i; - c->flags = 0; - cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); - } - } - - cpuid_data.cpuid.nent = cpuid_i; - - if (((env->cpuid_version >> 8)&0xF) >= 6 - && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == - (CPUID_MCE | CPUID_MCA) - && kvm_check_extension(cs->kvm_state, KVM_CAP_MCE) > 0) { - uint64_t mcg_cap, unsupported_caps; - int banks; - int ret; - - ret = kvm_get_mce_cap_supported(cs->kvm_state, &mcg_cap, &banks); - if (ret < 0) { - fprintf(stderr, "kvm_get_mce_cap_supported: %s", strerror(-ret)); - return ret; - } - - if (banks < (env->mcg_cap & MCG_CAP_BANKS_MASK)) { - error_report("kvm: Unsupported MCE bank count (QEMU = %d, KVM = %d)", - (int)(env->mcg_cap & MCG_CAP_BANKS_MASK), banks); - return -ENOTSUP; - } - - unsupported_caps = env->mcg_cap & ~(mcg_cap | MCG_CAP_BANKS_MASK); - if (unsupported_caps) { - if (unsupported_caps & MCG_LMCE_P) { - error_report("kvm: LMCE not supported"); - return -ENOTSUP; - } - warn_report("Unsupported MCG_CAP bits: 0x%" PRIx64, - unsupported_caps); - } - - env->mcg_cap &= mcg_cap | MCG_CAP_BANKS_MASK; - ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &env->mcg_cap); - if (ret < 0) { - fprintf(stderr, "KVM_X86_SETUP_MCE: %s", strerror(-ret)); - return ret; - } - } - - qemu_add_vm_change_state_handler(cpu_update_state, env); - - c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0); - if (c) { - has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) || - !!(c->ecx & CPUID_EXT_SMX); - } - - if (env->mcg_cap & MCG_LMCE_P) { - has_msr_mcg_ext_ctl = has_msr_feature_control = true; - } - - if (!env->user_tsc_khz) { - if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && - 
invtsc_mig_blocker == NULL) { - /* for migration */ - error_setg(&invtsc_mig_blocker, - "State blocked by non-migratable CPU device" - " (invtsc flag)"); - r = migrate_add_blocker(invtsc_mig_blocker, &local_err); - if (local_err) { - error_report_err(local_err); - error_free(invtsc_mig_blocker); - goto fail; - } - /* for savevm */ - vmstate_x86_cpu.unmigratable = 1; - } - } - - if (cpu->vmware_cpuid_freq - /* Guests depend on 0x40000000 to detect this feature, so only expose - * it if KVM exposes leaf 0x40000000. (Conflicts with Hyper-V) */ - && cpu->expose_kvm - && kvm_base == KVM_CPUID_SIGNATURE - /* TSC clock must be stable and known for this feature. */ - && tsc_is_stable_and_known(env)) { - - c = &cpuid_data.entries[cpuid_i++]; - c->function = KVM_CPUID_SIGNATURE | 0x10; - c->eax = env->tsc_khz; - /* LAPIC resolution of 1ns (freq: 1GHz) is hardcoded in KVM's - * APIC_BUS_CYCLE_NS */ - c->ebx = 1000000; - c->ecx = c->edx = 0; - - c = cpuid_find_entry(&cpuid_data.cpuid, kvm_base, 0); - c->eax = MAX(c->eax, KVM_CPUID_SIGNATURE | 0x10); - } - - cpuid_data.cpuid.nent = cpuid_i; - - cpuid_data.cpuid.padding = 0; - r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data); - if (r) { - goto fail; - } - - if (has_xsave) { - env->xsave_buf = qemu_memalign(4096, sizeof(struct kvm_xsave)); - } - cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE); - - if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) { - has_msr_tsc_aux = false; - } - - r = hyperv_init_vcpu(cpu); - if (r) { - goto fail; - } - - return 0; - - fail: - migrate_del_blocker(invtsc_mig_blocker); - return r; -} - -void kvm_arch_reset_vcpu(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - - env->xcr0 = 1; - if (kvm_irqchip_in_kernel()) { - env->mp_state = cpu_is_bsp(cpu) ? 
KVM_MP_STATE_RUNNABLE : - KVM_MP_STATE_UNINITIALIZED; - } else { - env->mp_state = KVM_MP_STATE_RUNNABLE; - } - - if (cpu->hyperv_synic) { - int i; - for (i = 0; i < ARRAY_SIZE(env->msr_hv_synic_sint); i++) { - env->msr_hv_synic_sint[i] = HV_SINT_MASKED; - } - } -} - -void kvm_arch_do_init_vcpu(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - - /* APs get directly into wait-for-SIPI state. */ - if (env->mp_state == KVM_MP_STATE_UNINITIALIZED) { - env->mp_state = KVM_MP_STATE_INIT_RECEIVED; - } -} - -static int kvm_get_supported_msrs(KVMState *s) -{ - static int kvm_supported_msrs; - int ret = 0; - - /* first time */ - if (kvm_supported_msrs == 0) { - struct kvm_msr_list msr_list, *kvm_msr_list; - - kvm_supported_msrs = -1; - - /* Obtain MSR list from KVM. These are the MSRs that we must - * save/restore */ - msr_list.nmsrs = 0; - ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list); - if (ret < 0 && ret != -E2BIG) { - return ret; - } - /* Old kernel modules had a bug and could write beyond the provided - memory. Allocate at least a safe amount of 1K. 
*/ - kvm_msr_list = g_malloc0(MAX(1024, sizeof(msr_list) + - msr_list.nmsrs * - sizeof(msr_list.indices[0]))); - - kvm_msr_list->nmsrs = msr_list.nmsrs; - ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list); - if (ret >= 0) { - int i; - - for (i = 0; i < kvm_msr_list->nmsrs; i++) { - switch (kvm_msr_list->indices[i]) { - case MSR_STAR: - has_msr_star = true; - break; - case MSR_VM_HSAVE_PA: - has_msr_hsave_pa = true; - break; - case MSR_TSC_AUX: - has_msr_tsc_aux = true; - break; - case MSR_TSC_ADJUST: - has_msr_tsc_adjust = true; - break; - case MSR_IA32_TSCDEADLINE: - has_msr_tsc_deadline = true; - break; - case MSR_IA32_SMBASE: - has_msr_smbase = true; - break; - case MSR_SMI_COUNT: - has_msr_smi_count = true; - break; - case MSR_IA32_MISC_ENABLE: - has_msr_misc_enable = true; - break; - case MSR_IA32_BNDCFGS: - has_msr_bndcfgs = true; - break; - case MSR_IA32_XSS: - has_msr_xss = true; - break; - case HV_X64_MSR_CRASH_CTL: - has_msr_hv_crash = true; - break; - case HV_X64_MSR_RESET: - has_msr_hv_reset = true; - break; - case HV_X64_MSR_VP_INDEX: - has_msr_hv_vpindex = true; - break; - case HV_X64_MSR_VP_RUNTIME: - has_msr_hv_runtime = true; - break; - case HV_X64_MSR_SCONTROL: - has_msr_hv_synic = true; - break; - case HV_X64_MSR_STIMER0_CONFIG: - has_msr_hv_stimer = true; - break; - case HV_X64_MSR_TSC_FREQUENCY: - has_msr_hv_frequencies = true; - break; - case HV_X64_MSR_REENLIGHTENMENT_CONTROL: - has_msr_hv_reenlightenment = true; - break; - case MSR_IA32_SPEC_CTRL: - has_msr_spec_ctrl = true; - break; - case MSR_VIRT_SSBD: - has_msr_virt_ssbd = true; - break; - } - } - } - - g_free(kvm_msr_list); - } - - return ret; -} - -static Notifier smram_machine_done; -static KVMMemoryListener smram_listener; -static AddressSpace smram_address_space; -static MemoryRegion smram_as_root; -static MemoryRegion smram_as_mem; - -static void register_smram_listener(Notifier *n, void *unused) -{ - MemoryRegion *smram = - (MemoryRegion *) 
object_resolve_path("/machine/smram", NULL); - - /* Outer container... */ - memory_region_init(&smram_as_root, OBJECT(kvm_state), "mem-container-smram", ~0ull); - memory_region_set_enabled(&smram_as_root, true); - - /* ... with two regions inside: normal system memory with low - * priority, and... - */ - memory_region_init_alias(&smram_as_mem, OBJECT(kvm_state), "mem-smram", - get_system_memory(), 0, ~0ull); - memory_region_add_subregion_overlap(&smram_as_root, 0, &smram_as_mem, 0); - memory_region_set_enabled(&smram_as_mem, true); - - if (smram) { - /* ... SMRAM with higher priority */ - memory_region_add_subregion_overlap(&smram_as_root, 0, smram, 10); - memory_region_set_enabled(smram, true); - } - - address_space_init(&smram_address_space, &smram_as_root, "KVM-SMRAM"); - kvm_memory_listener_register(kvm_state, &smram_listener, - &smram_address_space, 1); -} - -int kvm_arch_init(MachineState *ms, KVMState *s) -{ - uint64_t identity_base = 0xfffbc000; - uint64_t shadow_mem; - int ret; - struct utsname utsname; - - has_xsave = kvm_check_extension(s, KVM_CAP_XSAVE); - has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS); - has_pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2); - - hv_vpindex_settable = kvm_check_extension(s, KVM_CAP_HYPERV_VP_INDEX); - - ret = kvm_get_supported_msrs(s); - if (ret < 0) { - return ret; - } - - uname(&utsname); - lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0; - - /* - * On older Intel CPUs, KVM uses vm86 mode to emulate 16-bit code directly. - * In order to use vm86 mode, an EPT identity map and a TSS are needed. - * Since these must be part of guest physical memory, we need to allocate - * them, both by setting their start addresses in the kernel and by - * creating a corresponding e820 entry. We need 4 pages before the BIOS. - * - * Older KVM versions may not support setting the identity map base. In - * that case we need to stick with the default, i.e. a 256K maximum BIOS - * size. 
- */ - if (kvm_check_extension(s, KVM_CAP_SET_IDENTITY_MAP_ADDR)) { - /* Allows up to 16M BIOSes. */ - identity_base = 0xfeffc000; - - ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &identity_base); - if (ret < 0) { - return ret; - } - } - - /* Set TSS base one page after EPT identity map. */ - ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, identity_base + 0x1000); - if (ret < 0) { - return ret; - } - - /* Tell fw_cfg to notify the BIOS to reserve the range. */ - ret = e820_add_entry(identity_base, 0x4000, E820_RESERVED); - if (ret < 0) { - fprintf(stderr, "e820_add_entry() table is full\n"); - return ret; - } - qemu_register_reset(kvm_unpoison_all, NULL); - - shadow_mem = machine_kvm_shadow_mem(ms); - if (shadow_mem != -1) { - shadow_mem /= 4096; - ret = kvm_vm_ioctl(s, KVM_SET_NR_MMU_PAGES, shadow_mem); - if (ret < 0) { - return ret; - } - } - - if (kvm_check_extension(s, KVM_CAP_X86_SMM) && - object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE) && - pc_machine_is_smm_enabled(PC_MACHINE(ms))) { - smram_machine_done.notify = register_smram_listener; - qemu_add_machine_init_done_notifier(&smram_machine_done); - } - - if (enable_cpu_pm) { - int disable_exits = kvm_check_extension(s, KVM_CAP_X86_DISABLE_EXITS); - int ret; - -/* Work around for kernel header with a typo. TODO: fix header and drop. 
*/ -#if defined(KVM_X86_DISABLE_EXITS_HTL) && !defined(KVM_X86_DISABLE_EXITS_HLT) -#define KVM_X86_DISABLE_EXITS_HLT KVM_X86_DISABLE_EXITS_HTL -#endif - if (disable_exits) { - disable_exits &= (KVM_X86_DISABLE_EXITS_MWAIT | - KVM_X86_DISABLE_EXITS_HLT | - KVM_X86_DISABLE_EXITS_PAUSE); - } - - ret = kvm_vm_enable_cap(s, KVM_CAP_X86_DISABLE_EXITS, 0, - disable_exits); - if (ret < 0) { - error_report("kvm: guest stopping CPU not supported: %s", - strerror(-ret)); - } - } - - return 0; -} - -static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs) -{ - lhs->selector = rhs->selector; - lhs->base = rhs->base; - lhs->limit = rhs->limit; - lhs->type = 3; - lhs->present = 1; - lhs->dpl = 3; - lhs->db = 0; - lhs->s = 1; - lhs->l = 0; - lhs->g = 0; - lhs->avl = 0; - lhs->unusable = 0; -} - -static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs) -{ - unsigned flags = rhs->flags; - lhs->selector = rhs->selector; - lhs->base = rhs->base; - lhs->limit = rhs->limit; - lhs->type = (flags >> DESC_TYPE_SHIFT) & 15; - lhs->present = (flags & DESC_P_MASK) != 0; - lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3; - lhs->db = (flags >> DESC_B_SHIFT) & 1; - lhs->s = (flags & DESC_S_MASK) != 0; - lhs->l = (flags >> DESC_L_SHIFT) & 1; - lhs->g = (flags & DESC_G_MASK) != 0; - lhs->avl = (flags & DESC_AVL_MASK) != 0; - lhs->unusable = !lhs->present; - lhs->padding = 0; -} - -static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs) -{ - lhs->selector = rhs->selector; - lhs->base = rhs->base; - lhs->limit = rhs->limit; - lhs->flags = (rhs->type << DESC_TYPE_SHIFT) | - ((rhs->present && !rhs->unusable) * DESC_P_MASK) | - (rhs->dpl << DESC_DPL_SHIFT) | - (rhs->db << DESC_B_SHIFT) | - (rhs->s * DESC_S_MASK) | - (rhs->l << DESC_L_SHIFT) | - (rhs->g * DESC_G_MASK) | - (rhs->avl * DESC_AVL_MASK); -} - -static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set) -{ - if (set) { - *kvm_reg = *qemu_reg; - } else { - *qemu_reg = *kvm_reg; - } -} - 
-static int kvm_getput_regs(X86CPU *cpu, int set) -{ - CPUX86State *env = &cpu->env; - struct kvm_regs regs; - int ret = 0; - - if (!set) { - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_REGS, &regs); - if (ret < 0) { - return ret; - } - } - - kvm_getput_reg(&regs.rax, &env->regs[R_EAX], set); - kvm_getput_reg(&regs.rbx, &env->regs[R_EBX], set); - kvm_getput_reg(&regs.rcx, &env->regs[R_ECX], set); - kvm_getput_reg(&regs.rdx, &env->regs[R_EDX], set); - kvm_getput_reg(&regs.rsi, &env->regs[R_ESI], set); - kvm_getput_reg(&regs.rdi, &env->regs[R_EDI], set); - kvm_getput_reg(&regs.rsp, &env->regs[R_ESP], set); - kvm_getput_reg(&regs.rbp, &env->regs[R_EBP], set); -#ifdef TARGET_X86_64 - kvm_getput_reg(&regs.r8, &env->regs[8], set); - kvm_getput_reg(&regs.r9, &env->regs[9], set); - kvm_getput_reg(&regs.r10, &env->regs[10], set); - kvm_getput_reg(&regs.r11, &env->regs[11], set); - kvm_getput_reg(&regs.r12, &env->regs[12], set); - kvm_getput_reg(&regs.r13, &env->regs[13], set); - kvm_getput_reg(&regs.r14, &env->regs[14], set); - kvm_getput_reg(&regs.r15, &env->regs[15], set); -#endif - - kvm_getput_reg(&regs.rflags, &env->eflags, set); - kvm_getput_reg(&regs.rip, &env->eip, set); - - if (set) { - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_REGS, &regs); - } - - return ret; -} - -static int kvm_put_fpu(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - struct kvm_fpu fpu; - int i; - - memset(&fpu, 0, sizeof fpu); - fpu.fsw = env->fpus & ~(7 << 11); - fpu.fsw |= (env->fpstt & 7) << 11; - fpu.fcw = env->fpuc; - fpu.last_opcode = env->fpop; - fpu.last_ip = env->fpip; - fpu.last_dp = env->fpdp; - for (i = 0; i < 8; ++i) { - fpu.ftwx |= (!env->fptags[i]) << i; - } - memcpy(fpu.fpr, env->fpregs, sizeof env->fpregs); - for (i = 0; i < CPU_NB_REGS; i++) { - stq_p(&fpu.xmm[i][0], env->xmm_regs[i].ZMM_Q(0)); - stq_p(&fpu.xmm[i][8], env->xmm_regs[i].ZMM_Q(1)); - } - fpu.mxcsr = env->mxcsr; - - return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_FPU, &fpu); -} - -#define XSAVE_FCW_FSW 0 -#define XSAVE_FTW_FOP 1 -#define XSAVE_CWD_RIP 2 -#define XSAVE_CWD_RDP 4 -#define
XSAVE_MXCSR 6 -#define XSAVE_ST_SPACE 8 -#define XSAVE_XMM_SPACE 40 -#define XSAVE_XSTATE_BV 128 -#define XSAVE_YMMH_SPACE 144 -#define XSAVE_BNDREGS 240 -#define XSAVE_BNDCSR 256 -#define XSAVE_OPMASK 272 -#define XSAVE_ZMM_Hi256 288 -#define XSAVE_Hi16_ZMM 416 -#define XSAVE_PKRU 672 - -#define XSAVE_BYTE_OFFSET(word_offset) \ - ((word_offset) * sizeof_field(struct kvm_xsave, region[0])) - -#define ASSERT_OFFSET(word_offset, field) \ - QEMU_BUILD_BUG_ON(XSAVE_BYTE_OFFSET(word_offset) != \ - offsetof(X86XSaveArea, field)) - -ASSERT_OFFSET(XSAVE_FCW_FSW, legacy.fcw); -ASSERT_OFFSET(XSAVE_FTW_FOP, legacy.ftw); -ASSERT_OFFSET(XSAVE_CWD_RIP, legacy.fpip); -ASSERT_OFFSET(XSAVE_CWD_RDP, legacy.fpdp); -ASSERT_OFFSET(XSAVE_MXCSR, legacy.mxcsr); -ASSERT_OFFSET(XSAVE_ST_SPACE, legacy.fpregs); -ASSERT_OFFSET(XSAVE_XMM_SPACE, legacy.xmm_regs); -ASSERT_OFFSET(XSAVE_XSTATE_BV, header.xstate_bv); -ASSERT_OFFSET(XSAVE_YMMH_SPACE, avx_state); -ASSERT_OFFSET(XSAVE_BNDREGS, bndreg_state); -ASSERT_OFFSET(XSAVE_BNDCSR, bndcsr_state); -ASSERT_OFFSET(XSAVE_OPMASK, opmask_state); -ASSERT_OFFSET(XSAVE_ZMM_Hi256, zmm_hi256_state); -ASSERT_OFFSET(XSAVE_Hi16_ZMM, hi16_zmm_state); -ASSERT_OFFSET(XSAVE_PKRU, pkru_state); - -static int kvm_put_xsave(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - X86XSaveArea *xsave = env->xsave_buf; - - if (!has_xsave) { - return kvm_put_fpu(cpu); - } - x86_cpu_xsave_all_areas(cpu, xsave); - - return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); -} - -static int kvm_put_xcrs(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - struct kvm_xcrs xcrs = {}; - - if (!has_xcrs) { - return 0; - } - - xcrs.nr_xcrs = 1; - xcrs.flags = 0; - xcrs.xcrs[0].xcr = 0; - xcrs.xcrs[0].value = env->xcr0; - return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XCRS, &xcrs); -} - -static int kvm_put_sregs(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - struct kvm_sregs sregs; - - memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap)); - if (env->interrupt_injected >= 0) { - 
sregs.interrupt_bitmap[env->interrupt_injected / 64] |= - (uint64_t)1 << (env->interrupt_injected % 64); - } - - if ((env->eflags & VM_MASK)) { - set_v8086_seg(&sregs.cs, &env->segs[R_CS]); - set_v8086_seg(&sregs.ds, &env->segs[R_DS]); - set_v8086_seg(&sregs.es, &env->segs[R_ES]); - set_v8086_seg(&sregs.fs, &env->segs[R_FS]); - set_v8086_seg(&sregs.gs, &env->segs[R_GS]); - set_v8086_seg(&sregs.ss, &env->segs[R_SS]); - } else { - set_seg(&sregs.cs, &env->segs[R_CS]); - set_seg(&sregs.ds, &env->segs[R_DS]); - set_seg(&sregs.es, &env->segs[R_ES]); - set_seg(&sregs.fs, &env->segs[R_FS]); - set_seg(&sregs.gs, &env->segs[R_GS]); - set_seg(&sregs.ss, &env->segs[R_SS]); - } - - set_seg(&sregs.tr, &env->tr); - set_seg(&sregs.ldt, &env->ldt); - - sregs.idt.limit = env->idt.limit; - sregs.idt.base = env->idt.base; - memset(sregs.idt.padding, 0, sizeof sregs.idt.padding); - sregs.gdt.limit = env->gdt.limit; - sregs.gdt.base = env->gdt.base; - memset(sregs.gdt.padding, 0, sizeof sregs.gdt.padding); - - sregs.cr0 = env->cr[0]; - sregs.cr2 = env->cr[2]; - sregs.cr3 = env->cr[3]; - sregs.cr4 = env->cr[4]; - - sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state); - sregs.apic_base = cpu_get_apic_base(cpu->apic_state); - - sregs.efer = env->efer; - - return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); -} - -static void kvm_msr_buf_reset(X86CPU *cpu) -{ - memset(cpu->kvm_msr_buf, 0, MSR_BUF_SIZE); -} - -static void kvm_msr_entry_add(X86CPU *cpu, uint32_t index, uint64_t value) -{ - struct kvm_msrs *msrs = cpu->kvm_msr_buf; - void *limit = ((void *)msrs) + MSR_BUF_SIZE; - struct kvm_msr_entry *entry = &msrs->entries[msrs->nmsrs]; - - assert((void *)(entry + 1) <= limit); - - entry->index = index; - entry->reserved = 0; - entry->data = value; - msrs->nmsrs++; -} - -static int kvm_put_one_msr(X86CPU *cpu, int index, uint64_t value) -{ - kvm_msr_buf_reset(cpu); - kvm_msr_entry_add(cpu, index, value); - - return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); -} - -void 
kvm_put_apicbase(X86CPU *cpu, uint64_t value) -{ - int ret; - - ret = kvm_put_one_msr(cpu, MSR_IA32_APICBASE, value); - assert(ret == 1); -} - -static int kvm_put_tscdeadline_msr(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - int ret; - - if (!has_msr_tsc_deadline) { - return 0; - } - - ret = kvm_put_one_msr(cpu, MSR_IA32_TSCDEADLINE, env->tsc_deadline); - if (ret < 0) { - return ret; - } - - assert(ret == 1); - return 0; -} - -/* - * Provide a separate write service for the feature control MSR in order to - * kick the VCPU out of VMXON or even guest mode on reset. This has to be done - * before writing any other state because forcibly leaving nested mode - * invalidates the VCPU state. - */ -static int kvm_put_msr_feature_control(X86CPU *cpu) -{ - int ret; - - if (!has_msr_feature_control) { - return 0; - } - - ret = kvm_put_one_msr(cpu, MSR_IA32_FEATURE_CONTROL, - cpu->env.msr_ia32_feature_control); - if (ret < 0) { - return ret; - } - - assert(ret == 1); - return 0; -} - -static int kvm_put_msrs(X86CPU *cpu, int level) -{ - CPUX86State *env = &cpu->env; - int i; - int ret; - - kvm_msr_buf_reset(cpu); - - kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_CS, env->sysenter_cs); - kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_ESP, env->sysenter_esp); - kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_EIP, env->sysenter_eip); - kvm_msr_entry_add(cpu, MSR_PAT, env->pat); - if (has_msr_star) { - kvm_msr_entry_add(cpu, MSR_STAR, env->star); - } - if (has_msr_hsave_pa) { - kvm_msr_entry_add(cpu, MSR_VM_HSAVE_PA, env->vm_hsave); - } - if (has_msr_tsc_aux) { - kvm_msr_entry_add(cpu, MSR_TSC_AUX, env->tsc_aux); - } - if (has_msr_tsc_adjust) { - kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, env->tsc_adjust); - } - if (has_msr_misc_enable) { - kvm_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE, - env->msr_ia32_misc_enable); - } - if (has_msr_smbase) { - kvm_msr_entry_add(cpu, MSR_IA32_SMBASE, env->smbase); - } - if (has_msr_smi_count) { - kvm_msr_entry_add(cpu, MSR_SMI_COUNT, env->msr_smi_count); - } - if 
(has_msr_bndcfgs) { - kvm_msr_entry_add(cpu, MSR_IA32_BNDCFGS, env->msr_bndcfgs); - } - if (has_msr_xss) { - kvm_msr_entry_add(cpu, MSR_IA32_XSS, env->xss); - } - if (has_msr_spec_ctrl) { - kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, env->spec_ctrl); - } - if (has_msr_virt_ssbd) { - kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, env->virt_ssbd); - } - -#ifdef TARGET_X86_64 - if (lm_capable_kernel) { - kvm_msr_entry_add(cpu, MSR_CSTAR, env->cstar); - kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, env->kernelgsbase); - kvm_msr_entry_add(cpu, MSR_FMASK, env->fmask); - kvm_msr_entry_add(cpu, MSR_LSTAR, env->lstar); - } -#endif - - /* - * The following MSRs have side effects on the guest or are too heavy - * for normal writeback. Limit them to reset or full state updates. - */ - if (level >= KVM_PUT_RESET_STATE) { - kvm_msr_entry_add(cpu, MSR_IA32_TSC, env->tsc); - kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, env->system_time_msr); - kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, env->wall_clock_msr); - if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF)) { - kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_EN, env->async_pf_en_msr); - } - if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_PV_EOI)) { - kvm_msr_entry_add(cpu, MSR_KVM_PV_EOI_EN, env->pv_eoi_en_msr); - } - if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) { - kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, env->steal_time_msr); - } - if (has_architectural_pmu_version > 0) { - if (has_architectural_pmu_version > 1) { - /* Stop the counter. */ - kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0); - kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0); - } - - /* Set the counter values. 
*/ - for (i = 0; i < num_architectural_pmu_fixed_counters; i++) { - kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, - env->msr_fixed_counters[i]); - } - for (i = 0; i < num_architectural_pmu_gp_counters; i++) { - kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i, - env->msr_gp_counters[i]); - kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i, - env->msr_gp_evtsel[i]); - } - if (has_architectural_pmu_version > 1) { - kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS, - env->msr_global_status); - kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL, - env->msr_global_ovf_ctrl); - - /* Now start the PMU. */ - kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, - env->msr_fixed_ctr_ctrl); - kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, - env->msr_global_ctrl); - } - } - /* - * Hyper-V partition-wide MSRs: to avoid clearing them on cpu hot-add, - * only sync them to KVM on the first cpu - */ - if (current_cpu == first_cpu) { - if (has_msr_hv_hypercall) { - kvm_msr_entry_add(cpu, HV_X64_MSR_GUEST_OS_ID, - env->msr_hv_guest_os_id); - kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL, - env->msr_hv_hypercall); - } - if (cpu->hyperv_time) { - kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, - env->msr_hv_tsc); - } - if (cpu->hyperv_reenlightenment) { - kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL, - env->msr_hv_reenlightenment_control); - kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL, - env->msr_hv_tsc_emulation_control); - kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, - env->msr_hv_tsc_emulation_status); - } - } - if (cpu->hyperv_vapic) { - kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE, - env->msr_hv_vapic); - } - if (has_msr_hv_crash) { - int j; - - for (j = 0; j < HV_CRASH_PARAMS; j++) - kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_P0 + j, - env->msr_hv_crash_params[j]); - - kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_NOTIFY); - } - if (has_msr_hv_runtime) { - kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, env->msr_hv_runtime); - } - 
if (cpu->hyperv_vpindex && hv_vpindex_settable) { - kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, hyperv_vp_index(cpu)); - } - if (cpu->hyperv_synic) { - int j; - - kvm_msr_entry_add(cpu, HV_X64_MSR_SVERSION, HV_SYNIC_VERSION); - - kvm_msr_entry_add(cpu, HV_X64_MSR_SCONTROL, - env->msr_hv_synic_control); - kvm_msr_entry_add(cpu, HV_X64_MSR_SIEFP, - env->msr_hv_synic_evt_page); - kvm_msr_entry_add(cpu, HV_X64_MSR_SIMP, - env->msr_hv_synic_msg_page); - - for (j = 0; j < ARRAY_SIZE(env->msr_hv_synic_sint); j++) { - kvm_msr_entry_add(cpu, HV_X64_MSR_SINT0 + j, - env->msr_hv_synic_sint[j]); - } - } - if (has_msr_hv_stimer) { - int j; - - for (j = 0; j < ARRAY_SIZE(env->msr_hv_stimer_config); j++) { - kvm_msr_entry_add(cpu, HV_X64_MSR_STIMER0_CONFIG + j * 2, - env->msr_hv_stimer_config[j]); - } - - for (j = 0; j < ARRAY_SIZE(env->msr_hv_stimer_count); j++) { - kvm_msr_entry_add(cpu, HV_X64_MSR_STIMER0_COUNT + j * 2, - env->msr_hv_stimer_count[j]); - } - } - if (env->features[FEAT_1_EDX] & CPUID_MTRR) { - uint64_t phys_mask = MAKE_64BIT_MASK(0, cpu->phys_bits); - - kvm_msr_entry_add(cpu, MSR_MTRRdefType, env->mtrr_deftype); - kvm_msr_entry_add(cpu, MSR_MTRRfix64K_00000, env->mtrr_fixed[0]); - kvm_msr_entry_add(cpu, MSR_MTRRfix16K_80000, env->mtrr_fixed[1]); - kvm_msr_entry_add(cpu, MSR_MTRRfix16K_A0000, env->mtrr_fixed[2]); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C0000, env->mtrr_fixed[3]); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C8000, env->mtrr_fixed[4]); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D0000, env->mtrr_fixed[5]); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D8000, env->mtrr_fixed[6]); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E0000, env->mtrr_fixed[7]); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E8000, env->mtrr_fixed[8]); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F0000, env->mtrr_fixed[9]); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F8000, env->mtrr_fixed[10]); - for (i = 0; i < MSR_MTRRcap_VCNT; i++) { - /* The CPU GPs if we write to a bit above the physical limit of - * the host CPU 
(and KVM emulates that) - */ - uint64_t mask = env->mtrr_var[i].mask; - mask &= phys_mask; - - kvm_msr_entry_add(cpu, MSR_MTRRphysBase(i), - env->mtrr_var[i].base); - kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), mask); - } - } - if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) { - int addr_num = kvm_arch_get_supported_cpuid(kvm_state, - 0x14, 1, R_EAX) & 0x7; - - kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL, - env->msr_rtit_ctrl); - kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS, - env->msr_rtit_status); - kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE, - env->msr_rtit_output_base); - kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK, - env->msr_rtit_output_mask); - kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CR3_MATCH, - env->msr_rtit_cr3_match); - for (i = 0; i < addr_num; i++) { - kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i, - env->msr_rtit_addrs[i]); - } - } - - /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see - * kvm_put_msr_feature_control. */ - } - if (env->mcg_cap) { - int i; - - kvm_msr_entry_add(cpu, MSR_MCG_STATUS, env->mcg_status); - kvm_msr_entry_add(cpu, MSR_MCG_CTL, env->mcg_ctl); - if (has_msr_mcg_ext_ctl) { - kvm_msr_entry_add(cpu, MSR_MCG_EXT_CTL, env->mcg_ext_ctl); - } - for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { - kvm_msr_entry_add(cpu, MSR_MC0_CTL + i, env->mce_banks[i]); - } - } - - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); - if (ret < 0) { - return ret; - } - - if (ret < cpu->kvm_msr_buf->nmsrs) { - struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; - error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, - (uint32_t)e->index, (uint64_t)e->data); - } - - assert(ret == cpu->kvm_msr_buf->nmsrs); - return 0; -} - - -static int kvm_get_fpu(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - struct kvm_fpu fpu; - int i, ret; - - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_FPU, &fpu); - if (ret < 0) { - return ret; - } - - env->fpstt = (fpu.fsw >> 11) & 7; - env->fpus = fpu.fsw; - env->fpuc = 
fpu.fcw; - env->fpop = fpu.last_opcode; - env->fpip = fpu.last_ip; - env->fpdp = fpu.last_dp; - for (i = 0; i < 8; ++i) { - env->fptags[i] = !((fpu.ftwx >> i) & 1); - } - memcpy(env->fpregs, fpu.fpr, sizeof env->fpregs); - for (i = 0; i < CPU_NB_REGS; i++) { - env->xmm_regs[i].ZMM_Q(0) = ldq_p(&fpu.xmm[i][0]); - env->xmm_regs[i].ZMM_Q(1) = ldq_p(&fpu.xmm[i][8]); - } - env->mxcsr = fpu.mxcsr; - - return 0; -} - -static int kvm_get_xsave(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - X86XSaveArea *xsave = env->xsave_buf; - int ret; - - if (!has_xsave) { - return kvm_get_fpu(cpu); - } - - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XSAVE, xsave); - if (ret < 0) { - return ret; - } - x86_cpu_xrstor_all_areas(cpu, xsave); - - return 0; -} - -static int kvm_get_xcrs(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - int i, ret; - struct kvm_xcrs xcrs; - - if (!has_xcrs) { - return 0; - } - - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XCRS, &xcrs); - if (ret < 0) { - return ret; - } - - for (i = 0; i < xcrs.nr_xcrs; i++) { - /* Only support xcr0 now */ - if (xcrs.xcrs[i].xcr == 0) { - env->xcr0 = xcrs.xcrs[i].value; - break; - } - } - return 0; -} - -static int kvm_get_sregs(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - struct kvm_sregs sregs; - int bit, i, ret; - - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); - if (ret < 0) { - return ret; - } - - /* There can only be one pending IRQ set in the bitmap at a time, so try - to find it and save its number instead (-1 for none). 
*/ - env->interrupt_injected = -1; - for (i = 0; i < ARRAY_SIZE(sregs.interrupt_bitmap); i++) { - if (sregs.interrupt_bitmap[i]) { - bit = ctz64(sregs.interrupt_bitmap[i]); - env->interrupt_injected = i * 64 + bit; - break; - } - } - - get_seg(&env->segs[R_CS], &sregs.cs); - get_seg(&env->segs[R_DS], &sregs.ds); - get_seg(&env->segs[R_ES], &sregs.es); - get_seg(&env->segs[R_FS], &sregs.fs); - get_seg(&env->segs[R_GS], &sregs.gs); - get_seg(&env->segs[R_SS], &sregs.ss); - - get_seg(&env->tr, &sregs.tr); - get_seg(&env->ldt, &sregs.ldt); - - env->idt.limit = sregs.idt.limit; - env->idt.base = sregs.idt.base; - env->gdt.limit = sregs.gdt.limit; - env->gdt.base = sregs.gdt.base; - - env->cr[0] = sregs.cr0; - env->cr[2] = sregs.cr2; - env->cr[3] = sregs.cr3; - env->cr[4] = sregs.cr4; - - env->efer = sregs.efer; - - /* changes to apic base and cr8/tpr are read back via kvm_arch_post_run */ - x86_update_hflags(env); - - return 0; -} - -static int kvm_get_msrs(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; - int ret, i; - uint64_t mtrr_top_bits; - - kvm_msr_buf_reset(cpu); - - kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_CS, 0); - kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_ESP, 0); - kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_EIP, 0); - kvm_msr_entry_add(cpu, MSR_PAT, 0); - if (has_msr_star) { - kvm_msr_entry_add(cpu, MSR_STAR, 0); - } - if (has_msr_hsave_pa) { - kvm_msr_entry_add(cpu, MSR_VM_HSAVE_PA, 0); - } - if (has_msr_tsc_aux) { - kvm_msr_entry_add(cpu, MSR_TSC_AUX, 0); - } - if (has_msr_tsc_adjust) { - kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, 0); - } - if (has_msr_tsc_deadline) { - kvm_msr_entry_add(cpu, MSR_IA32_TSCDEADLINE, 0); - } - if (has_msr_misc_enable) { - kvm_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE, 0); - } - if (has_msr_smbase) { - kvm_msr_entry_add(cpu, MSR_IA32_SMBASE, 0); - } - if (has_msr_smi_count) { - kvm_msr_entry_add(cpu, MSR_SMI_COUNT, 0); - } - if (has_msr_feature_control) { - 
kvm_msr_entry_add(cpu, MSR_IA32_FEATURE_CONTROL, 0); - } - if (has_msr_bndcfgs) { - kvm_msr_entry_add(cpu, MSR_IA32_BNDCFGS, 0); - } - if (has_msr_xss) { - kvm_msr_entry_add(cpu, MSR_IA32_XSS, 0); - } - if (has_msr_spec_ctrl) { - kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, 0); - } - if (has_msr_virt_ssbd) { - kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, 0); - } - if (!env->tsc_valid) { - kvm_msr_entry_add(cpu, MSR_IA32_TSC, 0); - env->tsc_valid = !runstate_is_running(); - } - -#ifdef TARGET_X86_64 - if (lm_capable_kernel) { - kvm_msr_entry_add(cpu, MSR_CSTAR, 0); - kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, 0); - kvm_msr_entry_add(cpu, MSR_FMASK, 0); - kvm_msr_entry_add(cpu, MSR_LSTAR, 0); - } -#endif - kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, 0); - kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, 0); - if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF)) { - kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_EN, 0); - } - if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_PV_EOI)) { - kvm_msr_entry_add(cpu, MSR_KVM_PV_EOI_EN, 0); - } - if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) { - kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, 0); - } - if (has_architectural_pmu_version > 0) { - if (has_architectural_pmu_version > 1) { - kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0); - kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0); - kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS, 0); - kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL, 0); - } - for (i = 0; i < num_architectural_pmu_fixed_counters; i++) { - kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, 0); - } - for (i = 0; i < num_architectural_pmu_gp_counters; i++) { - kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i, 0); - kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i, 0); - } - } - - if (env->mcg_cap) { - kvm_msr_entry_add(cpu, MSR_MCG_STATUS, 0); - kvm_msr_entry_add(cpu, MSR_MCG_CTL, 0); - if (has_msr_mcg_ext_ctl) { - kvm_msr_entry_add(cpu, MSR_MCG_EXT_CTL, 0); - } - for (i = 0; i < (env->mcg_cap & 0xff) * 4; 
i++) { - kvm_msr_entry_add(cpu, MSR_MC0_CTL + i, 0); - } - } - - if (has_msr_hv_hypercall) { - kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL, 0); - kvm_msr_entry_add(cpu, HV_X64_MSR_GUEST_OS_ID, 0); - } - if (cpu->hyperv_vapic) { - kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE, 0); - } - if (cpu->hyperv_time) { - kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, 0); - } - if (cpu->hyperv_reenlightenment) { - kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0); - kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL, 0); - kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, 0); - } - if (has_msr_hv_crash) { - int j; - - for (j = 0; j < HV_CRASH_PARAMS; j++) { - kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_P0 + j, 0); - } - } - if (has_msr_hv_runtime) { - kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, 0); - } - if (cpu->hyperv_synic) { - uint32_t msr; - - kvm_msr_entry_add(cpu, HV_X64_MSR_SCONTROL, 0); - kvm_msr_entry_add(cpu, HV_X64_MSR_SIEFP, 0); - kvm_msr_entry_add(cpu, HV_X64_MSR_SIMP, 0); - for (msr = HV_X64_MSR_SINT0; msr <= HV_X64_MSR_SINT15; msr++) { - kvm_msr_entry_add(cpu, msr, 0); - } - } - if (has_msr_hv_stimer) { - uint32_t msr; - - for (msr = HV_X64_MSR_STIMER0_CONFIG; msr <= HV_X64_MSR_STIMER3_COUNT; - msr++) { - kvm_msr_entry_add(cpu, msr, 0); - } - } - if (env->features[FEAT_1_EDX] & CPUID_MTRR) { - kvm_msr_entry_add(cpu, MSR_MTRRdefType, 0); - kvm_msr_entry_add(cpu, MSR_MTRRfix64K_00000, 0); - kvm_msr_entry_add(cpu, MSR_MTRRfix16K_80000, 0); - kvm_msr_entry_add(cpu, MSR_MTRRfix16K_A0000, 0); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C0000, 0); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C8000, 0); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D0000, 0); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D8000, 0); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E0000, 0); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E8000, 0); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F0000, 0); - kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F8000, 0); - for (i = 0; i < MSR_MTRRcap_VCNT; i++) { - 
kvm_msr_entry_add(cpu, MSR_MTRRphysBase(i), 0); - kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), 0); - } - } - - if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) { - int addr_num = - kvm_arch_get_supported_cpuid(kvm_state, 0x14, 1, R_EAX) & 0x7; - - kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL, 0); - kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS, 0); - kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE, 0); - kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK, 0); - kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CR3_MATCH, 0); - for (i = 0; i < addr_num; i++) { - kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i, 0); - } - } - - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf); - if (ret < 0) { - return ret; - } - - if (ret < cpu->kvm_msr_buf->nmsrs) { - struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; - error_report("error: failed to get MSR 0x%" PRIx32, - (uint32_t)e->index); - } - - assert(ret == cpu->kvm_msr_buf->nmsrs); - /* - * MTRR masks: Each mask consists of 5 parts - * a 10..0: must be zero - * b 11 : valid bit - * c n-1.12: actual mask bits - * d 51..n: reserved must be zero - * e 63.52: reserved must be zero - * - * 'n' is the number of physical bits supported by the CPU and is - * apparently always <= 52. We know our 'n' but don't know what - * the destinations 'n' is; it might be smaller, in which case - * it masks (c) on loading. It might be larger, in which case - * we fill 'd' so that d..c is consistent irrespetive of the 'n' - * we're migrating to. 
- */ - - if (cpu->fill_mtrr_mask) { - QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > 52); - assert(cpu->phys_bits <= TARGET_PHYS_ADDR_SPACE_BITS); - mtrr_top_bits = MAKE_64BIT_MASK(cpu->phys_bits, 52 - cpu->phys_bits); - } else { - mtrr_top_bits = 0; - } - - for (i = 0; i < ret; i++) { - uint32_t index = msrs[i].index; - switch (index) { - case MSR_IA32_SYSENTER_CS: - env->sysenter_cs = msrs[i].data; - break; - case MSR_IA32_SYSENTER_ESP: - env->sysenter_esp = msrs[i].data; - break; - case MSR_IA32_SYSENTER_EIP: - env->sysenter_eip = msrs[i].data; - break; - case MSR_PAT: - env->pat = msrs[i].data; - break; - case MSR_STAR: - env->star = msrs[i].data; - break; -#ifdef TARGET_X86_64 - case MSR_CSTAR: - env->cstar = msrs[i].data; - break; - case MSR_KERNELGSBASE: - env->kernelgsbase = msrs[i].data; - break; - case MSR_FMASK: - env->fmask = msrs[i].data; - break; - case MSR_LSTAR: - env->lstar = msrs[i].data; - break; -#endif - case MSR_IA32_TSC: - env->tsc = msrs[i].data; - break; - case MSR_TSC_AUX: - env->tsc_aux = msrs[i].data; - break; - case MSR_TSC_ADJUST: - env->tsc_adjust = msrs[i].data; - break; - case MSR_IA32_TSCDEADLINE: - env->tsc_deadline = msrs[i].data; - break; - case MSR_VM_HSAVE_PA: - env->vm_hsave = msrs[i].data; - break; - case MSR_KVM_SYSTEM_TIME: - env->system_time_msr = msrs[i].data; - break; - case MSR_KVM_WALL_CLOCK: - env->wall_clock_msr = msrs[i].data; - break; - case MSR_MCG_STATUS: - env->mcg_status = msrs[i].data; - break; - case MSR_MCG_CTL: - env->mcg_ctl = msrs[i].data; - break; - case MSR_MCG_EXT_CTL: - env->mcg_ext_ctl = msrs[i].data; - break; - case MSR_IA32_MISC_ENABLE: - env->msr_ia32_misc_enable = msrs[i].data; - break; - case MSR_IA32_SMBASE: - env->smbase = msrs[i].data; - break; - case MSR_SMI_COUNT: - env->msr_smi_count = msrs[i].data; - break; - case MSR_IA32_FEATURE_CONTROL: - env->msr_ia32_feature_control = msrs[i].data; - break; - case MSR_IA32_BNDCFGS: - env->msr_bndcfgs = msrs[i].data; - break; - case MSR_IA32_XSS: - 
env->xss = msrs[i].data; - break; - default: - if (msrs[i].index >= MSR_MC0_CTL && - msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) { - env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data; - } - break; - case MSR_KVM_ASYNC_PF_EN: - env->async_pf_en_msr = msrs[i].data; - break; - case MSR_KVM_PV_EOI_EN: - env->pv_eoi_en_msr = msrs[i].data; - break; - case MSR_KVM_STEAL_TIME: - env->steal_time_msr = msrs[i].data; - break; - case MSR_CORE_PERF_FIXED_CTR_CTRL: - env->msr_fixed_ctr_ctrl = msrs[i].data; - break; - case MSR_CORE_PERF_GLOBAL_CTRL: - env->msr_global_ctrl = msrs[i].data; - break; - case MSR_CORE_PERF_GLOBAL_STATUS: - env->msr_global_status = msrs[i].data; - break; - case MSR_CORE_PERF_GLOBAL_OVF_CTRL: - env->msr_global_ovf_ctrl = msrs[i].data; - break; - case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR0 + MAX_FIXED_COUNTERS - 1: - env->msr_fixed_counters[index - MSR_CORE_PERF_FIXED_CTR0] = msrs[i].data; - break; - case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR0 + MAX_GP_COUNTERS - 1: - env->msr_gp_counters[index - MSR_P6_PERFCTR0] = msrs[i].data; - break; - case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1: - env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data; - break; - case HV_X64_MSR_HYPERCALL: - env->msr_hv_hypercall = msrs[i].data; - break; - case HV_X64_MSR_GUEST_OS_ID: - env->msr_hv_guest_os_id = msrs[i].data; - break; - case HV_X64_MSR_APIC_ASSIST_PAGE: - env->msr_hv_vapic = msrs[i].data; - break; - case HV_X64_MSR_REFERENCE_TSC: - env->msr_hv_tsc = msrs[i].data; - break; - case HV_X64_MSR_CRASH_P0 ... 
HV_X64_MSR_CRASH_P4: - env->msr_hv_crash_params[index - HV_X64_MSR_CRASH_P0] = msrs[i].data; - break; - case HV_X64_MSR_VP_RUNTIME: - env->msr_hv_runtime = msrs[i].data; - break; - case HV_X64_MSR_SCONTROL: - env->msr_hv_synic_control = msrs[i].data; - break; - case HV_X64_MSR_SIEFP: - env->msr_hv_synic_evt_page = msrs[i].data; - break; - case HV_X64_MSR_SIMP: - env->msr_hv_synic_msg_page = msrs[i].data; - break; - case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: - env->msr_hv_synic_sint[index - HV_X64_MSR_SINT0] = msrs[i].data; - break; - case HV_X64_MSR_STIMER0_CONFIG: - case HV_X64_MSR_STIMER1_CONFIG: - case HV_X64_MSR_STIMER2_CONFIG: - case HV_X64_MSR_STIMER3_CONFIG: - env->msr_hv_stimer_config[(index - HV_X64_MSR_STIMER0_CONFIG)/2] = - msrs[i].data; - break; - case HV_X64_MSR_STIMER0_COUNT: - case HV_X64_MSR_STIMER1_COUNT: - case HV_X64_MSR_STIMER2_COUNT: - case HV_X64_MSR_STIMER3_COUNT: - env->msr_hv_stimer_count[(index - HV_X64_MSR_STIMER0_COUNT)/2] = - msrs[i].data; - break; - case HV_X64_MSR_REENLIGHTENMENT_CONTROL: - env->msr_hv_reenlightenment_control = msrs[i].data; - break; - case HV_X64_MSR_TSC_EMULATION_CONTROL: - env->msr_hv_tsc_emulation_control = msrs[i].data; - break; - case HV_X64_MSR_TSC_EMULATION_STATUS: - env->msr_hv_tsc_emulation_status = msrs[i].data; - break; - case MSR_MTRRdefType: - env->mtrr_deftype = msrs[i].data; - break; - case MSR_MTRRfix64K_00000: - env->mtrr_fixed[0] = msrs[i].data; - break; - case MSR_MTRRfix16K_80000: - env->mtrr_fixed[1] = msrs[i].data; - break; - case MSR_MTRRfix16K_A0000: - env->mtrr_fixed[2] = msrs[i].data; - break; - case MSR_MTRRfix4K_C0000: - env->mtrr_fixed[3] = msrs[i].data; - break; - case MSR_MTRRfix4K_C8000: - env->mtrr_fixed[4] = msrs[i].data; - break; - case MSR_MTRRfix4K_D0000: - env->mtrr_fixed[5] = msrs[i].data; - break; - case MSR_MTRRfix4K_D8000: - env->mtrr_fixed[6] = msrs[i].data; - break; - case MSR_MTRRfix4K_E0000: - env->mtrr_fixed[7] = msrs[i].data; - break; - case MSR_MTRRfix4K_E8000: - 
env->mtrr_fixed[8] = msrs[i].data; - break; - case MSR_MTRRfix4K_F0000: - env->mtrr_fixed[9] = msrs[i].data; - break; - case MSR_MTRRfix4K_F8000: - env->mtrr_fixed[10] = msrs[i].data; - break; - case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1): - if (index & 1) { - env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data | - mtrr_top_bits; - } else { - env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data; - } - break; - case MSR_IA32_SPEC_CTRL: - env->spec_ctrl = msrs[i].data; - break; - case MSR_VIRT_SSBD: - env->virt_ssbd = msrs[i].data; - break; - case MSR_IA32_RTIT_CTL: - env->msr_rtit_ctrl = msrs[i].data; - break; - case MSR_IA32_RTIT_STATUS: - env->msr_rtit_status = msrs[i].data; - break; - case MSR_IA32_RTIT_OUTPUT_BASE: - env->msr_rtit_output_base = msrs[i].data; - break; - case MSR_IA32_RTIT_OUTPUT_MASK: - env->msr_rtit_output_mask = msrs[i].data; - break; - case MSR_IA32_RTIT_CR3_MATCH: - env->msr_rtit_cr3_match = msrs[i].data; - break; - case MSR_IA32_RTIT_ADDR0_A ... 
MSR_IA32_RTIT_ADDR3_B: - env->msr_rtit_addrs[index - MSR_IA32_RTIT_ADDR0_A] = msrs[i].data; - break; - } - } - - return 0; -} - -static int kvm_put_mp_state(X86CPU *cpu) -{ - struct kvm_mp_state mp_state = { .mp_state = cpu->env.mp_state }; - - return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state); -} - -static int kvm_get_mp_state(X86CPU *cpu) -{ - CPUState *cs = CPU(cpu); - CPUX86State *env = &cpu->env; - struct kvm_mp_state mp_state; - int ret; - - ret = kvm_vcpu_ioctl(cs, KVM_GET_MP_STATE, &mp_state); - if (ret < 0) { - return ret; - } - env->mp_state = mp_state.mp_state; - if (kvm_irqchip_in_kernel()) { - cs->halted = (mp_state.mp_state == KVM_MP_STATE_HALTED); - } - return 0; -} - -static int kvm_get_apic(X86CPU *cpu) -{ - DeviceState *apic = cpu->apic_state; - struct kvm_lapic_state kapic; - int ret; - - if (apic && kvm_irqchip_in_kernel()) { - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_LAPIC, &kapic); - if (ret < 0) { - return ret; - } - - kvm_get_apic_state(apic, &kapic); - } - return 0; -} - -static int kvm_put_vcpu_events(X86CPU *cpu, int level) -{ - CPUState *cs = CPU(cpu); - CPUX86State *env = &cpu->env; - struct kvm_vcpu_events events = {}; - - if (!kvm_has_vcpu_events()) { - return 0; - } - - events.exception.injected = (env->exception_injected >= 0); - events.exception.nr = env->exception_injected; - events.exception.has_error_code = env->has_error_code; - events.exception.error_code = env->error_code; - events.exception.pad = 0; - - events.interrupt.injected = (env->interrupt_injected >= 0); - events.interrupt.nr = env->interrupt_injected; - events.interrupt.soft = env->soft_interrupt; - - events.nmi.injected = env->nmi_injected; - events.nmi.pending = env->nmi_pending; - events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK); - events.nmi.pad = 0; - - events.sipi_vector = env->sipi_vector; - events.flags = 0; - - if (has_msr_smbase) { - events.smi.smm = !!(env->hflags & HF_SMM_MASK); - events.smi.smm_inside_nmi = !!(env->hflags2 & 
HF2_SMM_INSIDE_NMI_MASK); - if (kvm_irqchip_in_kernel()) { - /* As soon as these are moved to the kernel, remove them - * from cs->interrupt_request. - */ - events.smi.pending = cs->interrupt_request & CPU_INTERRUPT_SMI; - events.smi.latched_init = cs->interrupt_request & CPU_INTERRUPT_INIT; - cs->interrupt_request &= ~(CPU_INTERRUPT_INIT | CPU_INTERRUPT_SMI); - } else { - /* Keep these in cs->interrupt_request. */ - events.smi.pending = 0; - events.smi.latched_init = 0; - } - /* Stop SMI delivery on old machine types to avoid a reboot - * on an inward migration of an old VM. - */ - if (!cpu->kvm_no_smi_migration) { - events.flags |= KVM_VCPUEVENT_VALID_SMM; - } - } - - if (level >= KVM_PUT_RESET_STATE) { - events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING; - if (env->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { - events.flags |= KVM_VCPUEVENT_VALID_SIPI_VECTOR; - } - } - - return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events); -} - -static int kvm_get_vcpu_events(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - struct kvm_vcpu_events events; - int ret; - - if (!kvm_has_vcpu_events()) { - return 0; - } - - memset(&events, 0, sizeof(events)); - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events); - if (ret < 0) { - return ret; - } - env->exception_injected = - events.exception.injected ? events.exception.nr : -1; - env->has_error_code = events.exception.has_error_code; - env->error_code = events.exception.error_code; - - env->interrupt_injected = - events.interrupt.injected ? 
events.interrupt.nr : -1; - env->soft_interrupt = events.interrupt.soft; - - env->nmi_injected = events.nmi.injected; - env->nmi_pending = events.nmi.pending; - if (events.nmi.masked) { - env->hflags2 |= HF2_NMI_MASK; - } else { - env->hflags2 &= ~HF2_NMI_MASK; - } - - if (events.flags & KVM_VCPUEVENT_VALID_SMM) { - if (events.smi.smm) { - env->hflags |= HF_SMM_MASK; - } else { - env->hflags &= ~HF_SMM_MASK; - } - if (events.smi.pending) { - cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI); - } else { - cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_SMI); - } - if (events.smi.smm_inside_nmi) { - env->hflags2 |= HF2_SMM_INSIDE_NMI_MASK; - } else { - env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK; - } - if (events.smi.latched_init) { - cpu_interrupt(CPU(cpu), CPU_INTERRUPT_INIT); - } else { - cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_INIT); - } - } - - env->sipi_vector = events.sipi_vector; - - return 0; -} - -static int kvm_guest_debug_workarounds(X86CPU *cpu) -{ - CPUState *cs = CPU(cpu); - CPUX86State *env = &cpu->env; - int ret = 0; - unsigned long reinject_trap = 0; - - if (!kvm_has_vcpu_events()) { - if (env->exception_injected == 1) { - reinject_trap = KVM_GUESTDBG_INJECT_DB; - } else if (env->exception_injected == 3) { - reinject_trap = KVM_GUESTDBG_INJECT_BP; - } - env->exception_injected = -1; - } - - /* - * Kernels before KVM_CAP_X86_ROBUST_SINGLESTEP overwrote flags.TF - * injected via SET_GUEST_DEBUG while updating GP regs. Work around this - * by updating the debug state once again if single-stepping is on. - * Another reason to call kvm_update_guest_debug here is a pending debug - * trap raise by the guest. On kernels without SET_VCPU_EVENTS we have to - * reinject them via SET_GUEST_DEBUG. 
- */ - if (reinject_trap || - (!kvm_has_robust_singlestep() && cs->singlestep_enabled)) { - ret = kvm_update_guest_debug(cs, reinject_trap); - } - return ret; -} - -static int kvm_put_debugregs(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - struct kvm_debugregs dbgregs; - int i; - - if (!kvm_has_debugregs()) { - return 0; - } - - for (i = 0; i < 4; i++) { - dbgregs.db[i] = env->dr[i]; - } - dbgregs.dr6 = env->dr[6]; - dbgregs.dr7 = env->dr[7]; - dbgregs.flags = 0; - - return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEBUGREGS, &dbgregs); -} - -static int kvm_get_debugregs(X86CPU *cpu) -{ - CPUX86State *env = &cpu->env; - struct kvm_debugregs dbgregs; - int i, ret; - - if (!kvm_has_debugregs()) { - return 0; - } - - ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_DEBUGREGS, &dbgregs); - if (ret < 0) { - return ret; - } - for (i = 0; i < 4; i++) { - env->dr[i] = dbgregs.db[i]; - } - env->dr[4] = env->dr[6] = dbgregs.dr6; - env->dr[5] = env->dr[7] = dbgregs.dr7; - - return 0; -} - -int kvm_arch_put_registers(CPUState *cpu, int level) -{ - X86CPU *x86_cpu = X86_CPU(cpu); - int ret; - - assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); - - if (level >= KVM_PUT_RESET_STATE) { - ret = kvm_put_msr_feature_control(x86_cpu); - if (ret < 0) { - return ret; - } - } - - if (level == KVM_PUT_FULL_STATE) { - /* We don't check for kvm_arch_set_tsc_khz() errors here, - * because TSC frequency mismatch shouldn't abort migration, - * unless the user explicitly asked for a more strict TSC - * setting (e.g. using an explicit "tsc-freq" option). 
- */ - kvm_arch_set_tsc_khz(cpu); - } - - ret = kvm_getput_regs(x86_cpu, 1); - if (ret < 0) { - return ret; - } - ret = kvm_put_xsave(x86_cpu); - if (ret < 0) { - return ret; - } - ret = kvm_put_xcrs(x86_cpu); - if (ret < 0) { - return ret; - } - ret = kvm_put_sregs(x86_cpu); - if (ret < 0) { - return ret; - } - /* must be before kvm_put_msrs */ - ret = kvm_inject_mce_oldstyle(x86_cpu); - if (ret < 0) { - return ret; - } - ret = kvm_put_msrs(x86_cpu, level); - if (ret < 0) { - return ret; - } - ret = kvm_put_vcpu_events(x86_cpu, level); - if (ret < 0) { - return ret; - } - if (level >= KVM_PUT_RESET_STATE) { - ret = kvm_put_mp_state(x86_cpu); - if (ret < 0) { - return ret; - } - } - - ret = kvm_put_tscdeadline_msr(x86_cpu); - if (ret < 0) { - return ret; - } - ret = kvm_put_debugregs(x86_cpu); - if (ret < 0) { - return ret; - } - /* must be last */ - ret = kvm_guest_debug_workarounds(x86_cpu); - if (ret < 0) { - return ret; - } - return 0; -} - -int kvm_arch_get_registers(CPUState *cs) -{ - X86CPU *cpu = X86_CPU(cs); - int ret; - - assert(cpu_is_stopped(cs) || qemu_cpu_is_self(cs)); - - ret = kvm_get_vcpu_events(cpu); - if (ret < 0) { - goto out; - } - /* - * KVM_GET_MPSTATE can modify CS and RIP, call it before - * KVM_GET_REGS and KVM_GET_SREGS. 
- */ - ret = kvm_get_mp_state(cpu); - if (ret < 0) { - goto out; - } - ret = kvm_getput_regs(cpu, 0); - if (ret < 0) { - goto out; - } - ret = kvm_get_xsave(cpu); - if (ret < 0) { - goto out; - } - ret = kvm_get_xcrs(cpu); - if (ret < 0) { - goto out; - } - ret = kvm_get_sregs(cpu); - if (ret < 0) { - goto out; - } - ret = kvm_get_msrs(cpu); - if (ret < 0) { - goto out; - } - ret = kvm_get_apic(cpu); - if (ret < 0) { - goto out; - } - ret = kvm_get_debugregs(cpu); - if (ret < 0) { - goto out; - } - ret = 0; - out: - cpu_sync_bndcs_hflags(&cpu->env); - return ret; -} - -void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) -{ - X86CPU *x86_cpu = X86_CPU(cpu); - CPUX86State *env = &x86_cpu->env; - int ret; - - /* Inject NMI */ - if (cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { - if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { - qemu_mutex_lock_iothread(); - cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; - qemu_mutex_unlock_iothread(); - DPRINTF("injected NMI\n"); - ret = kvm_vcpu_ioctl(cpu, KVM_NMI); - if (ret < 0) { - fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n", - strerror(-ret)); - } - } - if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { - qemu_mutex_lock_iothread(); - cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; - qemu_mutex_unlock_iothread(); - DPRINTF("injected SMI\n"); - ret = kvm_vcpu_ioctl(cpu, KVM_SMI); - if (ret < 0) { - fprintf(stderr, "KVM: injection failed, SMI lost (%s)\n", - strerror(-ret)); - } - } - } - - if (!kvm_pic_in_kernel()) { - qemu_mutex_lock_iothread(); - } - - /* Force the VCPU out of its inner loop to process any INIT requests - * or (for userspace APIC, but it is cheap to combine the checks here) - * pending TPR access reports. 
- */ - if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { - if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && - !(env->hflags & HF_SMM_MASK)) { - cpu->exit_request = 1; - } - if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { - cpu->exit_request = 1; - } - } - - if (!kvm_pic_in_kernel()) { - /* Try to inject an interrupt if the guest can accept it */ - if (run->ready_for_interrupt_injection && - (cpu->interrupt_request & CPU_INTERRUPT_HARD) && - (env->eflags & IF_MASK)) { - int irq; - - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; - irq = cpu_get_pic_interrupt(env); - if (irq >= 0) { - struct kvm_interrupt intr; - - intr.irq = irq; - DPRINTF("injected interrupt %d\n", irq); - ret = kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr); - if (ret < 0) { - fprintf(stderr, - "KVM: injection failed, interrupt lost (%s)\n", - strerror(-ret)); - } - } - } - - /* If we have an interrupt but the guest is not ready to receive an - * interrupt, request an interrupt window exit. This will - * cause a return to userspace as soon as the guest is ready to - * receive interrupts. */ - if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) { - run->request_interrupt_window = 1; - } else { - run->request_interrupt_window = 0; - } - - DPRINTF("setting tpr\n"); - run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state); - - qemu_mutex_unlock_iothread(); - } -} - -MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) -{ - X86CPU *x86_cpu = X86_CPU(cpu); - CPUX86State *env = &x86_cpu->env; - - if (run->flags & KVM_RUN_X86_SMM) { - env->hflags |= HF_SMM_MASK; - } else { - env->hflags &= ~HF_SMM_MASK; - } - if (run->if_flag) { - env->eflags |= IF_MASK; - } else { - env->eflags &= ~IF_MASK; - } - - /* We need to protect the apic state against concurrent accesses from - * different threads in case the userspace irqchip is used. 
*/ - if (!kvm_irqchip_in_kernel()) { - qemu_mutex_lock_iothread(); - } - cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8); - cpu_set_apic_base(x86_cpu->apic_state, run->apic_base); - if (!kvm_irqchip_in_kernel()) { - qemu_mutex_unlock_iothread(); - } - return cpu_get_mem_attrs(env); -} - -int kvm_arch_process_async_events(CPUState *cs) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - - if (cs->interrupt_request & CPU_INTERRUPT_MCE) { - /* We must not raise CPU_INTERRUPT_MCE if it's not supported. */ - assert(env->mcg_cap); - - cs->interrupt_request &= ~CPU_INTERRUPT_MCE; - - kvm_cpu_synchronize_state(cs); - - if (env->exception_injected == EXCP08_DBLE) { - /* this means triple fault */ - qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); - cs->exit_request = 1; - return 0; - } - env->exception_injected = EXCP12_MCHK; - env->has_error_code = 0; - - cs->halted = 0; - if (kvm_irqchip_in_kernel() && env->mp_state == KVM_MP_STATE_HALTED) { - env->mp_state = KVM_MP_STATE_RUNNABLE; - } - } - - if ((cs->interrupt_request & CPU_INTERRUPT_INIT) && - !(env->hflags & HF_SMM_MASK)) { - kvm_cpu_synchronize_state(cs); - do_cpu_init(cpu); - } - - if (kvm_irqchip_in_kernel()) { - return 0; - } - - if (cs->interrupt_request & CPU_INTERRUPT_POLL) { - cs->interrupt_request &= ~CPU_INTERRUPT_POLL; - apic_poll_irq(cpu->apic_state); - } - if (((cs->interrupt_request & CPU_INTERRUPT_HARD) && - (env->eflags & IF_MASK)) || - (cs->interrupt_request & CPU_INTERRUPT_NMI)) { - cs->halted = 0; - } - if (cs->interrupt_request & CPU_INTERRUPT_SIPI) { - kvm_cpu_synchronize_state(cs); - do_cpu_sipi(cpu); - } - if (cs->interrupt_request & CPU_INTERRUPT_TPR) { - cs->interrupt_request &= ~CPU_INTERRUPT_TPR; - kvm_cpu_synchronize_state(cs); - apic_handle_tpr_access_report(cpu->apic_state, env->eip, - env->tpr_access_type); - } - - return cs->halted; -} - -static int kvm_handle_halt(X86CPU *cpu) -{ - CPUState *cs = CPU(cpu); - CPUX86State *env = &cpu->env; - - if 
(!((cs->interrupt_request & CPU_INTERRUPT_HARD) && - (env->eflags & IF_MASK)) && - !(cs->interrupt_request & CPU_INTERRUPT_NMI)) { - cs->halted = 1; - return EXCP_HLT; - } - - return 0; -} - -static int kvm_handle_tpr_access(X86CPU *cpu) -{ - CPUState *cs = CPU(cpu); - struct kvm_run *run = cs->kvm_run; - - apic_handle_tpr_access_report(cpu->apic_state, run->tpr_access.rip, - run->tpr_access.is_write ? TPR_ACCESS_WRITE - : TPR_ACCESS_READ); - return 1; -} - -int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) -{ - static const uint8_t int3 = 0xcc; - - if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) || - cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&int3, 1, 1)) { - return -EINVAL; - } - return 0; -} - -int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) -{ - uint8_t int3; - - if (cpu_memory_rw_debug(cs, bp->pc, &int3, 1, 0) || int3 != 0xcc || - cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) { - return -EINVAL; - } - return 0; -} - -static struct { - target_ulong addr; - int len; - int type; -} hw_breakpoint[4]; - -static int nb_hw_breakpoint; - -static int find_hw_breakpoint(target_ulong addr, int len, int type) -{ - int n; - - for (n = 0; n < nb_hw_breakpoint; n++) { - if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type && - (hw_breakpoint[n].len == len || len == -1)) { - return n; - } - } - return -1; -} - -int kvm_arch_insert_hw_breakpoint(target_ulong addr, - target_ulong len, int type) -{ - switch (type) { - case GDB_BREAKPOINT_HW: - len = 1; - break; - case GDB_WATCHPOINT_WRITE: - case GDB_WATCHPOINT_ACCESS: - switch (len) { - case 1: - break; - case 2: - case 4: - case 8: - if (addr & (len - 1)) { - return -EINVAL; - } - break; - default: - return -EINVAL; - } - break; - default: - return -ENOSYS; - } - - if (nb_hw_breakpoint == 4) { - return -ENOBUFS; - } - if (find_hw_breakpoint(addr, len, type) >= 0) { - return -EEXIST; - } - 
hw_breakpoint[nb_hw_breakpoint].addr = addr; - hw_breakpoint[nb_hw_breakpoint].len = len; - hw_breakpoint[nb_hw_breakpoint].type = type; - nb_hw_breakpoint++; - - return 0; -} - -int kvm_arch_remove_hw_breakpoint(target_ulong addr, - target_ulong len, int type) -{ - int n; - - n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 1 : len, type); - if (n < 0) { - return -ENOENT; - } - nb_hw_breakpoint--; - hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint]; - - return 0; -} - -void kvm_arch_remove_all_hw_breakpoints(void) -{ - nb_hw_breakpoint = 0; -} - -static CPUWatchpoint hw_watchpoint; - -static int kvm_handle_debug(X86CPU *cpu, - struct kvm_debug_exit_arch *arch_info) -{ - CPUState *cs = CPU(cpu); - CPUX86State *env = &cpu->env; - int ret = 0; - int n; - - if (arch_info->exception == 1) { - if (arch_info->dr6 & (1 << 14)) { - if (cs->singlestep_enabled) { - ret = EXCP_DEBUG; - } - } else { - for (n = 0; n < 4; n++) { - if (arch_info->dr6 & (1 << n)) { - switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) { - case 0x0: - ret = EXCP_DEBUG; - break; - case 0x1: - ret = EXCP_DEBUG; - cs->watchpoint_hit = &hw_watchpoint; - hw_watchpoint.vaddr = hw_breakpoint[n].addr; - hw_watchpoint.flags = BP_MEM_WRITE; - break; - case 0x3: - ret = EXCP_DEBUG; - cs->watchpoint_hit = &hw_watchpoint; - hw_watchpoint.vaddr = hw_breakpoint[n].addr; - hw_watchpoint.flags = BP_MEM_ACCESS; - break; - } - } - } - } - } else if (kvm_find_sw_breakpoint(cs, arch_info->pc)) { - ret = EXCP_DEBUG; - } - if (ret == 0) { - cpu_synchronize_state(cs); - assert(env->exception_injected == -1); - - /* pass to guest */ - env->exception_injected = arch_info->exception; - env->has_error_code = 0; - } - - return ret; -} - -void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg) -{ - const uint8_t type_code[] = { - [GDB_BREAKPOINT_HW] = 0x0, - [GDB_WATCHPOINT_WRITE] = 0x1, - [GDB_WATCHPOINT_ACCESS] = 0x3 - }; - const uint8_t len_code[] = { - [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2 - 
}; - int n; - - if (kvm_sw_breakpoints_active(cpu)) { - dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; - } - if (nb_hw_breakpoint > 0) { - dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; - dbg->arch.debugreg[7] = 0x0600; - for (n = 0; n < nb_hw_breakpoint; n++) { - dbg->arch.debugreg[n] = hw_breakpoint[n].addr; - dbg->arch.debugreg[7] |= (2 << (n * 2)) | - (type_code[hw_breakpoint[n].type] << (16 + n*4)) | - ((uint32_t)len_code[hw_breakpoint[n].len] << (18 + n*4)); - } - } -} - -static bool host_supports_vmx(void) -{ - uint32_t ecx, unused; - - host_cpuid(1, 0, &unused, &unused, &ecx, &unused); - return ecx & CPUID_EXT_VMX; -} - -#define VMX_INVALID_GUEST_STATE 0x80000021 - -int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) -{ - X86CPU *cpu = X86_CPU(cs); - uint64_t code; - int ret; - - switch (run->exit_reason) { - case KVM_EXIT_HLT: - DPRINTF("handle_hlt\n"); - qemu_mutex_lock_iothread(); - ret = kvm_handle_halt(cpu); - qemu_mutex_unlock_iothread(); - break; - case KVM_EXIT_SET_TPR: - ret = 0; - break; - case KVM_EXIT_TPR_ACCESS: - qemu_mutex_lock_iothread(); - ret = kvm_handle_tpr_access(cpu); - qemu_mutex_unlock_iothread(); - break; - case KVM_EXIT_FAIL_ENTRY: - code = run->fail_entry.hardware_entry_failure_reason; - fprintf(stderr, "KVM: entry failed, hardware error 0x%" PRIx64 "\n", - code); - if (host_supports_vmx() && code == VMX_INVALID_GUEST_STATE) { - fprintf(stderr, - "\nIf you're running a guest on an Intel machine without " - "unrestricted mode\n" - "support, the failure can be most likely due to the guest " - "entering an invalid\n" - "state for Intel VT. For example, the guest maybe running " - "in big real mode\n" - "which is not supported on less recent Intel processors." 
- "\n\n"); - } - ret = -1; - break; - case KVM_EXIT_EXCEPTION: - fprintf(stderr, "KVM: exception %d exit (error code 0x%x)\n", - run->ex.exception, run->ex.error_code); - ret = -1; - break; - case KVM_EXIT_DEBUG: - DPRINTF("kvm_exit_debug\n"); - qemu_mutex_lock_iothread(); - ret = kvm_handle_debug(cpu, &run->debug.arch); - qemu_mutex_unlock_iothread(); - break; - case KVM_EXIT_HYPERV: - ret = kvm_hv_handle_exit(cpu, &run->hyperv); - break; - case KVM_EXIT_IOAPIC_EOI: - ioapic_eoi_broadcast(run->eoi.vector); - ret = 0; - break; - default: - fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); - ret = -1; - break; - } - - return ret; -} - -bool kvm_arch_stop_on_emulation_error(CPUState *cs) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - - kvm_cpu_synchronize_state(cs); - return !(env->cr[0] & CR0_PE_MASK) || - ((env->segs[R_CS].selector & 3) != 3); -} - -void kvm_arch_init_irq_routing(KVMState *s) -{ - if (!kvm_check_extension(s, KVM_CAP_IRQ_ROUTING)) { - /* If kernel can't do irq routing, interrupt source - * override 0->2 cannot be set up as required by HPET. - * So we have to disable it. - */ - no_hpet = 1; - } - /* We know at this point that we're using the in-kernel - * irqchip, so we can use irqfds, and on x86 we know - * we can use msi via irqfd and GSI routing. - */ - kvm_msi_via_irqfd_allowed = true; - kvm_gsi_routing_allowed = true; - - if (kvm_irqchip_is_split()) { - int i; - - /* If the ioapic is in QEMU and the lapics are in KVM, reserve - MSI routes for signaling interrupts to the local apics. 
*/ - for (i = 0; i < IOAPIC_NUM_PINS; i++) { - if (kvm_irqchip_add_msi_route(s, 0, NULL) < 0) { - error_report("Could not enable split IRQ mode."); - exit(1); - } - } - } -} - -int kvm_arch_irqchip_create(MachineState *ms, KVMState *s) -{ - int ret; - if (machine_kernel_irqchip_split(ms)) { - ret = kvm_vm_enable_cap(s, KVM_CAP_SPLIT_IRQCHIP, 0, 24); - if (ret) { - error_report("Could not enable split irqchip mode: %s", - strerror(-ret)); - exit(1); - } else { - DPRINTF("Enabled KVM_CAP_SPLIT_IRQCHIP\n"); - kvm_split_irqchip = true; - return 1; - } - } else { - return 0; - } -} - -/* Classic KVM device assignment interface. Will remain x86 only. */ -int kvm_device_pci_assign(KVMState *s, PCIHostDeviceAddress *dev_addr, - uint32_t flags, uint32_t *dev_id) -{ - struct kvm_assigned_pci_dev dev_data = { - .segnr = dev_addr->domain, - .busnr = dev_addr->bus, - .devfn = PCI_DEVFN(dev_addr->slot, dev_addr->function), - .flags = flags, - }; - int ret; - - dev_data.assigned_dev_id = - (dev_addr->domain << 16) | (dev_addr->bus << 8) | dev_data.devfn; - - ret = kvm_vm_ioctl(s, KVM_ASSIGN_PCI_DEVICE, &dev_data); - if (ret < 0) { - return ret; - } - - *dev_id = dev_data.assigned_dev_id; - - return 0; -} - -int kvm_device_pci_deassign(KVMState *s, uint32_t dev_id) -{ - struct kvm_assigned_pci_dev dev_data = { - .assigned_dev_id = dev_id, - }; - - return kvm_vm_ioctl(s, KVM_DEASSIGN_PCI_DEVICE, &dev_data); -} - -static int kvm_assign_irq_internal(KVMState *s, uint32_t dev_id, - uint32_t irq_type, uint32_t guest_irq) -{ - struct kvm_assigned_irq assigned_irq = { - .assigned_dev_id = dev_id, - .guest_irq = guest_irq, - .flags = irq_type, - }; - - if (kvm_check_extension(s, KVM_CAP_ASSIGN_DEV_IRQ)) { - return kvm_vm_ioctl(s, KVM_ASSIGN_DEV_IRQ, &assigned_irq); - } else { - return kvm_vm_ioctl(s, KVM_ASSIGN_IRQ, &assigned_irq); - } -} - -int kvm_device_intx_assign(KVMState *s, uint32_t dev_id, bool use_host_msi, - uint32_t guest_irq) -{ - uint32_t irq_type = KVM_DEV_IRQ_GUEST_INTX | - 
(use_host_msi ? KVM_DEV_IRQ_HOST_MSI : KVM_DEV_IRQ_HOST_INTX); - - return kvm_assign_irq_internal(s, dev_id, irq_type, guest_irq); -} - -int kvm_device_intx_set_mask(KVMState *s, uint32_t dev_id, bool masked) -{ - struct kvm_assigned_pci_dev dev_data = { - .assigned_dev_id = dev_id, - .flags = masked ? KVM_DEV_ASSIGN_MASK_INTX : 0, - }; - - return kvm_vm_ioctl(s, KVM_ASSIGN_SET_INTX_MASK, &dev_data); -} - -static int kvm_deassign_irq_internal(KVMState *s, uint32_t dev_id, - uint32_t type) -{ - struct kvm_assigned_irq assigned_irq = { - .assigned_dev_id = dev_id, - .flags = type, - }; - - return kvm_vm_ioctl(s, KVM_DEASSIGN_DEV_IRQ, &assigned_irq); -} - -int kvm_device_intx_deassign(KVMState *s, uint32_t dev_id, bool use_host_msi) -{ - return kvm_deassign_irq_internal(s, dev_id, KVM_DEV_IRQ_GUEST_INTX | - (use_host_msi ? KVM_DEV_IRQ_HOST_MSI : KVM_DEV_IRQ_HOST_INTX)); -} - -int kvm_device_msi_assign(KVMState *s, uint32_t dev_id, int virq) -{ - return kvm_assign_irq_internal(s, dev_id, KVM_DEV_IRQ_HOST_MSI | - KVM_DEV_IRQ_GUEST_MSI, virq); -} - -int kvm_device_msi_deassign(KVMState *s, uint32_t dev_id) -{ - return kvm_deassign_irq_internal(s, dev_id, KVM_DEV_IRQ_GUEST_MSI | - KVM_DEV_IRQ_HOST_MSI); -} - -bool kvm_device_msix_supported(KVMState *s) -{ - /* The kernel lacks a corresponding KVM_CAP, so we probe by calling - * KVM_ASSIGN_SET_MSIX_NR with an invalid parameter. 
*/ - return kvm_vm_ioctl(s, KVM_ASSIGN_SET_MSIX_NR, NULL) == -EFAULT; -} - -int kvm_device_msix_init_vectors(KVMState *s, uint32_t dev_id, - uint32_t nr_vectors) -{ - struct kvm_assigned_msix_nr msix_nr = { - .assigned_dev_id = dev_id, - .entry_nr = nr_vectors, - }; - - return kvm_vm_ioctl(s, KVM_ASSIGN_SET_MSIX_NR, &msix_nr); -} - -int kvm_device_msix_set_vector(KVMState *s, uint32_t dev_id, uint32_t vector, - int virq) -{ - struct kvm_assigned_msix_entry msix_entry = { - .assigned_dev_id = dev_id, - .gsi = virq, - .entry = vector, - }; - - return kvm_vm_ioctl(s, KVM_ASSIGN_SET_MSIX_ENTRY, &msix_entry); -} - -int kvm_device_msix_assign(KVMState *s, uint32_t dev_id) -{ - return kvm_assign_irq_internal(s, dev_id, KVM_DEV_IRQ_HOST_MSIX | - KVM_DEV_IRQ_GUEST_MSIX, 0); -} - -int kvm_device_msix_deassign(KVMState *s, uint32_t dev_id) -{ - return kvm_deassign_irq_internal(s, dev_id, KVM_DEV_IRQ_GUEST_MSIX | - KVM_DEV_IRQ_HOST_MSIX); -} - -int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, - uint64_t address, uint32_t data, PCIDevice *dev) -{ - X86IOMMUState *iommu = x86_iommu_get_default(); - - if (iommu) { - int ret; - MSIMessage src, dst; - X86IOMMUClass *class = X86_IOMMU_GET_CLASS(iommu); - - if (!class->int_remap) { - return 0; - } - - src.address = route->u.msi.address_hi; - src.address <<= VTD_MSI_ADDR_HI_SHIFT; - src.address |= route->u.msi.address_lo; - src.data = route->u.msi.data; - - ret = class->int_remap(iommu, &src, &dst, dev ? 
\ - pci_requester_id(dev) : \ - X86_IOMMU_SID_INVALID); - if (ret) { - trace_kvm_x86_fixup_msi_error(route->gsi); - return 1; - } - - route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT; - route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK; - route->u.msi.data = dst.data; - } - - return 0; -} - -typedef struct MSIRouteEntry MSIRouteEntry; - -struct MSIRouteEntry { - PCIDevice *dev; /* Device pointer */ - int vector; /* MSI/MSIX vector index */ - int virq; /* Virtual IRQ index */ - QLIST_ENTRY(MSIRouteEntry) list; -}; - -/* List of used GSI routes */ -static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \ - QLIST_HEAD_INITIALIZER(msi_route_list); - -static void kvm_update_msi_routes_all(void *private, bool global, - uint32_t index, uint32_t mask) -{ - int cnt = 0; - MSIRouteEntry *entry; - MSIMessage msg; - PCIDevice *dev; - - /* TODO: explicit route update */ - QLIST_FOREACH(entry, &msi_route_list, list) { - cnt++; - dev = entry->dev; - if (!msix_enabled(dev) && !msi_enabled(dev)) { - continue; - } - msg = pci_get_msi_message(dev, entry->vector); - kvm_irqchip_update_msi_route(kvm_state, entry->virq, msg, dev); - } - kvm_irqchip_commit_routes(kvm_state); - trace_kvm_x86_update_msi_routes(cnt); -} - -int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, - int vector, PCIDevice *dev) -{ - static bool notify_list_inited = false; - MSIRouteEntry *entry; - - if (!dev) { - /* These are (possibly) IOAPIC routes only used for split - * kernel irqchip mode, while what we are housekeeping are - * PCI devices only. */ - return 0; - } - - entry = g_new0(MSIRouteEntry, 1); - entry->dev = dev; - entry->vector = vector; - entry->virq = route->gsi; - QLIST_INSERT_HEAD(&msi_route_list, entry, list); - - trace_kvm_x86_add_msi_route(route->gsi); - - if (!notify_list_inited) { - /* For the first time we do add route, add ourselves into - * IOMMU's IEC notify list if needed. 
*/ - X86IOMMUState *iommu = x86_iommu_get_default(); - if (iommu) { - x86_iommu_iec_register_notifier(iommu, - kvm_update_msi_routes_all, - NULL); - } - notify_list_inited = true; - } - return 0; -} - -int kvm_arch_release_virq_post(int virq) -{ - MSIRouteEntry *entry, *next; - QLIST_FOREACH_SAFE(entry, &msi_route_list, list, next) { - if (entry->virq == virq) { - trace_kvm_x86_remove_msi_route(virq); - QLIST_REMOVE(entry, list); - g_free(entry); - break; - } - } - return 0; -} - -int kvm_arch_msi_data_to_gsi(uint32_t data) -{ - abort(); -} diff --git a/target/i386/hyperv-proto.h b/target/i386/kvm/hyperv-proto.h index d6d5a79293..464fbf09e3 100644 --- a/target/i386/hyperv-proto.h +++ b/target/i386/kvm/hyperv-proto.h @@ -1,7 +1,7 @@ /* - * Definitions for Hyper-V guest/hypervisor interaction + * Definitions for Hyper-V guest/hypervisor interaction - x86-specific part * - * Copyright (C) 2017 Parallels International GmbH + * Copyright (c) 2017-2018 Virtuozzo International GmbH. * * This work is licensed under the terms of the GNU GPL, version 2 or later. * See the COPYING file in the top-level directory. 
@@ -10,7 +10,7 @@ #ifndef TARGET_I386_HYPERV_PROTO_H #define TARGET_I386_HYPERV_PROTO_H -#include "qemu/bitmap.h" +#include "hw/hyperv/hyperv-proto.h" #define HV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000 #define HV_CPUID_INTERFACE 0x40000001 @@ -18,6 +18,10 @@ #define HV_CPUID_FEATURES 0x40000003 #define HV_CPUID_ENLIGHTMENT_INFO 0x40000004 #define HV_CPUID_IMPLEMENT_LIMITS 0x40000005 +#define HV_CPUID_NESTED_FEATURES 0x4000000A +#define HV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080 +#define HV_CPUID_SYNDBG_INTERFACE 0x40000081 +#define HV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082 #define HV_CPUID_MIN 0x40000005 #define HV_CPUID_MAX 0x4000ffff #define HV_HYPERVISOR_PRESENT_BIT 0x80000000 @@ -38,16 +42,30 @@ #define HV_ACCESS_REENLIGHTENMENTS_CONTROL (1u << 13) /* + * HV_CPUID_FEATURES.EBX bits + */ +#define HV_POST_MESSAGES (1u << 4) +#define HV_SIGNAL_EVENTS (1u << 5) + +/* * HV_CPUID_FEATURES.EDX bits */ #define HV_MWAIT_AVAILABLE (1u << 0) #define HV_GUEST_DEBUGGING_AVAILABLE (1u << 1) #define HV_PERF_MONITOR_AVAILABLE (1u << 2) #define HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE (1u << 3) -#define HV_HYPERCALL_PARAMS_XMM_AVAILABLE (1u << 4) +#define HV_HYPERCALL_XMM_INPUT_AVAILABLE (1u << 4) #define HV_GUEST_IDLE_STATE_AVAILABLE (1u << 5) #define HV_FREQUENCY_MSRS_AVAILABLE (1u << 8) #define HV_GUEST_CRASH_MSR_AVAILABLE (1u << 10) +#define HV_FEATURE_DEBUG_MSRS_AVAILABLE (1u << 11) +#define HV_EXT_GVA_RANGES_FLUSH_AVAILABLE (1u << 14) +#define HV_STIMER_DIRECT_MODE_AVAILABLE (1u << 19) + +/* + * HV_CPUID_FEATURES.EBX bits + */ +#define HV_PARTITION_DEBUGGING_ALLOWED (1u << 12) /* * HV_CPUID_ENLIGHTMENT_INFO.EAX bits @@ -58,7 +76,22 @@ #define HV_APIC_ACCESS_RECOMMENDED (1u << 3) #define HV_SYSTEM_RESET_RECOMMENDED (1u << 4) #define HV_RELAXED_TIMING_RECOMMENDED (1u << 5) +#define HV_DEPRECATING_AEOI_RECOMMENDED (1u << 9) +#define HV_CLUSTER_IPI_RECOMMENDED (1u << 10) #define HV_EX_PROCESSOR_MASKS_RECOMMENDED (1u << 11) +#define 
HV_ENLIGHTENED_VMCS_RECOMMENDED (1u << 14) +#define HV_NO_NONARCH_CORESHARING (1u << 18) + +/* + * HV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX bits + */ +#define HV_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING (1u << 1) + +/* + * HV_CPUID_NESTED_FEATURES.EAX bits + */ +#define HV_NESTED_DIRECT_FLUSH (1u << 17) +#define HV_NESTED_MSR_BITMAP (1u << 19) /* * Basic virtualized MSRs @@ -119,6 +152,18 @@ #define HV_X64_MSR_STIMER3_COUNT 0x400000B7 /* + * Hyper-V Synthetic debug options MSR + */ +#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1 +#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2 +#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3 +#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4 +#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5 +#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF + +#define HV_X64_SYNDBG_OPTION_USE_HCALLS BIT(2) + +/* * Guest crash notification MSRs */ #define HV_X64_MSR_CRASH_P0 0x40000100 @@ -134,29 +179,11 @@ * Reenlightenment notification MSRs */ #define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106 +#define HV_REENLIGHTENMENT_ENABLE_BIT (1u << 16) #define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107 #define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108 /* - * Hypercall status code - */ -#define HV_STATUS_SUCCESS 0 -#define HV_STATUS_INVALID_HYPERCALL_CODE 2 -#define HV_STATUS_INVALID_HYPERCALL_INPUT 3 -#define HV_STATUS_INVALID_ALIGNMENT 4 -#define HV_STATUS_INVALID_PARAMETER 5 -#define HV_STATUS_INSUFFICIENT_MEMORY 11 -#define HV_STATUS_INVALID_CONNECTION_ID 18 -#define HV_STATUS_INSUFFICIENT_BUFFERS 19 - -/* - * Hypercall numbers - */ -#define HV_POST_MESSAGE 0x005c -#define HV_SIGNAL_EVENT 0x005d -#define HV_HYPERCALL_FAST (1u << 16) - -/* * Hypercall MSR bits */ #define HV_HYPERCALL_ENABLE (1u << 0) @@ -165,7 +192,6 @@ * Synthetic interrupt controller definitions */ #define HV_SYNIC_VERSION 1 -#define HV_SINT_COUNT 16 #define HV_SYNIC_ENABLE (1u << 0) #define HV_SIMP_ENABLE (1u << 0) #define HV_SIEFP_ENABLE (1u << 0) @@ -176,93 +202,15 @@ #define HV_STIMER_COUNT 
4 /* - * Message size - */ -#define HV_MESSAGE_PAYLOAD_SIZE 240 - -/* - * Message types + * Synthetic debugger control definitions */ -#define HV_MESSAGE_NONE 0x00000000 -#define HV_MESSAGE_VMBUS 0x00000001 -#define HV_MESSAGE_UNMAPPED_GPA 0x80000000 -#define HV_MESSAGE_GPA_INTERCEPT 0x80000001 -#define HV_MESSAGE_TIMER_EXPIRED 0x80000010 -#define HV_MESSAGE_INVALID_VP_REGISTER_VALUE 0x80000020 -#define HV_MESSAGE_UNRECOVERABLE_EXCEPTION 0x80000021 -#define HV_MESSAGE_UNSUPPORTED_FEATURE 0x80000022 -#define HV_MESSAGE_EVENTLOG_BUFFERCOMPLETE 0x80000040 -#define HV_MESSAGE_X64_IOPORT_INTERCEPT 0x80010000 -#define HV_MESSAGE_X64_MSR_INTERCEPT 0x80010001 -#define HV_MESSAGE_X64_CPUID_INTERCEPT 0x80010002 -#define HV_MESSAGE_X64_EXCEPTION_INTERCEPT 0x80010003 -#define HV_MESSAGE_X64_APIC_EOI 0x80010004 -#define HV_MESSAGE_X64_LEGACY_FP_ERROR 0x80010005 - -/* - * Message flags - */ -#define HV_MESSAGE_FLAG_PENDING 0x1 - -/* - * Event flags number per SINT - */ -#define HV_EVENT_FLAGS_COUNT (256 * 8) - -/* - * Connection id valid bits - */ -#define HV_CONNECTION_ID_MASK 0x00ffffff - -/* - * Input structure for POST_MESSAGE hypercall - */ -struct hyperv_post_message_input { - uint32_t connection_id; - uint32_t _reserved; - uint32_t message_type; - uint32_t payload_size; - uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; -}; - -/* - * Input structure for SIGNAL_EVENT hypercall - */ -struct hyperv_signal_event_input { - uint32_t connection_id; - uint16_t flag_number; - uint16_t _reserved_zero; -}; - -/* - * SynIC message structures - */ -struct hyperv_message_header { - uint32_t message_type; - uint8_t payload_size; - uint8_t message_flags; /* HV_MESSAGE_FLAG_XX */ - uint8_t _reserved[2]; - uint64_t sender; -}; - -struct hyperv_message { - struct hyperv_message_header header; - uint8_t payload[HV_MESSAGE_PAYLOAD_SIZE]; -}; - -struct hyperv_message_page { - struct hyperv_message slot[HV_SINT_COUNT]; -}; - -/* - * SynIC event flags structures - */ -struct hyperv_event_flags { - 
DECLARE_BITMAP(flags, HV_EVENT_FLAGS_COUNT); -}; - -struct hyperv_event_flags_page { - struct hyperv_event_flags slot[HV_SINT_COUNT]; -}; +#define HV_SYNDBG_CONTROL_SEND (1u << 0) +#define HV_SYNDBG_CONTROL_RECV (1u << 1) +#define HV_SYNDBG_CONTROL_SEND_SIZE(ctl) ((ctl >> 16) & 0xffff) +#define HV_SYNDBG_STATUS_INVALID (0) +#define HV_SYNDBG_STATUS_SEND_SUCCESS (1u << 0) +#define HV_SYNDBG_STATUS_RECV_SUCCESS (1u << 2) +#define HV_SYNDBG_STATUS_RESET (1u << 3) +#define HV_SYNDBG_STATUS_SET_SIZE(st, sz) (st | (sz << 16)) #endif diff --git a/target/i386/kvm/hyperv-stub.c b/target/i386/kvm/hyperv-stub.c new file mode 100644 index 0000000000..3263dcf05d --- /dev/null +++ b/target/i386/kvm/hyperv-stub.c @@ -0,0 +1,58 @@ +/* + * Stubs for CONFIG_HYPERV=n + * + * Copyright (c) 2015-2018 Virtuozzo International GmbH. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "hyperv.h" + +#ifdef CONFIG_KVM +int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) +{ + switch (exit->type) { + case KVM_EXIT_HYPERV_SYNIC: + if (!hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) { + return -1; + } + + /* + * Tracking the changes in the MSRs is unnecessary as there are no + * users for them beside save/load, which is handled nicely by the + * generic MSR save/load code + */ + return 0; + case KVM_EXIT_HYPERV_HCALL: + exit->u.hcall.result = HV_STATUS_INVALID_HYPERCALL_CODE; + return 0; + case KVM_EXIT_HYPERV_SYNDBG: + if (!hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) { + return -1; + } + + return 0; + default: + return -1; + } +} +#endif + +int hyperv_x86_synic_add(X86CPU *cpu) +{ + return -ENOSYS; +} + +void hyperv_x86_synic_reset(X86CPU *cpu) +{ +} + +void hyperv_x86_synic_update(X86CPU *cpu) +{ +} + +void hyperv_x86_set_vmbus_recommended_features_enabled(void) +{ +} diff --git a/target/i386/kvm/hyperv.c b/target/i386/kvm/hyperv.c new file mode 100644 
index 0000000000..f2a3fe650a --- /dev/null +++ b/target/i386/kvm/hyperv.c @@ -0,0 +1,156 @@ +/* + * QEMU KVM Hyper-V support + * + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com> + * + * Authors: + * Andrey Smetanin <asmetanin@virtuozzo.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "hyperv.h" +#include "hw/hyperv/hyperv.h" +#include "hyperv-proto.h" + +int hyperv_x86_synic_add(X86CPU *cpu) +{ + hyperv_synic_add(CPU(cpu)); + return 0; +} + +/* + * All devices possibly using SynIC have to be reset before calling this to let + * them remove their SINT routes first. + */ +void hyperv_x86_synic_reset(X86CPU *cpu) +{ + hyperv_synic_reset(CPU(cpu)); +} + +void hyperv_x86_synic_update(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + bool enable = env->msr_hv_synic_control & HV_SYNIC_ENABLE; + hwaddr msg_page_addr = (env->msr_hv_synic_msg_page & HV_SIMP_ENABLE) ? + (env->msr_hv_synic_msg_page & TARGET_PAGE_MASK) : 0; + hwaddr event_page_addr = (env->msr_hv_synic_evt_page & HV_SIEFP_ENABLE) ? 
+ (env->msr_hv_synic_evt_page & TARGET_PAGE_MASK) : 0; + hyperv_synic_update(CPU(cpu), enable, msg_page_addr, event_page_addr); +} + +static void async_synic_update(CPUState *cs, run_on_cpu_data data) +{ + bql_lock(); + hyperv_x86_synic_update(X86_CPU(cs)); + bql_unlock(); +} + +int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit) +{ + CPUX86State *env = &cpu->env; + + switch (exit->type) { + case KVM_EXIT_HYPERV_SYNIC: + if (!hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) { + return -1; + } + + switch (exit->u.synic.msr) { + case HV_X64_MSR_SCONTROL: + env->msr_hv_synic_control = exit->u.synic.control; + break; + case HV_X64_MSR_SIMP: + env->msr_hv_synic_msg_page = exit->u.synic.msg_page; + break; + case HV_X64_MSR_SIEFP: + env->msr_hv_synic_evt_page = exit->u.synic.evt_page; + break; + default: + return -1; + } + + /* + * this will run in this cpu thread before it returns to KVM, but in a + * safe environment (i.e. when all cpus are quiescent) -- this is + * necessary because memory hierarchy is being changed + */ + async_safe_run_on_cpu(CPU(cpu), async_synic_update, RUN_ON_CPU_NULL); + + return 0; + case KVM_EXIT_HYPERV_HCALL: { + uint16_t code = exit->u.hcall.input & 0xffff; + bool fast = exit->u.hcall.input & HV_HYPERCALL_FAST; + uint64_t in_param = exit->u.hcall.params[0]; + uint64_t out_param = exit->u.hcall.params[1]; + + switch (code) { + case HV_POST_MESSAGE: + exit->u.hcall.result = hyperv_hcall_post_message(in_param, fast); + break; + case HV_SIGNAL_EVENT: + exit->u.hcall.result = hyperv_hcall_signal_event(in_param, fast); + break; + case HV_POST_DEBUG_DATA: + exit->u.hcall.result = + hyperv_hcall_post_dbg_data(in_param, out_param, fast); + break; + case HV_RETRIEVE_DEBUG_DATA: + exit->u.hcall.result = + hyperv_hcall_retreive_dbg_data(in_param, out_param, fast); + break; + case HV_RESET_DEBUG_SESSION: + exit->u.hcall.result = + hyperv_hcall_reset_dbg_session(out_param); + break; + default: + exit->u.hcall.result = 
HV_STATUS_INVALID_HYPERCALL_CODE; + } + return 0; + } + + case KVM_EXIT_HYPERV_SYNDBG: + if (!hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) { + return -1; + } + + switch (exit->u.syndbg.msr) { + case HV_X64_MSR_SYNDBG_CONTROL: { + uint64_t control = exit->u.syndbg.control; + env->msr_hv_syndbg_control = control; + env->msr_hv_syndbg_send_page = exit->u.syndbg.send_page; + env->msr_hv_syndbg_recv_page = exit->u.syndbg.recv_page; + exit->u.syndbg.status = HV_STATUS_SUCCESS; + if (control & HV_SYNDBG_CONTROL_SEND) { + exit->u.syndbg.status = + hyperv_syndbg_send(env->msr_hv_syndbg_send_page, + HV_SYNDBG_CONTROL_SEND_SIZE(control)); + } else if (control & HV_SYNDBG_CONTROL_RECV) { + exit->u.syndbg.status = + hyperv_syndbg_recv(env->msr_hv_syndbg_recv_page, + TARGET_PAGE_SIZE); + } + break; + } + case HV_X64_MSR_SYNDBG_PENDING_BUFFER: + env->msr_hv_syndbg_pending_page = exit->u.syndbg.pending_page; + hyperv_syndbg_set_pending_page(env->msr_hv_syndbg_pending_page); + break; + default: + return -1; + } + + return 0; + default: + return -1; + } +} + +void hyperv_x86_set_vmbus_recommended_features_enabled(void) +{ + hyperv_set_vmbus_recommended_features_enabled(); +} diff --git a/target/i386/kvm/hyperv.h b/target/i386/kvm/hyperv.h new file mode 100644 index 0000000000..e3982c8f4d --- /dev/null +++ b/target/i386/kvm/hyperv.h @@ -0,0 +1,31 @@ +/* + * QEMU KVM Hyper-V support + * + * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com> + * + * Authors: + * Andrey Smetanin <asmetanin@virtuozzo.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef TARGET_I386_HYPERV_H +#define TARGET_I386_HYPERV_H + +#include "cpu.h" +#include "sysemu/kvm.h" +#include "hw/hyperv/hyperv.h" + +#ifdef CONFIG_KVM +int kvm_hv_handle_exit(X86CPU *cpu, struct kvm_hyperv_exit *exit); +#endif + +int hyperv_x86_synic_add(X86CPU *cpu); +void hyperv_x86_synic_reset(X86CPU *cpu); +void hyperv_x86_synic_update(X86CPU *cpu); + +void hyperv_x86_set_vmbus_recommended_features_enabled(void); + +#endif diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c new file mode 100644 index 0000000000..9c791b7b05 --- /dev/null +++ b/target/i386/kvm/kvm-cpu.c @@ -0,0 +1,208 @@ +/* + * x86 KVM CPU type initialization + * + * Copyright 2021 SUSE LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "host-cpu.h" +#include "kvm-cpu.h" +#include "qapi/error.h" +#include "sysemu/sysemu.h" +#include "hw/boards.h" + +#include "kvm_i386.h" +#include "hw/core/accel-cpu.h" + +static bool kvm_cpu_realizefn(CPUState *cs, Error **errp) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + /* + * The realize order is important, since x86_cpu_realize() checks if + * nothing else has been set by the user (or by accelerators) in + * cpu->ucode_rev and cpu->phys_bits, and updates the CPUID results in + * mwait.ecx. + * This accel realization code also assumes cpu features are already expanded. 
+ * + * realize order: + * + * x86_cpu_realize(): + * -> x86_cpu_expand_features() + * -> cpu_exec_realizefn(): + * -> accel_cpu_common_realize() + * kvm_cpu_realizefn() -> host_cpu_realizefn() + * -> cpu_common_realizefn() + * -> check/update ucode_rev, phys_bits, mwait + */ + if (cpu->max_features) { + if (enable_cpu_pm && kvm_has_waitpkg()) { + env->features[FEAT_7_0_ECX] |= CPUID_7_0_ECX_WAITPKG; + } + if (cpu->ucode_rev == 0) { + cpu->ucode_rev = + kvm_arch_get_supported_msr_feature(kvm_state, + MSR_IA32_UCODE_REV); + } + } + return host_cpu_realizefn(cs, errp); +} + +static bool lmce_supported(void) +{ + uint64_t mce_cap = 0; + + if (kvm_ioctl(kvm_state, KVM_X86_GET_MCE_CAP_SUPPORTED, &mce_cap) < 0) { + return false; + } + return !!(mce_cap & MCG_LMCE_P); +} + +static void kvm_cpu_max_instance_init(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + KVMState *s = kvm_state; + + host_cpu_max_instance_init(cpu); + + if (lmce_supported()) { + object_property_set_bool(OBJECT(cpu), "lmce", true, &error_abort); + } + + env->cpuid_min_level = + kvm_arch_get_supported_cpuid(s, 0x0, 0, R_EAX); + env->cpuid_min_xlevel = + kvm_arch_get_supported_cpuid(s, 0x80000000, 0, R_EAX); + env->cpuid_min_xlevel2 = + kvm_arch_get_supported_cpuid(s, 0xC0000000, 0, R_EAX); +} + +static void kvm_cpu_xsave_init(void) +{ + static bool first = true; + uint32_t eax, ebx, ecx, edx; + int i; + + if (!first) { + return; + } + first = false; + + /* x87 and SSE states are in the legacy region of the XSAVE area. 
*/ + x86_ext_save_areas[XSTATE_FP_BIT].offset = 0; + x86_ext_save_areas[XSTATE_SSE_BIT].offset = 0; + + for (i = XSTATE_SSE_BIT + 1; i < XSAVE_STATE_AREA_COUNT; i++) { + ExtSaveArea *esa = &x86_ext_save_areas[i]; + + if (!esa->size) { + continue; + } + if ((x86_cpu_get_supported_feature_word(esa->feature, false) & esa->bits) + != esa->bits) { + continue; + } + host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx); + if (eax != 0) { + assert(esa->size == eax); + esa->offset = ebx; + esa->ecx = ecx; + } + } +} + +/* + * KVM-specific features that are automatically added/removed + * from cpudef models when KVM is enabled. + * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. + * + * NOTE: features can be enabled by default only if they were + * already available in the oldest kernel version supported + * by the KVM accelerator (see "OS requirements" section at + * docs/system/target-i386.rst) + */ +static PropValue kvm_default_props[] = { + { "kvmclock", "on" }, + { "kvm-nopiodelay", "on" }, + { "kvm-asyncpf", "on" }, + { "kvm-steal-time", "on" }, + { "kvm-pv-eoi", "on" }, + { "kvmclock-stable-bit", "on" }, + { "x2apic", "on" }, + { "kvm-msi-ext-dest-id", "off" }, + { "acpi", "off" }, + { "monitor", "off" }, + { "svm", "off" }, + { NULL, NULL }, +}; + +/* + * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. + */ +void x86_cpu_change_kvm_default(const char *prop, const char *value) +{ + PropValue *pv; + for (pv = kvm_default_props; pv->prop; pv++) { + if (!strcmp(pv->prop, prop)) { + pv->value = value; + break; + } + } + + /* + * It is valid to call this function only for properties that + * are already present in the kvm_default_props table. 
+ */ + assert(pv->prop); +} + +static void kvm_cpu_instance_init(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); + + host_cpu_instance_init(cpu); + + if (xcc->model) { + /* only applies to builtin_x86_defs cpus */ + if (!kvm_irqchip_in_kernel()) { + x86_cpu_change_kvm_default("x2apic", "off"); + } else if (kvm_irqchip_is_split()) { + x86_cpu_change_kvm_default("kvm-msi-ext-dest-id", "on"); + } + + /* Special cases not set in the X86CPUDefinition structs: */ + x86_cpu_apply_props(cpu, kvm_default_props); + } + + if (cpu->max_features) { + kvm_cpu_max_instance_init(cpu); + } + + kvm_cpu_xsave_init(); +} + +static void kvm_cpu_accel_class_init(ObjectClass *oc, void *data) +{ + AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); + + acc->cpu_target_realize = kvm_cpu_realizefn; + acc->cpu_instance_init = kvm_cpu_instance_init; +} +static const TypeInfo kvm_cpu_accel_type_info = { + .name = ACCEL_CPU_NAME("kvm"), + + .parent = TYPE_ACCEL_CPU, + .class_init = kvm_cpu_accel_class_init, + .abstract = true, +}; +static void kvm_cpu_accel_register_types(void) +{ + type_register_static(&kvm_cpu_accel_type_info); +} +type_init(kvm_cpu_accel_register_types); diff --git a/target/i386/kvm/kvm-cpu.h b/target/i386/kvm/kvm-cpu.h new file mode 100644 index 0000000000..e858ca21e5 --- /dev/null +++ b/target/i386/kvm/kvm-cpu.h @@ -0,0 +1,41 @@ +/* + * i386 KVM CPU type and functions + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef KVM_CPU_H +#define KVM_CPU_H + +#ifdef CONFIG_KVM +/* + * Change the value of a KVM-specific default + * + * If value is NULL, no default will be set and the original + * value from the CPU model table will be kept. + * + * It is valid to call this function only for properties that + * are already present in the kvm_default_props table. + */ +void x86_cpu_change_kvm_default(const char *prop, const char *value); + +#else /* !CONFIG_KVM */ + +#define x86_cpu_change_kvm_default(a, b) + +#endif /* CONFIG_KVM */ + +#endif /* KVM_CPU_H */ diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c new file mode 100644 index 0000000000..e68cbe9293 --- /dev/null +++ b/target/i386/kvm/kvm.c @@ -0,0 +1,5831 @@ +/* + * QEMU KVM support + * + * Copyright (C) 2006-2008 Qumranet Technologies + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "qapi/qapi-events-run-state.h" +#include "qapi/error.h" +#include "qapi/visitor.h" +#include <sys/ioctl.h> +#include <sys/utsname.h> +#include <sys/syscall.h> + +#include <linux/kvm.h> +#include "standard-headers/asm-x86/kvm_para.h" +#include "hw/xen/interface/arch-x86/cpuid.h" + +#include "cpu.h" +#include "host-cpu.h" +#include "sysemu/sysemu.h" +#include "sysemu/hw_accel.h" +#include "sysemu/kvm_int.h" +#include "sysemu/runstate.h" +#include "kvm_i386.h" +#include "sev.h" +#include "xen-emu.h" +#include "hyperv.h" +#include "hyperv-proto.h" + +#include "exec/gdbstub.h" +#include "qemu/host-utils.h" +#include "qemu/main-loop.h" +#include "qemu/ratelimit.h" +#include "qemu/config-file.h" +#include "qemu/error-report.h" +#include "qemu/memalign.h" +#include "hw/i386/x86.h" +#include "hw/i386/kvm/xen_evtchn.h" +#include "hw/i386/pc.h" +#include "hw/i386/apic.h" +#include "hw/i386/apic_internal.h" +#include "hw/i386/apic-msidef.h" +#include "hw/i386/intel_iommu.h" +#include "hw/i386/x86-iommu.h" +#include "hw/i386/e820_memory_layout.h" + +#include "hw/xen/xen.h" + +#include "hw/pci/pci.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" +#include "migration/blocker.h" +#include "exec/memattrs.h" +#include "trace.h" + +#include CONFIG_DEVICES + +//#define DEBUG_KVM + +#ifdef DEBUG_KVM +#define DPRINTF(fmt, ...) \ + do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0) +#else +#define DPRINTF(fmt, ...) 
\ + do { } while (0) +#endif + +/* From arch/x86/kvm/lapic.h */ +#define KVM_APIC_BUS_CYCLE_NS 1 +#define KVM_APIC_BUS_FREQUENCY (1000000000ULL / KVM_APIC_BUS_CYCLE_NS) + +#define MSR_KVM_WALL_CLOCK 0x11 +#define MSR_KVM_SYSTEM_TIME 0x12 + +/* A 4096-byte buffer can hold the 8-byte kvm_msrs header, plus + * 255 kvm_msr_entry structs */ +#define MSR_BUF_SIZE 4096 + +static void kvm_init_msrs(X86CPU *cpu); + +const KVMCapabilityInfo kvm_arch_required_capabilities[] = { + KVM_CAP_INFO(SET_TSS_ADDR), + KVM_CAP_INFO(EXT_CPUID), + KVM_CAP_INFO(MP_STATE), + KVM_CAP_INFO(SIGNAL_MSI), + KVM_CAP_INFO(IRQ_ROUTING), + KVM_CAP_INFO(DEBUGREGS), + KVM_CAP_INFO(XSAVE), + KVM_CAP_INFO(VCPU_EVENTS), + KVM_CAP_INFO(X86_ROBUST_SINGLESTEP), + KVM_CAP_INFO(MCE), + KVM_CAP_INFO(ADJUST_CLOCK), + KVM_CAP_INFO(SET_IDENTITY_MAP_ADDR), + KVM_CAP_LAST_INFO +}; + +static bool has_msr_star; +static bool has_msr_hsave_pa; +static bool has_msr_tsc_aux; +static bool has_msr_tsc_adjust; +static bool has_msr_tsc_deadline; +static bool has_msr_feature_control; +static bool has_msr_misc_enable; +static bool has_msr_smbase; +static bool has_msr_bndcfgs; +static int lm_capable_kernel; +static bool has_msr_hv_hypercall; +static bool has_msr_hv_crash; +static bool has_msr_hv_reset; +static bool has_msr_hv_vpindex; +static bool hv_vpindex_settable; +static bool has_msr_hv_runtime; +static bool has_msr_hv_synic; +static bool has_msr_hv_stimer; +static bool has_msr_hv_frequencies; +static bool has_msr_hv_reenlightenment; +static bool has_msr_hv_syndbg_options; +static bool has_msr_xss; +static bool has_msr_umwait; +static bool has_msr_spec_ctrl; +static bool has_tsc_scale_msr; +static bool has_msr_tsx_ctrl; +static bool has_msr_virt_ssbd; +static bool has_msr_smi_count; +static bool has_msr_arch_capabs; +static bool has_msr_core_capabs; +static bool has_msr_vmx_vmfunc; +static bool has_msr_ucode_rev; +static bool has_msr_vmx_procbased_ctls2; +static bool has_msr_perf_capabs; +static bool has_msr_pkrs; + 
+static uint32_t has_architectural_pmu_version; +static uint32_t num_architectural_pmu_gp_counters; +static uint32_t num_architectural_pmu_fixed_counters; + +static int has_xsave2; +static int has_xcrs; +static int has_sregs2; +static int has_exception_payload; +static int has_triple_fault_event; + +static bool has_msr_mcg_ext_ctl; + +static struct kvm_cpuid2 *cpuid_cache; +static struct kvm_cpuid2 *hv_cpuid_cache; +static struct kvm_msr_list *kvm_feature_msrs; + +static KVMMSRHandlers msr_handlers[KVM_MSR_FILTER_MAX_RANGES]; + +#define BUS_LOCK_SLICE_TIME 1000000000ULL /* ns */ +static RateLimit bus_lock_ratelimit_ctrl; +static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value); + +bool kvm_has_smm(void) +{ + return kvm_vm_check_extension(kvm_state, KVM_CAP_X86_SMM); +} + +bool kvm_has_adjust_clock_stable(void) +{ + int ret = kvm_check_extension(kvm_state, KVM_CAP_ADJUST_CLOCK); + + return (ret & KVM_CLOCK_TSC_STABLE); +} + +bool kvm_has_exception_payload(void) +{ + return has_exception_payload; +} + +static bool kvm_x2apic_api_set_flags(uint64_t flags) +{ + KVMState *s = KVM_STATE(current_accel()); + + return !kvm_vm_enable_cap(s, KVM_CAP_X2APIC_API, 0, flags); +} + +#define MEMORIZE(fn, _result) \ + ({ \ + static bool _memorized; \ + \ + if (_memorized) { \ + return _result; \ + } \ + _memorized = true; \ + _result = fn; \ + }) + +static bool has_x2apic_api; + +bool kvm_has_x2apic_api(void) +{ + return has_x2apic_api; +} + +bool kvm_enable_x2apic(void) +{ + return MEMORIZE( + kvm_x2apic_api_set_flags(KVM_X2APIC_API_USE_32BIT_IDS | + KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK), + has_x2apic_api); +} + +bool kvm_hv_vpindex_settable(void) +{ + return hv_vpindex_settable; +} + +static int kvm_get_tsc(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + uint64_t value; + int ret; + + if (env->tsc_valid) { + return 0; + } + + env->tsc_valid = !runstate_is_running(); + + ret = kvm_get_one_msr(cpu, MSR_IA32_TSC, &value); + if (ret < 0) { 
+ return ret; + } + + env->tsc = value; + return 0; +} + +static inline void do_kvm_synchronize_tsc(CPUState *cpu, run_on_cpu_data arg) +{ + kvm_get_tsc(cpu); +} + +void kvm_synchronize_all_tsc(void) +{ + CPUState *cpu; + + if (kvm_enabled()) { + CPU_FOREACH(cpu) { + run_on_cpu(cpu, do_kvm_synchronize_tsc, RUN_ON_CPU_NULL); + } + } +} + +static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) +{ + struct kvm_cpuid2 *cpuid; + int r, size; + + size = sizeof(*cpuid) + max * sizeof(*cpuid->entries); + cpuid = g_malloc0(size); + cpuid->nent = max; + r = kvm_ioctl(s, KVM_GET_SUPPORTED_CPUID, cpuid); + if (r == 0 && cpuid->nent >= max) { + r = -E2BIG; + } + if (r < 0) { + if (r == -E2BIG) { + g_free(cpuid); + return NULL; + } else { + fprintf(stderr, "KVM_GET_SUPPORTED_CPUID failed: %s\n", + strerror(-r)); + exit(1); + } + } + return cpuid; +} + +/* Run KVM_GET_SUPPORTED_CPUID ioctl(), allocating a buffer large enough + * for all entries. + */ +static struct kvm_cpuid2 *get_supported_cpuid(KVMState *s) +{ + struct kvm_cpuid2 *cpuid; + int max = 1; + + if (cpuid_cache != NULL) { + return cpuid_cache; + } + while ((cpuid = try_get_cpuid(s, max)) == NULL) { + max *= 2; + } + cpuid_cache = cpuid; + return cpuid; +} + +static bool host_tsx_broken(void) +{ + int family, model, stepping; + char vendor[CPUID_VENDOR_SZ + 1]; + + host_cpu_vendor_fms(vendor, &family, &model, &stepping); + + /* Check if we are running on a Haswell host known to have broken TSX */ + return !strcmp(vendor, CPUID_VENDOR_INTEL) && + (family == 6) && + ((model == 63 && stepping < 4) || + model == 60 || model == 69 || model == 70); +} + +/* Returns the value for a specific register on the cpuid entry + */ +static uint32_t cpuid_entry_get_reg(struct kvm_cpuid_entry2 *entry, int reg) +{ + uint32_t ret = 0; + switch (reg) { + case R_EAX: + ret = entry->eax; + break; + case R_EBX: + ret = entry->ebx; + break; + case R_ECX: + ret = entry->ecx; + break; + case R_EDX: + ret = entry->edx; + break; + } + 
return ret; +} + +/* Find matching entry for function/index on kvm_cpuid2 struct + */ +static struct kvm_cpuid_entry2 *cpuid_find_entry(struct kvm_cpuid2 *cpuid, + uint32_t function, + uint32_t index) +{ + int i; + for (i = 0; i < cpuid->nent; ++i) { + if (cpuid->entries[i].function == function && + cpuid->entries[i].index == index) { + return &cpuid->entries[i]; + } + } + /* not found: */ + return NULL; +} + +uint32_t kvm_arch_get_supported_cpuid(KVMState *s, uint32_t function, + uint32_t index, int reg) +{ + struct kvm_cpuid2 *cpuid; + uint32_t ret = 0; + uint32_t cpuid_1_edx, unused; + uint64_t bitmask; + + cpuid = get_supported_cpuid(s); + + struct kvm_cpuid_entry2 *entry = cpuid_find_entry(cpuid, function, index); + if (entry) { + ret = cpuid_entry_get_reg(entry, reg); + } + + /* Fixups for the data returned by KVM, below */ + + if (function == 1 && reg == R_EDX) { + /* KVM before 2.6.30 misreports the following features */ + ret |= CPUID_MTRR | CPUID_PAT | CPUID_MCE | CPUID_MCA; + /* KVM never reports CPUID_HT but QEMU can support when vcpus > 1 */ + ret |= CPUID_HT; + } else if (function == 1 && reg == R_ECX) { + /* We can set the hypervisor flag, even if KVM does not return it on + * GET_SUPPORTED_CPUID + */ + ret |= CPUID_EXT_HYPERVISOR; + /* tsc-deadline flag is not returned by GET_SUPPORTED_CPUID, but it + * can be enabled if the kernel has KVM_CAP_TSC_DEADLINE_TIMER, + * and the irqchip is in the kernel. 
+ */ + if (kvm_irqchip_in_kernel() && + kvm_check_extension(s, KVM_CAP_TSC_DEADLINE_TIMER)) { + ret |= CPUID_EXT_TSC_DEADLINE_TIMER; + } + + /* x2apic is reported by GET_SUPPORTED_CPUID, but it can't be enabled + * without the in-kernel irqchip + */ + if (!kvm_irqchip_in_kernel()) { + ret &= ~CPUID_EXT_X2APIC; + } + + if (enable_cpu_pm) { + int disable_exits = kvm_check_extension(s, + KVM_CAP_X86_DISABLE_EXITS); + + if (disable_exits & KVM_X86_DISABLE_EXITS_MWAIT) { + ret |= CPUID_EXT_MONITOR; + } + } + } else if (function == 6 && reg == R_EAX) { + ret |= CPUID_6_EAX_ARAT; /* safe to allow because of emulated APIC */ + } else if (function == 7 && index == 0 && reg == R_EBX) { + /* Not new instructions, just an optimization. */ + uint32_t ebx; + host_cpuid(7, 0, &unused, &ebx, &unused, &unused); + ret |= ebx & CPUID_7_0_EBX_ERMS; + + if (host_tsx_broken()) { + ret &= ~(CPUID_7_0_EBX_RTM | CPUID_7_0_EBX_HLE); + } + } else if (function == 7 && index == 0 && reg == R_EDX) { + /* Not new instructions, just an optimization. */ + uint32_t edx; + host_cpuid(7, 0, &unused, &unused, &unused, &edx); + ret |= edx & CPUID_7_0_EDX_FSRM; + + /* + * Linux v4.17-v4.20 incorrectly return ARCH_CAPABILITIES on SVM hosts. + * We can detect the bug by checking if MSR_IA32_ARCH_CAPABILITIES is + * returned by KVM_GET_MSR_INDEX_LIST. + */ + if (!has_msr_arch_capabs) { + ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES; + } + } else if (function == 7 && index == 1 && reg == R_EAX) { + /* Not new instructions, just an optimization. 
*/ + uint32_t eax; + host_cpuid(7, 1, &eax, &unused, &unused, &unused); + ret |= eax & (CPUID_7_1_EAX_FZRM | CPUID_7_1_EAX_FSRS | CPUID_7_1_EAX_FSRC); + } else if (function == 7 && index == 2 && reg == R_EDX) { + uint32_t edx; + host_cpuid(7, 2, &unused, &unused, &unused, &edx); + ret |= edx & CPUID_7_2_EDX_MCDT_NO; + } else if (function == 0xd && index == 0 && + (reg == R_EAX || reg == R_EDX)) { + /* + * The value returned by KVM_GET_SUPPORTED_CPUID does not include + * features that still have to be enabled with the arch_prctl + * system call. QEMU needs the full value, which is retrieved + * with KVM_GET_DEVICE_ATTR. + */ + struct kvm_device_attr attr = { + .group = 0, + .attr = KVM_X86_XCOMP_GUEST_SUPP, + .addr = (unsigned long) &bitmask + }; + + bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES); + if (!sys_attr) { + return ret; + } + + int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr); + if (rc < 0) { + if (rc != -ENXIO) { + warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) " + "error: %d", rc); + } + return ret; + } + ret = (reg == R_EAX) ? bitmask : bitmask >> 32; + } else if (function == 0x80000001 && reg == R_ECX) { + /* + * It's safe to enable TOPOEXT even if it's not returned by + * GET_SUPPORTED_CPUID. Unconditionally enabling TOPOEXT here allows + * us to keep CPU models including TOPOEXT runnable on older kernels. 
+ */ + ret |= CPUID_EXT3_TOPOEXT; + } else if (function == 0x80000001 && reg == R_EDX) { + /* On Intel, kvm returns cpuid according to the Intel spec, + * so add missing bits according to the AMD spec: + */ + cpuid_1_edx = kvm_arch_get_supported_cpuid(s, 1, 0, R_EDX); + ret |= cpuid_1_edx & CPUID_EXT2_AMD_ALIASES; + } else if (function == KVM_CPUID_FEATURES && reg == R_EAX) { + /* kvm_pv_unhalt is reported by GET_SUPPORTED_CPUID, but it can't + * be enabled without the in-kernel irqchip + */ + if (!kvm_irqchip_in_kernel()) { + ret &= ~(1U << KVM_FEATURE_PV_UNHALT); + } + if (kvm_irqchip_is_split()) { + ret |= 1U << KVM_FEATURE_MSI_EXT_DEST_ID; + } + } else if (function == KVM_CPUID_FEATURES && reg == R_EDX) { + ret |= 1U << KVM_HINTS_REALTIME; + } + + return ret; +} + +uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index) +{ + struct { + struct kvm_msrs info; + struct kvm_msr_entry entries[1]; + } msr_data = {}; + uint64_t value; + uint32_t ret, can_be_one, must_be_one; + + if (kvm_feature_msrs == NULL) { /* Host doesn't support feature MSRs */ + return 0; + } + + /* Check if requested MSR is supported feature MSR */ + int i; + for (i = 0; i < kvm_feature_msrs->nmsrs; i++) + if (kvm_feature_msrs->indices[i] == index) { + break; + } + if (i == kvm_feature_msrs->nmsrs) { + return 0; /* if the feature MSR is not supported, simply return 0 */ + } + + msr_data.info.nmsrs = 1; + msr_data.entries[0].index = index; + + ret = kvm_ioctl(s, KVM_GET_MSRS, &msr_data); + if (ret != 1) { + error_report("KVM get MSR (index=0x%x) feature failed, %s", + index, strerror(-ret)); + exit(1); + } + + value = msr_data.entries[0].data; + switch (index) { + case MSR_IA32_VMX_PROCBASED_CTLS2: + if (!has_msr_vmx_procbased_ctls2) { + /* KVM forgot to add these bits for some time, do this ourselves. 
*/ + if (kvm_arch_get_supported_cpuid(s, 0xD, 1, R_ECX) & + CPUID_XSAVE_XSAVES) { + value |= (uint64_t)VMX_SECONDARY_EXEC_XSAVES << 32; + } + if (kvm_arch_get_supported_cpuid(s, 1, 0, R_ECX) & + CPUID_EXT_RDRAND) { + value |= (uint64_t)VMX_SECONDARY_EXEC_RDRAND_EXITING << 32; + } + if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & + CPUID_7_0_EBX_INVPCID) { + value |= (uint64_t)VMX_SECONDARY_EXEC_ENABLE_INVPCID << 32; + } + if (kvm_arch_get_supported_cpuid(s, 7, 0, R_EBX) & + CPUID_7_0_EBX_RDSEED) { + value |= (uint64_t)VMX_SECONDARY_EXEC_RDSEED_EXITING << 32; + } + if (kvm_arch_get_supported_cpuid(s, 0x80000001, 0, R_EDX) & + CPUID_EXT2_RDTSCP) { + value |= (uint64_t)VMX_SECONDARY_EXEC_RDTSCP << 32; + } + } + /* fall through */ + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: + case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: + case MSR_IA32_VMX_TRUE_ENTRY_CTLS: + case MSR_IA32_VMX_TRUE_EXIT_CTLS: + /* + * Return true for bits that can be one, but do not have to be one. + * The SDM tells us which bits could have a "must be one" setting, + * so we can do the opposite transformation in make_vmx_msr_value. 
+ */ + must_be_one = (uint32_t)value; + can_be_one = (uint32_t)(value >> 32); + return can_be_one & ~must_be_one; + + default: + return value; + } +} + +static int kvm_get_mce_cap_supported(KVMState *s, uint64_t *mce_cap, + int *max_banks) +{ + *max_banks = kvm_check_extension(s, KVM_CAP_MCE); + return kvm_ioctl(s, KVM_X86_GET_MCE_CAP_SUPPORTED, mce_cap); +} + +static void kvm_mce_inject(X86CPU *cpu, hwaddr paddr, int code) +{ + CPUState *cs = CPU(cpu); + CPUX86State *env = &cpu->env; + uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN | + MCI_STATUS_MISCV | MCI_STATUS_ADDRV | MCI_STATUS_S; + uint64_t mcg_status = MCG_STATUS_MCIP; + int flags = 0; + + if (code == BUS_MCEERR_AR) { + status |= MCI_STATUS_AR | 0x134; + mcg_status |= MCG_STATUS_RIPV | MCG_STATUS_EIPV; + } else { + status |= 0xc0; + mcg_status |= MCG_STATUS_RIPV; + } + + flags = cpu_x86_support_mca_broadcast(env) ? MCE_INJECT_BROADCAST : 0; + /* We need to read back the value of MSR_EXT_MCG_CTL that was set by the + * guest kernel back into env->mcg_ext_ctl. 
+ */ + cpu_synchronize_state(cs); + if (env->mcg_ext_ctl & MCG_EXT_CTL_LMCE_EN) { + mcg_status |= MCG_STATUS_LMCE; + flags = 0; + } + + cpu_x86_inject_mce(NULL, cpu, 9, status, mcg_status, paddr, + (MCM_ADDR_PHYS << 6) | 0xc, flags); +} + +static void emit_hypervisor_memory_failure(MemoryFailureAction action, bool ar) +{ + MemoryFailureFlags mff = {.action_required = ar, .recursive = false}; + + qapi_event_send_memory_failure(MEMORY_FAILURE_RECIPIENT_HYPERVISOR, action, + &mff); +} + +static void hardware_memory_error(void *host_addr) +{ + emit_hypervisor_memory_failure(MEMORY_FAILURE_ACTION_FATAL, true); + error_report("QEMU got Hardware memory error at addr %p", host_addr); + exit(1); +} + +void kvm_arch_on_sigbus_vcpu(CPUState *c, int code, void *addr) +{ + X86CPU *cpu = X86_CPU(c); + CPUX86State *env = &cpu->env; + ram_addr_t ram_addr; + hwaddr paddr; + + /* If we get an action required MCE, it has been injected by KVM + * while the VM was running. An action optional MCE instead should + * be coming from the main thread, which qemu_init_sigbus identifies + * as the "early kill" thread. + */ + assert(code == BUS_MCEERR_AR || code == BUS_MCEERR_AO); + + if ((env->mcg_cap & MCG_SER_P) && addr) { + ram_addr = qemu_ram_addr_from_host(addr); + if (ram_addr != RAM_ADDR_INVALID && + kvm_physical_memory_addr_from_host(c->kvm_state, addr, &paddr)) { + kvm_hwpoison_page_add(ram_addr); + kvm_mce_inject(cpu, paddr, code); + + /* + * Use different logging severity based on error type. + * If there is additional MCE reporting on the hypervisor, QEMU VA + * could be another source to identify the PA and MCE details. 
+ */ + if (code == BUS_MCEERR_AR) { + error_report("Guest MCE Memory Error at QEMU addr %p and " + "GUEST addr 0x%" HWADDR_PRIx " of type %s injected", + addr, paddr, "BUS_MCEERR_AR"); + } else { + warn_report("Guest MCE Memory Error at QEMU addr %p and " + "GUEST addr 0x%" HWADDR_PRIx " of type %s injected", + addr, paddr, "BUS_MCEERR_AO"); + } + + return; + } + + if (code == BUS_MCEERR_AO) { + warn_report("Hardware memory error at addr %p of type %s " + "for memory used by QEMU itself instead of guest system!", + addr, "BUS_MCEERR_AO"); + } + } + + if (code == BUS_MCEERR_AR) { + hardware_memory_error(addr); + } + + /* Hope we are lucky for AO MCE, just notify an event */ + emit_hypervisor_memory_failure(MEMORY_FAILURE_ACTION_IGNORE, false); +} + +static void kvm_queue_exception(CPUX86State *env, + int32_t exception_nr, + uint8_t exception_has_payload, + uint64_t exception_payload) +{ /* Record exception_nr (and its payload) in env; the asserts below require that nothing is queued yet */ + assert(env->exception_nr == -1); + assert(!env->exception_pending); + assert(!env->exception_injected); + assert(!env->exception_has_payload); + + env->exception_nr = exception_nr; + + if (has_exception_payload) { /* mark the exception pending and keep the payload in env */ + env->exception_pending = 1; + + env->exception_has_payload = exception_has_payload; + env->exception_payload = exception_payload; + } else { /* mark it injected and write the payload directly: dr[6] for EXCP01_DB, cr[2] for EXCP0E_PAGE */ + env->exception_injected = 1; + + if (exception_nr == EXCP01_DB) { + assert(exception_has_payload); + env->dr[6] = exception_payload; + } else if (exception_nr == EXCP0E_PAGE) { + assert(exception_has_payload); + env->cr[2] = exception_payload; + } else { /* any other exception must come without a payload on this path */ + assert(!exception_has_payload); + } + } +} + +static void cpu_update_state(void *opaque, bool running, RunState state) +{ + CPUX86State *env = opaque; + + if (running) { /* invalidate the cached TSC so that kvm_get_tsc() reads the MSR again */ + env->tsc_valid = false; + } +} + +unsigned long kvm_arch_vcpu_id(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + return cpu->apic_id; +} + +#ifndef KVM_CPUID_SIGNATURE_NEXT +#define KVM_CPUID_SIGNATURE_NEXT 0x40000100 +#endif + +static bool hyperv_enabled(X86CPU *cpu) +{ + return kvm_check_extension(kvm_state, 
KVM_CAP_HYPERV) > 0 && + ((cpu->hyperv_spinlock_attempts != HYPERV_SPINLOCK_NEVER_NOTIFY) || + cpu->hyperv_features || cpu->hyperv_passthrough); +} + +/* + * Check whether target_freq is within conservative + * ntp correctable bounds (250ppm) of freq + */ +static inline bool freq_within_bounds(int freq, int target_freq) +{ + int max_freq = freq + (freq * 250 / 1000000); + int min_freq = freq - (freq * 250 / 1000000); + + if (target_freq >= min_freq && target_freq <= max_freq) { + return true; + } + + return false; +} + +static int kvm_arch_set_tsc_khz(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + int r, cur_freq; + bool set_ioctl = false; + + if (!env->tsc_khz) { + return 0; + } + + cur_freq = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? + kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : -ENOTSUP; + + /* + * If TSC scaling is supported, attempt to set TSC frequency. + */ + if (kvm_check_extension(cs->kvm_state, KVM_CAP_TSC_CONTROL)) { + set_ioctl = true; + } + + /* + * If desired TSC frequency is within bounds of NTP correction, + * attempt to set TSC frequency. + */ + if (cur_freq != -ENOTSUP && freq_within_bounds(cur_freq, env->tsc_khz)) { + set_ioctl = true; + } + + r = set_ioctl ? + kvm_vcpu_ioctl(cs, KVM_SET_TSC_KHZ, env->tsc_khz) : + -ENOTSUP; + + if (r < 0) { + /* When KVM_SET_TSC_KHZ fails, it's an error only if the current + * TSC frequency doesn't match the one we want. + */ + cur_freq = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? 
+ kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : + -ENOTSUP; + if (cur_freq <= 0 || cur_freq != env->tsc_khz) { + warn_report("TSC frequency mismatch between " + "VM (%" PRId64 " kHz) and host (%d kHz), " + "and TSC scaling unavailable", + env->tsc_khz, cur_freq); + return r; + } + } + + return 0; +} + +static bool tsc_is_stable_and_known(CPUX86State *env) +{ + if (!env->tsc_khz) { + return false; + } + return (env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) + || env->user_tsc_khz; +} + +#define DEFAULT_EVMCS_VERSION ((1 << 8) | 1) + +static struct { + const char *desc; + struct { + uint32_t func; + int reg; + uint32_t bits; + } flags[2]; + uint64_t dependencies; +} kvm_hyperv_properties[] = { + [HYPERV_FEAT_RELAXED] = { + .desc = "relaxed timing (hv-relaxed)", + .flags = { + {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX, + .bits = HV_RELAXED_TIMING_RECOMMENDED} + } + }, + [HYPERV_FEAT_VAPIC] = { + .desc = "virtual APIC (hv-vapic)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EAX, + .bits = HV_APIC_ACCESS_AVAILABLE} + } + }, + [HYPERV_FEAT_TIME] = { + .desc = "clocksources (hv-time)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EAX, + .bits = HV_TIME_REF_COUNT_AVAILABLE | HV_REFERENCE_TSC_AVAILABLE} + } + }, + [HYPERV_FEAT_CRASH] = { + .desc = "crash MSRs (hv-crash)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EDX, + .bits = HV_GUEST_CRASH_MSR_AVAILABLE} + } + }, + [HYPERV_FEAT_RESET] = { + .desc = "reset MSR (hv-reset)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EAX, + .bits = HV_RESET_AVAILABLE} + } + }, + [HYPERV_FEAT_VPINDEX] = { + .desc = "VP_INDEX MSR (hv-vpindex)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EAX, + .bits = HV_VP_INDEX_AVAILABLE} + } + }, + [HYPERV_FEAT_RUNTIME] = { + .desc = "VP_RUNTIME MSR (hv-runtime)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EAX, + .bits = HV_VP_RUNTIME_AVAILABLE} + } + }, + [HYPERV_FEAT_SYNIC] = { + .desc = "synthetic interrupt controller (hv-synic)", + .flags = { + 
{.func = HV_CPUID_FEATURES, .reg = R_EAX, + .bits = HV_SYNIC_AVAILABLE} + } + }, + [HYPERV_FEAT_STIMER] = { + .desc = "synthetic timers (hv-stimer)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EAX, + .bits = HV_SYNTIMERS_AVAILABLE} + }, + .dependencies = BIT(HYPERV_FEAT_SYNIC) | BIT(HYPERV_FEAT_TIME) + }, + [HYPERV_FEAT_FREQUENCIES] = { + .desc = "frequency MSRs (hv-frequencies)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EAX, + .bits = HV_ACCESS_FREQUENCY_MSRS}, + {.func = HV_CPUID_FEATURES, .reg = R_EDX, + .bits = HV_FREQUENCY_MSRS_AVAILABLE} + } + }, + [HYPERV_FEAT_REENLIGHTENMENT] = { + .desc = "reenlightenment MSRs (hv-reenlightenment)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EAX, + .bits = HV_ACCESS_REENLIGHTENMENTS_CONTROL} + } + }, + [HYPERV_FEAT_TLBFLUSH] = { + .desc = "paravirtualized TLB flush (hv-tlbflush)", + .flags = { + {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX, + .bits = HV_REMOTE_TLB_FLUSH_RECOMMENDED | + HV_EX_PROCESSOR_MASKS_RECOMMENDED} + }, + .dependencies = BIT(HYPERV_FEAT_VPINDEX) + }, + [HYPERV_FEAT_EVMCS] = { + .desc = "enlightened VMCS (hv-evmcs)", + .flags = { + {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX, + .bits = HV_ENLIGHTENED_VMCS_RECOMMENDED} + }, + .dependencies = BIT(HYPERV_FEAT_VAPIC) + }, + [HYPERV_FEAT_IPI] = { + .desc = "paravirtualized IPI (hv-ipi)", + .flags = { + {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX, + .bits = HV_CLUSTER_IPI_RECOMMENDED | + HV_EX_PROCESSOR_MASKS_RECOMMENDED} + }, + .dependencies = BIT(HYPERV_FEAT_VPINDEX) + }, + [HYPERV_FEAT_STIMER_DIRECT] = { + .desc = "direct mode synthetic timers (hv-stimer-direct)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EDX, + .bits = HV_STIMER_DIRECT_MODE_AVAILABLE} + }, + .dependencies = BIT(HYPERV_FEAT_STIMER) + }, + [HYPERV_FEAT_AVIC] = { + .desc = "AVIC/APICv support (hv-avic/hv-apicv)", + .flags = { + {.func = HV_CPUID_ENLIGHTMENT_INFO, .reg = R_EAX, + .bits = HV_DEPRECATING_AEOI_RECOMMENDED} + } + }, +#ifdef 
CONFIG_SYNDBG + [HYPERV_FEAT_SYNDBG] = { + .desc = "Enable synthetic kernel debugger channel (hv-syndbg)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EDX, + .bits = HV_FEATURE_DEBUG_MSRS_AVAILABLE} + }, + .dependencies = BIT(HYPERV_FEAT_SYNIC) | BIT(HYPERV_FEAT_RELAXED) + }, +#endif + [HYPERV_FEAT_MSR_BITMAP] = { + .desc = "enlightened MSR-Bitmap (hv-emsr-bitmap)", + .flags = { + {.func = HV_CPUID_NESTED_FEATURES, .reg = R_EAX, + .bits = HV_NESTED_MSR_BITMAP} + } + }, + [HYPERV_FEAT_XMM_INPUT] = { + .desc = "XMM fast hypercall input (hv-xmm-input)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EDX, + .bits = HV_HYPERCALL_XMM_INPUT_AVAILABLE} + } + }, + [HYPERV_FEAT_TLBFLUSH_EXT] = { + .desc = "Extended gva ranges for TLB flush hypercalls (hv-tlbflush-ext)", + .flags = { + {.func = HV_CPUID_FEATURES, .reg = R_EDX, + .bits = HV_EXT_GVA_RANGES_FLUSH_AVAILABLE} + }, + .dependencies = BIT(HYPERV_FEAT_TLBFLUSH) + }, + [HYPERV_FEAT_TLBFLUSH_DIRECT] = { + .desc = "direct TLB flush (hv-tlbflush-direct)", + .flags = { + {.func = HV_CPUID_NESTED_FEATURES, .reg = R_EAX, + .bits = HV_NESTED_DIRECT_FLUSH} + }, + .dependencies = BIT(HYPERV_FEAT_VAPIC) + }, +}; + +static struct kvm_cpuid2 *try_get_hv_cpuid(CPUState *cs, int max, + bool do_sys_ioctl) +{ + struct kvm_cpuid2 *cpuid; + int r, size; + + size = sizeof(*cpuid) + max * sizeof(*cpuid->entries); + cpuid = g_malloc0(size); + cpuid->nent = max; + + if (do_sys_ioctl) { + r = kvm_ioctl(kvm_state, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + } else { + r = kvm_vcpu_ioctl(cs, KVM_GET_SUPPORTED_HV_CPUID, cpuid); + } + if (r == 0 && cpuid->nent >= max) { + r = -E2BIG; + } + if (r < 0) { + if (r == -E2BIG) { + g_free(cpuid); + return NULL; + } else { + fprintf(stderr, "KVM_GET_SUPPORTED_HV_CPUID failed: %s\n", + strerror(-r)); + exit(1); + } + } + return cpuid; +} + +/* + * Run KVM_GET_SUPPORTED_HV_CPUID ioctl(), allocating a buffer large enough + * for all entries. 
+ */ +static struct kvm_cpuid2 *get_supported_hv_cpuid(CPUState *cs) +{ + struct kvm_cpuid2 *cpuid; + /* 0x40000000..0x40000005, 0x4000000A, 0x40000080..0x40000082 leaves */ + int max = 11; /* the 10 leaves listed above plus one: try_get_hv_cpuid() treats nent >= max as "buffer too small" */ + int i; + bool do_sys_ioctl; + + do_sys_ioctl = + kvm_check_extension(kvm_state, KVM_CAP_SYS_HYPERV_CPUID) > 0; + + /* + * Non-empty KVM context is needed when KVM_CAP_SYS_HYPERV_CPUID is + * unsupported, kvm_hyperv_expand_features() checks for that. + */ + assert(do_sys_ioctl || cs->kvm_state); + + /* + * When the buffer is too small, KVM_GET_SUPPORTED_HV_CPUID fails with + * -E2BIG, however, it doesn't report back the right size. Keep increasing + * it and re-trying until we succeed. + */ + while ((cpuid = try_get_hv_cpuid(cs, max, do_sys_ioctl)) == NULL) { + max++; /* try_get_hv_cpuid() already freed the too-small buffer before returning NULL */ + } + + /* + * KVM_GET_SUPPORTED_HV_CPUID does not set EVMCS CPUID bit before + * KVM_CAP_HYPERV_ENLIGHTENED_VMCS is enabled but we want to get the + * information early, just check for the capability and set the bit + * manually. + */ + if (!do_sys_ioctl && kvm_check_extension(cs->kvm_state, + KVM_CAP_HYPERV_ENLIGHTENED_VMCS) > 0) { + for (i = 0; i < cpuid->nent; i++) { + if (cpuid->entries[i].function == HV_CPUID_ENLIGHTMENT_INFO) { + cpuid->entries[i].eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED; + } + } + } + + return cpuid; /* buffer was g_malloc0()ed by try_get_hv_cpuid(); it is not freed here */ +} + +/* + * When KVM_GET_SUPPORTED_HV_CPUID is not supported we fill CPUID feature + * leaves from KVM_CAP_HYPERV* and present MSRs data. 
+ */ +static struct kvm_cpuid2 *get_supported_hv_cpuid_legacy(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + struct kvm_cpuid2 *cpuid; + struct kvm_cpuid_entry2 *entry_feat, *entry_recomm; + + /* HV_CPUID_FEATURES, HV_CPUID_ENLIGHTMENT_INFO */ + cpuid = g_malloc0(sizeof(*cpuid) + 2 * sizeof(*cpuid->entries)); + cpuid->nent = 2; + + /* HV_CPUID_VENDOR_AND_MAX_FUNCTIONS */ + entry_feat = &cpuid->entries[0]; + entry_feat->function = HV_CPUID_FEATURES; + + entry_recomm = &cpuid->entries[1]; + entry_recomm->function = HV_CPUID_ENLIGHTMENT_INFO; + entry_recomm->ebx = cpu->hyperv_spinlock_attempts; + + if (kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV) > 0) { + entry_feat->eax |= HV_HYPERCALL_AVAILABLE; + entry_feat->eax |= HV_APIC_ACCESS_AVAILABLE; + entry_feat->edx |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; + entry_recomm->eax |= HV_RELAXED_TIMING_RECOMMENDED; + entry_recomm->eax |= HV_APIC_ACCESS_RECOMMENDED; + } + + if (kvm_check_extension(cs->kvm_state, KVM_CAP_HYPERV_TIME) > 0) { + entry_feat->eax |= HV_TIME_REF_COUNT_AVAILABLE; + entry_feat->eax |= HV_REFERENCE_TSC_AVAILABLE; + } + + if (has_msr_hv_frequencies) { + entry_feat->eax |= HV_ACCESS_FREQUENCY_MSRS; + entry_feat->edx |= HV_FREQUENCY_MSRS_AVAILABLE; + } + + if (has_msr_hv_crash) { + entry_feat->edx |= HV_GUEST_CRASH_MSR_AVAILABLE; + } + + if (has_msr_hv_reenlightenment) { + entry_feat->eax |= HV_ACCESS_REENLIGHTENMENTS_CONTROL; + } + + if (has_msr_hv_reset) { + entry_feat->eax |= HV_RESET_AVAILABLE; + } + + if (has_msr_hv_vpindex) { + entry_feat->eax |= HV_VP_INDEX_AVAILABLE; + } + + if (has_msr_hv_runtime) { + entry_feat->eax |= HV_VP_RUNTIME_AVAILABLE; + } + + if (has_msr_hv_synic) { + unsigned int cap = cpu->hyperv_synic_kvm_only ? 
+ KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2; + + if (kvm_check_extension(cs->kvm_state, cap) > 0) { + entry_feat->eax |= HV_SYNIC_AVAILABLE; + } + } + + if (has_msr_hv_stimer) { + entry_feat->eax |= HV_SYNTIMERS_AVAILABLE; + } + + if (has_msr_hv_syndbg_options) { + entry_feat->edx |= HV_GUEST_DEBUGGING_AVAILABLE; + entry_feat->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE; + entry_feat->ebx |= HV_PARTITION_DEBUGGING_ALLOWED; + } + + if (kvm_check_extension(cs->kvm_state, + KVM_CAP_HYPERV_TLBFLUSH) > 0) { + entry_recomm->eax |= HV_REMOTE_TLB_FLUSH_RECOMMENDED; + entry_recomm->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; + } + + if (kvm_check_extension(cs->kvm_state, + KVM_CAP_HYPERV_ENLIGHTENED_VMCS) > 0) { + entry_recomm->eax |= HV_ENLIGHTENED_VMCS_RECOMMENDED; + } + + if (kvm_check_extension(cs->kvm_state, + KVM_CAP_HYPERV_SEND_IPI) > 0) { + entry_recomm->eax |= HV_CLUSTER_IPI_RECOMMENDED; + entry_recomm->eax |= HV_EX_PROCESSOR_MASKS_RECOMMENDED; + } + + return cpuid; +} + +static uint32_t hv_cpuid_get_host(CPUState *cs, uint32_t func, int reg) +{ + struct kvm_cpuid_entry2 *entry; + struct kvm_cpuid2 *cpuid; + + if (hv_cpuid_cache) { + cpuid = hv_cpuid_cache; + } else { + if (kvm_check_extension(kvm_state, KVM_CAP_HYPERV_CPUID) > 0) { + cpuid = get_supported_hv_cpuid(cs); + } else { + /* + * 'cs->kvm_state' may be NULL when Hyper-V features are expanded + * before KVM context is created but this is only done when + * KVM_CAP_SYS_HYPERV_CPUID is supported and it implies + * KVM_CAP_HYPERV_CPUID. 
+ */ + assert(cs->kvm_state); + + cpuid = get_supported_hv_cpuid_legacy(cs); + } + hv_cpuid_cache = cpuid; + } + + if (!cpuid) { + return 0; + } + + entry = cpuid_find_entry(cpuid, func, 0); + if (!entry) { + return 0; + } + + return cpuid_entry_get_reg(entry, reg); +} + +static bool hyperv_feature_supported(CPUState *cs, int feature) +{ + uint32_t func, bits; + int i, reg; + + for (i = 0; i < ARRAY_SIZE(kvm_hyperv_properties[feature].flags); i++) { + + func = kvm_hyperv_properties[feature].flags[i].func; + reg = kvm_hyperv_properties[feature].flags[i].reg; + bits = kvm_hyperv_properties[feature].flags[i].bits; + + if (!func) { + continue; + } + + if ((hv_cpuid_get_host(cs, func, reg) & bits) != bits) { + return false; + } + } + + return true; +} + +/* Checks that all feature dependencies are enabled */ +static bool hv_feature_check_deps(X86CPU *cpu, int feature, Error **errp) +{ + uint64_t deps; + int dep_feat; + + deps = kvm_hyperv_properties[feature].dependencies; + while (deps) { + dep_feat = ctz64(deps); + if (!(hyperv_feat_enabled(cpu, dep_feat))) { + error_setg(errp, "Hyper-V %s requires Hyper-V %s", + kvm_hyperv_properties[feature].desc, + kvm_hyperv_properties[dep_feat].desc); + return false; + } + deps &= ~(1ull << dep_feat); + } + + return true; +} + +static uint32_t hv_build_cpuid_leaf(CPUState *cs, uint32_t func, int reg) +{ + X86CPU *cpu = X86_CPU(cs); + uint32_t r = 0; + int i, j; + + for (i = 0; i < ARRAY_SIZE(kvm_hyperv_properties); i++) { + if (!hyperv_feat_enabled(cpu, i)) { + continue; + } + + for (j = 0; j < ARRAY_SIZE(kvm_hyperv_properties[i].flags); j++) { + if (kvm_hyperv_properties[i].flags[j].func != func) { + continue; + } + if (kvm_hyperv_properties[i].flags[j].reg != reg) { + continue; + } + + r |= kvm_hyperv_properties[i].flags[j].bits; + } + } + + /* HV_CPUID_NESTED_FEATURES.EAX also encodes the supported eVMCS range */ + if (func == HV_CPUID_NESTED_FEATURES && reg == R_EAX) { + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) { + r 
|= DEFAULT_EVMCS_VERSION; + } + } + + return r; +} + +/* + * Expand Hyper-V CPU features. In partucular, check that all the requested + * features are supported by the host and the sanity of the configuration + * (that all the required dependencies are included). Also, this takes care + * of 'hv_passthrough' mode and fills the environment with all supported + * Hyper-V features. + */ +bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp) +{ + CPUState *cs = CPU(cpu); + Error *local_err = NULL; + int feat; + + if (!hyperv_enabled(cpu)) + return true; + + /* + * When kvm_hyperv_expand_features is called at CPU feature expansion + * time per-CPU kvm_state is not available yet so we can only proceed + * when KVM_CAP_SYS_HYPERV_CPUID is supported. + */ + if (!cs->kvm_state && + !kvm_check_extension(kvm_state, KVM_CAP_SYS_HYPERV_CPUID)) + return true; + + if (cpu->hyperv_passthrough) { + cpu->hyperv_vendor_id[0] = + hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_EBX); + cpu->hyperv_vendor_id[1] = + hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_ECX); + cpu->hyperv_vendor_id[2] = + hv_cpuid_get_host(cs, HV_CPUID_VENDOR_AND_MAX_FUNCTIONS, R_EDX); + cpu->hyperv_vendor = g_realloc(cpu->hyperv_vendor, + sizeof(cpu->hyperv_vendor_id) + 1); + memcpy(cpu->hyperv_vendor, cpu->hyperv_vendor_id, + sizeof(cpu->hyperv_vendor_id)); + cpu->hyperv_vendor[sizeof(cpu->hyperv_vendor_id)] = 0; + + cpu->hyperv_interface_id[0] = + hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_EAX); + cpu->hyperv_interface_id[1] = + hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_EBX); + cpu->hyperv_interface_id[2] = + hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_ECX); + cpu->hyperv_interface_id[3] = + hv_cpuid_get_host(cs, HV_CPUID_INTERFACE, R_EDX); + + cpu->hyperv_ver_id_build = + hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EAX); + cpu->hyperv_ver_id_major = + hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EBX) >> 16; + cpu->hyperv_ver_id_minor = + hv_cpuid_get_host(cs, HV_CPUID_VERSION, 
R_EBX) & 0xffff; + cpu->hyperv_ver_id_sp = + hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_ECX); + cpu->hyperv_ver_id_sb = + hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EDX) >> 24; + cpu->hyperv_ver_id_sn = + hv_cpuid_get_host(cs, HV_CPUID_VERSION, R_EDX) & 0xffffff; + + cpu->hv_max_vps = hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, + R_EAX); + cpu->hyperv_limits[0] = + hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, R_EBX); + cpu->hyperv_limits[1] = + hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, R_ECX); + cpu->hyperv_limits[2] = + hv_cpuid_get_host(cs, HV_CPUID_IMPLEMENT_LIMITS, R_EDX); + + cpu->hyperv_spinlock_attempts = + hv_cpuid_get_host(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EBX); + + /* + * Mark feature as enabled in 'cpu->hyperv_features' as + * hv_build_cpuid_leaf() uses this info to build guest CPUIDs. + */ + for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) { + if (hyperv_feature_supported(cs, feat)) { + cpu->hyperv_features |= BIT(feat); + } + } + } else { + /* Check features availability and dependencies */ + for (feat = 0; feat < ARRAY_SIZE(kvm_hyperv_properties); feat++) { + /* If the feature was not requested skip it. 
*/ + if (!hyperv_feat_enabled(cpu, feat)) { + continue; + } + + /* Check if the feature is supported by KVM */ + if (!hyperv_feature_supported(cs, feat)) { + error_setg(errp, "Hyper-V %s is not supported by kernel", + kvm_hyperv_properties[feat].desc); + return false; + } + + /* Check dependencies */ + if (!hv_feature_check_deps(cpu, feat, &local_err)) { + error_propagate(errp, local_err); + return false; + } + } + } + + /* Additional dependencies not covered by kvm_hyperv_properties[] */ + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC) && + !cpu->hyperv_synic_kvm_only && + !hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX)) { + error_setg(errp, "Hyper-V %s requires Hyper-V %s", + kvm_hyperv_properties[HYPERV_FEAT_SYNIC].desc, + kvm_hyperv_properties[HYPERV_FEAT_VPINDEX].desc); + return false; + } + + return true; +} + +/* + * Fill in Hyper-V CPUIDs. Returns the number of entries filled in cpuid_ent. + */ +static int hyperv_fill_cpuids(CPUState *cs, + struct kvm_cpuid_entry2 *cpuid_ent) +{ + X86CPU *cpu = X86_CPU(cs); + struct kvm_cpuid_entry2 *c; + uint32_t signature[3]; + uint32_t cpuid_i = 0, max_cpuid_leaf = 0; + uint32_t nested_eax = + hv_build_cpuid_leaf(cs, HV_CPUID_NESTED_FEATURES, R_EAX); + + max_cpuid_leaf = nested_eax ? 
HV_CPUID_NESTED_FEATURES : + HV_CPUID_IMPLEMENT_LIMITS; + + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) { + max_cpuid_leaf = + MAX(max_cpuid_leaf, HV_CPUID_SYNDBG_PLATFORM_CAPABILITIES); + } + + c = &cpuid_ent[cpuid_i++]; + c->function = HV_CPUID_VENDOR_AND_MAX_FUNCTIONS; + c->eax = max_cpuid_leaf; + c->ebx = cpu->hyperv_vendor_id[0]; + c->ecx = cpu->hyperv_vendor_id[1]; + c->edx = cpu->hyperv_vendor_id[2]; + + c = &cpuid_ent[cpuid_i++]; + c->function = HV_CPUID_INTERFACE; + c->eax = cpu->hyperv_interface_id[0]; + c->ebx = cpu->hyperv_interface_id[1]; + c->ecx = cpu->hyperv_interface_id[2]; + c->edx = cpu->hyperv_interface_id[3]; + + c = &cpuid_ent[cpuid_i++]; + c->function = HV_CPUID_VERSION; + c->eax = cpu->hyperv_ver_id_build; + c->ebx = (uint32_t)cpu->hyperv_ver_id_major << 16 | + cpu->hyperv_ver_id_minor; + c->ecx = cpu->hyperv_ver_id_sp; + c->edx = (uint32_t)cpu->hyperv_ver_id_sb << 24 | + (cpu->hyperv_ver_id_sn & 0xffffff); + + c = &cpuid_ent[cpuid_i++]; + c->function = HV_CPUID_FEATURES; + c->eax = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EAX); + c->ebx = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EBX); + c->edx = hv_build_cpuid_leaf(cs, HV_CPUID_FEATURES, R_EDX); + + /* Unconditionally required with any Hyper-V enlightenment */ + c->eax |= HV_HYPERCALL_AVAILABLE; + + /* SynIC and Vmbus devices require messages/signals hypercalls */ + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC) && + !cpu->hyperv_synic_kvm_only) { + c->ebx |= HV_POST_MESSAGES | HV_SIGNAL_EVENTS; + } + + + /* Not exposed by KVM but needed to make CPU hotplug in Windows work */ + c->edx |= HV_CPU_DYNAMIC_PARTITIONING_AVAILABLE; + + c = &cpuid_ent[cpuid_i++]; + c->function = HV_CPUID_ENLIGHTMENT_INFO; + c->eax = hv_build_cpuid_leaf(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EAX); + c->ebx = cpu->hyperv_spinlock_attempts; + + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC) && + !hyperv_feat_enabled(cpu, HYPERV_FEAT_AVIC)) { + c->eax |= HV_APIC_ACCESS_RECOMMENDED; + } + + if 
(cpu->hyperv_no_nonarch_cs == ON_OFF_AUTO_ON) { + c->eax |= HV_NO_NONARCH_CORESHARING; + } else if (cpu->hyperv_no_nonarch_cs == ON_OFF_AUTO_AUTO) { + c->eax |= hv_cpuid_get_host(cs, HV_CPUID_ENLIGHTMENT_INFO, R_EAX) & + HV_NO_NONARCH_CORESHARING; + } + + c = &cpuid_ent[cpuid_i++]; + c->function = HV_CPUID_IMPLEMENT_LIMITS; + c->eax = cpu->hv_max_vps; + c->ebx = cpu->hyperv_limits[0]; + c->ecx = cpu->hyperv_limits[1]; + c->edx = cpu->hyperv_limits[2]; + + if (nested_eax) { + uint32_t function; + + /* Create zeroed 0x40000006..0x40000009 leaves */ + for (function = HV_CPUID_IMPLEMENT_LIMITS + 1; + function < HV_CPUID_NESTED_FEATURES; function++) { + c = &cpuid_ent[cpuid_i++]; + c->function = function; + } + + c = &cpuid_ent[cpuid_i++]; + c->function = HV_CPUID_NESTED_FEATURES; + c->eax = nested_eax; + } + + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG)) { + c = &cpuid_ent[cpuid_i++]; + c->function = HV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS; + c->eax = hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS) ? 
+ HV_CPUID_NESTED_FEATURES : HV_CPUID_IMPLEMENT_LIMITS; + memcpy(signature, "Microsoft VS", 12); + c->eax = 0; + c->ebx = signature[0]; + c->ecx = signature[1]; + c->edx = signature[2]; + + c = &cpuid_ent[cpuid_i++]; + c->function = HV_CPUID_SYNDBG_INTERFACE; + memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12); + c->eax = signature[0]; + c->ebx = 0; + c->ecx = 0; + c->edx = 0; + + c = &cpuid_ent[cpuid_i++]; + c->function = HV_CPUID_SYNDBG_PLATFORM_CAPABILITIES; + c->eax = HV_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING; + c->ebx = 0; + c->ecx = 0; + c->edx = 0; + } + + return cpuid_i; +} + +static Error *hv_passthrough_mig_blocker; +static Error *hv_no_nonarch_cs_mig_blocker; + +/* Checks that the exposed eVMCS version range is supported by KVM */ +static bool evmcs_version_supported(uint16_t evmcs_version, + uint16_t supported_evmcs_version) +{ + uint8_t min_version = evmcs_version & 0xff; + uint8_t max_version = evmcs_version >> 8; + uint8_t min_supported_version = supported_evmcs_version & 0xff; + uint8_t max_supported_version = supported_evmcs_version >> 8; + + return (min_version >= min_supported_version) && + (max_version <= max_supported_version); +} + +static int hyperv_init_vcpu(X86CPU *cpu) +{ + CPUState *cs = CPU(cpu); + Error *local_err = NULL; + int ret; + + if (cpu->hyperv_passthrough && hv_passthrough_mig_blocker == NULL) { + error_setg(&hv_passthrough_mig_blocker, + "'hv-passthrough' CPU flag prevents migration, use explicit" + " set of hv-* flags instead"); + ret = migrate_add_blocker(&hv_passthrough_mig_blocker, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; + } + } + + if (cpu->hyperv_no_nonarch_cs == ON_OFF_AUTO_AUTO && + hv_no_nonarch_cs_mig_blocker == NULL) { + error_setg(&hv_no_nonarch_cs_mig_blocker, + "'hv-no-nonarch-coresharing=auto' CPU flag prevents migration" + " use explicit 'hv-no-nonarch-coresharing=on' instead (but" + " make sure SMT is disabled and/or that vCPUs are properly" + " pinned)"); + ret = 
migrate_add_blocker(&hv_no_nonarch_cs_mig_blocker, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; + } + } + + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) && !hv_vpindex_settable) { + /* + * the kernel doesn't support setting vp_index; assert that its value + * is in sync + */ + uint64_t value; + + ret = kvm_get_one_msr(cpu, HV_X64_MSR_VP_INDEX, &value); + if (ret < 0) { + return ret; + } + + if (value != hyperv_vp_index(CPU(cpu))) { + error_report("kernel's vp_index != QEMU's vp_index"); + return -ENXIO; + } + } + + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) { + uint32_t synic_cap = cpu->hyperv_synic_kvm_only ? + KVM_CAP_HYPERV_SYNIC : KVM_CAP_HYPERV_SYNIC2; + ret = kvm_vcpu_enable_cap(cs, synic_cap, 0); + if (ret < 0) { + error_report("failed to turn on HyperV SynIC in KVM: %s", + strerror(-ret)); + return ret; + } + + if (!cpu->hyperv_synic_kvm_only) { + ret = hyperv_x86_synic_add(cpu); + if (ret < 0) { + error_report("failed to create HyperV SynIC: %s", + strerror(-ret)); + return ret; + } + } + } + + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_EVMCS)) { + uint16_t evmcs_version = DEFAULT_EVMCS_VERSION; + uint16_t supported_evmcs_version; + + ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENLIGHTENED_VMCS, 0, + (uintptr_t)&supported_evmcs_version); + + /* + * KVM is required to support EVMCS ver.1. as that's what 'hv-evmcs' + * option sets. Note: we hardcode the maximum supported eVMCS version + * to '1' as well so 'hv-evmcs' feature is migratable even when (and if) + * ver.2 is implemented. A new option (e.g. 'hv-evmcs=2') will then have + * to be added. 
+ */ + if (ret < 0) { + error_report("Hyper-V %s is not supported by kernel", + kvm_hyperv_properties[HYPERV_FEAT_EVMCS].desc); + return ret; + } + + if (!evmcs_version_supported(evmcs_version, supported_evmcs_version)) { + error_report("eVMCS version range [%d..%d] is not supported by " + "kernel (supported: [%d..%d])", evmcs_version & 0xff, + evmcs_version >> 8, supported_evmcs_version & 0xff, + supported_evmcs_version >> 8); + return -ENOTSUP; + } + } + + if (cpu->hyperv_enforce_cpuid) { + ret = kvm_vcpu_enable_cap(cs, KVM_CAP_HYPERV_ENFORCE_CPUID, 0, 1); + if (ret < 0) { + error_report("failed to enable KVM_CAP_HYPERV_ENFORCE_CPUID: %s", + strerror(-ret)); + return ret; + } + } + + /* Skip SynIC and VP_INDEX since they are hard deps already */ + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_STIMER) && + hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC) && + hyperv_feat_enabled(cpu, HYPERV_FEAT_RUNTIME)) { + hyperv_x86_set_vmbus_recommended_features_enabled(); + } + + return 0; +} + +static Error *invtsc_mig_blocker; + +#define KVM_MAX_CPUID_ENTRIES 100 + +static void kvm_init_xsave(CPUX86State *env) +{ + if (has_xsave2) { + env->xsave_buf_len = QEMU_ALIGN_UP(has_xsave2, 4096); + } else { + env->xsave_buf_len = sizeof(struct kvm_xsave); + } + + env->xsave_buf = qemu_memalign(4096, env->xsave_buf_len); + memset(env->xsave_buf, 0, env->xsave_buf_len); + /* + * The allocated storage must be large enough for all of the + * possible XSAVE state components. 
+ */ + assert(kvm_arch_get_supported_cpuid(kvm_state, 0xd, 0, R_ECX) <= + env->xsave_buf_len); +} + +static void kvm_init_nested_state(CPUX86State *env) +{ + struct kvm_vmx_nested_state_hdr *vmx_hdr; + uint32_t size; + + if (!env->nested_state) { + return; + } + + size = env->nested_state->size; + + memset(env->nested_state, 0, size); + env->nested_state->size = size; + + if (cpu_has_vmx(env)) { + env->nested_state->format = KVM_STATE_NESTED_FORMAT_VMX; + vmx_hdr = &env->nested_state->hdr.vmx; + vmx_hdr->vmxon_pa = -1ull; + vmx_hdr->vmcs12_pa = -1ull; + } else if (cpu_has_svm(env)) { + env->nested_state->format = KVM_STATE_NESTED_FORMAT_SVM; + } +} + +int kvm_arch_init_vcpu(CPUState *cs) +{ + struct { + struct kvm_cpuid2 cpuid; + struct kvm_cpuid_entry2 entries[KVM_MAX_CPUID_ENTRIES]; + } cpuid_data; + /* + * The kernel defines these structs with padding fields so there + * should be no extra padding in our cpuid_data struct. + */ + QEMU_BUILD_BUG_ON(sizeof(cpuid_data) != + sizeof(struct kvm_cpuid2) + + sizeof(struct kvm_cpuid_entry2) * KVM_MAX_CPUID_ENTRIES); + + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + uint32_t limit, i, j, cpuid_i; + uint32_t unused; + struct kvm_cpuid_entry2 *c; + uint32_t signature[3]; + int kvm_base = KVM_CPUID_SIGNATURE; + int max_nested_state_len; + int r; + Error *local_err = NULL; + + memset(&cpuid_data, 0, sizeof(cpuid_data)); + + cpuid_i = 0; + + has_xsave2 = kvm_check_extension(cs->kvm_state, KVM_CAP_XSAVE2); + + r = kvm_arch_set_tsc_khz(cs); + if (r < 0) { + return r; + } + + /* vcpu's TSC frequency is either specified by user, or following + * the value used by KVM if the former is not present. In the + * latter case, we query it from KVM and record in env->tsc_khz, + * so that vcpu's TSC frequency can be migrated later via this field. + */ + if (!env->tsc_khz) { + r = kvm_check_extension(cs->kvm_state, KVM_CAP_GET_TSC_KHZ) ? 
+ kvm_vcpu_ioctl(cs, KVM_GET_TSC_KHZ) : + -ENOTSUP; + if (r > 0) { + env->tsc_khz = r; + } + } + + env->apic_bus_freq = KVM_APIC_BUS_FREQUENCY; + + /* + * kvm_hyperv_expand_features() is called here for the second time in case + * KVM_CAP_SYS_HYPERV_CPUID is not supported. While we can't possibly handle + * 'query-cpu-model-expansion' in this case as we don't have a KVM vCPU to + * check which Hyper-V enlightenments are supported and which are not, we + * can still proceed and check/expand Hyper-V enlightenments here so legacy + * behavior is preserved. + */ + if (!kvm_hyperv_expand_features(cpu, &local_err)) { + error_report_err(local_err); + return -ENOSYS; + } + + if (hyperv_enabled(cpu)) { + r = hyperv_init_vcpu(cpu); + if (r) { + return r; + } + + cpuid_i = hyperv_fill_cpuids(cs, cpuid_data.entries); + kvm_base = KVM_CPUID_SIGNATURE_NEXT; + has_msr_hv_hypercall = true; + } + + if (cs->kvm_state->xen_version) { +#ifdef CONFIG_XEN_EMU + struct kvm_cpuid_entry2 *xen_max_leaf; + + memcpy(signature, "XenVMMXenVMM", 12); + + xen_max_leaf = c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_SIGNATURE; + c->eax = kvm_base + XEN_CPUID_TIME; + c->ebx = signature[0]; + c->ecx = signature[1]; + c->edx = signature[2]; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_VENDOR; + c->eax = cs->kvm_state->xen_version; + c->ebx = 0; + c->ecx = 0; + c->edx = 0; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_HVM_MSR; + /* Number of hypercall-transfer pages */ + c->eax = 1; + /* Hypercall MSR base address */ + if (hyperv_enabled(cpu)) { + c->ebx = XEN_HYPERCALL_MSR_HYPERV; + kvm_xen_init(cs->kvm_state, c->ebx); + } else { + c->ebx = XEN_HYPERCALL_MSR; + } + c->ecx = 0; + c->edx = 0; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_TIME; + c->eax = ((!!tsc_is_stable_and_known(env) << 1) | + (!!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP) << 2)); + /* default=0 
(emulate if necessary) */ + c->ebx = 0; + /* guest tsc frequency */ + c->ecx = env->user_tsc_khz; + /* guest tsc incarnation (migration count) */ + c->edx = 0; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = kvm_base + XEN_CPUID_HVM; + xen_max_leaf->eax = kvm_base + XEN_CPUID_HVM; + if (cs->kvm_state->xen_version >= XEN_VERSION(4, 5)) { + c->function = kvm_base + XEN_CPUID_HVM; + + if (cpu->xen_vapic) { + c->eax |= XEN_HVM_CPUID_APIC_ACCESS_VIRT; + c->eax |= XEN_HVM_CPUID_X2APIC_VIRT; + } + + c->eax |= XEN_HVM_CPUID_IOMMU_MAPPINGS; + + if (cs->kvm_state->xen_version >= XEN_VERSION(4, 6)) { + c->eax |= XEN_HVM_CPUID_VCPU_ID_PRESENT; + c->ebx = cs->cpu_index; + } + + if (cs->kvm_state->xen_version >= XEN_VERSION(4, 17)) { + c->eax |= XEN_HVM_CPUID_UPCALL_VECTOR; + } + } + + r = kvm_xen_init_vcpu(cs); + if (r) { + return r; + } + + kvm_base += 0x100; +#else /* CONFIG_XEN_EMU */ + /* This should never happen as kvm_arch_init() would have died first. */ + fprintf(stderr, "Cannot enable Xen CPUID without Xen support\n"); + abort(); +#endif + } else if (cpu->expose_kvm) { + memcpy(signature, "KVMKVMKVM\0\0\0", 12); + c = &cpuid_data.entries[cpuid_i++]; + c->function = KVM_CPUID_SIGNATURE | kvm_base; + c->eax = KVM_CPUID_FEATURES | kvm_base; + c->ebx = signature[0]; + c->ecx = signature[1]; + c->edx = signature[2]; + + c = &cpuid_data.entries[cpuid_i++]; + c->function = KVM_CPUID_FEATURES | kvm_base; + c->eax = env->features[FEAT_KVM]; + c->edx = env->features[FEAT_KVM_HINTS]; + } + + cpu_x86_cpuid(env, 0, 0, &limit, &unused, &unused, &unused); + + if (cpu->kvm_pv_enforce_cpuid) { + r = kvm_vcpu_enable_cap(cs, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 0, 1); + if (r < 0) { + fprintf(stderr, + "failed to enable KVM_CAP_ENFORCE_PV_FEATURE_CPUID: %s", + strerror(-r)); + abort(); + } + } + + for (i = 0; i <= limit; i++) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "unsupported level value: 0x%x\n", limit); + abort(); + } + c = &cpuid_data.entries[cpuid_i++]; + 
+ switch (i) { + case 2: { + /* Keep reading function 2 till all the input is received */ + int times; + + c->function = i; + c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC | + KVM_CPUID_FLAG_STATE_READ_NEXT; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); + times = c->eax & 0xff; + + for (j = 1; j < times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "cpuid_data is full, no space for " + "cpuid(eax:2):eax & 0xf = 0x%x\n", times); + abort(); + } + c = &cpuid_data.entries[cpuid_i++]; + c->function = i; + c->flags = KVM_CPUID_FLAG_STATEFUL_FUNC; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); + } + break; + } + case 0x1f: + if (env->nr_dies < 2) { + cpuid_i--; + break; + } + /* fallthrough */ + case 4: + case 0xb: + case 0xd: + for (j = 0; ; j++) { + if (i == 0xd && j == 64) { + break; + } + + c->function = i; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + c->index = j; + cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); + + if (i == 4 && c->eax == 0) { + break; + } + if (i == 0xb && !(c->ecx & 0xff00)) { + break; + } + if (i == 0x1f && !(c->ecx & 0xff00)) { + break; + } + if (i == 0xd && c->eax == 0) { + continue; + } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "cpuid_data is full, no space for " + "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); + abort(); + } + c = &cpuid_data.entries[cpuid_i++]; + } + break; + case 0x12: + for (j = 0; ; j++) { + c->function = i; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + c->index = j; + cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); + + if (j > 1 && (c->eax & 0xf) != 1) { + break; + } + + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "cpuid_data is full, no space for " + "cpuid(eax:0x12,ecx:0x%x)\n", j); + abort(); + } + c = &cpuid_data.entries[cpuid_i++]; + } + break; + case 0x7: + case 0x14: + case 0x1d: + case 0x1e: { + uint32_t times; + + c->function = i; + c->index = 0; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + 
cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); + times = c->eax; + + for (j = 1; j <= times; ++j) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "cpuid_data is full, no space for " + "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); + abort(); + } + c = &cpuid_data.entries[cpuid_i++]; + c->function = i; + c->index = j; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); + } + break; + } + default: + c->function = i; + c->flags = 0; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); + if (!c->eax && !c->ebx && !c->ecx && !c->edx) { + /* + * KVM already returns all zeroes if a CPUID entry is missing, + * so we can omit it and avoid hitting KVM's 80-entry limit. + */ + cpuid_i--; + } + break; + } + } + + if (limit >= 0x0a) { + uint32_t eax, edx; + + cpu_x86_cpuid(env, 0x0a, 0, &eax, &unused, &unused, &edx); + + has_architectural_pmu_version = eax & 0xff; + if (has_architectural_pmu_version > 0) { + num_architectural_pmu_gp_counters = (eax & 0xff00) >> 8; + + /* Shouldn't be more than 32, since that's the number of bits + * available in EBX to tell us _which_ counters are available. + * Play it safe. 
+ */ + if (num_architectural_pmu_gp_counters > MAX_GP_COUNTERS) { + num_architectural_pmu_gp_counters = MAX_GP_COUNTERS; + } + + if (has_architectural_pmu_version > 1) { + num_architectural_pmu_fixed_counters = edx & 0x1f; + + if (num_architectural_pmu_fixed_counters > MAX_FIXED_COUNTERS) { + num_architectural_pmu_fixed_counters = MAX_FIXED_COUNTERS; + } + } + } + } + + cpu_x86_cpuid(env, 0x80000000, 0, &limit, &unused, &unused, &unused); + + for (i = 0x80000000; i <= limit; i++) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "unsupported xlevel value: 0x%x\n", limit); + abort(); + } + c = &cpuid_data.entries[cpuid_i++]; + + switch (i) { + case 0x8000001d: + /* Query for all AMD cache information leaves */ + for (j = 0; ; j++) { + c->function = i; + c->flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; + c->index = j; + cpu_x86_cpuid(env, i, j, &c->eax, &c->ebx, &c->ecx, &c->edx); + + if (c->eax == 0) { + break; + } + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "cpuid_data is full, no space for " + "cpuid(eax:0x%x,ecx:0x%x)\n", i, j); + abort(); + } + c = &cpuid_data.entries[cpuid_i++]; + } + break; + default: + c->function = i; + c->flags = 0; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); + if (!c->eax && !c->ebx && !c->ecx && !c->edx) { + /* + * KVM already returns all zeroes if a CPUID entry is missing, + * so we can omit it and avoid hitting KVM's 80-entry limit. + */ + cpuid_i--; + } + break; + } + } + + /* Call Centaur's CPUID instructions they are supported. 
*/ + if (env->cpuid_xlevel2 > 0) { + cpu_x86_cpuid(env, 0xC0000000, 0, &limit, &unused, &unused, &unused); + + for (i = 0xC0000000; i <= limit; i++) { + if (cpuid_i == KVM_MAX_CPUID_ENTRIES) { + fprintf(stderr, "unsupported xlevel2 value: 0x%x\n", limit); + abort(); + } + c = &cpuid_data.entries[cpuid_i++]; + + c->function = i; + c->flags = 0; + cpu_x86_cpuid(env, i, 0, &c->eax, &c->ebx, &c->ecx, &c->edx); + } + } + + cpuid_data.cpuid.nent = cpuid_i; + + if (((env->cpuid_version >> 8)&0xF) >= 6 + && (env->features[FEAT_1_EDX] & (CPUID_MCE | CPUID_MCA)) == + (CPUID_MCE | CPUID_MCA)) { + uint64_t mcg_cap, unsupported_caps; + int banks; + int ret; + + ret = kvm_get_mce_cap_supported(cs->kvm_state, &mcg_cap, &banks); + if (ret < 0) { + fprintf(stderr, "kvm_get_mce_cap_supported: %s", strerror(-ret)); + return ret; + } + + if (banks < (env->mcg_cap & MCG_CAP_BANKS_MASK)) { + error_report("kvm: Unsupported MCE bank count (QEMU = %d, KVM = %d)", + (int)(env->mcg_cap & MCG_CAP_BANKS_MASK), banks); + return -ENOTSUP; + } + + unsupported_caps = env->mcg_cap & ~(mcg_cap | MCG_CAP_BANKS_MASK); + if (unsupported_caps) { + if (unsupported_caps & MCG_LMCE_P) { + error_report("kvm: LMCE not supported"); + return -ENOTSUP; + } + warn_report("Unsupported MCG_CAP bits: 0x%" PRIx64, + unsupported_caps); + } + + env->mcg_cap &= mcg_cap | MCG_CAP_BANKS_MASK; + ret = kvm_vcpu_ioctl(cs, KVM_X86_SETUP_MCE, &env->mcg_cap); + if (ret < 0) { + fprintf(stderr, "KVM_X86_SETUP_MCE: %s", strerror(-ret)); + return ret; + } + } + + cpu->vmsentry = qemu_add_vm_change_state_handler(cpu_update_state, env); + + c = cpuid_find_entry(&cpuid_data.cpuid, 1, 0); + if (c) { + has_msr_feature_control = !!(c->ecx & CPUID_EXT_VMX) || + !!(c->ecx & CPUID_EXT_SMX); + } + + c = cpuid_find_entry(&cpuid_data.cpuid, 7, 0); + if (c && (c->ebx & CPUID_7_0_EBX_SGX)) { + has_msr_feature_control = true; + } + + if (env->mcg_cap & MCG_LMCE_P) { + has_msr_mcg_ext_ctl = has_msr_feature_control = true; + } + + if 
(!env->user_tsc_khz) { + if ((env->features[FEAT_8000_0007_EDX] & CPUID_APM_INVTSC) && + invtsc_mig_blocker == NULL) { + error_setg(&invtsc_mig_blocker, + "State blocked by non-migratable CPU device" + " (invtsc flag)"); + r = migrate_add_blocker(&invtsc_mig_blocker, &local_err); + if (r < 0) { + error_report_err(local_err); + return r; + } + } + } + + if (cpu->vmware_cpuid_freq + /* Guests depend on 0x40000000 to detect this feature, so only expose + * it if KVM exposes leaf 0x40000000. (Conflicts with Hyper-V) */ + && cpu->expose_kvm + && kvm_base == KVM_CPUID_SIGNATURE + /* TSC clock must be stable and known for this feature. */ + && tsc_is_stable_and_known(env)) { + + c = &cpuid_data.entries[cpuid_i++]; + c->function = KVM_CPUID_SIGNATURE | 0x10; + c->eax = env->tsc_khz; + c->ebx = env->apic_bus_freq / 1000; /* Hz to KHz */ + c->ecx = c->edx = 0; + + c = cpuid_find_entry(&cpuid_data.cpuid, kvm_base, 0); + c->eax = MAX(c->eax, KVM_CPUID_SIGNATURE | 0x10); + } + + cpuid_data.cpuid.nent = cpuid_i; + + cpuid_data.cpuid.padding = 0; + r = kvm_vcpu_ioctl(cs, KVM_SET_CPUID2, &cpuid_data); + if (r) { + goto fail; + } + kvm_init_xsave(env); + + max_nested_state_len = kvm_max_nested_state_length(); + if (max_nested_state_len > 0) { + assert(max_nested_state_len >= offsetof(struct kvm_nested_state, data)); + + if (cpu_has_vmx(env) || cpu_has_svm(env)) { + env->nested_state = g_malloc0(max_nested_state_len); + env->nested_state->size = max_nested_state_len; + + kvm_init_nested_state(env); + } + } + + cpu->kvm_msr_buf = g_malloc0(MSR_BUF_SIZE); + + if (!(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_RDTSCP)) { + has_msr_tsc_aux = false; + } + + kvm_init_msrs(cpu); + + return 0; + + fail: + migrate_del_blocker(&invtsc_mig_blocker); + + return r; +} + +int kvm_arch_destroy_vcpu(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + g_free(env->xsave_buf); + + g_free(cpu->kvm_msr_buf); + cpu->kvm_msr_buf = NULL; + + g_free(env->nested_state); + 
env->nested_state = NULL; + + qemu_del_vm_change_state_handler(cpu->vmsentry); + + return 0; +} + +void kvm_arch_reset_vcpu(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + + env->xcr0 = 1; + if (kvm_irqchip_in_kernel()) { + env->mp_state = cpu_is_bsp(cpu) ? KVM_MP_STATE_RUNNABLE : + KVM_MP_STATE_UNINITIALIZED; + } else { + env->mp_state = KVM_MP_STATE_RUNNABLE; + } + + /* enabled by default */ + env->poll_control_msr = 1; + + kvm_init_nested_state(env); + + sev_es_set_reset_vector(CPU(cpu)); +} + +void kvm_arch_after_reset_vcpu(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + int i; + + /* + * Reset SynIC after all other devices have been reset to let them remove + * their SINT routes first. + */ + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) { + for (i = 0; i < ARRAY_SIZE(env->msr_hv_synic_sint); i++) { + env->msr_hv_synic_sint[i] = HV_SINT_MASKED; + } + + hyperv_x86_synic_reset(cpu); + } +} + +void kvm_arch_do_init_vcpu(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + + /* APs get directly into wait-for-SIPI state. 
*/ + if (env->mp_state == KVM_MP_STATE_UNINITIALIZED) { + env->mp_state = KVM_MP_STATE_INIT_RECEIVED; + } +} + +static int kvm_get_supported_feature_msrs(KVMState *s) +{ + int ret = 0; + + if (kvm_feature_msrs != NULL) { + return 0; + } + + if (!kvm_check_extension(s, KVM_CAP_GET_MSR_FEATURES)) { + return 0; + } + + struct kvm_msr_list msr_list; + + msr_list.nmsrs = 0; + ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, &msr_list); + if (ret < 0 && ret != -E2BIG) { + error_report("Fetch KVM feature MSR list failed: %s", + strerror(-ret)); + return ret; + } + + assert(msr_list.nmsrs > 0); + kvm_feature_msrs = g_malloc0(sizeof(msr_list) + + msr_list.nmsrs * sizeof(msr_list.indices[0])); + + kvm_feature_msrs->nmsrs = msr_list.nmsrs; + ret = kvm_ioctl(s, KVM_GET_MSR_FEATURE_INDEX_LIST, kvm_feature_msrs); + + if (ret < 0) { + error_report("Fetch KVM feature MSR list failed: %s", + strerror(-ret)); + g_free(kvm_feature_msrs); + kvm_feature_msrs = NULL; + return ret; + } + + return 0; +} + +static int kvm_get_supported_msrs(KVMState *s) +{ + int ret = 0; + struct kvm_msr_list msr_list, *kvm_msr_list; + + /* + * Obtain MSR list from KVM. These are the MSRs that we must + * save/restore. + */ + msr_list.nmsrs = 0; + ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, &msr_list); + if (ret < 0 && ret != -E2BIG) { + return ret; + } + /* + * Old kernel modules had a bug and could write beyond the provided + * memory. Allocate at least a safe amount of 1K. 
+ */ + kvm_msr_list = g_malloc0(MAX(1024, sizeof(msr_list) + + msr_list.nmsrs * + sizeof(msr_list.indices[0]))); + + kvm_msr_list->nmsrs = msr_list.nmsrs; + ret = kvm_ioctl(s, KVM_GET_MSR_INDEX_LIST, kvm_msr_list); + if (ret >= 0) { + int i; + + for (i = 0; i < kvm_msr_list->nmsrs; i++) { + switch (kvm_msr_list->indices[i]) { + case MSR_STAR: + has_msr_star = true; + break; + case MSR_VM_HSAVE_PA: + has_msr_hsave_pa = true; + break; + case MSR_TSC_AUX: + has_msr_tsc_aux = true; + break; + case MSR_TSC_ADJUST: + has_msr_tsc_adjust = true; + break; + case MSR_IA32_TSCDEADLINE: + has_msr_tsc_deadline = true; + break; + case MSR_IA32_SMBASE: + has_msr_smbase = true; + break; + case MSR_SMI_COUNT: + has_msr_smi_count = true; + break; + case MSR_IA32_MISC_ENABLE: + has_msr_misc_enable = true; + break; + case MSR_IA32_BNDCFGS: + has_msr_bndcfgs = true; + break; + case MSR_IA32_XSS: + has_msr_xss = true; + break; + case MSR_IA32_UMWAIT_CONTROL: + has_msr_umwait = true; + break; + case HV_X64_MSR_CRASH_CTL: + has_msr_hv_crash = true; + break; + case HV_X64_MSR_RESET: + has_msr_hv_reset = true; + break; + case HV_X64_MSR_VP_INDEX: + has_msr_hv_vpindex = true; + break; + case HV_X64_MSR_VP_RUNTIME: + has_msr_hv_runtime = true; + break; + case HV_X64_MSR_SCONTROL: + has_msr_hv_synic = true; + break; + case HV_X64_MSR_STIMER0_CONFIG: + has_msr_hv_stimer = true; + break; + case HV_X64_MSR_TSC_FREQUENCY: + has_msr_hv_frequencies = true; + break; + case HV_X64_MSR_REENLIGHTENMENT_CONTROL: + has_msr_hv_reenlightenment = true; + break; + case HV_X64_MSR_SYNDBG_OPTIONS: + has_msr_hv_syndbg_options = true; + break; + case MSR_IA32_SPEC_CTRL: + has_msr_spec_ctrl = true; + break; + case MSR_AMD64_TSC_RATIO: + has_tsc_scale_msr = true; + break; + case MSR_IA32_TSX_CTRL: + has_msr_tsx_ctrl = true; + break; + case MSR_VIRT_SSBD: + has_msr_virt_ssbd = true; + break; + case MSR_IA32_ARCH_CAPABILITIES: + has_msr_arch_capabs = true; + break; + case MSR_IA32_CORE_CAPABILITY: + 
has_msr_core_capabs = true; + break; + case MSR_IA32_PERF_CAPABILITIES: + has_msr_perf_capabs = true; + break; + case MSR_IA32_VMX_VMFUNC: + has_msr_vmx_vmfunc = true; + break; + case MSR_IA32_UCODE_REV: + has_msr_ucode_rev = true; + break; + case MSR_IA32_VMX_PROCBASED_CTLS2: + has_msr_vmx_procbased_ctls2 = true; + break; + case MSR_IA32_PKRS: + has_msr_pkrs = true; + break; + } + } + } + + g_free(kvm_msr_list); + + return ret; +} + +static bool kvm_rdmsr_core_thread_count(X86CPU *cpu, uint32_t msr, + uint64_t *val) +{ + CPUState *cs = CPU(cpu); + + *val = cs->nr_threads * cs->nr_cores; /* thread count, bits 15..0 */ + *val |= ((uint32_t)cs->nr_cores << 16); /* core count, bits 31..16 */ + + return true; +} + +static Notifier smram_machine_done; +static KVMMemoryListener smram_listener; +static AddressSpace smram_address_space; +static MemoryRegion smram_as_root; +static MemoryRegion smram_as_mem; + +static void register_smram_listener(Notifier *n, void *unused) +{ + MemoryRegion *smram = + (MemoryRegion *) object_resolve_path("/machine/smram", NULL); + + /* Outer container... */ + memory_region_init(&smram_as_root, OBJECT(kvm_state), "mem-container-smram", ~0ull); + memory_region_set_enabled(&smram_as_root, true); + + /* ... with two regions inside: normal system memory with low + * priority, and... + */ + memory_region_init_alias(&smram_as_mem, OBJECT(kvm_state), "mem-smram", + get_system_memory(), 0, ~0ull); + memory_region_add_subregion_overlap(&smram_as_root, 0, &smram_as_mem, 0); + memory_region_set_enabled(&smram_as_mem, true); + + if (smram) { + /* ... 
SMRAM with higher priority */ + memory_region_add_subregion_overlap(&smram_as_root, 0, smram, 10); + memory_region_set_enabled(smram, true); + } + + address_space_init(&smram_address_space, &smram_as_root, "KVM-SMRAM"); + kvm_memory_listener_register(kvm_state, &smram_listener, + &smram_address_space, 1, "kvm-smram"); +} + +int kvm_arch_get_default_type(MachineState *ms) +{ + return 0; +} + +int kvm_arch_init(MachineState *ms, KVMState *s) +{ + uint64_t identity_base = 0xfffbc000; + uint64_t shadow_mem; + int ret; + struct utsname utsname; + Error *local_err = NULL; + + /* + * Initialize SEV context, if required + * + * If no memory encryption is requested (ms->cgs == NULL) this is + * a no-op. + * + * It's also a no-op if a non-SEV confidential guest support + * mechanism is selected. SEV is the only mechanism available to + * select on x86 at present, so this doesn't arise, but if new + * mechanisms are supported in future (e.g. TDX), they'll need + * their own initialization either here or elsewhere. 
+ */ + ret = sev_kvm_init(ms->cgs, &local_err); + if (ret < 0) { + error_report_err(local_err); + return ret; + } + + has_xcrs = kvm_check_extension(s, KVM_CAP_XCRS); + has_sregs2 = kvm_check_extension(s, KVM_CAP_SREGS2) > 0; + + hv_vpindex_settable = kvm_check_extension(s, KVM_CAP_HYPERV_VP_INDEX); + + has_exception_payload = kvm_check_extension(s, KVM_CAP_EXCEPTION_PAYLOAD); + if (has_exception_payload) { + ret = kvm_vm_enable_cap(s, KVM_CAP_EXCEPTION_PAYLOAD, 0, true); + if (ret < 0) { + error_report("kvm: Failed to enable exception payload cap: %s", + strerror(-ret)); + return ret; + } + } + + has_triple_fault_event = kvm_check_extension(s, KVM_CAP_X86_TRIPLE_FAULT_EVENT); + if (has_triple_fault_event) { + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 0, true); + if (ret < 0) { + error_report("kvm: Failed to enable triple fault event cap: %s", + strerror(-ret)); + return ret; + } + } + + if (s->xen_version) { +#ifdef CONFIG_XEN_EMU + if (!object_dynamic_cast(OBJECT(ms), TYPE_PC_MACHINE)) { + error_report("kvm: Xen support only available in PC machine"); + return -ENOTSUP; + } + /* hyperv_enabled() doesn't work yet. */ + uint32_t msr = XEN_HYPERCALL_MSR; + ret = kvm_xen_init(s, msr); + if (ret < 0) { + return ret; + } +#else + error_report("kvm: Xen support not enabled in qemu"); + return -ENOTSUP; +#endif + } + + ret = kvm_get_supported_msrs(s); + if (ret < 0) { + return ret; + } + + kvm_get_supported_feature_msrs(s); + + uname(&utsname); + lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0; + + /* + * On older Intel CPUs, KVM uses vm86 mode to emulate 16-bit code directly. + * In order to use vm86 mode, an EPT identity map and a TSS are needed. + * Since these must be part of guest physical memory, we need to allocate + * them, both by setting their start addresses in the kernel and by + * creating a corresponding e820 entry. We need 4 pages before the BIOS, + * so this value allows up to 16M BIOSes. 
+ */ + identity_base = 0xfeffc000; + ret = kvm_vm_ioctl(s, KVM_SET_IDENTITY_MAP_ADDR, &identity_base); + if (ret < 0) { + return ret; + } + + /* Set TSS base one page after EPT identity map. */ + ret = kvm_vm_ioctl(s, KVM_SET_TSS_ADDR, identity_base + 0x1000); + if (ret < 0) { + return ret; + } + + /* Tell fw_cfg to notify the BIOS to reserve the range. */ + ret = e820_add_entry(identity_base, 0x4000, E820_RESERVED); + if (ret < 0) { + fprintf(stderr, "e820_add_entry() table is full\n"); + return ret; + } + + shadow_mem = object_property_get_int(OBJECT(s), "kvm-shadow-mem", &error_abort); + if (shadow_mem != -1) { + shadow_mem /= 4096; + ret = kvm_vm_ioctl(s, KVM_SET_NR_MMU_PAGES, shadow_mem); + if (ret < 0) { + return ret; + } + } + + if (kvm_check_extension(s, KVM_CAP_X86_SMM) && + object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE) && + x86_machine_is_smm_enabled(X86_MACHINE(ms))) { + smram_machine_done.notify = register_smram_listener; + qemu_add_machine_init_done_notifier(&smram_machine_done); + } + + if (enable_cpu_pm) { + int disable_exits = kvm_check_extension(s, KVM_CAP_X86_DISABLE_EXITS); +/* Work around for kernel header with a typo. TODO: fix header and drop. 
*/ +#if defined(KVM_X86_DISABLE_EXITS_HTL) && !defined(KVM_X86_DISABLE_EXITS_HLT) +#define KVM_X86_DISABLE_EXITS_HLT KVM_X86_DISABLE_EXITS_HTL +#endif + if (disable_exits) { + disable_exits &= (KVM_X86_DISABLE_EXITS_MWAIT | + KVM_X86_DISABLE_EXITS_HLT | + KVM_X86_DISABLE_EXITS_PAUSE | + KVM_X86_DISABLE_EXITS_CSTATE); + } + + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_DISABLE_EXITS, 0, + disable_exits); + if (ret < 0) { + error_report("kvm: guest stopping CPU not supported: %s", + strerror(-ret)); + } + } + + if (object_dynamic_cast(OBJECT(ms), TYPE_X86_MACHINE)) { + X86MachineState *x86ms = X86_MACHINE(ms); + + if (x86ms->bus_lock_ratelimit > 0) { + ret = kvm_check_extension(s, KVM_CAP_X86_BUS_LOCK_EXIT); + if (!(ret & KVM_BUS_LOCK_DETECTION_EXIT)) { + error_report("kvm: bus lock detection unsupported"); + return -ENOTSUP; + } + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_BUS_LOCK_EXIT, 0, + KVM_BUS_LOCK_DETECTION_EXIT); + if (ret < 0) { + error_report("kvm: Failed to enable bus lock detection cap: %s", + strerror(-ret)); + return ret; + } + ratelimit_init(&bus_lock_ratelimit_ctrl); + ratelimit_set_speed(&bus_lock_ratelimit_ctrl, + x86ms->bus_lock_ratelimit, BUS_LOCK_SLICE_TIME); + } + } + + if (s->notify_vmexit != NOTIFY_VMEXIT_OPTION_DISABLE && + kvm_check_extension(s, KVM_CAP_X86_NOTIFY_VMEXIT)) { + uint64_t notify_window_flags = + ((uint64_t)s->notify_window << 32) | + KVM_X86_NOTIFY_VMEXIT_ENABLED | + KVM_X86_NOTIFY_VMEXIT_USER; + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_NOTIFY_VMEXIT, 0, + notify_window_flags); + if (ret < 0) { + error_report("kvm: Failed to enable notify vmexit cap: %s", + strerror(-ret)); + return ret; + } + } + if (kvm_vm_check_extension(s, KVM_CAP_X86_USER_SPACE_MSR)) { + bool r; + + ret = kvm_vm_enable_cap(s, KVM_CAP_X86_USER_SPACE_MSR, 0, + KVM_MSR_EXIT_REASON_FILTER); + if (ret) { + error_report("Could not enable user space MSRs: %s", + strerror(-ret)); + exit(1); + } + + r = kvm_filter_msr(s, MSR_CORE_THREAD_COUNT, + kvm_rdmsr_core_thread_count, 
NULL); + if (!r) { + error_report("Could not install MSR_CORE_THREAD_COUNT handler: %s", + strerror(-ret)); + exit(1); + } + } + + return 0; +} + +static void set_v8086_seg(struct kvm_segment *lhs, const SegmentCache *rhs) +{ + lhs->selector = rhs->selector; + lhs->base = rhs->base; + lhs->limit = rhs->limit; + lhs->type = 3; + lhs->present = 1; + lhs->dpl = 3; + lhs->db = 0; + lhs->s = 1; + lhs->l = 0; + lhs->g = 0; + lhs->avl = 0; + lhs->unusable = 0; +} + +static void set_seg(struct kvm_segment *lhs, const SegmentCache *rhs) +{ + unsigned flags = rhs->flags; + lhs->selector = rhs->selector; + lhs->base = rhs->base; + lhs->limit = rhs->limit; + lhs->type = (flags >> DESC_TYPE_SHIFT) & 15; + lhs->present = (flags & DESC_P_MASK) != 0; + lhs->dpl = (flags >> DESC_DPL_SHIFT) & 3; + lhs->db = (flags >> DESC_B_SHIFT) & 1; + lhs->s = (flags & DESC_S_MASK) != 0; + lhs->l = (flags >> DESC_L_SHIFT) & 1; + lhs->g = (flags & DESC_G_MASK) != 0; + lhs->avl = (flags & DESC_AVL_MASK) != 0; + lhs->unusable = !lhs->present; + lhs->padding = 0; +} + +static void get_seg(SegmentCache *lhs, const struct kvm_segment *rhs) +{ + lhs->selector = rhs->selector; + lhs->base = rhs->base; + lhs->limit = rhs->limit; + lhs->flags = (rhs->type << DESC_TYPE_SHIFT) | + ((rhs->present && !rhs->unusable) * DESC_P_MASK) | + (rhs->dpl << DESC_DPL_SHIFT) | + (rhs->db << DESC_B_SHIFT) | + (rhs->s * DESC_S_MASK) | + (rhs->l << DESC_L_SHIFT) | + (rhs->g * DESC_G_MASK) | + (rhs->avl * DESC_AVL_MASK); +} + +static void kvm_getput_reg(__u64 *kvm_reg, target_ulong *qemu_reg, int set) +{ + if (set) { + *kvm_reg = *qemu_reg; + } else { + *qemu_reg = *kvm_reg; + } +} + +static int kvm_getput_regs(X86CPU *cpu, int set) +{ + CPUX86State *env = &cpu->env; + struct kvm_regs regs; + int ret = 0; + + if (!set) { + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_REGS, ®s); + if (ret < 0) { + return ret; + } + } + + kvm_getput_reg(®s.rax, &env->regs[R_EAX], set); + kvm_getput_reg(®s.rbx, &env->regs[R_EBX], set); + 
kvm_getput_reg(®s.rcx, &env->regs[R_ECX], set); + kvm_getput_reg(®s.rdx, &env->regs[R_EDX], set); + kvm_getput_reg(®s.rsi, &env->regs[R_ESI], set); + kvm_getput_reg(®s.rdi, &env->regs[R_EDI], set); + kvm_getput_reg(®s.rsp, &env->regs[R_ESP], set); + kvm_getput_reg(®s.rbp, &env->regs[R_EBP], set); +#ifdef TARGET_X86_64 + kvm_getput_reg(®s.r8, &env->regs[8], set); + kvm_getput_reg(®s.r9, &env->regs[9], set); + kvm_getput_reg(®s.r10, &env->regs[10], set); + kvm_getput_reg(®s.r11, &env->regs[11], set); + kvm_getput_reg(®s.r12, &env->regs[12], set); + kvm_getput_reg(®s.r13, &env->regs[13], set); + kvm_getput_reg(®s.r14, &env->regs[14], set); + kvm_getput_reg(®s.r15, &env->regs[15], set); +#endif + + kvm_getput_reg(®s.rflags, &env->eflags, set); + kvm_getput_reg(®s.rip, &env->eip, set); + + if (set) { + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_REGS, ®s); + } + + return ret; +} + +static int kvm_put_xsave(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + void *xsave = env->xsave_buf; + + x86_cpu_xsave_all_areas(cpu, xsave, env->xsave_buf_len); + + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XSAVE, xsave); +} + +static int kvm_put_xcrs(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + struct kvm_xcrs xcrs = {}; + + if (!has_xcrs) { + return 0; + } + + xcrs.nr_xcrs = 1; + xcrs.flags = 0; + xcrs.xcrs[0].xcr = 0; + xcrs.xcrs[0].value = env->xcr0; + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_XCRS, &xcrs); +} + +static int kvm_put_sregs(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + struct kvm_sregs sregs; + + /* + * The interrupt_bitmap is ignored because KVM_SET_SREGS is + * always followed by KVM_SET_VCPU_EVENTS. 
+ */ + memset(sregs.interrupt_bitmap, 0, sizeof(sregs.interrupt_bitmap)); + + if ((env->eflags & VM_MASK)) { + set_v8086_seg(&sregs.cs, &env->segs[R_CS]); + set_v8086_seg(&sregs.ds, &env->segs[R_DS]); + set_v8086_seg(&sregs.es, &env->segs[R_ES]); + set_v8086_seg(&sregs.fs, &env->segs[R_FS]); + set_v8086_seg(&sregs.gs, &env->segs[R_GS]); + set_v8086_seg(&sregs.ss, &env->segs[R_SS]); + } else { + set_seg(&sregs.cs, &env->segs[R_CS]); + set_seg(&sregs.ds, &env->segs[R_DS]); + set_seg(&sregs.es, &env->segs[R_ES]); + set_seg(&sregs.fs, &env->segs[R_FS]); + set_seg(&sregs.gs, &env->segs[R_GS]); + set_seg(&sregs.ss, &env->segs[R_SS]); + } + + set_seg(&sregs.tr, &env->tr); + set_seg(&sregs.ldt, &env->ldt); + + sregs.idt.limit = env->idt.limit; + sregs.idt.base = env->idt.base; + memset(sregs.idt.padding, 0, sizeof sregs.idt.padding); + sregs.gdt.limit = env->gdt.limit; + sregs.gdt.base = env->gdt.base; + memset(sregs.gdt.padding, 0, sizeof sregs.gdt.padding); + + sregs.cr0 = env->cr[0]; + sregs.cr2 = env->cr[2]; + sregs.cr3 = env->cr[3]; + sregs.cr4 = env->cr[4]; + + sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state); + sregs.apic_base = cpu_get_apic_base(cpu->apic_state); + + sregs.efer = env->efer; + + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS, &sregs); +} + +static int kvm_put_sregs2(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + struct kvm_sregs2 sregs; + int i; + + sregs.flags = 0; + + if ((env->eflags & VM_MASK)) { + set_v8086_seg(&sregs.cs, &env->segs[R_CS]); + set_v8086_seg(&sregs.ds, &env->segs[R_DS]); + set_v8086_seg(&sregs.es, &env->segs[R_ES]); + set_v8086_seg(&sregs.fs, &env->segs[R_FS]); + set_v8086_seg(&sregs.gs, &env->segs[R_GS]); + set_v8086_seg(&sregs.ss, &env->segs[R_SS]); + } else { + set_seg(&sregs.cs, &env->segs[R_CS]); + set_seg(&sregs.ds, &env->segs[R_DS]); + set_seg(&sregs.es, &env->segs[R_ES]); + set_seg(&sregs.fs, &env->segs[R_FS]); + set_seg(&sregs.gs, &env->segs[R_GS]); + set_seg(&sregs.ss, &env->segs[R_SS]); + } + + set_seg(&sregs.tr, 
&env->tr); + set_seg(&sregs.ldt, &env->ldt); + + sregs.idt.limit = env->idt.limit; + sregs.idt.base = env->idt.base; + memset(sregs.idt.padding, 0, sizeof sregs.idt.padding); + sregs.gdt.limit = env->gdt.limit; + sregs.gdt.base = env->gdt.base; + memset(sregs.gdt.padding, 0, sizeof sregs.gdt.padding); + + sregs.cr0 = env->cr[0]; + sregs.cr2 = env->cr[2]; + sregs.cr3 = env->cr[3]; + sregs.cr4 = env->cr[4]; + + sregs.cr8 = cpu_get_apic_tpr(cpu->apic_state); + sregs.apic_base = cpu_get_apic_base(cpu->apic_state); + + sregs.efer = env->efer; + + if (env->pdptrs_valid) { + for (i = 0; i < 4; i++) { + sregs.pdptrs[i] = env->pdptrs[i]; + } + sregs.flags |= KVM_SREGS2_FLAGS_PDPTRS_VALID; + } + + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_SREGS2, &sregs); +} + + +static void kvm_msr_buf_reset(X86CPU *cpu) +{ + memset(cpu->kvm_msr_buf, 0, MSR_BUF_SIZE); +} + +static void kvm_msr_entry_add(X86CPU *cpu, uint32_t index, uint64_t value) +{ + struct kvm_msrs *msrs = cpu->kvm_msr_buf; + void *limit = ((void *)msrs) + MSR_BUF_SIZE; + struct kvm_msr_entry *entry = &msrs->entries[msrs->nmsrs]; + + assert((void *)(entry + 1) <= limit); + + entry->index = index; + entry->reserved = 0; + entry->data = value; + msrs->nmsrs++; +} + +static int kvm_put_one_msr(X86CPU *cpu, int index, uint64_t value) +{ + kvm_msr_buf_reset(cpu); + kvm_msr_entry_add(cpu, index, value); + + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); +} + +static int kvm_get_one_msr(X86CPU *cpu, int index, uint64_t *value) +{ + int ret; + struct { + struct kvm_msrs info; + struct kvm_msr_entry entries[1]; + } msr_data = { + .info.nmsrs = 1, + .entries[0].index = index, + }; + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, &msr_data); + if (ret < 0) { + return ret; + } + assert(ret == 1); + *value = msr_data.entries[0].data; + return ret; +} +void kvm_put_apicbase(X86CPU *cpu, uint64_t value) +{ + int ret; + + ret = kvm_put_one_msr(cpu, MSR_IA32_APICBASE, value); + assert(ret == 1); +} + +static int 
kvm_put_tscdeadline_msr(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + int ret; + + if (!has_msr_tsc_deadline) { + return 0; + } + + ret = kvm_put_one_msr(cpu, MSR_IA32_TSCDEADLINE, env->tsc_deadline); + if (ret < 0) { + return ret; + } + + assert(ret == 1); + return 0; +} + +/* + * Provide a separate write service for the feature control MSR in order to + * kick the VCPU out of VMXON or even guest mode on reset. This has to be done + * before writing any other state because forcibly leaving nested mode + * invalidates the VCPU state. + */ +static int kvm_put_msr_feature_control(X86CPU *cpu) +{ + int ret; + + if (!has_msr_feature_control) { + return 0; + } + + ret = kvm_put_one_msr(cpu, MSR_IA32_FEATURE_CONTROL, + cpu->env.msr_ia32_feature_control); + if (ret < 0) { + return ret; + } + + assert(ret == 1); + return 0; +} + +static uint64_t make_vmx_msr_value(uint32_t index, uint32_t features) +{ + uint32_t default1, can_be_one, can_be_zero; + uint32_t must_be_one; + + switch (index) { + case MSR_IA32_VMX_TRUE_PINBASED_CTLS: + default1 = 0x00000016; + break; + case MSR_IA32_VMX_TRUE_PROCBASED_CTLS: + default1 = 0x0401e172; + break; + case MSR_IA32_VMX_TRUE_ENTRY_CTLS: + default1 = 0x000011ff; + break; + case MSR_IA32_VMX_TRUE_EXIT_CTLS: + default1 = 0x00036dff; + break; + case MSR_IA32_VMX_PROCBASED_CTLS2: + default1 = 0; + break; + default: + abort(); + } + + /* If a feature bit is set, the control can be either set or clear. + * Otherwise the value is limited to either 0 or 1 by default1. + */ + can_be_one = features | default1; + can_be_zero = features | ~default1; + must_be_one = ~can_be_zero; + + /* + * Bit 0:31 -> 0 if the control bit can be zero (i.e. 1 if it must be one). + * Bit 32:63 -> 1 if the control bit can be one. 
+ */ + return must_be_one | (((uint64_t)can_be_one) << 32); +} + +static void kvm_msr_entry_add_vmx(X86CPU *cpu, FeatureWordArray f) +{ + uint64_t kvm_vmx_basic = + kvm_arch_get_supported_msr_feature(kvm_state, + MSR_IA32_VMX_BASIC); + + if (!kvm_vmx_basic) { + /* If the kernel doesn't support VMX feature (kvm_intel.nested=0), + * then kvm_vmx_basic will be 0 and KVM_SET_MSR will fail. + */ + return; + } + + uint64_t kvm_vmx_misc = + kvm_arch_get_supported_msr_feature(kvm_state, + MSR_IA32_VMX_MISC); + uint64_t kvm_vmx_ept_vpid = + kvm_arch_get_supported_msr_feature(kvm_state, + MSR_IA32_VMX_EPT_VPID_CAP); + + /* + * If the guest is 64-bit, a value of 1 is allowed for the host address + * space size vmexit control. + */ + uint64_t fixed_vmx_exit = f[FEAT_8000_0001_EDX] & CPUID_EXT2_LM + ? (uint64_t)VMX_VM_EXIT_HOST_ADDR_SPACE_SIZE << 32 : 0; + + /* + * Bits 0-30, 32-44 and 50-53 come from the host. KVM should + * not change them for backwards compatibility. + */ + uint64_t fixed_vmx_basic = kvm_vmx_basic & + (MSR_VMX_BASIC_VMCS_REVISION_MASK | + MSR_VMX_BASIC_VMXON_REGION_SIZE_MASK | + MSR_VMX_BASIC_VMCS_MEM_TYPE_MASK); + + /* + * Same for bits 0-4 and 25-27. Bits 16-24 (CR3 target count) can + * change in the future but are always zero for now, clear them to be + * future proof. Bits 32-63 in theory could change, though KVM does + * not support dual-monitor treatment and probably never will; mask + * them out as well. + */ + uint64_t fixed_vmx_misc = kvm_vmx_misc & + (MSR_VMX_MISC_PREEMPTION_TIMER_SHIFT_MASK | + MSR_VMX_MISC_MAX_MSR_LIST_SIZE_MASK); + + /* + * EPT memory types should not change either, so we do not bother + * adding features for them. + */ + uint64_t fixed_vmx_ept_mask = + (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_ENABLE_EPT ? 
+ MSR_VMX_EPT_UC | MSR_VMX_EPT_WB : 0); + uint64_t fixed_vmx_ept_vpid = kvm_vmx_ept_vpid & fixed_vmx_ept_mask; + + kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS, + make_vmx_msr_value(MSR_IA32_VMX_TRUE_PROCBASED_CTLS, + f[FEAT_VMX_PROCBASED_CTLS])); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS, + make_vmx_msr_value(MSR_IA32_VMX_TRUE_PINBASED_CTLS, + f[FEAT_VMX_PINBASED_CTLS])); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_EXIT_CTLS, + make_vmx_msr_value(MSR_IA32_VMX_TRUE_EXIT_CTLS, + f[FEAT_VMX_EXIT_CTLS]) | fixed_vmx_exit); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS, + make_vmx_msr_value(MSR_IA32_VMX_TRUE_ENTRY_CTLS, + f[FEAT_VMX_ENTRY_CTLS])); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_PROCBASED_CTLS2, + make_vmx_msr_value(MSR_IA32_VMX_PROCBASED_CTLS2, + f[FEAT_VMX_SECONDARY_CTLS])); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_EPT_VPID_CAP, + f[FEAT_VMX_EPT_VPID_CAPS] | fixed_vmx_ept_vpid); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_BASIC, + f[FEAT_VMX_BASIC] | fixed_vmx_basic); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_MISC, + f[FEAT_VMX_MISC] | fixed_vmx_misc); + if (has_msr_vmx_vmfunc) { + kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMFUNC, f[FEAT_VMX_VMFUNC]); + } + + /* + * Just to be safe, write these with constant values. The CRn_FIXED1 + * MSRs are generated by KVM based on the vCPU's CPUID. + */ + kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR0_FIXED0, + CR0_PE_MASK | CR0_PG_MASK | CR0_NE_MASK); + kvm_msr_entry_add(cpu, MSR_IA32_VMX_CR4_FIXED0, + CR4_VMXE_MASK); + + if (f[FEAT_VMX_SECONDARY_CTLS] & VMX_SECONDARY_EXEC_TSC_SCALING) { + /* TSC multiplier (0x2032). */ + kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x32); + } else { + /* Preemption timer (0x482E). 
*/ + kvm_msr_entry_add(cpu, MSR_IA32_VMX_VMCS_ENUM, 0x2E); + } +} + +static void kvm_msr_entry_add_perf(X86CPU *cpu, FeatureWordArray f) +{ + uint64_t kvm_perf_cap = + kvm_arch_get_supported_msr_feature(kvm_state, + MSR_IA32_PERF_CAPABILITIES); + + if (kvm_perf_cap) { + kvm_msr_entry_add(cpu, MSR_IA32_PERF_CAPABILITIES, + kvm_perf_cap & f[FEAT_PERF_CAPABILITIES]); + } +} + +static int kvm_buf_set_msrs(X86CPU *cpu) +{ + int ret = kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MSRS, cpu->kvm_msr_buf); + if (ret < 0) { + return ret; + } + + if (ret < cpu->kvm_msr_buf->nmsrs) { + struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; + error_report("error: failed to set MSR 0x%" PRIx32 " to 0x%" PRIx64, + (uint32_t)e->index, (uint64_t)e->data); + } + + assert(ret == cpu->kvm_msr_buf->nmsrs); + return 0; +} + +static void kvm_init_msrs(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + + kvm_msr_buf_reset(cpu); + if (has_msr_arch_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_ARCH_CAPABILITIES, + env->features[FEAT_ARCH_CAPABILITIES]); + } + + if (has_msr_core_capabs) { + kvm_msr_entry_add(cpu, MSR_IA32_CORE_CAPABILITY, + env->features[FEAT_CORE_CAPABILITY]); + } + + if (has_msr_perf_capabs && cpu->enable_pmu) { + kvm_msr_entry_add_perf(cpu, env->features); + } + + if (has_msr_ucode_rev) { + kvm_msr_entry_add(cpu, MSR_IA32_UCODE_REV, cpu->ucode_rev); + } + + /* + * Older kernels do not include VMX MSRs in KVM_GET_MSR_INDEX_LIST, but + * all kernels with MSR features should have them. 
+ */ + if (kvm_feature_msrs && cpu_has_vmx(env)) { + kvm_msr_entry_add_vmx(cpu, env->features); + } + + assert(kvm_buf_set_msrs(cpu) == 0); +} + +static int kvm_put_msrs(X86CPU *cpu, int level) +{ + CPUX86State *env = &cpu->env; + int i; + + kvm_msr_buf_reset(cpu); + + kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_CS, env->sysenter_cs); + kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_ESP, env->sysenter_esp); + kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_EIP, env->sysenter_eip); + kvm_msr_entry_add(cpu, MSR_PAT, env->pat); + if (has_msr_star) { + kvm_msr_entry_add(cpu, MSR_STAR, env->star); + } + if (has_msr_hsave_pa) { + kvm_msr_entry_add(cpu, MSR_VM_HSAVE_PA, env->vm_hsave); + } + if (has_msr_tsc_aux) { + kvm_msr_entry_add(cpu, MSR_TSC_AUX, env->tsc_aux); + } + if (has_msr_tsc_adjust) { + kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, env->tsc_adjust); + } + if (has_msr_misc_enable) { + kvm_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE, + env->msr_ia32_misc_enable); + } + if (has_msr_smbase) { + kvm_msr_entry_add(cpu, MSR_IA32_SMBASE, env->smbase); + } + if (has_msr_smi_count) { + kvm_msr_entry_add(cpu, MSR_SMI_COUNT, env->msr_smi_count); + } + if (has_msr_pkrs) { + kvm_msr_entry_add(cpu, MSR_IA32_PKRS, env->pkrs); + } + if (has_msr_bndcfgs) { + kvm_msr_entry_add(cpu, MSR_IA32_BNDCFGS, env->msr_bndcfgs); + } + if (has_msr_xss) { + kvm_msr_entry_add(cpu, MSR_IA32_XSS, env->xss); + } + if (has_msr_umwait) { + kvm_msr_entry_add(cpu, MSR_IA32_UMWAIT_CONTROL, env->umwait); + } + if (has_msr_spec_ctrl) { + kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, env->spec_ctrl); + } + if (has_tsc_scale_msr) { + kvm_msr_entry_add(cpu, MSR_AMD64_TSC_RATIO, env->amd_tsc_scale_msr); + } + + if (has_msr_tsx_ctrl) { + kvm_msr_entry_add(cpu, MSR_IA32_TSX_CTRL, env->tsx_ctrl); + } + if (has_msr_virt_ssbd) { + kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, env->virt_ssbd); + } + +#ifdef TARGET_X86_64 + if (lm_capable_kernel) { + kvm_msr_entry_add(cpu, MSR_CSTAR, env->cstar); + kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, 
env->kernelgsbase); + kvm_msr_entry_add(cpu, MSR_FMASK, env->fmask); + kvm_msr_entry_add(cpu, MSR_LSTAR, env->lstar); + } +#endif + + /* + * The following MSRs have side effects on the guest or are too heavy + * for normal writeback. Limit them to reset or full state updates. + */ + if (level >= KVM_PUT_RESET_STATE) { + kvm_msr_entry_add(cpu, MSR_IA32_TSC, env->tsc); + kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, env->system_time_msr); + kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, env->wall_clock_msr); + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF_INT)) { + kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_INT, env->async_pf_int_msr); + } + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF)) { + kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_EN, env->async_pf_en_msr); + } + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_PV_EOI)) { + kvm_msr_entry_add(cpu, MSR_KVM_PV_EOI_EN, env->pv_eoi_en_msr); + } + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) { + kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, env->steal_time_msr); + } + + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_POLL_CONTROL)) { + kvm_msr_entry_add(cpu, MSR_KVM_POLL_CONTROL, env->poll_control_msr); + } + + if (has_architectural_pmu_version > 0) { + if (has_architectural_pmu_version > 1) { + /* Stop the counter. */ + kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0); + kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0); + } + + /* Set the counter values. 
*/ + for (i = 0; i < num_architectural_pmu_fixed_counters; i++) { + kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, + env->msr_fixed_counters[i]); + } + for (i = 0; i < num_architectural_pmu_gp_counters; i++) { + kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i, + env->msr_gp_counters[i]); + kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i, + env->msr_gp_evtsel[i]); + } + if (has_architectural_pmu_version > 1) { + kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS, + env->msr_global_status); + kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_OVF_CTRL, + env->msr_global_ovf_ctrl); + + /* Now start the PMU. */ + kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, + env->msr_fixed_ctr_ctrl); + kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, + env->msr_global_ctrl); + } + } + /* + * Hyper-V partition-wide MSRs: to avoid clearing them on cpu hot-add, + * only sync them to KVM on the first cpu + */ + if (current_cpu == first_cpu) { + if (has_msr_hv_hypercall) { + kvm_msr_entry_add(cpu, HV_X64_MSR_GUEST_OS_ID, + env->msr_hv_guest_os_id); + kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL, + env->msr_hv_hypercall); + } + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_TIME)) { + kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, + env->msr_hv_tsc); + } + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_REENLIGHTENMENT)) { + kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL, + env->msr_hv_reenlightenment_control); + kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL, + env->msr_hv_tsc_emulation_control); + kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, + env->msr_hv_tsc_emulation_status); + } +#ifdef CONFIG_SYNDBG + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNDBG) && + has_msr_hv_syndbg_options) { + kvm_msr_entry_add(cpu, HV_X64_MSR_SYNDBG_OPTIONS, + hyperv_syndbg_query_options()); + } +#endif + } + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC)) { + kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE, + env->msr_hv_vapic); + } + if (has_msr_hv_crash) { + int j; + + for (j = 0; j < 
HV_CRASH_PARAMS; j++) + kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_P0 + j, + env->msr_hv_crash_params[j]); + + kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_NOTIFY); + } + if (has_msr_hv_runtime) { + kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, env->msr_hv_runtime); + } + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VPINDEX) + && hv_vpindex_settable) { + kvm_msr_entry_add(cpu, HV_X64_MSR_VP_INDEX, + hyperv_vp_index(CPU(cpu))); + } + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) { + int j; + + kvm_msr_entry_add(cpu, HV_X64_MSR_SVERSION, HV_SYNIC_VERSION); + + kvm_msr_entry_add(cpu, HV_X64_MSR_SCONTROL, + env->msr_hv_synic_control); + kvm_msr_entry_add(cpu, HV_X64_MSR_SIEFP, + env->msr_hv_synic_evt_page); + kvm_msr_entry_add(cpu, HV_X64_MSR_SIMP, + env->msr_hv_synic_msg_page); + + for (j = 0; j < ARRAY_SIZE(env->msr_hv_synic_sint); j++) { + kvm_msr_entry_add(cpu, HV_X64_MSR_SINT0 + j, + env->msr_hv_synic_sint[j]); + } + } + if (has_msr_hv_stimer) { + int j; + + for (j = 0; j < ARRAY_SIZE(env->msr_hv_stimer_config); j++) { + kvm_msr_entry_add(cpu, HV_X64_MSR_STIMER0_CONFIG + j * 2, + env->msr_hv_stimer_config[j]); + } + + for (j = 0; j < ARRAY_SIZE(env->msr_hv_stimer_count); j++) { + kvm_msr_entry_add(cpu, HV_X64_MSR_STIMER0_COUNT + j * 2, + env->msr_hv_stimer_count[j]); + } + } + if (env->features[FEAT_1_EDX] & CPUID_MTRR) { + uint64_t phys_mask = MAKE_64BIT_MASK(0, cpu->phys_bits); + + kvm_msr_entry_add(cpu, MSR_MTRRdefType, env->mtrr_deftype); + kvm_msr_entry_add(cpu, MSR_MTRRfix64K_00000, env->mtrr_fixed[0]); + kvm_msr_entry_add(cpu, MSR_MTRRfix16K_80000, env->mtrr_fixed[1]); + kvm_msr_entry_add(cpu, MSR_MTRRfix16K_A0000, env->mtrr_fixed[2]); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C0000, env->mtrr_fixed[3]); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C8000, env->mtrr_fixed[4]); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D0000, env->mtrr_fixed[5]); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D8000, env->mtrr_fixed[6]); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E0000, 
env->mtrr_fixed[7]); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E8000, env->mtrr_fixed[8]); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F0000, env->mtrr_fixed[9]); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F8000, env->mtrr_fixed[10]); + for (i = 0; i < MSR_MTRRcap_VCNT; i++) { + /* The CPU GPs if we write to a bit above the physical limit of + * the host CPU (and KVM emulates that) + */ + uint64_t mask = env->mtrr_var[i].mask; + mask &= phys_mask; + + kvm_msr_entry_add(cpu, MSR_MTRRphysBase(i), + env->mtrr_var[i].base); + kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), mask); + } + } + if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) { + int addr_num = kvm_arch_get_supported_cpuid(kvm_state, + 0x14, 1, R_EAX) & 0x7; + + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL, + env->msr_rtit_ctrl); + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS, + env->msr_rtit_status); + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE, + env->msr_rtit_output_base); + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK, + env->msr_rtit_output_mask); + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CR3_MATCH, + env->msr_rtit_cr3_match); + for (i = 0; i < addr_num; i++) { + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i, + env->msr_rtit_addrs[i]); + } + } + + if (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_SGX_LC) { + kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH0, + env->msr_ia32_sgxlepubkeyhash[0]); + kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH1, + env->msr_ia32_sgxlepubkeyhash[1]); + kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH2, + env->msr_ia32_sgxlepubkeyhash[2]); + kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, + env->msr_ia32_sgxlepubkeyhash[3]); + } + + if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { + kvm_msr_entry_add(cpu, MSR_IA32_XFD, + env->msr_xfd); + kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, + env->msr_xfd_err); + } + + if (kvm_enabled() && cpu->enable_pmu && + (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { + uint64_t depth; + int ret; + + /* + * Only migrate Arch LBR states 
when the host Arch LBR depth + * equals that of source guest's, this is to avoid mismatch + * of guest/host config for the msr hence avoid unexpected + * misbehavior. + */ + ret = kvm_get_one_msr(cpu, MSR_ARCH_LBR_DEPTH, &depth); + + if (ret == 1 && !!depth && depth == env->msr_lbr_depth) { + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_CTL, env->msr_lbr_ctl); + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_DEPTH, env->msr_lbr_depth); + + for (i = 0; i < ARCH_LBR_NR_ENTRIES; i++) { + if (!env->lbr_records[i].from) { + continue; + } + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_FROM_0 + i, + env->lbr_records[i].from); + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_TO_0 + i, + env->lbr_records[i].to); + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_INFO_0 + i, + env->lbr_records[i].info); + } + } + } + + /* Note: MSR_IA32_FEATURE_CONTROL is written separately, see + * kvm_put_msr_feature_control. */ + } + + if (env->mcg_cap) { + kvm_msr_entry_add(cpu, MSR_MCG_STATUS, env->mcg_status); + kvm_msr_entry_add(cpu, MSR_MCG_CTL, env->mcg_ctl); + if (has_msr_mcg_ext_ctl) { + kvm_msr_entry_add(cpu, MSR_MCG_EXT_CTL, env->mcg_ext_ctl); + } + for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { + kvm_msr_entry_add(cpu, MSR_MC0_CTL + i, env->mce_banks[i]); + } + } + + return kvm_buf_set_msrs(cpu); +} + + +static int kvm_get_xsave(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + void *xsave = env->xsave_buf; + int type, ret; + + type = has_xsave2 ? 
KVM_GET_XSAVE2 : KVM_GET_XSAVE; + ret = kvm_vcpu_ioctl(CPU(cpu), type, xsave); + if (ret < 0) { + return ret; + } + x86_cpu_xrstor_all_areas(cpu, xsave, env->xsave_buf_len); + + return 0; +} + +static int kvm_get_xcrs(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + int i, ret; + struct kvm_xcrs xcrs; + + if (!has_xcrs) { + return 0; + } + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_XCRS, &xcrs); + if (ret < 0) { + return ret; + } + + for (i = 0; i < xcrs.nr_xcrs; i++) { + /* Only support xcr0 now */ + if (xcrs.xcrs[i].xcr == 0) { + env->xcr0 = xcrs.xcrs[i].value; + break; + } + } + return 0; +} + +static int kvm_get_sregs(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + struct kvm_sregs sregs; + int ret; + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS, &sregs); + if (ret < 0) { + return ret; + } + + /* + * The interrupt_bitmap is ignored because KVM_GET_SREGS is + * always preceded by KVM_GET_VCPU_EVENTS. + */ + + get_seg(&env->segs[R_CS], &sregs.cs); + get_seg(&env->segs[R_DS], &sregs.ds); + get_seg(&env->segs[R_ES], &sregs.es); + get_seg(&env->segs[R_FS], &sregs.fs); + get_seg(&env->segs[R_GS], &sregs.gs); + get_seg(&env->segs[R_SS], &sregs.ss); + + get_seg(&env->tr, &sregs.tr); + get_seg(&env->ldt, &sregs.ldt); + + env->idt.limit = sregs.idt.limit; + env->idt.base = sregs.idt.base; + env->gdt.limit = sregs.gdt.limit; + env->gdt.base = sregs.gdt.base; + + env->cr[0] = sregs.cr0; + env->cr[2] = sregs.cr2; + env->cr[3] = sregs.cr3; + env->cr[4] = sregs.cr4; + + env->efer = sregs.efer; + if (sev_es_enabled() && env->efer & MSR_EFER_LME && + env->cr[0] & CR0_PG_MASK) { + env->efer |= MSR_EFER_LMA; + } + + /* changes to apic base and cr8/tpr are read back via kvm_arch_post_run */ + x86_update_hflags(env); + + return 0; +} + +static int kvm_get_sregs2(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + struct kvm_sregs2 sregs; + int i, ret; + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_SREGS2, &sregs); + if (ret < 0) { + return ret; + } + + get_seg(&env->segs[R_CS], 
&sregs.cs); + get_seg(&env->segs[R_DS], &sregs.ds); + get_seg(&env->segs[R_ES], &sregs.es); + get_seg(&env->segs[R_FS], &sregs.fs); + get_seg(&env->segs[R_GS], &sregs.gs); + get_seg(&env->segs[R_SS], &sregs.ss); + + get_seg(&env->tr, &sregs.tr); + get_seg(&env->ldt, &sregs.ldt); + + env->idt.limit = sregs.idt.limit; + env->idt.base = sregs.idt.base; + env->gdt.limit = sregs.gdt.limit; + env->gdt.base = sregs.gdt.base; + + env->cr[0] = sregs.cr0; + env->cr[2] = sregs.cr2; + env->cr[3] = sregs.cr3; + env->cr[4] = sregs.cr4; + + env->efer = sregs.efer; + if (sev_es_enabled() && env->efer & MSR_EFER_LME && + env->cr[0] & CR0_PG_MASK) { + env->efer |= MSR_EFER_LMA; + } + + env->pdptrs_valid = sregs.flags & KVM_SREGS2_FLAGS_PDPTRS_VALID; + + if (env->pdptrs_valid) { + for (i = 0; i < 4; i++) { + env->pdptrs[i] = sregs.pdptrs[i]; + } + } + + /* changes to apic base and cr8/tpr are read back via kvm_arch_post_run */ + x86_update_hflags(env); + + return 0; +} + +static int kvm_get_msrs(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + struct kvm_msr_entry *msrs = cpu->kvm_msr_buf->entries; + int ret, i; + uint64_t mtrr_top_bits; + + kvm_msr_buf_reset(cpu); + + kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_CS, 0); + kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_ESP, 0); + kvm_msr_entry_add(cpu, MSR_IA32_SYSENTER_EIP, 0); + kvm_msr_entry_add(cpu, MSR_PAT, 0); + if (has_msr_star) { + kvm_msr_entry_add(cpu, MSR_STAR, 0); + } + if (has_msr_hsave_pa) { + kvm_msr_entry_add(cpu, MSR_VM_HSAVE_PA, 0); + } + if (has_msr_tsc_aux) { + kvm_msr_entry_add(cpu, MSR_TSC_AUX, 0); + } + if (has_msr_tsc_adjust) { + kvm_msr_entry_add(cpu, MSR_TSC_ADJUST, 0); + } + if (has_msr_tsc_deadline) { + kvm_msr_entry_add(cpu, MSR_IA32_TSCDEADLINE, 0); + } + if (has_msr_misc_enable) { + kvm_msr_entry_add(cpu, MSR_IA32_MISC_ENABLE, 0); + } + if (has_msr_smbase) { + kvm_msr_entry_add(cpu, MSR_IA32_SMBASE, 0); + } + if (has_msr_smi_count) { + kvm_msr_entry_add(cpu, MSR_SMI_COUNT, 0); + } + if (has_msr_feature_control) 
{ + kvm_msr_entry_add(cpu, MSR_IA32_FEATURE_CONTROL, 0); + } + if (has_msr_pkrs) { + kvm_msr_entry_add(cpu, MSR_IA32_PKRS, 0); + } + if (has_msr_bndcfgs) { + kvm_msr_entry_add(cpu, MSR_IA32_BNDCFGS, 0); + } + if (has_msr_xss) { + kvm_msr_entry_add(cpu, MSR_IA32_XSS, 0); + } + if (has_msr_umwait) { + kvm_msr_entry_add(cpu, MSR_IA32_UMWAIT_CONTROL, 0); + } + if (has_msr_spec_ctrl) { + kvm_msr_entry_add(cpu, MSR_IA32_SPEC_CTRL, 0); + } + if (has_tsc_scale_msr) { + kvm_msr_entry_add(cpu, MSR_AMD64_TSC_RATIO, 0); + } + + if (has_msr_tsx_ctrl) { + kvm_msr_entry_add(cpu, MSR_IA32_TSX_CTRL, 0); + } + if (has_msr_virt_ssbd) { + kvm_msr_entry_add(cpu, MSR_VIRT_SSBD, 0); + } + if (!env->tsc_valid) { + kvm_msr_entry_add(cpu, MSR_IA32_TSC, 0); + env->tsc_valid = !runstate_is_running(); + } + +#ifdef TARGET_X86_64 + if (lm_capable_kernel) { + kvm_msr_entry_add(cpu, MSR_CSTAR, 0); + kvm_msr_entry_add(cpu, MSR_KERNELGSBASE, 0); + kvm_msr_entry_add(cpu, MSR_FMASK, 0); + kvm_msr_entry_add(cpu, MSR_LSTAR, 0); + } +#endif + kvm_msr_entry_add(cpu, MSR_KVM_SYSTEM_TIME, 0); + kvm_msr_entry_add(cpu, MSR_KVM_WALL_CLOCK, 0); + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF_INT)) { + kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_INT, 0); + } + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_ASYNC_PF)) { + kvm_msr_entry_add(cpu, MSR_KVM_ASYNC_PF_EN, 0); + } + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_PV_EOI)) { + kvm_msr_entry_add(cpu, MSR_KVM_PV_EOI_EN, 0); + } + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_STEAL_TIME)) { + kvm_msr_entry_add(cpu, MSR_KVM_STEAL_TIME, 0); + } + if (env->features[FEAT_KVM] & (1 << KVM_FEATURE_POLL_CONTROL)) { + kvm_msr_entry_add(cpu, MSR_KVM_POLL_CONTROL, 1); + } + if (has_architectural_pmu_version > 0) { + if (has_architectural_pmu_version > 1) { + kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR_CTRL, 0); + kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_CTRL, 0); + kvm_msr_entry_add(cpu, MSR_CORE_PERF_GLOBAL_STATUS, 0); + kvm_msr_entry_add(cpu, 
MSR_CORE_PERF_GLOBAL_OVF_CTRL, 0); + } + for (i = 0; i < num_architectural_pmu_fixed_counters; i++) { + kvm_msr_entry_add(cpu, MSR_CORE_PERF_FIXED_CTR0 + i, 0); + } + for (i = 0; i < num_architectural_pmu_gp_counters; i++) { + kvm_msr_entry_add(cpu, MSR_P6_PERFCTR0 + i, 0); + kvm_msr_entry_add(cpu, MSR_P6_EVNTSEL0 + i, 0); + } + } + + if (env->mcg_cap) { + kvm_msr_entry_add(cpu, MSR_MCG_STATUS, 0); + kvm_msr_entry_add(cpu, MSR_MCG_CTL, 0); + if (has_msr_mcg_ext_ctl) { + kvm_msr_entry_add(cpu, MSR_MCG_EXT_CTL, 0); + } + for (i = 0; i < (env->mcg_cap & 0xff) * 4; i++) { + kvm_msr_entry_add(cpu, MSR_MC0_CTL + i, 0); + } + } + + if (has_msr_hv_hypercall) { + kvm_msr_entry_add(cpu, HV_X64_MSR_HYPERCALL, 0); + kvm_msr_entry_add(cpu, HV_X64_MSR_GUEST_OS_ID, 0); + } + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_VAPIC)) { + kvm_msr_entry_add(cpu, HV_X64_MSR_APIC_ASSIST_PAGE, 0); + } + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_TIME)) { + kvm_msr_entry_add(cpu, HV_X64_MSR_REFERENCE_TSC, 0); + } + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_REENLIGHTENMENT)) { + kvm_msr_entry_add(cpu, HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0); + kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_CONTROL, 0); + kvm_msr_entry_add(cpu, HV_X64_MSR_TSC_EMULATION_STATUS, 0); + } + if (has_msr_hv_syndbg_options) { + kvm_msr_entry_add(cpu, HV_X64_MSR_SYNDBG_OPTIONS, 0); + } + if (has_msr_hv_crash) { + int j; + + for (j = 0; j < HV_CRASH_PARAMS; j++) { + kvm_msr_entry_add(cpu, HV_X64_MSR_CRASH_P0 + j, 0); + } + } + if (has_msr_hv_runtime) { + kvm_msr_entry_add(cpu, HV_X64_MSR_VP_RUNTIME, 0); + } + if (hyperv_feat_enabled(cpu, HYPERV_FEAT_SYNIC)) { + uint32_t msr; + + kvm_msr_entry_add(cpu, HV_X64_MSR_SCONTROL, 0); + kvm_msr_entry_add(cpu, HV_X64_MSR_SIEFP, 0); + kvm_msr_entry_add(cpu, HV_X64_MSR_SIMP, 0); + for (msr = HV_X64_MSR_SINT0; msr <= HV_X64_MSR_SINT15; msr++) { + kvm_msr_entry_add(cpu, msr, 0); + } + } + if (has_msr_hv_stimer) { + uint32_t msr; + + for (msr = HV_X64_MSR_STIMER0_CONFIG; msr <= 
HV_X64_MSR_STIMER3_COUNT; + msr++) { + kvm_msr_entry_add(cpu, msr, 0); + } + } + if (env->features[FEAT_1_EDX] & CPUID_MTRR) { + kvm_msr_entry_add(cpu, MSR_MTRRdefType, 0); + kvm_msr_entry_add(cpu, MSR_MTRRfix64K_00000, 0); + kvm_msr_entry_add(cpu, MSR_MTRRfix16K_80000, 0); + kvm_msr_entry_add(cpu, MSR_MTRRfix16K_A0000, 0); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C0000, 0); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_C8000, 0); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D0000, 0); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_D8000, 0); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E0000, 0); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_E8000, 0); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F0000, 0); + kvm_msr_entry_add(cpu, MSR_MTRRfix4K_F8000, 0); + for (i = 0; i < MSR_MTRRcap_VCNT; i++) { + kvm_msr_entry_add(cpu, MSR_MTRRphysBase(i), 0); + kvm_msr_entry_add(cpu, MSR_MTRRphysMask(i), 0); + } + } + + if (env->features[FEAT_7_0_EBX] & CPUID_7_0_EBX_INTEL_PT) { + int addr_num = + kvm_arch_get_supported_cpuid(kvm_state, 0x14, 1, R_EAX) & 0x7; + + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CTL, 0); + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_STATUS, 0); + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_BASE, 0); + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_OUTPUT_MASK, 0); + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_CR3_MATCH, 0); + for (i = 0; i < addr_num; i++) { + kvm_msr_entry_add(cpu, MSR_IA32_RTIT_ADDR0_A + i, 0); + } + } + + if (env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_SGX_LC) { + kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH0, 0); + kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH1, 0); + kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH2, 0); + kvm_msr_entry_add(cpu, MSR_IA32_SGXLEPUBKEYHASH3, 0); + } + + if (env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD) { + kvm_msr_entry_add(cpu, MSR_IA32_XFD, 0); + kvm_msr_entry_add(cpu, MSR_IA32_XFD_ERR, 0); + } + + if (kvm_enabled() && cpu->enable_pmu && + (env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR)) { + uint64_t depth; + + ret = kvm_get_one_msr(cpu, 
MSR_ARCH_LBR_DEPTH, &depth); + if (ret == 1 && depth == ARCH_LBR_NR_ENTRIES) { + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_CTL, 0); + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_DEPTH, 0); + + for (i = 0; i < ARCH_LBR_NR_ENTRIES; i++) { + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_FROM_0 + i, 0); + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_TO_0 + i, 0); + kvm_msr_entry_add(cpu, MSR_ARCH_LBR_INFO_0 + i, 0); + } + } + } + /* Read back every MSR queued above with a single KVM_GET_MSRS call. */ + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_MSRS, cpu->kvm_msr_buf); + if (ret < 0) { + return ret; + } + /* A count below nmsrs is treated as a failure on entries[ret]: name that MSR before the assert fires. */ + if (ret < cpu->kvm_msr_buf->nmsrs) { + struct kvm_msr_entry *e = &cpu->kvm_msr_buf->entries[ret]; + error_report("error: failed to get MSR 0x%" PRIx32, + (uint32_t)e->index); + } + + assert(ret == cpu->kvm_msr_buf->nmsrs); + /* + * MTRR masks: Each mask consists of 5 parts + * a 10..0: must be zero + * b 11 : valid bit + * c n-1..12: actual mask bits + * d 51..n: reserved must be zero + * e 63..52: reserved must be zero + * + * 'n' is the number of physical bits supported by the CPU and is + * apparently always <= 52. We know our 'n' but don't know what + * the destination's 'n' is; it might be smaller, in which case + * it masks (c) on loading. It might be larger, in which case + * we fill 'd' so that d..c is consistent irrespective of the 'n' + * we're migrating to. 
+ */ + + if (cpu->fill_mtrr_mask) { + QEMU_BUILD_BUG_ON(TARGET_PHYS_ADDR_SPACE_BITS > 52); + assert(cpu->phys_bits <= TARGET_PHYS_ADDR_SPACE_BITS); + mtrr_top_bits = MAKE_64BIT_MASK(cpu->phys_bits, 52 - cpu->phys_bits); + } else { + mtrr_top_bits = 0; + } + + for (i = 0; i < ret; i++) { + uint32_t index = msrs[i].index; + switch (index) { + case MSR_IA32_SYSENTER_CS: + env->sysenter_cs = msrs[i].data; + break; + case MSR_IA32_SYSENTER_ESP: + env->sysenter_esp = msrs[i].data; + break; + case MSR_IA32_SYSENTER_EIP: + env->sysenter_eip = msrs[i].data; + break; + case MSR_PAT: + env->pat = msrs[i].data; + break; + case MSR_STAR: + env->star = msrs[i].data; + break; +#ifdef TARGET_X86_64 + case MSR_CSTAR: + env->cstar = msrs[i].data; + break; + case MSR_KERNELGSBASE: + env->kernelgsbase = msrs[i].data; + break; + case MSR_FMASK: + env->fmask = msrs[i].data; + break; + case MSR_LSTAR: + env->lstar = msrs[i].data; + break; +#endif + case MSR_IA32_TSC: + env->tsc = msrs[i].data; + break; + case MSR_TSC_AUX: + env->tsc_aux = msrs[i].data; + break; + case MSR_TSC_ADJUST: + env->tsc_adjust = msrs[i].data; + break; + case MSR_IA32_TSCDEADLINE: + env->tsc_deadline = msrs[i].data; + break; + case MSR_VM_HSAVE_PA: + env->vm_hsave = msrs[i].data; + break; + case MSR_KVM_SYSTEM_TIME: + env->system_time_msr = msrs[i].data; + break; + case MSR_KVM_WALL_CLOCK: + env->wall_clock_msr = msrs[i].data; + break; + case MSR_MCG_STATUS: + env->mcg_status = msrs[i].data; + break; + case MSR_MCG_CTL: + env->mcg_ctl = msrs[i].data; + break; + case MSR_MCG_EXT_CTL: + env->mcg_ext_ctl = msrs[i].data; + break; + case MSR_IA32_MISC_ENABLE: + env->msr_ia32_misc_enable = msrs[i].data; + break; + case MSR_IA32_SMBASE: + env->smbase = msrs[i].data; + break; + case MSR_SMI_COUNT: + env->msr_smi_count = msrs[i].data; + break; + case MSR_IA32_FEATURE_CONTROL: + env->msr_ia32_feature_control = msrs[i].data; + break; + case MSR_IA32_BNDCFGS: + env->msr_bndcfgs = msrs[i].data; + break; + case MSR_IA32_XSS: + 
env->xss = msrs[i].data; + break; + case MSR_IA32_UMWAIT_CONTROL: + env->umwait = msrs[i].data; + break; + case MSR_IA32_PKRS: + env->pkrs = msrs[i].data; + break; + default: + if (msrs[i].index >= MSR_MC0_CTL && + msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) { + env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data; + } + break; + case MSR_KVM_ASYNC_PF_EN: + env->async_pf_en_msr = msrs[i].data; + break; + case MSR_KVM_ASYNC_PF_INT: + env->async_pf_int_msr = msrs[i].data; + break; + case MSR_KVM_PV_EOI_EN: + env->pv_eoi_en_msr = msrs[i].data; + break; + case MSR_KVM_STEAL_TIME: + env->steal_time_msr = msrs[i].data; + break; + case MSR_KVM_POLL_CONTROL: { + env->poll_control_msr = msrs[i].data; + break; + } + case MSR_CORE_PERF_FIXED_CTR_CTRL: + env->msr_fixed_ctr_ctrl = msrs[i].data; + break; + case MSR_CORE_PERF_GLOBAL_CTRL: + env->msr_global_ctrl = msrs[i].data; + break; + case MSR_CORE_PERF_GLOBAL_STATUS: + env->msr_global_status = msrs[i].data; + break; + case MSR_CORE_PERF_GLOBAL_OVF_CTRL: + env->msr_global_ovf_ctrl = msrs[i].data; + break; + case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR0 + MAX_FIXED_COUNTERS - 1: + env->msr_fixed_counters[index - MSR_CORE_PERF_FIXED_CTR0] = msrs[i].data; + break; + case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR0 + MAX_GP_COUNTERS - 1: + env->msr_gp_counters[index - MSR_P6_PERFCTR0] = msrs[i].data; + break; + case MSR_P6_EVNTSEL0 ... MSR_P6_EVNTSEL0 + MAX_GP_COUNTERS - 1: + env->msr_gp_evtsel[index - MSR_P6_EVNTSEL0] = msrs[i].data; + break; + case HV_X64_MSR_HYPERCALL: + env->msr_hv_hypercall = msrs[i].data; + break; + case HV_X64_MSR_GUEST_OS_ID: + env->msr_hv_guest_os_id = msrs[i].data; + break; + case HV_X64_MSR_APIC_ASSIST_PAGE: + env->msr_hv_vapic = msrs[i].data; + break; + case HV_X64_MSR_REFERENCE_TSC: + env->msr_hv_tsc = msrs[i].data; + break; + case HV_X64_MSR_CRASH_P0 ... 
HV_X64_MSR_CRASH_P4: + env->msr_hv_crash_params[index - HV_X64_MSR_CRASH_P0] = msrs[i].data; + break; + case HV_X64_MSR_VP_RUNTIME: + env->msr_hv_runtime = msrs[i].data; + break; + case HV_X64_MSR_SCONTROL: + env->msr_hv_synic_control = msrs[i].data; + break; + case HV_X64_MSR_SIEFP: + env->msr_hv_synic_evt_page = msrs[i].data; + break; + case HV_X64_MSR_SIMP: + env->msr_hv_synic_msg_page = msrs[i].data; + break; + case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15: + env->msr_hv_synic_sint[index - HV_X64_MSR_SINT0] = msrs[i].data; + break; + case HV_X64_MSR_STIMER0_CONFIG: + case HV_X64_MSR_STIMER1_CONFIG: + case HV_X64_MSR_STIMER2_CONFIG: + case HV_X64_MSR_STIMER3_CONFIG: + env->msr_hv_stimer_config[(index - HV_X64_MSR_STIMER0_CONFIG)/2] = + msrs[i].data; + break; + case HV_X64_MSR_STIMER0_COUNT: + case HV_X64_MSR_STIMER1_COUNT: + case HV_X64_MSR_STIMER2_COUNT: + case HV_X64_MSR_STIMER3_COUNT: + env->msr_hv_stimer_count[(index - HV_X64_MSR_STIMER0_COUNT)/2] = + msrs[i].data; + break; + case HV_X64_MSR_REENLIGHTENMENT_CONTROL: + env->msr_hv_reenlightenment_control = msrs[i].data; + break; + case HV_X64_MSR_TSC_EMULATION_CONTROL: + env->msr_hv_tsc_emulation_control = msrs[i].data; + break; + case HV_X64_MSR_TSC_EMULATION_STATUS: + env->msr_hv_tsc_emulation_status = msrs[i].data; + break; + case HV_X64_MSR_SYNDBG_OPTIONS: + env->msr_hv_syndbg_options = msrs[i].data; + break; + case MSR_MTRRdefType: + env->mtrr_deftype = msrs[i].data; + break; + case MSR_MTRRfix64K_00000: + env->mtrr_fixed[0] = msrs[i].data; + break; + case MSR_MTRRfix16K_80000: + env->mtrr_fixed[1] = msrs[i].data; + break; + case MSR_MTRRfix16K_A0000: + env->mtrr_fixed[2] = msrs[i].data; + break; + case MSR_MTRRfix4K_C0000: + env->mtrr_fixed[3] = msrs[i].data; + break; + case MSR_MTRRfix4K_C8000: + env->mtrr_fixed[4] = msrs[i].data; + break; + case MSR_MTRRfix4K_D0000: + env->mtrr_fixed[5] = msrs[i].data; + break; + case MSR_MTRRfix4K_D8000: + env->mtrr_fixed[6] = msrs[i].data; + break; + case 
MSR_MTRRfix4K_E0000: + env->mtrr_fixed[7] = msrs[i].data; + break; + case MSR_MTRRfix4K_E8000: + env->mtrr_fixed[8] = msrs[i].data; + break; + case MSR_MTRRfix4K_F0000: + env->mtrr_fixed[9] = msrs[i].data; + break; + case MSR_MTRRfix4K_F8000: + env->mtrr_fixed[10] = msrs[i].data; + break; + case MSR_MTRRphysBase(0) ... MSR_MTRRphysMask(MSR_MTRRcap_VCNT - 1): + if (index & 1) { + env->mtrr_var[MSR_MTRRphysIndex(index)].mask = msrs[i].data | + mtrr_top_bits; + } else { + env->mtrr_var[MSR_MTRRphysIndex(index)].base = msrs[i].data; + } + break; + case MSR_IA32_SPEC_CTRL: + env->spec_ctrl = msrs[i].data; + break; + case MSR_AMD64_TSC_RATIO: + env->amd_tsc_scale_msr = msrs[i].data; + break; + case MSR_IA32_TSX_CTRL: + env->tsx_ctrl = msrs[i].data; + break; + case MSR_VIRT_SSBD: + env->virt_ssbd = msrs[i].data; + break; + case MSR_IA32_RTIT_CTL: + env->msr_rtit_ctrl = msrs[i].data; + break; + case MSR_IA32_RTIT_STATUS: + env->msr_rtit_status = msrs[i].data; + break; + case MSR_IA32_RTIT_OUTPUT_BASE: + env->msr_rtit_output_base = msrs[i].data; + break; + case MSR_IA32_RTIT_OUTPUT_MASK: + env->msr_rtit_output_mask = msrs[i].data; + break; + case MSR_IA32_RTIT_CR3_MATCH: + env->msr_rtit_cr3_match = msrs[i].data; + break; + case MSR_IA32_RTIT_ADDR0_A ... MSR_IA32_RTIT_ADDR3_B: + env->msr_rtit_addrs[index - MSR_IA32_RTIT_ADDR0_A] = msrs[i].data; + break; + case MSR_IA32_SGXLEPUBKEYHASH0 ... MSR_IA32_SGXLEPUBKEYHASH3: + env->msr_ia32_sgxlepubkeyhash[index - MSR_IA32_SGXLEPUBKEYHASH0] = + msrs[i].data; + break; + case MSR_IA32_XFD: + env->msr_xfd = msrs[i].data; + break; + case MSR_IA32_XFD_ERR: + env->msr_xfd_err = msrs[i].data; + break; + case MSR_ARCH_LBR_CTL: + env->msr_lbr_ctl = msrs[i].data; + break; + case MSR_ARCH_LBR_DEPTH: + env->msr_lbr_depth = msrs[i].data; + break; + case MSR_ARCH_LBR_FROM_0 ... MSR_ARCH_LBR_FROM_0 + 31: + env->lbr_records[index - MSR_ARCH_LBR_FROM_0].from = msrs[i].data; + break; + case MSR_ARCH_LBR_TO_0 ... 
MSR_ARCH_LBR_TO_0 + 31: + env->lbr_records[index - MSR_ARCH_LBR_TO_0].to = msrs[i].data; + break; + case MSR_ARCH_LBR_INFO_0 ... MSR_ARCH_LBR_INFO_0 + 31: + env->lbr_records[index - MSR_ARCH_LBR_INFO_0].info = msrs[i].data; + break; + } + } + + return 0; +} + +/* Write the cached env.mp_state to KVM with KVM_SET_MP_STATE. */ +static int kvm_put_mp_state(X86CPU *cpu) +{ + struct kvm_mp_state mp_state = { .mp_state = cpu->env.mp_state }; + + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_MP_STATE, &mp_state); +} + +/* + * Read the MP state from KVM into env->mp_state; with an in-kernel irqchip + * cs->halted is derived from it as well. + */ +static int kvm_get_mp_state(X86CPU *cpu) +{ + CPUState *cs = CPU(cpu); + CPUX86State *env = &cpu->env; + struct kvm_mp_state mp_state; + int ret; + + ret = kvm_vcpu_ioctl(cs, KVM_GET_MP_STATE, &mp_state); + if (ret < 0) { + return ret; + } + env->mp_state = mp_state.mp_state; + if (kvm_irqchip_in_kernel()) { + cs->halted = (mp_state.mp_state == KVM_MP_STATE_HALTED); + } + return 0; +} + +/* + * Copy the in-kernel LAPIC state into the QEMU APIC device. Does nothing + * unless the vCPU has an APIC and the irqchip is in the kernel. + */ +static int kvm_get_apic(X86CPU *cpu) +{ + DeviceState *apic = cpu->apic_state; + struct kvm_lapic_state kapic; + int ret; + + if (apic && kvm_irqchip_in_kernel()) { + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_LAPIC, &kapic); + if (ret < 0) { + return ret; + } + + kvm_get_apic_state(apic, &kapic); + } + return 0; +} + +/* + * Push pending/injected exception, interrupt, NMI, SIPI, SMI and + * triple-fault state from env to KVM with KVM_SET_VCPU_EVENTS. + * + * @level selects how much is marked valid: from KVM_PUT_RESET_STATE upward + * nmi.pending (and, when a SIPI was received, sipi_vector) are included. + * + * Returns the ioctl result (negative on failure). + */ +static int kvm_put_vcpu_events(X86CPU *cpu, int level) +{ + CPUState *cs = CPU(cpu); + CPUX86State *env = &cpu->env; + struct kvm_vcpu_events events = {}; + + events.flags = 0; + + if (has_exception_payload) { + events.flags |= KVM_VCPUEVENT_VALID_PAYLOAD; + events.exception.pending = env->exception_pending; + events.exception_has_payload = env->exception_has_payload; + events.exception_payload = env->exception_payload; + } + events.exception.nr = env->exception_nr; + events.exception.injected = env->exception_injected; + events.exception.has_error_code = env->has_error_code; + events.exception.error_code = env->error_code; + + events.interrupt.injected = (env->interrupt_injected >= 0); + events.interrupt.nr = env->interrupt_injected; + events.interrupt.soft = env->soft_interrupt; + + events.nmi.injected = env->nmi_injected; + 
events.nmi.pending = env->nmi_pending; + events.nmi.masked = !!(env->hflags2 & HF2_NMI_MASK); + + events.sipi_vector = env->sipi_vector; + + if (has_msr_smbase) { + events.smi.smm = !!(env->hflags & HF_SMM_MASK); + events.smi.smm_inside_nmi = !!(env->hflags2 & HF2_SMM_INSIDE_NMI_MASK); + if (kvm_irqchip_in_kernel()) { + /* As soon as these are moved to the kernel, remove them + * from cs->interrupt_request. + * + * Collapse each masked value to 0/1 first: the smi.* fields are + * 8 bits wide in the KVM ABI, so a raw mask bit above bit 7 + * would be truncated to 0 and the event silently dropped. + */ + events.smi.pending = + !!(cs->interrupt_request & CPU_INTERRUPT_SMI); + events.smi.latched_init = + !!(cs->interrupt_request & CPU_INTERRUPT_INIT); + cs->interrupt_request &= ~(CPU_INTERRUPT_INIT | CPU_INTERRUPT_SMI); + } else { + /* Keep these in cs->interrupt_request. */ + events.smi.pending = 0; + events.smi.latched_init = 0; + } + /* Stop SMI delivery on old machine types to avoid a reboot + * on an inward migration of an old VM. + */ + if (!cpu->kvm_no_smi_migration) { + events.flags |= KVM_VCPUEVENT_VALID_SMM; + } + } + + if (level >= KVM_PUT_RESET_STATE) { + events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING; + if (env->mp_state == KVM_MP_STATE_SIPI_RECEIVED) { + events.flags |= KVM_VCPUEVENT_VALID_SIPI_VECTOR; + } + } + + if (has_triple_fault_event) { + events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT; + events.triple_fault.pending = env->triple_fault_pending; + } + + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_VCPU_EVENTS, &events); +} + +static int kvm_get_vcpu_events(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + struct kvm_vcpu_events events; + int ret; + + memset(&events, 0, sizeof(events)); + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_VCPU_EVENTS, &events); + if (ret < 0) { + return ret; + } + + if (events.flags & KVM_VCPUEVENT_VALID_PAYLOAD) { + env->exception_pending = events.exception.pending; + env->exception_has_payload = events.exception_has_payload; + env->exception_payload = events.exception_payload; + } else { + env->exception_pending = 0; + env->exception_has_payload = false; + } + env->exception_injected = events.exception.injected; + 
env->exception_nr = + (env->exception_pending || env->exception_injected) ? + events.exception.nr : -1; + env->has_error_code = events.exception.has_error_code; + env->error_code = events.exception.error_code; + + env->interrupt_injected = + events.interrupt.injected ? events.interrupt.nr : -1; + env->soft_interrupt = events.interrupt.soft; + + env->nmi_injected = events.nmi.injected; + env->nmi_pending = events.nmi.pending; + if (events.nmi.masked) { + env->hflags2 |= HF2_NMI_MASK; + } else { + env->hflags2 &= ~HF2_NMI_MASK; + } + + if (events.flags & KVM_VCPUEVENT_VALID_SMM) { + if (events.smi.smm) { + env->hflags |= HF_SMM_MASK; + } else { + env->hflags &= ~HF_SMM_MASK; + } + if (events.smi.pending) { + cpu_interrupt(CPU(cpu), CPU_INTERRUPT_SMI); + } else { + cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_SMI); + } + if (events.smi.smm_inside_nmi) { + env->hflags2 |= HF2_SMM_INSIDE_NMI_MASK; + } else { + env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK; + } + if (events.smi.latched_init) { + cpu_interrupt(CPU(cpu), CPU_INTERRUPT_INIT); + } else { + cpu_reset_interrupt(CPU(cpu), CPU_INTERRUPT_INIT); + } + } + + if (events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT) { + env->triple_fault_pending = events.triple_fault.pending; + } + + env->sipi_vector = events.sipi_vector; + + return 0; +} + +static int kvm_put_debugregs(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + struct kvm_debugregs dbgregs; + int i; + + memset(&dbgregs, 0, sizeof(dbgregs)); + for (i = 0; i < 4; i++) { + dbgregs.db[i] = env->dr[i]; + } + dbgregs.dr6 = env->dr[6]; + dbgregs.dr7 = env->dr[7]; + dbgregs.flags = 0; + + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_DEBUGREGS, &dbgregs); +} + +static int kvm_get_debugregs(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + struct kvm_debugregs dbgregs; + int i, ret; + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_DEBUGREGS, &dbgregs); + if (ret < 0) { + return ret; + } + for (i = 0; i < 4; i++) { + env->dr[i] = dbgregs.db[i]; + } + env->dr[4] = env->dr[6] = dbgregs.dr6; + 
env->dr[5] = env->dr[7] = dbgregs.dr7; + + return 0; +} +/* + * Push env->nested_state to KVM with KVM_SET_NESTED_STATE after refreshing + * its GUEST_MODE and GIF flags from env->hflags / env->hflags2. + * Returns 0 without touching KVM when env->nested_state is NULL. + */ +static int kvm_put_nested_state(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + int max_nested_state_len = kvm_max_nested_state_length(); + + if (!env->nested_state) { + return 0; + } + + /* + * Copy flags that are affected by reset from env->hflags and env->hflags2. + */ + if (env->hflags & HF_GUEST_MASK) { + env->nested_state->flags |= KVM_STATE_NESTED_GUEST_MODE; + } else { + env->nested_state->flags &= ~KVM_STATE_NESTED_GUEST_MODE; + } + + /* Don't set KVM_STATE_NESTED_GIF_SET on VMX as it is illegal */ + if (cpu_has_svm(env) && (env->hflags2 & HF2_GIF_MASK)) { + env->nested_state->flags |= KVM_STATE_NESTED_GIF_SET; + } else { + env->nested_state->flags &= ~KVM_STATE_NESTED_GIF_SET; + } + + assert(env->nested_state->size <= max_nested_state_len); + return kvm_vcpu_ioctl(CPU(cpu), KVM_SET_NESTED_STATE, env->nested_state); +} +/* + * Refresh env->nested_state from KVM with KVM_GET_NESTED_STATE. + * Returns 0 without touching KVM when env->nested_state is NULL. + */ +static int kvm_get_nested_state(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + int max_nested_state_len = kvm_max_nested_state_length(); + int ret; + + if (!env->nested_state) { + return 0; + } + + /* + * It is possible that migration restored a smaller size into + * nested_state->size than what our kernel supports. + * We preserve the migration-origin nested_state->size for the + * call to KVM_SET_NESTED_STATE, but want our next call + * to KVM_GET_NESTED_STATE to use the max size our kernel supports. + */ + env->nested_state->size = max_nested_state_len; + + ret = kvm_vcpu_ioctl(CPU(cpu), KVM_GET_NESTED_STATE, env->nested_state); + if (ret < 0) { + return ret; + } + + /* + * Copy flags that are affected by reset to env->hflags and env->hflags2. 
+ */ + if (env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE) { + env->hflags |= HF_GUEST_MASK; + } else { + env->hflags &= ~HF_GUEST_MASK; + } + + /* Keep HF2_GIF_MASK set on !SVM as x86_cpu_pending_interrupt() needs it */ + if (cpu_has_svm(env)) { + if (env->nested_state->flags & KVM_STATE_NESTED_GIF_SET) { + env->hflags2 |= HF2_GIF_MASK; + } else { + env->hflags2 &= ~HF2_GIF_MASK; + } + } + + return ret; +} + +int kvm_arch_put_registers(CPUState *cpu, int level) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + int ret; + + assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + + /* + * Put MSR_IA32_FEATURE_CONTROL first, this ensures the VM gets out of VMX + * root operation upon vCPU reset. kvm_put_msr_feature_control() should also + * precede kvm_put_nested_state() when 'real' nested state is set. + */ + if (level >= KVM_PUT_RESET_STATE) { + ret = kvm_put_msr_feature_control(x86_cpu); + if (ret < 0) { + return ret; + } + } + + /* must be before kvm_put_nested_state so that EFER.SVME is set */ + ret = has_sregs2 ? kvm_put_sregs2(x86_cpu) : kvm_put_sregs(x86_cpu); + if (ret < 0) { + return ret; + } + + if (level >= KVM_PUT_RESET_STATE) { + ret = kvm_put_nested_state(x86_cpu); + if (ret < 0) { + return ret; + } + } + + if (level == KVM_PUT_FULL_STATE) { + /* We don't check for kvm_arch_set_tsc_khz() errors here, + * because TSC frequency mismatch shouldn't abort migration, + * unless the user explicitly asked for a more strict TSC + * setting (e.g. using an explicit "tsc-freq" option). 
+ */ + kvm_arch_set_tsc_khz(cpu); + } + +#ifdef CONFIG_XEN_EMU + if (xen_mode == XEN_EMULATE && level == KVM_PUT_FULL_STATE) { + ret = kvm_put_xen_state(cpu); + if (ret < 0) { + return ret; + } + } +#endif + + ret = kvm_getput_regs(x86_cpu, 1); + if (ret < 0) { + return ret; + } + ret = kvm_put_xsave(x86_cpu); + if (ret < 0) { + return ret; + } + ret = kvm_put_xcrs(x86_cpu); + if (ret < 0) { + return ret; + } + ret = kvm_put_msrs(x86_cpu, level); + if (ret < 0) { + return ret; + } + ret = kvm_put_vcpu_events(x86_cpu, level); + if (ret < 0) { + return ret; + } + if (level >= KVM_PUT_RESET_STATE) { + ret = kvm_put_mp_state(x86_cpu); + if (ret < 0) { + return ret; + } + } + + ret = kvm_put_tscdeadline_msr(x86_cpu); + if (ret < 0) { + return ret; + } + ret = kvm_put_debugregs(x86_cpu); + if (ret < 0) { + return ret; + } + return 0; +} + +int kvm_arch_get_registers(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + int ret; + + assert(cpu_is_stopped(cs) || qemu_cpu_is_self(cs)); + + ret = kvm_get_vcpu_events(cpu); + if (ret < 0) { + goto out; + } + /* + * KVM_GET_MPSTATE can modify CS and RIP, call it before + * KVM_GET_REGS and KVM_GET_SREGS. + */ + ret = kvm_get_mp_state(cpu); + if (ret < 0) { + goto out; + } + ret = kvm_getput_regs(cpu, 0); + if (ret < 0) { + goto out; + } + ret = kvm_get_xsave(cpu); + if (ret < 0) { + goto out; + } + ret = kvm_get_xcrs(cpu); + if (ret < 0) { + goto out; + } + ret = has_sregs2 ? 
kvm_get_sregs2(cpu) : kvm_get_sregs(cpu); + if (ret < 0) { + goto out; + } + ret = kvm_get_msrs(cpu); + if (ret < 0) { + goto out; + } + ret = kvm_get_apic(cpu); + if (ret < 0) { + goto out; + } + ret = kvm_get_debugregs(cpu); + if (ret < 0) { + goto out; + } + ret = kvm_get_nested_state(cpu); + if (ret < 0) { + goto out; + } +#ifdef CONFIG_XEN_EMU + if (xen_mode == XEN_EMULATE) { + ret = kvm_get_xen_state(cs); + if (ret < 0) { + goto out; + } + } +#endif + ret = 0; + out: + cpu_sync_bndcs_hflags(&cpu->env); + return ret; +} + +void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + int ret; + + /* Inject NMI */ + if (cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { + if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { + bql_lock(); + cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; + bql_unlock(); + DPRINTF("injected NMI\n"); + ret = kvm_vcpu_ioctl(cpu, KVM_NMI); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n", + strerror(-ret)); + } + } + if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { + bql_lock(); + cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; + bql_unlock(); + DPRINTF("injected SMI\n"); + ret = kvm_vcpu_ioctl(cpu, KVM_SMI); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, SMI lost (%s)\n", + strerror(-ret)); + } + } + } + + if (!kvm_pic_in_kernel()) { + bql_lock(); + } + + /* Force the VCPU out of its inner loop to process any INIT requests + * or (for userspace APIC, but it is cheap to combine the checks here) + * pending TPR access reports. 
+ */ + if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { + if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + !(env->hflags & HF_SMM_MASK)) { + cpu->exit_request = 1; + } + if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { + cpu->exit_request = 1; + } + } + + if (!kvm_pic_in_kernel()) { + /* Try to inject an interrupt if the guest can accept it */ + if (run->ready_for_interrupt_injection && + (cpu->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK)) { + int irq; + + cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + irq = cpu_get_pic_interrupt(env); + if (irq >= 0) { + struct kvm_interrupt intr; + + intr.irq = irq; + DPRINTF("injected interrupt %d\n", irq); + ret = kvm_vcpu_ioctl(cpu, KVM_INTERRUPT, &intr); + if (ret < 0) { + fprintf(stderr, + "KVM: injection failed, interrupt lost (%s)\n", + strerror(-ret)); + } + } + } + + /* If we have an interrupt but the guest is not ready to receive an + * interrupt, request an interrupt window exit. This will + * cause a return to userspace as soon as the guest is ready to + * receive interrupts. 
*/ + if ((cpu->interrupt_request & CPU_INTERRUPT_HARD)) { + run->request_interrupt_window = 1; + } else { + run->request_interrupt_window = 0; + } + + DPRINTF("setting tpr\n"); + run->cr8 = cpu_get_apic_tpr(x86_cpu->apic_state); + + bql_unlock(); + } +} + +static void kvm_rate_limit_on_bus_lock(void) +{ + uint64_t delay_ns = ratelimit_calculate_delay(&bus_lock_ratelimit_ctrl, 1); + + if (delay_ns) { + g_usleep(delay_ns / SCALE_US); + } +} + +MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + + if (run->flags & KVM_RUN_X86_SMM) { + env->hflags |= HF_SMM_MASK; + } else { + env->hflags &= ~HF_SMM_MASK; + } + if (run->if_flag) { + env->eflags |= IF_MASK; + } else { + env->eflags &= ~IF_MASK; + } + if (run->flags & KVM_RUN_X86_BUS_LOCK) { + kvm_rate_limit_on_bus_lock(); + } + +#ifdef CONFIG_XEN_EMU + /* + * If the callback is asserted as a GSI (or PCI INTx) then check if + * vcpu_info->evtchn_upcall_pending has been cleared, and deassert + * the callback IRQ if so. Ideally we could hook into the PIC/IOAPIC + * EOI and only resample then, exactly how the VFIO eventfd pairs + * are designed to work for level triggered interrupts. + */ + if (x86_cpu->env.xen_callback_asserted) { + kvm_xen_maybe_deassert_callback(cpu); + } +#endif + + /* We need to protect the apic state against concurrent accesses from + * different threads in case the userspace irqchip is used. */ + if (!kvm_irqchip_in_kernel()) { + bql_lock(); + } + cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8); + cpu_set_apic_base(x86_cpu->apic_state, run->apic_base); + if (!kvm_irqchip_in_kernel()) { + bql_unlock(); + } + return cpu_get_mem_attrs(env); +} + +int kvm_arch_process_async_events(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + if (cs->interrupt_request & CPU_INTERRUPT_MCE) { + /* We must not raise CPU_INTERRUPT_MCE if it's not supported. 
*/ + assert(env->mcg_cap); + + cs->interrupt_request &= ~CPU_INTERRUPT_MCE; + + kvm_cpu_synchronize_state(cs); + + if (env->exception_nr == EXCP08_DBLE) { + /* this means triple fault */ + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + cs->exit_request = 1; + return 0; + } + kvm_queue_exception(env, EXCP12_MCHK, 0, 0); + env->has_error_code = 0; + + cs->halted = 0; + if (kvm_irqchip_in_kernel() && env->mp_state == KVM_MP_STATE_HALTED) { + env->mp_state = KVM_MP_STATE_RUNNABLE; + } + } + + if ((cs->interrupt_request & CPU_INTERRUPT_INIT) && + !(env->hflags & HF_SMM_MASK)) { + kvm_cpu_synchronize_state(cs); + do_cpu_init(cpu); + } + + if (kvm_irqchip_in_kernel()) { + return 0; + } + + if (cs->interrupt_request & CPU_INTERRUPT_POLL) { + cs->interrupt_request &= ~CPU_INTERRUPT_POLL; + apic_poll_irq(cpu->apic_state); + } + if (((cs->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK)) || + (cs->interrupt_request & CPU_INTERRUPT_NMI)) { + cs->halted = 0; + } + if (cs->interrupt_request & CPU_INTERRUPT_SIPI) { + kvm_cpu_synchronize_state(cs); + do_cpu_sipi(cpu); + } + if (cs->interrupt_request & CPU_INTERRUPT_TPR) { + cs->interrupt_request &= ~CPU_INTERRUPT_TPR; + kvm_cpu_synchronize_state(cs); + apic_handle_tpr_access_report(cpu->apic_state, env->eip, + env->tpr_access_type); + } + + return cs->halted; +} + +static int kvm_handle_halt(X86CPU *cpu) +{ + CPUState *cs = CPU(cpu); + CPUX86State *env = &cpu->env; + + if (!((cs->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK)) && + !(cs->interrupt_request & CPU_INTERRUPT_NMI)) { + cs->halted = 1; + return EXCP_HLT; + } + + return 0; +} + +static int kvm_handle_tpr_access(X86CPU *cpu) +{ + CPUState *cs = CPU(cpu); + struct kvm_run *run = cs->kvm_run; + + apic_handle_tpr_access_report(cpu->apic_state, run->tpr_access.rip, + run->tpr_access.is_write ? 
TPR_ACCESS_WRITE + : TPR_ACCESS_READ); + return 1; +} + +int kvm_arch_insert_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) +{ + static const uint8_t int3 = 0xcc; + + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 0) || + cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&int3, 1, 1)) { + return -EINVAL; + } + return 0; +} + +int kvm_arch_remove_sw_breakpoint(CPUState *cs, struct kvm_sw_breakpoint *bp) +{ + uint8_t int3; + + if (cpu_memory_rw_debug(cs, bp->pc, &int3, 1, 0)) { + return -EINVAL; + } + if (int3 != 0xcc) { + return 0; + } + if (cpu_memory_rw_debug(cs, bp->pc, (uint8_t *)&bp->saved_insn, 1, 1)) { + return -EINVAL; + } + return 0; +} + +static struct { + target_ulong addr; + int len; + int type; +} hw_breakpoint[4]; + +static int nb_hw_breakpoint; + +static int find_hw_breakpoint(target_ulong addr, int len, int type) +{ + int n; + + for (n = 0; n < nb_hw_breakpoint; n++) { + if (hw_breakpoint[n].addr == addr && hw_breakpoint[n].type == type && + (hw_breakpoint[n].len == len || len == -1)) { + return n; + } + } + return -1; +} + +int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type) +{ + switch (type) { + case GDB_BREAKPOINT_HW: + len = 1; + break; + case GDB_WATCHPOINT_WRITE: + case GDB_WATCHPOINT_ACCESS: + switch (len) { + case 1: + break; + case 2: + case 4: + case 8: + if (addr & (len - 1)) { + return -EINVAL; + } + break; + default: + return -EINVAL; + } + break; + default: + return -ENOSYS; + } + + if (nb_hw_breakpoint == 4) { + return -ENOBUFS; + } + if (find_hw_breakpoint(addr, len, type) >= 0) { + return -EEXIST; + } + hw_breakpoint[nb_hw_breakpoint].addr = addr; + hw_breakpoint[nb_hw_breakpoint].len = len; + hw_breakpoint[nb_hw_breakpoint].type = type; + nb_hw_breakpoint++; + + return 0; +} + +int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type) +{ + int n; + + n = find_hw_breakpoint(addr, (type == GDB_BREAKPOINT_HW) ? 
1 : len, type); + if (n < 0) { + return -ENOENT; + } + nb_hw_breakpoint--; + hw_breakpoint[n] = hw_breakpoint[nb_hw_breakpoint]; + + return 0; +} + +void kvm_arch_remove_all_hw_breakpoints(void) +{ + nb_hw_breakpoint = 0; +} + +static CPUWatchpoint hw_watchpoint; + +static int kvm_handle_debug(X86CPU *cpu, + struct kvm_debug_exit_arch *arch_info) +{ + CPUState *cs = CPU(cpu); + CPUX86State *env = &cpu->env; + int ret = 0; + int n; + + if (arch_info->exception == EXCP01_DB) { + if (arch_info->dr6 & DR6_BS) { + if (cs->singlestep_enabled) { + ret = EXCP_DEBUG; + } + } else { + for (n = 0; n < 4; n++) { + if (arch_info->dr6 & (1 << n)) { + switch ((arch_info->dr7 >> (16 + n*4)) & 0x3) { + case 0x0: + ret = EXCP_DEBUG; + break; + case 0x1: + ret = EXCP_DEBUG; + cs->watchpoint_hit = &hw_watchpoint; + hw_watchpoint.vaddr = hw_breakpoint[n].addr; + hw_watchpoint.flags = BP_MEM_WRITE; + break; + case 0x3: + ret = EXCP_DEBUG; + cs->watchpoint_hit = &hw_watchpoint; + hw_watchpoint.vaddr = hw_breakpoint[n].addr; + hw_watchpoint.flags = BP_MEM_ACCESS; + break; + } + } + } + } + } else if (kvm_find_sw_breakpoint(cs, arch_info->pc)) { + ret = EXCP_DEBUG; + } + if (ret == 0) { + cpu_synchronize_state(cs); + assert(env->exception_nr == -1); + + /* pass to guest */ + kvm_queue_exception(env, arch_info->exception, + arch_info->exception == EXCP01_DB, + arch_info->dr6); + env->has_error_code = 0; + } + + return ret; +} + +void kvm_arch_update_guest_debug(CPUState *cpu, struct kvm_guest_debug *dbg) +{ + const uint8_t type_code[] = { + [GDB_BREAKPOINT_HW] = 0x0, + [GDB_WATCHPOINT_WRITE] = 0x1, + [GDB_WATCHPOINT_ACCESS] = 0x3 + }; + const uint8_t len_code[] = { + [1] = 0x0, [2] = 0x1, [4] = 0x3, [8] = 0x2 + }; + int n; + + if (kvm_sw_breakpoints_active(cpu)) { + dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP; + } + if (nb_hw_breakpoint > 0) { + dbg->control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP; + dbg->arch.debugreg[7] = 0x0600; + for (n = 0; n < 
nb_hw_breakpoint; n++) { + dbg->arch.debugreg[n] = hw_breakpoint[n].addr; + dbg->arch.debugreg[7] |= (2 << (n * 2)) | + (type_code[hw_breakpoint[n].type] << (16 + n*4)) | + ((uint32_t)len_code[hw_breakpoint[n].len] << (18 + n*4)); + } + } +} + +static bool kvm_install_msr_filters(KVMState *s) +{ + uint64_t zero = 0; + struct kvm_msr_filter filter = { + .flags = KVM_MSR_FILTER_DEFAULT_ALLOW, + }; + int r, i, j = 0; + + for (i = 0; i < KVM_MSR_FILTER_MAX_RANGES; i++) { + KVMMSRHandlers *handler = &msr_handlers[i]; + if (handler->msr) { + struct kvm_msr_filter_range *range = &filter.ranges[j++]; + + *range = (struct kvm_msr_filter_range) { + .flags = 0, + .nmsrs = 1, + .base = handler->msr, + .bitmap = (__u8 *)&zero, + }; + + if (handler->rdmsr) { + range->flags |= KVM_MSR_FILTER_READ; + } + + if (handler->wrmsr) { + range->flags |= KVM_MSR_FILTER_WRITE; + } + } + } + + r = kvm_vm_ioctl(s, KVM_X86_SET_MSR_FILTER, &filter); + if (r) { + return false; + } + + return true; +} + +bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr, + QEMUWRMSRHandler *wrmsr) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(msr_handlers); i++) { + if (!msr_handlers[i].msr) { + msr_handlers[i] = (KVMMSRHandlers) { + .msr = msr, + .rdmsr = rdmsr, + .wrmsr = wrmsr, + }; + + if (!kvm_install_msr_filters(s)) { + msr_handlers[i] = (KVMMSRHandlers) { }; + return false; + } + + return true; + } + } + + return false; +} + +static int kvm_handle_rdmsr(X86CPU *cpu, struct kvm_run *run) +{ + int i; + bool r; + + for (i = 0; i < ARRAY_SIZE(msr_handlers); i++) { + KVMMSRHandlers *handler = &msr_handlers[i]; + if (run->msr.index == handler->msr) { + if (handler->rdmsr) { + r = handler->rdmsr(cpu, handler->msr, + (uint64_t *)&run->msr.data); + run->msr.error = r ? 
0 : 1; + return 0; + } + } + } + + assert(false); +} + +static int kvm_handle_wrmsr(X86CPU *cpu, struct kvm_run *run) +{ + int i; + bool r; + + for (i = 0; i < ARRAY_SIZE(msr_handlers); i++) { + KVMMSRHandlers *handler = &msr_handlers[i]; + if (run->msr.index == handler->msr) { + if (handler->wrmsr) { + r = handler->wrmsr(cpu, handler->msr, run->msr.data); + run->msr.error = r ? 0 : 1; + return 0; + } + } + } + + assert(false); +} + +static bool has_sgx_provisioning; + +static bool __kvm_enable_sgx_provisioning(KVMState *s) +{ + int fd, ret; + + if (!kvm_vm_check_extension(s, KVM_CAP_SGX_ATTRIBUTE)) { + return false; + } + + fd = qemu_open_old("/dev/sgx_provision", O_RDONLY); + if (fd < 0) { + return false; + } + + ret = kvm_vm_enable_cap(s, KVM_CAP_SGX_ATTRIBUTE, 0, fd); + if (ret) { + error_report("Could not enable SGX PROVISIONKEY: %s", strerror(-ret)); + exit(1); + } + close(fd); + return true; +} + +bool kvm_enable_sgx_provisioning(KVMState *s) +{ + return MEMORIZE(__kvm_enable_sgx_provisioning(s), has_sgx_provisioning); +} + +static bool host_supports_vmx(void) +{ + uint32_t ecx, unused; + + host_cpuid(1, 0, &unused, &unused, &ecx, &unused); + return ecx & CPUID_EXT_VMX; +} + +#define VMX_INVALID_GUEST_STATE 0x80000021 + +int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run) +{ + X86CPU *cpu = X86_CPU(cs); + uint64_t code; + int ret; + bool ctx_invalid; + char str[256]; + KVMState *state; + + switch (run->exit_reason) { + case KVM_EXIT_HLT: + DPRINTF("handle_hlt\n"); + bql_lock(); + ret = kvm_handle_halt(cpu); + bql_unlock(); + break; + case KVM_EXIT_SET_TPR: + ret = 0; + break; + case KVM_EXIT_TPR_ACCESS: + bql_lock(); + ret = kvm_handle_tpr_access(cpu); + bql_unlock(); + break; + case KVM_EXIT_FAIL_ENTRY: + code = run->fail_entry.hardware_entry_failure_reason; + fprintf(stderr, "KVM: entry failed, hardware error 0x%" PRIx64 "\n", + code); + if (host_supports_vmx() && code == VMX_INVALID_GUEST_STATE) { + fprintf(stderr, + "\nIf you're running a guest 
on an Intel machine without " + "unrestricted mode\n" + "support, the failure can be most likely due to the guest " + "entering an invalid\n" + "state for Intel VT. For example, the guest maybe running " + "in big real mode\n" + "which is not supported on less recent Intel processors." + "\n\n"); + } + ret = -1; + break; + case KVM_EXIT_EXCEPTION: + fprintf(stderr, "KVM: exception %d exit (error code 0x%x)\n", + run->ex.exception, run->ex.error_code); + ret = -1; + break; + case KVM_EXIT_DEBUG: + DPRINTF("kvm_exit_debug\n"); + bql_lock(); + ret = kvm_handle_debug(cpu, &run->debug.arch); + bql_unlock(); + break; + case KVM_EXIT_HYPERV: + ret = kvm_hv_handle_exit(cpu, &run->hyperv); + break; + case KVM_EXIT_IOAPIC_EOI: + ioapic_eoi_broadcast(run->eoi.vector); + ret = 0; + break; + case KVM_EXIT_X86_BUS_LOCK: + /* already handled in kvm_arch_post_run */ + ret = 0; + break; + case KVM_EXIT_NOTIFY: + ctx_invalid = !!(run->notify.flags & KVM_NOTIFY_CONTEXT_INVALID); + state = KVM_STATE(current_accel()); + sprintf(str, "Encounter a notify exit with %svalid context in" + " guest. There can be possible misbehaves in guest." + " Please have a look.", ctx_invalid ? 
"in" : ""); + if (ctx_invalid || + state->notify_vmexit == NOTIFY_VMEXIT_OPTION_INTERNAL_ERROR) { + warn_report("KVM internal error: %s", str); + ret = -1; + } else { + warn_report_once("KVM: %s", str); + ret = 0; + } + break; + case KVM_EXIT_X86_RDMSR: + /* We only enable MSR filtering, any other exit is bogus */ + assert(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER); + ret = kvm_handle_rdmsr(cpu, run); + break; + case KVM_EXIT_X86_WRMSR: + /* We only enable MSR filtering, any other exit is bogus */ + assert(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER); + ret = kvm_handle_wrmsr(cpu, run); + break; +#ifdef CONFIG_XEN_EMU + case KVM_EXIT_XEN: + ret = kvm_xen_handle_exit(cpu, &run->xen); + break; +#endif + default: + fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason); + ret = -1; + break; + } + + return ret; +} + +bool kvm_arch_stop_on_emulation_error(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + kvm_cpu_synchronize_state(cs); + return !(env->cr[0] & CR0_PE_MASK) || + ((env->segs[R_CS].selector & 3) != 3); +} + +void kvm_arch_init_irq_routing(KVMState *s) +{ + /* We know at this point that we're using the in-kernel + * irqchip, so we can use irqfds, and on x86 we know + * we can use msi via irqfd and GSI routing. + */ + kvm_msi_via_irqfd_allowed = true; + kvm_gsi_routing_allowed = true; + + if (kvm_irqchip_is_split()) { + KVMRouteChange c = kvm_irqchip_begin_route_changes(s); + int i; + + /* If the ioapic is in QEMU and the lapics are in KVM, reserve + MSI routes for signaling interrupts to the local apics. 
*/ + for (i = 0; i < IOAPIC_NUM_PINS; i++) { + if (kvm_irqchip_add_msi_route(&c, 0, NULL) < 0) { + error_report("Could not enable split IRQ mode."); + exit(1); + } + } + kvm_irqchip_commit_route_changes(&c); + } +} + +int kvm_arch_irqchip_create(KVMState *s) +{ + int ret; + if (kvm_kernel_irqchip_split()) { + ret = kvm_vm_enable_cap(s, KVM_CAP_SPLIT_IRQCHIP, 0, 24); + if (ret) { + error_report("Could not enable split irqchip mode: %s", + strerror(-ret)); + exit(1); + } else { + DPRINTF("Enabled KVM_CAP_SPLIT_IRQCHIP\n"); + kvm_split_irqchip = true; + return 1; + } + } else { + return 0; + } +} + +uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address) +{ + CPUX86State *env; + uint64_t ext_id; + + if (!first_cpu) { + return address; + } + env = &X86_CPU(first_cpu)->env; + if (!(env->features[FEAT_KVM] & (1 << KVM_FEATURE_MSI_EXT_DEST_ID))) { + return address; + } + + /* + * If the remappable format bit is set, or the upper bits are + * already set in address_hi, or the low extended bits aren't + * there anyway, do nothing. + */ + ext_id = address & (0xff << MSI_ADDR_DEST_IDX_SHIFT); + if (!ext_id || (ext_id & (1 << MSI_ADDR_DEST_IDX_SHIFT)) || (address >> 32)) { + return address; + } + + address &= ~ext_id; + address |= ext_id << 35; + return address; +} + +int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route, + uint64_t address, uint32_t data, PCIDevice *dev) +{ + X86IOMMUState *iommu = x86_iommu_get_default(); + + if (iommu) { + X86IOMMUClass *class = X86_IOMMU_DEVICE_GET_CLASS(iommu); + + if (class->int_remap) { + int ret; + MSIMessage src, dst; + + src.address = route->u.msi.address_hi; + src.address <<= VTD_MSI_ADDR_HI_SHIFT; + src.address |= route->u.msi.address_lo; + src.data = route->u.msi.data; + + ret = class->int_remap(iommu, &src, &dst, dev ? 
\ + pci_requester_id(dev) : \ + X86_IOMMU_SID_INVALID); + if (ret) { + trace_kvm_x86_fixup_msi_error(route->gsi); + return 1; + } + + /* + * Handled untranslated compatibility format interrupt with + * extended destination ID in the low bits 11-5. */ + dst.address = kvm_swizzle_msi_ext_dest_id(dst.address); + + route->u.msi.address_hi = dst.address >> VTD_MSI_ADDR_HI_SHIFT; + route->u.msi.address_lo = dst.address & VTD_MSI_ADDR_LO_MASK; + route->u.msi.data = dst.data; + return 0; + } + } + +#ifdef CONFIG_XEN_EMU + if (xen_mode == XEN_EMULATE) { + int handled = xen_evtchn_translate_pirq_msi(route, address, data); + + /* + * If it was a PIRQ and successfully routed (handled == 0) or it was + * an error (handled < 0), return. If it wasn't a PIRQ, keep going. + */ + if (handled <= 0) { + return handled; + } + } +#endif + + address = kvm_swizzle_msi_ext_dest_id(address); + route->u.msi.address_hi = address >> VTD_MSI_ADDR_HI_SHIFT; + route->u.msi.address_lo = address & VTD_MSI_ADDR_LO_MASK; + return 0; +} + +typedef struct MSIRouteEntry MSIRouteEntry; + +struct MSIRouteEntry { + PCIDevice *dev; /* Device pointer */ + int vector; /* MSI/MSIX vector index */ + int virq; /* Virtual IRQ index */ + QLIST_ENTRY(MSIRouteEntry) list; +}; + +/* List of used GSI routes */ +static QLIST_HEAD(, MSIRouteEntry) msi_route_list = \ + QLIST_HEAD_INITIALIZER(msi_route_list); + +void kvm_update_msi_routes_all(void *private, bool global, + uint32_t index, uint32_t mask) +{ + int cnt = 0, vector; + MSIRouteEntry *entry; + MSIMessage msg; + PCIDevice *dev; + + /* TODO: explicit route update */ + QLIST_FOREACH(entry, &msi_route_list, list) { + cnt++; + vector = entry->vector; + dev = entry->dev; + if (msix_enabled(dev) && !msix_is_masked(dev, vector)) { + msg = msix_get_message(dev, vector); + } else if (msi_enabled(dev) && !msi_is_masked(dev, vector)) { + msg = msi_get_message(dev, vector); + } else { + /* + * Either MSI/MSIX is disabled for the device, or the + * specific message was masked 
out. Skip this one. + */ + continue; + } + kvm_irqchip_update_msi_route(kvm_state, entry->virq, msg, dev); + } + kvm_irqchip_commit_routes(kvm_state); + trace_kvm_x86_update_msi_routes(cnt); +} + +int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route, + int vector, PCIDevice *dev) +{ + static bool notify_list_inited = false; + MSIRouteEntry *entry; + + if (!dev) { + /* These are (possibly) IOAPIC routes only used for split + * kernel irqchip mode, while what we are housekeeping are + * PCI devices only. */ + return 0; + } + + entry = g_new0(MSIRouteEntry, 1); + entry->dev = dev; + entry->vector = vector; + entry->virq = route->gsi; + QLIST_INSERT_HEAD(&msi_route_list, entry, list); + + trace_kvm_x86_add_msi_route(route->gsi); + + if (!notify_list_inited) { + /* For the first time we do add route, add ourselves into + * IOMMU's IEC notify list if needed. */ + X86IOMMUState *iommu = x86_iommu_get_default(); + if (iommu) { + x86_iommu_iec_register_notifier(iommu, + kvm_update_msi_routes_all, + NULL); + } + notify_list_inited = true; + } + return 0; +} + +int kvm_arch_release_virq_post(int virq) +{ + MSIRouteEntry *entry, *next; + QLIST_FOREACH_SAFE(entry, &msi_route_list, list, next) { + if (entry->virq == virq) { + trace_kvm_x86_remove_msi_route(virq); + QLIST_REMOVE(entry, list); + g_free(entry); + break; + } + } + return 0; +} + +int kvm_arch_msi_data_to_gsi(uint32_t data) +{ + abort(); +} + +bool kvm_has_waitpkg(void) +{ + return has_msr_umwait; +} + +bool kvm_arch_cpu_check_are_resettable(void) +{ + return !sev_es_enabled(); +} + +#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025 + +void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask) +{ + KVMState *s = kvm_state; + uint64_t supported; + + mask &= XSTATE_DYNAMIC_MASK; + if (!mask) { + return; + } + /* + * Just ignore bits that are not in CPUID[EAX=0xD,ECX=0]. + * ARCH_REQ_XCOMP_GUEST_PERM would fail, and QEMU has warned + * about them already because they are not supported features. 
+ */ + supported = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX); + supported |= (uint64_t)kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EDX) << 32; + mask &= supported; + + while (mask) { + int bit = ctz64(mask); + int rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, bit); + if (rc) { + /* + * Older kernel version (<5.17) do not support + * ARCH_REQ_XCOMP_GUEST_PERM, but also do not return + * any dynamic feature from kvm_arch_get_supported_cpuid. + */ + warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure " + "for feature bit %d", bit); + } + mask &= ~BIT_ULL(bit); + } +} + +static int kvm_arch_get_notify_vmexit(Object *obj, Error **errp) +{ + KVMState *s = KVM_STATE(obj); + return s->notify_vmexit; +} + +static void kvm_arch_set_notify_vmexit(Object *obj, int value, Error **errp) +{ + KVMState *s = KVM_STATE(obj); + + if (s->fd != -1) { + error_setg(errp, "Cannot set properties after the accelerator has been initialized"); + return; + } + + s->notify_vmexit = value; +} + +static void kvm_arch_get_notify_window(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + uint32_t value = s->notify_window; + + visit_type_uint32(v, name, &value, errp); +} + +static void kvm_arch_set_notify_window(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + uint32_t value; + + if (s->fd != -1) { + error_setg(errp, "Cannot set properties after the accelerator has been initialized"); + return; + } + + if (!visit_type_uint32(v, name, &value, errp)) { + return; + } + + s->notify_window = value; +} + +static void kvm_arch_get_xen_version(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + uint32_t value = s->xen_version; + + visit_type_uint32(v, name, &value, errp); +} + +static void kvm_arch_set_xen_version(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = 
KVM_STATE(obj); + Error *error = NULL; + uint32_t value; + + visit_type_uint32(v, name, &value, &error); + if (error) { + error_propagate(errp, error); + return; + } + + s->xen_version = value; + if (value && xen_mode == XEN_DISABLED) { + xen_mode = XEN_EMULATE; + } +} + +static void kvm_arch_get_xen_gnttab_max_frames(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + uint16_t value = s->xen_gnttab_max_frames; + + visit_type_uint16(v, name, &value, errp); +} + +static void kvm_arch_set_xen_gnttab_max_frames(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + Error *error = NULL; + uint16_t value; + + visit_type_uint16(v, name, &value, &error); + if (error) { + error_propagate(errp, error); + return; + } + + s->xen_gnttab_max_frames = value; +} + +static void kvm_arch_get_xen_evtchn_max_pirq(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + uint16_t value = s->xen_evtchn_max_pirq; + + visit_type_uint16(v, name, &value, errp); +} + +static void kvm_arch_set_xen_evtchn_max_pirq(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + KVMState *s = KVM_STATE(obj); + Error *error = NULL; + uint16_t value; + + visit_type_uint16(v, name, &value, &error); + if (error) { + error_propagate(errp, error); + return; + } + + s->xen_evtchn_max_pirq = value; +} + +void kvm_arch_accel_class_init(ObjectClass *oc) +{ + object_class_property_add_enum(oc, "notify-vmexit", "NotifyVMexitOption", + &NotifyVmexitOption_lookup, + kvm_arch_get_notify_vmexit, + kvm_arch_set_notify_vmexit); + object_class_property_set_description(oc, "notify-vmexit", + "Enable notify VM exit"); + + object_class_property_add(oc, "notify-window", "uint32", + kvm_arch_get_notify_window, + kvm_arch_set_notify_window, + NULL, NULL); + object_class_property_set_description(oc, "notify-window", + "Clock cycles 
without an event window " + "after which a notification VM exit occurs"); + + object_class_property_add(oc, "xen-version", "uint32", + kvm_arch_get_xen_version, + kvm_arch_set_xen_version, + NULL, NULL); + object_class_property_set_description(oc, "xen-version", + "Xen version to be emulated " + "(in XENVER_version form " + "e.g. 0x4000a for 4.10)"); + + object_class_property_add(oc, "xen-gnttab-max-frames", "uint16", + kvm_arch_get_xen_gnttab_max_frames, + kvm_arch_set_xen_gnttab_max_frames, + NULL, NULL); + object_class_property_set_description(oc, "xen-gnttab-max-frames", + "Maximum number of grant table frames"); + + object_class_property_add(oc, "xen-evtchn-max-pirq", "uint16", + kvm_arch_get_xen_evtchn_max_pirq, + kvm_arch_set_xen_evtchn_max_pirq, + NULL, NULL); + object_class_property_set_description(oc, "xen-evtchn-max-pirq", + "Maximum number of Xen PIRQs"); +} + +void kvm_set_max_apic_id(uint32_t max_apic_id) +{ + kvm_vm_enable_cap(kvm_state, KVM_CAP_MAX_VCPU_ID, 0, max_apic_id); +} diff --git a/target/i386/kvm/kvm_i386.h b/target/i386/kvm/kvm_i386.h new file mode 100644 index 0000000000..30fedcffea --- /dev/null +++ b/target/i386/kvm/kvm_i386.h @@ -0,0 +1,81 @@ +/* + * QEMU KVM support -- x86 specific functions. + * + * Copyright (c) 2012 Linaro Limited + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef QEMU_KVM_I386_H +#define QEMU_KVM_I386_H + +#include "sysemu/kvm.h" + +#ifdef CONFIG_KVM + +#define kvm_pit_in_kernel() \ + (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) +#define kvm_pic_in_kernel() \ + (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) +#define kvm_ioapic_in_kernel() \ + (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) + +#else + +#define kvm_pit_in_kernel() 0 +#define kvm_pic_in_kernel() 0 +#define kvm_ioapic_in_kernel() 0 + +#endif /* CONFIG_KVM */ + +bool kvm_has_smm(void); +bool kvm_enable_x2apic(void); +bool kvm_hv_vpindex_settable(void); + +bool kvm_enable_sgx_provisioning(KVMState *s); +bool kvm_hyperv_expand_features(X86CPU *cpu, Error **errp); + +void kvm_arch_reset_vcpu(X86CPU *cs); +void kvm_arch_after_reset_vcpu(X86CPU *cpu); +void kvm_arch_do_init_vcpu(X86CPU *cs); +uint32_t kvm_arch_get_supported_cpuid(KVMState *env, uint32_t function, + uint32_t index, int reg); +uint64_t kvm_arch_get_supported_msr_feature(KVMState *s, uint32_t index); + +void kvm_set_max_apic_id(uint32_t max_apic_id); +void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask); + +#ifdef CONFIG_KVM + +bool kvm_has_adjust_clock_stable(void); +bool kvm_has_exception_payload(void); +void kvm_synchronize_all_tsc(void); + +void kvm_get_apic_state(DeviceState *d, struct kvm_lapic_state *kapic); +void kvm_put_apicbase(X86CPU *cpu, uint64_t value); + +bool kvm_has_x2apic_api(void); +bool kvm_has_waitpkg(void); + +uint64_t kvm_swizzle_msi_ext_dest_id(uint64_t address); +void kvm_update_msi_routes_all(void *private, bool global, + uint32_t index, uint32_t mask); + +typedef bool QEMURDMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t *val); +typedef bool QEMUWRMSRHandler(X86CPU *cpu, uint32_t msr, uint64_t val); +typedef struct kvm_msr_handlers { + uint32_t msr; + QEMURDMSRHandler *rdmsr; + QEMUWRMSRHandler *wrmsr; +} KVMMSRHandlers; + +bool kvm_filter_msr(KVMState *s, uint32_t msr, QEMURDMSRHandler *rdmsr, + QEMUWRMSRHandler *wrmsr); + 
/*
 * Stub version of sev_kvm_init(): does no work and reports success.
 *
 * Neither @cgs nor @errp is read or written.
 */
int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp)
{
    /* If we get here, cgs must be some non-SEV thing */
    return 0;
}
+ +# kvm.c +kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" PRIu32 +kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d" +kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d" +kvm_x86_update_msi_routes(int num) "Updated %d MSI routes" + +# xen-emu.c +kvm_xen_hypercall(int cpu, uint8_t cpl, uint64_t input, uint64_t a0, uint64_t a1, uint64_t a2, uint64_t ret) "xen_hypercall: cpu %d cpl %d input %" PRIu64 " a0 0x%" PRIx64 " a1 0x%" PRIx64 " a2 0x%" PRIx64" ret 0x%" PRIx64 +kvm_xen_soft_reset(void) "" +kvm_xen_set_shared_info(uint64_t gfn) "shared info at gfn 0x%" PRIx64 +kvm_xen_set_vcpu_attr(int cpu, int type, uint64_t gpa) "vcpu attr cpu %d type %d gpa 0x%" PRIx64 +kvm_xen_set_vcpu_callback(int cpu, int vector) "callback vcpu %d vector %d" diff --git a/target/i386/kvm/trace.h b/target/i386/kvm/trace.h new file mode 100644 index 0000000000..46b75c6942 --- /dev/null +++ b/target/i386/kvm/trace.h @@ -0,0 +1 @@ +#include "trace/trace-target_i386_kvm.h" diff --git a/target/i386/kvm/xen-compat.h b/target/i386/kvm/xen-compat.h new file mode 100644 index 0000000000..7f30180cc2 --- /dev/null +++ b/target/i386/kvm/xen-compat.h @@ -0,0 +1,70 @@ +/* + * Xen HVM emulation support in KVM + * + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
#ifndef QEMU_I386_KVM_XEN_COMPAT_H
#define QEMU_I386_KVM_XEN_COMPAT_H

#include "hw/xen/interface/memory.h"

/* Fixed 32-bit representations of Xen's pfn, unsigned long and pointer. */
typedef uint32_t compat_pfn_t;
typedef uint32_t compat_ulong_t;
typedef uint32_t compat_ptr_t;

/*
 * Declare a guest handle type whose only storage is the 32-bit pointer
 * value 'c'. The zero-length, packed array member '_' occupies no space;
 * it only ties the handle to the pointed-to type.
 */
#define __DEFINE_COMPAT_HANDLE(name, type)      \
    typedef struct {                            \
        compat_ptr_t c;                         \
        type *_[0] __attribute__((packed));     \
    } __compat_handle_ ## name;                 \

#define DEFINE_COMPAT_HANDLE(name) __DEFINE_COMPAT_HANDLE(name, name)
#define COMPAT_HANDLE(name) __compat_handle_ ## name

DEFINE_COMPAT_HANDLE(compat_pfn_t);
DEFINE_COMPAT_HANDLE(compat_ulong_t);
DEFINE_COMPAT_HANDLE(int);

/* 32-bit layout of the XENMEM_add_to_physmap argument. */
struct compat_xen_add_to_physmap {
    domid_t domid;
    uint16_t size;
    unsigned int space;
    compat_ulong_t idx;
    compat_pfn_t gpfn;
};

/* 32-bit layout of the XENMEM_add_to_physmap_batch argument. */
struct compat_xen_add_to_physmap_batch {
    domid_t domid;
    uint16_t space;
    uint16_t size;
    uint16_t extra;
    COMPAT_HANDLE(compat_ulong_t) idxs;
    COMPAT_HANDLE(compat_pfn_t) gpfns;
    COMPAT_HANDLE(int) errs;
};

/* Packed layout of the PIRQ-mapping argument; no padding is inserted. */
struct compat_physdev_map_pirq {
    domid_t domid;
    uint16_t pad;
    /* IN */
    int type;
    /* IN (ignored for ..._MULTI_MSI) */
    int index;
    /* IN or OUT */
    int pirq;
    /* IN - high 16 bits hold segment for ..._MSI_SEG and ..._MULTI_MSI */
    int bus;
    /* IN */
    int devfn;
    /* IN (also OUT for ..._MULTI_MSI) */
    int entry_nr;
    /* IN */
    uint64_t table_base;
} __attribute__((packed));

#endif /* QEMU_I386_KVM_XEN_COMPAT_H */
+ * + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "qemu/error-report.h" +#include "hw/xen/xen.h" +#include "sysemu/kvm_int.h" +#include "sysemu/kvm_xen.h" +#include "kvm/kvm_i386.h" +#include "exec/address-spaces.h" +#include "xen-emu.h" +#include "trace.h" +#include "sysemu/runstate.h" + +#include "hw/pci/msi.h" +#include "hw/i386/apic-msidef.h" +#include "hw/i386/e820_memory_layout.h" +#include "hw/i386/kvm/xen_overlay.h" +#include "hw/i386/kvm/xen_evtchn.h" +#include "hw/i386/kvm/xen_gnttab.h" +#include "hw/i386/kvm/xen_primary_console.h" +#include "hw/i386/kvm/xen_xenstore.h" + +#include "hw/xen/interface/version.h" +#include "hw/xen/interface/sched.h" +#include "hw/xen/interface/memory.h" +#include "hw/xen/interface/hvm/hvm_op.h" +#include "hw/xen/interface/hvm/params.h" +#include "hw/xen/interface/vcpu.h" +#include "hw/xen/interface/event_channel.h" +#include "hw/xen/interface/grant_table.h" + +#include "xen-compat.h" + +static void xen_vcpu_singleshot_timer_event(void *opaque); +static void xen_vcpu_periodic_timer_event(void *opaque); +static int vcpuop_stop_singleshot_timer(CPUState *cs); + +#ifdef TARGET_X86_64 +#define hypercall_compat32(longmode) (!(longmode)) +#else +#define hypercall_compat32(longmode) (false) +#endif + +static bool kvm_gva_to_gpa(CPUState *cs, uint64_t gva, uint64_t *gpa, + size_t *len, bool is_write) +{ + struct kvm_translation tr = { + .linear_address = gva, + }; + + if (len) { + *len = TARGET_PAGE_SIZE - (gva & ~TARGET_PAGE_MASK); + } + + if (kvm_vcpu_ioctl(cs, KVM_TRANSLATE, &tr) || !tr.valid || + (is_write && !tr.writeable)) { + return false; + } + *gpa = tr.physical_address; + return true; +} + +static int kvm_gva_rw(CPUState *cs, uint64_t gva, void *_buf, size_t sz, + bool is_write) +{ + uint8_t *buf = (uint8_t *)_buf; + uint64_t gpa; + size_t len; + + while (sz) { + if (!kvm_gva_to_gpa(cs, gva, &gpa, &len, is_write)) { + return -EFAULT; + } + if (len > sz) { + len = sz; + } + + 
cpu_physical_memory_rw(gpa, buf, len, is_write); + + buf += len; + sz -= len; + gva += len; + } + + return 0; +} + +static inline int kvm_copy_from_gva(CPUState *cs, uint64_t gva, void *buf, + size_t sz) +{ + return kvm_gva_rw(cs, gva, buf, sz, false); +} + +static inline int kvm_copy_to_gva(CPUState *cs, uint64_t gva, void *buf, + size_t sz) +{ + return kvm_gva_rw(cs, gva, buf, sz, true); +} + +int kvm_xen_init(KVMState *s, uint32_t hypercall_msr) +{ + const int required_caps = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR | + KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL | KVM_XEN_HVM_CONFIG_SHARED_INFO; + struct kvm_xen_hvm_config cfg = { + .msr = hypercall_msr, + .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL, + }; + int xen_caps, ret; + + xen_caps = kvm_check_extension(s, KVM_CAP_XEN_HVM); + if (required_caps & ~xen_caps) { + error_report("kvm: Xen HVM guest support not present or insufficient"); + return -ENOSYS; + } + + if (xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND) { + struct kvm_xen_hvm_attr ha = { + .type = KVM_XEN_ATTR_TYPE_XEN_VERSION, + .u.xen_version = s->xen_version, + }; + (void)kvm_vm_ioctl(s, KVM_XEN_HVM_SET_ATTR, &ha); + + cfg.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND; + } + + ret = kvm_vm_ioctl(s, KVM_XEN_HVM_CONFIG, &cfg); + if (ret < 0) { + error_report("kvm: Failed to enable Xen HVM support: %s", + strerror(-ret)); + return ret; + } + + /* If called a second time, don't repeat the rest of the setup. */ + if (s->xen_caps) { + return 0; + } + + /* + * Event channel delivery via GSI/PCI_INTX needs to poll the vcpu_info + * of vCPU0 to deassert the IRQ when ->evtchn_upcall_pending is cleared. + * + * In the kernel, there's a notifier hook on the PIC/IOAPIC which allows + * such things to be polled at precisely the right time. We *could* do + * it nicely in the kernel: check vcpu_info[0]->evtchn_upcall_pending at + * the moment the IRQ is acked, and see if it should be reasserted. 
+ * + * But the in-kernel irqchip is deprecated, so we're unlikely to add + * that support in the kernel. Insist on using the split irqchip mode + * instead. + * + * This leaves us polling for the level going low in QEMU, which lacks + * the appropriate hooks in its PIC/IOAPIC code. Even VFIO is sending a + * spurious 'ack' to an INTX IRQ every time there's any MMIO access to + * the device (for which it has to unmap the device and trap access, for + * some period after an IRQ!!). In the Xen case, we do it on exit from + * KVM_RUN, if the flag is set to say that the GSI is currently asserted. + * Which is kind of icky, but less so than the VFIO one. I may fix them + * both later... + */ + if (!kvm_kernel_irqchip_split()) { + error_report("kvm: Xen support requires kernel-irqchip=split"); + return -EINVAL; + } + + s->xen_caps = xen_caps; + + /* Tell fw_cfg to notify the BIOS to reserve the range. */ + ret = e820_add_entry(XEN_SPECIAL_AREA_ADDR, XEN_SPECIAL_AREA_SIZE, + E820_RESERVED); + if (ret < 0) { + fprintf(stderr, "e820_add_entry() table is full\n"); + return ret; + } + + /* The pages couldn't be overlaid until KVM was initialized */ + xen_primary_console_reset(); + xen_xenstore_reset(); + + return 0; +} + +int kvm_xen_init_vcpu(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + int err; + + /* + * The kernel needs to know the Xen/ACPI vCPU ID because that's + * what the guest uses in hypercalls such as timers. It doesn't + * match the APIC ID which is generally used for talking to the + * kernel about vCPUs. And if vCPU threads race with creating + * their KVM vCPUs out of order, it doesn't necessarily match + * with the kernel's internal vCPU indices either. 
+ */ + if (kvm_xen_has_cap(EVTCHN_SEND)) { + struct kvm_xen_vcpu_attr va = { + .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID, + .u.vcpu_id = cs->cpu_index, + }; + err = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va); + if (err) { + error_report("kvm: Failed to set Xen vCPU ID attribute: %s", + strerror(-err)); + return err; + } + } + + env->xen_vcpu_info_gpa = INVALID_GPA; + env->xen_vcpu_info_default_gpa = INVALID_GPA; + env->xen_vcpu_time_info_gpa = INVALID_GPA; + env->xen_vcpu_runstate_gpa = INVALID_GPA; + + qemu_mutex_init(&env->xen_timers_lock); + env->xen_singleshot_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + xen_vcpu_singleshot_timer_event, + cpu); + if (!env->xen_singleshot_timer) { + return -ENOMEM; + } + env->xen_singleshot_timer->opaque = cs; + + env->xen_periodic_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, + xen_vcpu_periodic_timer_event, + cpu); + if (!env->xen_periodic_timer) { + return -ENOMEM; + } + env->xen_periodic_timer->opaque = cs; + + return 0; +} + +uint32_t kvm_xen_get_caps(void) +{ + return kvm_state->xen_caps; +} + +static bool kvm_xen_hcall_xen_version(struct kvm_xen_exit *exit, X86CPU *cpu, + int cmd, uint64_t arg) +{ + int err = 0; + + switch (cmd) { + case XENVER_get_features: { + struct xen_feature_info fi; + + /* No need for 32/64 compat handling */ + qemu_build_assert(sizeof(fi) == 8); + + err = kvm_copy_from_gva(CPU(cpu), arg, &fi, sizeof(fi)); + if (err) { + break; + } + + fi.submap = 0; + if (fi.submap_idx == 0) { + fi.submap |= 1 << XENFEAT_writable_page_tables | + 1 << XENFEAT_writable_descriptor_tables | + 1 << XENFEAT_auto_translated_physmap | + 1 << XENFEAT_hvm_callback_vector | + 1 << XENFEAT_hvm_safe_pvclock | + 1 << XENFEAT_hvm_pirqs; + } + + err = kvm_copy_to_gva(CPU(cpu), arg, &fi, sizeof(fi)); + break; + } + + default: + return false; + } + + exit->u.hcall.result = err; + return true; +} + +static int kvm_xen_set_vcpu_attr(CPUState *cs, uint16_t type, uint64_t gpa) +{ + struct kvm_xen_vcpu_attr xhsi; + + xhsi.type = type; + 
/*
 * Point this vCPU's vcpu_info at guest physical address @gpa.
 *
 * The address is first handed to the kernel through the
 * KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO attribute. Unless that fails or @gpa is
 * INVALID_GPA, the address is then resolved to a host pointer, which is
 * cached in env->xen_vcpu_info_hva together with its memory region.
 *
 * On every path the previously cached region is unreferenced and the
 * cached pointer/region are overwritten; they end up NULL when the ioctl
 * fails, when @gpa is INVALID_GPA, or when the lookup fails.
 *
 * Returns the kvm_xen_set_vcpu_attr() result, or -EINVAL when @gpa does
 * not resolve to RAM large enough for a struct vcpu_info.
 */
static int set_vcpu_info(CPUState *cs, uint64_t gpa)
{
    X86CPU *cpu = X86_CPU(cs);
    CPUX86State *env = &cpu->env;
    MemoryRegionSection mrs = { .mr = NULL };
    void *vcpu_info_hva = NULL;
    int ret;

    ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO, gpa);
    if (ret || gpa == INVALID_GPA) {
        /* Nothing to map: fall through to clear the cached mapping. */
        goto out;
    }

    mrs = memory_region_find(get_system_memory(), gpa,
                             sizeof(struct vcpu_info));
    /* Only accept RAM-backed sections that cover the whole structure. */
    if (mrs.mr && mrs.mr->ram_block &&
        !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) {
        vcpu_info_hva = qemu_map_ram_ptr(mrs.mr->ram_block,
                                         mrs.offset_within_region);
    }
    if (!vcpu_info_hva) {
        /* Unusable section: drop it so that it is not cached below. */
        if (mrs.mr) {
            memory_region_unref(mrs.mr);
            mrs.mr = NULL;
        }
        ret = -EINVAL;
    }

 out:
    /* Release the region cached by a previous call before replacing it. */
    if (env->xen_vcpu_info_mr) {
        memory_region_unref(env->xen_vcpu_info_mr);
    }
    env->xen_vcpu_info_hva = vcpu_info_hva;
    env->xen_vcpu_info_mr = mrs.mr;
    return ret;
}
*/ + if (env->xen_vcpu_info_gpa == INVALID_GPA) { + set_vcpu_info(cs, env->xen_vcpu_info_default_gpa); + } +} + +static void do_set_vcpu_info_gpa(CPUState *cs, run_on_cpu_data data) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + env->xen_vcpu_info_gpa = data.host_ulong; + + set_vcpu_info(cs, env->xen_vcpu_info_gpa); +} + +void *kvm_xen_get_vcpu_info_hva(uint32_t vcpu_id) +{ + CPUState *cs = qemu_get_cpu(vcpu_id); + if (!cs) { + return NULL; + } + + return X86_CPU(cs)->env.xen_vcpu_info_hva; +} + +void kvm_xen_maybe_deassert_callback(CPUState *cs) +{ + CPUX86State *env = &X86_CPU(cs)->env; + struct vcpu_info *vi = env->xen_vcpu_info_hva; + if (!vi) { + return; + } + + /* If the evtchn_upcall_pending flag is cleared, turn the GSI off. */ + if (!vi->evtchn_upcall_pending) { + bql_lock(); + /* + * Check again now we have the lock, because it may have been + * asserted in the interim. And we don't want to take the lock + * every time because this is a fast path. + */ + if (!vi->evtchn_upcall_pending) { + X86_CPU(cs)->env.xen_callback_asserted = false; + xen_evtchn_set_callback_level(0); + } + bql_unlock(); + } +} + +void kvm_xen_set_callback_asserted(void) +{ + CPUState *cs = qemu_get_cpu(0); + + if (cs) { + X86_CPU(cs)->env.xen_callback_asserted = true; + } +} + +bool kvm_xen_has_vcpu_callback_vector(void) +{ + CPUState *cs = qemu_get_cpu(0); + + return cs && !!X86_CPU(cs)->env.xen_vcpu_callback_vector; +} + +void kvm_xen_inject_vcpu_callback_vector(uint32_t vcpu_id, int type) +{ + CPUState *cs = qemu_get_cpu(vcpu_id); + uint8_t vector; + + if (!cs) { + return; + } + + vector = X86_CPU(cs)->env.xen_vcpu_callback_vector; + if (vector) { + /* + * The per-vCPU callback vector injected via lapic. Just + * deliver it as an MSI. 
+ */ + MSIMessage msg = { + .address = APIC_DEFAULT_ADDRESS | + (X86_CPU(cs)->apic_id << MSI_ADDR_DEST_ID_SHIFT), + .data = vector | (1UL << MSI_DATA_LEVEL_SHIFT), + }; + kvm_irqchip_send_msi(kvm_state, msg); + return; + } + + switch (type) { + case HVM_PARAM_CALLBACK_TYPE_VECTOR: + /* + * If the evtchn_upcall_pending field in the vcpu_info is set, then + * KVM will automatically deliver the vector on entering the vCPU + * so all we have to do is kick it out. + */ + qemu_cpu_kick(cs); + break; + + case HVM_PARAM_CALLBACK_TYPE_GSI: + case HVM_PARAM_CALLBACK_TYPE_PCI_INTX: + if (vcpu_id == 0) { + xen_evtchn_set_callback_level(1); + } + break; + } +} + +/* Must always be called with xen_timers_lock held */ +static int kvm_xen_set_vcpu_timer(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + struct kvm_xen_vcpu_attr va = { + .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER, + .u.timer.port = env->xen_virq[VIRQ_TIMER], + .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL, + .u.timer.expires_ns = env->xen_singleshot_timer_ns, + }; + + return kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_SET_ATTR, &va); +} + +static void do_set_vcpu_timer_virq(CPUState *cs, run_on_cpu_data data) +{ + QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock); + kvm_xen_set_vcpu_timer(cs); +} + +int kvm_xen_set_vcpu_virq(uint32_t vcpu_id, uint16_t virq, uint16_t port) +{ + CPUState *cs = qemu_get_cpu(vcpu_id); + + if (!cs) { + return -ENOENT; + } + + /* cpu.h doesn't include the actual Xen header. 
*/ + qemu_build_assert(NR_VIRQS == XEN_NR_VIRQS); + + if (virq >= NR_VIRQS) { + return -EINVAL; + } + + if (port && X86_CPU(cs)->env.xen_virq[virq]) { + return -EEXIST; + } + + X86_CPU(cs)->env.xen_virq[virq] = port; + if (virq == VIRQ_TIMER && kvm_xen_has_cap(EVTCHN_SEND)) { + async_run_on_cpu(cs, do_set_vcpu_timer_virq, + RUN_ON_CPU_HOST_INT(port)); + } + return 0; +} + +static void do_set_vcpu_time_info_gpa(CPUState *cs, run_on_cpu_data data) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + env->xen_vcpu_time_info_gpa = data.host_ulong; + + kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, + env->xen_vcpu_time_info_gpa); +} + +static void do_set_vcpu_runstate_gpa(CPUState *cs, run_on_cpu_data data) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + env->xen_vcpu_runstate_gpa = data.host_ulong; + + kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, + env->xen_vcpu_runstate_gpa); +} + +static void do_vcpu_soft_reset(CPUState *cs, run_on_cpu_data data) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + env->xen_vcpu_info_gpa = INVALID_GPA; + env->xen_vcpu_info_default_gpa = INVALID_GPA; + env->xen_vcpu_time_info_gpa = INVALID_GPA; + env->xen_vcpu_runstate_gpa = INVALID_GPA; + env->xen_vcpu_callback_vector = 0; + memset(env->xen_virq, 0, sizeof(env->xen_virq)); + + set_vcpu_info(cs, INVALID_GPA); + kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, + INVALID_GPA); + kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, + INVALID_GPA); + if (kvm_xen_has_cap(EVTCHN_SEND)) { + kvm_xen_set_vcpu_callback_vector(cs); + + QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock); + env->xen_singleshot_timer_ns = 0; + kvm_xen_set_vcpu_timer(cs); + } else { + vcpuop_stop_singleshot_timer(cs); + }; + +} + +static int xen_set_shared_info(uint64_t gfn) +{ + uint64_t gpa = gfn << TARGET_PAGE_BITS; + int i, err; + + BQL_LOCK_GUARD(); + + /* + * The xen_overlay device tells KVM about 
it too, since it had to + * do that on migration load anyway (unless we're going to jump + * through lots of hoops to maintain the fiction that this isn't + * KVM-specific. + */ + err = xen_overlay_map_shinfo_page(gpa); + if (err) { + return err; + } + + trace_kvm_xen_set_shared_info(gfn); + + for (i = 0; i < XEN_LEGACY_MAX_VCPUS; i++) { + CPUState *cpu = qemu_get_cpu(i); + if (cpu) { + async_run_on_cpu(cpu, do_set_vcpu_info_default_gpa, + RUN_ON_CPU_HOST_ULONG(gpa)); + } + gpa += sizeof(vcpu_info_t); + } + + return err; +} + +static int add_to_physmap_one(uint32_t space, uint64_t idx, uint64_t gfn) +{ + switch (space) { + case XENMAPSPACE_shared_info: + if (idx > 0) { + return -EINVAL; + } + return xen_set_shared_info(gfn); + + case XENMAPSPACE_grant_table: + return xen_gnttab_map_page(idx, gfn); + + case XENMAPSPACE_gmfn: + case XENMAPSPACE_gmfn_range: + return -ENOTSUP; + + case XENMAPSPACE_gmfn_foreign: + case XENMAPSPACE_dev_mmio: + return -EPERM; + + default: + return -EINVAL; + } +} + +static int do_add_to_physmap(struct kvm_xen_exit *exit, X86CPU *cpu, + uint64_t arg) +{ + struct xen_add_to_physmap xatp; + CPUState *cs = CPU(cpu); + + if (hypercall_compat32(exit->u.hcall.longmode)) { + struct compat_xen_add_to_physmap xatp32; + + qemu_build_assert(sizeof(struct compat_xen_add_to_physmap) == 16); + if (kvm_copy_from_gva(cs, arg, &xatp32, sizeof(xatp32))) { + return -EFAULT; + } + xatp.domid = xatp32.domid; + xatp.size = xatp32.size; + xatp.space = xatp32.space; + xatp.idx = xatp32.idx; + xatp.gpfn = xatp32.gpfn; + } else { + if (kvm_copy_from_gva(cs, arg, &xatp, sizeof(xatp))) { + return -EFAULT; + } + } + + if (xatp.domid != DOMID_SELF && xatp.domid != xen_domid) { + return -ESRCH; + } + + return add_to_physmap_one(xatp.space, xatp.idx, xatp.gpfn); +} + +static int do_add_to_physmap_batch(struct kvm_xen_exit *exit, X86CPU *cpu, + uint64_t arg) +{ + struct xen_add_to_physmap_batch xatpb; + unsigned long idxs_gva, gpfns_gva, errs_gva; + CPUState *cs = 
CPU(cpu); + size_t op_sz; + + if (hypercall_compat32(exit->u.hcall.longmode)) { + struct compat_xen_add_to_physmap_batch xatpb32; + + qemu_build_assert(sizeof(struct compat_xen_add_to_physmap_batch) == 20); + if (kvm_copy_from_gva(cs, arg, &xatpb32, sizeof(xatpb32))) { + return -EFAULT; + } + xatpb.domid = xatpb32.domid; + xatpb.space = xatpb32.space; + xatpb.size = xatpb32.size; + + idxs_gva = xatpb32.idxs.c; + gpfns_gva = xatpb32.gpfns.c; + errs_gva = xatpb32.errs.c; + op_sz = sizeof(uint32_t); + } else { + if (kvm_copy_from_gva(cs, arg, &xatpb, sizeof(xatpb))) { + return -EFAULT; + } + op_sz = sizeof(unsigned long); + idxs_gva = (unsigned long)xatpb.idxs.p; + gpfns_gva = (unsigned long)xatpb.gpfns.p; + errs_gva = (unsigned long)xatpb.errs.p; + } + + if (xatpb.domid != DOMID_SELF && xatpb.domid != xen_domid) { + return -ESRCH; + } + + /* Explicitly invalid for the batch op. Not that we implement it anyway. */ + if (xatpb.space == XENMAPSPACE_gmfn_range) { + return -EINVAL; + } + + while (xatpb.size--) { + unsigned long idx = 0; + unsigned long gpfn = 0; + int err; + + /* For 32-bit compat this only copies the low 32 bits of each */ + if (kvm_copy_from_gva(cs, idxs_gva, &idx, op_sz) || + kvm_copy_from_gva(cs, gpfns_gva, &gpfn, op_sz)) { + return -EFAULT; + } + idxs_gva += op_sz; + gpfns_gva += op_sz; + + err = add_to_physmap_one(xatpb.space, idx, gpfn); + + if (kvm_copy_to_gva(cs, errs_gva, &err, sizeof(err))) { + return -EFAULT; + } + errs_gva += sizeof(err); + } + return 0; +} + +static bool kvm_xen_hcall_memory_op(struct kvm_xen_exit *exit, X86CPU *cpu, + int cmd, uint64_t arg) +{ + int err; + + switch (cmd) { + case XENMEM_add_to_physmap: + err = do_add_to_physmap(exit, cpu, arg); + break; + + case XENMEM_add_to_physmap_batch: + err = do_add_to_physmap_batch(exit, cpu, arg); + break; + + default: + return false; + } + + exit->u.hcall.result = err; + return true; +} + +static bool handle_set_param(struct kvm_xen_exit *exit, X86CPU *cpu, + uint64_t arg) +{ + 
CPUState *cs = CPU(cpu); + struct xen_hvm_param hp; + int err = 0; + + /* No need for 32/64 compat handling */ + qemu_build_assert(sizeof(hp) == 16); + + if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) { + err = -EFAULT; + goto out; + } + + if (hp.domid != DOMID_SELF && hp.domid != xen_domid) { + err = -ESRCH; + goto out; + } + + switch (hp.index) { + case HVM_PARAM_CALLBACK_IRQ: + bql_lock(); + err = xen_evtchn_set_callback_param(hp.value); + bql_unlock(); + xen_set_long_mode(exit->u.hcall.longmode); + break; + default: + return false; + } + +out: + exit->u.hcall.result = err; + return true; +} + +static bool handle_get_param(struct kvm_xen_exit *exit, X86CPU *cpu, + uint64_t arg) +{ + CPUState *cs = CPU(cpu); + struct xen_hvm_param hp; + int err = 0; + + /* No need for 32/64 compat handling */ + qemu_build_assert(sizeof(hp) == 16); + + if (kvm_copy_from_gva(cs, arg, &hp, sizeof(hp))) { + err = -EFAULT; + goto out; + } + + if (hp.domid != DOMID_SELF && hp.domid != xen_domid) { + err = -ESRCH; + goto out; + } + + switch (hp.index) { + case HVM_PARAM_STORE_PFN: + hp.value = XEN_SPECIAL_PFN(XENSTORE); + break; + case HVM_PARAM_STORE_EVTCHN: + hp.value = xen_xenstore_get_port(); + break; + case HVM_PARAM_CONSOLE_PFN: + hp.value = xen_primary_console_get_pfn(); + if (!hp.value) { + err = -EINVAL; + } + break; + case HVM_PARAM_CONSOLE_EVTCHN: + hp.value = xen_primary_console_get_port(); + if (!hp.value) { + err = -EINVAL; + } + break; + default: + return false; + } + + if (!err && kvm_copy_to_gva(cs, arg, &hp, sizeof(hp))) { + err = -EFAULT; + } +out: + exit->u.hcall.result = err; + return true; +} + +static int kvm_xen_hcall_evtchn_upcall_vector(struct kvm_xen_exit *exit, + X86CPU *cpu, uint64_t arg) +{ + struct xen_hvm_evtchn_upcall_vector up; + CPUState *target_cs; + + /* No need for 32/64 compat handling */ + qemu_build_assert(sizeof(up) == 8); + + if (kvm_copy_from_gva(CPU(cpu), arg, &up, sizeof(up))) { + return -EFAULT; + } + + if (up.vector < 0x10) { + return 
-EINVAL; + } + + target_cs = qemu_get_cpu(up.vcpu); + if (!target_cs) { + return -EINVAL; + } + + async_run_on_cpu(target_cs, do_set_vcpu_callback_vector, + RUN_ON_CPU_HOST_INT(up.vector)); + return 0; +} + +static bool kvm_xen_hcall_hvm_op(struct kvm_xen_exit *exit, X86CPU *cpu, + int cmd, uint64_t arg) +{ + int ret = -ENOSYS; + switch (cmd) { + case HVMOP_set_evtchn_upcall_vector: + ret = kvm_xen_hcall_evtchn_upcall_vector(exit, cpu, arg); + break; + + case HVMOP_pagetable_dying: + ret = -ENOSYS; + break; + + case HVMOP_set_param: + return handle_set_param(exit, cpu, arg); + + case HVMOP_get_param: + return handle_get_param(exit, cpu, arg); + + default: + return false; + } + + exit->u.hcall.result = ret; + return true; +} + +static int vcpuop_register_vcpu_info(CPUState *cs, CPUState *target, + uint64_t arg) +{ + struct vcpu_register_vcpu_info rvi; + uint64_t gpa; + + /* No need for 32/64 compat handling */ + qemu_build_assert(sizeof(rvi) == 16); + qemu_build_assert(sizeof(struct vcpu_info) == 64); + + if (!target) { + return -ENOENT; + } + + if (kvm_copy_from_gva(cs, arg, &rvi, sizeof(rvi))) { + return -EFAULT; + } + + if (rvi.offset > TARGET_PAGE_SIZE - sizeof(struct vcpu_info)) { + return -EINVAL; + } + + gpa = ((rvi.mfn << TARGET_PAGE_BITS) + rvi.offset); + async_run_on_cpu(target, do_set_vcpu_info_gpa, RUN_ON_CPU_HOST_ULONG(gpa)); + return 0; +} + +static int vcpuop_register_vcpu_time_info(CPUState *cs, CPUState *target, + uint64_t arg) +{ + struct vcpu_register_time_memory_area tma; + uint64_t gpa; + size_t len; + + /* No need for 32/64 compat handling */ + qemu_build_assert(sizeof(tma) == 8); + qemu_build_assert(sizeof(struct vcpu_time_info) == 32); + + if (!target) { + return -ENOENT; + } + + if (kvm_copy_from_gva(cs, arg, &tma, sizeof(tma))) { + return -EFAULT; + } + + /* + * Xen actually uses the GVA and does the translation through the guest + * page tables each time. 
But Linux/KVM uses the GPA, on the assumption + * that guests only ever use *global* addresses (kernel virtual addresses) + * for it. If Linux is changed to redo the GVA→GPA translation each time, + * it will offer a new vCPU attribute for that, and we'll use it instead. + */ + if (!kvm_gva_to_gpa(cs, tma.addr.p, &gpa, &len, false) || + len < sizeof(struct vcpu_time_info)) { + return -EFAULT; + } + + async_run_on_cpu(target, do_set_vcpu_time_info_gpa, + RUN_ON_CPU_HOST_ULONG(gpa)); + return 0; +} + +static int vcpuop_register_runstate_info(CPUState *cs, CPUState *target, + uint64_t arg) +{ + struct vcpu_register_runstate_memory_area rma; + uint64_t gpa; + size_t len; + + /* No need for 32/64 compat handling */ + qemu_build_assert(sizeof(rma) == 8); + /* The runstate area actually does change size, but Linux copes. */ + + if (!target) { + return -ENOENT; + } + + if (kvm_copy_from_gva(cs, arg, &rma, sizeof(rma))) { + return -EFAULT; + } + + /* As with vcpu_time_info, Xen actually uses the GVA but KVM doesn't. 
*/ + if (!kvm_gva_to_gpa(cs, rma.addr.p, &gpa, &len, false)) { + return -EFAULT; + } + + async_run_on_cpu(target, do_set_vcpu_runstate_gpa, + RUN_ON_CPU_HOST_ULONG(gpa)); + return 0; +} + +static uint64_t kvm_get_current_ns(void) +{ + struct kvm_clock_data data; + int ret; + + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); + if (ret < 0) { + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); + abort(); + } + + return data.clock; +} + +static void xen_vcpu_singleshot_timer_event(void *opaque) +{ + CPUState *cpu = opaque; + CPUX86State *env = &X86_CPU(cpu)->env; + uint16_t port = env->xen_virq[VIRQ_TIMER]; + + if (likely(port)) { + xen_evtchn_set_port(port); + } + + qemu_mutex_lock(&env->xen_timers_lock); + env->xen_singleshot_timer_ns = 0; + qemu_mutex_unlock(&env->xen_timers_lock); +} + +static void xen_vcpu_periodic_timer_event(void *opaque) +{ + CPUState *cpu = opaque; + CPUX86State *env = &X86_CPU(cpu)->env; + uint16_t port = env->xen_virq[VIRQ_TIMER]; + int64_t qemu_now; + + if (likely(port)) { + xen_evtchn_set_port(port); + } + + qemu_mutex_lock(&env->xen_timers_lock); + + qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + timer_mod_ns(env->xen_periodic_timer, + qemu_now + env->xen_periodic_timer_period); + + qemu_mutex_unlock(&env->xen_timers_lock); +} + +static int do_set_periodic_timer(CPUState *target, uint64_t period_ns) +{ + CPUX86State *tenv = &X86_CPU(target)->env; + int64_t qemu_now; + + timer_del(tenv->xen_periodic_timer); + + qemu_mutex_lock(&tenv->xen_timers_lock); + + qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + timer_mod_ns(tenv->xen_periodic_timer, qemu_now + period_ns); + tenv->xen_periodic_timer_period = period_ns; + + qemu_mutex_unlock(&tenv->xen_timers_lock); + return 0; +} + +#define MILLISECS(_ms) ((int64_t)((_ms) * 1000000ULL)) +#define MICROSECS(_us) ((int64_t)((_us) * 1000ULL)) +#define STIME_MAX ((time_t)((int64_t)~0ull >> 1)) +/* Chosen so (NOW() + delta) won't overflow without an uptime of 200 years */ 
+#define STIME_DELTA_MAX ((int64_t)((uint64_t)~0ull >> 2)) + +static int vcpuop_set_periodic_timer(CPUState *cs, CPUState *target, + uint64_t arg) +{ + struct vcpu_set_periodic_timer spt; + + qemu_build_assert(sizeof(spt) == 8); + if (kvm_copy_from_gva(cs, arg, &spt, sizeof(spt))) { + return -EFAULT; + } + + if (spt.period_ns < MILLISECS(1) || spt.period_ns > STIME_DELTA_MAX) { + return -EINVAL; + } + + return do_set_periodic_timer(target, spt.period_ns); +} + +static int vcpuop_stop_periodic_timer(CPUState *target) +{ + CPUX86State *tenv = &X86_CPU(target)->env; + + qemu_mutex_lock(&tenv->xen_timers_lock); + + timer_del(tenv->xen_periodic_timer); + tenv->xen_periodic_timer_period = 0; + + qemu_mutex_unlock(&tenv->xen_timers_lock); + return 0; +} + +/* + * Userspace handling of timer, for older kernels. + * Must always be called with xen_timers_lock held. + */ +static int do_set_singleshot_timer(CPUState *cs, uint64_t timeout_abs, + bool linux_wa) +{ + CPUX86State *env = &X86_CPU(cs)->env; + int64_t now = kvm_get_current_ns(); + int64_t qemu_now = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + int64_t delta = timeout_abs - now; + + if (linux_wa && unlikely((int64_t)timeout_abs < 0 || + (delta > 0 && (uint32_t)(delta >> 50) != 0))) { + /* + * Xen has a 'Linux workaround' in do_set_timer_op() which checks + * for negative absolute timeout values (caused by integer + * overflow), and for values about 13 days in the future (2^50ns) + * which would be caused by jiffies overflow. For those cases, it + * sets the timeout 100ms in the future (not *too* soon, since if + * a guest really did set a long timeout on purpose we don't want + * to keep churning CPU time by waking it up). 
+ */ + delta = (100 * SCALE_MS); + timeout_abs = now + delta; + } + + timer_mod_ns(env->xen_singleshot_timer, qemu_now + delta); + env->xen_singleshot_timer_ns = now + delta; + return 0; +} + +static int vcpuop_set_singleshot_timer(CPUState *cs, uint64_t arg) +{ + struct vcpu_set_singleshot_timer sst = { 0 }; + + /* + * The struct is a uint64_t followed by a uint32_t. On 32-bit that + * makes it 12 bytes. On 64-bit it gets padded to 16. The parts + * that get used are identical, and there's four bytes of padding + * unused at the end. For true Xen compatibility we should attempt + * to copy the full 16 bytes from 64-bit guests, and return -EFAULT + * if we can't get the padding too. But that's daft. Just copy what + * we need. + */ + qemu_build_assert(offsetof(struct vcpu_set_singleshot_timer, flags) == 8); + qemu_build_assert(sizeof(sst) >= 12); + + if (kvm_copy_from_gva(cs, arg, &sst, 12)) { + return -EFAULT; + } + + QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock); + + /* + * We ignore the VCPU_SSHOTTMR_future flag, just as Xen now does. + * The only guest that ever used it, got it wrong. 
+ * https://xenbits.xen.org/gitweb/?p=xen.git;a=commitdiff;h=19c6cbd909 + */ + return do_set_singleshot_timer(cs, sst.timeout_abs_ns, false); +} + +static int vcpuop_stop_singleshot_timer(CPUState *cs) +{ + CPUX86State *env = &X86_CPU(cs)->env; + + qemu_mutex_lock(&env->xen_timers_lock); + + timer_del(env->xen_singleshot_timer); + env->xen_singleshot_timer_ns = 0; + + qemu_mutex_unlock(&env->xen_timers_lock); + return 0; +} + +static bool kvm_xen_hcall_set_timer_op(struct kvm_xen_exit *exit, X86CPU *cpu, + uint64_t timeout) +{ + int err; + + if (unlikely(timeout == 0)) { + err = vcpuop_stop_singleshot_timer(CPU(cpu)); + } else { + QEMU_LOCK_GUARD(&X86_CPU(cpu)->env.xen_timers_lock); + err = do_set_singleshot_timer(CPU(cpu), timeout, true); + } + exit->u.hcall.result = err; + return true; +} + +static bool kvm_xen_hcall_vcpu_op(struct kvm_xen_exit *exit, X86CPU *cpu, + int cmd, int vcpu_id, uint64_t arg) +{ + CPUState *cs = CPU(cpu); + CPUState *dest = cs->cpu_index == vcpu_id ? cs : qemu_get_cpu(vcpu_id); + int err; + + if (!dest) { + err = -ENOENT; + goto out; + } + + switch (cmd) { + case VCPUOP_register_runstate_memory_area: + err = vcpuop_register_runstate_info(cs, dest, arg); + break; + case VCPUOP_register_vcpu_time_memory_area: + err = vcpuop_register_vcpu_time_info(cs, dest, arg); + break; + case VCPUOP_register_vcpu_info: + err = vcpuop_register_vcpu_info(cs, dest, arg); + break; + case VCPUOP_set_singleshot_timer: { + if (cs->cpu_index == vcpu_id) { + err = vcpuop_set_singleshot_timer(dest, arg); + } else { + err = -EINVAL; + } + break; + } + case VCPUOP_stop_singleshot_timer: + if (cs->cpu_index == vcpu_id) { + err = vcpuop_stop_singleshot_timer(dest); + } else { + err = -EINVAL; + } + break; + case VCPUOP_set_periodic_timer: { + err = vcpuop_set_periodic_timer(cs, dest, arg); + break; + } + case VCPUOP_stop_periodic_timer: + err = vcpuop_stop_periodic_timer(dest); + break; + + default: + return false; + } + + out: + exit->u.hcall.result = err; + return 
true; +} + +static bool kvm_xen_hcall_evtchn_op(struct kvm_xen_exit *exit, X86CPU *cpu, + int cmd, uint64_t arg) +{ + CPUState *cs = CPU(cpu); + int err = -ENOSYS; + + switch (cmd) { + case EVTCHNOP_init_control: + case EVTCHNOP_expand_array: + case EVTCHNOP_set_priority: + /* We do not support FIFO channels at this point */ + err = -ENOSYS; + break; + + case EVTCHNOP_status: { + struct evtchn_status status; + + qemu_build_assert(sizeof(status) == 24); + if (kvm_copy_from_gva(cs, arg, &status, sizeof(status))) { + err = -EFAULT; + break; + } + + err = xen_evtchn_status_op(&status); + if (!err && kvm_copy_to_gva(cs, arg, &status, sizeof(status))) { + err = -EFAULT; + } + break; + } + case EVTCHNOP_close: { + struct evtchn_close close; + + qemu_build_assert(sizeof(close) == 4); + if (kvm_copy_from_gva(cs, arg, &close, sizeof(close))) { + err = -EFAULT; + break; + } + + err = xen_evtchn_close_op(&close); + break; + } + case EVTCHNOP_unmask: { + struct evtchn_unmask unmask; + + qemu_build_assert(sizeof(unmask) == 4); + if (kvm_copy_from_gva(cs, arg, &unmask, sizeof(unmask))) { + err = -EFAULT; + break; + } + + err = xen_evtchn_unmask_op(&unmask); + break; + } + case EVTCHNOP_bind_virq: { + struct evtchn_bind_virq virq; + + qemu_build_assert(sizeof(virq) == 12); + if (kvm_copy_from_gva(cs, arg, &virq, sizeof(virq))) { + err = -EFAULT; + break; + } + + err = xen_evtchn_bind_virq_op(&virq); + if (!err && kvm_copy_to_gva(cs, arg, &virq, sizeof(virq))) { + err = -EFAULT; + } + break; + } + case EVTCHNOP_bind_pirq: { + struct evtchn_bind_pirq pirq; + + qemu_build_assert(sizeof(pirq) == 12); + if (kvm_copy_from_gva(cs, arg, &pirq, sizeof(pirq))) { + err = -EFAULT; + break; + } + + err = xen_evtchn_bind_pirq_op(&pirq); + if (!err && kvm_copy_to_gva(cs, arg, &pirq, sizeof(pirq))) { + err = -EFAULT; + } + break; + } + case EVTCHNOP_bind_ipi: { + struct evtchn_bind_ipi ipi; + + qemu_build_assert(sizeof(ipi) == 8); + if (kvm_copy_from_gva(cs, arg, &ipi, sizeof(ipi))) { + err = 
-EFAULT; + break; + } + + err = xen_evtchn_bind_ipi_op(&ipi); + if (!err && kvm_copy_to_gva(cs, arg, &ipi, sizeof(ipi))) { + err = -EFAULT; + } + break; + } + case EVTCHNOP_send: { + struct evtchn_send send; + + qemu_build_assert(sizeof(send) == 4); + if (kvm_copy_from_gva(cs, arg, &send, sizeof(send))) { + err = -EFAULT; + break; + } + + err = xen_evtchn_send_op(&send); + break; + } + case EVTCHNOP_alloc_unbound: { + struct evtchn_alloc_unbound alloc; + + qemu_build_assert(sizeof(alloc) == 8); + if (kvm_copy_from_gva(cs, arg, &alloc, sizeof(alloc))) { + err = -EFAULT; + break; + } + + err = xen_evtchn_alloc_unbound_op(&alloc); + if (!err && kvm_copy_to_gva(cs, arg, &alloc, sizeof(alloc))) { + err = -EFAULT; + } + break; + } + case EVTCHNOP_bind_interdomain: { + struct evtchn_bind_interdomain interdomain; + + qemu_build_assert(sizeof(interdomain) == 12); + if (kvm_copy_from_gva(cs, arg, &interdomain, sizeof(interdomain))) { + err = -EFAULT; + break; + } + + err = xen_evtchn_bind_interdomain_op(&interdomain); + if (!err && + kvm_copy_to_gva(cs, arg, &interdomain, sizeof(interdomain))) { + err = -EFAULT; + } + break; + } + case EVTCHNOP_bind_vcpu: { + struct evtchn_bind_vcpu vcpu; + + qemu_build_assert(sizeof(vcpu) == 8); + if (kvm_copy_from_gva(cs, arg, &vcpu, sizeof(vcpu))) { + err = -EFAULT; + break; + } + + err = xen_evtchn_bind_vcpu_op(&vcpu); + break; + } + case EVTCHNOP_reset: { + struct evtchn_reset reset; + + qemu_build_assert(sizeof(reset) == 2); + if (kvm_copy_from_gva(cs, arg, &reset, sizeof(reset))) { + err = -EFAULT; + break; + } + + err = xen_evtchn_reset_op(&reset); + break; + } + default: + return false; + } + + exit->u.hcall.result = err; + return true; +} + +int kvm_xen_soft_reset(void) +{ + CPUState *cpu; + int err; + + assert(bql_locked()); + + trace_kvm_xen_soft_reset(); + + err = xen_evtchn_soft_reset(); + if (err) { + return err; + } + + /* + * Zero is the reset/startup state for HVM_PARAM_CALLBACK_IRQ. 
Strictly, + * it maps to HVM_PARAM_CALLBACK_TYPE_GSI with GSI#0, but Xen refuses to + * deliver to the timer interrupt and treats that as 'disabled'. + */ + err = xen_evtchn_set_callback_param(0); + if (err) { + return err; + } + + CPU_FOREACH(cpu) { + async_run_on_cpu(cpu, do_vcpu_soft_reset, RUN_ON_CPU_NULL); + } + + err = xen_overlay_map_shinfo_page(INVALID_GFN); + if (err) { + return err; + } + + err = xen_gnttab_reset(); + if (err) { + return err; + } + + err = xen_primary_console_reset(); + if (err) { + return err; + } + + err = xen_xenstore_reset(); + if (err) { + return err; + } + + return 0; +} + +static int schedop_shutdown(CPUState *cs, uint64_t arg) +{ + struct sched_shutdown shutdown; + int ret = 0; + + /* No need for 32/64 compat handling */ + qemu_build_assert(sizeof(shutdown) == 4); + + if (kvm_copy_from_gva(cs, arg, &shutdown, sizeof(shutdown))) { + return -EFAULT; + } + + switch (shutdown.reason) { + case SHUTDOWN_crash: + cpu_dump_state(cs, stderr, CPU_DUMP_CODE); + qemu_system_guest_panicked(NULL); + break; + + case SHUTDOWN_reboot: + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + break; + + case SHUTDOWN_poweroff: + qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_SHUTDOWN); + break; + + case SHUTDOWN_soft_reset: + bql_lock(); + ret = kvm_xen_soft_reset(); + bql_unlock(); + break; + + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static bool kvm_xen_hcall_sched_op(struct kvm_xen_exit *exit, X86CPU *cpu, + int cmd, uint64_t arg) +{ + CPUState *cs = CPU(cpu); + int err = -ENOSYS; + + switch (cmd) { + case SCHEDOP_shutdown: + err = schedop_shutdown(cs, arg); + break; + + case SCHEDOP_poll: + /* + * Linux will panic if this doesn't work. Just yield; it's not + * worth overthinking it because with event channel handling + * in KVM, the kernel will intercept this and it will never + * reach QEMU anyway. The semantics of the hypercall explicitly + * permit spurious wakeups. 
+ */ + case SCHEDOP_yield: + sched_yield(); + err = 0; + break; + + default: + return false; + } + + exit->u.hcall.result = err; + return true; +} + +static bool kvm_xen_hcall_gnttab_op(struct kvm_xen_exit *exit, X86CPU *cpu, + int cmd, uint64_t arg, int count) +{ + CPUState *cs = CPU(cpu); + int err; + + switch (cmd) { + case GNTTABOP_set_version: { + struct gnttab_set_version set; + + qemu_build_assert(sizeof(set) == 4); + if (kvm_copy_from_gva(cs, arg, &set, sizeof(set))) { + err = -EFAULT; + break; + } + + err = xen_gnttab_set_version_op(&set); + if (!err && kvm_copy_to_gva(cs, arg, &set, sizeof(set))) { + err = -EFAULT; + } + break; + } + case GNTTABOP_get_version: { + struct gnttab_get_version get; + + qemu_build_assert(sizeof(get) == 8); + if (kvm_copy_from_gva(cs, arg, &get, sizeof(get))) { + err = -EFAULT; + break; + } + + err = xen_gnttab_get_version_op(&get); + if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) { + err = -EFAULT; + } + break; + } + case GNTTABOP_query_size: { + struct gnttab_query_size size; + + qemu_build_assert(sizeof(size) == 16); + if (kvm_copy_from_gva(cs, arg, &size, sizeof(size))) { + err = -EFAULT; + break; + } + + err = xen_gnttab_query_size_op(&size); + if (!err && kvm_copy_to_gva(cs, arg, &size, sizeof(size))) { + err = -EFAULT; + } + break; + } + case GNTTABOP_setup_table: + case GNTTABOP_copy: + case GNTTABOP_map_grant_ref: + case GNTTABOP_unmap_grant_ref: + case GNTTABOP_swap_grant_ref: + return false; + + default: + /* Xen explicitly returns -ENOSYS to HVM guests for all others */ + err = -ENOSYS; + break; + } + + exit->u.hcall.result = err; + return true; +} + +/* + * Handle the physdev_op hypercall. Returns true after storing the outcome + * in exit->u.hcall.result, or false when cmd is not handled here. + */ +static bool kvm_xen_hcall_physdev_op(struct kvm_xen_exit *exit, X86CPU *cpu, + int cmd, uint64_t arg) +{ + CPUState *cs = CPU(cpu); + int err; + + switch (cmd) { + case PHYSDEVOP_map_pirq: { + struct physdev_map_pirq map; + + if (hypercall_compat32(exit->u.hcall.longmode)) { + struct compat_physdev_map_pirq *map32 = (void *)&map; + + if (kvm_copy_from_gva(cs, 
arg, map32, sizeof(*map32))) { + /* + * This function returns bool, so the fault must be reported + * through the hypercall result rather than the return value. + */ + err = -EFAULT; + break; + } + + /* + * The only thing that's different is the alignment of the + * uint64_t table_base at the end, which gets padding to make + * it 64-bit aligned in the 64-bit version. + */ + qemu_build_assert(sizeof(*map32) == 36); + qemu_build_assert(offsetof(struct physdev_map_pirq, entry_nr) == + offsetof(struct compat_physdev_map_pirq, entry_nr)); + memmove(&map.table_base, &map32->table_base, sizeof(map.table_base)); + } else { + if (kvm_copy_from_gva(cs, arg, &map, sizeof(map))) { + err = -EFAULT; + break; + } + } + err = xen_physdev_map_pirq(&map); + /* + * Since table_base is an IN parameter and won't be changed, just + * copy the size of the compat structure back to the guest. + */ + if (!err && kvm_copy_to_gva(cs, arg, &map, + sizeof(struct compat_physdev_map_pirq))) { + err = -EFAULT; + } + break; + } + case PHYSDEVOP_unmap_pirq: { + struct physdev_unmap_pirq unmap; + + qemu_build_assert(sizeof(unmap) == 8); + if (kvm_copy_from_gva(cs, arg, &unmap, sizeof(unmap))) { + err = -EFAULT; + break; + } + + err = xen_physdev_unmap_pirq(&unmap); + if (!err && kvm_copy_to_gva(cs, arg, &unmap, sizeof(unmap))) { + err = -EFAULT; + } + break; + } + case PHYSDEVOP_eoi: { + struct physdev_eoi eoi; + + qemu_build_assert(sizeof(eoi) == 4); + if (kvm_copy_from_gva(cs, arg, &eoi, sizeof(eoi))) { + err = -EFAULT; + break; + } + + err = xen_physdev_eoi_pirq(&eoi); + if (!err && kvm_copy_to_gva(cs, arg, &eoi, sizeof(eoi))) { + err = -EFAULT; + } + break; + } + case PHYSDEVOP_irq_status_query: { + struct physdev_irq_status_query query; + + qemu_build_assert(sizeof(query) == 8); + if (kvm_copy_from_gva(cs, arg, &query, sizeof(query))) { + err = -EFAULT; + break; + } + + err = xen_physdev_query_pirq(&query); + if (!err && kvm_copy_to_gva(cs, arg, &query, sizeof(query))) { + err = -EFAULT; + } + break; + } + case PHYSDEVOP_get_free_pirq: { + struct physdev_get_free_pirq get; + + qemu_build_assert(sizeof(get) == 8); + if 
(kvm_copy_from_gva(cs, arg, &get, sizeof(get))) { + err = -EFAULT; + break; + } + + err = xen_physdev_get_free_pirq(&get); + if (!err && kvm_copy_to_gva(cs, arg, &get, sizeof(get))) { + err = -EFAULT; + } + break; + } + case PHYSDEVOP_pirq_eoi_gmfn_v2: /* FreeBSD 13 makes this hypercall */ + err = -ENOSYS; + break; + + default: + return false; + } + + exit->u.hcall.result = err; + return true; +} + +static bool do_kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) +{ + uint16_t code = exit->u.hcall.input; + + if (exit->u.hcall.cpl > 0) { + exit->u.hcall.result = -EPERM; + return true; + } + + switch (code) { + case __HYPERVISOR_set_timer_op: + if (exit->u.hcall.longmode) { + return kvm_xen_hcall_set_timer_op(exit, cpu, + exit->u.hcall.params[0]); + } else { + /* In 32-bit mode, the 64-bit timer value is in two args. */ + uint64_t val = ((uint64_t)exit->u.hcall.params[1]) << 32 | + (uint32_t)exit->u.hcall.params[0]; + return kvm_xen_hcall_set_timer_op(exit, cpu, val); + } + case __HYPERVISOR_grant_table_op: + return kvm_xen_hcall_gnttab_op(exit, cpu, exit->u.hcall.params[0], + exit->u.hcall.params[1], + exit->u.hcall.params[2]); + case __HYPERVISOR_sched_op: + return kvm_xen_hcall_sched_op(exit, cpu, exit->u.hcall.params[0], + exit->u.hcall.params[1]); + case __HYPERVISOR_event_channel_op: + return kvm_xen_hcall_evtchn_op(exit, cpu, exit->u.hcall.params[0], + exit->u.hcall.params[1]); + case __HYPERVISOR_vcpu_op: + return kvm_xen_hcall_vcpu_op(exit, cpu, + exit->u.hcall.params[0], + exit->u.hcall.params[1], + exit->u.hcall.params[2]); + case __HYPERVISOR_hvm_op: + return kvm_xen_hcall_hvm_op(exit, cpu, exit->u.hcall.params[0], + exit->u.hcall.params[1]); + case __HYPERVISOR_memory_op: + return kvm_xen_hcall_memory_op(exit, cpu, exit->u.hcall.params[0], + exit->u.hcall.params[1]); + case __HYPERVISOR_physdev_op: + return kvm_xen_hcall_physdev_op(exit, cpu, exit->u.hcall.params[0], + exit->u.hcall.params[1]); + case __HYPERVISOR_xen_version: + return 
kvm_xen_hcall_xen_version(exit, cpu, exit->u.hcall.params[0], + exit->u.hcall.params[1]); + default: + return false; + } +} + +int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit) +{ + if (exit->type != KVM_EXIT_XEN_HCALL) { + return -1; + } + + /* + * The kernel latches the guest 32/64 mode when the MSR is used to fill + * the hypercall page. So if we see a hypercall in a mode that doesn't + * match our own idea of the guest mode, fetch the kernel's idea of the + * "long mode" to remain in sync. + */ + if (exit->u.hcall.longmode != xen_is_long_mode()) { + xen_sync_long_mode(); + } + + if (!do_kvm_xen_handle_exit(cpu, exit)) { + /* + * Some hypercalls will be deliberately "implemented" by returning + * -ENOSYS. This case is for hypercalls which are unexpected. + */ + exit->u.hcall.result = -ENOSYS; + qemu_log_mask(LOG_UNIMP, "Unimplemented Xen hypercall %" + PRId64 " (0x%" PRIx64 " 0x%" PRIx64 " 0x%" PRIx64 ")\n", + (uint64_t)exit->u.hcall.input, + (uint64_t)exit->u.hcall.params[0], + (uint64_t)exit->u.hcall.params[1], + (uint64_t)exit->u.hcall.params[2]); + } + + trace_kvm_xen_hypercall(CPU(cpu)->cpu_index, exit->u.hcall.cpl, + exit->u.hcall.input, exit->u.hcall.params[0], + exit->u.hcall.params[1], exit->u.hcall.params[2], + exit->u.hcall.result); + return 0; +} + +uint16_t kvm_xen_get_gnttab_max_frames(void) +{ + KVMState *s = KVM_STATE(current_accel()); + return s->xen_gnttab_max_frames; +} + +uint16_t kvm_xen_get_evtchn_max_pirq(void) +{ + KVMState *s = KVM_STATE(current_accel()); + return s->xen_evtchn_max_pirq; +} + +int kvm_put_xen_state(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + uint64_t gpa; + int ret; + + gpa = env->xen_vcpu_info_gpa; + if (gpa == INVALID_GPA) { + gpa = env->xen_vcpu_info_default_gpa; + } + + if (gpa != INVALID_GPA) { + ret = set_vcpu_info(cs, gpa); + if (ret < 0) { + return ret; + } + } + + gpa = env->xen_vcpu_time_info_gpa; + if (gpa != INVALID_GPA) { + ret = kvm_xen_set_vcpu_attr(cs, 
KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO, + gpa); + if (ret < 0) { + return ret; + } + } + + gpa = env->xen_vcpu_runstate_gpa; + if (gpa != INVALID_GPA) { + ret = kvm_xen_set_vcpu_attr(cs, KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR, + gpa); + if (ret < 0) { + return ret; + } + } + + if (env->xen_periodic_timer_period) { + ret = do_set_periodic_timer(cs, env->xen_periodic_timer_period); + if (ret < 0) { + return ret; + } + } + + if (!kvm_xen_has_cap(EVTCHN_SEND)) { + /* + * If the kernel has EVTCHN_SEND support then it handles timers too, + * so the timer will be restored by kvm_xen_set_vcpu_timer() below. + */ + QEMU_LOCK_GUARD(&env->xen_timers_lock); + if (env->xen_singleshot_timer_ns) { + ret = do_set_singleshot_timer(cs, env->xen_singleshot_timer_ns, + false); + if (ret < 0) { + return ret; + } + } + return 0; + } + + if (env->xen_vcpu_callback_vector) { + ret = kvm_xen_set_vcpu_callback_vector(cs); + if (ret < 0) { + return ret; + } + } + + if (env->xen_virq[VIRQ_TIMER]) { + do_set_vcpu_timer_virq(cs, + RUN_ON_CPU_HOST_INT(env->xen_virq[VIRQ_TIMER])); + } + return 0; +} + +int kvm_get_xen_state(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + uint64_t gpa; + int ret; + + /* + * The kernel does not mark vcpu_info as dirty when it delivers interrupts + * to it. It's up to userspace to *assume* that any page shared thus is + * always considered dirty. The shared_info page is different since it's + * an overlay and migrated separately anyway. 
+ */ + gpa = env->xen_vcpu_info_gpa; + if (gpa == INVALID_GPA) { + gpa = env->xen_vcpu_info_default_gpa; + } + if (gpa != INVALID_GPA) { + MemoryRegionSection mrs = memory_region_find(get_system_memory(), + gpa, + sizeof(struct vcpu_info)); + if (mrs.mr && + !int128_lt(mrs.size, int128_make64(sizeof(struct vcpu_info)))) { + memory_region_set_dirty(mrs.mr, mrs.offset_within_region, + sizeof(struct vcpu_info)); + } + } + + if (!kvm_xen_has_cap(EVTCHN_SEND)) { + return 0; + } + + /* + * If the kernel is accelerating timers, read out the current value of the + * singleshot timer deadline. + */ + if (env->xen_virq[VIRQ_TIMER]) { + struct kvm_xen_vcpu_attr va = { + .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER, + }; + ret = kvm_vcpu_ioctl(cs, KVM_XEN_VCPU_GET_ATTR, &va); + if (ret < 0) { + return ret; + } + + /* + * This locking is fairly pointless, and is here to appease Coverity. + * There is an unavoidable race condition if a different vCPU sets a + * timer for this vCPU after the value has been read out. But that's + * OK in practice because *all* the vCPUs need to be stopped before + * we set about migrating their state. + */ + QEMU_LOCK_GUARD(&X86_CPU(cs)->env.xen_timers_lock); + env->xen_singleshot_timer_ns = va.u.timer.expires_ns; + } + + return 0; +} diff --git a/target/i386/kvm/xen-emu.h b/target/i386/kvm/xen-emu.h new file mode 100644 index 0000000000..fe85e0b195 --- /dev/null +++ b/target/i386/kvm/xen-emu.h @@ -0,0 +1,33 @@ +/* + * Xen HVM emulation support in KVM + * + * Copyright © 2019 Oracle and/or its affiliates. All rights reserved. + * Copyright © 2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#ifndef QEMU_I386_KVM_XEN_EMU_H +#define QEMU_I386_KVM_XEN_EMU_H + +#define XEN_HYPERCALL_MSR 0x40000000 +#define XEN_HYPERCALL_MSR_HYPERV 0x40000200 + +#define XEN_CPUID_SIGNATURE 0 +#define XEN_CPUID_VENDOR 1 +#define XEN_CPUID_HVM_MSR 2 +#define XEN_CPUID_TIME 3 +#define XEN_CPUID_HVM 4 + +#define XEN_VERSION(maj, min) ((maj) << 16 | (min)) + +int kvm_xen_init(KVMState *s, uint32_t hypercall_msr); +int kvm_xen_init_vcpu(CPUState *cs); +int kvm_xen_handle_exit(X86CPU *cpu, struct kvm_xen_exit *exit); +int kvm_put_xen_state(CPUState *cs); +int kvm_get_xen_state(CPUState *cs); +void kvm_xen_maybe_deassert_callback(CPUState *cs); + +#endif /* QEMU_I386_KVM_XEN_EMU_H */ diff --git a/target/i386/kvm_i386.h b/target/i386/kvm_i386.h deleted file mode 100644 index 3057ba4f7d..0000000000 --- a/target/i386/kvm_i386.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * QEMU KVM support -- x86 specific functions. - * - * Copyright (c) 2012 Linaro Limited - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - -#ifndef QEMU_KVM_I386_H -#define QEMU_KVM_I386_H - -#include "sysemu/kvm.h" - -#define kvm_apic_in_kernel() (kvm_irqchip_in_kernel()) - -#ifdef CONFIG_KVM - -#define kvm_pit_in_kernel() \ - (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) -#define kvm_pic_in_kernel() \ - (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) -#define kvm_ioapic_in_kernel() \ - (kvm_irqchip_in_kernel() && !kvm_irqchip_is_split()) - -#else - -#define kvm_pit_in_kernel() 0 -#define kvm_pic_in_kernel() 0 -#define kvm_ioapic_in_kernel() 0 - -#endif /* CONFIG_KVM */ - -bool kvm_allows_irq0_override(void); -bool kvm_has_smm(void); -bool kvm_has_adjust_clock_stable(void); -void kvm_synchronize_all_tsc(void); -void kvm_arch_reset_vcpu(X86CPU *cs); -void kvm_arch_do_init_vcpu(X86CPU *cs); - -int kvm_device_pci_assign(KVMState *s, PCIHostDeviceAddress *dev_addr, - uint32_t flags, uint32_t *dev_id); -int kvm_device_pci_deassign(KVMState *s, uint32_t dev_id); - -int kvm_device_intx_assign(KVMState *s, uint32_t dev_id, - bool use_host_msi, uint32_t guest_irq); -int kvm_device_intx_set_mask(KVMState *s, uint32_t dev_id, bool masked); -int kvm_device_intx_deassign(KVMState *s, uint32_t dev_id, bool use_host_msi); - -int kvm_device_msi_assign(KVMState *s, uint32_t dev_id, int virq); -int kvm_device_msi_deassign(KVMState *s, uint32_t dev_id); - -bool kvm_device_msix_supported(KVMState *s); -int kvm_device_msix_init_vectors(KVMState *s, uint32_t dev_id, - uint32_t nr_vectors); -int kvm_device_msix_set_vector(KVMState *s, uint32_t dev_id, uint32_t vector, - int virq); -int kvm_device_msix_assign(KVMState *s, uint32_t dev_id); -int kvm_device_msix_deassign(KVMState *s, uint32_t dev_id); - -void kvm_put_apicbase(X86CPU *cpu, uint64_t value); - -bool kvm_enable_x2apic(void); -bool kvm_has_x2apic_api(void); - -bool kvm_hv_vpindex_settable(void); -#endif diff --git a/target/i386/machine.c b/target/i386/machine.c index 084c2c73a8..c3ae320814 100644 --- a/target/i386/machine.c +++ 
b/target/i386/machine.c @@ -1,14 +1,16 @@ #include "qemu/osdep.h" -#include "qemu-common.h" #include "cpu.h" #include "exec/exec-all.h" -#include "hw/hw.h" -#include "hw/boards.h" -#include "hw/i386/pc.h" #include "hw/isa/isa.h" #include "migration/cpu.h" +#include "kvm/hyperv.h" +#include "hw/i386/x86.h" +#include "kvm/kvm_i386.h" +#include "hw/xen/xen.h" #include "sysemu/kvm.h" +#include "sysemu/kvm_xen.h" +#include "sysemu/tcg.h" #include "qemu/error-report.h" @@ -16,7 +18,7 @@ static const VMStateDescription vmstate_segment = { .name = "segment", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT32(selector, SegmentCache), VMSTATE_UINTTL(base, SegmentCache), VMSTATE_UINT32(limit, SegmentCache), @@ -41,7 +43,7 @@ static const VMStateDescription vmstate_xmm_reg = { .name = "xmm_reg", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(ZMM_Q(0), ZMMReg), VMSTATE_UINT64(ZMM_Q(1), ZMMReg), VMSTATE_END_OF_LIST() @@ -57,7 +59,7 @@ static const VMStateDescription vmstate_ymmh_reg = { .name = "ymmh_reg", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(ZMM_Q(2), ZMMReg), VMSTATE_UINT64(ZMM_Q(3), ZMMReg), VMSTATE_END_OF_LIST() @@ -72,7 +74,7 @@ static const VMStateDescription vmstate_zmmh_reg = { .name = "zmmh_reg", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(ZMM_Q(4), ZMMReg), VMSTATE_UINT64(ZMM_Q(5), ZMMReg), VMSTATE_UINT64(ZMM_Q(6), ZMMReg), @@ -90,7 +92,7 @@ static const VMStateDescription vmstate_hi16_zmm_reg = { .name = "hi16_zmm_reg", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(ZMM_Q(0), ZMMReg), VMSTATE_UINT64(ZMM_Q(1), ZMMReg), VMSTATE_UINT64(ZMM_Q(2), ZMMReg), @@ -112,7 +114,7 @@ 
static const VMStateDescription vmstate_bnd_regs = { .name = "bnd_regs", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(lb, BNDReg), VMSTATE_UINT64(ub, BNDReg), VMSTATE_END_OF_LIST() @@ -126,7 +128,7 @@ static const VMStateDescription vmstate_mtrr_var = { .name = "mtrr_var", .version_id = 1, .minimum_version_id = 1, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(base, MTRRVar), VMSTATE_UINT64(mask, MTRRVar), VMSTATE_END_OF_LIST() @@ -136,6 +138,22 @@ static const VMStateDescription vmstate_mtrr_var = { #define VMSTATE_MTRR_VARS(_field, _state, _n, _v) \ VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_mtrr_var, MTRRVar) +static const VMStateDescription vmstate_lbr_records_var = { + .name = "lbr_records_var", + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(from, LBREntry), + VMSTATE_UINT64(to, LBREntry), + VMSTATE_UINT64(info, LBREntry), + VMSTATE_END_OF_LIST() + } +}; + +#define VMSTATE_LBR_VARS(_field, _state, _n, _v) \ + VMSTATE_STRUCT_ARRAY(_field, _state, _n, _v, vmstate_lbr_records_var, \ + LBREntry) + typedef struct x86_FPReg_tmp { FPReg *parent; uint64_t tmp_mant; @@ -183,7 +201,7 @@ static const VMStateDescription vmstate_fpreg_tmp = { .name = "fpreg_tmp", .post_load = fpreg_post_load, .pre_save = fpreg_pre_save, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(tmp_mant, x86_FPReg_tmp), VMSTATE_UINT16(tmp_exp, x86_FPReg_tmp), VMSTATE_END_OF_LIST() @@ -192,7 +210,7 @@ static const VMStateDescription vmstate_fpreg_tmp = { static const VMStateDescription vmstate_fpreg = { .name = "fpreg", - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_WITH_TMP(FPReg, x86_FPReg_tmp, vmstate_fpreg_tmp), VMSTATE_END_OF_LIST() } @@ -203,7 +221,7 @@ static int cpu_pre_save(void *opaque) X86CPU *cpu = opaque; CPUX86State *env = &cpu->env; int i; - + 
env->v_tpr = env->int_ctl & V_TPR_MASK; /* FPU */ env->fpus_vmstate = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; env->fptag_vmstate = 0; @@ -230,6 +248,65 @@ static int cpu_pre_save(void *opaque) env->segs[R_SS].flags &= ~(env->segs[R_SS].flags & DESC_DPL_MASK); } +#ifdef CONFIG_KVM + /* + * In case vCPU may have enabled VMX, we need to make sure kernel have + * required capabilities in order to perform migration correctly: + * + * 1) We must be able to extract vCPU nested-state from KVM. + * + * 2) In case vCPU is running in guest-mode and it has a pending exception, + * we must be able to determine if it's in a pending or injected state. + * Note that in case KVM don't have required capability to do so, + * a pending/injected exception will always appear as an + * injected exception. + */ + if (kvm_enabled() && cpu_vmx_maybe_enabled(env) && + (!env->nested_state || + (!kvm_has_exception_payload() && (env->hflags & HF_GUEST_MASK) && + env->exception_injected))) { + error_report("Guest maybe enabled nested virtualization but kernel " + "does not support required capabilities to save vCPU " + "nested state"); + return -EINVAL; + } +#endif + + /* + * When vCPU is running L2 and exception is still pending, + * it can potentially be intercepted by L1 hypervisor. + * In contrast to an injected exception which cannot be + * intercepted anymore. + * + * Furthermore, when a L2 exception is intercepted by L1 + * hypervisor, its exception payload (CR2/DR6 on #PF/#DB) + * should not be set yet in the respective vCPU register. + * Thus, in case an exception is pending, it is + * important to save the exception payload separately. + * + * Therefore, if an exception is not in a pending state + * or vCPU is not in guest-mode, it is not important to + * distinguish between a pending and injected exception + * and we don't need to store separately the exception payload. 
+ * + * In order to preserve better backwards-compatible migration, + * convert a pending exception to an injected exception in + * case it is not important to distinguish between them + * as described above. + */ + if (env->exception_pending && !(env->hflags & HF_GUEST_MASK)) { + env->exception_pending = 0; + env->exception_injected = 1; + + if (env->exception_has_payload) { + if (env->exception_nr == EXCP01_DB) { + env->dr[6] = env->exception_payload; + } else if (env->exception_nr == EXCP0E_PAGE) { + env->cr[2] = env->exception_payload; + } + } + } + return 0; } @@ -277,6 +354,33 @@ static int cpu_post_load(void *opaque, int version_id) env->hflags &= ~HF_CPL_MASK; env->hflags |= (env->segs[R_SS].flags >> DESC_DPL_SHIFT) & HF_CPL_MASK; +#ifdef CONFIG_KVM + if ((env->hflags & HF_GUEST_MASK) && + (!env->nested_state || + !(env->nested_state->flags & KVM_STATE_NESTED_GUEST_MODE))) { + error_report("vCPU set in guest-mode inconsistent with " + "migrated kernel nested state"); + return -EINVAL; + } +#endif + + /* + * There are cases that we can get valid exception_nr with both + * exception_pending and exception_injected being cleared. + * This can happen in one of the following scenarios: + * 1) Source is older QEMU without KVM_CAP_EXCEPTION_PAYLOAD support. + * 2) Source is running on kernel without KVM_CAP_EXCEPTION_PAYLOAD support. + * 3) "cpu/exception_info" subsection not sent because there is no exception + * pending or guest wasn't running L2 (See comment in cpu_pre_save()). + * + * In those cases, we can just deduce that a valid exception_nr means + * we can treat the exception as already injected. 
+ */ + if ((env->exception_nr != -1) && + !env->exception_pending && !env->exception_injected) { + env->exception_injected = 1; + } + env->fpstt = (env->fpus_vmstate >> 11) & 7; env->fpus = env->fpus_vmstate & ~0x3800; env->fptag_vmstate ^= 0xff; @@ -308,6 +412,13 @@ static bool async_pf_msr_needed(void *opaque) return cpu->env.async_pf_en_msr != 0; } +static bool async_pf_int_msr_needed(void *opaque) +{ + X86CPU *cpu = opaque; + + return cpu->env.async_pf_int_msr != 0; +} + static bool pv_eoi_msr_needed(void *opaque) { X86CPU *cpu = opaque; @@ -322,12 +433,49 @@ static bool steal_time_msr_needed(void *opaque) return cpu->env.steal_time_msr != 0; } +static bool exception_info_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + /* + * It is important to save exception-info only in case + * we need to distinguish between a pending and injected + * exception. Which is only required in case there is a + * pending exception and vCPU is running L2. + * For more info, refer to comment in cpu_pre_save(). 
+ */ + return env->exception_pending && (env->hflags & HF_GUEST_MASK); +} + +static const VMStateDescription vmstate_exception_info = { + .name = "cpu/exception_info", + .version_id = 1, + .minimum_version_id = 1, + .needed = exception_info_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT8(env.exception_pending, X86CPU), + VMSTATE_UINT8(env.exception_injected, X86CPU), + VMSTATE_UINT8(env.exception_has_payload, X86CPU), + VMSTATE_UINT64(env.exception_payload, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + +/* Poll control MSR enabled by default */ +static bool poll_control_msr_needed(void *opaque) +{ + X86CPU *cpu = opaque; + + return cpu->env.poll_control_msr != 1; +} + static const VMStateDescription vmstate_steal_time_msr = { .name = "cpu/steal_time_msr", .version_id = 1, .minimum_version_id = 1, .needed = steal_time_msr_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.steal_time_msr, X86CPU), VMSTATE_END_OF_LIST() } @@ -338,23 +486,45 @@ static const VMStateDescription vmstate_async_pf_msr = { .version_id = 1, .minimum_version_id = 1, .needed = async_pf_msr_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.async_pf_en_msr, X86CPU), VMSTATE_END_OF_LIST() } }; +static const VMStateDescription vmstate_async_pf_int_msr = { + .name = "cpu/async_pf_int_msr", + .version_id = 1, + .minimum_version_id = 1, + .needed = async_pf_int_msr_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(env.async_pf_int_msr, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + static const VMStateDescription vmstate_pv_eoi_msr = { .name = "cpu/async_pv_eoi_msr", .version_id = 1, .minimum_version_id = 1, .needed = pv_eoi_msr_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.pv_eoi_en_msr, X86CPU), VMSTATE_END_OF_LIST() } }; +static const VMStateDescription vmstate_poll_control_msr = { + .name = "cpu/poll_control_msr", + .version_id = 1, + 
.minimum_version_id = 1, + .needed = poll_control_msr_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(env.poll_control_msr, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + static bool fpop_ip_dp_needed(void *opaque) { X86CPU *cpu = opaque; @@ -368,7 +538,7 @@ static const VMStateDescription vmstate_fpop_ip_dp = { .version_id = 1, .minimum_version_id = 1, .needed = fpop_ip_dp_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT16(env.fpop, X86CPU), VMSTATE_UINT64(env.fpip, X86CPU), VMSTATE_UINT64(env.fpdp, X86CPU), @@ -389,7 +559,7 @@ static const VMStateDescription vmstate_msr_tsc_adjust = { .version_id = 1, .minimum_version_id = 1, .needed = tsc_adjust_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.tsc_adjust, X86CPU), VMSTATE_END_OF_LIST() } @@ -408,7 +578,7 @@ static const VMStateDescription vmstate_msr_smi_count = { .version_id = 1, .minimum_version_id = 1, .needed = msr_smi_count_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.msr_smi_count, X86CPU), VMSTATE_END_OF_LIST() } @@ -427,7 +597,7 @@ static const VMStateDescription vmstate_msr_tscdeadline = { .version_id = 1, .minimum_version_id = 1, .needed = tscdeadline_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.tsc_deadline, X86CPU), VMSTATE_END_OF_LIST() } @@ -454,7 +624,7 @@ static const VMStateDescription vmstate_msr_ia32_misc_enable = { .version_id = 1, .minimum_version_id = 1, .needed = misc_enable_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.msr_ia32_misc_enable, X86CPU), VMSTATE_END_OF_LIST() } @@ -465,7 +635,7 @@ static const VMStateDescription vmstate_msr_ia32_feature_control = { .version_id = 1, .minimum_version_id = 1, .needed = feature_control_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.msr_ia32_feature_control, 
X86CPU), VMSTATE_END_OF_LIST() } @@ -500,7 +670,7 @@ static const VMStateDescription vmstate_msr_architectural_pmu = { .version_id = 1, .minimum_version_id = 1, .needed = pmu_enable_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.msr_fixed_ctr_ctrl, X86CPU), VMSTATE_UINT64(env.msr_global_ctrl, X86CPU), VMSTATE_UINT64(env.msr_global_status, X86CPU), @@ -536,7 +706,7 @@ static const VMStateDescription vmstate_mpx = { .version_id = 1, .minimum_version_id = 1, .needed = mpx_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_BND_REGS(env.bnd_regs, X86CPU, 4), VMSTATE_UINT64(env.bndcs_regs.cfgu, X86CPU), VMSTATE_UINT64(env.bndcs_regs.sts, X86CPU), @@ -553,12 +723,12 @@ static bool hyperv_hypercall_enable_needed(void *opaque) return env->msr_hv_hypercall != 0 || env->msr_hv_guest_os_id != 0; } -static const VMStateDescription vmstate_msr_hypercall_hypercall = { +static const VMStateDescription vmstate_msr_hyperv_hypercall = { .name = "cpu/msr_hyperv_hypercall", .version_id = 1, .minimum_version_id = 1, .needed = hyperv_hypercall_enable_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.msr_hv_guest_os_id, X86CPU), VMSTATE_UINT64(env.msr_hv_hypercall, X86CPU), VMSTATE_END_OF_LIST() @@ -578,7 +748,7 @@ static const VMStateDescription vmstate_msr_hyperv_vapic = { .version_id = 1, .minimum_version_id = 1, .needed = hyperv_vapic_enable_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.msr_hv_vapic, X86CPU), VMSTATE_END_OF_LIST() } @@ -597,7 +767,7 @@ static const VMStateDescription vmstate_msr_hyperv_time = { .version_id = 1, .minimum_version_id = 1, .needed = hyperv_time_enable_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.msr_hv_tsc, X86CPU), VMSTATE_END_OF_LIST() } @@ -622,7 +792,7 @@ static const VMStateDescription vmstate_msr_hyperv_crash = { .version_id = 
1, .minimum_version_id = 1, .needed = hyperv_crash_enable_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64_ARRAY(env.msr_hv_crash_params, X86CPU, HV_CRASH_PARAMS), VMSTATE_END_OF_LIST() } @@ -633,7 +803,7 @@ static bool hyperv_runtime_enable_needed(void *opaque) X86CPU *cpu = opaque; CPUX86State *env = &cpu->env; - if (!cpu->hyperv_runtime) { + if (!hyperv_feat_enabled(cpu, HYPERV_FEAT_RUNTIME)) { return false; } @@ -645,7 +815,7 @@ static const VMStateDescription vmstate_msr_hyperv_runtime = { .version_id = 1, .minimum_version_id = 1, .needed = hyperv_runtime_enable_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.msr_hv_runtime, X86CPU), VMSTATE_END_OF_LIST() } @@ -672,12 +842,20 @@ static bool hyperv_synic_enable_needed(void *opaque) return false; } +static int hyperv_synic_post_load(void *opaque, int version_id) +{ + X86CPU *cpu = opaque; + hyperv_x86_synic_update(cpu); + return 0; +} + static const VMStateDescription vmstate_msr_hyperv_synic = { .name = "cpu/msr_hyperv_synic", .version_id = 1, .minimum_version_id = 1, .needed = hyperv_synic_enable_needed, - .fields = (VMStateField[]) { + .post_load = hyperv_synic_post_load, + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.msr_hv_synic_control, X86CPU), VMSTATE_UINT64(env.msr_hv_synic_evt_page, X86CPU), VMSTATE_UINT64(env.msr_hv_synic_msg_page, X86CPU), @@ -705,7 +883,7 @@ static const VMStateDescription vmstate_msr_hyperv_stimer = { .version_id = 1, .minimum_version_id = 1, .needed = hyperv_stimer_enable_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64_ARRAY(env.msr_hv_stimer_config, X86CPU, HV_STIMER_COUNT), VMSTATE_UINT64_ARRAY(env.msr_hv_stimer_count, X86CPU, HV_STIMER_COUNT), @@ -723,12 +901,32 @@ static bool hyperv_reenlightenment_enable_needed(void *opaque) env->msr_hv_tsc_emulation_status != 0; } +static int hyperv_reenlightenment_post_load(void *opaque, int 
version_id) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + /* + * KVM doesn't fully support re-enlightenment notifications so we need to + * make sure TSC frequency doesn't change upon migration. + */ + if ((env->msr_hv_reenlightenment_control & HV_REENLIGHTENMENT_ENABLE_BIT) && + !env->user_tsc_khz) { + error_report("Guest enabled re-enlightenment notifications, " + "'tsc-frequency=' has to be specified"); + return -EINVAL; + } + + return 0; +} + static const VMStateDescription vmstate_msr_hyperv_reenlightenment = { .name = "cpu/msr_hyperv_reenlightenment", .version_id = 1, .minimum_version_id = 1, .needed = hyperv_reenlightenment_enable_needed, - .fields = (VMStateField[]) { + .post_load = hyperv_reenlightenment_post_load, + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.msr_hv_reenlightenment_control, X86CPU), VMSTATE_UINT64(env.msr_hv_tsc_emulation_control, X86CPU), VMSTATE_UINT64(env.msr_hv_tsc_emulation_status, X86CPU), @@ -772,7 +970,7 @@ static const VMStateDescription vmstate_avx512 = { .version_id = 1, .minimum_version_id = 1, .needed = avx512_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64_ARRAY(env.opmask_regs, X86CPU, NB_OPMASK_REGS), VMSTATE_ZMMH_REGS_VARS(env.xmm_regs, X86CPU, 0), #ifdef TARGET_X86_64 @@ -795,13 +993,31 @@ static const VMStateDescription vmstate_xss = { .version_id = 1, .minimum_version_id = 1, .needed = xss_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.xss, X86CPU), VMSTATE_END_OF_LIST() } }; -#ifdef TARGET_X86_64 +static bool umwait_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return env->umwait != 0; +} + +static const VMStateDescription vmstate_umwait = { + .name = "cpu/umwait", + .version_id = 1, + .minimum_version_id = 1, + .needed = umwait_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(env.umwait, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + static bool 
pkru_needed(void *opaque) { X86CPU *cpu = opaque; @@ -815,20 +1031,38 @@ static const VMStateDescription vmstate_pkru = { .version_id = 1, .minimum_version_id = 1, .needed = pkru_needed, - .fields = (VMStateField[]){ + .fields = (const VMStateField[]){ VMSTATE_UINT32(env.pkru, X86CPU), VMSTATE_END_OF_LIST() } }; -#endif + +static bool pkrs_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return env->pkrs != 0; +} + +static const VMStateDescription vmstate_pkrs = { + .name = "cpu/pkrs", + .version_id = 1, + .minimum_version_id = 1, + .needed = pkrs_needed, + .fields = (const VMStateField[]){ + VMSTATE_UINT32(env.pkrs, X86CPU), + VMSTATE_END_OF_LIST() + } +}; static bool tsc_khz_needed(void *opaque) { X86CPU *cpu = opaque; CPUX86State *env = &cpu->env; MachineClass *mc = MACHINE_GET_CLASS(qdev_get_machine()); - PCMachineClass *pcmc = PC_MACHINE_CLASS(mc); - return env->tsc_khz && pcmc->save_tsc_khz; + X86MachineClass *x86mc = X86_MACHINE_CLASS(mc); + return env->tsc_khz && x86mc->save_tsc_khz; } static const VMStateDescription vmstate_tsc_khz = { @@ -836,12 +1070,219 @@ static const VMStateDescription vmstate_tsc_khz = { .version_id = 1, .minimum_version_id = 1, .needed = tsc_khz_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_INT64(env.tsc_khz, X86CPU), VMSTATE_END_OF_LIST() } }; +#ifdef CONFIG_KVM + +static bool vmx_vmcs12_needed(void *opaque) +{ + struct kvm_nested_state *nested_state = opaque; + return (nested_state->size > + offsetof(struct kvm_nested_state, data.vmx[0].vmcs12)); +} + +static const VMStateDescription vmstate_vmx_vmcs12 = { + .name = "cpu/kvm_nested_state/vmx/vmcs12", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmx_vmcs12_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT8_ARRAY(data.vmx[0].vmcs12, + struct kvm_nested_state, + KVM_STATE_NESTED_VMX_VMCS_SIZE), + VMSTATE_END_OF_LIST() + } +}; + +static bool vmx_shadow_vmcs12_needed(void *opaque) +{ + 
struct kvm_nested_state *nested_state = opaque; + return (nested_state->size > + offsetof(struct kvm_nested_state, data.vmx[0].shadow_vmcs12)); +} + +static const VMStateDescription vmstate_vmx_shadow_vmcs12 = { + .name = "cpu/kvm_nested_state/vmx/shadow_vmcs12", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmx_shadow_vmcs12_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT8_ARRAY(data.vmx[0].shadow_vmcs12, + struct kvm_nested_state, + KVM_STATE_NESTED_VMX_VMCS_SIZE), + VMSTATE_END_OF_LIST() + } +}; + +static bool vmx_nested_state_needed(void *opaque) +{ + struct kvm_nested_state *nested_state = opaque; + + return (nested_state->format == KVM_STATE_NESTED_FORMAT_VMX && + nested_state->hdr.vmx.vmxon_pa != -1ull); +} + +static const VMStateDescription vmstate_vmx_nested_state = { + .name = "cpu/kvm_nested_state/vmx", + .version_id = 1, + .minimum_version_id = 1, + .needed = vmx_nested_state_needed, + .fields = (const VMStateField[]) { + VMSTATE_U64(hdr.vmx.vmxon_pa, struct kvm_nested_state), + VMSTATE_U64(hdr.vmx.vmcs12_pa, struct kvm_nested_state), + VMSTATE_U16(hdr.vmx.smm.flags, struct kvm_nested_state), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * const []) { + &vmstate_vmx_vmcs12, + &vmstate_vmx_shadow_vmcs12, + NULL, + } +}; + +static bool svm_nested_state_needed(void *opaque) +{ + struct kvm_nested_state *nested_state = opaque; + + /* + * HF_GUEST_MASK and HF2_GIF_MASK are already serialized + * via hflags and hflags2, all that's left is the opaque + * nested state blob. 
+ */ + return (nested_state->format == KVM_STATE_NESTED_FORMAT_SVM && + nested_state->size > offsetof(struct kvm_nested_state, data)); +} + +static const VMStateDescription vmstate_svm_nested_state = { + .name = "cpu/kvm_nested_state/svm", + .version_id = 1, + .minimum_version_id = 1, + .needed = svm_nested_state_needed, + .fields = (const VMStateField[]) { + VMSTATE_U64(hdr.svm.vmcb_pa, struct kvm_nested_state), + VMSTATE_UINT8_ARRAY(data.svm[0].vmcb12, + struct kvm_nested_state, + KVM_STATE_NESTED_SVM_VMCB_SIZE), + VMSTATE_END_OF_LIST() + } +}; + +static bool nested_state_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return (env->nested_state && + (vmx_nested_state_needed(env->nested_state) || + svm_nested_state_needed(env->nested_state))); +} + +static int nested_state_post_load(void *opaque, int version_id) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + struct kvm_nested_state *nested_state = env->nested_state; + int min_nested_state_len = offsetof(struct kvm_nested_state, data); + int max_nested_state_len = kvm_max_nested_state_length(); + + /* + * If our kernel don't support setting nested state + * and we have received nested state from migration stream, + * we need to fail migration + */ + if (max_nested_state_len <= 0) { + error_report("Received nested state when kernel cannot restore it"); + return -EINVAL; + } + + /* + * Verify that the size of received nested_state struct + * at least cover required header and is not larger + * than the max size that our kernel support + */ + if (nested_state->size < min_nested_state_len) { + error_report("Received nested state size less than min: " + "len=%d, min=%d", + nested_state->size, min_nested_state_len); + return -EINVAL; + } + if (nested_state->size > max_nested_state_len) { + error_report("Received unsupported nested state size: " + "nested_state->size=%d, max=%d", + nested_state->size, max_nested_state_len); + return -EINVAL; + } + + /* Verify format is 
valid */ + if ((nested_state->format != KVM_STATE_NESTED_FORMAT_VMX) && + (nested_state->format != KVM_STATE_NESTED_FORMAT_SVM)) { + error_report("Received invalid nested state format: %d", + nested_state->format); + return -EINVAL; + } + + return 0; +} + +static const VMStateDescription vmstate_kvm_nested_state = { + .name = "cpu/kvm_nested_state", + .version_id = 1, + .minimum_version_id = 1, + .fields = (const VMStateField[]) { + VMSTATE_U16(flags, struct kvm_nested_state), + VMSTATE_U16(format, struct kvm_nested_state), + VMSTATE_U32(size, struct kvm_nested_state), + VMSTATE_END_OF_LIST() + }, + .subsections = (const VMStateDescription * const []) { + &vmstate_vmx_nested_state, + &vmstate_svm_nested_state, + NULL + } +}; + +static const VMStateDescription vmstate_nested_state = { + .name = "cpu/nested_state", + .version_id = 1, + .minimum_version_id = 1, + .needed = nested_state_needed, + .post_load = nested_state_post_load, + .fields = (const VMStateField[]) { + VMSTATE_STRUCT_POINTER(env.nested_state, X86CPU, + vmstate_kvm_nested_state, + struct kvm_nested_state), + VMSTATE_END_OF_LIST() + } +}; + +static bool xen_vcpu_needed(void *opaque) +{ + return (xen_mode == XEN_EMULATE); +} + +static const VMStateDescription vmstate_xen_vcpu = { + .name = "cpu/xen_vcpu", + .version_id = 1, + .minimum_version_id = 1, + .needed = xen_vcpu_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(env.xen_vcpu_info_gpa, X86CPU), + VMSTATE_UINT64(env.xen_vcpu_info_default_gpa, X86CPU), + VMSTATE_UINT64(env.xen_vcpu_time_info_gpa, X86CPU), + VMSTATE_UINT64(env.xen_vcpu_runstate_gpa, X86CPU), + VMSTATE_UINT8(env.xen_vcpu_callback_vector, X86CPU), + VMSTATE_UINT16_ARRAY(env.xen_virq, X86CPU, XEN_NR_VIRQS), + VMSTATE_UINT64(env.xen_singleshot_timer_ns, X86CPU), + VMSTATE_UINT64(env.xen_periodic_timer_period, X86CPU), + VMSTATE_END_OF_LIST() + } +}; +#endif + static bool mcg_ext_ctl_needed(void *opaque) { X86CPU *cpu = opaque; @@ -854,7 +1295,7 @@ static const 
VMStateDescription vmstate_mcg_ext_ctl = { .version_id = 1, .minimum_version_id = 1, .needed = mcg_ext_ctl_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.mcg_ext_ctl, X86CPU), VMSTATE_END_OF_LIST() } @@ -873,12 +1314,33 @@ static const VMStateDescription vmstate_spec_ctrl = { .version_id = 1, .minimum_version_id = 1, .needed = spec_ctrl_needed, - .fields = (VMStateField[]){ + .fields = (const VMStateField[]){ VMSTATE_UINT64(env.spec_ctrl, X86CPU), VMSTATE_END_OF_LIST() } }; + +static bool amd_tsc_scale_msr_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return (env->features[FEAT_SVM] & CPUID_SVM_TSCSCALE); +} + +static const VMStateDescription amd_tsc_scale_msr_ctrl = { + .name = "cpu/amd_tsc_scale_msr", + .version_id = 1, + .minimum_version_id = 1, + .needed = amd_tsc_scale_msr_needed, + .fields = (const VMStateField[]){ + VMSTATE_UINT64(env.amd_tsc_scale_msr, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + + static bool intel_pt_enable_needed(void *opaque) { X86CPU *cpu = opaque; @@ -905,7 +1367,7 @@ static const VMStateDescription vmstate_msr_intel_pt = { .version_id = 1, .minimum_version_id = 1, .needed = intel_pt_enable_needed, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINT64(env.msr_rtit_ctrl, X86CPU), VMSTATE_UINT64(env.msr_rtit_status, X86CPU), VMSTATE_UINT64(env.msr_rtit_output_base, X86CPU), @@ -929,7 +1391,7 @@ static const VMStateDescription vmstate_msr_virt_ssbd = { .version_id = 1, .minimum_version_id = 1, .needed = virt_ssbd_needed, - .fields = (VMStateField[]){ + .fields = (const VMStateField[]){ VMSTATE_UINT64(env.virt_ssbd, X86CPU), VMSTATE_END_OF_LIST() } @@ -948,20 +1410,208 @@ static const VMStateDescription vmstate_svm_npt = { .version_id = 1, .minimum_version_id = 1, .needed = svm_npt_needed, - .fields = (VMStateField[]){ + .fields = (const VMStateField[]){ VMSTATE_UINT64(env.nested_cr3, X86CPU), VMSTATE_UINT32(env.nested_pg_mode, 
X86CPU), VMSTATE_END_OF_LIST() } }; -VMStateDescription vmstate_x86_cpu = { +static bool svm_guest_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return tcg_enabled() && env->int_ctl; +} + +static const VMStateDescription vmstate_svm_guest = { + .name = "cpu/svm_guest", + .version_id = 1, + .minimum_version_id = 1, + .needed = svm_guest_needed, + .fields = (const VMStateField[]){ + VMSTATE_UINT32(env.int_ctl, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + +#ifndef TARGET_X86_64 +static bool intel_efer32_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return env->efer != 0; +} + +static const VMStateDescription vmstate_efer32 = { + .name = "cpu/efer32", + .version_id = 1, + .minimum_version_id = 1, + .needed = intel_efer32_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(env.efer, X86CPU), + VMSTATE_END_OF_LIST() + } +}; +#endif + +static bool msr_tsx_ctrl_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return env->features[FEAT_ARCH_CAPABILITIES] & ARCH_CAP_TSX_CTRL_MSR; +} + +static const VMStateDescription vmstate_msr_tsx_ctrl = { + .name = "cpu/msr_tsx_ctrl", + .version_id = 1, + .minimum_version_id = 1, + .needed = msr_tsx_ctrl_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT32(env.tsx_ctrl, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + +static bool intel_sgx_msrs_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return !!(env->features[FEAT_7_0_ECX] & CPUID_7_0_ECX_SGX_LC); +} + +static const VMStateDescription vmstate_msr_intel_sgx = { + .name = "cpu/intel_sgx", + .version_id = 1, + .minimum_version_id = 1, + .needed = intel_sgx_msrs_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT64_ARRAY(env.msr_ia32_sgxlepubkeyhash, X86CPU, 4), + VMSTATE_END_OF_LIST() + } + }; + +static bool pdptrs_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + return env->pdptrs_valid; 
+} + +static int pdptrs_post_load(void *opaque, int version_id) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + env->pdptrs_valid = true; + return 0; +} + + +static const VMStateDescription vmstate_pdptrs = { + .name = "cpu/pdptrs", + .version_id = 1, + .minimum_version_id = 1, + .needed = pdptrs_needed, + .post_load = pdptrs_post_load, + .fields = (const VMStateField[]) { + VMSTATE_UINT64_ARRAY(env.pdptrs, X86CPU, 4), + VMSTATE_END_OF_LIST() + } +}; + +static bool xfd_msrs_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return !!(env->features[FEAT_XSAVE] & CPUID_D_1_EAX_XFD); +} + +static const VMStateDescription vmstate_msr_xfd = { + .name = "cpu/msr_xfd", + .version_id = 1, + .minimum_version_id = 1, + .needed = xfd_msrs_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(env.msr_xfd, X86CPU), + VMSTATE_UINT64(env.msr_xfd_err, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + +#ifdef TARGET_X86_64 +static bool amx_xtile_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_AMX_TILE); +} + +static const VMStateDescription vmstate_amx_xtile = { + .name = "cpu/intel_amx_xtile", + .version_id = 1, + .minimum_version_id = 1, + .needed = amx_xtile_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT8_ARRAY(env.xtilecfg, X86CPU, 64), + VMSTATE_UINT8_ARRAY(env.xtiledata, X86CPU, 8192), + VMSTATE_END_OF_LIST() + } +}; +#endif + +static bool arch_lbr_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return !!(env->features[FEAT_7_0_EDX] & CPUID_7_0_EDX_ARCH_LBR); +} + +static const VMStateDescription vmstate_arch_lbr = { + .name = "cpu/arch_lbr", + .version_id = 1, + .minimum_version_id = 1, + .needed = arch_lbr_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT64(env.msr_lbr_ctl, X86CPU), + VMSTATE_UINT64(env.msr_lbr_depth, X86CPU), + VMSTATE_LBR_VARS(env.lbr_records, X86CPU, 
ARCH_LBR_NR_ENTRIES, 1), + VMSTATE_END_OF_LIST() + } +}; + +static bool triple_fault_needed(void *opaque) +{ + X86CPU *cpu = opaque; + CPUX86State *env = &cpu->env; + + return env->triple_fault_pending; +} + +static const VMStateDescription vmstate_triple_fault = { + .name = "cpu/triple_fault", + .version_id = 1, + .minimum_version_id = 1, + .needed = triple_fault_needed, + .fields = (const VMStateField[]) { + VMSTATE_UINT8(env.triple_fault_pending, X86CPU), + VMSTATE_END_OF_LIST() + } +}; + +const VMStateDescription vmstate_x86_cpu = { .name = "cpu", .version_id = 12, .minimum_version_id = 11, .pre_save = cpu_pre_save, .post_load = cpu_post_load, - .fields = (VMStateField[]) { + .fields = (const VMStateField[]) { VMSTATE_UINTTL_ARRAY(env.regs, X86CPU, CPU_NB_REGS), VMSTATE_UINTTL(env.eip, X86CPU), VMSTATE_UINTTL(env.eflags, X86CPU), @@ -1026,7 +1676,7 @@ VMStateDescription vmstate_x86_cpu = { VMSTATE_INT32(env.interrupt_injected, X86CPU), VMSTATE_UINT32(env.mp_state, X86CPU), VMSTATE_UINT64(env.tsc, X86CPU), - VMSTATE_INT32(env.exception_injected, X86CPU), + VMSTATE_INT32(env.exception_nr, X86CPU), VMSTATE_UINT8(env.soft_interrupt, X86CPU), VMSTATE_UINT8(env.nmi_injected, X86CPU), VMSTATE_UINT8(env.nmi_pending, X86CPU), @@ -1049,10 +1699,13 @@ VMStateDescription vmstate_x86_cpu = { VMSTATE_END_OF_LIST() /* The above list is not sorted /wrt version numbers, watch out! 
*/ }, - .subsections = (const VMStateDescription*[]) { + .subsections = (const VMStateDescription * const []) { + &vmstate_exception_info, &vmstate_async_pf_msr, + &vmstate_async_pf_int_msr, &vmstate_pv_eoi_msr, &vmstate_steal_time_msr, + &vmstate_poll_control_msr, &vmstate_fpop_ip_dp, &vmstate_msr_tsc_adjust, &vmstate_msr_tscdeadline, @@ -1060,7 +1713,7 @@ VMStateDescription vmstate_x86_cpu = { &vmstate_msr_ia32_feature_control, &vmstate_msr_architectural_pmu, &vmstate_mpx, - &vmstate_msr_hypercall_hypercall, + &vmstate_msr_hyperv_hypercall, &vmstate_msr_hyperv_vapic, &vmstate_msr_hyperv_time, &vmstate_msr_hyperv_crash, @@ -1070,16 +1723,34 @@ VMStateDescription vmstate_x86_cpu = { &vmstate_msr_hyperv_reenlightenment, &vmstate_avx512, &vmstate_xss, + &vmstate_umwait, &vmstate_tsc_khz, &vmstate_msr_smi_count, -#ifdef TARGET_X86_64 &vmstate_pkru, -#endif + &vmstate_pkrs, &vmstate_spec_ctrl, + &amd_tsc_scale_msr_ctrl, &vmstate_mcg_ext_ctl, &vmstate_msr_intel_pt, &vmstate_msr_virt_ssbd, &vmstate_svm_npt, + &vmstate_svm_guest, +#ifndef TARGET_X86_64 + &vmstate_efer32, +#endif +#ifdef CONFIG_KVM + &vmstate_nested_state, + &vmstate_xen_vcpu, +#endif + &vmstate_msr_tsx_ctrl, + &vmstate_msr_intel_sgx, + &vmstate_pdptrs, + &vmstate_msr_xfd, +#ifdef TARGET_X86_64 + &vmstate_amx_xtile, +#endif + &vmstate_arch_lbr, + &vmstate_triple_fault, NULL } }; diff --git a/target/i386/mem_helper.c b/target/i386/mem_helper.c deleted file mode 100644 index 30c26b9d9c..0000000000 --- a/target/i386/mem_helper.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - * x86 memory access helpers - * - * Copyright (c) 2003 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ - -#include "qemu/osdep.h" -#include "cpu.h" -#include "exec/helper-proto.h" -#include "exec/exec-all.h" -#include "exec/cpu_ldst.h" -#include "qemu/int128.h" -#include "tcg.h" - -void helper_cmpxchg8b_unlocked(CPUX86State *env, target_ulong a0) -{ - uintptr_t ra = GETPC(); - uint64_t oldv, cmpv, newv; - int eflags; - - eflags = cpu_cc_compute_all(env, CC_OP); - - cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]); - newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]); - - oldv = cpu_ldq_data_ra(env, a0, ra); - newv = (cmpv == oldv ? newv : oldv); - /* always do the store */ - cpu_stq_data_ra(env, a0, newv, ra); - - if (oldv == cmpv) { - eflags |= CC_Z; - } else { - env->regs[R_EAX] = (uint32_t)oldv; - env->regs[R_EDX] = (uint32_t)(oldv >> 32); - eflags &= ~CC_Z; - } - CC_SRC = eflags; -} - -void helper_cmpxchg8b(CPUX86State *env, target_ulong a0) -{ -#ifdef CONFIG_ATOMIC64 - uint64_t oldv, cmpv, newv; - int eflags; - - eflags = cpu_cc_compute_all(env, CC_OP); - - cmpv = deposit64(env->regs[R_EAX], 32, 32, env->regs[R_EDX]); - newv = deposit64(env->regs[R_EBX], 32, 32, env->regs[R_ECX]); - -#ifdef CONFIG_USER_ONLY - { - uint64_t *haddr = g2h(a0); - cmpv = cpu_to_le64(cmpv); - newv = cpu_to_le64(newv); - oldv = atomic_cmpxchg__nocheck(haddr, cmpv, newv); - oldv = le64_to_cpu(oldv); - } -#else - { - uintptr_t ra = GETPC(); - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ, mem_idx); - oldv = helper_atomic_cmpxchgq_le_mmu(env, a0, cmpv, newv, oi, ra); - } -#endif - - if (oldv == cmpv) { - eflags |= CC_Z; 
- } else { - env->regs[R_EAX] = (uint32_t)oldv; - env->regs[R_EDX] = (uint32_t)(oldv >> 32); - eflags &= ~CC_Z; - } - CC_SRC = eflags; -#else - cpu_loop_exit_atomic(ENV_GET_CPU(env), GETPC()); -#endif /* CONFIG_ATOMIC64 */ -} - -#ifdef TARGET_X86_64 -void helper_cmpxchg16b_unlocked(CPUX86State *env, target_ulong a0) -{ - uintptr_t ra = GETPC(); - Int128 oldv, cmpv, newv; - uint64_t o0, o1; - int eflags; - bool success; - - if ((a0 & 0xf) != 0) { - raise_exception_ra(env, EXCP0D_GPF, GETPC()); - } - eflags = cpu_cc_compute_all(env, CC_OP); - - cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); - newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]); - - o0 = cpu_ldq_data_ra(env, a0 + 0, ra); - o1 = cpu_ldq_data_ra(env, a0 + 8, ra); - - oldv = int128_make128(o0, o1); - success = int128_eq(oldv, cmpv); - if (!success) { - newv = oldv; - } - - cpu_stq_data_ra(env, a0 + 0, int128_getlo(newv), ra); - cpu_stq_data_ra(env, a0 + 8, int128_gethi(newv), ra); - - if (success) { - eflags |= CC_Z; - } else { - env->regs[R_EAX] = int128_getlo(oldv); - env->regs[R_EDX] = int128_gethi(oldv); - eflags &= ~CC_Z; - } - CC_SRC = eflags; -} - -void helper_cmpxchg16b(CPUX86State *env, target_ulong a0) -{ - uintptr_t ra = GETPC(); - - if ((a0 & 0xf) != 0) { - raise_exception_ra(env, EXCP0D_GPF, ra); - } else { -#ifndef CONFIG_ATOMIC128 - cpu_loop_exit_atomic(ENV_GET_CPU(env), ra); -#else - int eflags = cpu_cc_compute_all(env, CC_OP); - - Int128 cmpv = int128_make128(env->regs[R_EAX], env->regs[R_EDX]); - Int128 newv = int128_make128(env->regs[R_EBX], env->regs[R_ECX]); - - int mem_idx = cpu_mmu_index(env, false); - TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx); - Int128 oldv = helper_atomic_cmpxchgo_le_mmu(env, a0, cmpv, - newv, oi, ra); - - if (int128_eq(oldv, cmpv)) { - eflags |= CC_Z; - } else { - env->regs[R_EAX] = int128_getlo(oldv); - env->regs[R_EDX] = int128_gethi(oldv); - eflags &= ~CC_Z; - } - CC_SRC = eflags; -#endif - } -} -#endif - -void 
helper_boundw(CPUX86State *env, target_ulong a0, int v) -{ - int low, high; - - low = cpu_ldsw_data_ra(env, a0, GETPC()); - high = cpu_ldsw_data_ra(env, a0 + 2, GETPC()); - v = (int16_t)v; - if (v < low || v > high) { - if (env->hflags & HF_MPX_EN_MASK) { - env->bndcs_regs.sts = 0; - } - raise_exception_ra(env, EXCP05_BOUND, GETPC()); - } -} - -void helper_boundl(CPUX86State *env, target_ulong a0, int v) -{ - int low, high; - - low = cpu_ldl_data_ra(env, a0, GETPC()); - high = cpu_ldl_data_ra(env, a0 + 4, GETPC()); - if (v < low || v > high) { - if (env->hflags & HF_MPX_EN_MASK) { - env->bndcs_regs.sts = 0; - } - raise_exception_ra(env, EXCP05_BOUND, GETPC()); - } -} - -#if !defined(CONFIG_USER_ONLY) -/* try to fill the TLB and return an exception if error. If retaddr is - * NULL, it means that the function was called in C code (i.e. not - * from generated code or from helper.c) - */ -/* XXX: fix it to restore all registers */ -void tlb_fill(CPUState *cs, target_ulong addr, int size, - MMUAccessType access_type, int mmu_idx, uintptr_t retaddr) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - int ret; - - env->retaddr = retaddr; - ret = x86_cpu_handle_mmu_fault(cs, addr, size, access_type, mmu_idx); - if (ret) { - raise_exception_err_ra(env, cs->exception_index, env->error_code, retaddr); - } -} -#endif diff --git a/target/i386/meson.build b/target/i386/meson.build new file mode 100644 index 0000000000..7c74bfa859 --- /dev/null +++ b/target/i386/meson.build @@ -0,0 +1,35 @@ +i386_ss = ss.source_set() +i386_ss.add(files( + 'cpu.c', + 'gdbstub.c', + 'helper.c', + 'xsave_helper.c', + 'cpu-dump.c', +)) +i386_ss.add(when: 'CONFIG_SEV', if_true: files('host-cpu.c')) + +# x86 cpu type +i386_ss.add(when: 'CONFIG_KVM', if_true: files('host-cpu.c')) +i386_ss.add(when: 'CONFIG_HVF', if_true: files('host-cpu.c')) + +i386_system_ss = ss.source_set() +i386_system_ss.add(files( + 'arch_dump.c', + 'arch_memory_mapping.c', + 'machine.c', + 'monitor.c', + 
'cpu-sysemu.c', +)) +i386_system_ss.add(when: 'CONFIG_SEV', if_true: files('sev.c'), if_false: files('sev-sysemu-stub.c')) + +i386_user_ss = ss.source_set() + +subdir('kvm') +subdir('whpx') +subdir('nvmm') +subdir('hvf') +subdir('tcg') + +target_arch += {'i386': i386_ss} +target_system_arch += {'i386': i386_system_ss} +target_user_arch += {'i386': i386_user_ss} diff --git a/target/i386/monitor.c b/target/i386/monitor.c index 74a13c571b..3a281dab02 100644 --- a/target/i386/monitor.c +++ b/target/i386/monitor.c @@ -26,13 +26,12 @@ #include "cpu.h" #include "monitor/monitor.h" #include "monitor/hmp-target.h" +#include "monitor/hmp.h" #include "qapi/qmp/qdict.h" -#include "hw/i386/pc.h" +#include "sysemu/hw_accel.h" #include "sysemu/kvm.h" -#include "sysemu/sev.h" -#include "hmp.h" #include "qapi/error.h" -#include "sev_i386.h" +#include "qapi/qapi-commands-misc-target.h" #include "qapi/qapi-commands-misc.h" /* Perform linear address sign extension */ @@ -57,7 +56,7 @@ static void print_pte(Monitor *mon, CPUArchState *env, hwaddr addr, { addr = addr_canonical(env, addr); - monitor_printf(mon, TARGET_FMT_plx ": " TARGET_FMT_plx + monitor_printf(mon, HWADDR_FMT_plx ": " HWADDR_FMT_plx " %c%c%c%c%c%c%c%c%c\n", addr, pte & mask, @@ -222,7 +221,7 @@ void hmp_info_tlb(Monitor *mon, const QDict *qdict) { CPUArchState *env; - env = mon_get_cpu_env(); + env = mon_get_cpu_env(mon); if (!env) { monitor_printf(mon, "No CPU available\n"); return; @@ -258,8 +257,8 @@ static void mem_print(Monitor *mon, CPUArchState *env, prot1 = *plast_prot; if (prot != prot1) { if (*pstart != -1) { - monitor_printf(mon, TARGET_FMT_plx "-" TARGET_FMT_plx " " - TARGET_FMT_plx " %c%c%c\n", + monitor_printf(mon, HWADDR_FMT_plx "-" HWADDR_FMT_plx " " + HWADDR_FMT_plx " %c%c%c\n", addr_canonical(env, *pstart), addr_canonical(env, end), addr_canonical(env, end - *pstart), @@ -550,7 +549,7 @@ void hmp_info_mem(Monitor *mon, const QDict *qdict) { CPUArchState *env; - env = mon_get_cpu_env(); + env = 
mon_get_cpu_env(mon); if (!env) { monitor_printf(mon, "No CPU available\n"); return; @@ -601,9 +600,10 @@ void hmp_mce(Monitor *mon, const QDict *qdict) } } -static target_long monitor_get_pc(const struct MonitorDef *md, int val) +static target_long monitor_get_pc(Monitor *mon, const struct MonitorDef *md, + int val) { - CPUArchState *env = mon_get_cpu_env(); + CPUArchState *env = mon_get_cpu_env(mon); return env->eip + env->segs[R_CS].base; } @@ -654,9 +654,13 @@ void hmp_info_local_apic(Monitor *mon, const QDict *qdict) if (qdict_haskey(qdict, "apic-id")) { int id = qdict_get_try_int(qdict, "apic-id", 0); + cs = cpu_by_arch_id(id); + if (cs) { + cpu_synchronize_state(cs); + } } else { - cs = mon_get_cpu(); + cs = mon_get_cpu(mon); } @@ -664,76 +668,5 @@ void hmp_info_local_apic(Monitor *mon, const QDict *qdict) monitor_printf(mon, "No CPU available\n"); return; } - x86_cpu_dump_local_apic_state(cs, (FILE *)mon, monitor_fprintf, - CPU_DUMP_FPU); -} - -void hmp_info_io_apic(Monitor *mon, const QDict *qdict) -{ - monitor_printf(mon, "This command is obsolete and will be " - "removed soon. Please use 'info pic' instead.\n"); -} - -SevInfo *qmp_query_sev(Error **errp) -{ - SevInfo *info; - - info = sev_get_info(); - if (!info) { - error_setg(errp, "SEV feature is not available"); - return NULL; - } - - return info; -} - -void hmp_info_sev(Monitor *mon, const QDict *qdict) -{ - SevInfo *info = sev_get_info(); - - if (info && info->enabled) { - monitor_printf(mon, "handle: %d\n", info->handle); - monitor_printf(mon, "state: %s\n", SevState_str(info->state)); - monitor_printf(mon, "build: %d\n", info->build_id); - monitor_printf(mon, "api version: %d.%d\n", - info->api_major, info->api_minor); - monitor_printf(mon, "debug: %s\n", - info->policy & SEV_POLICY_NODBG ? "off" : "on"); - monitor_printf(mon, "key-sharing: %s\n", - info->policy & SEV_POLICY_NOKS ? 
"off" : "on"); - } else { - monitor_printf(mon, "SEV is not enabled\n"); - } - - qapi_free_SevInfo(info); -} - -SevLaunchMeasureInfo *qmp_query_sev_launch_measure(Error **errp) -{ - char *data; - SevLaunchMeasureInfo *info; - - data = sev_get_launch_measurement(); - if (!data) { - error_setg(errp, "Measurement is not available"); - return NULL; - } - - info = g_malloc0(sizeof(*info)); - info->data = data; - - return info; -} - -SevCapability *qmp_query_sev_capabilities(Error **errp) -{ - SevCapability *data; - - data = sev_get_capabilities(); - if (!data) { - error_setg(errp, "SEV feature is not available"); - return NULL; - } - - return data; + x86_cpu_dump_local_apic_state(cs, CPU_DUMP_FPU); } diff --git a/target/i386/nvmm/meson.build b/target/i386/nvmm/meson.build new file mode 100644 index 0000000000..885a708665 --- /dev/null +++ b/target/i386/nvmm/meson.build @@ -0,0 +1,8 @@ +i386_system_ss.add(when: 'CONFIG_NVMM', if_true: + files( + 'nvmm-all.c', + 'nvmm-accel-ops.c', + ) +) + +i386_system_ss.add(when: 'CONFIG_NVMM', if_true: nvmm) diff --git a/target/i386/nvmm/nvmm-accel-ops.c b/target/i386/nvmm/nvmm-accel-ops.c new file mode 100644 index 0000000000..6b2bfd9b9c --- /dev/null +++ b/target/i386/nvmm/nvmm-accel-ops.c @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2018-2019 Maxime Villard, All rights reserved. + * + * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#include "qemu/osdep.h" +#include "sysemu/kvm_int.h" +#include "qemu/main-loop.h" +#include "sysemu/cpus.h" +#include "qemu/guest-random.h" + +#include "sysemu/nvmm.h" +#include "nvmm-accel-ops.h" + +static void *qemu_nvmm_cpu_thread_fn(void *arg) +{ + CPUState *cpu = arg; + int r; + + assert(nvmm_enabled()); + + rcu_register_thread(); + + bql_lock(); + qemu_thread_get_self(cpu->thread); + cpu->thread_id = qemu_get_thread_id(); + current_cpu = cpu; + + r = nvmm_init_vcpu(cpu); + if (r < 0) { + fprintf(stderr, "nvmm_init_vcpu failed: %s\n", strerror(-r)); + exit(1); + } + + /* signal CPU creation */ + cpu_thread_signal_created(cpu); + qemu_guest_random_seed_thread_part2(cpu->random_seed); + + do { + if (cpu_can_run(cpu)) { + r = nvmm_vcpu_exec(cpu); + if (r == EXCP_DEBUG) { + cpu_handle_guest_debug(cpu); + } + } + while (cpu_thread_is_idle(cpu)) { + qemu_cond_wait_bql(cpu->halt_cond); + } + qemu_wait_io_event_common(cpu); + } while (!cpu->unplug || cpu_can_run(cpu)); + + nvmm_destroy_vcpu(cpu); + cpu_thread_signal_destroyed(cpu); + bql_unlock(); + rcu_unregister_thread(); + return NULL; +} + +static void nvmm_start_vcpu_thread(CPUState *cpu) +{ + char thread_name[VCPU_THREAD_NAME_SIZE]; + + cpu->thread = g_new0(QemuThread, 1); + cpu->halt_cond = g_new0(QemuCond, 1); + qemu_cond_init(cpu->halt_cond); + snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/NVMM", + cpu->cpu_index); + qemu_thread_create(cpu->thread, thread_name, qemu_nvmm_cpu_thread_fn, + cpu, QEMU_THREAD_JOINABLE); +} + +/* + * Abort the call to run the virtual processor by another thread, and to + * return the control to that thread. 
+ */ +static void nvmm_kick_vcpu_thread(CPUState *cpu) +{ + cpu->exit_request = 1; + cpus_kick_thread(cpu); +} + +static void nvmm_accel_ops_class_init(ObjectClass *oc, void *data) +{ + AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); + + ops->create_vcpu_thread = nvmm_start_vcpu_thread; + ops->kick_vcpu_thread = nvmm_kick_vcpu_thread; + + ops->synchronize_post_reset = nvmm_cpu_synchronize_post_reset; + ops->synchronize_post_init = nvmm_cpu_synchronize_post_init; + ops->synchronize_state = nvmm_cpu_synchronize_state; + ops->synchronize_pre_loadvm = nvmm_cpu_synchronize_pre_loadvm; +} + +static const TypeInfo nvmm_accel_ops_type = { + .name = ACCEL_OPS_NAME("nvmm"), + + .parent = TYPE_ACCEL_OPS, + .class_init = nvmm_accel_ops_class_init, + .abstract = true, +}; + +static void nvmm_accel_ops_register_types(void) +{ + type_register_static(&nvmm_accel_ops_type); +} +type_init(nvmm_accel_ops_register_types); diff --git a/target/i386/nvmm/nvmm-accel-ops.h b/target/i386/nvmm/nvmm-accel-ops.h new file mode 100644 index 0000000000..7c5461bd75 --- /dev/null +++ b/target/i386/nvmm/nvmm-accel-ops.h @@ -0,0 +1,24 @@ +/* + * Copyright (c) 2018-2019 Maxime Villard, All rights reserved. + * + * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef TARGET_I386_NVMM_ACCEL_OPS_H +#define TARGET_I386_NVMM_ACCEL_OPS_H + +#include "sysemu/cpus.h" + +int nvmm_init_vcpu(CPUState *cpu); +int nvmm_vcpu_exec(CPUState *cpu); +void nvmm_destroy_vcpu(CPUState *cpu); + +void nvmm_cpu_synchronize_state(CPUState *cpu); +void nvmm_cpu_synchronize_post_reset(CPUState *cpu); +void nvmm_cpu_synchronize_post_init(CPUState *cpu); +void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu); + +#endif /* TARGET_I386_NVMM_ACCEL_OPS_H */ diff --git a/target/i386/nvmm/nvmm-all.c b/target/i386/nvmm/nvmm-all.c new file mode 100644 index 0000000000..49a3a3b916 --- /dev/null +++ b/target/i386/nvmm/nvmm-all.c @@ -0,0 +1,1222 @@ +/* + * Copyright (c) 2018-2019 Maxime Villard, All rights reserved. + * + * NetBSD Virtual Machine Monitor (NVMM) accelerator for QEMU. + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/address-spaces.h" +#include "exec/ioport.h" +#include "qemu/accel.h" +#include "sysemu/nvmm.h" +#include "sysemu/cpus.h" +#include "sysemu/runstate.h" +#include "qemu/main-loop.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qemu/queue.h" +#include "migration/blocker.h" +#include "strings.h" + +#include "nvmm-accel-ops.h" + +#include <nvmm.h> + +struct AccelCPUState { + struct nvmm_vcpu vcpu; + uint8_t tpr; + bool stop; + + /* Window-exiting for INTs/NMIs. */ + bool int_window_exit; + bool nmi_window_exit; + + /* The guest is in an interrupt shadow (POP SS, etc). 
*/ + bool int_shadow; +}; + +struct qemu_machine { + struct nvmm_capability cap; + struct nvmm_machine mach; +}; + +/* -------------------------------------------------------------------------- */ + +static bool nvmm_allowed; +static struct qemu_machine qemu_mach; + +static struct nvmm_machine * +get_nvmm_mach(void) +{ + return &qemu_mach.mach; +} + +/* -------------------------------------------------------------------------- */ + +static void +nvmm_set_segment(struct nvmm_x64_state_seg *nseg, const SegmentCache *qseg) +{ + uint32_t attrib = qseg->flags; + + nseg->selector = qseg->selector; + nseg->limit = qseg->limit; + nseg->base = qseg->base; + nseg->attrib.type = __SHIFTOUT(attrib, DESC_TYPE_MASK); + nseg->attrib.s = __SHIFTOUT(attrib, DESC_S_MASK); + nseg->attrib.dpl = __SHIFTOUT(attrib, DESC_DPL_MASK); + nseg->attrib.p = __SHIFTOUT(attrib, DESC_P_MASK); + nseg->attrib.avl = __SHIFTOUT(attrib, DESC_AVL_MASK); + nseg->attrib.l = __SHIFTOUT(attrib, DESC_L_MASK); + nseg->attrib.def = __SHIFTOUT(attrib, DESC_B_MASK); + nseg->attrib.g = __SHIFTOUT(attrib, DESC_G_MASK); +} + +static void +nvmm_set_registers(CPUState *cpu) +{ + CPUX86State *env = cpu_env(cpu); + struct nvmm_machine *mach = get_nvmm_mach(); + AccelCPUState *qcpu = cpu->accel; + struct nvmm_vcpu *vcpu = &qcpu->vcpu; + struct nvmm_x64_state *state = vcpu->state; + uint64_t bitmap; + size_t i; + int ret; + + assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + + /* GPRs. 
*/ + state->gprs[NVMM_X64_GPR_RAX] = env->regs[R_EAX]; + state->gprs[NVMM_X64_GPR_RCX] = env->regs[R_ECX]; + state->gprs[NVMM_X64_GPR_RDX] = env->regs[R_EDX]; + state->gprs[NVMM_X64_GPR_RBX] = env->regs[R_EBX]; + state->gprs[NVMM_X64_GPR_RSP] = env->regs[R_ESP]; + state->gprs[NVMM_X64_GPR_RBP] = env->regs[R_EBP]; + state->gprs[NVMM_X64_GPR_RSI] = env->regs[R_ESI]; + state->gprs[NVMM_X64_GPR_RDI] = env->regs[R_EDI]; +#ifdef TARGET_X86_64 + state->gprs[NVMM_X64_GPR_R8] = env->regs[R_R8]; + state->gprs[NVMM_X64_GPR_R9] = env->regs[R_R9]; + state->gprs[NVMM_X64_GPR_R10] = env->regs[R_R10]; + state->gprs[NVMM_X64_GPR_R11] = env->regs[R_R11]; + state->gprs[NVMM_X64_GPR_R12] = env->regs[R_R12]; + state->gprs[NVMM_X64_GPR_R13] = env->regs[R_R13]; + state->gprs[NVMM_X64_GPR_R14] = env->regs[R_R14]; + state->gprs[NVMM_X64_GPR_R15] = env->regs[R_R15]; +#endif + + /* RIP and RFLAGS. */ + state->gprs[NVMM_X64_GPR_RIP] = env->eip; + state->gprs[NVMM_X64_GPR_RFLAGS] = env->eflags; + + /* Segments. */ + nvmm_set_segment(&state->segs[NVMM_X64_SEG_CS], &env->segs[R_CS]); + nvmm_set_segment(&state->segs[NVMM_X64_SEG_DS], &env->segs[R_DS]); + nvmm_set_segment(&state->segs[NVMM_X64_SEG_ES], &env->segs[R_ES]); + nvmm_set_segment(&state->segs[NVMM_X64_SEG_FS], &env->segs[R_FS]); + nvmm_set_segment(&state->segs[NVMM_X64_SEG_GS], &env->segs[R_GS]); + nvmm_set_segment(&state->segs[NVMM_X64_SEG_SS], &env->segs[R_SS]); + + /* Special segments. */ + nvmm_set_segment(&state->segs[NVMM_X64_SEG_GDT], &env->gdt); + nvmm_set_segment(&state->segs[NVMM_X64_SEG_LDT], &env->ldt); + nvmm_set_segment(&state->segs[NVMM_X64_SEG_TR], &env->tr); + nvmm_set_segment(&state->segs[NVMM_X64_SEG_IDT], &env->idt); + + /* Control registers. 
*/ + state->crs[NVMM_X64_CR_CR0] = env->cr[0]; + state->crs[NVMM_X64_CR_CR2] = env->cr[2]; + state->crs[NVMM_X64_CR_CR3] = env->cr[3]; + state->crs[NVMM_X64_CR_CR4] = env->cr[4]; + state->crs[NVMM_X64_CR_CR8] = qcpu->tpr; + state->crs[NVMM_X64_CR_XCR0] = env->xcr0; + + /* Debug registers. */ + state->drs[NVMM_X64_DR_DR0] = env->dr[0]; + state->drs[NVMM_X64_DR_DR1] = env->dr[1]; + state->drs[NVMM_X64_DR_DR2] = env->dr[2]; + state->drs[NVMM_X64_DR_DR3] = env->dr[3]; + state->drs[NVMM_X64_DR_DR6] = env->dr[6]; + state->drs[NVMM_X64_DR_DR7] = env->dr[7]; + + /* FPU. */ + state->fpu.fx_cw = env->fpuc; + state->fpu.fx_sw = (env->fpus & ~0x3800) | ((env->fpstt & 0x7) << 11); + state->fpu.fx_tw = 0; + for (i = 0; i < 8; i++) { + state->fpu.fx_tw |= (!env->fptags[i]) << i; + } + state->fpu.fx_opcode = env->fpop; + state->fpu.fx_ip.fa_64 = env->fpip; + state->fpu.fx_dp.fa_64 = env->fpdp; + state->fpu.fx_mxcsr = env->mxcsr; + state->fpu.fx_mxcsr_mask = 0x0000FFFF; + assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs)); + memcpy(state->fpu.fx_87_ac, env->fpregs, sizeof(env->fpregs)); + for (i = 0; i < CPU_NB_REGS; i++) { + memcpy(&state->fpu.fx_xmm[i].xmm_bytes[0], + &env->xmm_regs[i].ZMM_Q(0), 8); + memcpy(&state->fpu.fx_xmm[i].xmm_bytes[8], + &env->xmm_regs[i].ZMM_Q(1), 8); + } + + /* MSRs. 
*/ + state->msrs[NVMM_X64_MSR_EFER] = env->efer; + state->msrs[NVMM_X64_MSR_STAR] = env->star; +#ifdef TARGET_X86_64 + state->msrs[NVMM_X64_MSR_LSTAR] = env->lstar; + state->msrs[NVMM_X64_MSR_CSTAR] = env->cstar; + state->msrs[NVMM_X64_MSR_SFMASK] = env->fmask; + state->msrs[NVMM_X64_MSR_KERNELGSBASE] = env->kernelgsbase; +#endif + state->msrs[NVMM_X64_MSR_SYSENTER_CS] = env->sysenter_cs; + state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = env->sysenter_esp; + state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = env->sysenter_eip; + state->msrs[NVMM_X64_MSR_PAT] = env->pat; + state->msrs[NVMM_X64_MSR_TSC] = env->tsc; + + bitmap = + NVMM_X64_STATE_SEGS | + NVMM_X64_STATE_GPRS | + NVMM_X64_STATE_CRS | + NVMM_X64_STATE_DRS | + NVMM_X64_STATE_MSRS | + NVMM_X64_STATE_FPU; + + ret = nvmm_vcpu_setstate(mach, vcpu, bitmap); + if (ret == -1) { + error_report("NVMM: Failed to set virtual processor context," + " error=%d", errno); + } +} + +static void +nvmm_get_segment(SegmentCache *qseg, const struct nvmm_x64_state_seg *nseg) +{ + qseg->selector = nseg->selector; + qseg->limit = nseg->limit; + qseg->base = nseg->base; + + qseg->flags = + __SHIFTIN((uint32_t)nseg->attrib.type, DESC_TYPE_MASK) | + __SHIFTIN((uint32_t)nseg->attrib.s, DESC_S_MASK) | + __SHIFTIN((uint32_t)nseg->attrib.dpl, DESC_DPL_MASK) | + __SHIFTIN((uint32_t)nseg->attrib.p, DESC_P_MASK) | + __SHIFTIN((uint32_t)nseg->attrib.avl, DESC_AVL_MASK) | + __SHIFTIN((uint32_t)nseg->attrib.l, DESC_L_MASK) | + __SHIFTIN((uint32_t)nseg->attrib.def, DESC_B_MASK) | + __SHIFTIN((uint32_t)nseg->attrib.g, DESC_G_MASK); +} + +static void +nvmm_get_registers(CPUState *cpu) +{ + CPUX86State *env = cpu_env(cpu); + struct nvmm_machine *mach = get_nvmm_mach(); + AccelCPUState *qcpu = cpu->accel; + struct nvmm_vcpu *vcpu = &qcpu->vcpu; + X86CPU *x86_cpu = X86_CPU(cpu); + struct nvmm_x64_state *state = vcpu->state; + uint64_t bitmap, tpr; + size_t i; + int ret; + + assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + + bitmap = + NVMM_X64_STATE_SEGS | 
+ NVMM_X64_STATE_GPRS | + NVMM_X64_STATE_CRS | + NVMM_X64_STATE_DRS | + NVMM_X64_STATE_MSRS | + NVMM_X64_STATE_FPU; + + ret = nvmm_vcpu_getstate(mach, vcpu, bitmap); + if (ret == -1) { + error_report("NVMM: Failed to get virtual processor context," + " error=%d", errno); + } + + /* GPRs. */ + env->regs[R_EAX] = state->gprs[NVMM_X64_GPR_RAX]; + env->regs[R_ECX] = state->gprs[NVMM_X64_GPR_RCX]; + env->regs[R_EDX] = state->gprs[NVMM_X64_GPR_RDX]; + env->regs[R_EBX] = state->gprs[NVMM_X64_GPR_RBX]; + env->regs[R_ESP] = state->gprs[NVMM_X64_GPR_RSP]; + env->regs[R_EBP] = state->gprs[NVMM_X64_GPR_RBP]; + env->regs[R_ESI] = state->gprs[NVMM_X64_GPR_RSI]; + env->regs[R_EDI] = state->gprs[NVMM_X64_GPR_RDI]; +#ifdef TARGET_X86_64 + env->regs[R_R8] = state->gprs[NVMM_X64_GPR_R8]; + env->regs[R_R9] = state->gprs[NVMM_X64_GPR_R9]; + env->regs[R_R10] = state->gprs[NVMM_X64_GPR_R10]; + env->regs[R_R11] = state->gprs[NVMM_X64_GPR_R11]; + env->regs[R_R12] = state->gprs[NVMM_X64_GPR_R12]; + env->regs[R_R13] = state->gprs[NVMM_X64_GPR_R13]; + env->regs[R_R14] = state->gprs[NVMM_X64_GPR_R14]; + env->regs[R_R15] = state->gprs[NVMM_X64_GPR_R15]; +#endif + + /* RIP and RFLAGS. */ + env->eip = state->gprs[NVMM_X64_GPR_RIP]; + env->eflags = state->gprs[NVMM_X64_GPR_RFLAGS]; + + /* Segments. */ + nvmm_get_segment(&env->segs[R_ES], &state->segs[NVMM_X64_SEG_ES]); + nvmm_get_segment(&env->segs[R_CS], &state->segs[NVMM_X64_SEG_CS]); + nvmm_get_segment(&env->segs[R_SS], &state->segs[NVMM_X64_SEG_SS]); + nvmm_get_segment(&env->segs[R_DS], &state->segs[NVMM_X64_SEG_DS]); + nvmm_get_segment(&env->segs[R_FS], &state->segs[NVMM_X64_SEG_FS]); + nvmm_get_segment(&env->segs[R_GS], &state->segs[NVMM_X64_SEG_GS]); + + /* Special segments. 
*/ + nvmm_get_segment(&env->gdt, &state->segs[NVMM_X64_SEG_GDT]); + nvmm_get_segment(&env->ldt, &state->segs[NVMM_X64_SEG_LDT]); + nvmm_get_segment(&env->tr, &state->segs[NVMM_X64_SEG_TR]); + nvmm_get_segment(&env->idt, &state->segs[NVMM_X64_SEG_IDT]); + + /* Control registers. */ + env->cr[0] = state->crs[NVMM_X64_CR_CR0]; + env->cr[2] = state->crs[NVMM_X64_CR_CR2]; + env->cr[3] = state->crs[NVMM_X64_CR_CR3]; + env->cr[4] = state->crs[NVMM_X64_CR_CR4]; + tpr = state->crs[NVMM_X64_CR_CR8]; + if (tpr != qcpu->tpr) { + qcpu->tpr = tpr; + cpu_set_apic_tpr(x86_cpu->apic_state, tpr); + } + env->xcr0 = state->crs[NVMM_X64_CR_XCR0]; + + /* Debug registers. */ + env->dr[0] = state->drs[NVMM_X64_DR_DR0]; + env->dr[1] = state->drs[NVMM_X64_DR_DR1]; + env->dr[2] = state->drs[NVMM_X64_DR_DR2]; + env->dr[3] = state->drs[NVMM_X64_DR_DR3]; + env->dr[6] = state->drs[NVMM_X64_DR_DR6]; + env->dr[7] = state->drs[NVMM_X64_DR_DR7]; + + /* FPU. */ + env->fpuc = state->fpu.fx_cw; + env->fpstt = (state->fpu.fx_sw >> 11) & 0x7; + env->fpus = state->fpu.fx_sw & ~0x3800; + for (i = 0; i < 8; i++) { + env->fptags[i] = !((state->fpu.fx_tw >> i) & 1); + } + env->fpop = state->fpu.fx_opcode; + env->fpip = state->fpu.fx_ip.fa_64; + env->fpdp = state->fpu.fx_dp.fa_64; + env->mxcsr = state->fpu.fx_mxcsr; + assert(sizeof(state->fpu.fx_87_ac) == sizeof(env->fpregs)); + memcpy(env->fpregs, state->fpu.fx_87_ac, sizeof(env->fpregs)); + for (i = 0; i < CPU_NB_REGS; i++) { + memcpy(&env->xmm_regs[i].ZMM_Q(0), + &state->fpu.fx_xmm[i].xmm_bytes[0], 8); + memcpy(&env->xmm_regs[i].ZMM_Q(1), + &state->fpu.fx_xmm[i].xmm_bytes[8], 8); + } + + /* MSRs. 
*/ + env->efer = state->msrs[NVMM_X64_MSR_EFER]; + env->star = state->msrs[NVMM_X64_MSR_STAR]; +#ifdef TARGET_X86_64 + env->lstar = state->msrs[NVMM_X64_MSR_LSTAR]; + env->cstar = state->msrs[NVMM_X64_MSR_CSTAR]; + env->fmask = state->msrs[NVMM_X64_MSR_SFMASK]; + env->kernelgsbase = state->msrs[NVMM_X64_MSR_KERNELGSBASE]; +#endif + env->sysenter_cs = state->msrs[NVMM_X64_MSR_SYSENTER_CS]; + env->sysenter_esp = state->msrs[NVMM_X64_MSR_SYSENTER_ESP]; + env->sysenter_eip = state->msrs[NVMM_X64_MSR_SYSENTER_EIP]; + env->pat = state->msrs[NVMM_X64_MSR_PAT]; + env->tsc = state->msrs[NVMM_X64_MSR_TSC]; + + x86_update_hflags(env); +} + +static bool +nvmm_can_take_int(CPUState *cpu) +{ + AccelCPUState *qcpu = cpu->accel; + struct nvmm_vcpu *vcpu = &qcpu->vcpu; + struct nvmm_machine *mach = get_nvmm_mach(); + + if (qcpu->int_window_exit) { + return false; + } + + if (qcpu->int_shadow || !(cpu_env(cpu)->eflags & IF_MASK)) { + struct nvmm_x64_state *state = vcpu->state; + + /* Exit on interrupt window. */ + nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_INTR); + state->intr.int_window_exiting = 1; + nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_INTR); + + return false; + } + + return true; +} + +static bool +nvmm_can_take_nmi(CPUState *cpu) +{ + AccelCPUState *qcpu = cpu->accel; + + /* + * Contrary to INTs, NMIs always schedule an exit when they are + * completed. Therefore, if window-exiting is enabled, it means + * NMIs are blocked. + */ + if (qcpu->nmi_window_exit) { + return false; + } + + return true; +} + +/* + * Called before the VCPU is run. We inject events generated by the I/O + * thread, and synchronize the guest TPR. 
+ */ +static void +nvmm_vcpu_pre_run(CPUState *cpu) +{ + CPUX86State *env = cpu_env(cpu); + struct nvmm_machine *mach = get_nvmm_mach(); + AccelCPUState *qcpu = cpu->accel; + struct nvmm_vcpu *vcpu = &qcpu->vcpu; + X86CPU *x86_cpu = X86_CPU(cpu); + struct nvmm_x64_state *state = vcpu->state; + struct nvmm_vcpu_event *event = vcpu->event; + bool has_event = false; + bool sync_tpr = false; + uint8_t tpr; + int ret; + + bql_lock(); + + tpr = cpu_get_apic_tpr(x86_cpu->apic_state); + if (tpr != qcpu->tpr) { + qcpu->tpr = tpr; + sync_tpr = true; + } + + /* + * Force the VCPU out of its inner loop to process any INIT requests + * or commit pending TPR access. + */ + if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { + cpu->exit_request = 1; + } + + if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + if (nvmm_can_take_nmi(cpu)) { + cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; + event->type = NVMM_VCPU_EVENT_INTR; + event->vector = 2; + has_event = true; + } + } + + if (!has_event && (cpu->interrupt_request & CPU_INTERRUPT_HARD)) { + if (nvmm_can_take_int(cpu)) { + cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + event->type = NVMM_VCPU_EVENT_INTR; + event->vector = cpu_get_pic_interrupt(env); + has_event = true; + } + } + + /* Don't want SMIs. */ + if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { + cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; + } + + if (sync_tpr) { + ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_CRS); + if (ret == -1) { + error_report("NVMM: Failed to get CPU state," + " error=%d", errno); + } + + state->crs[NVMM_X64_CR_CR8] = qcpu->tpr; + + ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_CRS); + if (ret == -1) { + error_report("NVMM: Failed to set CPU state," + " error=%d", errno); + } + } + + if (has_event) { + ret = nvmm_vcpu_inject(mach, vcpu); + if (ret == -1) { + error_report("NVMM: Failed to inject event," + " error=%d", errno); + } + } + + bql_unlock(); +} + +/* + * Called after the VCPU ran. 
We synchronize the host view of the TPR and + * RFLAGS. + */ +static void +nvmm_vcpu_post_run(CPUState *cpu, struct nvmm_vcpu_exit *exit) +{ + AccelCPUState *qcpu = cpu->accel; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + uint64_t tpr; + + env->eflags = exit->exitstate.rflags; + qcpu->int_shadow = exit->exitstate.int_shadow; + qcpu->int_window_exit = exit->exitstate.int_window_exiting; + qcpu->nmi_window_exit = exit->exitstate.nmi_window_exiting; + + tpr = exit->exitstate.cr8; + if (qcpu->tpr != tpr) { + qcpu->tpr = tpr; + bql_lock(); + cpu_set_apic_tpr(x86_cpu->apic_state, qcpu->tpr); + bql_unlock(); + } +} + +/* -------------------------------------------------------------------------- */ + +static void +nvmm_io_callback(struct nvmm_io *io) +{ + MemTxAttrs attrs = { 0 }; + int ret; + + ret = address_space_rw(&address_space_io, io->port, attrs, io->data, + io->size, !io->in); + if (ret != MEMTX_OK) { + error_report("NVMM: I/O Transaction Failed " + "[%s, port=%u, size=%zu]", (io->in ? "in" : "out"), + io->port, io->size); + } + + /* Needed, otherwise infinite loop. */ + current_cpu->vcpu_dirty = false; +} + +static void +nvmm_mem_callback(struct nvmm_mem *mem) +{ + cpu_physical_memory_rw(mem->gpa, mem->data, mem->size, mem->write); + + /* Needed, otherwise infinite loop. 
*/ + current_cpu->vcpu_dirty = false; +} + +static struct nvmm_assist_callbacks nvmm_callbacks = { + .io = nvmm_io_callback, + .mem = nvmm_mem_callback +}; + +/* -------------------------------------------------------------------------- */ + +static int +nvmm_handle_mem(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu) +{ + int ret; + + ret = nvmm_assist_mem(mach, vcpu); + if (ret == -1) { + error_report("NVMM: Mem Assist Failed [gpa=%p]", + (void *)vcpu->exit->u.mem.gpa); + } + + return ret; +} + +static int +nvmm_handle_io(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu) +{ + int ret; + + ret = nvmm_assist_io(mach, vcpu); + if (ret == -1) { + error_report("NVMM: I/O Assist Failed [port=%d]", + (int)vcpu->exit->u.io.port); + } + + return ret; +} + +static int +nvmm_handle_rdmsr(struct nvmm_machine *mach, CPUState *cpu, + struct nvmm_vcpu_exit *exit) +{ + AccelCPUState *qcpu = cpu->accel; + struct nvmm_vcpu *vcpu = &qcpu->vcpu; + X86CPU *x86_cpu = X86_CPU(cpu); + struct nvmm_x64_state *state = vcpu->state; + uint64_t val; + int ret; + + switch (exit->u.rdmsr.msr) { + case MSR_IA32_APICBASE: + val = cpu_get_apic_base(x86_cpu->apic_state); + break; + case MSR_MTRRcap: + case MSR_MTRRdefType: + case MSR_MCG_CAP: + case MSR_MCG_STATUS: + val = 0; + break; + default: /* More MSRs to add? 
*/ + val = 0; + error_report("NVMM: Unexpected RDMSR 0x%x, ignored", + exit->u.rdmsr.msr); + break; + } + + ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS); + if (ret == -1) { + return -1; + } + + state->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF); + state->gprs[NVMM_X64_GPR_RDX] = (val >> 32); + state->gprs[NVMM_X64_GPR_RIP] = exit->u.rdmsr.npc; + + ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS); + if (ret == -1) { + return -1; + } + + return 0; +} + +static int +nvmm_handle_wrmsr(struct nvmm_machine *mach, CPUState *cpu, + struct nvmm_vcpu_exit *exit) +{ + AccelCPUState *qcpu = cpu->accel; + struct nvmm_vcpu *vcpu = &qcpu->vcpu; + X86CPU *x86_cpu = X86_CPU(cpu); + struct nvmm_x64_state *state = vcpu->state; + uint64_t val; + int ret; + + val = exit->u.wrmsr.val; + + switch (exit->u.wrmsr.msr) { + case MSR_IA32_APICBASE: + cpu_set_apic_base(x86_cpu->apic_state, val); + break; + case MSR_MTRRdefType: + case MSR_MCG_STATUS: + break; + default: /* More MSRs to add? */ + error_report("NVMM: Unexpected WRMSR 0x%x [val=0x%lx], ignored", + exit->u.wrmsr.msr, val); + break; + } + + ret = nvmm_vcpu_getstate(mach, vcpu, NVMM_X64_STATE_GPRS); + if (ret == -1) { + return -1; + } + + state->gprs[NVMM_X64_GPR_RIP] = exit->u.wrmsr.npc; + + ret = nvmm_vcpu_setstate(mach, vcpu, NVMM_X64_STATE_GPRS); + if (ret == -1) { + return -1; + } + + return 0; +} + +static int +nvmm_handle_halted(struct nvmm_machine *mach, CPUState *cpu, + struct nvmm_vcpu_exit *exit) +{ + int ret = 0; + + bql_lock(); + + if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + (cpu_env(cpu)->eflags & IF_MASK)) && + !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu->exception_index = EXCP_HLT; + cpu->halted = true; + ret = 1; + } + + bql_unlock(); + + return ret; +} + +static int +nvmm_inject_ud(struct nvmm_machine *mach, struct nvmm_vcpu *vcpu) +{ + struct nvmm_vcpu_event *event = vcpu->event; + + event->type = NVMM_VCPU_EVENT_EXCP; + event->vector = 6; + event->u.excp.error = 0; + 
+ return nvmm_vcpu_inject(mach, vcpu); +} + +static int +nvmm_vcpu_loop(CPUState *cpu) +{ + struct nvmm_machine *mach = get_nvmm_mach(); + AccelCPUState *qcpu = cpu->accel; + struct nvmm_vcpu *vcpu = &qcpu->vcpu; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + struct nvmm_vcpu_exit *exit = vcpu->exit; + int ret; + + /* + * Some asynchronous events must be handled outside of the inner + * VCPU loop. They are handled here. + */ + if (cpu->interrupt_request & CPU_INTERRUPT_INIT) { + nvmm_cpu_synchronize_state(cpu); + do_cpu_init(x86_cpu); + /* set int/nmi windows back to the reset state */ + } + if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { + cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; + apic_poll_irq(x86_cpu->apic_state); + } + if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK)) || + (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu->halted = false; + } + if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) { + nvmm_cpu_synchronize_state(cpu); + do_cpu_sipi(x86_cpu); + } + if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { + cpu->interrupt_request &= ~CPU_INTERRUPT_TPR; + nvmm_cpu_synchronize_state(cpu); + apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip, + env->tpr_access_type); + } + + if (cpu->halted) { + cpu->exception_index = EXCP_HLT; + qatomic_set(&cpu->exit_request, false); + return 0; + } + + bql_unlock(); + cpu_exec_start(cpu); + + /* + * Inner VCPU loop. 
+ */ + do { + if (cpu->vcpu_dirty) { + nvmm_set_registers(cpu); + cpu->vcpu_dirty = false; + } + + if (qcpu->stop) { + cpu->exception_index = EXCP_INTERRUPT; + qcpu->stop = false; + ret = 1; + break; + } + + nvmm_vcpu_pre_run(cpu); + + if (qatomic_read(&cpu->exit_request)) { +#if NVMM_USER_VERSION >= 2 + nvmm_vcpu_stop(vcpu); +#else + qemu_cpu_kick_self(); +#endif + } + + /* Read exit_request before the kernel reads the immediate exit flag */ + smp_rmb(); + ret = nvmm_vcpu_run(mach, vcpu); + if (ret == -1) { + error_report("NVMM: Failed to exec a virtual processor," + " error=%d", errno); + break; + } + + nvmm_vcpu_post_run(cpu, exit); + + switch (exit->reason) { + case NVMM_VCPU_EXIT_NONE: + break; +#if NVMM_USER_VERSION >= 2 + case NVMM_VCPU_EXIT_STOPPED: + /* + * The kernel cleared the immediate exit flag; cpu->exit_request + * must be cleared after + */ + smp_wmb(); + qcpu->stop = true; + break; +#endif + case NVMM_VCPU_EXIT_MEMORY: + ret = nvmm_handle_mem(mach, vcpu); + break; + case NVMM_VCPU_EXIT_IO: + ret = nvmm_handle_io(mach, vcpu); + break; + case NVMM_VCPU_EXIT_INT_READY: + case NVMM_VCPU_EXIT_NMI_READY: + case NVMM_VCPU_EXIT_TPR_CHANGED: + break; + case NVMM_VCPU_EXIT_HALTED: + ret = nvmm_handle_halted(mach, cpu, exit); + break; + case NVMM_VCPU_EXIT_SHUTDOWN: + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + cpu->exception_index = EXCP_INTERRUPT; + ret = 1; + break; + case NVMM_VCPU_EXIT_RDMSR: + ret = nvmm_handle_rdmsr(mach, cpu, exit); + break; + case NVMM_VCPU_EXIT_WRMSR: + ret = nvmm_handle_wrmsr(mach, cpu, exit); + break; + case NVMM_VCPU_EXIT_MONITOR: + case NVMM_VCPU_EXIT_MWAIT: + ret = nvmm_inject_ud(mach, vcpu); + break; + default: + error_report("NVMM: Unexpected VM exit code 0x%lx [hw=0x%lx]", + exit->reason, exit->u.inv.hwcode); + nvmm_get_registers(cpu); + bql_lock(); + qemu_system_guest_panicked(cpu_get_crash_info(cpu)); + bql_unlock(); + ret = -1; + break; + } + } while (ret == 0); + + cpu_exec_end(cpu); + bql_lock(); + + 
qatomic_set(&cpu->exit_request, false); + + return ret < 0; +} + +/* -------------------------------------------------------------------------- */ + +static void +do_nvmm_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) +{ + nvmm_get_registers(cpu); + cpu->vcpu_dirty = true; +} + +static void +do_nvmm_cpu_synchronize_post_reset(CPUState *cpu, run_on_cpu_data arg) +{ + nvmm_set_registers(cpu); + cpu->vcpu_dirty = false; +} + +static void +do_nvmm_cpu_synchronize_post_init(CPUState *cpu, run_on_cpu_data arg) +{ + nvmm_set_registers(cpu); + cpu->vcpu_dirty = false; +} + +static void +do_nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu, run_on_cpu_data arg) +{ + cpu->vcpu_dirty = true; +} + +void nvmm_cpu_synchronize_state(CPUState *cpu) +{ + if (!cpu->vcpu_dirty) { + run_on_cpu(cpu, do_nvmm_cpu_synchronize_state, RUN_ON_CPU_NULL); + } +} + +void nvmm_cpu_synchronize_post_reset(CPUState *cpu) +{ + run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); +} + +void nvmm_cpu_synchronize_post_init(CPUState *cpu) +{ + run_on_cpu(cpu, do_nvmm_cpu_synchronize_post_init, RUN_ON_CPU_NULL); +} + +void nvmm_cpu_synchronize_pre_loadvm(CPUState *cpu) +{ + run_on_cpu(cpu, do_nvmm_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); +} + +/* -------------------------------------------------------------------------- */ + +static Error *nvmm_migration_blocker; + +/* + * The nvmm_vcpu_stop() mechanism breaks races between entering the VMM + * and another thread signaling the vCPU thread to exit. + */ + +static void +nvmm_ipi_signal(int sigcpu) +{ + if (current_cpu) { + AccelCPUState *qcpu = current_cpu->accel; +#if NVMM_USER_VERSION >= 2 + struct nvmm_vcpu *vcpu = &qcpu->vcpu; + nvmm_vcpu_stop(vcpu); +#else + qcpu->stop = true; +#endif + } +} + +static void +nvmm_init_cpu_signals(void) +{ + struct sigaction sigact; + sigset_t set; + + /* Install the IPI handler. 
*/ + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = nvmm_ipi_signal; + sigaction(SIG_IPI, &sigact, NULL); + + /* Allow IPIs on the current thread. */ + sigprocmask(SIG_BLOCK, NULL, &set); + sigdelset(&set, SIG_IPI); + pthread_sigmask(SIG_SETMASK, &set, NULL); +} + +int +nvmm_init_vcpu(CPUState *cpu) +{ + struct nvmm_machine *mach = get_nvmm_mach(); + struct nvmm_vcpu_conf_cpuid cpuid; + struct nvmm_vcpu_conf_tpr tpr; + Error *local_error = NULL; + AccelCPUState *qcpu; + int ret, err; + + nvmm_init_cpu_signals(); + + if (nvmm_migration_blocker == NULL) { + error_setg(&nvmm_migration_blocker, + "NVMM: Migration not supported"); + + if (migrate_add_blocker(&nvmm_migration_blocker, &local_error) < 0) { + error_report_err(local_error); + return -EINVAL; + } + } + + qcpu = g_new0(AccelCPUState, 1); + + ret = nvmm_vcpu_create(mach, cpu->cpu_index, &qcpu->vcpu); + if (ret == -1) { + err = errno; + error_report("NVMM: Failed to create a virtual processor," + " error=%d", err); + g_free(qcpu); + return -err; + } + + memset(&cpuid, 0, sizeof(cpuid)); + cpuid.mask = 1; + cpuid.leaf = 0x00000001; + cpuid.u.mask.set.edx = CPUID_MCE | CPUID_MCA | CPUID_MTRR; + ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CPUID, + &cpuid); + if (ret == -1) { + err = errno; + error_report("NVMM: Failed to configure a virtual processor," + " error=%d", err); + g_free(qcpu); + return -err; + } + + ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_CALLBACKS, + &nvmm_callbacks); + if (ret == -1) { + err = errno; + error_report("NVMM: Failed to configure a virtual processor," + " error=%d", err); + g_free(qcpu); + return -err; + } + + if (qemu_mach.cap.arch.vcpu_conf_support & NVMM_CAP_ARCH_VCPU_CONF_TPR) { + memset(&tpr, 0, sizeof(tpr)); + tpr.exit_changed = 1; + ret = nvmm_vcpu_configure(mach, &qcpu->vcpu, NVMM_VCPU_CONF_TPR, &tpr); + if (ret == -1) { + err = errno; + error_report("NVMM: Failed to configure a virtual processor," + " error=%d", err); + g_free(qcpu); 
+ return -err; + } + } + + cpu->vcpu_dirty = true; + cpu->accel = qcpu; + + return 0; +} + +int +nvmm_vcpu_exec(CPUState *cpu) +{ + int ret, fatal; + + while (1) { + if (cpu->exception_index >= EXCP_INTERRUPT) { + ret = cpu->exception_index; + cpu->exception_index = -1; + break; + } + + fatal = nvmm_vcpu_loop(cpu); + + if (fatal) { + error_report("NVMM: Failed to execute a VCPU."); + abort(); + } + } + + return ret; +} + +void +nvmm_destroy_vcpu(CPUState *cpu) +{ + struct nvmm_machine *mach = get_nvmm_mach(); + AccelCPUState *qcpu = cpu->accel; + + nvmm_vcpu_destroy(mach, &qcpu->vcpu); + g_free(cpu->accel); +} + +/* -------------------------------------------------------------------------- */ + +static void +nvmm_update_mapping(hwaddr start_pa, ram_addr_t size, uintptr_t hva, + bool add, bool rom, const char *name) +{ + struct nvmm_machine *mach = get_nvmm_mach(); + int ret, prot; + + if (add) { + prot = PROT_READ | PROT_EXEC; + if (!rom) { + prot |= PROT_WRITE; + } + ret = nvmm_gpa_map(mach, hva, start_pa, size, prot); + } else { + ret = nvmm_gpa_unmap(mach, hva, start_pa, size); + } + + if (ret == -1) { + error_report("NVMM: Failed to %s GPA range '%s' PA:%p, " + "Size:%p bytes, HostVA:%p, error=%d", + (add ? "map" : "unmap"), name, (void *)(uintptr_t)start_pa, + (void *)size, (void *)hva, errno); + } +} + +static void +nvmm_process_section(MemoryRegionSection *section, int add) +{ + MemoryRegion *mr = section->mr; + hwaddr start_pa = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + unsigned int delta; + uintptr_t hva; + + if (!memory_region_is_ram(mr)) { + return; + } + + /* Adjust start_pa and size so that they are page-aligned. 
*/ + delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask()); + delta &= ~qemu_real_host_page_mask(); + if (delta > size) { + return; + } + start_pa += delta; + size -= delta; + size &= qemu_real_host_page_mask(); + if (!size || (start_pa & ~qemu_real_host_page_mask())) { + return; + } + + hva = (uintptr_t)memory_region_get_ram_ptr(mr) + + section->offset_within_region + delta; + + nvmm_update_mapping(start_pa, size, hva, add, + memory_region_is_rom(mr), mr->name); +} + +static void +nvmm_region_add(MemoryListener *listener, MemoryRegionSection *section) +{ + memory_region_ref(section->mr); + nvmm_process_section(section, 1); +} + +static void +nvmm_region_del(MemoryListener *listener, MemoryRegionSection *section) +{ + nvmm_process_section(section, 0); + memory_region_unref(section->mr); +} + +static void +nvmm_transaction_begin(MemoryListener *listener) +{ + /* nothing */ +} + +static void +nvmm_transaction_commit(MemoryListener *listener) +{ + /* nothing */ +} + +static void +nvmm_log_sync(MemoryListener *listener, MemoryRegionSection *section) +{ + MemoryRegion *mr = section->mr; + + if (!memory_region_is_ram(mr)) { + return; + } + + memory_region_set_dirty(mr, 0, int128_get64(section->size)); +} + +static MemoryListener nvmm_memory_listener = { + .name = "nvmm", + .begin = nvmm_transaction_begin, + .commit = nvmm_transaction_commit, + .region_add = nvmm_region_add, + .region_del = nvmm_region_del, + .log_sync = nvmm_log_sync, + .priority = MEMORY_LISTENER_PRIORITY_ACCEL, +}; + +static void +nvmm_ram_block_added(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size) +{ + struct nvmm_machine *mach = get_nvmm_mach(); + uintptr_t hva = (uintptr_t)host; + int ret; + + ret = nvmm_hva_map(mach, hva, max_size); + + if (ret == -1) { + error_report("NVMM: Failed to map HVA, HostVA:%p " + "Size:%p bytes, error=%d", + (void *)hva, (void *)size, errno); + } +} + +static struct RAMBlockNotifier nvmm_ram_notifier = { + .ram_block_added = 
nvmm_ram_block_added +}; + +/* -------------------------------------------------------------------------- */ + +static int +nvmm_accel_init(MachineState *ms) +{ + int ret, err; + + ret = nvmm_init(); + if (ret == -1) { + err = errno; + error_report("NVMM: Initialization failed, error=%d", errno); + return -err; + } + + ret = nvmm_capability(&qemu_mach.cap); + if (ret == -1) { + err = errno; + error_report("NVMM: Unable to fetch capability, error=%d", errno); + return -err; + } + if (qemu_mach.cap.version < NVMM_KERN_VERSION) { + error_report("NVMM: Unsupported version %u", qemu_mach.cap.version); + return -EPROGMISMATCH; + } + if (qemu_mach.cap.state_size != sizeof(struct nvmm_x64_state)) { + error_report("NVMM: Wrong state size %u", qemu_mach.cap.state_size); + return -EPROGMISMATCH; + } + + ret = nvmm_machine_create(&qemu_mach.mach); + if (ret == -1) { + err = errno; + error_report("NVMM: Machine creation failed, error=%d", errno); + return -err; + } + + memory_listener_register(&nvmm_memory_listener, &address_space_memory); + ram_block_notifier_add(&nvmm_ram_notifier); + + printf("NetBSD Virtual Machine Monitor accelerator is operational\n"); + return 0; +} + +int +nvmm_enabled(void) +{ + return nvmm_allowed; +} + +static void +nvmm_accel_class_init(ObjectClass *oc, void *data) +{ + AccelClass *ac = ACCEL_CLASS(oc); + ac->name = "NVMM"; + ac->init_machine = nvmm_accel_init; + ac->allowed = &nvmm_allowed; +} + +static const TypeInfo nvmm_accel_type = { + .name = ACCEL_CLASS_NAME("nvmm"), + .parent = TYPE_ACCEL, + .class_init = nvmm_accel_class_init, +}; + +static void +nvmm_type_init(void) +{ + type_register_static(&nvmm_accel_type); +} + +type_init(nvmm_type_init); diff --git a/target/i386/ops_sse.h b/target/i386/ops_sse.h index ed05989768..6a465a35fd 100644 --- a/target/i386/ops_sse.h +++ b/target/i386/ops_sse.h @@ -7,7 +7,7 @@ * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License 
as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -19,6 +19,8 @@ */ #include "crypto/aes.h" +#include "crypto/aes-round.h" +#include "crypto/clmul.h" #if SHIFT == 0 #define Reg MMXReg @@ -35,262 +37,206 @@ #define W(n) ZMM_W(n) #define L(n) ZMM_L(n) #define Q(n) ZMM_Q(n) +#if SHIFT == 1 #define SUFFIX _xmm +#else +#define SUFFIX _ymm +#endif #endif -void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) -{ - int shift; +#define LANE_WIDTH (SHIFT ? 16 : 8) +#define PACK_WIDTH (LANE_WIDTH / 2) - if (s->Q(0) > 15) { - d->Q(0) = 0; -#if SHIFT == 1 - d->Q(1) = 0; +#if SHIFT == 0 +#define FPSRL(x, c) ((x) >> shift) +#define FPSRAW(x, c) ((int16_t)(x) >> shift) +#define FPSRAL(x, c) ((int32_t)(x) >> shift) +#define FPSLL(x, c) ((x) << shift) #endif + +void glue(helper_psrlw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) +{ + int shift; + if (c->Q(0) > 15) { + for (int i = 0; i < 1 << SHIFT; i++) { + d->Q(i) = 0; + } } else { - shift = s->B(0); - d->W(0) >>= shift; - d->W(1) >>= shift; - d->W(2) >>= shift; - d->W(3) >>= shift; -#if SHIFT == 1 - d->W(4) >>= shift; - d->W(5) >>= shift; - d->W(6) >>= shift; - d->W(7) >>= shift; -#endif + shift = c->B(0); + for (int i = 0; i < 4 << SHIFT; i++) { + d->W(i) = FPSRL(s->W(i), shift); + } } } -void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { int shift; - - if (s->Q(0) > 15) { - shift = 15; + if (c->Q(0) > 15) { + for (int i = 0; i < 1 << SHIFT; i++) { + d->Q(i) = 0; + } } else { - shift = s->B(0); + shift = c->B(0); + for (int i = 0; i < 4 << SHIFT; i++) { + d->W(i) = FPSLL(s->W(i), shift); + } } - d->W(0) = (int16_t)d->W(0) >> shift; - d->W(1) = 
(int16_t)d->W(1) >> shift; - d->W(2) = (int16_t)d->W(2) >> shift; - d->W(3) = (int16_t)d->W(3) >> shift; -#if SHIFT == 1 - d->W(4) = (int16_t)d->W(4) >> shift; - d->W(5) = (int16_t)d->W(5) >> shift; - d->W(6) = (int16_t)d->W(6) >> shift; - d->W(7) = (int16_t)d->W(7) >> shift; -#endif } -void glue(helper_psllw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_psraw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { int shift; - - if (s->Q(0) > 15) { - d->Q(0) = 0; -#if SHIFT == 1 - d->Q(1) = 0; -#endif + if (c->Q(0) > 15) { + shift = 15; } else { - shift = s->B(0); - d->W(0) <<= shift; - d->W(1) <<= shift; - d->W(2) <<= shift; - d->W(3) <<= shift; -#if SHIFT == 1 - d->W(4) <<= shift; - d->W(5) <<= shift; - d->W(6) <<= shift; - d->W(7) <<= shift; -#endif + shift = c->B(0); + } + for (int i = 0; i < 4 << SHIFT; i++) { + d->W(i) = FPSRAW(s->W(i), shift); } } -void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_psrld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { int shift; - - if (s->Q(0) > 31) { - d->Q(0) = 0; -#if SHIFT == 1 - d->Q(1) = 0; -#endif + if (c->Q(0) > 31) { + for (int i = 0; i < 1 << SHIFT; i++) { + d->Q(i) = 0; + } } else { - shift = s->B(0); - d->L(0) >>= shift; - d->L(1) >>= shift; -#if SHIFT == 1 - d->L(2) >>= shift; - d->L(3) >>= shift; -#endif + shift = c->B(0); + for (int i = 0; i < 2 << SHIFT; i++) { + d->L(i) = FPSRL(s->L(i), shift); + } } } -void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { int shift; - - if (s->Q(0) > 31) { - shift = 31; + if (c->Q(0) > 31) { + for (int i = 0; i < 1 << SHIFT; i++) { + d->Q(i) = 0; + } } else { - shift = s->B(0); + shift = c->B(0); + for (int i = 0; i < 2 << SHIFT; i++) { + d->L(i) = FPSLL(s->L(i), shift); + } } - d->L(0) = (int32_t)d->L(0) >> shift; - d->L(1) = (int32_t)d->L(1) >> shift; -#if SHIFT == 1 - d->L(2) = (int32_t)d->L(2) >> shift; - d->L(3) = (int32_t)d->L(3) 
>> shift; -#endif } -void glue(helper_pslld, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_psrad, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { int shift; - - if (s->Q(0) > 31) { - d->Q(0) = 0; -#if SHIFT == 1 - d->Q(1) = 0; -#endif + if (c->Q(0) > 31) { + shift = 31; } else { - shift = s->B(0); - d->L(0) <<= shift; - d->L(1) <<= shift; -#if SHIFT == 1 - d->L(2) <<= shift; - d->L(3) <<= shift; -#endif + shift = c->B(0); + } + for (int i = 0; i < 2 << SHIFT; i++) { + d->L(i) = FPSRAL(s->L(i), shift); } } -void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_psrlq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { int shift; - - if (s->Q(0) > 63) { - d->Q(0) = 0; -#if SHIFT == 1 - d->Q(1) = 0; -#endif + if (c->Q(0) > 63) { + for (int i = 0; i < 1 << SHIFT; i++) { + d->Q(i) = 0; + } } else { - shift = s->B(0); - d->Q(0) >>= shift; -#if SHIFT == 1 - d->Q(1) >>= shift; -#endif + shift = c->B(0); + for (int i = 0; i < 1 << SHIFT; i++) { + d->Q(i) = FPSRL(s->Q(i), shift); + } } } -void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_psllq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { int shift; - - if (s->Q(0) > 63) { - d->Q(0) = 0; -#if SHIFT == 1 - d->Q(1) = 0; -#endif + if (c->Q(0) > 63) { + for (int i = 0; i < 1 << SHIFT; i++) { + d->Q(i) = 0; + } } else { - shift = s->B(0); - d->Q(0) <<= shift; -#if SHIFT == 1 - d->Q(1) <<= shift; -#endif + shift = c->B(0); + for (int i = 0; i < 1 << SHIFT; i++) { + d->Q(i) = FPSLL(s->Q(i), shift); + } } } -#if SHIFT == 1 -void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +#if SHIFT >= 1 +void glue(helper_psrldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { - int shift, i; + int shift, i, j; - shift = s->L(0); + shift = c->L(0); if (shift > 16) { shift = 16; } - for (i = 0; i < 16 - shift; i++) { - d->B(i) = d->B(i + shift); - } - for (i = 16 - shift; i < 16; i++) { - d->B(i) = 0; + for (j = 0; j < 8 << SHIFT; j += 
LANE_WIDTH) { + for (i = 0; i < 16 - shift; i++) { + d->B(j + i) = s->B(j + i + shift); + } + for (i = 16 - shift; i < 16; i++) { + d->B(j + i) = 0; + } } } -void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_pslldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, Reg *c) { - int shift, i; + int shift, i, j; - shift = s->L(0); + shift = c->L(0); if (shift > 16) { shift = 16; } - for (i = 15; i >= shift; i--) { - d->B(i) = d->B(i - shift); - } - for (i = 0; i < shift; i++) { - d->B(i) = 0; + for (j = 0; j < 8 << SHIFT; j += LANE_WIDTH) { + for (i = 15; i >= shift; i--) { + d->B(j + i) = s->B(j + i - shift); + } + for (i = 0; i < shift; i++) { + d->B(j + i) = 0; + } } } #endif -#define SSE_HELPER_B(name, F) \ +#define SSE_HELPER_1(name, elem, num, F) \ void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ { \ - d->B(0) = F(d->B(0), s->B(0)); \ - d->B(1) = F(d->B(1), s->B(1)); \ - d->B(2) = F(d->B(2), s->B(2)); \ - d->B(3) = F(d->B(3), s->B(3)); \ - d->B(4) = F(d->B(4), s->B(4)); \ - d->B(5) = F(d->B(5), s->B(5)); \ - d->B(6) = F(d->B(6), s->B(6)); \ - d->B(7) = F(d->B(7), s->B(7)); \ - XMM_ONLY( \ - d->B(8) = F(d->B(8), s->B(8)); \ - d->B(9) = F(d->B(9), s->B(9)); \ - d->B(10) = F(d->B(10), s->B(10)); \ - d->B(11) = F(d->B(11), s->B(11)); \ - d->B(12) = F(d->B(12), s->B(12)); \ - d->B(13) = F(d->B(13), s->B(13)); \ - d->B(14) = F(d->B(14), s->B(14)); \ - d->B(15) = F(d->B(15), s->B(15)); \ - ) \ - } + int n = num; \ + for (int i = 0; i < n; i++) { \ + d->elem(i) = F(s->elem(i)); \ + } \ + } -#define SSE_HELPER_W(name, F) \ - void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ +#define SSE_HELPER_2(name, elem, num, F) \ + void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) \ { \ - d->W(0) = F(d->W(0), s->W(0)); \ - d->W(1) = F(d->W(1), s->W(1)); \ - d->W(2) = F(d->W(2), s->W(2)); \ - d->W(3) = F(d->W(3), s->W(3)); \ - XMM_ONLY( \ - d->W(4) = F(d->W(4), s->W(4)); \ - d->W(5) = F(d->W(5), s->W(5)); \ - d->W(6) = 
F(d->W(6), s->W(6)); \ - d->W(7) = F(d->W(7), s->W(7)); \ - ) \ - } + int n = num; \ + for (int i = 0; i < n; i++) { \ + d->elem(i) = F(v->elem(i), s->elem(i)); \ + } \ + } + +#define SSE_HELPER_B(name, F) \ + SSE_HELPER_2(name, B, 8 << SHIFT, F) + +#define SSE_HELPER_W(name, F) \ + SSE_HELPER_2(name, W, 4 << SHIFT, F) #define SSE_HELPER_L(name, F) \ - void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ - { \ - d->L(0) = F(d->L(0), s->L(0)); \ - d->L(1) = F(d->L(1), s->L(1)); \ - XMM_ONLY( \ - d->L(2) = F(d->L(2), s->L(2)); \ - d->L(3) = F(d->L(3), s->L(3)); \ - ) \ - } + SSE_HELPER_2(name, L, 2 << SHIFT, F) #define SSE_HELPER_Q(name, F) \ - void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ - { \ - d->Q(0) = F(d->Q(0), s->Q(0)); \ - XMM_ONLY( \ - d->Q(1) = F(d->Q(1), s->Q(1)); \ - ) \ - } + SSE_HELPER_2(name, Q, 1 << SHIFT, F) #if SHIFT == 0 static inline int satub(int x) @@ -353,17 +299,6 @@ static inline int satsw(int x) #define FMAXUB(a, b) ((a) > (b)) ? (a) : (b) #define FMAXSW(a, b) ((int16_t)(a) > (int16_t)(b)) ? (a) : (b) -#define FAND(a, b) ((a) & (b)) -#define FANDN(a, b) ((~(a)) & (b)) -#define FOR(a, b) ((a) | (b)) -#define FXOR(a, b) ((a) ^ (b)) - -#define FCMPGTB(a, b) ((int8_t)(a) > (int8_t)(b) ? -1 : 0) -#define FCMPGTW(a, b) ((int16_t)(a) > (int16_t)(b) ? -1 : 0) -#define FCMPGTL(a, b) ((int32_t)(a) > (int32_t)(b) ? -1 : 0) -#define FCMPEQ(a, b) ((a) == (b) ? 
-1 : 0) - -#define FMULLW(a, b) ((a) * (b)) #define FMULHRW(a, b) (((int16_t)(a) * (int16_t)(b) + 0x8000) >> 16) #define FMULHUW(a, b) ((a) * (b) >> 16) #define FMULHW(a, b) ((int16_t)(a) * (int16_t)(b) >> 16) @@ -371,70 +306,38 @@ static inline int satsw(int x) #define FAVG(a, b) (((a) + (b) + 1) >> 1) #endif -SSE_HELPER_B(helper_paddb, FADD) -SSE_HELPER_W(helper_paddw, FADD) -SSE_HELPER_L(helper_paddl, FADD) -SSE_HELPER_Q(helper_paddq, FADD) - -SSE_HELPER_B(helper_psubb, FSUB) -SSE_HELPER_W(helper_psubw, FSUB) -SSE_HELPER_L(helper_psubl, FSUB) -SSE_HELPER_Q(helper_psubq, FSUB) - -SSE_HELPER_B(helper_paddusb, FADDUB) -SSE_HELPER_B(helper_paddsb, FADDSB) -SSE_HELPER_B(helper_psubusb, FSUBUB) -SSE_HELPER_B(helper_psubsb, FSUBSB) - -SSE_HELPER_W(helper_paddusw, FADDUW) -SSE_HELPER_W(helper_paddsw, FADDSW) -SSE_HELPER_W(helper_psubusw, FSUBUW) -SSE_HELPER_W(helper_psubsw, FSUBSW) - -SSE_HELPER_B(helper_pminub, FMINUB) -SSE_HELPER_B(helper_pmaxub, FMAXUB) - -SSE_HELPER_W(helper_pminsw, FMINSW) -SSE_HELPER_W(helper_pmaxsw, FMAXSW) - -SSE_HELPER_Q(helper_pand, FAND) -SSE_HELPER_Q(helper_pandn, FANDN) -SSE_HELPER_Q(helper_por, FOR) -SSE_HELPER_Q(helper_pxor, FXOR) - -SSE_HELPER_B(helper_pcmpgtb, FCMPGTB) -SSE_HELPER_W(helper_pcmpgtw, FCMPGTW) -SSE_HELPER_L(helper_pcmpgtl, FCMPGTL) - -SSE_HELPER_B(helper_pcmpeqb, FCMPEQ) -SSE_HELPER_W(helper_pcmpeqw, FCMPEQ) -SSE_HELPER_L(helper_pcmpeql, FCMPEQ) +SSE_HELPER_W(helper_pmulhuw, FMULHUW) +SSE_HELPER_W(helper_pmulhw, FMULHW) -SSE_HELPER_W(helper_pmullw, FMULLW) #if SHIFT == 0 -SSE_HELPER_W(helper_pmulhrw, FMULHRW) +void glue(helper_pmulhrw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +{ + d->W(0) = FMULHRW(d->W(0), s->W(0)); + d->W(1) = FMULHRW(d->W(1), s->W(1)); + d->W(2) = FMULHRW(d->W(2), s->W(2)); + d->W(3) = FMULHRW(d->W(3), s->W(3)); +} #endif -SSE_HELPER_W(helper_pmulhuw, FMULHUW) -SSE_HELPER_W(helper_pmulhw, FMULHW) SSE_HELPER_B(helper_pavgb, FAVG) SSE_HELPER_W(helper_pavgw, FAVG) -void glue(helper_pmuludq, 
SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_pmuludq, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { - d->Q(0) = (uint64_t)s->L(0) * (uint64_t)d->L(0); -#if SHIFT == 1 - d->Q(1) = (uint64_t)s->L(2) * (uint64_t)d->L(2); -#endif + int i; + + for (i = 0; i < (1 << SHIFT); i++) { + d->Q(i) = (uint64_t)s->L(i * 2) * (uint64_t)v->L(i * 2); + } } -void glue(helper_pmaddwd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_pmaddwd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { int i; for (i = 0; i < (2 << SHIFT); i++) { - d->L(i) = (int16_t)s->W(2 * i) * (int16_t)d->W(2 * i) + - (int16_t)s->W(2 * i + 1) * (int16_t)d->W(2 * i + 1); + d->L(i) = (int16_t)s->W(2 * i) * (int16_t)v->W(2 * i) + + (int16_t)s->W(2 * i + 1) * (int16_t)v->W(2 * i + 1); } } @@ -448,34 +351,25 @@ static inline int abs1(int a) } } #endif -void glue(helper_psadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) -{ - unsigned int val; - - val = 0; - val += abs1(d->B(0) - s->B(0)); - val += abs1(d->B(1) - s->B(1)); - val += abs1(d->B(2) - s->B(2)); - val += abs1(d->B(3) - s->B(3)); - val += abs1(d->B(4) - s->B(4)); - val += abs1(d->B(5) - s->B(5)); - val += abs1(d->B(6) - s->B(6)); - val += abs1(d->B(7) - s->B(7)); - d->Q(0) = val; -#if SHIFT == 1 - val = 0; - val += abs1(d->B(8) - s->B(8)); - val += abs1(d->B(9) - s->B(9)); - val += abs1(d->B(10) - s->B(10)); - val += abs1(d->B(11) - s->B(11)); - val += abs1(d->B(12) - s->B(12)); - val += abs1(d->B(13) - s->B(13)); - val += abs1(d->B(14) - s->B(14)); - val += abs1(d->B(15) - s->B(15)); - d->Q(1) = val; -#endif +void glue(helper_psadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) +{ + int i; + + for (i = 0; i < (1 << SHIFT); i++) { + unsigned int val = 0; + val += abs1(v->B(8 * i + 0) - s->B(8 * i + 0)); + val += abs1(v->B(8 * i + 1) - s->B(8 * i + 1)); + val += abs1(v->B(8 * i + 2) - s->B(8 * i + 2)); + val += abs1(v->B(8 * i + 3) - s->B(8 * i + 3)); + val += abs1(v->B(8 * i + 4) - s->B(8 * i + 4)); + val += abs1(v->B(8 
* i + 5) - s->B(8 * i + 5)); + val += abs1(v->B(8 * i + 6) - s->B(8 * i + 6)); + val += abs1(v->B(8 * i + 7) - s->B(8 * i + 7)); + d->Q(i) = val; + } } +#if SHIFT < 2 void glue(helper_maskmov, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, target_ulong a0) { @@ -487,128 +381,140 @@ void glue(helper_maskmov, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, } } } - -void glue(helper_movl_mm_T0, SUFFIX)(Reg *d, uint32_t val) -{ - d->L(0) = val; - d->L(1) = 0; -#if SHIFT == 1 - d->Q(1) = 0; #endif -} -#ifdef TARGET_X86_64 -void glue(helper_movq_mm_T0, SUFFIX)(Reg *d, uint64_t val) -{ - d->Q(0) = val; -#if SHIFT == 1 - d->Q(1) = 0; -#endif -} -#endif +#define SHUFFLE4(F, a, b, offset) do { \ + r0 = a->F((order & 3) + offset); \ + r1 = a->F(((order >> 2) & 3) + offset); \ + r2 = b->F(((order >> 4) & 3) + offset); \ + r3 = b->F(((order >> 6) & 3) + offset); \ + d->F(offset) = r0; \ + d->F(offset + 1) = r1; \ + d->F(offset + 2) = r2; \ + d->F(offset + 3) = r3; \ + } while (0) #if SHIFT == 0 void glue(helper_pshufw, SUFFIX)(Reg *d, Reg *s, int order) { - Reg r; + uint16_t r0, r1, r2, r3; - r.W(0) = s->W(order & 3); - r.W(1) = s->W((order >> 2) & 3); - r.W(2) = s->W((order >> 4) & 3); - r.W(3) = s->W((order >> 6) & 3); - *d = r; + SHUFFLE4(W, s, s, 0); } #else -void helper_shufps(Reg *d, Reg *s, int order) +void glue(helper_shufps, SUFFIX)(Reg *d, Reg *v, Reg *s, int order) { - Reg r; + uint32_t r0, r1, r2, r3; + int i; - r.L(0) = d->L(order & 3); - r.L(1) = d->L((order >> 2) & 3); - r.L(2) = s->L((order >> 4) & 3); - r.L(3) = s->L((order >> 6) & 3); - *d = r; + for (i = 0; i < 2 << SHIFT; i += 4) { + SHUFFLE4(L, v, s, i); + } } -void helper_shufpd(Reg *d, Reg *s, int order) +void glue(helper_shufpd, SUFFIX)(Reg *d, Reg *v, Reg *s, int order) { - Reg r; + uint64_t r0, r1; + int i; - r.Q(0) = d->Q(order & 1); - r.Q(1) = s->Q((order >> 1) & 1); - *d = r; + for (i = 0; i < 1 << SHIFT; i += 2) { + r0 = v->Q(((order & 1) & 1) + i); + r1 = s->Q(((order >> 1) & 1) + i); + d->Q(i) = r0; + 
d->Q(i + 1) = r1; + order >>= 2; + } } void glue(helper_pshufd, SUFFIX)(Reg *d, Reg *s, int order) { - Reg r; + uint32_t r0, r1, r2, r3; + int i; - r.L(0) = s->L(order & 3); - r.L(1) = s->L((order >> 2) & 3); - r.L(2) = s->L((order >> 4) & 3); - r.L(3) = s->L((order >> 6) & 3); - *d = r; + for (i = 0; i < 2 << SHIFT; i += 4) { + SHUFFLE4(L, s, s, i); + } } void glue(helper_pshuflw, SUFFIX)(Reg *d, Reg *s, int order) { - Reg r; + uint16_t r0, r1, r2, r3; + int i, j; - r.W(0) = s->W(order & 3); - r.W(1) = s->W((order >> 2) & 3); - r.W(2) = s->W((order >> 4) & 3); - r.W(3) = s->W((order >> 6) & 3); - r.Q(1) = s->Q(1); - *d = r; + for (i = 0, j = 1; j < 1 << SHIFT; i += 8, j += 2) { + SHUFFLE4(W, s, s, i); + d->Q(j) = s->Q(j); + } } void glue(helper_pshufhw, SUFFIX)(Reg *d, Reg *s, int order) { - Reg r; + uint16_t r0, r1, r2, r3; + int i, j; - r.Q(0) = s->Q(0); - r.W(4) = s->W(4 + (order & 3)); - r.W(5) = s->W(4 + ((order >> 2) & 3)); - r.W(6) = s->W(4 + ((order >> 4) & 3)); - r.W(7) = s->W(4 + ((order >> 6) & 3)); - *d = r; + for (i = 4, j = 0; j < 1 << SHIFT; i += 8, j += 2) { + d->Q(j) = s->Q(j); + SHUFFLE4(W, s, s, i); + } } #endif -#if SHIFT == 1 +#if SHIFT >= 1 /* FPU ops */ /* XXX: not accurate */ -#define SSE_HELPER_S(name, F) \ - void helper_ ## name ## ps(CPUX86State *env, Reg *d, Reg *s) \ +#define SSE_HELPER_P(name, F) \ + void glue(helper_ ## name ## ps, SUFFIX)(CPUX86State *env, \ + Reg *d, Reg *v, Reg *s) \ { \ - d->ZMM_S(0) = F(32, d->ZMM_S(0), s->ZMM_S(0)); \ - d->ZMM_S(1) = F(32, d->ZMM_S(1), s->ZMM_S(1)); \ - d->ZMM_S(2) = F(32, d->ZMM_S(2), s->ZMM_S(2)); \ - d->ZMM_S(3) = F(32, d->ZMM_S(3), s->ZMM_S(3)); \ + int i; \ + for (i = 0; i < 2 << SHIFT; i++) { \ + d->ZMM_S(i) = F(32, v->ZMM_S(i), s->ZMM_S(i)); \ + } \ } \ \ - void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s) \ + void glue(helper_ ## name ## pd, SUFFIX)(CPUX86State *env, \ + Reg *d, Reg *v, Reg *s) \ { \ - d->ZMM_S(0) = F(32, d->ZMM_S(0), s->ZMM_S(0)); \ - } \ + int i; \ + for (i 
= 0; i < 1 << SHIFT; i++) { \ + d->ZMM_D(i) = F(64, v->ZMM_D(i), s->ZMM_D(i)); \ + } \ + } + +#if SHIFT == 1 + +#define SSE_HELPER_S(name, F) \ + SSE_HELPER_P(name, F) \ \ - void helper_ ## name ## pd(CPUX86State *env, Reg *d, Reg *s) \ + void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *v, Reg *s)\ { \ - d->ZMM_D(0) = F(64, d->ZMM_D(0), s->ZMM_D(0)); \ - d->ZMM_D(1) = F(64, d->ZMM_D(1), s->ZMM_D(1)); \ + int i; \ + d->ZMM_S(0) = F(32, v->ZMM_S(0), s->ZMM_S(0)); \ + for (i = 1; i < 2 << SHIFT; i++) { \ + d->ZMM_L(i) = v->ZMM_L(i); \ + } \ } \ \ - void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s) \ + void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *v, Reg *s)\ { \ - d->ZMM_D(0) = F(64, d->ZMM_D(0), s->ZMM_D(0)); \ + int i; \ + d->ZMM_D(0) = F(64, v->ZMM_D(0), s->ZMM_D(0)); \ + for (i = 1; i < 1 << SHIFT; i++) { \ + d->ZMM_Q(i) = v->ZMM_Q(i); \ + } \ } +#else + +#define SSE_HELPER_S(name, F) SSE_HELPER_P(name, F) + +#endif + #define FPU_ADD(size, a, b) float ## size ## _add(a, b, &env->sse_status) #define FPU_SUB(size, a, b) float ## size ## _sub(a, b, &env->sse_status) #define FPU_MUL(size, a, b) float ## size ## _mul(a, b, &env->sse_status) #define FPU_DIV(size, a, b) float ## size ## _div(a, b, &env->sse_status) -#define FPU_SQRT(size, a, b) float ## size ## _sqrt(b, &env->sse_status) /* Note that the choice of comparison op here is important to get the * special cases right: for min and max Intel specifies that (-0,0), @@ -625,56 +531,131 @@ SSE_HELPER_S(mul, FPU_MUL) SSE_HELPER_S(div, FPU_DIV) SSE_HELPER_S(min, FPU_MIN) SSE_HELPER_S(max, FPU_MAX) -SSE_HELPER_S(sqrt, FPU_SQRT) +void glue(helper_sqrtps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +{ + int i; + for (i = 0; i < 2 << SHIFT; i++) { + d->ZMM_S(i) = float32_sqrt(s->ZMM_S(i), &env->sse_status); + } +} + +void glue(helper_sqrtpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +{ + int i; + for (i = 0; i < 1 << SHIFT; i++) { + d->ZMM_D(i) = float64_sqrt(s->ZMM_D(i), &env->sse_status); 
+ } +} + +#if SHIFT == 1 +void helper_sqrtss(CPUX86State *env, Reg *d, Reg *v, Reg *s) +{ + int i; + d->ZMM_S(0) = float32_sqrt(s->ZMM_S(0), &env->sse_status); + for (i = 1; i < 2 << SHIFT; i++) { + d->ZMM_L(i) = v->ZMM_L(i); + } +} + +void helper_sqrtsd(CPUX86State *env, Reg *d, Reg *v, Reg *s) +{ + int i; + d->ZMM_D(0) = float64_sqrt(s->ZMM_D(0), &env->sse_status); + for (i = 1; i < 1 << SHIFT; i++) { + d->ZMM_Q(i) = v->ZMM_Q(i); + } +} +#endif /* float to float conversions */ -void helper_cvtps2pd(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_cvtps2pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - float32 s0, s1; + int i; + for (i = 1 << SHIFT; --i >= 0; ) { + d->ZMM_D(i) = float32_to_float64(s->ZMM_S(i), &env->sse_status); + } +} + +void glue(helper_cvtpd2ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +{ + int i; + for (i = 0; i < 1 << SHIFT; i++) { + d->ZMM_S(i) = float64_to_float32(s->ZMM_D(i), &env->sse_status); + } + for (i >>= 1; i < 1 << SHIFT; i++) { + d->Q(i) = 0; + } +} - s0 = s->ZMM_S(0); - s1 = s->ZMM_S(1); - d->ZMM_D(0) = float32_to_float64(s0, &env->sse_status); - d->ZMM_D(1) = float32_to_float64(s1, &env->sse_status); +#if SHIFT >= 1 +void glue(helper_cvtph2ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +{ + int i; + + for (i = 2 << SHIFT; --i >= 0; ) { + d->ZMM_S(i) = float16_to_float32(s->ZMM_H(i), true, &env->sse_status); + } } -void helper_cvtpd2ps(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_cvtps2ph, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, int mode) { - d->ZMM_S(0) = float64_to_float32(s->ZMM_D(0), &env->sse_status); - d->ZMM_S(1) = float64_to_float32(s->ZMM_D(1), &env->sse_status); - d->Q(1) = 0; + int i; + FloatRoundMode prev_rounding_mode = env->sse_status.float_rounding_mode; + if (!(mode & (1 << 2))) { + set_x86_rounding_mode(mode & 3, &env->sse_status); + } + + for (i = 0; i < 2 << SHIFT; i++) { + d->ZMM_H(i) = float32_to_float16(s->ZMM_S(i), true, &env->sse_status); + } + for (i >>= 2; i < 1 << SHIFT; i++) { + d->Q(i) = 
0; + } + + env->sse_status.float_rounding_mode = prev_rounding_mode; } +#endif -void helper_cvtss2sd(CPUX86State *env, Reg *d, Reg *s) +#if SHIFT == 1 +void helper_cvtss2sd(CPUX86State *env, Reg *d, Reg *v, Reg *s) { + int i; d->ZMM_D(0) = float32_to_float64(s->ZMM_S(0), &env->sse_status); + for (i = 1; i < 1 << SHIFT; i++) { + d->ZMM_Q(i) = v->ZMM_Q(i); + } } -void helper_cvtsd2ss(CPUX86State *env, Reg *d, Reg *s) +void helper_cvtsd2ss(CPUX86State *env, Reg *d, Reg *v, Reg *s) { + int i; d->ZMM_S(0) = float64_to_float32(s->ZMM_D(0), &env->sse_status); + for (i = 1; i < 2 << SHIFT; i++) { + d->ZMM_L(i) = v->ZMM_L(i); + } } +#endif /* integer to float */ -void helper_cvtdq2ps(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_cvtdq2ps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - d->ZMM_S(0) = int32_to_float32(s->ZMM_L(0), &env->sse_status); - d->ZMM_S(1) = int32_to_float32(s->ZMM_L(1), &env->sse_status); - d->ZMM_S(2) = int32_to_float32(s->ZMM_L(2), &env->sse_status); - d->ZMM_S(3) = int32_to_float32(s->ZMM_L(3), &env->sse_status); + int i; + for (i = 0; i < 2 << SHIFT; i++) { + d->ZMM_S(i) = int32_to_float32(s->ZMM_L(i), &env->sse_status); + } } -void helper_cvtdq2pd(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_cvtdq2pd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - int32_t l0, l1; - - l0 = (int32_t)s->ZMM_L(0); - l1 = (int32_t)s->ZMM_L(1); - d->ZMM_D(0) = int32_to_float64(l0, &env->sse_status); - d->ZMM_D(1) = int32_to_float64(l1, &env->sse_status); + int i; + for (i = 1 << SHIFT; --i >= 0; ) { + int32_t l = s->ZMM_L(i); + d->ZMM_D(i) = int32_to_float64(l, &env->sse_status); + } } +#if SHIFT == 1 void helper_cvtpi2ps(CPUX86State *env, ZMMReg *d, MMXReg *s) { d->ZMM_S(0) = int32_to_float32(s->MMX_L(0), &env->sse_status); @@ -709,142 +690,207 @@ void helper_cvtsq2sd(CPUX86State *env, ZMMReg *d, uint64_t val) } #endif +#endif + /* float to integer */ -void helper_cvtps2dq(CPUX86State *env, ZMMReg *d, ZMMReg *s) + +#if SHIFT == 1 +/* + * x86 mandates that we 
return the indefinite integer value for the result + * of any float-to-integer conversion that raises the 'invalid' exception. + * Wrap the softfloat functions to get this behaviour. + */ +#define WRAP_FLOATCONV(RETTYPE, FN, FLOATTYPE, INDEFVALUE) \ + static inline RETTYPE x86_##FN(FLOATTYPE a, float_status *s) \ + { \ + int oldflags, newflags; \ + RETTYPE r; \ + \ + oldflags = get_float_exception_flags(s); \ + set_float_exception_flags(0, s); \ + r = FN(a, s); \ + newflags = get_float_exception_flags(s); \ + if (newflags & float_flag_invalid) { \ + r = INDEFVALUE; \ + } \ + set_float_exception_flags(newflags | oldflags, s); \ + return r; \ + } + +WRAP_FLOATCONV(int32_t, float32_to_int32, float32, INT32_MIN) +WRAP_FLOATCONV(int32_t, float32_to_int32_round_to_zero, float32, INT32_MIN) +WRAP_FLOATCONV(int32_t, float64_to_int32, float64, INT32_MIN) +WRAP_FLOATCONV(int32_t, float64_to_int32_round_to_zero, float64, INT32_MIN) +WRAP_FLOATCONV(int64_t, float32_to_int64, float32, INT64_MIN) +WRAP_FLOATCONV(int64_t, float32_to_int64_round_to_zero, float32, INT64_MIN) +WRAP_FLOATCONV(int64_t, float64_to_int64, float64, INT64_MIN) +WRAP_FLOATCONV(int64_t, float64_to_int64_round_to_zero, float64, INT64_MIN) +#endif + +void glue(helper_cvtps2dq, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->ZMM_L(0) = float32_to_int32(s->ZMM_S(0), &env->sse_status); - d->ZMM_L(1) = float32_to_int32(s->ZMM_S(1), &env->sse_status); - d->ZMM_L(2) = float32_to_int32(s->ZMM_S(2), &env->sse_status); - d->ZMM_L(3) = float32_to_int32(s->ZMM_S(3), &env->sse_status); + int i; + for (i = 0; i < 2 << SHIFT; i++) { + d->ZMM_L(i) = x86_float32_to_int32(s->ZMM_S(i), &env->sse_status); + } } -void helper_cvtpd2dq(CPUX86State *env, ZMMReg *d, ZMMReg *s) +void glue(helper_cvtpd2dq, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->ZMM_L(0) = float64_to_int32(s->ZMM_D(0), &env->sse_status); - d->ZMM_L(1) = float64_to_int32(s->ZMM_D(1), &env->sse_status); - d->ZMM_Q(1) = 0; + int i; + for (i = 0; i < 
1 << SHIFT; i++) { + d->ZMM_L(i) = x86_float64_to_int32(s->ZMM_D(i), &env->sse_status); + } + for (i >>= 1; i < 1 << SHIFT; i++) { + d->Q(i) = 0; + } } +#if SHIFT == 1 void helper_cvtps2pi(CPUX86State *env, MMXReg *d, ZMMReg *s) { - d->MMX_L(0) = float32_to_int32(s->ZMM_S(0), &env->sse_status); - d->MMX_L(1) = float32_to_int32(s->ZMM_S(1), &env->sse_status); + d->MMX_L(0) = x86_float32_to_int32(s->ZMM_S(0), &env->sse_status); + d->MMX_L(1) = x86_float32_to_int32(s->ZMM_S(1), &env->sse_status); } void helper_cvtpd2pi(CPUX86State *env, MMXReg *d, ZMMReg *s) { - d->MMX_L(0) = float64_to_int32(s->ZMM_D(0), &env->sse_status); - d->MMX_L(1) = float64_to_int32(s->ZMM_D(1), &env->sse_status); + d->MMX_L(0) = x86_float64_to_int32(s->ZMM_D(0), &env->sse_status); + d->MMX_L(1) = x86_float64_to_int32(s->ZMM_D(1), &env->sse_status); } int32_t helper_cvtss2si(CPUX86State *env, ZMMReg *s) { - return float32_to_int32(s->ZMM_S(0), &env->sse_status); + return x86_float32_to_int32(s->ZMM_S(0), &env->sse_status); } int32_t helper_cvtsd2si(CPUX86State *env, ZMMReg *s) { - return float64_to_int32(s->ZMM_D(0), &env->sse_status); + return x86_float64_to_int32(s->ZMM_D(0), &env->sse_status); } #ifdef TARGET_X86_64 int64_t helper_cvtss2sq(CPUX86State *env, ZMMReg *s) { - return float32_to_int64(s->ZMM_S(0), &env->sse_status); + return x86_float32_to_int64(s->ZMM_S(0), &env->sse_status); } int64_t helper_cvtsd2sq(CPUX86State *env, ZMMReg *s) { - return float64_to_int64(s->ZMM_D(0), &env->sse_status); + return x86_float64_to_int64(s->ZMM_D(0), &env->sse_status); } #endif +#endif /* float to integer truncated */ -void helper_cvttps2dq(CPUX86State *env, ZMMReg *d, ZMMReg *s) +void glue(helper_cvttps2dq, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->ZMM_L(0) = float32_to_int32_round_to_zero(s->ZMM_S(0), &env->sse_status); - d->ZMM_L(1) = float32_to_int32_round_to_zero(s->ZMM_S(1), &env->sse_status); - d->ZMM_L(2) = float32_to_int32_round_to_zero(s->ZMM_S(2), &env->sse_status); - 
d->ZMM_L(3) = float32_to_int32_round_to_zero(s->ZMM_S(3), &env->sse_status); + int i; + for (i = 0; i < 2 << SHIFT; i++) { + d->ZMM_L(i) = x86_float32_to_int32_round_to_zero(s->ZMM_S(i), + &env->sse_status); + } } -void helper_cvttpd2dq(CPUX86State *env, ZMMReg *d, ZMMReg *s) +void glue(helper_cvttpd2dq, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->ZMM_L(0) = float64_to_int32_round_to_zero(s->ZMM_D(0), &env->sse_status); - d->ZMM_L(1) = float64_to_int32_round_to_zero(s->ZMM_D(1), &env->sse_status); - d->ZMM_Q(1) = 0; + int i; + for (i = 0; i < 1 << SHIFT; i++) { + d->ZMM_L(i) = x86_float64_to_int32_round_to_zero(s->ZMM_D(i), + &env->sse_status); + } + for (i >>= 1; i < 1 << SHIFT; i++) { + d->Q(i) = 0; + } } +#if SHIFT == 1 void helper_cvttps2pi(CPUX86State *env, MMXReg *d, ZMMReg *s) { - d->MMX_L(0) = float32_to_int32_round_to_zero(s->ZMM_S(0), &env->sse_status); - d->MMX_L(1) = float32_to_int32_round_to_zero(s->ZMM_S(1), &env->sse_status); + d->MMX_L(0) = x86_float32_to_int32_round_to_zero(s->ZMM_S(0), &env->sse_status); + d->MMX_L(1) = x86_float32_to_int32_round_to_zero(s->ZMM_S(1), &env->sse_status); } void helper_cvttpd2pi(CPUX86State *env, MMXReg *d, ZMMReg *s) { - d->MMX_L(0) = float64_to_int32_round_to_zero(s->ZMM_D(0), &env->sse_status); - d->MMX_L(1) = float64_to_int32_round_to_zero(s->ZMM_D(1), &env->sse_status); + d->MMX_L(0) = x86_float64_to_int32_round_to_zero(s->ZMM_D(0), &env->sse_status); + d->MMX_L(1) = x86_float64_to_int32_round_to_zero(s->ZMM_D(1), &env->sse_status); } int32_t helper_cvttss2si(CPUX86State *env, ZMMReg *s) { - return float32_to_int32_round_to_zero(s->ZMM_S(0), &env->sse_status); + return x86_float32_to_int32_round_to_zero(s->ZMM_S(0), &env->sse_status); } int32_t helper_cvttsd2si(CPUX86State *env, ZMMReg *s) { - return float64_to_int32_round_to_zero(s->ZMM_D(0), &env->sse_status); + return x86_float64_to_int32_round_to_zero(s->ZMM_D(0), &env->sse_status); } #ifdef TARGET_X86_64 int64_t helper_cvttss2sq(CPUX86State *env, 
ZMMReg *s) { - return float32_to_int64_round_to_zero(s->ZMM_S(0), &env->sse_status); + return x86_float32_to_int64_round_to_zero(s->ZMM_S(0), &env->sse_status); } int64_t helper_cvttsd2sq(CPUX86State *env, ZMMReg *s) { - return float64_to_int64_round_to_zero(s->ZMM_D(0), &env->sse_status); + return x86_float64_to_int64_round_to_zero(s->ZMM_D(0), &env->sse_status); } #endif +#endif -void helper_rsqrtps(CPUX86State *env, ZMMReg *d, ZMMReg *s) +void glue(helper_rsqrtps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->ZMM_S(0) = float32_div(float32_one, - float32_sqrt(s->ZMM_S(0), &env->sse_status), - &env->sse_status); - d->ZMM_S(1) = float32_div(float32_one, - float32_sqrt(s->ZMM_S(1), &env->sse_status), - &env->sse_status); - d->ZMM_S(2) = float32_div(float32_one, - float32_sqrt(s->ZMM_S(2), &env->sse_status), - &env->sse_status); - d->ZMM_S(3) = float32_div(float32_one, - float32_sqrt(s->ZMM_S(3), &env->sse_status), - &env->sse_status); + uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int i; + for (i = 0; i < 2 << SHIFT; i++) { + d->ZMM_S(i) = float32_div(float32_one, + float32_sqrt(s->ZMM_S(i), &env->sse_status), + &env->sse_status); + } + set_float_exception_flags(old_flags, &env->sse_status); } -void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *s) +#if SHIFT == 1 +void helper_rsqrtss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int i; d->ZMM_S(0) = float32_div(float32_one, float32_sqrt(s->ZMM_S(0), &env->sse_status), &env->sse_status); + set_float_exception_flags(old_flags, &env->sse_status); + for (i = 1; i < 2 << SHIFT; i++) { + d->ZMM_L(i) = v->ZMM_L(i); + } } +#endif -void helper_rcpps(CPUX86State *env, ZMMReg *d, ZMMReg *s) +void glue(helper_rcpps, SUFFIX)(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); - d->ZMM_S(1) = float32_div(float32_one, s->ZMM_S(1), &env->sse_status); - 
d->ZMM_S(2) = float32_div(float32_one, s->ZMM_S(2), &env->sse_status); - d->ZMM_S(3) = float32_div(float32_one, s->ZMM_S(3), &env->sse_status); + uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int i; + for (i = 0; i < 2 << SHIFT; i++) { + d->ZMM_S(i) = float32_div(float32_one, s->ZMM_S(i), &env->sse_status); + } + set_float_exception_flags(old_flags, &env->sse_status); } -void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *s) +#if SHIFT == 1 +void helper_rcpss(CPUX86State *env, ZMMReg *d, ZMMReg *v, ZMMReg *s) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); + int i; d->ZMM_S(0) = float32_div(float32_one, s->ZMM_S(0), &env->sse_status); + for (i = 1; i < 2 << SHIFT; i++) { + d->ZMM_L(i) = v->ZMM_L(i); + } + set_float_exception_flags(old_flags, &env->sse_status); } +#endif +#if SHIFT == 1 static inline uint64_t helper_extrq(uint64_t src, int shift, int len) { uint64_t mask; @@ -859,7 +905,7 @@ static inline uint64_t helper_extrq(uint64_t src, int shift, int len) void helper_extrq_r(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->ZMM_Q(0) = helper_extrq(d->ZMM_Q(0), s->ZMM_B(1), s->ZMM_B(0)); + d->ZMM_Q(0) = helper_extrq(d->ZMM_Q(0), s->ZMM_B(1) & 63, s->ZMM_B(0) & 63); } void helper_extrq_i(CPUX86State *env, ZMMReg *d, int index, int length) @@ -867,7 +913,7 @@ void helper_extrq_i(CPUX86State *env, ZMMReg *d, int index, int length) d->ZMM_Q(0) = helper_extrq(d->ZMM_Q(0), index, length); } -static inline uint64_t helper_insertq(uint64_t src, int shift, int len) +static inline uint64_t helper_insertq(uint64_t dest, uint64_t src, int shift, int len) { uint64_t mask; @@ -876,130 +922,189 @@ static inline uint64_t helper_insertq(uint64_t src, int shift, int len) } else { mask = (1ULL << len) - 1; } - return (src & ~(mask << shift)) | ((src & mask) << shift); + return (dest & ~(mask << shift)) | ((src & mask) << shift); } void helper_insertq_r(CPUX86State *env, ZMMReg *d, ZMMReg *s) { - d->ZMM_Q(0) = helper_insertq(s->ZMM_Q(0), 
s->ZMM_B(9), s->ZMM_B(8)); -} - -void helper_insertq_i(CPUX86State *env, ZMMReg *d, int index, int length) -{ - d->ZMM_Q(0) = helper_insertq(d->ZMM_Q(0), index, length); -} - -void helper_haddps(CPUX86State *env, ZMMReg *d, ZMMReg *s) -{ - ZMMReg r; - - r.ZMM_S(0) = float32_add(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status); - r.ZMM_S(1) = float32_add(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status); - r.ZMM_S(2) = float32_add(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status); - r.ZMM_S(3) = float32_add(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status); - *d = r; -} - -void helper_haddpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) -{ - ZMMReg r; - - r.ZMM_D(0) = float64_add(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status); - r.ZMM_D(1) = float64_add(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status); - *d = r; + d->ZMM_Q(0) = helper_insertq(d->ZMM_Q(0), s->ZMM_Q(0), s->ZMM_B(9) & 63, s->ZMM_B(8) & 63); } -void helper_hsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s) +void helper_insertq_i(CPUX86State *env, ZMMReg *d, ZMMReg *s, int index, int length) { - ZMMReg r; - - r.ZMM_S(0) = float32_sub(d->ZMM_S(0), d->ZMM_S(1), &env->sse_status); - r.ZMM_S(1) = float32_sub(d->ZMM_S(2), d->ZMM_S(3), &env->sse_status); - r.ZMM_S(2) = float32_sub(s->ZMM_S(0), s->ZMM_S(1), &env->sse_status); - r.ZMM_S(3) = float32_sub(s->ZMM_S(2), s->ZMM_S(3), &env->sse_status); - *d = r; -} - -void helper_hsubpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) -{ - ZMMReg r; - - r.ZMM_D(0) = float64_sub(d->ZMM_D(0), d->ZMM_D(1), &env->sse_status); - r.ZMM_D(1) = float64_sub(s->ZMM_D(0), s->ZMM_D(1), &env->sse_status); - *d = r; + d->ZMM_Q(0) = helper_insertq(d->ZMM_Q(0), s->ZMM_Q(0), index, length); } +#endif -void helper_addsubps(CPUX86State *env, ZMMReg *d, ZMMReg *s) +#define SSE_HELPER_HPS(name, F) \ +void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) \ +{ \ + float32 r[2 << SHIFT]; \ + int i, j, k; \ + for (k = 0; k < 2 << SHIFT; k += LANE_WIDTH / 4) { \ + for (i = j = 0; j < 4; i++, j += 2) { \ + r[i + k] = 
F(v->ZMM_S(j + k), v->ZMM_S(j + k + 1), &env->sse_status); \ + } \ + for (j = 0; j < 4; i++, j += 2) { \ + r[i + k] = F(s->ZMM_S(j + k), s->ZMM_S(j + k + 1), &env->sse_status); \ + } \ + } \ + for (i = 0; i < 2 << SHIFT; i++) { \ + d->ZMM_S(i) = r[i]; \ + } \ +} + +SSE_HELPER_HPS(haddps, float32_add) +SSE_HELPER_HPS(hsubps, float32_sub) + +#define SSE_HELPER_HPD(name, F) \ +void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) \ +{ \ + float64 r[1 << SHIFT]; \ + int i, j, k; \ + for (k = 0; k < 1 << SHIFT; k += LANE_WIDTH / 8) { \ + for (i = j = 0; j < 2; i++, j += 2) { \ + r[i + k] = F(v->ZMM_D(j + k), v->ZMM_D(j + k + 1), &env->sse_status); \ + } \ + for (j = 0; j < 2; i++, j += 2) { \ + r[i + k] = F(s->ZMM_D(j + k), s->ZMM_D(j + k + 1), &env->sse_status); \ + } \ + } \ + for (i = 0; i < 1 << SHIFT; i++) { \ + d->ZMM_D(i) = r[i]; \ + } \ +} + +SSE_HELPER_HPD(haddpd, float64_add) +SSE_HELPER_HPD(hsubpd, float64_sub) + +void glue(helper_addsubps, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { - d->ZMM_S(0) = float32_sub(d->ZMM_S(0), s->ZMM_S(0), &env->sse_status); - d->ZMM_S(1) = float32_add(d->ZMM_S(1), s->ZMM_S(1), &env->sse_status); - d->ZMM_S(2) = float32_sub(d->ZMM_S(2), s->ZMM_S(2), &env->sse_status); - d->ZMM_S(3) = float32_add(d->ZMM_S(3), s->ZMM_S(3), &env->sse_status); + int i; + for (i = 0; i < 2 << SHIFT; i += 2) { + d->ZMM_S(i) = float32_sub(v->ZMM_S(i), s->ZMM_S(i), &env->sse_status); + d->ZMM_S(i+1) = float32_add(v->ZMM_S(i+1), s->ZMM_S(i+1), &env->sse_status); + } } -void helper_addsubpd(CPUX86State *env, ZMMReg *d, ZMMReg *s) +void glue(helper_addsubpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { - d->ZMM_D(0) = float64_sub(d->ZMM_D(0), s->ZMM_D(0), &env->sse_status); - d->ZMM_D(1) = float64_add(d->ZMM_D(1), s->ZMM_D(1), &env->sse_status); + int i; + for (i = 0; i < 1 << SHIFT; i += 2) { + d->ZMM_D(i) = float64_sub(v->ZMM_D(i), s->ZMM_D(i), &env->sse_status); + d->ZMM_D(i+1) = float64_add(v->ZMM_D(i+1), 
s->ZMM_D(i+1), &env->sse_status); + } } -/* XXX: unordered */ -#define SSE_HELPER_CMP(name, F) \ - void helper_ ## name ## ps(CPUX86State *env, Reg *d, Reg *s) \ - { \ - d->ZMM_L(0) = F(32, d->ZMM_S(0), s->ZMM_S(0)); \ - d->ZMM_L(1) = F(32, d->ZMM_S(1), s->ZMM_S(1)); \ - d->ZMM_L(2) = F(32, d->ZMM_S(2), s->ZMM_S(2)); \ - d->ZMM_L(3) = F(32, d->ZMM_S(3), s->ZMM_S(3)); \ - } \ - \ - void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *s) \ +#define SSE_HELPER_CMP_P(name, F, C) \ + void glue(helper_ ## name ## ps, SUFFIX)(CPUX86State *env, \ + Reg *d, Reg *v, Reg *s) \ { \ - d->ZMM_L(0) = F(32, d->ZMM_S(0), s->ZMM_S(0)); \ - } \ - \ - void helper_ ## name ## pd(CPUX86State *env, Reg *d, Reg *s) \ - { \ - d->ZMM_Q(0) = F(64, d->ZMM_D(0), s->ZMM_D(0)); \ - d->ZMM_Q(1) = F(64, d->ZMM_D(1), s->ZMM_D(1)); \ + int i; \ + for (i = 0; i < 2 << SHIFT; i++) { \ + d->ZMM_L(i) = C(F(32, v->ZMM_S(i), s->ZMM_S(i))) ? -1 : 0; \ + } \ } \ \ - void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *s) \ + void glue(helper_ ## name ## pd, SUFFIX)(CPUX86State *env, \ + Reg *d, Reg *v, Reg *s) \ { \ - d->ZMM_Q(0) = F(64, d->ZMM_D(0), s->ZMM_D(0)); \ - } - -#define FPU_CMPEQ(size, a, b) \ - (float ## size ## _eq_quiet(a, b, &env->sse_status) ? -1 : 0) -#define FPU_CMPLT(size, a, b) \ - (float ## size ## _lt(a, b, &env->sse_status) ? -1 : 0) -#define FPU_CMPLE(size, a, b) \ - (float ## size ## _le(a, b, &env->sse_status) ? -1 : 0) -#define FPU_CMPUNORD(size, a, b) \ - (float ## size ## _unordered_quiet(a, b, &env->sse_status) ? -1 : 0) -#define FPU_CMPNEQ(size, a, b) \ - (float ## size ## _eq_quiet(a, b, &env->sse_status) ? 0 : -1) -#define FPU_CMPNLT(size, a, b) \ - (float ## size ## _lt(a, b, &env->sse_status) ? 0 : -1) -#define FPU_CMPNLE(size, a, b) \ - (float ## size ## _le(a, b, &env->sse_status) ? 0 : -1) -#define FPU_CMPORD(size, a, b) \ - (float ## size ## _unordered_quiet(a, b, &env->sse_status) ? 
0 : -1) - -SSE_HELPER_CMP(cmpeq, FPU_CMPEQ) -SSE_HELPER_CMP(cmplt, FPU_CMPLT) -SSE_HELPER_CMP(cmple, FPU_CMPLE) -SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD) -SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ) -SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT) -SSE_HELPER_CMP(cmpnle, FPU_CMPNLE) -SSE_HELPER_CMP(cmpord, FPU_CMPORD) + int i; \ + for (i = 0; i < 1 << SHIFT; i++) { \ + d->ZMM_Q(i) = C(F(64, v->ZMM_D(i), s->ZMM_D(i))) ? -1 : 0; \ + } \ + } + +#if SHIFT == 1 +#define SSE_HELPER_CMP(name, F, C) \ + SSE_HELPER_CMP_P(name, F, C) \ + void helper_ ## name ## ss(CPUX86State *env, Reg *d, Reg *v, Reg *s) \ + { \ + int i; \ + d->ZMM_L(0) = C(F(32, v->ZMM_S(0), s->ZMM_S(0))) ? -1 : 0; \ + for (i = 1; i < 2 << SHIFT; i++) { \ + d->ZMM_L(i) = v->ZMM_L(i); \ + } \ + } \ + \ + void helper_ ## name ## sd(CPUX86State *env, Reg *d, Reg *v, Reg *s) \ + { \ + int i; \ + d->ZMM_Q(0) = C(F(64, v->ZMM_D(0), s->ZMM_D(0))) ? -1 : 0; \ + for (i = 1; i < 1 << SHIFT; i++) { \ + d->ZMM_Q(i) = v->ZMM_Q(i); \ + } \ + } + +static inline bool FPU_EQU(FloatRelation x) +{ + return (x == float_relation_equal || x == float_relation_unordered); +} +static inline bool FPU_GE(FloatRelation x) +{ + return (x == float_relation_equal || x == float_relation_greater); +} +#define FPU_EQ(x) (x == float_relation_equal) +#define FPU_LT(x) (x == float_relation_less) +#define FPU_LE(x) (x <= float_relation_equal) +#define FPU_GT(x) (x == float_relation_greater) +#define FPU_UNORD(x) (x == float_relation_unordered) +/* We must make sure we evaluate the argument in case it is a signalling NAN */ +#define FPU_FALSE(x) (x == float_relation_equal && 0) + +#define FPU_CMPQ(size, a, b) \ + float ## size ## _compare_quiet(a, b, &env->sse_status) +#define FPU_CMPS(size, a, b) \ + float ## size ## _compare(a, b, &env->sse_status) +#else +#define SSE_HELPER_CMP(name, F, C) SSE_HELPER_CMP_P(name, F, C) +#endif + +SSE_HELPER_CMP(cmpeq, FPU_CMPQ, FPU_EQ) +SSE_HELPER_CMP(cmplt, FPU_CMPS, FPU_LT) +SSE_HELPER_CMP(cmple, FPU_CMPS, FPU_LE) 
+SSE_HELPER_CMP(cmpunord, FPU_CMPQ, FPU_UNORD) +SSE_HELPER_CMP(cmpneq, FPU_CMPQ, !FPU_EQ) +SSE_HELPER_CMP(cmpnlt, FPU_CMPS, !FPU_LT) +SSE_HELPER_CMP(cmpnle, FPU_CMPS, !FPU_LE) +SSE_HELPER_CMP(cmpord, FPU_CMPQ, !FPU_UNORD) + +SSE_HELPER_CMP(cmpequ, FPU_CMPQ, FPU_EQU) +SSE_HELPER_CMP(cmpnge, FPU_CMPS, !FPU_GE) +SSE_HELPER_CMP(cmpngt, FPU_CMPS, !FPU_GT) +SSE_HELPER_CMP(cmpfalse, FPU_CMPQ, FPU_FALSE) +SSE_HELPER_CMP(cmpnequ, FPU_CMPQ, !FPU_EQU) +SSE_HELPER_CMP(cmpge, FPU_CMPS, FPU_GE) +SSE_HELPER_CMP(cmpgt, FPU_CMPS, FPU_GT) +SSE_HELPER_CMP(cmptrue, FPU_CMPQ, !FPU_FALSE) + +SSE_HELPER_CMP(cmpeqs, FPU_CMPS, FPU_EQ) +SSE_HELPER_CMP(cmpltq, FPU_CMPQ, FPU_LT) +SSE_HELPER_CMP(cmpleq, FPU_CMPQ, FPU_LE) +SSE_HELPER_CMP(cmpunords, FPU_CMPS, FPU_UNORD) +SSE_HELPER_CMP(cmpneqq, FPU_CMPS, !FPU_EQ) +SSE_HELPER_CMP(cmpnltq, FPU_CMPQ, !FPU_LT) +SSE_HELPER_CMP(cmpnleq, FPU_CMPQ, !FPU_LE) +SSE_HELPER_CMP(cmpords, FPU_CMPS, !FPU_UNORD) + +SSE_HELPER_CMP(cmpequs, FPU_CMPS, FPU_EQU) +SSE_HELPER_CMP(cmpngeq, FPU_CMPQ, !FPU_GE) +SSE_HELPER_CMP(cmpngtq, FPU_CMPQ, !FPU_GT) +SSE_HELPER_CMP(cmpfalses, FPU_CMPS, FPU_FALSE) +SSE_HELPER_CMP(cmpnequs, FPU_CMPS, !FPU_EQU) +SSE_HELPER_CMP(cmpgeq, FPU_CMPQ, FPU_GE) +SSE_HELPER_CMP(cmpgtq, FPU_CMPQ, FPU_GT) +SSE_HELPER_CMP(cmptrues, FPU_CMPS, !FPU_FALSE) + +#undef SSE_HELPER_CMP + +#if SHIFT == 1 static const int comis_eflags[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s) { - int ret; + FloatRelation ret; float32 s0, s1; s0 = d->ZMM_S(0); @@ -1010,7 +1115,7 @@ void helper_ucomiss(CPUX86State *env, Reg *d, Reg *s) void helper_comiss(CPUX86State *env, Reg *d, Reg *s) { - int ret; + FloatRelation ret; float32 s0, s1; s0 = d->ZMM_S(0); @@ -1021,7 +1126,7 @@ void helper_comiss(CPUX86State *env, Reg *d, Reg *s) void helper_ucomisd(CPUX86State *env, Reg *d, Reg *s) { - int ret; + FloatRelation ret; float64 d0, d1; d0 = d->ZMM_D(0); @@ -1032,7 +1137,7 @@ void helper_ucomisd(CPUX86State *env, Reg *d, Reg *s) 
void helper_comisd(CPUX86State *env, Reg *d, Reg *s) { - int ret; + FloatRelation ret; float64 d0, d1; d0 = d->ZMM_D(0); @@ -1040,205 +1145,154 @@ void helper_comisd(CPUX86State *env, Reg *d, Reg *s) ret = float64_compare(d0, d1, &env->sse_status); CC_SRC = comis_eflags[ret + 1]; } - -uint32_t helper_movmskps(CPUX86State *env, Reg *s) -{ - int b0, b1, b2, b3; - - b0 = s->ZMM_L(0) >> 31; - b1 = s->ZMM_L(1) >> 31; - b2 = s->ZMM_L(2) >> 31; - b3 = s->ZMM_L(3) >> 31; - return b0 | (b1 << 1) | (b2 << 2) | (b3 << 3); -} - -uint32_t helper_movmskpd(CPUX86State *env, Reg *s) -{ - int b0, b1; - - b0 = s->ZMM_L(1) >> 31; - b1 = s->ZMM_L(3) >> 31; - return b0 | (b1 << 1); -} - #endif -uint32_t glue(helper_pmovmskb, SUFFIX)(CPUX86State *env, Reg *s) +uint32_t glue(helper_movmskps, SUFFIX)(CPUX86State *env, Reg *s) { - uint32_t val; + uint32_t mask; + int i; - val = 0; - val |= (s->B(0) >> 7); - val |= (s->B(1) >> 6) & 0x02; - val |= (s->B(2) >> 5) & 0x04; - val |= (s->B(3) >> 4) & 0x08; - val |= (s->B(4) >> 3) & 0x10; - val |= (s->B(5) >> 2) & 0x20; - val |= (s->B(6) >> 1) & 0x40; - val |= (s->B(7)) & 0x80; -#if SHIFT == 1 - val |= (s->B(8) << 1) & 0x0100; - val |= (s->B(9) << 2) & 0x0200; - val |= (s->B(10) << 3) & 0x0400; - val |= (s->B(11) << 4) & 0x0800; - val |= (s->B(12) << 5) & 0x1000; - val |= (s->B(13) << 6) & 0x2000; - val |= (s->B(14) << 7) & 0x4000; - val |= (s->B(15) << 8) & 0x8000; -#endif - return val; + mask = 0; + for (i = 0; i < 2 << SHIFT; i++) { + mask |= (s->ZMM_L(i) >> (31 - i)) & (1 << i); + } + return mask; } -void glue(helper_packsswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +uint32_t glue(helper_movmskpd, SUFFIX)(CPUX86State *env, Reg *s) { - Reg r; + uint32_t mask; + int i; - r.B(0) = satsb((int16_t)d->W(0)); - r.B(1) = satsb((int16_t)d->W(1)); - r.B(2) = satsb((int16_t)d->W(2)); - r.B(3) = satsb((int16_t)d->W(3)); -#if SHIFT == 1 - r.B(4) = satsb((int16_t)d->W(4)); - r.B(5) = satsb((int16_t)d->W(5)); - r.B(6) = satsb((int16_t)d->W(6)); - r.B(7) = 
satsb((int16_t)d->W(7)); -#endif - r.B((4 << SHIFT) + 0) = satsb((int16_t)s->W(0)); - r.B((4 << SHIFT) + 1) = satsb((int16_t)s->W(1)); - r.B((4 << SHIFT) + 2) = satsb((int16_t)s->W(2)); - r.B((4 << SHIFT) + 3) = satsb((int16_t)s->W(3)); -#if SHIFT == 1 - r.B(12) = satsb((int16_t)s->W(4)); - r.B(13) = satsb((int16_t)s->W(5)); - r.B(14) = satsb((int16_t)s->W(6)); - r.B(15) = satsb((int16_t)s->W(7)); -#endif - *d = r; + mask = 0; + for (i = 0; i < 1 << SHIFT; i++) { + mask |= (s->ZMM_Q(i) >> (63 - i)) & (1 << i); + } + return mask; } -void glue(helper_packuswb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) -{ - Reg r; - - r.B(0) = satub((int16_t)d->W(0)); - r.B(1) = satub((int16_t)d->W(1)); - r.B(2) = satub((int16_t)d->W(2)); - r.B(3) = satub((int16_t)d->W(3)); -#if SHIFT == 1 - r.B(4) = satub((int16_t)d->W(4)); - r.B(5) = satub((int16_t)d->W(5)); - r.B(6) = satub((int16_t)d->W(6)); - r.B(7) = satub((int16_t)d->W(7)); -#endif - r.B((4 << SHIFT) + 0) = satub((int16_t)s->W(0)); - r.B((4 << SHIFT) + 1) = satub((int16_t)s->W(1)); - r.B((4 << SHIFT) + 2) = satub((int16_t)s->W(2)); - r.B((4 << SHIFT) + 3) = satub((int16_t)s->W(3)); -#if SHIFT == 1 - r.B(12) = satub((int16_t)s->W(4)); - r.B(13) = satub((int16_t)s->W(5)); - r.B(14) = satub((int16_t)s->W(6)); - r.B(15) = satub((int16_t)s->W(7)); #endif - *d = r; -} - -void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) -{ - Reg r; - r.W(0) = satsw(d->L(0)); - r.W(1) = satsw(d->L(1)); -#if SHIFT == 1 - r.W(2) = satsw(d->L(2)); - r.W(3) = satsw(d->L(3)); -#endif - r.W((2 << SHIFT) + 0) = satsw(s->L(0)); - r.W((2 << SHIFT) + 1) = satsw(s->L(1)); -#if SHIFT == 1 - r.W(6) = satsw(s->L(2)); - r.W(7) = satsw(s->L(3)); -#endif - *d = r; +#define PACK_HELPER_B(name, F) \ +void glue(helper_pack ## name, SUFFIX)(CPUX86State *env, \ + Reg *d, Reg *v, Reg *s) \ +{ \ + uint8_t r[PACK_WIDTH * 2]; \ + int j, k; \ + for (j = 0; j < 4 << SHIFT; j += PACK_WIDTH) { \ + for (k = 0; k < PACK_WIDTH; k++) { \ + r[k] = F((int16_t)v->W(j + 
k)); \ + } \ + for (k = 0; k < PACK_WIDTH; k++) { \ + r[PACK_WIDTH + k] = F((int16_t)s->W(j + k)); \ + } \ + for (k = 0; k < PACK_WIDTH * 2; k++) { \ + d->B(2 * j + k) = r[k]; \ + } \ + } \ +} + +PACK_HELPER_B(sswb, satsb) +PACK_HELPER_B(uswb, satub) + +void glue(helper_packssdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) +{ + uint16_t r[PACK_WIDTH]; + int j, k; + + for (j = 0; j < 2 << SHIFT; j += PACK_WIDTH / 2) { + for (k = 0; k < PACK_WIDTH / 2; k++) { + r[k] = satsw(v->L(j + k)); + } + for (k = 0; k < PACK_WIDTH / 2; k++) { + r[PACK_WIDTH / 2 + k] = satsw(s->L(j + k)); + } + for (k = 0; k < PACK_WIDTH; k++) { + d->W(2 * j + k) = r[k]; + } + } } #define UNPCK_OP(base_name, base) \ \ void glue(helper_punpck ## base_name ## bw, SUFFIX)(CPUX86State *env,\ - Reg *d, Reg *s) \ + Reg *d, Reg *v, Reg *s) \ { \ - Reg r; \ + uint8_t r[PACK_WIDTH * 2]; \ + int j, i; \ \ - r.B(0) = d->B((base << (SHIFT + 2)) + 0); \ - r.B(1) = s->B((base << (SHIFT + 2)) + 0); \ - r.B(2) = d->B((base << (SHIFT + 2)) + 1); \ - r.B(3) = s->B((base << (SHIFT + 2)) + 1); \ - r.B(4) = d->B((base << (SHIFT + 2)) + 2); \ - r.B(5) = s->B((base << (SHIFT + 2)) + 2); \ - r.B(6) = d->B((base << (SHIFT + 2)) + 3); \ - r.B(7) = s->B((base << (SHIFT + 2)) + 3); \ - XMM_ONLY( \ - r.B(8) = d->B((base << (SHIFT + 2)) + 4); \ - r.B(9) = s->B((base << (SHIFT + 2)) + 4); \ - r.B(10) = d->B((base << (SHIFT + 2)) + 5); \ - r.B(11) = s->B((base << (SHIFT + 2)) + 5); \ - r.B(12) = d->B((base << (SHIFT + 2)) + 6); \ - r.B(13) = s->B((base << (SHIFT + 2)) + 6); \ - r.B(14) = d->B((base << (SHIFT + 2)) + 7); \ - r.B(15) = s->B((base << (SHIFT + 2)) + 7); \ - ) \ - *d = r; \ + for (j = 0; j < 8 << SHIFT; ) { \ + int k = j + base * PACK_WIDTH; \ + for (i = 0; i < PACK_WIDTH; i++) { \ + r[2 * i] = v->B(k + i); \ + r[2 * i + 1] = s->B(k + i); \ + } \ + for (i = 0; i < PACK_WIDTH * 2; i++, j++) { \ + d->B(j) = r[i]; \ + } \ + } \ } \ \ void glue(helper_punpck ## base_name ## wd, SUFFIX)(CPUX86State *env,\ - Reg 
*d, Reg *s) \ + Reg *d, Reg *v, Reg *s) \ { \ - Reg r; \ + uint16_t r[PACK_WIDTH]; \ + int j, i; \ \ - r.W(0) = d->W((base << (SHIFT + 1)) + 0); \ - r.W(1) = s->W((base << (SHIFT + 1)) + 0); \ - r.W(2) = d->W((base << (SHIFT + 1)) + 1); \ - r.W(3) = s->W((base << (SHIFT + 1)) + 1); \ - XMM_ONLY( \ - r.W(4) = d->W((base << (SHIFT + 1)) + 2); \ - r.W(5) = s->W((base << (SHIFT + 1)) + 2); \ - r.W(6) = d->W((base << (SHIFT + 1)) + 3); \ - r.W(7) = s->W((base << (SHIFT + 1)) + 3); \ - ) \ - *d = r; \ + for (j = 0; j < 4 << SHIFT; ) { \ + int k = j + base * PACK_WIDTH / 2; \ + for (i = 0; i < PACK_WIDTH / 2; i++) { \ + r[2 * i] = v->W(k + i); \ + r[2 * i + 1] = s->W(k + i); \ + } \ + for (i = 0; i < PACK_WIDTH; i++, j++) { \ + d->W(j) = r[i]; \ + } \ + } \ } \ \ void glue(helper_punpck ## base_name ## dq, SUFFIX)(CPUX86State *env,\ - Reg *d, Reg *s) \ + Reg *d, Reg *v, Reg *s) \ { \ - Reg r; \ + uint32_t r[PACK_WIDTH / 2]; \ + int j, i; \ \ - r.L(0) = d->L((base << SHIFT) + 0); \ - r.L(1) = s->L((base << SHIFT) + 0); \ - XMM_ONLY( \ - r.L(2) = d->L((base << SHIFT) + 1); \ - r.L(3) = s->L((base << SHIFT) + 1); \ - ) \ - *d = r; \ + for (j = 0; j < 2 << SHIFT; ) { \ + int k = j + base * PACK_WIDTH / 4; \ + for (i = 0; i < PACK_WIDTH / 4; i++) { \ + r[2 * i] = v->L(k + i); \ + r[2 * i + 1] = s->L(k + i); \ + } \ + for (i = 0; i < PACK_WIDTH / 2; i++, j++) { \ + d->L(j) = r[i]; \ + } \ + } \ } \ \ XMM_ONLY( \ - void glue(helper_punpck ## base_name ## qdq, SUFFIX)(CPUX86State \ - *env, \ - Reg *d, \ - Reg *s) \ + void glue(helper_punpck ## base_name ## qdq, SUFFIX)( \ + CPUX86State *env, Reg *d, Reg *v, Reg *s) \ { \ - Reg r; \ + uint64_t r[2]; \ + int i; \ \ - r.Q(0) = d->Q(base); \ - r.Q(1) = s->Q(base); \ - *d = r; \ + for (i = 0; i < 1 << SHIFT; i += 2) { \ + r[0] = v->Q(base + i); \ + r[1] = s->Q(base + i); \ + d->Q(i) = r[0]; \ + d->Q(i + 1) = r[1]; \ + } \ } \ ) UNPCK_OP(l, 0) UNPCK_OP(h, 1) +#undef PACK_WIDTH +#undef PACK_HELPER_B +#undef UNPCK_OP + + /* 3DNow! 
float ops */ #if SHIFT == 0 void helper_pi2fd(CPUX86State *env, MMXReg *d, MMXReg *s) @@ -1269,11 +1323,11 @@ void helper_pf2iw(CPUX86State *env, MMXReg *d, MMXReg *s) void helper_pfacc(CPUX86State *env, MMXReg *d, MMXReg *s) { - MMXReg r; + float32 r; - r.MMX_S(0) = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); - r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); - *d = r; + r = float32_add(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + d->MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + d->MMX_S(0) = r; } void helper_pfadd(CPUX86State *env, MMXReg *d, MMXReg *s) @@ -1334,20 +1388,20 @@ void helper_pfmul(CPUX86State *env, MMXReg *d, MMXReg *s) void helper_pfnacc(CPUX86State *env, MMXReg *d, MMXReg *s) { - MMXReg r; + float32 r; - r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); - r.MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); - *d = r; + r = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + d->MMX_S(1) = float32_sub(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + d->MMX_S(0) = r; } void helper_pfpnacc(CPUX86State *env, MMXReg *d, MMXReg *s) { - MMXReg r; + float32 r; - r.MMX_S(0) = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); - r.MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); - *d = r; + r = float32_sub(d->MMX_S(0), d->MMX_S(1), &env->mmx_status); + d->MMX_S(1) = float32_add(s->MMX_S(0), s->MMX_S(1), &env->mmx_status); + d->MMX_S(0) = r; } void helper_pfrcp(CPUX86State *env, MMXReg *d, MMXReg *s) @@ -1380,120 +1434,95 @@ void helper_pfsubr(CPUX86State *env, MMXReg *d, MMXReg *s) void helper_pswapd(CPUX86State *env, MMXReg *d, MMXReg *s) { - MMXReg r; + uint32_t r; - r.MMX_L(0) = s->MMX_L(1); - r.MMX_L(1) = s->MMX_L(0); - *d = r; + r = s->MMX_L(0); + d->MMX_L(0) = s->MMX_L(1); + d->MMX_L(1) = r; } #endif /* SSSE3 op helpers */ -void glue(helper_pshufb, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_pshufb, 
SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { int i; - Reg r; +#if SHIFT == 0 + uint8_t r[8]; - for (i = 0; i < (8 << SHIFT); i++) { - r.B(i) = (s->B(i) & 0x80) ? 0 : (d->B(s->B(i) & ((8 << SHIFT) - 1))); + for (i = 0; i < 8; i++) { + r[i] = (s->B(i) & 0x80) ? 0 : (v->B(s->B(i) & 7)); } + for (i = 0; i < 8; i++) { + d->B(i) = r[i]; + } +#else + uint8_t r[8 << SHIFT]; - *d = r; -} - -void glue(helper_phaddw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) -{ - d->W(0) = (int16_t)d->W(0) + (int16_t)d->W(1); - d->W(1) = (int16_t)d->W(2) + (int16_t)d->W(3); - XMM_ONLY(d->W(2) = (int16_t)d->W(4) + (int16_t)d->W(5)); - XMM_ONLY(d->W(3) = (int16_t)d->W(6) + (int16_t)d->W(7)); - d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) + (int16_t)s->W(1); - d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) + (int16_t)s->W(3); - XMM_ONLY(d->W(6) = (int16_t)s->W(4) + (int16_t)s->W(5)); - XMM_ONLY(d->W(7) = (int16_t)s->W(6) + (int16_t)s->W(7)); -} - -void glue(helper_phaddd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) -{ - d->L(0) = (int32_t)d->L(0) + (int32_t)d->L(1); - XMM_ONLY(d->L(1) = (int32_t)d->L(2) + (int32_t)d->L(3)); - d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) + (int32_t)s->L(1); - XMM_ONLY(d->L(3) = (int32_t)s->L(2) + (int32_t)s->L(3)); -} - -void glue(helper_phaddsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) -{ - d->W(0) = satsw((int16_t)d->W(0) + (int16_t)d->W(1)); - d->W(1) = satsw((int16_t)d->W(2) + (int16_t)d->W(3)); - XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) + (int16_t)d->W(5))); - XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) + (int16_t)d->W(7))); - d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) + (int16_t)s->W(1)); - d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) + (int16_t)s->W(3)); - XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) + (int16_t)s->W(5))); - XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) + (int16_t)s->W(7))); -} - -void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) -{ - d->W(0) = satsw((int8_t)s->B(0) * (uint8_t)d->B(0) + - (int8_t)s->B(1) * (uint8_t)d->B(1)); 
- d->W(1) = satsw((int8_t)s->B(2) * (uint8_t)d->B(2) + - (int8_t)s->B(3) * (uint8_t)d->B(3)); - d->W(2) = satsw((int8_t)s->B(4) * (uint8_t)d->B(4) + - (int8_t)s->B(5) * (uint8_t)d->B(5)); - d->W(3) = satsw((int8_t)s->B(6) * (uint8_t)d->B(6) + - (int8_t)s->B(7) * (uint8_t)d->B(7)); -#if SHIFT == 1 - d->W(4) = satsw((int8_t)s->B(8) * (uint8_t)d->B(8) + - (int8_t)s->B(9) * (uint8_t)d->B(9)); - d->W(5) = satsw((int8_t)s->B(10) * (uint8_t)d->B(10) + - (int8_t)s->B(11) * (uint8_t)d->B(11)); - d->W(6) = satsw((int8_t)s->B(12) * (uint8_t)d->B(12) + - (int8_t)s->B(13) * (uint8_t)d->B(13)); - d->W(7) = satsw((int8_t)s->B(14) * (uint8_t)d->B(14) + - (int8_t)s->B(15) * (uint8_t)d->B(15)); + for (i = 0; i < 8 << SHIFT; i++) { + int j = i & ~0xf; + r[i] = (s->B(i) & 0x80) ? 0 : v->B(j | (s->B(i) & 0xf)); + } + for (i = 0; i < 8 << SHIFT; i++) { + d->B(i) = r[i]; + } #endif } -void glue(helper_phsubw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) -{ - d->W(0) = (int16_t)d->W(0) - (int16_t)d->W(1); - d->W(1) = (int16_t)d->W(2) - (int16_t)d->W(3); - XMM_ONLY(d->W(2) = (int16_t)d->W(4) - (int16_t)d->W(5)); - XMM_ONLY(d->W(3) = (int16_t)d->W(6) - (int16_t)d->W(7)); - d->W((2 << SHIFT) + 0) = (int16_t)s->W(0) - (int16_t)s->W(1); - d->W((2 << SHIFT) + 1) = (int16_t)s->W(2) - (int16_t)s->W(3); - XMM_ONLY(d->W(6) = (int16_t)s->W(4) - (int16_t)s->W(5)); - XMM_ONLY(d->W(7) = (int16_t)s->W(6) - (int16_t)s->W(7)); -} - -void glue(helper_phsubd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +#define SSE_HELPER_HW(name, F) \ +void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) \ +{ \ + uint16_t r[4 << SHIFT]; \ + int i, j, k; \ + for (k = 0; k < 4 << SHIFT; k += LANE_WIDTH / 2) { \ + for (i = j = 0; j < LANE_WIDTH / 2; i++, j += 2) { \ + r[i + k] = F(v->W(j + k), v->W(j + k + 1)); \ + } \ + for (j = 0; j < LANE_WIDTH / 2; i++, j += 2) { \ + r[i + k] = F(s->W(j + k), s->W(j + k + 1)); \ + } \ + } \ + for (i = 0; i < 4 << SHIFT; i++) { \ + d->W(i) = r[i]; \ + } \ +} + +#define 
SSE_HELPER_HL(name, F) \ +void glue(helper_ ## name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) \ +{ \ + uint32_t r[2 << SHIFT]; \ + int i, j, k; \ + for (k = 0; k < 2 << SHIFT; k += LANE_WIDTH / 4) { \ + for (i = j = 0; j < LANE_WIDTH / 4; i++, j += 2) { \ + r[i + k] = F(v->L(j + k), v->L(j + k + 1)); \ + } \ + for (j = 0; j < LANE_WIDTH / 4; i++, j += 2) { \ + r[i + k] = F(s->L(j + k), s->L(j + k + 1)); \ + } \ + } \ + for (i = 0; i < 2 << SHIFT; i++) { \ + d->L(i) = r[i]; \ + } \ +} + +SSE_HELPER_HW(phaddw, FADD) +SSE_HELPER_HW(phsubw, FSUB) +SSE_HELPER_HW(phaddsw, FADDSW) +SSE_HELPER_HW(phsubsw, FSUBSW) +SSE_HELPER_HL(phaddd, FADD) +SSE_HELPER_HL(phsubd, FSUB) + +#undef SSE_HELPER_HW +#undef SSE_HELPER_HL + +void glue(helper_pmaddubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { - d->L(0) = (int32_t)d->L(0) - (int32_t)d->L(1); - XMM_ONLY(d->L(1) = (int32_t)d->L(2) - (int32_t)d->L(3)); - d->L((1 << SHIFT) + 0) = (int32_t)s->L(0) - (int32_t)s->L(1); - XMM_ONLY(d->L(3) = (int32_t)s->L(2) - (int32_t)s->L(3)); -} - -void glue(helper_phsubsw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) -{ - d->W(0) = satsw((int16_t)d->W(0) - (int16_t)d->W(1)); - d->W(1) = satsw((int16_t)d->W(2) - (int16_t)d->W(3)); - XMM_ONLY(d->W(2) = satsw((int16_t)d->W(4) - (int16_t)d->W(5))); - XMM_ONLY(d->W(3) = satsw((int16_t)d->W(6) - (int16_t)d->W(7))); - d->W((2 << SHIFT) + 0) = satsw((int16_t)s->W(0) - (int16_t)s->W(1)); - d->W((2 << SHIFT) + 1) = satsw((int16_t)s->W(2) - (int16_t)s->W(3)); - XMM_ONLY(d->W(6) = satsw((int16_t)s->W(4) - (int16_t)s->W(5))); - XMM_ONLY(d->W(7) = satsw((int16_t)s->W(6) - (int16_t)s->W(7))); + int i; + for (i = 0; i < 4 << SHIFT; i++) { + d->W(i) = satsw((int8_t)s->B(i * 2) * (uint8_t)v->B(i * 2) + + (int8_t)s->B(i * 2 + 1) * (uint8_t)v->B(i * 2 + 1)); + } } -#define FABSB(_, x) (x > INT8_MAX ? -(int8_t)x : x) -#define FABSW(_, x) (x > INT16_MAX ? -(int16_t)x : x) -#define FABSL(_, x) (x > INT32_MAX ? 
-(int32_t)x : x) -SSE_HELPER_B(helper_pabsb, FABSB) -SSE_HELPER_W(helper_pabsw, FABSW) -SSE_HELPER_L(helper_pabsd, FABSL) - #define FMULHRSW(d, s) (((int16_t) d * (int16_t)s + 0x4000) >> 15) SSE_HELPER_W(helper_pmulhrsw, FMULHRSW) @@ -1504,186 +1533,146 @@ SSE_HELPER_B(helper_psignb, FSIGNB) SSE_HELPER_W(helper_psignw, FSIGNW) SSE_HELPER_L(helper_psignd, FSIGNL) -void glue(helper_palignr, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, - int32_t shift) +void glue(helper_palignr, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, + uint32_t imm) { - Reg r; + int i; /* XXX could be checked during translation */ - if (shift >= (16 << SHIFT)) { - r.Q(0) = 0; - XMM_ONLY(r.Q(1) = 0); + if (imm >= (SHIFT ? 32 : 16)) { + for (i = 0; i < (1 << SHIFT); i++) { + d->Q(i) = 0; + } } else { - shift <<= 3; + int shift = imm * 8; #define SHR(v, i) (i < 64 && i > -64 ? i > 0 ? v >> (i) : (v << -(i)) : 0) #if SHIFT == 0 - r.Q(0) = SHR(s->Q(0), shift - 0) | - SHR(d->Q(0), shift - 64); + d->Q(0) = SHR(s->Q(0), shift - 0) | + SHR(v->Q(0), shift - 64); #else - r.Q(0) = SHR(s->Q(0), shift - 0) | - SHR(s->Q(1), shift - 64) | - SHR(d->Q(0), shift - 128) | - SHR(d->Q(1), shift - 192); - r.Q(1) = SHR(s->Q(0), shift + 64) | - SHR(s->Q(1), shift - 0) | - SHR(d->Q(0), shift - 64) | - SHR(d->Q(1), shift - 128); + for (i = 0; i < (1 << SHIFT); i += 2) { + uint64_t r0, r1; + + r0 = SHR(s->Q(i), shift - 0) | + SHR(s->Q(i + 1), shift - 64) | + SHR(v->Q(i), shift - 128) | + SHR(v->Q(i + 1), shift - 192); + r1 = SHR(s->Q(i), shift + 64) | + SHR(s->Q(i + 1), shift - 0) | + SHR(v->Q(i), shift - 64) | + SHR(v->Q(i + 1), shift - 128); + d->Q(i) = r0; + d->Q(i + 1) = r1; + } #endif #undef SHR } - - *d = r; } -#define XMM0 (env->xmm_regs[0]) +#if SHIFT >= 1 -#if SHIFT == 1 #define SSE_HELPER_V(name, elem, num, F) \ - void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ + void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, \ + Reg *m) \ { \ - d->elem(0) = F(d->elem(0), s->elem(0), 
XMM0.elem(0)); \ - d->elem(1) = F(d->elem(1), s->elem(1), XMM0.elem(1)); \ - if (num > 2) { \ - d->elem(2) = F(d->elem(2), s->elem(2), XMM0.elem(2)); \ - d->elem(3) = F(d->elem(3), s->elem(3), XMM0.elem(3)); \ - if (num > 4) { \ - d->elem(4) = F(d->elem(4), s->elem(4), XMM0.elem(4)); \ - d->elem(5) = F(d->elem(5), s->elem(5), XMM0.elem(5)); \ - d->elem(6) = F(d->elem(6), s->elem(6), XMM0.elem(6)); \ - d->elem(7) = F(d->elem(7), s->elem(7), XMM0.elem(7)); \ - if (num > 8) { \ - d->elem(8) = F(d->elem(8), s->elem(8), XMM0.elem(8)); \ - d->elem(9) = F(d->elem(9), s->elem(9), XMM0.elem(9)); \ - d->elem(10) = F(d->elem(10), s->elem(10), XMM0.elem(10)); \ - d->elem(11) = F(d->elem(11), s->elem(11), XMM0.elem(11)); \ - d->elem(12) = F(d->elem(12), s->elem(12), XMM0.elem(12)); \ - d->elem(13) = F(d->elem(13), s->elem(13), XMM0.elem(13)); \ - d->elem(14) = F(d->elem(14), s->elem(14), XMM0.elem(14)); \ - d->elem(15) = F(d->elem(15), s->elem(15), XMM0.elem(15)); \ - } \ - } \ + int i; \ + for (i = 0; i < num; i++) { \ + d->elem(i) = F(v->elem(i), s->elem(i), m->elem(i)); \ } \ } #define SSE_HELPER_I(name, elem, num, F) \ - void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t imm) \ + void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, \ + uint32_t imm) \ { \ - d->elem(0) = F(d->elem(0), s->elem(0), ((imm >> 0) & 1)); \ - d->elem(1) = F(d->elem(1), s->elem(1), ((imm >> 1) & 1)); \ - if (num > 2) { \ - d->elem(2) = F(d->elem(2), s->elem(2), ((imm >> 2) & 1)); \ - d->elem(3) = F(d->elem(3), s->elem(3), ((imm >> 3) & 1)); \ - if (num > 4) { \ - d->elem(4) = F(d->elem(4), s->elem(4), ((imm >> 4) & 1)); \ - d->elem(5) = F(d->elem(5), s->elem(5), ((imm >> 5) & 1)); \ - d->elem(6) = F(d->elem(6), s->elem(6), ((imm >> 6) & 1)); \ - d->elem(7) = F(d->elem(7), s->elem(7), ((imm >> 7) & 1)); \ - if (num > 8) { \ - d->elem(8) = F(d->elem(8), s->elem(8), ((imm >> 8) & 1)); \ - d->elem(9) = F(d->elem(9), s->elem(9), ((imm >> 9) & 1)); \ - d->elem(10) = 
F(d->elem(10), s->elem(10), \ - ((imm >> 10) & 1)); \ - d->elem(11) = F(d->elem(11), s->elem(11), \ - ((imm >> 11) & 1)); \ - d->elem(12) = F(d->elem(12), s->elem(12), \ - ((imm >> 12) & 1)); \ - d->elem(13) = F(d->elem(13), s->elem(13), \ - ((imm >> 13) & 1)); \ - d->elem(14) = F(d->elem(14), s->elem(14), \ - ((imm >> 14) & 1)); \ - d->elem(15) = F(d->elem(15), s->elem(15), \ - ((imm >> 15) & 1)); \ - } \ - } \ + int i; \ + for (i = 0; i < num; i++) { \ + int j = i & 7; \ + d->elem(i) = F(v->elem(i), s->elem(i), (imm >> j) & 1); \ } \ } /* SSE4.1 op helpers */ -#define FBLENDVB(d, s, m) ((m & 0x80) ? s : d) -#define FBLENDVPS(d, s, m) ((m & 0x80000000) ? s : d) -#define FBLENDVPD(d, s, m) ((m & 0x8000000000000000LL) ? s : d) -SSE_HELPER_V(helper_pblendvb, B, 16, FBLENDVB) -SSE_HELPER_V(helper_blendvps, L, 4, FBLENDVPS) -SSE_HELPER_V(helper_blendvpd, Q, 2, FBLENDVPD) +#define FBLENDVB(v, s, m) ((m & 0x80) ? s : v) +#define FBLENDVPS(v, s, m) ((m & 0x80000000) ? s : v) +#define FBLENDVPD(v, s, m) ((m & 0x8000000000000000LL) ? s : v) +SSE_HELPER_V(helper_pblendvb, B, 8 << SHIFT, FBLENDVB) +SSE_HELPER_V(helper_blendvps, L, 2 << SHIFT, FBLENDVPS) +SSE_HELPER_V(helper_blendvpd, Q, 1 << SHIFT, FBLENDVPD) void glue(helper_ptest, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - uint64_t zf = (s->Q(0) & d->Q(0)) | (s->Q(1) & d->Q(1)); - uint64_t cf = (s->Q(0) & ~d->Q(0)) | (s->Q(1) & ~d->Q(1)); + uint64_t zf = 0, cf = 0; + int i; + for (i = 0; i < 1 << SHIFT; i++) { + zf |= (s->Q(i) & d->Q(i)); + cf |= (s->Q(i) & ~d->Q(i)); + } CC_SRC = (zf ? 0 : CC_Z) | (cf ? 
0 : CC_C); } -#define SSE_HELPER_F(name, elem, num, F) \ - void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ - { \ - if (num > 2) { \ - if (num > 4) { \ - d->elem(7) = F(7); \ - d->elem(6) = F(6); \ - d->elem(5) = F(5); \ - d->elem(4) = F(4); \ - } \ - d->elem(3) = F(3); \ - d->elem(2) = F(2); \ - } \ - d->elem(1) = F(1); \ - d->elem(0) = F(0); \ - } - -SSE_HELPER_F(helper_pmovsxbw, W, 8, (int8_t) s->B) -SSE_HELPER_F(helper_pmovsxbd, L, 4, (int8_t) s->B) -SSE_HELPER_F(helper_pmovsxbq, Q, 2, (int8_t) s->B) -SSE_HELPER_F(helper_pmovsxwd, L, 4, (int16_t) s->W) -SSE_HELPER_F(helper_pmovsxwq, Q, 2, (int16_t) s->W) -SSE_HELPER_F(helper_pmovsxdq, Q, 2, (int32_t) s->L) -SSE_HELPER_F(helper_pmovzxbw, W, 8, s->B) -SSE_HELPER_F(helper_pmovzxbd, L, 4, s->B) -SSE_HELPER_F(helper_pmovzxbq, Q, 2, s->B) -SSE_HELPER_F(helper_pmovzxwd, L, 4, s->W) -SSE_HELPER_F(helper_pmovzxwq, Q, 2, s->W) -SSE_HELPER_F(helper_pmovzxdq, Q, 2, s->L) - -void glue(helper_pmuldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) -{ - d->Q(0) = (int64_t)(int32_t) d->L(0) * (int32_t) s->L(0); - d->Q(1) = (int64_t)(int32_t) d->L(2) * (int32_t) s->L(2); -} - -#define FCMPEQQ(d, s) (d == s ? 
-1 : 0) -SSE_HELPER_Q(helper_pcmpeqq, FCMPEQQ) - -void glue(helper_packusdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) -{ - Reg r; - - r.W(0) = satuw((int32_t) d->L(0)); - r.W(1) = satuw((int32_t) d->L(1)); - r.W(2) = satuw((int32_t) d->L(2)); - r.W(3) = satuw((int32_t) d->L(3)); - r.W(4) = satuw((int32_t) s->L(0)); - r.W(5) = satuw((int32_t) s->L(1)); - r.W(6) = satuw((int32_t) s->L(2)); - r.W(7) = satuw((int32_t) s->L(3)); - *d = r; -} - -#define FMINSB(d, s) MIN((int8_t)d, (int8_t)s) -#define FMINSD(d, s) MIN((int32_t)d, (int32_t)s) -#define FMAXSB(d, s) MAX((int8_t)d, (int8_t)s) -#define FMAXSD(d, s) MAX((int32_t)d, (int32_t)s) -SSE_HELPER_B(helper_pminsb, FMINSB) -SSE_HELPER_L(helper_pminsd, FMINSD) -SSE_HELPER_W(helper_pminuw, MIN) -SSE_HELPER_L(helper_pminud, MIN) -SSE_HELPER_B(helper_pmaxsb, FMAXSB) -SSE_HELPER_L(helper_pmaxsd, FMAXSD) -SSE_HELPER_W(helper_pmaxuw, MAX) -SSE_HELPER_L(helper_pmaxud, MAX) - -#define FMULLD(d, s) ((int32_t)d * (int32_t)s) -SSE_HELPER_L(helper_pmulld, FMULLD) +#define FMOVSLDUP(i) s->L((i) & ~1) +#define FMOVSHDUP(i) s->L((i) | 1) +#define FMOVDLDUP(i) s->Q((i) & ~1) + +#define SSE_HELPER_F(name, elem, num, F) \ + void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) \ + { \ + int n = num; \ + for (int i = n; --i >= 0; ) { \ + d->elem(i) = F(i); \ + } \ + } + +#if SHIFT > 0 +SSE_HELPER_F(helper_pmovsxbw, W, 4 << SHIFT, (int8_t) s->B) +SSE_HELPER_F(helper_pmovsxbd, L, 2 << SHIFT, (int8_t) s->B) +SSE_HELPER_F(helper_pmovsxbq, Q, 1 << SHIFT, (int8_t) s->B) +SSE_HELPER_F(helper_pmovsxwd, L, 2 << SHIFT, (int16_t) s->W) +SSE_HELPER_F(helper_pmovsxwq, Q, 1 << SHIFT, (int16_t) s->W) +SSE_HELPER_F(helper_pmovsxdq, Q, 1 << SHIFT, (int32_t) s->L) +SSE_HELPER_F(helper_pmovzxbw, W, 4 << SHIFT, s->B) +SSE_HELPER_F(helper_pmovzxbd, L, 2 << SHIFT, s->B) +SSE_HELPER_F(helper_pmovzxbq, Q, 1 << SHIFT, s->B) +SSE_HELPER_F(helper_pmovzxwd, L, 2 << SHIFT, s->W) +SSE_HELPER_F(helper_pmovzxwq, Q, 1 << SHIFT, s->W) +SSE_HELPER_F(helper_pmovzxdq, Q, 
1 << SHIFT, s->L) +SSE_HELPER_F(helper_pmovsldup, L, 2 << SHIFT, FMOVSLDUP) +SSE_HELPER_F(helper_pmovshdup, L, 2 << SHIFT, FMOVSHDUP) +SSE_HELPER_F(helper_pmovdldup, Q, 1 << SHIFT, FMOVDLDUP) +#endif + +void glue(helper_pmuldq, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) +{ + int i; + + for (i = 0; i < 1 << SHIFT; i++) { + d->Q(i) = (int64_t)(int32_t) v->L(2 * i) * (int32_t) s->L(2 * i); + } +} + +void glue(helper_packusdw, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) +{ + uint16_t r[8]; + int i, j, k; + + for (i = 0, j = 0; i <= 2 << SHIFT; i += 8, j += 4) { + r[0] = satuw(v->L(j)); + r[1] = satuw(v->L(j + 1)); + r[2] = satuw(v->L(j + 2)); + r[3] = satuw(v->L(j + 3)); + r[4] = satuw(s->L(j)); + r[5] = satuw(s->L(j + 1)); + r[6] = satuw(s->L(j + 2)); + r[7] = satuw(s->L(j + 3)); + for (k = 0; k < 8; k++) { + d->W(i + k) = r[k]; + } + } +} +#if SHIFT == 1 void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { int idx = 0; @@ -1715,254 +1704,222 @@ void glue(helper_phminposuw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) d->L(1) = 0; d->Q(1) = 0; } +#endif void glue(helper_roundps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; + int i; prev_rounding_mode = env->sse_status.float_rounding_mode; if (!(mode & (1 << 2))) { - switch (mode & 3) { - case 0: - set_float_rounding_mode(float_round_nearest_even, &env->sse_status); - break; - case 1: - set_float_rounding_mode(float_round_down, &env->sse_status); - break; - case 2: - set_float_rounding_mode(float_round_up, &env->sse_status); - break; - case 3: - set_float_rounding_mode(float_round_to_zero, &env->sse_status); - break; - } + set_x86_rounding_mode(mode & 3, &env->sse_status); } - d->ZMM_S(0) = float32_round_to_int(s->ZMM_S(0), &env->sse_status); - d->ZMM_S(1) = float32_round_to_int(s->ZMM_S(1), &env->sse_status); - d->ZMM_S(2) = float32_round_to_int(s->ZMM_S(2), &env->sse_status); 
- d->ZMM_S(3) = float32_round_to_int(s->ZMM_S(3), &env->sse_status); + for (i = 0; i < 2 << SHIFT; i++) { + d->ZMM_S(i) = float32_round_to_int(s->ZMM_S(i), &env->sse_status); + } -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } void glue(helper_roundpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; + int i; prev_rounding_mode = env->sse_status.float_rounding_mode; if (!(mode & (1 << 2))) { - switch (mode & 3) { - case 0: - set_float_rounding_mode(float_round_nearest_even, &env->sse_status); - break; - case 1: - set_float_rounding_mode(float_round_down, &env->sse_status); - break; - case 2: - set_float_rounding_mode(float_round_up, &env->sse_status); - break; - case 3: - set_float_rounding_mode(float_round_to_zero, &env->sse_status); - break; - } + set_x86_rounding_mode(mode & 3, &env->sse_status); } - d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status); - d->ZMM_D(1) = float64_round_to_int(s->ZMM_D(1), &env->sse_status); + for (i = 0; i < 1 << SHIFT; i++) { + d->ZMM_D(i) = float64_round_to_int(s->ZMM_D(i), &env->sse_status); + } -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } -void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, +#if SHIFT == 1 +void glue(helper_roundss, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; + int i; 
prev_rounding_mode = env->sse_status.float_rounding_mode; if (!(mode & (1 << 2))) { - switch (mode & 3) { - case 0: - set_float_rounding_mode(float_round_nearest_even, &env->sse_status); - break; - case 1: - set_float_rounding_mode(float_round_down, &env->sse_status); - break; - case 2: - set_float_rounding_mode(float_round_up, &env->sse_status); - break; - case 3: - set_float_rounding_mode(float_round_to_zero, &env->sse_status); - break; - } + set_x86_rounding_mode(mode & 3, &env->sse_status); } d->ZMM_S(0) = float32_round_to_int(s->ZMM_S(0), &env->sse_status); + for (i = 1; i < 2 << SHIFT; i++) { + d->ZMM_L(i) = v->ZMM_L(i); + } -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } -void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, +void glue(helper_roundsd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t mode) { + uint8_t old_flags = get_float_exception_flags(&env->sse_status); signed char prev_rounding_mode; + int i; prev_rounding_mode = env->sse_status.float_rounding_mode; if (!(mode & (1 << 2))) { - switch (mode & 3) { - case 0: - set_float_rounding_mode(float_round_nearest_even, &env->sse_status); - break; - case 1: - set_float_rounding_mode(float_round_down, &env->sse_status); - break; - case 2: - set_float_rounding_mode(float_round_up, &env->sse_status); - break; - case 3: - set_float_rounding_mode(float_round_to_zero, &env->sse_status); - break; - } + set_x86_rounding_mode(mode & 3, &env->sse_status); } d->ZMM_D(0) = float64_round_to_int(s->ZMM_D(0), &env->sse_status); + for (i = 1; i < 1 << SHIFT; i++) { + d->ZMM_Q(i) = v->ZMM_Q(i); + } -#if 0 /* TODO */ - if (mode & (1 << 3)) { + if (mode & (1 << 3) && !(old_flags & float_flag_inexact)) { 
set_float_exception_flags(get_float_exception_flags(&env->sse_status) & ~float_flag_inexact, &env->sse_status); } -#endif env->sse_status.float_rounding_mode = prev_rounding_mode; } +#endif -#define FBLENDP(d, s, m) (m ? s : d) -SSE_HELPER_I(helper_blendps, L, 4, FBLENDP) -SSE_HELPER_I(helper_blendpd, Q, 2, FBLENDP) -SSE_HELPER_I(helper_pblendw, W, 8, FBLENDP) +#define FBLENDP(v, s, m) (m ? s : v) +SSE_HELPER_I(helper_blendps, L, 2 << SHIFT, FBLENDP) +SSE_HELPER_I(helper_blendpd, Q, 1 << SHIFT, FBLENDP) +SSE_HELPER_I(helper_pblendw, W, 4 << SHIFT, FBLENDP) -void glue(helper_dpps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mask) +void glue(helper_dpps, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, + uint32_t mask) { - float32 iresult = float32_zero; + float32 prod1, prod2, temp2, temp3, temp4; + int i; - if (mask & (1 << 4)) { - iresult = float32_add(iresult, - float32_mul(d->ZMM_S(0), s->ZMM_S(0), - &env->sse_status), - &env->sse_status); - } - if (mask & (1 << 5)) { - iresult = float32_add(iresult, - float32_mul(d->ZMM_S(1), s->ZMM_S(1), - &env->sse_status), - &env->sse_status); - } - if (mask & (1 << 6)) { - iresult = float32_add(iresult, - float32_mul(d->ZMM_S(2), s->ZMM_S(2), - &env->sse_status), - &env->sse_status); - } - if (mask & (1 << 7)) { - iresult = float32_add(iresult, - float32_mul(d->ZMM_S(3), s->ZMM_S(3), - &env->sse_status), - &env->sse_status); + for (i = 0; i < 2 << SHIFT; i += 4) { + /* + * We must evaluate (A+B)+(C+D), not ((A+B)+C)+D + * to correctly round the intermediate results + */ + if (mask & (1 << 4)) { + prod1 = float32_mul(v->ZMM_S(i), s->ZMM_S(i), &env->sse_status); + } else { + prod1 = float32_zero; + } + if (mask & (1 << 5)) { + prod2 = float32_mul(v->ZMM_S(i+1), s->ZMM_S(i+1), &env->sse_status); + } else { + prod2 = float32_zero; + } + temp2 = float32_add(prod1, prod2, &env->sse_status); + if (mask & (1 << 6)) { + prod1 = float32_mul(v->ZMM_S(i+2), s->ZMM_S(i+2), &env->sse_status); + } else { + prod1 = float32_zero; + 
} + if (mask & (1 << 7)) { + prod2 = float32_mul(v->ZMM_S(i+3), s->ZMM_S(i+3), &env->sse_status); + } else { + prod2 = float32_zero; + } + temp3 = float32_add(prod1, prod2, &env->sse_status); + temp4 = float32_add(temp2, temp3, &env->sse_status); + + d->ZMM_S(i) = (mask & (1 << 0)) ? temp4 : float32_zero; + d->ZMM_S(i+1) = (mask & (1 << 1)) ? temp4 : float32_zero; + d->ZMM_S(i+2) = (mask & (1 << 2)) ? temp4 : float32_zero; + d->ZMM_S(i+3) = (mask & (1 << 3)) ? temp4 : float32_zero; } - d->ZMM_S(0) = (mask & (1 << 0)) ? iresult : float32_zero; - d->ZMM_S(1) = (mask & (1 << 1)) ? iresult : float32_zero; - d->ZMM_S(2) = (mask & (1 << 2)) ? iresult : float32_zero; - d->ZMM_S(3) = (mask & (1 << 3)) ? iresult : float32_zero; } -void glue(helper_dppd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, uint32_t mask) +#if SHIFT == 1 +/* Oddly, there is no ymm version of dppd */ +void glue(helper_dppd, SUFFIX)(CPUX86State *env, + Reg *d, Reg *v, Reg *s, uint32_t mask) { - float64 iresult = float64_zero; + float64 prod1, prod2, temp2; if (mask & (1 << 4)) { - iresult = float64_add(iresult, - float64_mul(d->ZMM_D(0), s->ZMM_D(0), - &env->sse_status), - &env->sse_status); + prod1 = float64_mul(v->ZMM_D(0), s->ZMM_D(0), &env->sse_status); + } else { + prod1 = float64_zero; } if (mask & (1 << 5)) { - iresult = float64_add(iresult, - float64_mul(d->ZMM_D(1), s->ZMM_D(1), - &env->sse_status), - &env->sse_status); + prod2 = float64_mul(v->ZMM_D(1), s->ZMM_D(1), &env->sse_status); + } else { + prod2 = float64_zero; } - d->ZMM_D(0) = (mask & (1 << 0)) ? iresult : float64_zero; - d->ZMM_D(1) = (mask & (1 << 1)) ? iresult : float64_zero; + temp2 = float64_add(prod1, prod2, &env->sse_status); + d->ZMM_D(0) = (mask & (1 << 0)) ? temp2 : float64_zero; + d->ZMM_D(1) = (mask & (1 << 1)) ? 
temp2 : float64_zero; } +#endif -void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, +void glue(helper_mpsadbw, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t offset) { - int s0 = (offset & 3) << 2; - int d0 = (offset & 4) << 0; - int i; - Reg r; - - for (i = 0; i < 8; i++, d0++) { - r.W(i) = 0; - r.W(i) += abs1(d->B(d0 + 0) - s->B(s0 + 0)); - r.W(i) += abs1(d->B(d0 + 1) - s->B(s0 + 1)); - r.W(i) += abs1(d->B(d0 + 2) - s->B(s0 + 2)); - r.W(i) += abs1(d->B(d0 + 3) - s->B(s0 + 3)); + int i, j; + uint16_t r[8]; + + for (j = 0; j < 4 << SHIFT; ) { + int s0 = (j * 2) + ((offset & 3) << 2); + int d0 = (j * 2) + ((offset & 4) << 0); + for (i = 0; i < LANE_WIDTH / 2; i++, d0++) { + r[i] = 0; + r[i] += abs1(v->B(d0 + 0) - s->B(s0 + 0)); + r[i] += abs1(v->B(d0 + 1) - s->B(s0 + 1)); + r[i] += abs1(v->B(d0 + 2) - s->B(s0 + 2)); + r[i] += abs1(v->B(d0 + 3) - s->B(s0 + 3)); + } + for (i = 0; i < LANE_WIDTH / 2; i++, j++) { + d->W(j) = r[i]; + } + offset >>= 3; } - - *d = r; } /* SSE4.2 op helpers */ -#define FCMPGTQ(d, s) ((int64_t)d > (int64_t)s ? 
-1 : 0) -SSE_HELPER_Q(helper_pcmpgtq, FCMPGTQ) - +#if SHIFT == 1 static inline int pcmp_elen(CPUX86State *env, int reg, uint32_t ctrl) { - int val; + target_long val, limit; /* Presence of REX.W is indicated by a bit higher than 7 set */ if (ctrl >> 8) { - val = abs1((int64_t)env->regs[reg]); + val = (target_long)env->regs[reg]; } else { - val = abs1((int32_t)env->regs[reg]); + val = (int32_t)env->regs[reg]; } - if (ctrl & 1) { - if (val > 8) { - return 8; - } + limit = 8; } else { - if (val > 16) { - return 16; - } + limit = 16; } - return val; + if ((val > limit) || (val < -limit)) { + return limit; + } + return abs1(val); } static inline int pcmp_ilen(Reg *r, uint8_t ctrl) @@ -1998,7 +1955,7 @@ static inline int pcmp_val(Reg *r, uint8_t ctrl, int i) } static inline unsigned pcmpxstrx(CPUX86State *env, Reg *d, Reg *s, - int8_t ctrl, int valids, int validd) + uint8_t ctrl, int valids, int validd) { unsigned int res = 0; int v; @@ -2044,10 +2001,10 @@ static inline unsigned pcmpxstrx(CPUX86State *env, Reg *d, Reg *s, res = (2 << upper) - 1; break; } - for (j = valids - validd; j >= 0; j--) { + for (j = valids == upper ? 
valids : valids - validd; j >= 0; j--) { res <<= 1; v = 1; - for (i = validd; i >= 0; i--) { + for (i = MIN(valids - j, validd); i >= 0; i--) { v &= (pcmp_val(s, ctrl, i + j) == pcmp_val(d, ctrl, i)); } res |= v; @@ -2164,92 +2121,74 @@ target_ulong helper_crc32(uint32_t crc1, target_ulong msg, uint32_t len) return crc; } -void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, +#endif + +void glue(helper_pclmulqdq, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s, uint32_t ctrl) { - uint64_t ah, al, b, resh, resl; + int a_idx = (ctrl & 1) != 0; + int b_idx = (ctrl & 16) != 0; - ah = 0; - al = d->Q((ctrl & 1) != 0); - b = s->Q((ctrl & 16) != 0); - resh = resl = 0; + for (int i = 0; i < SHIFT; i++) { + uint64_t a = v->Q(2 * i + a_idx); + uint64_t b = s->Q(2 * i + b_idx); + Int128 *r = (Int128 *)&d->ZMM_X(i); - while (b) { - if (b & 1) { - resl ^= al; - resh ^= ah; - } - ah = (ah << 1) | (al >> 63); - al <<= 1; - b >>= 1; + *r = clmul_64(a, b); } - - d->Q(0) = resl; - d->Q(1) = resh; } -void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_aesdec, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { - int i; - Reg st = *d; - Reg rk = *s; + for (int i = 0; i < SHIFT; i++) { + AESState *ad = (AESState *)&d->ZMM_X(i); + AESState *st = (AESState *)&v->ZMM_X(i); + AESState *rk = (AESState *)&s->ZMM_X(i); - for (i = 0 ; i < 4 ; i++) { - d->L(i) = rk.L(i) ^ bswap32(AES_Td0[st.B(AES_ishifts[4*i+0])] ^ - AES_Td1[st.B(AES_ishifts[4*i+1])] ^ - AES_Td2[st.B(AES_ishifts[4*i+2])] ^ - AES_Td3[st.B(AES_ishifts[4*i+3])]); + aesdec_ISB_ISR_IMC_AK(ad, st, rk, false); } } -void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_aesdeclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { - int i; - Reg st = *d; - Reg rk = *s; + for (int i = 0; i < SHIFT; i++) { + AESState *ad = (AESState *)&d->ZMM_X(i); + AESState *st = (AESState *)&v->ZMM_X(i); + AESState *rk = (AESState *)&s->ZMM_X(i); - for (i = 0; i < 
16; i++) { - d->B(i) = rk.B(i) ^ (AES_isbox[st.B(AES_ishifts[i])]); + aesdec_ISB_ISR_AK(ad, st, rk, false); } } -void glue(helper_aesenc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_aesenc, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { - int i; - Reg st = *d; - Reg rk = *s; + for (int i = 0; i < SHIFT; i++) { + AESState *ad = (AESState *)&d->ZMM_X(i); + AESState *st = (AESState *)&v->ZMM_X(i); + AESState *rk = (AESState *)&s->ZMM_X(i); - for (i = 0 ; i < 4 ; i++) { - d->L(i) = rk.L(i) ^ bswap32(AES_Te0[st.B(AES_shifts[4*i+0])] ^ - AES_Te1[st.B(AES_shifts[4*i+1])] ^ - AES_Te2[st.B(AES_shifts[4*i+2])] ^ - AES_Te3[st.B(AES_shifts[4*i+3])]); + aesenc_SB_SR_MC_AK(ad, st, rk, false); } } -void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +void glue(helper_aesenclast, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) { - int i; - Reg st = *d; - Reg rk = *s; + for (int i = 0; i < SHIFT; i++) { + AESState *ad = (AESState *)&d->ZMM_X(i); + AESState *st = (AESState *)&v->ZMM_X(i); + AESState *rk = (AESState *)&s->ZMM_X(i); - for (i = 0; i < 16; i++) { - d->B(i) = rk.B(i) ^ (AES_sbox[st.B(AES_shifts[i])]); + aesenc_SB_SR_AK(ad, st, rk, false); } - } +#if SHIFT == 1 void glue(helper_aesimc, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) { - int i; - Reg tmp = *s; + AESState *ad = (AESState *)&d->ZMM_X(0); + AESState *st = (AESState *)&s->ZMM_X(0); - for (i = 0 ; i < 4 ; i++) { - d->L(i) = bswap32(AES_imc[tmp.B(4*i+0)][0] ^ - AES_imc[tmp.B(4*i+1)][1] ^ - AES_imc[tmp.B(4*i+2)][2] ^ - AES_imc[tmp.B(4*i+3)][3]); - } + aesdec_IMC(ad, st, false); } void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, @@ -2266,7 +2205,459 @@ void glue(helper_aeskeygenassist, SUFFIX)(CPUX86State *env, Reg *d, Reg *s, d->L(3) = (d->L(2) << 24 | d->L(2) >> 8) ^ ctrl; } #endif +#endif + +#if SHIFT >= 1 +void glue(helper_vpermilpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) +{ + uint64_t r0, r1; + int i; + + for (i = 0; i < 1 << SHIFT; i 
+= 2) { + r0 = v->Q(i + ((s->Q(i) >> 1) & 1)); + r1 = v->Q(i + ((s->Q(i+1) >> 1) & 1)); + d->Q(i) = r0; + d->Q(i+1) = r1; + } +} + +void glue(helper_vpermilps, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) +{ + uint32_t r0, r1, r2, r3; + int i; + + for (i = 0; i < 2 << SHIFT; i += 4) { + r0 = v->L(i + (s->L(i) & 3)); + r1 = v->L(i + (s->L(i+1) & 3)); + r2 = v->L(i + (s->L(i+2) & 3)); + r3 = v->L(i + (s->L(i+3) & 3)); + d->L(i) = r0; + d->L(i+1) = r1; + d->L(i+2) = r2; + d->L(i+3) = r3; + } +} + +void glue(helper_vpermilpd_imm, SUFFIX)(Reg *d, Reg *s, uint32_t order) +{ + uint64_t r0, r1; + int i; + + for (i = 0; i < 1 << SHIFT; i += 2) { + r0 = s->Q(i + ((order >> 0) & 1)); + r1 = s->Q(i + ((order >> 1) & 1)); + d->Q(i) = r0; + d->Q(i+1) = r1; + + order >>= 2; + } +} + +void glue(helper_vpermilps_imm, SUFFIX)(Reg *d, Reg *s, uint32_t order) +{ + uint32_t r0, r1, r2, r3; + int i; + + for (i = 0; i < 2 << SHIFT; i += 4) { + r0 = s->L(i + ((order >> 0) & 3)); + r1 = s->L(i + ((order >> 2) & 3)); + r2 = s->L(i + ((order >> 4) & 3)); + r3 = s->L(i + ((order >> 6) & 3)); + d->L(i) = r0; + d->L(i+1) = r1; + d->L(i+2) = r2; + d->L(i+3) = r3; + } +} + +#if SHIFT == 1 +#define FPSRLVD(x, c) (c < 32 ? ((x) >> c) : 0) +#define FPSRLVQ(x, c) (c < 64 ? ((x) >> c) : 0) +#define FPSRAVD(x, c) ((int32_t)(x) >> (c < 32 ? c : 31)) +#define FPSRAVQ(x, c) ((int64_t)(x) >> (c < 64 ? c : 63)) +#define FPSLLVD(x, c) (c < 32 ? ((x) << c) : 0) +#define FPSLLVQ(x, c) (c < 64 ? ((x) << c) : 0) +#endif + +SSE_HELPER_L(helper_vpsrlvd, FPSRLVD) +SSE_HELPER_L(helper_vpsravd, FPSRAVD) +SSE_HELPER_L(helper_vpsllvd, FPSLLVD) + +SSE_HELPER_Q(helper_vpsrlvq, FPSRLVQ) +SSE_HELPER_Q(helper_vpsravq, FPSRAVQ) +SSE_HELPER_Q(helper_vpsllvq, FPSLLVQ) + +void glue(helper_vtestps, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +{ + uint32_t zf = 0, cf = 0; + int i; + + for (i = 0; i < 2 << SHIFT; i++) { + zf |= (s->L(i) & d->L(i)); + cf |= (s->L(i) & ~d->L(i)); + } + CC_SRC = ((zf >> 31) ? 
0 : CC_Z) | ((cf >> 31) ? 0 : CC_C); +} + +void glue(helper_vtestpd, SUFFIX)(CPUX86State *env, Reg *d, Reg *s) +{ + uint64_t zf = 0, cf = 0; + int i; + + for (i = 0; i < 1 << SHIFT; i++) { + zf |= (s->Q(i) & d->Q(i)); + cf |= (s->Q(i) & ~d->Q(i)); + } + CC_SRC = ((zf >> 63) ? 0 : CC_Z) | ((cf >> 63) ? 0 : CC_C); +} + +void glue(helper_vpmaskmovd_st, SUFFIX)(CPUX86State *env, + Reg *v, Reg *s, target_ulong a0) +{ + int i; + + for (i = 0; i < (2 << SHIFT); i++) { + if (v->L(i) >> 31) { + cpu_stl_data_ra(env, a0 + i * 4, s->L(i), GETPC()); + } + } +} + +void glue(helper_vpmaskmovq_st, SUFFIX)(CPUX86State *env, + Reg *v, Reg *s, target_ulong a0) +{ + int i; + + for (i = 0; i < (1 << SHIFT); i++) { + if (v->Q(i) >> 63) { + cpu_stq_data_ra(env, a0 + i * 8, s->Q(i), GETPC()); + } + } +} + +void glue(helper_vpmaskmovd, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) +{ + int i; + + for (i = 0; i < (2 << SHIFT); i++) { + d->L(i) = (v->L(i) >> 31) ? s->L(i) : 0; + } +} + +void glue(helper_vpmaskmovq, SUFFIX)(CPUX86State *env, Reg *d, Reg *v, Reg *s) +{ + int i; + + for (i = 0; i < (1 << SHIFT); i++) { + d->Q(i) = (v->Q(i) >> 63) ? 
s->Q(i) : 0; + } +} + +void glue(helper_vpgatherdd, SUFFIX)(CPUX86State *env, + Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale) +{ + int i; + for (i = 0; i < (2 << SHIFT); i++) { + if (v->L(i) >> 31) { + target_ulong addr = a0 + + ((target_ulong)(int32_t)s->L(i) << scale); + d->L(i) = cpu_ldl_data_ra(env, addr, GETPC()); + } + v->L(i) = 0; + } +} + +void glue(helper_vpgatherdq, SUFFIX)(CPUX86State *env, + Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale) +{ + int i; + for (i = 0; i < (1 << SHIFT); i++) { + if (v->Q(i) >> 63) { + target_ulong addr = a0 + + ((target_ulong)(int32_t)s->L(i) << scale); + d->Q(i) = cpu_ldq_data_ra(env, addr, GETPC()); + } + v->Q(i) = 0; + } +} + +void glue(helper_vpgatherqd, SUFFIX)(CPUX86State *env, + Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale) +{ + int i; + for (i = 0; i < (1 << SHIFT); i++) { + if (v->L(i) >> 31) { + target_ulong addr = a0 + + ((target_ulong)(int64_t)s->Q(i) << scale); + d->L(i) = cpu_ldl_data_ra(env, addr, GETPC()); + } + v->L(i) = 0; + } + for (i /= 2; i < 1 << SHIFT; i++) { + d->Q(i) = 0; + v->Q(i) = 0; + } +} + +void glue(helper_vpgatherqq, SUFFIX)(CPUX86State *env, + Reg *d, Reg *v, Reg *s, target_ulong a0, unsigned scale) +{ + int i; + for (i = 0; i < (1 << SHIFT); i++) { + if (v->Q(i) >> 63) { + target_ulong addr = a0 + + ((target_ulong)(int64_t)s->Q(i) << scale); + d->Q(i) = cpu_ldq_data_ra(env, addr, GETPC()); + } + v->Q(i) = 0; + } +} +#endif + +#if SHIFT >= 2 +void helper_vpermdq_ymm(Reg *d, Reg *v, Reg *s, uint32_t order) +{ + uint64_t r0, r1, r2, r3; + + switch (order & 3) { + case 0: + r0 = v->Q(0); + r1 = v->Q(1); + break; + case 1: + r0 = v->Q(2); + r1 = v->Q(3); + break; + case 2: + r0 = s->Q(0); + r1 = s->Q(1); + break; + case 3: + r0 = s->Q(2); + r1 = s->Q(3); + break; + default: /* default case added to help the compiler to avoid warnings */ + g_assert_not_reached(); + } + switch ((order >> 4) & 3) { + case 0: + r2 = v->Q(0); + r3 = v->Q(1); + break; + case 1: + r2 = 
v->Q(2); + r3 = v->Q(3); + break; + case 2: + r2 = s->Q(0); + r3 = s->Q(1); + break; + case 3: + r2 = s->Q(2); + r3 = s->Q(3); + break; + default: /* default case added to help the compiler to avoid warnings */ + g_assert_not_reached(); + } + d->Q(0) = r0; + d->Q(1) = r1; + d->Q(2) = r2; + d->Q(3) = r3; + if (order & 0x8) { + d->Q(0) = 0; + d->Q(1) = 0; + } + if (order & 0x80) { + d->Q(2) = 0; + d->Q(3) = 0; + } +} + +void helper_vpermq_ymm(Reg *d, Reg *s, uint32_t order) +{ + uint64_t r0, r1, r2, r3; + r0 = s->Q(order & 3); + r1 = s->Q((order >> 2) & 3); + r2 = s->Q((order >> 4) & 3); + r3 = s->Q((order >> 6) & 3); + d->Q(0) = r0; + d->Q(1) = r1; + d->Q(2) = r2; + d->Q(3) = r3; +} + +void helper_vpermd_ymm(Reg *d, Reg *v, Reg *s) +{ + uint32_t r[8]; + int i; + + for (i = 0; i < 8; i++) { + r[i] = s->L(v->L(i) & 7); + } + for (i = 0; i < 8; i++) { + d->L(i) = r[i]; + } +} +#endif + +/* FMA3 op helpers */ +#if SHIFT == 1 +#define SSE_HELPER_FMAS(name, elem, F) \ + void name(CPUX86State *env, Reg *d, Reg *a, Reg *b, Reg *c, int flags) \ + { \ + d->elem(0) = F(a->elem(0), b->elem(0), c->elem(0), flags, &env->sse_status); \ + } +#define SSE_HELPER_FMAP(name, elem, num, F) \ + void glue(name, SUFFIX)(CPUX86State *env, Reg *d, Reg *a, Reg *b, Reg *c, \ + int flags, int flip) \ + { \ + int i; \ + for (i = 0; i < num; i++) { \ + d->elem(i) = F(a->elem(i), b->elem(i), c->elem(i), flags, &env->sse_status); \ + flags ^= flip; \ + } \ + } + +SSE_HELPER_FMAS(helper_fma4ss, ZMM_S, float32_muladd) +SSE_HELPER_FMAS(helper_fma4sd, ZMM_D, float64_muladd) +#endif + +#if SHIFT >= 1 +SSE_HELPER_FMAP(helper_fma4ps, ZMM_S, 2 << SHIFT, float32_muladd) +SSE_HELPER_FMAP(helper_fma4pd, ZMM_D, 1 << SHIFT, float64_muladd) +#endif + +#if SHIFT == 1 +#define SSE_HELPER_SHA1RNDS4(name, F, K) \ + void name(Reg *d, Reg *a, Reg *b) \ + { \ + uint32_t A, B, C, D, E, t, i; \ + \ + A = a->L(3); \ + B = a->L(2); \ + C = a->L(1); \ + D = a->L(0); \ + E = 0; \ + \ + for (i = 0; i <= 3; i++) { \ + t = F(B, 
C, D) + rol32(A, 5) + b->L(3 - i) + E + K; \ + E = D; \ + D = C; \ + C = rol32(B, 30); \ + B = A; \ + A = t; \ + } \ + \ + d->L(3) = A; \ + d->L(2) = B; \ + d->L(1) = C; \ + d->L(0) = D; \ + } + +#define SHA1_F0(b, c, d) (((b) & (c)) ^ (~(b) & (d))) +#define SHA1_F1(b, c, d) ((b) ^ (c) ^ (d)) +#define SHA1_F2(b, c, d) (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d))) + +SSE_HELPER_SHA1RNDS4(helper_sha1rnds4_f0, SHA1_F0, 0x5A827999) +SSE_HELPER_SHA1RNDS4(helper_sha1rnds4_f1, SHA1_F1, 0x6ED9EBA1) +SSE_HELPER_SHA1RNDS4(helper_sha1rnds4_f2, SHA1_F2, 0x8F1BBCDC) +SSE_HELPER_SHA1RNDS4(helper_sha1rnds4_f3, SHA1_F1, 0xCA62C1D6) + +void helper_sha1nexte(Reg *d, Reg *a, Reg *b) +{ + d->L(3) = b->L(3) + rol32(a->L(3), 30); + d->L(2) = b->L(2); + d->L(1) = b->L(1); + d->L(0) = b->L(0); +} + +void helper_sha1msg1(Reg *d, Reg *a, Reg *b) +{ + /* These could be overwritten by the first two assignments, save them. */ + uint32_t b3 = b->L(3); + uint32_t b2 = b->L(2); + + d->L(3) = a->L(3) ^ a->L(1); + d->L(2) = a->L(2) ^ a->L(0); + d->L(1) = a->L(1) ^ b3; + d->L(0) = a->L(0) ^ b2; +} + +void helper_sha1msg2(Reg *d, Reg *a, Reg *b) +{ + d->L(3) = rol32(a->L(3) ^ b->L(2), 1); + d->L(2) = rol32(a->L(2) ^ b->L(1), 1); + d->L(1) = rol32(a->L(1) ^ b->L(0), 1); + d->L(0) = rol32(a->L(0) ^ d->L(3), 1); +} + +#define SHA256_CH(e, f, g) (((e) & (f)) ^ (~(e) & (g))) +#define SHA256_MAJ(a, b, c) (((a) & (b)) ^ ((a) & (c)) ^ ((b) & (c))) + +#define SHA256_RNDS0(w) (ror32((w), 2) ^ ror32((w), 13) ^ ror32((w), 22)) +#define SHA256_RNDS1(w) (ror32((w), 6) ^ ror32((w), 11) ^ ror32((w), 25)) +#define SHA256_MSGS0(w) (ror32((w), 7) ^ ror32((w), 18) ^ ((w) >> 3)) +#define SHA256_MSGS1(w) (ror32((w), 17) ^ ror32((w), 19) ^ ((w) >> 10)) + +void helper_sha256rnds2(Reg *d, Reg *a, Reg *b, uint32_t wk0, uint32_t wk1) +{ + uint32_t t, AA, EE; + + uint32_t A = b->L(3); + uint32_t B = b->L(2); + uint32_t C = a->L(3); + uint32_t D = a->L(2); + uint32_t E = b->L(1); + uint32_t F = b->L(0); + uint32_t G = a->L(1); + 
uint32_t H = a->L(0); + + /* Even round */ + t = SHA256_CH(E, F, G) + SHA256_RNDS1(E) + wk0 + H; + AA = t + SHA256_MAJ(A, B, C) + SHA256_RNDS0(A); + EE = t + D; + + /* These will be B and F at the end of the odd round */ + d->L(2) = AA; + d->L(0) = EE; + + D = C, C = B, B = A, A = AA; + H = G, G = F, F = E, E = EE; + + /* Odd round */ + t = SHA256_CH(E, F, G) + SHA256_RNDS1(E) + wk1 + H; + AA = t + SHA256_MAJ(A, B, C) + SHA256_RNDS0(A); + EE = t + D; + + d->L(3) = AA; + d->L(1) = EE; +} + +void helper_sha256msg1(Reg *d, Reg *a, Reg *b) +{ + /* b->L(0) could be overwritten by the first assignment, save it. */ + uint32_t b0 = b->L(0); + + d->L(0) = a->L(0) + SHA256_MSGS0(a->L(1)); + d->L(1) = a->L(1) + SHA256_MSGS0(a->L(2)); + d->L(2) = a->L(2) + SHA256_MSGS0(a->L(3)); + d->L(3) = a->L(3) + SHA256_MSGS0(b0); +} + +void helper_sha256msg2(Reg *d, Reg *a, Reg *b) +{ + /* Earlier assignments cannot overwrite any of the two operands. */ + d->L(0) = a->L(0) + SHA256_MSGS1(b->L(2)); + d->L(1) = a->L(1) + SHA256_MSGS1(b->L(3)); + /* Yes, this reuses the previously computed values. */ + d->L(2) = a->L(2) + SHA256_MSGS1(d->L(0)); + d->L(3) = a->L(3) + SHA256_MSGS1(d->L(1)); +} +#endif + +#undef SSE_HELPER_S +#undef LANE_WIDTH #undef SHIFT #undef XMM_ONLY #undef Reg diff --git a/target/i386/ops_sse_header.h b/target/i386/ops_sse_header.h deleted file mode 100644 index 094aafc573..0000000000 --- a/target/i386/ops_sse_header.h +++ /dev/null @@ -1,359 +0,0 @@ -/* - * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support - * - * Copyright (c) 2005 Fabrice Bellard - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see <http://www.gnu.org/licenses/>. - */ -#if SHIFT == 0 -#define Reg MMXReg -#define SUFFIX _mmx -#else -#define Reg ZMMReg -#define SUFFIX _xmm -#endif - -#define dh_alias_Reg ptr -#define dh_alias_ZMMReg ptr -#define dh_alias_MMXReg ptr -#define dh_ctype_Reg Reg * -#define dh_ctype_ZMMReg ZMMReg * -#define dh_ctype_MMXReg MMXReg * -#define dh_is_signed_Reg dh_is_signed_ptr -#define dh_is_signed_ZMMReg dh_is_signed_ptr -#define dh_is_signed_MMXReg dh_is_signed_ptr - -DEF_HELPER_3(glue(psrlw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psraw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psllw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psrld, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psrad, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pslld, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psrlq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psllq, SUFFIX), void, env, Reg, Reg) - -#if SHIFT == 1 -DEF_HELPER_3(glue(psrldq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pslldq, SUFFIX), void, env, Reg, Reg) -#endif - -#define SSE_HELPER_B(name, F)\ - DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg) - -#define SSE_HELPER_W(name, F)\ - DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg) - -#define SSE_HELPER_L(name, F)\ - DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg) - -#define SSE_HELPER_Q(name, F)\ - DEF_HELPER_3(glue(name, SUFFIX), void, env, Reg, Reg) - -SSE_HELPER_B(paddb, FADD) -SSE_HELPER_W(paddw, FADD) -SSE_HELPER_L(paddl, FADD) -SSE_HELPER_Q(paddq, FADD) - -SSE_HELPER_B(psubb, FSUB) -SSE_HELPER_W(psubw, FSUB) -SSE_HELPER_L(psubl, FSUB) 
-SSE_HELPER_Q(psubq, FSUB) - -SSE_HELPER_B(paddusb, FADDUB) -SSE_HELPER_B(paddsb, FADDSB) -SSE_HELPER_B(psubusb, FSUBUB) -SSE_HELPER_B(psubsb, FSUBSB) - -SSE_HELPER_W(paddusw, FADDUW) -SSE_HELPER_W(paddsw, FADDSW) -SSE_HELPER_W(psubusw, FSUBUW) -SSE_HELPER_W(psubsw, FSUBSW) - -SSE_HELPER_B(pminub, FMINUB) -SSE_HELPER_B(pmaxub, FMAXUB) - -SSE_HELPER_W(pminsw, FMINSW) -SSE_HELPER_W(pmaxsw, FMAXSW) - -SSE_HELPER_Q(pand, FAND) -SSE_HELPER_Q(pandn, FANDN) -SSE_HELPER_Q(por, FOR) -SSE_HELPER_Q(pxor, FXOR) - -SSE_HELPER_B(pcmpgtb, FCMPGTB) -SSE_HELPER_W(pcmpgtw, FCMPGTW) -SSE_HELPER_L(pcmpgtl, FCMPGTL) - -SSE_HELPER_B(pcmpeqb, FCMPEQ) -SSE_HELPER_W(pcmpeqw, FCMPEQ) -SSE_HELPER_L(pcmpeql, FCMPEQ) - -SSE_HELPER_W(pmullw, FMULLW) -#if SHIFT == 0 -SSE_HELPER_W(pmulhrw, FMULHRW) -#endif -SSE_HELPER_W(pmulhuw, FMULHUW) -SSE_HELPER_W(pmulhw, FMULHW) - -SSE_HELPER_B(pavgb, FAVG) -SSE_HELPER_W(pavgw, FAVG) - -DEF_HELPER_3(glue(pmuludq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmaddwd, SUFFIX), void, env, Reg, Reg) - -DEF_HELPER_3(glue(psadbw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_4(glue(maskmov, SUFFIX), void, env, Reg, Reg, tl) -DEF_HELPER_2(glue(movl_mm_T0, SUFFIX), void, Reg, i32) -#ifdef TARGET_X86_64 -DEF_HELPER_2(glue(movq_mm_T0, SUFFIX), void, Reg, i64) -#endif - -#if SHIFT == 0 -DEF_HELPER_3(glue(pshufw, SUFFIX), void, Reg, Reg, int) -#else -DEF_HELPER_3(shufps, void, Reg, Reg, int) -DEF_HELPER_3(shufpd, void, Reg, Reg, int) -DEF_HELPER_3(glue(pshufd, SUFFIX), void, Reg, Reg, int) -DEF_HELPER_3(glue(pshuflw, SUFFIX), void, Reg, Reg, int) -DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int) -#endif - -#if SHIFT == 1 -/* FPU ops */ -/* XXX: not accurate */ - -#define SSE_HELPER_S(name, F) \ - DEF_HELPER_3(name ## ps, void, env, Reg, Reg) \ - DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \ - DEF_HELPER_3(name ## pd, void, env, Reg, Reg) \ - DEF_HELPER_3(name ## sd, void, env, Reg, Reg) - -SSE_HELPER_S(add, FPU_ADD) -SSE_HELPER_S(sub, FPU_SUB) 
-SSE_HELPER_S(mul, FPU_MUL) -SSE_HELPER_S(div, FPU_DIV) -SSE_HELPER_S(min, FPU_MIN) -SSE_HELPER_S(max, FPU_MAX) -SSE_HELPER_S(sqrt, FPU_SQRT) - - -DEF_HELPER_3(cvtps2pd, void, env, Reg, Reg) -DEF_HELPER_3(cvtpd2ps, void, env, Reg, Reg) -DEF_HELPER_3(cvtss2sd, void, env, Reg, Reg) -DEF_HELPER_3(cvtsd2ss, void, env, Reg, Reg) -DEF_HELPER_3(cvtdq2ps, void, env, Reg, Reg) -DEF_HELPER_3(cvtdq2pd, void, env, Reg, Reg) -DEF_HELPER_3(cvtpi2ps, void, env, ZMMReg, MMXReg) -DEF_HELPER_3(cvtpi2pd, void, env, ZMMReg, MMXReg) -DEF_HELPER_3(cvtsi2ss, void, env, ZMMReg, i32) -DEF_HELPER_3(cvtsi2sd, void, env, ZMMReg, i32) - -#ifdef TARGET_X86_64 -DEF_HELPER_3(cvtsq2ss, void, env, ZMMReg, i64) -DEF_HELPER_3(cvtsq2sd, void, env, ZMMReg, i64) -#endif - -DEF_HELPER_3(cvtps2dq, void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(cvtpd2dq, void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(cvtps2pi, void, env, MMXReg, ZMMReg) -DEF_HELPER_3(cvtpd2pi, void, env, MMXReg, ZMMReg) -DEF_HELPER_2(cvtss2si, s32, env, ZMMReg) -DEF_HELPER_2(cvtsd2si, s32, env, ZMMReg) -#ifdef TARGET_X86_64 -DEF_HELPER_2(cvtss2sq, s64, env, ZMMReg) -DEF_HELPER_2(cvtsd2sq, s64, env, ZMMReg) -#endif - -DEF_HELPER_3(cvttps2dq, void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(cvttpd2dq, void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(cvttps2pi, void, env, MMXReg, ZMMReg) -DEF_HELPER_3(cvttpd2pi, void, env, MMXReg, ZMMReg) -DEF_HELPER_2(cvttss2si, s32, env, ZMMReg) -DEF_HELPER_2(cvttsd2si, s32, env, ZMMReg) -#ifdef TARGET_X86_64 -DEF_HELPER_2(cvttss2sq, s64, env, ZMMReg) -DEF_HELPER_2(cvttsd2sq, s64, env, ZMMReg) -#endif - -DEF_HELPER_3(rsqrtps, void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(rsqrtss, void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(rcpps, void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(rcpss, void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(extrq_r, void, env, ZMMReg, ZMMReg) -DEF_HELPER_4(extrq_i, void, env, ZMMReg, int, int) -DEF_HELPER_3(insertq_r, void, env, ZMMReg, ZMMReg) -DEF_HELPER_4(insertq_i, void, env, ZMMReg, int, int) -DEF_HELPER_3(haddps, void, env, 
ZMMReg, ZMMReg) -DEF_HELPER_3(haddpd, void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(hsubps, void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(hsubpd, void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(addsubps, void, env, ZMMReg, ZMMReg) -DEF_HELPER_3(addsubpd, void, env, ZMMReg, ZMMReg) - -#define SSE_HELPER_CMP(name, F) \ - DEF_HELPER_3(name ## ps, void, env, Reg, Reg) \ - DEF_HELPER_3(name ## ss, void, env, Reg, Reg) \ - DEF_HELPER_3(name ## pd, void, env, Reg, Reg) \ - DEF_HELPER_3(name ## sd, void, env, Reg, Reg) - -SSE_HELPER_CMP(cmpeq, FPU_CMPEQ) -SSE_HELPER_CMP(cmplt, FPU_CMPLT) -SSE_HELPER_CMP(cmple, FPU_CMPLE) -SSE_HELPER_CMP(cmpunord, FPU_CMPUNORD) -SSE_HELPER_CMP(cmpneq, FPU_CMPNEQ) -SSE_HELPER_CMP(cmpnlt, FPU_CMPNLT) -SSE_HELPER_CMP(cmpnle, FPU_CMPNLE) -SSE_HELPER_CMP(cmpord, FPU_CMPORD) - -DEF_HELPER_3(ucomiss, void, env, Reg, Reg) -DEF_HELPER_3(comiss, void, env, Reg, Reg) -DEF_HELPER_3(ucomisd, void, env, Reg, Reg) -DEF_HELPER_3(comisd, void, env, Reg, Reg) -DEF_HELPER_2(movmskps, i32, env, Reg) -DEF_HELPER_2(movmskpd, i32, env, Reg) -#endif - -DEF_HELPER_2(glue(pmovmskb, SUFFIX), i32, env, Reg) -DEF_HELPER_3(glue(packsswb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(packuswb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(packssdw, SUFFIX), void, env, Reg, Reg) -#define UNPCK_OP(base_name, base) \ - DEF_HELPER_3(glue(punpck ## base_name ## bw, SUFFIX), void, env, Reg, Reg) \ - DEF_HELPER_3(glue(punpck ## base_name ## wd, SUFFIX), void, env, Reg, Reg) \ - DEF_HELPER_3(glue(punpck ## base_name ## dq, SUFFIX), void, env, Reg, Reg) - -UNPCK_OP(l, 0) -UNPCK_OP(h, 1) - -#if SHIFT == 1 -DEF_HELPER_3(glue(punpcklqdq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(punpckhqdq, SUFFIX), void, env, Reg, Reg) -#endif - -/* 3DNow! 
float ops */ -#if SHIFT == 0 -DEF_HELPER_3(pi2fd, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pi2fw, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pf2id, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pf2iw, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfacc, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfadd, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfcmpeq, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfcmpge, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfcmpgt, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfmax, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfmin, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfmul, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfnacc, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfpnacc, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfrcp, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfrsqrt, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfsub, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pfsubr, void, env, MMXReg, MMXReg) -DEF_HELPER_3(pswapd, void, env, MMXReg, MMXReg) -#endif - -/* SSSE3 op helpers */ -DEF_HELPER_3(glue(phaddw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(phaddd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(phaddsw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(phsubw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(phsubd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(phsubsw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pabsb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pabsw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pabsd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmaddubsw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmulhrsw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pshufb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psignb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psignw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(psignd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_4(glue(palignr, SUFFIX), void, env, Reg, Reg, s32) - -/* SSE4.1 op helpers */ -#if SHIFT == 1 -DEF_HELPER_3(glue(pblendvb, SUFFIX), void, 
env, Reg, Reg) -DEF_HELPER_3(glue(blendvps, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(blendvpd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(ptest, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmovsxbw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmovsxbd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmovsxbq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmovsxwd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmovsxwq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmovsxdq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmovzxbw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmovzxbd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmovzxbq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmovzxwd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmovzxwq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmovzxdq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmuldq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pcmpeqq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(packusdw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pminsb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pminsd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pminuw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pminud, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmaxsb, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmaxsd, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmaxuw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmaxud, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(pmulld, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(phminposuw, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_4(glue(roundps, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(roundpd, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(roundss, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(roundsd, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(blendps, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(blendpd, SUFFIX), 
void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(pblendw, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(dpps, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(dppd, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(mpsadbw, SUFFIX), void, env, Reg, Reg, i32) -#endif - -/* SSE4.2 op helpers */ -#if SHIFT == 1 -DEF_HELPER_3(glue(pcmpgtq, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_4(glue(pcmpestri, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(pcmpestrm, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(pcmpistri, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(pcmpistrm, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_3(crc32, tl, i32, tl, i32) -#endif - -/* AES-NI op helpers */ -#if SHIFT == 1 -DEF_HELPER_3(glue(aesdec, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(aesdeclast, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(aesenc, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(aesenclast, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_3(glue(aesimc, SUFFIX), void, env, Reg, Reg) -DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32) -DEF_HELPER_4(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, i32) -#endif - -#undef SHIFT -#undef Reg -#undef SUFFIX - -#undef SSE_HELPER_B -#undef SSE_HELPER_W -#undef SSE_HELPER_L -#undef SSE_HELPER_Q -#undef SSE_HELPER_S -#undef SSE_HELPER_CMP -#undef UNPCK_OP diff --git a/target/i386/sev-stub.c b/target/i386/sev-stub.c deleted file mode 100644 index 59a003a4eb..0000000000 --- a/target/i386/sev-stub.c +++ /dev/null @@ -1,51 +0,0 @@ -/* - * QEMU SEV stub - * - * Copyright Advanced Micro Devices 2018 - * - * Authors: - * Brijesh Singh <brijesh.singh@amd.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - -#include "qemu/osdep.h" -#include "qemu-common.h" -#include "sev_i386.h" - -SevInfo *sev_get_info(void) -{ - return NULL; -} - -bool sev_enabled(void) -{ - return false; -} - -uint64_t sev_get_me_mask(void) -{ - return ~0; -} - -uint32_t sev_get_cbit_position(void) -{ - return 0; -} - -uint32_t sev_get_reduced_phys_bits(void) -{ - return 0; -} - -char *sev_get_launch_measurement(void) -{ - return NULL; -} - -SevCapability *sev_get_capabilities(void) -{ - return NULL; -} diff --git a/target/i386/sev-sysemu-stub.c b/target/i386/sev-sysemu-stub.c new file mode 100644 index 0000000000..96e1c15cc3 --- /dev/null +++ b/target/i386/sev-sysemu-stub.c @@ -0,0 +1,69 @@ +/* + * QEMU SEV system stub + * + * Copyright Advanced Micro Devices 2018 + * + * Authors: + * Brijesh Singh <brijesh.singh@amd.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include "qemu/osdep.h" +#include "monitor/monitor.h" +#include "monitor/hmp-target.h" +#include "qapi/qapi-commands-misc-target.h" +#include "qapi/error.h" +#include "sev.h" + +SevInfo *qmp_query_sev(Error **errp) +{ + error_setg(errp, "SEV is not available in this QEMU"); + return NULL; +} + +SevLaunchMeasureInfo *qmp_query_sev_launch_measure(Error **errp) +{ + error_setg(errp, "SEV is not available in this QEMU"); + return NULL; +} + +SevCapability *qmp_query_sev_capabilities(Error **errp) +{ + error_setg(errp, "SEV is not available in this QEMU"); + return NULL; +} + +void qmp_sev_inject_launch_secret(const char *packet_header, const char *secret, + bool has_gpa, uint64_t gpa, Error **errp) +{ + error_setg(errp, "SEV is not available in this QEMU"); +} + +int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) +{ + g_assert_not_reached(); +} + +void sev_es_set_reset_vector(CPUState *cpu) +{ +} + +int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) +{ + g_assert_not_reached(); +} + 
+SevAttestationReport *qmp_query_sev_attestation_report(const char *mnonce, + Error **errp) +{ + error_setg(errp, "SEV is not available in this QEMU"); + return NULL; +} + +void hmp_info_sev(Monitor *mon, const QDict *qdict) +{ + monitor_printf(mon, "SEV is not available in this QEMU\n"); +} diff --git a/target/i386/sev.c b/target/i386/sev.c index 2395171acf..72930ff0dc 100644 --- a/target/i386/sev.c +++ b/target/i386/sev.c @@ -11,51 +11,148 @@ * */ +#include "qemu/osdep.h" + #include <linux/kvm.h> #include <linux/psp-sev.h> #include <sys/ioctl.h> -#include "qemu/osdep.h" #include "qapi/error.h" #include "qom/object_interfaces.h" #include "qemu/base64.h" +#include "qemu/module.h" +#include "qemu/uuid.h" +#include "qemu/error-report.h" +#include "crypto/hash.h" #include "sysemu/kvm.h" -#include "sev_i386.h" +#include "sev.h" #include "sysemu/sysemu.h" +#include "sysemu/runstate.h" #include "trace.h" #include "migration/blocker.h" +#include "qom/object.h" +#include "monitor/monitor.h" +#include "monitor/hmp-target.h" +#include "qapi/qapi-commands-misc-target.h" +#include "exec/confidential-guest-support.h" +#include "hw/i386/pc.h" +#include "exec/address-spaces.h" + +#define TYPE_SEV_GUEST "sev-guest" +OBJECT_DECLARE_SIMPLE_TYPE(SevGuestState, SEV_GUEST) + + +/** + * SevGuestState: + * + * The SevGuestState object is used for creating and managing a SEV + * guest. 
+ * + * # $QEMU \ + * -object sev-guest,id=sev0 \ + * -machine ...,memory-encryption=sev0 + */ +struct SevGuestState { + ConfidentialGuestSupport parent_obj; + + /* configuration parameters */ + char *sev_device; + uint32_t policy; + char *dh_cert_file; + char *session_file; + uint32_t cbitpos; + uint32_t reduced_phys_bits; + bool kernel_hashes; + + /* runtime state */ + uint32_t handle; + uint8_t api_major; + uint8_t api_minor; + uint8_t build_id; + int sev_fd; + SevState state; + gchar *measurement; + + uint32_t reset_cs; + uint32_t reset_ip; + bool reset_data_valid; +}; #define DEFAULT_GUEST_POLICY 0x1 /* disable debug */ #define DEFAULT_SEV_DEVICE "/dev/sev" -static SEVState *sev_state; +#define SEV_INFO_BLOCK_GUID "00f771de-1a7e-4fcb-890e-68c77e2fb44e" +typedef struct __attribute__((__packed__)) SevInfoBlock { + /* SEV-ES Reset Vector Address */ + uint32_t reset_addr; +} SevInfoBlock; + +#define SEV_HASH_TABLE_RV_GUID "7255371f-3a3b-4b04-927b-1da6efa8d454" +typedef struct QEMU_PACKED SevHashTableDescriptor { + /* SEV hash table area guest address */ + uint32_t base; + /* SEV hash table area size (in bytes) */ + uint32_t size; +} SevHashTableDescriptor; + +/* hard code sha256 digest size */ +#define HASH_SIZE 32 + +typedef struct QEMU_PACKED SevHashTableEntry { + QemuUUID guid; + uint16_t len; + uint8_t hash[HASH_SIZE]; +} SevHashTableEntry; + +typedef struct QEMU_PACKED SevHashTable { + QemuUUID guid; + uint16_t len; + SevHashTableEntry cmdline; + SevHashTableEntry initrd; + SevHashTableEntry kernel; +} SevHashTable; + +/* + * Data encrypted by sev_encrypt_flash() must be padded to a multiple of + * 16 bytes. 
+ */ +typedef struct QEMU_PACKED PaddedSevHashTable { + SevHashTable ht; + uint8_t padding[ROUND_UP(sizeof(SevHashTable), 16) - sizeof(SevHashTable)]; +} PaddedSevHashTable; + +QEMU_BUILD_BUG_ON(sizeof(PaddedSevHashTable) % 16 != 0); + +static SevGuestState *sev_guest; static Error *sev_mig_blocker; static const char *const sev_fw_errlist[] = { - "", - "Platform state is invalid", - "Guest state is invalid", - "Platform configuration is invalid", - "Buffer too small", - "Platform is already owned", - "Certificate is invalid", - "Policy is not allowed", - "Guest is not active", - "Invalid address", - "Bad signature", - "Bad measurement", - "Asid is already owned", - "Invalid ASID", - "WBINVD is required", - "DF_FLUSH is required", - "Guest handle is invalid", - "Invalid command", - "Guest is active", - "Hardware error", - "Hardware unsafe", - "Feature not supported", - "Invalid parameter" + [SEV_RET_SUCCESS] = "", + [SEV_RET_INVALID_PLATFORM_STATE] = "Platform state is invalid", + [SEV_RET_INVALID_GUEST_STATE] = "Guest state is invalid", + [SEV_RET_INAVLID_CONFIG] = "Platform configuration is invalid", + [SEV_RET_INVALID_LEN] = "Buffer too small", + [SEV_RET_ALREADY_OWNED] = "Platform is already owned", + [SEV_RET_INVALID_CERTIFICATE] = "Certificate is invalid", + [SEV_RET_POLICY_FAILURE] = "Policy is not allowed", + [SEV_RET_INACTIVE] = "Guest is not active", + [SEV_RET_INVALID_ADDRESS] = "Invalid address", + [SEV_RET_BAD_SIGNATURE] = "Bad signature", + [SEV_RET_BAD_MEASUREMENT] = "Bad measurement", + [SEV_RET_ASID_OWNED] = "ASID is already owned", + [SEV_RET_INVALID_ASID] = "Invalid ASID", + [SEV_RET_WBINVD_REQUIRED] = "WBINVD is required", + [SEV_RET_DFFLUSH_REQUIRED] = "DF_FLUSH is required", + [SEV_RET_INVALID_GUEST] = "Guest handle is invalid", + [SEV_RET_INVALID_COMMAND] = "Invalid command", + [SEV_RET_ACTIVE] = "Guest is active", + [SEV_RET_HWSEV_RET_PLATFORM] = "Hardware error", + [SEV_RET_HWSEV_RET_UNSAFE] = "Hardware unsafe", + [SEV_RET_UNSUPPORTED] = 
"Feature not supported", + [SEV_RET_INVALID_PARAM] = "Invalid parameter", + [SEV_RET_RESOURCE_LIMIT] = "Required firmware resource depleted", + [SEV_RET_SECURE_DATA_INVALID] = "Part-specific integrity check failure", }; #define SEV_FW_MAX_ERROR ARRAY_SIZE(sev_fw_errlist) @@ -70,7 +167,7 @@ sev_ioctl(int fd, int cmd, void *data, int *error) input.id = cmd; input.sev_fd = fd; - input.data = (__u64)(unsigned long)data; + input.data = (uintptr_t)data; r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, &input); @@ -108,55 +205,79 @@ fw_error_to_str(int code) } static bool -sev_check_state(SevState state) +sev_check_state(const SevGuestState *sev, SevState state) { - assert(sev_state); - return sev_state->state == state ? true : false; + assert(sev); + return sev->state == state ? true : false; } static void -sev_set_guest_state(SevState new_state) +sev_set_guest_state(SevGuestState *sev, SevState new_state) { assert(new_state < SEV_STATE__MAX); - assert(sev_state); + assert(sev); - trace_kvm_sev_change_state(SevState_str(sev_state->state), + trace_kvm_sev_change_state(SevState_str(sev->state), SevState_str(new_state)); - sev_state->state = new_state; + sev->state = new_state; } static void -sev_ram_block_added(RAMBlockNotifier *n, void *host, size_t size) +sev_ram_block_added(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size) { int r; struct kvm_enc_region range; + ram_addr_t offset; + MemoryRegion *mr; + + /* + * The RAM device presents a memory region that should be treated + * as IO region and should not be pinned. 
+ */ + mr = memory_region_from_host(host, &offset); + if (mr && memory_region_is_ram_device(mr)) { + return; + } - range.addr = (__u64)(unsigned long)host; - range.size = size; + range.addr = (uintptr_t)host; + range.size = max_size; - trace_kvm_memcrypt_register_region(host, size); + trace_kvm_memcrypt_register_region(host, max_size); r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_REG_REGION, &range); if (r) { error_report("%s: failed to register region (%p+%#zx) error '%s'", - __func__, host, size, strerror(errno)); + __func__, host, max_size, strerror(errno)); exit(1); } } static void -sev_ram_block_removed(RAMBlockNotifier *n, void *host, size_t size) +sev_ram_block_removed(RAMBlockNotifier *n, void *host, size_t size, + size_t max_size) { int r; struct kvm_enc_region range; + ram_addr_t offset; + MemoryRegion *mr; + + /* + * The RAM device presents a memory region that should be treated + * as IO region and should not have been pinned. + */ + mr = memory_region_from_host(host, &offset); + if (mr && memory_region_is_ram_device(mr)) { + return; + } - range.addr = (__u64)(unsigned long)host; - range.size = size; + range.addr = (uintptr_t)host; + range.size = max_size; - trace_kvm_memcrypt_unregister_region(host, size); + trace_kvm_memcrypt_unregister_region(host, max_size); r = kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_UNREG_REGION, &range); if (r) { error_report("%s: failed to unregister region (%p+%#zx)", - __func__, host, size); + __func__, host, max_size); } } @@ -166,269 +287,209 @@ static struct RAMBlockNotifier sev_ram_notifier = { }; static void -qsev_guest_finalize(Object *obj) +sev_guest_finalize(Object *obj) { } static char * -qsev_guest_get_session_file(Object *obj, Error **errp) +sev_guest_get_session_file(Object *obj, Error **errp) { - QSevGuestInfo *s = QSEV_GUEST_INFO(obj); + SevGuestState *s = SEV_GUEST(obj); return s->session_file ? 
g_strdup(s->session_file) : NULL; } static void -qsev_guest_set_session_file(Object *obj, const char *value, Error **errp) +sev_guest_set_session_file(Object *obj, const char *value, Error **errp) { - QSevGuestInfo *s = QSEV_GUEST_INFO(obj); + SevGuestState *s = SEV_GUEST(obj); s->session_file = g_strdup(value); } static char * -qsev_guest_get_dh_cert_file(Object *obj, Error **errp) +sev_guest_get_dh_cert_file(Object *obj, Error **errp) { - QSevGuestInfo *s = QSEV_GUEST_INFO(obj); + SevGuestState *s = SEV_GUEST(obj); return g_strdup(s->dh_cert_file); } static void -qsev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) +sev_guest_set_dh_cert_file(Object *obj, const char *value, Error **errp) { - QSevGuestInfo *s = QSEV_GUEST_INFO(obj); + SevGuestState *s = SEV_GUEST(obj); s->dh_cert_file = g_strdup(value); } static char * -qsev_guest_get_sev_device(Object *obj, Error **errp) +sev_guest_get_sev_device(Object *obj, Error **errp) { - QSevGuestInfo *sev = QSEV_GUEST_INFO(obj); + SevGuestState *sev = SEV_GUEST(obj); return g_strdup(sev->sev_device); } static void -qsev_guest_set_sev_device(Object *obj, const char *value, Error **errp) +sev_guest_set_sev_device(Object *obj, const char *value, Error **errp) { - QSevGuestInfo *sev = QSEV_GUEST_INFO(obj); + SevGuestState *sev = SEV_GUEST(obj); sev->sev_device = g_strdup(value); } -static void -qsev_guest_class_init(ObjectClass *oc, void *data) -{ - object_class_property_add_str(oc, "sev-device", - qsev_guest_get_sev_device, - qsev_guest_set_sev_device, - NULL); - object_class_property_set_description(oc, "sev-device", - "SEV device to use", NULL); - object_class_property_add_str(oc, "dh-cert-file", - qsev_guest_get_dh_cert_file, - qsev_guest_set_dh_cert_file, - NULL); - object_class_property_set_description(oc, "dh-cert-file", - "guest owners DH certificate (encoded with base64)", NULL); - object_class_property_add_str(oc, "session-file", - qsev_guest_get_session_file, - qsev_guest_set_session_file, - 
NULL); - object_class_property_set_description(oc, "session-file", - "guest owners session parameters (encoded with base64)", NULL); -} - -static void -qsev_guest_set_handle(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - QSevGuestInfo *sev = QSEV_GUEST_INFO(obj); - uint32_t value; - - visit_type_uint32(v, name, &value, errp); - sev->handle = value; -} - -static void -qsev_guest_set_policy(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - QSevGuestInfo *sev = QSEV_GUEST_INFO(obj); - uint32_t value; - - visit_type_uint32(v, name, &value, errp); - sev->policy = value; -} - -static void -qsev_guest_set_cbitpos(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) +static bool sev_guest_get_kernel_hashes(Object *obj, Error **errp) { - QSevGuestInfo *sev = QSEV_GUEST_INFO(obj); - uint32_t value; + SevGuestState *sev = SEV_GUEST(obj); - visit_type_uint32(v, name, &value, errp); - sev->cbitpos = value; + return sev->kernel_hashes; } -static void -qsev_guest_set_reduced_phys_bits(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) +static void sev_guest_set_kernel_hashes(Object *obj, bool value, Error **errp) { - QSevGuestInfo *sev = QSEV_GUEST_INFO(obj); - uint32_t value; + SevGuestState *sev = SEV_GUEST(obj); - visit_type_uint32(v, name, &value, errp); - sev->reduced_phys_bits = value; + sev->kernel_hashes = value; } static void -qsev_guest_get_policy(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) +sev_guest_class_init(ObjectClass *oc, void *data) { - uint32_t value; - QSevGuestInfo *sev = QSEV_GUEST_INFO(obj); - - value = sev->policy; - visit_type_uint32(v, name, &value, errp); -} - -static void -qsev_guest_get_handle(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - uint32_t value; - QSevGuestInfo *sev = QSEV_GUEST_INFO(obj); - - value = sev->handle; - visit_type_uint32(v, name, &value, errp); -} - -static void 
-qsev_guest_get_cbitpos(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - uint32_t value; - QSevGuestInfo *sev = QSEV_GUEST_INFO(obj); - - value = sev->cbitpos; - visit_type_uint32(v, name, &value, errp); -} - -static void -qsev_guest_get_reduced_phys_bits(Object *obj, Visitor *v, const char *name, - void *opaque, Error **errp) -{ - uint32_t value; - QSevGuestInfo *sev = QSEV_GUEST_INFO(obj); - - value = sev->reduced_phys_bits; - visit_type_uint32(v, name, &value, errp); + object_class_property_add_str(oc, "sev-device", + sev_guest_get_sev_device, + sev_guest_set_sev_device); + object_class_property_set_description(oc, "sev-device", + "SEV device to use"); + object_class_property_add_str(oc, "dh-cert-file", + sev_guest_get_dh_cert_file, + sev_guest_set_dh_cert_file); + object_class_property_set_description(oc, "dh-cert-file", + "guest owners DH certificate (encoded with base64)"); + object_class_property_add_str(oc, "session-file", + sev_guest_get_session_file, + sev_guest_set_session_file); + object_class_property_set_description(oc, "session-file", + "guest owners session parameters (encoded with base64)"); + object_class_property_add_bool(oc, "kernel-hashes", + sev_guest_get_kernel_hashes, + sev_guest_set_kernel_hashes); + object_class_property_set_description(oc, "kernel-hashes", + "add kernel hashes to guest firmware for measured Linux boot"); } static void -qsev_guest_init(Object *obj) +sev_guest_instance_init(Object *obj) { - QSevGuestInfo *sev = QSEV_GUEST_INFO(obj); + SevGuestState *sev = SEV_GUEST(obj); sev->sev_device = g_strdup(DEFAULT_SEV_DEVICE); sev->policy = DEFAULT_GUEST_POLICY; - object_property_add(obj, "policy", "uint32", qsev_guest_get_policy, - qsev_guest_set_policy, NULL, NULL, NULL); - object_property_add(obj, "handle", "uint32", qsev_guest_get_handle, - qsev_guest_set_handle, NULL, NULL, NULL); - object_property_add(obj, "cbitpos", "uint32", qsev_guest_get_cbitpos, - qsev_guest_set_cbitpos, NULL, NULL, NULL); - 
object_property_add(obj, "reduced-phys-bits", "uint32", - qsev_guest_get_reduced_phys_bits, - qsev_guest_set_reduced_phys_bits, NULL, NULL, NULL); + object_property_add_uint32_ptr(obj, "policy", &sev->policy, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "handle", &sev->handle, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "cbitpos", &sev->cbitpos, + OBJ_PROP_FLAG_READWRITE); + object_property_add_uint32_ptr(obj, "reduced-phys-bits", + &sev->reduced_phys_bits, + OBJ_PROP_FLAG_READWRITE); } /* sev guest info */ -static const TypeInfo qsev_guest_info = { - .parent = TYPE_OBJECT, - .name = TYPE_QSEV_GUEST_INFO, - .instance_size = sizeof(QSevGuestInfo), - .instance_finalize = qsev_guest_finalize, - .class_size = sizeof(QSevGuestInfoClass), - .class_init = qsev_guest_class_init, - .instance_init = qsev_guest_init, +static const TypeInfo sev_guest_info = { + .parent = TYPE_CONFIDENTIAL_GUEST_SUPPORT, + .name = TYPE_SEV_GUEST, + .instance_size = sizeof(SevGuestState), + .instance_finalize = sev_guest_finalize, + .class_init = sev_guest_class_init, + .instance_init = sev_guest_instance_init, .interfaces = (InterfaceInfo[]) { { TYPE_USER_CREATABLE }, { } } }; -static QSevGuestInfo * -lookup_sev_guest_info(const char *id) -{ - Object *obj; - QSevGuestInfo *info; - - obj = object_resolve_path_component(object_get_objects_root(), id); - if (!obj) { - return NULL; - } - - info = (QSevGuestInfo *) - object_dynamic_cast(obj, TYPE_QSEV_GUEST_INFO); - if (!info) { - return NULL; - } - - return info; -} - bool sev_enabled(void) { - return sev_state ? true : false; + return !!sev_guest; } -uint64_t -sev_get_me_mask(void) +bool +sev_es_enabled(void) { - return sev_state ? sev_state->me_mask : ~0; + return sev_enabled() && (sev_guest->policy & SEV_POLICY_ES); } uint32_t sev_get_cbit_position(void) { - return sev_state ? sev_state->cbitpos : 0; + return sev_guest ? 
sev_guest->cbitpos : 0; } uint32_t sev_get_reduced_phys_bits(void) { - return sev_state ? sev_state->reduced_phys_bits : 0; + return sev_guest ? sev_guest->reduced_phys_bits : 0; } -SevInfo * -sev_get_info(void) +static SevInfo *sev_get_info(void) { SevInfo *info; info = g_new0(SevInfo, 1); - info->enabled = sev_state ? true : false; + info->enabled = sev_enabled(); if (info->enabled) { - info->api_major = sev_state->api_major; - info->api_minor = sev_state->api_minor; - info->build_id = sev_state->build_id; - info->policy = sev_state->policy; - info->state = sev_state->state; - info->handle = sev_state->handle; + info->api_major = sev_guest->api_major; + info->api_minor = sev_guest->api_minor; + info->build_id = sev_guest->build_id; + info->policy = sev_guest->policy; + info->state = sev_guest->state; + info->handle = sev_guest->handle; } return info; } +SevInfo *qmp_query_sev(Error **errp) +{ + SevInfo *info; + + info = sev_get_info(); + if (!info) { + error_setg(errp, "SEV feature is not available"); + return NULL; + } + + return info; +} + +void hmp_info_sev(Monitor *mon, const QDict *qdict) +{ + SevInfo *info = sev_get_info(); + + if (info && info->enabled) { + monitor_printf(mon, "handle: %d\n", info->handle); + monitor_printf(mon, "state: %s\n", SevState_str(info->state)); + monitor_printf(mon, "build: %d\n", info->build_id); + monitor_printf(mon, "api version: %d.%d\n", + info->api_major, info->api_minor); + monitor_printf(mon, "debug: %s\n", + info->policy & SEV_POLICY_NODBG ? "off" : "on"); + monitor_printf(mon, "key-sharing: %s\n", + info->policy & SEV_POLICY_NOKS ? 
"off" : "on"); + } else { + monitor_printf(mon, "SEV is not enabled\n"); + } + + qapi_free_SevInfo(info); +} + static int sev_get_pdh_info(int fd, guchar **pdh, size_t *pdh_len, guchar **cert_chain, - size_t *cert_chain_len) + size_t *cert_chain_len, Error **errp) { guchar *pdh_data = NULL; guchar *cert_chain_data = NULL; @@ -439,8 +500,9 @@ sev_get_pdh_info(int fd, guchar **pdh, size_t *pdh_len, guchar **cert_chain, r = sev_platform_ioctl(fd, SEV_PDH_CERT_EXPORT, &export, &err); if (r < 0) { if (err != SEV_RET_INVALID_LEN) { - error_report("failed to export PDH cert ret=%d fw_err=%d (%s)", - r, err, fw_error_to_str(err)); + error_setg(errp, "SEV: Failed to export PDH cert" + " ret=%d fw_err=%d (%s)", + r, err, fw_error_to_str(err)); return 1; } } @@ -452,8 +514,8 @@ sev_get_pdh_info(int fd, guchar **pdh, size_t *pdh_len, guchar **cert_chain, r = sev_platform_ioctl(fd, SEV_PDH_CERT_EXPORT, &export, &err); if (r < 0) { - error_report("failed to export PDH cert ret=%d fw_err=%d (%s)", - r, err, fw_error_to_str(err)); + error_setg(errp, "SEV: Failed to export PDH cert ret=%d fw_err=%d (%s)", + r, err, fw_error_to_str(err)); goto e_free; } @@ -469,31 +531,78 @@ e_free: return 1; } -SevCapability * -sev_get_capabilities(void) +static int sev_get_cpu0_id(int fd, guchar **id, size_t *id_len, Error **errp) +{ + guchar *id_data; + struct sev_user_data_get_id2 get_id2 = {}; + int err, r; + + /* query the ID length */ + r = sev_platform_ioctl(fd, SEV_GET_ID2, &get_id2, &err); + if (r < 0 && err != SEV_RET_INVALID_LEN) { + error_setg(errp, "SEV: Failed to get ID ret=%d fw_err=%d (%s)", + r, err, fw_error_to_str(err)); + return 1; + } + + id_data = g_new(guchar, get_id2.length); + get_id2.address = (unsigned long)id_data; + + r = sev_platform_ioctl(fd, SEV_GET_ID2, &get_id2, &err); + if (r < 0) { + error_setg(errp, "SEV: Failed to get ID ret=%d fw_err=%d (%s)", + r, err, fw_error_to_str(err)); + goto err; + } + + *id = id_data; + *id_len = get_id2.length; + return 0; + +err: + 
g_free(id_data); + return 1; +} + +static SevCapability *sev_get_capabilities(Error **errp) { SevCapability *cap = NULL; guchar *pdh_data = NULL; guchar *cert_chain_data = NULL; - size_t pdh_len = 0, cert_chain_len = 0; + guchar *cpu0_id_data = NULL; + size_t pdh_len = 0, cert_chain_len = 0, cpu0_id_len = 0; uint32_t ebx; int fd; + if (!kvm_enabled()) { + error_setg(errp, "KVM not enabled"); + return NULL; + } + if (kvm_vm_ioctl(kvm_state, KVM_MEMORY_ENCRYPT_OP, NULL) < 0) { + error_setg(errp, "SEV is not enabled in KVM"); + return NULL; + } + fd = open(DEFAULT_SEV_DEVICE, O_RDWR); if (fd < 0) { - error_report("%s: Failed to open %s '%s'", __func__, - DEFAULT_SEV_DEVICE, strerror(errno)); + error_setg_errno(errp, errno, "SEV: Failed to open %s", + DEFAULT_SEV_DEVICE); return NULL; } if (sev_get_pdh_info(fd, &pdh_data, &pdh_len, - &cert_chain_data, &cert_chain_len)) { + &cert_chain_data, &cert_chain_len, errp)) { + goto out; + } + + if (sev_get_cpu0_id(fd, &cpu0_id_data, &cpu0_id_len, errp)) { goto out; } cap = g_new0(SevCapability, 1); cap->pdh = g_base64_encode(pdh_data, pdh_len); cap->cert_chain = g_base64_encode(cert_chain_data, cert_chain_len); + cap->cpu0_id = g_base64_encode(cpu0_id_data, cpu0_id_len); host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); cap->cbitpos = ebx & 0x3f; @@ -505,21 +614,97 @@ sev_get_capabilities(void) cap->reduced_phys_bits = 1; out: + g_free(cpu0_id_data); g_free(pdh_data); g_free(cert_chain_data); close(fd); return cap; } +SevCapability *qmp_query_sev_capabilities(Error **errp) +{ + return sev_get_capabilities(errp); +} + +static SevAttestationReport *sev_get_attestation_report(const char *mnonce, + Error **errp) +{ + struct kvm_sev_attestation_report input = {}; + SevAttestationReport *report = NULL; + SevGuestState *sev = sev_guest; + g_autofree guchar *data = NULL; + g_autofree guchar *buf = NULL; + gsize len; + int err = 0, ret; + + if (!sev_enabled()) { + error_setg(errp, "SEV is not enabled"); + return NULL; + } + + /* lets 
decode the mnonce string */ + buf = g_base64_decode(mnonce, &len); + if (!buf) { + error_setg(errp, "SEV: failed to decode mnonce input"); + return NULL; + } + + /* verify the input mnonce length */ + if (len != sizeof(input.mnonce)) { + error_setg(errp, "SEV: mnonce must be %zu bytes (got %" G_GSIZE_FORMAT ")", + sizeof(input.mnonce), len); + return NULL; + } + + /* Query the report length */ + ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + &input, &err); + if (ret < 0) { + if (err != SEV_RET_INVALID_LEN) { + error_setg(errp, "SEV: Failed to query the attestation report" + " length ret=%d fw_err=%d (%s)", + ret, err, fw_error_to_str(err)); + return NULL; + } + } + + data = g_malloc(input.len); + input.uaddr = (unsigned long)data; + memcpy(input.mnonce, buf, sizeof(input.mnonce)); + + /* Query the report */ + ret = sev_ioctl(sev->sev_fd, KVM_SEV_GET_ATTESTATION_REPORT, + &input, &err); + if (ret) { + error_setg_errno(errp, errno, "SEV: Failed to get attestation report" + " ret=%d fw_err=%d (%s)", ret, err, fw_error_to_str(err)); + return NULL; + } + + report = g_new0(SevAttestationReport, 1); + report->data = g_base64_encode(data, input.len); + + trace_kvm_sev_attestation_report(mnonce, report->data); + + return report; +} + +SevAttestationReport *qmp_query_sev_attestation_report(const char *mnonce, + Error **errp) +{ + return sev_get_attestation_report(mnonce, errp); +} + static int sev_read_file_base64(const char *filename, guchar **data, gsize *len) { gsize sz; - gchar *base64; + g_autofree gchar *base64 = NULL; GError *error = NULL; if (!g_file_get_contents(filename, &base64, &sz, &error)) { - error_report("failed to read '%s' (%s)", filename, error->message); + error_report("SEV: Failed to read '%s' (%s)", filename, error->message); + g_error_free(error); return -1; } @@ -528,61 +713,52 @@ sev_read_file_base64(const char *filename, guchar **data, gsize *len) } static int -sev_launch_start(SEVState *s) +sev_launch_start(SevGuestState *sev) { 
gsize sz; int ret = 1; int fw_error, rc; - QSevGuestInfo *sev = s->sev_info; - struct kvm_sev_launch_start *start; + struct kvm_sev_launch_start start = { + .handle = sev->handle, .policy = sev->policy + }; guchar *session = NULL, *dh_cert = NULL; - start = g_new0(struct kvm_sev_launch_start, 1); - - start->handle = object_property_get_int(OBJECT(sev), "handle", - &error_abort); - start->policy = object_property_get_int(OBJECT(sev), "policy", - &error_abort); if (sev->session_file) { if (sev_read_file_base64(sev->session_file, &session, &sz) < 0) { goto out; } - start->session_uaddr = (unsigned long)session; - start->session_len = sz; + start.session_uaddr = (unsigned long)session; + start.session_len = sz; } if (sev->dh_cert_file) { if (sev_read_file_base64(sev->dh_cert_file, &dh_cert, &sz) < 0) { goto out; } - start->dh_uaddr = (unsigned long)dh_cert; - start->dh_len = sz; + start.dh_uaddr = (unsigned long)dh_cert; + start.dh_len = sz; } - trace_kvm_sev_launch_start(start->policy, session, dh_cert); - rc = sev_ioctl(s->sev_fd, KVM_SEV_LAUNCH_START, start, &fw_error); + trace_kvm_sev_launch_start(start.policy, session, dh_cert); + rc = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_START, &start, &fw_error); if (rc < 0) { error_report("%s: LAUNCH_START ret=%d fw_error=%d '%s'", __func__, ret, fw_error, fw_error_to_str(fw_error)); goto out; } - object_property_set_int(OBJECT(sev), start->handle, "handle", - &error_abort); - sev_set_guest_state(SEV_STATE_LAUNCH_UPDATE); - s->handle = start->handle; - s->policy = start->policy; + sev_set_guest_state(sev, SEV_STATE_LAUNCH_UPDATE); + sev->handle = start.handle; ret = 0; out: - g_free(start); g_free(session); g_free(dh_cert); return ret; } static int -sev_launch_update_data(uint8_t *addr, uint64_t len) +sev_launch_update_data(SevGuestState *sev, uint8_t *addr, uint64_t len) { int ret, fw_error; struct kvm_sev_launch_update_data update; @@ -591,10 +767,10 @@ sev_launch_update_data(uint8_t *addr, uint64_t len) return 1; } - 
update.uaddr = (__u64)(unsigned long)addr; + update.uaddr = (uintptr_t)addr; update.len = len; trace_kvm_sev_launch_update_data(addr, len); - ret = sev_ioctl(sev_state->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, + ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_DATA, &update, &fw_error); if (ret) { error_report("%s: LAUNCH_UPDATE ret=%d fw_error=%d '%s'", @@ -604,212 +780,614 @@ sev_launch_update_data(uint8_t *addr, uint64_t len) return ret; } +static int +sev_launch_update_vmsa(SevGuestState *sev) +{ + int ret, fw_error; + + ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL, &fw_error); + if (ret) { + error_report("%s: LAUNCH_UPDATE_VMSA ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); + } + + return ret; +} + static void sev_launch_get_measure(Notifier *notifier, void *unused) { + SevGuestState *sev = sev_guest; int ret, error; - guchar *data; - SEVState *s = sev_state; - struct kvm_sev_launch_measure *measurement; + g_autofree guchar *data = NULL; + struct kvm_sev_launch_measure measurement = {}; - if (!sev_check_state(SEV_STATE_LAUNCH_UPDATE)) { + if (!sev_check_state(sev, SEV_STATE_LAUNCH_UPDATE)) { return; } - measurement = g_new0(struct kvm_sev_launch_measure, 1); + if (sev_es_enabled()) { + /* measure all the VM save areas before getting launch_measure */ + ret = sev_launch_update_vmsa(sev); + if (ret) { + exit(1); + } + } /* query the measurement blob length */ - ret = sev_ioctl(sev_state->sev_fd, KVM_SEV_LAUNCH_MEASURE, - measurement, &error); - if (!measurement->len) { + ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, + &measurement, &error); + if (!measurement.len) { error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", __func__, ret, error, fw_error_to_str(errno)); - goto free_measurement; + return; } - data = g_new0(guchar, measurement->len); - measurement->uaddr = (unsigned long)data; + data = g_new0(guchar, measurement.len); + measurement.uaddr = (unsigned long)data; /* get the measurement blob */ - 
ret = sev_ioctl(sev_state->sev_fd, KVM_SEV_LAUNCH_MEASURE, - measurement, &error); + ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_MEASURE, + &measurement, &error); if (ret) { error_report("%s: LAUNCH_MEASURE ret=%d fw_error=%d '%s'", __func__, ret, error, fw_error_to_str(errno)); - goto free_data; + return; } - sev_set_guest_state(SEV_STATE_LAUNCH_SECRET); + sev_set_guest_state(sev, SEV_STATE_LAUNCH_SECRET); /* encode the measurement value and emit the event */ - s->measurement = g_base64_encode(data, measurement->len); - trace_kvm_sev_launch_measurement(s->measurement); - -free_data: - g_free(data); -free_measurement: - g_free(measurement); + sev->measurement = g_base64_encode(data, measurement.len); + trace_kvm_sev_launch_measurement(sev->measurement); } -char * -sev_get_launch_measurement(void) +static char *sev_get_launch_measurement(void) { - if (sev_state && - sev_state->state >= SEV_STATE_LAUNCH_SECRET) { - return g_strdup(sev_state->measurement); + if (sev_guest && + sev_guest->state >= SEV_STATE_LAUNCH_SECRET) { + return g_strdup(sev_guest->measurement); } return NULL; } +SevLaunchMeasureInfo *qmp_query_sev_launch_measure(Error **errp) +{ + char *data; + SevLaunchMeasureInfo *info; + + data = sev_get_launch_measurement(); + if (!data) { + error_setg(errp, "SEV launch measurement is not available"); + return NULL; + } + + info = g_malloc0(sizeof(*info)); + info->data = data; + + return info; +} + static Notifier sev_machine_done_notify = { .notify = sev_launch_get_measure, }; static void -sev_launch_finish(SEVState *s) +sev_launch_finish(SevGuestState *sev) { int ret, error; - Error *local_err = NULL; trace_kvm_sev_launch_finish(); - ret = sev_ioctl(sev_state->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, &error); + ret = sev_ioctl(sev->sev_fd, KVM_SEV_LAUNCH_FINISH, 0, &error); if (ret) { error_report("%s: LAUNCH_FINISH ret=%d fw_error=%d '%s'", __func__, ret, error, fw_error_to_str(error)); exit(1); } - sev_set_guest_state(SEV_STATE_RUNNING); + 
sev_set_guest_state(sev, SEV_STATE_RUNNING); /* add migration blocker */ error_setg(&sev_mig_blocker, "SEV: Migration is not implemented"); - ret = migrate_add_blocker(sev_mig_blocker, &local_err); - if (local_err) { - error_report_err(local_err); - error_free(sev_mig_blocker); - exit(1); - } + migrate_add_blocker(&sev_mig_blocker, &error_fatal); } static void -sev_vm_state_change(void *opaque, int running, RunState state) +sev_vm_state_change(void *opaque, bool running, RunState state) { - SEVState *s = opaque; + SevGuestState *sev = opaque; if (running) { - if (!sev_check_state(SEV_STATE_RUNNING)) { - sev_launch_finish(s); + if (!sev_check_state(sev, SEV_STATE_RUNNING)) { + sev_launch_finish(sev); } } } -void * -sev_guest_init(const char *id) +int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp) { - SEVState *s; + SevGuestState *sev + = (SevGuestState *)object_dynamic_cast(OBJECT(cgs), TYPE_SEV_GUEST); char *devname; - int ret, fw_error; + int ret, fw_error, cmd; uint32_t ebx; uint32_t host_cbitpos; struct sev_user_data_status status = {}; - sev_state = s = g_new0(SEVState, 1); - s->sev_info = lookup_sev_guest_info(id); - if (!s->sev_info) { - error_report("%s: '%s' is not a valid '%s' object", - __func__, id, TYPE_QSEV_GUEST_INFO); - goto err; + if (!sev) { + return 0; + } + + ret = ram_block_discard_disable(true); + if (ret) { + error_report("%s: cannot disable RAM discard", __func__); + return -1; } - s->state = SEV_STATE_UNINIT; + sev_guest = sev; + sev->state = SEV_STATE_UNINIT; host_cpuid(0x8000001F, 0, NULL, &ebx, NULL, NULL); host_cbitpos = ebx & 0x3f; - s->cbitpos = object_property_get_int(OBJECT(s->sev_info), "cbitpos", NULL); - if (host_cbitpos != s->cbitpos) { - error_report("%s: cbitpos check failed, host '%d' requested '%d'", - __func__, host_cbitpos, s->cbitpos); + /* + * The cbitpos value will be placed in bit positions 5:0 of the EBX + * register of CPUID 0x8000001F. 
No need to verify the range as the + * comparison against the host value accomplishes that. + */ + if (host_cbitpos != sev->cbitpos) { + error_setg(errp, "%s: cbitpos check failed, host '%d' requested '%d'", + __func__, host_cbitpos, sev->cbitpos); goto err; } - s->reduced_phys_bits = object_property_get_int(OBJECT(s->sev_info), - "reduced-phys-bits", NULL); - if (s->reduced_phys_bits < 1) { - error_report("%s: reduced_phys_bits check failed, it should be >=1," - "' requested '%d'", __func__, s->reduced_phys_bits); + /* + * The reduced-phys-bits value will be placed in bit positions 11:6 of + * the EBX register of CPUID 0x8000001F, so verify the supplied value + * is in the range of 1 to 63. + */ + if (sev->reduced_phys_bits < 1 || sev->reduced_phys_bits > 63) { + error_setg(errp, "%s: reduced_phys_bits check failed," + " it should be in the range of 1 to 63, requested '%d'", + __func__, sev->reduced_phys_bits); goto err; } - s->me_mask = ~(1UL << s->cbitpos); - - devname = object_property_get_str(OBJECT(s->sev_info), "sev-device", NULL); - s->sev_fd = open(devname, O_RDWR); - if (s->sev_fd < 0) { - error_report("%s: Failed to open %s '%s'", __func__, - devname, strerror(errno)); - } - g_free(devname); - if (s->sev_fd < 0) { + devname = object_property_get_str(OBJECT(sev), "sev-device", NULL); + sev->sev_fd = open(devname, O_RDWR); + if (sev->sev_fd < 0) { + error_setg(errp, "%s: Failed to open %s '%s'", __func__, + devname, strerror(errno)); + g_free(devname); goto err; } + g_free(devname); - ret = sev_platform_ioctl(s->sev_fd, SEV_PLATFORM_STATUS, &status, + ret = sev_platform_ioctl(sev->sev_fd, SEV_PLATFORM_STATUS, &status, &fw_error); if (ret) { - error_report("%s: failed to get platform status ret=%d" - "fw_error='%d: %s'", __func__, ret, fw_error, - fw_error_to_str(fw_error)); + error_setg(errp, "%s: failed to get platform status ret=%d " + "fw_error='%d: %s'", __func__, ret, fw_error, + fw_error_to_str(fw_error)); goto err; } - s->build_id = status.build; - 
s->api_major = status.api_major; - s->api_minor = status.api_minor; + sev->build_id = status.build; + sev->api_major = status.api_major; + sev->api_minor = status.api_minor; + + if (sev_es_enabled()) { + if (!kvm_kernel_irqchip_allowed()) { + error_report("%s: SEV-ES guests require in-kernel irqchip support", + __func__); + goto err; + } + + if (!(status.flags & SEV_STATUS_FLAGS_CONFIG_ES)) { + error_report("%s: guest policy requires SEV-ES, but " + "host SEV-ES support unavailable", + __func__); + goto err; + } + cmd = KVM_SEV_ES_INIT; + } else { + cmd = KVM_SEV_INIT; + } trace_kvm_sev_init(); - ret = sev_ioctl(s->sev_fd, KVM_SEV_INIT, NULL, &fw_error); + ret = sev_ioctl(sev->sev_fd, cmd, NULL, &fw_error); if (ret) { - error_report("%s: failed to initialize ret=%d fw_error=%d '%s'", - __func__, ret, fw_error, fw_error_to_str(fw_error)); + error_setg(errp, "%s: failed to initialize ret=%d fw_error=%d '%s'", + __func__, ret, fw_error, fw_error_to_str(fw_error)); goto err; } - ret = sev_launch_start(s); + ret = sev_launch_start(sev); if (ret) { - error_report("%s: failed to create encryption context", __func__); + error_setg(errp, "%s: failed to create encryption context", __func__); goto err; } ram_block_notifier_add(&sev_ram_notifier); qemu_add_machine_init_done_notifier(&sev_machine_done_notify); - qemu_add_vm_change_state_handler(sev_vm_state_change, s); + qemu_add_vm_change_state_handler(sev_vm_state_change, sev); - return s; + cgs->ready = true; + + return 0; err: - g_free(sev_state); - sev_state = NULL; - return NULL; + sev_guest = NULL; + ram_block_discard_disable(false); + return -1; } int -sev_encrypt_data(void *handle, uint8_t *ptr, uint64_t len) +sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp) { - assert(handle); + if (!sev_guest) { + return 0; + } /* if SEV is in update state then encrypt the data else do nothing */ - if (sev_check_state(SEV_STATE_LAUNCH_UPDATE)) { - return sev_launch_update_data(ptr, len); + if (sev_check_state(sev_guest, 
SEV_STATE_LAUNCH_UPDATE)) { + int ret = sev_launch_update_data(sev_guest, ptr, len); + if (ret < 0) { + error_setg(errp, "SEV: Failed to encrypt pflash rom"); + return ret; + } + } + + return 0; +} + +int sev_inject_launch_secret(const char *packet_hdr, const char *secret, + uint64_t gpa, Error **errp) +{ + ERRP_GUARD(); + struct kvm_sev_launch_secret input; + g_autofree guchar *data = NULL, *hdr = NULL; + int error, ret = 1; + void *hva; + gsize hdr_sz = 0, data_sz = 0; + MemoryRegion *mr = NULL; + + if (!sev_guest) { + error_setg(errp, "SEV not enabled for guest"); + return 1; } + /* secret can be injected only in this state */ + if (!sev_check_state(sev_guest, SEV_STATE_LAUNCH_SECRET)) { + error_setg(errp, "SEV: Not in correct state. (LSECRET) %x", + sev_guest->state); + return 1; + } + + hdr = g_base64_decode(packet_hdr, &hdr_sz); + if (!hdr || !hdr_sz) { + error_setg(errp, "SEV: Failed to decode sequence header"); + return 1; + } + + data = g_base64_decode(secret, &data_sz); + if (!data || !data_sz) { + error_setg(errp, "SEV: Failed to decode data"); + return 1; + } + + hva = gpa2hva(&mr, gpa, data_sz, errp); + if (!hva) { + error_prepend(errp, "SEV: Failed to calculate guest address: "); + return 1; + } + + input.hdr_uaddr = (uint64_t)(unsigned long)hdr; + input.hdr_len = hdr_sz; + + input.trans_uaddr = (uint64_t)(unsigned long)data; + input.trans_len = data_sz; + + input.guest_uaddr = (uint64_t)(unsigned long)hva; + input.guest_len = data_sz; + + trace_kvm_sev_launch_secret(gpa, input.guest_uaddr, + input.trans_uaddr, input.trans_len); + + ret = sev_ioctl(sev_guest->sev_fd, KVM_SEV_LAUNCH_SECRET, + &input, &error); + if (ret) { + error_setg(errp, "SEV: failed to inject secret ret=%d fw_error=%d '%s'", + ret, error, fw_error_to_str(error)); + return ret; + } + + return 0; +} + +#define SEV_SECRET_GUID "4c2eb361-7d9b-4cc3-8081-127c90d3d294" +struct sev_secret_area { + uint32_t base; + uint32_t size; +}; + +void qmp_sev_inject_launch_secret(const char 
*packet_hdr, + const char *secret, + bool has_gpa, uint64_t gpa, + Error **errp) +{ + if (!sev_enabled()) { + error_setg(errp, "SEV not enabled for guest"); + return; + } + if (!has_gpa) { + uint8_t *data; + struct sev_secret_area *area; + + if (!pc_system_ovmf_table_find(SEV_SECRET_GUID, &data, NULL)) { + error_setg(errp, "SEV: no secret area found in OVMF," + " gpa must be specified."); + return; + } + area = (struct sev_secret_area *)data; + gpa = area->base; + } + + sev_inject_launch_secret(packet_hdr, secret, gpa, errp); +} + +static int +sev_es_parse_reset_block(SevInfoBlock *info, uint32_t *addr) +{ + if (!info->reset_addr) { + error_report("SEV-ES reset address is zero"); + return 1; + } + + *addr = info->reset_addr; + return 0; } +static int +sev_es_find_reset_vector(void *flash_ptr, uint64_t flash_size, + uint32_t *addr) +{ + QemuUUID info_guid, *guid; + SevInfoBlock *info; + uint8_t *data; + uint16_t *len; + + /* + * Initialize the address to zero. An address of zero with a successful + * return code indicates that SEV-ES is not active. + */ + *addr = 0; + + /* + * Extract the AP reset vector for SEV-ES guests by locating the SEV GUID. + * The SEV GUID is located on its own (original implementation) or within + * the Firmware GUID Table (new implementation), either of which are + * located 32 bytes from the end of the flash. + * + * Check the Firmware GUID Table first. + */ + if (pc_system_ovmf_table_find(SEV_INFO_BLOCK_GUID, &data, NULL)) { + return sev_es_parse_reset_block((SevInfoBlock *)data, addr); + } + + /* + * SEV info block not found in the Firmware GUID Table (or there isn't + * a Firmware GUID Table), fall back to the original implementation. 
+ */ + data = flash_ptr + flash_size - 0x20; + + qemu_uuid_parse(SEV_INFO_BLOCK_GUID, &info_guid); + info_guid = qemu_uuid_bswap(info_guid); /* GUIDs are LE */ + + guid = (QemuUUID *)(data - sizeof(info_guid)); + if (!qemu_uuid_is_equal(guid, &info_guid)) { + error_report("SEV information block/Firmware GUID Table block not found in pflash rom"); + return 1; + } + + len = (uint16_t *)((uint8_t *)guid - sizeof(*len)); + info = (SevInfoBlock *)(data - le16_to_cpu(*len)); + + return sev_es_parse_reset_block(info, addr); +} + +void sev_es_set_reset_vector(CPUState *cpu) +{ + X86CPU *x86; + CPUX86State *env; + + /* Only update if we have valid reset information */ + if (!sev_guest || !sev_guest->reset_data_valid) { + return; + } + + /* Do not update the BSP reset state */ + if (cpu->cpu_index == 0) { + return; + } + + x86 = X86_CPU(cpu); + env = &x86->env; + + cpu_x86_load_seg_cache(env, R_CS, 0xf000, sev_guest->reset_cs, 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_CS_MASK | + DESC_R_MASK | DESC_A_MASK); + + env->eip = sev_guest->reset_ip; +} + +int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size) +{ + CPUState *cpu; + uint32_t addr; + int ret; + + if (!sev_es_enabled()) { + return 0; + } + + addr = 0; + ret = sev_es_find_reset_vector(flash_ptr, flash_size, + &addr); + if (ret) { + return ret; + } + + if (addr) { + sev_guest->reset_cs = addr & 0xffff0000; + sev_guest->reset_ip = addr & 0x0000ffff; + sev_guest->reset_data_valid = true; + + CPU_FOREACH(cpu) { + sev_es_set_reset_vector(cpu); + } + } + + return 0; +} + +static const QemuUUID sev_hash_table_header_guid = { + .data = UUID_LE(0x9438d606, 0x4f22, 0x4cc9, 0xb4, 0x79, 0xa7, 0x93, + 0xd4, 0x11, 0xfd, 0x21) +}; + +static const QemuUUID sev_kernel_entry_guid = { + .data = UUID_LE(0x4de79437, 0xabd2, 0x427f, 0xb8, 0x35, 0xd5, 0xb1, + 0x72, 0xd2, 0x04, 0x5b) +}; +static const QemuUUID sev_initrd_entry_guid = { + .data = UUID_LE(0x44baf731, 0x3a2f, 0x4bd7, 0x9a, 0xf1, 0x41, 0xe2, + 0x91, 0x69, 0x78, 0x1d) 
+}; +static const QemuUUID sev_cmdline_entry_guid = { + .data = UUID_LE(0x97d02dd8, 0xbd20, 0x4c94, 0xaa, 0x78, 0xe7, 0x71, + 0x4d, 0x36, 0xab, 0x2a) +}; + +/* + * Add the hashes of the linux kernel/initrd/cmdline to an encrypted guest page + * which is included in SEV's initial memory measurement. + */ +bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp) +{ + uint8_t *data; + SevHashTableDescriptor *area; + SevHashTable *ht; + PaddedSevHashTable *padded_ht; + uint8_t cmdline_hash[HASH_SIZE]; + uint8_t initrd_hash[HASH_SIZE]; + uint8_t kernel_hash[HASH_SIZE]; + uint8_t *hashp; + size_t hash_len = HASH_SIZE; + hwaddr mapped_len = sizeof(*padded_ht); + MemTxAttrs attrs = { 0 }; + bool ret = true; + + /* + * Only add the kernel hashes if the sev-guest configuration explicitly + * stated kernel-hashes=on. + */ + if (!sev_guest->kernel_hashes) { + return false; + } + + if (!pc_system_ovmf_table_find(SEV_HASH_TABLE_RV_GUID, &data, NULL)) { + error_setg(errp, "SEV: kernel specified but guest firmware " + "has no hashes table GUID"); + return false; + } + area = (SevHashTableDescriptor *)data; + if (!area->base || area->size < sizeof(PaddedSevHashTable)) { + error_setg(errp, "SEV: guest firmware hashes table area is invalid " + "(base=0x%x size=0x%x)", area->base, area->size); + return false; + } + + /* + * Calculate hash of kernel command-line with the terminating null byte. If + * the user doesn't supply a command-line via -append, the 1-byte "\0" will + * be used. + */ + hashp = cmdline_hash; + if (qcrypto_hash_bytes(QCRYPTO_HASH_ALG_SHA256, ctx->cmdline_data, + ctx->cmdline_size, &hashp, &hash_len, errp) < 0) { + return false; + } + assert(hash_len == HASH_SIZE); + + /* + * Calculate hash of initrd. If the user doesn't supply an initrd via + * -initrd, an empty buffer will be used (ctx->initrd_size == 0). 
+ */ + hashp = initrd_hash; + if (qcrypto_hash_bytes(QCRYPTO_HASH_ALG_SHA256, ctx->initrd_data, + ctx->initrd_size, &hashp, &hash_len, errp) < 0) { + return false; + } + assert(hash_len == HASH_SIZE); + + /* Calculate hash of the kernel */ + hashp = kernel_hash; + struct iovec iov[2] = { + { .iov_base = ctx->setup_data, .iov_len = ctx->setup_size }, + { .iov_base = ctx->kernel_data, .iov_len = ctx->kernel_size } + }; + if (qcrypto_hash_bytesv(QCRYPTO_HASH_ALG_SHA256, iov, ARRAY_SIZE(iov), + &hashp, &hash_len, errp) < 0) { + return false; + } + assert(hash_len == HASH_SIZE); + + /* + * Populate the hashes table in the guest's memory at the OVMF-designated + * area for the SEV hashes table + */ + padded_ht = address_space_map(&address_space_memory, area->base, + &mapped_len, true, attrs); + if (!padded_ht || mapped_len != sizeof(*padded_ht)) { + error_setg(errp, "SEV: cannot map hashes table guest memory area"); + return false; + } + ht = &padded_ht->ht; + + ht->guid = sev_hash_table_header_guid; + ht->len = sizeof(*ht); + + ht->cmdline.guid = sev_cmdline_entry_guid; + ht->cmdline.len = sizeof(ht->cmdline); + memcpy(ht->cmdline.hash, cmdline_hash, sizeof(ht->cmdline.hash)); + + ht->initrd.guid = sev_initrd_entry_guid; + ht->initrd.len = sizeof(ht->initrd); + memcpy(ht->initrd.hash, initrd_hash, sizeof(ht->initrd.hash)); + + ht->kernel.guid = sev_kernel_entry_guid; + ht->kernel.len = sizeof(ht->kernel); + memcpy(ht->kernel.hash, kernel_hash, sizeof(ht->kernel.hash)); + + /* zero the excess data so the measurement can be reliably calculated */ + memset(padded_ht->padding, 0, sizeof(padded_ht->padding)); + + if (sev_encrypt_flash((uint8_t *)padded_ht, sizeof(*padded_ht), errp) < 0) { + ret = false; + } + + address_space_unmap(&address_space_memory, padded_ht, + mapped_len, true, mapped_len); + + return ret; +} + static void sev_register_types(void) { - type_register_static(&qsev_guest_info); + type_register_static(&sev_guest_info); } type_init(sev_register_types); diff 
--git a/target/i386/sev.h b/target/i386/sev.h new file mode 100644 index 0000000000..e7499c95b1 --- /dev/null +++ b/target/i386/sev.h @@ -0,0 +1,62 @@ +/* + * QEMU Secure Encrypted Virutualization (SEV) support + * + * Copyright: Advanced Micro Devices, 2016-2018 + * + * Authors: + * Brijesh Singh <brijesh.singh@amd.com> + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#ifndef I386_SEV_H +#define I386_SEV_H + +#ifndef CONFIG_USER_ONLY +#include CONFIG_DEVICES /* CONFIG_SEV */ +#endif + +#include "exec/confidential-guest-support.h" + +#define SEV_POLICY_NODBG 0x1 +#define SEV_POLICY_NOKS 0x2 +#define SEV_POLICY_ES 0x4 +#define SEV_POLICY_NOSEND 0x8 +#define SEV_POLICY_DOMAIN 0x10 +#define SEV_POLICY_SEV 0x20 + +typedef struct SevKernelLoaderContext { + char *setup_data; + size_t setup_size; + char *kernel_data; + size_t kernel_size; + char *initrd_data; + size_t initrd_size; + char *cmdline_data; + size_t cmdline_size; +} SevKernelLoaderContext; + +#ifdef CONFIG_SEV +bool sev_enabled(void); +bool sev_es_enabled(void); +#else +#define sev_enabled() 0 +#define sev_es_enabled() 0 +#endif + +uint32_t sev_get_cbit_position(void); +uint32_t sev_get_reduced_phys_bits(void); +bool sev_add_kernel_loader_hashes(SevKernelLoaderContext *ctx, Error **errp); + +int sev_encrypt_flash(uint8_t *ptr, uint64_t len, Error **errp); +int sev_inject_launch_secret(const char *hdr, const char *secret, + uint64_t gpa, Error **errp); + +int sev_es_save_reset_vector(void *flash_ptr, uint64_t flash_size); +void sev_es_set_reset_vector(CPUState *cpu); + +int sev_kvm_init(ConfidentialGuestSupport *cgs, Error **errp); + +#endif diff --git a/target/i386/sev_i386.h b/target/i386/sev_i386.h deleted file mode 100644 index b8622dfb1e..0000000000 --- a/target/i386/sev_i386.h +++ /dev/null @@ -1,88 +0,0 @@ -/* - * QEMU Secure Encrypted Virutualization (SEV) support - * - * Copyright: Advanced Micro Devices, 
2016-2018 - * - * Authors: - * Brijesh Singh <brijesh.singh@amd.com> - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. - * - */ - -#ifndef QEMU_SEV_I386_H -#define QEMU_SEV_I386_H - -#include "qom/object.h" -#include "qapi/error.h" -#include "sysemu/kvm.h" -#include "sysemu/sev.h" -#include "qemu/error-report.h" -#include "qapi/qapi-commands-misc.h" - -#define SEV_POLICY_NODBG 0x1 -#define SEV_POLICY_NOKS 0x2 -#define SEV_POLICY_ES 0x4 -#define SEV_POLICY_NOSEND 0x8 -#define SEV_POLICY_DOMAIN 0x10 -#define SEV_POLICY_SEV 0x20 - -#define TYPE_QSEV_GUEST_INFO "sev-guest" -#define QSEV_GUEST_INFO(obj) \ - OBJECT_CHECK(QSevGuestInfo, (obj), TYPE_QSEV_GUEST_INFO) - -extern bool sev_enabled(void); -extern uint64_t sev_get_me_mask(void); -extern SevInfo *sev_get_info(void); -extern uint32_t sev_get_cbit_position(void); -extern uint32_t sev_get_reduced_phys_bits(void); -extern char *sev_get_launch_measurement(void); -extern SevCapability *sev_get_capabilities(void); - -typedef struct QSevGuestInfo QSevGuestInfo; -typedef struct QSevGuestInfoClass QSevGuestInfoClass; - -/** - * QSevGuestInfo: - * - * The QSevGuestInfo object is used for creating a SEV guest. 
- * - * # $QEMU \ - * -object sev-guest,id=sev0 \ - * -machine ...,memory-encryption=sev0 - */ -struct QSevGuestInfo { - Object parent_obj; - - char *sev_device; - uint32_t policy; - uint32_t handle; - char *dh_cert_file; - char *session_file; - uint32_t cbitpos; - uint32_t reduced_phys_bits; -}; - -struct QSevGuestInfoClass { - ObjectClass parent_class; -}; - -struct SEVState { - QSevGuestInfo *sev_info; - uint8_t api_major; - uint8_t api_minor; - uint8_t build_id; - uint32_t policy; - uint64_t me_mask; - uint32_t cbitpos; - uint32_t reduced_phys_bits; - uint32_t handle; - int sev_fd; - SevState state; - gchar *measurement; -}; - -typedef struct SEVState SEVState; - -#endif diff --git a/target/i386/svm.h b/target/i386/svm.h index 23a3a040b8..1bd7844730 100644 --- a/target/i386/svm.h +++ b/target/i386/svm.h @@ -9,6 +9,12 @@ #define V_IRQ_SHIFT 8 #define V_IRQ_MASK (1 << V_IRQ_SHIFT) +#define V_GIF_ENABLED_SHIFT 25 +#define V_GIF_ENABLED_MASK (1 << V_GIF_ENABLED_SHIFT) + +#define V_GIF_SHIFT 9 +#define V_GIF_MASK (1 << V_GIF_SHIFT) + #define V_INTR_PRIO_SHIFT 16 #define V_INTR_PRIO_MASK (0x0f << V_INTR_PRIO_SHIFT) @@ -18,6 +24,8 @@ #define V_INTR_MASKING_SHIFT 24 #define V_INTR_MASKING_MASK (1 << V_INTR_MASKING_SHIFT) +#define V_VMLOAD_VMSAVE_ENABLED_MASK (1 << 1) + #define SVM_INTERRUPT_SHADOW_MASK 1 #define SVM_IOIO_STR_SHIFT 2 @@ -124,6 +132,7 @@ /* only included in documentation, maybe wrong */ #define SVM_EXIT_MONITOR 0x08a #define SVM_EXIT_MWAIT 0x08b +#define SVM_EXIT_XSETBV 0x08d #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_ERR -1 @@ -132,18 +141,14 @@ #define SVM_NPT_ENABLED (1 << 0) -#define SVM_NPT_PAE (1 << 0) -#define SVM_NPT_LMA (1 << 1) -#define SVM_NPT_NXE (1 << 2) - -#define SVM_NPTEXIT_P (1ULL << 0) -#define SVM_NPTEXIT_RW (1ULL << 1) -#define SVM_NPTEXIT_US (1ULL << 2) -#define SVM_NPTEXIT_RSVD (1ULL << 3) -#define SVM_NPTEXIT_ID (1ULL << 4) #define SVM_NPTEXIT_GPA (1ULL << 32) #define SVM_NPTEXIT_GPT (1ULL << 33) +#define SVM_CR0_RESERVED_MASK 
0xffffffff00000000U + +#define SVM_MSRPM_SIZE (1ULL << 13) +#define SVM_IOPM_SIZE ((1ULL << 13) + 1) + struct QEMU_PACKED vmcb_control_area { uint16_t intercept_cr_read; uint16_t intercept_cr_write; diff --git a/target/i386/tcg/bpt_helper.c b/target/i386/tcg/bpt_helper.c new file mode 100644 index 0000000000..bc34ac27fe --- /dev/null +++ b/target/i386/tcg/bpt_helper.c @@ -0,0 +1,39 @@ +/* + * i386 breakpoint helpers + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/helper-proto.h" +#include "helper-tcg.h" + +G_NORETURN void helper_single_step(CPUX86State *env) +{ +#ifndef CONFIG_USER_ONLY + check_hw_breakpoints(env, true); + env->dr[6] |= DR6_BS; +#endif + raise_exception(env, EXCP01_DB); +} + +void helper_rechecking_single_step(CPUX86State *env) +{ + if ((env->eflags & TF_MASK) != 0) { + helper_single_step(env); + } +} diff --git a/target/i386/cc_helper.c b/target/i386/tcg/cc_helper.c index c9c90e10db..f76e9cb8cf 100644 --- a/target/i386/cc_helper.c +++ b/target/i386/tcg/cc_helper.c @@ -6,7 +6,7 @@ * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -20,6 +20,7 @@ #include "qemu/osdep.h" #include "cpu.h" #include "exec/helper-proto.h" +#include "helper-tcg.h" const uint8_t parity_table[256] = { CC_P, 0, 0, CC_P, 0, CC_P, CC_P, 0, @@ -57,21 +58,21 @@ const uint8_t parity_table[256] = { }; #define SHIFT 0 -#include "cc_helper_template.h" +#include "cc_helper_template.h.inc" #undef SHIFT #define SHIFT 1 -#include "cc_helper_template.h" +#include "cc_helper_template.h.inc" #undef SHIFT #define SHIFT 2 -#include "cc_helper_template.h" +#include "cc_helper_template.h.inc" #undef SHIFT #ifdef TARGET_X86_64 #define SHIFT 3 -#include "cc_helper_template.h" +#include "cc_helper_template.h.inc" #undef SHIFT #endif @@ -219,9 +220,9 @@ target_ulong helper_cc_compute_all(target_ulong dst, target_ulong src1, } } -uint32_t cpu_cc_compute_all(CPUX86State *env, int op) +uint32_t cpu_cc_compute_all(CPUX86State *env) { - return helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, 
op); + return helper_cc_compute_all(CC_DST, CC_SRC, CC_SRC2, CC_OP); } target_ulong helper_cc_compute_c(target_ulong dst, target_ulong src1, @@ -334,7 +335,7 @@ target_ulong helper_read_eflags(CPUX86State *env) { uint32_t eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); eflags |= (env->df & DF_MASK); eflags |= env->eflags & ~(VM_MASK | RF_MASK); return eflags; @@ -345,44 +346,3 @@ void helper_clts(CPUX86State *env) env->cr[0] &= ~CR0_TS_MASK; env->hflags &= ~HF_TS_MASK; } - -void helper_reset_rf(CPUX86State *env) -{ - env->eflags &= ~RF_MASK; -} - -void helper_cli(CPUX86State *env) -{ - env->eflags &= ~IF_MASK; -} - -void helper_sti(CPUX86State *env) -{ - env->eflags |= IF_MASK; -} - -void helper_clac(CPUX86State *env) -{ - env->eflags &= ~AC_MASK; -} - -void helper_stac(CPUX86State *env) -{ - env->eflags |= AC_MASK; -} - -#if 0 -/* vm86plus instructions */ -void helper_cli_vm(CPUX86State *env) -{ - env->eflags &= ~VIF_MASK; -} - -void helper_sti_vm(CPUX86State *env) -{ - env->eflags |= VIF_MASK; - if (env->eflags & VIP_MASK) { - raise_exception_ra(env, EXCP0D_GPF, GETPC()); - } -} -#endif diff --git a/target/i386/cc_helper_template.h b/target/i386/tcg/cc_helper_template.h.inc index 607311f195..bb611feb04 100644 --- a/target/i386/cc_helper_template.h +++ b/target/i386/tcg/cc_helper_template.h.inc @@ -6,7 +6,7 @@ * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/target/i386/tcg/decode-new.c.inc b/target/i386/tcg/decode-new.c.inc new file mode 100644 index 0000000000..426c459412 --- /dev/null +++ b/target/i386/tcg/decode-new.c.inc @@ -0,0 +1,2024 @@ +/* + * New-style decoder for i386 instructions + * + * Copyright (c) 2022 Red Hat, Inc. + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * The decoder is mostly based on tables copied from the Intel SDM. As + * a result, most operand load and writeback is done entirely in common + * table-driven code using the same operand type (X86_TYPE_*) and + * size (X86_SIZE_*) codes used in the manual. There are a few differences + * though. + * + * Operand sizes + * ------------- + * + * The manual lists d64 ("cannot encode 32-bit size in 64-bit mode") and f64 + * ("cannot encode 16-bit or 32-bit size in 64-bit mode") as modifiers of the + * "v" or "z" sizes. The decoder simply makes them separate operand sizes. 
+ * + * Vector operands + * --------------- + * + * The main difference is that the V, U and W types are extended to + * cover MMX as well; if an instruction is like + * + * por Pq, Qq + * 66 por Vx, Hx, Wx + * + * only the second row is included and the instruction is marked as a + * valid MMX instruction. The MMX flag directs the decoder to rewrite + * the V/U/H/W types to P/N/P/Q if there is no prefix, as well as changing + * "x" to "q" if there is no prefix. + * + * In addition, the ss/ps/sd/pd types are sometimes mushed together as "x" + * if the difference is expressed via prefixes. Individual instructions + * are separated by prefix in the generator functions. + * + * There is a custom size "xh" used to address half of a SSE/AVX operand. + * This points to a 64-bit operand for SSE operations, 128-bit operand + * for 256-bit AVX operands, etc. It is used for conversion operations + * such as VCVTPH2PS or VCVTSS2SD. + * + * There are a couple cases in which instructions (e.g. MOVD) write the + * whole XMM or MM register but are established incorrectly in the manual + * as "d" or "q". These have to be fixed for the decoder to work correctly. + * + * VEX exception classes + * --------------------- + * + * Speaking about imprecisions in the manual, the decoder treats all + * exception-class 4 instructions as having an optional VEX prefix, and + * all exception-class 6 instructions as having a mandatory VEX prefix. + * This is true except for a dozen instructions; these are in exception + * class 4 but do not ignore the VEX.W bit (which does not even exist + * without a VEX prefix). These instructions are mostly listed in Intel's + * table 2-16, but with a few exceptions. + * + * The AMD manual has more precise subclasses for exceptions, and unlike Intel + * they list the VEX.W requirements in the exception classes as well (except + * when they don't). 
AMD describes class 6 as "AVX Mixed Memory Argument" + * without defining what a mixed memory argument is, but still use 4 as the + * primary exception class... except when they don't. + * + * The summary is: + * Intel AMD VEX.W note + * ------------------------------------------------------------------- + * vpblendd 4 4J 0 + * vpblendvb 4 4E-X 0 (*) + * vpbroadcastq 6 6D 0 (+) + * vpermd/vpermps 4 4H 0 (§) + * vpermq/vpermpd 4 4H-1 1 (§) + * vpermilpd/vpermilps 4 6E 0 (^) + * vpmaskmovd 6 4K significant (^) + * vpsllv 4 4K significant + * vpsrav 4 4J 0 + * vpsrlv 4 4K significant + * vtestps/vtestpd 4 4G 0 + * + * (*) AMD lists VPBLENDVB as related to SSE4.1 PBLENDVB, which may + * explain why it is considered exception class 4. However, + * Intel says that VEX-only instructions should be in class 6... + * + * (+) Not found in Intel's table 2-16 + * + * (§) 4H and 4H-1 do not mention VEX.W requirements, which are + * however present in the description of the instruction + * + * (^) these are the two cases in which Intel and AMD disagree on the + * primary exception class + */ + +#define X86_OP_NONE { 0 }, + +#define X86_OP_GROUP3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) { \ + .decode = glue(decode_, op), \ + .op0 = glue(X86_TYPE_, op0_), \ + .s0 = glue(X86_SIZE_, s0_), \ + .op1 = glue(X86_TYPE_, op1_), \ + .s1 = glue(X86_SIZE_, s1_), \ + .op2 = glue(X86_TYPE_, op2_), \ + .s2 = glue(X86_SIZE_, s2_), \ + .is_decode = true, \ + ## __VA_ARGS__ \ +} + +#define X86_OP_GROUP2(op, op0, s0, op1, s1, ...) \ + X86_OP_GROUP3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__) +#define X86_OP_GROUP0(op, ...) \ + X86_OP_GROUP3(op, None, None, None, None, None, None, ## __VA_ARGS__) + +#define X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) 
{ \ + .gen = glue(gen_, op), \ + .op0 = glue(X86_TYPE_, op0_), \ + .s0 = glue(X86_SIZE_, s0_), \ + .op1 = glue(X86_TYPE_, op1_), \ + .s1 = glue(X86_SIZE_, s1_), \ + .op2 = glue(X86_TYPE_, op2_), \ + .s2 = glue(X86_SIZE_, s2_), \ + ## __VA_ARGS__ \ +} + +#define X86_OP_ENTRY4(op, op0_, s0_, op1_, s1_, op2_, s2_, ...) \ + X86_OP_ENTRY3(op, op0_, s0_, op1_, s1_, op2_, s2_, \ + .op3 = X86_TYPE_I, .s3 = X86_SIZE_b, \ + ## __VA_ARGS__) + +#define X86_OP_ENTRY2(op, op0, s0, op1, s1, ...) \ + X86_OP_ENTRY3(op, op0, s0, 2op, s0, op1, s1, ## __VA_ARGS__) +#define X86_OP_ENTRYw(op, op0, s0, ...) \ + X86_OP_ENTRY3(op, op0, s0, None, None, None, None, ## __VA_ARGS__) +#define X86_OP_ENTRYr(op, op0, s0, ...) \ + X86_OP_ENTRY3(op, None, None, None, None, op0, s0, ## __VA_ARGS__) +#define X86_OP_ENTRY0(op, ...) \ + X86_OP_ENTRY3(op, None, None, None, None, None, None, ## __VA_ARGS__) + +#define cpuid(feat) .cpuid = X86_FEAT_##feat, +#define xchg .special = X86_SPECIAL_Locked, +#define lock .special = X86_SPECIAL_HasLock, +#define mmx .special = X86_SPECIAL_MMX, +#define op0_Rd .special = X86_SPECIAL_Op0_Rd, +#define op2_Ry .special = X86_SPECIAL_Op2_Ry, +#define avx_movx .special = X86_SPECIAL_AVXExtMov, +#define sextT0 .special = X86_SPECIAL_SExtT0, +#define zextT0 .special = X86_SPECIAL_ZExtT0, + +#define vex1 .vex_class = 1, +#define vex1_rep3 .vex_class = 1, .vex_special = X86_VEX_REPScalar, +#define vex2 .vex_class = 2, +#define vex2_rep3 .vex_class = 2, .vex_special = X86_VEX_REPScalar, +#define vex3 .vex_class = 3, +#define vex4 .vex_class = 4, +#define vex4_unal .vex_class = 4, .vex_special = X86_VEX_SSEUnaligned, +#define vex4_rep5 .vex_class = 4, .vex_special = X86_VEX_REPScalar, +#define vex5 .vex_class = 5, +#define vex6 .vex_class = 6, +#define vex7 .vex_class = 7, +#define vex8 .vex_class = 8, +#define vex11 .vex_class = 11, +#define vex12 .vex_class = 12, +#define vex13 .vex_class = 13, + +#define chk(a) .check = X86_CHECK_##a, +#define svm(a) .intercept = 
SVM_EXIT_##a, + +#define avx2_256 .vex_special = X86_VEX_AVX2_256, + +#define P_00 1 +#define P_66 (1 << PREFIX_DATA) +#define P_F3 (1 << PREFIX_REPZ) +#define P_F2 (1 << PREFIX_REPNZ) + +#define p_00 .valid_prefix = P_00, +#define p_66 .valid_prefix = P_66, +#define p_f3 .valid_prefix = P_F3, +#define p_f2 .valid_prefix = P_F2, +#define p_00_66 .valid_prefix = P_00 | P_66, +#define p_00_f3 .valid_prefix = P_00 | P_F3, +#define p_66_f2 .valid_prefix = P_66 | P_F2, +#define p_00_66_f3 .valid_prefix = P_00 | P_66 | P_F3, +#define p_66_f3_f2 .valid_prefix = P_66 | P_F3 | P_F2, +#define p_00_66_f3_f2 .valid_prefix = P_00 | P_66 | P_F3 | P_F2, + +static uint8_t get_modrm(DisasContext *s, CPUX86State *env) +{ + if (!s->has_modrm) { + s->modrm = x86_ldub_code(env, s); + s->has_modrm = true; + } + return s->modrm; +} + +static inline const X86OpEntry *decode_by_prefix(DisasContext *s, const X86OpEntry entries[4]) +{ + if (s->prefix & PREFIX_REPNZ) { + return &entries[3]; + } else if (s->prefix & PREFIX_REPZ) { + return &entries[2]; + } else if (s->prefix & PREFIX_DATA) { + return &entries[1]; + } else { + return &entries[0]; + } +} + +static void decode_group15(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + /* only includes ldmxcsr and stmxcsr, because they have AVX variants. 
*/ + static const X86OpEntry group15_reg[8] = { + }; + + static const X86OpEntry group15_mem[8] = { + [2] = X86_OP_ENTRYr(LDMXCSR, E,d, vex5 chk(VEX128)), + [3] = X86_OP_ENTRYw(STMXCSR, E,d, vex5 chk(VEX128)), + }; + + uint8_t modrm = get_modrm(s, env); + if ((modrm >> 6) == 3) { + *entry = group15_reg[(modrm >> 3) & 7]; + } else { + *entry = group15_mem[(modrm >> 3) & 7]; + } +} + +static void decode_group17(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86GenFunc group17_gen[8] = { + NULL, gen_BLSR, gen_BLSMSK, gen_BLSI, + }; + int op = (get_modrm(s, env) >> 3) & 7; + entry->gen = group17_gen[op]; +} + +static void decode_group12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_group12[8] = { + {}, + {}, + X86_OP_ENTRY3(PSRLW_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + {}, + X86_OP_ENTRY3(PSRAW_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + {}, + X86_OP_ENTRY3(PSLLW_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + {}, + }; + + int op = (get_modrm(s, env) >> 3) & 7; + *entry = opcodes_group12[op]; +} + +static void decode_group13(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_group13[8] = { + {}, + {}, + X86_OP_ENTRY3(PSRLD_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + {}, + X86_OP_ENTRY3(PSRAD_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + {}, + X86_OP_ENTRY3(PSLLD_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + {}, + }; + + int op = (get_modrm(s, env) >> 3) & 7; + *entry = opcodes_group13[op]; +} + +static void decode_group14(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_group14[8] = { + /* grp14 */ + {}, + {}, + X86_OP_ENTRY3(PSRLQ_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + X86_OP_ENTRY3(PSRLDQ_i, H,x, U,x, I,b, vex7 avx2_256 p_66), + {}, + {}, + X86_OP_ENTRY3(PSLLQ_i, H,x, U,x, I,b, vex7 mmx avx2_256 p_00_66), + X86_OP_ENTRY3(PSLLDQ_i, H,x, U,x, 
I,b, vex7 avx2_256 p_66), + }; + + int op = (get_modrm(s, env) >> 3) & 7; + *entry = opcodes_group14[op]; +} + +static void decode_0F6F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F6F[4] = { + X86_OP_ENTRY3(MOVDQ, P,q, None,None, Q,q, vex5 mmx), /* movq */ + X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex1), /* movdqa */ + X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* movdqu */ + {}, + }; + *entry = *decode_by_prefix(s, opcodes_0F6F); +} + +static void decode_0F70(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry pshufw[4] = { + X86_OP_ENTRY3(PSHUFW, P,q, Q,q, I,b, vex4 mmx), + X86_OP_ENTRY3(PSHUFD, V,x, W,x, I,b, vex4 avx2_256), + X86_OP_ENTRY3(PSHUFHW, V,x, W,x, I,b, vex4 avx2_256), + X86_OP_ENTRY3(PSHUFLW, V,x, W,x, I,b, vex4 avx2_256), + }; + + *entry = *decode_by_prefix(s, pshufw); +} + +static void decode_0F77(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + if (!(s->prefix & PREFIX_VEX)) { + entry->gen = gen_EMMS; + } else if (!s->vex_l) { + entry->gen = gen_VZEROUPPER; + entry->vex_class = 8; + } else { + entry->gen = gen_VZEROALL; + entry->vex_class = 8; + } +} + +static void decode_0F78(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F78[4] = { + {}, + X86_OP_ENTRY3(EXTRQ_i, V,x, None,None, I,w, cpuid(SSE4A)), /* AMD extension */ + {}, + X86_OP_ENTRY3(INSERTQ_i, V,x, U,x, I,w, cpuid(SSE4A)), /* AMD extension */ + }; + *entry = *decode_by_prefix(s, opcodes_0F78); +} + +static void decode_0F79(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + if (s->prefix & PREFIX_REPNZ) { + entry->gen = gen_INSERTQ_r; /* AMD extension */ + } else if (s->prefix & PREFIX_DATA) { + entry->gen = gen_EXTRQ_r; /* AMD extension */ + } else { + entry->gen = NULL; + }; +} + +static void decode_0F7E(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + 
static const X86OpEntry opcodes_0F7E[4] = { + X86_OP_ENTRY3(MOVD_from, E,y, None,None, P,y, vex5 mmx), + X86_OP_ENTRY3(MOVD_from, E,y, None,None, V,y, vex5), + X86_OP_ENTRY3(MOVQ, V,x, None,None, W,q, vex5), /* wrong dest Vy on SDM! */ + {}, + }; + *entry = *decode_by_prefix(s, opcodes_0F7E); +} + +static void decode_0F7F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F7F[4] = { + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex5 mmx), /* movq */ + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1), /* movdqa */ + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4_unal), /* movdqu */ + {}, + }; + *entry = *decode_by_prefix(s, opcodes_0F7F); +} + +static void decode_0FD6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry movq[4] = { + {}, + X86_OP_ENTRY3(MOVQ, W,x, None, None, V,q, vex5), + X86_OP_ENTRY3(MOVq_dq, V,dq, None, None, N,q), + X86_OP_ENTRY3(MOVq_dq, P,q, None, None, U,q), + }; + + *entry = *decode_by_prefix(s, movq); +} + +static const X86OpEntry opcodes_0F38_00toEF[240] = { + [0x00] = X86_OP_ENTRY3(PSHUFB, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + [0x01] = X86_OP_ENTRY3(PHADDW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + [0x02] = X86_OP_ENTRY3(PHADDD, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + [0x03] = X86_OP_ENTRY3(PHADDSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + [0x04] = X86_OP_ENTRY3(PMADDUBSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + [0x05] = X86_OP_ENTRY3(PHSUBW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + [0x06] = X86_OP_ENTRY3(PHSUBD, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + [0x07] = X86_OP_ENTRY3(PHSUBSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + + [0x10] = X86_OP_ENTRY2(PBLENDVB, V,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x13] = X86_OP_ENTRY2(VCVTPH2PS, V,x, W,xh, vex11 chk(W0) cpuid(F16C) p_66), + [0x14] = 
X86_OP_ENTRY2(BLENDVPS, V,x, W,x, vex4 cpuid(SSE41) p_66), + [0x15] = X86_OP_ENTRY2(BLENDVPD, V,x, W,x, vex4 cpuid(SSE41) p_66), + /* Listed incorrectly as type 4 */ + [0x16] = X86_OP_ENTRY3(VPERMD, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66), /* vpermps */ + [0x17] = X86_OP_ENTRY3(VPTEST, None,None, V,x, W,x, vex4 cpuid(SSE41) p_66), + + /* + * Source operand listed as Mq/Ux and similar in the manual; incorrectly listed + * as 128-bit only in 2-17. + */ + [0x20] = X86_OP_ENTRY3(VPMOVSXBW, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), + [0x21] = X86_OP_ENTRY3(VPMOVSXBD, V,x, None,None, W,d, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), + [0x22] = X86_OP_ENTRY3(VPMOVSXBQ, V,x, None,None, W,w, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), + [0x23] = X86_OP_ENTRY3(VPMOVSXWD, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), + [0x24] = X86_OP_ENTRY3(VPMOVSXWQ, V,x, None,None, W,d, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), + [0x25] = X86_OP_ENTRY3(VPMOVSXDQ, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), + + /* Same as PMOVSX. 
*/ + [0x30] = X86_OP_ENTRY3(VPMOVZXBW, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), + [0x31] = X86_OP_ENTRY3(VPMOVZXBD, V,x, None,None, W,d, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), + [0x32] = X86_OP_ENTRY3(VPMOVZXBQ, V,x, None,None, W,w, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), + [0x33] = X86_OP_ENTRY3(VPMOVZXWD, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), + [0x34] = X86_OP_ENTRY3(VPMOVZXWQ, V,x, None,None, W,d, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), + [0x35] = X86_OP_ENTRY3(VPMOVZXDQ, V,x, None,None, W,q, vex5 cpuid(SSE41) avx_movx avx2_256 p_66), + [0x36] = X86_OP_ENTRY3(VPERMD, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66), + [0x37] = X86_OP_ENTRY3(PCMPGTQ, V,x, H,x, W,x, vex4 cpuid(SSE42) avx2_256 p_66), + + [0x40] = X86_OP_ENTRY3(PMULLD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x41] = X86_OP_ENTRY3(VPHMINPOSUW, V,dq, None,None, W,dq, vex4 cpuid(SSE41) p_66), + /* Listed incorrectly as type 4 */ + [0x45] = X86_OP_ENTRY3(VPSRLV, V,x, H,x, W,x, vex6 cpuid(AVX2) p_66), + [0x46] = X86_OP_ENTRY3(VPSRAV, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX2) p_66), + [0x47] = X86_OP_ENTRY3(VPSLLV, V,x, H,x, W,x, vex6 cpuid(AVX2) p_66), + + [0x90] = X86_OP_ENTRY3(VPGATHERD, V,x, H,x, M,d, vex12 cpuid(AVX2) p_66), /* vpgatherdd/q */ + [0x91] = X86_OP_ENTRY3(VPGATHERQ, V,x, H,x, M,q, vex12 cpuid(AVX2) p_66), /* vpgatherqd/q */ + [0x92] = X86_OP_ENTRY3(VPGATHERD, V,x, H,x, M,d, vex12 cpuid(AVX2) p_66), /* vgatherdps/d */ + [0x93] = X86_OP_ENTRY3(VPGATHERQ, V,x, H,x, M,q, vex12 cpuid(AVX2) p_66), /* vgatherqps/d */ + + /* Should be exception type 2 but they do not have legacy SSE equivalents? 
*/ + [0x96] = X86_OP_ENTRY3(VFMADDSUB132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x97] = X86_OP_ENTRY3(VFMSUBADD132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + + [0xa6] = X86_OP_ENTRY3(VFMADDSUB213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xa7] = X86_OP_ENTRY3(VFMSUBADD213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + + [0xb6] = X86_OP_ENTRY3(VFMADDSUB231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xb7] = X86_OP_ENTRY3(VFMSUBADD231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + + [0x08] = X86_OP_ENTRY3(PSIGNB, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + [0x09] = X86_OP_ENTRY3(PSIGNW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + [0x0a] = X86_OP_ENTRY3(PSIGND, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + [0x0b] = X86_OP_ENTRY3(PMULHRSW, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + /* Listed incorrectly as type 4 */ + [0x0c] = X86_OP_ENTRY3(VPERMILPS, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_00_66), + [0x0d] = X86_OP_ENTRY3(VPERMILPD, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_66), + [0x0e] = X86_OP_ENTRY3(VTESTPS, None,None, V,x, W,x, vex6 chk(W0) cpuid(AVX) p_66), + [0x0f] = X86_OP_ENTRY3(VTESTPD, None,None, V,x, W,x, vex6 chk(W0) cpuid(AVX) p_66), + + [0x18] = X86_OP_ENTRY3(VPBROADCASTD, V,x, None,None, W,d, vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastss */ + [0x19] = X86_OP_ENTRY3(VPBROADCASTQ, V,qq, None,None, W,q, vex6 chk(W0) cpuid(AVX) p_66), /* vbroadcastsd */ + [0x1a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX) p_66), + [0x1c] = X86_OP_ENTRY3(PABSB, V,x, None,None, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + [0x1d] = X86_OP_ENTRY3(PABSW, V,x, None,None, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + [0x1e] = X86_OP_ENTRY3(PABSD, V,x, None,None, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + + [0x28] = X86_OP_ENTRY3(PMULDQ, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x29] = X86_OP_ENTRY3(PCMPEQQ, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x2a] = 
X86_OP_ENTRY3(MOVDQ, V,x, None,None, WM,x, vex1 cpuid(SSE41) avx2_256 p_66), /* movntdqa */ + [0x2b] = X86_OP_ENTRY3(VPACKUSDW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x2c] = X86_OP_ENTRY3(VMASKMOVPS, V,x, H,x, WM,x, vex6 chk(W0) cpuid(AVX) p_66), + [0x2d] = X86_OP_ENTRY3(VMASKMOVPD, V,x, H,x, WM,x, vex6 chk(W0) cpuid(AVX) p_66), + /* Incorrectly listed as Mx,Hx,Vx in the manual */ + [0x2e] = X86_OP_ENTRY3(VMASKMOVPS_st, M,x, V,x, H,x, vex6 chk(W0) cpuid(AVX) p_66), + [0x2f] = X86_OP_ENTRY3(VMASKMOVPD_st, M,x, V,x, H,x, vex6 chk(W0) cpuid(AVX) p_66), + + [0x38] = X86_OP_ENTRY3(PMINSB, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x39] = X86_OP_ENTRY3(PMINSD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x3a] = X86_OP_ENTRY3(PMINUW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x3b] = X86_OP_ENTRY3(PMINUD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x3c] = X86_OP_ENTRY3(PMAXSB, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x3d] = X86_OP_ENTRY3(PMAXSD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x3e] = X86_OP_ENTRY3(PMAXUW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x3f] = X86_OP_ENTRY3(PMAXUD, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + + /* VPBROADCASTQ not listed as W0 in table 2-16 */ + [0x58] = X86_OP_ENTRY3(VPBROADCASTD, V,x, None,None, W,d, vex6 chk(W0) cpuid(AVX2) p_66), + [0x59] = X86_OP_ENTRY3(VPBROADCASTQ, V,x, None,None, W,q, vex6 chk(W0) cpuid(AVX2) p_66), + [0x5a] = X86_OP_ENTRY3(VBROADCASTx128, V,qq, None,None, WM,dq,vex6 chk(W0) cpuid(AVX2) p_66), + + [0x78] = X86_OP_ENTRY3(VPBROADCASTB, V,x, None,None, W,b, vex6 chk(W0) cpuid(AVX2) p_66), + [0x79] = X86_OP_ENTRY3(VPBROADCASTW, V,x, None,None, W,w, vex6 chk(W0) cpuid(AVX2) p_66), + + [0x8c] = X86_OP_ENTRY3(VPMASKMOV, V,x, H,x, WM,x, vex6 cpuid(AVX2) p_66), + [0x8e] = X86_OP_ENTRY3(VPMASKMOV_st, M,x, V,x, H,x, vex6 cpuid(AVX2) p_66), + + /* Should be exception type 2 or 3 but they do not have legacy SSE equivalents? 
*/ + [0x98] = X86_OP_ENTRY3(VFMADD132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x99] = X86_OP_ENTRY3(VFMADD132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x9a] = X86_OP_ENTRY3(VFMSUB132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x9b] = X86_OP_ENTRY3(VFMSUB132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x9c] = X86_OP_ENTRY3(VFNMADD132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x9d] = X86_OP_ENTRY3(VFNMADD132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x9e] = X86_OP_ENTRY3(VFNMSUB132Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0x9f] = X86_OP_ENTRY3(VFNMSUB132Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + + [0xa8] = X86_OP_ENTRY3(VFMADD213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xa9] = X86_OP_ENTRY3(VFMADD213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xaa] = X86_OP_ENTRY3(VFMSUB213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xab] = X86_OP_ENTRY3(VFMSUB213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xac] = X86_OP_ENTRY3(VFNMADD213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xad] = X86_OP_ENTRY3(VFNMADD213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xae] = X86_OP_ENTRY3(VFNMSUB213Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xaf] = X86_OP_ENTRY3(VFNMSUB213Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + + [0xb8] = X86_OP_ENTRY3(VFMADD231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xb9] = X86_OP_ENTRY3(VFMADD231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xba] = X86_OP_ENTRY3(VFMSUB231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xbb] = X86_OP_ENTRY3(VFMSUB231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xbc] = X86_OP_ENTRY3(VFNMADD231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xbd] = X86_OP_ENTRY3(VFNMADD231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xbe] = X86_OP_ENTRY3(VFNMSUB231Px, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + [0xbf] = X86_OP_ENTRY3(VFNMSUB231Sx, V,x, H,x, W,x, vex6 cpuid(FMA) p_66), + + [0xc8] = X86_OP_ENTRY2(SHA1NEXTE, V,dq, W,dq, cpuid(SHA_NI)), + [0xc9] = X86_OP_ENTRY2(SHA1MSG1, V,dq, W,dq, cpuid(SHA_NI)), + [0xca] = 
X86_OP_ENTRY2(SHA1MSG2, V,dq, W,dq, cpuid(SHA_NI)), + [0xcb] = X86_OP_ENTRY2(SHA256RNDS2, V,dq, W,dq, cpuid(SHA_NI)), + [0xcc] = X86_OP_ENTRY2(SHA256MSG1, V,dq, W,dq, cpuid(SHA_NI)), + [0xcd] = X86_OP_ENTRY2(SHA256MSG2, V,dq, W,dq, cpuid(SHA_NI)), + + [0xdb] = X86_OP_ENTRY3(VAESIMC, V,dq, None,None, W,dq, vex4 cpuid(AES) p_66), + [0xdc] = X86_OP_ENTRY3(VAESENC, V,x, H,x, W,x, vex4 cpuid(AES) p_66), + [0xdd] = X86_OP_ENTRY3(VAESENCLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66), + [0xde] = X86_OP_ENTRY3(VAESDEC, V,x, H,x, W,x, vex4 cpuid(AES) p_66), + [0xdf] = X86_OP_ENTRY3(VAESDECLAST, V,x, H,x, W,x, vex4 cpuid(AES) p_66), + + /* + * REG selects srcdest2 operand, VEX.vvvv selects src3. VEX class not found + * in manual, assumed to be 13 from the VEX.L0 constraint. + */ + [0xe0] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe1] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe2] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe3] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe4] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe5] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe6] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe7] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + + [0xe8] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xe9] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xea] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xeb] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xec] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) 
cpuid(CMPCCXADD) p_66), + [0xed] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xee] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), + [0xef] = X86_OP_ENTRY3(CMPccXADD, M,y, G,y, B,y, vex13 xchg chk(o64) cpuid(CMPCCXADD) p_66), +}; + +/* five rows for no prefix, 66, F3, F2, 66+F2 */ +static const X86OpEntry opcodes_0F38_F0toFF[16][5] = { + [0] = { + X86_OP_ENTRY3(MOVBE, G,y, M,y, None,None, cpuid(MOVBE)), + X86_OP_ENTRY3(MOVBE, G,w, M,w, None,None, cpuid(MOVBE)), + {}, + X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)), + X86_OP_ENTRY2(CRC32, G,d, E,b, cpuid(SSE42)), + }, + [1] = { + X86_OP_ENTRY3(MOVBE, M,y, G,y, None,None, cpuid(MOVBE)), + X86_OP_ENTRY3(MOVBE, M,w, G,w, None,None, cpuid(MOVBE)), + {}, + X86_OP_ENTRY2(CRC32, G,d, E,y, cpuid(SSE42)), + X86_OP_ENTRY2(CRC32, G,d, E,w, cpuid(SSE42)), + }, + [2] = { + X86_OP_ENTRY3(ANDN, G,y, B,y, E,y, vex13 cpuid(BMI1)), + {}, + {}, + {}, + {}, + }, + [3] = { + X86_OP_GROUP3(group17, B,y, E,y, None,None, vex13 cpuid(BMI1)), + {}, + {}, + {}, + {}, + }, + [5] = { + X86_OP_ENTRY3(BZHI, G,y, E,y, B,y, vex13 cpuid(BMI1)), + {}, + X86_OP_ENTRY3(PEXT, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)), + X86_OP_ENTRY3(PDEP, G,y, B,y, E,y, vex13 zextT0 cpuid(BMI2)), + {}, + }, + [6] = { + {}, + X86_OP_ENTRY2(ADCX, G,y, E,y, cpuid(ADX)), + X86_OP_ENTRY2(ADOX, G,y, E,y, cpuid(ADX)), + X86_OP_ENTRY3(MULX, /* B,y, */ G,y, E,y, 2,y, vex13 cpuid(BMI2)), + {}, + }, + [7] = { + X86_OP_ENTRY3(BEXTR, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)), + X86_OP_ENTRY3(SHLX, G,y, E,y, B,y, vex13 cpuid(BMI1)), + X86_OP_ENTRY3(SARX, G,y, E,y, B,y, vex13 sextT0 cpuid(BMI1)), + X86_OP_ENTRY3(SHRX, G,y, E,y, B,y, vex13 zextT0 cpuid(BMI1)), + {}, + }, +}; + +static void decode_0F38(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + *b = x86_ldub_code(env, s); + if (*b < 0xf0) { + *entry = opcodes_0F38_00toEF[*b]; + } else { + int row = 0; + if (s->prefix & 
PREFIX_REPZ) { + /* The REPZ (F3) prefix has priority over 66 */ + row = 2; + } else { + row += s->prefix & PREFIX_REPNZ ? 3 : 0; + row += s->prefix & PREFIX_DATA ? 1 : 0; + } + *entry = opcodes_0F38_F0toFF[*b & 15][row]; + } +} + +static void decode_VINSERTPS(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry + vinsertps_reg = X86_OP_ENTRY4(VINSERTPS_r, V,dq, H,dq, U,dq, vex5 cpuid(SSE41) p_66), + vinsertps_mem = X86_OP_ENTRY4(VINSERTPS_m, V,dq, H,dq, M,d, vex5 cpuid(SSE41) p_66); + + int modrm = get_modrm(s, env); + *entry = (modrm >> 6) == 3 ? vinsertps_reg : vinsertps_mem; +} + +static const X86OpEntry opcodes_0F3A[256] = { + /* + * These are VEX-only, but incorrectly listed in the manual as exception type 4. + * Also the "qq" instructions are sometimes omitted by Table 2-17, but are VEX256 + * only. + */ + [0x00] = X86_OP_ENTRY3(VPERMQ, V,qq, W,qq, I,b, vex6 chk(W1) cpuid(AVX2) p_66), + [0x01] = X86_OP_ENTRY3(VPERMQ, V,qq, W,qq, I,b, vex6 chk(W1) cpuid(AVX2) p_66), /* VPERMPD */ + [0x02] = X86_OP_ENTRY4(VBLENDPS, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX2) p_66), /* VPBLENDD */ + [0x04] = X86_OP_ENTRY3(VPERMILPS_i, V,x, W,x, I,b, vex6 chk(W0) cpuid(AVX) p_66), + [0x05] = X86_OP_ENTRY3(VPERMILPD_i, V,x, W,x, I,b, vex6 chk(W0) cpuid(AVX) p_66), + [0x06] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66), + + [0x14] = X86_OP_ENTRY3(PEXTRB, E,b, V,dq, I,b, vex5 cpuid(SSE41) op0_Rd p_66), + [0x15] = X86_OP_ENTRY3(PEXTRW, E,w, V,dq, I,b, vex5 cpuid(SSE41) op0_Rd p_66), + [0x16] = X86_OP_ENTRY3(PEXTR, E,y, V,dq, I,b, vex5 cpuid(SSE41) p_66), + [0x17] = X86_OP_ENTRY3(VEXTRACTPS, E,d, V,dq, I,b, vex5 cpuid(SSE41) p_66), + [0x1d] = X86_OP_ENTRY3(VCVTPS2PH, W,xh, V,x, I,b, vex11 chk(W0) cpuid(F16C) p_66), + + [0x20] = X86_OP_ENTRY4(PINSRB, V,dq, H,dq, E,b, vex5 cpuid(SSE41) op2_Ry p_66), + [0x21] = X86_OP_GROUP0(VINSERTPS), + [0x22] = X86_OP_ENTRY4(PINSR, V,dq, H,dq, E,y, vex5 cpuid(SSE41) p_66), + + [0x40] 
= X86_OP_ENTRY4(VDDPS, V,x, H,x, W,x, vex2 cpuid(SSE41) p_66), + [0x41] = X86_OP_ENTRY4(VDDPD, V,dq, H,dq, W,dq, vex2 cpuid(SSE41) p_66), + [0x42] = X86_OP_ENTRY4(VMPSADBW, V,x, H,x, W,x, vex2 cpuid(SSE41) avx2_256 p_66), + [0x44] = X86_OP_ENTRY4(PCLMULQDQ, V,dq, H,dq, W,dq, vex4 cpuid(PCLMULQDQ) p_66), + [0x46] = X86_OP_ENTRY4(VPERM2x128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66), + + [0x60] = X86_OP_ENTRY4(PCMPESTRM, None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66), + [0x61] = X86_OP_ENTRY4(PCMPESTRI, None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66), + [0x62] = X86_OP_ENTRY4(PCMPISTRM, None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66), + [0x63] = X86_OP_ENTRY4(PCMPISTRI, None,None, V,dq, W,dq, vex4_unal cpuid(SSE42) p_66), + + [0x08] = X86_OP_ENTRY3(VROUNDPS, V,x, W,x, I,b, vex2 cpuid(SSE41) p_66), + [0x09] = X86_OP_ENTRY3(VROUNDPD, V,x, W,x, I,b, vex2 cpuid(SSE41) p_66), + /* + * Not listed as four operand in the manual. Also writes and reads 128-bits + * from the first two operands due to the V operand picking higher entries of + * the H operand; the "Vss,Hss,Wss" description from the manual is incorrect. + * For other unary operations such as VSQRTSx this is hidden by the "REPScalar" + * value of vex_special, because the table lists the operand types of VSQRTPx. 
+ */ + [0x0a] = X86_OP_ENTRY4(VROUNDSS, V,x, H,x, W,ss, vex3 cpuid(SSE41) p_66), + [0x0b] = X86_OP_ENTRY4(VROUNDSD, V,x, H,x, W,sd, vex3 cpuid(SSE41) p_66), + [0x0c] = X86_OP_ENTRY4(VBLENDPS, V,x, H,x, W,x, vex4 cpuid(SSE41) p_66), + [0x0d] = X86_OP_ENTRY4(VBLENDPD, V,x, H,x, W,x, vex4 cpuid(SSE41) p_66), + [0x0e] = X86_OP_ENTRY4(VPBLENDW, V,x, H,x, W,x, vex4 cpuid(SSE41) avx2_256 p_66), + [0x0f] = X86_OP_ENTRY4(PALIGNR, V,x, H,x, W,x, vex4 cpuid(SSSE3) mmx avx2_256 p_00_66), + + [0x18] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX) p_66), + [0x19] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX) p_66), + + [0x38] = X86_OP_ENTRY4(VINSERTx128, V,qq, H,qq, W,qq, vex6 chk(W0) cpuid(AVX2) p_66), + [0x39] = X86_OP_ENTRY3(VEXTRACTx128, W,dq, V,qq, I,b, vex6 chk(W0) cpuid(AVX2) p_66), + + /* Listed incorrectly as type 4 */ + [0x4a] = X86_OP_ENTRY4(VBLENDVPS, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_66), + [0x4b] = X86_OP_ENTRY4(VBLENDVPD, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_66), + [0x4c] = X86_OP_ENTRY4(VPBLENDVB, V,x, H,x, W,x, vex6 chk(W0) cpuid(AVX) p_66 avx2_256), + + [0xcc] = X86_OP_ENTRY3(SHA1RNDS4, V,dq, W,dq, I,b, cpuid(SHA_NI)), + + [0xdf] = X86_OP_ENTRY3(VAESKEYGEN, V,dq, W,dq, I,b, vex4 cpuid(AES) p_66), + + [0xF0] = X86_OP_ENTRY3(RORX, G,y, E,y, I,b, vex13 cpuid(BMI2) p_f2), +}; + +static void decode_0F3A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + *b = x86_ldub_code(env, s); + *entry = opcodes_0F3A[*b]; +} + +/* + * There are some mistakes in the operands in the manual, and the load/store/register + * cases are easiest to keep separate, so the entries for 10-17 follow simplicity and + * efficiency of implementation rather than copying what the manual says. + * + * In particular: + * + * 1) "VMOVSS m32, xmm1" and "VMOVSD m64, xmm1" do not support VEX.vvvv != 1111b, + * but this is not mentioned in the tables. 
+ * + * 2) MOVHLPS, MOVHPS, MOVHPD, MOVLPD, MOVLPS read the high quadword of one of their + * operands, which must therefore be dq; MOVLPD and MOVLPS also write the high + * quadword of the V operand. + */ +static void decode_0F10(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F10_reg[4] = { + X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPS */ + X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPD */ + X86_OP_ENTRY3(VMOVSS, V,x, H,x, W,x, vex5), + X86_OP_ENTRY3(VMOVLPx, V,x, H,x, W,x, vex5), /* MOVSD */ + }; + + static const X86OpEntry opcodes_0F10_mem[4] = { + X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPS */ + X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex4_unal), /* MOVUPD */ + X86_OP_ENTRY3(VMOVSS_ld, V,x, H,x, M,ss, vex5), + X86_OP_ENTRY3(VMOVSD_ld, V,x, H,x, M,sd, vex5), + }; + + if ((get_modrm(s, env) >> 6) == 3) { + *entry = *decode_by_prefix(s, opcodes_0F10_reg); + } else { + *entry = *decode_by_prefix(s, opcodes_0F10_mem); + } +} + +static void decode_0F11(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F11_reg[4] = { + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPS */ + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPD */ + X86_OP_ENTRY3(VMOVSS, W,x, H,x, V,x, vex5), + X86_OP_ENTRY3(VMOVLPx, W,x, H,x, V,q, vex5), /* MOVSD */ + }; + + static const X86OpEntry opcodes_0F11_mem[4] = { + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPS */ + X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex4), /* MOVUPD */ + X86_OP_ENTRY3(VMOVSS_st, M,ss, None,None, V,x, vex5), + X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex5), /* MOVSD */ + }; + + if ((get_modrm(s, env) >> 6) == 3) { + *entry = *decode_by_prefix(s, opcodes_0F11_reg); + } else { + *entry = *decode_by_prefix(s, opcodes_0F11_mem); + } +} + +static void decode_0F12(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t 
*b) +{ + static const X86OpEntry opcodes_0F12_mem[4] = { + /* + * Use dq for operand for compatibility with gen_MOVSD and + * to allow VEX128 only. + */ + X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq, M,q, vex5), /* MOVLPS */ + X86_OP_ENTRY3(VMOVLPx_ld, V,dq, H,dq, M,q, vex5), /* MOVLPD */ + X86_OP_ENTRY3(VMOVSLDUP, V,x, None,None, W,x, vex4 cpuid(SSE3)), + X86_OP_ENTRY3(VMOVDDUP, V,x, None,None, WM,q, vex5 cpuid(SSE3)), /* qq if VEX.256 */ + }; + static const X86OpEntry opcodes_0F12_reg[4] = { + X86_OP_ENTRY3(VMOVHLPS, V,dq, H,dq, U,dq, vex7), + X86_OP_ENTRY3(VMOVLPx, W,x, H,x, U,q, vex5), /* MOVLPD */ + X86_OP_ENTRY3(VMOVSLDUP, V,x, None,None, U,x, vex4 cpuid(SSE3)), + X86_OP_ENTRY3(VMOVDDUP, V,x, None,None, U,x, vex5 cpuid(SSE3)), + }; + + if ((get_modrm(s, env) >> 6) == 3) { + *entry = *decode_by_prefix(s, opcodes_0F12_reg); + } else { + *entry = *decode_by_prefix(s, opcodes_0F12_mem); + if ((s->prefix & PREFIX_REPNZ) && s->vex_l) { + entry->s2 = X86_SIZE_qq; + } + } +} + +static void decode_0F16(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F16_mem[4] = { + /* + * Operand 1 technically only reads the low 64 bits, but uses dq so that + * it is easier to check for op0 == op1 in an endianness-neutral manner. + */ + X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq, M,q, vex5), /* MOVHPS */ + X86_OP_ENTRY3(VMOVHPx_ld, V,dq, H,dq, M,q, vex5), /* MOVHPD */ + X86_OP_ENTRY3(VMOVSHDUP, V,x, None,None, W,x, vex4 cpuid(SSE3)), + {}, + }; + static const X86OpEntry opcodes_0F16_reg[4] = { + /* Same as above, operand 1 could be Hq if it wasn't for big-endian. 
*/ + X86_OP_ENTRY3(VMOVLHPS, V,dq, H,dq, U,q, vex7), + X86_OP_ENTRY3(VMOVHPx, V,x, H,x, U,x, vex5), /* MOVHPD */ + X86_OP_ENTRY3(VMOVSHDUP, V,x, None,None, U,x, vex4 cpuid(SSE3)), + {}, + }; + + if ((get_modrm(s, env) >> 6) == 3) { + *entry = *decode_by_prefix(s, opcodes_0F16_reg); + } else { + *entry = *decode_by_prefix(s, opcodes_0F16_mem); + } +} + +static void decode_0F2A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F2A[4] = { + X86_OP_ENTRY3(CVTPI2Px, V,x, None,None, Q,q), + X86_OP_ENTRY3(CVTPI2Px, V,x, None,None, Q,q), + X86_OP_ENTRY3(VCVTSI2Sx, V,x, H,x, E,y, vex3), + X86_OP_ENTRY3(VCVTSI2Sx, V,x, H,x, E,y, vex3), + }; + *entry = *decode_by_prefix(s, opcodes_0F2A); +} + +static void decode_0F2B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F2B[4] = { + X86_OP_ENTRY3(MOVDQ, M,x, None,None, V,x, vex1), /* MOVNTPS */ + X86_OP_ENTRY3(MOVDQ, M,x, None,None, V,x, vex1), /* MOVNTPD */ + /* AMD extensions */ + X86_OP_ENTRY3(VMOVSS_st, M,ss, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSS */ + X86_OP_ENTRY3(VMOVLPx_st, M,sd, None,None, V,x, vex4 cpuid(SSE4A)), /* MOVNTSD */ + }; + + *entry = *decode_by_prefix(s, opcodes_0F2B); +} + +static void decode_0F2C(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F2C[4] = { + /* Listed as ps/pd in the manual, but CVTTPS2PI only reads 64-bit. */ + X86_OP_ENTRY3(CVTTPx2PI, P,q, None,None, W,q), + X86_OP_ENTRY3(CVTTPx2PI, P,q, None,None, W,dq), + X86_OP_ENTRY3(VCVTTSx2SI, G,y, None,None, W,ss, vex3), + X86_OP_ENTRY3(VCVTTSx2SI, G,y, None,None, W,sd, vex3), + }; + *entry = *decode_by_prefix(s, opcodes_0F2C); +} + +static void decode_0F2D(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F2D[4] = { + /* Listed as ps/pd in the manual, but CVTPS2PI only reads 64-bit. 
*/ + X86_OP_ENTRY3(CVTPx2PI, P,q, None,None, W,q), + X86_OP_ENTRY3(CVTPx2PI, P,q, None,None, W,dq), + X86_OP_ENTRY3(VCVTSx2SI, G,y, None,None, W,ss, vex3), + X86_OP_ENTRY3(VCVTSx2SI, G,y, None,None, W,sd, vex3), + }; + *entry = *decode_by_prefix(s, opcodes_0F2D); +} + +static void decode_VxCOMISx(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + /* + * VUCOMISx and VCOMISx are different and use no-prefix and 0x66 for SS and SD + * respectively. Scalar values usually are associated with 0xF2 and 0xF3, for + * which X86_VEX_REPScalar exists, but here it has to be decoded by hand. + */ + entry->s1 = entry->s2 = (s->prefix & PREFIX_DATA ? X86_SIZE_sd : X86_SIZE_ss); + entry->gen = (*b == 0x2E ? gen_VUCOMI : gen_VCOMI); +} + +static void decode_sse_unary(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + if (!(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ))) { + entry->op1 = X86_TYPE_None; + entry->s1 = X86_SIZE_None; + } + switch (*b) { + case 0x51: entry->gen = gen_VSQRT; break; + case 0x52: entry->gen = gen_VRSQRT; break; + case 0x53: entry->gen = gen_VRCP; break; + } +} + +static void decode_0F5A(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F5A[4] = { + X86_OP_ENTRY2(VCVTPS2PD, V,x, W,xh, vex2), /* VCVTPS2PD */ + X86_OP_ENTRY2(VCVTPD2PS, V,x, W,x, vex2), /* VCVTPD2PS */ + X86_OP_ENTRY3(VCVTSS2SD, V,x, H,x, W,x, vex2_rep3), /* VCVTSS2SD */ + X86_OP_ENTRY3(VCVTSD2SS, V,x, H,x, W,x, vex2_rep3), /* VCVTSD2SS */ + }; + *entry = *decode_by_prefix(s, opcodes_0F5A); +} + +static void decode_0F5B(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + static const X86OpEntry opcodes_0F5B[4] = { + X86_OP_ENTRY2(VCVTDQ2PS, V,x, W,x, vex2), + X86_OP_ENTRY2(VCVTPS2DQ, V,x, W,x, vex2), + X86_OP_ENTRY2(VCVTTPS2DQ, V,x, W,x, vex2), + {}, + }; + *entry = *decode_by_prefix(s, opcodes_0F5B); +} + +static void decode_0FE6(DisasContext *s, CPUX86State *env, X86OpEntry *entry, 
uint8_t *b) +{ + static const X86OpEntry opcodes_0FE6[4] = { + {}, + X86_OP_ENTRY2(VCVTTPD2DQ, V,x, W,x, vex2), + X86_OP_ENTRY2(VCVTDQ2PD, V,x, W,x, vex5), + X86_OP_ENTRY2(VCVTPD2DQ, V,x, W,x, vex2), + }; + *entry = *decode_by_prefix(s, opcodes_0FE6); +} + +static const X86OpEntry opcodes_0F[256] = { + [0x0E] = X86_OP_ENTRY0(EMMS, cpuid(3DNOW)), /* femms */ + /* + * 3DNow!'s opcode byte comes *after* modrm and displacements, making it + * more like an Ib operand. Dispatch to the right helper in a single gen_* + * function. + */ + [0x0F] = X86_OP_ENTRY3(3dnow, P,q, Q,q, I,b, cpuid(3DNOW)), + + [0x10] = X86_OP_GROUP0(0F10), + [0x11] = X86_OP_GROUP0(0F11), + [0x12] = X86_OP_GROUP0(0F12), + [0x13] = X86_OP_ENTRY3(VMOVLPx_st, M,q, None,None, V,q, vex5 p_00_66), + [0x14] = X86_OP_ENTRY3(VUNPCKLPx, V,x, H,x, W,x, vex4 p_00_66), + [0x15] = X86_OP_ENTRY3(VUNPCKHPx, V,x, H,x, W,x, vex4 p_00_66), + [0x16] = X86_OP_GROUP0(0F16), + /* Incorrectly listed as Mq,Vq in the manual */ + [0x17] = X86_OP_ENTRY3(VMOVHPx_st, M,q, None,None, V,dq, vex5 p_00_66), + + [0x50] = X86_OP_ENTRY3(MOVMSK, G,y, None,None, U,x, vex7 p_00_66), + [0x51] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), /* sqrtps */ + [0x52] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rsqrtps */ + [0x53] = X86_OP_GROUP3(sse_unary, V,x, H,x, W,x, vex4_rep5 p_00_f3), /* rcpps */ + [0x54] = X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 p_00_66), /* vand */ + [0x55] = X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 p_00_66), /* vandn */ + [0x56] = X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 p_00_66), /* vor */ + [0x57] = X86_OP_ENTRY3(PXOR, V,x, H,x, W,x, vex4 p_00_66), /* vxor */ + + [0x60] = X86_OP_ENTRY3(PUNPCKLBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x61] = X86_OP_ENTRY3(PUNPCKLWD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x62] = X86_OP_ENTRY3(PUNPCKLDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x63] = X86_OP_ENTRY3(PACKSSWB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x64] = 
X86_OP_ENTRY3(PCMPGTB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x65] = X86_OP_ENTRY3(PCMPGTW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x66] = X86_OP_ENTRY3(PCMPGTD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x67] = X86_OP_ENTRY3(PACKUSWB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + + [0x70] = X86_OP_GROUP0(0F70), + [0x71] = X86_OP_GROUP0(group12), + [0x72] = X86_OP_GROUP0(group13), + [0x73] = X86_OP_GROUP0(group14), + [0x74] = X86_OP_ENTRY3(PCMPEQB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x75] = X86_OP_ENTRY3(PCMPEQW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x76] = X86_OP_ENTRY3(PCMPEQD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x77] = X86_OP_GROUP0(0F77), + + [0x28] = X86_OP_ENTRY3(MOVDQ, V,x, None,None, W,x, vex1 p_00_66), /* MOVAPS */ + [0x29] = X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 p_00_66), /* MOVAPS */ + [0x2A] = X86_OP_GROUP0(0F2A), + [0x2B] = X86_OP_GROUP0(0F2B), + [0x2C] = X86_OP_GROUP0(0F2C), + [0x2D] = X86_OP_GROUP0(0F2D), + [0x2E] = X86_OP_GROUP3(VxCOMISx, None,None, V,x, W,x, vex3 p_00_66), /* VUCOMISS/SD */ + [0x2F] = X86_OP_GROUP3(VxCOMISx, None,None, V,x, W,x, vex3 p_00_66), /* VCOMISS/SD */ + + [0x38] = X86_OP_GROUP0(0F38), + [0x3a] = X86_OP_GROUP0(0F3A), + + [0x58] = X86_OP_ENTRY3(VADD, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0x59] = X86_OP_ENTRY3(VMUL, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0x5a] = X86_OP_GROUP0(0F5A), + [0x5b] = X86_OP_GROUP0(0F5B), + [0x5c] = X86_OP_ENTRY3(VSUB, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0x5d] = X86_OP_ENTRY3(VMIN, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0x5e] = X86_OP_ENTRY3(VDIV, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0x5f] = X86_OP_ENTRY3(VMAX, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + + [0x68] = X86_OP_ENTRY3(PUNPCKHBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x69] = X86_OP_ENTRY3(PUNPCKHWD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x6a] = X86_OP_ENTRY3(PUNPCKHDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x6b] = 
X86_OP_ENTRY3(PACKSSDW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0x6c] = X86_OP_ENTRY3(PUNPCKLQDQ, V,x, H,x, W,x, vex4 p_66 avx2_256), + [0x6d] = X86_OP_ENTRY3(PUNPCKHQDQ, V,x, H,x, W,x, vex4 p_66 avx2_256), + [0x6e] = X86_OP_ENTRY3(MOVD_to, V,x, None,None, E,y, vex5 mmx p_00_66), /* wrong dest Vy on SDM! */ + [0x6f] = X86_OP_GROUP0(0F6F), + + [0x78] = X86_OP_GROUP0(0F78), + [0x79] = X86_OP_GROUP2(0F79, V,x, U,x, cpuid(SSE4A)), + [0x7c] = X86_OP_ENTRY3(VHADD, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2), + [0x7d] = X86_OP_ENTRY3(VHSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2), + [0x7e] = X86_OP_GROUP0(0F7E), + [0x7f] = X86_OP_GROUP0(0F7F), + + [0xae] = X86_OP_GROUP0(group15), + + [0xc2] = X86_OP_ENTRY4(VCMP, V,x, H,x, W,x, vex2_rep3 p_00_66_f3_f2), + [0xc4] = X86_OP_ENTRY4(PINSRW, V,dq,H,dq,E,w, vex5 mmx p_00_66), + [0xc5] = X86_OP_ENTRY3(PEXTRW, G,d, U,dq,I,b, vex5 mmx p_00_66), + [0xc6] = X86_OP_ENTRY4(VSHUF, V,x, H,x, W,x, vex4 p_00_66), + + [0xd0] = X86_OP_ENTRY3(VADDSUB, V,x, H,x, W,x, vex2 cpuid(SSE3) p_66_f2), + [0xd1] = X86_OP_ENTRY3(PSRLW_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd2] = X86_OP_ENTRY3(PSRLD_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd3] = X86_OP_ENTRY3(PSRLQ_r, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd4] = X86_OP_ENTRY3(PADDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd5] = X86_OP_ENTRY3(PMULLW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd6] = X86_OP_GROUP0(0FD6), + [0xd7] = X86_OP_ENTRY3(PMOVMSKB, G,d, None,None, U,x, vex7 mmx avx2_256 p_00_66), + + [0xe0] = X86_OP_ENTRY3(PAVGB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xe1] = X86_OP_ENTRY3(PSRAW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xe2] = X86_OP_ENTRY3(PSRAD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xe3] = X86_OP_ENTRY3(PAVGW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xe4] = X86_OP_ENTRY3(PMULHUW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xe5] = X86_OP_ENTRY3(PMULHW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xe6] 
= X86_OP_GROUP0(0FE6), + [0xe7] = X86_OP_ENTRY3(MOVDQ, W,x, None,None, V,x, vex1 mmx p_00_66), /* MOVNTQ/MOVNTDQ */ + + [0xf0] = X86_OP_ENTRY3(MOVDQ, V,x, None,None, WM,x, vex4_unal cpuid(SSE3) p_f2), /* LDDQU */ + [0xf1] = X86_OP_ENTRY3(PSLLW_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xf2] = X86_OP_ENTRY3(PSLLD_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xf3] = X86_OP_ENTRY3(PSLLQ_r, V,x, H,x, W,x, vex7 mmx avx2_256 p_00_66), + [0xf4] = X86_OP_ENTRY3(PMULUDQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xf5] = X86_OP_ENTRY3(PMADDWD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xf6] = X86_OP_ENTRY3(PSADBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xf7] = X86_OP_ENTRY3(MASKMOV, None,None, V,dq, U,dq, vex4_unal avx2_256 mmx p_00_66), + + /* Incorrectly missing from 2-17 */ + [0xd8] = X86_OP_ENTRY3(PSUBUSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xd9] = X86_OP_ENTRY3(PSUBUSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xda] = X86_OP_ENTRY3(PMINUB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xdb] = X86_OP_ENTRY3(PAND, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xdc] = X86_OP_ENTRY3(PADDUSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xdd] = X86_OP_ENTRY3(PADDUSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xde] = X86_OP_ENTRY3(PMAXUB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xdf] = X86_OP_ENTRY3(PANDN, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + + [0xe8] = X86_OP_ENTRY3(PSUBSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xe9] = X86_OP_ENTRY3(PSUBSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xea] = X86_OP_ENTRY3(PMINSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xeb] = X86_OP_ENTRY3(POR, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xec] = X86_OP_ENTRY3(PADDSB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xed] = X86_OP_ENTRY3(PADDSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xee] = X86_OP_ENTRY3(PMAXSW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xef] = X86_OP_ENTRY3(PXOR, V,x, H,x, W,x, vex4 mmx 
avx2_256 p_00_66), + + [0xf8] = X86_OP_ENTRY3(PSUBB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xf9] = X86_OP_ENTRY3(PSUBW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xfa] = X86_OP_ENTRY3(PSUBD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xfb] = X86_OP_ENTRY3(PSUBQ, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xfc] = X86_OP_ENTRY3(PADDB, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xfd] = X86_OP_ENTRY3(PADDW, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + [0xfe] = X86_OP_ENTRY3(PADDD, V,x, H,x, W,x, vex4 mmx avx2_256 p_00_66), + /* 0xff = UD0 */ +}; + +static void do_decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + *entry = opcodes_0F[*b]; +} + +static void decode_0F(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + *b = x86_ldub_code(env, s); + do_decode_0F(s, env, entry, b); +} + +static const X86OpEntry opcodes_root[256] = { + [0x0F] = X86_OP_GROUP0(0F), +}; + +#undef mmx +#undef vex1 +#undef vex2 +#undef vex3 +#undef vex4 +#undef vex4_unal +#undef vex5 +#undef vex6 +#undef vex7 +#undef vex8 +#undef vex11 +#undef vex12 +#undef vex13 + +/* + * Decode the fixed part of the opcode and place the last + * in b. 
+ */ +static void decode_root(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b) +{ + *entry = opcodes_root[*b]; +} + + +static int decode_modrm(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + X86DecodedOp *op, X86OpType type) +{ + int modrm = get_modrm(s, env); + if ((modrm >> 6) == 3) { + op->n = (modrm & 7); + if (type != X86_TYPE_Q && type != X86_TYPE_N) { + op->n |= REX_B(s); + } + } else { + op->has_ea = true; + op->n = -1; + decode->mem = gen_lea_modrm_0(env, s, get_modrm(s, env)); + } + return modrm; +} + +static bool decode_op_size(DisasContext *s, X86OpEntry *e, X86OpSize size, MemOp *ot) +{ + switch (size) { + case X86_SIZE_b: /* byte */ + *ot = MO_8; + return true; + + case X86_SIZE_d: /* 32-bit */ + case X86_SIZE_ss: /* SSE/AVX scalar single precision */ + *ot = MO_32; + return true; + + case X86_SIZE_p: /* Far pointer, return offset size */ + case X86_SIZE_s: /* Descriptor, return offset size */ + case X86_SIZE_v: /* 16/32/64-bit, based on operand size */ + *ot = s->dflag; + return true; + + case X86_SIZE_pi: /* MMX */ + case X86_SIZE_q: /* 64-bit */ + case X86_SIZE_sd: /* SSE/AVX scalar double precision */ + *ot = MO_64; + return true; + + case X86_SIZE_w: /* 16-bit */ + *ot = MO_16; + return true; + + case X86_SIZE_y: /* 32/64-bit, based on operand size */ + *ot = s->dflag == MO_16 ? MO_32 : s->dflag; + return true; + + case X86_SIZE_z: /* 16-bit for 16-bit operand size, else 32-bit */ + *ot = s->dflag == MO_16 ? 
MO_16 : MO_32; + return true; + + case X86_SIZE_dq: /* SSE/AVX 128-bit */ + if (e->special == X86_SPECIAL_MMX && + !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { + *ot = MO_64; + return true; + } + if (s->vex_l && e->s0 != X86_SIZE_qq && e->s1 != X86_SIZE_qq) { + return false; + } + *ot = MO_128; + return true; + + case X86_SIZE_qq: /* AVX 256-bit */ + if (!s->vex_l) { + return false; + } + *ot = MO_256; + return true; + + case X86_SIZE_x: /* 128/256-bit, based on operand size */ + if (e->special == X86_SPECIAL_MMX && + !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { + *ot = MO_64; + return true; + } + /* fall through */ + case X86_SIZE_ps: /* SSE/AVX packed single precision */ + case X86_SIZE_pd: /* SSE/AVX packed double precision */ + *ot = s->vex_l ? MO_256 : MO_128; + return true; + + case X86_SIZE_xh: /* SSE/AVX packed half register */ + *ot = s->vex_l ? MO_128 : MO_64; + return true; + + case X86_SIZE_d64: /* Default to 64-bit in 64-bit mode */ + *ot = CODE64(s) && s->dflag == MO_32 ? MO_64 : s->dflag; + return true; + + case X86_SIZE_f64: /* Ignore size override prefix in 64-bit mode */ + *ot = CODE64(s) ? 
MO_64 : s->dflag; + return true; + + default: + *ot = -1; + return true; + } +} + +static bool decode_op(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + X86DecodedOp *op, X86OpType type, int b) +{ + int modrm; + + switch (type) { + case X86_TYPE_None: /* Implicit or absent */ + case X86_TYPE_A: /* Implicit */ + case X86_TYPE_F: /* EFLAGS/RFLAGS */ + case X86_TYPE_X: /* string source */ + case X86_TYPE_Y: /* string destination */ + break; + + case X86_TYPE_B: /* VEX.vvvv selects a GPR */ + op->unit = X86_OP_INT; + op->n = s->vex_v; + break; + + case X86_TYPE_C: /* REG in the modrm byte selects a control register */ + op->unit = X86_OP_CR; + goto get_reg; + + case X86_TYPE_D: /* REG in the modrm byte selects a debug register */ + op->unit = X86_OP_DR; + goto get_reg; + + case X86_TYPE_G: /* REG in the modrm byte selects a GPR */ + op->unit = X86_OP_INT; + goto get_reg; + + case X86_TYPE_S: /* reg selects a segment register */ + op->unit = X86_OP_SEG; + goto get_reg; + + case X86_TYPE_P: + op->unit = X86_OP_MMX; + goto get_reg; + + case X86_TYPE_V: /* reg in the modrm byte selects an XMM/YMM register */ + if (decode->e.special == X86_SPECIAL_MMX && + !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { + op->unit = X86_OP_MMX; + } else { + op->unit = X86_OP_SSE; + } + get_reg: + op->n = ((get_modrm(s, env) >> 3) & 7) | REX_R(s); + break; + + case X86_TYPE_E: /* ALU modrm operand */ + op->unit = X86_OP_INT; + goto get_modrm; + + case X86_TYPE_Q: /* MMX modrm operand */ + op->unit = X86_OP_MMX; + goto get_modrm; + + case X86_TYPE_W: /* XMM/YMM modrm operand */ + if (decode->e.special == X86_SPECIAL_MMX && + !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { + op->unit = X86_OP_MMX; + } else { + op->unit = X86_OP_SSE; + } + goto get_modrm; + + case X86_TYPE_N: /* R/M in the modrm byte selects an MMX register */ + op->unit = X86_OP_MMX; + goto get_modrm_reg; + + case X86_TYPE_U: /* R/M in the modrm byte selects an XMM/YMM register */ + if 
(decode->e.special == X86_SPECIAL_MMX && + !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { + op->unit = X86_OP_MMX; + } else { + op->unit = X86_OP_SSE; + } + goto get_modrm_reg; + + case X86_TYPE_R: /* R/M in the modrm byte selects a register */ + op->unit = X86_OP_INT; + get_modrm_reg: + modrm = get_modrm(s, env); + if ((modrm >> 6) != 3) { + return false; + } + goto get_modrm; + + case X86_TYPE_WM: /* modrm byte selects an XMM/YMM memory operand */ + op->unit = X86_OP_SSE; + /* fall through */ + case X86_TYPE_M: /* modrm byte selects a memory operand */ + modrm = get_modrm(s, env); + if ((modrm >> 6) == 3) { + return false; + } + get_modrm: + decode_modrm(s, env, decode, op, type); + break; + + case X86_TYPE_O: /* Absolute address encoded in the instruction */ + op->unit = X86_OP_INT; + op->has_ea = true; + op->n = -1; + decode->mem = (AddressParts) { + .def_seg = R_DS, + .base = -1, + .index = -1, + .disp = insn_get_addr(env, s, s->aflag) + }; + break; + + case X86_TYPE_H: /* For AVX, VEX.vvvv selects an XMM/YMM register */ + if ((s->prefix & PREFIX_VEX)) { + op->unit = X86_OP_SSE; + op->n = s->vex_v; + break; + } + if (op == &decode->op[0]) { + /* shifts place the destination in VEX.vvvv, use modrm */ + return decode_op(s, env, decode, op, decode->e.op1, b); + } else { + return decode_op(s, env, decode, op, decode->e.op0, b); + } + + case X86_TYPE_I: /* Immediate */ + case X86_TYPE_J: /* Relative offset for a jump */ + op->unit = X86_OP_IMM; + decode->immediate = insn_get_signed(env, s, op->ot); + break; + + case X86_TYPE_L: /* The upper 4 bits of the immediate select a 128-bit register */ + op->n = insn_get(env, s, op->ot) >> 4; + break; + + case X86_TYPE_2op: + *op = decode->op[0]; + break; + + case X86_TYPE_LoBits: + op->n = (b & 7) | REX_B(s); + op->unit = X86_OP_INT; + break; + + case X86_TYPE_0 ... X86_TYPE_7: + op->n = type - X86_TYPE_0; + op->unit = X86_OP_INT; + break; + + case X86_TYPE_ES ... 
X86_TYPE_GS: + op->n = type - X86_TYPE_ES; + op->unit = X86_OP_SEG; + break; + } + + return true; +} + +static bool validate_sse_prefix(DisasContext *s, X86OpEntry *e) +{ + uint16_t sse_prefixes; + + if (!e->valid_prefix) { + return true; + } + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + /* In SSE instructions, 0xF3 and 0xF2 cancel 0x66. */ + s->prefix &= ~PREFIX_DATA; + } + + /* Now, either zero or one bit is set in sse_prefixes. */ + sse_prefixes = s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA); + return e->valid_prefix & (1 << sse_prefixes); +} + +static bool decode_insn(DisasContext *s, CPUX86State *env, X86DecodeFunc decode_func, + X86DecodedInsn *decode) +{ + X86OpEntry *e = &decode->e; + + decode_func(s, env, e, &decode->b); + while (e->is_decode) { + e->is_decode = false; + e->decode(s, env, e, &decode->b); + } + + if (!validate_sse_prefix(s, e)) { + return false; + } + + /* First compute size of operands in order to initialize s->rip_offset. */ + if (e->op0 != X86_TYPE_None) { + if (!decode_op_size(s, e, e->s0, &decode->op[0].ot)) { + return false; + } + if (e->op0 == X86_TYPE_I) { + s->rip_offset += 1 << decode->op[0].ot; + } + } + if (e->op1 != X86_TYPE_None) { + if (!decode_op_size(s, e, e->s1, &decode->op[1].ot)) { + return false; + } + if (e->op1 == X86_TYPE_I) { + s->rip_offset += 1 << decode->op[1].ot; + } + } + if (e->op2 != X86_TYPE_None) { + if (!decode_op_size(s, e, e->s2, &decode->op[2].ot)) { + return false; + } + if (e->op2 == X86_TYPE_I) { + s->rip_offset += 1 << decode->op[2].ot; + } + } + if (e->op3 != X86_TYPE_None) { + /* + * A couple instructions actually use the extra immediate byte for an Lx + * register operand; those are handled in the gen_* functions as one off. 
+ */ + assert(e->op3 == X86_TYPE_I && e->s3 == X86_SIZE_b); + s->rip_offset += 1; + } + + if (e->op0 != X86_TYPE_None && + !decode_op(s, env, decode, &decode->op[0], e->op0, decode->b)) { + return false; + } + + if (e->op1 != X86_TYPE_None && + !decode_op(s, env, decode, &decode->op[1], e->op1, decode->b)) { + return false; + } + + if (e->op2 != X86_TYPE_None && + !decode_op(s, env, decode, &decode->op[2], e->op2, decode->b)) { + return false; + } + + if (e->op3 != X86_TYPE_None) { + decode->immediate = insn_get_signed(env, s, MO_8); + } + + return true; +} + +static bool has_cpuid_feature(DisasContext *s, X86CPUIDFeature cpuid) +{ + switch (cpuid) { + case X86_FEAT_None: + return true; + case X86_FEAT_F16C: + return (s->cpuid_ext_features & CPUID_EXT_F16C); + case X86_FEAT_FMA: + return (s->cpuid_ext_features & CPUID_EXT_FMA); + case X86_FEAT_MOVBE: + return (s->cpuid_ext_features & CPUID_EXT_MOVBE); + case X86_FEAT_PCLMULQDQ: + return (s->cpuid_ext_features & CPUID_EXT_PCLMULQDQ); + case X86_FEAT_SSE: + return (s->cpuid_ext_features & CPUID_SSE); + case X86_FEAT_SSE2: + return (s->cpuid_ext_features & CPUID_SSE2); + case X86_FEAT_SSE3: + return (s->cpuid_ext_features & CPUID_EXT_SSE3); + case X86_FEAT_SSSE3: + return (s->cpuid_ext_features & CPUID_EXT_SSSE3); + case X86_FEAT_SSE41: + return (s->cpuid_ext_features & CPUID_EXT_SSE41); + case X86_FEAT_SSE42: + return (s->cpuid_ext_features & CPUID_EXT_SSE42); + case X86_FEAT_AES: + if (!(s->cpuid_ext_features & CPUID_EXT_AES)) { + return false; + } else if (!(s->prefix & PREFIX_VEX)) { + return true; + } else if (!(s->cpuid_ext_features & CPUID_EXT_AVX)) { + return false; + } else { + return !s->vex_l || (s->cpuid_7_0_ecx_features & CPUID_7_0_ECX_VAES); + } + + case X86_FEAT_AVX: + return (s->cpuid_ext_features & CPUID_EXT_AVX); + + case X86_FEAT_3DNOW: + return (s->cpuid_ext2_features & CPUID_EXT2_3DNOW); + case X86_FEAT_SSE4A: + return (s->cpuid_ext3_features & CPUID_EXT3_SSE4A); + + case X86_FEAT_ADX: + return 
(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX); + case X86_FEAT_BMI1: + return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1); + case X86_FEAT_BMI2: + return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2); + case X86_FEAT_AVX2: + return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_AVX2); + case X86_FEAT_SHA_NI: + return (s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SHA_NI); + + case X86_FEAT_CMPCCXADD: + return (s->cpuid_7_1_eax_features & CPUID_7_1_EAX_CMPCCXADD); + } + g_assert_not_reached(); +} + +static bool validate_vex(DisasContext *s, X86DecodedInsn *decode) +{ + X86OpEntry *e = &decode->e; + + switch (e->vex_special) { + case X86_VEX_REPScalar: + /* + * Instructions which differ between 00/66 and F2/F3 in the + * exception classification and the size of the memory operand. + */ + assert(e->vex_class == 1 || e->vex_class == 2 || e->vex_class == 4); + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + e->vex_class = e->vex_class < 4 ? 3 : 5; + if (s->vex_l) { + goto illegal; + } + assert(decode->e.s2 == X86_SIZE_x); + if (decode->op[2].has_ea) { + decode->op[2].ot = s->prefix & PREFIX_REPZ ? MO_32 : MO_64; + } + } + break; + + case X86_VEX_SSEUnaligned: + /* handled in sse_needs_alignment. */ + break; + + case X86_VEX_AVX2_256: + if ((s->prefix & PREFIX_VEX) && s->vex_l && !has_cpuid_feature(s, X86_FEAT_AVX2)) { + goto illegal; + } + } + + switch (e->vex_class) { + case 0: + if (s->prefix & PREFIX_VEX) { + goto illegal; + } + return true; + case 1: + case 2: + case 3: + case 4: + case 5: + case 7: + if (s->prefix & PREFIX_VEX) { + if (!(s->flags & HF_AVX_EN_MASK)) { + goto illegal; + } + } else if (e->special != X86_SPECIAL_MMX || + (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) { + if (!(s->flags & HF_OSFXSR_MASK)) { + goto illegal; + } + } + break; + case 12: + /* Must have a VSIB byte and no address prefix. */ + assert(s->has_modrm); + if ((s->modrm & 7) != 4 || s->aflag == MO_16) { + goto illegal; + } + + /* Check no overlap between registers. 
*/ + if (!decode->op[0].has_ea && + (decode->op[0].n == decode->mem.index || decode->op[0].n == decode->op[1].n)) { + goto illegal; + } + assert(!decode->op[1].has_ea); + if (decode->op[1].n == decode->mem.index) { + goto illegal; + } + if (!decode->op[2].has_ea && + (decode->op[2].n == decode->mem.index || decode->op[2].n == decode->op[1].n)) { + goto illegal; + } + /* fall through */ + case 6: + case 11: + if (!(s->prefix & PREFIX_VEX)) { + goto illegal; + } + if (!(s->flags & HF_AVX_EN_MASK)) { + goto illegal; + } + break; + case 8: + /* Non-VEX case handled in decode_0F77. */ + assert(s->prefix & PREFIX_VEX); + if (!(s->flags & HF_AVX_EN_MASK)) { + goto illegal; + } + break; + case 13: + if (!(s->prefix & PREFIX_VEX)) { + goto illegal; + } + if (s->vex_l) { + goto illegal; + } + /* All integer instructions use VEX.vvvv, so exit. */ + return true; + } + + if (s->vex_v != 0 && + e->op0 != X86_TYPE_H && e->op0 != X86_TYPE_B && + e->op1 != X86_TYPE_H && e->op1 != X86_TYPE_B && + e->op2 != X86_TYPE_H && e->op2 != X86_TYPE_B) { + goto illegal; + } + + if (s->flags & HF_TS_MASK) { + goto nm_exception; + } + if (s->flags & HF_EM_MASK) { + goto illegal; + } + + if (e->check) { + if (e->check & X86_CHECK_VEX128) { + if (s->vex_l) { + goto illegal; + } + } + if (e->check & X86_CHECK_W0) { + if (s->vex_w) { + goto illegal; + } + } + if (e->check & X86_CHECK_W1) { + if (!s->vex_w) { + goto illegal; + } + } + } + return true; + +nm_exception: + gen_NM_exception(s); + return false; +illegal: + gen_illegal_opcode(s); + return false; +} + +/* + * Convert one instruction. s->base.is_jmp is set if the translation must + * be stopped. 
+ */ +static void disas_insn_new(DisasContext *s, CPUState *cpu, int b) +{ + CPUX86State *env = cpu_env(cpu); + bool first = true; + X86DecodedInsn decode; + X86DecodeFunc decode_func = decode_root; + uint8_t cc_live; + + s->has_modrm = false; + + next_byte: + if (first) { + first = false; + } else { + b = x86_ldub_code(env, s); + } + /* Collect prefixes. */ + switch (b) { + case 0xf3: + s->prefix |= PREFIX_REPZ; + s->prefix &= ~PREFIX_REPNZ; + goto next_byte; + case 0xf2: + s->prefix |= PREFIX_REPNZ; + s->prefix &= ~PREFIX_REPZ; + goto next_byte; + case 0xf0: + s->prefix |= PREFIX_LOCK; + goto next_byte; + case 0x2e: + s->override = R_CS; + goto next_byte; + case 0x36: + s->override = R_SS; + goto next_byte; + case 0x3e: + s->override = R_DS; + goto next_byte; + case 0x26: + s->override = R_ES; + goto next_byte; + case 0x64: + s->override = R_FS; + goto next_byte; + case 0x65: + s->override = R_GS; + goto next_byte; + case 0x66: + s->prefix |= PREFIX_DATA; + goto next_byte; + case 0x67: + s->prefix |= PREFIX_ADR; + goto next_byte; +#ifdef TARGET_X86_64 + case 0x40 ... 0x4f: + if (CODE64(s)) { + /* REX prefix */ + s->prefix |= PREFIX_REX; + s->vex_w = (b >> 3) & 1; + s->rex_r = (b & 0x4) << 1; + s->rex_x = (b & 0x2) << 2; + s->rex_b = (b & 0x1) << 3; + goto next_byte; + } + break; +#endif + case 0xc5: /* 2-byte VEX */ + case 0xc4: /* 3-byte VEX */ + /* + * VEX prefixes cannot be used except in 32-bit mode. + * Otherwise the instruction is LES or LDS. + */ + if (CODE32(s) && !VM86(s)) { + static const int pp_prefix[4] = { + 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ + }; + int vex3, vex2 = x86_ldub_code(env, s); + + if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) { + /* + * 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b, + * otherwise the instruction is LES or LDS. + */ + s->pc--; /* rewind the advance_pc() x86_ldub_code() did */ + break; + } + + /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. 
*/ + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ + | PREFIX_LOCK | PREFIX_DATA | PREFIX_REX)) { + goto illegal_op; + } +#ifdef TARGET_X86_64 + s->rex_r = (~vex2 >> 4) & 8; +#endif + if (b == 0xc5) { + /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */ + vex3 = vex2; + decode_func = decode_0F; + } else { + /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */ + vex3 = x86_ldub_code(env, s); +#ifdef TARGET_X86_64 + s->rex_x = (~vex2 >> 3) & 8; + s->rex_b = (~vex2 >> 2) & 8; +#endif + s->vex_w = (vex3 >> 7) & 1; + switch (vex2 & 0x1f) { + case 0x01: /* Implied 0f leading opcode bytes. */ + decode_func = decode_0F; + break; + case 0x02: /* Implied 0f 38 leading opcode bytes. */ + decode_func = decode_0F38; + break; + case 0x03: /* Implied 0f 3a leading opcode bytes. */ + decode_func = decode_0F3A; + break; + default: /* Reserved for future use. */ + goto unknown_op; + } + } + s->vex_v = (~vex3 >> 3) & 0xf; + s->vex_l = (vex3 >> 2) & 1; + s->prefix |= pp_prefix[vex3 & 3] | PREFIX_VEX; + } + break; + default: + if (b >= 0x100) { + b -= 0x100; + decode_func = do_decode_0F; + } + break; + } + + /* Post-process prefixes. */ + if (CODE64(s)) { + /* + * In 64-bit mode, the default data size is 32-bit. Select 64-bit + * data with rex_w, and 16-bit data with 0x66; rex_w takes precedence + * over 0x66 if both are present. + */ + s->dflag = (REX_W(s) ? MO_64 : s->prefix & PREFIX_DATA ? MO_16 : MO_32); + /* In 64-bit mode, 0x67 selects 32-bit addressing. */ + s->aflag = (s->prefix & PREFIX_ADR ? MO_32 : MO_64); + } else { + /* In 16/32-bit mode, 0x66 selects the opposite data size. */ + if (CODE32(s) ^ ((s->prefix & PREFIX_DATA) != 0)) { + s->dflag = MO_32; + } else { + s->dflag = MO_16; + } + /* In 16/32-bit mode, 0x67 selects the opposite addressing. 
*/ + if (CODE32(s) ^ ((s->prefix & PREFIX_ADR) != 0)) { + s->aflag = MO_32; + } else { + s->aflag = MO_16; + } + } + + memset(&decode, 0, sizeof(decode)); + decode.cc_op = -1; + decode.b = b; + if (!decode_insn(s, env, decode_func, &decode)) { + goto illegal_op; + } + if (!decode.e.gen) { + goto unknown_op; + } + + if (!has_cpuid_feature(s, decode.e.cpuid)) { + goto illegal_op; + } + + /* Checks that result in #UD come first. */ + if (decode.e.check) { + if (decode.e.check & X86_CHECK_i64) { + if (CODE64(s)) { + goto illegal_op; + } + } + if (decode.e.check & X86_CHECK_o64) { + if (!CODE64(s)) { + goto illegal_op; + } + } + if (decode.e.check & X86_CHECK_prot) { + if (!PE(s) || VM86(s)) { + goto illegal_op; + } + } + } + + switch (decode.e.special) { + case X86_SPECIAL_None: + break; + + case X86_SPECIAL_Locked: + if (decode.op[0].has_ea) { + s->prefix |= PREFIX_LOCK; + } + decode.e.special = X86_SPECIAL_HasLock; + /* fallthrough */ + case X86_SPECIAL_HasLock: + break; + + case X86_SPECIAL_Op0_Rd: + assert(decode.op[0].unit == X86_OP_INT); + if (!decode.op[0].has_ea) { + decode.op[0].ot = MO_32; + } + break; + + case X86_SPECIAL_Op2_Ry: + assert(decode.op[2].unit == X86_OP_INT); + if (!decode.op[2].has_ea) { + decode.op[2].ot = s->dflag == MO_16 ? MO_32 : s->dflag; + } + break; + + case X86_SPECIAL_AVXExtMov: + if (!decode.op[2].has_ea) { + decode.op[2].ot = s->vex_l ? MO_256 : MO_128; + } else if (s->vex_l) { + decode.op[2].ot++; + } + break; + + case X86_SPECIAL_SExtT0: + case X86_SPECIAL_ZExtT0: + /* Handled in gen_load. */ + assert(decode.op[1].unit == X86_OP_INT); + break; + + default: + break; + } + + if (s->prefix & PREFIX_LOCK) { + if (decode.e.special != X86_SPECIAL_HasLock || !decode.op[0].has_ea) { + goto illegal_op; + } + } + + if (!validate_vex(s, &decode)) { + return; + } + + /* + * Checks that result in #GP or VMEXIT come second. Intercepts are + * generally checked after non-memory exceptions (i.e. 
before all + * exceptions if there is no memory operand). Exceptions are + * vm86 checks (INTn, IRET, PUSHF/POPF), RSM and XSETBV (!). + * + * RSM and XSETBV will be handled in the gen_* functions + * instead of using chk(). + */ + if (decode.e.check & X86_CHECK_cpl0) { + if (CPL(s) != 0) { + goto gp_fault; + } + } + if (decode.e.intercept && unlikely(GUEST(s))) { + gen_helper_svm_check_intercept(tcg_env, + tcg_constant_i32(decode.e.intercept)); + } + if (decode.e.check) { + if ((decode.e.check & X86_CHECK_vm86_iopl) && VM86(s)) { + if (IOPL(s) < 3) { + goto gp_fault; + } + } else if (decode.e.check & X86_CHECK_cpl_iopl) { + if (IOPL(s) < CPL(s)) { + goto gp_fault; + } + } + } + + if (decode.e.special == X86_SPECIAL_MMX && + !(s->prefix & (PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA))) { + gen_helper_enter_mmx(tcg_env); + } + + if (decode.op[0].has_ea || decode.op[1].has_ea || decode.op[2].has_ea) { + gen_load_ea(s, &decode.mem, decode.e.vex_class == 12); + } + if (s->prefix & PREFIX_LOCK) { + gen_load(s, &decode, 2, s->T1); + decode.e.gen(s, env, &decode); + } else { + if (decode.op[0].unit == X86_OP_MMX) { + compute_mmx_offset(&decode.op[0]); + } else if (decode.op[0].unit == X86_OP_SSE) { + compute_xmm_offset(&decode.op[0]); + } + gen_load(s, &decode, 1, s->T0); + gen_load(s, &decode, 2, s->T1); + decode.e.gen(s, env, &decode); + gen_writeback(s, &decode, 0, s->T0); + } + + /* + * Write back flags after last memory access. Some newer ALU instructions, as + * well as SSE instructions, write flags in the gen_* function, but that can + * cause incorrect tracking of CC_OP for instructions that write to both memory + * and flags. 
+ */ + if (decode.cc_op != -1) { + if (decode.cc_dst) { + tcg_gen_mov_tl(cpu_cc_dst, decode.cc_dst); + } + if (decode.cc_src) { + tcg_gen_mov_tl(cpu_cc_src, decode.cc_src); + } + if (decode.cc_src2) { + tcg_gen_mov_tl(cpu_cc_src2, decode.cc_src2); + } + if (decode.cc_op == CC_OP_DYNAMIC) { + tcg_gen_mov_i32(cpu_cc_op, decode.cc_op_dynamic); + } + set_cc_op(s, decode.cc_op); + cc_live = cc_op_live[decode.cc_op]; + } else { + cc_live = 0; + } + if (decode.cc_op != CC_OP_DYNAMIC) { + assert(!decode.cc_op_dynamic); + assert(!!decode.cc_dst == !!(cc_live & USES_CC_DST)); + assert(!!decode.cc_src == !!(cc_live & USES_CC_SRC)); + assert(!!decode.cc_src2 == !!(cc_live & USES_CC_SRC2)); + } + + return; + gp_fault: + gen_exception_gpf(s); + return; + illegal_op: + gen_illegal_opcode(s); + return; + unknown_op: + gen_unknown_opcode(env, s); +} diff --git a/target/i386/tcg/decode-new.h b/target/i386/tcg/decode-new.h new file mode 100644 index 0000000000..15e6bfef4b --- /dev/null +++ b/target/i386/tcg/decode-new.h @@ -0,0 +1,293 @@ +/* + * Decode table flags, mostly based on Intel SDM. + * + * Copyright (c) 2022 Red Hat, Inc. + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +typedef enum X86OpType { + X86_TYPE_None, + + X86_TYPE_A, /* Implicit */ + X86_TYPE_B, /* VEX.vvvv selects a GPR */ + X86_TYPE_C, /* REG in the modrm byte selects a control register */ + X86_TYPE_D, /* REG in the modrm byte selects a debug register */ + X86_TYPE_E, /* ALU modrm operand */ + X86_TYPE_F, /* EFLAGS/RFLAGS */ + X86_TYPE_G, /* REG in the modrm byte selects a GPR */ + X86_TYPE_H, /* For AVX, VEX.vvvv selects an XMM/YMM register */ + X86_TYPE_I, /* Immediate */ + X86_TYPE_J, /* Relative offset for a jump */ + X86_TYPE_L, /* The upper 4 bits of the immediate select a 128-bit register */ + X86_TYPE_M, /* modrm byte selects a memory operand */ + X86_TYPE_N, /* R/M in the modrm byte selects an MMX register */ + X86_TYPE_O, /* Absolute address encoded in the instruction */ + X86_TYPE_P, /* reg in the modrm byte selects an MMX register */ + X86_TYPE_Q, /* MMX modrm operand */ + X86_TYPE_R, /* R/M in the modrm byte selects a register */ + X86_TYPE_S, /* reg selects a segment register */ + X86_TYPE_U, /* R/M in the modrm byte selects an XMM/YMM register */ + X86_TYPE_V, /* reg in the modrm byte selects an XMM/YMM register */ + X86_TYPE_W, /* XMM/YMM modrm operand */ + X86_TYPE_X, /* string source */ + X86_TYPE_Y, /* string destination */ + + /* Custom */ + X86_TYPE_WM, /* modrm byte selects an XMM/YMM memory operand */ + X86_TYPE_2op, /* 2-operand RMW instruction */ + X86_TYPE_LoBits, /* encoded in bits 0-2 of the operand + REX.B */ + X86_TYPE_0, /* Hard-coded GPRs (RAX..RDI) */ + X86_TYPE_1, + X86_TYPE_2, + X86_TYPE_3, + X86_TYPE_4, + X86_TYPE_5, + X86_TYPE_6, + X86_TYPE_7, + X86_TYPE_ES, /* Hard-coded segment registers */ + X86_TYPE_CS, + X86_TYPE_SS, + X86_TYPE_DS, + X86_TYPE_FS, + X86_TYPE_GS, +} X86OpType; + +typedef enum X86OpSize { + X86_SIZE_None, + + X86_SIZE_a, /* BOUND operand */ + X86_SIZE_b, /* byte */ + X86_SIZE_d, /* 32-bit */ + X86_SIZE_dq, /* SSE/AVX 128-bit */ + X86_SIZE_p, /* Far pointer */ + X86_SIZE_pd, /* SSE/AVX packed double 
precision */ + X86_SIZE_pi, /* MMX */ + X86_SIZE_ps, /* SSE/AVX packed single precision */ + X86_SIZE_q, /* 64-bit */ + X86_SIZE_qq, /* AVX 256-bit */ + X86_SIZE_s, /* Descriptor */ + X86_SIZE_sd, /* SSE/AVX scalar double precision */ + X86_SIZE_ss, /* SSE/AVX scalar single precision */ + X86_SIZE_si, /* 32-bit GPR */ + X86_SIZE_v, /* 16/32/64-bit, based on operand size */ + X86_SIZE_w, /* 16-bit */ + X86_SIZE_x, /* 128/256-bit, based on operand size */ + X86_SIZE_y, /* 32/64-bit, based on operand size */ + X86_SIZE_z, /* 16-bit for 16-bit operand size, else 32-bit */ + + /* Custom */ + X86_SIZE_d64, + X86_SIZE_f64, + X86_SIZE_xh, /* SSE/AVX packed half register */ +} X86OpSize; + +typedef enum X86CPUIDFeature { + X86_FEAT_None, + X86_FEAT_3DNOW, + X86_FEAT_ADX, + X86_FEAT_AES, + X86_FEAT_AVX, + X86_FEAT_AVX2, + X86_FEAT_BMI1, + X86_FEAT_BMI2, + X86_FEAT_CMPCCXADD, + X86_FEAT_F16C, + X86_FEAT_FMA, + X86_FEAT_MOVBE, + X86_FEAT_PCLMULQDQ, + X86_FEAT_SHA_NI, + X86_FEAT_SSE, + X86_FEAT_SSE2, + X86_FEAT_SSE3, + X86_FEAT_SSSE3, + X86_FEAT_SSE41, + X86_FEAT_SSE42, + X86_FEAT_SSE4A, +} X86CPUIDFeature; + +/* Execution flags */ + +typedef enum X86OpUnit { + X86_OP_SKIP, /* not valid or managed by emission function */ + X86_OP_SEG, /* segment selector */ + X86_OP_CR, /* control register */ + X86_OP_DR, /* debug register */ + X86_OP_INT, /* loaded into/stored from s->T0/T1 */ + X86_OP_IMM, /* immediate */ + X86_OP_SSE, /* address in either s->ptrX or s->A0 depending on has_ea */ + X86_OP_MMX, /* address in either s->ptrX or s->A0 depending on has_ea */ +} X86OpUnit; + +typedef enum X86InsnCheck { + /* Illegal or exclusive to 64-bit mode */ + X86_CHECK_i64 = 1, + X86_CHECK_o64 = 2, + + /* Fault outside protected mode */ + X86_CHECK_prot = 4, + + /* Privileged instruction checks */ + X86_CHECK_cpl0 = 8, + X86_CHECK_vm86_iopl = 16, + X86_CHECK_cpl_iopl = 32, + X86_CHECK_iopl = X86_CHECK_cpl_iopl | X86_CHECK_vm86_iopl, + + /* Fault if VEX.L=1 */ + X86_CHECK_VEX128 = 64, + + /* 
Fault if VEX.W=1 */ + X86_CHECK_W0 = 128, + + /* Fault if VEX.W=0 */ + X86_CHECK_W1 = 256, +} X86InsnCheck; + +typedef enum X86InsnSpecial { + X86_SPECIAL_None, + + /* Accepts LOCK prefix; LOCKed operations do not load or writeback operand 0 */ + X86_SPECIAL_HasLock, + + /* Always locked if it has a memory operand (XCHG) */ + X86_SPECIAL_Locked, + + /* + * Rd/Mb or Rd/Mw in the manual: register operand 0 is treated as 32 bits + * (and writeback zero-extends it to 64 bits if applicable). PREFIX_DATA + * does not trigger 16-bit writeback and, as a side effect, high-byte + * registers are never used. + */ + X86_SPECIAL_Op0_Rd, + + /* + * Ry/Mb in the manual (PINSRB). However, the high bits are never used by + * the instruction in either the register or memory cases; the *real* effect + * of this modifier is that high-byte registers are never used, even without + * a REX prefix. Therefore, PINSRW does not need it despite having Ry/Mw. + */ + X86_SPECIAL_Op2_Ry, + + /* + * Register operand 2 is extended to full width, while a memory operand + * is doubled in size if VEX.L=1. + */ + X86_SPECIAL_AVXExtMov, + + /* + * MMX instruction exists with no prefix; if there is no prefix, V/H/W/U operands + * become P/P/Q/N, and size "x" becomes "q". + */ + X86_SPECIAL_MMX, + + /* When loaded into s->T0, register operand 1 is zero/sign extended. */ + X86_SPECIAL_SExtT0, + X86_SPECIAL_ZExtT0, +} X86InsnSpecial; + +/* + * Special cases for instructions that operate on XMM/YMM registers. Intel + * retconned all of them to have VEX exception classes other than 0 and 13, so + * all these only matter for instructions that have a VEX exception class. + * Based on tables in the "AVX and SSE Instruction Exception Specification" + * section of the manual. 
+ */ +typedef enum X86VEXSpecial { + /* Legacy SSE instructions that allow unaligned operands */ + X86_VEX_SSEUnaligned, + + /* + * Used for instructions that distinguish the XMM operand type with an + * instruction prefix; legacy SSE encodings will allow unaligned operands + * for scalar operands only (identified by a REP prefix). In this case, + * the decoding table uses "x" for the vector operands instead of specifying + * pd/ps/sd/ss individually. + */ + X86_VEX_REPScalar, + + /* + * VEX instructions that only support 256-bit operands with AVX2 (Table 2-17 + * column 3). Columns 2 and 4 (instructions limited to 256- and 128-bit + * operands respectively) are implicit in the presence of qq and dq + * operands, and thus handled by decode_op_size. + */ + X86_VEX_AVX2_256, +} X86VEXSpecial; + + +typedef struct X86OpEntry X86OpEntry; +typedef struct X86DecodedInsn X86DecodedInsn; + +/* Decode function for multibyte opcodes. */ +typedef void (*X86DecodeFunc)(DisasContext *s, CPUX86State *env, X86OpEntry *entry, uint8_t *b); + +/* Code generation function. */ +typedef void (*X86GenFunc)(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode); + +struct X86OpEntry { + /* Based on the is_decode flags. */ + union { + X86GenFunc gen; + X86DecodeFunc decode; + }; + /* op0 is always written, op1 and op2 are always read. */ + X86OpType op0:8; + X86OpSize s0:8; + X86OpType op1:8; + X86OpSize s1:8; + X86OpType op2:8; + X86OpSize s2:8; + /* Must be I and b respectively if present. 
*/ + X86OpType op3:8; + X86OpSize s3:8; + + X86InsnSpecial special:8; + X86CPUIDFeature cpuid:8; + unsigned vex_class:8; + X86VEXSpecial vex_special:8; + unsigned valid_prefix:16; + unsigned check:16; + unsigned intercept:8; + bool is_decode:1; +}; + +typedef struct X86DecodedOp { + int8_t n; + MemOp ot; /* For b/c/d/p/s/q/v/w/y/z */ + X86OpUnit unit; + bool has_ea; + int offset; /* For MMX and SSE */ + + /* + * This field is used internally by macros OP0_PTR/OP1_PTR/OP2_PTR, + * do not access directly! + */ + TCGv_ptr v_ptr; +} X86DecodedOp; + +struct X86DecodedInsn { + X86OpEntry e; + X86DecodedOp op[3]; + target_ulong immediate; + AddressParts mem; + + TCGv cc_dst, cc_src, cc_src2; + TCGv_i32 cc_op_dynamic; + int8_t cc_op; + + uint8_t b; +}; + diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc new file mode 100644 index 0000000000..6bcf88ecd7 --- /dev/null +++ b/target/i386/tcg/emit.c.inc @@ -0,0 +1,2493 @@ +/* + * New-style TCG opcode generator for i386 instructions + * + * Copyright (c) 2022 Red Hat, Inc. + * + * Author: Paolo Bonzini <pbonzini@redhat.com> + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#define ZMM_OFFSET(reg) offsetof(CPUX86State, xmm_regs[reg]) + +typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg); +typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg); +typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b); +typedef void (*SSEFunc_0_eppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, + TCGv_ptr reg_c); +typedef void (*SSEFunc_0_epppp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, + TCGv_ptr reg_c, TCGv_ptr reg_d); +typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, + TCGv_i32 val); +typedef void (*SSEFunc_0_epppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, + TCGv_ptr reg_c, TCGv_i32 val); +typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val); +typedef void (*SSEFunc_0_pppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_ptr reg_c, + TCGv_i32 val); +typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, + TCGv val); +typedef void (*SSEFunc_0_epppti)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, + TCGv_ptr reg_c, TCGv a0, TCGv_i32 scale); +typedef void (*SSEFunc_0_eppppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, + TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 flags); +typedef void (*SSEFunc_0_eppppii)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, + TCGv_ptr reg_c, TCGv_ptr reg_d, TCGv_i32 even, + TCGv_i32 odd); + +static inline TCGv_i32 tcg_constant8u_i32(uint8_t val) +{ + return tcg_constant_i32(val); +} + +static void gen_NM_exception(DisasContext *s) +{ + gen_exception(s, EXCP07_PREX); +} + +static void gen_load_ea(DisasContext *s, AddressParts *mem, bool is_vsib) +{ + TCGv ea = gen_lea_modrm_1(s, *mem, is_vsib); + gen_lea_v_seg(s, s->aflag, ea, mem->def_seg, s->override); +} + +static inline int mmx_offset(MemOp ot) +{ + switch (ot) { + case MO_8: + return offsetof(MMXReg, MMX_B(0)); + case MO_16: + return offsetof(MMXReg, MMX_W(0)); + case MO_32: + return offsetof(MMXReg, MMX_L(0)); + case MO_64: + return 
offsetof(MMXReg, MMX_Q(0)); + default: + g_assert_not_reached(); + } +} + +/* Byte offset, within a ZMMReg, of element 0 of an operand of size ot. */ static inline int xmm_offset(MemOp ot) +{ + switch (ot) { + case MO_8: + return offsetof(ZMMReg, ZMM_B(0)); + case MO_16: + return offsetof(ZMMReg, ZMM_W(0)); + case MO_32: + return offsetof(ZMMReg, ZMM_L(0)); + case MO_64: + return offsetof(ZMMReg, ZMM_Q(0)); + case MO_128: + return offsetof(ZMMReg, ZMM_X(0)); + case MO_256: + return offsetof(ZMMReg, ZMM_Y(0)); + default: + g_assert_not_reached(); + } +} + +/* Offset of the start of the operand's MMXReg/ZMMReg: op->offset minus the element-0 offset for op->ot that compute_mmx_offset/compute_xmm_offset added. Only valid for MMX and SSE operands. */ static int vector_reg_offset(X86DecodedOp *op) +{ + assert(op->unit == X86_OP_MMX || op->unit == X86_OP_SSE); + + if (op->unit == X86_OP_MMX) { + return op->offset - mmx_offset(op->ot); + } else { + return op->offset - xmm_offset(op->ot); + } +} + +/* Offset of element n of size ot inside the vector register of op, on the same base as op->offset. 128- and 256-bit elements are only accepted for SSE operands. */ static int vector_elem_offset(X86DecodedOp *op, MemOp ot, int n) +{ + int base_ofs = vector_reg_offset(op); + switch(ot) { + case MO_8: + if (op->unit == X86_OP_MMX) { + return base_ofs + offsetof(MMXReg, MMX_B(n)); + } else { + return base_ofs + offsetof(ZMMReg, ZMM_B(n)); + } + case MO_16: + if (op->unit == X86_OP_MMX) { + return base_ofs + offsetof(MMXReg, MMX_W(n)); + } else { + return base_ofs + offsetof(ZMMReg, ZMM_W(n)); + } + case MO_32: + if (op->unit == X86_OP_MMX) { + return base_ofs + offsetof(MMXReg, MMX_L(n)); + } else { + return base_ofs + offsetof(ZMMReg, ZMM_L(n)); + } + case MO_64: + if (op->unit == X86_OP_MMX) { + /* n is not used for a 64-bit element of an MMX operand. */ + return base_ofs; + } else { + return base_ofs + offsetof(ZMMReg, ZMM_Q(n)); + } + case MO_128: + assert(op->unit == X86_OP_SSE); + return base_ofs + offsetof(ZMMReg, ZMM_X(n)); + case MO_256: + assert(op->unit == X86_OP_SSE); + return base_ofs + offsetof(ZMMReg, ZMM_Y(n)); + default: + g_assert_not_reached(); + } +} + +/* Set op->offset, relative to CPUX86State, for an MMX operand: register operands use fpregs[op->n].mmx, operands with an effective address use mmx_t0. */ static void compute_mmx_offset(X86DecodedOp *op) +{ + if (!op->has_ea) { + op->offset = offsetof(CPUX86State, fpregs[op->n].mmx) + mmx_offset(op->ot); + } else { + op->offset = offsetof(CPUX86State, mmx_t0) + mmx_offset(op->ot); + } +} + +static void compute_xmm_offset(X86DecodedOp *op) +{ + if 
(!op->has_ea) { + op->offset = ZMM_OFFSET(op->n) + xmm_offset(op->ot); + } else { + op->offset = offsetof(CPUX86State, xmm_t0) + xmm_offset(op->ot); + } +} + +static void gen_load_sse(DisasContext *s, TCGv temp, MemOp ot, int dest_ofs, bool aligned) +{ + switch(ot) { + case MO_8: + gen_op_ld_v(s, MO_8, temp, s->A0); + tcg_gen_st8_tl(temp, tcg_env, dest_ofs); + break; + case MO_16: + gen_op_ld_v(s, MO_16, temp, s->A0); + tcg_gen_st16_tl(temp, tcg_env, dest_ofs); + break; + case MO_32: + gen_op_ld_v(s, MO_32, temp, s->A0); + tcg_gen_st32_tl(temp, tcg_env, dest_ofs); + break; + case MO_64: + gen_ldq_env_A0(s, dest_ofs); + break; + case MO_128: + gen_ldo_env_A0(s, dest_ofs, aligned); + break; + case MO_256: + gen_ldy_env_A0(s, dest_ofs, aligned); + break; + default: + g_assert_not_reached(); + } +} + +static bool sse_needs_alignment(DisasContext *s, X86DecodedInsn *decode, MemOp ot) +{ + switch (decode->e.vex_class) { + case 2: + case 4: + if ((s->prefix & PREFIX_VEX) || + decode->e.vex_special == X86_VEX_SSEUnaligned) { + /* MOST legacy SSE instructions require aligned memory operands, but not all. 
*/ + return false; + } + /* fall through */ + case 1: + return ot >= MO_128; + + default: + return false; + } +} + +static void gen_load(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v) +{ + X86DecodedOp *op = &decode->op[opn]; + + switch (op->unit) { + case X86_OP_SKIP: + return; + case X86_OP_SEG: + tcg_gen_ld32u_tl(v, tcg_env, + offsetof(CPUX86State,segs[op->n].selector)); + break; + case X86_OP_CR: + tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, cr[op->n])); + break; + case X86_OP_DR: + tcg_gen_ld_tl(v, tcg_env, offsetof(CPUX86State, dr[op->n])); + break; + case X86_OP_INT: + if (op->has_ea) { + if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) { + gen_op_ld_v(s, op->ot | MO_SIGN, v, s->A0); + } else { + gen_op_ld_v(s, op->ot, v, s->A0); + } + + } else if (op->ot == MO_8 && byte_reg_is_xH(s, op->n)) { + if (v == s->T0 && decode->e.special == X86_SPECIAL_SExtT0) { + tcg_gen_sextract_tl(v, cpu_regs[op->n - 4], 8, 8); + } else { + tcg_gen_extract_tl(v, cpu_regs[op->n - 4], 8, 8); + } + + } else if (op->ot < MO_TL && v == s->T0 && + (decode->e.special == X86_SPECIAL_SExtT0 || + decode->e.special == X86_SPECIAL_ZExtT0)) { + if (decode->e.special == X86_SPECIAL_SExtT0) { + tcg_gen_ext_tl(v, cpu_regs[op->n], op->ot | MO_SIGN); + } else { + tcg_gen_ext_tl(v, cpu_regs[op->n], op->ot); + } + + } else { + tcg_gen_mov_tl(v, cpu_regs[op->n]); + } + break; + case X86_OP_IMM: + tcg_gen_movi_tl(v, decode->immediate); + break; + + case X86_OP_MMX: + compute_mmx_offset(op); + goto load_vector; + + case X86_OP_SSE: + compute_xmm_offset(op); + load_vector: + if (op->has_ea) { + bool aligned = sse_needs_alignment(s, decode, op->ot); + gen_load_sse(s, v, op->ot, op->offset, aligned); + } + break; + + default: + g_assert_not_reached(); + } +} + +static TCGv_ptr op_ptr(X86DecodedInsn *decode, int opn) +{ + X86DecodedOp *op = &decode->op[opn]; + if (op->v_ptr) { + return op->v_ptr; + } + op->v_ptr = tcg_temp_new_ptr(); + + /* The temporary points to the MMXReg or 
ZMMReg. */ + tcg_gen_addi_ptr(op->v_ptr, tcg_env, vector_reg_offset(op)); + return op->v_ptr; +} + +#define OP_PTR0 op_ptr(decode, 0) +#define OP_PTR1 op_ptr(decode, 1) +#define OP_PTR2 op_ptr(decode, 2) + +static void gen_writeback(DisasContext *s, X86DecodedInsn *decode, int opn, TCGv v) +{ + X86DecodedOp *op = &decode->op[opn]; + switch (op->unit) { + case X86_OP_SKIP: + break; + case X86_OP_SEG: + /* Note that gen_movl_seg_T0 takes care of interrupt shadow and TF. */ + gen_movl_seg_T0(s, op->n); + break; + case X86_OP_INT: + if (op->has_ea) { + gen_op_st_v(s, op->ot, v, s->A0); + } else { + gen_op_mov_reg_v(s, op->ot, op->n, v); + } + break; + case X86_OP_MMX: + break; + case X86_OP_SSE: + if (!op->has_ea && (s->prefix & PREFIX_VEX) && op->ot <= MO_128) { + tcg_gen_gvec_dup_imm(MO_64, + offsetof(CPUX86State, xmm_regs[op->n].ZMM_X(1)), + 16, 16, 0); + } + break; + case X86_OP_CR: + case X86_OP_DR: + default: + g_assert_not_reached(); + } +} + +static inline int vector_len(DisasContext *s, X86DecodedInsn *decode) +{ + if (decode->e.special == X86_SPECIAL_MMX && + !(s->prefix & (PREFIX_DATA | PREFIX_REPZ | PREFIX_REPNZ))) { + return 8; + } + return s->vex_l ? 
32 : 16; +} + +static void prepare_update1_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op) +{ + decode->cc_dst = s->T0; + decode->cc_op = op; +} + +static void prepare_update2_cc(X86DecodedInsn *decode, DisasContext *s, CCOp op) +{ + decode->cc_src = s->T1; + decode->cc_dst = s->T0; + decode->cc_op = op; +} + +static void gen_store_sse(DisasContext *s, X86DecodedInsn *decode, int src_ofs) +{ + MemOp ot = decode->op[0].ot; + int vec_len = vector_len(s, decode); + bool aligned = sse_needs_alignment(s, decode, ot); + + if (!decode->op[0].has_ea) { + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, src_ofs, vec_len, vec_len); + return; + } + + switch (ot) { + case MO_64: + gen_stq_env_A0(s, src_ofs); + break; + case MO_128: + gen_sto_env_A0(s, src_ofs, aligned); + break; + case MO_256: + gen_sty_env_A0(s, src_ofs, aligned); + break; + default: + g_assert_not_reached(); + } +} + +static void gen_helper_pavgusb(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b) +{ + gen_helper_pavgb_mmx(env, reg_a, reg_a, reg_b); +} + +#define FN_3DNOW_MOVE ((SSEFunc_0_epp) (uintptr_t) 1) +static const SSEFunc_0_epp fns_3dnow[] = { + [0x0c] = gen_helper_pi2fw, + [0x0d] = gen_helper_pi2fd, + [0x1c] = gen_helper_pf2iw, + [0x1d] = gen_helper_pf2id, + [0x8a] = gen_helper_pfnacc, + [0x8e] = gen_helper_pfpnacc, + [0x90] = gen_helper_pfcmpge, + [0x94] = gen_helper_pfmin, + [0x96] = gen_helper_pfrcp, + [0x97] = gen_helper_pfrsqrt, + [0x9a] = gen_helper_pfsub, + [0x9e] = gen_helper_pfadd, + [0xa0] = gen_helper_pfcmpgt, + [0xa4] = gen_helper_pfmax, + [0xa6] = FN_3DNOW_MOVE, /* PFRCPIT1; no need to actually increase precision */ + [0xa7] = FN_3DNOW_MOVE, /* PFRSQIT1 */ + [0xb6] = FN_3DNOW_MOVE, /* PFRCPIT2 */ + [0xaa] = gen_helper_pfsubr, + [0xae] = gen_helper_pfacc, + [0xb0] = gen_helper_pfcmpeq, + [0xb4] = gen_helper_pfmul, + [0xb7] = gen_helper_pmulhrw_mmx, + [0xbb] = gen_helper_pswapd, + [0xbf] = gen_helper_pavgusb, +}; + +static void gen_3dnow(DisasContext *s, CPUX86State *env, X86DecodedInsn 
*decode) +{ + uint8_t b = decode->immediate; + SSEFunc_0_epp fn = b < ARRAY_SIZE(fns_3dnow) ? fns_3dnow[b] : NULL; + + if (!fn) { + gen_illegal_opcode(s); + return; + } + if (s->flags & HF_TS_MASK) { + gen_NM_exception(s); + return; + } + if (s->flags & HF_EM_MASK) { + gen_illegal_opcode(s); + return; + } + + gen_helper_enter_mmx(tcg_env); + if (fn == FN_3DNOW_MOVE) { + tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset); + tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset); + } else { + fn(tcg_env, OP_PTR0, OP_PTR1); + } +} + +/* + * 00 = v*ps Vps, Hps, Wpd + * 66 = v*pd Vpd, Hpd, Wps + * f3 = v*ss Vss, Hss, Wps + * f2 = v*sd Vsd, Hsd, Wps + */ +static inline void gen_unary_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + SSEFunc_0_epp pd_xmm, SSEFunc_0_epp ps_xmm, + SSEFunc_0_epp pd_ymm, SSEFunc_0_epp ps_ymm, + SSEFunc_0_eppp sd, SSEFunc_0_eppp ss) +{ + if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) != 0) { + SSEFunc_0_eppp fn = s->prefix & PREFIX_REPZ ? ss : sd; + if (!fn) { + gen_illegal_opcode(s); + return; + } + fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); + } else { + SSEFunc_0_epp ps, pd, fn; + ps = s->vex_l ? ps_ymm : ps_xmm; + pd = s->vex_l ? pd_ymm : pd_xmm; + fn = s->prefix & PREFIX_DATA ? 
pd : ps; + if (!fn) { + gen_illegal_opcode(s); + return; + } + fn(tcg_env, OP_PTR0, OP_PTR2); + } +} +#define UNARY_FP_SSE(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_unary_fp_sse(s, env, decode, \ + gen_helper_##lname##pd_xmm, \ + gen_helper_##lname##ps_xmm, \ + gen_helper_##lname##pd_ymm, \ + gen_helper_##lname##ps_ymm, \ + gen_helper_##lname##sd, \ + gen_helper_##lname##ss); \ +} +UNARY_FP_SSE(VSQRT, sqrt) + +/* + * 00 = v*ps Vps, Hps, Wpd + * 66 = v*pd Vpd, Hpd, Wps + * f3 = v*ss Vss, Hss, Wps + * f2 = v*sd Vsd, Hsd, Wps + */ +static inline void gen_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + SSEFunc_0_eppp pd_xmm, SSEFunc_0_eppp ps_xmm, + SSEFunc_0_eppp pd_ymm, SSEFunc_0_eppp ps_ymm, + SSEFunc_0_eppp sd, SSEFunc_0_eppp ss) +{ + SSEFunc_0_eppp ps, pd, fn; + if ((s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) != 0) { + fn = s->prefix & PREFIX_REPZ ? ss : sd; + } else { + ps = s->vex_l ? ps_ymm : ps_xmm; + pd = s->vex_l ? pd_ymm : pd_xmm; + fn = s->prefix & PREFIX_DATA ? pd : ps; + } + if (fn) { + fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); + } else { + gen_illegal_opcode(s); + } +} + +#define FP_SSE(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_fp_sse(s, env, decode, \ + gen_helper_##lname##pd_xmm, \ + gen_helper_##lname##ps_xmm, \ + gen_helper_##lname##pd_ymm, \ + gen_helper_##lname##ps_ymm, \ + gen_helper_##lname##sd, \ + gen_helper_##lname##ss); \ +} +FP_SSE(VADD, add) +FP_SSE(VMUL, mul) +FP_SSE(VSUB, sub) +FP_SSE(VMIN, min) +FP_SSE(VDIV, div) +FP_SSE(VMAX, max) + +#define FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, even, odd) \ +static void gen_##uname##Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + SSEFunc_0_eppppii xmm = s->vex_w ? gen_helper_fma4pd_xmm : gen_helper_fma4ps_xmm; \ + SSEFunc_0_eppppii ymm = s->vex_w ? 
gen_helper_fma4pd_ymm : gen_helper_fma4ps_ymm; \ + SSEFunc_0_eppppii fn = s->vex_l ? ymm : xmm; \ + \ + fn(tcg_env, OP_PTR0, ptr0, ptr1, ptr2, \ + tcg_constant_i32(even), \ + tcg_constant_i32((even) ^ (odd))); \ +} + +#define FMA_SSE(uname, ptr0, ptr1, ptr2, flags) \ +FMA_SSE_PACKED(uname, ptr0, ptr1, ptr2, flags, flags) \ +static void gen_##uname##Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + SSEFunc_0_eppppi fn = s->vex_w ? gen_helper_fma4sd : gen_helper_fma4ss; \ + \ + fn(tcg_env, OP_PTR0, ptr0, ptr1, ptr2, \ + tcg_constant_i32(flags)); \ +} \ + +FMA_SSE(VFMADD231, OP_PTR1, OP_PTR2, OP_PTR0, 0) +FMA_SSE(VFMADD213, OP_PTR1, OP_PTR0, OP_PTR2, 0) +FMA_SSE(VFMADD132, OP_PTR0, OP_PTR2, OP_PTR1, 0) + +FMA_SSE(VFNMADD231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_product) +FMA_SSE(VFNMADD213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_product) +FMA_SSE(VFNMADD132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_product) + +FMA_SSE(VFMSUB231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_c) +FMA_SSE(VFMSUB213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_c) +FMA_SSE(VFMSUB132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_c) + +FMA_SSE(VFNMSUB231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_c|float_muladd_negate_product) +FMA_SSE(VFNMSUB213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_c|float_muladd_negate_product) +FMA_SSE(VFNMSUB132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_c|float_muladd_negate_product) + +FMA_SSE_PACKED(VFMADDSUB231, OP_PTR1, OP_PTR2, OP_PTR0, float_muladd_negate_c, 0) +FMA_SSE_PACKED(VFMADDSUB213, OP_PTR1, OP_PTR0, OP_PTR2, float_muladd_negate_c, 0) +FMA_SSE_PACKED(VFMADDSUB132, OP_PTR0, OP_PTR2, OP_PTR1, float_muladd_negate_c, 0) + +FMA_SSE_PACKED(VFMSUBADD231, OP_PTR1, OP_PTR2, OP_PTR0, 0, float_muladd_negate_c) +FMA_SSE_PACKED(VFMSUBADD213, OP_PTR1, OP_PTR0, OP_PTR2, 0, float_muladd_negate_c) +FMA_SSE_PACKED(VFMSUBADD132, OP_PTR0, OP_PTR2, OP_PTR1, 0, float_muladd_negate_c) + +#define 
FP_UNPACK_SSE(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + /* PS maps to the DQ integer instruction, PD maps to QDQ. */ \ + gen_fp_sse(s, env, decode, \ + gen_helper_##lname##qdq_xmm, \ + gen_helper_##lname##dq_xmm, \ + gen_helper_##lname##qdq_ymm, \ + gen_helper_##lname##dq_ymm, \ + NULL, NULL); \ +} +FP_UNPACK_SSE(VUNPCKLPx, punpckl) +FP_UNPACK_SSE(VUNPCKHPx, punpckh) + +/* + * 00 = v*ps Vps, Wpd + * f3 = v*ss Vss, Wps + */ +static inline void gen_unary_fp32_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + SSEFunc_0_epp ps_xmm, + SSEFunc_0_epp ps_ymm, + SSEFunc_0_eppp ss) +{ + if ((s->prefix & (PREFIX_DATA | PREFIX_REPNZ)) != 0) { + goto illegal_op; + } else if (s->prefix & PREFIX_REPZ) { + if (!ss) { + goto illegal_op; + } + ss(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); + } else { + SSEFunc_0_epp fn = s->vex_l ? ps_ymm : ps_xmm; + if (!fn) { + goto illegal_op; + } + fn(tcg_env, OP_PTR0, OP_PTR2); + } + return; + +illegal_op: + gen_illegal_opcode(s); +} +#define UNARY_FP32_SSE(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_unary_fp32_sse(s, env, decode, \ + gen_helper_##lname##ps_xmm, \ + gen_helper_##lname##ps_ymm, \ + gen_helper_##lname##ss); \ +} +UNARY_FP32_SSE(VRSQRT, rsqrt) +UNARY_FP32_SSE(VRCP, rcp) + +/* + * 66 = v*pd Vpd, Hpd, Wpd + * f2 = v*ps Vps, Hps, Wps + */ +static inline void gen_horizontal_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + SSEFunc_0_eppp pd_xmm, SSEFunc_0_eppp ps_xmm, + SSEFunc_0_eppp pd_ymm, SSEFunc_0_eppp ps_ymm) +{ + SSEFunc_0_eppp ps, pd, fn; + ps = s->vex_l ? ps_ymm : ps_xmm; + pd = s->vex_l ? pd_ymm : pd_xmm; + fn = s->prefix & PREFIX_DATA ? 
pd : ps; + fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); +} +#define HORIZONTAL_FP_SSE(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_horizontal_fp_sse(s, env, decode, \ + gen_helper_##lname##pd_xmm, gen_helper_##lname##ps_xmm, \ + gen_helper_##lname##pd_ymm, gen_helper_##lname##ps_ymm); \ +} +HORIZONTAL_FP_SSE(VHADD, hadd) +HORIZONTAL_FP_SSE(VHSUB, hsub) +HORIZONTAL_FP_SSE(VADDSUB, addsub) + +static inline void gen_ternary_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + int op3, SSEFunc_0_epppp xmm, SSEFunc_0_epppp ymm) +{ + SSEFunc_0_epppp fn = s->vex_l ? ymm : xmm; + TCGv_ptr ptr3 = tcg_temp_new_ptr(); + + /* The format of the fourth input is Lx */ + tcg_gen_addi_ptr(ptr3, tcg_env, ZMM_OFFSET(op3)); + fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, ptr3); +} +#define TERNARY_SSE(uname, uvname, lname) \ +static void gen_##uvname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_ternary_sse(s, env, decode, (uint8_t)decode->immediate >> 4, \ + gen_helper_##lname##_xmm, gen_helper_##lname##_ymm); \ +} \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_ternary_sse(s, env, decode, 0, \ + gen_helper_##lname##_xmm, gen_helper_##lname##_ymm); \ +} +TERNARY_SSE(BLENDVPS, VBLENDVPS, blendvps) +TERNARY_SSE(BLENDVPD, VBLENDVPD, blendvpd) +TERNARY_SSE(PBLENDVB, VPBLENDVB, pblendvb) + +static inline void gen_binary_imm_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + SSEFunc_0_epppi xmm, SSEFunc_0_epppi ymm) +{ + TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); + if (!s->vex_l) { + xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); + } else { + ymm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); + } +} + +#define BINARY_IMM_SSE(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_binary_imm_sse(s, env, decode, \ + gen_helper_##lname##_xmm, \ + gen_helper_##lname##_ymm); \ 
+} + +BINARY_IMM_SSE(VBLENDPD, blendpd) +BINARY_IMM_SSE(VBLENDPS, blendps) +BINARY_IMM_SSE(VPBLENDW, pblendw) +BINARY_IMM_SSE(VDDPS, dpps) +#define gen_helper_dppd_ymm NULL +BINARY_IMM_SSE(VDDPD, dppd) +BINARY_IMM_SSE(VMPSADBW, mpsadbw) +BINARY_IMM_SSE(PCLMULQDQ, pclmulqdq) + + +#define UNARY_INT_GVEC(uname, func, ...) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + int vec_len = vector_len(s, decode); \ + \ + func(__VA_ARGS__, decode->op[0].offset, \ + decode->op[2].offset, vec_len, vec_len); \ +} +UNARY_INT_GVEC(PABSB, tcg_gen_gvec_abs, MO_8) +UNARY_INT_GVEC(PABSW, tcg_gen_gvec_abs, MO_16) +UNARY_INT_GVEC(PABSD, tcg_gen_gvec_abs, MO_32) +UNARY_INT_GVEC(VBROADCASTx128, tcg_gen_gvec_dup_mem, MO_128) +UNARY_INT_GVEC(VPBROADCASTB, tcg_gen_gvec_dup_mem, MO_8) +UNARY_INT_GVEC(VPBROADCASTW, tcg_gen_gvec_dup_mem, MO_16) +UNARY_INT_GVEC(VPBROADCASTD, tcg_gen_gvec_dup_mem, MO_32) +UNARY_INT_GVEC(VPBROADCASTQ, tcg_gen_gvec_dup_mem, MO_64) + + +#define BINARY_INT_GVEC(uname, func, ...) 
\ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + int vec_len = vector_len(s, decode); \ + \ + func(__VA_ARGS__, \ + decode->op[0].offset, decode->op[1].offset, \ + decode->op[2].offset, vec_len, vec_len); \ +} + +BINARY_INT_GVEC(PADDB, tcg_gen_gvec_add, MO_8) +BINARY_INT_GVEC(PADDW, tcg_gen_gvec_add, MO_16) +BINARY_INT_GVEC(PADDD, tcg_gen_gvec_add, MO_32) +BINARY_INT_GVEC(PADDQ, tcg_gen_gvec_add, MO_64) +BINARY_INT_GVEC(PADDSB, tcg_gen_gvec_ssadd, MO_8) +BINARY_INT_GVEC(PADDSW, tcg_gen_gvec_ssadd, MO_16) +BINARY_INT_GVEC(PADDUSB, tcg_gen_gvec_usadd, MO_8) +BINARY_INT_GVEC(PADDUSW, tcg_gen_gvec_usadd, MO_16) +BINARY_INT_GVEC(PAND, tcg_gen_gvec_and, MO_64) +BINARY_INT_GVEC(PCMPEQB, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_8) +BINARY_INT_GVEC(PCMPEQD, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_32) +BINARY_INT_GVEC(PCMPEQW, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_16) +BINARY_INT_GVEC(PCMPEQQ, tcg_gen_gvec_cmp, TCG_COND_EQ, MO_64) +BINARY_INT_GVEC(PCMPGTB, tcg_gen_gvec_cmp, TCG_COND_GT, MO_8) +BINARY_INT_GVEC(PCMPGTW, tcg_gen_gvec_cmp, TCG_COND_GT, MO_16) +BINARY_INT_GVEC(PCMPGTD, tcg_gen_gvec_cmp, TCG_COND_GT, MO_32) +BINARY_INT_GVEC(PCMPGTQ, tcg_gen_gvec_cmp, TCG_COND_GT, MO_64) +BINARY_INT_GVEC(PMAXSB, tcg_gen_gvec_smax, MO_8) +BINARY_INT_GVEC(PMAXSW, tcg_gen_gvec_smax, MO_16) +BINARY_INT_GVEC(PMAXSD, tcg_gen_gvec_smax, MO_32) +BINARY_INT_GVEC(PMAXUB, tcg_gen_gvec_umax, MO_8) +BINARY_INT_GVEC(PMAXUW, tcg_gen_gvec_umax, MO_16) +BINARY_INT_GVEC(PMAXUD, tcg_gen_gvec_umax, MO_32) +BINARY_INT_GVEC(PMINSB, tcg_gen_gvec_smin, MO_8) +BINARY_INT_GVEC(PMINSW, tcg_gen_gvec_smin, MO_16) +BINARY_INT_GVEC(PMINSD, tcg_gen_gvec_smin, MO_32) +BINARY_INT_GVEC(PMINUB, tcg_gen_gvec_umin, MO_8) +BINARY_INT_GVEC(PMINUW, tcg_gen_gvec_umin, MO_16) +BINARY_INT_GVEC(PMINUD, tcg_gen_gvec_umin, MO_32) +BINARY_INT_GVEC(PMULLW, tcg_gen_gvec_mul, MO_16) +BINARY_INT_GVEC(PMULLD, tcg_gen_gvec_mul, MO_32) +BINARY_INT_GVEC(POR, tcg_gen_gvec_or, MO_64) +BINARY_INT_GVEC(PSUBB, 
tcg_gen_gvec_sub, MO_8) +BINARY_INT_GVEC(PSUBW, tcg_gen_gvec_sub, MO_16) +BINARY_INT_GVEC(PSUBD, tcg_gen_gvec_sub, MO_32) +BINARY_INT_GVEC(PSUBQ, tcg_gen_gvec_sub, MO_64) +BINARY_INT_GVEC(PSUBSB, tcg_gen_gvec_sssub, MO_8) +BINARY_INT_GVEC(PSUBSW, tcg_gen_gvec_sssub, MO_16) +BINARY_INT_GVEC(PSUBUSB, tcg_gen_gvec_ussub, MO_8) +BINARY_INT_GVEC(PSUBUSW, tcg_gen_gvec_ussub, MO_16) +BINARY_INT_GVEC(PXOR, tcg_gen_gvec_xor, MO_64) + + +/* + * 00 = p* Pq, Qq (if mmx not NULL; no VEX) + * 66 = vp* Vx, Hx, Wx + * + * These are really the same encoding, because 1) V is the same as P when VEX.V + * is not present 2) P and Q are the same as H and W apart from MM/XMM + */ +static inline void gen_binary_int_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + SSEFunc_0_eppp mmx, SSEFunc_0_eppp xmm, SSEFunc_0_eppp ymm) +{ + assert(!!mmx == !!(decode->e.special == X86_SPECIAL_MMX)); + + if (mmx && (s->prefix & PREFIX_VEX) && !(s->prefix & PREFIX_DATA)) { + /* VEX encoding is not applicable to MMX instructions. 
*/ + gen_illegal_opcode(s); + return; + } + if (!(s->prefix & PREFIX_DATA)) { + mmx(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); + } else if (!s->vex_l) { + xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); + } else { + ymm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); + } +} + + +#define BINARY_INT_MMX(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_binary_int_sse(s, env, decode, \ + gen_helper_##lname##_mmx, \ + gen_helper_##lname##_xmm, \ + gen_helper_##lname##_ymm); \ +} +BINARY_INT_MMX(PUNPCKLBW, punpcklbw) +BINARY_INT_MMX(PUNPCKLWD, punpcklwd) +BINARY_INT_MMX(PUNPCKLDQ, punpckldq) +BINARY_INT_MMX(PACKSSWB, packsswb) +BINARY_INT_MMX(PACKUSWB, packuswb) +BINARY_INT_MMX(PUNPCKHBW, punpckhbw) +BINARY_INT_MMX(PUNPCKHWD, punpckhwd) +BINARY_INT_MMX(PUNPCKHDQ, punpckhdq) +BINARY_INT_MMX(PACKSSDW, packssdw) + +BINARY_INT_MMX(PAVGB, pavgb) +BINARY_INT_MMX(PAVGW, pavgw) +BINARY_INT_MMX(PMADDWD, pmaddwd) +BINARY_INT_MMX(PMULHUW, pmulhuw) +BINARY_INT_MMX(PMULHW, pmulhw) +BINARY_INT_MMX(PMULUDQ, pmuludq) +BINARY_INT_MMX(PSADBW, psadbw) + +BINARY_INT_MMX(PSLLW_r, psllw) +BINARY_INT_MMX(PSLLD_r, pslld) +BINARY_INT_MMX(PSLLQ_r, psllq) +BINARY_INT_MMX(PSRLW_r, psrlw) +BINARY_INT_MMX(PSRLD_r, psrld) +BINARY_INT_MMX(PSRLQ_r, psrlq) +BINARY_INT_MMX(PSRAW_r, psraw) +BINARY_INT_MMX(PSRAD_r, psrad) + +BINARY_INT_MMX(PHADDW, phaddw) +BINARY_INT_MMX(PHADDSW, phaddsw) +BINARY_INT_MMX(PHADDD, phaddd) +BINARY_INT_MMX(PHSUBW, phsubw) +BINARY_INT_MMX(PHSUBSW, phsubsw) +BINARY_INT_MMX(PHSUBD, phsubd) +BINARY_INT_MMX(PMADDUBSW, pmaddubsw) +BINARY_INT_MMX(PSHUFB, pshufb) +BINARY_INT_MMX(PSIGNB, psignb) +BINARY_INT_MMX(PSIGNW, psignw) +BINARY_INT_MMX(PSIGND, psignd) +BINARY_INT_MMX(PMULHRSW, pmulhrsw) + +/* Instructions with no MMX equivalent. 
*/ +#define BINARY_INT_SSE(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_binary_int_sse(s, env, decode, \ + NULL, \ + gen_helper_##lname##_xmm, \ + gen_helper_##lname##_ymm); \ +} + +/* Instructions with no MMX equivalent. */ +BINARY_INT_SSE(PUNPCKLQDQ, punpcklqdq) +BINARY_INT_SSE(PUNPCKHQDQ, punpckhqdq) +BINARY_INT_SSE(VPACKUSDW, packusdw) +BINARY_INT_SSE(VPERMILPS, vpermilps) +BINARY_INT_SSE(VPERMILPD, vpermilpd) +BINARY_INT_SSE(VMASKMOVPS, vpmaskmovd) +BINARY_INT_SSE(VMASKMOVPD, vpmaskmovq) + +BINARY_INT_SSE(PMULDQ, pmuldq) + +BINARY_INT_SSE(VAESDEC, aesdec) +BINARY_INT_SSE(VAESDECLAST, aesdeclast) +BINARY_INT_SSE(VAESENC, aesenc) +BINARY_INT_SSE(VAESENCLAST, aesenclast) + +#define UNARY_CMP_SSE(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + if (!s->vex_l) { \ + gen_helper_##lname##_xmm(tcg_env, OP_PTR1, OP_PTR2); \ + } else { \ + gen_helper_##lname##_ymm(tcg_env, OP_PTR1, OP_PTR2); \ + } \ + set_cc_op(s, CC_OP_EFLAGS); \ +} +UNARY_CMP_SSE(VPTEST, ptest) +UNARY_CMP_SSE(VTESTPS, vtestps) +UNARY_CMP_SSE(VTESTPD, vtestpd) + +static inline void gen_unary_int_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + SSEFunc_0_epp xmm, SSEFunc_0_epp ymm) +{ + if (!s->vex_l) { + xmm(tcg_env, OP_PTR0, OP_PTR2); + } else { + ymm(tcg_env, OP_PTR0, OP_PTR2); + } +} + +#define UNARY_INT_SSE(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_unary_int_sse(s, env, decode, \ + gen_helper_##lname##_xmm, \ + gen_helper_##lname##_ymm); \ +} + +UNARY_INT_SSE(VPMOVSXBW, pmovsxbw) +UNARY_INT_SSE(VPMOVSXBD, pmovsxbd) +UNARY_INT_SSE(VPMOVSXBQ, pmovsxbq) +UNARY_INT_SSE(VPMOVSXWD, pmovsxwd) +UNARY_INT_SSE(VPMOVSXWQ, pmovsxwq) +UNARY_INT_SSE(VPMOVSXDQ, pmovsxdq) + +UNARY_INT_SSE(VPMOVZXBW, pmovzxbw) +UNARY_INT_SSE(VPMOVZXBD, pmovzxbd) +UNARY_INT_SSE(VPMOVZXBQ, pmovzxbq) 
+UNARY_INT_SSE(VPMOVZXWD, pmovzxwd) +UNARY_INT_SSE(VPMOVZXWQ, pmovzxwq) +UNARY_INT_SSE(VPMOVZXDQ, pmovzxdq) + +UNARY_INT_SSE(VMOVSLDUP, pmovsldup) +UNARY_INT_SSE(VMOVSHDUP, pmovshdup) +UNARY_INT_SSE(VMOVDDUP, pmovdldup) + +UNARY_INT_SSE(VCVTDQ2PD, cvtdq2pd) +UNARY_INT_SSE(VCVTPD2DQ, cvtpd2dq) +UNARY_INT_SSE(VCVTTPD2DQ, cvttpd2dq) +UNARY_INT_SSE(VCVTDQ2PS, cvtdq2ps) +UNARY_INT_SSE(VCVTPS2DQ, cvtps2dq) +UNARY_INT_SSE(VCVTTPS2DQ, cvttps2dq) +UNARY_INT_SSE(VCVTPH2PS, cvtph2ps) + + +static inline void gen_unary_imm_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + SSEFunc_0_ppi xmm, SSEFunc_0_ppi ymm) +{ + TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); + if (!s->vex_l) { + xmm(OP_PTR0, OP_PTR1, imm); + } else { + ymm(OP_PTR0, OP_PTR1, imm); + } +} + +#define UNARY_IMM_SSE(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_unary_imm_sse(s, env, decode, \ + gen_helper_##lname##_xmm, \ + gen_helper_##lname##_ymm); \ +} + +UNARY_IMM_SSE(PSHUFD, pshufd) +UNARY_IMM_SSE(PSHUFHW, pshufhw) +UNARY_IMM_SSE(PSHUFLW, pshuflw) +#define gen_helper_vpermq_xmm NULL +UNARY_IMM_SSE(VPERMQ, vpermq) +UNARY_IMM_SSE(VPERMILPS_i, vpermilps_imm) +UNARY_IMM_SSE(VPERMILPD_i, vpermilpd_imm) + +static inline void gen_unary_imm_fp_sse(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + SSEFunc_0_eppi xmm, SSEFunc_0_eppi ymm) +{ + TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); + if (!s->vex_l) { + xmm(tcg_env, OP_PTR0, OP_PTR1, imm); + } else { + ymm(tcg_env, OP_PTR0, OP_PTR1, imm); + } +} + +#define UNARY_IMM_FP_SSE(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_unary_imm_fp_sse(s, env, decode, \ + gen_helper_##lname##_xmm, \ + gen_helper_##lname##_ymm); \ +} + +UNARY_IMM_FP_SSE(VROUNDPS, roundps) +UNARY_IMM_FP_SSE(VROUNDPD, roundpd) + +static inline void gen_vexw_avx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + 
SSEFunc_0_eppp d_xmm, SSEFunc_0_eppp q_xmm, + SSEFunc_0_eppp d_ymm, SSEFunc_0_eppp q_ymm) +{ + SSEFunc_0_eppp d = s->vex_l ? d_ymm : d_xmm; + SSEFunc_0_eppp q = s->vex_l ? q_ymm : q_xmm; + SSEFunc_0_eppp fn = s->vex_w ? q : d; + fn(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); +} + +/* VEX.W affects whether to operate on 32- or 64-bit elements. */ +#define VEXW_AVX(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_vexw_avx(s, env, decode, \ + gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm, \ + gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm); \ +} +VEXW_AVX(VPSLLV, vpsllv) +VEXW_AVX(VPSRLV, vpsrlv) +VEXW_AVX(VPSRAV, vpsrav) +VEXW_AVX(VPMASKMOV, vpmaskmov) + +/* Same as above, but with extra arguments to the helper. */ +static inline void gen_vsib_avx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + SSEFunc_0_epppti d_xmm, SSEFunc_0_epppti q_xmm, + SSEFunc_0_epppti d_ymm, SSEFunc_0_epppti q_ymm) +{ + SSEFunc_0_epppti d = s->vex_l ? d_ymm : d_xmm; + SSEFunc_0_epppti q = s->vex_l ? q_ymm : q_xmm; + SSEFunc_0_epppti fn = s->vex_w ? q : d; + TCGv_i32 scale = tcg_constant_i32(decode->mem.scale); + TCGv_ptr index = tcg_temp_new_ptr(); + + /* Pass third input as (index, base, scale) */ + tcg_gen_addi_ptr(index, tcg_env, ZMM_OFFSET(decode->mem.index)); + fn(tcg_env, OP_PTR0, OP_PTR1, index, s->A0, scale); + + /* + * There are two output operands, so zero OP1's high 128 bits + * in the VEX.128 case. 
+ */ + if (!s->vex_l) { + int ymmh_ofs = vector_elem_offset(&decode->op[1], MO_128, 1); + tcg_gen_gvec_dup_imm(MO_64, ymmh_ofs, 16, 16, 0); + } +} +#define VSIB_AVX(uname, lname) \ +static void gen_##uname(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) \ +{ \ + gen_vsib_avx(s, env, decode, \ + gen_helper_##lname##d_xmm, gen_helper_##lname##q_xmm, \ + gen_helper_##lname##d_ymm, gen_helper_##lname##q_ymm); \ +} +VSIB_AVX(VPGATHERD, vpgatherd) +VSIB_AVX(VPGATHERQ, vpgatherq) + +/* ADCX/ADOX do not have memory operands and can use set_cc_op. */ +static void gen_ADCOX(DisasContext *s, CPUX86State *env, MemOp ot, int cc_op) +{ + int opposite_cc_op; + TCGv carry_in = NULL; + TCGv carry_out = (cc_op == CC_OP_ADCX ? cpu_cc_dst : cpu_cc_src2); + TCGv zero; + + if (cc_op == s->cc_op || s->cc_op == CC_OP_ADCOX) { + /* Re-use the carry-out from a previous round. */ + carry_in = carry_out; + } else { + /* We don't have a carry-in, get it out of EFLAGS. */ + if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) { + gen_compute_eflags(s); + } + carry_in = s->tmp0; + tcg_gen_extract_tl(carry_in, cpu_cc_src, + ctz32(cc_op == CC_OP_ADCX ? CC_C : CC_O), 1); + } + + switch (ot) { +#ifdef TARGET_X86_64 + case MO_32: + /* If TL is 64-bit just do everything in 64-bit arithmetic. */ + tcg_gen_ext32u_tl(s->T0, s->T0); + tcg_gen_ext32u_tl(s->T1, s->T1); + tcg_gen_add_i64(s->T0, s->T0, s->T1); + tcg_gen_add_i64(s->T0, s->T0, carry_in); + tcg_gen_shri_i64(carry_out, s->T0, 32); + break; +#endif + default: + zero = tcg_constant_tl(0); + tcg_gen_add2_tl(s->T0, carry_out, s->T0, zero, carry_in, zero); + tcg_gen_add2_tl(s->T0, carry_out, s->T0, carry_out, s->T1, zero); + break; + } + + opposite_cc_op = cc_op == CC_OP_ADCX ? CC_OP_ADOX : CC_OP_ADCX; + if (s->cc_op == CC_OP_ADCOX || s->cc_op == opposite_cc_op) { + /* Merge with the carry-out from the opposite instruction. 
*/ + set_cc_op(s, CC_OP_ADCOX); + } else { + set_cc_op(s, cc_op); + } +} + +static void gen_ADCX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADCX); +} + +static void gen_ADOX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_ADCOX(s, env, decode->op[0].ot, CC_OP_ADOX); +} + +static void gen_ANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + + tcg_gen_andc_tl(s->T0, s->T1, s->T0); + prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); +} + +static void gen_BEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); + TCGv zero = tcg_constant_tl(0); + TCGv mone = tcg_constant_tl(-1); + + /* + * Extract START, and shift the operand. + * Shifts larger than operand size get zeros. + */ + tcg_gen_ext8u_tl(s->A0, s->T1); + tcg_gen_shr_tl(s->T0, s->T0, s->A0); + + tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, s->T0, zero); + + /* + * Extract the LEN into an inverse mask. Lengths larger than + * operand size get all zeros, length 0 gets all ones. + */ + tcg_gen_extract_tl(s->A0, s->T1, 8, 8); + tcg_gen_shl_tl(s->T1, mone, s->A0); + tcg_gen_movcond_tl(TCG_COND_LEU, s->T1, s->A0, bound, s->T1, zero); + tcg_gen_andc_tl(s->T0, s->T0, s->T1); + + prepare_update1_cc(decode, s, CC_OP_LOGICB + ot); +} + +/* BLSI do not have memory operands and can use set_cc_op. */ +static void gen_BLSI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + + tcg_gen_mov_tl(cpu_cc_src, s->T0); + tcg_gen_neg_tl(s->T1, s->T0); + tcg_gen_and_tl(s->T0, s->T0, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); + set_cc_op(s, CC_OP_BMILGB + ot); +} + +/* BLSMSK do not have memory operands and can use set_cc_op. 
*/ +static void gen_BLSMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + + tcg_gen_mov_tl(cpu_cc_src, s->T0); + tcg_gen_subi_tl(s->T1, s->T0, 1); + tcg_gen_xor_tl(s->T0, s->T0, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); + set_cc_op(s, CC_OP_BMILGB + ot); +} + +/* BLSR do not have memory operands and can use set_cc_op. */ +static void gen_BLSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + + tcg_gen_mov_tl(cpu_cc_src, s->T0); + tcg_gen_subi_tl(s->T1, s->T0, 1); + tcg_gen_and_tl(s->T0, s->T0, s->T1); + tcg_gen_mov_tl(cpu_cc_dst, s->T0); + set_cc_op(s, CC_OP_BMILGB + ot); +} + +static void gen_BZHI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + TCGv bound = tcg_constant_tl(ot == MO_64 ? 63 : 31); + TCGv zero = tcg_constant_tl(0); + TCGv mone = tcg_constant_tl(-1); + + tcg_gen_ext8u_tl(s->T1, s->T1); + + tcg_gen_shl_tl(s->A0, mone, s->T1); + tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->T1, bound, s->A0, zero); + tcg_gen_andc_tl(s->T0, s->T0, s->A0); + /* + * Note that since we're using BMILG (in order to get O + * cleared) we need to store the inverse into C. 
+ */ + tcg_gen_setcond_tl(TCG_COND_LEU, s->T1, s->T1, bound); + prepare_update2_cc(decode, s, CC_OP_BMILGB + ot); +} + +static void gen_CMPccXADD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGLabel *label_top = gen_new_label(); + TCGLabel *label_bottom = gen_new_label(); + TCGv oldv = tcg_temp_new(); + TCGv newv = tcg_temp_new(); + TCGv cmpv = tcg_temp_new(); + TCGCond cond; + + TCGv cmp_lhs, cmp_rhs; + MemOp ot, ot_full; + + int jcc_op = (decode->b >> 1) & 7; + static const TCGCond cond_table[8] = { + [JCC_O] = TCG_COND_LT, /* test sign bit by comparing against 0 */ + [JCC_B] = TCG_COND_LTU, + [JCC_Z] = TCG_COND_EQ, + [JCC_BE] = TCG_COND_LEU, + [JCC_S] = TCG_COND_LT, /* test sign bit by comparing against 0 */ + [JCC_P] = TCG_COND_EQ, /* even parity - tests low bit of popcount */ + [JCC_L] = TCG_COND_LT, + [JCC_LE] = TCG_COND_LE, + }; + + cond = cond_table[jcc_op]; + if (decode->b & 1) { + cond = tcg_invert_cond(cond); + } + + ot = decode->op[0].ot; + ot_full = ot | MO_LE; + if (jcc_op >= JCC_S) { + /* + * Sign-extend values before subtracting for S, P (zero/sign extension + * does not matter there) L, LE and their inverses. + */ + ot_full |= MO_SIGN; + } + + /* + * cmpv will be moved to cc_src *after* cpu_regs[] is written back, so use + * tcg_gen_ext_tl instead of gen_ext_tl. + */ + tcg_gen_ext_tl(cmpv, cpu_regs[decode->op[1].n], ot_full); + + /* + * Cmpxchg loop starts here. + * - s->T1: addition operand (from decoder) + * - s->A0: dest address (from decoder) + * - s->cc_srcT: memory operand (lhs for comparison) + * - cmpv: rhs for comparison + */ + gen_set_label(label_top); + gen_op_ld_v(s, ot_full, s->cc_srcT, s->A0); + tcg_gen_sub_tl(s->T0, s->cc_srcT, cmpv); + + /* Compute the comparison result by hand, to avoid clobbering cc_*. */ + switch (jcc_op) { + case JCC_O: + /* (src1 ^ src2) & (src1 ^ dst). 
newv is only used here for a moment */ + tcg_gen_xor_tl(newv, s->cc_srcT, s->T0); + tcg_gen_xor_tl(s->tmp0, s->cc_srcT, cmpv); + tcg_gen_and_tl(s->tmp0, s->tmp0, newv); + tcg_gen_sextract_tl(s->tmp0, s->tmp0, 0, 8 << ot); + cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + break; + + case JCC_P: + tcg_gen_ext8u_tl(s->tmp0, s->T0); + tcg_gen_ctpop_tl(s->tmp0, s->tmp0); + tcg_gen_andi_tl(s->tmp0, s->tmp0, 1); + cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + break; + + case JCC_S: + tcg_gen_sextract_tl(s->tmp0, s->T0, 0, 8 << ot); + cmp_lhs = s->tmp0, cmp_rhs = tcg_constant_tl(0); + break; + + default: + cmp_lhs = s->cc_srcT, cmp_rhs = cmpv; + break; + } + + /* Compute new value: if condition does not hold, just store back s->cc_srcT */ + tcg_gen_add_tl(newv, s->cc_srcT, s->T1); + tcg_gen_movcond_tl(cond, newv, cmp_lhs, cmp_rhs, newv, s->cc_srcT); + tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, s->cc_srcT, newv, s->mem_index, ot_full); + + /* Exit unconditionally if cmpxchg succeeded. */ + tcg_gen_brcond_tl(TCG_COND_EQ, oldv, s->cc_srcT, label_bottom); + + /* Try again if there was actually a store to make. */ + tcg_gen_brcond_tl(cond, cmp_lhs, cmp_rhs, label_top); + gen_set_label(label_bottom); + + /* Store old value to registers only after a successful store. 
*/ + gen_writeback(s, decode, 1, s->cc_srcT); + + decode->cc_dst = s->T0; + decode->cc_src = cmpv; + decode->cc_op = CC_OP_SUBB + ot; +} + +static void gen_CRC32(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[2].ot; + + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + gen_helper_crc32(s->T0, s->tmp2_i32, s->T1, tcg_constant_i32(8 << ot)); +} + +static void gen_CVTPI2Px(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_enter_mmx(tcg_env); + if (s->prefix & PREFIX_DATA) { + gen_helper_cvtpi2pd(tcg_env, OP_PTR0, OP_PTR2); + } else { + gen_helper_cvtpi2ps(tcg_env, OP_PTR0, OP_PTR2); + } +} + +static void gen_CVTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_enter_mmx(tcg_env); + if (s->prefix & PREFIX_DATA) { + gen_helper_cvtpd2pi(tcg_env, OP_PTR0, OP_PTR2); + } else { + gen_helper_cvtps2pi(tcg_env, OP_PTR0, OP_PTR2); + } +} + +static void gen_CVTTPx2PI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_enter_mmx(tcg_env); + if (s->prefix & PREFIX_DATA) { + gen_helper_cvttpd2pi(tcg_env, OP_PTR0, OP_PTR2); + } else { + gen_helper_cvttps2pi(tcg_env, OP_PTR0, OP_PTR2); + } +} + +static void gen_EMMS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_emms(tcg_env); +} + +static void gen_EXTRQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 length = tcg_constant_i32(decode->immediate & 63); + TCGv_i32 index = tcg_constant_i32((decode->immediate >> 8) & 63); + + gen_helper_extrq_i(tcg_env, OP_PTR0, index, length); +} + +static void gen_EXTRQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_extrq_r(tcg_env, OP_PTR0, OP_PTR2); +} + +static void gen_INSERTQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 length = tcg_constant_i32(decode->immediate & 63); + TCGv_i32 index = tcg_constant_i32((decode->immediate >> 8) & 63); + + gen_helper_insertq_i(tcg_env, 
OP_PTR0, OP_PTR1, index, length); +} + +static void gen_INSERTQ_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_insertq_r(tcg_env, OP_PTR0, OP_PTR2); +} + +static void gen_LDMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T1); + gen_helper_ldmxcsr(tcg_env, s->tmp2_i32); +} + +static void gen_MASKMOV(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_DS, s->override); + + if (s->prefix & PREFIX_DATA) { + gen_helper_maskmov_xmm(tcg_env, OP_PTR1, OP_PTR2, s->A0); + } else { + gen_helper_maskmov_mmx(tcg_env, OP_PTR1, OP_PTR2, s->A0); + } +} + +static void gen_MOVBE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + + /* M operand type does not load/store */ + if (decode->e.op0 == X86_TYPE_M) { + tcg_gen_qemu_st_tl(s->T0, s->A0, s->mem_index, ot | MO_BE); + } else { + tcg_gen_qemu_ld_tl(s->T0, s->A0, s->mem_index, ot | MO_BE); + } +} + +static void gen_MOVD_from(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[2].ot; + + switch (ot) { + case MO_32: +#ifdef TARGET_X86_64 + tcg_gen_ld32u_tl(s->T0, tcg_env, decode->op[2].offset); + break; + case MO_64: +#endif + tcg_gen_ld_tl(s->T0, tcg_env, decode->op[2].offset); + break; + default: + abort(); + } +} + +static void gen_MOVD_to(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[2].ot; + int vec_len = vector_len(s, decode); + int lo_ofs = vector_elem_offset(&decode->op[0], ot, 0); + + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + + switch (ot) { + case MO_32: +#ifdef TARGET_X86_64 + tcg_gen_st32_tl(s->T1, tcg_env, lo_ofs); + break; + case MO_64: +#endif + tcg_gen_st_tl(s->T1, tcg_env, lo_ofs); + break; + default: + g_assert_not_reached(); + } +} + +static void gen_MOVDQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + 
gen_store_sse(s, decode, decode->op[2].offset); +} + +static void gen_MOVMSK(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + typeof(gen_helper_movmskps_ymm) *ps, *pd, *fn; + ps = s->vex_l ? gen_helper_movmskps_ymm : gen_helper_movmskps_xmm; + pd = s->vex_l ? gen_helper_movmskpd_ymm : gen_helper_movmskpd_xmm; + fn = s->prefix & PREFIX_DATA ? pd : ps; + fn(s->tmp2_i32, tcg_env, OP_PTR2); + tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); +} + +static void gen_MOVQ(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + int lo_ofs = vector_elem_offset(&decode->op[0], MO_64, 0); + + tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset); + if (decode->op[0].has_ea) { + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); + } else { + /* + * tcg_gen_gvec_dup_i64(MO_64, op0.offset, 8, vec_len, s->tmp1_i64) would + * seem to work, but it does not on big-endian platforms; the cleared parts + * are always at higher addresses, but cross-endian emulation inverts the + * byte order so that the cleared parts need to be at *lower* addresses. + * Because oprsz is 8, we see this here even for SSE; but more in general, + * it disqualifies using oprsz < maxsz to emulate VEX128. + */ + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + tcg_gen_st_i64(s->tmp1_i64, tcg_env, lo_ofs); + } +} + +static void gen_MOVq_dq(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_enter_mmx(tcg_env); + /* Otherwise the same as any other movq.
*/ + return gen_MOVQ(s, env, decode); +} + +static void gen_MULX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + + /* low part of result in VEX.vvvv, high in MODRM */ + switch (ot) { + case MO_32: +#ifdef TARGET_X86_64 + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); + tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32, + s->tmp2_i32, s->tmp3_i32); + tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32); + tcg_gen_extu_i32_tl(s->T0, s->tmp3_i32); + break; + + case MO_64: +#endif + tcg_gen_mulu2_tl(cpu_regs[s->vex_v], s->T0, s->T0, s->T1); + break; + + default: + g_assert_not_reached(); + } +} + +static void gen_PALIGNR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); + if (!(s->prefix & PREFIX_DATA)) { + gen_helper_palignr_mmx(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); + } else if (!s->vex_l) { + gen_helper_palignr_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); + } else { + gen_helper_palignr_ymm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); + } +} + +static void gen_PANDN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + + /* Careful, operand order is reversed! 
*/ + tcg_gen_gvec_andc(MO_64, + decode->op[0].offset, decode->op[2].offset, + decode->op[1].offset, vec_len, vec_len); +} + +static void gen_PCMPESTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); + gen_helper_pcmpestri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); + set_cc_op(s, CC_OP_EFLAGS); +} + +static void gen_PCMPESTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); + gen_helper_pcmpestrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); + set_cc_op(s, CC_OP_EFLAGS); + if ((s->prefix & PREFIX_VEX) && !s->vex_l) { + tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_regs[0].ZMM_X(1)), + 16, 16, 0); + } +} + +static void gen_PCMPISTRI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); + gen_helper_pcmpistri_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); + set_cc_op(s, CC_OP_EFLAGS); +} + +static void gen_PCMPISTRM(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); + gen_helper_pcmpistrm_xmm(tcg_env, OP_PTR1, OP_PTR2, imm); + set_cc_op(s, CC_OP_EFLAGS); + if ((s->prefix & PREFIX_VEX) && !s->vex_l) { + tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_regs[0].ZMM_X(1)), + 16, 16, 0); + } +} + +static void gen_PDEP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_pdep(s->T0, s->T0, s->T1); +} + +static void gen_PEXT(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_pext(s->T0, s->T0, s->T1); +} + +static inline void gen_pextr(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, MemOp ot) +{ + int vec_len = vector_len(s, decode); + int mask = (vec_len >> ot) - 1; + int val = decode->immediate & mask; + + switch (ot) { + case MO_8: + tcg_gen_ld8u_tl(s->T0, tcg_env, vector_elem_offset(&decode->op[1], ot, val)); + break; + case MO_16: + tcg_gen_ld16u_tl(s->T0, 
tcg_env, vector_elem_offset(&decode->op[1], ot, val)); + break; + case MO_32: +#ifdef TARGET_X86_64 + tcg_gen_ld32u_tl(s->T0, tcg_env, vector_elem_offset(&decode->op[1], ot, val)); + break; + case MO_64: +#endif + tcg_gen_ld_tl(s->T0, tcg_env, vector_elem_offset(&decode->op[1], ot, val)); + break; + default: + abort(); + } +} + +static void gen_PEXTRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_pextr(s, env, decode, MO_8); +} + +static void gen_PEXTRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_pextr(s, env, decode, MO_16); +} + +static void gen_PEXTR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + gen_pextr(s, env, decode, ot); +} + +static inline void gen_pinsr(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, MemOp ot) +{ + int vec_len = vector_len(s, decode); + int mask = (vec_len >> ot) - 1; + int val = decode->immediate & mask; + + if (decode->op[1].offset != decode->op[0].offset) { + assert(vec_len == 16); + gen_store_sse(s, decode, decode->op[1].offset); + } + + switch (ot) { + case MO_8: + tcg_gen_st8_tl(s->T1, tcg_env, vector_elem_offset(&decode->op[0], ot, val)); + break; + case MO_16: + tcg_gen_st16_tl(s->T1, tcg_env, vector_elem_offset(&decode->op[0], ot, val)); + break; + case MO_32: +#ifdef TARGET_X86_64 + tcg_gen_st32_tl(s->T1, tcg_env, vector_elem_offset(&decode->op[0], ot, val)); + break; + case MO_64: +#endif + tcg_gen_st_tl(s->T1, tcg_env, vector_elem_offset(&decode->op[0], ot, val)); + break; + default: + abort(); + } +} + +static void gen_PINSRB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_pinsr(s, env, decode, MO_8); +} + +static void gen_PINSRW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_pinsr(s, env, decode, MO_16); +} + +static void gen_PINSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_pinsr(s, env, decode, decode->op[2].ot); +} + +static void 
gen_pmovmskb_i64(TCGv_i64 d, TCGv_i64 s) +{ + TCGv_i64 t = tcg_temp_new_i64(); + + tcg_gen_andi_i64(d, s, 0x8080808080808080ull); + + /* + * After each shift+or pair: + * 0: a.......b.......c.......d.......e.......f.......g.......h....... + * 7: ab......bc......cd......de......ef......fg......gh......h....... + * 14: abcd....bcde....cdef....defg....efgh....fgh.....gh......h....... + * 28: abcdefghbcdefgh.cdefgh..defgh...efgh....fgh.....gh......h....... + * The result is left in the high bits of the word. + */ + tcg_gen_shli_i64(t, d, 7); + tcg_gen_or_i64(d, d, t); + tcg_gen_shli_i64(t, d, 14); + tcg_gen_or_i64(d, d, t); + tcg_gen_shli_i64(t, d, 28); + tcg_gen_or_i64(d, d, t); +} + +static void gen_pmovmskb_vec(unsigned vece, TCGv_vec d, TCGv_vec s) +{ + TCGv_vec t = tcg_temp_new_vec_matching(d); + TCGv_vec m = tcg_constant_vec_matching(d, MO_8, 0x80); + + /* See above */ + tcg_gen_and_vec(vece, d, s, m); + tcg_gen_shli_vec(vece, t, d, 7); + tcg_gen_or_vec(vece, d, d, t); + tcg_gen_shli_vec(vece, t, d, 14); + tcg_gen_or_vec(vece, d, d, t); + tcg_gen_shli_vec(vece, t, d, 28); + tcg_gen_or_vec(vece, d, d, t); +} + +#ifdef TARGET_X86_64 +#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i64 +#else +#define TCG_TARGET_HAS_extract2_tl TCG_TARGET_HAS_extract2_i32 +#endif + +static void gen_PMOVMSKB(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + static const TCGOpcode vecop_list[] = { INDEX_op_shli_vec, 0 }; + static const GVecGen2 g = { + .fni8 = gen_pmovmskb_i64, + .fniv = gen_pmovmskb_vec, + .opt_opc = vecop_list, + .vece = MO_64, + .prefer_i64 = TCG_TARGET_REG_BITS == 64 + }; + MemOp ot = decode->op[2].ot; + int vec_len = vector_len(s, decode); + TCGv t = tcg_temp_new(); + + tcg_gen_gvec_2(offsetof(CPUX86State, xmm_t0) + xmm_offset(ot), decode->op[2].offset, + vec_len, vec_len, &g); + tcg_gen_ld8u_tl(s->T0, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1))); + while (vec_len > 8) { + vec_len -= 8; + if (TCG_TARGET_HAS_extract2_tl) { + 
/* + * Load the next byte of the result into the high byte of T. + * TCG does a similar expansion of deposit to shl+extract2; by + * loading the whole word, the shift left is avoided. + */ +#ifdef TARGET_X86_64 + tcg_gen_ld_tl(t, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_Q((vec_len - 1) / 8))); +#else + tcg_gen_ld_tl(t, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_L((vec_len - 1) / 4))); +#endif + + tcg_gen_extract2_tl(s->T0, t, s->T0, TARGET_LONG_BITS - 8); + } else { + /* + * The _previous_ value is deposited into bits 8 and higher of t. Because + * those bits are known to be zero after ld8u, this becomes a shift+or + * if deposit is not available. + */ + tcg_gen_ld8u_tl(t, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_B(vec_len - 1))); + tcg_gen_deposit_tl(s->T0, t, s->T0, 8, TARGET_LONG_BITS - 8); + } + } +} + +static void gen_PSHUFW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); + gen_helper_pshufw_mmx(OP_PTR0, OP_PTR1, imm); +} + +static void gen_PSRLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + + if (decode->immediate >= 16) { + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + } else { + tcg_gen_gvec_shri(MO_16, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); + } +} + +static void gen_PSLLW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + + if (decode->immediate >= 16) { + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + } else { + tcg_gen_gvec_shli(MO_16, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); + } +} + +static void gen_PSRAW_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + + if (decode->immediate >= 16) { + decode->immediate = 15; + } + tcg_gen_gvec_sari(MO_16, + decode->op[0].offset, 
decode->op[1].offset, + decode->immediate, vec_len, vec_len); +} + +static void gen_PSRLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + + if (decode->immediate >= 32) { + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + } else { + tcg_gen_gvec_shri(MO_32, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); + } +} + +static void gen_PSLLD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + + if (decode->immediate >= 32) { + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + } else { + tcg_gen_gvec_shli(MO_32, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); + } +} + +static void gen_PSRAD_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + + if (decode->immediate >= 32) { + decode->immediate = 31; + } + tcg_gen_gvec_sari(MO_32, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); +} + +static void gen_PSRLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + + if (decode->immediate >= 64) { + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + } else { + tcg_gen_gvec_shri(MO_64, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); + } +} + +static void gen_PSLLQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + + if (decode->immediate >= 64) { + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + } else { + tcg_gen_gvec_shli(MO_64, + decode->op[0].offset, decode->op[1].offset, + decode->immediate, vec_len, vec_len); + } +} + +static TCGv_ptr make_imm8u_xmm_vec(uint8_t imm, int vec_len) +{ + MemOp ot = vec_len == 16 ? 
MO_128 : MO_256; + TCGv_i32 imm_v = tcg_constant8u_i32(imm); + TCGv_ptr ptr = tcg_temp_new_ptr(); + + tcg_gen_gvec_dup_imm(MO_64, offsetof(CPUX86State, xmm_t0) + xmm_offset(ot), + vec_len, vec_len, 0); + + tcg_gen_addi_ptr(ptr, tcg_env, offsetof(CPUX86State, xmm_t0)); + tcg_gen_st_i32(imm_v, tcg_env, offsetof(CPUX86State, xmm_t0.ZMM_L(0))); + return ptr; +} + +static void gen_PSRLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + TCGv_ptr imm_vec = make_imm8u_xmm_vec(decode->immediate, vec_len); + + if (s->vex_l) { + gen_helper_psrldq_ymm(tcg_env, OP_PTR0, OP_PTR1, imm_vec); + } else { + gen_helper_psrldq_xmm(tcg_env, OP_PTR0, OP_PTR1, imm_vec); + } +} + +static void gen_PSLLDQ_i(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + TCGv_ptr imm_vec = make_imm8u_xmm_vec(decode->immediate, vec_len); + + if (s->vex_l) { + gen_helper_pslldq_ymm(tcg_env, OP_PTR0, OP_PTR1, imm_vec); + } else { + gen_helper_pslldq_xmm(tcg_env, OP_PTR0, OP_PTR1, imm_vec); + } +} + +static void gen_RORX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + int mask = ot == MO_64 ? 63 : 31; + int b = decode->immediate & mask; + + switch (ot) { + case MO_32: +#ifdef TARGET_X86_64 + tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b); + tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); + break; + + case MO_64: +#endif + tcg_gen_rotri_tl(s->T0, s->T0, b); + break; + + default: + g_assert_not_reached(); + } +} + +static void gen_SARX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + int mask; + + mask = ot == MO_64 ? 
63 : 31; + tcg_gen_andi_tl(s->T1, s->T1, mask); + tcg_gen_sar_tl(s->T0, s->T0, s->T1); +} + +static void gen_SHA1NEXTE(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_sha1nexte(OP_PTR0, OP_PTR1, OP_PTR2); +} + +static void gen_SHA1MSG1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_sha1msg1(OP_PTR0, OP_PTR1, OP_PTR2); +} + +static void gen_SHA1MSG2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_sha1msg2(OP_PTR0, OP_PTR1, OP_PTR2); +} + +static void gen_SHA1RNDS4(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + switch(decode->immediate & 3) { + case 0: + gen_helper_sha1rnds4_f0(OP_PTR0, OP_PTR0, OP_PTR1); + break; + case 1: + gen_helper_sha1rnds4_f1(OP_PTR0, OP_PTR0, OP_PTR1); + break; + case 2: + gen_helper_sha1rnds4_f2(OP_PTR0, OP_PTR0, OP_PTR1); + break; + case 3: + gen_helper_sha1rnds4_f3(OP_PTR0, OP_PTR0, OP_PTR1); + break; + } +} + +static void gen_SHA256MSG1(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_sha256msg1(OP_PTR0, OP_PTR1, OP_PTR2); +} + +static void gen_SHA256MSG2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_sha256msg2(OP_PTR0, OP_PTR1, OP_PTR2); +} + +static void gen_SHA256RNDS2(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 wk0 = tcg_temp_new_i32(); + TCGv_i32 wk1 = tcg_temp_new_i32(); + + tcg_gen_ld_i32(wk0, tcg_env, ZMM_OFFSET(0) + offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_ld_i32(wk1, tcg_env, ZMM_OFFSET(0) + offsetof(ZMMReg, ZMM_L(1))); + + gen_helper_sha256rnds2(OP_PTR0, OP_PTR1, OP_PTR2, wk0, wk1); +} + +static void gen_SHLX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + int mask; + + mask = ot == MO_64 ? 
63 : 31; + tcg_gen_andi_tl(s->T1, s->T1, mask); + tcg_gen_shl_tl(s->T0, s->T0, s->T1); +} + +static void gen_SHRX(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + MemOp ot = decode->op[0].ot; + int mask; + + mask = ot == MO_64 ? 63 : 31; + tcg_gen_andi_tl(s->T1, s->T1, mask); + tcg_gen_shr_tl(s->T0, s->T0, s->T1); +} + +static void gen_VAESKEYGEN(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); + assert(!s->vex_l); + gen_helper_aeskeygenassist_xmm(tcg_env, OP_PTR0, OP_PTR1, imm); +} + +static void gen_STMXCSR(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_update_mxcsr(tcg_env); + tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr)); +} + +static void gen_VAESIMC(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + assert(!s->vex_l); + gen_helper_aesimc_xmm(tcg_env, OP_PTR0, OP_PTR2); +} + +/* + * 00 = v*ps Vps, Hps, Wpd + * 66 = v*pd Vpd, Hpd, Wps + * f3 = v*ss Vss, Hss, Wps + * f2 = v*sd Vsd, Hsd, Wps + */ +#define SSE_CMP(x) { \ + gen_helper_ ## x ## ps ## _xmm, gen_helper_ ## x ## pd ## _xmm, \ + gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, \ + gen_helper_ ## x ## ps ## _ymm, gen_helper_ ## x ## pd ## _ymm} +static const SSEFunc_0_eppp gen_helper_cmp_funcs[32][6] = { + SSE_CMP(cmpeq), + SSE_CMP(cmplt), + SSE_CMP(cmple), + SSE_CMP(cmpunord), + SSE_CMP(cmpneq), + SSE_CMP(cmpnlt), + SSE_CMP(cmpnle), + SSE_CMP(cmpord), + + SSE_CMP(cmpequ), + SSE_CMP(cmpnge), + SSE_CMP(cmpngt), + SSE_CMP(cmpfalse), + SSE_CMP(cmpnequ), + SSE_CMP(cmpge), + SSE_CMP(cmpgt), + SSE_CMP(cmptrue), + + SSE_CMP(cmpeqs), + SSE_CMP(cmpltq), + SSE_CMP(cmpleq), + SSE_CMP(cmpunords), + SSE_CMP(cmpneqq), + SSE_CMP(cmpnltq), + SSE_CMP(cmpnleq), + SSE_CMP(cmpords), + + SSE_CMP(cmpequs), + SSE_CMP(cmpngeq), + SSE_CMP(cmpngtq), + SSE_CMP(cmpfalses), + SSE_CMP(cmpnequs), + SSE_CMP(cmpgeq), + SSE_CMP(cmpgtq), + SSE_CMP(cmptrues), +}; +#undef SSE_CMP + +static void 
gen_VCMP(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int index = decode->immediate & (s->prefix & PREFIX_VEX ? 31 : 7); + int b = + s->prefix & PREFIX_REPZ ? 2 /* ss */ : + s->prefix & PREFIX_REPNZ ? 3 /* sd */ : + !!(s->prefix & PREFIX_DATA) /* pd */ + (s->vex_l << 2); + + gen_helper_cmp_funcs[index][b](tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); +} + +static void gen_VCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + SSEFunc_0_epp fn; + fn = s->prefix & PREFIX_DATA ? gen_helper_comisd : gen_helper_comiss; + fn(tcg_env, OP_PTR1, OP_PTR2); + set_cc_op(s, CC_OP_EFLAGS); +} + +static void gen_VCVTPD2PS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if (s->vex_l) { + gen_helper_cvtpd2ps_ymm(tcg_env, OP_PTR0, OP_PTR2); + } else { + gen_helper_cvtpd2ps_xmm(tcg_env, OP_PTR0, OP_PTR2); + } +} + +static void gen_VCVTPS2PD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if (s->vex_l) { + gen_helper_cvtps2pd_ymm(tcg_env, OP_PTR0, OP_PTR2); + } else { + gen_helper_cvtps2pd_xmm(tcg_env, OP_PTR0, OP_PTR2); + } +} + +static void gen_VCVTPS2PH(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_unary_imm_fp_sse(s, env, decode, + gen_helper_cvtps2ph_xmm, + gen_helper_cvtps2ph_ymm); + /* + * VCVTPS2PH is the only instruction that performs an operation on a + * register source and then *stores* into memory. 
+ */ + if (decode->op[0].has_ea) { + gen_store_sse(s, decode, decode->op[0].offset); + } +} + +static void gen_VCVTSD2SS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_cvtsd2ss(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); +} + +static void gen_VCVTSS2SD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_helper_cvtss2sd(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2); +} + +static void gen_VCVTSI2Sx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + TCGv_i32 in; + + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len); + +#ifdef TARGET_X86_64 + MemOp ot = decode->op[2].ot; + if (ot == MO_64) { + if (s->prefix & PREFIX_REPNZ) { + gen_helper_cvtsq2sd(tcg_env, OP_PTR0, s->T1); + } else { + gen_helper_cvtsq2ss(tcg_env, OP_PTR0, s->T1); + } + return; + } + in = s->tmp2_i32; + tcg_gen_trunc_tl_i32(in, s->T1); +#else + in = s->T1; +#endif + + if (s->prefix & PREFIX_REPNZ) { + gen_helper_cvtsi2sd(tcg_env, OP_PTR0, in); + } else { + gen_helper_cvtsi2ss(tcg_env, OP_PTR0, in); + } +} + +static inline void gen_VCVTtSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + SSEFunc_i_ep ss2si, SSEFunc_l_ep ss2sq, + SSEFunc_i_ep sd2si, SSEFunc_l_ep sd2sq) +{ + TCGv_i32 out; + +#ifdef TARGET_X86_64 + MemOp ot = decode->op[0].ot; + if (ot == MO_64) { + if (s->prefix & PREFIX_REPNZ) { + sd2sq(s->T0, tcg_env, OP_PTR2); + } else { + ss2sq(s->T0, tcg_env, OP_PTR2); + } + return; + } + + out = s->tmp2_i32; +#else + out = s->T0; +#endif + if (s->prefix & PREFIX_REPNZ) { + sd2si(out, tcg_env, OP_PTR2); + } else { + ss2si(out, tcg_env, OP_PTR2); + } +#ifdef TARGET_X86_64 + tcg_gen_extu_i32_tl(s->T0, out); +#endif +} + +#ifndef TARGET_X86_64 +#define gen_helper_cvtss2sq NULL +#define gen_helper_cvtsd2sq NULL +#define gen_helper_cvttss2sq NULL +#define gen_helper_cvttsd2sq NULL +#endif + +static void gen_VCVTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ 
+ gen_VCVTtSx2SI(s, env, decode, + gen_helper_cvtss2si, gen_helper_cvtss2sq, + gen_helper_cvtsd2si, gen_helper_cvtsd2sq); +} + +static void gen_VCVTTSx2SI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_VCVTtSx2SI(s, env, decode, + gen_helper_cvttss2si, gen_helper_cvttss2sq, + gen_helper_cvttsd2si, gen_helper_cvttsd2sq); +} + +static void gen_VEXTRACTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int mask = decode->immediate & 1; + int src_ofs = vector_elem_offset(&decode->op[1], MO_128, mask); + if (decode->op[0].has_ea) { + /* VEX-only instruction, no alignment requirements. */ + gen_sto_env_A0(s, src_ofs, false); + } else { + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, src_ofs, 16, 16); + } +} + +static void gen_VEXTRACTPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_pextr(s, env, decode, MO_32); +} + +static void gen_vinsertps(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int val = decode->immediate; + int dest_word = (val >> 4) & 3; + int new_mask = (val & 15) | (1 << dest_word); + int vec_len = 16; + + assert(!s->vex_l); + + if (new_mask == 15) { + /* All zeroes except possibly for the inserted element */ + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + } else if (decode->op[1].offset != decode->op[0].offset) { + gen_store_sse(s, decode, decode->op[1].offset); + } + + if (new_mask != (val & 15)) { + tcg_gen_st_i32(s->tmp2_i32, tcg_env, + vector_elem_offset(&decode->op[0], MO_32, dest_word)); + } + + if (new_mask != 15) { + TCGv_i32 zero = tcg_constant_i32(0); /* float32_zero */ + int i; + for (i = 0; i < 4; i++) { + if ((val >> i) & 1) { + tcg_gen_st_i32(zero, tcg_env, + vector_elem_offset(&decode->op[0], MO_32, i)); + } + } + } +} + +static void gen_VINSERTPS_r(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int val = decode->immediate; + tcg_gen_ld_i32(s->tmp2_i32, tcg_env, + vector_elem_offset(&decode->op[2], MO_32, (val >> 6) & 
3)); + gen_vinsertps(s, env, decode); +} + +static void gen_VINSERTPS_m(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); + gen_vinsertps(s, env, decode); +} + +static void gen_VINSERTx128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int mask = decode->immediate & 1; + tcg_gen_gvec_mov(MO_64, + decode->op[0].offset + offsetof(YMMReg, YMM_X(mask)), + decode->op[2].offset + offsetof(YMMReg, YMM_X(0)), 16, 16); + tcg_gen_gvec_mov(MO_64, + decode->op[0].offset + offsetof(YMMReg, YMM_X(!mask)), + decode->op[1].offset + offsetof(YMMReg, YMM_X(!mask)), 16, 16); +} + +static inline void gen_maskmov(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode, + SSEFunc_0_eppt xmm, SSEFunc_0_eppt ymm) +{ + if (!s->vex_l) { + xmm(tcg_env, OP_PTR2, OP_PTR1, s->A0); + } else { + ymm(tcg_env, OP_PTR2, OP_PTR1, s->A0); + } +} + +static void gen_VMASKMOVPD_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_maskmov(s, env, decode, gen_helper_vpmaskmovq_st_xmm, gen_helper_vpmaskmovq_st_ymm); +} + +static void gen_VMASKMOVPS_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_maskmov(s, env, decode, gen_helper_vpmaskmovd_st_xmm, gen_helper_vpmaskmovd_st_ymm); +} + +static void gen_VMOVHPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_ldq_env_A0(s, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1))); + if (decode->op[0].offset != decode->op[1].offset) { + tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); + } +} + +static void gen_VMOVHPx_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + gen_stq_env_A0(s, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1))); +} + +static void gen_VMOVHPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if (decode->op[0].offset != 
decode->op[2].offset) { + tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1))); + tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1))); + } + if (decode->op[0].offset != decode->op[1].offset) { + tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); + } +} + +static void gen_VMOVHLPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(1))); + tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); + if (decode->op[0].offset != decode->op[1].offset) { + tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(1))); + tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1))); + } +} + +static void gen_VMOVLHPS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset); + tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(1))); + if (decode->op[0].offset != decode->op[1].offset) { + tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[1].offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); + } +} + +/* + * Note that MOVLPx supports 256-bit operation unlike MOVHLPx, MOVLHPx, MOVHPx. + * Use a gvec move to move everything above the bottom 64 bits.
+ */ + +static void gen_VMOVLPx(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + + tcg_gen_ld_i64(s->tmp1_i64, tcg_env, decode->op[2].offset + offsetof(XMMReg, XMM_Q(0))); + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len); + tcg_gen_st_i64(s->tmp1_i64, tcg_env, decode->op[0].offset + offsetof(XMMReg, XMM_Q(0))); +} + +static void gen_VMOVLPx_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len); + tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0))); +} + +static void gen_VMOVLPx_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + tcg_gen_ld_i64(s->tmp1_i64, OP_PTR2, offsetof(ZMMReg, ZMM_Q(0))); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); +} + +static void gen_VMOVSD_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i64 zero = tcg_constant_i64(0); + + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); + tcg_gen_st_i64(zero, OP_PTR0, offsetof(ZMMReg, ZMM_Q(1))); + tcg_gen_st_i64(s->tmp1_i64, OP_PTR0, offsetof(ZMMReg, ZMM_Q(0))); +} + +static void gen_VMOVSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + + tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_gvec_mov(MO_64, decode->op[0].offset, decode->op[1].offset, vec_len, vec_len); + tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, offsetof(ZMMReg, ZMM_L(0))); +} + +static void gen_VMOVSS_ld(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int vec_len = vector_len(s, decode); + + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); + tcg_gen_gvec_dup_imm(MO_64, decode->op[0].offset, vec_len, vec_len, 0); + tcg_gen_st_i32(s->tmp2_i32, OP_PTR0, 
offsetof(ZMMReg, ZMM_L(0))); +} + +static void gen_VMOVSS_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + tcg_gen_ld_i32(s->tmp2_i32, OP_PTR2, offsetof(ZMMReg, ZMM_L(0))); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); +} + +static void gen_VPMASKMOV_st(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + if (s->vex_w) { + gen_VMASKMOVPD_st(s, env, decode); + } else { + gen_VMASKMOVPS_st(s, env, decode); + } +} + +static void gen_VPERMD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + assert(s->vex_l); + gen_helper_vpermd_ymm(OP_PTR0, OP_PTR1, OP_PTR2); +} + +static void gen_VPERM2x128(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); + assert(s->vex_l); + gen_helper_vpermdq_ymm(OP_PTR0, OP_PTR1, OP_PTR2, imm); +} + +static void gen_VPHMINPOSUW(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + assert(!s->vex_l); + gen_helper_phminposuw_xmm(tcg_env, OP_PTR0, OP_PTR2); +} + +static void gen_VROUNDSD(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); + assert(!s->vex_l); + gen_helper_roundsd_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); +} + +static void gen_VROUNDSS(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 imm = tcg_constant8u_i32(decode->immediate); + assert(!s->vex_l); + gen_helper_roundss_xmm(tcg_env, OP_PTR0, OP_PTR1, OP_PTR2, imm); +} + +static void gen_VSHUF(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_i32 imm = tcg_constant_i32(decode->immediate); + SSEFunc_0_pppi ps, pd, fn; + ps = s->vex_l ? gen_helper_shufps_ymm : gen_helper_shufps_xmm; + pd = s->vex_l ? gen_helper_shufpd_ymm : gen_helper_shufpd_xmm; + fn = s->prefix & PREFIX_DATA ? 
pd : ps; + fn(OP_PTR0, OP_PTR1, OP_PTR2, imm); +} + +static void gen_VUCOMI(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + SSEFunc_0_epp fn; + fn = s->prefix & PREFIX_DATA ? gen_helper_ucomisd : gen_helper_ucomiss; + fn(tcg_env, OP_PTR1, OP_PTR2); + set_cc_op(s, CC_OP_EFLAGS); +} + +static void gen_VZEROALL(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + TCGv_ptr ptr = tcg_temp_new_ptr(); + + tcg_gen_addi_ptr(ptr, tcg_env, offsetof(CPUX86State, xmm_regs)); + gen_helper_memset(ptr, ptr, tcg_constant_i32(0), + tcg_constant_ptr(CPU_NB_REGS * sizeof(ZMMReg))); +} + +static void gen_VZEROUPPER(DisasContext *s, CPUX86State *env, X86DecodedInsn *decode) +{ + int i; + + for (i = 0; i < CPU_NB_REGS; i++) { + int offset = offsetof(CPUX86State, xmm_regs[i].ZMM_X(1)); + tcg_gen_gvec_dup_imm(MO_64, offset, 16, 16, 0); + } +} diff --git a/target/i386/tcg/excp_helper.c b/target/i386/tcg/excp_helper.c new file mode 100644 index 0000000000..65e37ae2a0 --- /dev/null +++ b/target/i386/tcg/excp_helper.c @@ -0,0 +1,154 @@ +/* + * x86 exception helpers + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "qemu/log.h" +#include "sysemu/runstate.h" +#include "exec/helper-proto.h" +#include "helper-tcg.h" + +G_NORETURN void helper_raise_interrupt(CPUX86State *env, int intno, + int next_eip_addend) +{ + raise_interrupt(env, intno, next_eip_addend); +} + +G_NORETURN void helper_raise_exception(CPUX86State *env, int exception_index) +{ + raise_exception(env, exception_index); +} + +/* + * Check nested exceptions and change to double or triple fault if + * needed. It should only be called, if this is not an interrupt. + * Returns the new exception number. + */ +static int check_exception(CPUX86State *env, int intno, int *error_code, + uintptr_t retaddr) +{ + int first_contributory = env->old_exception == 0 || + (env->old_exception >= 10 && + env->old_exception <= 13); + int second_contributory = intno == 0 || + (intno >= 10 && intno <= 13); + + qemu_log_mask(CPU_LOG_INT, "check_exception old: 0x%x new 0x%x\n", + env->old_exception, intno); + +#if !defined(CONFIG_USER_ONLY) + if (env->old_exception == EXCP08_DBLE) { + if (env->hflags & HF_GUEST_MASK) { + cpu_vmexit(env, SVM_EXIT_SHUTDOWN, 0, retaddr); /* does not return */ + } + + qemu_log_mask(CPU_LOG_RESET, "Triple fault\n"); + + qemu_system_reset_request(SHUTDOWN_CAUSE_GUEST_RESET); + return EXCP_HLT; + } +#endif + + if ((first_contributory && second_contributory) + || (env->old_exception == EXCP0E_PAGE && + (second_contributory || (intno == EXCP0E_PAGE)))) { + intno = EXCP08_DBLE; + *error_code = 0; + } + + if (second_contributory || (intno == EXCP0E_PAGE) || + (intno == EXCP08_DBLE)) { + env->old_exception = intno; + } + + return intno; +} + +/* + * Signal an interruption. It is executed in the main CPU loop. + * is_int is TRUE if coming from the int instruction. next_eip is the + * env->eip value AFTER the interrupt instruction. It is only relevant if + * is_int is TRUE. 
+ */ +static G_NORETURN +void raise_interrupt2(CPUX86State *env, int intno, + int is_int, int error_code, + int next_eip_addend, + uintptr_t retaddr) +{ + CPUState *cs = env_cpu(env); + + if (!is_int) { + cpu_svm_check_intercept_param(env, SVM_EXIT_EXCP_BASE + intno, + error_code, retaddr); + intno = check_exception(env, intno, &error_code, retaddr); + } else { + cpu_svm_check_intercept_param(env, SVM_EXIT_SWINT, 0, retaddr); + } + + cs->exception_index = intno; + env->error_code = error_code; + env->exception_is_int = is_int; + env->exception_next_eip = env->eip + next_eip_addend; + cpu_loop_exit_restore(cs, retaddr); +} + +/* shortcuts to generate exceptions */ + +G_NORETURN void raise_interrupt(CPUX86State *env, int intno, int next_eip_addend) +{ + raise_interrupt2(env, intno, 1, 0, next_eip_addend, 0); +} + +G_NORETURN void raise_exception_err(CPUX86State *env, int exception_index, + int error_code) +{ + raise_interrupt2(env, exception_index, 0, error_code, 0, 0); +} + +G_NORETURN void raise_exception_err_ra(CPUX86State *env, int exception_index, + int error_code, uintptr_t retaddr) +{ + raise_interrupt2(env, exception_index, 0, error_code, 0, retaddr); +} + +G_NORETURN void raise_exception(CPUX86State *env, int exception_index) +{ + raise_interrupt2(env, exception_index, 0, 0, 0, 0); +} + +G_NORETURN void raise_exception_ra(CPUX86State *env, int exception_index, + uintptr_t retaddr) +{ + raise_interrupt2(env, exception_index, 0, 0, 0, retaddr); +} + +G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr, + MMUAccessType access_type, + uintptr_t retaddr) +{ + /* + * Unaligned accesses are currently only triggered by SSE/AVX + * instructions that impose alignment requirements on memory + * operands. These instructions raise #GP(0) upon accessing an + * unaligned address. 
+ */ + raise_exception_ra(env, EXCP0D_GPF, retaddr); +} diff --git a/target/i386/tcg/fpu_helper.c b/target/i386/tcg/fpu_helper.c new file mode 100644 index 0000000000..4b965a5d6c --- /dev/null +++ b/target/i386/tcg/fpu_helper.c @@ -0,0 +1,3117 @@ +/* + * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include <math.h> +#include "cpu.h" +#include "tcg-cpu.h" +#include "exec/cpu_ldst.h" +#include "exec/helper-proto.h" +#include "fpu/softfloat.h" +#include "fpu/softfloat-macros.h" +#include "helper-tcg.h" + +/* float macros */ +#define FT0 (env->ft0) +#define ST0 (env->fpregs[env->fpstt].d) +#define ST(n) (env->fpregs[(env->fpstt + (n)) & 7].d) +#define ST1 ST(1) + +#define FPU_RC_SHIFT 10 +#define FPU_RC_MASK (3 << FPU_RC_SHIFT) +#define FPU_RC_NEAR 0x000 +#define FPU_RC_DOWN 0x400 +#define FPU_RC_UP 0x800 +#define FPU_RC_CHOP 0xc00 + +#define MAXTAN 9223372036854775808.0 + +/* the following deal with x86 long double-precision numbers */ +#define MAXEXPD 0x7fff +#define EXPBIAS 16383 +#define EXPD(fp) (fp.l.upper & 0x7fff) +#define SIGND(fp) ((fp.l.upper) & 0x8000) +#define MANTD(fp) (fp.l.lower) +#define BIASEXPONENT(fp) fp.l.upper = (fp.l.upper & ~(0x7fff)) | EXPBIAS + +#define FPUS_IE (1 << 0) +#define FPUS_DE (1 << 1) +#define FPUS_ZE (1 << 2) +#define FPUS_OE (1 << 3) +#define FPUS_UE (1 << 4) +#define FPUS_PE (1 << 5) +#define FPUS_SF (1 << 6) +#define FPUS_SE (1 << 7) +#define FPUS_B (1 << 15) + +#define FPUC_EM 0x3f + +#define floatx80_lg2 make_floatx80(0x3ffd, 0x9a209a84fbcff799LL) +#define floatx80_lg2_d make_floatx80(0x3ffd, 0x9a209a84fbcff798LL) +#define floatx80_l2e make_floatx80(0x3fff, 0xb8aa3b295c17f0bcLL) +#define floatx80_l2e_d make_floatx80(0x3fff, 0xb8aa3b295c17f0bbLL) +#define floatx80_l2t make_floatx80(0x4000, 0xd49a784bcd1b8afeLL) +#define floatx80_l2t_u make_floatx80(0x4000, 0xd49a784bcd1b8affLL) +#define floatx80_ln2_d make_floatx80(0x3ffe, 0xb17217f7d1cf79abLL) +#define floatx80_pi_d make_floatx80(0x4000, 0xc90fdaa22168c234LL) + +static inline void fpush(CPUX86State *env) +{ + env->fpstt = (env->fpstt - 1) & 7; + env->fptags[env->fpstt] = 0; /* validate stack entry */ +} + +static inline void fpop(CPUX86State *env) +{ + env->fptags[env->fpstt] = 1; /* invalidate stack entry */ + env->fpstt = (env->fpstt + 
1) & 7; +} + +static floatx80 do_fldt(CPUX86State *env, target_ulong ptr, uintptr_t retaddr) +{ + CPU_LDoubleU temp; + + temp.l.lower = cpu_ldq_data_ra(env, ptr, retaddr); + temp.l.upper = cpu_lduw_data_ra(env, ptr + 8, retaddr); + return temp.d; +} + +static void do_fstt(CPUX86State *env, floatx80 f, target_ulong ptr, + uintptr_t retaddr) +{ + CPU_LDoubleU temp; + + temp.d = f; + cpu_stq_data_ra(env, ptr, temp.l.lower, retaddr); + cpu_stw_data_ra(env, ptr + 8, temp.l.upper, retaddr); +} + +/* x87 FPU helpers */ + +static inline double floatx80_to_double(CPUX86State *env, floatx80 a) +{ + union { + float64 f64; + double d; + } u; + + u.f64 = floatx80_to_float64(a, &env->fp_status); + return u.d; +} + +static inline floatx80 double_to_floatx80(CPUX86State *env, double a) +{ + union { + float64 f64; + double d; + } u; + + u.d = a; + return float64_to_floatx80(u.f64, &env->fp_status); +} + +static void fpu_set_exception(CPUX86State *env, int mask) +{ + env->fpus |= mask; + if (env->fpus & (~env->fpuc & FPUC_EM)) { + env->fpus |= FPUS_SE | FPUS_B; + } +} + +static inline uint8_t save_exception_flags(CPUX86State *env) +{ + uint8_t old_flags = get_float_exception_flags(&env->fp_status); + set_float_exception_flags(0, &env->fp_status); + return old_flags; +} + +static void merge_exception_flags(CPUX86State *env, uint8_t old_flags) +{ + uint8_t new_flags = get_float_exception_flags(&env->fp_status); + float_raise(old_flags, &env->fp_status); + fpu_set_exception(env, + ((new_flags & float_flag_invalid ? FPUS_IE : 0) | + (new_flags & float_flag_divbyzero ? FPUS_ZE : 0) | + (new_flags & float_flag_overflow ? FPUS_OE : 0) | + (new_flags & float_flag_underflow ? FPUS_UE : 0) | + (new_flags & float_flag_inexact ? FPUS_PE : 0) | + (new_flags & float_flag_input_denormal ? 
FPUS_DE : 0))); +} + +static inline floatx80 helper_fdiv(CPUX86State *env, floatx80 a, floatx80 b) +{ + uint8_t old_flags = save_exception_flags(env); + floatx80 ret = floatx80_div(a, b, &env->fp_status); + merge_exception_flags(env, old_flags); + return ret; +} + +static void fpu_raise_exception(CPUX86State *env, uintptr_t retaddr) +{ + if (env->cr[0] & CR0_NE_MASK) { + raise_exception_ra(env, EXCP10_COPR, retaddr); + } +#if !defined(CONFIG_USER_ONLY) + else { + fpu_check_raise_ferr_irq(env); + } +#endif +} + +void helper_flds_FT0(CPUX86State *env, uint32_t val) +{ + uint8_t old_flags = save_exception_flags(env); + union { + float32 f; + uint32_t i; + } u; + + u.i = val; + FT0 = float32_to_floatx80(u.f, &env->fp_status); + merge_exception_flags(env, old_flags); +} + +void helper_fldl_FT0(CPUX86State *env, uint64_t val) +{ + uint8_t old_flags = save_exception_flags(env); + union { + float64 f; + uint64_t i; + } u; + + u.i = val; + FT0 = float64_to_floatx80(u.f, &env->fp_status); + merge_exception_flags(env, old_flags); +} + +void helper_fildl_FT0(CPUX86State *env, int32_t val) +{ + FT0 = int32_to_floatx80(val, &env->fp_status); +} + +void helper_flds_ST0(CPUX86State *env, uint32_t val) +{ + uint8_t old_flags = save_exception_flags(env); + int new_fpstt; + union { + float32 f; + uint32_t i; + } u; + + new_fpstt = (env->fpstt - 1) & 7; + u.i = val; + env->fpregs[new_fpstt].d = float32_to_floatx80(u.f, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ + merge_exception_flags(env, old_flags); +} + +void helper_fldl_ST0(CPUX86State *env, uint64_t val) +{ + uint8_t old_flags = save_exception_flags(env); + int new_fpstt; + union { + float64 f; + uint64_t i; + } u; + + new_fpstt = (env->fpstt - 1) & 7; + u.i = val; + env->fpregs[new_fpstt].d = float64_to_floatx80(u.f, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ + merge_exception_flags(env, old_flags); +} + +static 
FloatX80RoundPrec tmp_maximise_precision(float_status *st) +{ + FloatX80RoundPrec old = get_floatx80_rounding_precision(st); + set_floatx80_rounding_precision(floatx80_precision_x, st); + return old; +} + +void helper_fildl_ST0(CPUX86State *env, int32_t val) +{ + int new_fpstt; + FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); + + new_fpstt = (env->fpstt - 1) & 7; + env->fpregs[new_fpstt].d = int32_to_floatx80(val, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ + + set_floatx80_rounding_precision(old, &env->fp_status); +} + +void helper_fildll_ST0(CPUX86State *env, int64_t val) +{ + int new_fpstt; + FloatX80RoundPrec old = tmp_maximise_precision(&env->fp_status); + + new_fpstt = (env->fpstt - 1) & 7; + env->fpregs[new_fpstt].d = int64_to_floatx80(val, &env->fp_status); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ + + set_floatx80_rounding_precision(old, &env->fp_status); +} + +uint32_t helper_fsts_ST0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + union { + float32 f; + uint32_t i; + } u; + + u.f = floatx80_to_float32(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); + return u.i; +} + +uint64_t helper_fstl_ST0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + union { + float64 f; + uint64_t i; + } u; + + u.f = floatx80_to_float64(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); + return u.i; +} + +int32_t helper_fist_ST0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + int32_t val; + + val = floatx80_to_int32(ST0, &env->fp_status); + if (val != (int16_t)val) { + set_float_exception_flags(float_flag_invalid, &env->fp_status); + val = -32768; + } + merge_exception_flags(env, old_flags); + return val; +} + +int32_t helper_fistl_ST0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + int32_t val; + + val = floatx80_to_int32(ST0, 
&env->fp_status); + if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { + val = 0x80000000; + } + merge_exception_flags(env, old_flags); + return val; +} + +int64_t helper_fistll_ST0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + int64_t val; + + val = floatx80_to_int64(ST0, &env->fp_status); + if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { + val = 0x8000000000000000ULL; + } + merge_exception_flags(env, old_flags); + return val; +} + +int32_t helper_fistt_ST0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + int32_t val; + + val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); + if (val != (int16_t)val) { + set_float_exception_flags(float_flag_invalid, &env->fp_status); + val = -32768; + } + merge_exception_flags(env, old_flags); + return val; +} + +int32_t helper_fisttl_ST0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + int32_t val; + + val = floatx80_to_int32_round_to_zero(ST0, &env->fp_status); + if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { + val = 0x80000000; + } + merge_exception_flags(env, old_flags); + return val; +} + +int64_t helper_fisttll_ST0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + int64_t val; + + val = floatx80_to_int64_round_to_zero(ST0, &env->fp_status); + if (get_float_exception_flags(&env->fp_status) & float_flag_invalid) { + val = 0x8000000000000000ULL; + } + merge_exception_flags(env, old_flags); + return val; +} + +void helper_fldt_ST0(CPUX86State *env, target_ulong ptr) +{ + int new_fpstt; + + new_fpstt = (env->fpstt - 1) & 7; + env->fpregs[new_fpstt].d = do_fldt(env, ptr, GETPC()); + env->fpstt = new_fpstt; + env->fptags[new_fpstt] = 0; /* validate stack entry */ +} + +void helper_fstt_ST0(CPUX86State *env, target_ulong ptr) +{ + do_fstt(env, ST0, ptr, GETPC()); +} + +void helper_fpush(CPUX86State *env) +{ + fpush(env); +} + +void helper_fpop(CPUX86State *env) +{ + 
fpop(env); +} + +void helper_fdecstp(CPUX86State *env) +{ + env->fpstt = (env->fpstt - 1) & 7; + env->fpus &= ~0x4700; +} + +void helper_fincstp(CPUX86State *env) +{ + env->fpstt = (env->fpstt + 1) & 7; + env->fpus &= ~0x4700; +} + +/* FPU move */ + +void helper_ffree_STN(CPUX86State *env, int st_index) +{ + env->fptags[(env->fpstt + st_index) & 7] = 1; +} + +void helper_fmov_ST0_FT0(CPUX86State *env) +{ + ST0 = FT0; +} + +void helper_fmov_FT0_STN(CPUX86State *env, int st_index) +{ + FT0 = ST(st_index); +} + +void helper_fmov_ST0_STN(CPUX86State *env, int st_index) +{ + ST0 = ST(st_index); +} + +void helper_fmov_STN_ST0(CPUX86State *env, int st_index) +{ + ST(st_index) = ST0; +} + +void helper_fxchg_ST0_STN(CPUX86State *env, int st_index) +{ + floatx80 tmp; + + tmp = ST(st_index); + ST(st_index) = ST0; + ST0 = tmp; +} + +/* FPU operations */ + +static const int fcom_ccval[4] = {0x0100, 0x4000, 0x0000, 0x4500}; + +void helper_fcom_ST0_FT0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + FloatRelation ret; + + ret = floatx80_compare(ST0, FT0, &env->fp_status); + env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; + merge_exception_flags(env, old_flags); +} + +void helper_fucom_ST0_FT0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + FloatRelation ret; + + ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); + env->fpus = (env->fpus & ~0x4500) | fcom_ccval[ret + 1]; + merge_exception_flags(env, old_flags); +} + +static const int fcomi_ccval[4] = {CC_C, CC_Z, 0, CC_Z | CC_P | CC_C}; + +void helper_fcomi_ST0_FT0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + int eflags; + FloatRelation ret; + + ret = floatx80_compare(ST0, FT0, &env->fp_status); + eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); + CC_SRC = eflags | fcomi_ccval[ret + 1]; + merge_exception_flags(env, old_flags); +} + +void helper_fucomi_ST0_FT0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + int 
eflags; + FloatRelation ret; + + ret = floatx80_compare_quiet(ST0, FT0, &env->fp_status); + eflags = cpu_cc_compute_all(env) & ~(CC_Z | CC_P | CC_C); + CC_SRC = eflags | fcomi_ccval[ret + 1]; + merge_exception_flags(env, old_flags); +} + +void helper_fadd_ST0_FT0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + ST0 = floatx80_add(ST0, FT0, &env->fp_status); + merge_exception_flags(env, old_flags); +} + +void helper_fmul_ST0_FT0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + ST0 = floatx80_mul(ST0, FT0, &env->fp_status); + merge_exception_flags(env, old_flags); +} + +void helper_fsub_ST0_FT0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + ST0 = floatx80_sub(ST0, FT0, &env->fp_status); + merge_exception_flags(env, old_flags); +} + +void helper_fsubr_ST0_FT0(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + ST0 = floatx80_sub(FT0, ST0, &env->fp_status); + merge_exception_flags(env, old_flags); +} + +void helper_fdiv_ST0_FT0(CPUX86State *env) +{ + ST0 = helper_fdiv(env, ST0, FT0); +} + +void helper_fdivr_ST0_FT0(CPUX86State *env) +{ + ST0 = helper_fdiv(env, FT0, ST0); +} + +/* fp operations between STN and ST0 */ + +void helper_fadd_STN_ST0(CPUX86State *env, int st_index) +{ + uint8_t old_flags = save_exception_flags(env); + ST(st_index) = floatx80_add(ST(st_index), ST0, &env->fp_status); + merge_exception_flags(env, old_flags); +} + +void helper_fmul_STN_ST0(CPUX86State *env, int st_index) +{ + uint8_t old_flags = save_exception_flags(env); + ST(st_index) = floatx80_mul(ST(st_index), ST0, &env->fp_status); + merge_exception_flags(env, old_flags); +} + +void helper_fsub_STN_ST0(CPUX86State *env, int st_index) +{ + uint8_t old_flags = save_exception_flags(env); + ST(st_index) = floatx80_sub(ST(st_index), ST0, &env->fp_status); + merge_exception_flags(env, old_flags); +} + +void helper_fsubr_STN_ST0(CPUX86State *env, int st_index) +{ + uint8_t old_flags = 
save_exception_flags(env); + ST(st_index) = floatx80_sub(ST0, ST(st_index), &env->fp_status); + merge_exception_flags(env, old_flags); +} + +void helper_fdiv_STN_ST0(CPUX86State *env, int st_index) +{ + floatx80 *p; + + p = &ST(st_index); + *p = helper_fdiv(env, *p, ST0); +} + +void helper_fdivr_STN_ST0(CPUX86State *env, int st_index) +{ + floatx80 *p; + + p = &ST(st_index); + *p = helper_fdiv(env, ST0, *p); +} + +/* misc FPU operations */ +void helper_fchs_ST0(CPUX86State *env) +{ + ST0 = floatx80_chs(ST0); +} + +void helper_fabs_ST0(CPUX86State *env) +{ + ST0 = floatx80_abs(ST0); +} + +void helper_fld1_ST0(CPUX86State *env) +{ + ST0 = floatx80_one; +} + +void helper_fldl2t_ST0(CPUX86State *env) +{ + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_UP: + ST0 = floatx80_l2t_u; + break; + default: + ST0 = floatx80_l2t; + break; + } +} + +void helper_fldl2e_ST0(CPUX86State *env) +{ + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_l2e_d; + break; + default: + ST0 = floatx80_l2e; + break; + } +} + +void helper_fldpi_ST0(CPUX86State *env) +{ + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_pi_d; + break; + default: + ST0 = floatx80_pi; + break; + } +} + +void helper_fldlg2_ST0(CPUX86State *env) +{ + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_lg2_d; + break; + default: + ST0 = floatx80_lg2; + break; + } +} + +void helper_fldln2_ST0(CPUX86State *env) +{ + switch (env->fpuc & FPU_RC_MASK) { + case FPU_RC_DOWN: + case FPU_RC_CHOP: + ST0 = floatx80_ln2_d; + break; + default: + ST0 = floatx80_ln2; + break; + } +} + +void helper_fldz_ST0(CPUX86State *env) +{ + ST0 = floatx80_zero; +} + +void helper_fldz_FT0(CPUX86State *env) +{ + FT0 = floatx80_zero; +} + +uint32_t helper_fnstsw(CPUX86State *env) +{ + return (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; +} + +uint32_t helper_fnstcw(CPUX86State *env) +{ + return env->fpuc; +} + +static 
void set_x86_rounding_mode(unsigned mode, float_status *status) +{ + static FloatRoundMode x86_round_mode[4] = { + float_round_nearest_even, + float_round_down, + float_round_up, + float_round_to_zero + }; + assert(mode < ARRAY_SIZE(x86_round_mode)); + set_float_rounding_mode(x86_round_mode[mode], status); +} + +void update_fp_status(CPUX86State *env) +{ + int rnd_mode; + FloatX80RoundPrec rnd_prec; + + /* set rounding mode */ + rnd_mode = (env->fpuc & FPU_RC_MASK) >> FPU_RC_SHIFT; + set_x86_rounding_mode(rnd_mode, &env->fp_status); + + switch ((env->fpuc >> 8) & 3) { + case 0: + rnd_prec = floatx80_precision_s; + break; + case 2: + rnd_prec = floatx80_precision_d; + break; + case 3: + default: + rnd_prec = floatx80_precision_x; + break; + } + set_floatx80_rounding_precision(rnd_prec, &env->fp_status); +} + +void helper_fldcw(CPUX86State *env, uint32_t val) +{ + cpu_set_fpuc(env, val); +} + +void helper_fclex(CPUX86State *env) +{ + env->fpus &= 0x7f00; +} + +void helper_fwait(CPUX86State *env) +{ + if (env->fpus & FPUS_SE) { + fpu_raise_exception(env, GETPC()); + } +} + +static void do_fninit(CPUX86State *env) +{ + env->fpus = 0; + env->fpstt = 0; + env->fpcs = 0; + env->fpds = 0; + env->fpip = 0; + env->fpdp = 0; + cpu_set_fpuc(env, 0x37f); + env->fptags[0] = 1; + env->fptags[1] = 1; + env->fptags[2] = 1; + env->fptags[3] = 1; + env->fptags[4] = 1; + env->fptags[5] = 1; + env->fptags[6] = 1; + env->fptags[7] = 1; +} + +void helper_fninit(CPUX86State *env) +{ + do_fninit(env); +} + +/* BCD ops */ + +void helper_fbld_ST0(CPUX86State *env, target_ulong ptr) +{ + floatx80 tmp; + uint64_t val; + unsigned int v; + int i; + + val = 0; + for (i = 8; i >= 0; i--) { + v = cpu_ldub_data_ra(env, ptr + i, GETPC()); + val = (val * 100) + ((v >> 4) * 10) + (v & 0xf); + } + tmp = int64_to_floatx80(val, &env->fp_status); + if (cpu_ldub_data_ra(env, ptr + 9, GETPC()) & 0x80) { + tmp = floatx80_chs(tmp); + } + fpush(env); + ST0 = tmp; +} + +void helper_fbst_ST0(CPUX86State *env, 
target_ulong ptr) +{ + uint8_t old_flags = save_exception_flags(env); + int v; + target_ulong mem_ref, mem_end; + int64_t val; + CPU_LDoubleU temp; + + temp.d = ST0; + + val = floatx80_to_int64(ST0, &env->fp_status); + mem_ref = ptr; + if (val >= 1000000000000000000LL || val <= -1000000000000000000LL) { + set_float_exception_flags(float_flag_invalid, &env->fp_status); + while (mem_ref < ptr + 7) { + cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); + } + cpu_stb_data_ra(env, mem_ref++, 0xc0, GETPC()); + cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); + cpu_stb_data_ra(env, mem_ref++, 0xff, GETPC()); + merge_exception_flags(env, old_flags); + return; + } + mem_end = mem_ref + 9; + if (SIGND(temp)) { + cpu_stb_data_ra(env, mem_end, 0x80, GETPC()); + val = -val; + } else { + cpu_stb_data_ra(env, mem_end, 0x00, GETPC()); + } + while (mem_ref < mem_end) { + if (val == 0) { + break; + } + v = val % 100; + val = val / 100; + v = ((v / 10) << 4) | (v % 10); + cpu_stb_data_ra(env, mem_ref++, v, GETPC()); + } + while (mem_ref < mem_end) { + cpu_stb_data_ra(env, mem_ref++, 0, GETPC()); + } + merge_exception_flags(env, old_flags); +} + +/* 128-bit significand of log(2). */ +#define ln2_sig_high 0xb17217f7d1cf79abULL +#define ln2_sig_low 0xc9e3b39803f2f6afULL + +/* + * Polynomial coefficients for an approximation to (2^x - 1) / x, on + * the interval [-1/64, 1/64]. 
/*
 * Polynomial coefficients for an approximation to (2^x - 1) / x, on
 * the interval [-1/64, 1/64].
 *
 * f2xm1_coeff_0_low is a small correction term paired with f2xm1_coeff_0
 * (presumably the low part of an extra-precision leading coefficient --
 * confirm against helper_f2xm1).
 */
#define f2xm1_coeff_0 make_floatx80(0x3ffe, 0xb17217f7d1cf79acULL)
#define f2xm1_coeff_0_low make_floatx80(0xbfbc, 0xd87edabf495b3762ULL)
#define f2xm1_coeff_1 make_floatx80(0x3ffc, 0xf5fdeffc162c7543ULL)
#define f2xm1_coeff_2 make_floatx80(0x3ffa, 0xe35846b82505fcc7ULL)
#define f2xm1_coeff_3 make_floatx80(0x3ff8, 0x9d955b7dd273b899ULL)
#define f2xm1_coeff_4 make_floatx80(0x3ff5, 0xaec3ff3c4ef4ac0cULL)
#define f2xm1_coeff_5 make_floatx80(0x3ff2, 0xa184897c3a7f0de9ULL)
#define f2xm1_coeff_6 make_floatx80(0x3fee, 0xffe634d0ec30d504ULL)
#define f2xm1_coeff_7 make_floatx80(0x3feb, 0xb160111d2db515e4ULL)

/* One row of the F2XM1 lookup table. */
struct f2xm1_data {
    /*
     * A value very close to a multiple of 1/32, such that 2^t and 2^t - 1
     * are very close to exact floatx80 values.
     */
    floatx80 t;
    /* The value of 2^t.  */
    floatx80 exp2;
    /* The value of 2^t - 1.  */
    floatx80 exp2m1;
};

/*
 * 65 rows covering t from -1 to 1 in steps of about 1/32.  helper_f2xm1
 * indexes this as f2xm1_table[n] with n = 32 + round(32 * ST0), so row 32
 * is the t = 0 entry.  Every constant below is part of the numeric
 * contract: do not reformat or "tidy" the hex digits.
 */
static const struct f2xm1_data f2xm1_table[65] = {
    { make_floatx80_init(0xbfff, 0x8000000000000000ULL),
      make_floatx80_init(0x3ffe, 0x8000000000000000ULL),
      make_floatx80_init(0xbffe, 0x8000000000000000ULL) },
    { make_floatx80_init(0xbffe, 0xf800000000002e7eULL),
      make_floatx80_init(0x3ffe, 0x82cd8698ac2b9160ULL),
      make_floatx80_init(0xbffd, 0xfa64f2cea7a8dd40ULL) },
    { make_floatx80_init(0xbffe, 0xefffffffffffe960ULL),
      make_floatx80_init(0x3ffe, 0x85aac367cc488345ULL),
      make_floatx80_init(0xbffd, 0xf4aa7930676ef976ULL) },
    { make_floatx80_init(0xbffe, 0xe800000000006f10ULL),
      make_floatx80_init(0x3ffe, 0x88980e8092da5c14ULL),
      make_floatx80_init(0xbffd, 0xeecfe2feda4b47d8ULL) },
    { make_floatx80_init(0xbffe, 0xe000000000008a45ULL),
      make_floatx80_init(0x3ffe, 0x8b95c1e3ea8ba2a5ULL),
      make_floatx80_init(0xbffd, 0xe8d47c382ae8bab6ULL) },
    { make_floatx80_init(0xbffe, 0xd7ffffffffff8a9eULL),
      make_floatx80_init(0x3ffe, 0x8ea4398b45cd8116ULL),
      make_floatx80_init(0xbffd, 0xe2b78ce97464fdd4ULL) },
    { make_floatx80_init(0xbffe, 0xd0000000000019a0ULL),
      make_floatx80_init(0x3ffe, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0xbffd, 0xdc785918a9dc8dceULL) },
    { make_floatx80_init(0xbffe, 0xc7ffffffffff14dfULL),
      make_floatx80_init(0x3ffe, 0x94f4efa8fef76836ULL),
      make_floatx80_init(0xbffd, 0xd61620ae02112f94ULL) },
    { make_floatx80_init(0xbffe, 0xc000000000006530ULL),
      make_floatx80_init(0x3ffe, 0x9837f0518db87fbbULL),
      make_floatx80_init(0xbffd, 0xcf901f5ce48f008aULL) },
    { make_floatx80_init(0xbffe, 0xb7ffffffffff1723ULL),
      make_floatx80_init(0x3ffe, 0x9b8d39b9d54eb74cULL),
      make_floatx80_init(0xbffd, 0xc8e58c8c55629168ULL) },
    { make_floatx80_init(0xbffe, 0xb00000000000b5e1ULL),
      make_floatx80_init(0x3ffe, 0x9ef5326091a0c366ULL),
      make_floatx80_init(0xbffd, 0xc2159b3edcbe7934ULL) },
    { make_floatx80_init(0xbffe, 0xa800000000006f8aULL),
      make_floatx80_init(0x3ffe, 0xa27043030c49370aULL),
      make_floatx80_init(0xbffd, 0xbb1f79f9e76d91ecULL) },
    { make_floatx80_init(0xbffe, 0x9fffffffffff816aULL),
      make_floatx80_init(0x3ffe, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0xbffd, 0xb40252ac9d5d1c62ULL) },
    { make_floatx80_init(0xbffe, 0x97ffffffffffb621ULL),
      make_floatx80_init(0x3ffe, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0xbffd, 0xacbd4a962b079e34ULL) },
    { make_floatx80_init(0xbffe, 0x8fffffffffff162bULL),
      make_floatx80_init(0x3ffe, 0xad583eea42a1b886ULL),
      make_floatx80_init(0xbffd, 0xa54f822b7abc8ef4ULL) },
    { make_floatx80_init(0xbffe, 0x87ffffffffff4d34ULL),
      make_floatx80_init(0x3ffe, 0xb123f581d2ac7b51ULL),
      make_floatx80_init(0xbffd, 0x9db814fc5aa7095eULL) },
    { make_floatx80_init(0xbffe, 0x800000000000227dULL),
      make_floatx80_init(0x3ffe, 0xb504f333f9de539dULL),
      make_floatx80_init(0xbffd, 0x95f619980c4358c6ULL) },
    { make_floatx80_init(0xbffd, 0xefffffffffff3978ULL),
      make_floatx80_init(0x3ffe, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0xbffd, 0x8e08a1713a085ebeULL) },
    { make_floatx80_init(0xbffd, 0xe00000000000df81ULL),
      make_floatx80_init(0x3ffe, 0xbd08a39f580bfd8cULL),
      make_floatx80_init(0xbffd, 0x85eeb8c14fe804e8ULL) },
    { make_floatx80_init(0xbffd, 0xd00000000000bccfULL),
      make_floatx80_init(0x3ffe, 0xc12c4cca667062f6ULL),
      make_floatx80_init(0xbffc, 0xfb4eccd6663e7428ULL) },
    { make_floatx80_init(0xbffd, 0xc00000000000eff0ULL),
      make_floatx80_init(0x3ffe, 0xc5672a1155069abeULL),
      make_floatx80_init(0xbffc, 0xea6357baabe59508ULL) },
    { make_floatx80_init(0xbffd, 0xb000000000000fe6ULL),
      make_floatx80_init(0x3ffe, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0xbffc, 0xd91909e6474372d4ULL) },
    { make_floatx80_init(0xbffd, 0x9fffffffffff2172ULL),
      make_floatx80_init(0x3ffe, 0xce248c151f84bf00ULL),
      make_floatx80_init(0xbffc, 0xc76dcfab81ed0400ULL) },
    { make_floatx80_init(0xbffd, 0x8fffffffffffafffULL),
      make_floatx80_init(0x3ffe, 0xd2a81d91f12afb2bULL),
      make_floatx80_init(0xbffc, 0xb55f89b83b541354ULL) },
    { make_floatx80_init(0xbffc, 0xffffffffffff81a3ULL),
      make_floatx80_init(0x3ffe, 0xd744fccad69d7d5eULL),
      make_floatx80_init(0xbffc, 0xa2ec0cd4a58a0a88ULL) },
    { make_floatx80_init(0xbffc, 0xdfffffffffff1568ULL),
      make_floatx80_init(0x3ffe, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0xbffc, 0x901121a0943696f0ULL) },
    { make_floatx80_init(0xbffc, 0xbfffffffffff68daULL),
      make_floatx80_init(0x3ffe, 0xe0ccdeec2a94f811ULL),
      make_floatx80_init(0xbffb, 0xf999089eab583f78ULL) },
    { make_floatx80_init(0xbffc, 0x9fffffffffff4690ULL),
      make_floatx80_init(0x3ffe, 0xe5b906e77c83657eULL),
      make_floatx80_init(0xbffb, 0xd237c8c41be4d410ULL) },
    { make_floatx80_init(0xbffb, 0xffffffffffff8aeeULL),
      make_floatx80_init(0x3ffe, 0xeac0c6e7dd24427cULL),
      make_floatx80_init(0xbffb, 0xa9f9c8c116ddec20ULL) },
    { make_floatx80_init(0xbffb, 0xbfffffffffff2d18ULL),
      make_floatx80_init(0x3ffe, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0xbffb, 0x80da33211927c8a8ULL) },
    { make_floatx80_init(0xbffa, 0xffffffffffff8ccbULL),
      make_floatx80_init(0x3ffe, 0xf5257d152486d0f4ULL),
      make_floatx80_init(0xbffa, 0xada82eadb792f0c0ULL) },
    { make_floatx80_init(0xbff9, 0xffffffffffff11feULL),
      make_floatx80_init(0x3ffe, 0xfa83b2db722a0846ULL),
      make_floatx80_init(0xbff9, 0xaf89a491babef740ULL) },
    /* t = 0 */
    { floatx80_zero_init,
      make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      floatx80_zero_init },
    { make_floatx80_init(0x3ff9, 0xffffffffffff2680ULL),
      make_floatx80_init(0x3fff, 0x82cd8698ac2b9f6fULL),
      make_floatx80_init(0x3ff9, 0xb361a62b0ae7dbc0ULL) },
    { make_floatx80_init(0x3ffb, 0x800000000000b500ULL),
      make_floatx80_init(0x3fff, 0x85aac367cc488345ULL),
      make_floatx80_init(0x3ffa, 0xb5586cf9891068a0ULL) },
    { make_floatx80_init(0x3ffb, 0xbfffffffffff4b67ULL),
      make_floatx80_init(0x3fff, 0x88980e8092da7cceULL),
      make_floatx80_init(0x3ffb, 0x8980e8092da7cce0ULL) },
    { make_floatx80_init(0x3ffb, 0xffffffffffffff57ULL),
      make_floatx80_init(0x3fff, 0x8b95c1e3ea8bd6dfULL),
      make_floatx80_init(0x3ffb, 0xb95c1e3ea8bd6df0ULL) },
    { make_floatx80_init(0x3ffc, 0x9fffffffffff811fULL),
      make_floatx80_init(0x3fff, 0x8ea4398b45cd4780ULL),
      make_floatx80_init(0x3ffb, 0xea4398b45cd47800ULL) },
    { make_floatx80_init(0x3ffc, 0xbfffffffffff9980ULL),
      make_floatx80_init(0x3fff, 0x91c3d373ab11b919ULL),
      make_floatx80_init(0x3ffc, 0x8e1e9b9d588dc8c8ULL) },
    { make_floatx80_init(0x3ffc, 0xdffffffffffff631ULL),
      make_floatx80_init(0x3fff, 0x94f4efa8fef70864ULL),
      make_floatx80_init(0x3ffc, 0xa7a77d47f7b84320ULL) },
    { make_floatx80_init(0x3ffc, 0xffffffffffff2499ULL),
      make_floatx80_init(0x3fff, 0x9837f0518db892d4ULL),
      make_floatx80_init(0x3ffc, 0xc1bf828c6dc496a0ULL) },
    { make_floatx80_init(0x3ffd, 0x8fffffffffff80fbULL),
      make_floatx80_init(0x3fff, 0x9b8d39b9d54e3a79ULL),
      make_floatx80_init(0x3ffc, 0xdc69cdceaa71d3c8ULL) },
    { make_floatx80_init(0x3ffd, 0x9fffffffffffbc23ULL),
      make_floatx80_init(0x3fff, 0x9ef5326091a10313ULL),
      make_floatx80_init(0x3ffc, 0xf7a993048d081898ULL) },
    { make_floatx80_init(0x3ffd, 0xafffffffffff20ecULL),
      make_floatx80_init(0x3fff, 0xa27043030c49370aULL),
      make_floatx80_init(0x3ffd, 0x89c10c0c3124dc28ULL) },
    { make_floatx80_init(0x3ffd, 0xc00000000000fd2cULL),
      make_floatx80_init(0x3fff, 0xa5fed6a9b15171cfULL),
      make_floatx80_init(0x3ffd, 0x97fb5aa6c545c73cULL) },
    { make_floatx80_init(0x3ffd, 0xd0000000000093beULL),
      make_floatx80_init(0x3fff, 0xa9a15ab4ea7c30e6ULL),
      make_floatx80_init(0x3ffd, 0xa6856ad3a9f0c398ULL) },
    { make_floatx80_init(0x3ffd, 0xe00000000000c2aeULL),
      make_floatx80_init(0x3fff, 0xad583eea42a17876ULL),
      make_floatx80_init(0x3ffd, 0xb560fba90a85e1d8ULL) },
    { make_floatx80_init(0x3ffd, 0xefffffffffff1e3fULL),
      make_floatx80_init(0x3fff, 0xb123f581d2abef6cULL),
      make_floatx80_init(0x3ffd, 0xc48fd6074aafbdb0ULL) },
    { make_floatx80_init(0x3ffd, 0xffffffffffff1c23ULL),
      make_floatx80_init(0x3fff, 0xb504f333f9de2cadULL),
      make_floatx80_init(0x3ffd, 0xd413cccfe778b2b4ULL) },
    { make_floatx80_init(0x3ffe, 0x8800000000006344ULL),
      make_floatx80_init(0x3fff, 0xb8fbaf4762fbd0a1ULL),
      make_floatx80_init(0x3ffd, 0xe3eebd1d8bef4284ULL) },
    { make_floatx80_init(0x3ffe, 0x9000000000005d67ULL),
      make_floatx80_init(0x3fff, 0xbd08a39f580c668dULL),
      make_floatx80_init(0x3ffd, 0xf4228e7d60319a34ULL) },
    { make_floatx80_init(0x3ffe, 0x9800000000009127ULL),
      make_floatx80_init(0x3fff, 0xc12c4cca6670e042ULL),
      make_floatx80_init(0x3ffe, 0x82589994cce1c084ULL) },
    { make_floatx80_init(0x3ffe, 0x9fffffffffff06f9ULL),
      make_floatx80_init(0x3fff, 0xc5672a11550655c3ULL),
      make_floatx80_init(0x3ffe, 0x8ace5422aa0cab86ULL) },
    { make_floatx80_init(0x3ffe, 0xa7fffffffffff80dULL),
      make_floatx80_init(0x3fff, 0xc9b9bd866e2f234bULL),
      make_floatx80_init(0x3ffe, 0x93737b0cdc5e4696ULL) },
    { make_floatx80_init(0x3ffe, 0xafffffffffff1470ULL),
      make_floatx80_init(0x3fff, 0xce248c151f83fd69ULL),
      make_floatx80_init(0x3ffe, 0x9c49182a3f07fad2ULL) },
    { make_floatx80_init(0x3ffe, 0xb800000000000e0aULL),
      make_floatx80_init(0x3fff, 0xd2a81d91f12aec5cULL),
      make_floatx80_init(0x3ffe, 0xa5503b23e255d8b8ULL) },
    { make_floatx80_init(0x3ffe, 0xc00000000000b7faULL),
      make_floatx80_init(0x3fff, 0xd744fccad69dd630ULL),
      make_floatx80_init(0x3ffe, 0xae89f995ad3bac60ULL) },
    { make_floatx80_init(0x3ffe, 0xc800000000003aa6ULL),
      make_floatx80_init(0x3fff, 0xdbfbb797daf25a44ULL),
      make_floatx80_init(0x3ffe, 0xb7f76f2fb5e4b488ULL) },
    { make_floatx80_init(0x3ffe, 0xd00000000000a6aeULL),
      make_floatx80_init(0x3fff, 0xe0ccdeec2a954685ULL),
      make_floatx80_init(0x3ffe, 0xc199bdd8552a8d0aULL) },
    { make_floatx80_init(0x3ffe, 0xd800000000004165ULL),
      make_floatx80_init(0x3fff, 0xe5b906e77c837155ULL),
      make_floatx80_init(0x3ffe, 0xcb720dcef906e2aaULL) },
    { make_floatx80_init(0x3ffe, 0xe00000000000582cULL),
      make_floatx80_init(0x3fff, 0xeac0c6e7dd24713aULL),
      make_floatx80_init(0x3ffe, 0xd5818dcfba48e274ULL) },
    { make_floatx80_init(0x3ffe, 0xe800000000001a5dULL),
      make_floatx80_init(0x3fff, 0xefe4b99bdcdb06ebULL),
      make_floatx80_init(0x3ffe, 0xdfc97337b9b60dd6ULL) },
    { make_floatx80_init(0x3ffe, 0xefffffffffffc1efULL),
      make_floatx80_init(0x3fff, 0xf5257d152486a2faULL),
      make_floatx80_init(0x3ffe, 0xea4afa2a490d45f4ULL) },
    { make_floatx80_init(0x3ffe, 0xf800000000001069ULL),
      make_floatx80_init(0x3fff, 0xfa83b2db722a0e5cULL),
      make_floatx80_init(0x3ffe, 0xf50765b6e4541cb8ULL) },
    { make_floatx80_init(0x3fff, 0x8000000000000000ULL),
      make_floatx80_init(0x4000, 0x8000000000000000ULL),
      make_floatx80_init(0x3fff, 0x8000000000000000ULL) },
};
&env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_silence_nan(ST0, &env->fp_status); + } + } else if (exp > 0x3fff || + (exp == 0x3fff && sig != (0x8000000000000000ULL))) { + /* Out of range for the instruction, treat as invalid. */ + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else if (exp == 0x3fff) { + /* Argument 1 or -1, exact result 1 or -0.5. */ + if (sign) { + ST0 = make_floatx80(0xbffe, 0x8000000000000000ULL); + } + } else if (exp < 0x3fb0) { + if (!floatx80_is_zero(ST0)) { + /* + * Multiplying the argument by an extra-precision version + * of log(2) is sufficiently precise. Zero arguments are + * returned unchanged. + */ + uint64_t sig0, sig1, sig2; + if (exp == 0) { + normalizeFloatx80Subnormal(sig, &exp, &sig); + } + mul128By64To192(ln2_sig_high, ln2_sig_low, sig, &sig0, &sig1, + &sig2); + /* This result is inexact. */ + sig1 |= 1; + ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, + sign, exp, sig0, sig1, + &env->fp_status); + } + } else { + floatx80 tmp, y, accum; + bool asign, bsign; + int32_t n, aexp, bexp; + uint64_t asig0, asig1, asig2, bsig0, bsig1; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + FloatX80RoundPrec save_prec = + env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = floatx80_precision_x; + + /* Find the nearest multiple of 1/32 to the argument. */ + tmp = floatx80_scalbn(ST0, 5, &env->fp_status); + n = 32 + floatx80_to_int32(tmp, &env->fp_status); + y = floatx80_sub(ST0, f2xm1_table[n].t, &env->fp_status); + + if (floatx80_is_zero(y)) { + /* + * Use the value of 2^t - 1 from the table, to avoid + * needing to special-case zero as a result of + * multiplication below. 
+ */ + /* + * y == 0 means ST0 is exactly the table argument t, so the + * result is the tabulated 2^t - 1 (exp2m1), not t itself. + */ + ST0 = f2xm1_table[n].exp2m1; + set_float_exception_flags(float_flag_inexact, &env->fp_status); + env->fp_status.float_rounding_mode = save_mode; + } else { + /* + * Compute the lower parts of a polynomial expansion for + * (2^y - 1) / y. + */ + accum = floatx80_mul(f2xm1_coeff_7, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_6, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_5, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_4, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_3, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_2, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_1, accum, &env->fp_status); + accum = floatx80_mul(accum, y, &env->fp_status); + accum = floatx80_add(f2xm1_coeff_0_low, accum, &env->fp_status); + + /* + * The full polynomial expansion is f2xm1_coeff_0 + accum + * (where accum has much lower magnitude, and so, in + * particular, carry out of the addition is not possible). + * (This expansion is only accurate to about 70 bits, not + * 128 bits.) + */ + aexp = extractFloatx80Exp(f2xm1_coeff_0); + asign = extractFloatx80Sign(f2xm1_coeff_0); + shift128RightJamming(extractFloatx80Frac(accum), 0, + aexp - extractFloatx80Exp(accum), + &asig0, &asig1); + bsig0 = extractFloatx80Frac(f2xm1_coeff_0); + bsig1 = 0; + if (asign == extractFloatx80Sign(accum)) { + add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + } else { + sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + } + /* And thus compute an approximation to 2^y - 1. 
*/ + mul128By64To192(asig0, asig1, extractFloatx80Frac(y), + &asig0, &asig1, &asig2); + aexp += extractFloatx80Exp(y) - 0x3ffe; + asign ^= extractFloatx80Sign(y); + if (n != 32) { + /* + * Multiply this by the precomputed value of 2^t and + * add that of 2^t - 1. + */ + mul128By64To192(asig0, asig1, + extractFloatx80Frac(f2xm1_table[n].exp2), + &asig0, &asig1, &asig2); + aexp += extractFloatx80Exp(f2xm1_table[n].exp2) - 0x3ffe; + bexp = extractFloatx80Exp(f2xm1_table[n].exp2m1); + bsig0 = extractFloatx80Frac(f2xm1_table[n].exp2m1); + bsig1 = 0; + if (bexp < aexp) { + shift128RightJamming(bsig0, bsig1, aexp - bexp, + &bsig0, &bsig1); + } else if (aexp < bexp) { + shift128RightJamming(asig0, asig1, bexp - aexp, + &asig0, &asig1); + aexp = bexp; + } + /* The sign of 2^t - 1 is always that of the result. */ + bsign = extractFloatx80Sign(f2xm1_table[n].exp2m1); + if (asign == bsign) { + /* Avoid possible carry out of the addition. */ + shift128RightJamming(asig0, asig1, 1, + &asig0, &asig1); + shift128RightJamming(bsig0, bsig1, 1, + &bsig0, &bsig1); + ++aexp; + add128(asig0, asig1, bsig0, bsig1, &asig0, &asig1); + } else { + sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + asign = bsign; + } + } + env->fp_status.float_rounding_mode = save_mode; + /* This result is inexact. */ + asig1 |= 1; + ST0 = normalizeRoundAndPackFloatx80(floatx80_precision_x, + asign, aexp, asig0, asig1, + &env->fp_status); + } + + env->fp_status.floatx80_rounding_precision = save_prec; + } + merge_exception_flags(env, old_flags); +} + +void helper_fptan(CPUX86State *env) +{ + double fptemp = floatx80_to_double(env, ST0); + + if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + fptemp = tan(fptemp); + ST0 = double_to_floatx80(env, fptemp); + fpush(env); + ST0 = floatx80_one; + env->fpus &= ~0x400; /* C2 <-- 0 */ + /* the above code is for |arg| < 2**52 only */ + } +} + +/* Values of pi/4, pi/2, 3pi/4 and pi, with 128-bit precision. 
*/ +#define pi_4_exp 0x3ffe +#define pi_4_sig_high 0xc90fdaa22168c234ULL +#define pi_4_sig_low 0xc4c6628b80dc1cd1ULL +#define pi_2_exp 0x3fff +#define pi_2_sig_high 0xc90fdaa22168c234ULL +#define pi_2_sig_low 0xc4c6628b80dc1cd1ULL +#define pi_34_exp 0x4000 +#define pi_34_sig_high 0x96cbe3f9990e91a7ULL +#define pi_34_sig_low 0x9394c9e8a0a5159dULL +#define pi_exp 0x4000 +#define pi_sig_high 0xc90fdaa22168c234ULL +#define pi_sig_low 0xc4c6628b80dc1cd1ULL + +/* + * Polynomial coefficients for an approximation to atan(x), with only + * odd powers of x used, for x in the interval [-1/16, 1/16]. (Unlike + * for some other approximations, no low part is needed for the first + * coefficient here to achieve a sufficiently accurate result, because + * the coefficient in this minimax approximation is very close to + * exactly 1.) + */ +#define fpatan_coeff_0 make_floatx80(0x3fff, 0x8000000000000000ULL) +#define fpatan_coeff_1 make_floatx80(0xbffd, 0xaaaaaaaaaaaaaa43ULL) +#define fpatan_coeff_2 make_floatx80(0x3ffc, 0xccccccccccbfe4f8ULL) +#define fpatan_coeff_3 make_floatx80(0xbffc, 0x92492491fbab2e66ULL) +#define fpatan_coeff_4 make_floatx80(0x3ffb, 0xe38e372881ea1e0bULL) +#define fpatan_coeff_5 make_floatx80(0xbffb, 0xba2c0104bbdd0615ULL) +#define fpatan_coeff_6 make_floatx80(0x3ffb, 0x9baf7ebf898b42efULL) + +struct fpatan_data { + /* High and low parts of atan(x). 
*/ + floatx80 atan_high, atan_low; +}; + +static const struct fpatan_data fpatan_table[9] = { + { floatx80_zero_init, + floatx80_zero_init }, + { make_floatx80_init(0x3ffb, 0xfeadd4d5617b6e33ULL), + make_floatx80_init(0xbfb9, 0xdda19d8305ddc420ULL) }, + { make_floatx80_init(0x3ffc, 0xfadbafc96406eb15ULL), + make_floatx80_init(0x3fbb, 0xdb8f3debef442fccULL) }, + { make_floatx80_init(0x3ffd, 0xb7b0ca0f26f78474ULL), + make_floatx80_init(0xbfbc, 0xeab9bdba460376faULL) }, + { make_floatx80_init(0x3ffd, 0xed63382b0dda7b45ULL), + make_floatx80_init(0x3fbc, 0xdfc88bd978751a06ULL) }, + { make_floatx80_init(0x3ffe, 0x8f005d5ef7f59f9bULL), + make_floatx80_init(0x3fbd, 0xb906bc2ccb886e90ULL) }, + { make_floatx80_init(0x3ffe, 0xa4bc7d1934f70924ULL), + make_floatx80_init(0x3fbb, 0xcd43f9522bed64f8ULL) }, + { make_floatx80_init(0x3ffe, 0xb8053e2bc2319e74ULL), + make_floatx80_init(0xbfbc, 0xd3496ab7bd6eef0cULL) }, + { make_floatx80_init(0x3ffe, 0xc90fdaa22168c235ULL), + make_floatx80_init(0xbfbc, 0xece675d1fc8f8cbcULL) }, +}; + +void helper_fpatan(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + uint64_t arg0_sig = extractFloatx80Frac(ST0); + int32_t arg0_exp = extractFloatx80Exp(ST0); + bool arg0_sign = extractFloatx80Sign(ST0); + uint64_t arg1_sig = extractFloatx80Frac(ST1); + int32_t arg1_exp = extractFloatx80Exp(ST1); + bool arg1_sign = extractFloatx80Sign(ST1); + + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST0, &env->fp_status); + } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST1, &env->fp_status); + } else if (floatx80_invalid_encoding(ST0) || + floatx80_invalid_encoding(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + ST1 = ST0; + } else if 
(floatx80_is_any_nan(ST1)) { + /* Pass this NaN through. */ + } else if (floatx80_is_zero(ST1) && !arg0_sign) { + /* Pass this zero through. */ + } else if (((floatx80_is_infinity(ST0) && !floatx80_is_infinity(ST1)) || + arg0_exp - arg1_exp >= 80) && + !arg0_sign) { + /* + * Dividing ST1 by ST0 gives the correct result up to + * rounding, and avoids spurious underflow exceptions that + * might result from passing some small values through the + * polynomial approximation, but if a finite nonzero result of + * division is exact, the result of fpatan is still inexact + * (and underflowing where appropriate). + */ + FloatX80RoundPrec save_prec = + env->fp_status.floatx80_rounding_precision; + env->fp_status.floatx80_rounding_precision = floatx80_precision_x; + ST1 = floatx80_div(ST1, ST0, &env->fp_status); + env->fp_status.floatx80_rounding_precision = save_prec; + if (!floatx80_is_zero(ST1) && + !(get_float_exception_flags(&env->fp_status) & + float_flag_inexact)) { + /* + * The mathematical result is very slightly closer to zero + * than this exact result. Round a value with the + * significand adjusted accordingly to get the correct + * exceptions, and possibly an adjusted result depending + * on the rounding mode. + */ + uint64_t sig = extractFloatx80Frac(ST1); + int32_t exp = extractFloatx80Exp(ST1); + bool sign = extractFloatx80Sign(ST1); + if (exp == 0) { + normalizeFloatx80Subnormal(sig, &exp, &sig); + } + ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, + sign, exp, sig - 1, + -1, &env->fp_status); + } + } else { + /* The result is inexact. */ + bool rsign = arg1_sign; + int32_t rexp; + uint64_t rsig0, rsig1; + if (floatx80_is_zero(ST1)) { + /* + * ST0 is negative. The result is pi with the sign of + * ST1. 
+ */ + rexp = pi_exp; + rsig0 = pi_sig_high; + rsig1 = pi_sig_low; + } else if (floatx80_is_infinity(ST1)) { + if (floatx80_is_infinity(ST0)) { + if (arg0_sign) { + rexp = pi_34_exp; + rsig0 = pi_34_sig_high; + rsig1 = pi_34_sig_low; + } else { + rexp = pi_4_exp; + rsig0 = pi_4_sig_high; + rsig1 = pi_4_sig_low; + } + } else { + rexp = pi_2_exp; + rsig0 = pi_2_sig_high; + rsig1 = pi_2_sig_low; + } + } else if (floatx80_is_zero(ST0) || arg1_exp - arg0_exp >= 80) { + rexp = pi_2_exp; + rsig0 = pi_2_sig_high; + rsig1 = pi_2_sig_low; + } else if (floatx80_is_infinity(ST0) || arg0_exp - arg1_exp >= 80) { + /* ST0 is negative. */ + rexp = pi_exp; + rsig0 = pi_sig_high; + rsig1 = pi_sig_low; + } else { + /* + * ST0 and ST1 are finite, nonzero and with exponents not + * too far apart. + */ + int32_t adj_exp, num_exp, den_exp, xexp, yexp, n, texp, zexp, aexp; + int32_t azexp, axexp; + bool adj_sub, ysign, zsign; + uint64_t adj_sig0, adj_sig1, num_sig, den_sig, xsig0, xsig1; + uint64_t msig0, msig1, msig2, remsig0, remsig1, remsig2; + uint64_t ysig0, ysig1, tsig, zsig0, zsig1, asig0, asig1; + uint64_t azsig0, azsig1; + uint64_t azsig2, azsig3, axsig0, axsig1; + floatx80 x8; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + FloatX80RoundPrec save_prec = + env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = floatx80_precision_x; + + if (arg0_exp == 0) { + normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); + } + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + if (arg0_exp > arg1_exp || + (arg0_exp == arg1_exp && arg0_sig >= arg1_sig)) { + /* Work with abs(ST1) / abs(ST0). */ + num_exp = arg1_exp; + num_sig = arg1_sig; + den_exp = arg0_exp; + den_sig = arg0_sig; + if (arg0_sign) { + /* The result is subtracted from pi. 
*/ + adj_exp = pi_exp; + adj_sig0 = pi_sig_high; + adj_sig1 = pi_sig_low; + adj_sub = true; + } else { + /* The result is used as-is. */ + adj_exp = 0; + adj_sig0 = 0; + adj_sig1 = 0; + adj_sub = false; + } + } else { + /* Work with abs(ST0) / abs(ST1). */ + num_exp = arg0_exp; + num_sig = arg0_sig; + den_exp = arg1_exp; + den_sig = arg1_sig; + /* The result is added to or subtracted from pi/2. */ + adj_exp = pi_2_exp; + adj_sig0 = pi_2_sig_high; + adj_sig1 = pi_2_sig_low; + adj_sub = !arg0_sign; + } + + /* + * Compute x = num/den, where 0 < x <= 1 and x is not too + * small. + */ + xexp = num_exp - den_exp + 0x3ffe; + remsig0 = num_sig; + remsig1 = 0; + if (den_sig <= remsig0) { + shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); + ++xexp; + } + xsig0 = estimateDiv128To64(remsig0, remsig1, den_sig); + mul64To128(den_sig, xsig0, &msig0, &msig1); + sub128(remsig0, remsig1, msig0, msig1, &remsig0, &remsig1); + while ((int64_t) remsig0 < 0) { + --xsig0; + add128(remsig0, remsig1, 0, den_sig, &remsig0, &remsig1); + } + xsig1 = estimateDiv128To64(remsig1, 0, den_sig); + /* + * No need to correct any estimation error in xsig1; even + * with such error, it is accurate enough. + */ + + /* + * Split x as x = t + y, where t = n/8 is the nearest + * multiple of 1/8 to x. 
+ */ + x8 = normalizeRoundAndPackFloatx80(floatx80_precision_x, + false, xexp + 3, xsig0, + xsig1, &env->fp_status); + n = floatx80_to_int32(x8, &env->fp_status); + if (n == 0) { + ysign = false; + yexp = xexp; + ysig0 = xsig0; + ysig1 = xsig1; + texp = 0; + tsig = 0; + } else { + int shift = clz32(n) + 32; + texp = 0x403b - shift; + tsig = n; + tsig <<= shift; + if (texp == xexp) { + sub128(xsig0, xsig1, tsig, 0, &ysig0, &ysig1); + if ((int64_t) ysig0 >= 0) { + ysign = false; + if (ysig0 == 0) { + if (ysig1 == 0) { + yexp = 0; + } else { + shift = clz64(ysig1) + 64; + yexp = xexp - shift; + shift128Left(ysig0, ysig1, shift, + &ysig0, &ysig1); + } + } else { + shift = clz64(ysig0); + yexp = xexp - shift; + shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); + } + } else { + ysign = true; + sub128(0, 0, ysig0, ysig1, &ysig0, &ysig1); + if (ysig0 == 0) { + shift = clz64(ysig1) + 64; + } else { + shift = clz64(ysig0); + } + yexp = xexp - shift; + shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); + } + } else { + /* + * t's exponent must be greater than x's because t + * is positive and the nearest multiple of 1/8 to + * x, and if x has a greater exponent, the power + * of 2 with that exponent is also a multiple of + * 1/8. + */ + uint64_t usig0, usig1; + shift128RightJamming(xsig0, xsig1, texp - xexp, + &usig0, &usig1); + ysign = true; + sub128(tsig, 0, usig0, usig1, &ysig0, &ysig1); + if (ysig0 == 0) { + shift = clz64(ysig1) + 64; + } else { + shift = clz64(ysig0); + } + yexp = texp - shift; + shift128Left(ysig0, ysig1, shift, &ysig0, &ysig1); + } + } + + /* + * Compute z = y/(1+tx), so arctan(x) = arctan(t) + + * arctan(z). + */ + zsign = ysign; + if (texp == 0 || yexp == 0) { + zexp = yexp; + zsig0 = ysig0; + zsig1 = ysig1; + } else { + /* + * t <= 1, x <= 1 and if both are 1 then y is 0, so tx < 1. 
+ */ + int32_t dexp = texp + xexp - 0x3ffe; + uint64_t dsig0, dsig1, dsig2; + mul128By64To192(xsig0, xsig1, tsig, &dsig0, &dsig1, &dsig2); + /* + * dexp <= 0x3fff (and if equal, dsig0 has a leading 0 + * bit). Add 1 to produce the denominator 1+tx. + */ + shift128RightJamming(dsig0, dsig1, 0x3fff - dexp, + &dsig0, &dsig1); + dsig0 |= 0x8000000000000000ULL; + zexp = yexp - 1; + remsig0 = ysig0; + remsig1 = ysig1; + remsig2 = 0; + if (dsig0 <= remsig0) { + shift128Right(remsig0, remsig1, 1, &remsig0, &remsig1); + ++zexp; + } + zsig0 = estimateDiv128To64(remsig0, remsig1, dsig0); + mul128By64To192(dsig0, dsig1, zsig0, &msig0, &msig1, &msig2); + sub192(remsig0, remsig1, remsig2, msig0, msig1, msig2, + &remsig0, &remsig1, &remsig2); + while ((int64_t) remsig0 < 0) { + --zsig0; + add192(remsig0, remsig1, remsig2, 0, dsig0, dsig1, + &remsig0, &remsig1, &remsig2); + } + zsig1 = estimateDiv128To64(remsig1, remsig2, dsig0); + /* No need to correct any estimation error in zsig1. */ + } + + if (zexp == 0) { + azexp = 0; + azsig0 = 0; + azsig1 = 0; + } else { + floatx80 z2, accum; + uint64_t z2sig0, z2sig1, z2sig2, z2sig3; + /* Compute z^2. */ + mul128To256(zsig0, zsig1, zsig0, zsig1, + &z2sig0, &z2sig1, &z2sig2, &z2sig3); + z2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false, + zexp + zexp - 0x3ffe, + z2sig0, z2sig1, + &env->fp_status); + + /* Compute the lower parts of the polynomial expansion. 
*/ + accum = floatx80_mul(fpatan_coeff_6, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_5, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_4, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_3, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_2, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + accum = floatx80_add(fpatan_coeff_1, accum, &env->fp_status); + accum = floatx80_mul(accum, z2, &env->fp_status); + + /* + * The full polynomial expansion is z*(fpatan_coeff_0 + accum). + * fpatan_coeff_0 is 1, and accum is negative and much smaller. + */ + aexp = extractFloatx80Exp(fpatan_coeff_0); + shift128RightJamming(extractFloatx80Frac(accum), 0, + aexp - extractFloatx80Exp(accum), + &asig0, &asig1); + sub128(extractFloatx80Frac(fpatan_coeff_0), 0, asig0, asig1, + &asig0, &asig1); + /* Multiply by z to compute arctan(z). */ + azexp = aexp + zexp - 0x3ffe; + mul128To256(asig0, asig1, zsig0, zsig1, &azsig0, &azsig1, + &azsig2, &azsig3); + } + + /* Add arctan(t) (positive or zero) and arctan(z) (sign zsign). */ + if (texp == 0) { + /* z is positive. 
*/ + axexp = azexp; + axsig0 = azsig0; + axsig1 = azsig1; + } else { + bool low_sign = extractFloatx80Sign(fpatan_table[n].atan_low); + int32_t low_exp = extractFloatx80Exp(fpatan_table[n].atan_low); + uint64_t low_sig0 = + extractFloatx80Frac(fpatan_table[n].atan_low); + uint64_t low_sig1 = 0; + axexp = extractFloatx80Exp(fpatan_table[n].atan_high); + axsig0 = extractFloatx80Frac(fpatan_table[n].atan_high); + axsig1 = 0; + shift128RightJamming(low_sig0, low_sig1, axexp - low_exp, + &low_sig0, &low_sig1); + if (low_sign) { + sub128(axsig0, axsig1, low_sig0, low_sig1, + &axsig0, &axsig1); + } else { + add128(axsig0, axsig1, low_sig0, low_sig1, + &axsig0, &axsig1); + } + if (azexp >= axexp) { + shift128RightJamming(axsig0, axsig1, azexp - axexp + 1, + &axsig0, &axsig1); + axexp = azexp + 1; + shift128RightJamming(azsig0, azsig1, 1, + &azsig0, &azsig1); + } else { + shift128RightJamming(axsig0, axsig1, 1, + &axsig0, &axsig1); + shift128RightJamming(azsig0, azsig1, axexp - azexp + 1, + &azsig0, &azsig1); + ++axexp; + } + if (zsign) { + sub128(axsig0, axsig1, azsig0, azsig1, + &axsig0, &axsig1); + } else { + add128(axsig0, axsig1, azsig0, azsig1, + &axsig0, &axsig1); + } + } + + if (adj_exp == 0) { + rexp = axexp; + rsig0 = axsig0; + rsig1 = axsig1; + } else { + /* + * Add or subtract arctan(x) (exponent axexp, + * significand axsig0 and axsig1, positive, not + * necessarily normalized) to the number given by + * adj_exp, adj_sig0 and adj_sig1, according to + * adj_sub. 
+ */ + if (adj_exp >= axexp) { + shift128RightJamming(axsig0, axsig1, adj_exp - axexp + 1, + &axsig0, &axsig1); + rexp = adj_exp + 1; + shift128RightJamming(adj_sig0, adj_sig1, 1, + &adj_sig0, &adj_sig1); + } else { + shift128RightJamming(axsig0, axsig1, 1, + &axsig0, &axsig1); + shift128RightJamming(adj_sig0, adj_sig1, + axexp - adj_exp + 1, + &adj_sig0, &adj_sig1); + rexp = axexp + 1; + } + if (adj_sub) { + sub128(adj_sig0, adj_sig1, axsig0, axsig1, + &rsig0, &rsig1); + } else { + add128(adj_sig0, adj_sig1, axsig0, axsig1, + &rsig0, &rsig1); + } + } + + env->fp_status.float_rounding_mode = save_mode; + env->fp_status.floatx80_rounding_precision = save_prec; + } + /* This result is inexact. */ + rsig1 |= 1; + ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, rsign, rexp, + rsig0, rsig1, &env->fp_status); + } + + fpop(env); + merge_exception_flags(env, old_flags); +} + +void helper_fxtract(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + CPU_LDoubleU temp; + + temp.d = ST0; + + if (floatx80_is_zero(ST0)) { + /* Easy way to generate -inf and raising division by 0 exception */ + ST0 = floatx80_div(floatx80_chs(floatx80_one), floatx80_zero, + &env->fp_status); + fpush(env); + ST0 = temp.d; + } else if (floatx80_invalid_encoding(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + fpush(env); + ST0 = ST1; + } else if (floatx80_is_any_nan(ST0)) { + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_silence_nan(ST0, &env->fp_status); + } + fpush(env); + ST0 = ST1; + } else if (floatx80_is_infinity(ST0)) { + fpush(env); + ST0 = ST1; + ST1 = floatx80_infinity; + } else { + int expdif; + + if (EXPD(temp) == 0) { + int shift = clz64(temp.l.lower); + temp.l.lower <<= shift; + expdif = 1 - EXPBIAS - shift; + float_raise(float_flag_input_denormal, &env->fp_status); + } else { + expdif = EXPD(temp) - EXPBIAS; + } + /* DP 
exponent bias */ + ST0 = int32_to_floatx80(expdif, &env->fp_status); + fpush(env); + BIASEXPONENT(temp); + ST0 = temp.d; + } + merge_exception_flags(env, old_flags); +} + +static void helper_fprem_common(CPUX86State *env, bool mod) +{ + uint8_t old_flags = save_exception_flags(env); + uint64_t quotient; + CPU_LDoubleU temp0, temp1; + int exp0, exp1, expdiff; + + temp0.d = ST0; + temp1.d = ST1; + exp0 = EXPD(temp0); + exp1 = EXPD(temp1); + + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || + exp0 == 0x7fff || exp1 == 0x7fff || + floatx80_invalid_encoding(ST0) || floatx80_invalid_encoding(ST1)) { + ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status); + } else { + if (exp0 == 0) { + exp0 = 1 - clz64(temp0.l.lower); + } + if (exp1 == 0) { + exp1 = 1 - clz64(temp1.l.lower); + } + expdiff = exp0 - exp1; + if (expdiff < 64) { + /* quotient is passed by address here; its low three bits are copied into C0, C3 and C1 below. */ + ST0 = floatx80_modrem(ST0, ST1, mod, &quotient, &env->fp_status); + env->fpus |= (quotient & 0x4) << (8 - 2); /* (C0) <-- q2 */ + env->fpus |= (quotient & 0x2) << (14 - 1); /* (C3) <-- q1 */ + env->fpus |= (quotient & 0x1) << (9 - 0); /* (C1) <-- q0 */ + } else { + /* + * Partial remainder. This choice of how many bits to + * process at once is specified in AMD instruction set + * manuals, and empirically is followed by Intel + * processors as well; it ensures that the final remainder + * operation in a loop does produce the correct low three + * bits of the quotient. AMD manuals specify that the + * flags other than C2 are cleared, and empirically Intel + * processors clear them as well. 
+ */ + int n = 32 + (expdiff % 32); + temp1.d = floatx80_scalbn(temp1.d, expdiff - n, &env->fp_status); + ST0 = floatx80_mod(ST0, temp1.d, &env->fp_status); + env->fpus |= 0x400; /* C2 <-- 1 */ + } + } + merge_exception_flags(env, old_flags); +} + +void helper_fprem1(CPUX86State *env) +{ + helper_fprem_common(env, false); +} + +void helper_fprem(CPUX86State *env) +{ + helper_fprem_common(env, true); +} + +/* 128-bit significand of log2(e). */ +#define log2_e_sig_high 0xb8aa3b295c17f0bbULL +#define log2_e_sig_low 0xbe87fed0691d3e89ULL + +/* + * Polynomial coefficients for an approximation to log2((1+x)/(1-x)), + * with only odd powers of x used, for x in the interval [2*sqrt(2)-3, + * 3-2*sqrt(2)], which corresponds to logarithms of numbers in the + * interval [sqrt(2)/2, sqrt(2)]. + */ +#define fyl2x_coeff_0 make_floatx80(0x4000, 0xb8aa3b295c17f0bcULL) +#define fyl2x_coeff_0_low make_floatx80(0xbfbf, 0x834972fe2d7bab1bULL) +#define fyl2x_coeff_1 make_floatx80(0x3ffe, 0xf6384ee1d01febb8ULL) +#define fyl2x_coeff_2 make_floatx80(0x3ffe, 0x93bb62877cdfa2e3ULL) +#define fyl2x_coeff_3 make_floatx80(0x3ffd, 0xd30bb153d808f269ULL) +#define fyl2x_coeff_4 make_floatx80(0x3ffd, 0xa42589eaf451499eULL) +#define fyl2x_coeff_5 make_floatx80(0x3ffd, 0x864d42c0f8f17517ULL) +#define fyl2x_coeff_6 make_floatx80(0x3ffc, 0xe3476578adf26272ULL) +#define fyl2x_coeff_7 make_floatx80(0x3ffc, 0xc506c5f874e6d80fULL) +#define fyl2x_coeff_8 make_floatx80(0x3ffc, 0xac5cf50cc57d6372ULL) +#define fyl2x_coeff_9 make_floatx80(0x3ffc, 0xb1ed0066d971a103ULL) + +/* + * Compute an approximation of log2(1+arg), where 1+arg is in the + * interval [sqrt(2)/2, sqrt(2)]. It is assumed that when this + * function is called, rounding precision is set to 80 and the + * round-to-nearest mode is in effect. arg must not be exactly zero, + * and must not be so close to zero that underflow might occur. 
+ */ +static void helper_fyl2x_common(CPUX86State *env, floatx80 arg, int32_t *exp, + uint64_t *sig0, uint64_t *sig1) +{ + uint64_t arg0_sig = extractFloatx80Frac(arg); + int32_t arg0_exp = extractFloatx80Exp(arg); + bool arg0_sign = extractFloatx80Sign(arg); + bool asign; + int32_t dexp, texp, aexp; + uint64_t dsig0, dsig1, tsig0, tsig1, rsig0, rsig1, rsig2; + uint64_t msig0, msig1, msig2, t2sig0, t2sig1, t2sig2, t2sig3; + uint64_t asig0, asig1, asig2, asig3, bsig0, bsig1; + floatx80 t2, accum; + + /* + * Compute an approximation of arg/(2+arg), with extra precision, + * as the argument to a polynomial approximation. The extra + * precision is only needed for the first term of the + * approximation, with subsequent terms being significantly + * smaller; the approximation only uses odd exponents, and the + * square of arg/(2+arg) is at most 17-12*sqrt(2) = 0.029.... + */ + if (arg0_sign) { + dexp = 0x3fff; + shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); + sub128(0, 0, dsig0, dsig1, &dsig0, &dsig1); + } else { + dexp = 0x4000; + shift128RightJamming(arg0_sig, 0, dexp - arg0_exp, &dsig0, &dsig1); + dsig0 |= 0x8000000000000000ULL; + } + texp = arg0_exp - dexp + 0x3ffe; + rsig0 = arg0_sig; + rsig1 = 0; + rsig2 = 0; + if (dsig0 <= rsig0) { + shift128Right(rsig0, rsig1, 1, &rsig0, &rsig1); + ++texp; + } + tsig0 = estimateDiv128To64(rsig0, rsig1, dsig0); + mul128By64To192(dsig0, dsig1, tsig0, &msig0, &msig1, &msig2); + sub192(rsig0, rsig1, rsig2, msig0, msig1, msig2, + &rsig0, &rsig1, &rsig2); + while ((int64_t) rsig0 < 0) { + --tsig0; + add192(rsig0, rsig1, rsig2, 0, dsig0, dsig1, + &rsig0, &rsig1, &rsig2); + } + tsig1 = estimateDiv128To64(rsig1, rsig2, dsig0); + /* + * No need to correct any estimation error in tsig1; even with + * such error, it is accurate enough. Now compute the square of + * that approximation. 
+ */ + mul128To256(tsig0, tsig1, tsig0, tsig1, + &t2sig0, &t2sig1, &t2sig2, &t2sig3); + t2 = normalizeRoundAndPackFloatx80(floatx80_precision_x, false, + texp + texp - 0x3ffe, + t2sig0, t2sig1, &env->fp_status); + + /* Compute the lower parts of the polynomial expansion. */ + accum = floatx80_mul(fyl2x_coeff_9, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_8, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_7, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_6, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_5, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_4, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_3, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_2, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_1, accum, &env->fp_status); + accum = floatx80_mul(accum, t2, &env->fp_status); + accum = floatx80_add(fyl2x_coeff_0_low, accum, &env->fp_status); + + /* + * The full polynomial expansion is fyl2x_coeff_0 + accum (where + * accum has much lower magnitude, and so, in particular, carry + * out of the addition is not possible), multiplied by t. (This + * expansion is only accurate to about 70 bits, not 128 bits.) 
+ */ + aexp = extractFloatx80Exp(fyl2x_coeff_0); + asign = extractFloatx80Sign(fyl2x_coeff_0); + shift128RightJamming(extractFloatx80Frac(accum), 0, + aexp - extractFloatx80Exp(accum), + &asig0, &asig1); + bsig0 = extractFloatx80Frac(fyl2x_coeff_0); + bsig1 = 0; + if (asign == extractFloatx80Sign(accum)) { + add128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + } else { + sub128(bsig0, bsig1, asig0, asig1, &asig0, &asig1); + } + /* Multiply by t to compute the required result. */ + mul128To256(asig0, asig1, tsig0, tsig1, + &asig0, &asig1, &asig2, &asig3); + aexp += texp - 0x3ffe; + *exp = aexp; + *sig0 = asig0; + *sig1 = asig1; +} + +void helper_fyl2xp1(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + uint64_t arg0_sig = extractFloatx80Frac(ST0); + int32_t arg0_exp = extractFloatx80Exp(ST0); + bool arg0_sign = extractFloatx80Sign(ST0); + uint64_t arg1_sig = extractFloatx80Frac(ST1); + int32_t arg1_exp = extractFloatx80Exp(ST1); + bool arg1_sign = extractFloatx80Sign(ST1); + + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST0, &env->fp_status); + } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST1, &env->fp_status); + } else if (floatx80_invalid_encoding(ST0) || + floatx80_invalid_encoding(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + ST1 = ST0; + } else if (floatx80_is_any_nan(ST1)) { + /* Pass this NaN through. */ + } else if (arg0_exp > 0x3ffd || + (arg0_exp == 0x3ffd && arg0_sig > (arg0_sign ? 
+ 0x95f619980c4336f7ULL : + 0xd413cccfe7799211ULL))) { + /* + * Out of range for the instruction (ST0 must have absolute + * value less than 1 - sqrt(2)/2 = 0.292..., according to + * Intel manuals; AMD manuals allow a range from sqrt(2)/2 - 1 + * to sqrt(2) - 1, which we allow here), treat as invalid. + */ + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_zero(ST0) || floatx80_is_zero(ST1) || + arg1_exp == 0x7fff) { + /* + * One argument is zero, or multiplying by infinity; correct + * result is exact and can be obtained by multiplying the + * arguments. + */ + ST1 = floatx80_mul(ST0, ST1, &env->fp_status); + } else if (arg0_exp < 0x3fb0) { + /* + * Multiplying both arguments and an extra-precision version + * of log2(e) is sufficiently precise. + */ + uint64_t sig0, sig1, sig2; + int32_t exp; + if (arg0_exp == 0) { + normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); + } + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + mul128By64To192(log2_e_sig_high, log2_e_sig_low, arg0_sig, + &sig0, &sig1, &sig2); + exp = arg0_exp + 1; + mul128By64To192(sig0, sig1, arg1_sig, &sig0, &sig1, &sig2); + exp += arg1_exp - 0x3ffe; + /* This result is inexact. */ + sig1 |= 1; + ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, + arg0_sign ^ arg1_sign, exp, + sig0, sig1, &env->fp_status); + } else { + int32_t aexp; + uint64_t asig0, asig1, asig2; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + FloatX80RoundPrec save_prec = + env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = floatx80_precision_x; + + helper_fyl2x_common(env, ST0, &aexp, &asig0, &asig1); + /* + * Multiply by the second argument to compute the required + * result. 
+ */ + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); + aexp += arg1_exp - 0x3ffe; + /* This result is inexact. */ + asig1 |= 1; + env->fp_status.float_rounding_mode = save_mode; + ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, + arg0_sign ^ arg1_sign, aexp, + asig0, asig1, &env->fp_status); + env->fp_status.floatx80_rounding_precision = save_prec; + } + fpop(env); + merge_exception_flags(env, old_flags); +} + +void helper_fyl2x(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + uint64_t arg0_sig = extractFloatx80Frac(ST0); + int32_t arg0_exp = extractFloatx80Exp(ST0); + bool arg0_sign = extractFloatx80Sign(ST0); + uint64_t arg1_sig = extractFloatx80Frac(ST1); + int32_t arg1_exp = extractFloatx80Exp(ST1); + bool arg1_sign = extractFloatx80Sign(ST1); + + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST0, &env->fp_status); + } else if (floatx80_is_signaling_nan(ST1, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_silence_nan(ST1, &env->fp_status); + } else if (floatx80_invalid_encoding(ST0) || + floatx80_invalid_encoding(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST0)) { + ST1 = ST0; + } else if (floatx80_is_any_nan(ST1)) { + /* Pass this NaN through. */ + } else if (arg0_sign && !floatx80_is_zero(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_infinity(ST1)) { + FloatRelation cmp = floatx80_compare(ST0, floatx80_one, + &env->fp_status); + switch (cmp) { + case float_relation_less: + ST1 = floatx80_chs(ST1); + break; + case float_relation_greater: + /* Result is infinity of the same sign as ST1. 
*/ + break; + default: + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + break; + } + } else if (floatx80_is_infinity(ST0)) { + if (floatx80_is_zero(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else if (arg1_sign) { + ST1 = floatx80_chs(ST0); + } else { + ST1 = ST0; + } + } else if (floatx80_is_zero(ST0)) { + if (floatx80_is_zero(ST1)) { + float_raise(float_flag_invalid, &env->fp_status); + ST1 = floatx80_default_nan(&env->fp_status); + } else { + /* Result is infinity with opposite sign to ST1. */ + float_raise(float_flag_divbyzero, &env->fp_status); + ST1 = make_floatx80(arg1_sign ? 0x7fff : 0xffff, + 0x8000000000000000ULL); + } + } else if (floatx80_is_zero(ST1)) { + if (floatx80_lt(ST0, floatx80_one, &env->fp_status)) { + ST1 = floatx80_chs(ST1); + } + /* Otherwise, ST1 is already the correct result. */ + } else if (floatx80_eq(ST0, floatx80_one, &env->fp_status)) { + if (arg1_sign) { + ST1 = floatx80_chs(floatx80_zero); + } else { + ST1 = floatx80_zero; + } + } else { + int32_t int_exp; + floatx80 arg0_m1; + FloatRoundMode save_mode = env->fp_status.float_rounding_mode; + FloatX80RoundPrec save_prec = + env->fp_status.floatx80_rounding_precision; + env->fp_status.float_rounding_mode = float_round_nearest_even; + env->fp_status.floatx80_rounding_precision = floatx80_precision_x; + + if (arg0_exp == 0) { + normalizeFloatx80Subnormal(arg0_sig, &arg0_exp, &arg0_sig); + } + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + int_exp = arg0_exp - 0x3fff; + if (arg0_sig > 0xb504f333f9de6484ULL) { + ++int_exp; + } + arg0_m1 = floatx80_sub(floatx80_scalbn(ST0, -int_exp, + &env->fp_status), + floatx80_one, &env->fp_status); + if (floatx80_is_zero(arg0_m1)) { + /* Exact power of 2; multiply by ST1. 
*/ + env->fp_status.float_rounding_mode = save_mode; + ST1 = floatx80_mul(int32_to_floatx80(int_exp, &env->fp_status), + ST1, &env->fp_status); + } else { + bool asign = extractFloatx80Sign(arg0_m1); + int32_t aexp; + uint64_t asig0, asig1, asig2; + helper_fyl2x_common(env, arg0_m1, &aexp, &asig0, &asig1); + if (int_exp != 0) { + bool isign = (int_exp < 0); + int32_t iexp; + uint64_t isig; + int shift; + int_exp = isign ? -int_exp : int_exp; + shift = clz32(int_exp) + 32; + isig = int_exp; + isig <<= shift; + iexp = 0x403e - shift; + shift128RightJamming(asig0, asig1, iexp - aexp, + &asig0, &asig1); + if (asign == isign) { + add128(isig, 0, asig0, asig1, &asig0, &asig1); + } else { + sub128(isig, 0, asig0, asig1, &asig0, &asig1); + } + aexp = iexp; + asign = isign; + } + /* + * Multiply by the second argument to compute the required + * result. + */ + if (arg1_exp == 0) { + normalizeFloatx80Subnormal(arg1_sig, &arg1_exp, &arg1_sig); + } + mul128By64To192(asig0, asig1, arg1_sig, &asig0, &asig1, &asig2); + aexp += arg1_exp - 0x3ffe; + /* This result is inexact. 
*/ + asig1 |= 1; + env->fp_status.float_rounding_mode = save_mode; + ST1 = normalizeRoundAndPackFloatx80(floatx80_precision_x, + asign ^ arg1_sign, aexp, + asig0, asig1, &env->fp_status); + } + + env->fp_status.floatx80_rounding_precision = save_prec; + } + fpop(env); + merge_exception_flags(env, old_flags); +} + +void helper_fsqrt(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + if (floatx80_is_neg(ST0)) { + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + env->fpus |= 0x400; + } + ST0 = floatx80_sqrt(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); +} + +void helper_fsincos(CPUX86State *env) +{ + double fptemp = floatx80_to_double(env, ST0); + + if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = double_to_floatx80(env, sin(fptemp)); + fpush(env); + ST0 = double_to_floatx80(env, cos(fptemp)); + env->fpus &= ~0x400; /* C2 <-- 0 */ + /* the above code is for |arg| < 2**63 only */ + } +} + +void helper_frndint(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + ST0 = floatx80_round_to_int(ST0, &env->fp_status); + merge_exception_flags(env, old_flags); +} + +void helper_fscale(CPUX86State *env) +{ + uint8_t old_flags = save_exception_flags(env); + if (floatx80_invalid_encoding(ST1) || floatx80_invalid_encoding(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else if (floatx80_is_any_nan(ST1)) { + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + } + ST0 = ST1; + if (floatx80_is_signaling_nan(ST0, &env->fp_status)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_silence_nan(ST0, &env->fp_status); + } + } else if (floatx80_is_infinity(ST1) && + !floatx80_invalid_encoding(ST0) && + !floatx80_is_any_nan(ST0)) { + if (floatx80_is_neg(ST1)) { + if (floatx80_is_infinity(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = 
floatx80_default_nan(&env->fp_status); + } else { + ST0 = (floatx80_is_neg(ST0) ? + floatx80_chs(floatx80_zero) : + floatx80_zero); + } + } else { + if (floatx80_is_zero(ST0)) { + float_raise(float_flag_invalid, &env->fp_status); + ST0 = floatx80_default_nan(&env->fp_status); + } else { + ST0 = (floatx80_is_neg(ST0) ? + floatx80_chs(floatx80_infinity) : + floatx80_infinity); + } + } + } else { + int n; + FloatX80RoundPrec save = env->fp_status.floatx80_rounding_precision; + uint8_t save_flags = get_float_exception_flags(&env->fp_status); + set_float_exception_flags(0, &env->fp_status); + n = floatx80_to_int32_round_to_zero(ST1, &env->fp_status); + set_float_exception_flags(save_flags, &env->fp_status); + env->fp_status.floatx80_rounding_precision = floatx80_precision_x; + ST0 = floatx80_scalbn(ST0, n, &env->fp_status); + env->fp_status.floatx80_rounding_precision = save; + } + merge_exception_flags(env, old_flags); +} + +void helper_fsin(CPUX86State *env) +{ + double fptemp = floatx80_to_double(env, ST0); + + if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = double_to_floatx80(env, sin(fptemp)); + env->fpus &= ~0x400; /* C2 <-- 0 */ + /* the above code is for |arg| < 2**53 only */ + } +} + +void helper_fcos(CPUX86State *env) +{ + double fptemp = floatx80_to_double(env, ST0); + + if ((fptemp > MAXTAN) || (fptemp < -MAXTAN)) { + env->fpus |= 0x400; + } else { + ST0 = double_to_floatx80(env, cos(fptemp)); + env->fpus &= ~0x400; /* C2 <-- 0 */ + /* the above code is for |arg| < 2**63 only */ + } +} + +void helper_fxam_ST0(CPUX86State *env) +{ + CPU_LDoubleU temp; + int expdif; + + temp.d = ST0; + + env->fpus &= ~0x4700; /* (C3,C2,C1,C0) <-- 0000 */ + if (SIGND(temp)) { + env->fpus |= 0x200; /* C1 <-- 1 */ + } + + if (env->fptags[env->fpstt]) { + env->fpus |= 0x4100; /* Empty */ + return; + } + + expdif = EXPD(temp); + if (expdif == MAXEXPD) { + if (MANTD(temp) == 0x8000000000000000ULL) { + env->fpus |= 0x500; /* Infinity */ + } 
else if (MANTD(temp) & 0x8000000000000000ULL) { + env->fpus |= 0x100; /* NaN */ + } + } else if (expdif == 0) { + if (MANTD(temp) == 0) { + env->fpus |= 0x4000; /* Zero */ + } else { + env->fpus |= 0x4400; /* Denormal */ + } + } else if (MANTD(temp) & 0x8000000000000000ULL) { + env->fpus |= 0x400; + } +} + +static void do_fstenv(CPUX86State *env, target_ulong ptr, int data32, + uintptr_t retaddr) +{ + int fpus, fptag, exp, i; + uint64_t mant; + CPU_LDoubleU tmp; + + fpus = (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + fptag = 0; + for (i = 7; i >= 0; i--) { + fptag <<= 2; + if (env->fptags[i]) { + fptag |= 3; + } else { + tmp.d = env->fpregs[i].d; + exp = EXPD(tmp); + mant = MANTD(tmp); + if (exp == 0 && mant == 0) { + /* zero */ + fptag |= 1; + } else if (exp == 0 || exp == MAXEXPD + || (mant & (1LL << 63)) == 0) { + /* NaNs, infinity, denormal */ + fptag |= 2; + } + } + } + if (data32) { + /* 32 bit */ + cpu_stl_data_ra(env, ptr, env->fpuc, retaddr); + cpu_stl_data_ra(env, ptr + 4, fpus, retaddr); + cpu_stl_data_ra(env, ptr + 8, fptag, retaddr); + cpu_stl_data_ra(env, ptr + 12, env->fpip, retaddr); /* fpip */ + cpu_stl_data_ra(env, ptr + 16, env->fpcs, retaddr); /* fpcs */ + cpu_stl_data_ra(env, ptr + 20, env->fpdp, retaddr); /* fpoo */ + cpu_stl_data_ra(env, ptr + 24, env->fpds, retaddr); /* fpos */ + } else { + /* 16 bit */ + cpu_stw_data_ra(env, ptr, env->fpuc, retaddr); + cpu_stw_data_ra(env, ptr + 2, fpus, retaddr); + cpu_stw_data_ra(env, ptr + 4, fptag, retaddr); + cpu_stw_data_ra(env, ptr + 6, env->fpip, retaddr); + cpu_stw_data_ra(env, ptr + 8, env->fpcs, retaddr); + cpu_stw_data_ra(env, ptr + 10, env->fpdp, retaddr); + cpu_stw_data_ra(env, ptr + 12, env->fpds, retaddr); + } +} + +void helper_fstenv(CPUX86State *env, target_ulong ptr, int data32) +{ + do_fstenv(env, ptr, data32, GETPC()); +} + +static void cpu_set_fpus(CPUX86State *env, uint16_t fpus) +{ + env->fpstt = (fpus >> 11) & 7; + env->fpus = fpus & ~0x3800 & ~FPUS_B; + env->fpus |= 
env->fpus & FPUS_SE ? FPUS_B : 0; +#if !defined(CONFIG_USER_ONLY) + if (!(env->fpus & FPUS_SE)) { + /* + * Here the processor deasserts FERR#; in response, the chipset deasserts + * IGNNE#. + */ + cpu_clear_ignne(); + } +#endif +} + +static void do_fldenv(CPUX86State *env, target_ulong ptr, int data32, + uintptr_t retaddr) +{ + int i, fpus, fptag; + + if (data32) { + cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); + fpus = cpu_lduw_data_ra(env, ptr + 4, retaddr); + fptag = cpu_lduw_data_ra(env, ptr + 8, retaddr); + } else { + cpu_set_fpuc(env, cpu_lduw_data_ra(env, ptr, retaddr)); + fpus = cpu_lduw_data_ra(env, ptr + 2, retaddr); + fptag = cpu_lduw_data_ra(env, ptr + 4, retaddr); + } + cpu_set_fpus(env, fpus); + for (i = 0; i < 8; i++) { + env->fptags[i] = ((fptag & 3) == 3); + fptag >>= 2; + } +} + +void helper_fldenv(CPUX86State *env, target_ulong ptr, int data32) +{ + do_fldenv(env, ptr, data32, GETPC()); +} + +static void do_fsave(CPUX86State *env, target_ulong ptr, int data32, + uintptr_t retaddr) +{ + floatx80 tmp; + int i; + + do_fstenv(env, ptr, data32, retaddr); + + ptr += (target_ulong)14 << data32; + for (i = 0; i < 8; i++) { + tmp = ST(i); + do_fstt(env, tmp, ptr, retaddr); + ptr += 10; + } + + do_fninit(env); +} + +void helper_fsave(CPUX86State *env, target_ulong ptr, int data32) +{ + do_fsave(env, ptr, data32, GETPC()); +} + +static void do_frstor(CPUX86State *env, target_ulong ptr, int data32, + uintptr_t retaddr) +{ + floatx80 tmp; + int i; + + do_fldenv(env, ptr, data32, retaddr); + ptr += (target_ulong)14 << data32; + + for (i = 0; i < 8; i++) { + tmp = do_fldt(env, ptr, retaddr); + ST(i) = tmp; + ptr += 10; + } +} + +void helper_frstor(CPUX86State *env, target_ulong ptr, int data32) +{ + do_frstor(env, ptr, data32, GETPC()); +} + +#define XO(X) offsetof(X86XSaveArea, X) + +static void do_xsave_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + int fpus, fptag, i; + target_ulong addr; + + fpus = (env->fpus & ~0x3800) | (env->fpstt 
& 0x7) << 11; + fptag = 0; + for (i = 0; i < 8; i++) { + fptag |= (env->fptags[i] << i); + } + + cpu_stw_data_ra(env, ptr + XO(legacy.fcw), env->fpuc, ra); + cpu_stw_data_ra(env, ptr + XO(legacy.fsw), fpus, ra); + cpu_stw_data_ra(env, ptr + XO(legacy.ftw), fptag ^ 0xff, ra); + + /* In 32-bit mode this is eip, sel, dp, sel. + In 64-bit mode this is rip, rdp. + But in either case we don't write actual data, just zeros. */ + cpu_stq_data_ra(env, ptr + XO(legacy.fpip), 0, ra); /* eip+sel; rip */ + cpu_stq_data_ra(env, ptr + XO(legacy.fpdp), 0, ra); /* edp+sel; rdp */ + + addr = ptr + XO(legacy.fpregs); + for (i = 0; i < 8; i++) { + floatx80 tmp = ST(i); + do_fstt(env, tmp, addr, ra); + addr += 16; + } +} + +static void do_xsave_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + update_mxcsr_from_sse_status(env); + cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr), env->mxcsr, ra); + cpu_stl_data_ra(env, ptr + XO(legacy.mxcsr_mask), 0x0000ffff, ra); +} + +static void do_xsave_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + int i, nb_xmm_regs; + target_ulong addr; + + if (env->hflags & HF_CS64_MASK) { + nb_xmm_regs = 16; + } else { + nb_xmm_regs = 8; + } + + addr = ptr + XO(legacy.xmm_regs); + for (i = 0; i < nb_xmm_regs; i++) { + cpu_stq_data_ra(env, addr, env->xmm_regs[i].ZMM_Q(0), ra); + cpu_stq_data_ra(env, addr + 8, env->xmm_regs[i].ZMM_Q(1), ra); + addr += 16; + } +} + +static void do_xsave_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + int i, nb_xmm_regs; + + if (env->hflags & HF_CS64_MASK) { + nb_xmm_regs = 16; + } else { + nb_xmm_regs = 8; + } + + for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { + cpu_stq_data_ra(env, ptr, env->xmm_regs[i].ZMM_Q(2), ra); + cpu_stq_data_ra(env, ptr + 8, env->xmm_regs[i].ZMM_Q(3), ra); + } +} + +static void do_xsave_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); + int i; + + for (i = 0; i < 4; i++, addr += 16) { + 
cpu_stq_data_ra(env, addr, env->bnd_regs[i].lb, ra); + cpu_stq_data_ra(env, addr + 8, env->bnd_regs[i].ub, ra); + } +} + +static void do_xsave_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), + env->bndcs_regs.cfgu, ra); + cpu_stq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), + env->bndcs_regs.sts, ra); +} + +static void do_xsave_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + cpu_stq_data_ra(env, ptr, env->pkru, ra); +} + +static void do_fxsave(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + /* The operand must be 16 byte aligned */ + if (ptr & 0xf) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + do_xsave_fpu(env, ptr, ra); + + if (env->cr[4] & CR4_OSFXSR_MASK) { + do_xsave_mxcsr(env, ptr, ra); + /* Fast FXSAVE leaves out the XMM registers */ + if (!(env->efer & MSR_EFER_FFXSR) + || (env->hflags & HF_CPL_MASK) + || !(env->hflags & HF_LMA_MASK)) { + do_xsave_sse(env, ptr, ra); + } + } +} + +void helper_fxsave(CPUX86State *env, target_ulong ptr) +{ + do_fxsave(env, ptr, GETPC()); +} + +static uint64_t get_xinuse(CPUX86State *env) +{ + uint64_t inuse = -1; + + /* For the most part, we don't track XINUSE. We could calculate it + here for all components, but it's probably less work to simply + indicate in use. That said, the state of BNDREGS is important + enough to track in HFLAGS, so we might as well use that here. */ + if ((env->hflags & HF_MPX_IU_MASK) == 0) { + inuse &= ~XSTATE_BNDREGS_MASK; + } + return inuse; +} + +static void do_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm, + uint64_t inuse, uint64_t opt, uintptr_t ra) +{ + uint64_t old_bv, new_bv; + + /* The OS must have enabled XSAVE. */ + if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { + raise_exception_ra(env, EXCP06_ILLOP, ra); + } + + /* The operand must be 64 byte aligned. */ + if (ptr & 63) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + /* Never save anything not enabled by XCR0. 
*/ + rfbm &= env->xcr0; + opt &= rfbm; + + if (opt & XSTATE_FP_MASK) { + do_xsave_fpu(env, ptr, ra); + } + if (rfbm & XSTATE_SSE_MASK) { + /* Note that saving MXCSR is not suppressed by XSAVEOPT. */ + do_xsave_mxcsr(env, ptr, ra); + } + if (opt & XSTATE_SSE_MASK) { + do_xsave_sse(env, ptr, ra); + } + if (opt & XSTATE_YMM_MASK) { + do_xsave_ymmh(env, ptr + XO(avx_state), ra); + } + if (opt & XSTATE_BNDREGS_MASK) { + do_xsave_bndregs(env, ptr + XO(bndreg_state), ra); + } + if (opt & XSTATE_BNDCSR_MASK) { + do_xsave_bndcsr(env, ptr + XO(bndcsr_state), ra); + } + if (opt & XSTATE_PKRU_MASK) { + do_xsave_pkru(env, ptr + XO(pkru_state), ra); + } + + /* Update the XSTATE_BV field. */ + old_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); + new_bv = (old_bv & ~rfbm) | (inuse & rfbm); + cpu_stq_data_ra(env, ptr + XO(header.xstate_bv), new_bv, ra); +} + +void helper_xsave(CPUX86State *env, target_ulong ptr, uint64_t rfbm) +{ + do_xsave(env, ptr, rfbm, get_xinuse(env), -1, GETPC()); +} + +void helper_xsaveopt(CPUX86State *env, target_ulong ptr, uint64_t rfbm) +{ + uint64_t inuse = get_xinuse(env); + do_xsave(env, ptr, rfbm, inuse, inuse, GETPC()); +} + +static void do_xrstor_fpu(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + int i, fpuc, fpus, fptag; + target_ulong addr; + + fpuc = cpu_lduw_data_ra(env, ptr + XO(legacy.fcw), ra); + fpus = cpu_lduw_data_ra(env, ptr + XO(legacy.fsw), ra); + fptag = cpu_lduw_data_ra(env, ptr + XO(legacy.ftw), ra); + cpu_set_fpuc(env, fpuc); + cpu_set_fpus(env, fpus); + fptag ^= 0xff; + for (i = 0; i < 8; i++) { + env->fptags[i] = ((fptag >> i) & 1); + } + + addr = ptr + XO(legacy.fpregs); + for (i = 0; i < 8; i++) { + floatx80 tmp = do_fldt(env, addr, ra); + ST(i) = tmp; + addr += 16; + } +} + +static void do_xrstor_mxcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + cpu_set_mxcsr(env, cpu_ldl_data_ra(env, ptr + XO(legacy.mxcsr), ra)); +} + +static void do_xrstor_sse(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ 
+ int i, nb_xmm_regs; + target_ulong addr; + + if (env->hflags & HF_CS64_MASK) { + nb_xmm_regs = 16; + } else { + nb_xmm_regs = 8; + } + + addr = ptr + XO(legacy.xmm_regs); + for (i = 0; i < nb_xmm_regs; i++) { + env->xmm_regs[i].ZMM_Q(0) = cpu_ldq_data_ra(env, addr, ra); + env->xmm_regs[i].ZMM_Q(1) = cpu_ldq_data_ra(env, addr + 8, ra); + addr += 16; + } +} + +static void do_clear_sse(CPUX86State *env) +{ + int i, nb_xmm_regs; + + if (env->hflags & HF_CS64_MASK) { + nb_xmm_regs = 16; + } else { + nb_xmm_regs = 8; + } + + for (i = 0; i < nb_xmm_regs; i++) { + env->xmm_regs[i].ZMM_Q(0) = 0; + env->xmm_regs[i].ZMM_Q(1) = 0; + } +} + +static void do_xrstor_ymmh(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + int i, nb_xmm_regs; + + if (env->hflags & HF_CS64_MASK) { + nb_xmm_regs = 16; + } else { + nb_xmm_regs = 8; + } + + for (i = 0; i < nb_xmm_regs; i++, ptr += 16) { + env->xmm_regs[i].ZMM_Q(2) = cpu_ldq_data_ra(env, ptr, ra); + env->xmm_regs[i].ZMM_Q(3) = cpu_ldq_data_ra(env, ptr + 8, ra); + } +} + +static void do_clear_ymmh(CPUX86State *env) +{ + int i, nb_xmm_regs; + + if (env->hflags & HF_CS64_MASK) { + nb_xmm_regs = 16; + } else { + nb_xmm_regs = 8; + } + + for (i = 0; i < nb_xmm_regs; i++) { + env->xmm_regs[i].ZMM_Q(2) = 0; + env->xmm_regs[i].ZMM_Q(3) = 0; + } +} + +static void do_xrstor_bndregs(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + target_ulong addr = ptr + offsetof(XSaveBNDREG, bnd_regs); + int i; + + for (i = 0; i < 4; i++, addr += 16) { + env->bnd_regs[i].lb = cpu_ldq_data_ra(env, addr, ra); + env->bnd_regs[i].ub = cpu_ldq_data_ra(env, addr + 8, ra); + } +} + +static void do_xrstor_bndcsr(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + /* FIXME: Extend highest implemented bit of linear address. 
*/ + env->bndcs_regs.cfgu + = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.cfgu), ra); + env->bndcs_regs.sts + = cpu_ldq_data_ra(env, ptr + offsetof(XSaveBNDCSR, bndcsr.sts), ra); +} + +static void do_xrstor_pkru(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + env->pkru = cpu_ldq_data_ra(env, ptr, ra); +} + +static void do_fxrstor(CPUX86State *env, target_ulong ptr, uintptr_t ra) +{ + /* The operand must be 16 byte aligned */ + if (ptr & 0xf) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + do_xrstor_fpu(env, ptr, ra); + + if (env->cr[4] & CR4_OSFXSR_MASK) { + do_xrstor_mxcsr(env, ptr, ra); + /* Fast FXRSTOR leaves out the XMM registers */ + if (!(env->efer & MSR_EFER_FFXSR) + || (env->hflags & HF_CPL_MASK) + || !(env->hflags & HF_LMA_MASK)) { + do_xrstor_sse(env, ptr, ra); + } + } +} + +void helper_fxrstor(CPUX86State *env, target_ulong ptr) +{ + do_fxrstor(env, ptr, GETPC()); +} + +static void do_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm, uintptr_t ra) +{ + uint64_t xstate_bv, xcomp_bv, reserve0; + + rfbm &= env->xcr0; + + /* The OS must have enabled XSAVE. */ + if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { + raise_exception_ra(env, EXCP06_ILLOP, ra); + } + + /* The operand must be 64 byte aligned. */ + if (ptr & 63) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + xstate_bv = cpu_ldq_data_ra(env, ptr + XO(header.xstate_bv), ra); + + if ((int64_t)xstate_bv < 0) { + /* FIXME: Compact form. */ + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + /* Standard form. */ + + /* The XSTATE_BV field must not set bits not present in XCR0. */ + if (xstate_bv & ~env->xcr0) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + /* The XCOMP_BV field must be zero. Note that, as of the April 2016 + revision, the description of the XSAVE Header (Vol 1, Sec 13.4.2) + describes only XCOMP_BV, but the description of the standard form + of XRSTOR (Vol 1, Sec 13.8.1) checks bytes 23:8 for zero, which + includes the next 64-bit field. 
*/ + xcomp_bv = cpu_ldq_data_ra(env, ptr + XO(header.xcomp_bv), ra); + reserve0 = cpu_ldq_data_ra(env, ptr + XO(header.reserve0), ra); + if (xcomp_bv || reserve0) { + raise_exception_ra(env, EXCP0D_GPF, ra); + } + + if (rfbm & XSTATE_FP_MASK) { + if (xstate_bv & XSTATE_FP_MASK) { + do_xrstor_fpu(env, ptr, ra); + } else { + do_fninit(env); + memset(env->fpregs, 0, sizeof(env->fpregs)); + } + } + if (rfbm & XSTATE_SSE_MASK) { + /* Note that the standard form of XRSTOR loads MXCSR from memory + whether or not the XSTATE_BV bit is set. */ + do_xrstor_mxcsr(env, ptr, ra); + if (xstate_bv & XSTATE_SSE_MASK) { + do_xrstor_sse(env, ptr, ra); + } else { + do_clear_sse(env); + } + } + if (rfbm & XSTATE_YMM_MASK) { + if (xstate_bv & XSTATE_YMM_MASK) { + do_xrstor_ymmh(env, ptr + XO(avx_state), ra); + } else { + do_clear_ymmh(env); + } + } + if (rfbm & XSTATE_BNDREGS_MASK) { + if (xstate_bv & XSTATE_BNDREGS_MASK) { + do_xrstor_bndregs(env, ptr + XO(bndreg_state), ra); + env->hflags |= HF_MPX_IU_MASK; + } else { + memset(env->bnd_regs, 0, sizeof(env->bnd_regs)); + env->hflags &= ~HF_MPX_IU_MASK; + } + } + if (rfbm & XSTATE_BNDCSR_MASK) { + if (xstate_bv & XSTATE_BNDCSR_MASK) { + do_xrstor_bndcsr(env, ptr + XO(bndcsr_state), ra); + } else { + memset(&env->bndcs_regs, 0, sizeof(env->bndcs_regs)); + } + cpu_sync_bndcs_hflags(env); + } + if (rfbm & XSTATE_PKRU_MASK) { + uint64_t old_pkru = env->pkru; + if (xstate_bv & XSTATE_PKRU_MASK) { + do_xrstor_pkru(env, ptr + XO(pkru_state), ra); + } else { + env->pkru = 0; + } + if (env->pkru != old_pkru) { + CPUState *cs = env_cpu(env); + tlb_flush(cs); + } + } +} + +#undef XO + +void helper_xrstor(CPUX86State *env, target_ulong ptr, uint64_t rfbm) +{ + do_xrstor(env, ptr, rfbm, GETPC()); +} + +#if defined(CONFIG_USER_ONLY) +void cpu_x86_fsave(CPUX86State *env, target_ulong ptr, int data32) +{ + do_fsave(env, ptr, data32, 0); +} + +void cpu_x86_frstor(CPUX86State *env, target_ulong ptr, int data32) +{ + do_frstor(env, ptr, data32, 0); +} + 
+void cpu_x86_fxsave(CPUX86State *env, target_ulong ptr) +{ + do_fxsave(env, ptr, 0); +} + +void cpu_x86_fxrstor(CPUX86State *env, target_ulong ptr) +{ + do_fxrstor(env, ptr, 0); +} + +void cpu_x86_xsave(CPUX86State *env, target_ulong ptr) +{ + do_xsave(env, ptr, -1, get_xinuse(env), -1, 0); +} + +void cpu_x86_xrstor(CPUX86State *env, target_ulong ptr) +{ + do_xrstor(env, ptr, -1, 0); +} +#endif + +uint64_t helper_xgetbv(CPUX86State *env, uint32_t ecx) +{ + /* The OS must have enabled XSAVE. */ + if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { + raise_exception_ra(env, EXCP06_ILLOP, GETPC()); + } + + switch (ecx) { + case 0: + return env->xcr0; + case 1: + if (env->features[FEAT_XSAVE] & CPUID_XSAVE_XGETBV1) { + return env->xcr0 & get_xinuse(env); + } + break; + } + raise_exception_ra(env, EXCP0D_GPF, GETPC()); +} + +void helper_xsetbv(CPUX86State *env, uint32_t ecx, uint64_t mask) +{ + uint32_t dummy, ena_lo, ena_hi; + uint64_t ena; + + /* The OS must have enabled XSAVE. */ + if (!(env->cr[4] & CR4_OSXSAVE_MASK)) { + raise_exception_ra(env, EXCP06_ILLOP, GETPC()); + } + + /* Only XCR0 is defined at present; the FPU may not be disabled. */ + if (ecx != 0 || (mask & XSTATE_FP_MASK) == 0) { + goto do_gpf; + } + + /* Disallow enabling unimplemented features. */ + cpu_x86_cpuid(env, 0x0d, 0, &ena_lo, &dummy, &dummy, &ena_hi); + ena = ((uint64_t)ena_hi << 32) | ena_lo; + if (mask & ~ena) { + goto do_gpf; + } + + /* Disallow enabling only half of MPX. 
*/ + if ((mask ^ (mask * (XSTATE_BNDCSR_MASK / XSTATE_BNDREGS_MASK))) + & XSTATE_BNDCSR_MASK) { + goto do_gpf; + } + + env->xcr0 = mask; + cpu_sync_bndcs_hflags(env); + cpu_sync_avx_hflag(env); + return; + + do_gpf: + raise_exception_ra(env, EXCP0D_GPF, GETPC()); +} + +/* MMX/SSE */ +/* XXX: optimize by storing fptt and fptags in the static cpu state */ + +#define SSE_DAZ 0x0040 +#define SSE_RC_SHIFT 13 +#define SSE_RC_MASK (3 << SSE_RC_SHIFT) +#define SSE_FZ 0x8000 + +void update_mxcsr_status(CPUX86State *env) +{ + uint32_t mxcsr = env->mxcsr; + int rnd_type; + + /* set rounding mode */ + rnd_type = (mxcsr & SSE_RC_MASK) >> SSE_RC_SHIFT; + set_x86_rounding_mode(rnd_type, &env->sse_status); + + /* Set exception flags. */ + set_float_exception_flags((mxcsr & FPUS_IE ? float_flag_invalid : 0) | + (mxcsr & FPUS_ZE ? float_flag_divbyzero : 0) | + (mxcsr & FPUS_OE ? float_flag_overflow : 0) | + (mxcsr & FPUS_UE ? float_flag_underflow : 0) | + (mxcsr & FPUS_PE ? float_flag_inexact : 0), + &env->sse_status); + + /* set denormals are zero */ + set_flush_inputs_to_zero((mxcsr & SSE_DAZ) ? 1 : 0, &env->sse_status); + + /* set flush to zero */ + set_flush_to_zero((mxcsr & SSE_FZ) ? 1 : 0, &env->sse_status); +} + +void update_mxcsr_from_sse_status(CPUX86State *env) +{ + uint8_t flags = get_float_exception_flags(&env->sse_status); + /* + * The MXCSR denormal flag has opposite semantics to + * float_flag_input_denormal (the softfloat code sets that flag + * only when flushing input denormals to zero, but SSE sets it + * only when not flushing them to zero), so is not converted + * here. + */ + env->mxcsr |= ((flags & float_flag_invalid ? FPUS_IE : 0) | + (flags & float_flag_divbyzero ? FPUS_ZE : 0) | + (flags & float_flag_overflow ? FPUS_OE : 0) | + (flags & float_flag_underflow ? FPUS_UE : 0) | + (flags & float_flag_inexact ? FPUS_PE : 0) | + (flags & float_flag_output_denormal ? 
FPUS_UE | FPUS_PE : + 0)); +} + +void helper_update_mxcsr(CPUX86State *env) +{ + update_mxcsr_from_sse_status(env); +} + +void helper_ldmxcsr(CPUX86State *env, uint32_t val) +{ + cpu_set_mxcsr(env, val); +} + +void helper_enter_mmx(CPUX86State *env) +{ + env->fpstt = 0; + *(uint32_t *)(env->fptags) = 0; + *(uint32_t *)(env->fptags + 4) = 0; +} + +void helper_emms(CPUX86State *env) +{ + /* set to empty state */ + *(uint32_t *)(env->fptags) = 0x01010101; + *(uint32_t *)(env->fptags + 4) = 0x01010101; +} + +#define SHIFT 0 +#include "ops_sse.h" + +#define SHIFT 1 +#include "ops_sse.h" + +#define SHIFT 2 +#include "ops_sse.h" diff --git a/target/i386/tcg/helper-tcg.h b/target/i386/tcg/helper-tcg.h new file mode 100644 index 0000000000..effc2c1c98 --- /dev/null +++ b/target/i386/tcg/helper-tcg.h @@ -0,0 +1,117 @@ +/* + * TCG specific prototypes for helpers + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef I386_HELPER_TCG_H +#define I386_HELPER_TCG_H + +#include "exec/exec-all.h" + +/* Maximum instruction code size */ +#define TARGET_MAX_INSN_SIZE 16 + +#if defined(TARGET_X86_64) +# define TCG_PHYS_ADDR_BITS 40 +#else +# define TCG_PHYS_ADDR_BITS 36 +#endif + +QEMU_BUILD_BUG_ON(TCG_PHYS_ADDR_BITS > TARGET_PHYS_ADDR_SPACE_BITS); + +/** + * x86_cpu_do_interrupt: + * @cpu: vCPU the interrupt is to be handled by. + */ +void x86_cpu_do_interrupt(CPUState *cpu); +#ifndef CONFIG_USER_ONLY +void x86_cpu_exec_halt(CPUState *cpu); +bool x86_need_replay_interrupt(int interrupt_request); +bool x86_cpu_exec_interrupt(CPUState *cpu, int int_req); +#endif + +void breakpoint_handler(CPUState *cs); + +/* n must be a constant to be efficient */ +static inline target_long lshift(target_long x, int n) +{ + if (n >= 0) { + return x << n; + } else { + return x >> (-n); + } +} + +/* translate.c */ +void tcg_x86_init(void); + +/* excp_helper.c */ +G_NORETURN void raise_exception(CPUX86State *env, int exception_index); +G_NORETURN void raise_exception_ra(CPUX86State *env, int exception_index, + uintptr_t retaddr); +G_NORETURN void raise_exception_err(CPUX86State *env, int exception_index, + int error_code); +G_NORETURN void raise_exception_err_ra(CPUX86State *env, int exception_index, + int error_code, uintptr_t retaddr); +G_NORETURN void raise_interrupt(CPUX86State *nenv, int intno, int next_eip_addend); +G_NORETURN void handle_unaligned_access(CPUX86State *env, vaddr vaddr, + MMUAccessType access_type, + uintptr_t retaddr); +#ifdef CONFIG_USER_ONLY +void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra); +void x86_cpu_record_sigbus(CPUState *cs, vaddr addr, + MMUAccessType access_type, uintptr_t ra); +#else +bool x86_cpu_tlb_fill(CPUState *cs, vaddr address, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr); +G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, + 
MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr); +#endif + +/* cc_helper.c */ +extern const uint8_t parity_table[256]; + +/* misc_helper.c */ +void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask); +G_NORETURN void do_pause(CPUX86State *env); + +/* sysemu/svm_helper.c */ +#ifndef CONFIG_USER_ONLY +G_NORETURN void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, + uint64_t exit_info_1, uintptr_t retaddr); +void do_vmexit(CPUX86State *env); +#endif + +/* seg_helper.c */ +void do_interrupt_x86_hardirq(CPUX86State *env, int intno, int is_hw); +void do_interrupt_all(X86CPU *cpu, int intno, int is_int, + int error_code, target_ulong next_eip, int is_hw); +void handle_even_inj(CPUX86State *env, int intno, int is_int, + int error_code, int is_hw, int rm); +int exception_has_error_code(int intno); + +/* smm_helper.c */ +void do_smm_enter(X86CPU *cpu); + +/* bpt_helper.c */ +bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update); + +#endif /* I386_HELPER_TCG_H */ diff --git a/target/i386/int_helper.c b/target/i386/tcg/int_helper.c index 4dc5c65991..ab85dc5540 100644 --- a/target/i386/int_helper.c +++ b/target/i386/tcg/int_helper.c @@ -6,7 +6,7 @@ * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -18,10 +18,14 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "cpu.h" #include "exec/exec-all.h" #include "qemu/host-utils.h" #include "exec/helper-proto.h" +#include "qapi/error.h" +#include "qemu/guest-random.h" +#include "helper-tcg.h" //#define DEBUG_MULDIV @@ -186,7 +190,7 @@ void helper_aaa(CPUX86State *env) int al, ah, af; int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); af = eflags & CC_A; al = env->regs[R_EAX] & 0xff; ah = (env->regs[R_EAX] >> 8) & 0xff; @@ -210,7 +214,7 @@ void helper_aas(CPUX86State *env) int al, ah, af; int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); af = eflags & CC_A; al = env->regs[R_EAX] & 0xff; ah = (env->regs[R_EAX] >> 8) & 0xff; @@ -233,7 +237,7 @@ void helper_daa(CPUX86State *env) int old_al, al, af, cf; int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); cf = eflags & CC_C; af = eflags & CC_A; old_al = al = env->regs[R_EAX] & 0xff; @@ -260,7 +264,7 @@ void helper_das(CPUX86State *env) int al, al1, af, cf; int eflags; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); cf = eflags & CC_C; af = eflags & CC_A; al = env->regs[R_EAX] & 0xff; @@ -444,20 +448,20 @@ target_ulong helper_pext(target_ulong src, target_ulong mask) } #define SHIFT 0 -#include "shift_helper_template.h" +#include "shift_helper_template.h.inc" #undef SHIFT #define SHIFT 1 -#include "shift_helper_template.h" +#include "shift_helper_template.h.inc" #undef SHIFT #define SHIFT 2 -#include "shift_helper_template.h" +#include "shift_helper_template.h.inc" #undef SHIFT #ifdef TARGET_X86_64 #define SHIFT 3 -#include "shift_helper_template.h" +#include "shift_helper_template.h.inc" #undef SHIFT #endif @@ -470,3 +474,22 @@ void helper_cr4_testbit(CPUX86State *env, uint32_t 
bit) raise_exception_ra(env, EXCP06_ILLOP, GETPC()); } } + +target_ulong HELPER(rdrand)(CPUX86State *env) +{ + Error *err = NULL; + target_ulong ret; + + if (qemu_guest_getrandom(&ret, sizeof(ret), &err) < 0) { + qemu_log_mask(LOG_UNIMP, "rdrand: Crypto failure: %s", + error_get_pretty(err)); + error_free(err); + /* Failure clears CF and all other flags, and returns 0. */ + env->cc_src = 0; + return 0; + } + + /* Success sets CF and clears all others. */ + env->cc_src = CC_C; + return ret; +} diff --git a/target/i386/tcg/mem_helper.c b/target/i386/tcg/mem_helper.c new file mode 100644 index 0000000000..3ef84e90d9 --- /dev/null +++ b/target/i386/tcg/mem_helper.c @@ -0,0 +1,57 @@ +/* + * x86 memory access helpers + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/helper-proto.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "qemu/int128.h" +#include "qemu/atomic128.h" +#include "tcg/tcg.h" +#include "helper-tcg.h" + +void helper_boundw(CPUX86State *env, target_ulong a0, int v) +{ + int low, high; + + low = cpu_ldsw_data_ra(env, a0, GETPC()); + high = cpu_ldsw_data_ra(env, a0 + 2, GETPC()); + v = (int16_t)v; + if (v < low || v > high) { + if (env->hflags & HF_MPX_EN_MASK) { + env->bndcs_regs.sts = 0; + } + raise_exception_ra(env, EXCP05_BOUND, GETPC()); + } +} + +void helper_boundl(CPUX86State *env, target_ulong a0, int v) +{ + int low, high; + + low = cpu_ldl_data_ra(env, a0, GETPC()); + high = cpu_ldl_data_ra(env, a0 + 4, GETPC()); + if (v < low || v > high) { + if (env->hflags & HF_MPX_EN_MASK) { + env->bndcs_regs.sts = 0; + } + raise_exception_ra(env, EXCP05_BOUND, GETPC()); + } +} diff --git a/target/i386/tcg/meson.build b/target/i386/tcg/meson.build new file mode 100644 index 0000000000..f9110e890c --- /dev/null +++ b/target/i386/tcg/meson.build @@ -0,0 +1,15 @@ +i386_ss.add(when: 'CONFIG_TCG', if_true: files( + 'bpt_helper.c', + 'cc_helper.c', + 'excp_helper.c', + 'fpu_helper.c', + 'int_helper.c', + 'mem_helper.c', + 'misc_helper.c', + 'mpx_helper.c', + 'seg_helper.c', + 'tcg-cpu.c', + 'translate.c'), if_false: files('tcg-stub.c')) + +subdir('sysemu') +subdir('user') diff --git a/target/i386/tcg/misc_helper.c b/target/i386/tcg/misc_helper.c new file mode 100644 index 0000000000..b0f0f7b893 --- /dev/null +++ b/target/i386/tcg/misc_helper.c @@ -0,0 +1,148 @@ +/* + * x86 misc helpers + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "cpu.h" +#include "exec/helper-proto.h" +#include "exec/exec-all.h" +#include "helper-tcg.h" + +/* + * NOTE: the translator must set DisasContext.cc_op to CC_OP_EFLAGS + * after generating a call to a helper that uses this. + */ +void cpu_load_eflags(CPUX86State *env, int eflags, int update_mask) +{ + CC_SRC = eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); + CC_OP = CC_OP_EFLAGS; + env->df = 1 - (2 * ((eflags >> 10) & 1)); + env->eflags = (env->eflags & ~update_mask) | + (eflags & update_mask) | 0x2; +} + +void helper_into(CPUX86State *env, int next_eip_addend) +{ + int eflags; + + eflags = cpu_cc_compute_all(env); + if (eflags & CC_O) { + raise_interrupt(env, EXCP04_INTO, next_eip_addend); + } +} + +void helper_cpuid(CPUX86State *env) +{ + uint32_t eax, ebx, ecx, edx; + + cpu_svm_check_intercept_param(env, SVM_EXIT_CPUID, 0, GETPC()); + + cpu_x86_cpuid(env, (uint32_t)env->regs[R_EAX], (uint32_t)env->regs[R_ECX], + &eax, &ebx, &ecx, &edx); + env->regs[R_EAX] = eax; + env->regs[R_EBX] = ebx; + env->regs[R_ECX] = ecx; + env->regs[R_EDX] = edx; +} + +void helper_rdtsc(CPUX86State *env) +{ + uint64_t val; + + if ((env->cr[4] & CR4_TSD_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) { + raise_exception_ra(env, EXCP0D_GPF, GETPC()); + } + cpu_svm_check_intercept_param(env, SVM_EXIT_RDTSC, 0, GETPC()); + + val = cpu_get_tsc(env) + env->tsc_offset; + env->regs[R_EAX] = (uint32_t)(val); + env->regs[R_EDX] = (uint32_t)(val >> 32); +} + +G_NORETURN void helper_rdpmc(CPUX86State *env) +{ + if 
(((env->cr[4] & CR4_PCE_MASK) == 0 ) && + ((env->hflags & HF_CPL_MASK) != 0)) { + raise_exception_ra(env, EXCP0D_GPF, GETPC()); + } + cpu_svm_check_intercept_param(env, SVM_EXIT_RDPMC, 0, GETPC()); + + /* currently unimplemented */ + qemu_log_mask(LOG_UNIMP, "x86: unimplemented rdpmc\n"); + raise_exception_err(env, EXCP06_ILLOP, 0); +} + +G_NORETURN void do_pause(CPUX86State *env) +{ + CPUState *cs = env_cpu(env); + + /* Just let another CPU run. */ + cs->exception_index = EXCP_INTERRUPT; + cpu_loop_exit(cs); +} + +G_NORETURN void helper_pause(CPUX86State *env, int next_eip_addend) +{ + cpu_svm_check_intercept_param(env, SVM_EXIT_PAUSE, 0, GETPC()); + env->eip += next_eip_addend; + + do_pause(env); +} + +uint64_t helper_rdpkru(CPUX86State *env, uint32_t ecx) +{ + if ((env->cr[4] & CR4_PKE_MASK) == 0) { + raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC()); + } + if (ecx != 0) { + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); + } + + return env->pkru; +} + +void helper_wrpkru(CPUX86State *env, uint32_t ecx, uint64_t val) +{ + CPUState *cs = env_cpu(env); + + if ((env->cr[4] & CR4_PKE_MASK) == 0) { + raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC()); + } + if (ecx != 0 || (val & 0xFFFFFFFF00000000ull)) { + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); + } + + env->pkru = val; + tlb_flush(cs); +} + +target_ulong HELPER(rdpid)(CPUX86State *env) +{ +#if !defined CONFIG_USER_ONLY + return env->tsc_aux; +#elif defined CONFIG_LINUX && defined CONFIG_GETCPU + unsigned cpu, node; + getcpu(&cpu, &node); + return (node << 12) | (cpu & 0xfff); +#elif defined CONFIG_SCHED_GETCPU + return sched_getcpu(); +#else + return 0; +#endif +} diff --git a/target/i386/mpx_helper.c b/target/i386/tcg/mpx_helper.c index ade5d245d2..22423eedcd 100644 --- a/target/i386/mpx_helper.c +++ b/target/i386/tcg/mpx_helper.c @@ -6,7 +6,7 @@ * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as 
published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -22,6 +22,7 @@ #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" #include "exec/exec-all.h" +#include "helper-tcg.h" void helper_bndck(CPUX86State *env, uint32_t fail) diff --git a/target/i386/tcg/ops_sse_header.h.inc b/target/i386/tcg/ops_sse_header.h.inc new file mode 100644 index 0000000000..d92c6faf6d --- /dev/null +++ b/target/i386/tcg/ops_sse_header.h.inc @@ -0,0 +1,429 @@ +/* + * MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI support + * + * Copyright (c) 2005 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ +#if SHIFT == 0 +#define Reg MMXReg +#define SUFFIX _mmx +#else +#define Reg ZMMReg +#if SHIFT == 1 +#define SUFFIX _xmm +#else +#define SUFFIX _ymm +#endif +#endif + +#define dh_alias_Reg ptr +#define dh_alias_ZMMReg ptr +#define dh_alias_MMXReg ptr +#define dh_ctype_Reg Reg * +#define dh_ctype_ZMMReg ZMMReg * +#define dh_ctype_MMXReg MMXReg * +#define dh_typecode_Reg dh_typecode_ptr +#define dh_typecode_ZMMReg dh_typecode_ptr +#define dh_typecode_MMXReg dh_typecode_ptr + +DEF_HELPER_4(glue(psrlw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psraw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psllw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psrld, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psrad, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pslld, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psrlq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psllq, SUFFIX), void, env, Reg, Reg, Reg) + +#if SHIFT >= 1 +DEF_HELPER_4(glue(psrldq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pslldq, SUFFIX), void, env, Reg, Reg, Reg) +#endif + +#define SSE_HELPER_B(name, F)\ + DEF_HELPER_4(glue(name, SUFFIX), void, env, Reg, Reg, Reg) + +#define SSE_HELPER_W(name, F)\ + DEF_HELPER_4(glue(name, SUFFIX), void, env, Reg, Reg, Reg) + +#define SSE_HELPER_L(name, F)\ + DEF_HELPER_4(glue(name, SUFFIX), void, env, Reg, Reg, Reg) + +#define SSE_HELPER_Q(name, F)\ + DEF_HELPER_4(glue(name, SUFFIX), void, env, Reg, Reg, Reg) + +#if SHIFT == 0 +DEF_HELPER_3(glue(pmulhrw, SUFFIX), void, env, Reg, Reg) +#endif +SSE_HELPER_W(pmulhuw, FMULHUW) +SSE_HELPER_W(pmulhw, FMULHW) + +SSE_HELPER_B(pavgb, FAVG) +SSE_HELPER_W(pavgw, FAVG) + +DEF_HELPER_4(glue(pmuludq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pmaddwd, SUFFIX), void, env, Reg, Reg, Reg) + +DEF_HELPER_4(glue(psadbw, SUFFIX), void, env, Reg, Reg, Reg) +#if SHIFT < 2 +DEF_HELPER_4(glue(maskmov, SUFFIX), void, env, Reg, Reg, tl) +#endif + +#if SHIFT == 0 
+DEF_HELPER_3(glue(pshufw, SUFFIX), void, Reg, Reg, int) +#else +DEF_HELPER_3(glue(pshufd, SUFFIX), void, Reg, Reg, int) +DEF_HELPER_3(glue(pshuflw, SUFFIX), void, Reg, Reg, int) +DEF_HELPER_3(glue(pshufhw, SUFFIX), void, Reg, Reg, int) +#endif + +#if SHIFT >= 1 +/* FPU ops */ +/* XXX: not accurate */ + +#define SSE_HELPER_P4(name) \ + DEF_HELPER_4(glue(name ## ps, SUFFIX), void, env, Reg, Reg, Reg) \ + DEF_HELPER_4(glue(name ## pd, SUFFIX), void, env, Reg, Reg, Reg) + +#define SSE_HELPER_P3(name, ...) \ + DEF_HELPER_3(glue(name ## ps, SUFFIX), void, env, Reg, Reg) \ + DEF_HELPER_3(glue(name ## pd, SUFFIX), void, env, Reg, Reg) + +#if SHIFT == 1 +#define SSE_HELPER_S4(name) \ + SSE_HELPER_P4(name) \ + DEF_HELPER_4(name ## ss, void, env, Reg, Reg, Reg) \ + DEF_HELPER_4(name ## sd, void, env, Reg, Reg, Reg) +#define SSE_HELPER_S3(name) \ + SSE_HELPER_P3(name) \ + DEF_HELPER_4(name ## ss, void, env, Reg, Reg, Reg) \ + DEF_HELPER_4(name ## sd, void, env, Reg, Reg, Reg) +#else +#define SSE_HELPER_S4(name, ...) SSE_HELPER_P4(name) +#define SSE_HELPER_S3(name, ...) 
SSE_HELPER_P3(name) +#endif + +DEF_HELPER_4(glue(shufps, SUFFIX), void, Reg, Reg, Reg, int) +DEF_HELPER_4(glue(shufpd, SUFFIX), void, Reg, Reg, Reg, int) + +SSE_HELPER_S4(add) +SSE_HELPER_S4(sub) +SSE_HELPER_S4(mul) +SSE_HELPER_S4(div) +SSE_HELPER_S4(min) +SSE_HELPER_S4(max) + +SSE_HELPER_S3(sqrt) + +DEF_HELPER_3(glue(cvtps2pd, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(cvtpd2ps, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(cvtdq2ps, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(cvtdq2pd, SUFFIX), void, env, Reg, Reg) + +DEF_HELPER_3(glue(cvtps2dq, SUFFIX), void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(glue(cvtpd2dq, SUFFIX), void, env, ZMMReg, ZMMReg) + +DEF_HELPER_3(glue(cvttps2dq, SUFFIX), void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(glue(cvttpd2dq, SUFFIX), void, env, ZMMReg, ZMMReg) + +#if SHIFT == 1 +DEF_HELPER_4(cvtss2sd, void, env, Reg, Reg, Reg) +DEF_HELPER_4(cvtsd2ss, void, env, Reg, Reg, Reg) +DEF_HELPER_3(cvtpi2ps, void, env, ZMMReg, MMXReg) +DEF_HELPER_3(cvtpi2pd, void, env, ZMMReg, MMXReg) +DEF_HELPER_3(cvtsi2ss, void, env, ZMMReg, i32) +DEF_HELPER_3(cvtsi2sd, void, env, ZMMReg, i32) + +#ifdef TARGET_X86_64 +DEF_HELPER_3(cvtsq2ss, void, env, ZMMReg, i64) +DEF_HELPER_3(cvtsq2sd, void, env, ZMMReg, i64) +#endif + +DEF_HELPER_3(cvtps2pi, void, env, MMXReg, ZMMReg) +DEF_HELPER_3(cvtpd2pi, void, env, MMXReg, ZMMReg) +DEF_HELPER_2(cvtss2si, s32, env, ZMMReg) +DEF_HELPER_2(cvtsd2si, s32, env, ZMMReg) +#ifdef TARGET_X86_64 +DEF_HELPER_2(cvtss2sq, s64, env, ZMMReg) +DEF_HELPER_2(cvtsd2sq, s64, env, ZMMReg) +#endif + +DEF_HELPER_3(cvttps2pi, void, env, MMXReg, ZMMReg) +DEF_HELPER_3(cvttpd2pi, void, env, MMXReg, ZMMReg) +DEF_HELPER_2(cvttss2si, s32, env, ZMMReg) +DEF_HELPER_2(cvttsd2si, s32, env, ZMMReg) +#ifdef TARGET_X86_64 +DEF_HELPER_2(cvttss2sq, s64, env, ZMMReg) +DEF_HELPER_2(cvttsd2sq, s64, env, ZMMReg) +#endif +#endif + +DEF_HELPER_3(glue(rsqrtps, SUFFIX), void, env, ZMMReg, ZMMReg) +DEF_HELPER_3(glue(rcpps, SUFFIX), void, env, ZMMReg, ZMMReg) + +#if 
SHIFT == 1 +DEF_HELPER_4(rsqrtss, void, env, ZMMReg, ZMMReg, ZMMReg) +DEF_HELPER_4(rcpss, void, env, ZMMReg, ZMMReg, ZMMReg) +DEF_HELPER_3(extrq_r, void, env, ZMMReg, ZMMReg) +DEF_HELPER_4(extrq_i, void, env, ZMMReg, int, int) +DEF_HELPER_3(insertq_r, void, env, ZMMReg, ZMMReg) +DEF_HELPER_5(insertq_i, void, env, ZMMReg, ZMMReg, int, int) +#endif + +SSE_HELPER_P4(hadd) +SSE_HELPER_P4(hsub) +SSE_HELPER_P4(addsub) + +#define SSE_HELPER_CMP(name, F, C) SSE_HELPER_S4(name) + +SSE_HELPER_CMP(cmpeq, FPU_CMPQ, FPU_EQ) +SSE_HELPER_CMP(cmplt, FPU_CMPS, FPU_LT) +SSE_HELPER_CMP(cmple, FPU_CMPS, FPU_LE) +SSE_HELPER_CMP(cmpunord, FPU_CMPQ, FPU_UNORD) +SSE_HELPER_CMP(cmpneq, FPU_CMPQ, !FPU_EQ) +SSE_HELPER_CMP(cmpnlt, FPU_CMPS, !FPU_LT) +SSE_HELPER_CMP(cmpnle, FPU_CMPS, !FPU_LE) +SSE_HELPER_CMP(cmpord, FPU_CMPQ, !FPU_UNORD) + +SSE_HELPER_CMP(cmpequ, FPU_CMPQ, FPU_EQU) +SSE_HELPER_CMP(cmpnge, FPU_CMPS, !FPU_GE) +SSE_HELPER_CMP(cmpngt, FPU_CMPS, !FPU_GT) +SSE_HELPER_CMP(cmpfalse, FPU_CMPQ, FPU_FALSE) +SSE_HELPER_CMP(cmpnequ, FPU_CMPQ, !FPU_EQU) +SSE_HELPER_CMP(cmpge, FPU_CMPS, FPU_GE) +SSE_HELPER_CMP(cmpgt, FPU_CMPS, FPU_GT) +SSE_HELPER_CMP(cmptrue, FPU_CMPQ, !FPU_FALSE) + +SSE_HELPER_CMP(cmpeqs, FPU_CMPS, FPU_EQ) +SSE_HELPER_CMP(cmpltq, FPU_CMPQ, FPU_LT) +SSE_HELPER_CMP(cmpleq, FPU_CMPQ, FPU_LE) +SSE_HELPER_CMP(cmpunords, FPU_CMPS, FPU_UNORD) +SSE_HELPER_CMP(cmpneqq, FPU_CMPS, !FPU_EQ) +SSE_HELPER_CMP(cmpnltq, FPU_CMPQ, !FPU_LT) +SSE_HELPER_CMP(cmpnleq, FPU_CMPQ, !FPU_LE) +SSE_HELPER_CMP(cmpords, FPU_CMPS, !FPU_UNORD) + +SSE_HELPER_CMP(cmpequs, FPU_CMPS, FPU_EQU) +SSE_HELPER_CMP(cmpngeq, FPU_CMPQ, !FPU_GE) +SSE_HELPER_CMP(cmpngtq, FPU_CMPQ, !FPU_GT) +SSE_HELPER_CMP(cmpfalses, FPU_CMPS, FPU_FALSE) +SSE_HELPER_CMP(cmpnequs, FPU_CMPS, !FPU_EQU) +SSE_HELPER_CMP(cmpgeq, FPU_CMPQ, FPU_GE) +SSE_HELPER_CMP(cmpgtq, FPU_CMPQ, FPU_GT) +SSE_HELPER_CMP(cmptrues, FPU_CMPS, !FPU_FALSE) + +#if SHIFT == 1 +DEF_HELPER_3(ucomiss, void, env, Reg, Reg) +DEF_HELPER_3(comiss, void, env, Reg, Reg) 
+DEF_HELPER_3(ucomisd, void, env, Reg, Reg) +DEF_HELPER_3(comisd, void, env, Reg, Reg) +#endif + +DEF_HELPER_2(glue(movmskps, SUFFIX), i32, env, Reg) +DEF_HELPER_2(glue(movmskpd, SUFFIX), i32, env, Reg) +#endif + +DEF_HELPER_4(glue(packsswb, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(packuswb, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(packssdw, SUFFIX), void, env, Reg, Reg, Reg) +#define UNPCK_OP(name, base) \ + DEF_HELPER_4(glue(punpck ## name ## bw, SUFFIX), void, env, Reg, Reg, Reg) \ + DEF_HELPER_4(glue(punpck ## name ## wd, SUFFIX), void, env, Reg, Reg, Reg) \ + DEF_HELPER_4(glue(punpck ## name ## dq, SUFFIX), void, env, Reg, Reg, Reg) + +UNPCK_OP(l, 0) +UNPCK_OP(h, 1) + +#if SHIFT >= 1 +DEF_HELPER_4(glue(punpcklqdq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(punpckhqdq, SUFFIX), void, env, Reg, Reg, Reg) +#endif + +/* 3DNow! float ops */ +#if SHIFT == 0 +DEF_HELPER_3(pi2fd, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pi2fw, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pf2id, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pf2iw, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfacc, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfadd, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfcmpeq, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfcmpge, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfcmpgt, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfmax, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfmin, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfmul, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfnacc, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfpnacc, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfrcp, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfrsqrt, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfsub, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pfsubr, void, env, MMXReg, MMXReg) +DEF_HELPER_3(pswapd, void, env, MMXReg, MMXReg) +#endif + +/* SSSE3 op helpers */ +DEF_HELPER_4(glue(phaddw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(phaddd, SUFFIX), void, env, Reg, Reg, Reg) 
+DEF_HELPER_4(glue(phaddsw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(phsubw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(phsubd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(phsubsw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pmaddubsw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pmulhrsw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(pshufb, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psignb, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psignw, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(psignd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_5(glue(palignr, SUFFIX), void, env, Reg, Reg, Reg, i32) + +/* SSE4.1 op helpers */ +#if SHIFT >= 1 +DEF_HELPER_5(glue(pblendvb, SUFFIX), void, env, Reg, Reg, Reg, Reg) +DEF_HELPER_5(glue(blendvps, SUFFIX), void, env, Reg, Reg, Reg, Reg) +DEF_HELPER_5(glue(blendvpd, SUFFIX), void, env, Reg, Reg, Reg, Reg) +DEF_HELPER_3(glue(ptest, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovsxbw, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovsxbd, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovsxbq, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovsxwd, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovsxwq, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovsxdq, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovzxbw, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovzxbd, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovzxbq, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovzxwd, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovzxwq, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovzxdq, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovsldup, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovshdup, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(pmovdldup, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(pmuldq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(packusdw, SUFFIX), void, env, 
Reg, Reg, Reg) +#if SHIFT == 1 +DEF_HELPER_3(glue(phminposuw, SUFFIX), void, env, Reg, Reg) +#endif +DEF_HELPER_4(glue(roundps, SUFFIX), void, env, Reg, Reg, i32) +DEF_HELPER_4(glue(roundpd, SUFFIX), void, env, Reg, Reg, i32) +#if SHIFT == 1 +DEF_HELPER_5(roundss_xmm, void, env, Reg, Reg, Reg, i32) +DEF_HELPER_5(roundsd_xmm, void, env, Reg, Reg, Reg, i32) +#endif +DEF_HELPER_5(glue(blendps, SUFFIX), void, env, Reg, Reg, Reg, i32) +DEF_HELPER_5(glue(blendpd, SUFFIX), void, env, Reg, Reg, Reg, i32) +DEF_HELPER_5(glue(pblendw, SUFFIX), void, env, Reg, Reg, Reg, i32) +DEF_HELPER_5(glue(dpps, SUFFIX), void, env, Reg, Reg, Reg, i32) +#if SHIFT == 1 +DEF_HELPER_5(glue(dppd, SUFFIX), void, env, Reg, Reg, Reg, i32) +#endif +DEF_HELPER_5(glue(mpsadbw, SUFFIX), void, env, Reg, Reg, Reg, i32) +#endif + +/* SSE4.2 op helpers */ +#if SHIFT == 1 +DEF_HELPER_4(glue(pcmpestri, SUFFIX), void, env, Reg, Reg, i32) +DEF_HELPER_4(glue(pcmpestrm, SUFFIX), void, env, Reg, Reg, i32) +DEF_HELPER_4(glue(pcmpistri, SUFFIX), void, env, Reg, Reg, i32) +DEF_HELPER_4(glue(pcmpistrm, SUFFIX), void, env, Reg, Reg, i32) +DEF_HELPER_3(crc32, tl, i32, tl, i32) +#endif + +/* AES-NI op helpers */ +#if SHIFT >= 1 +DEF_HELPER_4(glue(aesdec, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(aesdeclast, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(aesenc, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(aesenclast, SUFFIX), void, env, Reg, Reg, Reg) +#if SHIFT == 1 +DEF_HELPER_3(glue(aesimc, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(aeskeygenassist, SUFFIX), void, env, Reg, Reg, i32) +#endif +DEF_HELPER_5(glue(pclmulqdq, SUFFIX), void, env, Reg, Reg, Reg, i32) +#endif + +/* F16C helpers */ +#if SHIFT >= 1 +DEF_HELPER_3(glue(cvtph2ps, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(cvtps2ph, SUFFIX), void, env, Reg, Reg, int) +#endif + +/* FMA3 helpers */ +#if SHIFT == 1 +DEF_HELPER_6(fma4ss, void, env, Reg, Reg, Reg, Reg, int) +DEF_HELPER_6(fma4sd, void, env, Reg, Reg, Reg, Reg, 
int) +#endif + +#if SHIFT >= 1 +DEF_HELPER_7(glue(fma4ps, SUFFIX), void, env, Reg, Reg, Reg, Reg, int, int) +DEF_HELPER_7(glue(fma4pd, SUFFIX), void, env, Reg, Reg, Reg, Reg, int, int) +#endif + +/* AVX helpers */ +#if SHIFT >= 1 +DEF_HELPER_4(glue(vpermilpd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpermilps, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_3(glue(vpermilpd_imm, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_3(glue(vpermilps_imm, SUFFIX), void, Reg, Reg, i32) +DEF_HELPER_4(glue(vpsrlvd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpsravd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpsllvd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpsrlvq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpsravq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpsllvq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_3(glue(vtestps, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_3(glue(vtestpd, SUFFIX), void, env, Reg, Reg) +DEF_HELPER_4(glue(vpmaskmovd_st, SUFFIX), void, env, Reg, Reg, tl) +DEF_HELPER_4(glue(vpmaskmovq_st, SUFFIX), void, env, Reg, Reg, tl) +DEF_HELPER_4(glue(vpmaskmovd, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_4(glue(vpmaskmovq, SUFFIX), void, env, Reg, Reg, Reg) +DEF_HELPER_6(glue(vpgatherdd, SUFFIX), void, env, Reg, Reg, Reg, tl, i32) +DEF_HELPER_6(glue(vpgatherdq, SUFFIX), void, env, Reg, Reg, Reg, tl, i32) +DEF_HELPER_6(glue(vpgatherqd, SUFFIX), void, env, Reg, Reg, Reg, tl, i32) +DEF_HELPER_6(glue(vpgatherqq, SUFFIX), void, env, Reg, Reg, Reg, tl, i32) +#if SHIFT == 2 +DEF_HELPER_3(vpermd_ymm, void, Reg, Reg, Reg) +DEF_HELPER_4(vpermdq_ymm, void, Reg, Reg, Reg, i32) +DEF_HELPER_3(vpermq_ymm, void, Reg, Reg, i32) +#endif +#endif + +/* SHA helpers */ +#if SHIFT == 1 +DEF_HELPER_3(sha1rnds4_f0, void, Reg, Reg, Reg) +DEF_HELPER_3(sha1rnds4_f1, void, Reg, Reg, Reg) +DEF_HELPER_3(sha1rnds4_f2, void, Reg, Reg, Reg) +DEF_HELPER_3(sha1rnds4_f3, void, Reg, Reg, Reg) +DEF_HELPER_3(sha1nexte, void, Reg, 
Reg, Reg) +DEF_HELPER_3(sha1msg1, void, Reg, Reg, Reg) +DEF_HELPER_3(sha1msg2, void, Reg, Reg, Reg) +DEF_HELPER_5(sha256rnds2, void, Reg, Reg, Reg, i32, i32) +DEF_HELPER_3(sha256msg1, void, Reg, Reg, Reg) +DEF_HELPER_3(sha256msg2, void, Reg, Reg, Reg) +#endif + +#undef SHIFT +#undef Reg +#undef SUFFIX + +#undef SSE_HELPER_B +#undef SSE_HELPER_W +#undef SSE_HELPER_L +#undef SSE_HELPER_Q +#undef SSE_HELPER_S3 +#undef SSE_HELPER_S4 +#undef SSE_HELPER_P3 +#undef SSE_HELPER_P4 +#undef SSE_HELPER_CMP +#undef UNPCK_OP diff --git a/target/i386/seg_helper.c b/target/i386/tcg/seg_helper.c index 33714bc6e1..34ccabd8ce 100644 --- a/target/i386/seg_helper.c +++ b/target/i386/tcg/seg_helper.c @@ -7,7 +7,7 @@ * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -25,49 +25,44 @@ #include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/log.h" +#include "helper-tcg.h" +#include "seg_helper.h" -//#define DEBUG_PCALL - -#ifdef DEBUG_PCALL -# define LOG_PCALL(...) qemu_log_mask(CPU_LOG_PCALL, ## __VA_ARGS__) -# define LOG_PCALL_STATE(cpu) \ - log_cpu_state_mask(CPU_LOG_PCALL, (cpu), CPU_DUMP_CCOP) -#else -# define LOG_PCALL(...) 
do { } while (0) -# define LOG_PCALL_STATE(cpu) do { } while (0) -#endif - -#ifdef CONFIG_USER_ONLY -#define MEMSUFFIX _kernel -#define DATA_SIZE 1 -#include "exec/cpu_ldst_useronly_template.h" - -#define DATA_SIZE 2 -#include "exec/cpu_ldst_useronly_template.h" - -#define DATA_SIZE 4 -#include "exec/cpu_ldst_useronly_template.h" - -#define DATA_SIZE 8 -#include "exec/cpu_ldst_useronly_template.h" -#undef MEMSUFFIX -#else -#define CPU_MMU_INDEX (cpu_mmu_index_kernel(env)) -#define MEMSUFFIX _kernel -#define DATA_SIZE 1 -#include "exec/cpu_ldst_template.h" - -#define DATA_SIZE 2 -#include "exec/cpu_ldst_template.h" - -#define DATA_SIZE 4 -#include "exec/cpu_ldst_template.h" - -#define DATA_SIZE 8 -#include "exec/cpu_ldst_template.h" -#undef CPU_MMU_INDEX -#undef MEMSUFFIX -#endif +int get_pg_mode(CPUX86State *env) +{ + int pg_mode = 0; + if (!(env->cr[0] & CR0_PG_MASK)) { + return 0; + } + if (env->cr[0] & CR0_WP_MASK) { + pg_mode |= PG_MODE_WP; + } + if (env->cr[4] & CR4_PAE_MASK) { + pg_mode |= PG_MODE_PAE; + if (env->efer & MSR_EFER_NXE) { + pg_mode |= PG_MODE_NXE; + } + } + if (env->cr[4] & CR4_PSE_MASK) { + pg_mode |= PG_MODE_PSE; + } + if (env->cr[4] & CR4_SMEP_MASK) { + pg_mode |= PG_MODE_SMEP; + } + if (env->hflags & HF_LMA_MASK) { + pg_mode |= PG_MODE_LMA; + if (env->cr[4] & CR4_PKE_MASK) { + pg_mode |= PG_MODE_PKE; + } + if (env->cr[4] & CR4_PKS_MASK) { + pg_mode |= PG_MODE_PKS; + } + if (env->cr[4] & CR4_LA57_MASK) { + pg_mode |= PG_MODE_LA57; + } + } + return pg_mode; +} /* return non zero if error */ static inline int load_segment_ra(CPUX86State *env, uint32_t *e1_ptr, @@ -137,7 +132,7 @@ static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr, uint32_t *esp_ptr, int dpl, uintptr_t retaddr) { - X86CPU *cpu = x86_env_get_cpu(env); + X86CPU *cpu = env_archcpu(env); int type, index, shift; #if 0 @@ -175,8 +170,8 @@ static inline void get_ss_esp_from_tss(CPUX86State *env, uint32_t *ss_ptr, } } -static void tss_load_seg(CPUX86State *env, 
int seg_reg, int selector, int cpl, - uintptr_t retaddr) +static void tss_load_seg(CPUX86State *env, X86Seg seg_reg, int selector, + int cpl, uintptr_t retaddr) { uint32_t e1, e2; int rpl, dpl; @@ -231,14 +226,29 @@ static void tss_load_seg(CPUX86State *env, int seg_reg, int selector, int cpl, } } +static void tss_set_busy(CPUX86State *env, int tss_selector, bool value, + uintptr_t retaddr) +{ + target_ulong ptr = env->gdt.base + (tss_selector & ~7); + uint32_t e2 = cpu_ldl_kernel_ra(env, ptr + 4, retaddr); + + if (value) { + e2 |= DESC_TSS_BUSY_MASK; + } else { + e2 &= ~DESC_TSS_BUSY_MASK; + } + + cpu_stl_kernel_ra(env, ptr + 4, e2, retaddr); +} + #define SWITCH_TSS_JMP 0 #define SWITCH_TSS_IRET 1 #define SWITCH_TSS_CALL 2 -/* XXX: restore CPU state in registers (PowerPC case) */ -static void switch_tss_ra(CPUX86State *env, int tss_selector, - uint32_t e1, uint32_t e2, int source, - uint32_t next_eip, uintptr_t retaddr) +/* return 0 if switching to a 16-bit selector */ +static int switch_tss_ra(CPUX86State *env, int tss_selector, + uint32_t e1, uint32_t e2, int source, + uint32_t next_eip, uintptr_t retaddr) { int tss_limit, tss_limit_max, type, old_tss_limit_max, old_type, v1, v2, i; target_ulong tss_base; @@ -318,11 +328,10 @@ static void switch_tss_ra(CPUX86State *env, int tss_selector, new_eip = cpu_lduw_kernel_ra(env, tss_base + 0x0e, retaddr); new_eflags = cpu_lduw_kernel_ra(env, tss_base + 0x10, retaddr); for (i = 0; i < 8; i++) { - new_regs[i] = cpu_lduw_kernel_ra(env, tss_base + (0x12 + i * 2), - retaddr) | 0xffff0000; + new_regs[i] = cpu_lduw_kernel_ra(env, tss_base + (0x12 + i * 2), retaddr); } for (i = 0; i < 4; i++) { - new_segs[i] = cpu_lduw_kernel_ra(env, tss_base + (0x22 + i * 4), + new_segs[i] = cpu_lduw_kernel_ra(env, tss_base + (0x22 + i * 2), retaddr); } new_ldt = cpu_lduw_kernel_ra(env, tss_base + 0x2a, retaddr); @@ -347,13 +356,7 @@ static void switch_tss_ra(CPUX86State *env, int tss_selector, /* clear busy bit (it is restartable) */ if 
(source == SWITCH_TSS_JMP || source == SWITCH_TSS_IRET) { - target_ulong ptr; - uint32_t e2; - - ptr = env->gdt.base + (env->tr.selector & ~7); - e2 = cpu_ldl_kernel_ra(env, ptr + 4, retaddr); - e2 &= ~DESC_TSS_BUSY_MASK; - cpu_stl_kernel_ra(env, ptr + 4, e2, retaddr); + tss_set_busy(env, env->tr.selector, 0, retaddr); } old_eflags = cpu_compute_eflags(env); if (source == SWITCH_TSS_IRET) { @@ -361,7 +364,7 @@ static void switch_tss_ra(CPUX86State *env, int tss_selector, } /* save the current state in the old TSS */ - if (type & 8) { + if (old_type & 8) { /* 32 bit */ cpu_stl_kernel_ra(env, env->tr.base + 0x20, next_eip, retaddr); cpu_stl_kernel_ra(env, env->tr.base + 0x24, old_eflags, retaddr); @@ -390,7 +393,7 @@ static void switch_tss_ra(CPUX86State *env, int tss_selector, cpu_stw_kernel_ra(env, env->tr.base + (0x12 + 6 * 2), env->regs[R_ESI], retaddr); cpu_stw_kernel_ra(env, env->tr.base + (0x12 + 7 * 2), env->regs[R_EDI], retaddr); for (i = 0; i < 4; i++) { - cpu_stw_kernel_ra(env, env->tr.base + (0x22 + i * 4), + cpu_stw_kernel_ra(env, env->tr.base + (0x22 + i * 2), env->segs[i].selector, retaddr); } } @@ -405,13 +408,7 @@ static void switch_tss_ra(CPUX86State *env, int tss_selector, /* set busy bit */ if (source == SWITCH_TSS_JMP || source == SWITCH_TSS_CALL) { - target_ulong ptr; - uint32_t e2; - - ptr = env->gdt.base + (tss_selector & ~7); - e2 = cpu_ldl_kernel_ra(env, ptr + 4, retaddr); - e2 |= DESC_TSS_BUSY_MASK; - cpu_stl_kernel_ra(env, ptr + 4, e2, retaddr); + tss_set_busy(env, tss_selector, 1, retaddr); } /* set the new CPU state */ @@ -432,19 +429,17 @@ static void switch_tss_ra(CPUX86State *env, int tss_selector, env->eip = new_eip; eflags_mask = TF_MASK | AC_MASK | ID_MASK | IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | NT_MASK; - if (!(type & 8)) { - eflags_mask &= 0xffff; + if (type & 8) { + cpu_load_eflags(env, new_eflags, eflags_mask); + for (i = 0; i < 8; i++) { + env->regs[i] = new_regs[i]; + } + } else { + cpu_load_eflags(env, new_eflags, 
eflags_mask & 0xffff); + for (i = 0; i < 8; i++) { + env->regs[i] = (env->regs[i] & 0xffff0000) | new_regs[i]; + } } - cpu_load_eflags(env, new_eflags, eflags_mask); - /* XXX: what to do in 16 bit case? */ - env->regs[R_EAX] = new_regs[0]; - env->regs[R_ECX] = new_regs[1]; - env->regs[R_EDX] = new_regs[2]; - env->regs[R_EBX] = new_regs[3]; - env->regs[R_ESP] = new_regs[4]; - env->regs[R_EBP] = new_regs[5]; - env->regs[R_ESI] = new_regs[6]; - env->regs[R_EDI] = new_regs[7]; if (new_eflags & VM_MASK) { for (i = 0; i < 6; i++) { load_seg_vm(env, i, new_segs[i]); @@ -507,13 +502,14 @@ static void switch_tss_ra(CPUX86State *env, int tss_selector, cpu_x86_update_dr7(env, env->dr[7] & ~DR7_LOCAL_BP_MASK); } #endif + return type >> 3; } -static void switch_tss(CPUX86State *env, int tss_selector, - uint32_t e1, uint32_t e2, int source, - uint32_t next_eip) +static int switch_tss(CPUX86State *env, int tss_selector, + uint32_t e1, uint32_t e2, int source, + uint32_t next_eip) { - switch_tss_ra(env, tss_selector, e1, e2, source, next_eip, 0); + return switch_tss_ra(env, tss_selector, e1, e2, source, next_eip, 0); } static inline unsigned int get_sp_mask(unsigned int e2) @@ -530,7 +526,7 @@ static inline unsigned int get_sp_mask(unsigned int e2) } } -static int exception_has_error_code(int intno) +int exception_has_error_code(int intno) { switch (intno) { case 8: @@ -633,18 +629,33 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, type = (e2 >> DESC_TYPE_SHIFT) & 0x1f; switch (type) { case 5: /* task gate */ + case 6: /* 286 interrupt gate */ + case 7: /* 286 trap gate */ + case 14: /* 386 interrupt gate */ + case 15: /* 386 trap gate */ + break; + default: + raise_exception_err(env, EXCP0D_GPF, intno * 8 + 2); + break; + } + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + /* check privilege if software int */ + if (is_int && dpl < cpl) { + raise_exception_err(env, EXCP0D_GPF, intno * 8 + 2); + } + + if (type == 5) { + /* task 
gate */ /* must do that check here to return the correct error code */ if (!(e2 & DESC_P_MASK)) { raise_exception_err(env, EXCP0B_NOSEG, intno * 8 + 2); } - switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip); + shift = switch_tss(env, intno * 8, e1, e2, SWITCH_TSS_CALL, old_eip); if (has_error_code) { - int type; uint32_t mask; /* push the error code */ - type = (env->tr.flags >> DESC_TYPE_SHIFT) & 0xf; - shift = type >> 3; if (env->segs[R_SS].flags & DESC_B_MASK) { mask = 0xffffffff; } else { @@ -660,21 +671,10 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, SET_ESP(esp, mask); } return; - case 6: /* 286 interrupt gate */ - case 7: /* 286 trap gate */ - case 14: /* 386 interrupt gate */ - case 15: /* 386 trap gate */ - break; - default: - raise_exception_err(env, EXCP0D_GPF, intno * 8 + 2); - break; - } - dpl = (e2 >> DESC_DPL_SHIFT) & 3; - cpl = env->hflags & HF_CPL_MASK; - /* check privilege if software int */ - if (is_int && dpl < cpl) { - raise_exception_err(env, EXCP0D_GPF, intno * 8 + 2); } + + /* Otherwise, trap or interrupt gate */ + /* check valid bit */ if (!(e2 & DESC_P_MASK)) { raise_exception_err(env, EXCP0B_NOSEG, intno * 8 + 2); @@ -830,8 +830,10 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, static inline target_ulong get_rsp_from_tss(CPUX86State *env, int level) { - X86CPU *cpu = x86_env_get_cpu(env); - int index; + X86CPU *cpu = env_archcpu(env); + int index, pg_mode; + target_ulong rsp; + int32_t sext; #if 0 printf("TR: base=" TARGET_FMT_lx " limit=%x\n", @@ -845,7 +847,17 @@ static inline target_ulong get_rsp_from_tss(CPUX86State *env, int level) if ((index + 7) > env->tr.limit) { raise_exception_err(env, EXCP0A_TSS, env->tr.selector & 0xfffc); } - return cpu_ldq_kernel(env, env->tr.base + index); + + rsp = cpu_ldq_kernel(env, env->tr.base + index); + + /* test virtual address sign extension */ + pg_mode = get_pg_mode(env); + sext = (int64_t)rsp >> (pg_mode & PG_MODE_LA57 
? 56 : 47); + if (sext != 0 && sext != -1) { + raise_exception_err(env, EXCP0C_STACK, 0); + } + + return rsp; } /* 64 bit interrupt */ @@ -871,7 +883,7 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int, dt = &env->idt; if (intno * 16 + 15 > dt->limit) { - raise_exception_err(env, EXCP0D_GPF, intno * 16 + 2); + raise_exception_err(env, EXCP0D_GPF, intno * 8 + 2); } ptr = dt->base + intno * 16; e1 = cpu_ldl_kernel(env, ptr); @@ -884,18 +896,18 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int, case 15: /* 386 trap gate */ break; default: - raise_exception_err(env, EXCP0D_GPF, intno * 16 + 2); + raise_exception_err(env, EXCP0D_GPF, intno * 8 + 2); break; } dpl = (e2 >> DESC_DPL_SHIFT) & 3; cpl = env->hflags & HF_CPL_MASK; /* check privilege if software int */ if (is_int && dpl < cpl) { - raise_exception_err(env, EXCP0D_GPF, intno * 16 + 2); + raise_exception_err(env, EXCP0D_GPF, intno * 8 + 2); } /* check valid bit */ if (!(e2 & DESC_P_MASK)) { - raise_exception_err(env, EXCP0B_NOSEG, intno * 16 + 2); + raise_exception_err(env, EXCP0B_NOSEG, intno * 8 + 2); } selector = e1 >> 16; offset = ((target_ulong)e3 << 32) | (e2 & 0xffff0000) | (e1 & 0x0000ffff); @@ -966,74 +978,8 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int, e2); env->eip = offset; } -#endif - -#ifdef TARGET_X86_64 -#if defined(CONFIG_USER_ONLY) -void helper_syscall(CPUX86State *env, int next_eip_addend) -{ - CPUState *cs = CPU(x86_env_get_cpu(env)); - - cs->exception_index = EXCP_SYSCALL; - env->exception_next_eip = env->eip + next_eip_addend; - cpu_loop_exit(cs); -} -#else -void helper_syscall(CPUX86State *env, int next_eip_addend) -{ - int selector; - - if (!(env->efer & MSR_EFER_SCE)) { - raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC()); - } - selector = (env->star >> 32) & 0xffff; - if (env->hflags & HF_LMA_MASK) { - int code64; - - env->regs[R_ECX] = env->eip + next_eip_addend; - env->regs[11] = cpu_compute_eflags(env); - - code64 = 
env->hflags & HF_CS64_MASK; - - env->eflags &= ~env->fmask; - cpu_load_eflags(env, env->eflags, 0); - cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc, - 0, 0xffffffff, - DESC_G_MASK | DESC_P_MASK | - DESC_S_MASK | - DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | - DESC_L_MASK); - cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc, - 0, 0xffffffff, - DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | - DESC_S_MASK | - DESC_W_MASK | DESC_A_MASK); - if (code64) { - env->eip = env->lstar; - } else { - env->eip = env->cstar; - } - } else { - env->regs[R_ECX] = (uint32_t)(env->eip + next_eip_addend); - - env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK); - cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc, - 0, 0xffffffff, - DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | - DESC_S_MASK | - DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); - cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc, - 0, 0xffffffff, - DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | - DESC_S_MASK | - DESC_W_MASK | DESC_A_MASK); - env->eip = (uint32_t)env->star; - } -} -#endif -#endif +#endif /* TARGET_X86_64 */ -#ifdef TARGET_X86_64 void helper_sysret(CPUX86State *env, int dflag) { int cpl, selector; @@ -1046,6 +992,7 @@ void helper_sysret(CPUX86State *env, int dflag) raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); } selector = (env->star >> 48) & 0xffff; +#ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { cpu_load_eflags(env, (uint32_t)(env->regs[11]), TF_MASK | AC_MASK | ID_MASK | IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | @@ -1071,7 +1018,9 @@ void helper_sysret(CPUX86State *env, int dflag) DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | (3 << DESC_DPL_SHIFT) | DESC_W_MASK | DESC_A_MASK); - } else { + } else +#endif + { env->eflags |= IF_MASK; cpu_x86_load_seg_cache(env, R_CS, selector | 3, 0, 0xffffffff, @@ -1086,7 +1035,6 @@ void helper_sysret(CPUX86State *env, int dflag) DESC_W_MASK | DESC_A_MASK); } } -#endif /* real mode interrupt */ static void do_interrupt_real(CPUX86State *env, int 
intno, int is_int, @@ -1127,84 +1075,13 @@ static void do_interrupt_real(CPUX86State *env, int intno, int is_int, env->eflags &= ~(IF_MASK | TF_MASK | AC_MASK | RF_MASK); } -#if defined(CONFIG_USER_ONLY) -/* fake user mode interrupt. is_int is TRUE if coming from the int - * instruction. next_eip is the env->eip value AFTER the interrupt - * instruction. It is only relevant if is_int is TRUE or if intno - * is EXCP_SYSCALL. - */ -static void do_interrupt_user(CPUX86State *env, int intno, int is_int, - int error_code, target_ulong next_eip) -{ - if (is_int) { - SegmentCache *dt; - target_ulong ptr; - int dpl, cpl, shift; - uint32_t e2; - - dt = &env->idt; - if (env->hflags & HF_LMA_MASK) { - shift = 4; - } else { - shift = 3; - } - ptr = dt->base + (intno << shift); - e2 = cpu_ldl_kernel(env, ptr + 4); - - dpl = (e2 >> DESC_DPL_SHIFT) & 3; - cpl = env->hflags & HF_CPL_MASK; - /* check privilege if software int */ - if (dpl < cpl) { - raise_exception_err(env, EXCP0D_GPF, (intno << shift) + 2); - } - } - - /* Since we emulate only user space, we cannot do more than - exiting the emulation with the suitable exception and error - code. So update EIP for INT 0x80 and EXCP_SYSCALL. 
*/ - if (is_int || intno == EXCP_SYSCALL) { - env->eip = next_eip; - } -} - -#else - -static void handle_even_inj(CPUX86State *env, int intno, int is_int, - int error_code, int is_hw, int rm) -{ - CPUState *cs = CPU(x86_env_get_cpu(env)); - uint32_t event_inj = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, - control.event_inj)); - - if (!(event_inj & SVM_EVTINJ_VALID)) { - int type; - - if (is_int) { - type = SVM_EVTINJ_TYPE_SOFT; - } else { - type = SVM_EVTINJ_TYPE_EXEPT; - } - event_inj = intno | type | SVM_EVTINJ_VALID; - if (!rm && exception_has_error_code(intno)) { - event_inj |= SVM_EVTINJ_VALID_ERR; - x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, - control.event_inj_err), - error_code); - } - x86_stl_phys(cs, - env->vm_vmcb + offsetof(struct vmcb, control.event_inj), - event_inj); - } -} -#endif - /* * Begin execution of an interruption. is_int is TRUE if coming from * the int instruction. next_eip is the env->eip value AFTER the interrupt * instruction. It is only relevant if is_int is TRUE. 
*/ -static void do_interrupt_all(X86CPU *cpu, int intno, int is_int, - int error_code, target_ulong next_eip, int is_hw) +void do_interrupt_all(X86CPU *cpu, int intno, int is_int, + int error_code, target_ulong next_eip, int is_hw) { CPUX86State *env = &cpu->env; @@ -1280,105 +1157,9 @@ static void do_interrupt_all(X86CPU *cpu, int intno, int is_int, #endif } -void x86_cpu_do_interrupt(CPUState *cs) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - -#if defined(CONFIG_USER_ONLY) - /* if user mode only, we simulate a fake exception - which will be handled outside the cpu execution - loop */ - do_interrupt_user(env, cs->exception_index, - env->exception_is_int, - env->error_code, - env->exception_next_eip); - /* successfully delivered */ - env->old_exception = -1; -#else - if (cs->exception_index >= EXCP_VMEXIT) { - assert(env->old_exception == -1); - do_vmexit(env, cs->exception_index - EXCP_VMEXIT, env->error_code); - } else { - do_interrupt_all(cpu, cs->exception_index, - env->exception_is_int, - env->error_code, - env->exception_next_eip, 0); - /* successfully delivered */ - env->old_exception = -1; - } -#endif -} - void do_interrupt_x86_hardirq(CPUX86State *env, int intno, int is_hw) { - do_interrupt_all(x86_env_get_cpu(env), intno, 0, 0, 0, is_hw); -} - -bool x86_cpu_exec_interrupt(CPUState *cs, int interrupt_request) -{ - X86CPU *cpu = X86_CPU(cs); - CPUX86State *env = &cpu->env; - int intno; - - interrupt_request = x86_cpu_pending_interrupt(cs, interrupt_request); - if (!interrupt_request) { - return false; - } - - /* Don't process multiple interrupt requests in a single call. - * This is required to make icount-driven execution deterministic. 
- */ - switch (interrupt_request) { -#if !defined(CONFIG_USER_ONLY) - case CPU_INTERRUPT_POLL: - cs->interrupt_request &= ~CPU_INTERRUPT_POLL; - apic_poll_irq(cpu->apic_state); - break; -#endif - case CPU_INTERRUPT_SIPI: - do_cpu_sipi(cpu); - break; - case CPU_INTERRUPT_SMI: - cpu_svm_check_intercept_param(env, SVM_EXIT_SMI, 0, 0); - cs->interrupt_request &= ~CPU_INTERRUPT_SMI; - do_smm_enter(cpu); - break; - case CPU_INTERRUPT_NMI: - cpu_svm_check_intercept_param(env, SVM_EXIT_NMI, 0, 0); - cs->interrupt_request &= ~CPU_INTERRUPT_NMI; - env->hflags2 |= HF2_NMI_MASK; - do_interrupt_x86_hardirq(env, EXCP02_NMI, 1); - break; - case CPU_INTERRUPT_MCE: - cs->interrupt_request &= ~CPU_INTERRUPT_MCE; - do_interrupt_x86_hardirq(env, EXCP12_MCHK, 0); - break; - case CPU_INTERRUPT_HARD: - cpu_svm_check_intercept_param(env, SVM_EXIT_INTR, 0, 0); - cs->interrupt_request &= ~(CPU_INTERRUPT_HARD | - CPU_INTERRUPT_VIRQ); - intno = cpu_get_pic_interrupt(env); - qemu_log_mask(CPU_LOG_TB_IN_ASM, - "Servicing hardware INT=0x%02x\n", intno); - do_interrupt_x86_hardirq(env, intno, 1); - break; -#if !defined(CONFIG_USER_ONLY) - case CPU_INTERRUPT_VIRQ: - /* FIXME: this should respect TPR */ - cpu_svm_check_intercept_param(env, SVM_EXIT_VINTR, 0, 0); - intno = x86_ldl_phys(cs, env->vm_vmcb - + offsetof(struct vmcb, control.int_vector)); - qemu_log_mask(CPU_LOG_TB_IN_ASM, - "Servicing virtual hardware INT=0x%02x\n", intno); - do_interrupt_x86_hardirq(env, intno, 1); - cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; - break; -#endif - } - - /* Ensure that no TB jump will be modified as the program flow was changed. 
*/ - return true; + do_interrupt_all(env_archcpu(env), intno, 0, 0, 0, is_hw); } void helper_lldt(CPUX86State *env, int selector) @@ -1727,14 +1508,12 @@ void helper_ljmp_protected(CPUX86State *env, int new_cs, target_ulong new_eip, } /* real mode call */ -void helper_lcall_real(CPUX86State *env, int new_cs, target_ulong new_eip1, - int shift, int next_eip) +void helper_lcall_real(CPUX86State *env, uint32_t new_cs, uint32_t new_eip, + int shift, uint32_t next_eip) { - int new_eip; uint32_t esp, esp_mask; target_ulong ssp; - new_eip = new_eip1; esp = env->regs[R_ESP]; esp_mask = get_sp_mask(env->segs[R_SS].flags); ssp = env->segs[R_SS].base; @@ -1763,7 +1542,7 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip, target_ulong ssp, old_ssp, offset, sp; LOG_PCALL("lcall %04x:" TARGET_FMT_lx " s=%d\n", new_cs, new_eip, shift); - LOG_PCALL_STATE(CPU(x86_env_get_cpu(env))); + LOG_PCALL_STATE(env_cpu(env)); if ((new_cs & 0xfffc) == 0) { raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); } @@ -2089,7 +1868,7 @@ void helper_iret_real(CPUX86State *env, int shift) env->hflags2 &= ~HF2_NMI_MASK; } -static inline void validate_seg(CPUX86State *env, int seg_reg, int cpl) +static inline void validate_seg(CPUX86State *env, X86Seg seg_reg, int cpl) { int dpl; uint32_t e2; @@ -2107,7 +1886,10 @@ static inline void validate_seg(CPUX86State *env, int seg_reg, int cpl) if (!(e2 & DESC_CS_MASK) || !(e2 & DESC_C_MASK)) { /* data or non conforming code segment */ if (dpl < cpl) { - cpu_x86_load_seg_cache(env, seg_reg, 0, 0, 0, 0); + cpu_x86_load_seg_cache(env, seg_reg, 0, + env->segs[seg_reg].base, + env->segs[seg_reg].limit, + env->segs[seg_reg].flags & ~DESC_P_MASK); } } } @@ -2167,7 +1949,7 @@ static inline void helper_ret_protected(CPUX86State *env, int shift, } LOG_PCALL("lret new %04x:" TARGET_FMT_lx " s=%d addend=0x%x\n", new_cs, new_eip, shift, addend); - LOG_PCALL_STATE(CPU(x86_env_get_cpu(env))); + LOG_PCALL_STATE(env_cpu(env)); if ((new_cs & 
0xfffc) == 0) { raise_exception_err_ra(env, EXCP0D_GPF, new_cs & 0xfffc, retaddr); } @@ -2448,7 +2230,7 @@ target_ulong helper_lsl(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl, type; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if ((selector & 0xfffc) == 0) { goto fail; } @@ -2495,7 +2277,7 @@ target_ulong helper_lar(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl, type; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if ((selector & 0xfffc) == 0) { goto fail; } @@ -2544,7 +2326,7 @@ void helper_verr(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if ((selector & 0xfffc) == 0) { goto fail; } @@ -2582,7 +2364,7 @@ void helper_verw(CPUX86State *env, target_ulong selector1) int rpl, dpl, cpl; selector = selector1 & 0xffff; - eflags = cpu_cc_compute_all(env, CC_OP); + eflags = cpu_cc_compute_all(env); if ((selector & 0xfffc) == 0) { goto fail; } @@ -2609,62 +2391,3 @@ void helper_verw(CPUX86State *env, target_ulong selector1) } CC_SRC = eflags | CC_Z; } - -#if defined(CONFIG_USER_ONLY) -void cpu_x86_load_seg(CPUX86State *env, int seg_reg, int selector) -{ - if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) { - int dpl = (env->eflags & VM_MASK) ? 
3 : 0; - selector &= 0xffff; - cpu_x86_load_seg_cache(env, seg_reg, selector, - (selector << 4), 0xffff, - DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | - DESC_A_MASK | (dpl << DESC_DPL_SHIFT)); - } else { - helper_load_seg(env, seg_reg, selector); - } -} -#endif - -/* check if Port I/O is allowed in TSS */ -static inline void check_io(CPUX86State *env, int addr, int size, - uintptr_t retaddr) -{ - int io_offset, val, mask; - - /* TSS must be a valid 32 bit one */ - if (!(env->tr.flags & DESC_P_MASK) || - ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 9 || - env->tr.limit < 103) { - goto fail; - } - io_offset = cpu_lduw_kernel_ra(env, env->tr.base + 0x66, retaddr); - io_offset += (addr >> 3); - /* Note: the check needs two bytes */ - if ((io_offset + 1) > env->tr.limit) { - goto fail; - } - val = cpu_lduw_kernel_ra(env, env->tr.base + io_offset, retaddr); - val >>= (addr & 7); - mask = (1 << size) - 1; - /* all bits must be zero to allow the I/O */ - if ((val & mask) != 0) { - fail: - raise_exception_err_ra(env, EXCP0D_GPF, 0, retaddr); - } -} - -void helper_check_iob(CPUX86State *env, uint32_t t0) -{ - check_io(env, t0, 1, GETPC()); -} - -void helper_check_iow(CPUX86State *env, uint32_t t0) -{ - check_io(env, t0, 2, GETPC()); -} - -void helper_check_iol(CPUX86State *env, uint32_t t0) -{ - check_io(env, t0, 4, GETPC()); -} diff --git a/target/i386/tcg/seg_helper.h b/target/i386/tcg/seg_helper.h new file mode 100644 index 0000000000..ebf1035277 --- /dev/null +++ b/target/i386/tcg/seg_helper.h @@ -0,0 +1,66 @@ +/* + * x86 segmentation related helpers macros + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef SEG_HELPER_H +#define SEG_HELPER_H + +//#define DEBUG_PCALL + +#ifdef DEBUG_PCALL +# define LOG_PCALL(...) qemu_log_mask(CPU_LOG_PCALL, ## __VA_ARGS__) +# define LOG_PCALL_STATE(cpu) \ + log_cpu_state_mask(CPU_LOG_PCALL, (cpu), CPU_DUMP_CCOP) +#else +# define LOG_PCALL(...) do { } while (0) +# define LOG_PCALL_STATE(cpu) do { } while (0) +#endif + +/* + * TODO: Convert callers to compute cpu_mmu_index_kernel once + * and use *_mmuidx_ra directly. + */ +#define cpu_ldub_kernel_ra(e, p, r) \ + cpu_ldub_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r) +#define cpu_lduw_kernel_ra(e, p, r) \ + cpu_lduw_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r) +#define cpu_ldl_kernel_ra(e, p, r) \ + cpu_ldl_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r) +#define cpu_ldq_kernel_ra(e, p, r) \ + cpu_ldq_mmuidx_ra(e, p, cpu_mmu_index_kernel(e), r) + +#define cpu_stb_kernel_ra(e, p, v, r) \ + cpu_stb_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r) +#define cpu_stw_kernel_ra(e, p, v, r) \ + cpu_stw_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r) +#define cpu_stl_kernel_ra(e, p, v, r) \ + cpu_stl_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r) +#define cpu_stq_kernel_ra(e, p, v, r) \ + cpu_stq_mmuidx_ra(e, p, v, cpu_mmu_index_kernel(e), r) + +#define cpu_ldub_kernel(e, p) cpu_ldub_kernel_ra(e, p, 0) +#define cpu_lduw_kernel(e, p) cpu_lduw_kernel_ra(e, p, 0) +#define cpu_ldl_kernel(e, p) cpu_ldl_kernel_ra(e, p, 0) +#define cpu_ldq_kernel(e, p) cpu_ldq_kernel_ra(e, p, 0) + +#define cpu_stb_kernel(e, p, v) cpu_stb_kernel_ra(e, p, v, 0) +#define cpu_stw_kernel(e, p, v) 
cpu_stw_kernel_ra(e, p, v, 0) +#define cpu_stl_kernel(e, p, v) cpu_stl_kernel_ra(e, p, v, 0) +#define cpu_stq_kernel(e, p, v) cpu_stq_kernel_ra(e, p, v, 0) + +#endif /* SEG_HELPER_H */ diff --git a/target/i386/shift_helper_template.h b/target/i386/tcg/shift_helper_template.h.inc index cf91a2d284..54f15d6e05 100644 --- a/target/i386/shift_helper_template.h +++ b/target/i386/tcg/shift_helper_template.h.inc @@ -6,7 +6,7 @@ * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of diff --git a/target/i386/bpt_helper.c b/target/i386/tcg/sysemu/bpt_helper.c index b3efdc77ec..4d96a48a3c 100644 --- a/target/i386/bpt_helper.c +++ b/target/i386/tcg/sysemu/bpt_helper.c @@ -1,12 +1,12 @@ /* - * i386 breakpoint helpers + * i386 breakpoint helpers - sysemu code * * Copyright (c) 2003 Fabrice Bellard * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -21,9 +21,9 @@ #include "cpu.h" #include "exec/exec-all.h" #include "exec/helper-proto.h" +#include "tcg/helper-tcg.h" -#ifndef CONFIG_USER_ONLY static inline bool hw_local_breakpoint_enabled(unsigned long dr7, int index) { return (dr7 >> (index * 2)) & 1; @@ -53,7 +53,7 @@ static inline int hw_breakpoint_len(unsigned long dr7, int index) static int hw_breakpoint_insert(CPUX86State *env, int index) { - CPUState *cs = CPU(x86_env_get_cpu(env)); + CPUState *cs = env_cpu(env); target_ulong dr7 = env->dr[7]; target_ulong drN = env->dr[index]; int err = 0; @@ -97,7 +97,7 @@ static int hw_breakpoint_insert(CPUX86State *env, int index) static void hw_breakpoint_remove(CPUX86State *env, int index) { - CPUState *cs = CPU(x86_env_get_cpu(env)); + CPUState *cs = env_cpu(env); switch (hw_breakpoint_type(env->dr[7], index)) { case DR7_TYPE_BP_INST: @@ -109,9 +109,9 @@ static void hw_breakpoint_remove(CPUX86State *env, int index) case DR7_TYPE_DATA_WR: case DR7_TYPE_DATA_RW: - if (env->cpu_breakpoint[index]) { + if (env->cpu_watchpoint[index]) { cpu_watchpoint_remove_by_ref(cs, env->cpu_watchpoint[index]); - env->cpu_breakpoint[index] = NULL; + env->cpu_watchpoint[index] = NULL; } break; @@ -164,7 +164,7 @@ void cpu_x86_update_dr7(CPUX86State *env, uint32_t new_dr7) env->hflags = (env->hflags & ~HF_IOBPT_MASK) | iobpt; } -static bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update) +bool check_hw_breakpoints(CPUX86State *env, bool force_dr6_update) { target_ulong dr6; int reg; @@ -210,7 +210,6 @@ void breakpoint_handler(CPUState *cs) { X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; - CPUBreakpoint *bp; if (cs->watchpoint_hit) { if (cs->watchpoint_hit->flags & BP_CPU) { @@ -222,40 +221,37 @@ void breakpoint_handler(CPUState *cs) } } } else { - QTAILQ_FOREACH(bp, &cs->breakpoints, entry) { - if (bp->pc == env->eip) { - if 
(bp->flags & BP_CPU) { - check_hw_breakpoints(env, true); - raise_exception(env, EXCP01_DB); - } - break; - } + if (cpu_breakpoint_test(cs, env->eip, BP_CPU)) { + check_hw_breakpoints(env, true); + raise_exception(env, EXCP01_DB); } } } -#endif -void helper_single_step(CPUX86State *env) -{ -#ifndef CONFIG_USER_ONLY - check_hw_breakpoints(env, true); - env->dr[6] |= DR6_BS; -#endif - raise_exception(env, EXCP01_DB); -} - -void helper_rechecking_single_step(CPUX86State *env) +target_ulong helper_get_dr(CPUX86State *env, int reg) { - if ((env->eflags & TF_MASK) != 0) { - helper_single_step(env); + if (reg >= 4 && reg < 6) { + if (env->cr[4] & CR4_DE_MASK) { + raise_exception_ra(env, EXCP06_ILLOP, GETPC()); + } else { + reg += 2; + } } + + return env->dr[reg]; } void helper_set_dr(CPUX86State *env, int reg, target_ulong t0) { -#ifndef CONFIG_USER_ONLY - switch (reg) { - case 0: case 1: case 2: case 3: + if (reg >= 4 && reg < 6) { + if (env->cr[4] & CR4_DE_MASK) { + raise_exception_ra(env, EXCP06_ILLOP, GETPC()); + } else { + reg += 2; + } + } + + if (reg < 4) { if (hw_breakpoint_enabled(env->dr[7], reg) && hw_breakpoint_type(env->dr[7], reg) != DR7_TYPE_IO_RW) { hw_breakpoint_remove(env, reg); @@ -264,54 +260,22 @@ void helper_set_dr(CPUX86State *env, int reg, target_ulong t0) } else { env->dr[reg] = t0; } - return; - case 4: - if (env->cr[4] & CR4_DE_MASK) { - break; - } - /* fallthru */ - case 6: - env->dr[6] = t0 | DR6_FIXED_1; - return; - case 5: - if (env->cr[4] & CR4_DE_MASK) { - break; - } - /* fallthru */ - case 7: - cpu_x86_update_dr7(env, t0); - return; - } - raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC()); -#endif -} - -target_ulong helper_get_dr(CPUX86State *env, int reg) -{ - switch (reg) { - case 0: case 1: case 2: case 3: case 6: case 7: - return env->dr[reg]; - case 4: - if (env->cr[4] & CR4_DE_MASK) { - break; - } else { - return env->dr[6]; + } else { + if (t0 & DR_RESERVED_MASK) { + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); } - case 
5: - if (env->cr[4] & CR4_DE_MASK) { - break; + if (reg == 6) { + env->dr[6] = t0 | DR6_FIXED_1; } else { - return env->dr[7]; + cpu_x86_update_dr7(env, t0); } } - raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC()); } /* Check if Port I/O is trapped by a breakpoint. */ void helper_bpt_io(CPUX86State *env, uint32_t port, uint32_t size, target_ulong next_eip) { -#ifndef CONFIG_USER_ONLY target_ulong dr7 = env->dr[7]; int i, hit = 0; @@ -331,5 +295,4 @@ void helper_bpt_io(CPUX86State *env, uint32_t port, env->eip = next_eip; raise_exception(env, EXCP01_DB); } -#endif } diff --git a/target/i386/tcg/sysemu/excp_helper.c b/target/i386/tcg/sysemu/excp_helper.c new file mode 100644 index 0000000000..7a57b7dd10 --- /dev/null +++ b/target/i386/tcg/sysemu/excp_helper.c @@ -0,0 +1,643 @@ +/* + * x86 exception helpers - sysemu code + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/cpu_ldst.h" +#include "exec/exec-all.h" +#include "tcg/helper-tcg.h" + +typedef struct TranslateParams { + target_ulong addr; + target_ulong cr3; + int pg_mode; + int mmu_idx; + int ptw_idx; + MMUAccessType access_type; +} TranslateParams; + +typedef struct TranslateResult { + hwaddr paddr; + int prot; + int page_size; +} TranslateResult; + +typedef enum TranslateFaultStage2 { + S2_NONE, + S2_GPA, + S2_GPT, +} TranslateFaultStage2; + +typedef struct TranslateFault { + int exception_index; + int error_code; + target_ulong cr2; + TranslateFaultStage2 stage2; +} TranslateFault; + +typedef struct PTETranslate { + CPUX86State *env; + TranslateFault *err; + int ptw_idx; + void *haddr; + hwaddr gaddr; +} PTETranslate; + +static bool ptw_translate(PTETranslate *inout, hwaddr addr, uint64_t ra) +{ + CPUTLBEntryFull *full; + int flags; + + inout->gaddr = addr; + flags = probe_access_full(inout->env, addr, 0, MMU_DATA_STORE, + inout->ptw_idx, true, &inout->haddr, &full, ra); + + if (unlikely(flags & TLB_INVALID_MASK)) { + TranslateFault *err = inout->err; + + assert(inout->ptw_idx == MMU_NESTED_IDX); + *err = (TranslateFault){ + .error_code = inout->env->error_code, + .cr2 = addr, + .stage2 = S2_GPT, + }; + return false; + } + return true; +} + +static inline uint32_t ptw_ldl(const PTETranslate *in, uint64_t ra) +{ + if (likely(in->haddr)) { + return ldl_p(in->haddr); + } + return cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, ra); +} + +static inline uint64_t ptw_ldq(const PTETranslate *in, uint64_t ra) +{ + if (likely(in->haddr)) { + return ldq_p(in->haddr); + } + return cpu_ldq_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, ra); +} + +/* + * Note that we can use a 32-bit cmpxchg for all page table entries, + * even 64-bit ones, because PG_PRESENT_MASK, PG_ACCESSED_MASK and + * PG_DIRTY_MASK are all in the low 32 bits. 
+ */ +static bool ptw_setl_slow(const PTETranslate *in, uint32_t old, uint32_t new) +{ + uint32_t cmp; + + /* Does x86 really perform a rmw cycle on mmio for ptw? */ + start_exclusive(); + cmp = cpu_ldl_mmuidx_ra(in->env, in->gaddr, in->ptw_idx, 0); + if (cmp == old) { + cpu_stl_mmuidx_ra(in->env, in->gaddr, new, in->ptw_idx, 0); + } + end_exclusive(); + return cmp == old; +} + +static inline bool ptw_setl(const PTETranslate *in, uint32_t old, uint32_t set) +{ + if (set & ~old) { + uint32_t new = old | set; + if (likely(in->haddr)) { + old = cpu_to_le32(old); + new = cpu_to_le32(new); + return qatomic_cmpxchg((uint32_t *)in->haddr, old, new) == old; + } + return ptw_setl_slow(in, old, new); + } + return true; +} + +static bool mmu_translate(CPUX86State *env, const TranslateParams *in, + TranslateResult *out, TranslateFault *err, + uint64_t ra) +{ + const target_ulong addr = in->addr; + const int pg_mode = in->pg_mode; + const bool is_user = is_mmu_index_user(in->mmu_idx); + const MMUAccessType access_type = in->access_type; + uint64_t ptep, pte, rsvd_mask; + PTETranslate pte_trans = { + .env = env, + .err = err, + .ptw_idx = in->ptw_idx, + }; + hwaddr pte_addr, paddr; + uint32_t pkr; + int page_size; + int error_code; + + restart_all: + rsvd_mask = ~MAKE_64BIT_MASK(0, env_archcpu(env)->phys_bits); + rsvd_mask &= PG_ADDRESS_MASK; + if (!(pg_mode & PG_MODE_NXE)) { + rsvd_mask |= PG_NX_MASK; + } + + if (pg_mode & PG_MODE_PAE) { +#ifdef TARGET_X86_64 + if (pg_mode & PG_MODE_LMA) { + if (pg_mode & PG_MODE_LA57) { + /* + * Page table level 5 + */ + pte_addr = (in->cr3 & ~0xfff) + (((addr >> 48) & 0x1ff) << 3); + if (!ptw_translate(&pte_trans, pte_addr, ra)) { + return false; + } + restart_5: + pte = ptw_ldq(&pte_trans, ra); + if (!(pte & PG_PRESENT_MASK)) { + goto do_fault; + } + if (pte & (rsvd_mask | PG_PSE_MASK)) { + goto do_fault_rsvd; + } + if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) { + goto restart_5; + } + ptep = pte ^ PG_NX_MASK; + } else { + pte = 
in->cr3; + ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK; + } + + /* + * Page table level 4 + */ + pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 39) & 0x1ff) << 3); + if (!ptw_translate(&pte_trans, pte_addr, ra)) { + return false; + } + restart_4: + pte = ptw_ldq(&pte_trans, ra); + if (!(pte & PG_PRESENT_MASK)) { + goto do_fault; + } + if (pte & (rsvd_mask | PG_PSE_MASK)) { + goto do_fault_rsvd; + } + if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) { + goto restart_4; + } + ptep &= pte ^ PG_NX_MASK; + + /* + * Page table level 3 + */ + pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3); + if (!ptw_translate(&pte_trans, pte_addr, ra)) { + return false; + } + restart_3_lma: + pte = ptw_ldq(&pte_trans, ra); + if (!(pte & PG_PRESENT_MASK)) { + goto do_fault; + } + if (pte & rsvd_mask) { + goto do_fault_rsvd; + } + if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) { + goto restart_3_lma; + } + ptep &= pte ^ PG_NX_MASK; + if (pte & PG_PSE_MASK) { + /* 1 GB page */ + page_size = 1024 * 1024 * 1024; + goto do_check_protect; + } + } else +#endif + { + /* + * Page table level 3 + */ + pte_addr = (in->cr3 & 0xffffffe0ULL) + ((addr >> 27) & 0x18); + if (!ptw_translate(&pte_trans, pte_addr, ra)) { + return false; + } + rsvd_mask |= PG_HI_USER_MASK; + restart_3_nolma: + pte = ptw_ldq(&pte_trans, ra); + if (!(pte & PG_PRESENT_MASK)) { + goto do_fault; + } + if (pte & (rsvd_mask | PG_NX_MASK)) { + goto do_fault_rsvd; + } + if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) { + goto restart_3_nolma; + } + ptep = PG_NX_MASK | PG_USER_MASK | PG_RW_MASK; + } + + /* + * Page table level 2 + */ + pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3); + if (!ptw_translate(&pte_trans, pte_addr, ra)) { + return false; + } + restart_2_pae: + pte = ptw_ldq(&pte_trans, ra); + if (!(pte & PG_PRESENT_MASK)) { + goto do_fault; + } + if (pte & rsvd_mask) { + goto do_fault_rsvd; + } + if (pte & PG_PSE_MASK) { + /* 2 MB page */ + page_size = 2048 * 1024; + ptep &= 
pte ^ PG_NX_MASK; + goto do_check_protect; + } + if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) { + goto restart_2_pae; + } + ptep &= pte ^ PG_NX_MASK; + + /* + * Page table level 1 + */ + pte_addr = (pte & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3); + if (!ptw_translate(&pte_trans, pte_addr, ra)) { + return false; + } + pte = ptw_ldq(&pte_trans, ra); + if (!(pte & PG_PRESENT_MASK)) { + goto do_fault; + } + if (pte & rsvd_mask) { + goto do_fault_rsvd; + } + /* combine pde and pte nx, user and rw protections */ + ptep &= pte ^ PG_NX_MASK; + page_size = 4096; + } else { + /* + * Page table level 2 + */ + pte_addr = (in->cr3 & 0xfffff000ULL) + ((addr >> 20) & 0xffc); + if (!ptw_translate(&pte_trans, pte_addr, ra)) { + return false; + } + restart_2_nopae: + pte = ptw_ldl(&pte_trans, ra); + if (!(pte & PG_PRESENT_MASK)) { + goto do_fault; + } + ptep = pte | PG_NX_MASK; + + /* if PSE bit is set, then we use a 4MB page */ + if ((pte & PG_PSE_MASK) && (pg_mode & PG_MODE_PSE)) { + page_size = 4096 * 1024; + /* + * Bits 20-13 provide bits 39-32 of the address, bit 21 is reserved. + * Leave bits 20-13 in place for setting accessed/dirty bits below. + */ + pte = (uint32_t)pte | ((pte & 0x1fe000LL) << (32 - 13)); + rsvd_mask = 0x200000; + goto do_check_protect_pse36; + } + if (!ptw_setl(&pte_trans, pte, PG_ACCESSED_MASK)) { + goto restart_2_nopae; + } + + /* + * Page table level 1 + */ + pte_addr = (pte & ~0xfffu) + ((addr >> 10) & 0xffc); + if (!ptw_translate(&pte_trans, pte_addr, ra)) { + return false; + } + pte = ptw_ldl(&pte_trans, ra); + if (!(pte & PG_PRESENT_MASK)) { + goto do_fault; + } + /* combine pde and pte user and rw protections */ + ptep &= pte | PG_NX_MASK; + page_size = 4096; + rsvd_mask = 0; + } + +do_check_protect: + rsvd_mask |= (page_size - 1) & PG_ADDRESS_MASK & ~PG_PSE_PAT_MASK; +do_check_protect_pse36: + if (pte & rsvd_mask) { + goto do_fault_rsvd; + } + ptep ^= PG_NX_MASK; + + /* can the page can be put in the TLB? 
prot will tell us */ + if (is_user && !(ptep & PG_USER_MASK)) { + goto do_fault_protect; + } + + int prot = 0; + if (!is_mmu_index_smap(in->mmu_idx) || !(ptep & PG_USER_MASK)) { + prot |= PAGE_READ; + if ((ptep & PG_RW_MASK) || !(is_user || (pg_mode & PG_MODE_WP))) { + prot |= PAGE_WRITE; + } + } + if (!(ptep & PG_NX_MASK) && + (is_user || + !((pg_mode & PG_MODE_SMEP) && (ptep & PG_USER_MASK)))) { + prot |= PAGE_EXEC; + } + + if (ptep & PG_USER_MASK) { + pkr = pg_mode & PG_MODE_PKE ? env->pkru : 0; + } else { + pkr = pg_mode & PG_MODE_PKS ? env->pkrs : 0; + } + if (pkr) { + uint32_t pk = (pte & PG_PKRU_MASK) >> PG_PKRU_BIT; + uint32_t pkr_ad = (pkr >> pk * 2) & 1; + uint32_t pkr_wd = (pkr >> pk * 2) & 2; + uint32_t pkr_prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + + if (pkr_ad) { + pkr_prot &= ~(PAGE_READ | PAGE_WRITE); + } else if (pkr_wd && (is_user || (pg_mode & PG_MODE_WP))) { + pkr_prot &= ~PAGE_WRITE; + } + if ((pkr_prot & (1 << access_type)) == 0) { + goto do_fault_pk_protect; + } + prot &= pkr_prot; + } + + if ((prot & (1 << access_type)) == 0) { + goto do_fault_protect; + } + + /* yes, it can! */ + { + uint32_t set = PG_ACCESSED_MASK; + if (access_type == MMU_DATA_STORE) { + set |= PG_DIRTY_MASK; + } else if (!(pte & PG_DIRTY_MASK)) { + /* + * Only set write access if already dirty... + * otherwise wait for dirty access. + */ + prot &= ~PAGE_WRITE; + } + if (!ptw_setl(&pte_trans, pte, set)) { + /* + * We can arrive here from any of 3 levels and 2 formats. + * The only safe thing is to restart the entire lookup. + */ + goto restart_all; + } + } + + /* merge offset within page */ + paddr = (pte & PG_ADDRESS_MASK & ~(page_size - 1)) | (addr & (page_size - 1)); + + /* + * Note that NPT is walked (for both paging structures and final guest + * addresses) using the address with the A20 bit set. 
+ */ + if (in->ptw_idx == MMU_NESTED_IDX) { + CPUTLBEntryFull *full; + int flags, nested_page_size; + + flags = probe_access_full(env, paddr, 0, access_type, + MMU_NESTED_IDX, true, + &pte_trans.haddr, &full, 0); + if (unlikely(flags & TLB_INVALID_MASK)) { + *err = (TranslateFault){ + .error_code = env->error_code, + .cr2 = paddr, + .stage2 = S2_GPA, + }; + return false; + } + + /* Merge stage1 & stage2 protection bits. */ + prot &= full->prot; + + /* Re-verify resulting protection. */ + if ((prot & (1 << access_type)) == 0) { + goto do_fault_protect; + } + + /* Merge stage1 & stage2 addresses to final physical address. */ + nested_page_size = 1 << full->lg_page_size; + paddr = (full->phys_addr & ~(nested_page_size - 1)) + | (paddr & (nested_page_size - 1)); + + /* + * Use the larger of stage1 & stage2 page sizes, so that + * invalidation works. + */ + if (nested_page_size > page_size) { + page_size = nested_page_size; + } + } + + out->paddr = paddr & x86_get_a20_mask(env); + out->prot = prot; + out->page_size = page_size; + return true; + + do_fault_rsvd: + error_code = PG_ERROR_RSVD_MASK; + goto do_fault_cont; + do_fault_protect: + error_code = PG_ERROR_P_MASK; + goto do_fault_cont; + do_fault_pk_protect: + assert(access_type != MMU_INST_FETCH); + error_code = PG_ERROR_PK_MASK | PG_ERROR_P_MASK; + goto do_fault_cont; + do_fault: + error_code = 0; + do_fault_cont: + if (is_user) { + error_code |= PG_ERROR_U_MASK; + } + switch (access_type) { + case MMU_DATA_LOAD: + break; + case MMU_DATA_STORE: + error_code |= PG_ERROR_W_MASK; + break; + case MMU_INST_FETCH: + if (pg_mode & (PG_MODE_NXE | PG_MODE_SMEP)) { + error_code |= PG_ERROR_I_D_MASK; + } + break; + } + *err = (TranslateFault){ + .exception_index = EXCP0E_PAGE, + .error_code = error_code, + .cr2 = addr, + }; + return false; +} + +static G_NORETURN void raise_stage2(CPUX86State *env, TranslateFault *err, + uintptr_t retaddr) +{ + uint64_t exit_info_1 = err->error_code; + + switch (err->stage2) { + case S2_GPT: 
+ exit_info_1 |= SVM_NPTEXIT_GPT; + break; + case S2_GPA: + exit_info_1 |= SVM_NPTEXIT_GPA; + break; + default: + g_assert_not_reached(); + } + + x86_stq_phys(env_cpu(env), + env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), + err->cr2); + cpu_vmexit(env, SVM_EXIT_NPF, exit_info_1, retaddr); +} + +static bool get_physical_address(CPUX86State *env, vaddr addr, + MMUAccessType access_type, int mmu_idx, + TranslateResult *out, TranslateFault *err, + uint64_t ra) +{ + TranslateParams in; + bool use_stage2 = env->hflags2 & HF2_NPT_MASK; + + in.addr = addr; + in.access_type = access_type; + + switch (mmu_idx) { + case MMU_PHYS_IDX: + break; + + case MMU_NESTED_IDX: + if (likely(use_stage2)) { + in.cr3 = env->nested_cr3; + in.pg_mode = env->nested_pg_mode; + in.mmu_idx = + env->nested_pg_mode & PG_MODE_LMA ? MMU_USER64_IDX : MMU_USER32_IDX; + in.ptw_idx = MMU_PHYS_IDX; + + if (!mmu_translate(env, &in, out, err, ra)) { + err->stage2 = S2_GPA; + return false; + } + return true; + } + break; + + default: + if (is_mmu_index_32(mmu_idx)) { + addr = (uint32_t)addr; + } + + if (likely(env->cr[0] & CR0_PG_MASK)) { + in.cr3 = env->cr[3]; + in.mmu_idx = mmu_idx; + in.ptw_idx = use_stage2 ? MMU_NESTED_IDX : MMU_PHYS_IDX; + in.pg_mode = get_pg_mode(env); + + if (in.pg_mode & PG_MODE_LMA) { + /* test virtual address sign extension */ + int shift = in.pg_mode & PG_MODE_LA57 ? 56 : 47; + int64_t sext = (int64_t)addr >> shift; + if (sext != 0 && sext != -1) { + *err = (TranslateFault){ + .exception_index = EXCP0D_GPF, + .cr2 = addr, + }; + return false; + } + } + return mmu_translate(env, &in, out, err, ra); + } + break; + } + + /* No translation needed. 
*/ + out->paddr = addr & x86_get_a20_mask(env); + out->prot = PAGE_READ | PAGE_WRITE | PAGE_EXEC; + out->page_size = TARGET_PAGE_SIZE; + return true; +} + +bool x86_cpu_tlb_fill(CPUState *cs, vaddr addr, int size, + MMUAccessType access_type, int mmu_idx, + bool probe, uintptr_t retaddr) +{ + CPUX86State *env = cpu_env(cs); + TranslateResult out; + TranslateFault err; + + if (get_physical_address(env, addr, access_type, mmu_idx, &out, &err, + retaddr)) { + /* + * Even if 4MB pages, we map only one 4KB page in the cache to + * avoid filling it too fast. + */ + assert(out.prot & (1 << access_type)); + tlb_set_page_with_attrs(cs, addr & TARGET_PAGE_MASK, + out.paddr & TARGET_PAGE_MASK, + cpu_get_mem_attrs(env), + out.prot, mmu_idx, out.page_size); + return true; + } + + if (probe) { + /* This will be used if recursing for stage2 translation. */ + env->error_code = err.error_code; + return false; + } + + if (err.stage2 != S2_NONE) { + raise_stage2(env, &err, retaddr); + } + + if (env->intercept_exceptions & (1 << err.exception_index)) { + /* cr2 is not modified in case of exceptions */ + x86_stq_phys(cs, env->vm_vmcb + + offsetof(struct vmcb, control.exit_info_2), + err.cr2); + } else { + env->cr[2] = err.cr2; + } + raise_exception_err_ra(env, err.exception_index, err.error_code, retaddr); +} + +G_NORETURN void x86_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, + MMUAccessType access_type, + int mmu_idx, uintptr_t retaddr) +{ + X86CPU *cpu = X86_CPU(cs); + handle_unaligned_access(&cpu->env, vaddr, access_type, retaddr); +} diff --git a/target/i386/tcg/sysemu/fpu_helper.c b/target/i386/tcg/sysemu/fpu_helper.c new file mode 100644 index 0000000000..e0305ba234 --- /dev/null +++ b/target/i386/tcg/sysemu/fpu_helper.c @@ -0,0 +1,63 @@ +/* + * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers (sysemu code) + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser 
General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/main-loop.h" +#include "cpu.h" +#include "hw/irq.h" + +static qemu_irq ferr_irq; + +void x86_register_ferr_irq(qemu_irq irq) +{ + ferr_irq = irq; +} + +void fpu_check_raise_ferr_irq(CPUX86State *env) +{ + if (ferr_irq && !(env->hflags2 & HF2_IGNNE_MASK)) { + bql_lock(); + qemu_irq_raise(ferr_irq); + bql_unlock(); + return; + } +} + +void cpu_clear_ignne(void) +{ + CPUX86State *env = &X86_CPU(first_cpu)->env; + env->hflags2 &= ~HF2_IGNNE_MASK; +} + +void cpu_set_ignne(void) +{ + CPUX86State *env = &X86_CPU(first_cpu)->env; + + assert(bql_locked()); + + env->hflags2 |= HF2_IGNNE_MASK; + /* + * We get here in response to a write to port F0h. The chipset should + * deassert FP_IRQ and FERR# instead should stay signaled until FPSW_SE is + * cleared, because FERR# and FP_IRQ are two separate pins on real + * hardware. However, we don't model FERR# as a qemu_irq, so we just + * do directly what the chipset would do, i.e. deassert FP_IRQ. 
+ */ + qemu_irq_lower(ferr_irq); +} diff --git a/target/i386/tcg/sysemu/meson.build b/target/i386/tcg/sysemu/meson.build new file mode 100644 index 0000000000..f9ac254541 --- /dev/null +++ b/target/i386/tcg/sysemu/meson.build @@ -0,0 +1,10 @@ +i386_system_ss.add(when: ['CONFIG_TCG', 'CONFIG_SYSTEM_ONLY'], if_true: files( + 'tcg-cpu.c', + 'smm_helper.c', + 'excp_helper.c', + 'bpt_helper.c', + 'misc_helper.c', + 'fpu_helper.c', + 'svm_helper.c', + 'seg_helper.c', +)) diff --git a/target/i386/misc_helper.c b/target/i386/tcg/sysemu/misc_helper.c index 78f2020ef2..edb7c3d894 100644 --- a/target/i386/misc_helper.c +++ b/target/i386/tcg/sysemu/misc_helper.c @@ -1,12 +1,12 @@ /* - * x86 misc helpers + * x86 misc helpers - sysemu code * * Copyright (c) 2003 Fabrice Bellard * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -21,121 +21,61 @@ #include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" -#include "exec/exec-all.h" #include "exec/cpu_ldst.h" #include "exec/address-spaces.h" +#include "exec/exec-all.h" +#include "tcg/helper-tcg.h" +#include "hw/i386/apic.h" void helper_outb(CPUX86State *env, uint32_t port, uint32_t data) { -#ifdef CONFIG_USER_ONLY - fprintf(stderr, "outb: port=0x%04x, data=%02x\n", port, data); -#else address_space_stb(&address_space_io, port, data, cpu_get_mem_attrs(env), NULL); -#endif } target_ulong helper_inb(CPUX86State *env, uint32_t port) { -#ifdef CONFIG_USER_ONLY - fprintf(stderr, "inb: port=0x%04x\n", port); - return 0; -#else return address_space_ldub(&address_space_io, port, cpu_get_mem_attrs(env), NULL); -#endif } void helper_outw(CPUX86State *env, uint32_t port, uint32_t data) { -#ifdef CONFIG_USER_ONLY - fprintf(stderr, "outw: port=0x%04x, data=%04x\n", port, data); -#else address_space_stw(&address_space_io, port, data, cpu_get_mem_attrs(env), NULL); -#endif } target_ulong helper_inw(CPUX86State *env, uint32_t port) { -#ifdef CONFIG_USER_ONLY - fprintf(stderr, "inw: port=0x%04x\n", port); - return 0; -#else return address_space_lduw(&address_space_io, port, cpu_get_mem_attrs(env), NULL); -#endif } void helper_outl(CPUX86State *env, uint32_t port, uint32_t data) { -#ifdef CONFIG_USER_ONLY - fprintf(stderr, "outw: port=0x%04x, data=%08x\n", port, data); -#else address_space_stl(&address_space_io, port, data, cpu_get_mem_attrs(env), NULL); -#endif } target_ulong helper_inl(CPUX86State *env, uint32_t port) { -#ifdef CONFIG_USER_ONLY - fprintf(stderr, "inl: port=0x%04x\n", port); - return 0; -#else return address_space_ldl(&address_space_io, port, cpu_get_mem_attrs(env), NULL); -#endif -} - -void helper_into(CPUX86State *env, int next_eip_addend) -{ - int eflags; - - eflags = 
cpu_cc_compute_all(env, CC_OP); - if (eflags & CC_O) { - raise_interrupt(env, EXCP04_INTO, 1, 0, next_eip_addend); - } -} - -void helper_cpuid(CPUX86State *env) -{ - uint32_t eax, ebx, ecx, edx; - - cpu_svm_check_intercept_param(env, SVM_EXIT_CPUID, 0, GETPC()); - - cpu_x86_cpuid(env, (uint32_t)env->regs[R_EAX], (uint32_t)env->regs[R_ECX], - &eax, &ebx, &ecx, &edx); - env->regs[R_EAX] = eax; - env->regs[R_EBX] = ebx; - env->regs[R_ECX] = ecx; - env->regs[R_EDX] = edx; } -#if defined(CONFIG_USER_ONLY) -target_ulong helper_read_crN(CPUX86State *env, int reg) -{ - return 0; -} - -void helper_write_crN(CPUX86State *env, int reg, target_ulong t0) -{ -} -#else target_ulong helper_read_crN(CPUX86State *env, int reg) { target_ulong val; - cpu_svm_check_intercept_param(env, SVM_EXIT_READ_CR0 + reg, 0, GETPC()); switch (reg) { default: val = env->cr[reg]; break; case 8: if (!(env->hflags2 & HF2_VINTR_MASK)) { - val = cpu_get_apic_tpr(x86_env_get_cpu(env)->apic_state); + val = cpu_get_apic_tpr(env_archcpu(env)->apic_state); } else { - val = env->v_tpr; + val = env->int_ctl & V_TPR_MASK; } break; } @@ -144,92 +84,64 @@ target_ulong helper_read_crN(CPUX86State *env, int reg) void helper_write_crN(CPUX86State *env, int reg, target_ulong t0) { - cpu_svm_check_intercept_param(env, SVM_EXIT_WRITE_CR0 + reg, 0, GETPC()); switch (reg) { case 0: + /* + * If we reach this point, the CR0 write intercept is disabled. 
+ * But we could still exit if the hypervisor has requested the selective + * intercept for bits other than TS and MP + */ + if (cpu_svm_has_intercept(env, SVM_EXIT_CR0_SEL_WRITE) && + ((env->cr[0] ^ t0) & ~(CR0_TS_MASK | CR0_MP_MASK))) { + cpu_vmexit(env, SVM_EXIT_CR0_SEL_WRITE, 0, GETPC()); + } cpu_x86_update_cr0(env, t0); break; case 3: + if ((env->efer & MSR_EFER_LMA) && + (t0 & ((~0ULL) << env_archcpu(env)->phys_bits))) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } + if (!(env->efer & MSR_EFER_LMA)) { + t0 &= 0xffffffffUL; + } cpu_x86_update_cr3(env, t0); break; case 4: + if (t0 & cr4_reserved_bits(env)) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } + if (((t0 ^ env->cr[4]) & CR4_LA57_MASK) && + (env->hflags & HF_CS64_MASK)) { + raise_exception_ra(env, EXCP0D_GPF, GETPC()); + } cpu_x86_update_cr4(env, t0); break; case 8: if (!(env->hflags2 & HF2_VINTR_MASK)) { - qemu_mutex_lock_iothread(); - cpu_set_apic_tpr(x86_env_get_cpu(env)->apic_state, t0); - qemu_mutex_unlock_iothread(); + bql_lock(); + cpu_set_apic_tpr(env_archcpu(env)->apic_state, t0); + bql_unlock(); + } + env->int_ctl = (env->int_ctl & ~V_TPR_MASK) | (t0 & V_TPR_MASK); + + CPUState *cs = env_cpu(env); + if (ctl_has_irq(env)) { + cpu_interrupt(cs, CPU_INTERRUPT_VIRQ); + } else { + cpu_reset_interrupt(cs, CPU_INTERRUPT_VIRQ); } - env->v_tpr = t0 & 0x0f; break; default: env->cr[reg] = t0; break; } } -#endif - -void helper_lmsw(CPUX86State *env, target_ulong t0) -{ - /* only 4 lower bits of CR0 are modified. PE cannot be set to zero - if already set to one. 
*/ - t0 = (env->cr[0] & ~0xe) | (t0 & 0xf); - helper_write_crN(env, 0, t0); -} -void helper_invlpg(CPUX86State *env, target_ulong addr) -{ - X86CPU *cpu = x86_env_get_cpu(env); - - cpu_svm_check_intercept_param(env, SVM_EXIT_INVLPG, 0, GETPC()); - tlb_flush_page(CPU(cpu), addr); -} - -void helper_rdtsc(CPUX86State *env) -{ - uint64_t val; - - if ((env->cr[4] & CR4_TSD_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) { - raise_exception_ra(env, EXCP0D_GPF, GETPC()); - } - cpu_svm_check_intercept_param(env, SVM_EXIT_RDTSC, 0, GETPC()); - - val = cpu_get_tsc(env) + env->tsc_offset; - env->regs[R_EAX] = (uint32_t)(val); - env->regs[R_EDX] = (uint32_t)(val >> 32); -} - -void helper_rdtscp(CPUX86State *env) -{ - helper_rdtsc(env); - env->regs[R_ECX] = (uint32_t)(env->tsc_aux); -} - -void helper_rdpmc(CPUX86State *env) -{ - if ((env->cr[4] & CR4_PCE_MASK) && ((env->hflags & HF_CPL_MASK) != 0)) { - raise_exception_ra(env, EXCP0D_GPF, GETPC()); - } - cpu_svm_check_intercept_param(env, SVM_EXIT_RDPMC, 0, GETPC()); - - /* currently unimplemented */ - qemu_log_mask(LOG_UNIMP, "x86: unimplemented rdpmc\n"); - raise_exception_err(env, EXCP06_ILLOP, 0); -} - -#if defined(CONFIG_USER_ONLY) -void helper_wrmsr(CPUX86State *env) -{ -} - -void helper_rdmsr(CPUX86State *env) -{ -} -#else void helper_wrmsr(CPUX86State *env) { uint64_t val; + CPUState *cs = env_cpu(env); cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 1, GETPC()); @@ -246,9 +158,19 @@ void helper_wrmsr(CPUX86State *env) case MSR_IA32_SYSENTER_EIP: env->sysenter_eip = val; break; - case MSR_IA32_APICBASE: - cpu_set_apic_base(x86_env_get_cpu(env)->apic_state, val); + case MSR_IA32_APICBASE: { + int ret; + + if (val & MSR_IA32_APICBASE_RESERVED) { + goto error; + } + + ret = cpu_set_apic_base(env_archcpu(env)->apic_state, val); + if (ret < 0) { + goto error; + } break; + } case MSR_EFER: { uint64_t update_mask; @@ -282,7 +204,17 @@ void helper_wrmsr(CPUX86State *env) case MSR_PAT: env->pat = val; break; + case MSR_IA32_PKRS: + 
if (val & 0xFFFFFFFF00000000ull) { + goto error; + } + env->pkrs = val; + tlb_flush(cs); + break; case MSR_VM_HSAVE_PA: + if (val & (0xfff | ((~0ULL) << env_archcpu(env)->phys_bits))) { + goto error; + } env->vm_hsave = val; break; #ifdef TARGET_X86_64 @@ -371,6 +303,19 @@ void helper_wrmsr(CPUX86State *env) env->msr_bndcfgs = val; cpu_sync_bndcs_hflags(env); break; + case MSR_APIC_START ... MSR_APIC_END: { + int ret; + int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START; + + bql_lock(); + ret = apic_msr_write(index, val); + bql_unlock(); + if (ret < 0) { + goto error; + } + + break; + } default: if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + @@ -385,10 +330,14 @@ void helper_wrmsr(CPUX86State *env) /* XXX: exception? */ break; } + return; +error: + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); } void helper_rdmsr(CPUX86State *env) { + X86CPU *x86_cpu = env_archcpu(env); uint64_t val; cpu_svm_check_intercept_param(env, SVM_EXIT_MSR, 0, GETPC()); @@ -404,7 +353,7 @@ void helper_rdmsr(CPUX86State *env) val = env->sysenter_eip; break; case MSR_IA32_APICBASE: - val = cpu_get_apic_base(x86_env_get_cpu(env)->apic_state); + val = cpu_get_apic_base(env_archcpu(env)->apic_state); break; case MSR_EFER: val = env->efer; @@ -415,6 +364,9 @@ void helper_rdmsr(CPUX86State *env) case MSR_PAT: val = env->pat; break; + case MSR_IA32_PKRS: + val = env->pkrs; + break; case MSR_VM_HSAVE_PA: val = env->vm_hsave; break; @@ -522,6 +474,27 @@ void helper_rdmsr(CPUX86State *env) case MSR_IA32_BNDCFGS: val = env->msr_bndcfgs; break; + case MSR_IA32_UCODE_REV: + val = x86_cpu->ucode_rev; + break; + case MSR_CORE_THREAD_COUNT: { + CPUState *cs = CPU(x86_cpu); + val = (cs->nr_threads * cs->nr_cores) | (cs->nr_cores << 16); + break; + } + case MSR_APIC_START ... 
MSR_APIC_END: { + int ret; + int index = (uint32_t)env->regs[R_ECX] - MSR_APIC_START; + + bql_lock(); + ret = apic_msr_read(index, &val); + bql_unlock(); + if (ret < 0) { + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); + } + + break; + } default: if ((uint32_t)env->regs[R_ECX] >= MSR_MC0_CTL && (uint32_t)env->regs[R_ECX] < MSR_MC0_CTL + @@ -537,21 +510,16 @@ void helper_rdmsr(CPUX86State *env) env->regs[R_EAX] = (uint32_t)(val); env->regs[R_EDX] = (uint32_t)(val >> 32); } -#endif -static void do_pause(X86CPU *cpu) +void helper_flush_page(CPUX86State *env, target_ulong addr) { - CPUState *cs = CPU(cpu); - - /* Just let another CPU run. */ - cs->exception_index = EXCP_INTERRUPT; - cpu_loop_exit(cs); + tlb_flush_page(env_cpu(env), addr); } -static void do_hlt(X86CPU *cpu) +static G_NORETURN +void do_hlt(CPUX86State *env) { - CPUState *cs = CPU(cpu); - CPUX86State *env = &cpu->env; + CPUState *cs = env_cpu(env); env->hflags &= ~HF_INHIBIT_IRQ_MASK; /* needed if sti is just before */ cs->halted = 1; @@ -559,14 +527,12 @@ static void do_hlt(X86CPU *cpu) cpu_loop_exit(cs); } -void helper_hlt(CPUX86State *env, int next_eip_addend) +G_NORETURN void helper_hlt(CPUX86State *env, int next_eip_addend) { - X86CPU *cpu = x86_env_get_cpu(env); - cpu_svm_check_intercept_param(env, SVM_EXIT_HLT, 0, GETPC()); env->eip += next_eip_addend; - do_hlt(cpu); + do_hlt(env); } void helper_monitor(CPUX86State *env, target_ulong ptr) @@ -578,10 +544,9 @@ void helper_monitor(CPUX86State *env, target_ulong ptr) cpu_svm_check_intercept_param(env, SVM_EXIT_MONITOR, 0, GETPC()); } -void helper_mwait(CPUX86State *env, int next_eip_addend) +G_NORETURN void helper_mwait(CPUX86State *env, int next_eip_addend) { - CPUState *cs; - X86CPU *cpu; + CPUState *cs = env_cpu(env); if ((uint32_t)env->regs[R_ECX] != 0) { raise_exception_ra(env, EXCP0D_GPF, GETPC()); @@ -589,57 +554,10 @@ void helper_mwait(CPUX86State *env, int next_eip_addend) cpu_svm_check_intercept_param(env, SVM_EXIT_MWAIT, 0, GETPC()); 
env->eip += next_eip_addend; - cpu = x86_env_get_cpu(env); - cs = CPU(cpu); /* XXX: not complete but not completely erroneous */ if (cs->cpu_index != 0 || CPU_NEXT(cs) != NULL) { - do_pause(cpu); + do_pause(env); } else { - do_hlt(cpu); + do_hlt(env); } } - -void helper_pause(CPUX86State *env, int next_eip_addend) -{ - X86CPU *cpu = x86_env_get_cpu(env); - - cpu_svm_check_intercept_param(env, SVM_EXIT_PAUSE, 0, GETPC()); - env->eip += next_eip_addend; - - do_pause(cpu); -} - -void helper_debug(CPUX86State *env) -{ - CPUState *cs = CPU(x86_env_get_cpu(env)); - - cs->exception_index = EXCP_DEBUG; - cpu_loop_exit(cs); -} - -uint64_t helper_rdpkru(CPUX86State *env, uint32_t ecx) -{ - if ((env->cr[4] & CR4_PKE_MASK) == 0) { - raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC()); - } - if (ecx != 0) { - raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); - } - - return env->pkru; -} - -void helper_wrpkru(CPUX86State *env, uint32_t ecx, uint64_t val) -{ - CPUState *cs = CPU(x86_env_get_cpu(env)); - - if ((env->cr[4] & CR4_PKE_MASK) == 0) { - raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC()); - } - if (ecx != 0 || (val & 0xFFFFFFFF00000000ull)) { - raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); - } - - env->pkru = val; - tlb_flush(cs); -} diff --git a/target/i386/tcg/sysemu/seg_helper.c b/target/i386/tcg/sysemu/seg_helper.c new file mode 100644 index 0000000000..2db8083748 --- /dev/null +++ b/target/i386/tcg/sysemu/seg_helper.c @@ -0,0 +1,241 @@ +/* + * x86 segmentation related helpers: (sysemu-only code) + * TSS, interrupts, system calls, jumps and call/task gates, descriptors + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "qemu/log.h" +#include "qemu/main-loop.h" +#include "cpu.h" +#include "exec/helper-proto.h" +#include "exec/cpu_ldst.h" +#include "tcg/helper-tcg.h" +#include "../seg_helper.h" + +void helper_syscall(CPUX86State *env, int next_eip_addend) +{ + int selector; + + if (!(env->efer & MSR_EFER_SCE)) { + raise_exception_err_ra(env, EXCP06_ILLOP, 0, GETPC()); + } + selector = (env->star >> 32) & 0xffff; +#ifdef TARGET_X86_64 + if (env->hflags & HF_LMA_MASK) { + int code64; + + env->regs[R_ECX] = env->eip + next_eip_addend; + env->regs[11] = cpu_compute_eflags(env) & ~RF_MASK; + + code64 = env->hflags & HF_CS64_MASK; + + env->eflags &= ~(env->fmask | RF_MASK); + cpu_load_eflags(env, env->eflags, 0); + cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK | + DESC_L_MASK); + cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_W_MASK | DESC_A_MASK); + if (code64) { + env->eip = env->lstar; + } else { + env->eip = env->cstar; + } + } else +#endif + { + env->regs[R_ECX] = (uint32_t)(env->eip + next_eip_addend); + + env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK); + cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | + DESC_S_MASK | + DESC_CS_MASK | DESC_R_MASK | DESC_A_MASK); + cpu_x86_load_seg_cache(env, R_SS, (selector + 8) & 0xfffc, + 0, 0xffffffff, + DESC_G_MASK | DESC_B_MASK 
| DESC_P_MASK | + DESC_S_MASK | + DESC_W_MASK | DESC_A_MASK); + env->eip = (uint32_t)env->star; + } +} + +void handle_even_inj(CPUX86State *env, int intno, int is_int, + int error_code, int is_hw, int rm) +{ + CPUState *cs = env_cpu(env); + uint32_t event_inj = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, + control.event_inj)); + + if (!(event_inj & SVM_EVTINJ_VALID)) { + int type; + + if (is_int) { + type = SVM_EVTINJ_TYPE_SOFT; + } else { + type = SVM_EVTINJ_TYPE_EXEPT; + } + event_inj = intno | type | SVM_EVTINJ_VALID; + if (!rm && exception_has_error_code(intno)) { + event_inj |= SVM_EVTINJ_VALID_ERR; + x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, + control.event_inj_err), + error_code); + } + x86_stl_phys(cs, + env->vm_vmcb + offsetof(struct vmcb, control.event_inj), + event_inj); + } +} + +void x86_cpu_do_interrupt(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + if (cs->exception_index == EXCP_VMEXIT) { + assert(env->old_exception == -1); + do_vmexit(env); + } else { + do_interrupt_all(cpu, cs->exception_index, + env->exception_is_int, + env->error_code, + env->exception_next_eip, 0); + /* successfully delivered */ + env->old_exception = -1; + } +} + +void x86_cpu_exec_halt(CPUState *cpu) +{ + if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { + X86CPU *x86_cpu = X86_CPU(cpu); + + bql_lock(); + apic_poll_irq(x86_cpu->apic_state); + cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL); + bql_unlock(); + } +} + +bool x86_need_replay_interrupt(int interrupt_request) +{ + /* + * CPU_INTERRUPT_POLL is a virtual event which gets converted into a + * "real" interrupt event later. It does not need to be recorded for + * replay purposes. 
+ */ + return !(interrupt_request & CPU_INTERRUPT_POLL); +} + +bool x86_cpu_exec_interrupt(CPUState *cs, int interrupt_request) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + int intno; + + interrupt_request = x86_cpu_pending_interrupt(cs, interrupt_request); + if (!interrupt_request) { + return false; + } + + /* Don't process multiple interrupt requests in a single call. + * This is required to make icount-driven execution deterministic. + */ + switch (interrupt_request) { + case CPU_INTERRUPT_POLL: + cs->interrupt_request &= ~CPU_INTERRUPT_POLL; + apic_poll_irq(cpu->apic_state); + break; + case CPU_INTERRUPT_SIPI: + do_cpu_sipi(cpu); + break; + case CPU_INTERRUPT_SMI: + cpu_svm_check_intercept_param(env, SVM_EXIT_SMI, 0, 0); + cs->interrupt_request &= ~CPU_INTERRUPT_SMI; + do_smm_enter(cpu); + break; + case CPU_INTERRUPT_NMI: + cpu_svm_check_intercept_param(env, SVM_EXIT_NMI, 0, 0); + cs->interrupt_request &= ~CPU_INTERRUPT_NMI; + env->hflags2 |= HF2_NMI_MASK; + do_interrupt_x86_hardirq(env, EXCP02_NMI, 1); + break; + case CPU_INTERRUPT_MCE: + cs->interrupt_request &= ~CPU_INTERRUPT_MCE; + do_interrupt_x86_hardirq(env, EXCP12_MCHK, 0); + break; + case CPU_INTERRUPT_HARD: + cpu_svm_check_intercept_param(env, SVM_EXIT_INTR, 0, 0); + cs->interrupt_request &= ~(CPU_INTERRUPT_HARD | + CPU_INTERRUPT_VIRQ); + intno = cpu_get_pic_interrupt(env); + qemu_log_mask(CPU_LOG_INT, + "Servicing hardware INT=0x%02x\n", intno); + do_interrupt_x86_hardirq(env, intno, 1); + break; + case CPU_INTERRUPT_VIRQ: + cpu_svm_check_intercept_param(env, SVM_EXIT_VINTR, 0, 0); + intno = x86_ldl_phys(cs, env->vm_vmcb + + offsetof(struct vmcb, control.int_vector)); + qemu_log_mask(CPU_LOG_INT, + "Servicing virtual hardware INT=0x%02x\n", intno); + do_interrupt_x86_hardirq(env, intno, 1); + cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + env->int_ctl &= ~V_IRQ_MASK; + break; + } + + /* Ensure that no TB jump will be modified as the program flow was changed. 
*/ + return true; +} + +/* check if Port I/O is allowed in TSS */ +void helper_check_io(CPUX86State *env, uint32_t addr, uint32_t size) +{ + uintptr_t retaddr = GETPC(); + uint32_t io_offset, val, mask; + + /* TSS must be a valid 32 bit one */ + if (!(env->tr.flags & DESC_P_MASK) || + ((env->tr.flags >> DESC_TYPE_SHIFT) & 0xf) != 9 || + env->tr.limit < 103) { + goto fail; + } + io_offset = cpu_lduw_kernel_ra(env, env->tr.base + 0x66, retaddr); + io_offset += (addr >> 3); + /* Note: the check needs two bytes */ + if ((io_offset + 1) > env->tr.limit) { + goto fail; + } + val = cpu_lduw_kernel_ra(env, env->tr.base + io_offset, retaddr); + val >>= (addr & 7); + mask = (1 << size) - 1; + /* all bits must be zero to allow the I/O */ + if ((val & mask) != 0) { + fail: + raise_exception_err_ra(env, EXCP0D_GPF, 0, retaddr); + } +} diff --git a/target/i386/smm_helper.c b/target/i386/tcg/sysemu/smm_helper.c index c1c34a75db..a45b5651c3 100644 --- a/target/i386/smm_helper.c +++ b/target/i386/tcg/sysemu/smm_helper.c @@ -1,12 +1,12 @@ /* - * x86 SMM helpers + * x86 SMM helpers (sysemu-only) * * Copyright (c) 2003 Fabrice Bellard * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -18,24 +18,13 @@ */ #include "qemu/osdep.h" -#include "qemu/main-loop.h" #include "cpu.h" #include "exec/helper-proto.h" #include "exec/log.h" +#include "tcg/helper-tcg.h" -/* SMM support */ - -#if defined(CONFIG_USER_ONLY) - -void do_smm_enter(X86CPU *cpu) -{ -} - -void helper_rsm(CPUX86State *env) -{ -} -#else +/* SMM support */ #ifdef TARGET_X86_64 #define SMM_REVISION_ID 0x00020064 @@ -204,8 +193,8 @@ void do_smm_enter(X86CPU *cpu) void helper_rsm(CPUX86State *env) { - X86CPU *cpu = x86_env_get_cpu(env); - CPUState *cs = CPU(cpu); + X86CPU *cpu = env_archcpu(env); + CPUState *cs = env_cpu(env); target_ulong sm_state; int i, offset; uint32_t val; @@ -328,5 +317,3 @@ void helper_rsm(CPUX86State *env) qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n"); log_cpu_state_mask(CPU_LOG_INT, CPU(cpu), CPU_DUMP_CCOP); } - -#endif /* !CONFIG_USER_ONLY */ diff --git a/target/i386/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c index 9fd22a883b..5d6de2294f 100644 --- a/target/i386/svm_helper.c +++ b/target/i386/tcg/sysemu/svm_helper.c @@ -1,12 +1,12 @@ /* - * x86 SVM helpers + * x86 SVM helpers (sysemu only) * * Copyright (c) 2003 Fabrice Bellard * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. 
* * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -18,117 +18,151 @@ */ #include "qemu/osdep.h" +#include "qemu/log.h" #include "cpu.h" #include "exec/helper-proto.h" #include "exec/exec-all.h" #include "exec/cpu_ldst.h" +#include "tcg/helper-tcg.h" /* Secure Virtual Machine helpers */ -#if defined(CONFIG_USER_ONLY) - -void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) +static void svm_save_seg(CPUX86State *env, int mmu_idx, hwaddr addr, + const SegmentCache *sc) { + cpu_stw_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, selector), + sc->selector, mmu_idx, 0); + cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, base), + sc->base, mmu_idx, 0); + cpu_stl_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, limit), + sc->limit, mmu_idx, 0); + cpu_stw_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, attrib), + ((sc->flags >> 8) & 0xff) + | ((sc->flags >> 12) & 0x0f00), + mmu_idx, 0); } -void helper_vmmcall(CPUX86State *env) +/* + * VMRUN and VMLOAD canonicalizes (i.e., sign-extend to bit 63) all base + * addresses in the segment registers that have been loaded. 
+ */ +static inline void svm_canonicalization(CPUX86State *env, target_ulong *seg_base) { + uint16_t shift_amt = 64 - cpu_x86_virtual_addr_width(env); + *seg_base = ((((long) *seg_base) << shift_amt) >> shift_amt); } -void helper_vmload(CPUX86State *env, int aflag) +static void svm_load_seg(CPUX86State *env, int mmu_idx, hwaddr addr, + SegmentCache *sc) { -} + unsigned int flags; -void helper_vmsave(CPUX86State *env, int aflag) -{ -} + sc->selector = + cpu_lduw_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, selector), + mmu_idx, 0); + sc->base = + cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, base), + mmu_idx, 0); + sc->limit = + cpu_ldl_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, limit), + mmu_idx, 0); + flags = + cpu_lduw_mmuidx_ra(env, addr + offsetof(struct vmcb_seg, attrib), + mmu_idx, 0); + sc->flags = ((flags & 0xff) << 8) | ((flags & 0x0f00) << 12); -void helper_stgi(CPUX86State *env) -{ + svm_canonicalization(env, &sc->base); } -void helper_clgi(CPUX86State *env) +static void svm_load_seg_cache(CPUX86State *env, int mmu_idx, + hwaddr addr, int seg_reg) { -} + SegmentCache sc; -void helper_skinit(CPUX86State *env) -{ + svm_load_seg(env, mmu_idx, addr, &sc); + cpu_x86_load_seg_cache(env, seg_reg, sc.selector, + sc.base, sc.limit, sc.flags); } -void helper_invlpga(CPUX86State *env, int aflag) +static inline bool is_efer_invalid_state (CPUX86State *env) { -} + if (!(env->efer & MSR_EFER_SVME)) { + return true; + } -void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, uint64_t exit_info_1, - uintptr_t retaddr) -{ - assert(0); -} + if (env->efer & MSR_EFER_RESERVED) { + return true; + } -void helper_svm_check_intercept_param(CPUX86State *env, uint32_t type, - uint64_t param) -{ -} + if ((env->efer & (MSR_EFER_LMA | MSR_EFER_LME)) && + !(env->features[FEAT_8000_0001_EDX] & CPUID_EXT2_LM)) { + return true; + } -void cpu_svm_check_intercept_param(CPUX86State *env, uint32_t type, - uint64_t param, uintptr_t retaddr) -{ -} + if ((env->efer & 
MSR_EFER_LME) && (env->cr[0] & CR0_PG_MASK) + && !(env->cr[4] & CR4_PAE_MASK)) { + return true; + } -void helper_svm_check_io(CPUX86State *env, uint32_t port, uint32_t param, - uint32_t next_eip_addend) -{ + if ((env->efer & MSR_EFER_LME) && (env->cr[0] & CR0_PG_MASK) + && !(env->cr[0] & CR0_PE_MASK)) { + return true; + } + + if ((env->efer & MSR_EFER_LME) && (env->cr[0] & CR0_PG_MASK) + && (env->cr[4] & CR4_PAE_MASK) + && (env->segs[R_CS].flags & DESC_L_MASK) + && (env->segs[R_CS].flags & DESC_B_MASK)) { + return true; + } + + return false; } -#else -static inline void svm_save_seg(CPUX86State *env, hwaddr addr, - const SegmentCache *sc) +static inline bool virtual_gif_enabled(CPUX86State *env) { - CPUState *cs = CPU(x86_env_get_cpu(env)); - - x86_stw_phys(cs, addr + offsetof(struct vmcb_seg, selector), - sc->selector); - x86_stq_phys(cs, addr + offsetof(struct vmcb_seg, base), - sc->base); - x86_stl_phys(cs, addr + offsetof(struct vmcb_seg, limit), - sc->limit); - x86_stw_phys(cs, addr + offsetof(struct vmcb_seg, attrib), - ((sc->flags >> 8) & 0xff) | ((sc->flags >> 12) & 0x0f00)); + if (likely(env->hflags & HF_GUEST_MASK)) { + return (env->features[FEAT_SVM] & CPUID_SVM_VGIF) + && (env->int_ctl & V_GIF_ENABLED_MASK); + } + return false; } -static inline void svm_load_seg(CPUX86State *env, hwaddr addr, - SegmentCache *sc) +static inline bool virtual_vm_load_save_enabled(CPUX86State *env, uint32_t exit_code, uintptr_t retaddr) { - CPUState *cs = CPU(x86_env_get_cpu(env)); - unsigned int flags; + uint64_t lbr_ctl; - sc->selector = x86_lduw_phys(cs, - addr + offsetof(struct vmcb_seg, selector)); - sc->base = x86_ldq_phys(cs, addr + offsetof(struct vmcb_seg, base)); - sc->limit = x86_ldl_phys(cs, addr + offsetof(struct vmcb_seg, limit)); - flags = x86_lduw_phys(cs, addr + offsetof(struct vmcb_seg, attrib)); - sc->flags = ((flags & 0xff) << 8) | ((flags & 0x0f00) << 12); + if (likely(env->hflags & HF_GUEST_MASK)) { + if (likely(!(env->hflags2 & HF2_NPT_MASK)) || 
!(env->efer & MSR_EFER_LMA)) { + cpu_vmexit(env, exit_code, 0, retaddr); + } + + lbr_ctl = x86_ldl_phys(env_cpu(env), env->vm_vmcb + offsetof(struct vmcb, + control.lbr_ctl)); + return (env->features[FEAT_SVM] & CPUID_SVM_V_VMSAVE_VMLOAD) + && (lbr_ctl & V_VMLOAD_VMSAVE_ENABLED_MASK); + + } + + return false; } -static inline void svm_load_seg_cache(CPUX86State *env, hwaddr addr, - int seg_reg) +static inline bool virtual_gif_set(CPUX86State *env) { - SegmentCache sc1, *sc = &sc1; - - svm_load_seg(env, addr, sc); - cpu_x86_load_seg_cache(env, seg_reg, sc->selector, - sc->base, sc->limit, sc->flags); + return !virtual_gif_enabled(env) || (env->int_ctl & V_GIF_MASK); } void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) { - CPUState *cs = CPU(x86_env_get_cpu(env)); + CPUState *cs = env_cpu(env); + X86CPU *cpu = env_archcpu(env); target_ulong addr; uint64_t nested_ctl; uint32_t event_inj; - uint32_t int_ctl; - - cpu_svm_check_intercept_param(env, SVM_EXIT_VMRUN, 0, GETPC()); + uint32_t asid; + uint64_t new_cr0; + uint64_t new_cr3; + uint64_t new_cr4; if (aflag == 2) { addr = env->regs[R_EAX]; @@ -136,6 +170,13 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) addr = (uint32_t)env->regs[R_EAX]; } + /* Exceptions are checked before the intercept. */ + if (addr & (0xfff | ((~0ULL) << env_archcpu(env)->phys_bits))) { + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); + } + + cpu_svm_check_intercept_param(env, SVM_EXIT_VMRUN, 0, GETPC()); + qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmrun! 
" TARGET_FMT_lx "\n", addr); env->vm_vmcb = addr; @@ -170,13 +211,17 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) env->vm_hsave + offsetof(struct vmcb, save.rflags), cpu_compute_eflags(env)); - svm_save_seg(env, env->vm_hsave + offsetof(struct vmcb, save.es), + svm_save_seg(env, MMU_PHYS_IDX, + env->vm_hsave + offsetof(struct vmcb, save.es), &env->segs[R_ES]); - svm_save_seg(env, env->vm_hsave + offsetof(struct vmcb, save.cs), + svm_save_seg(env, MMU_PHYS_IDX, + env->vm_hsave + offsetof(struct vmcb, save.cs), &env->segs[R_CS]); - svm_save_seg(env, env->vm_hsave + offsetof(struct vmcb, save.ss), + svm_save_seg(env, MMU_PHYS_IDX, + env->vm_hsave + offsetof(struct vmcb, save.ss), &env->segs[R_SS]); - svm_save_seg(env, env->vm_hsave + offsetof(struct vmcb, save.ds), + svm_save_seg(env, MMU_PHYS_IDX, + env->vm_hsave + offsetof(struct vmcb, save.ds), &env->segs[R_DS]); x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.rip), @@ -209,22 +254,41 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) nested_ctl = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.nested_ctl)); + asid = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, + control.asid)); + + uint64_t msrpm_base_pa = x86_ldq_phys(cs, env->vm_vmcb + + offsetof(struct vmcb, + control.msrpm_base_pa)); + uint64_t iopm_base_pa = x86_ldq_phys(cs, env->vm_vmcb + + offsetof(struct vmcb, control.iopm_base_pa)); + + if ((msrpm_base_pa & ~0xfff) >= (1ull << cpu->phys_bits) - SVM_MSRPM_SIZE) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } + + if ((iopm_base_pa & ~0xfff) >= (1ull << cpu->phys_bits) - SVM_IOPM_SIZE) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } + + env->nested_pg_mode = 0; + + if (!cpu_svm_has_intercept(env, SVM_EXIT_VMRUN)) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } + if (asid == 0) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } + if (nested_ctl & SVM_NPT_ENABLED) { env->nested_cr3 = x86_ldq_phys(cs, env->vm_vmcb + 
offsetof(struct vmcb, control.nested_cr3)); env->hflags2 |= HF2_NPT_MASK; - env->nested_pg_mode = 0; - if (env->cr[4] & CR4_PAE_MASK) { - env->nested_pg_mode |= SVM_NPT_PAE; - } - if (env->hflags & HF_LMA_MASK) { - env->nested_pg_mode |= SVM_NPT_LMA; - } - if (env->efer & MSR_EFER_NXE) { - env->nested_pg_mode |= SVM_NPT_NXE; - } + env->nested_pg_mode = get_pg_mode(env) & PG_MODE_SVM_MASK; + + tlb_flush_by_mmuidx(cs, 1 << MMU_NESTED_IDX); } /* enable intercepts */ @@ -233,36 +297,35 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) env->tsc_offset = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.tsc_offset)); - env->gdt.base = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, - save.gdtr.base)); - env->gdt.limit = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, - save.gdtr.limit)); - - env->idt.base = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, - save.idtr.base)); - env->idt.limit = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, - save.idtr.limit)); - + new_cr0 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr0)); + if (new_cr0 & SVM_CR0_RESERVED_MASK) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } + if ((new_cr0 & CR0_NW_MASK) && !(new_cr0 & CR0_CD_MASK)) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } + new_cr3 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr3)); + if ((env->efer & MSR_EFER_LMA) && + (new_cr3 & ((~0ULL) << cpu->phys_bits))) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } + new_cr4 = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr4)); + if (new_cr4 & cr4_reserved_bits(env)) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } /* clear exit_info_2 so we behave like the real hardware */ x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0); - cpu_x86_update_cr0(env, x86_ldq_phys(cs, - env->vm_vmcb + offsetof(struct vmcb, - save.cr0))); - cpu_x86_update_cr4(env, x86_ldq_phys(cs, - env->vm_vmcb + offsetof(struct 
vmcb, - save.cr4))); - cpu_x86_update_cr3(env, x86_ldq_phys(cs, - env->vm_vmcb + offsetof(struct vmcb, - save.cr3))); + cpu_x86_update_cr0(env, new_cr0); + cpu_x86_update_cr4(env, new_cr4); + cpu_x86_update_cr3(env, new_cr3); env->cr[2] = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr2)); - int_ctl = x86_ldl_phys(cs, + env->int_ctl = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)); env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK); - if (int_ctl & V_INTR_MASKING_MASK) { - env->v_tpr = int_ctl & V_TPR_MASK; + if (env->int_ctl & V_INTR_MASKING_MASK) { env->hflags2 |= HF2_VINTR_MASK; if (env->eflags & IF_MASK) { env->hflags2 |= HF2_HIF_MASK; @@ -278,14 +341,18 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) save.rflags)), ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); - svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.es), - R_ES); - svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.cs), - R_CS); - svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.ss), - R_SS); - svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.ds), - R_DS); + svm_load_seg_cache(env, MMU_PHYS_IDX, + env->vm_vmcb + offsetof(struct vmcb, save.es), R_ES); + svm_load_seg_cache(env, MMU_PHYS_IDX, + env->vm_vmcb + offsetof(struct vmcb, save.cs), R_CS); + svm_load_seg_cache(env, MMU_PHYS_IDX, + env->vm_vmcb + offsetof(struct vmcb, save.ss), R_SS); + svm_load_seg_cache(env, MMU_PHYS_IDX, + env->vm_vmcb + offsetof(struct vmcb, save.ds), R_DS); + svm_load_seg(env, MMU_PHYS_IDX, + env->vm_vmcb + offsetof(struct vmcb, save.idtr), &env->idt); + svm_load_seg(env, MMU_PHYS_IDX, + env->vm_vmcb + offsetof(struct vmcb, save.gdtr), &env->gdt); env->eip = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.rip)); @@ -299,7 +366,18 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) env->dr[6] = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.dr6)); - 
/* FIXME: guest state consistency checks */ +#ifdef TARGET_X86_64 + if (env->dr[6] & DR_RESERVED_MASK) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } + if (env->dr[7] & DR_RESERVED_MASK) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } +#endif + + if (is_efer_invalid_state(env)) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } switch (x86_ldub_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.tlb_ctl))) { @@ -313,12 +391,14 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) env->hflags2 |= HF2_GIF_MASK; - if (int_ctl & V_IRQ_MASK) { - CPUState *cs = CPU(x86_env_get_cpu(env)); - + if (ctl_has_irq(env)) { cs->interrupt_request |= CPU_INTERRUPT_VIRQ; } + if (virtual_gif_set(env)) { + env->hflags2 |= HF2_VGIF_MASK; + } + /* maybe we need to inject an event */ event_inj = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.event_inj)); @@ -350,6 +430,9 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) cpu_loop_exit(cs); break; case SVM_EVTINJ_TYPE_EXEPT: + if (vector == EXCP02_NMI || vector >= 31) { + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + } cs->exception_index = vector; env->error_code = event_inj_err; env->exception_is_int = 0; @@ -365,6 +448,9 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) qemu_log_mask(CPU_LOG_TB_IN_ASM, "SOFT"); cpu_loop_exit(cs); break; + default: + cpu_vmexit(env, SVM_EXIT_ERR, 0, GETPC()); + break; } qemu_log_mask(CPU_LOG_TB_IN_ASM, " %#x %#x\n", cs->exception_index, env->error_code); @@ -379,211 +465,236 @@ void helper_vmmcall(CPUX86State *env) void helper_vmload(CPUX86State *env, int aflag) { - CPUState *cs = CPU(x86_env_get_cpu(env)); + int mmu_idx = MMU_PHYS_IDX; target_ulong addr; - cpu_svm_check_intercept_param(env, SVM_EXIT_VMLOAD, 0, GETPC()); - if (aflag == 2) { addr = env->regs[R_EAX]; } else { addr = (uint32_t)env->regs[R_EAX]; } - qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmload! 
" TARGET_FMT_lx - "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n", - addr, x86_ldq_phys(cs, addr + offsetof(struct vmcb, - save.fs.base)), - env->segs[R_FS].base); + /* Exceptions are checked before the intercept. */ + if (addr & (0xfff | ((~0ULL) << env_archcpu(env)->phys_bits))) { + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); + } - svm_load_seg_cache(env, addr + offsetof(struct vmcb, save.fs), R_FS); - svm_load_seg_cache(env, addr + offsetof(struct vmcb, save.gs), R_GS); - svm_load_seg(env, addr + offsetof(struct vmcb, save.tr), &env->tr); - svm_load_seg(env, addr + offsetof(struct vmcb, save.ldtr), &env->ldt); + cpu_svm_check_intercept_param(env, SVM_EXIT_VMLOAD, 0, GETPC()); + + if (virtual_vm_load_save_enabled(env, SVM_EXIT_VMLOAD, GETPC())) { + mmu_idx = MMU_NESTED_IDX; + } + + svm_load_seg_cache(env, mmu_idx, + addr + offsetof(struct vmcb, save.fs), R_FS); + svm_load_seg_cache(env, mmu_idx, + addr + offsetof(struct vmcb, save.gs), R_GS); + svm_load_seg(env, mmu_idx, + addr + offsetof(struct vmcb, save.tr), &env->tr); + svm_load_seg(env, mmu_idx, + addr + offsetof(struct vmcb, save.ldtr), &env->ldt); #ifdef TARGET_X86_64 - env->kernelgsbase = x86_ldq_phys(cs, addr + offsetof(struct vmcb, - save.kernel_gs_base)); - env->lstar = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.lstar)); - env->cstar = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.cstar)); - env->fmask = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.sfmask)); + env->kernelgsbase = + cpu_ldq_mmuidx_ra(env, + addr + offsetof(struct vmcb, save.kernel_gs_base), + mmu_idx, 0); + env->lstar = + cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.lstar), + mmu_idx, 0); + env->cstar = + cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.cstar), + mmu_idx, 0); + env->fmask = + cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sfmask), + mmu_idx, 0); + svm_canonicalization(env, &env->kernelgsbase); #endif - env->star = x86_ldq_phys(cs, addr + offsetof(struct vmcb, 
save.star)); - env->sysenter_cs = x86_ldq_phys(cs, - addr + offsetof(struct vmcb, save.sysenter_cs)); - env->sysenter_esp = x86_ldq_phys(cs, addr + offsetof(struct vmcb, - save.sysenter_esp)); - env->sysenter_eip = x86_ldq_phys(cs, addr + offsetof(struct vmcb, - save.sysenter_eip)); + env->star = + cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.star), + mmu_idx, 0); + env->sysenter_cs = + cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sysenter_cs), + mmu_idx, 0); + env->sysenter_esp = + cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sysenter_esp), + mmu_idx, 0); + env->sysenter_eip = + cpu_ldq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sysenter_eip), + mmu_idx, 0); } void helper_vmsave(CPUX86State *env, int aflag) { - CPUState *cs = CPU(x86_env_get_cpu(env)); + int mmu_idx = MMU_PHYS_IDX; target_ulong addr; - cpu_svm_check_intercept_param(env, SVM_EXIT_VMSAVE, 0, GETPC()); - if (aflag == 2) { addr = env->regs[R_EAX]; } else { addr = (uint32_t)env->regs[R_EAX]; } - qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmsave! " TARGET_FMT_lx - "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n", - addr, x86_ldq_phys(cs, - addr + offsetof(struct vmcb, save.fs.base)), - env->segs[R_FS].base); + /* Exceptions are checked before the intercept. 
*/ + if (addr & (0xfff | ((~0ULL) << env_archcpu(env)->phys_bits))) { + raise_exception_err_ra(env, EXCP0D_GPF, 0, GETPC()); + } + + cpu_svm_check_intercept_param(env, SVM_EXIT_VMSAVE, 0, GETPC()); + + if (virtual_vm_load_save_enabled(env, SVM_EXIT_VMSAVE, GETPC())) { + mmu_idx = MMU_NESTED_IDX; + } - svm_save_seg(env, addr + offsetof(struct vmcb, save.fs), + svm_save_seg(env, mmu_idx, addr + offsetof(struct vmcb, save.fs), &env->segs[R_FS]); - svm_save_seg(env, addr + offsetof(struct vmcb, save.gs), + svm_save_seg(env, mmu_idx, addr + offsetof(struct vmcb, save.gs), &env->segs[R_GS]); - svm_save_seg(env, addr + offsetof(struct vmcb, save.tr), + svm_save_seg(env, mmu_idx, addr + offsetof(struct vmcb, save.tr), &env->tr); - svm_save_seg(env, addr + offsetof(struct vmcb, save.ldtr), + svm_save_seg(env, mmu_idx, addr + offsetof(struct vmcb, save.ldtr), &env->ldt); #ifdef TARGET_X86_64 - x86_stq_phys(cs, addr + offsetof(struct vmcb, save.kernel_gs_base), - env->kernelgsbase); - x86_stq_phys(cs, addr + offsetof(struct vmcb, save.lstar), env->lstar); - x86_stq_phys(cs, addr + offsetof(struct vmcb, save.cstar), env->cstar); - x86_stq_phys(cs, addr + offsetof(struct vmcb, save.sfmask), env->fmask); + cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.kernel_gs_base), + env->kernelgsbase, mmu_idx, 0); + cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.lstar), + env->lstar, mmu_idx, 0); + cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.cstar), + env->cstar, mmu_idx, 0); + cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sfmask), + env->fmask, mmu_idx, 0); #endif - x86_stq_phys(cs, addr + offsetof(struct vmcb, save.star), env->star); - x86_stq_phys(cs, - addr + offsetof(struct vmcb, save.sysenter_cs), env->sysenter_cs); - x86_stq_phys(cs, addr + offsetof(struct vmcb, save.sysenter_esp), - env->sysenter_esp); - x86_stq_phys(cs, addr + offsetof(struct vmcb, save.sysenter_eip), - env->sysenter_eip); + cpu_stq_mmuidx_ra(env, addr + offsetof(struct 
vmcb, save.star), + env->star, mmu_idx, 0); + cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sysenter_cs), + env->sysenter_cs, mmu_idx, 0); + cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sysenter_esp), + env->sysenter_esp, mmu_idx, 0); + cpu_stq_mmuidx_ra(env, addr + offsetof(struct vmcb, save.sysenter_eip), + env->sysenter_eip, mmu_idx, 0); } void helper_stgi(CPUX86State *env) { cpu_svm_check_intercept_param(env, SVM_EXIT_STGI, 0, GETPC()); - env->hflags2 |= HF2_GIF_MASK; + + if (virtual_gif_enabled(env)) { + env->int_ctl |= V_GIF_MASK; + env->hflags2 |= HF2_VGIF_MASK; + } else { + env->hflags2 |= HF2_GIF_MASK; + } } void helper_clgi(CPUX86State *env) { cpu_svm_check_intercept_param(env, SVM_EXIT_CLGI, 0, GETPC()); - env->hflags2 &= ~HF2_GIF_MASK; -} -void helper_skinit(CPUX86State *env) -{ - cpu_svm_check_intercept_param(env, SVM_EXIT_SKINIT, 0, GETPC()); - /* XXX: not implemented */ - raise_exception(env, EXCP06_ILLOP); -} - -void helper_invlpga(CPUX86State *env, int aflag) -{ - X86CPU *cpu = x86_env_get_cpu(env); - target_ulong addr; - - cpu_svm_check_intercept_param(env, SVM_EXIT_INVLPGA, 0, GETPC()); - - if (aflag == 2) { - addr = env->regs[R_EAX]; + if (virtual_gif_enabled(env)) { + env->int_ctl &= ~V_GIF_MASK; + env->hflags2 &= ~HF2_VGIF_MASK; } else { - addr = (uint32_t)env->regs[R_EAX]; + env->hflags2 &= ~HF2_GIF_MASK; } - - /* XXX: could use the ASID to see if it is needed to do the - flush */ - tlb_flush_page(CPU(cpu), addr); } -void cpu_svm_check_intercept_param(CPUX86State *env, uint32_t type, - uint64_t param, uintptr_t retaddr) +bool cpu_svm_has_intercept(CPUX86State *env, uint32_t type) { - CPUState *cs = CPU(x86_env_get_cpu(env)); - - if (likely(!(env->hflags & HF_GUEST_MASK))) { - return; - } switch (type) { case SVM_EXIT_READ_CR0 ... SVM_EXIT_READ_CR0 + 8: if (env->intercept_cr_read & (1 << (type - SVM_EXIT_READ_CR0))) { - cpu_vmexit(env, type, param, retaddr); + return true; } break; case SVM_EXIT_WRITE_CR0 ... 
SVM_EXIT_WRITE_CR0 + 8: if (env->intercept_cr_write & (1 << (type - SVM_EXIT_WRITE_CR0))) { - cpu_vmexit(env, type, param, retaddr); + return true; } break; case SVM_EXIT_READ_DR0 ... SVM_EXIT_READ_DR0 + 7: if (env->intercept_dr_read & (1 << (type - SVM_EXIT_READ_DR0))) { - cpu_vmexit(env, type, param, retaddr); + return true; } break; case SVM_EXIT_WRITE_DR0 ... SVM_EXIT_WRITE_DR0 + 7: if (env->intercept_dr_write & (1 << (type - SVM_EXIT_WRITE_DR0))) { - cpu_vmexit(env, type, param, retaddr); + return true; } break; case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 31: if (env->intercept_exceptions & (1 << (type - SVM_EXIT_EXCP_BASE))) { - cpu_vmexit(env, type, param, retaddr); - } - break; - case SVM_EXIT_MSR: - if (env->intercept & (1ULL << (SVM_EXIT_MSR - SVM_EXIT_INTR))) { - /* FIXME: this should be read in at vmrun (faster this way?) */ - uint64_t addr = x86_ldq_phys(cs, env->vm_vmcb + - offsetof(struct vmcb, - control.msrpm_base_pa)); - uint32_t t0, t1; - - switch ((uint32_t)env->regs[R_ECX]) { - case 0 ... 0x1fff: - t0 = (env->regs[R_ECX] * 2) % 8; - t1 = (env->regs[R_ECX] * 2) / 8; - break; - case 0xc0000000 ... 0xc0001fff: - t0 = (8192 + env->regs[R_ECX] - 0xc0000000) * 2; - t1 = (t0 / 8); - t0 %= 8; - break; - case 0xc0010000 ... 
0xc0011fff: - t0 = (16384 + env->regs[R_ECX] - 0xc0010000) * 2; - t1 = (t0 / 8); - t0 %= 8; - break; - default: - cpu_vmexit(env, type, param, retaddr); - t0 = 0; - t1 = 0; - break; - } - if (x86_ldub_phys(cs, addr + t1) & ((1 << param) << t0)) { - cpu_vmexit(env, type, param, retaddr); - } + return true; } break; default: if (env->intercept & (1ULL << (type - SVM_EXIT_INTR))) { - cpu_vmexit(env, type, param, retaddr); + return true; } break; } + return false; } -void helper_svm_check_intercept_param(CPUX86State *env, uint32_t type, - uint64_t param) +void cpu_svm_check_intercept_param(CPUX86State *env, uint32_t type, + uint64_t param, uintptr_t retaddr) { - cpu_svm_check_intercept_param(env, type, param, GETPC()); + CPUState *cs = env_cpu(env); + + if (likely(!(env->hflags & HF_GUEST_MASK))) { + return; + } + + if (!cpu_svm_has_intercept(env, type)) { + return; + } + + if (type == SVM_EXIT_MSR) { + /* FIXME: this should be read in at vmrun (faster this way?) */ + uint64_t addr = x86_ldq_phys(cs, env->vm_vmcb + + offsetof(struct vmcb, + control.msrpm_base_pa)); + uint32_t t0, t1; + + switch ((uint32_t)env->regs[R_ECX]) { + case 0 ... 0x1fff: + t0 = (env->regs[R_ECX] * 2) % 8; + t1 = (env->regs[R_ECX] * 2) / 8; + break; + case 0xc0000000 ... 0xc0001fff: + t0 = (8192 + env->regs[R_ECX] - 0xc0000000) * 2; + t1 = (t0 / 8); + t0 %= 8; + break; + case 0xc0010000 ... 
0xc0011fff: + t0 = (16384 + env->regs[R_ECX] - 0xc0010000) * 2; + t1 = (t0 / 8); + t0 %= 8; + break; + default: + cpu_vmexit(env, type, param, retaddr); + t0 = 0; + t1 = 0; + break; + } + if (x86_ldub_phys(cs, addr + t1) & ((1 << param) << t0)) { + cpu_vmexit(env, type, param, retaddr); + } + return; + } + + cpu_vmexit(env, type, param, retaddr); +} + +void helper_svm_check_intercept(CPUX86State *env, uint32_t type) +{ + cpu_svm_check_intercept_param(env, type, 0, GETPC()); } void helper_svm_check_io(CPUX86State *env, uint32_t port, uint32_t param, uint32_t next_eip_addend) { - CPUState *cs = CPU(x86_env_get_cpu(env)); + CPUState *cs = env_cpu(env); if (env->intercept & (1ULL << (SVM_EXIT_IOIO - SVM_EXIT_INTR))) { /* FIXME: this should be read in at vmrun (faster this way?) */ @@ -604,9 +715,9 @@ void helper_svm_check_io(CPUX86State *env, uint32_t port, uint32_t param, void cpu_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1, uintptr_t retaddr) { - CPUState *cs = CPU(x86_env_get_cpu(env)); + CPUState *cs = env_cpu(env); - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(cs, retaddr); qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016" PRIx64 ", " TARGET_FMT_lx ")!\n", @@ -615,18 +726,21 @@ void cpu_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1, control.exit_info_2)), env->eip); - cs->exception_index = EXCP_VMEXIT + exit_code; - env->error_code = exit_info_1; + cs->exception_index = EXCP_VMEXIT; + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_code), + exit_code); + + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, + control.exit_info_1), exit_info_1), /* remove any pending exception */ env->old_exception = -1; cpu_loop_exit(cs); } -void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) +void do_vmexit(CPUX86State *env) { - CPUState *cs = CPU(x86_env_get_cpu(env)); - uint32_t int_ctl; + CPUState *cs = env_cpu(env); if (env->hflags & HF_INHIBIT_IRQ_MASK) { 
x86_stl_phys(cs, @@ -638,15 +752,20 @@ void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) env->vm_vmcb + offsetof(struct vmcb, control.int_state), 0); } env->hflags2 &= ~HF2_NPT_MASK; + tlb_flush_by_mmuidx(cs, 1 << MMU_NESTED_IDX); /* Save the VM state in the vmcb */ - svm_save_seg(env, env->vm_vmcb + offsetof(struct vmcb, save.es), + svm_save_seg(env, MMU_PHYS_IDX, + env->vm_vmcb + offsetof(struct vmcb, save.es), &env->segs[R_ES]); - svm_save_seg(env, env->vm_vmcb + offsetof(struct vmcb, save.cs), + svm_save_seg(env, MMU_PHYS_IDX, + env->vm_vmcb + offsetof(struct vmcb, save.cs), &env->segs[R_CS]); - svm_save_seg(env, env->vm_vmcb + offsetof(struct vmcb, save.ss), + svm_save_seg(env, MMU_PHYS_IDX, + env->vm_vmcb + offsetof(struct vmcb, save.ss), &env->segs[R_SS]); - svm_save_seg(env, env->vm_vmcb + offsetof(struct vmcb, save.ds), + svm_save_seg(env, MMU_PHYS_IDX, + env->vm_vmcb + offsetof(struct vmcb, save.ds), &env->segs[R_DS]); x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base), @@ -669,16 +788,8 @@ void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]); x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]); - - int_ctl = x86_ldl_phys(cs, - env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)); - int_ctl &= ~(V_TPR_MASK | V_IRQ_MASK); - int_ctl |= env->v_tpr & V_TPR_MASK; - if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) { - int_ctl |= V_IRQ_MASK; - } x86_stl_phys(cs, - env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl); + env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), env->int_ctl); x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.rflags), cpu_compute_eflags(env)); @@ -701,6 +812,7 @@ void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) env->intercept = 0; env->intercept_exceptions = 0; cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; + env->int_ctl = 0; 
env->tsc_offset = 0; env->gdt.base = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, @@ -734,14 +846,14 @@ void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK | VM_MASK)); - svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.es), - R_ES); - svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.cs), - R_CS); - svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.ss), - R_SS); - svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.ds), - R_DS); + svm_load_seg_cache(env, MMU_PHYS_IDX, + env->vm_hsave + offsetof(struct vmcb, save.es), R_ES); + svm_load_seg_cache(env, MMU_PHYS_IDX, + env->vm_hsave + offsetof(struct vmcb, save.cs), R_CS); + svm_load_seg_cache(env, MMU_PHYS_IDX, + env->vm_hsave + offsetof(struct vmcb, save.ss), R_SS); + svm_load_seg_cache(env, MMU_PHYS_IDX, + env->vm_hsave + offsetof(struct vmcb, save.ds), R_DS); env->eip = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.rip)); @@ -756,11 +868,6 @@ void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) env->vm_hsave + offsetof(struct vmcb, save.dr7)); /* other setups */ - x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_code), - exit_code); - x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), - exit_info_1); - x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info), x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, @@ -773,6 +880,7 @@ void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) env->vm_vmcb + offsetof(struct vmcb, control.event_inj), 0); env->hflags2 &= ~HF2_GIF_MASK; + env->hflags2 &= ~HF2_VGIF_MASK; /* FIXME: Resets the current ASID register to zero (host ASID). */ /* Clears the V_IRQ and V_INTR_MASKING bits inside the processor. 
*/ @@ -791,5 +899,3 @@ void do_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) host's code segment or non-canonical (in the case of long mode), a #GP fault is delivered inside the host. */ } - -#endif diff --git a/target/i386/tcg/sysemu/tcg-cpu.c b/target/i386/tcg/sysemu/tcg-cpu.c new file mode 100644 index 0000000000..c223c0fe9b --- /dev/null +++ b/target/i386/tcg/sysemu/tcg-cpu.c @@ -0,0 +1,83 @@ +/* + * i386 TCG cpu class initialization functions specific to sysemu + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "tcg/helper-tcg.h" + +#include "sysemu/sysemu.h" +#include "qemu/units.h" +#include "exec/address-spaces.h" + +#include "tcg/tcg-cpu.h" + +static void tcg_cpu_machine_done(Notifier *n, void *unused) +{ + X86CPU *cpu = container_of(n, X86CPU, machine_done); + MemoryRegion *smram = + (MemoryRegion *) object_resolve_path("/machine/smram", NULL); + + if (smram) { + cpu->smram = g_new(MemoryRegion, 1); + memory_region_init_alias(cpu->smram, OBJECT(cpu), "smram", + smram, 0, 4 * GiB); + memory_region_set_enabled(cpu->smram, true); + memory_region_add_subregion_overlap(cpu->cpu_as_root, 0, + cpu->smram, 1); + } +} + +bool tcg_cpu_realizefn(CPUState *cs, Error **errp) +{ + X86CPU *cpu = X86_CPU(cs); + + /* + * The realize order is important, since x86_cpu_realize() checks if + * nothing else has been set by the user (or by accelerators) in + * cpu->ucode_rev and cpu->phys_bits, and the memory regions + * initialized here are needed for the vcpu initialization. + * + * realize order: + * tcg_cpu -> host_cpu -> x86_cpu + */ + cpu->cpu_as_mem = g_new(MemoryRegion, 1); + cpu->cpu_as_root = g_new(MemoryRegion, 1); + + /* Outer container... */ + memory_region_init(cpu->cpu_as_root, OBJECT(cpu), "memory", ~0ull); + memory_region_set_enabled(cpu->cpu_as_root, true); + + /* + * ... with two regions inside: normal system memory with low + * priority, and... + */ + memory_region_init_alias(cpu->cpu_as_mem, OBJECT(cpu), "memory", + get_system_memory(), 0, ~0ull); + memory_region_add_subregion_overlap(cpu->cpu_as_root, 0, cpu->cpu_as_mem, 0); + memory_region_set_enabled(cpu->cpu_as_mem, true); + + cs->num_ases = 2; + cpu_address_space_init(cs, 0, "cpu-memory", cs->memory); + cpu_address_space_init(cs, 1, "cpu-smm", cpu->cpu_as_root); + + /* ... SMRAM with higher priority, linked from /machine/smram. 
*/ + cpu->machine_done.notify = tcg_cpu_machine_done; + qemu_add_machine_init_done_notifier(&cpu->machine_done); + return true; +} diff --git a/target/i386/tcg/tcg-cpu.c b/target/i386/tcg/tcg-cpu.c new file mode 100644 index 0000000000..cca19cd40e --- /dev/null +++ b/target/i386/tcg/tcg-cpu.c @@ -0,0 +1,204 @@ +/* + * i386 TCG cpu class initialization + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "helper-tcg.h" +#include "qemu/accel.h" +#include "hw/core/accel-cpu.h" + +#include "tcg-cpu.h" + +/* Frob eflags into and out of the CPU temporary format. */ + +static void x86_cpu_exec_enter(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); + env->df = 1 - (2 * ((env->eflags >> 10) & 1)); + CC_OP = CC_OP_EFLAGS; + env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); +} + +static void x86_cpu_exec_exit(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + env->eflags = cpu_compute_eflags(env); +} + +static void x86_cpu_synchronize_from_tb(CPUState *cs, + const TranslationBlock *tb) +{ + /* The instruction pointer is always up to date with CF_PCREL. 
*/ + if (!(tb_cflags(tb) & CF_PCREL)) { + CPUX86State *env = cpu_env(cs); + + if (tb->flags & HF_CS64_MASK) { + env->eip = tb->pc; + } else { + env->eip = (uint32_t)(tb->pc - tb->cs_base); + } + } +} + +static void x86_restore_state_to_opc(CPUState *cs, + const TranslationBlock *tb, + const uint64_t *data) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + int cc_op = data[1]; + uint64_t new_pc; + + if (tb_cflags(tb) & CF_PCREL) { + /* + * data[0] in PC-relative TBs is also a linear address, i.e. an address with + * the CS base added, because it is not guaranteed that EIP bits 12 and higher + * stay the same across the translation block. Add the CS base back before + * replacing the low bits, and subtract it below just like for !CF_PCREL. + */ + uint64_t pc = env->eip + tb->cs_base; + new_pc = (pc & TARGET_PAGE_MASK) | data[0]; + } else { + new_pc = data[0]; + } + if (tb->flags & HF_CS64_MASK) { + env->eip = new_pc; + } else { + env->eip = (uint32_t)(new_pc - tb->cs_base); + } + + if (cc_op != CC_OP_DYNAMIC) { + env->cc_op = cc_op; + } +} + +#ifndef CONFIG_USER_ONLY +static bool x86_debug_check_breakpoint(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + /* RF disables all architectural breakpoints. 
*/ + return !(env->eflags & RF_MASK); +} +#endif + +#include "hw/core/tcg-cpu-ops.h" + +static const TCGCPUOps x86_tcg_ops = { + .initialize = tcg_x86_init, + .synchronize_from_tb = x86_cpu_synchronize_from_tb, + .restore_state_to_opc = x86_restore_state_to_opc, + .cpu_exec_enter = x86_cpu_exec_enter, + .cpu_exec_exit = x86_cpu_exec_exit, +#ifdef CONFIG_USER_ONLY + .fake_user_interrupt = x86_cpu_do_interrupt, + .record_sigsegv = x86_cpu_record_sigsegv, + .record_sigbus = x86_cpu_record_sigbus, +#else + .tlb_fill = x86_cpu_tlb_fill, + .do_interrupt = x86_cpu_do_interrupt, + .cpu_exec_halt = x86_cpu_exec_halt, + .cpu_exec_interrupt = x86_cpu_exec_interrupt, + .do_unaligned_access = x86_cpu_do_unaligned_access, + .debug_excp_handler = breakpoint_handler, + .debug_check_breakpoint = x86_debug_check_breakpoint, + .need_replay_interrupt = x86_need_replay_interrupt, +#endif /* !CONFIG_USER_ONLY */ +}; + +static void x86_tcg_cpu_init_ops(AccelCPUClass *accel_cpu, CPUClass *cc) +{ + /* for x86, all cpus use the same set of operations */ + cc->tcg_ops = &x86_tcg_ops; +} + +static void x86_tcg_cpu_class_init(CPUClass *cc) +{ + cc->init_accel_cpu = x86_tcg_cpu_init_ops; +} + +static void x86_tcg_cpu_xsave_init(void) +{ +#define XO(bit, field) \ + x86_ext_save_areas[bit].offset = offsetof(X86XSaveArea, field); + + XO(XSTATE_FP_BIT, legacy); + XO(XSTATE_SSE_BIT, legacy); + XO(XSTATE_YMM_BIT, avx_state); + XO(XSTATE_BNDREGS_BIT, bndreg_state); + XO(XSTATE_BNDCSR_BIT, bndcsr_state); + XO(XSTATE_OPMASK_BIT, opmask_state); + XO(XSTATE_ZMM_Hi256_BIT, zmm_hi256_state); + XO(XSTATE_Hi16_ZMM_BIT, hi16_zmm_state); + XO(XSTATE_PKRU_BIT, pkru_state); + +#undef XO +} + +/* + * TCG-specific defaults that override cpudef models when using TCG. + * Only for builtin_x86_defs models initialized with x86_register_cpudef_types. 
+ */ +static PropValue x86_tcg_default_props[] = { + { "vme", "off" }, + { NULL, NULL }, +}; + +static void x86_tcg_cpu_instance_init(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + X86CPUClass *xcc = X86_CPU_GET_CLASS(cpu); + + if (xcc->model) { + /* Special cases not set in the X86CPUDefinition structs: */ + x86_cpu_apply_props(cpu, x86_tcg_default_props); + } + + x86_tcg_cpu_xsave_init(); +} + +static void x86_tcg_cpu_accel_class_init(ObjectClass *oc, void *data) +{ + AccelCPUClass *acc = ACCEL_CPU_CLASS(oc); + +#ifndef CONFIG_USER_ONLY + acc->cpu_target_realize = tcg_cpu_realizefn; +#endif /* CONFIG_USER_ONLY */ + + acc->cpu_class_init = x86_tcg_cpu_class_init; + acc->cpu_instance_init = x86_tcg_cpu_instance_init; +} +static const TypeInfo x86_tcg_cpu_accel_type_info = { + .name = ACCEL_CPU_NAME("tcg"), + + .parent = TYPE_ACCEL_CPU, + .class_init = x86_tcg_cpu_accel_class_init, + .abstract = true, +}; +static void x86_tcg_cpu_accel_register_types(void) +{ + type_register_static(&x86_tcg_cpu_accel_type_info); +} +type_init(x86_tcg_cpu_accel_register_types); diff --git a/target/i386/tcg/tcg-cpu.h b/target/i386/tcg/tcg-cpu.h new file mode 100644 index 0000000000..53a8494455 --- /dev/null +++ b/target/i386/tcg/tcg-cpu.h @@ -0,0 +1,81 @@ +/* + * i386 TCG cpu class initialization functions + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ +#ifndef TCG_CPU_H +#define TCG_CPU_H + +#define XSAVE_FCW_FSW_OFFSET 0x000 +#define XSAVE_FTW_FOP_OFFSET 0x004 +#define XSAVE_CWD_RIP_OFFSET 0x008 +#define XSAVE_CWD_RDP_OFFSET 0x010 +#define XSAVE_MXCSR_OFFSET 0x018 +#define XSAVE_ST_SPACE_OFFSET 0x020 +#define XSAVE_XMM_SPACE_OFFSET 0x0a0 +#define XSAVE_XSTATE_BV_OFFSET 0x200 +#define XSAVE_AVX_OFFSET 0x240 +#define XSAVE_BNDREG_OFFSET 0x3c0 +#define XSAVE_BNDCSR_OFFSET 0x400 +#define XSAVE_OPMASK_OFFSET 0x440 +#define XSAVE_ZMM_HI256_OFFSET 0x480 +#define XSAVE_HI16_ZMM_OFFSET 0x680 +#define XSAVE_PKRU_OFFSET 0xa80 + +typedef struct X86XSaveArea { + X86LegacyXSaveArea legacy; + X86XSaveHeader header; + + /* Extended save areas: */ + + /* AVX State: */ + XSaveAVX avx_state; + + /* Ensure that XSaveBNDREG is properly aligned. */ + uint8_t padding[XSAVE_BNDREG_OFFSET + - sizeof(X86LegacyXSaveArea) + - sizeof(X86XSaveHeader) + - sizeof(XSaveAVX)]; + + /* MPX State: */ + XSaveBNDREG bndreg_state; + XSaveBNDCSR bndcsr_state; + /* AVX-512 State: */ + XSaveOpmask opmask_state; + XSaveZMM_Hi256 zmm_hi256_state; + XSaveHi16_ZMM hi16_zmm_state; + /* PKRU State: */ + XSavePKRU pkru_state; +} X86XSaveArea; + +QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.fcw) != XSAVE_FCW_FSW_OFFSET); +QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.ftw) != XSAVE_FTW_FOP_OFFSET); +QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.fpip) != XSAVE_CWD_RIP_OFFSET); +QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.fpdp) != XSAVE_CWD_RDP_OFFSET); +QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.mxcsr) != XSAVE_MXCSR_OFFSET); +QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.fpregs) != XSAVE_ST_SPACE_OFFSET); +QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, legacy.xmm_regs) != XSAVE_XMM_SPACE_OFFSET); +QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, avx_state) != XSAVE_AVX_OFFSET); 
+QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, bndreg_state) != XSAVE_BNDREG_OFFSET); +QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, bndcsr_state) != XSAVE_BNDCSR_OFFSET); +QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, opmask_state) != XSAVE_OPMASK_OFFSET); +QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, zmm_hi256_state) != XSAVE_ZMM_HI256_OFFSET); +QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, hi16_zmm_state) != XSAVE_HI16_ZMM_OFFSET); +QEMU_BUILD_BUG_ON(offsetof(X86XSaveArea, pkru_state) != XSAVE_PKRU_OFFSET); + +bool tcg_cpu_realizefn(CPUState *cs, Error **errp); + +#endif /* TCG_CPU_H */ diff --git a/target/i386/tcg/tcg-stub.c b/target/i386/tcg/tcg-stub.c new file mode 100644 index 0000000000..8d45579ada --- /dev/null +++ b/target/i386/tcg/tcg-stub.c @@ -0,0 +1,25 @@ +/* + * x86 FPU, MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4/PNI helpers + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" + +void update_mxcsr_from_sse_status(CPUX86State *env) +{ +} diff --git a/target/i386/translate.c b/target/i386/tcg/translate.c index 83c1ebe491..76a42c679c 100644 --- a/target/i386/translate.c +++ b/target/i386/tcg/translate.c @@ -6,7 +6,7 @@ * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. + * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -22,32 +22,30 @@ #include "cpu.h" #include "disas/disas.h" #include "exec/exec-all.h" -#include "tcg-op.h" +#include "tcg/tcg-op.h" +#include "tcg/tcg-op-gvec.h" #include "exec/cpu_ldst.h" #include "exec/translator.h" +#include "fpu/softfloat.h" #include "exec/helper-proto.h" #include "exec/helper-gen.h" +#include "helper-tcg.h" -#include "trace-tcg.h" #include "exec/log.h" +#define HELPER_H "helper.h" +#include "exec/helper-info.c.inc" +#undef HELPER_H + + #define PREFIX_REPZ 0x01 #define PREFIX_REPNZ 0x02 #define PREFIX_LOCK 0x04 #define PREFIX_DATA 0x08 #define PREFIX_ADR 0x10 #define PREFIX_VEX 0x20 - -#ifdef TARGET_X86_64 -#define CODE64(s) ((s)->code64) -#define REX_X(s) ((s)->rex_x) -#define REX_B(s) ((s)->rex_b) -#else -#define CODE64(s) 0 -#define REX_X(s) 0 -#define REX_B(s) 0 -#endif +#define PREFIX_REX 0x40 #ifdef TARGET_X86_64 # define ctztl ctz64 @@ -73,58 +71,58 @@ /* global register indexes */ static TCGv cpu_cc_dst, cpu_cc_src, cpu_cc_src2; +static TCGv cpu_eip; static TCGv_i32 cpu_cc_op; static TCGv cpu_regs[CPU_NB_REGS]; static TCGv cpu_seg_base[6]; static TCGv_i64 cpu_bndl[4]; static TCGv_i64 cpu_bndu[4]; -#include "exec/gen-icount.h" - typedef struct DisasContext { DisasContextBase base; - /* current 
insn context */ - int override; /* -1 if no override */ - int prefix; - TCGMemOp aflag; - TCGMemOp dflag; - target_ulong pc_start; - target_ulong pc; /* pc = eip + cs_base */ - /* current block context */ - target_ulong cs_base; /* base of CS segment */ - int pe; /* protected mode */ - int code32; /* 32 bit code segment */ -#ifdef TARGET_X86_64 - int lma; /* long mode active */ - int code64; /* 64 bit code segment */ - int rex_x, rex_b; + target_ulong pc; /* pc = eip + cs_base */ + target_ulong cs_base; /* base of CS segment */ + target_ulong pc_save; + + MemOp aflag; + MemOp dflag; + + int8_t override; /* -1 if no override, else R_CS, R_DS, etc */ + uint8_t prefix; + + bool has_modrm; + uint8_t modrm; + +#ifndef CONFIG_USER_ONLY + uint8_t cpl; /* code priv level */ + uint8_t iopl; /* i/o priv level */ #endif - int vex_l; /* vex vector length */ - int vex_v; /* vex vvvv register, without 1's complement. */ - int ss32; /* 32 bit stack segment */ - CCOp cc_op; /* current CC operation */ - bool cc_op_dirty; + uint8_t vex_l; /* vex vector length */ + uint8_t vex_v; /* vex vvvv register, without 1's complement. 
*/ + uint8_t popl_esp_hack; /* for correct popl with esp base handling */ + uint8_t rip_offset; /* only used in x86_64, but left for simplicity */ + #ifdef TARGET_X86_64 - bool x86_64_hregs; + uint8_t rex_r; + uint8_t rex_x; + uint8_t rex_b; #endif - int addseg; /* non zero if either DS/ES/SS have a non zero base */ - int f_st; /* currently unused */ - int vm86; /* vm86 mode */ - int cpl; - int iopl; - int tf; /* TF cpu flag */ - int jmp_opt; /* use direct block chaining for direct jumps */ - int repz_opt; /* optimize jumps within repz instructions */ + bool vex_w; /* used by AVX even on 32-bit processors */ + bool jmp_opt; /* use direct block chaining for direct jumps */ + bool repz_opt; /* optimize jumps within repz instructions */ + bool cc_op_dirty; + + CCOp cc_op; /* current CC operation */ int mem_index; /* select memory access functions */ - uint64_t flags; /* all execution flags */ - int popl_esp_hack; /* for correct popl with esp base handling */ - int rip_offset; /* only used in x86_64, but left for simplicity */ + uint32_t flags; /* all execution flags */ int cpuid_features; int cpuid_ext_features; int cpuid_ext2_features; int cpuid_ext3_features; int cpuid_7_0_ebx_features; + int cpuid_7_0_ecx_features; + int cpuid_7_1_eax_features; int cpuid_xsave_features; /* TCG local temps */ @@ -136,20 +134,113 @@ typedef struct DisasContext { /* TCG local register indexes (only used inside old micro ops) */ TCGv tmp0; TCGv tmp4; - TCGv_ptr ptr0; - TCGv_ptr ptr1; TCGv_i32 tmp2_i32; TCGv_i32 tmp3_i32; TCGv_i64 tmp1_i64; sigjmp_buf jmpbuf; + TCGOp *prev_insn_start; + TCGOp *prev_insn_end; } DisasContext; +#define DISAS_EOB_ONLY DISAS_TARGET_0 +#define DISAS_EOB_NEXT DISAS_TARGET_1 +#define DISAS_EOB_INHIBIT_IRQ DISAS_TARGET_2 +#define DISAS_JUMP DISAS_TARGET_3 + +/* The environment in which user-only runs is constrained. 
*/ +#ifdef CONFIG_USER_ONLY +#define PE(S) true +#define CPL(S) 3 +#define IOPL(S) 0 +#define SVME(S) false +#define GUEST(S) false +#else +#define PE(S) (((S)->flags & HF_PE_MASK) != 0) +#define CPL(S) ((S)->cpl) +#define IOPL(S) ((S)->iopl) +#define SVME(S) (((S)->flags & HF_SVME_MASK) != 0) +#define GUEST(S) (((S)->flags & HF_GUEST_MASK) != 0) +#endif +#if defined(CONFIG_USER_ONLY) && defined(TARGET_X86_64) +#define VM86(S) false +#define CODE32(S) true +#define SS32(S) true +#define ADDSEG(S) false +#else +#define VM86(S) (((S)->flags & HF_VM_MASK) != 0) +#define CODE32(S) (((S)->flags & HF_CS32_MASK) != 0) +#define SS32(S) (((S)->flags & HF_SS32_MASK) != 0) +#define ADDSEG(S) (((S)->flags & HF_ADDSEG_MASK) != 0) +#endif +#if !defined(TARGET_X86_64) +#define CODE64(S) false +#elif defined(CONFIG_USER_ONLY) +#define CODE64(S) true +#else +#define CODE64(S) (((S)->flags & HF_CS64_MASK) != 0) +#endif +#if defined(CONFIG_USER_ONLY) || defined(TARGET_X86_64) +#define LMA(S) (((S)->flags & HF_LMA_MASK) != 0) +#else +#define LMA(S) false +#endif + +#ifdef TARGET_X86_64 +#define REX_PREFIX(S) (((S)->prefix & PREFIX_REX) != 0) +#define REX_W(S) ((S)->vex_w) +#define REX_R(S) ((S)->rex_r + 0) +#define REX_X(S) ((S)->rex_x + 0) +#define REX_B(S) ((S)->rex_b + 0) +#else +#define REX_PREFIX(S) false +#define REX_W(S) false +#define REX_R(S) 0 +#define REX_X(S) 0 +#define REX_B(S) 0 +#endif + +/* + * Many sysemu-only helpers are not reachable for user-only. + * Define stub generators here, so that we need not either sprinkle + * ifdefs through the translator, nor provide the helper function. + */ +#define STUB_HELPER(NAME, ...) 
\ + static inline void gen_helper_##NAME(__VA_ARGS__) \ + { qemu_build_not_reached(); } + +#ifdef CONFIG_USER_ONLY +STUB_HELPER(clgi, TCGv_env env) +STUB_HELPER(flush_page, TCGv_env env, TCGv addr) +STUB_HELPER(hlt, TCGv_env env, TCGv_i32 pc_ofs) +STUB_HELPER(inb, TCGv ret, TCGv_env env, TCGv_i32 port) +STUB_HELPER(inw, TCGv ret, TCGv_env env, TCGv_i32 port) +STUB_HELPER(inl, TCGv ret, TCGv_env env, TCGv_i32 port) +STUB_HELPER(monitor, TCGv_env env, TCGv addr) +STUB_HELPER(mwait, TCGv_env env, TCGv_i32 pc_ofs) +STUB_HELPER(outb, TCGv_env env, TCGv_i32 port, TCGv_i32 val) +STUB_HELPER(outw, TCGv_env env, TCGv_i32 port, TCGv_i32 val) +STUB_HELPER(outl, TCGv_env env, TCGv_i32 port, TCGv_i32 val) +STUB_HELPER(rdmsr, TCGv_env env) +STUB_HELPER(read_crN, TCGv ret, TCGv_env env, TCGv_i32 reg) +STUB_HELPER(get_dr, TCGv ret, TCGv_env env, TCGv_i32 reg) +STUB_HELPER(set_dr, TCGv_env env, TCGv_i32 reg, TCGv val) +STUB_HELPER(stgi, TCGv_env env) +STUB_HELPER(svm_check_intercept, TCGv_env env, TCGv_i32 type) +STUB_HELPER(vmload, TCGv_env env, TCGv_i32 aflag) +STUB_HELPER(vmmcall, TCGv_env env) +STUB_HELPER(vmrun, TCGv_env env, TCGv_i32 aflag, TCGv_i32 pc_ofs) +STUB_HELPER(vmsave, TCGv_env env, TCGv_i32 aflag) +STUB_HELPER(write_crN, TCGv_env env, TCGv_i32 reg, TCGv val) +STUB_HELPER(wrmsr, TCGv_env env) +#endif + static void gen_eob(DisasContext *s); -static void gen_jr(DisasContext *s, TCGv dest); -static void gen_jmp(DisasContext *s, target_ulong eip); -static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num); -static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d); +static void gen_jr(DisasContext *s); +static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num); +static void gen_jmp_rel_csize(DisasContext *s, int diff, int tb_num); +static void gen_op(DisasContext *s1, int op, MemOp ot, int d); +static void gen_exception_gpf(DisasContext *s); /* i386 arith/logic operations */ enum { @@ -286,7 +377,7 @@ static void 
gen_update_cc_op(DisasContext *s) #endif /* !TARGET_X86_64 */ -#if defined(HOST_WORDS_BIGENDIAN) +#if HOST_BIG_ENDIAN #define REG_B_OFFSET (sizeof(target_ulong) - 1) #define REG_H_OFFSET (sizeof(target_ulong) - 2) #define REG_W_OFFSET (sizeof(target_ulong) - 2) @@ -308,19 +399,15 @@ static void gen_update_cc_op(DisasContext *s) */ static inline bool byte_reg_is_xH(DisasContext *s, int reg) { - if (reg < 4) { - return false; - } -#ifdef TARGET_X86_64 - if (reg >= 8 || s->x86_64_hregs) { + /* Any time the REX prefix is present, byte registers are uniform */ + if (reg < 4 || REX_PREFIX(s)) { return false; } -#endif return true; } /* Select the size of a push/pop operation. */ -static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot) +static inline MemOp mo_pushpop(DisasContext *s, MemOp ot) { if (CODE64(s)) { return ot == MO_16 ? MO_16 : MO_64; @@ -330,13 +417,13 @@ static inline TCGMemOp mo_pushpop(DisasContext *s, TCGMemOp ot) } /* Select the size of the stack pointer. */ -static inline TCGMemOp mo_stacksize(DisasContext *s) +static inline MemOp mo_stacksize(DisasContext *s) { - return CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16; + return CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16; } /* Select only size 64 else 32. Used for SSE operand sizes. */ -static inline TCGMemOp mo_64_32(TCGMemOp ot) +static inline MemOp mo_64_32(MemOp ot) { #ifdef TARGET_X86_64 return ot == MO_64 ? MO_64 : MO_32; @@ -347,48 +434,67 @@ static inline TCGMemOp mo_64_32(TCGMemOp ot) /* Select size 8 if lsb of B is clear, else OT. Used for decoding byte vs word opcodes. */ -static inline TCGMemOp mo_b_d(int b, TCGMemOp ot) +static inline MemOp mo_b_d(int b, MemOp ot) { return b & 1 ? ot : MO_8; } /* Select size 8 if lsb of B is clear, else OT capped at 32. Used for decoding operand size of port opcodes. */ -static inline TCGMemOp mo_b_d32(int b, TCGMemOp ot) +static inline MemOp mo_b_d32(int b, MemOp ot) { return b & 1 ? (ot == MO_16 ? 
MO_16 : MO_32) : MO_8; } -static void gen_op_mov_reg_v(DisasContext *s, TCGMemOp ot, int reg, TCGv t0) +/* Compute the result of writing t0 to the OT-sized register REG. + * + * If DEST is NULL, store the result into the register and return the + * register's TCGv. + * + * If DEST is not NULL, store the result into DEST and return the + * register's TCGv. + */ +static TCGv gen_op_deposit_reg_v(DisasContext *s, MemOp ot, int reg, TCGv dest, TCGv t0) { switch(ot) { case MO_8: - if (!byte_reg_is_xH(s, reg)) { - tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 8); - } else { - tcg_gen_deposit_tl(cpu_regs[reg - 4], cpu_regs[reg - 4], t0, 8, 8); + if (byte_reg_is_xH(s, reg)) { + dest = dest ? dest : cpu_regs[reg - 4]; + tcg_gen_deposit_tl(dest, cpu_regs[reg - 4], t0, 8, 8); + return cpu_regs[reg - 4]; } + dest = dest ? dest : cpu_regs[reg]; + tcg_gen_deposit_tl(dest, cpu_regs[reg], t0, 0, 8); break; case MO_16: - tcg_gen_deposit_tl(cpu_regs[reg], cpu_regs[reg], t0, 0, 16); + dest = dest ? dest : cpu_regs[reg]; + tcg_gen_deposit_tl(dest, cpu_regs[reg], t0, 0, 16); break; case MO_32: /* For x86_64, this sets the higher half of register to zero. For i386, this is equivalent to a mov. */ - tcg_gen_ext32u_tl(cpu_regs[reg], t0); + dest = dest ? dest : cpu_regs[reg]; + tcg_gen_ext32u_tl(dest, t0); break; #ifdef TARGET_X86_64 case MO_64: - tcg_gen_mov_tl(cpu_regs[reg], t0); + dest = dest ? 
dest : cpu_regs[reg]; + tcg_gen_mov_tl(dest, t0); break; #endif default: - tcg_abort(); + g_assert_not_reached(); } + return cpu_regs[reg]; +} + +static void gen_op_mov_reg_v(DisasContext *s, MemOp ot, int reg, TCGv t0) +{ + gen_op_deposit_reg_v(s, ot, reg, NULL, t0); } static inline -void gen_op_mov_v_reg(DisasContext *s, TCGMemOp ot, TCGv t0, int reg) +void gen_op_mov_v_reg(DisasContext *s, MemOp ot, TCGv t0, int reg) { if (ot == MO_8 && byte_reg_is_xH(s, reg)) { tcg_gen_extract_tl(t0, cpu_regs[reg - 4], 8, 8); @@ -405,21 +511,22 @@ static void gen_add_A0_im(DisasContext *s, int val) } } -static inline void gen_op_jmp_v(TCGv dest) +static inline void gen_op_jmp_v(DisasContext *s, TCGv dest) { - tcg_gen_st_tl(dest, cpu_env, offsetof(CPUX86State, eip)); + tcg_gen_mov_tl(cpu_eip, dest); + s->pc_save = -1; } static inline -void gen_op_add_reg_im(DisasContext *s, TCGMemOp size, int reg, int32_t val) +void gen_op_add_reg_im(DisasContext *s, MemOp size, int reg, int32_t val) { tcg_gen_addi_tl(s->tmp0, cpu_regs[reg], val); gen_op_mov_reg_v(s, size, reg, s->tmp0); } -static inline void gen_op_add_reg_T0(DisasContext *s, TCGMemOp size, int reg) +static inline void gen_op_add_reg(DisasContext *s, MemOp size, int reg, TCGv val) { - tcg_gen_add_tl(s->tmp0, cpu_regs[reg], s->T0); + tcg_gen_add_tl(s->tmp0, cpu_regs[reg], val); gen_op_mov_reg_v(s, size, reg, s->tmp0); } @@ -442,43 +549,125 @@ static inline void gen_op_st_rm_T0_A0(DisasContext *s, int idx, int d) } } -static inline void gen_jmp_im(DisasContext *s, target_ulong pc) +static void gen_update_eip_cur(DisasContext *s) +{ + assert(s->pc_save != -1); + if (tb_cflags(s->base.tb) & CF_PCREL) { + tcg_gen_addi_tl(cpu_eip, cpu_eip, s->base.pc_next - s->pc_save); + } else if (CODE64(s)) { + tcg_gen_movi_tl(cpu_eip, s->base.pc_next); + } else { + tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->base.pc_next - s->cs_base)); + } + s->pc_save = s->base.pc_next; +} + +static void gen_update_eip_next(DisasContext *s) +{ + assert(s->pc_save != 
-1); + if (tb_cflags(s->base.tb) & CF_PCREL) { + tcg_gen_addi_tl(cpu_eip, cpu_eip, s->pc - s->pc_save); + } else if (CODE64(s)) { + tcg_gen_movi_tl(cpu_eip, s->pc); + } else { + tcg_gen_movi_tl(cpu_eip, (uint32_t)(s->pc - s->cs_base)); + } + s->pc_save = s->pc; +} + +static int cur_insn_len(DisasContext *s) { - tcg_gen_movi_tl(s->tmp0, pc); - gen_op_jmp_v(s->tmp0); + return s->pc - s->base.pc_next; +} + +static TCGv_i32 cur_insn_len_i32(DisasContext *s) +{ + return tcg_constant_i32(cur_insn_len(s)); +} + +static TCGv_i32 eip_next_i32(DisasContext *s) +{ + assert(s->pc_save != -1); + /* + * This function has two users: lcall_real (always 16-bit mode), and + * iret_protected (16, 32, or 64-bit mode). IRET only uses the value + * when EFLAGS.NT is set, which is illegal in 64-bit mode, which is + * why passing a 32-bit value isn't broken. To avoid using this where + * we shouldn't, return -1 in 64-bit mode so that execution goes into + * the weeds quickly. + */ + if (CODE64(s)) { + return tcg_constant_i32(-1); + } + if (tb_cflags(s->base.tb) & CF_PCREL) { + TCGv_i32 ret = tcg_temp_new_i32(); + tcg_gen_trunc_tl_i32(ret, cpu_eip); + tcg_gen_addi_i32(ret, ret, s->pc - s->pc_save); + return ret; + } else { + return tcg_constant_i32(s->pc - s->cs_base); + } } -/* Compute SEG:REG into A0. 
SEG is selected from the override segment +static TCGv eip_next_tl(DisasContext *s) +{ + assert(s->pc_save != -1); + if (tb_cflags(s->base.tb) & CF_PCREL) { + TCGv ret = tcg_temp_new(); + tcg_gen_addi_tl(ret, cpu_eip, s->pc - s->pc_save); + return ret; + } else if (CODE64(s)) { + return tcg_constant_tl(s->pc); + } else { + return tcg_constant_tl((uint32_t)(s->pc - s->cs_base)); + } +} + +static TCGv eip_cur_tl(DisasContext *s) +{ + assert(s->pc_save != -1); + if (tb_cflags(s->base.tb) & CF_PCREL) { + TCGv ret = tcg_temp_new(); + tcg_gen_addi_tl(ret, cpu_eip, s->base.pc_next - s->pc_save); + return ret; + } else if (CODE64(s)) { + return tcg_constant_tl(s->base.pc_next); + } else { + return tcg_constant_tl((uint32_t)(s->base.pc_next - s->cs_base)); + } +} + +/* Compute SEG:REG into DEST. SEG is selected from the override segment (OVR_SEG) and the default segment (DEF_SEG). OVR_SEG may be -1 to indicate no override. */ -static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0, - int def_seg, int ovr_seg) +static void gen_lea_v_seg_dest(DisasContext *s, MemOp aflag, TCGv dest, TCGv a0, + int def_seg, int ovr_seg) { switch (aflag) { #ifdef TARGET_X86_64 case MO_64: if (ovr_seg < 0) { - tcg_gen_mov_tl(s->A0, a0); + tcg_gen_mov_tl(dest, a0); return; } break; #endif case MO_32: /* 32 bit address */ - if (ovr_seg < 0 && s->addseg) { + if (ovr_seg < 0 && ADDSEG(s)) { ovr_seg = def_seg; } if (ovr_seg < 0) { - tcg_gen_ext32u_tl(s->A0, a0); + tcg_gen_ext32u_tl(dest, a0); return; } break; case MO_16: /* 16 bit address */ - tcg_gen_ext16u_tl(s->A0, a0); - a0 = s->A0; + tcg_gen_ext16u_tl(dest, a0); + a0 = dest; if (ovr_seg < 0) { - if (s->addseg) { + if (ADDSEG(s)) { ovr_seg = def_seg; } else { return; @@ -486,24 +675,30 @@ static void gen_lea_v_seg(DisasContext *s, TCGMemOp aflag, TCGv a0, } break; default: - tcg_abort(); + g_assert_not_reached(); } if (ovr_seg >= 0) { TCGv seg = cpu_seg_base[ovr_seg]; if (aflag == MO_64) { - tcg_gen_add_tl(s->A0, a0, seg); + 
tcg_gen_add_tl(dest, a0, seg); } else if (CODE64(s)) { - tcg_gen_ext32u_tl(s->A0, a0); - tcg_gen_add_tl(s->A0, s->A0, seg); + tcg_gen_ext32u_tl(dest, a0); + tcg_gen_add_tl(dest, dest, seg); } else { - tcg_gen_add_tl(s->A0, a0, seg); - tcg_gen_ext32u_tl(s->A0, s->A0); + tcg_gen_add_tl(dest, a0, seg); + tcg_gen_ext32u_tl(dest, dest); } } } +static void gen_lea_v_seg(DisasContext *s, MemOp aflag, TCGv a0, + int def_seg, int ovr_seg) +{ + gen_lea_v_seg_dest(s, aflag, s->A0, a0, def_seg, ovr_seg); +} + static inline void gen_string_movl_A0_ESI(DisasContext *s) { gen_lea_v_seg(s, s->aflag, cpu_regs[R_ESI], R_DS, s->override); @@ -514,145 +709,132 @@ static inline void gen_string_movl_A0_EDI(DisasContext *s) gen_lea_v_seg(s, s->aflag, cpu_regs[R_EDI], R_ES, -1); } -static inline void gen_op_movl_T0_Dshift(DisasContext *s, TCGMemOp ot) +static inline TCGv gen_compute_Dshift(DisasContext *s, MemOp ot) { - tcg_gen_ld32s_tl(s->T0, cpu_env, offsetof(CPUX86State, df)); - tcg_gen_shli_tl(s->T0, s->T0, ot); + TCGv dshift = tcg_temp_new(); + tcg_gen_ld32s_tl(dshift, tcg_env, offsetof(CPUX86State, df)); + tcg_gen_shli_tl(dshift, dshift, ot); + return dshift; }; -static TCGv gen_ext_tl(TCGv dst, TCGv src, TCGMemOp size, bool sign) +static TCGv gen_ext_tl(TCGv dst, TCGv src, MemOp size, bool sign) { - switch (size) { - case MO_8: - if (sign) { - tcg_gen_ext8s_tl(dst, src); - } else { - tcg_gen_ext8u_tl(dst, src); - } - return dst; - case MO_16: - if (sign) { - tcg_gen_ext16s_tl(dst, src); - } else { - tcg_gen_ext16u_tl(dst, src); - } - return dst; -#ifdef TARGET_X86_64 - case MO_32: - if (sign) { - tcg_gen_ext32s_tl(dst, src); - } else { - tcg_gen_ext32u_tl(dst, src); - } - return dst; -#endif - default: + if (size == MO_TL) { return src; } + if (!dst) { + dst = tcg_temp_new(); + } + tcg_gen_ext_tl(dst, src, size | (sign ? 
MO_SIGN : 0)); + return dst; } -static void gen_extu(TCGMemOp ot, TCGv reg) +static void gen_extu(MemOp ot, TCGv reg) { gen_ext_tl(reg, reg, ot, false); } -static void gen_exts(TCGMemOp ot, TCGv reg) +static void gen_exts(MemOp ot, TCGv reg) { gen_ext_tl(reg, reg, ot, true); } -static inline -void gen_op_jnz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1) +static void gen_op_j_ecx(DisasContext *s, TCGCond cond, TCGLabel *label1) { - tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]); - gen_extu(size, s->tmp0); - tcg_gen_brcondi_tl(TCG_COND_NE, s->tmp0, 0, label1); + TCGv tmp = gen_ext_tl(NULL, cpu_regs[R_ECX], s->aflag, false); + + tcg_gen_brcondi_tl(cond, tmp, 0, label1); } -static inline -void gen_op_jz_ecx(DisasContext *s, TCGMemOp size, TCGLabel *label1) +static inline void gen_op_jz_ecx(DisasContext *s, TCGLabel *label1) +{ + gen_op_j_ecx(s, TCG_COND_EQ, label1); +} + +static inline void gen_op_jnz_ecx(DisasContext *s, TCGLabel *label1) { - tcg_gen_mov_tl(s->tmp0, cpu_regs[R_ECX]); - gen_extu(size, s->tmp0); - tcg_gen_brcondi_tl(TCG_COND_EQ, s->tmp0, 0, label1); + gen_op_j_ecx(s, TCG_COND_NE, label1); } -static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n) +static void gen_helper_in_func(MemOp ot, TCGv v, TCGv_i32 n) { switch (ot) { case MO_8: - gen_helper_inb(v, cpu_env, n); + gen_helper_inb(v, tcg_env, n); break; case MO_16: - gen_helper_inw(v, cpu_env, n); + gen_helper_inw(v, tcg_env, n); break; case MO_32: - gen_helper_inl(v, cpu_env, n); + gen_helper_inl(v, tcg_env, n); break; default: - tcg_abort(); + g_assert_not_reached(); } } -static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n) +static void gen_helper_out_func(MemOp ot, TCGv_i32 v, TCGv_i32 n) { switch (ot) { case MO_8: - gen_helper_outb(cpu_env, v, n); + gen_helper_outb(tcg_env, v, n); break; case MO_16: - gen_helper_outw(cpu_env, v, n); + gen_helper_outw(tcg_env, v, n); break; case MO_32: - gen_helper_outl(cpu_env, v, n); + gen_helper_outl(tcg_env, v, n); break; default: - 
tcg_abort(); + g_assert_not_reached(); } } -static void gen_check_io(DisasContext *s, TCGMemOp ot, target_ulong cur_eip, +/* + * Validate that access to [port, port + 1<<ot) is allowed. + * Raise #GP, or VMM exit if not. + */ +static bool gen_check_io(DisasContext *s, MemOp ot, TCGv_i32 port, uint32_t svm_flags) { - target_ulong next_eip; - - if (s->pe && (s->cpl > s->iopl || s->vm86)) { - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - switch (ot) { - case MO_8: - gen_helper_check_iob(cpu_env, s->tmp2_i32); - break; - case MO_16: - gen_helper_check_iow(cpu_env, s->tmp2_i32); - break; - case MO_32: - gen_helper_check_iol(cpu_env, s->tmp2_i32); - break; - default: - tcg_abort(); - } +#ifdef CONFIG_USER_ONLY + /* + * We do not implement the ioperm(2) syscall, so the TSS check + * will always fail. + */ + gen_exception_gpf(s); + return false; +#else + if (PE(s) && (CPL(s) > IOPL(s) || VM86(s))) { + gen_helper_check_io(tcg_env, port, tcg_constant_i32(1 << ot)); } - if(s->flags & HF_GUEST_MASK) { + if (GUEST(s)) { gen_update_cc_op(s); - gen_jmp_im(s, cur_eip); - svm_flags |= (1 << (4 + ot)); - next_eip = s->pc - s->cs_base; - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_svm_check_io(cpu_env, s->tmp2_i32, - tcg_const_i32(svm_flags), - tcg_const_i32(next_eip - cur_eip)); + gen_update_eip_cur(s); + if (s->prefix & (PREFIX_REPZ | PREFIX_REPNZ)) { + svm_flags |= SVM_IOIO_REP_MASK; + } + svm_flags |= 1 << (SVM_IOIO_SIZE_SHIFT + ot); + gen_helper_svm_check_io(tcg_env, port, + tcg_constant_i32(svm_flags), + cur_insn_len_i32(s)); } + return true; +#endif } -static inline void gen_movs(DisasContext *s, TCGMemOp ot) +static void gen_movs(DisasContext *s, MemOp ot) { + TCGv dshift; + gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); gen_string_movl_A0_EDI(s); gen_op_st_v(s, ot, s->T0, s->A0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_ESI); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + + dshift = gen_compute_Dshift(s, ot); + gen_op_add_reg(s, 
s->aflag, R_ESI, dshift); + gen_op_add_reg(s, s->aflag, R_EDI, dshift); } static void gen_op_update1_cc(DisasContext *s) @@ -685,22 +867,22 @@ static void gen_op_update_neg_cc(DisasContext *s) tcg_gen_movi_tl(s->cc_srcT, 0); } -/* compute all eflags to cc_src */ -static void gen_compute_eflags(DisasContext *s) +/* compute all eflags to reg */ +static void gen_mov_eflags(DisasContext *s, TCGv reg) { - TCGv zero, dst, src1, src2; + TCGv dst, src1, src2; + TCGv_i32 cc_op; int live, dead; if (s->cc_op == CC_OP_EFLAGS) { + tcg_gen_mov_tl(reg, cpu_cc_src); return; } if (s->cc_op == CC_OP_CLR) { - tcg_gen_movi_tl(cpu_cc_src, CC_Z | CC_P); - set_cc_op(s, CC_OP_EFLAGS); + tcg_gen_movi_tl(reg, CC_Z | CC_P); return; } - zero = NULL; dst = cpu_cc_dst; src1 = cpu_cc_src; src2 = cpu_cc_src2; @@ -709,7 +891,7 @@ static void gen_compute_eflags(DisasContext *s) live = cc_op_live[s->cc_op] & ~USES_CC_SRCT; dead = live ^ (USES_CC_DST | USES_CC_SRC | USES_CC_SRC2); if (dead) { - zero = tcg_const_tl(0); + TCGv zero = tcg_constant_tl(0); if (dead & USES_CC_DST) { dst = zero; } @@ -721,13 +903,19 @@ static void gen_compute_eflags(DisasContext *s) } } - gen_update_cc_op(s); - gen_helper_cc_compute_all(cpu_cc_src, dst, src1, src2, cpu_cc_op); - set_cc_op(s, CC_OP_EFLAGS); - - if (dead) { - tcg_temp_free(zero); + if (s->cc_op != CC_OP_DYNAMIC) { + cc_op = tcg_constant_i32(s->cc_op); + } else { + cc_op = cpu_cc_op; } + gen_helper_cc_compute_all(reg, dst, src1, src2, cc_op); +} + +/* compute all eflags to cc_src */ +static void gen_compute_eflags(DisasContext *s) +{ + gen_mov_eflags(s, cpu_cc_src); + set_cc_op(s, CC_OP_EFLAGS); } typedef struct CCPrepare { @@ -840,7 +1028,7 @@ static CCPrepare gen_prepare_eflags_s(DisasContext *s, TCGv reg) return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; default: { - TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3; + MemOp size = (s->cc_op - CC_OP_ADDB) & 3; TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, true); return (CCPrepare) { .cond = TCG_COND_LT, 
.reg = t0, .mask = -1 }; } @@ -858,6 +1046,9 @@ static CCPrepare gen_prepare_eflags_o(DisasContext *s, TCGv reg) case CC_OP_CLR: case CC_OP_POPCNT: return (CCPrepare) { .cond = TCG_COND_NEVER, .mask = -1 }; + case CC_OP_MULB ... CC_OP_MULQ: + return (CCPrepare) { .cond = TCG_COND_NE, + .reg = cpu_cc_src, .mask = -1 }; default: gen_compute_eflags(s); return (CCPrepare) { .cond = TCG_COND_NE, .reg = cpu_cc_src, @@ -885,7 +1076,7 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) .mask = -1 }; default: { - TCGMemOp size = (s->cc_op - CC_OP_ADDB) & 3; + MemOp size = (s->cc_op - CC_OP_ADDB) & 3; TCGv t0 = gen_ext_tl(reg, cpu_cc_dst, size, false); return (CCPrepare) { .cond = TCG_COND_EQ, .reg = t0, .mask = -1 }; } @@ -893,11 +1084,11 @@ static CCPrepare gen_prepare_eflags_z(DisasContext *s, TCGv reg) } /* perform a conditional store into register 'reg' according to jump opcode - value 'b'. In the fast case, T0 is guaranted not to be used. */ + value 'b'. In the fast case, T0 is guaranteed not to be used. 
*/ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) { int inv, jcc_op, cond; - TCGMemOp size; + MemOp size; CCPrepare cc; TCGv t0; @@ -964,10 +1155,9 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) if (reg == cpu_cc_src) { reg = s->tmp0; } - tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ - tcg_gen_xor_tl(reg, reg, cpu_cc_src); + tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S); cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, - .mask = CC_S }; + .mask = CC_O }; break; default: case JCC_LE: @@ -975,10 +1165,9 @@ static CCPrepare gen_prepare_cc(DisasContext *s, int b, TCGv reg) if (reg == cpu_cc_src) { reg = s->tmp0; } - tcg_gen_shri_tl(reg, cpu_cc_src, 4); /* CC_O -> CC_S */ - tcg_gen_xor_tl(reg, reg, cpu_cc_src); + tcg_gen_addi_tl(reg, cpu_cc_src, CC_O - CC_S); cc = (CCPrepare) { .cond = TCG_COND_NE, .reg = reg, - .mask = CC_S | CC_Z }; + .mask = CC_O | CC_Z }; break; } break; @@ -1026,7 +1215,7 @@ static inline void gen_compute_eflags_c(DisasContext *s, TCGv reg) } /* generate a conditional jump to label 'l1' according to jump opcode - value 'b'. In the fast case, T0 is guaranted not to be used. */ + value 'b'. In the fast case, T0 is guaranteed not to be used. */ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1) { CCPrepare cc = gen_prepare_cc(s, b, s->T0); @@ -1043,7 +1232,7 @@ static inline void gen_jcc1_noeob(DisasContext *s, int b, TCGLabel *l1) } /* Generate a conditional jump to label 'l1' according to jump opcode - value 'b'. In the fast case, T0 is guaranted not to be used. + value 'b'. In the fast case, T0 is guaranteed not to be used. A translation block must end soon. 
*/ static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1) { @@ -1064,73 +1253,74 @@ static inline void gen_jcc1(DisasContext *s, int b, TCGLabel *l1) /* XXX: does not work with gdbstub "ice" single step - not a serious problem */ -static TCGLabel *gen_jz_ecx_string(DisasContext *s, target_ulong next_eip) +static TCGLabel *gen_jz_ecx_string(DisasContext *s) { TCGLabel *l1 = gen_new_label(); TCGLabel *l2 = gen_new_label(); - gen_op_jnz_ecx(s, s->aflag, l1); + gen_op_jnz_ecx(s, l1); gen_set_label(l2); - gen_jmp_tb(s, next_eip, 1); + gen_jmp_rel_csize(s, 0, 1); gen_set_label(l1); return l2; } -static inline void gen_stos(DisasContext *s, TCGMemOp ot) +static void gen_stos(DisasContext *s, MemOp ot) { - gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); gen_string_movl_A0_EDI(s); gen_op_st_v(s, ot, s->T0, s->A0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot)); } -static inline void gen_lods(DisasContext *s, TCGMemOp ot) +static void gen_lods(DisasContext *s, MemOp ot) { gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); gen_op_mov_reg_v(s, ot, R_EAX, s->T0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_ESI); + gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot)); } -static inline void gen_scas(DisasContext *s, TCGMemOp ot) +static void gen_scas(DisasContext *s, MemOp ot) { gen_string_movl_A0_EDI(s); gen_op_ld_v(s, ot, s->T1, s->A0); - gen_op(s, OP_CMPL, ot, R_EAX); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + tcg_gen_mov_tl(cpu_cc_src, s->T1); + tcg_gen_mov_tl(s->cc_srcT, s->T0); + tcg_gen_sub_tl(cpu_cc_dst, s->T0, s->T1); + set_cc_op(s, CC_OP_SUBB + ot); + + gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot)); } -static inline void gen_cmps(DisasContext *s, TCGMemOp ot) +static void gen_cmps(DisasContext *s, MemOp ot) { + TCGv dshift; + gen_string_movl_A0_EDI(s); gen_op_ld_v(s, ot, s->T1, s->A0); 
gen_string_movl_A0_ESI(s); gen_op(s, OP_CMPL, ot, OR_TMP0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_ESI); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + + dshift = gen_compute_Dshift(s, ot); + gen_op_add_reg(s, s->aflag, R_ESI, dshift); + gen_op_add_reg(s, s->aflag, R_EDI, dshift); } static void gen_bpt_io(DisasContext *s, TCGv_i32 t_port, int ot) { if (s->flags & HF_IOBPT_MASK) { - TCGv_i32 t_size = tcg_const_i32(1 << ot); - TCGv t_next = tcg_const_tl(s->pc - s->cs_base); - - gen_helper_bpt_io(cpu_env, t_port, t_size, t_next); - tcg_temp_free_i32(t_size); - tcg_temp_free(t_next); +#ifdef CONFIG_USER_ONLY + /* user-mode cpu should not be in IOBPT mode */ + g_assert_not_reached(); +#else + TCGv_i32 t_size = tcg_constant_i32(1 << ot); + TCGv t_next = eip_next_tl(s); + gen_helper_bpt_io(tcg_env, t_port, t_size, t_next); +#endif /* CONFIG_USER_ONLY */ } } - -static inline void gen_ins(DisasContext *s, TCGMemOp ot) +static void gen_ins(DisasContext *s, MemOp ot) { - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } gen_string_movl_A0_EDI(s); /* Note: we must do this dummy write first to be restartable in case of page fault. 
*/ @@ -1140,19 +1330,12 @@ static inline void gen_ins(DisasContext *s, TCGMemOp ot) tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); gen_helper_in_func(ot, s->T0, s->tmp2_i32); gen_op_st_v(s, ot, s->T0, s->A0); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_EDI); + gen_op_add_reg(s, s->aflag, R_EDI, gen_compute_Dshift(s, ot)); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(); - } } -static inline void gen_outs(DisasContext *s, TCGMemOp ot) +static void gen_outs(DisasContext *s, MemOp ot) { - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } gen_string_movl_A0_ESI(s); gen_op_ld_v(s, ot, s->T0, s->A0); @@ -1160,50 +1343,53 @@ static inline void gen_outs(DisasContext *s, TCGMemOp ot) tcg_gen_andi_i32(s->tmp2_i32, s->tmp2_i32, 0xffff); tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T0); gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); - gen_op_movl_T0_Dshift(s, ot); - gen_op_add_reg_T0(s, s->aflag, R_ESI); + gen_op_add_reg(s, s->aflag, R_ESI, gen_compute_Dshift(s, ot)); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(); +} + +/* Generate jumps to current or next instruction */ +static void gen_repz(DisasContext *s, MemOp ot, + void (*fn)(DisasContext *s, MemOp ot)) +{ + TCGLabel *l2; + gen_update_cc_op(s); + l2 = gen_jz_ecx_string(s); + fn(s, ot); + gen_op_add_reg_im(s, s->aflag, R_ECX, -1); + /* + * A loop would cause two single step exceptions if ECX = 1 + * before rep string_insn + */ + if (s->repz_opt) { + gen_op_jz_ecx(s, l2); } + gen_jmp_rel_csize(s, -cur_insn_len(s), 0); } -/* same method as Valgrind : we generate jumps to current or next - instruction */ -#define GEN_REPZ(op) \ -static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \ - target_ulong cur_eip, target_ulong next_eip) \ -{ \ - TCGLabel *l2; \ - gen_update_cc_op(s); \ - l2 = gen_jz_ecx_string(s, next_eip); \ - gen_ ## op(s, ot); \ - gen_op_add_reg_im(s, s->aflag, R_ECX, 
-1); \ - /* a loop would cause two single step exceptions if ECX = 1 \ - before rep string_insn */ \ - if (s->repz_opt) \ - gen_op_jz_ecx(s, s->aflag, l2); \ - gen_jmp(s, cur_eip); \ -} - -#define GEN_REPZ2(op) \ -static inline void gen_repz_ ## op(DisasContext *s, TCGMemOp ot, \ - target_ulong cur_eip, \ - target_ulong next_eip, \ - int nz) \ -{ \ - TCGLabel *l2; \ - gen_update_cc_op(s); \ - l2 = gen_jz_ecx_string(s, next_eip); \ - gen_ ## op(s, ot); \ - gen_op_add_reg_im(s, s->aflag, R_ECX, -1); \ - gen_update_cc_op(s); \ - gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); \ - if (s->repz_opt) \ - gen_op_jz_ecx(s, s->aflag, l2); \ - gen_jmp(s, cur_eip); \ +#define GEN_REPZ(op) \ + static inline void gen_repz_ ## op(DisasContext *s, MemOp ot) \ + { gen_repz(s, ot, gen_##op); } + +static void gen_repz2(DisasContext *s, MemOp ot, int nz, + void (*fn)(DisasContext *s, MemOp ot)) +{ + TCGLabel *l2; + gen_update_cc_op(s); + l2 = gen_jz_ecx_string(s); + fn(s, ot); + gen_op_add_reg_im(s, s->aflag, R_ECX, -1); + gen_update_cc_op(s); + gen_jcc1(s, (JCC_Z << 1) | (nz ^ 1), l2); + if (s->repz_opt) { + gen_op_jz_ecx(s, l2); + } + gen_jmp_rel_csize(s, -cur_insn_len(s), 0); } +#define GEN_REPZ2(op) \ + static inline void gen_repz_ ## op(DisasContext *s, MemOp ot, int nz) \ + { gen_repz2(s, ot, nz, gen_##op); } + GEN_REPZ(movs) GEN_REPZ(stos) GEN_REPZ(lods) @@ -1216,28 +1402,28 @@ static void gen_helper_fp_arith_ST0_FT0(int op) { switch (op) { case 0: - gen_helper_fadd_ST0_FT0(cpu_env); + gen_helper_fadd_ST0_FT0(tcg_env); break; case 1: - gen_helper_fmul_ST0_FT0(cpu_env); + gen_helper_fmul_ST0_FT0(tcg_env); break; case 2: - gen_helper_fcom_ST0_FT0(cpu_env); + gen_helper_fcom_ST0_FT0(tcg_env); break; case 3: - gen_helper_fcom_ST0_FT0(cpu_env); + gen_helper_fcom_ST0_FT0(tcg_env); break; case 4: - gen_helper_fsub_ST0_FT0(cpu_env); + gen_helper_fsub_ST0_FT0(tcg_env); break; case 5: - gen_helper_fsubr_ST0_FT0(cpu_env); + gen_helper_fsubr_ST0_FT0(tcg_env); break; case 6: - 
gen_helper_fdiv_ST0_FT0(cpu_env); + gen_helper_fdiv_ST0_FT0(tcg_env); break; case 7: - gen_helper_fdivr_ST0_FT0(cpu_env); + gen_helper_fdivr_ST0_FT0(tcg_env); break; } } @@ -1245,32 +1431,89 @@ static void gen_helper_fp_arith_ST0_FT0(int op) /* NOTE the exception in "r" op ordering */ static void gen_helper_fp_arith_STN_ST0(int op, int opreg) { - TCGv_i32 tmp = tcg_const_i32(opreg); + TCGv_i32 tmp = tcg_constant_i32(opreg); switch (op) { case 0: - gen_helper_fadd_STN_ST0(cpu_env, tmp); + gen_helper_fadd_STN_ST0(tcg_env, tmp); break; case 1: - gen_helper_fmul_STN_ST0(cpu_env, tmp); + gen_helper_fmul_STN_ST0(tcg_env, tmp); break; case 4: - gen_helper_fsubr_STN_ST0(cpu_env, tmp); + gen_helper_fsubr_STN_ST0(tcg_env, tmp); break; case 5: - gen_helper_fsub_STN_ST0(cpu_env, tmp); + gen_helper_fsub_STN_ST0(tcg_env, tmp); break; case 6: - gen_helper_fdivr_STN_ST0(cpu_env, tmp); + gen_helper_fdivr_STN_ST0(tcg_env, tmp); break; case 7: - gen_helper_fdiv_STN_ST0(cpu_env, tmp); + gen_helper_fdiv_STN_ST0(tcg_env, tmp); break; } } +static void gen_exception(DisasContext *s, int trapno) +{ + gen_update_cc_op(s); + gen_update_eip_cur(s); + gen_helper_raise_exception(tcg_env, tcg_constant_i32(trapno)); + s->base.is_jmp = DISAS_NORETURN; +} + +/* Generate #UD for the current instruction. The assumption here is that + the instruction is known, but it isn't allowed in the current cpu mode. */ +static void gen_illegal_opcode(DisasContext *s) +{ + gen_exception(s, EXCP06_ILLOP); +} + +/* Generate #GP for the current instruction. */ +static void gen_exception_gpf(DisasContext *s) +{ + gen_exception(s, EXCP0D_GPF); +} + +/* Check for cpl == 0; if not, raise #GP and return false. */ +static bool check_cpl0(DisasContext *s) +{ + if (CPL(s) == 0) { + return true; + } + gen_exception_gpf(s); + return false; +} + +/* If vm86, check for iopl == 3; if not, raise #GP and return false. 
*/ +static bool check_vm86_iopl(DisasContext *s) +{ + if (!VM86(s) || IOPL(s) == 3) { + return true; + } + gen_exception_gpf(s); + return false; +} + +/* Check for iopl allowing access; if not, raise #GP and return false. */ +static bool check_iopl(DisasContext *s) +{ + if (VM86(s) ? IOPL(s) == 3 : CPL(s) <= IOPL(s)) { + return true; + } + gen_exception_gpf(s); + return false; +} + /* if d == OR_TMP0, it means memory operand (address in A0) */ -static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d) +static void gen_op(DisasContext *s1, int op, MemOp ot, int d) { + /* Invalid lock prefix when destination is not memory or OP_CMPL. */ + if ((d != OR_TMP0 || op == OP_CMPL) && s1->prefix & PREFIX_LOCK) { + gen_illegal_opcode(s1); + return; + } + if (d != OR_TMP0) { gen_op_mov_v_reg(s1, ot, s1->T0, d); } else if (!(s1->prefix & PREFIX_LOCK)) { @@ -1375,9 +1618,14 @@ static void gen_op(DisasContext *s1, int op, TCGMemOp ot, int d) } /* if d == OR_TMP0, it means memory operand (address in A0) */ -static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c) +static void gen_inc(DisasContext *s1, MemOp ot, int d, int c) { if (s1->prefix & PREFIX_LOCK) { + if (d != OR_TMP0) { + /* Lock prefix when destination is not memory */ + gen_illegal_opcode(s1); + return; + } tcg_gen_movi_tl(s1->T0, c > 0 ? 1 : -1); tcg_gen_atomic_add_fetch_tl(s1->T0, s1->A0, s1->T0, s1->mem_index, ot | MO_LE); @@ -1396,7 +1644,7 @@ static void gen_inc(DisasContext *s1, TCGMemOp ot, int d, int c) set_cc_op(s1, (c > 0 ? CC_OP_INCB : CC_OP_DECB) + ot); } -static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result, +static void gen_shift_flags(DisasContext *s, MemOp ot, TCGv result, TCGv shm1, TCGv count, bool is_right) { TCGv_i32 z32, s32, oldop; @@ -1405,7 +1653,7 @@ static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result, /* Store the results into the CC variables. If we know that the variable must be dead, store unconditionally. 
Otherwise we'll need to not disrupt the current contents. */ - z_tl = tcg_const_tl(0); + z_tl = tcg_constant_tl(0); if (cc_op_live[s->cc_op] & USES_CC_DST) { tcg_gen_movcond_tl(TCG_COND_NE, cpu_cc_dst, count, z_tl, result, cpu_cc_dst); @@ -1418,7 +1666,6 @@ static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result, } else { tcg_gen_mov_tl(cpu_cc_src, shm1); } - tcg_temp_free(z_tl); /* Get the two potential CC_OP values into temporaries. */ tcg_gen_movi_i32(s->tmp2_i32, (is_right ? CC_OP_SARB : CC_OP_SHLB) + ot); @@ -1430,18 +1677,16 @@ static void gen_shift_flags(DisasContext *s, TCGMemOp ot, TCGv result, } /* Conditionally store the CC_OP value. */ - z32 = tcg_const_i32(0); + z32 = tcg_constant_i32(0); s32 = tcg_temp_new_i32(); tcg_gen_trunc_tl_i32(s32, count); tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, s32, z32, s->tmp2_i32, oldop); - tcg_temp_free_i32(z32); - tcg_temp_free_i32(s32); /* The CC_OP value is no longer predictable. */ set_cc_op(s, CC_OP_DYNAMIC); } -static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1, +static void gen_shift_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right, int is_arith) { target_ulong mask = (ot == MO_64 ? 0x3f : 0x1f); @@ -1477,7 +1722,7 @@ static void gen_shift_rm_T1(DisasContext *s, TCGMemOp ot, int op1, gen_shift_flags(s, ot, s->T0, s->tmp0, s->T1, is_right); } -static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2, +static void gen_shift_rm_im(DisasContext *s, MemOp ot, int op1, int op2, int is_right, int is_arith) { int mask = (ot == MO_64 ? 0x3f : 0x1f); @@ -1517,7 +1762,7 @@ static void gen_shift_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2, } } -static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right) +static void gen_rot_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right) { target_ulong mask = (ot == MO_64 ? 
0x3f : 0x1f); TCGv_i32 t0, t1; @@ -1588,21 +1833,19 @@ static void gen_rot_rm_T1(DisasContext *s, TCGMemOp ot, int op1, int is_right) is 0 we keep the CC_OP_EFLAGS setting so that only CC_SRC is live. Otherwise reuse CC_OP_ADCOX which have the C and O flags split out exactly as we computed above. */ - t0 = tcg_const_i32(0); + t0 = tcg_constant_i32(0); t1 = tcg_temp_new_i32(); tcg_gen_trunc_tl_i32(t1, s->T1); tcg_gen_movi_i32(s->tmp2_i32, CC_OP_ADCOX); tcg_gen_movi_i32(s->tmp3_i32, CC_OP_EFLAGS); tcg_gen_movcond_i32(TCG_COND_NE, cpu_cc_op, t1, t0, s->tmp2_i32, s->tmp3_i32); - tcg_temp_free_i32(t0); - tcg_temp_free_i32(t1); - /* The CC_OP value is no longer predictable. */ + /* The CC_OP value is no longer predictable. */ set_cc_op(s, CC_OP_DYNAMIC); } -static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2, +static void gen_rot_rm_im(DisasContext *s, MemOp ot, int op1, int op2, int is_right) { int mask = (ot == MO_64 ? 0x3f : 0x1f); @@ -1680,7 +1923,7 @@ static void gen_rot_rm_im(DisasContext *s, TCGMemOp ot, int op1, int op2, } /* XXX: add faster immediate = 1 case */ -static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1, +static void gen_rotc_rm_T1(DisasContext *s, MemOp ot, int op1, int is_right) { gen_compute_eflags(s); @@ -1691,44 +1934,44 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1, gen_op_ld_v(s, ot, s->T0, s->A0); else gen_op_mov_v_reg(s, ot, s->T0, op1); - + if (is_right) { switch (ot) { case MO_8: - gen_helper_rcrb(s->T0, cpu_env, s->T0, s->T1); + gen_helper_rcrb(s->T0, tcg_env, s->T0, s->T1); break; case MO_16: - gen_helper_rcrw(s->T0, cpu_env, s->T0, s->T1); + gen_helper_rcrw(s->T0, tcg_env, s->T0, s->T1); break; case MO_32: - gen_helper_rcrl(s->T0, cpu_env, s->T0, s->T1); + gen_helper_rcrl(s->T0, tcg_env, s->T0, s->T1); break; #ifdef TARGET_X86_64 case MO_64: - gen_helper_rcrq(s->T0, cpu_env, s->T0, s->T1); + gen_helper_rcrq(s->T0, tcg_env, s->T0, s->T1); break; #endif default: - tcg_abort(); + 
g_assert_not_reached(); } } else { switch (ot) { case MO_8: - gen_helper_rclb(s->T0, cpu_env, s->T0, s->T1); + gen_helper_rclb(s->T0, tcg_env, s->T0, s->T1); break; case MO_16: - gen_helper_rclw(s->T0, cpu_env, s->T0, s->T1); + gen_helper_rclw(s->T0, tcg_env, s->T0, s->T1); break; case MO_32: - gen_helper_rcll(s->T0, cpu_env, s->T0, s->T1); + gen_helper_rcll(s->T0, tcg_env, s->T0, s->T1); break; #ifdef TARGET_X86_64 case MO_64: - gen_helper_rclq(s->T0, cpu_env, s->T0, s->T1); + gen_helper_rclq(s->T0, tcg_env, s->T0, s->T1); break; #endif default: - tcg_abort(); + g_assert_not_reached(); } } /* store */ @@ -1736,7 +1979,7 @@ static void gen_rotc_rm_T1(DisasContext *s, TCGMemOp ot, int op1, } /* XXX: add faster immediate case */ -static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1, +static void gen_shiftd_rm_T1(DisasContext *s, MemOp ot, int op1, bool is_right, TCGv count_in) { target_ulong mask = (ot == MO_64 ? 63 : 31); @@ -1764,9 +2007,12 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1, } else { tcg_gen_deposit_tl(s->T1, s->T0, s->T1, 16, 16); } - /* FALLTHRU */ -#ifdef TARGET_X86_64 + /* + * If TARGET_X86_64 defined then fall through into MO_32 case, + * otherwise fall through default case. + */ case MO_32: +#ifdef TARGET_X86_64 /* Concatenate the two 32-bit values and use a 64-bit shift. 
*/ tcg_gen_subi_tl(s->tmp0, count, 1); if (is_right) { @@ -1814,10 +2060,9 @@ static void gen_shiftd_rm_T1(DisasContext *s, TCGMemOp ot, int op1, gen_op_st_rm_T0_A0(s, ot, op1); gen_shift_flags(s, ot, s->T0, s->tmp0, count, is_right); - tcg_temp_free(count); } -static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s) +static void gen_shift(DisasContext *s1, int op, MemOp ot, int d, int s) { if (s != OR_TMP1) gen_op_mov_v_reg(s1, ot, s1->T1, s); @@ -1847,7 +2092,7 @@ static void gen_shift(DisasContext *s1, int op, TCGMemOp ot, int d, int s) } } -static void gen_shifti(DisasContext *s1, int op, TCGMemOp ot, int d, int c) +static void gen_shifti(DisasContext *s1, int op, MemOp ot, int d, int c) { switch(op) { case OP_ROL: @@ -1880,8 +2125,14 @@ static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes) { uint64_t pc = s->pc; + /* This is a subsequent insn that crosses a page boundary. */ + if (s->base.num_insns > 1 && + !is_same_page(&s->base, s->pc + num_bytes - 1)) { + siglongjmp(s->jmpbuf, 2); + } + s->pc += num_bytes; - if (unlikely(s->pc - s->pc_start > X86_MAX_INSN_LENGTH)) { + if (unlikely(cur_insn_len(s) > X86_MAX_INSN_LENGTH)) { /* If the instruction's 16th byte is on a different page than the 1st, a * page fault on the second page wins over the general protection fault * caused by the instruction being too long. 
@@ -1900,28 +2151,28 @@ static uint64_t advance_pc(CPUX86State *env, DisasContext *s, int num_bytes) static inline uint8_t x86_ldub_code(CPUX86State *env, DisasContext *s) { - return cpu_ldub_code(env, advance_pc(env, s, 1)); + return translator_ldub(env, &s->base, advance_pc(env, s, 1)); } static inline int16_t x86_ldsw_code(CPUX86State *env, DisasContext *s) { - return cpu_ldsw_code(env, advance_pc(env, s, 2)); + return translator_lduw(env, &s->base, advance_pc(env, s, 2)); } static inline uint16_t x86_lduw_code(CPUX86State *env, DisasContext *s) { - return cpu_lduw_code(env, advance_pc(env, s, 2)); + return translator_lduw(env, &s->base, advance_pc(env, s, 2)); } static inline uint32_t x86_ldl_code(CPUX86State *env, DisasContext *s) { - return cpu_ldl_code(env, advance_pc(env, s, 4)); + return translator_ldl(env, &s->base, advance_pc(env, s, 4)); } #ifdef TARGET_X86_64 static inline uint64_t x86_ldq_code(CPUX86State *env, DisasContext *s) { - return cpu_ldq_code(env, advance_pc(env, s, 8)); + return translator_ldq(env, &s->base, advance_pc(env, s, 8)); } #endif @@ -2051,7 +2302,7 @@ static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s, break; default: - tcg_abort(); + g_assert_not_reached(); } done: @@ -2059,11 +2310,11 @@ static AddressParts gen_lea_modrm_0(CPUX86State *env, DisasContext *s, } /* Compute the address, with a minimum number of TCG ops. */ -static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a) +static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a, bool is_vsib) { TCGv ea = NULL; - if (a.index >= 0) { + if (a.index >= 0 && !is_vsib) { if (a.scale == 0) { ea = cpu_regs[a.index]; } else { @@ -2078,7 +2329,12 @@ static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a) ea = cpu_regs[a.base]; } if (!ea) { - tcg_gen_movi_tl(s->A0, a.disp); + if (tb_cflags(s->base.tb) & CF_PCREL && a.base == -2) { + /* With cpu_eip ~= pc_save, the expression is pc-relative. 
*/ + tcg_gen_addi_tl(s->A0, cpu_eip, a.disp - s->pc_save); + } else { + tcg_gen_movi_tl(s->A0, a.disp); + } ea = s->A0; } else if (a.disp != 0) { tcg_gen_addi_tl(s->A0, ea, a.disp); @@ -2091,7 +2347,7 @@ static TCGv gen_lea_modrm_1(DisasContext *s, AddressParts a) static void gen_lea_modrm(CPUX86State *env, DisasContext *s, int modrm) { AddressParts a = gen_lea_modrm_0(env, s, modrm); - TCGv ea = gen_lea_modrm_1(s, a); + TCGv ea = gen_lea_modrm_1(s, a, false); gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override); } @@ -2104,7 +2360,8 @@ static void gen_nop_modrm(CPUX86State *env, DisasContext *s, int modrm) static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm, TCGCond cond, TCGv_i64 bndv) { - TCGv ea = gen_lea_modrm_1(s, gen_lea_modrm_0(env, s, modrm)); + AddressParts a = gen_lea_modrm_0(env, s, modrm); + TCGv ea = gen_lea_modrm_1(s, a, false); tcg_gen_extu_tl_i64(s->tmp1_i64, ea); if (!CODE64(s)) { @@ -2112,7 +2369,7 @@ static void gen_bndck(CPUX86State *env, DisasContext *s, int modrm, } tcg_gen_setcond_i64(cond, s->tmp1_i64, s->tmp1_i64, bndv); tcg_gen_extrl_i64_i32(s->tmp2_i32, s->tmp1_i64); - gen_helper_bndck(cpu_env, s->tmp2_i32); + gen_helper_bndck(tcg_env, s->tmp2_i32); } /* used for LEA and MOV AX, mem */ @@ -2124,7 +2381,7 @@ static void gen_add_A0_ds_seg(DisasContext *s) /* generate modrm memory load or store of 'reg'. 
TMP0 is used if reg == OR_TMP0 */ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm, - TCGMemOp ot, int reg, int is_store) + MemOp ot, int reg, int is_store) { int mod, rm; @@ -2154,9 +2411,9 @@ static void gen_ldst_modrm(CPUX86State *env, DisasContext *s, int modrm, } } -static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot) +static target_ulong insn_get_addr(CPUX86State *env, DisasContext *s, MemOp ot) { - uint32_t ret; + target_ulong ret; switch (ot) { case MO_8: @@ -2166,168 +2423,147 @@ static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, TCGMemOp ot) ret = x86_lduw_code(env, s); break; case MO_32: + ret = x86_ldl_code(env, s); + break; #ifdef TARGET_X86_64 case MO_64: -#endif - ret = x86_ldl_code(env, s); + ret = x86_ldq_code(env, s); break; +#endif default: - tcg_abort(); + g_assert_not_reached(); } return ret; } -static inline int insn_const_size(TCGMemOp ot) +static inline uint32_t insn_get(CPUX86State *env, DisasContext *s, MemOp ot) { - if (ot <= MO_32) { - return 1 << ot; - } else { - return 4; + uint32_t ret; + + switch (ot) { + case MO_8: + ret = x86_ldub_code(env, s); + break; + case MO_16: + ret = x86_lduw_code(env, s); + break; + case MO_32: +#ifdef TARGET_X86_64 + case MO_64: +#endif + ret = x86_ldl_code(env, s); + break; + default: + g_assert_not_reached(); } + return ret; } -static inline bool use_goto_tb(DisasContext *s, target_ulong pc) +static target_long insn_get_signed(CPUX86State *env, DisasContext *s, MemOp ot) { -#ifndef CONFIG_USER_ONLY - return (pc & TARGET_PAGE_MASK) == (s->base.tb->pc & TARGET_PAGE_MASK) || - (pc & TARGET_PAGE_MASK) == (s->pc_start & TARGET_PAGE_MASK); -#else - return true; + target_long ret; + + switch (ot) { + case MO_8: + ret = (int8_t) x86_ldub_code(env, s); + break; + case MO_16: + ret = (int16_t) x86_lduw_code(env, s); + break; + case MO_32: + ret = (int32_t) x86_ldl_code(env, s); + break; +#ifdef TARGET_X86_64 + case MO_64: + ret = x86_ldq_code(env, 
s); + break; #endif + default: + g_assert_not_reached(); + } + return ret; } -static inline void gen_goto_tb(DisasContext *s, int tb_num, target_ulong eip) +static inline int insn_const_size(MemOp ot) { - target_ulong pc = s->cs_base + eip; - - if (use_goto_tb(s, pc)) { - /* jump to same page: we can use a direct jump */ - tcg_gen_goto_tb(tb_num); - gen_jmp_im(s, eip); - tcg_gen_exit_tb(s->base.tb, tb_num); - s->base.is_jmp = DISAS_NORETURN; + if (ot <= MO_32) { + return 1 << ot; } else { - /* jump to another page */ - gen_jmp_im(s, eip); - gen_jr(s, s->tmp0); + return 4; } } -static inline void gen_jcc(DisasContext *s, int b, - target_ulong val, target_ulong next_eip) +static void gen_jcc(DisasContext *s, int b, int diff) { - TCGLabel *l1, *l2; - - if (s->jmp_opt) { - l1 = gen_new_label(); - gen_jcc1(s, b, l1); - - gen_goto_tb(s, 0, next_eip); - - gen_set_label(l1); - gen_goto_tb(s, 1, val); - } else { - l1 = gen_new_label(); - l2 = gen_new_label(); - gen_jcc1(s, b, l1); - - gen_jmp_im(s, next_eip); - tcg_gen_br(l2); + TCGLabel *l1 = gen_new_label(); - gen_set_label(l1); - gen_jmp_im(s, val); - gen_set_label(l2); - gen_eob(s); - } + gen_jcc1(s, b, l1); + gen_jmp_rel_csize(s, 0, 1); + gen_set_label(l1); + gen_jmp_rel(s, s->dflag, diff, 0); } -static void gen_cmovcc1(CPUX86State *env, DisasContext *s, TCGMemOp ot, int b, - int modrm, int reg) +static void gen_cmovcc1(DisasContext *s, int b, TCGv dest, TCGv src) { - CCPrepare cc; - - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + CCPrepare cc = gen_prepare_cc(s, b, s->T1); - cc = gen_prepare_cc(s, b, s->T1); if (cc.mask != -1) { TCGv t0 = tcg_temp_new(); tcg_gen_andi_tl(t0, cc.reg, cc.mask); cc.reg = t0; } if (!cc.use_reg2) { - cc.reg2 = tcg_const_tl(cc.imm); + cc.reg2 = tcg_constant_tl(cc.imm); } - tcg_gen_movcond_tl(cc.cond, s->T0, cc.reg, cc.reg2, - s->T0, cpu_regs[reg]); - gen_op_mov_reg_v(s, ot, reg, s->T0); - - if (cc.mask != -1) { - tcg_temp_free(cc.reg); - } - if (!cc.use_reg2) { - tcg_temp_free(cc.reg2); - 
} + tcg_gen_movcond_tl(cc.cond, dest, cc.reg, cc.reg2, src, dest); } -static inline void gen_op_movl_T0_seg(DisasContext *s, int seg_reg) +static inline void gen_op_movl_T0_seg(DisasContext *s, X86Seg seg_reg) { - tcg_gen_ld32u_tl(s->T0, cpu_env, + tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State,segs[seg_reg].selector)); } -static inline void gen_op_movl_seg_T0_vm(DisasContext *s, int seg_reg) +static inline void gen_op_movl_seg_T0_vm(DisasContext *s, X86Seg seg_reg) { tcg_gen_ext16u_tl(s->T0, s->T0); - tcg_gen_st32_tl(s->T0, cpu_env, + tcg_gen_st32_tl(s->T0, tcg_env, offsetof(CPUX86State,segs[seg_reg].selector)); tcg_gen_shli_tl(cpu_seg_base[seg_reg], s->T0, 4); } /* move T0 to seg_reg and compute if the CPU state may change. Never call this function with seg_reg == R_CS */ -static void gen_movl_seg_T0(DisasContext *s, int seg_reg) +static void gen_movl_seg_T0(DisasContext *s, X86Seg seg_reg) { - if (s->pe && !s->vm86) { + if (PE(s) && !VM86(s)) { tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_load_seg(cpu_env, tcg_const_i32(seg_reg), s->tmp2_i32); + gen_helper_load_seg(tcg_env, tcg_constant_i32(seg_reg), s->tmp2_i32); /* abort translation because the addseg value may change or because ss32 may change. For R_SS, translation must always stop as a special handling must be done to disable hardware interrupts for the next instruction */ - if (seg_reg == R_SS || (s->code32 && seg_reg < R_FS)) { - s->base.is_jmp = DISAS_TOO_MANY; + if (seg_reg == R_SS) { + s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; + } else if (CODE32(s) && seg_reg < R_FS) { + s->base.is_jmp = DISAS_EOB_NEXT; } } else { gen_op_movl_seg_T0_vm(s, seg_reg); if (seg_reg == R_SS) { - s->base.is_jmp = DISAS_TOO_MANY; + s->base.is_jmp = DISAS_EOB_INHIBIT_IRQ; } } } -static inline int svm_is_rep(int prefixes) -{ - return ((prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) ? 
8 : 0); -} - -static inline void -gen_svm_check_intercept_param(DisasContext *s, target_ulong pc_start, - uint32_t type, uint64_t param) +static void gen_svm_check_intercept(DisasContext *s, uint32_t type) { /* no SVM activated; fast case */ - if (likely(!(s->flags & HF_GUEST_MASK))) + if (likely(!GUEST(s))) { return; - gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_svm_check_intercept_param(cpu_env, tcg_const_i32(type), - tcg_const_i64(param)); -} - -static inline void -gen_svm_check_intercept(DisasContext *s, target_ulong pc_start, uint64_t type) -{ - gen_svm_check_intercept_param(s, pc_start, type, 0); + } + gen_helper_svm_check_intercept(tcg_env, tcg_constant_i32(type)); } static inline void gen_stack_update(DisasContext *s, int addend) @@ -2338,16 +2574,16 @@ static inline void gen_stack_update(DisasContext *s, int addend) /* Generate a push. It depends on ss32, addseg and dflag. */ static void gen_push_v(DisasContext *s, TCGv val) { - TCGMemOp d_ot = mo_pushpop(s, s->dflag); - TCGMemOp a_ot = mo_stacksize(s); + MemOp d_ot = mo_pushpop(s, s->dflag); + MemOp a_ot = mo_stacksize(s); int size = 1 << d_ot; TCGv new_esp = s->A0; tcg_gen_subi_tl(s->A0, cpu_regs[R_ESP], size); if (!CODE64(s)) { - if (s->addseg) { - new_esp = s->tmp4; + if (ADDSEG(s)) { + new_esp = tcg_temp_new(); tcg_gen_mov_tl(new_esp, s->A0); } gen_lea_v_seg(s, a_ot, s->A0, R_SS, -1); @@ -2358,30 +2594,30 @@ static void gen_push_v(DisasContext *s, TCGv val) } /* two step pop is necessary for precise exceptions */ -static TCGMemOp gen_pop_T0(DisasContext *s) +static MemOp gen_pop_T0(DisasContext *s) { - TCGMemOp d_ot = mo_pushpop(s, s->dflag); + MemOp d_ot = mo_pushpop(s, s->dflag); - gen_lea_v_seg(s, mo_stacksize(s), cpu_regs[R_ESP], R_SS, -1); - gen_op_ld_v(s, d_ot, s->T0, s->A0); + gen_lea_v_seg_dest(s, mo_stacksize(s), s->T0, cpu_regs[R_ESP], R_SS, -1); + gen_op_ld_v(s, d_ot, s->T0, s->T0); return d_ot; } -static inline void gen_pop_update(DisasContext *s, TCGMemOp ot) 
+static inline void gen_pop_update(DisasContext *s, MemOp ot) { gen_stack_update(s, 1 << ot); } static inline void gen_stack_A0(DisasContext *s) { - gen_lea_v_seg(s, s->ss32 ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1); + gen_lea_v_seg(s, SS32(s) ? MO_32 : MO_16, cpu_regs[R_ESP], R_SS, -1); } static void gen_pusha(DisasContext *s) { - TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16; - TCGMemOp d_ot = s->dflag; + MemOp s_ot = SS32(s) ? MO_32 : MO_16; + MemOp d_ot = s->dflag; int size = 1 << d_ot; int i; @@ -2396,8 +2632,8 @@ static void gen_pusha(DisasContext *s) static void gen_popa(DisasContext *s) { - TCGMemOp s_ot = s->ss32 ? MO_32 : MO_16; - TCGMemOp d_ot = s->dflag; + MemOp s_ot = SS32(s) ? MO_32 : MO_16; + MemOp d_ot = s->dflag; int size = 1 << d_ot; int i; @@ -2417,8 +2653,8 @@ static void gen_popa(DisasContext *s) static void gen_enter(DisasContext *s, int esp_addend, int level) { - TCGMemOp d_ot = mo_pushpop(s, s->dflag); - TCGMemOp a_ot = CODE64(s) ? MO_64 : s->ss32 ? MO_32 : MO_16; + MemOp d_ot = mo_pushpop(s, s->dflag); + MemOp a_ot = CODE64(s) ? MO_64 : SS32(s) ? MO_32 : MO_16; int size = 1 << d_ot; /* Push BP; compute FrameTemp into T1. */ @@ -2457,8 +2693,8 @@ static void gen_enter(DisasContext *s, int esp_addend, int level) static void gen_leave(DisasContext *s) { - TCGMemOp d_ot = mo_pushpop(s, s->dflag); - TCGMemOp a_ot = mo_stacksize(s); + MemOp d_ot = mo_pushpop(s, s->dflag); + MemOp a_ot = mo_stacksize(s); gen_lea_v_seg(s, a_ot, cpu_regs[R_EBP], R_SS, -1); gen_op_ld_v(s, d_ot, s->T0, s->A0); @@ -2469,21 +2705,6 @@ static void gen_leave(DisasContext *s) gen_op_mov_reg_v(s, a_ot, R_ESP, s->T1); } -static void gen_exception(DisasContext *s, int trapno, target_ulong cur_eip) -{ - gen_update_cc_op(s); - gen_jmp_im(s, cur_eip); - gen_helper_raise_exception(cpu_env, tcg_const_i32(trapno)); - s->base.is_jmp = DISAS_NORETURN; -} - -/* Generate #UD for the current instruction. 
The assumption here is that - the instruction is known, but it isn't allowed in the current cpu mode. */ -static void gen_illegal_opcode(DisasContext *s) -{ - gen_exception(s, EXCP06_ILLOP, s->pc_start - s->cs_base); -} - /* Similarly, except that the assumption here is that we don't decode the instruction at all -- either a missing opcode, an unimplemented feature, or just a bogus instruction stream. */ @@ -2492,34 +2713,28 @@ static void gen_unknown_opcode(CPUX86State *env, DisasContext *s) gen_illegal_opcode(s); if (qemu_loglevel_mask(LOG_UNIMP)) { - target_ulong pc = s->pc_start, end = s->pc; - qemu_log_lock(); - qemu_log("ILLOPC: " TARGET_FMT_lx ":", pc); - for (; pc < end; ++pc) { - qemu_log(" %02x", cpu_ldub_code(env, pc)); - } - qemu_log("\n"); - qemu_log_unlock(); + FILE *logfile = qemu_log_trylock(); + if (logfile) { + target_ulong pc = s->base.pc_next, end = s->pc; + + fprintf(logfile, "ILLOPC: " TARGET_FMT_lx ":", pc); + for (; pc < end; ++pc) { + fprintf(logfile, " %02x", cpu_ldub_code(env, pc)); + } + fprintf(logfile, "\n"); + qemu_log_unlock(logfile); + } } } /* an interrupt is different from an exception because of the privilege checks */ -static void gen_interrupt(DisasContext *s, int intno, - target_ulong cur_eip, target_ulong next_eip) -{ - gen_update_cc_op(s); - gen_jmp_im(s, cur_eip); - gen_helper_raise_interrupt(cpu_env, tcg_const_i32(intno), - tcg_const_i32(next_eip - cur_eip)); - s->base.is_jmp = DISAS_NORETURN; -} - -static void gen_debug(DisasContext *s, target_ulong cur_eip) +static void gen_interrupt(DisasContext *s, int intno) { gen_update_cc_op(s); - gen_jmp_im(s, cur_eip); - gen_helper_debug(cpu_env); + gen_update_eip_cur(s); + gen_helper_raise_interrupt(tcg_env, tcg_constant_i32(intno), + cur_insn_len_i32(s)); s->base.is_jmp = DISAS_NORETURN; } @@ -2527,10 +2742,9 @@ static void gen_set_hflag(DisasContext *s, uint32_t mask) { if ((s->flags & mask) == 0) { TCGv_i32 t = tcg_temp_new_i32(); - tcg_gen_ld_i32(t, cpu_env, 
offsetof(CPUX86State, hflags)); + tcg_gen_ld_i32(t, tcg_env, offsetof(CPUX86State, hflags)); tcg_gen_ori_i32(t, t, mask); - tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags)); - tcg_temp_free_i32(t); + tcg_gen_st_i32(t, tcg_env, offsetof(CPUX86State, hflags)); s->flags |= mask; } } @@ -2539,14 +2753,31 @@ static void gen_reset_hflag(DisasContext *s, uint32_t mask) { if (s->flags & mask) { TCGv_i32 t = tcg_temp_new_i32(); - tcg_gen_ld_i32(t, cpu_env, offsetof(CPUX86State, hflags)); + tcg_gen_ld_i32(t, tcg_env, offsetof(CPUX86State, hflags)); tcg_gen_andi_i32(t, t, ~mask); - tcg_gen_st_i32(t, cpu_env, offsetof(CPUX86State, hflags)); - tcg_temp_free_i32(t); + tcg_gen_st_i32(t, tcg_env, offsetof(CPUX86State, hflags)); s->flags &= ~mask; } } +static void gen_set_eflags(DisasContext *s, target_ulong mask) +{ + TCGv t = tcg_temp_new(); + + tcg_gen_ld_tl(t, tcg_env, offsetof(CPUX86State, eflags)); + tcg_gen_ori_tl(t, t, mask); + tcg_gen_st_tl(t, tcg_env, offsetof(CPUX86State, eflags)); +} + +static void gen_reset_eflags(DisasContext *s, target_ulong mask) +{ + TCGv t = tcg_temp_new(); + + tcg_gen_ld_tl(t, tcg_env, offsetof(CPUX86State, eflags)); + tcg_gen_andi_tl(t, t, ~mask); + tcg_gen_st_tl(t, tcg_env, offsetof(CPUX86State, eflags)); +} + /* Clear BND registers during legacy branches. 
*/ static void gen_bnd_jmp(DisasContext *s) { @@ -2556,7 +2787,7 @@ static void gen_bnd_jmp(DisasContext *s) if ((s->prefix & PREFIX_REPNZ) == 0 && (s->flags & HF_MPX_EN_MASK) != 0 && (s->flags & HF_MPX_IU_MASK) != 0) { - gen_helper_bnd_jmp(cpu_env); + gen_helper_bnd_jmp(tcg_env); } } @@ -2577,15 +2808,13 @@ do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr) } if (s->base.tb->flags & HF_RF_MASK) { - gen_helper_reset_rf(cpu_env); + gen_reset_eflags(s, RF_MASK); } - if (s->base.singlestep_enabled) { - gen_helper_debug(cpu_env); - } else if (recheck_tf) { - gen_helper_rechecking_single_step(cpu_env); + if (recheck_tf) { + gen_helper_rechecking_single_step(tcg_env); tcg_gen_exit_tb(NULL, 0); - } else if (s->tf) { - gen_helper_single_step(cpu_env); + } else if (s->flags & HF_TF_MASK) { + gen_helper_single_step(tcg_env); } else if (jr) { tcg_gen_lookup_and_goto_ptr(); } else { @@ -2614,1904 +2843,313 @@ static void gen_eob(DisasContext *s) } /* Jump to register */ -static void gen_jr(DisasContext *s, TCGv dest) +static void gen_jr(DisasContext *s) { do_gen_eob_worker(s, false, false, true); } -/* generate a jump to eip. No segment change must happen before as a - direct call to the next block may occur */ -static void gen_jmp_tb(DisasContext *s, target_ulong eip, int tb_num) +/* Jump to eip+diff, truncating the result to OT. */ +static void gen_jmp_rel(DisasContext *s, MemOp ot, int diff, int tb_num) { + bool use_goto_tb = s->jmp_opt; + target_ulong mask = -1; + target_ulong new_pc = s->pc + diff; + target_ulong new_eip = new_pc - s->cs_base; + + /* In 64-bit mode, operand size is fixed at 64 bits. 
*/ + if (!CODE64(s)) { + if (ot == MO_16) { + mask = 0xffff; + if (tb_cflags(s->base.tb) & CF_PCREL && CODE32(s)) { + use_goto_tb = false; + } + } else { + mask = 0xffffffff; + } + } + new_eip &= mask; + gen_update_cc_op(s); set_cc_op(s, CC_OP_DYNAMIC); - if (s->jmp_opt) { - gen_goto_tb(s, tb_num, eip); + + if (tb_cflags(s->base.tb) & CF_PCREL) { + tcg_gen_addi_tl(cpu_eip, cpu_eip, new_pc - s->pc_save); + /* + * If we can prove the branch does not leave the page and we have + * no extra masking to apply (data16 branch in code32, see above), + * then we have also proven that the addition does not wrap. + */ + if (!use_goto_tb || !is_same_page(&s->base, new_pc)) { + tcg_gen_andi_tl(cpu_eip, cpu_eip, mask); + use_goto_tb = false; + } + } else if (!CODE64(s)) { + new_pc = (uint32_t)(new_eip + s->cs_base); + } + + if (use_goto_tb && translator_use_goto_tb(&s->base, new_pc)) { + /* jump to same page: we can use a direct jump */ + tcg_gen_goto_tb(tb_num); + if (!(tb_cflags(s->base.tb) & CF_PCREL)) { + tcg_gen_movi_tl(cpu_eip, new_eip); + } + tcg_gen_exit_tb(s->base.tb, tb_num); + s->base.is_jmp = DISAS_NORETURN; } else { - gen_jmp_im(s, eip); - gen_eob(s); + if (!(tb_cflags(s->base.tb) & CF_PCREL)) { + tcg_gen_movi_tl(cpu_eip, new_eip); + } + if (s->jmp_opt) { + gen_jr(s); /* jump to another page */ + } else { + gen_eob(s); /* exit to main loop */ + } } } -static void gen_jmp(DisasContext *s, target_ulong eip) +/* Jump to eip+diff, truncating to the current code size. */ +static void gen_jmp_rel_csize(DisasContext *s, int diff, int tb_num) { - gen_jmp_tb(s, eip, 0); + /* CODE64 ignores the OT argument, so we need not consider it. */ + gen_jmp_rel(s, CODE32(s) ? 
MO_32 : MO_16, diff, tb_num); } static inline void gen_ldq_env_A0(DisasContext *s, int offset) { - tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset); + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); + tcg_gen_st_i64(s->tmp1_i64, tcg_env, offset); } static inline void gen_stq_env_A0(DisasContext *s, int offset) { - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset); - tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); + tcg_gen_ld_i64(s->tmp1_i64, tcg_env, offset); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEUQ); } -static inline void gen_ldo_env_A0(DisasContext *s, int offset) +static inline void gen_ldo_env_A0(DisasContext *s, int offset, bool align) { + MemOp atom = (s->cpuid_ext_features & CPUID_EXT_AVX + ? MO_ATOM_IFALIGN : MO_ATOM_IFALIGN_PAIR); + MemOp mop = MO_128 | MO_LE | atom | (align ? MO_ALIGN_16 : 0); int mem_index = s->mem_index; - tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0))); - tcg_gen_addi_tl(s->tmp0, s->A0, 8); - tcg_gen_qemu_ld_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1))); + TCGv_i128 t = tcg_temp_new_i128(); + + tcg_gen_qemu_ld_i128(t, s->A0, mem_index, mop); + tcg_gen_st_i128(t, tcg_env, offset); } -static inline void gen_sto_env_A0(DisasContext *s, int offset) +static inline void gen_sto_env_A0(DisasContext *s, int offset, bool align) { + MemOp atom = (s->cpuid_ext_features & CPUID_EXT_AVX + ? MO_ATOM_IFALIGN : MO_ATOM_IFALIGN_PAIR); + MemOp mop = MO_128 | MO_LE | atom | (align ? 
MO_ALIGN_16 : 0); int mem_index = s->mem_index; - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(0))); - tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, mem_index, MO_LEQ); - tcg_gen_addi_tl(s->tmp0, s->A0, 8); - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, offset + offsetof(ZMMReg, ZMM_Q(1))); - tcg_gen_qemu_st_i64(s->tmp1_i64, s->tmp0, mem_index, MO_LEQ); -} - -static inline void gen_op_movo(DisasContext *s, int d_offset, int s_offset) -{ - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(0))); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(0))); - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset + offsetof(ZMMReg, ZMM_Q(1))); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset + offsetof(ZMMReg, ZMM_Q(1))); -} - -static inline void gen_op_movq(DisasContext *s, int d_offset, int s_offset) -{ - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, s_offset); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset); -} - -static inline void gen_op_movl(DisasContext *s, int d_offset, int s_offset) -{ - tcg_gen_ld_i32(s->tmp2_i32, cpu_env, s_offset); - tcg_gen_st_i32(s->tmp2_i32, cpu_env, d_offset); -} - -static inline void gen_op_movq_env_0(DisasContext *s, int d_offset) -{ - tcg_gen_movi_i64(s->tmp1_i64, 0); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, d_offset); -} - -typedef void (*SSEFunc_i_ep)(TCGv_i32 val, TCGv_ptr env, TCGv_ptr reg); -typedef void (*SSEFunc_l_ep)(TCGv_i64 val, TCGv_ptr env, TCGv_ptr reg); -typedef void (*SSEFunc_0_epi)(TCGv_ptr env, TCGv_ptr reg, TCGv_i32 val); -typedef void (*SSEFunc_0_epl)(TCGv_ptr env, TCGv_ptr reg, TCGv_i64 val); -typedef void (*SSEFunc_0_epp)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b); -typedef void (*SSEFunc_0_eppi)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, - TCGv_i32 val); -typedef void (*SSEFunc_0_ppi)(TCGv_ptr reg_a, TCGv_ptr reg_b, TCGv_i32 val); -typedef void (*SSEFunc_0_eppt)(TCGv_ptr env, TCGv_ptr reg_a, TCGv_ptr reg_b, - TCGv val); - -#define SSE_SPECIAL ((void *)1) -#define SSE_DUMMY 
((void *)2) - -#define MMX_OP2(x) { gen_helper_ ## x ## _mmx, gen_helper_ ## x ## _xmm } -#define SSE_FOP(x) { gen_helper_ ## x ## ps, gen_helper_ ## x ## pd, \ - gen_helper_ ## x ## ss, gen_helper_ ## x ## sd, } - -static const SSEFunc_0_epp sse_op_table1[256][4] = { - /* 3DNow! extensions */ - [0x0e] = { SSE_DUMMY }, /* femms */ - [0x0f] = { SSE_DUMMY }, /* pf... */ - /* pure SSE operations */ - [0x10] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ - [0x11] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movups, movupd, movss, movsd */ - [0x12] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd, movsldup, movddup */ - [0x13] = { SSE_SPECIAL, SSE_SPECIAL }, /* movlps, movlpd */ - [0x14] = { gen_helper_punpckldq_xmm, gen_helper_punpcklqdq_xmm }, - [0x15] = { gen_helper_punpckhdq_xmm, gen_helper_punpckhqdq_xmm }, - [0x16] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd, movshdup */ - [0x17] = { SSE_SPECIAL, SSE_SPECIAL }, /* movhps, movhpd */ - - [0x28] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */ - [0x29] = { SSE_SPECIAL, SSE_SPECIAL }, /* movaps, movapd */ - [0x2a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtpi2ps, cvtpi2pd, cvtsi2ss, cvtsi2sd */ - [0x2b] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movntps, movntpd, movntss, movntsd */ - [0x2c] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvttps2pi, cvttpd2pi, cvttsd2si, cvttss2si */ - [0x2d] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* cvtps2pi, cvtpd2pi, cvtsd2si, cvtss2si */ - [0x2e] = { gen_helper_ucomiss, gen_helper_ucomisd }, - [0x2f] = { gen_helper_comiss, gen_helper_comisd }, - [0x50] = { SSE_SPECIAL, SSE_SPECIAL }, /* movmskps, movmskpd */ - [0x51] = SSE_FOP(sqrt), - [0x52] = { gen_helper_rsqrtps, NULL, gen_helper_rsqrtss, NULL }, - [0x53] = { gen_helper_rcpps, NULL, gen_helper_rcpss, NULL }, - [0x54] = { gen_helper_pand_xmm, 
gen_helper_pand_xmm }, /* andps, andpd */ - [0x55] = { gen_helper_pandn_xmm, gen_helper_pandn_xmm }, /* andnps, andnpd */ - [0x56] = { gen_helper_por_xmm, gen_helper_por_xmm }, /* orps, orpd */ - [0x57] = { gen_helper_pxor_xmm, gen_helper_pxor_xmm }, /* xorps, xorpd */ - [0x58] = SSE_FOP(add), - [0x59] = SSE_FOP(mul), - [0x5a] = { gen_helper_cvtps2pd, gen_helper_cvtpd2ps, - gen_helper_cvtss2sd, gen_helper_cvtsd2ss }, - [0x5b] = { gen_helper_cvtdq2ps, gen_helper_cvtps2dq, gen_helper_cvttps2dq }, - [0x5c] = SSE_FOP(sub), - [0x5d] = SSE_FOP(min), - [0x5e] = SSE_FOP(div), - [0x5f] = SSE_FOP(max), - - [0xc2] = SSE_FOP(cmpeq), - [0xc6] = { (SSEFunc_0_epp)gen_helper_shufps, - (SSEFunc_0_epp)gen_helper_shufpd }, /* XXX: casts */ - - /* SSSE3, SSE4, MOVBE, CRC32, BMI1, BMI2, ADX. */ - [0x38] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, - [0x3a] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, - - /* MMX ops and their SSE extensions */ - [0x60] = MMX_OP2(punpcklbw), - [0x61] = MMX_OP2(punpcklwd), - [0x62] = MMX_OP2(punpckldq), - [0x63] = MMX_OP2(packsswb), - [0x64] = MMX_OP2(pcmpgtb), - [0x65] = MMX_OP2(pcmpgtw), - [0x66] = MMX_OP2(pcmpgtl), - [0x67] = MMX_OP2(packuswb), - [0x68] = MMX_OP2(punpckhbw), - [0x69] = MMX_OP2(punpckhwd), - [0x6a] = MMX_OP2(punpckhdq), - [0x6b] = MMX_OP2(packssdw), - [0x6c] = { NULL, gen_helper_punpcklqdq_xmm }, - [0x6d] = { NULL, gen_helper_punpckhqdq_xmm }, - [0x6e] = { SSE_SPECIAL, SSE_SPECIAL }, /* movd mm, ea */ - [0x6f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, , movqdu */ - [0x70] = { (SSEFunc_0_epp)gen_helper_pshufw_mmx, - (SSEFunc_0_epp)gen_helper_pshufd_xmm, - (SSEFunc_0_epp)gen_helper_pshufhw_xmm, - (SSEFunc_0_epp)gen_helper_pshuflw_xmm }, /* XXX: casts */ - [0x71] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftw */ - [0x72] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftd */ - [0x73] = { SSE_SPECIAL, SSE_SPECIAL }, /* shiftq */ - [0x74] = MMX_OP2(pcmpeqb), - [0x75] = MMX_OP2(pcmpeqw), - [0x76] = 
MMX_OP2(pcmpeql), - [0x77] = { SSE_DUMMY }, /* emms */ - [0x78] = { NULL, SSE_SPECIAL, NULL, SSE_SPECIAL }, /* extrq_i, insertq_i */ - [0x79] = { NULL, gen_helper_extrq_r, NULL, gen_helper_insertq_r }, - [0x7c] = { NULL, gen_helper_haddpd, NULL, gen_helper_haddps }, - [0x7d] = { NULL, gen_helper_hsubpd, NULL, gen_helper_hsubps }, - [0x7e] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movd, movd, , movq */ - [0x7f] = { SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, /* movq, movdqa, movdqu */ - [0xc4] = { SSE_SPECIAL, SSE_SPECIAL }, /* pinsrw */ - [0xc5] = { SSE_SPECIAL, SSE_SPECIAL }, /* pextrw */ - [0xd0] = { NULL, gen_helper_addsubpd, NULL, gen_helper_addsubps }, - [0xd1] = MMX_OP2(psrlw), - [0xd2] = MMX_OP2(psrld), - [0xd3] = MMX_OP2(psrlq), - [0xd4] = MMX_OP2(paddq), - [0xd5] = MMX_OP2(pmullw), - [0xd6] = { NULL, SSE_SPECIAL, SSE_SPECIAL, SSE_SPECIAL }, - [0xd7] = { SSE_SPECIAL, SSE_SPECIAL }, /* pmovmskb */ - [0xd8] = MMX_OP2(psubusb), - [0xd9] = MMX_OP2(psubusw), - [0xda] = MMX_OP2(pminub), - [0xdb] = MMX_OP2(pand), - [0xdc] = MMX_OP2(paddusb), - [0xdd] = MMX_OP2(paddusw), - [0xde] = MMX_OP2(pmaxub), - [0xdf] = MMX_OP2(pandn), - [0xe0] = MMX_OP2(pavgb), - [0xe1] = MMX_OP2(psraw), - [0xe2] = MMX_OP2(psrad), - [0xe3] = MMX_OP2(pavgw), - [0xe4] = MMX_OP2(pmulhuw), - [0xe5] = MMX_OP2(pmulhw), - [0xe6] = { NULL, gen_helper_cvttpd2dq, gen_helper_cvtdq2pd, gen_helper_cvtpd2dq }, - [0xe7] = { SSE_SPECIAL , SSE_SPECIAL }, /* movntq, movntq */ - [0xe8] = MMX_OP2(psubsb), - [0xe9] = MMX_OP2(psubsw), - [0xea] = MMX_OP2(pminsw), - [0xeb] = MMX_OP2(por), - [0xec] = MMX_OP2(paddsb), - [0xed] = MMX_OP2(paddsw), - [0xee] = MMX_OP2(pmaxsw), - [0xef] = MMX_OP2(pxor), - [0xf0] = { NULL, NULL, NULL, SSE_SPECIAL }, /* lddqu */ - [0xf1] = MMX_OP2(psllw), - [0xf2] = MMX_OP2(pslld), - [0xf3] = MMX_OP2(psllq), - [0xf4] = MMX_OP2(pmuludq), - [0xf5] = MMX_OP2(pmaddwd), - [0xf6] = MMX_OP2(psadbw), - [0xf7] = { (SSEFunc_0_epp)gen_helper_maskmov_mmx, - (SSEFunc_0_epp)gen_helper_maskmov_xmm }, 
/* XXX: casts */ - [0xf8] = MMX_OP2(psubb), - [0xf9] = MMX_OP2(psubw), - [0xfa] = MMX_OP2(psubl), - [0xfb] = MMX_OP2(psubq), - [0xfc] = MMX_OP2(paddb), - [0xfd] = MMX_OP2(paddw), - [0xfe] = MMX_OP2(paddl), -}; + TCGv_i128 t = tcg_temp_new_i128(); -static const SSEFunc_0_epp sse_op_table2[3 * 8][2] = { - [0 + 2] = MMX_OP2(psrlw), - [0 + 4] = MMX_OP2(psraw), - [0 + 6] = MMX_OP2(psllw), - [8 + 2] = MMX_OP2(psrld), - [8 + 4] = MMX_OP2(psrad), - [8 + 6] = MMX_OP2(pslld), - [16 + 2] = MMX_OP2(psrlq), - [16 + 3] = { NULL, gen_helper_psrldq_xmm }, - [16 + 6] = MMX_OP2(psllq), - [16 + 7] = { NULL, gen_helper_pslldq_xmm }, -}; + tcg_gen_ld_i128(t, tcg_env, offset); + tcg_gen_qemu_st_i128(t, s->A0, mem_index, mop); +} -static const SSEFunc_0_epi sse_op_table3ai[] = { - gen_helper_cvtsi2ss, - gen_helper_cvtsi2sd -}; +static void gen_ldy_env_A0(DisasContext *s, int offset, bool align) +{ + MemOp mop = MO_128 | MO_LE | MO_ATOM_IFALIGN_PAIR; + int mem_index = s->mem_index; + TCGv_i128 t0 = tcg_temp_new_i128(); + TCGv_i128 t1 = tcg_temp_new_i128(); -#ifdef TARGET_X86_64 -static const SSEFunc_0_epl sse_op_table3aq[] = { - gen_helper_cvtsq2ss, - gen_helper_cvtsq2sd -}; -#endif + tcg_gen_qemu_ld_i128(t0, s->A0, mem_index, mop | (align ? 
MO_ALIGN_32 : 0)); + tcg_gen_addi_tl(s->tmp0, s->A0, 16); + tcg_gen_qemu_ld_i128(t1, s->tmp0, mem_index, mop); -static const SSEFunc_i_ep sse_op_table3bi[] = { - gen_helper_cvttss2si, - gen_helper_cvtss2si, - gen_helper_cvttsd2si, - gen_helper_cvtsd2si -}; + tcg_gen_st_i128(t0, tcg_env, offset + offsetof(YMMReg, YMM_X(0))); + tcg_gen_st_i128(t1, tcg_env, offset + offsetof(YMMReg, YMM_X(1))); +} -#ifdef TARGET_X86_64 -static const SSEFunc_l_ep sse_op_table3bq[] = { - gen_helper_cvttss2sq, - gen_helper_cvtss2sq, - gen_helper_cvttsd2sq, - gen_helper_cvtsd2sq -}; -#endif +static void gen_sty_env_A0(DisasContext *s, int offset, bool align) +{ + MemOp mop = MO_128 | MO_LE | MO_ATOM_IFALIGN_PAIR; + int mem_index = s->mem_index; + TCGv_i128 t = tcg_temp_new_i128(); -static const SSEFunc_0_epp sse_op_table4[8][4] = { - SSE_FOP(cmpeq), - SSE_FOP(cmplt), - SSE_FOP(cmple), - SSE_FOP(cmpunord), - SSE_FOP(cmpneq), - SSE_FOP(cmpnlt), - SSE_FOP(cmpnle), - SSE_FOP(cmpord), -}; + tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(0))); + tcg_gen_qemu_st_i128(t, s->A0, mem_index, mop | (align ? 
MO_ALIGN_32 : 0)); + tcg_gen_addi_tl(s->tmp0, s->A0, 16); + tcg_gen_ld_i128(t, tcg_env, offset + offsetof(YMMReg, YMM_X(1))); + tcg_gen_qemu_st_i128(t, s->tmp0, mem_index, mop); +} -static const SSEFunc_0_epp sse_op_table5[256] = { - [0x0c] = gen_helper_pi2fw, - [0x0d] = gen_helper_pi2fd, - [0x1c] = gen_helper_pf2iw, - [0x1d] = gen_helper_pf2id, - [0x8a] = gen_helper_pfnacc, - [0x8e] = gen_helper_pfpnacc, - [0x90] = gen_helper_pfcmpge, - [0x94] = gen_helper_pfmin, - [0x96] = gen_helper_pfrcp, - [0x97] = gen_helper_pfrsqrt, - [0x9a] = gen_helper_pfsub, - [0x9e] = gen_helper_pfadd, - [0xa0] = gen_helper_pfcmpgt, - [0xa4] = gen_helper_pfmax, - [0xa6] = gen_helper_movq, /* pfrcpit1; no need to actually increase precision */ - [0xa7] = gen_helper_movq, /* pfrsqit1 */ - [0xaa] = gen_helper_pfsubr, - [0xae] = gen_helper_pfacc, - [0xb0] = gen_helper_pfcmpeq, - [0xb4] = gen_helper_pfmul, - [0xb6] = gen_helper_movq, /* pfrcpit2 */ - [0xb7] = gen_helper_pmulhrw_mmx, - [0xbb] = gen_helper_pswapd, - [0xbf] = gen_helper_pavgb_mmx /* pavgusb */ -}; +#include "decode-new.h" +#include "emit.c.inc" +#include "decode-new.c.inc" -struct SSEOpHelper_epp { - SSEFunc_0_epp op[2]; - uint32_t ext_mask; -}; +static void gen_cmpxchg8b(DisasContext *s, CPUX86State *env, int modrm) +{ + TCGv_i64 cmp, val, old; + TCGv Z; -struct SSEOpHelper_eppi { - SSEFunc_0_eppi op[2]; - uint32_t ext_mask; -}; + gen_lea_modrm(env, s, modrm); -#define SSSE3_OP(x) { MMX_OP2(x), CPUID_EXT_SSSE3 } -#define SSE41_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE41 } -#define SSE42_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_SSE42 } -#define SSE41_SPECIAL { { NULL, SSE_SPECIAL }, CPUID_EXT_SSE41 } -#define PCLMULQDQ_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, \ - CPUID_EXT_PCLMULQDQ } -#define AESNI_OP(x) { { NULL, gen_helper_ ## x ## _xmm }, CPUID_EXT_AES } - -static const struct SSEOpHelper_epp sse_op_table6[256] = { - [0x00] = SSSE3_OP(pshufb), - [0x01] = SSSE3_OP(phaddw), - [0x02] = 
SSSE3_OP(phaddd), - [0x03] = SSSE3_OP(phaddsw), - [0x04] = SSSE3_OP(pmaddubsw), - [0x05] = SSSE3_OP(phsubw), - [0x06] = SSSE3_OP(phsubd), - [0x07] = SSSE3_OP(phsubsw), - [0x08] = SSSE3_OP(psignb), - [0x09] = SSSE3_OP(psignw), - [0x0a] = SSSE3_OP(psignd), - [0x0b] = SSSE3_OP(pmulhrsw), - [0x10] = SSE41_OP(pblendvb), - [0x14] = SSE41_OP(blendvps), - [0x15] = SSE41_OP(blendvpd), - [0x17] = SSE41_OP(ptest), - [0x1c] = SSSE3_OP(pabsb), - [0x1d] = SSSE3_OP(pabsw), - [0x1e] = SSSE3_OP(pabsd), - [0x20] = SSE41_OP(pmovsxbw), - [0x21] = SSE41_OP(pmovsxbd), - [0x22] = SSE41_OP(pmovsxbq), - [0x23] = SSE41_OP(pmovsxwd), - [0x24] = SSE41_OP(pmovsxwq), - [0x25] = SSE41_OP(pmovsxdq), - [0x28] = SSE41_OP(pmuldq), - [0x29] = SSE41_OP(pcmpeqq), - [0x2a] = SSE41_SPECIAL, /* movntqda */ - [0x2b] = SSE41_OP(packusdw), - [0x30] = SSE41_OP(pmovzxbw), - [0x31] = SSE41_OP(pmovzxbd), - [0x32] = SSE41_OP(pmovzxbq), - [0x33] = SSE41_OP(pmovzxwd), - [0x34] = SSE41_OP(pmovzxwq), - [0x35] = SSE41_OP(pmovzxdq), - [0x37] = SSE42_OP(pcmpgtq), - [0x38] = SSE41_OP(pminsb), - [0x39] = SSE41_OP(pminsd), - [0x3a] = SSE41_OP(pminuw), - [0x3b] = SSE41_OP(pminud), - [0x3c] = SSE41_OP(pmaxsb), - [0x3d] = SSE41_OP(pmaxsd), - [0x3e] = SSE41_OP(pmaxuw), - [0x3f] = SSE41_OP(pmaxud), - [0x40] = SSE41_OP(pmulld), - [0x41] = SSE41_OP(phminposuw), - [0xdb] = AESNI_OP(aesimc), - [0xdc] = AESNI_OP(aesenc), - [0xdd] = AESNI_OP(aesenclast), - [0xde] = AESNI_OP(aesdec), - [0xdf] = AESNI_OP(aesdeclast), -}; + cmp = tcg_temp_new_i64(); + val = tcg_temp_new_i64(); + old = tcg_temp_new_i64(); -static const struct SSEOpHelper_eppi sse_op_table7[256] = { - [0x08] = SSE41_OP(roundps), - [0x09] = SSE41_OP(roundpd), - [0x0a] = SSE41_OP(roundss), - [0x0b] = SSE41_OP(roundsd), - [0x0c] = SSE41_OP(blendps), - [0x0d] = SSE41_OP(blendpd), - [0x0e] = SSE41_OP(pblendw), - [0x0f] = SSSE3_OP(palignr), - [0x14] = SSE41_SPECIAL, /* pextrb */ - [0x15] = SSE41_SPECIAL, /* pextrw */ - [0x16] = SSE41_SPECIAL, /* pextrd/pextrq */ - [0x17] = 
SSE41_SPECIAL, /* extractps */ - [0x20] = SSE41_SPECIAL, /* pinsrb */ - [0x21] = SSE41_SPECIAL, /* insertps */ - [0x22] = SSE41_SPECIAL, /* pinsrd/pinsrq */ - [0x40] = SSE41_OP(dpps), - [0x41] = SSE41_OP(dppd), - [0x42] = SSE41_OP(mpsadbw), - [0x44] = PCLMULQDQ_OP(pclmulqdq), - [0x60] = SSE42_OP(pcmpestrm), - [0x61] = SSE42_OP(pcmpestri), - [0x62] = SSE42_OP(pcmpistrm), - [0x63] = SSE42_OP(pcmpistri), - [0xdf] = AESNI_OP(aeskeygenassist), -}; + /* Construct the comparison values from the register pair. */ + tcg_gen_concat_tl_i64(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]); + tcg_gen_concat_tl_i64(val, cpu_regs[R_EBX], cpu_regs[R_ECX]); -static void gen_sse(CPUX86State *env, DisasContext *s, int b, - target_ulong pc_start, int rex_r) -{ - int b1, op1_offset, op2_offset, is_xmm, val; - int modrm, mod, rm, reg; - SSEFunc_0_epp sse_fn_epp; - SSEFunc_0_eppi sse_fn_eppi; - SSEFunc_0_ppi sse_fn_ppi; - SSEFunc_0_eppt sse_fn_eppt; - TCGMemOp ot; - - b &= 0xff; - if (s->prefix & PREFIX_DATA) - b1 = 1; - else if (s->prefix & PREFIX_REPZ) - b1 = 2; - else if (s->prefix & PREFIX_REPNZ) - b1 = 3; - else - b1 = 0; - sse_fn_epp = sse_op_table1[b][b1]; - if (!sse_fn_epp) { - goto unknown_op; - } - if ((b <= 0x5f && b >= 0x10) || b == 0xc6 || b == 0xc2) { - is_xmm = 1; + /* Only require atomic with LOCK; non-parallel handled in generator. 
*/ + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_cmpxchg_i64(old, s->A0, cmp, val, s->mem_index, MO_TEUQ); } else { - if (b1 == 0) { - /* MMX case */ - is_xmm = 0; - } else { - is_xmm = 1; - } - } - /* simple MMX/SSE operation */ - if (s->flags & HF_TS_MASK) { - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); - return; + tcg_gen_nonatomic_cmpxchg_i64(old, s->A0, cmp, val, + s->mem_index, MO_TEUQ); } - if (s->flags & HF_EM_MASK) { - illegal_op: - gen_illegal_opcode(s); - return; - } - if (is_xmm - && !(s->flags & HF_OSFXSR_MASK) - && ((b != 0x38 && b != 0x3a) || (s->prefix & PREFIX_DATA))) { - goto unknown_op; - } - if (b == 0x0e) { - if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) { - /* If we were fully decoding this we might use illegal_op. */ - goto unknown_op; - } - /* femms */ - gen_helper_emms(cpu_env); - return; - } - if (b == 0x77) { - /* emms */ - gen_helper_emms(cpu_env); - return; - } - /* prepare MMX state (XXX: optimize by storing fptt and fptags in - the static cpu state) */ - if (!is_xmm) { - gen_helper_enter_mmx(cpu_env); - } - - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7); - if (is_xmm) - reg |= rex_r; - mod = (modrm >> 6) & 3; - if (sse_fn_epp == SSE_SPECIAL) { - b |= (b1 << 8); - switch(b) { - case 0x0e7: /* movntq */ - if (mod == 3) { - goto illegal_op; - } - gen_lea_modrm(env, s, modrm); - gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx)); - break; - case 0x1e7: /* movntdq */ - case 0x02b: /* movntps */ - case 0x12b: /* movntps */ - if (mod == 3) - goto illegal_op; - gen_lea_modrm(env, s, modrm); - gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); - break; - case 0x3f0: /* lddqu */ - if (mod == 3) - goto illegal_op; - gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); - break; - case 0x22b: /* movntss */ - case 0x32b: /* movntsd */ - if (mod == 3) - goto illegal_op; - gen_lea_modrm(env, s, modrm); - if (b1 & 1) { - gen_stq_env_A0(s, offsetof(CPUX86State, - 
xmm_regs[reg].ZMM_Q(0))); - } else { - tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, - xmm_regs[reg].ZMM_L(0))); - gen_op_st_v(s, MO_32, s->T0, s->A0); - } - break; - case 0x6e: /* movd mm, ea */ -#ifdef TARGET_X86_64 - if (s->dflag == MO_64) { - gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0); - tcg_gen_st_tl(s->T0, cpu_env, - offsetof(CPUX86State, fpregs[reg].mmx)); - } else -#endif - { - gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0); - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State,fpregs[reg].mmx)); - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_movl_mm_T0_mmx(s->ptr0, s->tmp2_i32); - } - break; - case 0x16e: /* movd xmm, ea */ -#ifdef TARGET_X86_64 - if (s->dflag == MO_64) { - gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 0); - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State,xmm_regs[reg])); - gen_helper_movq_mm_T0_xmm(s->ptr0, s->T0); - } else -#endif - { - gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 0); - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State,xmm_regs[reg])); - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_movl_mm_T0_xmm(s->ptr0, s->tmp2_i32); - } - break; - case 0x6f: /* movq mm, ea */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_ldq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx)); - } else { - rm = (modrm & 7); - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, - offsetof(CPUX86State,fpregs[rm].mmx)); - tcg_gen_st_i64(s->tmp1_i64, cpu_env, - offsetof(CPUX86State,fpregs[reg].mmx)); - } - break; - case 0x010: /* movups */ - case 0x110: /* movupd */ - case 0x028: /* movaps */ - case 0x128: /* movapd */ - case 0x16f: /* movdqa xmm, ea */ - case 0x26f: /* movdqu xmm, ea */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_movo(s, offsetof(CPUX86State, xmm_regs[reg]), - offsetof(CPUX86State,xmm_regs[rm])); - } - break; - case 0x210: /* movss xmm, ea */ - if (mod != 3) { - 
gen_lea_modrm(env, s, modrm); - gen_op_ld_v(s, MO_32, s->T0, s->A0); - tcg_gen_st32_tl(s->T0, cpu_env, - offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0))); - tcg_gen_movi_tl(s->T0, 0); - tcg_gen_st32_tl(s->T0, cpu_env, - offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1))); - tcg_gen_st32_tl(s->T0, cpu_env, - offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2))); - tcg_gen_st32_tl(s->T0, cpu_env, - offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3))); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)), - offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0))); - } - break; - case 0x310: /* movsd xmm, ea */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_ldq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].ZMM_Q(0))); - tcg_gen_movi_tl(s->T0, 0); - tcg_gen_st32_tl(s->T0, cpu_env, - offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2))); - tcg_gen_st32_tl(s->T0, cpu_env, - offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3))); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)), - offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); - } - break; - case 0x012: /* movlps */ - case 0x112: /* movlpd */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_ldq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].ZMM_Q(0))); - } else { - /* movhlps */ - rm = (modrm & 7) | REX_B(s); - gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)), - offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(1))); - } - break; - case 0x212: /* movsldup */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)), - offsetof(CPUX86State,xmm_regs[rm].ZMM_L(0))); - gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)), - offsetof(CPUX86State,xmm_regs[rm].ZMM_L(2))); - } - gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)), - offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0))); - 
gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)), - offsetof(CPUX86State,xmm_regs[reg].ZMM_L(2))); - break; - case 0x312: /* movddup */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_ldq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].ZMM_Q(0))); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)), - offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); - } - gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)), - offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0))); - break; - case 0x016: /* movhps */ - case 0x116: /* movhpd */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_ldq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].ZMM_Q(1))); - } else { - /* movlhps */ - rm = (modrm & 7) | REX_B(s); - gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1)), - offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); - } - break; - case 0x216: /* movshdup */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(1)), - offsetof(CPUX86State,xmm_regs[rm].ZMM_L(1))); - gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(3)), - offsetof(CPUX86State,xmm_regs[rm].ZMM_L(3))); - } - gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0)), - offsetof(CPUX86State,xmm_regs[reg].ZMM_L(1))); - gen_op_movl(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_L(2)), - offsetof(CPUX86State,xmm_regs[reg].ZMM_L(3))); - break; - case 0x178: - case 0x378: - { - int bit_index, field_length; - - if (b1 == 1 && reg != 0) - goto illegal_op; - field_length = x86_ldub_code(env, s) & 0x3F; - bit_index = x86_ldub_code(env, s) & 0x3F; - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State,xmm_regs[reg])); - if (b1 == 1) - gen_helper_extrq_i(cpu_env, s->ptr0, - tcg_const_i32(bit_index), - tcg_const_i32(field_length)); - else - gen_helper_insertq_i(cpu_env, s->ptr0, - 
tcg_const_i32(bit_index), - tcg_const_i32(field_length)); - } - break; - case 0x7e: /* movd ea, mm */ -#ifdef TARGET_X86_64 - if (s->dflag == MO_64) { - tcg_gen_ld_i64(s->T0, cpu_env, - offsetof(CPUX86State,fpregs[reg].mmx)); - gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1); - } else -#endif - { - tcg_gen_ld32u_tl(s->T0, cpu_env, - offsetof(CPUX86State,fpregs[reg].mmx.MMX_L(0))); - gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1); - } - break; - case 0x17e: /* movd ea, xmm */ -#ifdef TARGET_X86_64 - if (s->dflag == MO_64) { - tcg_gen_ld_i64(s->T0, cpu_env, - offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0))); - gen_ldst_modrm(env, s, modrm, MO_64, OR_TMP0, 1); - } else -#endif - { - tcg_gen_ld32u_tl(s->T0, cpu_env, - offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0))); - gen_ldst_modrm(env, s, modrm, MO_32, OR_TMP0, 1); - } - break; - case 0x27e: /* movq xmm, ea */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_ldq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].ZMM_Q(0))); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)), - offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); - } - gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1))); - break; - case 0x7f: /* movq ea, mm */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_stq_env_A0(s, offsetof(CPUX86State, fpregs[reg].mmx)); - } else { - rm = (modrm & 7); - gen_op_movq(s, offsetof(CPUX86State, fpregs[rm].mmx), - offsetof(CPUX86State,fpregs[reg].mmx)); - } - break; - case 0x011: /* movups */ - case 0x111: /* movupd */ - case 0x029: /* movaps */ - case 0x129: /* movapd */ - case 0x17f: /* movdqa ea, xmm */ - case 0x27f: /* movdqu ea, xmm */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_sto_env_A0(s, offsetof(CPUX86State, xmm_regs[reg])); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_movo(s, offsetof(CPUX86State, xmm_regs[rm]), - offsetof(CPUX86State,xmm_regs[reg])); - } - break; - case 0x211: /* movss ea, xmm */ - if (mod != 3) { 
- gen_lea_modrm(env, s, modrm); - tcg_gen_ld32u_tl(s->T0, cpu_env, - offsetof(CPUX86State, xmm_regs[reg].ZMM_L(0))); - gen_op_st_v(s, MO_32, s->T0, s->A0); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_movl(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_L(0)), - offsetof(CPUX86State,xmm_regs[reg].ZMM_L(0))); - } - break; - case 0x311: /* movsd ea, xmm */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_stq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].ZMM_Q(0))); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)), - offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0))); - } - break; - case 0x013: /* movlps */ - case 0x113: /* movlpd */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_stq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].ZMM_Q(0))); - } else { - goto illegal_op; - } - break; - case 0x017: /* movhps */ - case 0x117: /* movhpd */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_stq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].ZMM_Q(1))); - } else { - goto illegal_op; - } - break; - case 0x71: /* shift mm, im */ - case 0x72: - case 0x73: - case 0x171: /* shift xmm, im */ - case 0x172: - case 0x173: - if (b1 >= 2) { - goto unknown_op; - } - val = x86_ldub_code(env, s); - if (is_xmm) { - tcg_gen_movi_tl(s->T0, val); - tcg_gen_st32_tl(s->T0, cpu_env, - offsetof(CPUX86State, xmm_t0.ZMM_L(0))); - tcg_gen_movi_tl(s->T0, 0); - tcg_gen_st32_tl(s->T0, cpu_env, - offsetof(CPUX86State, xmm_t0.ZMM_L(1))); - op1_offset = offsetof(CPUX86State,xmm_t0); - } else { - tcg_gen_movi_tl(s->T0, val); - tcg_gen_st32_tl(s->T0, cpu_env, - offsetof(CPUX86State, mmx_t0.MMX_L(0))); - tcg_gen_movi_tl(s->T0, 0); - tcg_gen_st32_tl(s->T0, cpu_env, - offsetof(CPUX86State, mmx_t0.MMX_L(1))); - op1_offset = offsetof(CPUX86State,mmx_t0); - } - sse_fn_epp = sse_op_table2[((b - 1) & 3) * 8 + - (((modrm >> 3)) & 7)][b1]; - if (!sse_fn_epp) { - goto unknown_op; - } - if (is_xmm) { - rm = (modrm & 7) | REX_B(s); - op2_offset 
= offsetof(CPUX86State,xmm_regs[rm]); - } else { - rm = (modrm & 7); - op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); - } - tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op1_offset); - sse_fn_epp(cpu_env, s->ptr0, s->ptr1); - break; - case 0x050: /* movmskps */ - rm = (modrm & 7) | REX_B(s); - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State,xmm_regs[rm])); - gen_helper_movmskps(s->tmp2_i32, cpu_env, s->ptr0); - tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32); - break; - case 0x150: /* movmskpd */ - rm = (modrm & 7) | REX_B(s); - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State,xmm_regs[rm])); - gen_helper_movmskpd(s->tmp2_i32, cpu_env, s->ptr0); - tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32); - break; - case 0x02a: /* cvtpi2ps */ - case 0x12a: /* cvtpi2pd */ - gen_helper_enter_mmx(cpu_env); - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - op2_offset = offsetof(CPUX86State,mmx_t0); - gen_ldq_env_A0(s, op2_offset); - } else { - rm = (modrm & 7); - op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); - } - op1_offset = offsetof(CPUX86State,xmm_regs[reg]); - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - switch(b >> 8) { - case 0x0: - gen_helper_cvtpi2ps(cpu_env, s->ptr0, s->ptr1); - break; - default: - case 0x1: - gen_helper_cvtpi2pd(cpu_env, s->ptr0, s->ptr1); - break; - } - break; - case 0x22a: /* cvtsi2ss */ - case 0x32a: /* cvtsi2sd */ - ot = mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - op1_offset = offsetof(CPUX86State,xmm_regs[reg]); - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - if (ot == MO_32) { - SSEFunc_0_epi sse_fn_epi = sse_op_table3ai[(b >> 8) & 1]; - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - sse_fn_epi(cpu_env, s->ptr0, s->tmp2_i32); - } else { -#ifdef TARGET_X86_64 - SSEFunc_0_epl sse_fn_epl = sse_op_table3aq[(b >> 8) & 1]; - sse_fn_epl(cpu_env, s->ptr0, s->T0); -#else - goto illegal_op; -#endif - } - 
break; - case 0x02c: /* cvttps2pi */ - case 0x12c: /* cvttpd2pi */ - case 0x02d: /* cvtps2pi */ - case 0x12d: /* cvtpd2pi */ - gen_helper_enter_mmx(cpu_env); - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - op2_offset = offsetof(CPUX86State,xmm_t0); - gen_ldo_env_A0(s, op2_offset); - } else { - rm = (modrm & 7) | REX_B(s); - op2_offset = offsetof(CPUX86State,xmm_regs[rm]); - } - op1_offset = offsetof(CPUX86State,fpregs[reg & 7].mmx); - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - switch(b) { - case 0x02c: - gen_helper_cvttps2pi(cpu_env, s->ptr0, s->ptr1); - break; - case 0x12c: - gen_helper_cvttpd2pi(cpu_env, s->ptr0, s->ptr1); - break; - case 0x02d: - gen_helper_cvtps2pi(cpu_env, s->ptr0, s->ptr1); - break; - case 0x12d: - gen_helper_cvtpd2pi(cpu_env, s->ptr0, s->ptr1); - break; - } - break; - case 0x22c: /* cvttss2si */ - case 0x32c: /* cvttsd2si */ - case 0x22d: /* cvtss2si */ - case 0x32d: /* cvtsd2si */ - ot = mo_64_32(s->dflag); - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - if ((b >> 8) & 1) { - gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_Q(0))); - } else { - gen_op_ld_v(s, MO_32, s->T0, s->A0); - tcg_gen_st32_tl(s->T0, cpu_env, - offsetof(CPUX86State, xmm_t0.ZMM_L(0))); - } - op2_offset = offsetof(CPUX86State,xmm_t0); - } else { - rm = (modrm & 7) | REX_B(s); - op2_offset = offsetof(CPUX86State,xmm_regs[rm]); - } - tcg_gen_addi_ptr(s->ptr0, cpu_env, op2_offset); - if (ot == MO_32) { - SSEFunc_i_ep sse_fn_i_ep = - sse_op_table3bi[((b >> 7) & 2) | (b & 1)]; - sse_fn_i_ep(s->tmp2_i32, cpu_env, s->ptr0); - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); - } else { -#ifdef TARGET_X86_64 - SSEFunc_l_ep sse_fn_l_ep = - sse_op_table3bq[((b >> 7) & 2) | (b & 1)]; - sse_fn_l_ep(s->T0, cpu_env, s->ptr0); -#else - goto illegal_op; -#endif - } - gen_op_mov_reg_v(s, ot, reg, s->T0); - break; - case 0xc4: /* pinsrw */ - case 0x1c4: - s->rip_offset = 1; - gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); - val 
= x86_ldub_code(env, s); - if (b1) { - val &= 7; - tcg_gen_st16_tl(s->T0, cpu_env, - offsetof(CPUX86State,xmm_regs[reg].ZMM_W(val))); - } else { - val &= 3; - tcg_gen_st16_tl(s->T0, cpu_env, - offsetof(CPUX86State,fpregs[reg].mmx.MMX_W(val))); - } - break; - case 0xc5: /* pextrw */ - case 0x1c5: - if (mod != 3) - goto illegal_op; - ot = mo_64_32(s->dflag); - val = x86_ldub_code(env, s); - if (b1) { - val &= 7; - rm = (modrm & 7) | REX_B(s); - tcg_gen_ld16u_tl(s->T0, cpu_env, - offsetof(CPUX86State,xmm_regs[rm].ZMM_W(val))); - } else { - val &= 3; - rm = (modrm & 7); - tcg_gen_ld16u_tl(s->T0, cpu_env, - offsetof(CPUX86State,fpregs[rm].mmx.MMX_W(val))); - } - reg = ((modrm >> 3) & 7) | rex_r; - gen_op_mov_reg_v(s, ot, reg, s->T0); - break; - case 0x1d6: /* movq ea, xmm */ - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - gen_stq_env_A0(s, offsetof(CPUX86State, - xmm_regs[reg].ZMM_Q(0))); - } else { - rm = (modrm & 7) | REX_B(s); - gen_op_movq(s, offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(0)), - offsetof(CPUX86State,xmm_regs[reg].ZMM_Q(0))); - gen_op_movq_env_0(s, - offsetof(CPUX86State, xmm_regs[rm].ZMM_Q(1))); - } - break; - case 0x2d6: /* movq2dq */ - gen_helper_enter_mmx(cpu_env); - rm = (modrm & 7); - gen_op_movq(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(0)), - offsetof(CPUX86State,fpregs[rm].mmx)); - gen_op_movq_env_0(s, offsetof(CPUX86State, xmm_regs[reg].ZMM_Q(1))); - break; - case 0x3d6: /* movdq2q */ - gen_helper_enter_mmx(cpu_env); - rm = (modrm & 7) | REX_B(s); - gen_op_movq(s, offsetof(CPUX86State, fpregs[reg & 7].mmx), - offsetof(CPUX86State,xmm_regs[rm].ZMM_Q(0))); - break; - case 0xd7: /* pmovmskb */ - case 0x1d7: - if (mod != 3) - goto illegal_op; - if (b1) { - rm = (modrm & 7) | REX_B(s); - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State, xmm_regs[rm])); - gen_helper_pmovmskb_xmm(s->tmp2_i32, cpu_env, s->ptr0); - } else { - rm = (modrm & 7); - tcg_gen_addi_ptr(s->ptr0, cpu_env, - offsetof(CPUX86State, fpregs[rm].mmx)); - 
gen_helper_pmovmskb_mmx(s->tmp2_i32, cpu_env, s->ptr0); - } - reg = ((modrm >> 3) & 7) | rex_r; - tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp2_i32); - break; - - case 0x138: - case 0x038: - b = modrm; - if ((b & 0xf0) == 0xf0) { - goto do_0f_38_fx; - } - modrm = x86_ldub_code(env, s); - rm = modrm & 7; - reg = ((modrm >> 3) & 7) | rex_r; - mod = (modrm >> 6) & 3; - if (b1 >= 2) { - goto unknown_op; - } - - sse_fn_epp = sse_op_table6[b].op[b1]; - if (!sse_fn_epp) { - goto unknown_op; - } - if (!(s->cpuid_ext_features & sse_op_table6[b].ext_mask)) - goto illegal_op; - - if (b1) { - op1_offset = offsetof(CPUX86State,xmm_regs[reg]); - if (mod == 3) { - op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); - } else { - op2_offset = offsetof(CPUX86State,xmm_t0); - gen_lea_modrm(env, s, modrm); - switch (b) { - case 0x20: case 0x30: /* pmovsxbw, pmovzxbw */ - case 0x23: case 0x33: /* pmovsxwd, pmovzxwd */ - case 0x25: case 0x35: /* pmovsxdq, pmovzxdq */ - gen_ldq_env_A0(s, op2_offset + - offsetof(ZMMReg, ZMM_Q(0))); - break; - case 0x21: case 0x31: /* pmovsxbd, pmovzxbd */ - case 0x24: case 0x34: /* pmovsxwq, pmovzxwq */ - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - tcg_gen_st_i32(s->tmp2_i32, cpu_env, op2_offset + - offsetof(ZMMReg, ZMM_L(0))); - break; - case 0x22: case 0x32: /* pmovsxbq, pmovzxbq */ - tcg_gen_qemu_ld_tl(s->tmp0, s->A0, - s->mem_index, MO_LEUW); - tcg_gen_st16_tl(s->tmp0, cpu_env, op2_offset + - offsetof(ZMMReg, ZMM_W(0))); - break; - case 0x2a: /* movntqda */ - gen_ldo_env_A0(s, op1_offset); - return; - default: - gen_ldo_env_A0(s, op2_offset); - } - } - } else { - op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); - if (mod == 3) { - op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); - } else { - op2_offset = offsetof(CPUX86State,mmx_t0); - gen_lea_modrm(env, s, modrm); - gen_ldq_env_A0(s, op2_offset); - } - } - if (sse_fn_epp == SSE_SPECIAL) { - goto unknown_op; - } - - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - 
tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - sse_fn_epp(cpu_env, s->ptr0, s->ptr1); - - if (b == 0x17) { - set_cc_op(s, CC_OP_EFLAGS); - } - break; - - case 0x238: - case 0x338: - do_0f_38_fx: - /* Various integer extensions at 0f 38 f[0-f]. */ - b = modrm | (b1 << 8); - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; - - switch (b) { - case 0x3f0: /* crc32 Gd,Eb */ - case 0x3f1: /* crc32 Gd,Ey */ - do_crc32: - if (!(s->cpuid_ext_features & CPUID_EXT_SSE42)) { - goto illegal_op; - } - if ((b & 0xff) == 0xf0) { - ot = MO_8; - } else if (s->dflag != MO_64) { - ot = (s->prefix & PREFIX_DATA ? MO_16 : MO_32); - } else { - ot = MO_64; - } - - tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[reg]); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - gen_helper_crc32(s->T0, s->tmp2_i32, - s->T0, tcg_const_i32(8 << ot)); - - ot = mo_64_32(s->dflag); - gen_op_mov_reg_v(s, ot, reg, s->T0); - break; - - case 0x1f0: /* crc32 or movbe */ - case 0x1f1: - /* For these insns, the f3 prefix is supposed to have priority - over the 66 prefix, but that's not what we implement above - setting b1. */ - if (s->prefix & PREFIX_REPNZ) { - goto do_crc32; - } - /* FALLTHRU */ - case 0x0f0: /* movbe Gy,My */ - case 0x0f1: /* movbe My,Gy */ - if (!(s->cpuid_ext_features & CPUID_EXT_MOVBE)) { - goto illegal_op; - } - if (s->dflag != MO_64) { - ot = (s->prefix & PREFIX_DATA ? 
MO_16 : MO_32); - } else { - ot = MO_64; - } - - gen_lea_modrm(env, s, modrm); - if ((b & 1) == 0) { - tcg_gen_qemu_ld_tl(s->T0, s->A0, - s->mem_index, ot | MO_BE); - gen_op_mov_reg_v(s, ot, reg, s->T0); - } else { - tcg_gen_qemu_st_tl(cpu_regs[reg], s->A0, - s->mem_index, ot | MO_BE); - } - break; - - case 0x0f2: /* andn Gy, By, Ey */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1) - || !(s->prefix & PREFIX_VEX) - || s->vex_l != 0) { - goto illegal_op; - } - ot = mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - tcg_gen_andc_tl(s->T0, s->T0, cpu_regs[s->vex_v]); - gen_op_mov_reg_v(s, ot, reg, s->T0); - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); - break; - - case 0x0f7: /* bextr Gy, Ey, By */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI1) - || !(s->prefix & PREFIX_VEX) - || s->vex_l != 0) { - goto illegal_op; - } - ot = mo_64_32(s->dflag); - { - TCGv bound, zero; - - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - /* Extract START, and shift the operand. - Shifts larger than operand size get zeros. */ - tcg_gen_ext8u_tl(s->A0, cpu_regs[s->vex_v]); - tcg_gen_shr_tl(s->T0, s->T0, s->A0); - - bound = tcg_const_tl(ot == MO_64 ? 63 : 31); - zero = tcg_const_tl(0); - tcg_gen_movcond_tl(TCG_COND_LEU, s->T0, s->A0, bound, - s->T0, zero); - tcg_temp_free(zero); - - /* Extract the LEN into a mask. Lengths larger than - operand size get all ones. 
*/ - tcg_gen_extract_tl(s->A0, cpu_regs[s->vex_v], 8, 8); - tcg_gen_movcond_tl(TCG_COND_LEU, s->A0, s->A0, bound, - s->A0, bound); - tcg_temp_free(bound); - tcg_gen_movi_tl(s->T1, 1); - tcg_gen_shl_tl(s->T1, s->T1, s->A0); - tcg_gen_subi_tl(s->T1, s->T1, 1); - tcg_gen_and_tl(s->T0, s->T0, s->T1); - - gen_op_mov_reg_v(s, ot, reg, s->T0); - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_LOGICB + ot); - } - break; - - case 0x0f5: /* bzhi Gy, Ey, By */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) - || !(s->prefix & PREFIX_VEX) - || s->vex_l != 0) { - goto illegal_op; - } - ot = mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - tcg_gen_ext8u_tl(s->T1, cpu_regs[s->vex_v]); - { - TCGv bound = tcg_const_tl(ot == MO_64 ? 63 : 31); - /* Note that since we're using BMILG (in order to get O - cleared) we need to store the inverse into C. */ - tcg_gen_setcond_tl(TCG_COND_LT, cpu_cc_src, - s->T1, bound); - tcg_gen_movcond_tl(TCG_COND_GT, s->T1, s->T1, - bound, bound, s->T1); - tcg_temp_free(bound); - } - tcg_gen_movi_tl(s->A0, -1); - tcg_gen_shl_tl(s->A0, s->A0, s->T1); - tcg_gen_andc_tl(s->T0, s->T0, s->A0); - gen_op_mov_reg_v(s, ot, reg, s->T0); - gen_op_update1_cc(s); - set_cc_op(s, CC_OP_BMILGB + ot); - break; - - case 0x3f6: /* mulx By, Gy, rdx, Ey */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) - || !(s->prefix & PREFIX_VEX) - || s->vex_l != 0) { - goto illegal_op; - } - ot = mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - switch (ot) { - default: - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_trunc_tl_i32(s->tmp3_i32, cpu_regs[R_EDX]); - tcg_gen_mulu2_i32(s->tmp2_i32, s->tmp3_i32, - s->tmp2_i32, s->tmp3_i32); - tcg_gen_extu_i32_tl(cpu_regs[s->vex_v], s->tmp2_i32); - tcg_gen_extu_i32_tl(cpu_regs[reg], s->tmp3_i32); - break; -#ifdef TARGET_X86_64 - case MO_64: - tcg_gen_mulu2_i64(s->T0, s->T1, - s->T0, cpu_regs[R_EDX]); - tcg_gen_mov_i64(cpu_regs[s->vex_v], s->T0); - tcg_gen_mov_i64(cpu_regs[reg], s->T1); 
- break; -#endif - } - break; - - case 0x3f5: /* pdep Gy, By, Ey */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) - || !(s->prefix & PREFIX_VEX) - || s->vex_l != 0) { - goto illegal_op; - } - ot = mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - /* Note that by zero-extending the mask operand, we - automatically handle zero-extending the result. */ - if (ot == MO_64) { - tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]); - } else { - tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]); - } - gen_helper_pdep(cpu_regs[reg], s->T0, s->T1); - break; - - case 0x2f5: /* pext Gy, By, Ey */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) - || !(s->prefix & PREFIX_VEX) - || s->vex_l != 0) { - goto illegal_op; - } - ot = mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - /* Note that by zero-extending the mask operand, we - automatically handle zero-extending the result. */ - if (ot == MO_64) { - tcg_gen_mov_tl(s->T1, cpu_regs[s->vex_v]); - } else { - tcg_gen_ext32u_tl(s->T1, cpu_regs[s->vex_v]); - } - gen_helper_pext(cpu_regs[reg], s->T0, s->T1); - break; - - case 0x1f6: /* adcx Gy, Ey */ - case 0x2f6: /* adox Gy, Ey */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_ADX)) { - goto illegal_op; - } else { - TCGv carry_in, carry_out, zero; - int end_op; - - ot = mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - - /* Re-use the carry-out from a previous round. */ - carry_in = NULL; - carry_out = (b == 0x1f6 ? cpu_cc_dst : cpu_cc_src2); - switch (s->cc_op) { - case CC_OP_ADCX: - if (b == 0x1f6) { - carry_in = cpu_cc_dst; - end_op = CC_OP_ADCX; - } else { - end_op = CC_OP_ADCOX; - } - break; - case CC_OP_ADOX: - if (b == 0x1f6) { - end_op = CC_OP_ADCOX; - } else { - carry_in = cpu_cc_src2; - end_op = CC_OP_ADOX; - } - break; - case CC_OP_ADCOX: - end_op = CC_OP_ADCOX; - carry_in = carry_out; - break; - default: - end_op = (b == 0x1f6 ? 
CC_OP_ADCX : CC_OP_ADOX); - break; - } - /* If we can't reuse carry-out, get it out of EFLAGS. */ - if (!carry_in) { - if (s->cc_op != CC_OP_ADCX && s->cc_op != CC_OP_ADOX) { - gen_compute_eflags(s); - } - carry_in = s->tmp0; - tcg_gen_extract_tl(carry_in, cpu_cc_src, - ctz32(b == 0x1f6 ? CC_C : CC_O), 1); - } - - switch (ot) { -#ifdef TARGET_X86_64 - case MO_32: - /* If we know TL is 64-bit, and we want a 32-bit - result, just do everything in 64-bit arithmetic. */ - tcg_gen_ext32u_i64(cpu_regs[reg], cpu_regs[reg]); - tcg_gen_ext32u_i64(s->T0, s->T0); - tcg_gen_add_i64(s->T0, s->T0, cpu_regs[reg]); - tcg_gen_add_i64(s->T0, s->T0, carry_in); - tcg_gen_ext32u_i64(cpu_regs[reg], s->T0); - tcg_gen_shri_i64(carry_out, s->T0, 32); - break; -#endif - default: - /* Otherwise compute the carry-out in two steps. */ - zero = tcg_const_tl(0); - tcg_gen_add2_tl(s->T0, carry_out, - s->T0, zero, - carry_in, zero); - tcg_gen_add2_tl(cpu_regs[reg], carry_out, - cpu_regs[reg], carry_out, - s->T0, zero); - tcg_temp_free(zero); - break; - } - set_cc_op(s, end_op); - } - break; - - case 0x1f7: /* shlx Gy, Ey, By */ - case 0x2f7: /* sarx Gy, Ey, By */ - case 0x3f7: /* shrx Gy, Ey, By */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) - || !(s->prefix & PREFIX_VEX) - || s->vex_l != 0) { - goto illegal_op; - } - ot = mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - if (ot == MO_64) { - tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 63); - } else { - tcg_gen_andi_tl(s->T1, cpu_regs[s->vex_v], 31); - } - if (b == 0x1f7) { - tcg_gen_shl_tl(s->T0, s->T0, s->T1); - } else if (b == 0x2f7) { - if (ot != MO_64) { - tcg_gen_ext32s_tl(s->T0, s->T0); - } - tcg_gen_sar_tl(s->T0, s->T0, s->T1); - } else { - if (ot != MO_64) { - tcg_gen_ext32u_tl(s->T0, s->T0); - } - tcg_gen_shr_tl(s->T0, s->T0, s->T1); - } - gen_op_mov_reg_v(s, ot, reg, s->T0); - break; - - case 0x0f3: - case 0x1f3: - case 0x2f3: - case 0x3f3: /* Group 17 */ - if (!(s->cpuid_7_0_ebx_features & 
CPUID_7_0_EBX_BMI1) - || !(s->prefix & PREFIX_VEX) - || s->vex_l != 0) { - goto illegal_op; - } - ot = mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - tcg_gen_mov_tl(cpu_cc_src, s->T0); - switch (reg & 7) { - case 1: /* blsr By,Ey */ - tcg_gen_subi_tl(s->T1, s->T0, 1); - tcg_gen_and_tl(s->T0, s->T0, s->T1); - break; - case 2: /* blsmsk By,Ey */ - tcg_gen_subi_tl(s->T1, s->T0, 1); - tcg_gen_xor_tl(s->T0, s->T0, s->T1); - break; - case 3: /* blsi By, Ey */ - tcg_gen_neg_tl(s->T1, s->T0); - tcg_gen_and_tl(s->T0, s->T0, s->T1); - break; - default: - goto unknown_op; - } - tcg_gen_mov_tl(cpu_cc_dst, s->T0); - gen_op_mov_reg_v(s, ot, s->vex_v, s->T0); - set_cc_op(s, CC_OP_BMILGB + ot); - break; - - default: - goto unknown_op; - } - break; - - case 0x03a: - case 0x13a: - b = modrm; - modrm = x86_ldub_code(env, s); - rm = modrm & 7; - reg = ((modrm >> 3) & 7) | rex_r; - mod = (modrm >> 6) & 3; - if (b1 >= 2) { - goto unknown_op; - } + /* Set tmp0 to match the required value of Z. */ + tcg_gen_setcond_i64(TCG_COND_EQ, cmp, old, cmp); + Z = tcg_temp_new(); + tcg_gen_trunc_i64_tl(Z, cmp); + + /* + * Extract the result values for the register pair. + * For 32-bit, we may do this unconditionally, because on success (Z=1), + * the old value matches the previous value in EDX:EAX. For x86_64, + * the store must be conditional, because we must leave the source + * registers unchanged on success, and zero-extend the writeback + * on failure (Z=0). 
+ */ + if (TARGET_LONG_BITS == 32) { + tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], old); + } else { + TCGv zero = tcg_constant_tl(0); - sse_fn_eppi = sse_op_table7[b].op[b1]; - if (!sse_fn_eppi) { - goto unknown_op; - } - if (!(s->cpuid_ext_features & sse_op_table7[b].ext_mask)) - goto illegal_op; + tcg_gen_extr_i64_tl(s->T0, s->T1, old); + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EAX], Z, zero, + s->T0, cpu_regs[R_EAX]); + tcg_gen_movcond_tl(TCG_COND_EQ, cpu_regs[R_EDX], Z, zero, + s->T1, cpu_regs[R_EDX]); + } - s->rip_offset = 1; + /* Update Z. */ + gen_compute_eflags(s); + tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, Z, ctz32(CC_Z), 1); +} - if (sse_fn_eppi == SSE_SPECIAL) { - ot = mo_64_32(s->dflag); - rm = (modrm & 7) | REX_B(s); - if (mod != 3) - gen_lea_modrm(env, s, modrm); - reg = ((modrm >> 3) & 7) | rex_r; - val = x86_ldub_code(env, s); - switch (b) { - case 0x14: /* pextrb */ - tcg_gen_ld8u_tl(s->T0, cpu_env, offsetof(CPUX86State, - xmm_regs[reg].ZMM_B(val & 15))); - if (mod == 3) { - gen_op_mov_reg_v(s, ot, rm, s->T0); - } else { - tcg_gen_qemu_st_tl(s->T0, s->A0, - s->mem_index, MO_UB); - } - break; - case 0x15: /* pextrw */ - tcg_gen_ld16u_tl(s->T0, cpu_env, offsetof(CPUX86State, - xmm_regs[reg].ZMM_W(val & 7))); - if (mod == 3) { - gen_op_mov_reg_v(s, ot, rm, s->T0); - } else { - tcg_gen_qemu_st_tl(s->T0, s->A0, - s->mem_index, MO_LEUW); - } - break; - case 0x16: - if (ot == MO_32) { /* pextrd */ - tcg_gen_ld_i32(s->tmp2_i32, cpu_env, - offsetof(CPUX86State, - xmm_regs[reg].ZMM_L(val & 3))); - if (mod == 3) { - tcg_gen_extu_i32_tl(cpu_regs[rm], s->tmp2_i32); - } else { - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - } - } else { /* pextrq */ #ifdef TARGET_X86_64 - tcg_gen_ld_i64(s->tmp1_i64, cpu_env, - offsetof(CPUX86State, - xmm_regs[reg].ZMM_Q(val & 1))); - if (mod == 3) { - tcg_gen_mov_i64(cpu_regs[rm], s->tmp1_i64); - } else { - tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, - s->mem_index, MO_LEQ); - } -#else - goto 
illegal_op; -#endif - } - break; - case 0x17: /* extractps */ - tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, - xmm_regs[reg].ZMM_L(val & 3))); - if (mod == 3) { - gen_op_mov_reg_v(s, ot, rm, s->T0); - } else { - tcg_gen_qemu_st_tl(s->T0, s->A0, - s->mem_index, MO_LEUL); - } - break; - case 0x20: /* pinsrb */ - if (mod == 3) { - gen_op_mov_v_reg(s, MO_32, s->T0, rm); - } else { - tcg_gen_qemu_ld_tl(s->T0, s->A0, - s->mem_index, MO_UB); - } - tcg_gen_st8_tl(s->T0, cpu_env, offsetof(CPUX86State, - xmm_regs[reg].ZMM_B(val & 15))); - break; - case 0x21: /* insertps */ - if (mod == 3) { - tcg_gen_ld_i32(s->tmp2_i32, cpu_env, - offsetof(CPUX86State,xmm_regs[rm] - .ZMM_L((val >> 6) & 3))); - } else { - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - } - tcg_gen_st_i32(s->tmp2_i32, cpu_env, - offsetof(CPUX86State,xmm_regs[reg] - .ZMM_L((val >> 4) & 3))); - if ((val >> 0) & 1) - tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), - cpu_env, offsetof(CPUX86State, - xmm_regs[reg].ZMM_L(0))); - if ((val >> 1) & 1) - tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), - cpu_env, offsetof(CPUX86State, - xmm_regs[reg].ZMM_L(1))); - if ((val >> 2) & 1) - tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), - cpu_env, offsetof(CPUX86State, - xmm_regs[reg].ZMM_L(2))); - if ((val >> 3) & 1) - tcg_gen_st_i32(tcg_const_i32(0 /*float32_zero*/), - cpu_env, offsetof(CPUX86State, - xmm_regs[reg].ZMM_L(3))); - break; - case 0x22: - if (ot == MO_32) { /* pinsrd */ - if (mod == 3) { - tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[rm]); - } else { - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - } - tcg_gen_st_i32(s->tmp2_i32, cpu_env, - offsetof(CPUX86State, - xmm_regs[reg].ZMM_L(val & 3))); - } else { /* pinsrq */ -#ifdef TARGET_X86_64 - if (mod == 3) { - gen_op_mov_v_reg(s, ot, s->tmp1_i64, rm); - } else { - tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, - s->mem_index, MO_LEQ); - } - tcg_gen_st_i64(s->tmp1_i64, cpu_env, - offsetof(CPUX86State, - 
xmm_regs[reg].ZMM_Q(val & 1))); -#else - goto illegal_op; -#endif - } - break; - } - return; - } - - if (b1) { - op1_offset = offsetof(CPUX86State,xmm_regs[reg]); - if (mod == 3) { - op2_offset = offsetof(CPUX86State,xmm_regs[rm | REX_B(s)]); - } else { - op2_offset = offsetof(CPUX86State,xmm_t0); - gen_lea_modrm(env, s, modrm); - gen_ldo_env_A0(s, op2_offset); - } - } else { - op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); - if (mod == 3) { - op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); - } else { - op2_offset = offsetof(CPUX86State,mmx_t0); - gen_lea_modrm(env, s, modrm); - gen_ldq_env_A0(s, op2_offset); - } - } - val = x86_ldub_code(env, s); - - if ((b & 0xfc) == 0x60) { /* pcmpXstrX */ - set_cc_op(s, CC_OP_EFLAGS); - - if (s->dflag == MO_64) { - /* The helper must use entire 64-bit gp registers */ - val |= 1 << 8; - } - } +static void gen_cmpxchg16b(DisasContext *s, CPUX86State *env, int modrm) +{ + MemOp mop = MO_TE | MO_128 | MO_ALIGN; + TCGv_i64 t0, t1; + TCGv_i128 cmp, val; - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - sse_fn_eppi(cpu_env, s->ptr0, s->ptr1, tcg_const_i32(val)); - break; + gen_lea_modrm(env, s, modrm); - case 0x33a: - /* Various integer extensions at 0f 3a f[0-f]. 
*/ - b = modrm | (b1 << 8); - modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + cmp = tcg_temp_new_i128(); + val = tcg_temp_new_i128(); + tcg_gen_concat_i64_i128(cmp, cpu_regs[R_EAX], cpu_regs[R_EDX]); + tcg_gen_concat_i64_i128(val, cpu_regs[R_EBX], cpu_regs[R_ECX]); - switch (b) { - case 0x3f0: /* rorx Gy,Ey, Ib */ - if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_BMI2) - || !(s->prefix & PREFIX_VEX) - || s->vex_l != 0) { - goto illegal_op; - } - ot = mo_64_32(s->dflag); - gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); - b = x86_ldub_code(env, s); - if (ot == MO_64) { - tcg_gen_rotri_tl(s->T0, s->T0, b & 63); - } else { - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - tcg_gen_rotri_i32(s->tmp2_i32, s->tmp2_i32, b & 31); - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); - } - gen_op_mov_reg_v(s, ot, reg, s->T0); - break; - - default: - goto unknown_op; - } - break; - - default: - unknown_op: - gen_unknown_opcode(env, s); - return; - } + /* Only require atomic with LOCK; non-parallel handled in generator. */ + if (s->prefix & PREFIX_LOCK) { + tcg_gen_atomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop); } else { - /* generic MMX or SSE operation */ - switch(b) { - case 0x70: /* pshufx insn */ - case 0xc6: /* pshufx insn */ - case 0xc2: /* compare insns */ - s->rip_offset = 1; - break; - default: - break; - } - if (is_xmm) { - op1_offset = offsetof(CPUX86State,xmm_regs[reg]); - if (mod != 3) { - int sz = 4; + tcg_gen_nonatomic_cmpxchg_i128(val, s->A0, cmp, val, s->mem_index, mop); + } - gen_lea_modrm(env, s, modrm); - op2_offset = offsetof(CPUX86State,xmm_t0); - - switch (b) { - case 0x50 ... 0x5a: - case 0x5c ... 0x5f: - case 0xc2: - /* Most sse scalar operations. */ - if (b1 == 2) { - sz = 2; - } else if (b1 == 3) { - sz = 3; - } - break; + tcg_gen_extr_i128_i64(s->T0, s->T1, val); - case 0x2e: /* ucomis[sd] */ - case 0x2f: /* comis[sd] */ - if (b1 == 0) { - sz = 2; - } else { - sz = 3; - } - break; - } + /* Determine success after the fact. 
*/ + t0 = tcg_temp_new_i64(); + t1 = tcg_temp_new_i64(); + tcg_gen_xor_i64(t0, s->T0, cpu_regs[R_EAX]); + tcg_gen_xor_i64(t1, s->T1, cpu_regs[R_EDX]); + tcg_gen_or_i64(t0, t0, t1); - switch (sz) { - case 2: - /* 32 bit access */ - gen_op_ld_v(s, MO_32, s->T0, s->A0); - tcg_gen_st32_tl(s->T0, cpu_env, - offsetof(CPUX86State,xmm_t0.ZMM_L(0))); - break; - case 3: - /* 64 bit access */ - gen_ldq_env_A0(s, offsetof(CPUX86State, xmm_t0.ZMM_D(0))); - break; - default: - /* 128 bit access */ - gen_ldo_env_A0(s, op2_offset); - break; - } - } else { - rm = (modrm & 7) | REX_B(s); - op2_offset = offsetof(CPUX86State,xmm_regs[rm]); - } - } else { - op1_offset = offsetof(CPUX86State,fpregs[reg].mmx); - if (mod != 3) { - gen_lea_modrm(env, s, modrm); - op2_offset = offsetof(CPUX86State,mmx_t0); - gen_ldq_env_A0(s, op2_offset); - } else { - rm = (modrm & 7); - op2_offset = offsetof(CPUX86State,fpregs[rm].mmx); - } - } - switch(b) { - case 0x0f: /* 3DNow! data insns */ - val = x86_ldub_code(env, s); - sse_fn_epp = sse_op_table5[val]; - if (!sse_fn_epp) { - goto unknown_op; - } - if (!(s->cpuid_ext2_features & CPUID_EXT2_3DNOW)) { - goto illegal_op; - } - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - sse_fn_epp(cpu_env, s->ptr0, s->ptr1); - break; - case 0x70: /* pshufx insn */ - case 0xc6: /* pshufx insn */ - val = x86_ldub_code(env, s); - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - /* XXX: introduce a new table? 
*/ - sse_fn_ppi = (SSEFunc_0_ppi)sse_fn_epp; - sse_fn_ppi(s->ptr0, s->ptr1, tcg_const_i32(val)); - break; - case 0xc2: - /* compare insns */ - val = x86_ldub_code(env, s); - if (val >= 8) - goto unknown_op; - sse_fn_epp = sse_op_table4[val][b1]; - - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - sse_fn_epp(cpu_env, s->ptr0, s->ptr1); - break; - case 0xf7: - /* maskmov : we must prepare A0 */ - if (mod != 3) - goto illegal_op; - tcg_gen_mov_tl(s->A0, cpu_regs[R_EDI]); - gen_extu(s->aflag, s->A0); - gen_add_A0_ds_seg(s); + /* Update Z. */ + gen_compute_eflags(s); + tcg_gen_setcondi_i64(TCG_COND_EQ, t0, t0, 0); + tcg_gen_deposit_tl(cpu_cc_src, cpu_cc_src, t0, ctz32(CC_Z), 1); - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - /* XXX: introduce a new table? */ - sse_fn_eppt = (SSEFunc_0_eppt)sse_fn_epp; - sse_fn_eppt(cpu_env, s->ptr0, s->ptr1, s->A0); - break; - default: - tcg_gen_addi_ptr(s->ptr0, cpu_env, op1_offset); - tcg_gen_addi_ptr(s->ptr1, cpu_env, op2_offset); - sse_fn_epp(cpu_env, s->ptr0, s->ptr1); - break; - } - if (b == 0x2e || b == 0x2f) { - set_cc_op(s, CC_OP_EFLAGS); - } - } + /* + * Extract the result values for the register pair. We may do this + * unconditionally, because on success (Z=1), the old value matches + * the previous value in RDX:RAX. + */ + tcg_gen_mov_i64(cpu_regs[R_EAX], s->T0); + tcg_gen_mov_i64(cpu_regs[R_EDX], s->T1); } +#endif /* convert one instruction. s->base.is_jmp is set if the translation must be stopped. 
Return the next pc value */ -static target_ulong disas_insn(DisasContext *s, CPUState *cpu) +static bool disas_insn(DisasContext *s, CPUState *cpu) { - CPUX86State *env = cpu->env_ptr; + CPUX86State *env = cpu_env(cpu); int b, prefixes; int shift; - TCGMemOp ot, aflag, dflag; + MemOp ot, aflag, dflag; int modrm, reg, rm, mod, op, opreg, val; - target_ulong next_eip, tval; - int rex_w, rex_r; - target_ulong pc_start = s->base.pc_next; + bool orig_cc_op_dirty = s->cc_op_dirty; + CCOp orig_cc_op = s->cc_op; + target_ulong orig_pc_save = s->pc_save; - s->pc_start = s->pc = pc_start; + s->pc = s->base.pc_next; s->override = -1; #ifdef TARGET_X86_64 + s->rex_r = 0; s->rex_x = 0; s->rex_b = 0; - s->x86_64_hregs = false; #endif s->rip_offset = 0; /* for relative ip address */ s->vex_l = 0; s->vex_v = 0; - if (sigsetjmp(s->jmpbuf, 0) != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - return s->pc; + s->vex_w = false; + switch (sigsetjmp(s->jmpbuf, 0)) { + case 0: + break; + case 1: + gen_exception_gpf(s); + return true; + case 2: + /* Restore state that may affect the next instruction. */ + s->pc = s->base.pc_next; + /* + * TODO: These save/restore can be removed after the table-based + * decoder is complete; we will be decoding the insn completely + * before any code generation that might affect these variables. + */ + s->cc_op_dirty = orig_cc_op_dirty; + s->cc_op = orig_cc_op; + s->pc_save = orig_pc_save; + /* END TODO */ + s->base.num_insns--; + tcg_remove_ops_after(s->prev_insn_end); + s->base.insn_start = s->prev_insn_start; + s->base.is_jmp = DISAS_TOO_MANY; + return false; + default: + g_assert_not_reached(); } prefixes = 0; - rex_w = -1; - rex_r = 0; next_byte: + s->prefix = prefixes; b = x86_ldub_code(env, s); /* Collect prefixes. 
*/ switch (b) { + default: + break; + case 0x0f: + b = x86_ldub_code(env, s) + 0x100; + break; case 0xf3: prefixes |= PREFIX_REPZ; + prefixes &= ~PREFIX_REPNZ; goto next_byte; case 0xf2: prefixes |= PREFIX_REPNZ; + prefixes &= ~PREFIX_REPZ; goto next_byte; case 0xf0: prefixes |= PREFIX_LOCK; @@ -4544,73 +3182,28 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x40 ... 0x4f: if (CODE64(s)) { /* REX prefix */ - rex_w = (b >> 3) & 1; - rex_r = (b & 0x4) << 1; + prefixes |= PREFIX_REX; + s->vex_w = (b >> 3) & 1; + s->rex_r = (b & 0x4) << 1; s->rex_x = (b & 0x2) << 2; - REX_B(s) = (b & 0x1) << 3; - /* select uniform byte register addressing */ - s->x86_64_hregs = true; + s->rex_b = (b & 0x1) << 3; goto next_byte; } break; #endif case 0xc5: /* 2-byte VEX */ case 0xc4: /* 3-byte VEX */ - /* VEX prefixes cannot be used except in 32-bit mode. - Otherwise the instruction is LES or LDS. */ - if (s->code32 && !s->vm86) { - static const int pp_prefix[4] = { - 0, PREFIX_DATA, PREFIX_REPZ, PREFIX_REPNZ - }; - int vex3, vex2 = x86_ldub_code(env, s); + if (CODE32(s) && !VM86(s)) { + int vex2 = x86_ldub_code(env, s); + s->pc--; /* rewind the advance_pc() x86_ldub_code() did */ if (!CODE64(s) && (vex2 & 0xc0) != 0xc0) { /* 4.1.4.6: In 32-bit mode, bits [7:6] must be 11b, otherwise the instruction is LES or LDS. */ - s->pc--; /* rewind the advance_pc() x86_ldub_code() did */ break; } - - /* 4.1.1-4.1.3: No preceding lock, 66, f2, f3, or rex prefixes. 
*/ - if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ - | PREFIX_LOCK | PREFIX_DATA)) { - goto illegal_op; - } -#ifdef TARGET_X86_64 - if (s->x86_64_hregs) { - goto illegal_op; - } -#endif - rex_r = (~vex2 >> 4) & 8; - if (b == 0xc5) { - /* 2-byte VEX prefix: RVVVVlpp, implied 0f leading opcode byte */ - vex3 = vex2; - b = x86_ldub_code(env, s) | 0x100; - } else { - /* 3-byte VEX prefix: RXBmmmmm wVVVVlpp */ -#ifdef TARGET_X86_64 - s->rex_x = (~vex2 >> 3) & 8; - s->rex_b = (~vex2 >> 2) & 8; -#endif - vex3 = x86_ldub_code(env, s); - rex_w = (vex3 >> 7) & 1; - switch (vex2 & 0x1f) { - case 0x01: /* Implied 0f leading opcode bytes. */ - b = x86_ldub_code(env, s) | 0x100; - break; - case 0x02: /* Implied 0f 38 leading opcode bytes. */ - b = 0x138; - break; - case 0x03: /* Implied 0f 3a leading opcode bytes. */ - b = 0x13a; - break; - default: /* Reserved for future use. */ - goto unknown_op; - } - } - s->vex_v = (~vex3 >> 3) & 0xf; - s->vex_l = (vex3 >> 2) & 1; - prefixes |= pp_prefix[vex3 & 3] | PREFIX_VEX; + disas_insn_new(s, cpu, b); + return s->pc; } break; } @@ -4620,18 +3213,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /* In 64-bit mode, the default data size is 32-bit. Select 64-bit data with rex_w, and 16-bit data with 0x66; rex_w takes precedence over 0x66 if both are present. */ - dflag = (rex_w > 0 ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32); + dflag = (REX_W(s) ? MO_64 : prefixes & PREFIX_DATA ? MO_16 : MO_32); /* In 64-bit mode, 0x67 selects 32-bit addressing. */ aflag = (prefixes & PREFIX_ADR ? MO_32 : MO_64); } else { /* In 16/32-bit mode, 0x66 selects the opposite data size. */ - if (s->code32 ^ ((prefixes & PREFIX_DATA) != 0)) { + if (CODE32(s) ^ ((prefixes & PREFIX_DATA) != 0)) { dflag = MO_32; } else { dflag = MO_16; } /* In 16/32-bit mode, 0x67 selects the opposite addressing. 
*/ - if (s->code32 ^ ((prefixes & PREFIX_ADR) != 0)) { + if (CODE32(s) ^ ((prefixes & PREFIX_ADR) != 0)) { aflag = MO_32; } else { aflag = MO_16; @@ -4643,14 +3236,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) s->dflag = dflag; /* now check op code */ - reswitch: - switch(b) { - case 0x0f: - /**************************/ - /* extended op code */ - b = x86_ldub_code(env, s) | 0x100; - goto reswitch; - + switch (b) { /**************************/ /* arith & logic */ case 0x00 ... 0x05: @@ -4662,7 +3248,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x30 ... 0x35: case 0x38 ... 0x3d: { - int op, f, val; + int f; op = (b >> 3) & 7; f = (b >> 1) & 3; @@ -4671,7 +3257,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) switch(f) { case 0: /* OP Ev, Gv */ modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); mod = (modrm >> 6) & 3; rm = (modrm & 7) | REX_B(s); if (mod != 3) { @@ -4693,7 +3279,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 1: /* OP Gv, Ev */ modrm = x86_ldub_code(env, s); mod = (modrm >> 6) & 3; - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); rm = (modrm & 7) | REX_B(s); if (mod != 3) { gen_lea_modrm(env, s, modrm); @@ -4722,8 +3308,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x81: case 0x83: { - int val; - ot = mo_b_d(b, dflag); modrm = x86_ldub_code(env, s); @@ -4822,13 +3406,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (mod == 3) { goto illegal_op; } - a0 = tcg_temp_local_new(); - t0 = tcg_temp_local_new(); + a0 = s->A0; + t0 = s->T0; label1 = gen_new_label(); - tcg_gen_mov_tl(a0, s->A0); - tcg_gen_mov_tl(t0, s->T0); - gen_set_label(label1); t1 = tcg_temp_new(); t2 = tcg_temp_new(); @@ -4836,13 +3417,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_neg_tl(t1, t0); tcg_gen_atomic_cmpxchg_tl(t0, a0, t0, t1, 
s->mem_index, ot | MO_LE); - tcg_temp_free(t1); tcg_gen_brcond_tl(TCG_COND_NE, t0, t2, label1); - tcg_temp_free(t2); - tcg_temp_free(a0); - tcg_gen_mov_tl(s->T0, t0); - tcg_temp_free(t0); + tcg_gen_neg_tl(s->T0, t0); } else { tcg_gen_neg_tl(s->T0, s->T0); if (mod != 3) { @@ -4960,18 +3537,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 6: /* div */ switch(ot) { case MO_8: - gen_helper_divb_AL(cpu_env, s->T0); + gen_helper_divb_AL(tcg_env, s->T0); break; case MO_16: - gen_helper_divw_AX(cpu_env, s->T0); + gen_helper_divw_AX(tcg_env, s->T0); break; default: case MO_32: - gen_helper_divl_EAX(cpu_env, s->T0); + gen_helper_divl_EAX(tcg_env, s->T0); break; #ifdef TARGET_X86_64 case MO_64: - gen_helper_divq_EAX(cpu_env, s->T0); + gen_helper_divq_EAX(tcg_env, s->T0); break; #endif } @@ -4979,18 +3556,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 7: /* idiv */ switch(ot) { case MO_8: - gen_helper_idivb_AL(cpu_env, s->T0); + gen_helper_idivb_AL(tcg_env, s->T0); break; case MO_16: - gen_helper_idivw_AX(cpu_env, s->T0); + gen_helper_idivw_AX(tcg_env, s->T0); break; default: case MO_32: - gen_helper_idivl_EAX(cpu_env, s->T0); + gen_helper_idivl_EAX(tcg_env, s->T0); break; #ifdef TARGET_X86_64 case MO_64: - gen_helper_idivq_EAX(cpu_env, s->T0); + gen_helper_idivq_EAX(tcg_env, s->T0); break; #endif } @@ -5016,7 +3593,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /* operand size for jumps is 64 bit */ ot = MO_64; } else if (op == 3 || op == 5) { - ot = dflag != MO_16 ? MO_32 + (rex_w == 1) : MO_16; + ot = dflag != MO_16 ? 
MO_32 + REX_W(s) : MO_16; } else if (op == 6) { /* default push size is 64 bit */ ot = mo_pushpop(s, dflag); @@ -5050,55 +3627,58 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (dflag == MO_16) { tcg_gen_ext16u_tl(s->T0, s->T0); } - next_eip = s->pc - s->cs_base; - tcg_gen_movi_tl(s->T1, next_eip); - gen_push_v(s, s->T1); - gen_op_jmp_v(s->T0); + gen_push_v(s, eip_next_tl(s)); + gen_op_jmp_v(s, s->T0); gen_bnd_jmp(s); - gen_jr(s, s->T0); + s->base.is_jmp = DISAS_JUMP; break; case 3: /* lcall Ev */ + if (mod == 3) { + goto illegal_op; + } gen_op_ld_v(s, ot, s->T1, s->A0); gen_add_A0_im(s, 1 << ot); gen_op_ld_v(s, MO_16, s->T0, s->A0); do_lcall: - if (s->pe && !s->vm86) { + if (PE(s) && !VM86(s)) { tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_lcall_protected(cpu_env, s->tmp2_i32, s->T1, - tcg_const_i32(dflag - 1), - tcg_const_tl(s->pc - s->cs_base)); + gen_helper_lcall_protected(tcg_env, s->tmp2_i32, s->T1, + tcg_constant_i32(dflag - 1), + eip_next_tl(s)); } else { tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_lcall_real(cpu_env, s->tmp2_i32, s->T1, - tcg_const_i32(dflag - 1), - tcg_const_i32(s->pc - s->cs_base)); + tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); + gen_helper_lcall_real(tcg_env, s->tmp2_i32, s->tmp3_i32, + tcg_constant_i32(dflag - 1), + eip_next_i32(s)); } - tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip)); - gen_jr(s, s->tmp4); + s->base.is_jmp = DISAS_JUMP; break; case 4: /* jmp Ev */ if (dflag == MO_16) { tcg_gen_ext16u_tl(s->T0, s->T0); } - gen_op_jmp_v(s->T0); + gen_op_jmp_v(s, s->T0); gen_bnd_jmp(s); - gen_jr(s, s->T0); + s->base.is_jmp = DISAS_JUMP; break; case 5: /* ljmp Ev */ + if (mod == 3) { + goto illegal_op; + } gen_op_ld_v(s, ot, s->T1, s->A0); gen_add_A0_im(s, 1 << ot); gen_op_ld_v(s, MO_16, s->T0, s->A0); do_ljmp: - if (s->pe && !s->vm86) { + if (PE(s) && !VM86(s)) { tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_ljmp_protected(cpu_env, s->tmp2_i32, s->T1, - tcg_const_tl(s->pc - 
s->cs_base)); + gen_helper_ljmp_protected(tcg_env, s->tmp2_i32, s->T1, + eip_next_tl(s)); } else { gen_op_movl_seg_T0_vm(s, R_CS); - gen_op_jmp_v(s->T1); + gen_op_jmp_v(s, s->T1); } - tcg_gen_ld_tl(s->tmp4, cpu_env, offsetof(CPUX86State, eip)); - gen_jr(s, s->tmp4); + s->base.is_jmp = DISAS_JUMP; break; case 6: /* push Ev */ gen_push_v(s, s->T0); @@ -5113,7 +3693,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) ot = mo_b_d(b, dflag); modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); gen_op_mov_v_reg(s, ot, s->T1, reg); @@ -5152,7 +3732,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); break; default: - tcg_abort(); + g_assert_not_reached(); } break; case 0x99: /* CDQ/CWD */ @@ -5177,7 +3757,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_op_mov_reg_v(s, MO_16, R_EDX, s->T0); break; default: - tcg_abort(); + g_assert_not_reached(); } break; case 0x1af: /* imul Gv, Ev */ @@ -5185,7 +3765,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x6b: ot = dflag; modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); if (b == 0x69) s->rip_offset = insn_const_size(ot); else if (b == 0x6b) @@ -5237,7 +3817,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x1c1: /* xadd Ev, Gv */ ot = mo_b_d(b, dflag); modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); mod = (modrm >> 6) & 3; gen_op_mov_v_reg(s, ot, s->T0, reg); if (mod == 3) { @@ -5265,18 +3845,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x1b0: case 0x1b1: /* cmpxchg Ev, Gv */ { - TCGv oldv, newv, cmpv; + TCGv oldv, newv, cmpv, dest; ot = mo_b_d(b, dflag); modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 
7) | REX_R(s); mod = (modrm >> 6) & 3; oldv = tcg_temp_new(); newv = tcg_temp_new(); cmpv = tcg_temp_new(); gen_op_mov_v_reg(s, ot, newv, reg); tcg_gen_mov_tl(cmpv, cpu_regs[R_EAX]); - + gen_extu(ot, cmpv); if (s->prefix & PREFIX_LOCK) { if (mod == 3) { goto illegal_op; @@ -5284,69 +3864,109 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_lea_modrm(env, s, modrm); tcg_gen_atomic_cmpxchg_tl(oldv, s->A0, cmpv, newv, s->mem_index, ot | MO_LE); - gen_op_mov_reg_v(s, ot, R_EAX, oldv); } else { if (mod == 3) { rm = (modrm & 7) | REX_B(s); gen_op_mov_v_reg(s, ot, oldv, rm); + gen_extu(ot, oldv); + + /* + * Unlike the memory case, where "the destination operand receives + * a write cycle without regard to the result of the comparison", + * rm must not be touched altogether if the write fails, including + * not zero-extending it on 64-bit processors. So, precompute + * the result of a successful writeback and perform the movcond + * directly on cpu_regs. Also need to write accumulator first, in + * case rm is part of RAX too. + */ + dest = gen_op_deposit_reg_v(s, ot, rm, newv, newv); + tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, newv, dest); } else { gen_lea_modrm(env, s, modrm); gen_op_ld_v(s, ot, oldv, s->A0); - rm = 0; /* avoid warning */ - } - gen_extu(ot, oldv); - gen_extu(ot, cmpv); - /* store value = (old == cmp ? 
new : old); */ - tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv); - if (mod == 3) { - gen_op_mov_reg_v(s, ot, R_EAX, oldv); - gen_op_mov_reg_v(s, ot, rm, newv); - } else { - /* Perform an unconditional store cycle like physical cpu; - must be before changing accumulator to ensure - idempotency if the store faults and the instruction - is restarted */ + + /* + * Perform an unconditional store cycle like physical cpu; + * must be before changing accumulator to ensure + * idempotency if the store faults and the instruction + * is restarted + */ + tcg_gen_movcond_tl(TCG_COND_EQ, newv, oldv, cmpv, newv, oldv); gen_op_st_v(s, ot, newv, s->A0); - gen_op_mov_reg_v(s, ot, R_EAX, oldv); } } + /* + * Write EAX only if the cmpxchg fails; reuse newv as the destination, + * since it's dead here. + */ + dest = gen_op_deposit_reg_v(s, ot, R_EAX, newv, oldv); + tcg_gen_movcond_tl(TCG_COND_EQ, dest, oldv, cmpv, dest, newv); tcg_gen_mov_tl(cpu_cc_src, oldv); tcg_gen_mov_tl(s->cc_srcT, cmpv); tcg_gen_sub_tl(cpu_cc_dst, cmpv, oldv); set_cc_op(s, CC_OP_SUBB + ot); - tcg_temp_free(oldv); - tcg_temp_free(newv); - tcg_temp_free(cmpv); } break; case 0x1c7: /* cmpxchg8b */ modrm = x86_ldub_code(env, s); mod = (modrm >> 6) & 3; - if ((mod == 3) || ((modrm & 0x38) != 0x8)) - goto illegal_op; + switch ((modrm >> 3) & 7) { + case 1: /* CMPXCHG8, CMPXCHG16 */ + if (mod == 3) { + goto illegal_op; + } #ifdef TARGET_X86_64 - if (dflag == MO_64) { - if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) + if (dflag == MO_64) { + if (!(s->cpuid_ext_features & CPUID_EXT_CX16)) { + goto illegal_op; + } + gen_cmpxchg16b(s, env, modrm); + break; + } +#endif + if (!(s->cpuid_features & CPUID_CX8)) { goto illegal_op; - gen_lea_modrm(env, s, modrm); - if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) { - gen_helper_cmpxchg16b(cpu_env, s->A0); - } else { - gen_helper_cmpxchg16b_unlocked(cpu_env, s->A0); } - } else -#endif - { - if (!(s->cpuid_features & CPUID_CX8)) + 
gen_cmpxchg8b(s, env, modrm); + break; + + case 7: /* RDSEED, RDPID with f3 prefix */ + if (mod != 3 || + (s->prefix & (PREFIX_LOCK | PREFIX_REPNZ))) { goto illegal_op; - gen_lea_modrm(env, s, modrm); - if ((s->prefix & PREFIX_LOCK) && (tb_cflags(s->base.tb) & CF_PARALLEL)) { - gen_helper_cmpxchg8b(cpu_env, s->A0); + } + if (s->prefix & PREFIX_REPZ) { + if (!(s->cpuid_ext_features & CPUID_7_0_ECX_RDPID)) { + goto illegal_op; + } + gen_helper_rdpid(s->T0, tcg_env); + rm = (modrm & 7) | REX_B(s); + gen_op_mov_reg_v(s, dflag, rm, s->T0); + break; } else { - gen_helper_cmpxchg8b_unlocked(cpu_env, s->A0); + if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_RDSEED)) { + goto illegal_op; + } + goto do_rdrand; + } + + case 6: /* RDRAND */ + if (mod != 3 || + (s->prefix & (PREFIX_LOCK | PREFIX_REPZ | PREFIX_REPNZ)) || + !(s->cpuid_ext_features & CPUID_EXT_RDRAND)) { + goto illegal_op; } + do_rdrand: + translator_io_start(&s->base); + gen_helper_rdrand(s->T0, tcg_env); + rm = (modrm & 7) | REX_B(s); + gen_op_mov_reg_v(s, dflag, rm, s->T0); + set_cc_op(s, CC_OP_EFLAGS); + break; + + default: + goto illegal_op; } - set_cc_op(s, CC_OP_EFLAGS); break; /**************************/ @@ -5432,26 +4052,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) ot = gen_pop_T0(s); gen_movl_seg_T0(s, reg); gen_pop_update(s, ot); - /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. 
*/ - if (s->base.is_jmp) { - gen_jmp_im(s, s->pc - s->cs_base); - if (reg == R_SS) { - s->tf = 0; - gen_eob_inhibit_irq(s, true); - } else { - gen_eob(s); - } - } break; case 0x1a1: /* pop fs */ case 0x1a9: /* pop gs */ ot = gen_pop_T0(s); gen_movl_seg_T0(s, (b >> 3) & 7); gen_pop_update(s, ot); - if (s->base.is_jmp) { - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); - } break; /**************************/ @@ -5460,7 +4066,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x89: /* mov Gv, Ev */ ot = mo_b_d(b, dflag); modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); /* generate a generic store */ gen_ldst_modrm(env, s, modrm, ot, reg, 1); @@ -5486,7 +4092,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x8b: /* mov Ev, Gv */ ot = mo_b_d(b, dflag); modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); gen_op_mov_reg_v(s, ot, reg, s->T0); @@ -5498,16 +4104,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); gen_movl_seg_T0(s, reg); - /* Note that reg == R_SS in gen_movl_seg_T0 always sets is_jmp. */ - if (s->base.is_jmp) { - gen_jmp_im(s, s->pc - s->cs_base); - if (reg == R_SS) { - s->tf = 0; - gen_eob_inhibit_irq(s, true); - } else { - gen_eob(s); - } - } break; case 0x8c: /* mov Gv, seg */ modrm = x86_ldub_code(env, s); @@ -5525,8 +4121,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x1be: /* movsbS Gv, Eb */ case 0x1bf: /* movswS Gv, Eb */ { - TCGMemOp d_ot; - TCGMemOp s_ot; + MemOp d_ot; + MemOp s_ot; /* d_ot is the size of destination */ d_ot = dflag; @@ -5536,7 +4132,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) s_ot = b & 8 ? 
MO_SIGN | ot : ot; modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); mod = (modrm >> 6) & 3; rm = (modrm & 7) | REX_B(s); @@ -5575,10 +4171,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) mod = (modrm >> 6) & 3; if (mod == 3) goto illegal_op; - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); { AddressParts a = gen_lea_modrm_0(env, s, modrm); - TCGv ea = gen_lea_modrm_1(s, a); + TCGv ea = gen_lea_modrm_1(s, a, false); gen_lea_v_seg(s, s->aflag, ea, -1, -1); gen_op_mov_reg_v(s, dflag, reg, s->A0); } @@ -5592,16 +4188,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) target_ulong offset_addr; ot = mo_b_d(b, dflag); - switch (s->aflag) { -#ifdef TARGET_X86_64 - case MO_64: - offset_addr = x86_ldq_code(env, s); - break; -#endif - default: - offset_addr = insn_get(env, s, s->aflag); - break; - } + offset_addr = insn_get_addr(env, s, s->aflag); tcg_gen_movi_tl(s->A0, offset_addr); gen_add_A0_ds_seg(s); if ((b & 2) == 0) { @@ -5617,7 +4204,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_mov_tl(s->A0, cpu_regs[R_EBX]); tcg_gen_ext8u_tl(s->T0, cpu_regs[R_EAX]); tcg_gen_add_tl(s->A0, s->A0, s->T0); - gen_extu(s->aflag, s->A0); gen_add_A0_ds_seg(s); gen_op_ld_v(s, MO_8, s->T0, s->A0); gen_op_mov_reg_v(s, MO_8, R_EAX, s->T0); @@ -5657,7 +4243,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x87: /* xchg Ev, Gv */ ot = mo_b_d(b, dflag); modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); mod = (modrm >> 6) & 3; if (mod == 3) { rm = (modrm & 7) | REX_B(s); @@ -5694,7 +4280,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) do_lxx: ot = dflag != MO_16 ? 
MO_32 : MO_16; modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); mod = (modrm >> 6) & 3; if (mod == 3) goto illegal_op; @@ -5706,10 +4292,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_movl_seg_T0(s, op); /* then put the data */ gen_op_mov_reg_v(s, ot, reg, s->T1); - if (s->base.is_jmp) { - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); - } break; /************************/ @@ -5777,7 +4359,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) modrm = x86_ldub_code(env, s); mod = (modrm >> 6) & 3; rm = (modrm & 7) | REX_B(s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); if (mod != 3) { gen_lea_modrm(env, s, modrm); opreg = OR_TMP0; @@ -5787,9 +4369,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_op_mov_v_reg(s, ot, s->T1, reg); if (shift) { - TCGv imm = tcg_const_tl(x86_ldub_code(env, s)); + TCGv imm = tcg_constant_tl(x86_ldub_code(env, s)); gen_shiftd_rm_T1(s, ot, opreg, op, imm); - tcg_temp_free(imm); } else { gen_shiftd_rm_T1(s, ot, opreg, op, cpu_regs[R_ECX]); } @@ -5798,503 +4379,559 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /************************/ /* floats */ case 0xd8 ... 0xdf: - if (s->flags & (HF_EM_MASK | HF_TS_MASK)) { - /* if CR0.EM or CR0.TS are set, generate an FPU exception */ - /* XXX: what to do if illegal op ? */ - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); - break; - } - modrm = x86_ldub_code(env, s); - mod = (modrm >> 6) & 3; - rm = modrm & 7; - op = ((b & 7) << 3) | ((modrm >> 3) & 7); - if (mod != 3) { - /* memory op */ - gen_lea_modrm(env, s, modrm); - switch(op) { - case 0x00 ... 0x07: /* fxxxs */ - case 0x10 ... 0x17: /* fixxxl */ - case 0x20 ... 0x27: /* fxxxl */ - case 0x30 ... 
0x37: /* fixxx */ - { - int op1; - op1 = op & 7; - - switch(op >> 4) { + { + bool update_fip = true; + + if (s->flags & (HF_EM_MASK | HF_TS_MASK)) { + /* if CR0.EM or CR0.TS are set, generate an FPU exception */ + /* XXX: what to do if illegal op ? */ + gen_exception(s, EXCP07_PREX); + break; + } + modrm = x86_ldub_code(env, s); + mod = (modrm >> 6) & 3; + rm = modrm & 7; + op = ((b & 7) << 3) | ((modrm >> 3) & 7); + if (mod != 3) { + /* memory op */ + AddressParts a = gen_lea_modrm_0(env, s, modrm); + TCGv ea = gen_lea_modrm_1(s, a, false); + TCGv last_addr = tcg_temp_new(); + bool update_fdp = true; + + tcg_gen_mov_tl(last_addr, ea); + gen_lea_v_seg(s, s->aflag, ea, a.def_seg, s->override); + + switch (op) { + case 0x00 ... 0x07: /* fxxxs */ + case 0x10 ... 0x17: /* fixxxl */ + case 0x20 ... 0x27: /* fxxxl */ + case 0x30 ... 0x37: /* fixxx */ + { + int op1; + op1 = op & 7; + + switch (op >> 4) { + case 0: + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + gen_helper_flds_FT0(tcg_env, s->tmp2_i32); + break; + case 1: + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + gen_helper_fildl_FT0(tcg_env, s->tmp2_i32); + break; + case 2: + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, + s->mem_index, MO_LEUQ); + gen_helper_fldl_FT0(tcg_env, s->tmp1_i64); + break; + case 3: + default: + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LESW); + gen_helper_fildl_FT0(tcg_env, s->tmp2_i32); + break; + } + + gen_helper_fp_arith_ST0_FT0(op1); + if (op1 == 3) { + /* fcomp needs pop */ + gen_helper_fpop(tcg_env); + } + } + break; + case 0x08: /* flds */ + case 0x0a: /* fsts */ + case 0x0b: /* fstps */ + case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */ + case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */ + case 0x38 ... 
0x3b: /* filds, fisttps, fists, fistps */ + switch (op & 7) { case 0: - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - gen_helper_flds_FT0(cpu_env, s->tmp2_i32); + switch (op >> 4) { + case 0: + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + gen_helper_flds_ST0(tcg_env, s->tmp2_i32); + break; + case 1: + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + gen_helper_fildl_ST0(tcg_env, s->tmp2_i32); + break; + case 2: + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, + s->mem_index, MO_LEUQ); + gen_helper_fldl_ST0(tcg_env, s->tmp1_i64); + break; + case 3: + default: + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LESW); + gen_helper_fildl_ST0(tcg_env, s->tmp2_i32); + break; + } break; case 1: - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - gen_helper_fildl_FT0(cpu_env, s->tmp2_i32); - break; - case 2: - tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, - s->mem_index, MO_LEQ); - gen_helper_fldl_FT0(cpu_env, s->tmp1_i64); + /* XXX: the corresponding CPUID bit must be tested ! 
*/ + switch (op >> 4) { + case 1: + gen_helper_fisttl_ST0(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + break; + case 2: + gen_helper_fisttll_ST0(s->tmp1_i64, tcg_env); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, + s->mem_index, MO_LEUQ); + break; + case 3: + default: + gen_helper_fistt_ST0(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUW); + break; + } + gen_helper_fpop(tcg_env); break; - case 3: default: - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LESW); - gen_helper_fildl_FT0(cpu_env, s->tmp2_i32); + switch (op >> 4) { + case 0: + gen_helper_fsts_ST0(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + break; + case 1: + gen_helper_fistl_ST0(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUL); + break; + case 2: + gen_helper_fstl_ST0(s->tmp1_i64, tcg_env); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, + s->mem_index, MO_LEUQ); + break; + case 3: + default: + gen_helper_fist_ST0(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUW); + break; + } + if ((op & 7) == 3) { + gen_helper_fpop(tcg_env); + } break; } + break; + case 0x0c: /* fldenv mem */ + gen_helper_fldenv(tcg_env, s->A0, + tcg_constant_i32(dflag - 1)); + update_fip = update_fdp = false; + break; + case 0x0d: /* fldcw mem */ + tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUW); + gen_helper_fldcw(tcg_env, s->tmp2_i32); + update_fip = update_fdp = false; + break; + case 0x0e: /* fnstenv mem */ + gen_helper_fstenv(tcg_env, s->A0, + tcg_constant_i32(dflag - 1)); + update_fip = update_fdp = false; + break; + case 0x0f: /* fnstcw mem */ + gen_helper_fnstcw(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUW); + update_fip = update_fdp = false; + break; + case 0x1d: /* fldt mem */ + gen_helper_fldt_ST0(tcg_env, s->A0); + break; + case 0x1f: /* fstpt 
mem */ + gen_helper_fstt_ST0(tcg_env, s->A0); + gen_helper_fpop(tcg_env); + break; + case 0x2c: /* frstor mem */ + gen_helper_frstor(tcg_env, s->A0, + tcg_constant_i32(dflag - 1)); + update_fip = update_fdp = false; + break; + case 0x2e: /* fnsave mem */ + gen_helper_fsave(tcg_env, s->A0, + tcg_constant_i32(dflag - 1)); + update_fip = update_fdp = false; + break; + case 0x2f: /* fnstsw mem */ + gen_helper_fnstsw(s->tmp2_i32, tcg_env); + tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, + s->mem_index, MO_LEUW); + update_fip = update_fdp = false; + break; + case 0x3c: /* fbld */ + gen_helper_fbld_ST0(tcg_env, s->A0); + break; + case 0x3e: /* fbstp */ + gen_helper_fbst_ST0(tcg_env, s->A0); + gen_helper_fpop(tcg_env); + break; + case 0x3d: /* fildll */ + tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, + s->mem_index, MO_LEUQ); + gen_helper_fildll_ST0(tcg_env, s->tmp1_i64); + break; + case 0x3f: /* fistpll */ + gen_helper_fistll_ST0(s->tmp1_i64, tcg_env); + tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, + s->mem_index, MO_LEUQ); + gen_helper_fpop(tcg_env); + break; + default: + goto unknown_op; + } - gen_helper_fp_arith_ST0_FT0(op1); - if (op1 == 3) { - /* fcomp needs pop */ - gen_helper_fpop(cpu_env); - } + if (update_fdp) { + int last_seg = s->override >= 0 ? s->override : a.def_seg; + + tcg_gen_ld_i32(s->tmp2_i32, tcg_env, + offsetof(CPUX86State, + segs[last_seg].selector)); + tcg_gen_st16_i32(s->tmp2_i32, tcg_env, + offsetof(CPUX86State, fpds)); + tcg_gen_st_tl(last_addr, tcg_env, + offsetof(CPUX86State, fpdp)); } - break; - case 0x08: /* flds */ - case 0x0a: /* fsts */ - case 0x0b: /* fstps */ - case 0x18 ... 0x1b: /* fildl, fisttpl, fistl, fistpl */ - case 0x28 ... 0x2b: /* fldl, fisttpll, fstl, fstpl */ - case 0x38 ... 
0x3b: /* filds, fisttps, fists, fistps */ - switch(op & 7) { - case 0: - switch(op >> 4) { - case 0: - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - gen_helper_flds_ST0(cpu_env, s->tmp2_i32); + } else { + /* register float ops */ + opreg = rm; + + switch (op) { + case 0x08: /* fld sti */ + gen_helper_fpush(tcg_env); + gen_helper_fmov_ST0_STN(tcg_env, + tcg_constant_i32((opreg + 1) & 7)); + break; + case 0x09: /* fxchg sti */ + case 0x29: /* fxchg4 sti, undocumented op */ + case 0x39: /* fxchg7 sti, undocumented op */ + gen_helper_fxchg_ST0_STN(tcg_env, tcg_constant_i32(opreg)); + break; + case 0x0a: /* grp d9/2 */ + switch (rm) { + case 0: /* fnop */ + /* + * check exceptions (FreeBSD FPU probe) + * needs to be treated as I/O because of ferr_irq + */ + translator_io_start(&s->base); + gen_helper_fwait(tcg_env); + update_fip = false; break; - case 1: - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); - gen_helper_fildl_ST0(cpu_env, s->tmp2_i32); + default: + goto unknown_op; + } + break; + case 0x0c: /* grp d9/4 */ + switch (rm) { + case 0: /* fchs */ + gen_helper_fchs_ST0(tcg_env); break; - case 2: - tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, - s->mem_index, MO_LEQ); - gen_helper_fldl_ST0(cpu_env, s->tmp1_i64); + case 1: /* fabs */ + gen_helper_fabs_ST0(tcg_env); break; - case 3: - default: - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LESW); - gen_helper_fildl_ST0(cpu_env, s->tmp2_i32); + case 4: /* ftst */ + gen_helper_fldz_FT0(tcg_env); + gen_helper_fcom_ST0_FT0(tcg_env); + break; + case 5: /* fxam */ + gen_helper_fxam_ST0(tcg_env); break; + default: + goto unknown_op; } break; - case 1: - /* XXX: the corresponding CPUID bit must be tested ! 
*/ - switch(op >> 4) { - case 1: - gen_helper_fisttl_ST0(s->tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); + case 0x0d: /* grp d9/5 */ + { + switch (rm) { + case 0: + gen_helper_fpush(tcg_env); + gen_helper_fld1_ST0(tcg_env); + break; + case 1: + gen_helper_fpush(tcg_env); + gen_helper_fldl2t_ST0(tcg_env); + break; + case 2: + gen_helper_fpush(tcg_env); + gen_helper_fldl2e_ST0(tcg_env); + break; + case 3: + gen_helper_fpush(tcg_env); + gen_helper_fldpi_ST0(tcg_env); + break; + case 4: + gen_helper_fpush(tcg_env); + gen_helper_fldlg2_ST0(tcg_env); + break; + case 5: + gen_helper_fpush(tcg_env); + gen_helper_fldln2_ST0(tcg_env); + break; + case 6: + gen_helper_fpush(tcg_env); + gen_helper_fldz_ST0(tcg_env); + break; + default: + goto unknown_op; + } + } + break; + case 0x0e: /* grp d9/6 */ + switch (rm) { + case 0: /* f2xm1 */ + gen_helper_f2xm1(tcg_env); + break; + case 1: /* fyl2x */ + gen_helper_fyl2x(tcg_env); break; - case 2: - gen_helper_fisttll_ST0(s->tmp1_i64, cpu_env); - tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, - s->mem_index, MO_LEQ); + case 2: /* fptan */ + gen_helper_fptan(tcg_env); + break; + case 3: /* fpatan */ + gen_helper_fpatan(tcg_env); + break; + case 4: /* fxtract */ + gen_helper_fxtract(tcg_env); + break; + case 5: /* fprem1 */ + gen_helper_fprem1(tcg_env); + break; + case 6: /* fdecstp */ + gen_helper_fdecstp(tcg_env); break; - case 3: default: - gen_helper_fistt_ST0(s->tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUW); + case 7: /* fincstp */ + gen_helper_fincstp(tcg_env); break; } - gen_helper_fpop(cpu_env); break; - default: - switch(op >> 4) { - case 0: - gen_helper_fsts_ST0(s->tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUL); + case 0x0f: /* grp d9/7 */ + switch (rm) { + case 0: /* fprem */ + gen_helper_fprem(tcg_env); break; - case 1: - gen_helper_fistl_ST0(s->tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - 
s->mem_index, MO_LEUL); + case 1: /* fyl2xp1 */ + gen_helper_fyl2xp1(tcg_env); + break; + case 2: /* fsqrt */ + gen_helper_fsqrt(tcg_env); + break; + case 3: /* fsincos */ + gen_helper_fsincos(tcg_env); break; - case 2: - gen_helper_fstl_ST0(s->tmp1_i64, cpu_env); - tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, - s->mem_index, MO_LEQ); + case 5: /* fscale */ + gen_helper_fscale(tcg_env); + break; + case 4: /* frndint */ + gen_helper_frndint(tcg_env); + break; + case 6: /* fsin */ + gen_helper_fsin(tcg_env); break; - case 3: default: - gen_helper_fist_ST0(s->tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUW); + case 7: /* fcos */ + gen_helper_fcos(tcg_env); break; } - if ((op & 7) == 3) - gen_helper_fpop(cpu_env); break; - } - break; - case 0x0c: /* fldenv mem */ - gen_helper_fldenv(cpu_env, s->A0, tcg_const_i32(dflag - 1)); - break; - case 0x0d: /* fldcw mem */ - tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUW); - gen_helper_fldcw(cpu_env, s->tmp2_i32); - break; - case 0x0e: /* fnstenv mem */ - gen_helper_fstenv(cpu_env, s->A0, tcg_const_i32(dflag - 1)); - break; - case 0x0f: /* fnstcw mem */ - gen_helper_fnstcw(s->tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUW); - break; - case 0x1d: /* fldt mem */ - gen_helper_fldt_ST0(cpu_env, s->A0); - break; - case 0x1f: /* fstpt mem */ - gen_helper_fstt_ST0(cpu_env, s->A0); - gen_helper_fpop(cpu_env); - break; - case 0x2c: /* frstor mem */ - gen_helper_frstor(cpu_env, s->A0, tcg_const_i32(dflag - 1)); - break; - case 0x2e: /* fnsave mem */ - gen_helper_fsave(cpu_env, s->A0, tcg_const_i32(dflag - 1)); - break; - case 0x2f: /* fnstsw mem */ - gen_helper_fnstsw(s->tmp2_i32, cpu_env); - tcg_gen_qemu_st_i32(s->tmp2_i32, s->A0, - s->mem_index, MO_LEUW); - break; - case 0x3c: /* fbld */ - gen_helper_fbld_ST0(cpu_env, s->A0); - break; - case 0x3e: /* fbstp */ - gen_helper_fbst_ST0(cpu_env, s->A0); - gen_helper_fpop(cpu_env); - break; - case 0x3d: /* 
fildll */ - tcg_gen_qemu_ld_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); - gen_helper_fildll_ST0(cpu_env, s->tmp1_i64); - break; - case 0x3f: /* fistpll */ - gen_helper_fistll_ST0(s->tmp1_i64, cpu_env); - tcg_gen_qemu_st_i64(s->tmp1_i64, s->A0, s->mem_index, MO_LEQ); - gen_helper_fpop(cpu_env); - break; - default: - goto unknown_op; - } - } else { - /* register float ops */ - opreg = rm; - - switch(op) { - case 0x08: /* fld sti */ - gen_helper_fpush(cpu_env); - gen_helper_fmov_ST0_STN(cpu_env, - tcg_const_i32((opreg + 1) & 7)); - break; - case 0x09: /* fxchg sti */ - case 0x29: /* fxchg4 sti, undocumented op */ - case 0x39: /* fxchg7 sti, undocumented op */ - gen_helper_fxchg_ST0_STN(cpu_env, tcg_const_i32(opreg)); - break; - case 0x0a: /* grp d9/2 */ - switch(rm) { - case 0: /* fnop */ - /* check exceptions (FreeBSD FPU probe) */ - gen_helper_fwait(cpu_env); - break; - default: - goto unknown_op; - } - break; - case 0x0c: /* grp d9/4 */ - switch(rm) { - case 0: /* fchs */ - gen_helper_fchs_ST0(cpu_env); - break; - case 1: /* fabs */ - gen_helper_fabs_ST0(cpu_env); + case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */ + case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */ + case 0x30: case 0x31: case 0x34 ... 
0x37: /* fxxxp sti, st */ + { + int op1; + + op1 = op & 7; + if (op >= 0x20) { + gen_helper_fp_arith_STN_ST0(op1, opreg); + if (op >= 0x30) { + gen_helper_fpop(tcg_env); + } + } else { + gen_helper_fmov_FT0_STN(tcg_env, + tcg_constant_i32(opreg)); + gen_helper_fp_arith_ST0_FT0(op1); + } + } break; - case 4: /* ftst */ - gen_helper_fldz_FT0(cpu_env); - gen_helper_fcom_ST0_FT0(cpu_env); + case 0x02: /* fcom */ + case 0x22: /* fcom2, undocumented op */ + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fcom_ST0_FT0(tcg_env); break; - case 5: /* fxam */ - gen_helper_fxam_ST0(cpu_env); + case 0x03: /* fcomp */ + case 0x23: /* fcomp3, undocumented op */ + case 0x32: /* fcomp5, undocumented op */ + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fcom_ST0_FT0(tcg_env); + gen_helper_fpop(tcg_env); break; - default: - goto unknown_op; - } - break; - case 0x0d: /* grp d9/5 */ - { - switch(rm) { - case 0: - gen_helper_fpush(cpu_env); - gen_helper_fld1_ST0(cpu_env); - break; - case 1: - gen_helper_fpush(cpu_env); - gen_helper_fldl2t_ST0(cpu_env); + case 0x15: /* da/5 */ + switch (rm) { + case 1: /* fucompp */ + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1)); + gen_helper_fucom_ST0_FT0(tcg_env); + gen_helper_fpop(tcg_env); + gen_helper_fpop(tcg_env); break; - case 2: - gen_helper_fpush(cpu_env); - gen_helper_fldl2e_ST0(cpu_env); + default: + goto unknown_op; + } + break; + case 0x1c: + switch (rm) { + case 0: /* feni (287 only, just do nop here) */ break; - case 3: - gen_helper_fpush(cpu_env); - gen_helper_fldpi_ST0(cpu_env); + case 1: /* fdisi (287 only, just do nop here) */ break; - case 4: - gen_helper_fpush(cpu_env); - gen_helper_fldlg2_ST0(cpu_env); + case 2: /* fclex */ + gen_helper_fclex(tcg_env); + update_fip = false; break; - case 5: - gen_helper_fpush(cpu_env); - gen_helper_fldln2_ST0(cpu_env); + case 3: /* fninit */ + gen_helper_fninit(tcg_env); + update_fip = false; break; - case 6: - gen_helper_fpush(cpu_env); - 
gen_helper_fldz_ST0(cpu_env); + case 4: /* fsetpm (287 only, just do nop here) */ break; default: goto unknown_op; } - } - break; - case 0x0e: /* grp d9/6 */ - switch(rm) { - case 0: /* f2xm1 */ - gen_helper_f2xm1(cpu_env); - break; - case 1: /* fyl2x */ - gen_helper_fyl2x(cpu_env); - break; - case 2: /* fptan */ - gen_helper_fptan(cpu_env); - break; - case 3: /* fpatan */ - gen_helper_fpatan(cpu_env); - break; - case 4: /* fxtract */ - gen_helper_fxtract(cpu_env); - break; - case 5: /* fprem1 */ - gen_helper_fprem1(cpu_env); - break; - case 6: /* fdecstp */ - gen_helper_fdecstp(cpu_env); - break; - default: - case 7: /* fincstp */ - gen_helper_fincstp(cpu_env); - break; - } - break; - case 0x0f: /* grp d9/7 */ - switch(rm) { - case 0: /* fprem */ - gen_helper_fprem(cpu_env); break; - case 1: /* fyl2xp1 */ - gen_helper_fyl2xp1(cpu_env); + case 0x1d: /* fucomi */ + if (!(s->cpuid_features & CPUID_CMOV)) { + goto illegal_op; + } + gen_update_cc_op(s); + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fucomi_ST0_FT0(tcg_env); + set_cc_op(s, CC_OP_EFLAGS); break; - case 2: /* fsqrt */ - gen_helper_fsqrt(cpu_env); + case 0x1e: /* fcomi */ + if (!(s->cpuid_features & CPUID_CMOV)) { + goto illegal_op; + } + gen_update_cc_op(s); + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fcomi_ST0_FT0(tcg_env); + set_cc_op(s, CC_OP_EFLAGS); break; - case 3: /* fsincos */ - gen_helper_fsincos(cpu_env); + case 0x28: /* ffree sti */ + gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg)); break; - case 5: /* fscale */ - gen_helper_fscale(cpu_env); + case 0x2a: /* fst sti */ + gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg)); break; - case 4: /* frndint */ - gen_helper_frndint(cpu_env); + case 0x2b: /* fstp sti */ + case 0x0b: /* fstp1 sti, undocumented op */ + case 0x3a: /* fstp8 sti, undocumented op */ + case 0x3b: /* fstp9 sti, undocumented op */ + gen_helper_fmov_STN_ST0(tcg_env, tcg_constant_i32(opreg)); + 
gen_helper_fpop(tcg_env); break; - case 6: /* fsin */ - gen_helper_fsin(cpu_env); + case 0x2c: /* fucom st(i) */ + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fucom_ST0_FT0(tcg_env); break; - default: - case 7: /* fcos */ - gen_helper_fcos(cpu_env); + case 0x2d: /* fucomp st(i) */ + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fucom_ST0_FT0(tcg_env); + gen_helper_fpop(tcg_env); break; - } - break; - case 0x00: case 0x01: case 0x04 ... 0x07: /* fxxx st, sti */ - case 0x20: case 0x21: case 0x24 ... 0x27: /* fxxx sti, st */ - case 0x30: case 0x31: case 0x34 ... 0x37: /* fxxxp sti, st */ - { - int op1; - - op1 = op & 7; - if (op >= 0x20) { - gen_helper_fp_arith_STN_ST0(op1, opreg); - if (op >= 0x30) - gen_helper_fpop(cpu_env); - } else { - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); - gen_helper_fp_arith_ST0_FT0(op1); + case 0x33: /* de/3 */ + switch (rm) { + case 1: /* fcompp */ + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(1)); + gen_helper_fcom_ST0_FT0(tcg_env); + gen_helper_fpop(tcg_env); + gen_helper_fpop(tcg_env); + break; + default: + goto unknown_op; } - } - break; - case 0x02: /* fcom */ - case 0x22: /* fcom2, undocumented op */ - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); - gen_helper_fcom_ST0_FT0(cpu_env); - break; - case 0x03: /* fcomp */ - case 0x23: /* fcomp3, undocumented op */ - case 0x32: /* fcomp5, undocumented op */ - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); - gen_helper_fcom_ST0_FT0(cpu_env); - gen_helper_fpop(cpu_env); - break; - case 0x15: /* da/5 */ - switch(rm) { - case 1: /* fucompp */ - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1)); - gen_helper_fucom_ST0_FT0(cpu_env); - gen_helper_fpop(cpu_env); - gen_helper_fpop(cpu_env); - break; - default: - goto unknown_op; - } - break; - case 0x1c: - switch(rm) { - case 0: /* feni (287 only, just do nop here) */ - break; - case 1: /* fdisi (287 only, just do nop here) */ break; - case 2: /* fclex */ 
- gen_helper_fclex(cpu_env); + case 0x38: /* ffreep sti, undocumented op */ + gen_helper_ffree_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fpop(tcg_env); break; - case 3: /* fninit */ - gen_helper_fninit(cpu_env); + case 0x3c: /* df/4 */ + switch (rm) { + case 0: + gen_helper_fnstsw(s->tmp2_i32, tcg_env); + tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); + gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); + break; + default: + goto unknown_op; + } break; - case 4: /* fsetpm (287 only, just do nop here) */ + case 0x3d: /* fucomip */ + if (!(s->cpuid_features & CPUID_CMOV)) { + goto illegal_op; + } + gen_update_cc_op(s); + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fucomi_ST0_FT0(tcg_env); + gen_helper_fpop(tcg_env); + set_cc_op(s, CC_OP_EFLAGS); break; - default: - goto unknown_op; - } - break; - case 0x1d: /* fucomi */ - if (!(s->cpuid_features & CPUID_CMOV)) { - goto illegal_op; - } - gen_update_cc_op(s); - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); - gen_helper_fucomi_ST0_FT0(cpu_env); - set_cc_op(s, CC_OP_EFLAGS); - break; - case 0x1e: /* fcomi */ - if (!(s->cpuid_features & CPUID_CMOV)) { - goto illegal_op; - } - gen_update_cc_op(s); - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); - gen_helper_fcomi_ST0_FT0(cpu_env); - set_cc_op(s, CC_OP_EFLAGS); - break; - case 0x28: /* ffree sti */ - gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg)); - break; - case 0x2a: /* fst sti */ - gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg)); - break; - case 0x2b: /* fstp sti */ - case 0x0b: /* fstp1 sti, undocumented op */ - case 0x3a: /* fstp8 sti, undocumented op */ - case 0x3b: /* fstp9 sti, undocumented op */ - gen_helper_fmov_STN_ST0(cpu_env, tcg_const_i32(opreg)); - gen_helper_fpop(cpu_env); - break; - case 0x2c: /* fucom st(i) */ - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); - gen_helper_fucom_ST0_FT0(cpu_env); - break; - case 0x2d: /* fucomp st(i) */ - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); - 
gen_helper_fucom_ST0_FT0(cpu_env); - gen_helper_fpop(cpu_env); - break; - case 0x33: /* de/3 */ - switch(rm) { - case 1: /* fcompp */ - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(1)); - gen_helper_fcom_ST0_FT0(cpu_env); - gen_helper_fpop(cpu_env); - gen_helper_fpop(cpu_env); + case 0x3e: /* fcomip */ + if (!(s->cpuid_features & CPUID_CMOV)) { + goto illegal_op; + } + gen_update_cc_op(s); + gen_helper_fmov_FT0_STN(tcg_env, tcg_constant_i32(opreg)); + gen_helper_fcomi_ST0_FT0(tcg_env); + gen_helper_fpop(tcg_env); + set_cc_op(s, CC_OP_EFLAGS); break; - default: - goto unknown_op; - } - break; - case 0x38: /* ffreep sti, undocumented op */ - gen_helper_ffree_STN(cpu_env, tcg_const_i32(opreg)); - gen_helper_fpop(cpu_env); - break; - case 0x3c: /* df/4 */ - switch(rm) { - case 0: - gen_helper_fnstsw(s->tmp2_i32, cpu_env); - tcg_gen_extu_i32_tl(s->T0, s->tmp2_i32); - gen_op_mov_reg_v(s, MO_16, R_EAX, s->T0); + case 0x10 ... 0x13: /* fcmovxx */ + case 0x18 ... 0x1b: + { + int op1; + TCGLabel *l1; + static const uint8_t fcmov_cc[8] = { + (JCC_B << 1), + (JCC_Z << 1), + (JCC_BE << 1), + (JCC_P << 1), + }; + + if (!(s->cpuid_features & CPUID_CMOV)) { + goto illegal_op; + } + op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1); + l1 = gen_new_label(); + gen_jcc1_noeob(s, op1, l1); + gen_helper_fmov_ST0_STN(tcg_env, + tcg_constant_i32(opreg)); + gen_set_label(l1); + } break; default: goto unknown_op; } - break; - case 0x3d: /* fucomip */ - if (!(s->cpuid_features & CPUID_CMOV)) { - goto illegal_op; - } - gen_update_cc_op(s); - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); - gen_helper_fucomi_ST0_FT0(cpu_env); - gen_helper_fpop(cpu_env); - set_cc_op(s, CC_OP_EFLAGS); - break; - case 0x3e: /* fcomip */ - if (!(s->cpuid_features & CPUID_CMOV)) { - goto illegal_op; - } - gen_update_cc_op(s); - gen_helper_fmov_FT0_STN(cpu_env, tcg_const_i32(opreg)); - gen_helper_fcomi_ST0_FT0(cpu_env); - gen_helper_fpop(cpu_env); - set_cc_op(s, CC_OP_EFLAGS); - break; - case 0x10 ... 
0x13: /* fcmovxx */ - case 0x18 ... 0x1b: - { - int op1; - TCGLabel *l1; - static const uint8_t fcmov_cc[8] = { - (JCC_B << 1), - (JCC_Z << 1), - (JCC_BE << 1), - (JCC_P << 1), - }; + } - if (!(s->cpuid_features & CPUID_CMOV)) { - goto illegal_op; - } - op1 = fcmov_cc[op & 3] | (((op >> 3) & 1) ^ 1); - l1 = gen_new_label(); - gen_jcc1_noeob(s, op1, l1); - gen_helper_fmov_ST0_STN(cpu_env, tcg_const_i32(opreg)); - gen_set_label(l1); - } - break; - default: - goto unknown_op; + if (update_fip) { + tcg_gen_ld_i32(s->tmp2_i32, tcg_env, + offsetof(CPUX86State, segs[R_CS].selector)); + tcg_gen_st16_i32(s->tmp2_i32, tcg_env, + offsetof(CPUX86State, fpcs)); + tcg_gen_st_tl(eip_cur_tl(s), + tcg_env, offsetof(CPUX86State, fpip)); } } break; @@ -6305,7 +4942,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xa5: ot = mo_b_d(b, dflag); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_movs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + gen_repz_movs(s, ot); } else { gen_movs(s, ot); } @@ -6314,8 +4951,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xaa: /* stosS */ case 0xab: ot = mo_b_d(b, dflag); + gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_stos(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + gen_repz_stos(s, ot); } else { gen_stos(s, ot); } @@ -6324,7 +4962,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xad: ot = mo_b_d(b, dflag); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_lods(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + gen_repz_lods(s, ot); } else { gen_lods(s, ot); } @@ -6332,10 +4970,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xae: /* scasS */ case 0xaf: ot = mo_b_d(b, dflag); + gen_op_mov_v_reg(s, MO_32, s->T0, R_EAX); if (prefixes & PREFIX_REPNZ) { - gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1); + gen_repz_scas(s, ot, 1); } else if (prefixes & 
PREFIX_REPZ) { - gen_repz_scas(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0); + gen_repz_scas(s, ot, 0); } else { gen_scas(s, ot); } @@ -6345,9 +4984,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xa7: ot = mo_b_d(b, dflag); if (prefixes & PREFIX_REPNZ) { - gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 1); + gen_repz_cmps(s, ot, 1); } else if (prefixes & PREFIX_REPZ) { - gen_repz_cmps(s, ot, pc_start - s->cs_base, s->pc - s->cs_base, 0); + gen_repz_cmps(s, ot, 0); } else { gen_cmps(s, ot); } @@ -6355,31 +4994,32 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x6c: /* insS */ case 0x6d: ot = mo_b_d32(b, dflag); - tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]); - gen_check_io(s, ot, pc_start - s->cs_base, - SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes) | 4); + tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); + tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32); + if (!gen_check_io(s, ot, s->tmp2_i32, + SVM_IOIO_TYPE_MASK | SVM_IOIO_STR_MASK)) { + break; + } + translator_io_start(&s->base); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_ins(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + gen_repz_ins(s, ot); } else { gen_ins(s, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_jmp(s, s->pc - s->cs_base); - } } break; case 0x6e: /* outsS */ case 0x6f: ot = mo_b_d32(b, dflag); - tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]); - gen_check_io(s, ot, pc_start - s->cs_base, - svm_is_rep(prefixes) | 4); + tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); + tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32); + if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_STR_MASK)) { + break; + } + translator_io_start(&s->base); if (prefixes & (PREFIX_REPZ | PREFIX_REPNZ)) { - gen_repz_outs(s, ot, pc_start - s->cs_base, s->pc - s->cs_base); + gen_repz_outs(s, ot); } else { gen_outs(s, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_jmp(s, s->pc - s->cs_base); - } } break; @@ -6390,79 +5030,55 @@ static 
target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xe5: ot = mo_b_d32(b, dflag); val = x86_ldub_code(env, s); - tcg_gen_movi_tl(s->T0, val); - gen_check_io(s, ot, pc_start - s->cs_base, - SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes)); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } tcg_gen_movi_i32(s->tmp2_i32, val); + if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) { + break; + } + translator_io_start(&s->base); gen_helper_in_func(ot, s->T1, s->tmp2_i32); gen_op_mov_reg_v(s, ot, R_EAX, s->T1); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(); - gen_jmp(s, s->pc - s->cs_base); - } break; case 0xe6: case 0xe7: ot = mo_b_d32(b, dflag); val = x86_ldub_code(env, s); - tcg_gen_movi_tl(s->T0, val); - gen_check_io(s, ot, pc_start - s->cs_base, - svm_is_rep(prefixes)); - gen_op_mov_v_reg(s, ot, s->T1, R_EAX); - - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } tcg_gen_movi_i32(s->tmp2_i32, val); + if (!gen_check_io(s, ot, s->tmp2_i32, 0)) { + break; + } + translator_io_start(&s->base); + gen_op_mov_v_reg(s, ot, s->T1, R_EAX); tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(); - gen_jmp(s, s->pc - s->cs_base); - } break; case 0xec: case 0xed: ot = mo_b_d32(b, dflag); - tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]); - gen_check_io(s, ot, pc_start - s->cs_base, - SVM_IOIO_TYPE_MASK | svm_is_rep(prefixes)); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); + tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); + tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32); + if (!gen_check_io(s, ot, s->tmp2_i32, SVM_IOIO_TYPE_MASK)) { + break; + } + translator_io_start(&s->base); gen_helper_in_func(ot, s->T1, s->tmp2_i32); gen_op_mov_reg_v(s, ot, R_EAX, s->T1); gen_bpt_io(s, s->tmp2_i32, ot); - if 
(tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(); - gen_jmp(s, s->pc - s->cs_base); - } break; case 0xee: case 0xef: ot = mo_b_d32(b, dflag); - tcg_gen_ext16u_tl(s->T0, cpu_regs[R_EDX]); - gen_check_io(s, ot, pc_start - s->cs_base, - svm_is_rep(prefixes)); + tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_EDX]); + tcg_gen_ext16u_i32(s->tmp2_i32, s->tmp2_i32); + if (!gen_check_io(s, ot, s->tmp2_i32, 0)) { + break; + } + translator_io_start(&s->base); gen_op_mov_v_reg(s, ot, s->T1, R_EAX); - - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } - tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); tcg_gen_trunc_tl_i32(s->tmp3_i32, s->T1); gen_helper_out_func(ot, s->tmp2_i32, s->tmp3_i32); gen_bpt_io(s, s->tmp2_i32, ot); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(); - gen_jmp(s, s->pc - s->cs_base); - } break; /************************/ @@ -6472,33 +5088,33 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) ot = gen_pop_T0(s); gen_stack_update(s, val + (1 << ot)); /* Note that gen_pop_T0 uses a zero-extending load. */ - gen_op_jmp_v(s->T0); + gen_op_jmp_v(s, s->T0); gen_bnd_jmp(s); - gen_jr(s, s->T0); + s->base.is_jmp = DISAS_JUMP; break; case 0xc3: /* ret */ ot = gen_pop_T0(s); gen_pop_update(s, ot); /* Note that gen_pop_T0 uses a zero-extending load. 
*/ - gen_op_jmp_v(s->T0); + gen_op_jmp_v(s, s->T0); gen_bnd_jmp(s); - gen_jr(s, s->T0); + s->base.is_jmp = DISAS_JUMP; break; case 0xca: /* lret im */ val = x86_ldsw_code(env, s); do_lret: - if (s->pe && !s->vm86) { + if (PE(s) && !VM86(s)) { gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_lret_protected(cpu_env, tcg_const_i32(dflag - 1), - tcg_const_i32(val)); + gen_update_eip_cur(s); + gen_helper_lret_protected(tcg_env, tcg_constant_i32(dflag - 1), + tcg_constant_i32(val)); } else { gen_stack_A0(s); /* pop offset */ gen_op_ld_v(s, dflag, s->T0, s->A0); /* NOTE: keeping EIP updated is not a problem in case of exception */ - gen_op_jmp_v(s->T0); + gen_op_jmp_v(s, s->T0); /* pop selector */ gen_add_A0_im(s, 1 << dflag); gen_op_ld_v(s, dflag, s->T0, s->A0); @@ -6506,49 +5122,34 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /* add stack offset */ gen_stack_update(s, val + (2 << dflag)); } - gen_eob(s); + s->base.is_jmp = DISAS_EOB_ONLY; break; case 0xcb: /* lret */ val = 0; goto do_lret; case 0xcf: /* iret */ - gen_svm_check_intercept(s, pc_start, SVM_EXIT_IRET); - if (!s->pe) { - /* real mode */ - gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1)); - set_cc_op(s, CC_OP_EFLAGS); - } else if (s->vm86) { - if (s->iopl != 3) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } else { - gen_helper_iret_real(cpu_env, tcg_const_i32(dflag - 1)); - set_cc_op(s, CC_OP_EFLAGS); + gen_svm_check_intercept(s, SVM_EXIT_IRET); + if (!PE(s) || VM86(s)) { + /* real mode or vm86 mode */ + if (!check_vm86_iopl(s)) { + break; } + gen_helper_iret_real(tcg_env, tcg_constant_i32(dflag - 1)); } else { - gen_helper_iret_protected(cpu_env, tcg_const_i32(dflag - 1), - tcg_const_i32(s->pc - s->cs_base)); - set_cc_op(s, CC_OP_EFLAGS); + gen_helper_iret_protected(tcg_env, tcg_constant_i32(dflag - 1), + eip_next_i32(s)); } - gen_eob(s); + set_cc_op(s, CC_OP_EFLAGS); + s->base.is_jmp = DISAS_EOB_ONLY; break; case 0xe8: /* call im */ { - if 
(dflag != MO_16) { - tval = (int32_t)insn_get(env, s, MO_32); - } else { - tval = (int16_t)insn_get(env, s, MO_16); - } - next_eip = s->pc - s->cs_base; - tval += next_eip; - if (dflag == MO_16) { - tval &= 0xffff; - } else if (!CODE64(s)) { - tval &= 0xffffffff; - } - tcg_gen_movi_tl(s->T0, next_eip); - gen_push_v(s, s->T0); + int diff = (dflag != MO_16 + ? (int32_t)insn_get(env, s, MO_32) + : (int16_t)insn_get(env, s, MO_16)); + gen_push_v(s, eip_next_tl(s)); gen_bnd_jmp(s); - gen_jmp(s, tval); + gen_jmp_rel(s, dflag, diff, 0); } break; case 0x9a: /* lcall im */ @@ -6566,19 +5167,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } goto do_lcall; case 0xe9: /* jmp im */ - if (dflag != MO_16) { - tval = (int32_t)insn_get(env, s, MO_32); - } else { - tval = (int16_t)insn_get(env, s, MO_16); - } - tval += s->pc - s->cs_base; - if (dflag == MO_16) { - tval &= 0xffff; - } else if (!CODE64(s)) { - tval &= 0xffffffff; + { + int diff = (dflag != MO_16 + ? (int32_t)insn_get(env, s, MO_32) + : (int16_t)insn_get(env, s, MO_16)); + gen_bnd_jmp(s); + gen_jmp_rel(s, dflag, diff, 0); } - gen_bnd_jmp(s); - gen_jmp(s, tval); break; case 0xea: /* ljmp im */ { @@ -6595,30 +5190,26 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } goto do_ljmp; case 0xeb: /* jmp Jb */ - tval = (int8_t)insn_get(env, s, MO_8); - tval += s->pc - s->cs_base; - if (dflag == MO_16) { - tval &= 0xffff; + { + int diff = (int8_t)insn_get(env, s, MO_8); + gen_jmp_rel(s, dflag, diff, 0); } - gen_jmp(s, tval); break; case 0x70 ... 0x7f: /* jcc Jb */ - tval = (int8_t)insn_get(env, s, MO_8); - goto do_jcc; - case 0x180 ... 
0x18f: /* jcc Jv */ - if (dflag != MO_16) { - tval = (int32_t)insn_get(env, s, MO_32); - } else { - tval = (int16_t)insn_get(env, s, MO_16); + { + int diff = (int8_t)insn_get(env, s, MO_8); + gen_bnd_jmp(s); + gen_jcc(s, b, diff); } - do_jcc: - next_eip = s->pc - s->cs_base; - tval += next_eip; - if (dflag == MO_16) { - tval &= 0xffff; + break; + case 0x180 ... 0x18f: /* jcc Jv */ + { + int diff = (dflag != MO_16 + ? (int32_t)insn_get(env, s, MO_32) + : (int16_t)insn_get(env, s, MO_16)); + gen_bnd_jmp(s); + gen_jcc(s, b, diff); } - gen_bnd_jmp(s); - gen_jcc(s, b, tval, next_eip); break; case 0x190 ... 0x19f: /* setcc Gv */ @@ -6632,84 +5223,48 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } ot = dflag; modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; - gen_cmovcc1(env, s, ot, b, modrm, reg); + reg = ((modrm >> 3) & 7) | REX_R(s); + gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); + gen_cmovcc1(s, b ^ 1, s->T0, cpu_regs[reg]); + gen_op_mov_reg_v(s, ot, reg, s->T0); break; /************************/ /* flags */ case 0x9c: /* pushf */ - gen_svm_check_intercept(s, pc_start, SVM_EXIT_PUSHF); - if (s->vm86 && s->iopl != 3) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } else { + gen_svm_check_intercept(s, SVM_EXIT_PUSHF); + if (check_vm86_iopl(s)) { gen_update_cc_op(s); - gen_helper_read_eflags(s->T0, cpu_env); + gen_helper_read_eflags(s->T0, tcg_env); gen_push_v(s, s->T0); } break; case 0x9d: /* popf */ - gen_svm_check_intercept(s, pc_start, SVM_EXIT_POPF); - if (s->vm86 && s->iopl != 3) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } else { - ot = gen_pop_T0(s); - if (s->cpl == 0) { - if (dflag != MO_16) { - gen_helper_write_eflags(cpu_env, s->T0, - tcg_const_i32((TF_MASK | AC_MASK | - ID_MASK | NT_MASK | - IF_MASK | - IOPL_MASK))); - } else { - gen_helper_write_eflags(cpu_env, s->T0, - tcg_const_i32((TF_MASK | AC_MASK | - ID_MASK | NT_MASK | - IF_MASK | IOPL_MASK) - & 0xffff)); - } - } else { - if (s->cpl 
<= s->iopl) { - if (dflag != MO_16) { - gen_helper_write_eflags(cpu_env, s->T0, - tcg_const_i32((TF_MASK | - AC_MASK | - ID_MASK | - NT_MASK | - IF_MASK))); - } else { - gen_helper_write_eflags(cpu_env, s->T0, - tcg_const_i32((TF_MASK | - AC_MASK | - ID_MASK | - NT_MASK | - IF_MASK) - & 0xffff)); - } - } else { - if (dflag != MO_16) { - gen_helper_write_eflags(cpu_env, s->T0, - tcg_const_i32((TF_MASK | AC_MASK | - ID_MASK | NT_MASK))); - } else { - gen_helper_write_eflags(cpu_env, s->T0, - tcg_const_i32((TF_MASK | AC_MASK | - ID_MASK | NT_MASK) - & 0xffff)); - } - } + gen_svm_check_intercept(s, SVM_EXIT_POPF); + if (check_vm86_iopl(s)) { + int mask = TF_MASK | AC_MASK | ID_MASK | NT_MASK; + + if (CPL(s) == 0) { + mask |= IF_MASK | IOPL_MASK; + } else if (CPL(s) <= IOPL(s)) { + mask |= IF_MASK; + } + if (dflag == MO_16) { + mask &= 0xffff; } + + ot = gen_pop_T0(s); + gen_helper_write_eflags(tcg_env, s->T0, tcg_constant_i32(mask)); gen_pop_update(s, ot); set_cc_op(s, CC_OP_EFLAGS); /* abort translation because TF/AC flag may change */ - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; } break; case 0x9e: /* sahf */ if (CODE64(s) && !(s->cpuid_ext3_features & CPUID_EXT3_LAHF_LM)) goto illegal_op; - gen_op_mov_v_reg(s, MO_8, s->T0, R_AH); + tcg_gen_shri_tl(s->T0, cpu_regs[R_EAX], 8); gen_compute_eflags(s); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, CC_O); tcg_gen_andi_tl(s->T0, s->T0, CC_S | CC_Z | CC_A | CC_P | CC_C); @@ -6721,7 +5276,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_compute_eflags(s); /* Note: gen_compute_eflags() only gives the condition codes */ tcg_gen_ori_tl(s->T0, cpu_cc_src, 0x02); - gen_op_mov_reg_v(s, MO_8, R_AH, s->T0); + tcg_gen_deposit_tl(cpu_regs[R_EAX], cpu_regs[R_EAX], s->T0, 8, 8); break; case 0xf5: /* cmc */ gen_compute_eflags(s); @@ -6737,11 +5292,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; case 0xfc: /* cld */ tcg_gen_movi_i32(s->tmp2_i32, 1); - 
tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df)); + tcg_gen_st_i32(s->tmp2_i32, tcg_env, offsetof(CPUX86State, df)); break; case 0xfd: /* std */ tcg_gen_movi_i32(s->tmp2_i32, -1); - tcg_gen_st_i32(s->tmp2_i32, cpu_env, offsetof(CPUX86State, df)); + tcg_gen_st_i32(s->tmp2_i32, tcg_env, offsetof(CPUX86State, df)); break; /************************/ @@ -6782,7 +5337,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) do_btx: ot = dflag; modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); mod = (modrm >> 6) & 3; rm = (modrm & 7) | REX_B(s); gen_op_mov_v_reg(s, MO_32, s->T1, reg); @@ -6792,7 +5347,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_exts(ot, s->T1); tcg_gen_sari_tl(s->tmp0, s->T1, 3 + ot); tcg_gen_shli_tl(s->tmp0, s->tmp0, ot); - tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a), s->tmp0); + tcg_gen_add_tl(s->A0, gen_lea_modrm_1(s, a, false), s->tmp0); gen_lea_v_seg(s, s->aflag, s->A0, a.def_seg, s->override); if (!(s->prefix & PREFIX_LOCK)) { gen_op_ld_v(s, ot, s->T0, s->A0); @@ -6807,7 +5362,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (s->prefix & PREFIX_LOCK) { switch (op) { case 0: /* bt */ - /* Needs no atomic ops; we surpressed the normal + /* Needs no atomic ops; we suppressed the normal memory load for LOCK above so do it now. 
*/ gen_op_ld_v(s, ot, s->T0, s->A0); break; @@ -6887,7 +5442,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x1bd: /* bsr / lzcnt */ ot = dflag; modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 0); gen_extu(ot, s->T0); @@ -6939,28 +5494,28 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (CODE64(s)) goto illegal_op; gen_update_cc_op(s); - gen_helper_daa(cpu_env); + gen_helper_daa(tcg_env); set_cc_op(s, CC_OP_EFLAGS); break; case 0x2f: /* das */ if (CODE64(s)) goto illegal_op; gen_update_cc_op(s); - gen_helper_das(cpu_env); + gen_helper_das(tcg_env); set_cc_op(s, CC_OP_EFLAGS); break; case 0x37: /* aaa */ if (CODE64(s)) goto illegal_op; gen_update_cc_op(s); - gen_helper_aaa(cpu_env); + gen_helper_aaa(tcg_env); set_cc_op(s, CC_OP_EFLAGS); break; case 0x3f: /* aas */ if (CODE64(s)) goto illegal_op; gen_update_cc_op(s); - gen_helper_aas(cpu_env); + gen_helper_aas(tcg_env); set_cc_op(s, CC_OP_EFLAGS); break; case 0xd4: /* aam */ @@ -6968,9 +5523,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; val = x86_ldub_code(env, s); if (val == 0) { - gen_exception(s, EXCP00_DIVZ, pc_start - s->cs_base); + gen_exception(s, EXCP00_DIVZ); } else { - gen_helper_aam(cpu_env, tcg_const_i32(val)); + gen_helper_aam(tcg_env, tcg_constant_i32(val)); set_cc_op(s, CC_OP_LOGICB); } break; @@ -6978,7 +5533,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (CODE64(s)) goto illegal_op; val = x86_ldub_code(env, s); - gen_helper_aad(cpu_env, tcg_const_i32(val)); + gen_helper_aad(tcg_env, tcg_constant_i32(val)); set_cc_op(s, CC_OP_LOGICB); break; /************************/ @@ -6994,72 +5549,54 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) } if (prefixes & PREFIX_REPZ) { gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_pause(cpu_env, tcg_const_i32(s->pc - 
pc_start)); + gen_update_eip_cur(s); + gen_helper_pause(tcg_env, cur_insn_len_i32(s)); s->base.is_jmp = DISAS_NORETURN; } break; case 0x9b: /* fwait */ if ((s->flags & (HF_MP_MASK | HF_TS_MASK)) == (HF_MP_MASK | HF_TS_MASK)) { - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + gen_exception(s, EXCP07_PREX); } else { - gen_helper_fwait(cpu_env); + /* needs to be treated as I/O because of ferr_irq */ + translator_io_start(&s->base); + gen_helper_fwait(tcg_env); } break; case 0xcc: /* int3 */ - gen_interrupt(s, EXCP03_INT3, pc_start - s->cs_base, s->pc - s->cs_base); + gen_interrupt(s, EXCP03_INT3); break; case 0xcd: /* int N */ val = x86_ldub_code(env, s); - if (s->vm86 && s->iopl != 3) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } else { - gen_interrupt(s, val, pc_start - s->cs_base, s->pc - s->cs_base); + if (check_vm86_iopl(s)) { + gen_interrupt(s, val); } break; case 0xce: /* into */ if (CODE64(s)) goto illegal_op; gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_into(cpu_env, tcg_const_i32(s->pc - pc_start)); + gen_update_eip_cur(s); + gen_helper_into(tcg_env, cur_insn_len_i32(s)); break; #ifdef WANT_ICEBP case 0xf1: /* icebp (undocumented, exits to external debugger) */ - gen_svm_check_intercept(s, pc_start, SVM_EXIT_ICEBP); -#if 1 - gen_debug(s, pc_start - s->cs_base); -#else - /* start debug */ - tb_flush(CPU(x86_env_get_cpu(env))); - qemu_set_log(CPU_LOG_INT | CPU_LOG_TB_IN_ASM); -#endif + gen_svm_check_intercept(s, SVM_EXIT_ICEBP); + gen_debug(s); break; #endif case 0xfa: /* cli */ - if (!s->vm86) { - if (s->cpl <= s->iopl) { - gen_helper_cli(cpu_env); - } else { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } - } else { - if (s->iopl == 3) { - gen_helper_cli(cpu_env); - } else { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } + if (check_iopl(s)) { + gen_reset_eflags(s, IF_MASK); } break; case 0xfb: /* sti */ - if (s->vm86 ? 
s->iopl == 3 : s->cpl <= s->iopl) { - gen_helper_sti(cpu_env); + if (check_iopl(s)) { + gen_set_eflags(s, IF_MASK); /* interruptions are enabled only the first insn after sti */ - gen_jmp_im(s, s->pc - s->cs_base); + gen_update_eip_next(s); gen_eob_inhibit_irq(s, true); - } else { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); } break; case 0x62: /* bound */ @@ -7075,26 +5612,20 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_lea_modrm(env, s, modrm); tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); if (ot == MO_16) { - gen_helper_boundw(cpu_env, s->A0, s->tmp2_i32); + gen_helper_boundw(tcg_env, s->A0, s->tmp2_i32); } else { - gen_helper_boundl(cpu_env, s->A0, s->tmp2_i32); + gen_helper_boundl(tcg_env, s->A0, s->tmp2_i32); } break; case 0x1c8 ... 0x1cf: /* bswap reg */ reg = (b & 7) | REX_B(s); #ifdef TARGET_X86_64 if (dflag == MO_64) { - gen_op_mov_v_reg(s, MO_64, s->T0, reg); - tcg_gen_bswap64_i64(s->T0, s->T0); - gen_op_mov_reg_v(s, MO_64, reg, s->T0); - } else -#endif - { - gen_op_mov_v_reg(s, MO_32, s->T0, reg); - tcg_gen_ext32u_tl(s->T0, s->T0); - tcg_gen_bswap32_tl(s->T0, s->T0); - gen_op_mov_reg_v(s, MO_32, reg, s->T0); + tcg_gen_bswap64_i64(cpu_regs[reg], cpu_regs[reg]); + break; } +#endif + tcg_gen_bswap32_tl(cpu_regs[reg], cpu_regs[reg], TCG_BSWAP_OZ); break; case 0xd6: /* salc */ if (CODE64(s)) @@ -7108,117 +5639,110 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0xe2: /* loop */ case 0xe3: /* jecxz */ { - TCGLabel *l1, *l2, *l3; - - tval = (int8_t)insn_get(env, s, MO_8); - next_eip = s->pc - s->cs_base; - tval += next_eip; - if (dflag == MO_16) { - tval &= 0xffff; - } + TCGLabel *l1, *l2; + int diff = (int8_t)insn_get(env, s, MO_8); l1 = gen_new_label(); l2 = gen_new_label(); - l3 = gen_new_label(); + gen_update_cc_op(s); b &= 3; switch(b) { case 0: /* loopnz */ case 1: /* loopz */ gen_op_add_reg_im(s, s->aflag, R_ECX, -1); - gen_op_jz_ecx(s, s->aflag, l3); + gen_op_jz_ecx(s, l2); gen_jcc1(s, (JCC_Z << 1) 
| (b ^ 1), l1); break; case 2: /* loop */ gen_op_add_reg_im(s, s->aflag, R_ECX, -1); - gen_op_jnz_ecx(s, s->aflag, l1); + gen_op_jnz_ecx(s, l1); break; default: case 3: /* jcxz */ - gen_op_jz_ecx(s, s->aflag, l1); + gen_op_jz_ecx(s, l1); break; } - gen_set_label(l3); - gen_jmp_im(s, next_eip); - tcg_gen_br(l2); + gen_set_label(l2); + gen_jmp_rel_csize(s, 0, 1); gen_set_label(l1); - gen_jmp_im(s, tval); - gen_set_label(l2); - gen_eob(s); + gen_jmp_rel(s, dflag, diff, 0); } break; case 0x130: /* wrmsr */ case 0x132: /* rdmsr */ - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } else { + if (check_cpl0(s)) { gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); if (b & 2) { - gen_helper_rdmsr(cpu_env); + gen_helper_rdmsr(tcg_env); } else { - gen_helper_wrmsr(cpu_env); + gen_helper_wrmsr(tcg_env); + s->base.is_jmp = DISAS_EOB_NEXT; } } break; case 0x131: /* rdtsc */ gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } - gen_helper_rdtsc(cpu_env); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(); - gen_jmp(s, s->pc - s->cs_base); - } + gen_update_eip_cur(s); + translator_io_start(&s->base); + gen_helper_rdtsc(tcg_env); break; case 0x133: /* rdpmc */ gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_rdpmc(cpu_env); + gen_update_eip_cur(s); + gen_helper_rdpmc(tcg_env); + s->base.is_jmp = DISAS_NORETURN; break; case 0x134: /* sysenter */ - /* For Intel SYSENTER is valid on 64-bit */ - if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) + /* For AMD SYSENTER is not valid in long mode */ + if (LMA(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) { goto illegal_op; - if (!s->pe) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } + if (!PE(s)) { + gen_exception_gpf(s); } else { - gen_helper_sysenter(cpu_env); - gen_eob(s); + gen_helper_sysenter(tcg_env); + s->base.is_jmp = 
DISAS_EOB_ONLY; } break; case 0x135: /* sysexit */ - /* For Intel SYSEXIT is valid on 64-bit */ - if (CODE64(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) + /* For AMD SYSEXIT is not valid in long mode */ + if (LMA(s) && env->cpuid_vendor1 != CPUID_VENDOR_INTEL_1) { goto illegal_op; - if (!s->pe) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + } + if (!PE(s) || CPL(s) != 0) { + gen_exception_gpf(s); } else { - gen_helper_sysexit(cpu_env, tcg_const_i32(dflag - 1)); - gen_eob(s); + gen_helper_sysexit(tcg_env, tcg_constant_i32(dflag - 1)); + s->base.is_jmp = DISAS_EOB_ONLY; } break; -#ifdef TARGET_X86_64 case 0x105: /* syscall */ - /* XXX: is it usable in real mode ? */ + /* For Intel SYSCALL is only valid in long mode */ + if (!LMA(s) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) { + goto illegal_op; + } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_syscall(cpu_env, tcg_const_i32(s->pc - pc_start)); + gen_update_eip_cur(s); + gen_helper_syscall(tcg_env, cur_insn_len_i32(s)); /* TF handling for the syscall insn is different. The TF bit is checked after the syscall insn completes. This allows #DB to not be generated after one has entered CPL0 if TF is set in FMASK. */ gen_eob_worker(s, false, true); break; case 0x107: /* sysret */ - if (!s->pe) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + /* For Intel SYSRET is only valid in long mode */ + if (!LMA(s) && env->cpuid_vendor1 == CPUID_VENDOR_INTEL_1) { + goto illegal_op; + } + if (!PE(s) || CPL(s) != 0) { + gen_exception_gpf(s); } else { - gen_helper_sysret(cpu_env, tcg_const_i32(dflag - 1)); + gen_helper_sysret(tcg_env, tcg_constant_i32(dflag - 1)); /* condition codes are modified only in long mode */ - if (s->lma) { + if (LMA(s)) { set_cc_op(s, CC_OP_EFLAGS); } /* TF handling for the sysret insn is different. 
The TF bit is @@ -7228,19 +5752,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_eob_worker(s, false, true); } break; -#endif case 0x1a2: /* cpuid */ gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_cpuid(cpu_env); + gen_update_eip_cur(s); + gen_helper_cpuid(tcg_env); break; case 0xf4: /* hlt */ - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } else { + if (check_cpl0(s)) { gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_hlt(cpu_env, tcg_const_i32(s->pc - pc_start)); + gen_update_eip_cur(s); + gen_helper_hlt(tcg_env, cur_insn_len_i32(s)); s->base.is_jmp = DISAS_NORETURN; } break; @@ -7250,57 +5771,59 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) op = (modrm >> 3) & 7; switch(op) { case 0: /* sldt */ - if (!s->pe || s->vm86) + if (!PE(s) || VM86(s)) goto illegal_op; - gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_READ); - tcg_gen_ld32u_tl(s->T0, cpu_env, + if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) { + break; + } + gen_svm_check_intercept(s, SVM_EXIT_LDTR_READ); + tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, ldt.selector)); ot = mod == 3 ? 
dflag : MO_16; gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); break; case 2: /* lldt */ - if (!s->pe || s->vm86) + if (!PE(s) || VM86(s)) goto illegal_op; - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } else { - gen_svm_check_intercept(s, pc_start, SVM_EXIT_LDTR_WRITE); + if (check_cpl0(s)) { + gen_svm_check_intercept(s, SVM_EXIT_LDTR_WRITE); gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_lldt(cpu_env, s->tmp2_i32); + gen_helper_lldt(tcg_env, s->tmp2_i32); } break; case 1: /* str */ - if (!s->pe || s->vm86) + if (!PE(s) || VM86(s)) goto illegal_op; - gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_READ); - tcg_gen_ld32u_tl(s->T0, cpu_env, + if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) { + break; + } + gen_svm_check_intercept(s, SVM_EXIT_TR_READ); + tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, tr.selector)); ot = mod == 3 ? dflag : MO_16; gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); break; case 3: /* ltr */ - if (!s->pe || s->vm86) + if (!PE(s) || VM86(s)) goto illegal_op; - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } else { - gen_svm_check_intercept(s, pc_start, SVM_EXIT_TR_WRITE); + if (check_cpl0(s)) { + gen_svm_check_intercept(s, SVM_EXIT_TR_WRITE); gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); tcg_gen_trunc_tl_i32(s->tmp2_i32, s->T0); - gen_helper_ltr(cpu_env, s->tmp2_i32); + gen_helper_ltr(tcg_env, s->tmp2_i32); } break; case 4: /* verr */ case 5: /* verw */ - if (!s->pe || s->vm86) + if (!PE(s) || VM86(s)) goto illegal_op; gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); gen_update_cc_op(s); if (op == 4) { - gen_helper_verr(cpu_env, s->T0); + gen_helper_verr(tcg_env, s->T0); } else { - gen_helper_verw(cpu_env, s->T0); + gen_helper_verw(tcg_env, s->T0); } set_cc_op(s, CC_OP_EFLAGS); break; @@ -7313,13 +5836,16 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) modrm = x86_ldub_code(env, s); switch 
(modrm) { CASE_MODRM_MEM_OP(0): /* sgdt */ - gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_READ); + if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) { + break; + } + gen_svm_check_intercept(s, SVM_EXIT_GDTR_READ); gen_lea_modrm(env, s, modrm); tcg_gen_ld32u_tl(s->T0, - cpu_env, offsetof(CPUX86State, gdt.limit)); + tcg_env, offsetof(CPUX86State, gdt.limit)); gen_op_st_v(s, MO_16, s->T0, s->A0); gen_add_A0_im(s, 2); - tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base)); + tcg_gen_ld_tl(s->T0, tcg_env, offsetof(CPUX86State, gdt.base)); if (dflag == MO_16) { tcg_gen_andi_tl(s->T0, s->T0, 0xffffff); } @@ -7327,54 +5853,54 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; case 0xc8: /* monitor */ - if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) { + if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) { goto illegal_op; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_update_eip_cur(s); tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]); - gen_extu(s->aflag, s->A0); gen_add_A0_ds_seg(s); - gen_helper_monitor(cpu_env, s->A0); + gen_helper_monitor(tcg_env, s->A0); break; case 0xc9: /* mwait */ - if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || s->cpl != 0) { + if (!(s->cpuid_ext_features & CPUID_EXT_MONITOR) || CPL(s) != 0) { goto illegal_op; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_mwait(cpu_env, tcg_const_i32(s->pc - pc_start)); - gen_eob(s); + gen_update_eip_cur(s); + gen_helper_mwait(tcg_env, cur_insn_len_i32(s)); + s->base.is_jmp = DISAS_NORETURN; break; case 0xca: /* clac */ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) - || s->cpl != 0) { + || CPL(s) != 0) { goto illegal_op; } - gen_helper_clac(cpu_env); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + gen_reset_eflags(s, AC_MASK); + s->base.is_jmp = DISAS_EOB_NEXT; break; case 0xcb: /* stac */ if (!(s->cpuid_7_0_ebx_features & CPUID_7_0_EBX_SMAP) - || s->cpl != 0) { + || CPL(s) != 0) { goto 
illegal_op; } - gen_helper_stac(cpu_env); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + gen_set_eflags(s, AC_MASK); + s->base.is_jmp = DISAS_EOB_NEXT; break; CASE_MODRM_MEM_OP(1): /* sidt */ - gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_READ); + if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) { + break; + } + gen_svm_check_intercept(s, SVM_EXIT_IDTR_READ); gen_lea_modrm(env, s, modrm); - tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.limit)); + tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, idt.limit)); gen_op_st_v(s, MO_16, s->T0, s->A0); gen_add_A0_im(s, 2); - tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base)); + tcg_gen_ld_tl(s->T0, tcg_env, offsetof(CPUX86State, idt.base)); if (dflag == MO_16) { tcg_gen_andi_tl(s->T0, s->T0, 0xffffff); } @@ -7388,7 +5914,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]); - gen_helper_xgetbv(s->tmp1_i64, cpu_env, s->tmp2_i32); + gen_helper_xgetbv(s->tmp1_i64, tcg_env, s->tmp2_i32); tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64); break; @@ -7398,129 +5924,122 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) | PREFIX_REPZ | PREFIX_REPNZ))) { goto illegal_op; } - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + gen_svm_check_intercept(s, SVM_EXIT_XSETBV); + if (!check_cpl0(s)) { break; } tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], cpu_regs[R_EDX]); tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]); - gen_helper_xsetbv(cpu_env, s->tmp2_i32, s->tmp1_i64); + gen_helper_xsetbv(tcg_env, s->tmp2_i32, s->tmp1_i64); /* End TB because translation flags may change. 
*/ - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; break; case 0xd8: /* VMRUN */ - if (!(s->flags & HF_SVME_MASK) || !s->pe) { + if (!SVME(s) || !PE(s)) { goto illegal_op; } - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + if (!check_cpl0(s)) { break; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_vmrun(cpu_env, tcg_const_i32(s->aflag - 1), - tcg_const_i32(s->pc - pc_start)); + gen_update_eip_cur(s); + gen_helper_vmrun(tcg_env, tcg_constant_i32(s->aflag - 1), + cur_insn_len_i32(s)); tcg_gen_exit_tb(NULL, 0); s->base.is_jmp = DISAS_NORETURN; break; case 0xd9: /* VMMCALL */ - if (!(s->flags & HF_SVME_MASK)) { + if (!SVME(s)) { goto illegal_op; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_vmmcall(cpu_env); + gen_update_eip_cur(s); + gen_helper_vmmcall(tcg_env); break; case 0xda: /* VMLOAD */ - if (!(s->flags & HF_SVME_MASK) || !s->pe) { + if (!SVME(s) || !PE(s)) { goto illegal_op; } - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + if (!check_cpl0(s)) { break; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_vmload(cpu_env, tcg_const_i32(s->aflag - 1)); + gen_update_eip_cur(s); + gen_helper_vmload(tcg_env, tcg_constant_i32(s->aflag - 1)); break; case 0xdb: /* VMSAVE */ - if (!(s->flags & HF_SVME_MASK) || !s->pe) { + if (!SVME(s) || !PE(s)) { goto illegal_op; } - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + if (!check_cpl0(s)) { break; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_vmsave(cpu_env, tcg_const_i32(s->aflag - 1)); + gen_update_eip_cur(s); + gen_helper_vmsave(tcg_env, tcg_constant_i32(s->aflag - 1)); break; case 0xdc: /* STGI */ - if ((!(s->flags & HF_SVME_MASK) - && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) - || !s->pe) { + if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) + || !PE(s)) { goto 
illegal_op; } - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + if (!check_cpl0(s)) { break; } gen_update_cc_op(s); - gen_helper_stgi(cpu_env); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + gen_helper_stgi(tcg_env); + s->base.is_jmp = DISAS_EOB_NEXT; break; case 0xdd: /* CLGI */ - if (!(s->flags & HF_SVME_MASK) || !s->pe) { + if (!SVME(s) || !PE(s)) { goto illegal_op; } - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + if (!check_cpl0(s)) { break; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_clgi(cpu_env); + gen_update_eip_cur(s); + gen_helper_clgi(tcg_env); break; case 0xde: /* SKINIT */ - if ((!(s->flags & HF_SVME_MASK) - && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) - || !s->pe) { + if ((!SVME(s) && !(s->cpuid_ext3_features & CPUID_EXT3_SKINIT)) + || !PE(s)) { goto illegal_op; } - gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_skinit(cpu_env); - break; + gen_svm_check_intercept(s, SVM_EXIT_SKINIT); + /* If not intercepted, not implemented -- raise #UD. 
*/ + goto illegal_op; case 0xdf: /* INVLPGA */ - if (!(s->flags & HF_SVME_MASK) || !s->pe) { + if (!SVME(s) || !PE(s)) { goto illegal_op; } - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + if (!check_cpl0(s)) { break; } - gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - gen_helper_invlpga(cpu_env, tcg_const_i32(s->aflag - 1)); + gen_svm_check_intercept(s, SVM_EXIT_INVLPGA); + if (s->aflag == MO_64) { + tcg_gen_mov_tl(s->A0, cpu_regs[R_EAX]); + } else { + tcg_gen_ext32u_tl(s->A0, cpu_regs[R_EAX]); + } + gen_helper_flush_page(tcg_env, s->A0); + s->base.is_jmp = DISAS_EOB_NEXT; break; CASE_MODRM_MEM_OP(2): /* lgdt */ - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + if (!check_cpl0(s)) { break; } - gen_svm_check_intercept(s, pc_start, SVM_EXIT_GDTR_WRITE); + gen_svm_check_intercept(s, SVM_EXIT_GDTR_WRITE); gen_lea_modrm(env, s, modrm); gen_op_ld_v(s, MO_16, s->T1, s->A0); gen_add_A0_im(s, 2); @@ -7528,16 +6047,15 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (dflag == MO_16) { tcg_gen_andi_tl(s->T0, s->T0, 0xffffff); } - tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, gdt.base)); - tcg_gen_st32_tl(s->T1, cpu_env, offsetof(CPUX86State, gdt.limit)); + tcg_gen_st_tl(s->T0, tcg_env, offsetof(CPUX86State, gdt.base)); + tcg_gen_st32_tl(s->T1, tcg_env, offsetof(CPUX86State, gdt.limit)); break; CASE_MODRM_MEM_OP(3): /* lidt */ - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + if (!check_cpl0(s)) { break; } - gen_svm_check_intercept(s, pc_start, SVM_EXIT_IDTR_WRITE); + gen_svm_check_intercept(s, SVM_EXIT_IDTR_WRITE); gen_lea_modrm(env, s, modrm); gen_op_ld_v(s, MO_16, s->T1, s->A0); gen_add_A0_im(s, 2); @@ -7545,19 +6063,23 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (dflag == MO_16) { tcg_gen_andi_tl(s->T0, s->T0, 0xffffff); } - tcg_gen_st_tl(s->T0, cpu_env, offsetof(CPUX86State, idt.base)); - tcg_gen_st32_tl(s->T1, cpu_env, 
offsetof(CPUX86State, idt.limit)); + tcg_gen_st_tl(s->T0, tcg_env, offsetof(CPUX86State, idt.base)); + tcg_gen_st32_tl(s->T1, tcg_env, offsetof(CPUX86State, idt.limit)); break; CASE_MODRM_OP(4): /* smsw */ - gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_CR0); - tcg_gen_ld_tl(s->T0, cpu_env, offsetof(CPUX86State, cr[0])); - if (CODE64(s)) { - mod = (modrm >> 6) & 3; - ot = (mod != 3 ? MO_16 : s->dflag); - } else { - ot = MO_16; + if (s->flags & HF_UMIP_MASK && !check_cpl0(s)) { + break; } + gen_svm_check_intercept(s, SVM_EXIT_READ_CR0); + tcg_gen_ld_tl(s->T0, tcg_env, offsetof(CPUX86State, cr[0])); + /* + * In 32-bit mode, the higher 16 bits of the destination + * register are undefined. In practice CR0[31:0] is stored + * just like in 64-bit mode. + */ + mod = (modrm >> 6) & 3; + ot = (mod != 3 ? MO_16 : s->dflag); gen_ldst_modrm(env, s, modrm, ot, OR_TMP0, 1); break; case 0xee: /* rdpkru */ @@ -7565,7 +6087,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]); - gen_helper_rdpkru(s->tmp1_i64, cpu_env, s->tmp2_i32); + gen_helper_rdpkru(s->tmp1_i64, tcg_env, s->tmp2_i32); tcg_gen_extr_i64_tl(cpu_regs[R_EAX], cpu_regs[R_EDX], s->tmp1_i64); break; case 0xef: /* wrpkru */ @@ -7575,43 +6097,45 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], cpu_regs[R_EDX]); tcg_gen_trunc_tl_i32(s->tmp2_i32, cpu_regs[R_ECX]); - gen_helper_wrpkru(cpu_env, s->tmp2_i32, s->tmp1_i64); + gen_helper_wrpkru(tcg_env, s->tmp2_i32, s->tmp1_i64); break; + CASE_MODRM_OP(6): /* lmsw */ - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + if (!check_cpl0(s)) { break; } - gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0); + gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0); gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); - gen_helper_lmsw(cpu_env, s->T0); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + /* + * 
Only the 4 lower bits of CR0 are modified. + * PE cannot be set to zero if already set to one. + */ + tcg_gen_ld_tl(s->T1, tcg_env, offsetof(CPUX86State, cr[0])); + tcg_gen_andi_tl(s->T0, s->T0, 0xf); + tcg_gen_andi_tl(s->T1, s->T1, ~0xe); + tcg_gen_or_tl(s->T0, s->T0, s->T1); + gen_helper_write_crN(tcg_env, tcg_constant_i32(0), s->T0); + s->base.is_jmp = DISAS_EOB_NEXT; break; CASE_MODRM_MEM_OP(7): /* invlpg */ - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); + if (!check_cpl0(s)) { break; } - gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); + gen_svm_check_intercept(s, SVM_EXIT_INVLPG); gen_lea_modrm(env, s, modrm); - gen_helper_invlpg(cpu_env, s->A0); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + gen_helper_flush_page(tcg_env, s->A0); + s->base.is_jmp = DISAS_EOB_NEXT; break; case 0xf8: /* swapgs */ #ifdef TARGET_X86_64 if (CODE64(s)) { - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } else { + if (check_cpl0(s)) { tcg_gen_mov_tl(s->T0, cpu_seg_base[R_GS]); - tcg_gen_ld_tl(cpu_seg_base[R_GS], cpu_env, + tcg_gen_ld_tl(cpu_seg_base[R_GS], tcg_env, offsetof(CPUX86State, kernelgsbase)); - tcg_gen_st_tl(s->T0, cpu_env, + tcg_gen_st_tl(s->T0, tcg_env, offsetof(CPUX86State, kernelgsbase)); } break; @@ -7624,15 +6148,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } - gen_helper_rdtscp(cpu_env); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(); - gen_jmp(s, s->pc - s->cs_base); - } + gen_update_eip_cur(s); + translator_io_start(&s->base); + gen_helper_rdtsc(tcg_env); + gen_helper_rdpid(s->T0, tcg_env); + gen_op_mov_reg_v(s, dflag, R_ECX, s->T0); break; default: @@ -7641,11 +6161,9 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) break; case 0x108: /* invd */ - case 0x109: /* wbinvd */ - if (s->cpl != 
0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } else { - gen_svm_check_intercept(s, pc_start, (b & 2) ? SVM_EXIT_INVD : SVM_EXIT_WBINVD); + case 0x109: /* wbinvd; wbnoinvd with REPZ prefix */ + if (check_cpl0(s)) { + gen_svm_check_intercept(s, (b & 1) ? SVM_EXIT_WBINVD : SVM_EXIT_INVD); /* nothing to do */ } break; @@ -7657,7 +6175,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) d_ot = dflag; modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); mod = (modrm >> 6) & 3; rm = (modrm & 7) | REX_B(s); @@ -7677,13 +6195,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) #endif { TCGLabel *label1; - TCGv t0, t1, t2, a0; + TCGv t0, t1, t2; - if (!s->pe || s->vm86) + if (!PE(s) || VM86(s)) goto illegal_op; - t0 = tcg_temp_local_new(); - t1 = tcg_temp_local_new(); - t2 = tcg_temp_local_new(); + t0 = tcg_temp_new(); + t1 = tcg_temp_new(); + t2 = tcg_temp_new(); ot = MO_16; modrm = x86_ldub_code(env, s); reg = (modrm >> 3) & 7; @@ -7692,11 +6210,8 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) if (mod != 3) { gen_lea_modrm(env, s, modrm); gen_op_ld_v(s, ot, t0, s->A0); - a0 = tcg_temp_local_new(); - tcg_gen_mov_tl(a0, s->A0); } else { gen_op_mov_v_reg(s, ot, t0, rm); - a0 = NULL; } gen_op_mov_v_reg(s, ot, t1, reg); tcg_gen_andi_tl(s->tmp0, t0, 3); @@ -7709,17 +6224,13 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_movi_tl(t2, CC_Z); gen_set_label(label1); if (mod != 3) { - gen_op_st_v(s, ot, t0, a0); - tcg_temp_free(a0); + gen_op_st_v(s, ot, t0, s->A0); } else { gen_op_mov_reg_v(s, ot, rm, t0); } gen_compute_eflags(s); tcg_gen_andi_tl(cpu_cc_src, cpu_cc_src, ~CC_Z); tcg_gen_or_tl(cpu_cc_src, cpu_cc_src, t2); - tcg_temp_free(t0); - tcg_temp_free(t1); - tcg_temp_free(t2); } break; case 0x102: /* lar */ @@ -7727,18 +6238,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) { TCGLabel *label1; TCGv t0; - if (!s->pe 
|| s->vm86) + if (!PE(s) || VM86(s)) goto illegal_op; ot = dflag != MO_16 ? MO_32 : MO_16; modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); gen_ldst_modrm(env, s, modrm, MO_16, OR_TMP0, 0); - t0 = tcg_temp_local_new(); + t0 = tcg_temp_new(); gen_update_cc_op(s); if (b == 0x102) { - gen_helper_lar(t0, cpu_env, s->T0); + gen_helper_lar(t0, tcg_env, s->T0); } else { - gen_helper_lsl(t0, cpu_env, s->T0); + gen_helper_lsl(t0, tcg_env, s->T0); } tcg_gen_andi_tl(s->tmp0, cpu_cc_src, CC_Z); label1 = gen_new_label(); @@ -7746,7 +6257,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_op_mov_reg_v(s, ot, reg, t0); gen_set_label(label1); set_cc_op(s, CC_OP_EFLAGS); - tcg_temp_free(t0); } break; case 0x118: @@ -7772,7 +6282,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) modrm = x86_ldub_code(env, s); if (s->flags & HF_MPX_EN_MASK) { mod = (modrm >> 6) & 3; - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); if (prefixes & PREFIX_REPZ) { /* bndcl */ if (reg >= 4 @@ -7791,7 +6301,6 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) TCGv_i64 notu = tcg_temp_new_i64(); tcg_gen_not_i64(notu, cpu_bndu[reg]); gen_bndck(env, s, modrm, TCG_COND_GTU, notu); - tcg_temp_free_i64(notu); } else if (prefixes & PREFIX_DATA) { /* bndmov -- from reg/mem */ if (reg >= 4 || s->aflag == MO_16) { @@ -7810,10 +6319,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_lea_modrm(env, s, modrm); if (CODE64(s)) { tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0, - s->mem_index, MO_LEQ); + s->mem_index, MO_LEUQ); tcg_gen_addi_tl(s->A0, s->A0, 8); tcg_gen_qemu_ld_i64(cpu_bndu[reg], s->A0, - s->mem_index, MO_LEQ); + s->mem_index, MO_LEUQ); } else { tcg_gen_qemu_ld_i64(cpu_bndl[reg], s->A0, s->mem_index, MO_LEUL); @@ -7845,11 +6354,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_movi_tl(s->T0, 0); } if (CODE64(s)) { - 
gen_helper_bndldx64(cpu_bndl[reg], cpu_env, s->A0, s->T0); - tcg_gen_ld_i64(cpu_bndu[reg], cpu_env, + gen_helper_bndldx64(cpu_bndl[reg], tcg_env, s->A0, s->T0); + tcg_gen_ld_i64(cpu_bndu[reg], tcg_env, offsetof(CPUX86State, mmx_t0.MMX_Q(0))); } else { - gen_helper_bndldx32(cpu_bndu[reg], cpu_env, s->A0, s->T0); + gen_helper_bndldx32(cpu_bndu[reg], tcg_env, s->A0, s->T0); tcg_gen_ext32u_i64(cpu_bndl[reg], cpu_bndu[reg]); tcg_gen_shri_i64(cpu_bndu[reg], cpu_bndu[reg], 32); } @@ -7862,7 +6371,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) modrm = x86_ldub_code(env, s); if (s->flags & HF_MPX_EN_MASK) { mod = (modrm >> 6) & 3; - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); if (mod != 3 && (prefixes & PREFIX_REPZ)) { /* bndmk */ if (reg >= 4 @@ -7883,7 +6392,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /* rip-relative generates #ud */ goto illegal_op; } - tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a)); + tcg_gen_not_tl(s->A0, gen_lea_modrm_1(s, a, false)); if (!CODE64(s)) { tcg_gen_ext32u_tl(s->A0, s->A0); } @@ -7917,10 +6426,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_lea_modrm(env, s, modrm); if (CODE64(s)) { tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0, - s->mem_index, MO_LEQ); + s->mem_index, MO_LEUQ); tcg_gen_addi_tl(s->A0, s->A0, 8); tcg_gen_qemu_st_i64(cpu_bndu[reg], s->A0, - s->mem_index, MO_LEQ); + s->mem_index, MO_LEUQ); } else { tcg_gen_qemu_st_i64(cpu_bndl[reg], s->A0, s->mem_index, MO_LEUL); @@ -7950,10 +6459,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) tcg_gen_movi_tl(s->T0, 0); } if (CODE64(s)) { - gen_helper_bndstx64(cpu_env, s->A0, s->T0, + gen_helper_bndstx64(tcg_env, s->A0, s->T0, cpu_bndl[reg], cpu_bndu[reg]); } else { - gen_helper_bndstx32(cpu_env, s->A0, s->T0, + gen_helper_bndstx32(tcg_env, s->A0, s->T0, cpu_bndl[reg], cpu_bndu[reg]); } } @@ -7964,68 +6473,54 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) modrm = 
x86_ldub_code(env, s); gen_nop_modrm(env, s, modrm); break; + case 0x120: /* mov reg, crN */ case 0x122: /* mov crN, reg */ - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } else { - modrm = x86_ldub_code(env, s); - /* Ignore the mod bits (assume (modrm&0xc0)==0xc0). - * AMD documentation (24594.pdf) and testing of - * intel 386 and 486 processors all show that the mod bits - * are assumed to be 1's, regardless of actual values. - */ - rm = (modrm & 7) | REX_B(s); - reg = ((modrm >> 3) & 7) | rex_r; - if (CODE64(s)) - ot = MO_64; - else - ot = MO_32; - if ((prefixes & PREFIX_LOCK) && (reg == 0) && + if (!check_cpl0(s)) { + break; + } + modrm = x86_ldub_code(env, s); + /* + * Ignore the mod bits (assume (modrm&0xc0)==0xc0). + * AMD documentation (24594.pdf) and testing of Intel 386 and 486 + * processors all show that the mod bits are assumed to be 1's, + * regardless of actual values. + */ + rm = (modrm & 7) | REX_B(s); + reg = ((modrm >> 3) & 7) | REX_R(s); + switch (reg) { + case 0: + if ((prefixes & PREFIX_LOCK) && (s->cpuid_ext3_features & CPUID_EXT3_CR8LEG)) { reg = 8; } - switch(reg) { - case 0: - case 2: - case 3: - case 4: - case 8: - gen_update_cc_op(s); - gen_jmp_im(s, pc_start - s->cs_base); - if (b & 2) { - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } - gen_op_mov_v_reg(s, ot, s->T0, rm); - gen_helper_write_crN(cpu_env, tcg_const_i32(reg), - s->T0); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(); - } - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); - } else { - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } - gen_helper_read_crN(s->T0, cpu_env, tcg_const_i32(reg)); - gen_op_mov_reg_v(s, ot, rm, s->T0); - if (tb_cflags(s->base.tb) & CF_USE_ICOUNT) { - gen_io_end(); - } - } - break; - default: - goto unknown_op; - } + break; + case 2: + case 3: + case 4: + case 8: + break; + default: + goto unknown_op; + } + ot = (CODE64(s) ? 
MO_64 : MO_32); + + translator_io_start(&s->base); + if (b & 2) { + gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0 + reg); + gen_op_mov_v_reg(s, ot, s->T0, rm); + gen_helper_write_crN(tcg_env, tcg_constant_i32(reg), s->T0); + s->base.is_jmp = DISAS_EOB_NEXT; + } else { + gen_svm_check_intercept(s, SVM_EXIT_READ_CR0 + reg); + gen_helper_read_crN(s->T0, tcg_env, tcg_constant_i32(reg)); + gen_op_mov_reg_v(s, ot, rm, s->T0); } break; + case 0x121: /* mov reg, drN */ case 0x123: /* mov drN, reg */ - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } else { + if (check_cpl0(s)) { modrm = x86_ldub_code(env, s); /* Ignore the mod bits (assume (modrm&0xc0)==0xc0). * AMD documentation (24594.pdf) and testing of @@ -8033,7 +6528,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) * are assumed to be 1's, regardless of actual values. */ rm = (modrm & 7) | REX_B(s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); if (CODE64(s)) ot = MO_64; else @@ -8042,29 +6537,25 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } if (b & 2) { - gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_DR0 + reg); + gen_svm_check_intercept(s, SVM_EXIT_WRITE_DR0 + reg); gen_op_mov_v_reg(s, ot, s->T0, rm); tcg_gen_movi_i32(s->tmp2_i32, reg); - gen_helper_set_dr(cpu_env, s->tmp2_i32, s->T0); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + gen_helper_set_dr(tcg_env, s->tmp2_i32, s->T0); + s->base.is_jmp = DISAS_EOB_NEXT; } else { - gen_svm_check_intercept(s, pc_start, SVM_EXIT_READ_DR0 + reg); + gen_svm_check_intercept(s, SVM_EXIT_READ_DR0 + reg); tcg_gen_movi_i32(s->tmp2_i32, reg); - gen_helper_get_dr(s->T0, cpu_env, s->tmp2_i32); + gen_helper_get_dr(s->T0, tcg_env, s->tmp2_i32); gen_op_mov_reg_v(s, ot, rm, s->T0); } } break; case 0x106: /* clts */ - if (s->cpl != 0) { - gen_exception(s, EXCP0D_GPF, pc_start - s->cs_base); - } else { - gen_svm_check_intercept(s, pc_start, SVM_EXIT_WRITE_CR0); - 
gen_helper_clts(cpu_env); + if (check_cpl0(s)) { + gen_svm_check_intercept(s, SVM_EXIT_WRITE_CR0); + gen_helper_clts(tcg_env); /* abort block because static cpu state changed */ - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; } break; /* MMX/3DNow!/SSE/SSE2/SSE3/SSSE3/SSE4 support */ @@ -8076,7 +6567,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) mod = (modrm >> 6) & 3; if (mod == 3) goto illegal_op; - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); /* generate a generic store */ gen_ldst_modrm(env, s, modrm, ot, reg, 1); break; @@ -8089,11 +6580,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + gen_exception(s, EXCP07_PREX); break; } gen_lea_modrm(env, s, modrm); - gen_helper_fxsave(cpu_env, s->A0); + gen_helper_fxsave(tcg_env, s->A0); break; CASE_MODRM_MEM_OP(1): /* fxrstor */ @@ -8102,11 +6593,11 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } if ((s->flags & HF_EM_MASK) || (s->flags & HF_TS_MASK)) { - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + gen_exception(s, EXCP07_PREX); break; } gen_lea_modrm(env, s, modrm); - gen_helper_fxrstor(cpu_env, s->A0); + gen_helper_fxrstor(tcg_env, s->A0); break; CASE_MODRM_MEM_OP(2): /* ldmxcsr */ @@ -8114,12 +6605,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; } if (s->flags & HF_TS_MASK) { - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + gen_exception(s, EXCP07_PREX); break; } gen_lea_modrm(env, s, modrm); tcg_gen_qemu_ld_i32(s->tmp2_i32, s->A0, s->mem_index, MO_LEUL); - gen_helper_ldmxcsr(cpu_env, s->tmp2_i32); + gen_helper_ldmxcsr(tcg_env, s->tmp2_i32); break; CASE_MODRM_MEM_OP(3): /* stmxcsr */ @@ -8127,11 +6618,12 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto 
illegal_op; } if (s->flags & HF_TS_MASK) { - gen_exception(s, EXCP07_PREX, pc_start - s->cs_base); + gen_exception(s, EXCP07_PREX); break; } + gen_helper_update_mxcsr(tcg_env); gen_lea_modrm(env, s, modrm); - tcg_gen_ld32u_tl(s->T0, cpu_env, offsetof(CPUX86State, mxcsr)); + tcg_gen_ld32u_tl(s->T0, tcg_env, offsetof(CPUX86State, mxcsr)); gen_op_st_v(s, MO_32, s->T0, s->A0); break; @@ -8144,7 +6636,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_lea_modrm(env, s, modrm); tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], cpu_regs[R_EDX]); - gen_helper_xsave(cpu_env, s->A0, s->tmp1_i64); + gen_helper_xsave(tcg_env, s->A0, s->tmp1_i64); break; CASE_MODRM_MEM_OP(5): /* xrstor */ @@ -8156,12 +6648,10 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_lea_modrm(env, s, modrm); tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], cpu_regs[R_EDX]); - gen_helper_xrstor(cpu_env, s->A0, s->tmp1_i64); + gen_helper_xrstor(tcg_env, s->A0, s->tmp1_i64); /* XRSTOR is how MPX is enabled, which changes how we translate. Thus we need to end the TB. */ - gen_update_cc_op(s); - gen_jmp_im(s, s->pc - s->cs_base); - gen_eob(s); + s->base.is_jmp = DISAS_EOB_NEXT; break; CASE_MODRM_MEM_OP(6): /* xsaveopt / clwb */ @@ -8184,7 +6674,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_lea_modrm(env, s, modrm); tcg_gen_concat_tl_i64(s->tmp1_i64, cpu_regs[R_EAX], cpu_regs[R_EDX]); - gen_helper_xsaveopt(cpu_env, s->A0, s->tmp1_i64); + gen_helper_xsaveopt(tcg_env, s->A0, s->tmp1_i64); } break; @@ -8219,7 +6709,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) /* Preserve hflags bits by testing CR4 at runtime. */ tcg_gen_movi_i32(s->tmp2_i32, CR4_FSGSBASE_MASK); - gen_helper_cr4_testbit(cpu_env, s->tmp2_i32); + gen_helper_cr4_testbit(tcg_env, s->tmp2_i32); base = cpu_seg_base[modrm & 8 ? 
R_GS : R_FS]; treg = cpu_regs[(modrm & 7) | REX_B(s)]; @@ -8286,13 +6776,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) gen_nop_modrm(env, s, modrm); break; case 0x1aa: /* rsm */ - gen_svm_check_intercept(s, pc_start, SVM_EXIT_RSM); + gen_svm_check_intercept(s, SVM_EXIT_RSM); if (!(s->flags & HF_SMM_MASK)) goto illegal_op; +#ifdef CONFIG_USER_ONLY + /* we should not be in SMM mode */ + g_assert_not_reached(); +#else gen_update_cc_op(s); - gen_jmp_im(s, s->pc - s->cs_base); - gen_helper_rsm(cpu_env); - gen_eob(s); + gen_update_eip_next(s); + gen_helper_rsm(tcg_env); +#endif /* CONFIG_USER_ONLY */ + s->base.is_jmp = DISAS_EOB_ONLY; break; case 0x1b8: /* SSE4.2 popcnt */ if ((prefixes & (PREFIX_REPZ | PREFIX_LOCK | PREFIX_REPNZ)) != @@ -8302,7 +6797,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) goto illegal_op; modrm = x86_ldub_code(env, s); - reg = ((modrm >> 3) & 7) | rex_r; + reg = ((modrm >> 3) & 7) | REX_R(s); if (s->prefix & PREFIX_DATA) { ot = MO_16; @@ -8318,11 +6813,7 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) set_cc_op(s, CC_OP_POPCNT); break; - case 0x10e ... 0x10f: - /* 3DNow! instructions, ignore prefixes */ - s->prefix &= ~(PREFIX_REPZ | PREFIX_REPNZ | PREFIX_DATA); - /* fall through */ - case 0x110 ... 0x117: + case 0x10e ... 0x117: case 0x128 ... 0x12f: case 0x138 ... 0x13a: case 0x150 ... 0x179: @@ -8330,18 +6821,18 @@ static target_ulong disas_insn(DisasContext *s, CPUState *cpu) case 0x1c2: case 0x1c4 ... 0x1c6: case 0x1d0 ... 
0x1fe: - gen_sse(env, s, b, pc_start, rex_r); + disas_insn_new(s, cpu, b); break; default: goto unknown_op; } - return s->pc; + return true; illegal_op: gen_illegal_opcode(s); - return s->pc; + return true; unknown_op: gen_unknown_opcode(env, s); - return s->pc; + return true; } void tcg_x86_init(void) @@ -8375,6 +6866,13 @@ void tcg_x86_init(void) [R_ESP] = "esp", #endif }; + static const char eip_name[] = { +#ifdef TARGET_X86_64 + "rip" +#else + "eip" +#endif + }; static const char seg_base_names[6][8] = { [R_CS] = "cs_base", [R_DS] = "ds_base", @@ -8391,35 +6889,36 @@ void tcg_x86_init(void) }; int i; - cpu_cc_op = tcg_global_mem_new_i32(cpu_env, + cpu_cc_op = tcg_global_mem_new_i32(tcg_env, offsetof(CPUX86State, cc_op), "cc_op"); - cpu_cc_dst = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_dst), + cpu_cc_dst = tcg_global_mem_new(tcg_env, offsetof(CPUX86State, cc_dst), "cc_dst"); - cpu_cc_src = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src), + cpu_cc_src = tcg_global_mem_new(tcg_env, offsetof(CPUX86State, cc_src), "cc_src"); - cpu_cc_src2 = tcg_global_mem_new(cpu_env, offsetof(CPUX86State, cc_src2), + cpu_cc_src2 = tcg_global_mem_new(tcg_env, offsetof(CPUX86State, cc_src2), "cc_src2"); + cpu_eip = tcg_global_mem_new(tcg_env, offsetof(CPUX86State, eip), eip_name); for (i = 0; i < CPU_NB_REGS; ++i) { - cpu_regs[i] = tcg_global_mem_new(cpu_env, + cpu_regs[i] = tcg_global_mem_new(tcg_env, offsetof(CPUX86State, regs[i]), reg_names[i]); } for (i = 0; i < 6; ++i) { cpu_seg_base[i] - = tcg_global_mem_new(cpu_env, + = tcg_global_mem_new(tcg_env, offsetof(CPUX86State, segs[i].base), seg_base_names[i]); } for (i = 0; i < 4; ++i) { cpu_bndl[i] - = tcg_global_mem_new_i64(cpu_env, + = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, bnd_regs[i].lb), bnd_regl_names[i]); cpu_bndu[i] - = tcg_global_mem_new_i64(cpu_env, + = tcg_global_mem_new_i64(tcg_env, offsetof(CPUX86State, bnd_regs[i].ub), bnd_regu_names[i]); } @@ -8428,57 +6927,54 @@ void 
tcg_x86_init(void) static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); - CPUX86State *env = cpu->env_ptr; + CPUX86State *env = cpu_env(cpu); uint32_t flags = dc->base.tb->flags; - target_ulong cs_base = dc->base.tb->cs_base; - - dc->pe = (flags >> HF_PE_SHIFT) & 1; - dc->code32 = (flags >> HF_CS32_SHIFT) & 1; - dc->ss32 = (flags >> HF_SS32_SHIFT) & 1; - dc->addseg = (flags >> HF_ADDSEG_SHIFT) & 1; - dc->f_st = 0; - dc->vm86 = (flags >> VM_SHIFT) & 1; - dc->cpl = (flags >> HF_CPL_SHIFT) & 3; - dc->iopl = (flags >> IOPL_SHIFT) & 3; - dc->tf = (flags >> TF_SHIFT) & 1; + uint32_t cflags = tb_cflags(dc->base.tb); + int cpl = (flags >> HF_CPL_SHIFT) & 3; + int iopl = (flags >> IOPL_SHIFT) & 3; + + dc->cs_base = dc->base.tb->cs_base; + dc->pc_save = dc->base.pc_next; + dc->flags = flags; +#ifndef CONFIG_USER_ONLY + dc->cpl = cpl; + dc->iopl = iopl; +#endif + + /* We make some simplifying assumptions; validate they're correct. 
*/ + g_assert(PE(dc) == ((flags & HF_PE_MASK) != 0)); + g_assert(CPL(dc) == cpl); + g_assert(IOPL(dc) == iopl); + g_assert(VM86(dc) == ((flags & HF_VM_MASK) != 0)); + g_assert(CODE32(dc) == ((flags & HF_CS32_MASK) != 0)); + g_assert(CODE64(dc) == ((flags & HF_CS64_MASK) != 0)); + g_assert(SS32(dc) == ((flags & HF_SS32_MASK) != 0)); + g_assert(LMA(dc) == ((flags & HF_LMA_MASK) != 0)); + g_assert(ADDSEG(dc) == ((flags & HF_ADDSEG_MASK) != 0)); + g_assert(SVME(dc) == ((flags & HF_SVME_MASK) != 0)); + g_assert(GUEST(dc) == ((flags & HF_GUEST_MASK) != 0)); + dc->cc_op = CC_OP_DYNAMIC; dc->cc_op_dirty = false; - dc->cs_base = cs_base; dc->popl_esp_hack = 0; /* select memory access functions */ - dc->mem_index = 0; -#ifdef CONFIG_SOFTMMU - dc->mem_index = cpu_mmu_index(env, false); -#endif + dc->mem_index = cpu_mmu_index(cpu, false); dc->cpuid_features = env->features[FEAT_1_EDX]; dc->cpuid_ext_features = env->features[FEAT_1_ECX]; dc->cpuid_ext2_features = env->features[FEAT_8000_0001_EDX]; dc->cpuid_ext3_features = env->features[FEAT_8000_0001_ECX]; dc->cpuid_7_0_ebx_features = env->features[FEAT_7_0_EBX]; + dc->cpuid_7_0_ecx_features = env->features[FEAT_7_0_ECX]; + dc->cpuid_7_1_eax_features = env->features[FEAT_7_1_EAX]; dc->cpuid_xsave_features = env->features[FEAT_XSAVE]; -#ifdef TARGET_X86_64 - dc->lma = (flags >> HF_LMA_SHIFT) & 1; - dc->code64 = (flags >> HF_CS64_SHIFT) & 1; -#endif - dc->flags = flags; - dc->jmp_opt = !(dc->tf || dc->base.singlestep_enabled || - (flags & HF_INHIBIT_IRQ_MASK)); - /* Do not optimize repz jumps at all in icount mode, because - rep movsS instructions are execured with different paths - in !repz_opt and repz_opt modes. The first one was used - always except single step mode. And this setting - disables jumps optimization and control paths become - equivalent in run and single step modes. 
- Now there will be no jump optimization for repz in - record/replay modes and there will always be an - additional step for ecx=0 when icount is enabled. + dc->jmp_opt = !((cflags & CF_NO_GOTO_TB) || + (flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK))); + /* + * If jmp_opt, we want to handle each string instruction individually. + * For icount also disable repz optimization so that each iteration + * is accounted separately. */ - dc->repz_opt = !dc->jmp_opt && !(tb_cflags(dc->base.tb) & CF_USE_ICOUNT); -#if 0 - /* check addseg logic */ - if (!dc->addseg && (dc->vm86 || !dc->pe || !dc->code32)) - printf("ERROR addseg\n"); -#endif + dc->repz_opt = !dc->jmp_opt && !(cflags & CF_USE_ICOUNT); dc->T0 = tcg_temp_new(); dc->T1 = tcg_temp_new(); @@ -8489,9 +6985,7 @@ static void i386_tr_init_disas_context(DisasContextBase *dcbase, CPUState *cpu) dc->tmp2_i32 = tcg_temp_new_i32(); dc->tmp3_i32 = tcg_temp_new_i32(); dc->tmp4 = tcg_temp_new(); - dc->ptr0 = tcg_temp_new_ptr(); - dc->ptr1 = tcg_temp_new_ptr(); - dc->cc_srcT = tcg_temp_local_new(); + dc->cc_srcT = tcg_temp_new(); } static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu) @@ -8501,104 +6995,106 @@ static void i386_tr_tb_start(DisasContextBase *db, CPUState *cpu) static void i386_tr_insn_start(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); + target_ulong pc_arg = dc->base.pc_next; - tcg_gen_insn_start(dc->base.pc_next, dc->cc_op); -} - -static bool i386_tr_breakpoint_check(DisasContextBase *dcbase, CPUState *cpu, - const CPUBreakpoint *bp) -{ - DisasContext *dc = container_of(dcbase, DisasContext, base); - /* If RF is set, suppress an internally generated breakpoint. */ - int flags = dc->base.tb->flags & HF_RF_MASK ? 
BP_GDB : BP_ANY; - if (bp->flags & flags) { - gen_debug(dc, dc->base.pc_next - dc->cs_base); - dc->base.is_jmp = DISAS_NORETURN; - /* The address covered by the breakpoint must be included in - [tb->pc, tb->pc + tb->size) in order to for it to be - properly cleared -- thus we increment the PC here so that - the generic logic setting tb->size later does the right thing. */ - dc->base.pc_next += 1; - return true; - } else { - return false; + dc->prev_insn_start = dc->base.insn_start; + dc->prev_insn_end = tcg_last_op(); + if (tb_cflags(dcbase->tb) & CF_PCREL) { + pc_arg &= ~TARGET_PAGE_MASK; } + tcg_gen_insn_start(pc_arg, dc->cc_op); } static void i386_tr_translate_insn(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); - target_ulong pc_next = disas_insn(dc, cpu); - - if (dc->tf || (dc->base.tb->flags & HF_INHIBIT_IRQ_MASK)) { - /* if single step mode, we generate only one instruction and - generate an exception */ - /* if irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear - the flag and abort the translation to give the irqs a - chance to happen */ - dc->base.is_jmp = DISAS_TOO_MANY; - } else if ((tb_cflags(dc->base.tb) & CF_USE_ICOUNT) - && ((pc_next & TARGET_PAGE_MASK) - != ((pc_next + TARGET_MAX_INSN_SIZE - 1) - & TARGET_PAGE_MASK) - || (pc_next & ~TARGET_PAGE_MASK) == 0)) { - /* Do not cross the boundary of the pages in icount mode, - it can cause an exception. Do it only when boundary is - crossed by the first instruction in the block. - If current instruction already crossed the bound - it's ok, - because an exception hasn't stopped this code. - */ - dc->base.is_jmp = DISAS_TOO_MANY; - } else if ((pc_next - dc->base.pc_first) >= (TARGET_PAGE_SIZE - 32)) { - dc->base.is_jmp = DISAS_TOO_MANY; + +#ifdef TARGET_VSYSCALL_PAGE + /* + * Detect entry into the vsyscall page and invoke the syscall. 
+ */ + if ((dc->base.pc_next & TARGET_PAGE_MASK) == TARGET_VSYSCALL_PAGE) { + gen_exception(dc, EXCP_VSYSCALL); + dc->base.pc_next = dc->pc + 1; + return; } +#endif + + if (disas_insn(dc, cpu)) { + target_ulong pc_next = dc->pc; + dc->base.pc_next = pc_next; - dc->base.pc_next = pc_next; + if (dc->base.is_jmp == DISAS_NEXT) { + if (dc->flags & (HF_TF_MASK | HF_INHIBIT_IRQ_MASK)) { + /* + * If single step mode, we generate only one instruction and + * generate an exception. + * If irq were inhibited with HF_INHIBIT_IRQ_MASK, we clear + * the flag and abort the translation to give the irqs a + * chance to happen. + */ + dc->base.is_jmp = DISAS_EOB_NEXT; + } else if (!is_same_page(&dc->base, pc_next)) { + dc->base.is_jmp = DISAS_TOO_MANY; + } + } + } } static void i386_tr_tb_stop(DisasContextBase *dcbase, CPUState *cpu) { DisasContext *dc = container_of(dcbase, DisasContext, base); - if (dc->base.is_jmp == DISAS_TOO_MANY) { - gen_jmp_im(dc, dc->base.pc_next - dc->cs_base); + switch (dc->base.is_jmp) { + case DISAS_NORETURN: + break; + case DISAS_TOO_MANY: + gen_update_cc_op(dc); + gen_jmp_rel_csize(dc, 0, 0); + break; + case DISAS_EOB_NEXT: + gen_update_cc_op(dc); + gen_update_eip_cur(dc); + /* fall through */ + case DISAS_EOB_ONLY: gen_eob(dc); + break; + case DISAS_EOB_INHIBIT_IRQ: + gen_update_cc_op(dc); + gen_update_eip_cur(dc); + gen_eob_inhibit_irq(dc, true); + break; + case DISAS_JUMP: + gen_jr(dc); + break; + default: + g_assert_not_reached(); } } static void i386_tr_disas_log(const DisasContextBase *dcbase, - CPUState *cpu) + CPUState *cpu, FILE *logfile) { DisasContext *dc = container_of(dcbase, DisasContext, base); - qemu_log("IN: %s\n", lookup_symbol(dc->base.pc_first)); - log_target_disas(cpu, dc->base.pc_first, dc->base.tb->size); + fprintf(logfile, "IN: %s\n", lookup_symbol(dc->base.pc_first)); + target_disas(logfile, cpu, dc->base.pc_first, dc->base.tb->size); } static const TranslatorOps i386_tr_ops = { .init_disas_context = 
i386_tr_init_disas_context, .tb_start = i386_tr_tb_start, .insn_start = i386_tr_insn_start, - .breakpoint_check = i386_tr_breakpoint_check, .translate_insn = i386_tr_translate_insn, .tb_stop = i386_tr_tb_stop, .disas_log = i386_tr_disas_log, }; /* generate intermediate code for basic block 'tb'. */ -void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb) +void gen_intermediate_code(CPUState *cpu, TranslationBlock *tb, int *max_insns, + vaddr pc, void *host_pc) { DisasContext dc; - translator_loop(&i386_tr_ops, &dc.base, cpu, tb); -} - -void restore_state_to_opc(CPUX86State *env, TranslationBlock *tb, - target_ulong *data) -{ - int cc_op = data[1]; - env->eip = data[0] - tb->cs_base; - if (cc_op != CC_OP_DYNAMIC) { - env->cc_op = cc_op; - } + translator_loop(cpu, tb, max_insns, pc, host_pc, &i386_tr_ops, &dc.base); } diff --git a/target/i386/tcg/user/excp_helper.c b/target/i386/tcg/user/excp_helper.c new file mode 100644 index 0000000000..b3bdb7831a --- /dev/null +++ b/target/i386/tcg/user/excp_helper.c @@ -0,0 +1,57 @@ +/* + * x86 exception helpers - user-mode specific code + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/exec-all.h" +#include "tcg/helper-tcg.h" + +void x86_cpu_record_sigsegv(CPUState *cs, vaddr addr, + MMUAccessType access_type, + bool maperr, uintptr_t ra) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + /* + * The error_code that hw reports as part of the exception frame + * is copied to linux sigcontext.err. The exception_index is + * copied to linux sigcontext.trapno. Short of inventing a new + * place to store the trapno, we cannot let our caller raise the + * signal and set exception_index to EXCP_INTERRUPT. + */ + env->cr[2] = addr; + env->error_code = ((access_type == MMU_DATA_STORE) << PG_ERROR_W_BIT) + | (maperr ? 0 : PG_ERROR_P_MASK) + | PG_ERROR_U_MASK; + cs->exception_index = EXCP0E_PAGE; + + /* Disable do_interrupt_user. */ + env->exception_is_int = 0; + env->exception_next_eip = -1; + + cpu_loop_exit_restore(cs, ra); +} + +void x86_cpu_record_sigbus(CPUState *cs, vaddr addr, + MMUAccessType access_type, uintptr_t ra) +{ + X86CPU *cpu = X86_CPU(cs); + handle_unaligned_access(&cpu->env, addr, access_type, ra); +} diff --git a/target/i386/tcg/user/meson.build b/target/i386/tcg/user/meson.build new file mode 100644 index 0000000000..1df6bc4343 --- /dev/null +++ b/target/i386/tcg/user/meson.build @@ -0,0 +1,4 @@ +i386_user_ss.add(when: ['CONFIG_TCG', 'CONFIG_USER_ONLY'], if_true: files( + 'excp_helper.c', + 'seg_helper.c', +)) diff --git a/target/i386/tcg/user/seg_helper.c b/target/i386/tcg/user/seg_helper.c new file mode 100644 index 0000000000..c45f2ac2ba --- /dev/null +++ b/target/i386/tcg/user/seg_helper.c @@ -0,0 +1,107 @@ +/* + * x86 segmentation related helpers (user-mode code): + * TSS, interrupts, system calls, jumps and call/task gates, descriptors + * + * Copyright (c) 2003 Fabrice Bellard + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free 
Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/helper-proto.h" +#include "exec/exec-all.h" +#include "exec/cpu_ldst.h" +#include "tcg/helper-tcg.h" +#include "tcg/seg_helper.h" + +void helper_syscall(CPUX86State *env, int next_eip_addend) +{ + CPUState *cs = env_cpu(env); + + cs->exception_index = EXCP_SYSCALL; + env->exception_is_int = 0; + env->exception_next_eip = env->eip + next_eip_addend; + cpu_loop_exit(cs); +} + +/* + * fake user mode interrupt. is_int is TRUE if coming from the int + * instruction. next_eip is the env->eip value AFTER the interrupt + * instruction. It is only relevant if is_int is TRUE or if intno + * is EXCP_SYSCALL. + */ +static void do_interrupt_user(CPUX86State *env, int intno, int is_int, + int error_code, target_ulong next_eip) +{ + if (is_int) { + SegmentCache *dt; + target_ulong ptr; + int dpl, cpl, shift; + uint32_t e2; + + dt = &env->idt; + if (env->hflags & HF_LMA_MASK) { + shift = 4; + } else { + shift = 3; + } + ptr = dt->base + (intno << shift); + e2 = cpu_ldl_kernel(env, ptr + 4); + + dpl = (e2 >> DESC_DPL_SHIFT) & 3; + cpl = env->hflags & HF_CPL_MASK; + /* check privilege if software int */ + if (dpl < cpl) { + raise_exception_err(env, EXCP0D_GPF, (intno << shift) + 2); + } + } + + /* Since we emulate only user space, we cannot do more than + exiting the emulation with the suitable exception and error + code. So update EIP for INT 0x80 and EXCP_SYSCALL. 
*/ + if (is_int || intno == EXCP_SYSCALL) { + env->eip = next_eip; + } +} + +void x86_cpu_do_interrupt(CPUState *cs) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + /* if user mode only, we simulate a fake exception + which will be handled outside the cpu execution + loop */ + do_interrupt_user(env, cs->exception_index, + env->exception_is_int, + env->error_code, + env->exception_next_eip); + /* successfully delivered */ + env->old_exception = -1; +} + +void cpu_x86_load_seg(CPUX86State *env, X86Seg seg_reg, int selector) +{ + if (!(env->cr[0] & CR0_PE_MASK) || (env->eflags & VM_MASK)) { + int dpl = (env->eflags & VM_MASK) ? 3 : 0; + selector &= 0xffff; + cpu_x86_load_seg_cache(env, seg_reg, selector, + (selector << 4), 0xffff, + DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | + DESC_A_MASK | (dpl << DESC_DPL_SHIFT)); + } else { + helper_load_seg(env, seg_reg, selector); + } +} diff --git a/target/i386/trace-events b/target/i386/trace-events index 6a19a69af5..2cd8726eeb 100644 --- a/target/i386/trace-events +++ b/target/i386/trace-events @@ -1,17 +1,13 @@ -# See docs/devel/tracing.txt for syntax documentation. +# See docs/devel/tracing.rst for syntax documentation. 
-# target/i386/kvm.c -kvm_x86_fixup_msi_error(uint32_t gsi) "VT-d failed to remap interrupt for GSI %" PRIu32 -kvm_x86_add_msi_route(int virq) "Adding route entry for virq %d" -kvm_x86_remove_msi_route(int virq) "Removing route entry for virq %d" -kvm_x86_update_msi_routes(int num) "Updated %d MSI routes" - -# target/i386/sev.c +# sev.c kvm_sev_init(void) "" -kvm_memcrypt_register_region(void *addr, size_t len) "addr %p len 0x%zu" -kvm_memcrypt_unregister_region(void *addr, size_t len) "addr %p len 0x%zu" +kvm_memcrypt_register_region(void *addr, size_t len) "addr %p len 0x%zx" +kvm_memcrypt_unregister_region(void *addr, size_t len) "addr %p len 0x%zx" kvm_sev_change_state(const char *old, const char *new) "%s -> %s" kvm_sev_launch_start(int policy, void *session, void *pdh) "policy 0x%x session %p pdh %p" -kvm_sev_launch_update_data(void *addr, uint64_t len) "addr %p len 0x%" PRIu64 +kvm_sev_launch_update_data(void *addr, uint64_t len) "addr %p len 0x%" PRIx64 kvm_sev_launch_measurement(const char *value) "data %s" kvm_sev_launch_finish(void) "" +kvm_sev_launch_secret(uint64_t hpa, uint64_t hva, uint64_t secret, int len) "hpa 0x%" PRIx64 " hva 0x%" PRIx64 " data 0x%" PRIx64 " len %d" +kvm_sev_attestation_report(const char *mnonce, const char *data) "mnonce %s data %s" diff --git a/target/i386/trace.h b/target/i386/trace.h new file mode 100644 index 0000000000..781e8ec55c --- /dev/null +++ b/target/i386/trace.h @@ -0,0 +1 @@ +#include "trace/trace-target_i386.h" diff --git a/target/i386/whpx-all.c b/target/i386/whpx-all.c deleted file mode 100644 index 57e53e1f1f..0000000000 --- a/target/i386/whpx-all.c +++ /dev/null @@ -1,1543 +0,0 @@ -/* - * QEMU Windows Hypervisor Platform accelerator (WHPX) - * - * Copyright Microsoft Corp. 2017 - * - * This work is licensed under the terms of the GNU GPL, version 2 or later. - * See the COPYING file in the top-level directory. 
- * - */ - -#include "qemu/osdep.h" -#include "cpu.h" -#include "exec/address-spaces.h" -#include "exec/ioport.h" -#include "qemu-common.h" -#include "strings.h" -#include "sysemu/accel.h" -#include "sysemu/whpx.h" -#include "sysemu/sysemu.h" -#include "sysemu/cpus.h" -#include "qemu/main-loop.h" -#include "hw/boards.h" -#include "qemu/error-report.h" -#include "qemu/queue.h" -#include "qapi/error.h" -#include "migration/blocker.h" -#include "whp-dispatch.h" - -#include <WinHvPlatform.h> -#include <WinHvEmulation.h> - -struct whpx_state { - uint64_t mem_quota; - WHV_PARTITION_HANDLE partition; -}; - -static const WHV_REGISTER_NAME whpx_register_names[] = { - - /* X64 General purpose registers */ - WHvX64RegisterRax, - WHvX64RegisterRcx, - WHvX64RegisterRdx, - WHvX64RegisterRbx, - WHvX64RegisterRsp, - WHvX64RegisterRbp, - WHvX64RegisterRsi, - WHvX64RegisterRdi, - WHvX64RegisterR8, - WHvX64RegisterR9, - WHvX64RegisterR10, - WHvX64RegisterR11, - WHvX64RegisterR12, - WHvX64RegisterR13, - WHvX64RegisterR14, - WHvX64RegisterR15, - WHvX64RegisterRip, - WHvX64RegisterRflags, - - /* X64 Segment registers */ - WHvX64RegisterEs, - WHvX64RegisterCs, - WHvX64RegisterSs, - WHvX64RegisterDs, - WHvX64RegisterFs, - WHvX64RegisterGs, - WHvX64RegisterLdtr, - WHvX64RegisterTr, - - /* X64 Table registers */ - WHvX64RegisterIdtr, - WHvX64RegisterGdtr, - - /* X64 Control Registers */ - WHvX64RegisterCr0, - WHvX64RegisterCr2, - WHvX64RegisterCr3, - WHvX64RegisterCr4, - WHvX64RegisterCr8, - - /* X64 Debug Registers */ - /* - * WHvX64RegisterDr0, - * WHvX64RegisterDr1, - * WHvX64RegisterDr2, - * WHvX64RegisterDr3, - * WHvX64RegisterDr6, - * WHvX64RegisterDr7, - */ - - /* X64 Floating Point and Vector Registers */ - WHvX64RegisterXmm0, - WHvX64RegisterXmm1, - WHvX64RegisterXmm2, - WHvX64RegisterXmm3, - WHvX64RegisterXmm4, - WHvX64RegisterXmm5, - WHvX64RegisterXmm6, - WHvX64RegisterXmm7, - WHvX64RegisterXmm8, - WHvX64RegisterXmm9, - WHvX64RegisterXmm10, - WHvX64RegisterXmm11, - 
WHvX64RegisterXmm12, - WHvX64RegisterXmm13, - WHvX64RegisterXmm14, - WHvX64RegisterXmm15, - WHvX64RegisterFpMmx0, - WHvX64RegisterFpMmx1, - WHvX64RegisterFpMmx2, - WHvX64RegisterFpMmx3, - WHvX64RegisterFpMmx4, - WHvX64RegisterFpMmx5, - WHvX64RegisterFpMmx6, - WHvX64RegisterFpMmx7, - WHvX64RegisterFpControlStatus, - WHvX64RegisterXmmControlStatus, - - /* X64 MSRs */ - WHvX64RegisterTsc, - WHvX64RegisterEfer, -#ifdef TARGET_X86_64 - WHvX64RegisterKernelGsBase, -#endif - WHvX64RegisterApicBase, - /* WHvX64RegisterPat, */ - WHvX64RegisterSysenterCs, - WHvX64RegisterSysenterEip, - WHvX64RegisterSysenterEsp, - WHvX64RegisterStar, -#ifdef TARGET_X86_64 - WHvX64RegisterLstar, - WHvX64RegisterCstar, - WHvX64RegisterSfmask, -#endif - - /* Interrupt / Event Registers */ - /* - * WHvRegisterPendingInterruption, - * WHvRegisterInterruptState, - * WHvRegisterPendingEvent0, - * WHvRegisterPendingEvent1 - * WHvX64RegisterDeliverabilityNotifications, - */ -}; - -struct whpx_register_set { - WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)]; -}; - -struct whpx_vcpu { - WHV_EMULATOR_HANDLE emulator; - bool window_registered; - bool interruptable; - uint64_t tpr; - uint64_t apic_base; - bool interruption_pending; - - /* Must be the last field as it may have a tail */ - WHV_RUN_VP_EXIT_CONTEXT exit_ctx; -}; - -static bool whpx_allowed; -static bool whp_dispatch_initialized; -static HMODULE hWinHvPlatform, hWinHvEmulation; - -struct whpx_state whpx_global; -struct WHPDispatch whp_dispatch; - - -/* - * VP support - */ - -static struct whpx_vcpu *get_whpx_vcpu(CPUState *cpu) -{ - return (struct whpx_vcpu *)cpu->hax_vcpu; -} - -static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86, - int r86) -{ - WHV_X64_SEGMENT_REGISTER hs; - unsigned flags = qs->flags; - - hs.Base = qs->base; - hs.Limit = qs->limit; - hs.Selector = qs->selector; - - if (v86) { - hs.Attributes = 0; - hs.SegmentType = 3; - hs.Present = 1; - hs.DescriptorPrivilegeLevel = 3; - 
hs.NonSystemSegment = 1; - - } else { - hs.Attributes = (flags >> DESC_TYPE_SHIFT); - - if (r86) { - /* hs.Base &= 0xfffff; */ - } - } - - return hs; -} - -static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs) -{ - SegmentCache qs; - - qs.base = hs->Base; - qs.limit = hs->Limit; - qs.selector = hs->Selector; - - qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT; - - return qs; -} - -static void whpx_set_registers(CPUState *cpu) -{ - struct whpx_state *whpx = &whpx_global; - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - X86CPU *x86_cpu = X86_CPU(cpu); - struct whpx_register_set vcxt; - HRESULT hr; - int idx; - int idx_next; - int i; - int v86, r86; - - assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); - - memset(&vcxt, 0, sizeof(struct whpx_register_set)); - - v86 = (env->eflags & VM_MASK); - r86 = !(env->cr[0] & CR0_PE_MASK); - - vcpu->tpr = cpu_get_apic_tpr(x86_cpu->apic_state); - vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state); - - idx = 0; - - /* Indexes for first 16 registers match between HV and QEMU definitions */ - idx_next = 16; - for (idx = 0; idx < CPU_NB_REGS; idx += 1) { - vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx]; - } - idx = idx_next; - - /* Same goes for RIP and RFLAGS */ - assert(whpx_register_names[idx] == WHvX64RegisterRip); - vcxt.values[idx++].Reg64 = env->eip; - - assert(whpx_register_names[idx] == WHvX64RegisterRflags); - vcxt.values[idx++].Reg64 = env->eflags; - - /* Translate 6+4 segment registers. 
HV and QEMU order matches */ - assert(idx == WHvX64RegisterEs); - for (i = 0; i < 6; i += 1, idx += 1) { - vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86); - } - - assert(idx == WHvX64RegisterLdtr); - vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0); - - assert(idx == WHvX64RegisterTr); - vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0); - - assert(idx == WHvX64RegisterIdtr); - vcxt.values[idx].Table.Base = env->idt.base; - vcxt.values[idx].Table.Limit = env->idt.limit; - idx += 1; - - assert(idx == WHvX64RegisterGdtr); - vcxt.values[idx].Table.Base = env->gdt.base; - vcxt.values[idx].Table.Limit = env->gdt.limit; - idx += 1; - - /* CR0, 2, 3, 4, 8 */ - assert(whpx_register_names[idx] == WHvX64RegisterCr0); - vcxt.values[idx++].Reg64 = env->cr[0]; - assert(whpx_register_names[idx] == WHvX64RegisterCr2); - vcxt.values[idx++].Reg64 = env->cr[2]; - assert(whpx_register_names[idx] == WHvX64RegisterCr3); - vcxt.values[idx++].Reg64 = env->cr[3]; - assert(whpx_register_names[idx] == WHvX64RegisterCr4); - vcxt.values[idx++].Reg64 = env->cr[4]; - assert(whpx_register_names[idx] == WHvX64RegisterCr8); - vcxt.values[idx++].Reg64 = vcpu->tpr; - - /* 8 Debug Registers - Skipped */ - - /* 16 XMM registers */ - assert(whpx_register_names[idx] == WHvX64RegisterXmm0); - idx_next = idx + 16; - for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) { - vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0); - vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1); - } - idx = idx_next; - - /* 8 FP registers */ - assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0); - for (i = 0; i < 8; i += 1, idx += 1) { - vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0); - /* vcxt.values[idx].Fp.AsUINT128.High64 = - env->fpregs[i].mmx.MMX_Q(1); - */ - } - - /* FP control status register */ - assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus); - vcxt.values[idx].FpControlStatus.FpControl = env->fpuc; - 
vcxt.values[idx].FpControlStatus.FpStatus = - (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; - vcxt.values[idx].FpControlStatus.FpTag = 0; - for (i = 0; i < 8; ++i) { - vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i; - } - vcxt.values[idx].FpControlStatus.Reserved = 0; - vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop; - vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip; - idx += 1; - - /* XMM control status register */ - assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus); - vcxt.values[idx].XmmControlStatus.LastFpRdp = 0; - vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr; - vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff; - idx += 1; - - /* MSRs */ - assert(whpx_register_names[idx] == WHvX64RegisterTsc); - vcxt.values[idx++].Reg64 = env->tsc; - assert(whpx_register_names[idx] == WHvX64RegisterEfer); - vcxt.values[idx++].Reg64 = env->efer; -#ifdef TARGET_X86_64 - assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase); - vcxt.values[idx++].Reg64 = env->kernelgsbase; -#endif - - assert(whpx_register_names[idx] == WHvX64RegisterApicBase); - vcxt.values[idx++].Reg64 = vcpu->apic_base; - - /* WHvX64RegisterPat - Skipped */ - - assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs); - vcxt.values[idx++].Reg64 = env->sysenter_cs; - assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip); - vcxt.values[idx++].Reg64 = env->sysenter_eip; - assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp); - vcxt.values[idx++].Reg64 = env->sysenter_esp; - assert(whpx_register_names[idx] == WHvX64RegisterStar); - vcxt.values[idx++].Reg64 = env->star; -#ifdef TARGET_X86_64 - assert(whpx_register_names[idx] == WHvX64RegisterLstar); - vcxt.values[idx++].Reg64 = env->lstar; - assert(whpx_register_names[idx] == WHvX64RegisterCstar); - vcxt.values[idx++].Reg64 = env->cstar; - assert(whpx_register_names[idx] == WHvX64RegisterSfmask); - vcxt.values[idx++].Reg64 = env->fmask; -#endif - - 
/* Interrupt / Event Registers - Skipped */ - - assert(idx == RTL_NUMBER_OF(whpx_register_names)); - - hr = whp_dispatch.WHvSetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, - whpx_register_names, - RTL_NUMBER_OF(whpx_register_names), - &vcxt.values[0]); - - if (FAILED(hr)) { - error_report("WHPX: Failed to set virtual processor context, hr=%08lx", - hr); - } - - return; -} - -static void whpx_get_registers(CPUState *cpu) -{ - struct whpx_state *whpx = &whpx_global; - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - X86CPU *x86_cpu = X86_CPU(cpu); - struct whpx_register_set vcxt; - uint64_t tpr, apic_base; - HRESULT hr; - int idx; - int idx_next; - int i; - - assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); - - hr = whp_dispatch.WHvGetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, - whpx_register_names, - RTL_NUMBER_OF(whpx_register_names), - &vcxt.values[0]); - if (FAILED(hr)) { - error_report("WHPX: Failed to get virtual processor context, hr=%08lx", - hr); - } - - idx = 0; - - /* Indexes for first 16 registers match between HV and QEMU definitions */ - idx_next = 16; - for (idx = 0; idx < CPU_NB_REGS; idx += 1) { - env->regs[idx] = vcxt.values[idx].Reg64; - } - idx = idx_next; - - /* Same goes for RIP and RFLAGS */ - assert(whpx_register_names[idx] == WHvX64RegisterRip); - env->eip = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterRflags); - env->eflags = vcxt.values[idx++].Reg64; - - /* Translate 6+4 segment registers. 
HV and QEMU order matches */ - assert(idx == WHvX64RegisterEs); - for (i = 0; i < 6; i += 1, idx += 1) { - env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment); - } - - assert(idx == WHvX64RegisterLdtr); - env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment); - assert(idx == WHvX64RegisterTr); - env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment); - assert(idx == WHvX64RegisterIdtr); - env->idt.base = vcxt.values[idx].Table.Base; - env->idt.limit = vcxt.values[idx].Table.Limit; - idx += 1; - assert(idx == WHvX64RegisterGdtr); - env->gdt.base = vcxt.values[idx].Table.Base; - env->gdt.limit = vcxt.values[idx].Table.Limit; - idx += 1; - - /* CR0, 2, 3, 4, 8 */ - assert(whpx_register_names[idx] == WHvX64RegisterCr0); - env->cr[0] = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterCr2); - env->cr[2] = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterCr3); - env->cr[3] = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterCr4); - env->cr[4] = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterCr8); - tpr = vcxt.values[idx++].Reg64; - if (tpr != vcpu->tpr) { - vcpu->tpr = tpr; - cpu_set_apic_tpr(x86_cpu->apic_state, tpr); - } - - /* 8 Debug Registers - Skipped */ - - /* 16 XMM registers */ - assert(whpx_register_names[idx] == WHvX64RegisterXmm0); - idx_next = idx + 16; - for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) { - env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64; - env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64; - } - idx = idx_next; - - /* 8 FP registers */ - assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0); - for (i = 0; i < 8; i += 1, idx += 1) { - env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64; - /* env->fpregs[i].mmx.MMX_Q(1) = - vcxt.values[idx].Fp.AsUINT128.High64; - */ - } - - /* FP control status register */ - assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus); 
- env->fpuc = vcxt.values[idx].FpControlStatus.FpControl; - env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7; - env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800; - for (i = 0; i < 8; ++i) { - env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1); - } - env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp; - env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip; - idx += 1; - - /* XMM control status register */ - assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus); - env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl; - idx += 1; - - /* MSRs */ - assert(whpx_register_names[idx] == WHvX64RegisterTsc); - env->tsc = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterEfer); - env->efer = vcxt.values[idx++].Reg64; -#ifdef TARGET_X86_64 - assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase); - env->kernelgsbase = vcxt.values[idx++].Reg64; -#endif - - assert(whpx_register_names[idx] == WHvX64RegisterApicBase); - apic_base = vcxt.values[idx++].Reg64; - if (apic_base != vcpu->apic_base) { - vcpu->apic_base = apic_base; - cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base); - } - - /* WHvX64RegisterPat - Skipped */ - - assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs); - env->sysenter_cs = vcxt.values[idx++].Reg64;; - assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip); - env->sysenter_eip = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp); - env->sysenter_esp = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterStar); - env->star = vcxt.values[idx++].Reg64; -#ifdef TARGET_X86_64 - assert(whpx_register_names[idx] == WHvX64RegisterLstar); - env->lstar = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterCstar); - env->cstar = vcxt.values[idx++].Reg64; - assert(whpx_register_names[idx] == WHvX64RegisterSfmask); - env->fmask = 
vcxt.values[idx++].Reg64; -#endif - - /* Interrupt / Event Registers - Skipped */ - - assert(idx == RTL_NUMBER_OF(whpx_register_names)); - - return; -} - -static HRESULT CALLBACK whpx_emu_ioport_callback( - void *ctx, - WHV_EMULATOR_IO_ACCESS_INFO *IoAccess) -{ - MemTxAttrs attrs = { 0 }; - address_space_rw(&address_space_io, IoAccess->Port, attrs, - (uint8_t *)&IoAccess->Data, IoAccess->AccessSize, - IoAccess->Direction); - return S_OK; -} - -static HRESULT CALLBACK whpx_emu_mmio_callback( - void *ctx, - WHV_EMULATOR_MEMORY_ACCESS_INFO *ma) -{ - cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize, - ma->Direction); - return S_OK; -} - -static HRESULT CALLBACK whpx_emu_getreg_callback( - void *ctx, - const WHV_REGISTER_NAME *RegisterNames, - UINT32 RegisterCount, - WHV_REGISTER_VALUE *RegisterValues) -{ - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - CPUState *cpu = (CPUState *)ctx; - - hr = whp_dispatch.WHvGetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, - RegisterNames, RegisterCount, - RegisterValues); - if (FAILED(hr)) { - error_report("WHPX: Failed to get virtual processor registers," - " hr=%08lx", hr); - } - - return hr; -} - -static HRESULT CALLBACK whpx_emu_setreg_callback( - void *ctx, - const WHV_REGISTER_NAME *RegisterNames, - UINT32 RegisterCount, - const WHV_REGISTER_VALUE *RegisterValues) -{ - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - CPUState *cpu = (CPUState *)ctx; - - hr = whp_dispatch.WHvSetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, - RegisterNames, RegisterCount, - RegisterValues); - if (FAILED(hr)) { - error_report("WHPX: Failed to set virtual processor registers," - " hr=%08lx", hr); - } - - /* - * The emulator just successfully wrote the register state. We clear the - * dirty state so we avoid the double write on resume of the VP. 
- */ - cpu->vcpu_dirty = false; - - return hr; -} - -static HRESULT CALLBACK whpx_emu_translate_callback( - void *ctx, - WHV_GUEST_VIRTUAL_ADDRESS Gva, - WHV_TRANSLATE_GVA_FLAGS TranslateFlags, - WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult, - WHV_GUEST_PHYSICAL_ADDRESS *Gpa) -{ - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - CPUState *cpu = (CPUState *)ctx; - WHV_TRANSLATE_GVA_RESULT res; - - hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index, - Gva, TranslateFlags, &res, Gpa); - if (FAILED(hr)) { - error_report("WHPX: Failed to translate GVA, hr=%08lx", hr); - } else { - *TranslationResult = res.ResultCode; - } - - return hr; -} - -static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = { - .Size = sizeof(WHV_EMULATOR_CALLBACKS), - .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback, - .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback, - .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback, - .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback, - .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback, -}; - -static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx) -{ - HRESULT hr; - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - WHV_EMULATOR_STATUS emu_status; - - hr = whp_dispatch.WHvEmulatorTryMmioEmulation( - vcpu->emulator, cpu, - &vcpu->exit_ctx.VpContext, ctx, - &emu_status); - if (FAILED(hr)) { - error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr); - return -1; - } - - if (!emu_status.EmulationSuccessful) { - error_report("WHPX: Failed to emulate MMIO access with" - " EmulatorReturnStatus: %u", emu_status.AsUINT32); - return -1; - } - - return 0; -} - -static int whpx_handle_portio(CPUState *cpu, - WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx) -{ - HRESULT hr; - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - WHV_EMULATOR_STATUS emu_status; - - hr = whp_dispatch.WHvEmulatorTryIoEmulation( - vcpu->emulator, cpu, - &vcpu->exit_ctx.VpContext, ctx, - &emu_status); - if 
(FAILED(hr)) { - error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr); - return -1; - } - - if (!emu_status.EmulationSuccessful) { - error_report("WHPX: Failed to emulate PortIO access with" - " EmulatorReturnStatus: %u", emu_status.AsUINT32); - return -1; - } - - return 0; -} - -static int whpx_handle_halt(CPUState *cpu) -{ - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - int ret = 0; - - qemu_mutex_lock_iothread(); - if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && - (env->eflags & IF_MASK)) && - !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { - cpu->exception_index = EXCP_HLT; - cpu->halted = true; - ret = 1; - } - qemu_mutex_unlock_iothread(); - - return ret; -} - -static void whpx_vcpu_pre_run(CPUState *cpu) -{ - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - X86CPU *x86_cpu = X86_CPU(cpu); - int irq; - uint8_t tpr; - WHV_X64_PENDING_INTERRUPTION_REGISTER new_int; - UINT32 reg_count = 0; - WHV_REGISTER_VALUE reg_values[3]; - WHV_REGISTER_NAME reg_names[3]; - - memset(&new_int, 0, sizeof(new_int)); - memset(reg_values, 0, sizeof(reg_values)); - - qemu_mutex_lock_iothread(); - - /* Inject NMI */ - if (!vcpu->interruption_pending && - cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { - if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { - cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; - vcpu->interruptable = false; - new_int.InterruptionType = WHvX64PendingNmi; - new_int.InterruptionPending = 1; - new_int.InterruptionVector = 2; - } - if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { - cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; - } - } - - /* - * Force the VCPU out of its inner loop to process any INIT requests or - * commit pending TPR access. 
- */ - if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { - if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && - !(env->hflags & HF_SMM_MASK)) { - cpu->exit_request = 1; - } - if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { - cpu->exit_request = 1; - } - } - - /* Get pending hard interruption or replay one that was overwritten */ - if (!vcpu->interruption_pending && - vcpu->interruptable && (env->eflags & IF_MASK)) { - assert(!new_int.InterruptionPending); - if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { - cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; - irq = cpu_get_pic_interrupt(env); - if (irq >= 0) { - new_int.InterruptionType = WHvX64PendingInterrupt; - new_int.InterruptionPending = 1; - new_int.InterruptionVector = irq; - } - } - } - - /* Setup interrupt state if new one was prepared */ - if (new_int.InterruptionPending) { - reg_values[reg_count].PendingInterruption = new_int; - reg_names[reg_count] = WHvRegisterPendingInterruption; - reg_count += 1; - } - - /* Sync the TPR to the CR8 if was modified during the intercept */ - tpr = cpu_get_apic_tpr(x86_cpu->apic_state); - if (tpr != vcpu->tpr) { - vcpu->tpr = tpr; - reg_values[reg_count].Reg64 = tpr; - cpu->exit_request = 1; - reg_names[reg_count] = WHvX64RegisterCr8; - reg_count += 1; - } - - /* Update the state of the interrupt delivery notification */ - if (!vcpu->window_registered && - cpu->interrupt_request & CPU_INTERRUPT_HARD) { - reg_values[reg_count].DeliverabilityNotifications.InterruptNotification - = 1; - vcpu->window_registered = 1; - reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications; - reg_count += 1; - } - - qemu_mutex_unlock_iothread(); - - if (reg_count) { - hr = whp_dispatch.WHvSetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, - reg_names, reg_count, reg_values); - if (FAILED(hr)) { - error_report("WHPX: Failed to set interrupt state registers," - " hr=%08lx", hr); - } - } - - return; -} - -static void whpx_vcpu_post_run(CPUState 
*cpu) -{ - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - X86CPU *x86_cpu = X86_CPU(cpu); - - env->eflags = vcpu->exit_ctx.VpContext.Rflags; - - uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8; - if (vcpu->tpr != tpr) { - vcpu->tpr = tpr; - qemu_mutex_lock_iothread(); - cpu_set_apic_tpr(x86_cpu->apic_state, vcpu->tpr); - qemu_mutex_unlock_iothread(); - } - - vcpu->interruption_pending = - vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending; - - vcpu->interruptable = - !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow; - - return; -} - -static void whpx_vcpu_process_async_events(CPUState *cpu) -{ - struct CPUX86State *env = (CPUArchState *)(cpu->env_ptr); - X86CPU *x86_cpu = X86_CPU(cpu); - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - - if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && - !(env->hflags & HF_SMM_MASK)) { - - do_cpu_init(x86_cpu); - cpu->vcpu_dirty = true; - vcpu->interruptable = true; - } - - if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { - cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; - apic_poll_irq(x86_cpu->apic_state); - } - - if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) && - (env->eflags & IF_MASK)) || - (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { - cpu->halted = false; - } - - if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) { - if (!cpu->vcpu_dirty) { - whpx_get_registers(cpu); - } - do_cpu_sipi(x86_cpu); - } - - if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { - cpu->interrupt_request &= ~CPU_INTERRUPT_TPR; - if (!cpu->vcpu_dirty) { - whpx_get_registers(cpu); - } - apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip, - env->tpr_access_type); - } - - return; -} - -static int whpx_vcpu_run(CPUState *cpu) -{ - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - struct whpx_vcpu *vcpu = get_whpx_vcpu(cpu); - int ret; - - whpx_vcpu_process_async_events(cpu); - if (cpu->halted) { - cpu->exception_index = EXCP_HLT; - atomic_set(&cpu->exit_request, 
false); - return 0; - } - - qemu_mutex_unlock_iothread(); - cpu_exec_start(cpu); - - do { - if (cpu->vcpu_dirty) { - whpx_set_registers(cpu); - cpu->vcpu_dirty = false; - } - - whpx_vcpu_pre_run(cpu); - - if (atomic_read(&cpu->exit_request)) { - whpx_vcpu_kick(cpu); - } - - hr = whp_dispatch.WHvRunVirtualProcessor( - whpx->partition, cpu->cpu_index, - &vcpu->exit_ctx, sizeof(vcpu->exit_ctx)); - - if (FAILED(hr)) { - error_report("WHPX: Failed to exec a virtual processor," - " hr=%08lx", hr); - ret = -1; - break; - } - - whpx_vcpu_post_run(cpu); - - switch (vcpu->exit_ctx.ExitReason) { - case WHvRunVpExitReasonMemoryAccess: - ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess); - break; - - case WHvRunVpExitReasonX64IoPortAccess: - ret = whpx_handle_portio(cpu, &vcpu->exit_ctx.IoPortAccess); - break; - - case WHvRunVpExitReasonX64InterruptWindow: - vcpu->window_registered = 0; - ret = 0; - break; - - case WHvRunVpExitReasonX64Halt: - ret = whpx_handle_halt(cpu); - break; - - case WHvRunVpExitReasonCanceled: - cpu->exception_index = EXCP_INTERRUPT; - ret = 1; - break; - - case WHvRunVpExitReasonX64MsrAccess: { - WHV_REGISTER_VALUE reg_values[3] = {0}; - WHV_REGISTER_NAME reg_names[3]; - UINT32 reg_count; - - reg_names[0] = WHvX64RegisterRip; - reg_names[1] = WHvX64RegisterRax; - reg_names[2] = WHvX64RegisterRdx; - - reg_values[0].Reg64 = - vcpu->exit_ctx.VpContext.Rip + - vcpu->exit_ctx.VpContext.InstructionLength; - - /* - * For all unsupported MSR access we: - * ignore writes - * return 0 on read. - */ - reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ? 
- 1 : 3; - - hr = whp_dispatch.WHvSetVirtualProcessorRegisters( - whpx->partition, - cpu->cpu_index, - reg_names, reg_count, - reg_values); - - if (FAILED(hr)) { - error_report("WHPX: Failed to set MsrAccess state " - " registers, hr=%08lx", hr); - } - ret = 0; - break; - } - case WHvRunVpExitReasonX64Cpuid: { - WHV_REGISTER_VALUE reg_values[5]; - WHV_REGISTER_NAME reg_names[5]; - UINT32 reg_count = 5; - UINT64 rip, rax, rcx, rdx, rbx; - - memset(reg_values, 0, sizeof(reg_values)); - - rip = vcpu->exit_ctx.VpContext.Rip + - vcpu->exit_ctx.VpContext.InstructionLength; - switch (vcpu->exit_ctx.CpuidAccess.Rax) { - case 1: - rax = vcpu->exit_ctx.CpuidAccess.DefaultResultRax; - /* Advertise that we are running on a hypervisor */ - rcx = - vcpu->exit_ctx.CpuidAccess.DefaultResultRcx | - CPUID_EXT_HYPERVISOR; - - rdx = vcpu->exit_ctx.CpuidAccess.DefaultResultRdx; - rbx = vcpu->exit_ctx.CpuidAccess.DefaultResultRbx; - break; - case 0x80000001: - rax = vcpu->exit_ctx.CpuidAccess.DefaultResultRax; - /* Remove any support of OSVW */ - rcx = - vcpu->exit_ctx.CpuidAccess.DefaultResultRcx & - ~CPUID_EXT3_OSVW; - - rdx = vcpu->exit_ctx.CpuidAccess.DefaultResultRdx; - rbx = vcpu->exit_ctx.CpuidAccess.DefaultResultRbx; - break; - default: - rax = vcpu->exit_ctx.CpuidAccess.DefaultResultRax; - rcx = vcpu->exit_ctx.CpuidAccess.DefaultResultRcx; - rdx = vcpu->exit_ctx.CpuidAccess.DefaultResultRdx; - rbx = vcpu->exit_ctx.CpuidAccess.DefaultResultRbx; - } - - reg_names[0] = WHvX64RegisterRip; - reg_names[1] = WHvX64RegisterRax; - reg_names[2] = WHvX64RegisterRcx; - reg_names[3] = WHvX64RegisterRdx; - reg_names[4] = WHvX64RegisterRbx; - - reg_values[0].Reg64 = rip; - reg_values[1].Reg64 = rax; - reg_values[2].Reg64 = rcx; - reg_values[3].Reg64 = rdx; - reg_values[4].Reg64 = rbx; - - hr = whp_dispatch.WHvSetVirtualProcessorRegisters( - whpx->partition, cpu->cpu_index, - reg_names, - reg_count, - reg_values); - - if (FAILED(hr)) { - error_report("WHPX: Failed to set CpuidAccess state 
registers," - " hr=%08lx", hr); - } - ret = 0; - break; - } - case WHvRunVpExitReasonNone: - case WHvRunVpExitReasonUnrecoverableException: - case WHvRunVpExitReasonInvalidVpRegisterValue: - case WHvRunVpExitReasonUnsupportedFeature: - case WHvRunVpExitReasonException: - default: - error_report("WHPX: Unexpected VP exit code %d", - vcpu->exit_ctx.ExitReason); - whpx_get_registers(cpu); - qemu_mutex_lock_iothread(); - qemu_system_guest_panicked(cpu_get_crash_info(cpu)); - qemu_mutex_unlock_iothread(); - break; - } - - } while (!ret); - - cpu_exec_end(cpu); - qemu_mutex_lock_iothread(); - current_cpu = cpu; - - atomic_set(&cpu->exit_request, false); - - return ret < 0; -} - -static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) -{ - whpx_get_registers(cpu); - cpu->vcpu_dirty = true; -} - -static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu, - run_on_cpu_data arg) -{ - whpx_set_registers(cpu); - cpu->vcpu_dirty = false; -} - -static void do_whpx_cpu_synchronize_post_init(CPUState *cpu, - run_on_cpu_data arg) -{ - whpx_set_registers(cpu); - cpu->vcpu_dirty = false; -} - -static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, - run_on_cpu_data arg) -{ - cpu->vcpu_dirty = true; -} - -/* - * CPU support. - */ - -void whpx_cpu_synchronize_state(CPUState *cpu) -{ - if (!cpu->vcpu_dirty) { - run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL); - } -} - -void whpx_cpu_synchronize_post_reset(CPUState *cpu) -{ - run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); -} - -void whpx_cpu_synchronize_post_init(CPUState *cpu) -{ - run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL); -} - -void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu) -{ - run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); -} - -/* - * Vcpu support. 
- */ - -static Error *whpx_migration_blocker; - -int whpx_init_vcpu(CPUState *cpu) -{ - HRESULT hr; - struct whpx_state *whpx = &whpx_global; - struct whpx_vcpu *vcpu; - Error *local_error = NULL; - - /* Add migration blockers for all unsupported features of the - * Windows Hypervisor Platform - */ - if (whpx_migration_blocker == NULL) { - error_setg(&whpx_migration_blocker, - "State blocked due to non-migratable CPUID feature support," - "dirty memory tracking support, and XSAVE/XRSTOR support"); - - (void)migrate_add_blocker(whpx_migration_blocker, &local_error); - if (local_error) { - error_report_err(local_error); - migrate_del_blocker(whpx_migration_blocker); - error_free(whpx_migration_blocker); - return -EINVAL; - } - } - - vcpu = g_malloc0(sizeof(struct whpx_vcpu)); - - if (!vcpu) { - error_report("WHPX: Failed to allocte VCPU context."); - return -ENOMEM; - } - - hr = whp_dispatch.WHvEmulatorCreateEmulator( - &whpx_emu_callbacks, - &vcpu->emulator); - if (FAILED(hr)) { - error_report("WHPX: Failed to setup instruction completion support," - " hr=%08lx", hr); - g_free(vcpu); - return -EINVAL; - } - - hr = whp_dispatch.WHvCreateVirtualProcessor( - whpx->partition, cpu->cpu_index, 0); - if (FAILED(hr)) { - error_report("WHPX: Failed to create a virtual processor," - " hr=%08lx", hr); - whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator); - g_free(vcpu); - return -EINVAL; - } - - vcpu->interruptable = true; - - cpu->vcpu_dirty = true; - cpu->hax_vcpu = (struct hax_vcpu_state *)vcpu; - - return 0; -} - -int whpx_vcpu_exec(CPUState *cpu) -{ - int ret; - int fatal; - - for (;;) { - if (cpu->exception_index >= EXCP_INTERRUPT) { - ret = cpu->exception_index; - cpu->exception_index = -1; - break; - } - - fatal = whpx_vcpu_run(cpu); - - if (fatal) { - error_report("WHPX: Failed to exec a virtual processor"); - abort(); - } - } - - return ret; -} - -void whpx_destroy_vcpu(CPUState *cpu) -{ - struct whpx_state *whpx = &whpx_global; - struct whpx_vcpu *vcpu = 
get_whpx_vcpu(cpu); - - whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index); - whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator); - g_free(cpu->hax_vcpu); - return; -} - -void whpx_vcpu_kick(CPUState *cpu) -{ - struct whpx_state *whpx = &whpx_global; - whp_dispatch.WHvCancelRunVirtualProcessor( - whpx->partition, cpu->cpu_index, 0); -} - -/* - * Memory support. - */ - -static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size, - void *host_va, int add, int rom, - const char *name) -{ - struct whpx_state *whpx = &whpx_global; - HRESULT hr; - - /* - if (add) { - printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n", - (void*)start_pa, (void*)size, host_va, - (rom ? "ROM" : "RAM"), name); - } else { - printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n", - (void*)start_pa, (void*)size, host_va, name); - } - */ - - if (add) { - hr = whp_dispatch.WHvMapGpaRange(whpx->partition, - host_va, - start_pa, - size, - (WHvMapGpaRangeFlagRead | - WHvMapGpaRangeFlagExecute | - (rom ? 0 : WHvMapGpaRangeFlagWrite))); - } else { - hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition, - start_pa, - size); - } - - if (FAILED(hr)) { - error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes," - " Host:%p, hr=%08lx", - (add ? 
"MAP" : "UNMAP"), name, - (void *)(uintptr_t)start_pa, (void *)size, host_va, hr); - } -} - -static void whpx_process_section(MemoryRegionSection *section, int add) -{ - MemoryRegion *mr = section->mr; - hwaddr start_pa = section->offset_within_address_space; - ram_addr_t size = int128_get64(section->size); - unsigned int delta; - uint64_t host_va; - - if (!memory_region_is_ram(mr)) { - return; - } - - delta = qemu_real_host_page_size - (start_pa & ~qemu_real_host_page_mask); - delta &= ~qemu_real_host_page_mask; - if (delta > size) { - return; - } - start_pa += delta; - size -= delta; - size &= qemu_real_host_page_mask; - if (!size || (start_pa & ~qemu_real_host_page_mask)) { - return; - } - - host_va = (uintptr_t)memory_region_get_ram_ptr(mr) - + section->offset_within_region + delta; - - whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add, - memory_region_is_rom(mr), mr->name); -} - -static void whpx_region_add(MemoryListener *listener, - MemoryRegionSection *section) -{ - memory_region_ref(section->mr); - whpx_process_section(section, 1); -} - -static void whpx_region_del(MemoryListener *listener, - MemoryRegionSection *section) -{ - whpx_process_section(section, 0); - memory_region_unref(section->mr); -} - -static void whpx_transaction_begin(MemoryListener *listener) -{ -} - -static void whpx_transaction_commit(MemoryListener *listener) -{ -} - -static void whpx_log_sync(MemoryListener *listener, - MemoryRegionSection *section) -{ - MemoryRegion *mr = section->mr; - - if (!memory_region_is_ram(mr)) { - return; - } - - memory_region_set_dirty(mr, 0, int128_get64(section->size)); -} - -static MemoryListener whpx_memory_listener = { - .begin = whpx_transaction_begin, - .commit = whpx_transaction_commit, - .region_add = whpx_region_add, - .region_del = whpx_region_del, - .log_sync = whpx_log_sync, - .priority = 10, -}; - -static void whpx_memory_init(void) -{ - memory_listener_register(&whpx_memory_listener, &address_space_memory); -} - -static 
void whpx_handle_interrupt(CPUState *cpu, int mask) -{ - cpu->interrupt_request |= mask; - - if (!qemu_cpu_is_self(cpu)) { - qemu_cpu_kick(cpu); - } -} - -/* - * Partition support - */ - -static int whpx_accel_init(MachineState *ms) -{ - struct whpx_state *whpx; - int ret; - HRESULT hr; - WHV_CAPABILITY whpx_cap; - UINT32 whpx_cap_size; - WHV_PARTITION_PROPERTY prop; - - whpx = &whpx_global; - - if (!init_whp_dispatch()) { - ret = -ENOSYS; - goto error; - } - - memset(whpx, 0, sizeof(struct whpx_state)); - whpx->mem_quota = ms->ram_size; - - hr = whp_dispatch.WHvGetCapability( - WHvCapabilityCodeHypervisorPresent, &whpx_cap, - sizeof(whpx_cap), &whpx_cap_size); - if (FAILED(hr) || !whpx_cap.HypervisorPresent) { - error_report("WHPX: No accelerator found, hr=%08lx", hr); - ret = -ENOSPC; - goto error; - } - - hr = whp_dispatch.WHvCreatePartition(&whpx->partition); - if (FAILED(hr)) { - error_report("WHPX: Failed to create partition, hr=%08lx", hr); - ret = -EINVAL; - goto error; - } - - memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY)); - prop.ProcessorCount = smp_cpus; - hr = whp_dispatch.WHvSetPartitionProperty( - whpx->partition, - WHvPartitionPropertyCodeProcessorCount, - &prop, - sizeof(WHV_PARTITION_PROPERTY)); - - if (FAILED(hr)) { - error_report("WHPX: Failed to set partition core count to %d," - " hr=%08lx", smp_cores, hr); - ret = -EINVAL; - goto error; - } - - memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY)); - prop.ExtendedVmExits.X64MsrExit = 1; - prop.ExtendedVmExits.X64CpuidExit = 1; - hr = whp_dispatch.WHvSetPartitionProperty( - whpx->partition, - WHvPartitionPropertyCodeExtendedVmExits, - &prop, - sizeof(WHV_PARTITION_PROPERTY)); - - if (FAILED(hr)) { - error_report("WHPX: Failed to enable partition extended X64MsrExit and" - " X64CpuidExit hr=%08lx", hr); - ret = -EINVAL; - goto error; - } - - UINT32 cpuidExitList[] = {1, 0x80000001}; - hr = whp_dispatch.WHvSetPartitionProperty( - whpx->partition, - WHvPartitionPropertyCodeCpuidExitList, - 
cpuidExitList, - RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32)); - - if (FAILED(hr)) { - error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx", - hr); - ret = -EINVAL; - goto error; - } - - hr = whp_dispatch.WHvSetupPartition(whpx->partition); - if (FAILED(hr)) { - error_report("WHPX: Failed to setup partition, hr=%08lx", hr); - ret = -EINVAL; - goto error; - } - - whpx_memory_init(); - - cpu_interrupt_handler = whpx_handle_interrupt; - - printf("Windows Hypervisor Platform accelerator is operational\n"); - return 0; - - error: - - if (NULL != whpx->partition) { - whp_dispatch.WHvDeletePartition(whpx->partition); - whpx->partition = NULL; - } - - - return ret; -} - -int whpx_enabled(void) -{ - return whpx_allowed; -} - -static void whpx_accel_class_init(ObjectClass *oc, void *data) -{ - AccelClass *ac = ACCEL_CLASS(oc); - ac->name = "WHPX"; - ac->init_machine = whpx_accel_init; - ac->allowed = &whpx_allowed; -} - -static const TypeInfo whpx_accel_type = { - .name = ACCEL_CLASS_NAME("whpx"), - .parent = TYPE_ACCEL, - .class_init = whpx_accel_class_init, -}; - -static void whpx_type_init(void) -{ - type_register_static(&whpx_accel_type); -} - -bool init_whp_dispatch(void) -{ - const char *lib_name; - HMODULE hLib; - - if (whp_dispatch_initialized) { - return true; - } - - #define WHP_LOAD_FIELD(return_type, function_name, signature) \ - whp_dispatch.function_name = \ - (function_name ## _t)GetProcAddress(hLib, #function_name); \ - if (!whp_dispatch.function_name) { \ - error_report("Could not load function %s from library %s.", \ - #function_name, lib_name); \ - goto error; \ - } \ - - lib_name = "WinHvPlatform.dll"; - hWinHvPlatform = LoadLibrary(lib_name); - if (!hWinHvPlatform) { - error_report("Could not load library %s.", lib_name); - goto error; - } - hLib = hWinHvPlatform; - LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD) - - lib_name = "WinHvEmulation.dll"; - hWinHvEmulation = LoadLibrary(lib_name); - if (!hWinHvEmulation) { - error_report("Could 
not load library %s.", lib_name); - goto error; - } - hLib = hWinHvEmulation; - LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD) - - whp_dispatch_initialized = true; - return true; - - error: - - if (hWinHvPlatform) { - FreeLibrary(hWinHvPlatform); - } - if (hWinHvEmulation) { - FreeLibrary(hWinHvEmulation); - } - return false; -} - -type_init(whpx_type_init); diff --git a/target/i386/whpx/meson.build b/target/i386/whpx/meson.build new file mode 100644 index 0000000000..9c54aaad39 --- /dev/null +++ b/target/i386/whpx/meson.build @@ -0,0 +1,5 @@ +i386_system_ss.add(when: 'CONFIG_WHPX', if_true: files( + 'whpx-all.c', + 'whpx-apic.c', + 'whpx-accel-ops.c', +)) diff --git a/target/i386/whpx/whpx-accel-ops.c b/target/i386/whpx/whpx-accel-ops.c new file mode 100644 index 0000000000..189ae0f140 --- /dev/null +++ b/target/i386/whpx/whpx-accel-ops.c @@ -0,0 +1,115 @@ +/* + * QEMU Windows Hypervisor Platform accelerator (WHPX) + * + * Copyright Microsoft Corp. 2017 + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "sysemu/kvm_int.h" +#include "qemu/main-loop.h" +#include "sysemu/cpus.h" +#include "qemu/guest-random.h" + +#include "sysemu/whpx.h" +#include "whpx-internal.h" +#include "whpx-accel-ops.h" + +static void *whpx_cpu_thread_fn(void *arg) +{ + CPUState *cpu = arg; + int r; + + rcu_register_thread(); + + bql_lock(); + qemu_thread_get_self(cpu->thread); + cpu->thread_id = qemu_get_thread_id(); + current_cpu = cpu; + + r = whpx_init_vcpu(cpu); + if (r < 0) { + fprintf(stderr, "whpx_init_vcpu failed: %s\n", strerror(-r)); + exit(1); + } + + /* signal CPU creation */ + cpu_thread_signal_created(cpu); + qemu_guest_random_seed_thread_part2(cpu->random_seed); + + do { + if (cpu_can_run(cpu)) { + r = whpx_vcpu_exec(cpu); + if (r == EXCP_DEBUG) { + cpu_handle_guest_debug(cpu); + } + } + while (cpu_thread_is_idle(cpu)) { + qemu_cond_wait_bql(cpu->halt_cond); + } + qemu_wait_io_event_common(cpu); + } while (!cpu->unplug || cpu_can_run(cpu)); + + whpx_destroy_vcpu(cpu); + cpu_thread_signal_destroyed(cpu); + bql_unlock(); + rcu_unregister_thread(); + return NULL; +} + +static void whpx_start_vcpu_thread(CPUState *cpu) +{ + char thread_name[VCPU_THREAD_NAME_SIZE]; + + cpu->thread = g_new0(QemuThread, 1); + cpu->halt_cond = g_new0(QemuCond, 1); + qemu_cond_init(cpu->halt_cond); + snprintf(thread_name, VCPU_THREAD_NAME_SIZE, "CPU %d/WHPX", + cpu->cpu_index); + qemu_thread_create(cpu->thread, thread_name, whpx_cpu_thread_fn, + cpu, QEMU_THREAD_JOINABLE); +} + +static void whpx_kick_vcpu_thread(CPUState *cpu) +{ + if (!qemu_cpu_is_self(cpu)) { + whpx_vcpu_kick(cpu); + } +} + +static bool whpx_vcpu_thread_is_idle(CPUState *cpu) +{ + return !whpx_apic_in_platform(); +} + +static void whpx_accel_ops_class_init(ObjectClass *oc, void *data) +{ + AccelOpsClass *ops = ACCEL_OPS_CLASS(oc); + + ops->create_vcpu_thread = whpx_start_vcpu_thread; + ops->kick_vcpu_thread = whpx_kick_vcpu_thread; + ops->cpu_thread_is_idle = whpx_vcpu_thread_is_idle; + + 
ops->synchronize_post_reset = whpx_cpu_synchronize_post_reset; + ops->synchronize_post_init = whpx_cpu_synchronize_post_init; + ops->synchronize_state = whpx_cpu_synchronize_state; + ops->synchronize_pre_loadvm = whpx_cpu_synchronize_pre_loadvm; + ops->synchronize_pre_resume = whpx_cpu_synchronize_pre_resume; +} + +static const TypeInfo whpx_accel_ops_type = { + .name = ACCEL_OPS_NAME("whpx"), + + .parent = TYPE_ACCEL_OPS, + .class_init = whpx_accel_ops_class_init, + .abstract = true, +}; + +static void whpx_accel_ops_register_types(void) +{ + type_register_static(&whpx_accel_ops_type); +} +type_init(whpx_accel_ops_register_types); diff --git a/target/i386/whpx/whpx-accel-ops.h b/target/i386/whpx/whpx-accel-ops.h new file mode 100644 index 0000000000..7a1bb1ab57 --- /dev/null +++ b/target/i386/whpx/whpx-accel-ops.h @@ -0,0 +1,33 @@ +/* + * Accelerator CPUS Interface + * + * Copyright 2020 SUSE LLC + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ */ + +#ifndef TARGET_I386_WHPX_ACCEL_OPS_H +#define TARGET_I386_WHPX_ACCEL_OPS_H + +#include "sysemu/cpus.h" + +int whpx_init_vcpu(CPUState *cpu); +int whpx_vcpu_exec(CPUState *cpu); +void whpx_destroy_vcpu(CPUState *cpu); +void whpx_vcpu_kick(CPUState *cpu); + +void whpx_cpu_synchronize_state(CPUState *cpu); +void whpx_cpu_synchronize_post_reset(CPUState *cpu); +void whpx_cpu_synchronize_post_init(CPUState *cpu); +void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu); +void whpx_cpu_synchronize_pre_resume(bool step_pending); + +/* state subset only touched by the VCPU itself during runtime */ +#define WHPX_SET_RUNTIME_STATE 1 +/* state subset modified during VCPU reset */ +#define WHPX_SET_RESET_STATE 2 +/* full state set, modified during initialization or on vmload */ +#define WHPX_SET_FULL_STATE 3 + +#endif /* TARGET_I386_WHPX_ACCEL_OPS_H */ diff --git a/target/i386/whpx/whpx-all.c b/target/i386/whpx/whpx-all.c new file mode 100644 index 0000000000..31eec7048c --- /dev/null +++ b/target/i386/whpx/whpx-all.c @@ -0,0 +1,2777 @@ +/* + * QEMU Windows Hypervisor Platform accelerator (WHPX) + * + * Copyright Microsoft Corp. 2017 + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu/osdep.h" +#include "cpu.h" +#include "exec/address-spaces.h" +#include "exec/ioport.h" +#include "gdbstub/helpers.h" +#include "qemu/accel.h" +#include "sysemu/whpx.h" +#include "sysemu/cpus.h" +#include "sysemu/runstate.h" +#include "qemu/main-loop.h" +#include "hw/boards.h" +#include "hw/intc/ioapic.h" +#include "hw/i386/apic_internal.h" +#include "qemu/error-report.h" +#include "qapi/error.h" +#include "qapi/qapi-types-common.h" +#include "qapi/qapi-visit-common.h" +#include "migration/blocker.h" +#include <winerror.h> + +#include "whpx-internal.h" +#include "whpx-accel-ops.h" + +#include <winhvplatform.h> +#include <winhvemulation.h> + +#define HYPERV_APIC_BUS_FREQUENCY (200000000ULL) + +static const WHV_REGISTER_NAME whpx_register_names[] = { + + /* X64 General purpose registers */ + WHvX64RegisterRax, + WHvX64RegisterRcx, + WHvX64RegisterRdx, + WHvX64RegisterRbx, + WHvX64RegisterRsp, + WHvX64RegisterRbp, + WHvX64RegisterRsi, + WHvX64RegisterRdi, + WHvX64RegisterR8, + WHvX64RegisterR9, + WHvX64RegisterR10, + WHvX64RegisterR11, + WHvX64RegisterR12, + WHvX64RegisterR13, + WHvX64RegisterR14, + WHvX64RegisterR15, + WHvX64RegisterRip, + WHvX64RegisterRflags, + + /* X64 Segment registers */ + WHvX64RegisterEs, + WHvX64RegisterCs, + WHvX64RegisterSs, + WHvX64RegisterDs, + WHvX64RegisterFs, + WHvX64RegisterGs, + WHvX64RegisterLdtr, + WHvX64RegisterTr, + + /* X64 Table registers */ + WHvX64RegisterIdtr, + WHvX64RegisterGdtr, + + /* X64 Control Registers */ + WHvX64RegisterCr0, + WHvX64RegisterCr2, + WHvX64RegisterCr3, + WHvX64RegisterCr4, + WHvX64RegisterCr8, + + /* X64 Debug Registers */ + /* + * WHvX64RegisterDr0, + * WHvX64RegisterDr1, + * WHvX64RegisterDr2, + * WHvX64RegisterDr3, + * WHvX64RegisterDr6, + * WHvX64RegisterDr7, + */ + + /* X64 Floating Point and Vector Registers */ + WHvX64RegisterXmm0, + WHvX64RegisterXmm1, + WHvX64RegisterXmm2, + WHvX64RegisterXmm3, + WHvX64RegisterXmm4, + WHvX64RegisterXmm5, + WHvX64RegisterXmm6, + 
WHvX64RegisterXmm7, + WHvX64RegisterXmm8, + WHvX64RegisterXmm9, + WHvX64RegisterXmm10, + WHvX64RegisterXmm11, + WHvX64RegisterXmm12, + WHvX64RegisterXmm13, + WHvX64RegisterXmm14, + WHvX64RegisterXmm15, + WHvX64RegisterFpMmx0, + WHvX64RegisterFpMmx1, + WHvX64RegisterFpMmx2, + WHvX64RegisterFpMmx3, + WHvX64RegisterFpMmx4, + WHvX64RegisterFpMmx5, + WHvX64RegisterFpMmx6, + WHvX64RegisterFpMmx7, + WHvX64RegisterFpControlStatus, + WHvX64RegisterXmmControlStatus, + + /* X64 MSRs */ + WHvX64RegisterEfer, +#ifdef TARGET_X86_64 + WHvX64RegisterKernelGsBase, +#endif + WHvX64RegisterApicBase, + /* WHvX64RegisterPat, */ + WHvX64RegisterSysenterCs, + WHvX64RegisterSysenterEip, + WHvX64RegisterSysenterEsp, + WHvX64RegisterStar, +#ifdef TARGET_X86_64 + WHvX64RegisterLstar, + WHvX64RegisterCstar, + WHvX64RegisterSfmask, +#endif + + /* Interrupt / Event Registers */ + /* + * WHvRegisterPendingInterruption, + * WHvRegisterInterruptState, + * WHvRegisterPendingEvent0, + * WHvRegisterPendingEvent1 + * WHvX64RegisterDeliverabilityNotifications, + */ +}; + +struct whpx_register_set { + WHV_REGISTER_VALUE values[RTL_NUMBER_OF(whpx_register_names)]; +}; + +/* + * The current implementation of instruction stepping sets the TF flag + * in RFLAGS, causing the CPU to raise an INT1 after each instruction. + * This corresponds to the WHvX64ExceptionTypeDebugTrapOrFault exception. + * + * This approach has a few limitations: + * 1. Stepping over a PUSHF/SAHF instruction will save the TF flag + * along with the other flags, possibly restoring it later. It would + * result in another INT1 when the flags are restored, triggering + * a stop in gdb that could be cleared by doing another step. + * + * Stepping over a POPF/LAHF instruction will let it overwrite the + * TF flags, ending the stepping mode. + * + * 2. Stepping over an instruction raising an exception (e.g. 
INT, DIV, + * or anything that could result in a page fault) will save the flags + * to the stack, clear the TF flag, and let the guest execute the + * handler. Normally, the guest will restore the original flags, + * that will continue single-stepping. + * + * 3. Debuggers running on the guest may wish to set TF to do instruction + * stepping. INT1 events generated by it would be intercepted by us, + * as long as the gdb is connected to QEMU. + * + * In practice this means that: + * 1. Stepping through flags-modifying instructions may cause gdb to + * continue or stop in unexpected places. This will be fully recoverable + * and will not crash the target. + * + * 2. Stepping over an instruction that triggers an exception will step + * over the exception handler, not into it. + * + * 3. Debugging the guest via gdb, while running debugger on the guest + * at the same time may lead to unexpected effects. Removing all + * breakpoints set via QEMU will prevent any further interference + * with the guest-level debuggers. + * + * The limitations can be addressed as shown below: + * 1. PUSHF/SAHF/POPF/LAHF/IRET instructions can be emulated instead of + * stepping through them. The exact semantics of the instructions is + * defined in the "Combined Volume Set of Intel 64 and IA-32 + * Architectures Software Developer's Manuals", however it involves a + * fair amount of corner cases due to compatibility with real mode, + * virtual 8086 mode, and differences between 64-bit and 32-bit modes. + * + * 2. We could step into the guest's exception handlers using the following + * sequence: + * a. Temporarily enable catching of all exception types via + * whpx_set_exception_exit_bitmap(). + * b. Once an exception is intercepted, read the IDT/GDT and locate + * the original handler. + * c. Patch the original handler, injecting an INT3 at the beginning. + * d. Update the exception exit bitmap to only catch the + * WHvX64ExceptionTypeBreakpointTrap exception. + * e. 
Let the affected CPU run in the exclusive mode. + * f. Restore the original handler and the exception exit bitmap. + * Note that handling all corner cases related to IDT/GDT is harder + * than it may seem. See x86_cpu_get_phys_page_attrs_debug() for a + * rough idea. + * + * 3. In order to properly support guest-level debugging in parallel with + * the QEMU-level debugging, we would need to be able to pass some INT1 + * events to the guest. This could be done via the following methods: + * a. Using the WHvRegisterPendingEvent register. As of Windows 21H1, + * it seems to only work for interrupts and not software + * exceptions. + * b. Locating and patching the original handler by parsing IDT/GDT. + * This involves relatively complex logic outlined in the previous + * paragraph. + * c. Emulating the exception invocation (i.e. manually updating RIP, + * RFLAGS, and pushing the old values to stack). This is even more + * complicated than the previous option, since it involves checking + * CPL, gate attributes, and doing various adjustments depending + * on the current CPU mode, whether the CPL is changing, etc. 
+ */ +typedef enum WhpxStepMode { + WHPX_STEP_NONE = 0, + /* Halt other VCPUs */ + WHPX_STEP_EXCLUSIVE, +} WhpxStepMode; + +struct AccelCPUState { + WHV_EMULATOR_HANDLE emulator; + bool window_registered; + bool interruptable; + bool ready_for_pic_interrupt; + uint64_t tpr; + uint64_t apic_base; + bool interruption_pending; + + /* Must be the last field as it may have a tail */ + WHV_RUN_VP_EXIT_CONTEXT exit_ctx; +}; + +static bool whpx_allowed; +static bool whp_dispatch_initialized; +static HMODULE hWinHvPlatform, hWinHvEmulation; +static uint32_t max_vcpu_index; +static WHV_PROCESSOR_XSAVE_FEATURES whpx_xsave_cap; + +struct whpx_state whpx_global; +struct WHPDispatch whp_dispatch; + +static bool whpx_has_xsave(void) +{ + return whpx_xsave_cap.XsaveSupport; +} + +static WHV_X64_SEGMENT_REGISTER whpx_seg_q2h(const SegmentCache *qs, int v86, + int r86) +{ + WHV_X64_SEGMENT_REGISTER hs; + unsigned flags = qs->flags; + + hs.Base = qs->base; + hs.Limit = qs->limit; + hs.Selector = qs->selector; + + if (v86) { + hs.Attributes = 0; + hs.SegmentType = 3; + hs.Present = 1; + hs.DescriptorPrivilegeLevel = 3; + hs.NonSystemSegment = 1; + + } else { + hs.Attributes = (flags >> DESC_TYPE_SHIFT); + + if (r86) { + /* hs.Base &= 0xfffff; */ + } + } + + return hs; +} + +static SegmentCache whpx_seg_h2q(const WHV_X64_SEGMENT_REGISTER *hs) +{ + SegmentCache qs; + + qs.base = hs->Base; + qs.limit = hs->Limit; + qs.selector = hs->Selector; + + qs.flags = ((uint32_t)hs->Attributes) << DESC_TYPE_SHIFT; + + return qs; +} + +/* X64 Extended Control Registers */ +static void whpx_set_xcrs(CPUState *cpu) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + WHV_REGISTER_VALUE xcr0; + WHV_REGISTER_NAME xcr0_name = WHvX64RegisterXCr0; + + if (!whpx_has_xsave()) { + return; + } + + /* Only xcr0 is supported by the hypervisor currently */ + xcr0.Reg64 = cpu_env(cpu)->xcr0; + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, &xcr0_name, 1, &xcr0); + if 
(FAILED(hr)) { + error_report("WHPX: Failed to set register xcr0, hr=%08lx", hr); + } +} + +static int whpx_set_tsc(CPUState *cpu) +{ + WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc; + WHV_REGISTER_VALUE tsc_val; + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + + /* + * Suspend the partition prior to setting the TSC to reduce the variance + * in TSC across vCPUs. When the first vCPU runs post suspend, the + * partition is automatically resumed. + */ + if (whp_dispatch.WHvSuspendPartitionTime) { + + /* + * Unable to suspend partition while setting TSC is not a fatal + * error. It just increases the likelihood of TSC variance between + * vCPUs and some guest OS are able to handle that just fine. + */ + hr = whp_dispatch.WHvSuspendPartitionTime(whpx->partition); + if (FAILED(hr)) { + warn_report("WHPX: Failed to suspend partition, hr=%08lx", hr); + } + } + + tsc_val.Reg64 = cpu_env(cpu)->tsc; + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val); + if (FAILED(hr)) { + error_report("WHPX: Failed to set TSC, hr=%08lx", hr); + return -1; + } + + return 0; +} + +/* + * The CR8 register in the CPU is mapped to the TPR register of the APIC, + * however, they use a slightly different encoding. Specifically: + * + * APIC.TPR[bits 7:4] = CR8[bits 3:0] + * + * This mechanism is described in section 10.8.6.1 of Volume 3 of Intel 64 + * and IA-32 Architectures Software Developer's Manual. + * + * The functions below translate the value of CR8 to TPR and vice versa. 
+ */ + +static uint64_t whpx_apic_tpr_to_cr8(uint64_t tpr) +{ + return tpr >> 4; +} + +static uint64_t whpx_cr8_to_apic_tpr(uint64_t cr8) +{ + return cr8 << 4; +} + +static void whpx_set_registers(CPUState *cpu, int level) +{ + struct whpx_state *whpx = &whpx_global; + AccelCPUState *vcpu = cpu->accel; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + struct whpx_register_set vcxt; + HRESULT hr; + int idx; + int idx_next; + int i; + int v86, r86; + + assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + + /* + * Following MSRs have side effects on the guest or are too heavy for + * runtime. Limit them to full state update. + */ + if (level >= WHPX_SET_RESET_STATE) { + whpx_set_tsc(cpu); + } + + memset(&vcxt, 0, sizeof(struct whpx_register_set)); + + v86 = (env->eflags & VM_MASK); + r86 = !(env->cr[0] & CR0_PE_MASK); + + vcpu->tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state)); + vcpu->apic_base = cpu_get_apic_base(x86_cpu->apic_state); + + idx = 0; + + /* Indexes for first 16 registers match between HV and QEMU definitions */ + idx_next = 16; + for (idx = 0; idx < CPU_NB_REGS; idx += 1) { + vcxt.values[idx].Reg64 = (uint64_t)env->regs[idx]; + } + idx = idx_next; + + /* Same goes for RIP and RFLAGS */ + assert(whpx_register_names[idx] == WHvX64RegisterRip); + vcxt.values[idx++].Reg64 = env->eip; + + assert(whpx_register_names[idx] == WHvX64RegisterRflags); + vcxt.values[idx++].Reg64 = env->eflags; + + /* Translate 6+4 segment registers. 
HV and QEMU order matches */ + assert(idx == WHvX64RegisterEs); + for (i = 0; i < 6; i += 1, idx += 1) { + vcxt.values[idx].Segment = whpx_seg_q2h(&env->segs[i], v86, r86); + } + + assert(idx == WHvX64RegisterLdtr); + vcxt.values[idx++].Segment = whpx_seg_q2h(&env->ldt, 0, 0); + + assert(idx == WHvX64RegisterTr); + vcxt.values[idx++].Segment = whpx_seg_q2h(&env->tr, 0, 0); + + assert(idx == WHvX64RegisterIdtr); + vcxt.values[idx].Table.Base = env->idt.base; + vcxt.values[idx].Table.Limit = env->idt.limit; + idx += 1; + + assert(idx == WHvX64RegisterGdtr); + vcxt.values[idx].Table.Base = env->gdt.base; + vcxt.values[idx].Table.Limit = env->gdt.limit; + idx += 1; + + /* CR0, 2, 3, 4, 8 */ + assert(whpx_register_names[idx] == WHvX64RegisterCr0); + vcxt.values[idx++].Reg64 = env->cr[0]; + assert(whpx_register_names[idx] == WHvX64RegisterCr2); + vcxt.values[idx++].Reg64 = env->cr[2]; + assert(whpx_register_names[idx] == WHvX64RegisterCr3); + vcxt.values[idx++].Reg64 = env->cr[3]; + assert(whpx_register_names[idx] == WHvX64RegisterCr4); + vcxt.values[idx++].Reg64 = env->cr[4]; + assert(whpx_register_names[idx] == WHvX64RegisterCr8); + vcxt.values[idx++].Reg64 = vcpu->tpr; + + /* 8 Debug Registers - Skipped */ + + /* + * Extended control registers needs to be handled separately depending + * on whether xsave is supported/enabled or not. 
+ */ + whpx_set_xcrs(cpu); + + /* 16 XMM registers */ + assert(whpx_register_names[idx] == WHvX64RegisterXmm0); + idx_next = idx + 16; + for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) { + vcxt.values[idx].Reg128.Low64 = env->xmm_regs[i].ZMM_Q(0); + vcxt.values[idx].Reg128.High64 = env->xmm_regs[i].ZMM_Q(1); + } + idx = idx_next; + + /* 8 FP registers */ + assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0); + for (i = 0; i < 8; i += 1, idx += 1) { + vcxt.values[idx].Fp.AsUINT128.Low64 = env->fpregs[i].mmx.MMX_Q(0); + /* vcxt.values[idx].Fp.AsUINT128.High64 = + env->fpregs[i].mmx.MMX_Q(1); + */ + } + + /* FP control status register */ + assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus); + vcxt.values[idx].FpControlStatus.FpControl = env->fpuc; + vcxt.values[idx].FpControlStatus.FpStatus = + (env->fpus & ~0x3800) | (env->fpstt & 0x7) << 11; + vcxt.values[idx].FpControlStatus.FpTag = 0; + for (i = 0; i < 8; ++i) { + vcxt.values[idx].FpControlStatus.FpTag |= (!env->fptags[i]) << i; + } + vcxt.values[idx].FpControlStatus.Reserved = 0; + vcxt.values[idx].FpControlStatus.LastFpOp = env->fpop; + vcxt.values[idx].FpControlStatus.LastFpRip = env->fpip; + idx += 1; + + /* XMM control status register */ + assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus); + vcxt.values[idx].XmmControlStatus.LastFpRdp = 0; + vcxt.values[idx].XmmControlStatus.XmmStatusControl = env->mxcsr; + vcxt.values[idx].XmmControlStatus.XmmStatusControlMask = 0x0000ffff; + idx += 1; + + /* MSRs */ + assert(whpx_register_names[idx] == WHvX64RegisterEfer); + vcxt.values[idx++].Reg64 = env->efer; +#ifdef TARGET_X86_64 + assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase); + vcxt.values[idx++].Reg64 = env->kernelgsbase; +#endif + + assert(whpx_register_names[idx] == WHvX64RegisterApicBase); + vcxt.values[idx++].Reg64 = vcpu->apic_base; + + /* WHvX64RegisterPat - Skipped */ + + assert(whpx_register_names[idx] == 
WHvX64RegisterSysenterCs); + vcxt.values[idx++].Reg64 = env->sysenter_cs; + assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip); + vcxt.values[idx++].Reg64 = env->sysenter_eip; + assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp); + vcxt.values[idx++].Reg64 = env->sysenter_esp; + assert(whpx_register_names[idx] == WHvX64RegisterStar); + vcxt.values[idx++].Reg64 = env->star; +#ifdef TARGET_X86_64 + assert(whpx_register_names[idx] == WHvX64RegisterLstar); + vcxt.values[idx++].Reg64 = env->lstar; + assert(whpx_register_names[idx] == WHvX64RegisterCstar); + vcxt.values[idx++].Reg64 = env->cstar; + assert(whpx_register_names[idx] == WHvX64RegisterSfmask); + vcxt.values[idx++].Reg64 = env->fmask; +#endif + + /* Interrupt / Event Registers - Skipped */ + + assert(idx == RTL_NUMBER_OF(whpx_register_names)); + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, + whpx_register_names, + RTL_NUMBER_OF(whpx_register_names), + &vcxt.values[0]); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set virtual processor context, hr=%08lx", + hr); + } + + return; +} + +static int whpx_get_tsc(CPUState *cpu) +{ + WHV_REGISTER_NAME tsc_reg = WHvX64RegisterTsc; + WHV_REGISTER_VALUE tsc_val; + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + + hr = whp_dispatch.WHvGetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, &tsc_reg, 1, &tsc_val); + if (FAILED(hr)) { + error_report("WHPX: Failed to get TSC, hr=%08lx", hr); + return -1; + } + + cpu_env(cpu)->tsc = tsc_val.Reg64; + return 0; +} + +/* X64 Extended Control Registers */ +static void whpx_get_xcrs(CPUState *cpu) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + WHV_REGISTER_VALUE xcr0; + WHV_REGISTER_NAME xcr0_name = WHvX64RegisterXCr0; + + if (!whpx_has_xsave()) { + return; + } + + /* Only xcr0 is supported by the hypervisor currently */ + hr = whp_dispatch.WHvGetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, &xcr0_name, 1, 
&xcr0); + if (FAILED(hr)) { + error_report("WHPX: Failed to get register xcr0, hr=%08lx", hr); + return; + } + + cpu_env(cpu)->xcr0 = xcr0.Reg64; +} + +static void whpx_get_registers(CPUState *cpu) +{ + struct whpx_state *whpx = &whpx_global; + AccelCPUState *vcpu = cpu->accel; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + struct whpx_register_set vcxt; + uint64_t tpr, apic_base; + HRESULT hr; + int idx; + int idx_next; + int i; + + assert(cpu_is_stopped(cpu) || qemu_cpu_is_self(cpu)); + + if (!env->tsc_valid) { + whpx_get_tsc(cpu); + env->tsc_valid = !runstate_is_running(); + } + + hr = whp_dispatch.WHvGetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, + whpx_register_names, + RTL_NUMBER_OF(whpx_register_names), + &vcxt.values[0]); + if (FAILED(hr)) { + error_report("WHPX: Failed to get virtual processor context, hr=%08lx", + hr); + } + + if (whpx_apic_in_platform()) { + /* + * Fetch the TPR value from the emulated APIC. It may get overwritten + * below with the value from CR8 returned by + * WHvGetVirtualProcessorRegisters(). + */ + whpx_apic_get(x86_cpu->apic_state); + vcpu->tpr = whpx_apic_tpr_to_cr8( + cpu_get_apic_tpr(x86_cpu->apic_state)); + } + + idx = 0; + + /* Indexes for first 16 registers match between HV and QEMU definitions */ + idx_next = 16; + for (idx = 0; idx < CPU_NB_REGS; idx += 1) { + env->regs[idx] = vcxt.values[idx].Reg64; + } + idx = idx_next; + + /* Same goes for RIP and RFLAGS */ + assert(whpx_register_names[idx] == WHvX64RegisterRip); + env->eip = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterRflags); + env->eflags = vcxt.values[idx++].Reg64; + + /* Translate 6+4 segment registers. 
HV and QEMU order matches */ + assert(idx == WHvX64RegisterEs); + for (i = 0; i < 6; i += 1, idx += 1) { + env->segs[i] = whpx_seg_h2q(&vcxt.values[idx].Segment); + } + + assert(idx == WHvX64RegisterLdtr); + env->ldt = whpx_seg_h2q(&vcxt.values[idx++].Segment); + assert(idx == WHvX64RegisterTr); + env->tr = whpx_seg_h2q(&vcxt.values[idx++].Segment); + assert(idx == WHvX64RegisterIdtr); + env->idt.base = vcxt.values[idx].Table.Base; + env->idt.limit = vcxt.values[idx].Table.Limit; + idx += 1; + assert(idx == WHvX64RegisterGdtr); + env->gdt.base = vcxt.values[idx].Table.Base; + env->gdt.limit = vcxt.values[idx].Table.Limit; + idx += 1; + + /* CR0, 2, 3, 4, 8 */ + assert(whpx_register_names[idx] == WHvX64RegisterCr0); + env->cr[0] = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCr2); + env->cr[2] = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCr3); + env->cr[3] = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCr4); + env->cr[4] = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCr8); + tpr = vcxt.values[idx++].Reg64; + if (tpr != vcpu->tpr) { + vcpu->tpr = tpr; + cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(tpr)); + } + + /* 8 Debug Registers - Skipped */ + + /* + * Extended control registers needs to be handled separately depending + * on whether xsave is supported/enabled or not. 
+ */ + whpx_get_xcrs(cpu); + + /* 16 XMM registers */ + assert(whpx_register_names[idx] == WHvX64RegisterXmm0); + idx_next = idx + 16; + for (i = 0; i < sizeof(env->xmm_regs) / sizeof(ZMMReg); i += 1, idx += 1) { + env->xmm_regs[i].ZMM_Q(0) = vcxt.values[idx].Reg128.Low64; + env->xmm_regs[i].ZMM_Q(1) = vcxt.values[idx].Reg128.High64; + } + idx = idx_next; + + /* 8 FP registers */ + assert(whpx_register_names[idx] == WHvX64RegisterFpMmx0); + for (i = 0; i < 8; i += 1, idx += 1) { + env->fpregs[i].mmx.MMX_Q(0) = vcxt.values[idx].Fp.AsUINT128.Low64; + /* env->fpregs[i].mmx.MMX_Q(1) = + vcxt.values[idx].Fp.AsUINT128.High64; + */ + } + + /* FP control status register */ + assert(whpx_register_names[idx] == WHvX64RegisterFpControlStatus); + env->fpuc = vcxt.values[idx].FpControlStatus.FpControl; + env->fpstt = (vcxt.values[idx].FpControlStatus.FpStatus >> 11) & 0x7; + env->fpus = vcxt.values[idx].FpControlStatus.FpStatus & ~0x3800; + for (i = 0; i < 8; ++i) { + env->fptags[i] = !((vcxt.values[idx].FpControlStatus.FpTag >> i) & 1); + } + env->fpop = vcxt.values[idx].FpControlStatus.LastFpOp; + env->fpip = vcxt.values[idx].FpControlStatus.LastFpRip; + idx += 1; + + /* XMM control status register */ + assert(whpx_register_names[idx] == WHvX64RegisterXmmControlStatus); + env->mxcsr = vcxt.values[idx].XmmControlStatus.XmmStatusControl; + idx += 1; + + /* MSRs */ + assert(whpx_register_names[idx] == WHvX64RegisterEfer); + env->efer = vcxt.values[idx++].Reg64; +#ifdef TARGET_X86_64 + assert(whpx_register_names[idx] == WHvX64RegisterKernelGsBase); + env->kernelgsbase = vcxt.values[idx++].Reg64; +#endif + + assert(whpx_register_names[idx] == WHvX64RegisterApicBase); + apic_base = vcxt.values[idx++].Reg64; + if (apic_base != vcpu->apic_base) { + vcpu->apic_base = apic_base; + cpu_set_apic_base(x86_cpu->apic_state, vcpu->apic_base); + } + + /* WHvX64RegisterPat - Skipped */ + + assert(whpx_register_names[idx] == WHvX64RegisterSysenterCs); + env->sysenter_cs = 
vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterSysenterEip); + env->sysenter_eip = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterSysenterEsp); + env->sysenter_esp = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterStar); + env->star = vcxt.values[idx++].Reg64; +#ifdef TARGET_X86_64 + assert(whpx_register_names[idx] == WHvX64RegisterLstar); + env->lstar = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterCstar); + env->cstar = vcxt.values[idx++].Reg64; + assert(whpx_register_names[idx] == WHvX64RegisterSfmask); + env->fmask = vcxt.values[idx++].Reg64; +#endif + + /* Interrupt / Event Registers - Skipped */ + + assert(idx == RTL_NUMBER_OF(whpx_register_names)); + + if (whpx_apic_in_platform()) { + whpx_apic_get(x86_cpu->apic_state); + } + + x86_update_hflags(env); + + return; +} + +static HRESULT CALLBACK whpx_emu_ioport_callback( + void *ctx, + WHV_EMULATOR_IO_ACCESS_INFO *IoAccess) +{ + MemTxAttrs attrs = { 0 }; + address_space_rw(&address_space_io, IoAccess->Port, attrs, + &IoAccess->Data, IoAccess->AccessSize, + IoAccess->Direction); + return S_OK; +} + +static HRESULT CALLBACK whpx_emu_mmio_callback( + void *ctx, + WHV_EMULATOR_MEMORY_ACCESS_INFO *ma) +{ + cpu_physical_memory_rw(ma->GpaAddress, ma->Data, ma->AccessSize, + ma->Direction); + return S_OK; +} + +static HRESULT CALLBACK whpx_emu_getreg_callback( + void *ctx, + const WHV_REGISTER_NAME *RegisterNames, + UINT32 RegisterCount, + WHV_REGISTER_VALUE *RegisterValues) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + CPUState *cpu = (CPUState *)ctx; + + hr = whp_dispatch.WHvGetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, + RegisterNames, RegisterCount, + RegisterValues); + if (FAILED(hr)) { + error_report("WHPX: Failed to get virtual processor registers," + " hr=%08lx", hr); + } + + return hr; +} + +static HRESULT CALLBACK whpx_emu_setreg_callback( + void *ctx, + 
const WHV_REGISTER_NAME *RegisterNames, + UINT32 RegisterCount, + const WHV_REGISTER_VALUE *RegisterValues) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + CPUState *cpu = (CPUState *)ctx; + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, + RegisterNames, RegisterCount, + RegisterValues); + if (FAILED(hr)) { + error_report("WHPX: Failed to set virtual processor registers," + " hr=%08lx", hr); + } + + /* + * The emulator just successfully wrote the register state. We clear the + * dirty state so we avoid the double write on resume of the VP. + */ + cpu->vcpu_dirty = false; + + return hr; +} + +static HRESULT CALLBACK whpx_emu_translate_callback( + void *ctx, + WHV_GUEST_VIRTUAL_ADDRESS Gva, + WHV_TRANSLATE_GVA_FLAGS TranslateFlags, + WHV_TRANSLATE_GVA_RESULT_CODE *TranslationResult, + WHV_GUEST_PHYSICAL_ADDRESS *Gpa) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + CPUState *cpu = (CPUState *)ctx; + WHV_TRANSLATE_GVA_RESULT res; + + hr = whp_dispatch.WHvTranslateGva(whpx->partition, cpu->cpu_index, + Gva, TranslateFlags, &res, Gpa); + if (FAILED(hr)) { + error_report("WHPX: Failed to translate GVA, hr=%08lx", hr); + } else { + *TranslationResult = res.ResultCode; + } + + return hr; +} + +static const WHV_EMULATOR_CALLBACKS whpx_emu_callbacks = { + .Size = sizeof(WHV_EMULATOR_CALLBACKS), + .WHvEmulatorIoPortCallback = whpx_emu_ioport_callback, + .WHvEmulatorMemoryCallback = whpx_emu_mmio_callback, + .WHvEmulatorGetVirtualProcessorRegisters = whpx_emu_getreg_callback, + .WHvEmulatorSetVirtualProcessorRegisters = whpx_emu_setreg_callback, + .WHvEmulatorTranslateGvaPage = whpx_emu_translate_callback, +}; + +static int whpx_handle_mmio(CPUState *cpu, WHV_MEMORY_ACCESS_CONTEXT *ctx) +{ + HRESULT hr; + AccelCPUState *vcpu = cpu->accel; + WHV_EMULATOR_STATUS emu_status; + + hr = whp_dispatch.WHvEmulatorTryMmioEmulation( + vcpu->emulator, cpu, + &vcpu->exit_ctx.VpContext, ctx, + &emu_status); + if (FAILED(hr)) 
{ + error_report("WHPX: Failed to parse MMIO access, hr=%08lx", hr); + return -1; + } + + if (!emu_status.EmulationSuccessful) { + error_report("WHPX: Failed to emulate MMIO access with" + " EmulatorReturnStatus: %u", emu_status.AsUINT32); + return -1; + } + + return 0; +} + +static int whpx_handle_portio(CPUState *cpu, + WHV_X64_IO_PORT_ACCESS_CONTEXT *ctx) +{ + HRESULT hr; + AccelCPUState *vcpu = cpu->accel; + WHV_EMULATOR_STATUS emu_status; + + hr = whp_dispatch.WHvEmulatorTryIoEmulation( + vcpu->emulator, cpu, + &vcpu->exit_ctx.VpContext, ctx, + &emu_status); + if (FAILED(hr)) { + error_report("WHPX: Failed to parse PortIO access, hr=%08lx", hr); + return -1; + } + + if (!emu_status.EmulationSuccessful) { + error_report("WHPX: Failed to emulate PortIO access with" + " EmulatorReturnStatus: %u", emu_status.AsUINT32); + return -1; + } + + return 0; +} + +/* + * Controls whether we should intercept various exceptions on the guest, + * namely breakpoint/single-step events. + * + * The 'exceptions' argument accepts a bitmask, e.g: + * (1 << WHvX64ExceptionTypeDebugTrapOrFault) | (...) + */ +static HRESULT whpx_set_exception_exit_bitmap(UINT64 exceptions) +{ + struct whpx_state *whpx = &whpx_global; + WHV_PARTITION_PROPERTY prop = { 0, }; + HRESULT hr; + + if (exceptions == whpx->exception_exit_bitmap) { + return S_OK; + } + + prop.ExceptionExitBitmap = exceptions; + + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeExceptionExitBitmap, + &prop, + sizeof(WHV_PARTITION_PROPERTY)); + + if (SUCCEEDED(hr)) { + whpx->exception_exit_bitmap = exceptions; + } + + return hr; +} + + +/* + * This function is called before/after stepping over a single instruction. + * It will update the CPU registers to arm/disarm the instruction stepping + * accordingly. 
+ */ +static HRESULT whpx_vcpu_configure_single_stepping(CPUState *cpu, + bool set, + uint64_t *exit_context_rflags) +{ + WHV_REGISTER_NAME reg_name; + WHV_REGISTER_VALUE reg_value; + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + + /* + * If we are trying to step over a single instruction, we need to set the + * TF bit in rflags. Otherwise, clear it. + */ + reg_name = WHvX64RegisterRflags; + hr = whp_dispatch.WHvGetVirtualProcessorRegisters( + whpx->partition, + cpu->cpu_index, + ®_name, + 1, + ®_value); + + if (FAILED(hr)) { + error_report("WHPX: Failed to get rflags, hr=%08lx", hr); + return hr; + } + + if (exit_context_rflags) { + assert(*exit_context_rflags == reg_value.Reg64); + } + + if (set) { + /* Raise WHvX64ExceptionTypeDebugTrapOrFault after each instruction */ + reg_value.Reg64 |= TF_MASK; + } else { + reg_value.Reg64 &= ~TF_MASK; + } + + if (exit_context_rflags) { + *exit_context_rflags = reg_value.Reg64; + } + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, + cpu->cpu_index, + ®_name, + 1, + ®_value); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set rflags," + " hr=%08lx", + hr); + return hr; + } + + reg_name = WHvRegisterInterruptState; + reg_value.Reg64 = 0; + + /* Suspend delivery of hardware interrupts during single-stepping. */ + reg_value.InterruptState.InterruptShadow = set != 0; + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, + cpu->cpu_index, + ®_name, + 1, + ®_value); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set InterruptState," + " hr=%08lx", + hr); + return hr; + } + + if (!set) { + /* + * We have just finished stepping over a single instruction, + * and intercepted the INT1 generated by it. + * We need to now hide the INT1 from the guest, + * as it would not be expecting it. 
+ */ + + reg_name = WHvX64RegisterPendingDebugException; + hr = whp_dispatch.WHvGetVirtualProcessorRegisters( + whpx->partition, + cpu->cpu_index, + ®_name, + 1, + ®_value); + + if (FAILED(hr)) { + error_report("WHPX: Failed to get pending debug exceptions," + "hr=%08lx", hr); + return hr; + } + + if (reg_value.PendingDebugException.SingleStep) { + reg_value.PendingDebugException.SingleStep = 0; + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, + cpu->cpu_index, + ®_name, + 1, + ®_value); + + if (FAILED(hr)) { + error_report("WHPX: Failed to clear pending debug exceptions," + "hr=%08lx", hr); + return hr; + } + } + + } + + return S_OK; +} + +/* Tries to find a breakpoint at the specified address. */ +static struct whpx_breakpoint *whpx_lookup_breakpoint_by_addr(uint64_t address) +{ + struct whpx_state *whpx = &whpx_global; + int i; + + if (whpx->breakpoints.breakpoints) { + for (i = 0; i < whpx->breakpoints.breakpoints->used; i++) { + if (address == whpx->breakpoints.breakpoints->data[i].address) { + return &whpx->breakpoints.breakpoints->data[i]; + } + } + } + + return NULL; +} + +/* + * Linux uses int3 (0xCC) during startup (see int3_selftest()) and for + * debugging user-mode applications. Since the WHPX API does not offer + * an easy way to pass the intercepted exception back to the guest, we + * resort to using INT1 instead, and let the guest always handle INT3. + */ +static const uint8_t whpx_breakpoint_instruction = 0xF1; + +/* + * The WHPX QEMU backend implements breakpoints by writing the INT1 + * instruction into memory (ignoring the DRx registers). This raises a few + * issues that need to be carefully handled: + * + * 1. Although unlikely, other parts of QEMU may set multiple breakpoints + * at the same location, and later remove them in arbitrary order. + * This should not cause memory corruption, and should only remove the + * physical breakpoint instruction when the last QEMU breakpoint is gone. + * + * 2. 
Writing arbitrary virtual memory may fail if it's not mapped to a valid + * physical location. Hence, physically adding/removing a breakpoint can + * theoretically fail at any time. We need to keep track of it. + * + * The function below rebuilds a list of low-level breakpoints (one per + * address, tracking the original instruction and any errors) from the list of + * high-level breakpoints (set via cpu_breakpoint_insert()). + * + * In order to optimize performance, this function stores the list of + * high-level breakpoints (a.k.a. CPU breakpoints) used to compute the + * low-level ones, so that it won't be re-invoked until these breakpoints + * change. + * + * Note that this function decides which breakpoints should be inserted into, + * memory, but doesn't actually do it. The memory accessing is done in + * whpx_apply_breakpoints(). + */ +static void whpx_translate_cpu_breakpoints( + struct whpx_breakpoints *breakpoints, + CPUState *cpu, + int cpu_breakpoint_count) +{ + CPUBreakpoint *bp; + int cpu_bp_index = 0; + + breakpoints->original_addresses = + g_renew(vaddr, breakpoints->original_addresses, cpu_breakpoint_count); + + breakpoints->original_address_count = cpu_breakpoint_count; + + int max_breakpoints = cpu_breakpoint_count + + (breakpoints->breakpoints ? breakpoints->breakpoints->used : 0); + + struct whpx_breakpoint_collection *new_breakpoints = + g_malloc0(sizeof(struct whpx_breakpoint_collection) + + max_breakpoints * sizeof(struct whpx_breakpoint)); + + new_breakpoints->allocated = max_breakpoints; + new_breakpoints->used = 0; + + /* + * 1. Preserve all old breakpoints that could not be automatically + * cleared when the CPU got stopped. + */ + if (breakpoints->breakpoints) { + int i; + for (i = 0; i < breakpoints->breakpoints->used; i++) { + if (breakpoints->breakpoints->data[i].state != WHPX_BP_CLEARED) { + new_breakpoints->data[new_breakpoints->used++] = + breakpoints->breakpoints->data[i]; + } + } + } + + /* 2. 
Map all CPU breakpoints to WHPX breakpoints */ + QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) { + int i; + bool found = false; + + /* This will be used to detect changed CPU breakpoints later. */ + breakpoints->original_addresses[cpu_bp_index++] = bp->pc; + + for (i = 0; i < new_breakpoints->used; i++) { + /* + * WARNING: This loop has O(N^2) complexity, where N is the + * number of breakpoints. It should not be a bottleneck in + * real-world scenarios, since it only needs to run once after + * the breakpoints have been modified. + * If this ever becomes a concern, it can be optimized by storing + * high-level breakpoint objects in a tree or hash map. + */ + + if (new_breakpoints->data[i].address == bp->pc) { + /* There was already a breakpoint at this address. */ + if (new_breakpoints->data[i].state == WHPX_BP_CLEAR_PENDING) { + new_breakpoints->data[i].state = WHPX_BP_SET; + } else if (new_breakpoints->data[i].state == WHPX_BP_SET) { + new_breakpoints->data[i].state = WHPX_BP_SET_PENDING; + } + + found = true; + break; + } + } + + if (!found && new_breakpoints->used < new_breakpoints->allocated) { + /* No WHPX breakpoint at this address. Create one. */ + new_breakpoints->data[new_breakpoints->used].address = bp->pc; + new_breakpoints->data[new_breakpoints->used].state = + WHPX_BP_SET_PENDING; + new_breakpoints->used++; + } + } + + /* + * Free the previous breakpoint list. This can be optimized by keeping + * it as shadow buffer for the next computation instead of freeing + * it immediately. + */ + g_free(breakpoints->breakpoints); + + breakpoints->breakpoints = new_breakpoints; +} + +/* + * Physically inserts/removes the breakpoints by reading and writing the + * physical memory, keeping a track of the failed attempts. + * + * Passing resuming=true will try to set all previously unset breakpoints. + * Passing resuming=false will remove all inserted ones. 
+ */ +static void whpx_apply_breakpoints( + struct whpx_breakpoint_collection *breakpoints, + CPUState *cpu, + bool resuming) +{ + int i, rc; + if (!breakpoints) { + return; + } + + for (i = 0; i < breakpoints->used; i++) { + /* Decide what to do right now based on the last known state. */ + WhpxBreakpointState state = breakpoints->data[i].state; + switch (state) { + case WHPX_BP_CLEARED: + if (resuming) { + state = WHPX_BP_SET_PENDING; + } + break; + case WHPX_BP_SET_PENDING: + if (!resuming) { + state = WHPX_BP_CLEARED; + } + break; + case WHPX_BP_SET: + if (!resuming) { + state = WHPX_BP_CLEAR_PENDING; + } + break; + case WHPX_BP_CLEAR_PENDING: + if (resuming) { + state = WHPX_BP_SET; + } + break; + } + + if (state == WHPX_BP_SET_PENDING) { + /* Remember the original instruction. */ + rc = cpu_memory_rw_debug(cpu, + breakpoints->data[i].address, + &breakpoints->data[i].original_instruction, + 1, + false); + + if (!rc) { + /* Write the breakpoint instruction. */ + rc = cpu_memory_rw_debug(cpu, + breakpoints->data[i].address, + (void *)&whpx_breakpoint_instruction, + 1, + true); + } + + if (!rc) { + state = WHPX_BP_SET; + } + + } + + if (state == WHPX_BP_CLEAR_PENDING) { + /* Restore the original instruction. */ + rc = cpu_memory_rw_debug(cpu, + breakpoints->data[i].address, + &breakpoints->data[i].original_instruction, + 1, + true); + + if (!rc) { + state = WHPX_BP_CLEARED; + } + } + + breakpoints->data[i].state = state; + } +} + +/* + * This function is called when the a VCPU is about to start and no other + * VCPUs have been started so far. Since the VCPU start order could be + * arbitrary, it doesn't have to be VCPU#0. + * + * It is used to commit the breakpoints into memory, and configure WHPX + * to intercept debug exceptions. + * + * Note that whpx_set_exception_exit_bitmap() cannot be called if one or + * more VCPUs are already running, so this is the best place to do it. 
+ */ +static int whpx_first_vcpu_starting(CPUState *cpu) +{ + struct whpx_state *whpx = &whpx_global; + HRESULT hr; + + g_assert(bql_locked()); + + if (!QTAILQ_EMPTY(&cpu->breakpoints) || + (whpx->breakpoints.breakpoints && + whpx->breakpoints.breakpoints->used)) { + CPUBreakpoint *bp; + int i = 0; + bool update_pending = false; + + QTAILQ_FOREACH(bp, &cpu->breakpoints, entry) { + if (i >= whpx->breakpoints.original_address_count || + bp->pc != whpx->breakpoints.original_addresses[i]) { + update_pending = true; + } + + i++; + } + + if (i != whpx->breakpoints.original_address_count) { + update_pending = true; + } + + if (update_pending) { + /* + * The CPU breakpoints have changed since the last call to + * whpx_translate_cpu_breakpoints(). WHPX breakpoints must + * now be recomputed. + */ + whpx_translate_cpu_breakpoints(&whpx->breakpoints, cpu, i); + } + + /* Actually insert the breakpoints into the memory. */ + whpx_apply_breakpoints(whpx->breakpoints.breakpoints, cpu, true); + } + + uint64_t exception_mask; + if (whpx->step_pending || + (whpx->breakpoints.breakpoints && + whpx->breakpoints.breakpoints->used)) { + /* + * We are either attempting to single-step one or more CPUs, or + * have one or more breakpoints enabled. Both require intercepting + * the WHvX64ExceptionTypeBreakpointTrap exception. + */ + + exception_mask = 1UL << WHvX64ExceptionTypeDebugTrapOrFault; + } else { + /* Let the guest handle all exceptions. */ + exception_mask = 0; + } + + hr = whpx_set_exception_exit_bitmap(exception_mask); + if (!SUCCEEDED(hr)) { + error_report("WHPX: Failed to update exception exit mask," + "hr=%08lx.", hr); + return 1; + } + + return 0; +} + +/* + * This function is called when the last VCPU has finished running. + * It is used to remove any previously set breakpoints from memory. 
+ */ +static int whpx_last_vcpu_stopping(CPUState *cpu) +{ + whpx_apply_breakpoints(whpx_global.breakpoints.breakpoints, cpu, false); + return 0; +} + +/* Returns the address of the next instruction that is about to be executed. */ +static vaddr whpx_vcpu_get_pc(CPUState *cpu, bool exit_context_valid) +{ + if (cpu->vcpu_dirty) { + /* The CPU registers have been modified by other parts of QEMU. */ + return cpu_env(cpu)->eip; + } else if (exit_context_valid) { + /* + * The CPU registers have not been modified by neither other parts + * of QEMU, nor this port by calling WHvSetVirtualProcessorRegisters(). + * This is the most common case. + */ + AccelCPUState *vcpu = cpu->accel; + return vcpu->exit_ctx.VpContext.Rip; + } else { + /* + * The CPU registers have been modified by a call to + * WHvSetVirtualProcessorRegisters() and must be re-queried from + * the target. + */ + WHV_REGISTER_VALUE reg_value; + WHV_REGISTER_NAME reg_name = WHvX64RegisterRip; + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + + hr = whp_dispatch.WHvGetVirtualProcessorRegisters( + whpx->partition, + cpu->cpu_index, + ®_name, + 1, + ®_value); + + if (FAILED(hr)) { + error_report("WHPX: Failed to get PC, hr=%08lx", hr); + return 0; + } + + return reg_value.Reg64; + } +} + +static int whpx_handle_halt(CPUState *cpu) +{ + int ret = 0; + + bql_lock(); + if (!((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + (cpu_env(cpu)->eflags & IF_MASK)) && + !(cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu->exception_index = EXCP_HLT; + cpu->halted = true; + ret = 1; + } + bql_unlock(); + + return ret; +} + +static void whpx_vcpu_pre_run(CPUState *cpu) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + AccelCPUState *vcpu = cpu->accel; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + int irq; + uint8_t tpr; + WHV_X64_PENDING_INTERRUPTION_REGISTER new_int; + UINT32 reg_count = 0; + WHV_REGISTER_VALUE reg_values[3]; + WHV_REGISTER_NAME reg_names[3]; + + 
memset(&new_int, 0, sizeof(new_int)); + memset(reg_values, 0, sizeof(reg_values)); + + bql_lock(); + + /* Inject NMI */ + if (!vcpu->interruption_pending && + cpu->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_SMI)) { + if (cpu->interrupt_request & CPU_INTERRUPT_NMI) { + cpu->interrupt_request &= ~CPU_INTERRUPT_NMI; + vcpu->interruptable = false; + new_int.InterruptionType = WHvX64PendingNmi; + new_int.InterruptionPending = 1; + new_int.InterruptionVector = 2; + } + if (cpu->interrupt_request & CPU_INTERRUPT_SMI) { + cpu->interrupt_request &= ~CPU_INTERRUPT_SMI; + } + } + + /* + * Force the VCPU out of its inner loop to process any INIT requests or + * commit pending TPR access. + */ + if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { + if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + !(env->hflags & HF_SMM_MASK)) { + cpu->exit_request = 1; + } + if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { + cpu->exit_request = 1; + } + } + + /* Get pending hard interruption or replay one that was overwritten */ + if (!whpx_apic_in_platform()) { + if (!vcpu->interruption_pending && + vcpu->interruptable && (env->eflags & IF_MASK)) { + assert(!new_int.InterruptionPending); + if (cpu->interrupt_request & CPU_INTERRUPT_HARD) { + cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + irq = cpu_get_pic_interrupt(env); + if (irq >= 0) { + new_int.InterruptionType = WHvX64PendingInterrupt; + new_int.InterruptionPending = 1; + new_int.InterruptionVector = irq; + } + } + } + + /* Setup interrupt state if new one was prepared */ + if (new_int.InterruptionPending) { + reg_values[reg_count].PendingInterruption = new_int; + reg_names[reg_count] = WHvRegisterPendingInterruption; + reg_count += 1; + } + } else if (vcpu->ready_for_pic_interrupt && + (cpu->interrupt_request & CPU_INTERRUPT_HARD)) { + cpu->interrupt_request &= ~CPU_INTERRUPT_HARD; + irq = cpu_get_pic_interrupt(env); + if (irq >= 0) { + reg_names[reg_count] = WHvRegisterPendingEvent; + 
reg_values[reg_count].ExtIntEvent = (WHV_X64_PENDING_EXT_INT_EVENT) + { + .EventPending = 1, + .EventType = WHvX64PendingEventExtInt, + .Vector = irq, + }; + reg_count += 1; + } + } + + /* Sync the TPR to the CR8 if was modified during the intercept */ + tpr = whpx_apic_tpr_to_cr8(cpu_get_apic_tpr(x86_cpu->apic_state)); + if (tpr != vcpu->tpr) { + vcpu->tpr = tpr; + reg_values[reg_count].Reg64 = tpr; + cpu->exit_request = 1; + reg_names[reg_count] = WHvX64RegisterCr8; + reg_count += 1; + } + + /* Update the state of the interrupt delivery notification */ + if (!vcpu->window_registered && + cpu->interrupt_request & CPU_INTERRUPT_HARD) { + reg_values[reg_count].DeliverabilityNotifications = + (WHV_X64_DELIVERABILITY_NOTIFICATIONS_REGISTER) { + .InterruptNotification = 1 + }; + vcpu->window_registered = 1; + reg_names[reg_count] = WHvX64RegisterDeliverabilityNotifications; + reg_count += 1; + } + + bql_unlock(); + vcpu->ready_for_pic_interrupt = false; + + if (reg_count) { + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, + reg_names, reg_count, reg_values); + if (FAILED(hr)) { + error_report("WHPX: Failed to set interrupt state registers," + " hr=%08lx", hr); + } + } + + return; +} + +static void whpx_vcpu_post_run(CPUState *cpu) +{ + AccelCPUState *vcpu = cpu->accel; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + + env->eflags = vcpu->exit_ctx.VpContext.Rflags; + + uint64_t tpr = vcpu->exit_ctx.VpContext.Cr8; + if (vcpu->tpr != tpr) { + vcpu->tpr = tpr; + bql_lock(); + cpu_set_apic_tpr(x86_cpu->apic_state, whpx_cr8_to_apic_tpr(vcpu->tpr)); + bql_unlock(); + } + + vcpu->interruption_pending = + vcpu->exit_ctx.VpContext.ExecutionState.InterruptionPending; + + vcpu->interruptable = + !vcpu->exit_ctx.VpContext.ExecutionState.InterruptShadow; + + return; +} + +static void whpx_vcpu_process_async_events(CPUState *cpu) +{ + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + AccelCPUState 
*vcpu = cpu->accel; + + if ((cpu->interrupt_request & CPU_INTERRUPT_INIT) && + !(env->hflags & HF_SMM_MASK)) { + whpx_cpu_synchronize_state(cpu); + do_cpu_init(x86_cpu); + vcpu->interruptable = true; + } + + if (cpu->interrupt_request & CPU_INTERRUPT_POLL) { + cpu->interrupt_request &= ~CPU_INTERRUPT_POLL; + apic_poll_irq(x86_cpu->apic_state); + } + + if (((cpu->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK)) || + (cpu->interrupt_request & CPU_INTERRUPT_NMI)) { + cpu->halted = false; + } + + if (cpu->interrupt_request & CPU_INTERRUPT_SIPI) { + whpx_cpu_synchronize_state(cpu); + do_cpu_sipi(x86_cpu); + } + + if (cpu->interrupt_request & CPU_INTERRUPT_TPR) { + cpu->interrupt_request &= ~CPU_INTERRUPT_TPR; + whpx_cpu_synchronize_state(cpu); + apic_handle_tpr_access_report(x86_cpu->apic_state, env->eip, + env->tpr_access_type); + } + + return; +} + +static int whpx_vcpu_run(CPUState *cpu) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + AccelCPUState *vcpu = cpu->accel; + struct whpx_breakpoint *stepped_over_bp = NULL; + WhpxStepMode exclusive_step_mode = WHPX_STEP_NONE; + int ret; + + g_assert(bql_locked()); + + if (whpx->running_cpus++ == 0) { + /* Insert breakpoints into memory, update exception exit bitmap. */ + ret = whpx_first_vcpu_starting(cpu); + if (ret != 0) { + return ret; + } + } + + if (whpx->breakpoints.breakpoints && + whpx->breakpoints.breakpoints->used > 0) + { + uint64_t pc = whpx_vcpu_get_pc(cpu, true); + stepped_over_bp = whpx_lookup_breakpoint_by_addr(pc); + if (stepped_over_bp && stepped_over_bp->state != WHPX_BP_SET) { + stepped_over_bp = NULL; + } + + if (stepped_over_bp) { + /* + * We are trying to run the instruction overwritten by an active + * breakpoint. We will temporarily disable the breakpoint, suspend + * other CPUs, and step over the instruction. 
+ */ + exclusive_step_mode = WHPX_STEP_EXCLUSIVE; + } + } + + if (exclusive_step_mode == WHPX_STEP_NONE) { + whpx_vcpu_process_async_events(cpu); + if (cpu->halted && !whpx_apic_in_platform()) { + cpu->exception_index = EXCP_HLT; + qatomic_set(&cpu->exit_request, false); + return 0; + } + } + + bql_unlock(); + + if (exclusive_step_mode != WHPX_STEP_NONE) { + start_exclusive(); + g_assert(cpu == current_cpu); + g_assert(!cpu->running); + cpu->running = true; + + hr = whpx_set_exception_exit_bitmap( + 1UL << WHvX64ExceptionTypeDebugTrapOrFault); + if (!SUCCEEDED(hr)) { + error_report("WHPX: Failed to update exception exit mask, " + "hr=%08lx.", hr); + return 1; + } + + if (stepped_over_bp) { + /* Temporarily disable the triggered breakpoint. */ + cpu_memory_rw_debug(cpu, + stepped_over_bp->address, + &stepped_over_bp->original_instruction, + 1, + true); + } + } else { + cpu_exec_start(cpu); + } + + do { + if (cpu->vcpu_dirty) { + whpx_set_registers(cpu, WHPX_SET_RUNTIME_STATE); + cpu->vcpu_dirty = false; + } + + if (exclusive_step_mode == WHPX_STEP_NONE) { + whpx_vcpu_pre_run(cpu); + + if (qatomic_read(&cpu->exit_request)) { + whpx_vcpu_kick(cpu); + } + } + + if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) { + whpx_vcpu_configure_single_stepping(cpu, true, NULL); + } + + hr = whp_dispatch.WHvRunVirtualProcessor( + whpx->partition, cpu->cpu_index, + &vcpu->exit_ctx, sizeof(vcpu->exit_ctx)); + + if (FAILED(hr)) { + error_report("WHPX: Failed to exec a virtual processor," + " hr=%08lx", hr); + ret = -1; + break; + } + + if (exclusive_step_mode != WHPX_STEP_NONE || cpu->singlestep_enabled) { + whpx_vcpu_configure_single_stepping(cpu, + false, + &vcpu->exit_ctx.VpContext.Rflags); + } + + whpx_vcpu_post_run(cpu); + + switch (vcpu->exit_ctx.ExitReason) { + case WHvRunVpExitReasonMemoryAccess: + ret = whpx_handle_mmio(cpu, &vcpu->exit_ctx.MemoryAccess); + break; + + case WHvRunVpExitReasonX64IoPortAccess: + ret = whpx_handle_portio(cpu, 
&vcpu->exit_ctx.IoPortAccess); + break; + + case WHvRunVpExitReasonX64InterruptWindow: + vcpu->ready_for_pic_interrupt = 1; + vcpu->window_registered = 0; + ret = 0; + break; + + case WHvRunVpExitReasonX64ApicEoi: + assert(whpx_apic_in_platform()); + ioapic_eoi_broadcast(vcpu->exit_ctx.ApicEoi.InterruptVector); + break; + + case WHvRunVpExitReasonX64Halt: + /* + * WARNING: as of build 19043.1526 (21H1), this exit reason is no + * longer used. + */ + ret = whpx_handle_halt(cpu); + break; + + case WHvRunVpExitReasonX64ApicInitSipiTrap: { + WHV_INTERRUPT_CONTROL ipi = {0}; + uint64_t icr = vcpu->exit_ctx.ApicInitSipi.ApicIcr; + uint32_t delivery_mode = + (icr & APIC_ICR_DELIV_MOD) >> APIC_ICR_DELIV_MOD_SHIFT; + int dest_shorthand = + (icr & APIC_ICR_DEST_SHORT) >> APIC_ICR_DEST_SHORT_SHIFT; + bool broadcast = false; + bool include_self = false; + uint32_t i; + + /* We only registered for INIT and SIPI exits. */ + if ((delivery_mode != APIC_DM_INIT) && + (delivery_mode != APIC_DM_SIPI)) { + error_report( + "WHPX: Unexpected APIC exit that is not a INIT or SIPI"); + break; + } + + if (delivery_mode == APIC_DM_INIT) { + ipi.Type = WHvX64InterruptTypeInit; + } else { + ipi.Type = WHvX64InterruptTypeSipi; + } + + ipi.DestinationMode = + ((icr & APIC_ICR_DEST_MOD) >> APIC_ICR_DEST_MOD_SHIFT) ? + WHvX64InterruptDestinationModeLogical : + WHvX64InterruptDestinationModePhysical; + + ipi.TriggerMode = + ((icr & APIC_ICR_TRIGGER_MOD) >> APIC_ICR_TRIGGER_MOD_SHIFT) ? + WHvX64InterruptTriggerModeLevel : + WHvX64InterruptTriggerModeEdge; + + ipi.Vector = icr & APIC_VECTOR_MASK; + switch (dest_shorthand) { + /* no shorthand. Bits 56-63 contain the destination. 
*/ + case 0: + ipi.Destination = (icr >> 56) & APIC_VECTOR_MASK; + hr = whp_dispatch.WHvRequestInterrupt(whpx->partition, + &ipi, sizeof(ipi)); + if (FAILED(hr)) { + error_report("WHPX: Failed to request interrupt hr=%08lx", + hr); + } + + break; + + /* self */ + case 1: + include_self = true; + break; + + /* broadcast, including self */ + case 2: + broadcast = true; + include_self = true; + break; + + /* broadcast, excluding self */ + case 3: + broadcast = true; + break; + } + + if (!broadcast && !include_self) { + break; + } + + for (i = 0; i <= max_vcpu_index; i++) { + if (i == cpu->cpu_index && !include_self) { + continue; + } + + /* + * Assuming that APIC Ids are identity mapped since + * WHvX64RegisterApicId & WHvX64RegisterInitialApicId registers + * are not handled yet and the hypervisor doesn't allow the + * guest to modify the APIC ID. + */ + ipi.Destination = i; + hr = whp_dispatch.WHvRequestInterrupt(whpx->partition, + &ipi, sizeof(ipi)); + if (FAILED(hr)) { + error_report( + "WHPX: Failed to request SIPI for %d, hr=%08lx", + i, hr); + } + } + + break; + } + + case WHvRunVpExitReasonCanceled: + if (exclusive_step_mode != WHPX_STEP_NONE) { + /* + * We are trying to step over a single instruction, and + * likely got a request to stop from another thread. + * Delay it until we are done stepping + * over. + */ + ret = 0; + } else { + cpu->exception_index = EXCP_INTERRUPT; + ret = 1; + } + break; + case WHvRunVpExitReasonX64MsrAccess: { + WHV_REGISTER_VALUE reg_values[3] = {0}; + WHV_REGISTER_NAME reg_names[3]; + UINT32 reg_count; + + reg_names[0] = WHvX64RegisterRip; + reg_names[1] = WHvX64RegisterRax; + reg_names[2] = WHvX64RegisterRdx; + + reg_values[0].Reg64 = + vcpu->exit_ctx.VpContext.Rip + + vcpu->exit_ctx.VpContext.InstructionLength; + + /* + * For all unsupported MSR access we: + * ignore writes + * return 0 on read. + */ + reg_count = vcpu->exit_ctx.MsrAccess.AccessInfo.IsWrite ? 
+ 1 : 3; + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, + cpu->cpu_index, + reg_names, reg_count, + reg_values); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set MsrAccess state " + " registers, hr=%08lx", hr); + } + ret = 0; + break; + } + case WHvRunVpExitReasonX64Cpuid: { + WHV_REGISTER_VALUE reg_values[5]; + WHV_REGISTER_NAME reg_names[5]; + UINT32 reg_count = 5; + UINT64 cpuid_fn, rip = 0, rax = 0, rcx = 0, rdx = 0, rbx = 0; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + + memset(reg_values, 0, sizeof(reg_values)); + + rip = vcpu->exit_ctx.VpContext.Rip + + vcpu->exit_ctx.VpContext.InstructionLength; + cpuid_fn = vcpu->exit_ctx.CpuidAccess.Rax; + + /* + * Ideally, these should be supplied to the hypervisor during VCPU + * initialization and it should be able to satisfy this request. + * But, currently, WHPX doesn't support setting CPUID values in the + * hypervisor once the partition has been setup, which is too late + * since VCPUs are realized later. For now, use the values from + * QEMU to satisfy these requests, until WHPX adds support for + * being able to set these values in the hypervisor at runtime. 
+ */ + cpu_x86_cpuid(env, cpuid_fn, 0, (UINT32 *)&rax, (UINT32 *)&rbx, + (UINT32 *)&rcx, (UINT32 *)&rdx); + switch (cpuid_fn) { + case 0x40000000: + /* Expose the vmware cpu frequency cpuid leaf */ + rax = 0x40000010; + rbx = rcx = rdx = 0; + break; + + case 0x40000010: + rax = env->tsc_khz; + rbx = env->apic_bus_freq / 1000; /* Hz to KHz */ + rcx = rdx = 0; + break; + + case 0x80000001: + /* Remove any support of OSVW */ + rcx &= ~CPUID_EXT3_OSVW; + break; + } + + reg_names[0] = WHvX64RegisterRip; + reg_names[1] = WHvX64RegisterRax; + reg_names[2] = WHvX64RegisterRcx; + reg_names[3] = WHvX64RegisterRdx; + reg_names[4] = WHvX64RegisterRbx; + + reg_values[0].Reg64 = rip; + reg_values[1].Reg64 = rax; + reg_values[2].Reg64 = rcx; + reg_values[3].Reg64 = rdx; + reg_values[4].Reg64 = rbx; + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx->partition, cpu->cpu_index, + reg_names, + reg_count, + reg_values); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set CpuidAccess state registers," + " hr=%08lx", hr); + } + ret = 0; + break; + } + case WHvRunVpExitReasonException: + whpx_get_registers(cpu); + + if ((vcpu->exit_ctx.VpException.ExceptionType == + WHvX64ExceptionTypeDebugTrapOrFault) && + (vcpu->exit_ctx.VpException.InstructionByteCount >= 1) && + (vcpu->exit_ctx.VpException.InstructionBytes[0] == + whpx_breakpoint_instruction)) { + /* Stopped at a software breakpoint. */ + cpu->exception_index = EXCP_DEBUG; + } else if ((vcpu->exit_ctx.VpException.ExceptionType == + WHvX64ExceptionTypeDebugTrapOrFault) && + !cpu->singlestep_enabled) { + /* + * Just finished stepping over a breakpoint, but the + * gdb does not expect us to do single-stepping. + * Don't do anything special. + */ + cpu->exception_index = EXCP_INTERRUPT; + } else { + /* Another exception or debug event. Report it to GDB. 
*/ + cpu->exception_index = EXCP_DEBUG; + } + + ret = 1; + break; + case WHvRunVpExitReasonNone: + case WHvRunVpExitReasonUnrecoverableException: + case WHvRunVpExitReasonInvalidVpRegisterValue: + case WHvRunVpExitReasonUnsupportedFeature: + default: + error_report("WHPX: Unexpected VP exit code %d", + vcpu->exit_ctx.ExitReason); + whpx_get_registers(cpu); + bql_lock(); + qemu_system_guest_panicked(cpu_get_crash_info(cpu)); + bql_unlock(); + break; + } + + } while (!ret); + + if (stepped_over_bp) { + /* Restore the breakpoint we stepped over */ + cpu_memory_rw_debug(cpu, + stepped_over_bp->address, + (void *)&whpx_breakpoint_instruction, + 1, + true); + } + + if (exclusive_step_mode != WHPX_STEP_NONE) { + g_assert(cpu_in_exclusive_context(cpu)); + cpu->running = false; + end_exclusive(); + + exclusive_step_mode = WHPX_STEP_NONE; + } else { + cpu_exec_end(cpu); + } + + bql_lock(); + current_cpu = cpu; + + if (--whpx->running_cpus == 0) { + whpx_last_vcpu_stopping(cpu); + } + + qatomic_set(&cpu->exit_request, false); + + return ret < 0; +} + +static void do_whpx_cpu_synchronize_state(CPUState *cpu, run_on_cpu_data arg) +{ + if (!cpu->vcpu_dirty) { + whpx_get_registers(cpu); + cpu->vcpu_dirty = true; + } +} + +static void do_whpx_cpu_synchronize_post_reset(CPUState *cpu, + run_on_cpu_data arg) +{ + whpx_set_registers(cpu, WHPX_SET_RESET_STATE); + cpu->vcpu_dirty = false; +} + +static void do_whpx_cpu_synchronize_post_init(CPUState *cpu, + run_on_cpu_data arg) +{ + whpx_set_registers(cpu, WHPX_SET_FULL_STATE); + cpu->vcpu_dirty = false; +} + +static void do_whpx_cpu_synchronize_pre_loadvm(CPUState *cpu, + run_on_cpu_data arg) +{ + cpu->vcpu_dirty = true; +} + +/* + * CPU support. 
+ */ + +void whpx_cpu_synchronize_state(CPUState *cpu) +{ + if (!cpu->vcpu_dirty) { + run_on_cpu(cpu, do_whpx_cpu_synchronize_state, RUN_ON_CPU_NULL); + } +} + +void whpx_cpu_synchronize_post_reset(CPUState *cpu) +{ + run_on_cpu(cpu, do_whpx_cpu_synchronize_post_reset, RUN_ON_CPU_NULL); +} + +void whpx_cpu_synchronize_post_init(CPUState *cpu) +{ + run_on_cpu(cpu, do_whpx_cpu_synchronize_post_init, RUN_ON_CPU_NULL); +} + +void whpx_cpu_synchronize_pre_loadvm(CPUState *cpu) +{ + run_on_cpu(cpu, do_whpx_cpu_synchronize_pre_loadvm, RUN_ON_CPU_NULL); +} + +void whpx_cpu_synchronize_pre_resume(bool step_pending) +{ + whpx_global.step_pending = step_pending; +} + +/* + * Vcpu support. + */ + +static Error *whpx_migration_blocker; + +static void whpx_cpu_update_state(void *opaque, bool running, RunState state) +{ + CPUX86State *env = opaque; + + if (running) { + env->tsc_valid = false; + } +} + +int whpx_init_vcpu(CPUState *cpu) +{ + HRESULT hr; + struct whpx_state *whpx = &whpx_global; + AccelCPUState *vcpu = NULL; + Error *local_error = NULL; + X86CPU *x86_cpu = X86_CPU(cpu); + CPUX86State *env = &x86_cpu->env; + UINT64 freq = 0; + int ret; + + /* Add migration blockers for all unsupported features of the + * Windows Hypervisor Platform + */ + if (whpx_migration_blocker == NULL) { + error_setg(&whpx_migration_blocker, + "State blocked due to non-migratable CPUID feature support," + "dirty memory tracking support, and XSAVE/XRSTOR support"); + + if (migrate_add_blocker(&whpx_migration_blocker, &local_error) < 0) { + error_report_err(local_error); + ret = -EINVAL; + goto error; + } + } + + vcpu = g_new0(AccelCPUState, 1); + + hr = whp_dispatch.WHvEmulatorCreateEmulator( + &whpx_emu_callbacks, + &vcpu->emulator); + if (FAILED(hr)) { + error_report("WHPX: Failed to setup instruction completion support," + " hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + + hr = whp_dispatch.WHvCreateVirtualProcessor( + whpx->partition, cpu->cpu_index, 0); + if (FAILED(hr)) { + 
error_report("WHPX: Failed to create a virtual processor," + " hr=%08lx", hr); + whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator); + ret = -EINVAL; + goto error; + } + + /* + * vcpu's TSC frequency is either specified by user, or use the value + * provided by Hyper-V if the former is not present. In the latter case, we + * query it from Hyper-V and record in env->tsc_khz, so that vcpu's TSC + * frequency can be migrated later via this field. + */ + if (!env->tsc_khz) { + hr = whp_dispatch.WHvGetCapability( + WHvCapabilityCodeProcessorClockFrequency, &freq, sizeof(freq), + NULL); + if (hr != WHV_E_UNKNOWN_CAPABILITY) { + if (FAILED(hr)) { + printf("WHPX: Failed to query tsc frequency, hr=0x%08lx\n", hr); + } else { + env->tsc_khz = freq / 1000; /* Hz to KHz */ + } + } + } + + env->apic_bus_freq = HYPERV_APIC_BUS_FREQUENCY; + hr = whp_dispatch.WHvGetCapability( + WHvCapabilityCodeInterruptClockFrequency, &freq, sizeof(freq), NULL); + if (hr != WHV_E_UNKNOWN_CAPABILITY) { + if (FAILED(hr)) { + printf("WHPX: Failed to query apic bus frequency hr=0x%08lx\n", hr); + } else { + env->apic_bus_freq = freq; + } + } + + /* + * If the vmware cpuid frequency leaf option is set, and we have a valid + * tsc value, trap the corresponding cpuid's. 
+ */ + if (x86_cpu->vmware_cpuid_freq && env->tsc_khz) { + UINT32 cpuidExitList[] = {1, 0x80000001, 0x40000000, 0x40000010}; + + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeCpuidExitList, + cpuidExitList, + RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32)); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx", + hr); + ret = -EINVAL; + goto error; + } + } + + vcpu->interruptable = true; + cpu->vcpu_dirty = true; + cpu->accel = vcpu; + max_vcpu_index = max(max_vcpu_index, cpu->cpu_index); + qemu_add_vm_change_state_handler(whpx_cpu_update_state, env); + + return 0; + +error: + g_free(vcpu); + + return ret; +} + +int whpx_vcpu_exec(CPUState *cpu) +{ + int ret; + int fatal; + + for (;;) { + if (cpu->exception_index >= EXCP_INTERRUPT) { + ret = cpu->exception_index; + cpu->exception_index = -1; + break; + } + + fatal = whpx_vcpu_run(cpu); + + if (fatal) { + error_report("WHPX: Failed to exec a virtual processor"); + abort(); + } + } + + return ret; +} + +void whpx_destroy_vcpu(CPUState *cpu) +{ + struct whpx_state *whpx = &whpx_global; + AccelCPUState *vcpu = cpu->accel; + + whp_dispatch.WHvDeleteVirtualProcessor(whpx->partition, cpu->cpu_index); + whp_dispatch.WHvEmulatorDestroyEmulator(vcpu->emulator); + g_free(cpu->accel); + return; +} + +void whpx_vcpu_kick(CPUState *cpu) +{ + struct whpx_state *whpx = &whpx_global; + whp_dispatch.WHvCancelRunVirtualProcessor( + whpx->partition, cpu->cpu_index, 0); +} + +/* + * Memory support. + */ + +static void whpx_update_mapping(hwaddr start_pa, ram_addr_t size, + void *host_va, int add, int rom, + const char *name) +{ + struct whpx_state *whpx = &whpx_global; + HRESULT hr; + + /* + if (add) { + printf("WHPX: ADD PA:%p Size:%p, Host:%p, %s, '%s'\n", + (void*)start_pa, (void*)size, host_va, + (rom ? 
"ROM" : "RAM"), name); + } else { + printf("WHPX: DEL PA:%p Size:%p, Host:%p, '%s'\n", + (void*)start_pa, (void*)size, host_va, name); + } + */ + + if (add) { + hr = whp_dispatch.WHvMapGpaRange(whpx->partition, + host_va, + start_pa, + size, + (WHvMapGpaRangeFlagRead | + WHvMapGpaRangeFlagExecute | + (rom ? 0 : WHvMapGpaRangeFlagWrite))); + } else { + hr = whp_dispatch.WHvUnmapGpaRange(whpx->partition, + start_pa, + size); + } + + if (FAILED(hr)) { + error_report("WHPX: Failed to %s GPA range '%s' PA:%p, Size:%p bytes," + " Host:%p, hr=%08lx", + (add ? "MAP" : "UNMAP"), name, + (void *)(uintptr_t)start_pa, (void *)size, host_va, hr); + } +} + +static void whpx_process_section(MemoryRegionSection *section, int add) +{ + MemoryRegion *mr = section->mr; + hwaddr start_pa = section->offset_within_address_space; + ram_addr_t size = int128_get64(section->size); + unsigned int delta; + uint64_t host_va; + + if (!memory_region_is_ram(mr)) { + return; + } + + delta = qemu_real_host_page_size() - (start_pa & ~qemu_real_host_page_mask()); + delta &= ~qemu_real_host_page_mask(); + if (delta > size) { + return; + } + start_pa += delta; + size -= delta; + size &= qemu_real_host_page_mask(); + if (!size || (start_pa & ~qemu_real_host_page_mask())) { + return; + } + + host_va = (uintptr_t)memory_region_get_ram_ptr(mr) + + section->offset_within_region + delta; + + whpx_update_mapping(start_pa, size, (void *)(uintptr_t)host_va, add, + memory_region_is_rom(mr), mr->name); +} + +static void whpx_region_add(MemoryListener *listener, + MemoryRegionSection *section) +{ + memory_region_ref(section->mr); + whpx_process_section(section, 1); +} + +static void whpx_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + whpx_process_section(section, 0); + memory_region_unref(section->mr); +} + +static void whpx_transaction_begin(MemoryListener *listener) +{ +} + +static void whpx_transaction_commit(MemoryListener *listener) +{ +} + +static void whpx_log_sync(MemoryListener 
*listener, + MemoryRegionSection *section) +{ + MemoryRegion *mr = section->mr; + + if (!memory_region_is_ram(mr)) { + return; + } + + memory_region_set_dirty(mr, 0, int128_get64(section->size)); +} + +static MemoryListener whpx_memory_listener = { + .name = "whpx", + .begin = whpx_transaction_begin, + .commit = whpx_transaction_commit, + .region_add = whpx_region_add, + .region_del = whpx_region_del, + .log_sync = whpx_log_sync, + .priority = MEMORY_LISTENER_PRIORITY_ACCEL, +}; + +static void whpx_memory_init(void) +{ + memory_listener_register(&whpx_memory_listener, &address_space_memory); +} + +/* + * Load the functions from the given library, using the given handle. If a + * handle is provided, it is used, otherwise the library is opened. The + * handle will be updated on return with the opened one. + */ +static bool load_whp_dispatch_fns(HMODULE *handle, + WHPFunctionList function_list) +{ + HMODULE hLib = *handle; + + #define WINHV_PLATFORM_DLL "WinHvPlatform.dll" + #define WINHV_EMULATION_DLL "WinHvEmulation.dll" + #define WHP_LOAD_FIELD_OPTIONAL(return_type, function_name, signature) \ + whp_dispatch.function_name = \ + (function_name ## _t)GetProcAddress(hLib, #function_name); \ + + #define WHP_LOAD_FIELD(return_type, function_name, signature) \ + whp_dispatch.function_name = \ + (function_name ## _t)GetProcAddress(hLib, #function_name); \ + if (!whp_dispatch.function_name) { \ + error_report("Could not load function %s", #function_name); \ + goto error; \ + } \ + + #define WHP_LOAD_LIB(lib_name, handle_lib) \ + if (!handle_lib) { \ + handle_lib = LoadLibrary(lib_name); \ + if (!handle_lib) { \ + error_report("Could not load library %s.", lib_name); \ + goto error; \ + } \ + } \ + + switch (function_list) { + case WINHV_PLATFORM_FNS_DEFAULT: + WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib) + LIST_WINHVPLATFORM_FUNCTIONS(WHP_LOAD_FIELD) + break; + + case WINHV_EMULATION_FNS_DEFAULT: + WHP_LOAD_LIB(WINHV_EMULATION_DLL, hLib) + 
LIST_WINHVEMULATION_FUNCTIONS(WHP_LOAD_FIELD) + break; + + case WINHV_PLATFORM_FNS_SUPPLEMENTAL: + WHP_LOAD_LIB(WINHV_PLATFORM_DLL, hLib) + LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_LOAD_FIELD_OPTIONAL) + break; + } + + *handle = hLib; + return true; + +error: + if (hLib) { + FreeLibrary(hLib); + } + + return false; +} + +static void whpx_set_kernel_irqchip(Object *obj, Visitor *v, + const char *name, void *opaque, + Error **errp) +{ + struct whpx_state *whpx = &whpx_global; + OnOffSplit mode; + + if (!visit_type_OnOffSplit(v, name, &mode, errp)) { + return; + } + + switch (mode) { + case ON_OFF_SPLIT_ON: + whpx->kernel_irqchip_allowed = true; + whpx->kernel_irqchip_required = true; + break; + + case ON_OFF_SPLIT_OFF: + whpx->kernel_irqchip_allowed = false; + whpx->kernel_irqchip_required = false; + break; + + case ON_OFF_SPLIT_SPLIT: + error_setg(errp, "WHPX: split irqchip currently not supported"); + error_append_hint(errp, + "Try without kernel-irqchip or with kernel-irqchip=on|off"); + break; + + default: + /* + * The value was checked in visit_type_OnOffSplit() above. If + * we get here, then something is wrong in QEMU. 
+ */ + abort(); + } +} + +/* + * Partition support + */ + +static int whpx_accel_init(MachineState *ms) +{ + struct whpx_state *whpx; + int ret; + HRESULT hr; + WHV_CAPABILITY whpx_cap; + UINT32 whpx_cap_size; + WHV_PARTITION_PROPERTY prop; + UINT32 cpuidExitList[] = {1, 0x80000001}; + WHV_CAPABILITY_FEATURES features = {0}; + + whpx = &whpx_global; + + if (!init_whp_dispatch()) { + ret = -ENOSYS; + goto error; + } + + whpx->mem_quota = ms->ram_size; + + hr = whp_dispatch.WHvGetCapability( + WHvCapabilityCodeHypervisorPresent, &whpx_cap, + sizeof(whpx_cap), &whpx_cap_size); + if (FAILED(hr) || !whpx_cap.HypervisorPresent) { + error_report("WHPX: No accelerator found, hr=%08lx", hr); + ret = -ENOSPC; + goto error; + } + + hr = whp_dispatch.WHvGetCapability( + WHvCapabilityCodeFeatures, &features, sizeof(features), NULL); + if (FAILED(hr)) { + error_report("WHPX: Failed to query capabilities, hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + + hr = whp_dispatch.WHvCreatePartition(&whpx->partition); + if (FAILED(hr)) { + error_report("WHPX: Failed to create partition, hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + + /* + * Query the XSAVE capability of the partition. Any error here is not + * considered fatal. + */ + hr = whp_dispatch.WHvGetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeProcessorXsaveFeatures, + &whpx_xsave_cap, + sizeof(whpx_xsave_cap), + &whpx_cap_size); + + /* + * Windows version which don't support this property will return with the + * specific error code. 
+ */ + if (FAILED(hr) && hr != WHV_E_UNKNOWN_PROPERTY) { + error_report("WHPX: Failed to query XSAVE capability, hr=%08lx", hr); + } + + if (!whpx_has_xsave()) { + printf("WHPX: Partition is not XSAVE capable\n"); + } + + memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY)); + prop.ProcessorCount = ms->smp.cpus; + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeProcessorCount, + &prop, + sizeof(WHV_PARTITION_PROPERTY)); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set partition processor count to %u," + " hr=%08lx", prop.ProcessorCount, hr); + ret = -EINVAL; + goto error; + } + + /* + * Error out if WHP doesn't support apic emulation and user is requiring + * it. + */ + if (whpx->kernel_irqchip_required && (!features.LocalApicEmulation || + !whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2)) { + error_report("WHPX: kernel irqchip requested, but unavailable. " + "Try without kernel-irqchip or with kernel-irqchip=off"); + ret = -EINVAL; + goto error; + } + + if (whpx->kernel_irqchip_allowed && features.LocalApicEmulation && + whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2) { + WHV_X64_LOCAL_APIC_EMULATION_MODE mode = + WHvX64LocalApicEmulationModeXApic; + printf("WHPX: setting APIC emulation mode in the hypervisor\n"); + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeLocalApicEmulationMode, + &mode, + sizeof(mode)); + if (FAILED(hr)) { + error_report("WHPX: Failed to enable kernel irqchip hr=%08lx", hr); + if (whpx->kernel_irqchip_required) { + error_report("WHPX: kernel irqchip requested, but unavailable"); + ret = -EINVAL; + goto error; + } + } else { + whpx->apic_in_platform = true; + } + } + + /* Register for MSR and CPUID exits */ + memset(&prop, 0, sizeof(WHV_PARTITION_PROPERTY)); + prop.ExtendedVmExits.X64MsrExit = 1; + prop.ExtendedVmExits.X64CpuidExit = 1; + prop.ExtendedVmExits.ExceptionExit = 1; + if (whpx_apic_in_platform()) { + 
prop.ExtendedVmExits.X64ApicInitSipiExitTrap = 1; + } + + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeExtendedVmExits, + &prop, + sizeof(WHV_PARTITION_PROPERTY)); + if (FAILED(hr)) { + error_report("WHPX: Failed to enable MSR & CPUIDexit, hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + + hr = whp_dispatch.WHvSetPartitionProperty( + whpx->partition, + WHvPartitionPropertyCodeCpuidExitList, + cpuidExitList, + RTL_NUMBER_OF(cpuidExitList) * sizeof(UINT32)); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set partition CpuidExitList hr=%08lx", + hr); + ret = -EINVAL; + goto error; + } + + /* + * We do not want to intercept any exceptions from the guest, + * until we actually start debugging with gdb. + */ + whpx->exception_exit_bitmap = -1; + hr = whpx_set_exception_exit_bitmap(0); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set exception exit bitmap, hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + + hr = whp_dispatch.WHvSetupPartition(whpx->partition); + if (FAILED(hr)) { + error_report("WHPX: Failed to setup partition, hr=%08lx", hr); + ret = -EINVAL; + goto error; + } + + whpx_memory_init(); + + printf("Windows Hypervisor Platform accelerator is operational\n"); + return 0; + +error: + + if (NULL != whpx->partition) { + whp_dispatch.WHvDeletePartition(whpx->partition); + whpx->partition = NULL; + } + + return ret; +} + +int whpx_enabled(void) +{ + return whpx_allowed; +} + +bool whpx_apic_in_platform(void) { + return whpx_global.apic_in_platform; +} + +static void whpx_accel_class_init(ObjectClass *oc, void *data) +{ + AccelClass *ac = ACCEL_CLASS(oc); + ac->name = "WHPX"; + ac->init_machine = whpx_accel_init; + ac->allowed = &whpx_allowed; + + object_class_property_add(oc, "kernel-irqchip", "on|off|split", + NULL, whpx_set_kernel_irqchip, + NULL, NULL); + object_class_property_set_description(oc, "kernel-irqchip", + "Configure WHPX in-kernel irqchip"); +} + +static void 
whpx_accel_instance_init(Object *obj) +{ + struct whpx_state *whpx = &whpx_global; + + memset(whpx, 0, sizeof(struct whpx_state)); + /* Turn on kernel-irqchip, by default */ + whpx->kernel_irqchip_allowed = true; +} + +static const TypeInfo whpx_accel_type = { + .name = ACCEL_CLASS_NAME("whpx"), + .parent = TYPE_ACCEL, + .instance_init = whpx_accel_instance_init, + .class_init = whpx_accel_class_init, +}; + +static void whpx_type_init(void) +{ + type_register_static(&whpx_accel_type); +} + +bool init_whp_dispatch(void) +{ + if (whp_dispatch_initialized) { + return true; + } + + if (!load_whp_dispatch_fns(&hWinHvPlatform, WINHV_PLATFORM_FNS_DEFAULT)) { + goto error; + } + + if (!load_whp_dispatch_fns(&hWinHvEmulation, WINHV_EMULATION_FNS_DEFAULT)) { + goto error; + } + + assert(load_whp_dispatch_fns(&hWinHvPlatform, + WINHV_PLATFORM_FNS_SUPPLEMENTAL)); + whp_dispatch_initialized = true; + + return true; +error: + if (hWinHvPlatform) { + FreeLibrary(hWinHvPlatform); + } + + if (hWinHvEmulation) { + FreeLibrary(hWinHvEmulation); + } + + return false; +} + +type_init(whpx_type_init); diff --git a/target/i386/whpx/whpx-apic.c b/target/i386/whpx/whpx-apic.c new file mode 100644 index 0000000000..7e14ded978 --- /dev/null +++ b/target/i386/whpx/whpx-apic.c @@ -0,0 +1,282 @@ +/* + * WHPX platform APIC support + * + * Copyright (c) 2011 Siemens AG + * + * Authors: + * Jan Kiszka <jan.kiszka@siemens.com> + * John Starks <jostarks@microsoft.com> + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. 
+ */ +#include "qemu/osdep.h" +#include "qemu/error-report.h" +#include "cpu.h" +#include "hw/i386/apic_internal.h" +#include "hw/i386/apic-msidef.h" +#include "hw/pci/msi.h" +#include "sysemu/hw_accel.h" +#include "sysemu/whpx.h" +#include "whpx-internal.h" + +struct whpx_lapic_state { + struct { + uint32_t data; + uint32_t padding[3]; + } fields[256]; +}; + +static void whpx_put_apic_state(APICCommonState *s, + struct whpx_lapic_state *kapic) +{ + int i; + + memset(kapic, 0, sizeof(*kapic)); + kapic->fields[0x2].data = s->id << 24; + kapic->fields[0x3].data = s->version | ((APIC_LVT_NB - 1) << 16); + kapic->fields[0x8].data = s->tpr; + kapic->fields[0xd].data = s->log_dest << 24; + kapic->fields[0xe].data = s->dest_mode << 28 | 0x0fffffff; + kapic->fields[0xf].data = s->spurious_vec; + for (i = 0; i < 8; i++) { + kapic->fields[0x10 + i].data = s->isr[i]; + kapic->fields[0x18 + i].data = s->tmr[i]; + kapic->fields[0x20 + i].data = s->irr[i]; + } + + kapic->fields[0x28].data = s->esr; + kapic->fields[0x30].data = s->icr[0]; + kapic->fields[0x31].data = s->icr[1]; + for (i = 0; i < APIC_LVT_NB; i++) { + kapic->fields[0x32 + i].data = s->lvt[i]; + } + + kapic->fields[0x38].data = s->initial_count; + kapic->fields[0x3e].data = s->divide_conf; +} + +static void whpx_get_apic_state(APICCommonState *s, + struct whpx_lapic_state *kapic) +{ + int i, v; + + s->id = kapic->fields[0x2].data >> 24; + s->tpr = kapic->fields[0x8].data; + s->arb_id = kapic->fields[0x9].data; + s->log_dest = kapic->fields[0xd].data >> 24; + s->dest_mode = kapic->fields[0xe].data >> 28; + s->spurious_vec = kapic->fields[0xf].data; + for (i = 0; i < 8; i++) { + s->isr[i] = kapic->fields[0x10 + i].data; + s->tmr[i] = kapic->fields[0x18 + i].data; + s->irr[i] = kapic->fields[0x20 + i].data; + } + + s->esr = kapic->fields[0x28].data; + s->icr[0] = kapic->fields[0x30].data; + s->icr[1] = kapic->fields[0x31].data; + for (i = 0; i < APIC_LVT_NB; i++) { + s->lvt[i] = kapic->fields[0x32 + i].data; + } + + 
s->initial_count = kapic->fields[0x38].data; + s->divide_conf = kapic->fields[0x3e].data; + + v = (s->divide_conf & 3) | ((s->divide_conf >> 1) & 4); + s->count_shift = (v + 1) & 7; + + s->initial_count_load_time = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL); + apic_next_timer(s, s->initial_count_load_time); +} + +static int whpx_apic_set_base(APICCommonState *s, uint64_t val) +{ + s->apicbase = val; + return 0; +} + +static void whpx_put_apic_base(CPUState *cpu, uint64_t val) +{ + HRESULT hr; + WHV_REGISTER_VALUE reg_value = {.Reg64 = val}; + WHV_REGISTER_NAME reg_name = WHvX64RegisterApicBase; + + hr = whp_dispatch.WHvSetVirtualProcessorRegisters( + whpx_global.partition, + cpu->cpu_index, + ®_name, 1, + ®_value); + + if (FAILED(hr)) { + error_report("WHPX: Failed to set MSR APIC base, hr=%08lx", hr); + } +} + +static void whpx_apic_set_tpr(APICCommonState *s, uint8_t val) +{ + s->tpr = val; +} + +static uint8_t whpx_apic_get_tpr(APICCommonState *s) +{ + return s->tpr; +} + +static void whpx_apic_vapic_base_update(APICCommonState *s) +{ + /* not implemented yet */ +} + +static void whpx_apic_put(CPUState *cs, run_on_cpu_data data) +{ + APICCommonState *s = data.host_ptr; + struct whpx_lapic_state kapic; + HRESULT hr; + + whpx_put_apic_base(CPU(s->cpu), s->apicbase); + whpx_put_apic_state(s, &kapic); + + hr = whp_dispatch.WHvSetVirtualProcessorInterruptControllerState2( + whpx_global.partition, + cs->cpu_index, + &kapic, + sizeof(kapic)); + if (FAILED(hr)) { + fprintf(stderr, + "WHvSetVirtualProcessorInterruptControllerState failed: %08lx\n", + hr); + + abort(); + } +} + +void whpx_apic_get(DeviceState *dev) +{ + APICCommonState *s = APIC_COMMON(dev); + CPUState *cpu = CPU(s->cpu); + struct whpx_lapic_state kapic; + + HRESULT hr = whp_dispatch.WHvGetVirtualProcessorInterruptControllerState2( + whpx_global.partition, + cpu->cpu_index, + &kapic, + sizeof(kapic), + NULL); + if (FAILED(hr)) { + fprintf(stderr, + "WHvSetVirtualProcessorInterruptControllerState failed: 
%08lx\n", + hr); + + abort(); + } + + whpx_get_apic_state(s, &kapic); +} + +static void whpx_apic_post_load(APICCommonState *s) +{ + run_on_cpu(CPU(s->cpu), whpx_apic_put, RUN_ON_CPU_HOST_PTR(s)); +} + +static void whpx_apic_external_nmi(APICCommonState *s) +{ +} + +static void whpx_send_msi(MSIMessage *msg) +{ + uint64_t addr = msg->address; + uint32_t data = msg->data; + uint8_t dest = (addr & MSI_ADDR_DEST_ID_MASK) >> MSI_ADDR_DEST_ID_SHIFT; + uint8_t vector = (data & MSI_DATA_VECTOR_MASK) >> MSI_DATA_VECTOR_SHIFT; + uint8_t dest_mode = (addr >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1; + uint8_t trigger_mode = (data >> MSI_DATA_TRIGGER_SHIFT) & 0x1; + uint8_t delivery = (data >> MSI_DATA_DELIVERY_MODE_SHIFT) & 0x7; + + WHV_INTERRUPT_CONTROL interrupt = { + /* Values correspond to delivery modes */ + .Type = delivery, + .DestinationMode = dest_mode ? + WHvX64InterruptDestinationModeLogical : + WHvX64InterruptDestinationModePhysical, + + .TriggerMode = trigger_mode ? + WHvX64InterruptTriggerModeLevel : WHvX64InterruptTriggerModeEdge, + .Reserved = 0, + .Vector = vector, + .Destination = dest, + }; + HRESULT hr = whp_dispatch.WHvRequestInterrupt(whpx_global.partition, + &interrupt, sizeof(interrupt)); + if (FAILED(hr)) { + fprintf(stderr, "whpx: injection failed, MSI (%llx, %x) delivery: %d, " + "dest_mode: %d, trigger mode: %d, vector: %d, lost (%08lx)\n", + addr, data, delivery, dest_mode, trigger_mode, vector, hr); + } +} + +static uint64_t whpx_apic_mem_read(void *opaque, hwaddr addr, + unsigned size) +{ + return ~(uint64_t)0; +} + +static void whpx_apic_mem_write(void *opaque, hwaddr addr, + uint64_t data, unsigned size) +{ + MSIMessage msg = { .address = addr, .data = data }; + whpx_send_msi(&msg); +} + +static const MemoryRegionOps whpx_apic_io_ops = { + .read = whpx_apic_mem_read, + .write = whpx_apic_mem_write, + .endianness = DEVICE_NATIVE_ENDIAN, +}; + +static void whpx_apic_reset(APICCommonState *s) +{ + /* Not used by WHPX. 
*/ + s->wait_for_sipi = 0; + + run_on_cpu(CPU(s->cpu), whpx_apic_put, RUN_ON_CPU_HOST_PTR(s)); +} + +static void whpx_apic_realize(DeviceState *dev, Error **errp) +{ + APICCommonState *s = APIC_COMMON(dev); + + memory_region_init_io(&s->io_memory, OBJECT(s), &whpx_apic_io_ops, s, + "whpx-apic-msi", APIC_SPACE_SIZE); + + msi_nonbroken = true; +} + +static void whpx_apic_class_init(ObjectClass *klass, void *data) +{ + APICCommonClass *k = APIC_COMMON_CLASS(klass); + + k->realize = whpx_apic_realize; + k->reset = whpx_apic_reset; + k->set_base = whpx_apic_set_base; + k->set_tpr = whpx_apic_set_tpr; + k->get_tpr = whpx_apic_get_tpr; + k->post_load = whpx_apic_post_load; + k->vapic_base_update = whpx_apic_vapic_base_update; + k->external_nmi = whpx_apic_external_nmi; + k->send_msi = whpx_send_msi; +} + +static const TypeInfo whpx_apic_info = { + .name = "whpx-apic", + .parent = TYPE_APIC_COMMON, + .instance_size = sizeof(APICCommonState), + .class_init = whpx_apic_class_init, +}; + +static void whpx_apic_register_types(void) +{ + type_register_static(&whpx_apic_info); +} + +type_init(whpx_apic_register_types) diff --git a/target/i386/whp-dispatch.h b/target/i386/whpx/whpx-internal.h index d8d3485976..6633e9c4ca 100644 --- a/target/i386/whp-dispatch.h +++ b/target/i386/whpx/whpx-internal.h @@ -1,12 +1,55 @@ -#include "windows.h" -#include <stdbool.h> +#ifndef TARGET_I386_WHPX_INTERNAL_H +#define TARGET_I386_WHPX_INTERNAL_H -#include <WinHvPlatform.h> -#include <WinHvEmulation.h> +#include <windows.h> +#include <winhvplatform.h> +#include <winhvemulation.h> -#ifndef WHP_DISPATCH_H -#define WHP_DISPATCH_H +typedef enum WhpxBreakpointState { + WHPX_BP_CLEARED = 0, + WHPX_BP_SET_PENDING, + WHPX_BP_SET, + WHPX_BP_CLEAR_PENDING, +} WhpxBreakpointState; +struct whpx_breakpoint { + vaddr address; + WhpxBreakpointState state; + uint8_t original_instruction; +}; + +struct whpx_breakpoint_collection { + int allocated, used; + struct whpx_breakpoint data[0]; +}; + +struct 
whpx_breakpoints { + int original_address_count; + vaddr *original_addresses; + + struct whpx_breakpoint_collection *breakpoints; +}; + +struct whpx_state { + uint64_t mem_quota; + WHV_PARTITION_HANDLE partition; + uint64_t exception_exit_bitmap; + int32_t running_cpus; + struct whpx_breakpoints breakpoints; + bool step_pending; + + bool kernel_irqchip_allowed; + bool kernel_irqchip_required; + bool apic_in_platform; +}; + +extern struct whpx_state whpx_global; +void whpx_apic_get(DeviceState *s); + +#define WHV_E_UNKNOWN_CAPABILITY 0x80370300L + +/* This should eventually come from the Windows SDK */ +#define WHV_E_UNKNOWN_PROPERTY 0x80370302 #define LIST_WINHVPLATFORM_FUNCTIONS(X) \ X(HRESULT, WHvGetCapability, (WHV_CAPABILITY_CODE CapabilityCode, VOID* CapabilityBuffer, UINT32 CapabilityBufferSizeInBytes, UINT32* WrittenSizeInBytes)) \ @@ -25,6 +68,20 @@ X(HRESULT, WHvGetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, WHV_REGISTER_VALUE* RegisterValues)) \ X(HRESULT, WHvSetVirtualProcessorRegisters, (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, const WHV_REGISTER_NAME* RegisterNames, UINT32 RegisterCount, const WHV_REGISTER_VALUE* RegisterValues)) \ +/* + * These are supplemental functions that may not be present + * on all versions and are not critical for basic functionality. 
+ */ +#define LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(X) \ + X(HRESULT, WHvSuspendPartitionTime, (WHV_PARTITION_HANDLE Partition)) \ + X(HRESULT, WHvRequestInterrupt, (WHV_PARTITION_HANDLE Partition, \ + WHV_INTERRUPT_CONTROL* Interrupt, UINT32 InterruptControlSize)) \ + X(HRESULT, WHvGetVirtualProcessorInterruptControllerState2, \ + (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, PVOID State, \ + UINT32 StateSize, UINT32* WrittenSize)) \ + X(HRESULT, WHvSetVirtualProcessorInterruptControllerState2, \ + (WHV_PARTITION_HANDLE Partition, UINT32 VpIndex, PVOID State, \ + UINT32 StateSize)) \ #define LIST_WINHVEMULATION_FUNCTIONS(X) \ X(HRESULT, WHvEmulatorCreateEmulator, (const WHV_EMULATOR_CALLBACKS* Callbacks, WHV_EMULATOR_HANDLE* Emulator)) \ @@ -32,7 +89,6 @@ X(HRESULT, WHvEmulatorTryIoEmulation, (WHV_EMULATOR_HANDLE Emulator, VOID* Context, const WHV_VP_EXIT_CONTEXT* VpContext, const WHV_X64_IO_PORT_ACCESS_CONTEXT* IoInstructionContext, WHV_EMULATOR_STATUS* EmulatorReturnStatus)) \ X(HRESULT, WHvEmulatorTryMmioEmulation, (WHV_EMULATOR_HANDLE Emulator, VOID* Context, const WHV_VP_EXIT_CONTEXT* VpContext, const WHV_MEMORY_ACCESS_CONTEXT* MmioInstructionContext, WHV_EMULATOR_STATUS* EmulatorReturnStatus)) \ - #define WHP_DEFINE_TYPE(return_type, function_name, signature) \ typedef return_type (WINAPI *function_name ## _t) signature; @@ -42,15 +98,22 @@ /* Define function typedef */ LIST_WINHVPLATFORM_FUNCTIONS(WHP_DEFINE_TYPE) LIST_WINHVEMULATION_FUNCTIONS(WHP_DEFINE_TYPE) +LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DEFINE_TYPE) struct WHPDispatch { LIST_WINHVPLATFORM_FUNCTIONS(WHP_DECLARE_MEMBER) LIST_WINHVEMULATION_FUNCTIONS(WHP_DECLARE_MEMBER) + LIST_WINHVPLATFORM_FUNCTIONS_SUPPLEMENTAL(WHP_DECLARE_MEMBER) }; extern struct WHPDispatch whp_dispatch; bool init_whp_dispatch(void); +typedef enum WHPFunctionList { + WINHV_PLATFORM_FNS_DEFAULT, + WINHV_EMULATION_FNS_DEFAULT, + WINHV_PLATFORM_FNS_SUPPLEMENTAL +} WHPFunctionList; -#endif /* WHP_DISPATCH_H */ 
+#endif /* TARGET_I386_WHPX_INTERNAL_H */ diff --git a/target/i386/xsave_helper.c b/target/i386/xsave_helper.c index 52ea7e654b..996e9f3bfe 100644 --- a/target/i386/xsave_helper.c +++ b/target/i386/xsave_helper.c @@ -4,17 +4,25 @@ */ #include "qemu/osdep.h" -#include "qemu-common.h" #include "cpu.h" -void x86_cpu_xsave_all_areas(X86CPU *cpu, X86XSaveArea *buf) +void x86_cpu_xsave_all_areas(X86CPU *cpu, void *buf, uint32_t buflen) { CPUX86State *env = &cpu->env; - X86XSaveArea *xsave = buf; + const ExtSaveArea *e, *f; + int i; + X86LegacyXSaveArea *legacy; + X86XSaveHeader *header; uint16_t cwd, swd, twd; - int i; - memset(xsave, 0, sizeof(X86XSaveArea)); + + memset(buf, 0, buflen); + + e = &x86_ext_save_areas[XSTATE_FP_BIT]; + + legacy = buf + e->offset; + header = buf + e->offset + sizeof(*legacy); + twd = 0; swd = env->fpus & ~(7 << 11); swd |= (env->fpstt & 7) << 11; @@ -22,92 +30,250 @@ void x86_cpu_xsave_all_areas(X86CPU *cpu, X86XSaveArea *buf) for (i = 0; i < 8; ++i) { twd |= (!env->fptags[i]) << i; } - xsave->legacy.fcw = cwd; - xsave->legacy.fsw = swd; - xsave->legacy.ftw = twd; - xsave->legacy.fpop = env->fpop; - xsave->legacy.fpip = env->fpip; - xsave->legacy.fpdp = env->fpdp; - memcpy(&xsave->legacy.fpregs, env->fpregs, - sizeof env->fpregs); - xsave->legacy.mxcsr = env->mxcsr; - xsave->header.xstate_bv = env->xstate_bv; - memcpy(&xsave->bndreg_state.bnd_regs, env->bnd_regs, - sizeof env->bnd_regs); - xsave->bndcsr_state.bndcsr = env->bndcs_regs; - memcpy(&xsave->opmask_state.opmask_regs, env->opmask_regs, - sizeof env->opmask_regs); + legacy->fcw = cwd; + legacy->fsw = swd; + legacy->ftw = twd; + legacy->fpop = env->fpop; + legacy->fpip = env->fpip; + legacy->fpdp = env->fpdp; + memcpy(&legacy->fpregs, env->fpregs, + sizeof(env->fpregs)); + legacy->mxcsr = env->mxcsr; for (i = 0; i < CPU_NB_REGS; i++) { - uint8_t *xmm = xsave->legacy.xmm_regs[i]; - uint8_t *ymmh = xsave->avx_state.ymmh[i]; - uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i]; + 
uint8_t *xmm = legacy->xmm_regs[i]; + stq_p(xmm, env->xmm_regs[i].ZMM_Q(0)); - stq_p(xmm+8, env->xmm_regs[i].ZMM_Q(1)); - stq_p(ymmh, env->xmm_regs[i].ZMM_Q(2)); - stq_p(ymmh+8, env->xmm_regs[i].ZMM_Q(3)); - stq_p(zmmh, env->xmm_regs[i].ZMM_Q(4)); - stq_p(zmmh+8, env->xmm_regs[i].ZMM_Q(5)); - stq_p(zmmh+16, env->xmm_regs[i].ZMM_Q(6)); - stq_p(zmmh+24, env->xmm_regs[i].ZMM_Q(7)); + stq_p(xmm + 8, env->xmm_regs[i].ZMM_Q(1)); + } + + header->xstate_bv = env->xstate_bv; + + e = &x86_ext_save_areas[XSTATE_YMM_BIT]; + if (e->size && e->offset) { + XSaveAVX *avx; + + avx = buf + e->offset; + + for (i = 0; i < CPU_NB_REGS; i++) { + uint8_t *ymmh = avx->ymmh[i]; + + stq_p(ymmh, env->xmm_regs[i].ZMM_Q(2)); + stq_p(ymmh + 8, env->xmm_regs[i].ZMM_Q(3)); + } + } + + e = &x86_ext_save_areas[XSTATE_BNDREGS_BIT]; + if (e->size && e->offset) { + XSaveBNDREG *bndreg; + XSaveBNDCSR *bndcsr; + + f = &x86_ext_save_areas[XSTATE_BNDCSR_BIT]; + assert(f->size); + assert(f->offset); + + bndreg = buf + e->offset; + bndcsr = buf + f->offset; + + memcpy(&bndreg->bnd_regs, env->bnd_regs, + sizeof(env->bnd_regs)); + bndcsr->bndcsr = env->bndcs_regs; } + e = &x86_ext_save_areas[XSTATE_OPMASK_BIT]; + if (e->size && e->offset) { + XSaveOpmask *opmask; + XSaveZMM_Hi256 *zmm_hi256; #ifdef TARGET_X86_64 - memcpy(&xsave->hi16_zmm_state.hi16_zmm, &env->xmm_regs[16], - 16 * sizeof env->xmm_regs[16]); - memcpy(&xsave->pkru_state, &env->pkru, sizeof env->pkru); + XSaveHi16_ZMM *hi16_zmm; #endif + f = &x86_ext_save_areas[XSTATE_ZMM_Hi256_BIT]; + assert(f->size); + assert(f->offset); + + opmask = buf + e->offset; + zmm_hi256 = buf + f->offset; + + memcpy(&opmask->opmask_regs, env->opmask_regs, + sizeof(env->opmask_regs)); + + for (i = 0; i < CPU_NB_REGS; i++) { + uint8_t *zmmh = zmm_hi256->zmm_hi256[i]; + + stq_p(zmmh, env->xmm_regs[i].ZMM_Q(4)); + stq_p(zmmh + 8, env->xmm_regs[i].ZMM_Q(5)); + stq_p(zmmh + 16, env->xmm_regs[i].ZMM_Q(6)); + stq_p(zmmh + 24, env->xmm_regs[i].ZMM_Q(7)); + } + +#ifdef 
TARGET_X86_64 + f = &x86_ext_save_areas[XSTATE_Hi16_ZMM_BIT]; + assert(f->size); + assert(f->offset); + + hi16_zmm = buf + f->offset; + + memcpy(&hi16_zmm->hi16_zmm, &env->xmm_regs[16], + 16 * sizeof(env->xmm_regs[16])); +#endif + } + +#ifdef TARGET_X86_64 + e = &x86_ext_save_areas[XSTATE_PKRU_BIT]; + if (e->size && e->offset) { + XSavePKRU *pkru = buf + e->offset; + + memcpy(pkru, &env->pkru, sizeof(env->pkru)); + } + + e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; + if (e->size && e->offset) { + XSaveXTILECFG *tilecfg = buf + e->offset; + + memcpy(tilecfg, &env->xtilecfg, sizeof(env->xtilecfg)); + } + + e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; + if (e->size && e->offset && buflen >= e->size + e->offset) { + XSaveXTILEDATA *tiledata = buf + e->offset; + + memcpy(tiledata, &env->xtiledata, sizeof(env->xtiledata)); + } +#endif } -void x86_cpu_xrstor_all_areas(X86CPU *cpu, const X86XSaveArea *buf) +void x86_cpu_xrstor_all_areas(X86CPU *cpu, const void *buf, uint32_t buflen) { - CPUX86State *env = &cpu->env; - const X86XSaveArea *xsave = buf; - + const ExtSaveArea *e, *f, *g; int i; + + const X86LegacyXSaveArea *legacy; + const X86XSaveHeader *header; uint16_t cwd, swd, twd; - cwd = xsave->legacy.fcw; - swd = xsave->legacy.fsw; - twd = xsave->legacy.ftw; - env->fpop = xsave->legacy.fpop; + + e = &x86_ext_save_areas[XSTATE_FP_BIT]; + + legacy = buf + e->offset; + header = buf + e->offset + sizeof(*legacy); + + cwd = legacy->fcw; + swd = legacy->fsw; + twd = legacy->ftw; + env->fpop = legacy->fpop; env->fpstt = (swd >> 11) & 7; env->fpus = swd; env->fpuc = cwd; for (i = 0; i < 8; ++i) { env->fptags[i] = !((twd >> i) & 1); } - env->fpip = xsave->legacy.fpip; - env->fpdp = xsave->legacy.fpdp; - env->mxcsr = xsave->legacy.mxcsr; - memcpy(env->fpregs, &xsave->legacy.fpregs, - sizeof env->fpregs); - env->xstate_bv = xsave->header.xstate_bv; - memcpy(env->bnd_regs, &xsave->bndreg_state.bnd_regs, - sizeof env->bnd_regs); - env->bndcs_regs = xsave->bndcsr_state.bndcsr; 
- memcpy(env->opmask_regs, &xsave->opmask_state.opmask_regs, - sizeof env->opmask_regs); + env->fpip = legacy->fpip; + env->fpdp = legacy->fpdp; + env->mxcsr = legacy->mxcsr; + memcpy(env->fpregs, &legacy->fpregs, + sizeof(env->fpregs)); for (i = 0; i < CPU_NB_REGS; i++) { - const uint8_t *xmm = xsave->legacy.xmm_regs[i]; - const uint8_t *ymmh = xsave->avx_state.ymmh[i]; - const uint8_t *zmmh = xsave->zmm_hi256_state.zmm_hi256[i]; + const uint8_t *xmm = legacy->xmm_regs[i]; + env->xmm_regs[i].ZMM_Q(0) = ldq_p(xmm); - env->xmm_regs[i].ZMM_Q(1) = ldq_p(xmm+8); - env->xmm_regs[i].ZMM_Q(2) = ldq_p(ymmh); - env->xmm_regs[i].ZMM_Q(3) = ldq_p(ymmh+8); - env->xmm_regs[i].ZMM_Q(4) = ldq_p(zmmh); - env->xmm_regs[i].ZMM_Q(5) = ldq_p(zmmh+8); - env->xmm_regs[i].ZMM_Q(6) = ldq_p(zmmh+16); - env->xmm_regs[i].ZMM_Q(7) = ldq_p(zmmh+24); + env->xmm_regs[i].ZMM_Q(1) = ldq_p(xmm + 8); + } + + env->xstate_bv = header->xstate_bv; + + e = &x86_ext_save_areas[XSTATE_YMM_BIT]; + if (e->size && e->offset) { + const XSaveAVX *avx; + + avx = buf + e->offset; + for (i = 0; i < CPU_NB_REGS; i++) { + const uint8_t *ymmh = avx->ymmh[i]; + + env->xmm_regs[i].ZMM_Q(2) = ldq_p(ymmh); + env->xmm_regs[i].ZMM_Q(3) = ldq_p(ymmh + 8); + } + } + + e = &x86_ext_save_areas[XSTATE_BNDREGS_BIT]; + if (e->size && e->offset) { + const XSaveBNDREG *bndreg; + const XSaveBNDCSR *bndcsr; + + f = &x86_ext_save_areas[XSTATE_BNDCSR_BIT]; + assert(f->size); + assert(f->offset); + + bndreg = buf + e->offset; + bndcsr = buf + f->offset; + + memcpy(env->bnd_regs, &bndreg->bnd_regs, + sizeof(env->bnd_regs)); + env->bndcs_regs = bndcsr->bndcsr; } + e = &x86_ext_save_areas[XSTATE_OPMASK_BIT]; + if (e->size && e->offset) { + const XSaveOpmask *opmask; + const XSaveZMM_Hi256 *zmm_hi256; #ifdef TARGET_X86_64 - memcpy(&env->xmm_regs[16], &xsave->hi16_zmm_state.hi16_zmm, - 16 * sizeof env->xmm_regs[16]); - memcpy(&env->pkru, &xsave->pkru_state, sizeof env->pkru); + const XSaveHi16_ZMM *hi16_zmm; #endif + f = 
&x86_ext_save_areas[XSTATE_ZMM_Hi256_BIT]; + assert(f->size); + assert(f->offset); + + g = &x86_ext_save_areas[XSTATE_Hi16_ZMM_BIT]; + assert(g->size); + assert(g->offset); + + opmask = buf + e->offset; + zmm_hi256 = buf + f->offset; +#ifdef TARGET_X86_64 + hi16_zmm = buf + g->offset; +#endif + + memcpy(env->opmask_regs, &opmask->opmask_regs, + sizeof(env->opmask_regs)); + + for (i = 0; i < CPU_NB_REGS; i++) { + const uint8_t *zmmh = zmm_hi256->zmm_hi256[i]; + + env->xmm_regs[i].ZMM_Q(4) = ldq_p(zmmh); + env->xmm_regs[i].ZMM_Q(5) = ldq_p(zmmh + 8); + env->xmm_regs[i].ZMM_Q(6) = ldq_p(zmmh + 16); + env->xmm_regs[i].ZMM_Q(7) = ldq_p(zmmh + 24); + } + +#ifdef TARGET_X86_64 + memcpy(&env->xmm_regs[16], &hi16_zmm->hi16_zmm, + 16 * sizeof(env->xmm_regs[16])); +#endif + } + +#ifdef TARGET_X86_64 + e = &x86_ext_save_areas[XSTATE_PKRU_BIT]; + if (e->size && e->offset) { + const XSavePKRU *pkru; + + pkru = buf + e->offset; + memcpy(&env->pkru, pkru, sizeof(env->pkru)); + } + + e = &x86_ext_save_areas[XSTATE_XTILE_CFG_BIT]; + if (e->size && e->offset) { + const XSaveXTILECFG *tilecfg = buf + e->offset; + + memcpy(&env->xtilecfg, tilecfg, sizeof(env->xtilecfg)); + } + + e = &x86_ext_save_areas[XSTATE_XTILE_DATA_BIT]; + if (e->size && e->offset && buflen >= e->size + e->offset) { + const XSaveXTILEDATA *tiledata = buf + e->offset; + + memcpy(&env->xtiledata, tiledata, sizeof(env->xtiledata)); + } +#endif } |