Diffstat (limited to 'target/ppc/kvm.c')
-rw-r--r--  target/ppc/kvm.c  782
1 files changed, 469 insertions, 313 deletions
diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c
index ebbb48c42f..8231feb2d4 100644
--- a/target/ppc/kvm.c
+++ b/target/ppc/kvm.c
@@ -21,24 +21,22 @@
 #include <linux/kvm.h>
 
-#include "qemu-common.h"
 #include "qapi/error.h"
 #include "qemu/error-report.h"
 #include "cpu.h"
 #include "cpu-models.h"
 #include "qemu/timer.h"
-#include "sysemu/sysemu.h"
 #include "sysemu/hw_accel.h"
 #include "kvm_ppc.h"
 #include "sysemu/cpus.h"
 #include "sysemu/device_tree.h"
 #include "mmu-hash64.h"
 
-#include "hw/sysbus.h"
 #include "hw/ppc/spapr.h"
-#include "hw/ppc/spapr_vio.h"
 #include "hw/ppc/spapr_cpu_core.h"
+#include "hw/hw.h"
 #include "hw/ppc/ppc.h"
+#include "migration/qemu-file-types.h"
 #include "sysemu/watchdog.h"
 #include "trace.h"
 #include "exec/gdbstub.h"
@@ -46,28 +44,21 @@
 #include "exec/ram_addr.h"
 #include "sysemu/hostmem.h"
 #include "qemu/cutils.h"
+#include "qemu/main-loop.h"
 #include "qemu/mmap-alloc.h"
 #include "elf.h"
 #include "sysemu/kvm_int.h"
 
-//#define DEBUG_KVM
-
-#ifdef DEBUG_KVM
-#define DPRINTF(fmt, ...) \
-    do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
-#else
-#define DPRINTF(fmt, ...) \
-    do { } while (0)
-#endif
-
 #define PROC_DEVTREE_CPU      "/proc/device-tree/cpus/"
 
+#define DEBUG_RETURN_GUEST 0
+#define DEBUG_RETURN_GDB   1
+
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
     KVM_CAP_LAST_INFO
 };
 
-static int cap_interrupt_unset = false;
-static int cap_interrupt_level = false;
+static int cap_interrupt_unset;
 static int cap_segstate;
 static int cap_booke_sregs;
 static int cap_ppc_smt;
@@ -86,50 +77,45 @@
 static int cap_fixup_hcalls;
 static int cap_htm;             /* Hardware transactional memory support */
 static int cap_mmu_radix;
 static int cap_mmu_hash_v3;
+static int cap_xive;
 static int cap_resize_hpt;
 static int cap_ppc_pvr_compat;
 static int cap_ppc_safe_cache;
 static int cap_ppc_safe_bounds_check;
 static int cap_ppc_safe_indirect_branch;
+static int cap_ppc_count_cache_flush_assist;
 static int cap_ppc_nested_kvm_hv;
+static int cap_large_decr;
+static int cap_fwnmi;
+static int cap_rpt_invalidate;
+static int cap_ail_mode_3;
 
 static uint32_t debug_inst_opcode;
 
-/* XXX We have a race condition where we actually have a level triggered
- * interrupt, but the infrastructure can't expose that yet, so the guest
- * takes but ignores it, goes to sleep and never gets notified that there's
- * still an interrupt pending.
- *
- * As a quick workaround, let's just wake up again 20 ms after we injected
- * an interrupt. That way we can assure that we're always reinjecting
- * interrupts in case the guest swallowed them.
- */
-static QEMUTimer *idle_timer;
-
-static void kvm_kick_cpu(void *opaque)
-{
-    PowerPCCPU *cpu = opaque;
-
-    qemu_cpu_kick(CPU(cpu));
-}
-
-/* Check whether we are running with KVM-PR (instead of KVM-HV). This
+/*
+ * Check whether we are running with KVM-PR (instead of KVM-HV). This
  * should only be used for fallback tests - generally we should use
  * explicit capabilities for the features we want, rather than
- * assuming what is/isn't available depending on the KVM variant. */
+ * assuming what is/isn't available depending on the KVM variant.
+ */
 static bool kvmppc_is_pr(KVMState *ks)
 {
     /* Assume KVM-PR if the GET_PVINFO capability is available */
     return kvm_vm_check_extension(ks, KVM_CAP_PPC_GET_PVINFO) != 0;
 }
 
-static int kvm_ppc_register_host_cpu_type(MachineState *ms);
+static int kvm_ppc_register_host_cpu_type(void);
 static void kvmppc_get_cpu_characteristics(KVMState *s);
+static int kvmppc_get_dec_bits(void);
+
+int kvm_arch_get_default_type(MachineState *ms)
+{
+    return 0;
+}
 
 int kvm_arch_init(MachineState *ms, KVMState *s)
 {
     cap_interrupt_unset = kvm_check_extension(s, KVM_CAP_PPC_UNSET_IRQ);
-    cap_interrupt_level = kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL);
     cap_segstate = kvm_check_extension(s, KVM_CAP_PPC_SEGSTATE);
     cap_booke_sregs = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_SREGS);
     cap_ppc_smt_possible = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT_POSSIBLE);
@@ -141,36 +127,44 @@ int kvm_arch_init(MachineState *ms, KVMState *s)
     cap_hior = kvm_check_extension(s, KVM_CAP_PPC_HIOR);
     cap_epr = kvm_check_extension(s, KVM_CAP_PPC_EPR);
     cap_ppc_watchdog = kvm_check_extension(s, KVM_CAP_PPC_BOOKE_WATCHDOG);
-    /* Note: we don't set cap_papr here, because this capability is
-     * only activated after this by kvmppc_set_papr() */
+    /*
+     * Note: we don't set cap_papr here, because this capability is
+     * only activated after this by kvmppc_set_papr()
+     */
     cap_htab_fd = kvm_vm_check_extension(s, KVM_CAP_PPC_HTAB_FD);
     cap_fixup_hcalls = kvm_check_extension(s, KVM_CAP_PPC_FIXUP_HCALL);
     cap_ppc_smt = kvm_vm_check_extension(s, KVM_CAP_PPC_SMT);
     cap_htm = kvm_vm_check_extension(s, KVM_CAP_PPC_HTM);
     cap_mmu_radix = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_RADIX);
     cap_mmu_hash_v3 = kvm_vm_check_extension(s, KVM_CAP_PPC_MMU_HASH_V3);
+    cap_xive = kvm_vm_check_extension(s, KVM_CAP_PPC_IRQ_XIVE);
     cap_resize_hpt = kvm_vm_check_extension(s, KVM_CAP_SPAPR_RESIZE_HPT);
     kvmppc_get_cpu_characteristics(s);
     cap_ppc_nested_kvm_hv = kvm_vm_check_extension(s, KVM_CAP_PPC_NESTED_HV);
+    cap_large_decr = kvmppc_get_dec_bits();
+    cap_fwnmi = kvm_vm_check_extension(s, KVM_CAP_PPC_FWNMI);
     /*
      * Note: setting it to false because there is not such capability
      * in KVM at this moment.
      *
      * TODO: call kvm_vm_check_extension() with the right capability
-     * after the kernel starts implementing it.*/
+     * after the kernel starts implementing it.
+     */
     cap_ppc_pvr_compat = false;
 
-    if (!cap_interrupt_level) {
-        fprintf(stderr, "KVM: Couldn't find level irq capability. Expect the "
-                        "VM to stall at times!\n");
+    if (!kvm_check_extension(s, KVM_CAP_PPC_IRQ_LEVEL)) {
+        error_report("KVM: Host kernel doesn't have level irq capability");
+        exit(1);
     }
 
-    kvm_ppc_register_host_cpu_type(ms);
+    cap_rpt_invalidate = kvm_vm_check_extension(s, KVM_CAP_PPC_RPT_INVALIDATE);
+    cap_ail_mode_3 = kvm_vm_check_extension(s, KVM_CAP_PPC_AIL_MODE_3);
+    kvm_ppc_register_host_cpu_type();
 
     return 0;
 }
 
-int kvm_arch_irqchip_create(MachineState *ms, KVMState *s)
+int kvm_arch_irqchip_create(KVMState *s)
 {
     return 0;
 }
@@ -183,10 +177,13 @@ static int kvm_arch_sync_sregs(PowerPCCPU *cpu)
     int ret;
 
     if (cenv->excp_model == POWERPC_EXCP_BOOKE) {
-        /* What we're really trying to say is "if we're on BookE, we use
-           the native PVR for now". This is the only sane way to check
-           it though, so we potentially confuse users that they can run
-           BookE guests on BookS. Let's hope nobody dares enough :) */
+        /*
+         * What we're really trying to say is "if we're on BookE, we
+         * use the native PVR for now". This is the only sane way to
+         * check it though, so we potentially confuse users that they
+         * can run BookE guests on BookS. Let's hope nobody dares
+         * enough :)
+         */
         return 0;
     } else {
         if (!cap_segstate) {
@@ -271,11 +268,11 @@
                  "KVM failed to provide the MMU features it supports");
 }
 
-struct ppc_radix_page_info *kvm_get_radix_page_info(void)
+static struct ppc_radix_page_info *kvmppc_get_radix_page_info(void)
 {
-    KVMState *s = KVM_STATE(current_machine->accelerator);
+    KVMState *s = KVM_STATE(current_accel());
     struct ppc_radix_page_info *radix_page_info;
-    struct kvm_ppc_rmmu_info rmmu_info;
+    struct kvm_ppc_rmmu_info rmmu_info = { };
     int i;
 
     if (!kvm_check_extension(s, KVM_CAP_PPC_MMU_RADIX)) {
@@ -418,13 +415,15 @@ void kvm_check_mmu(PowerPCCPU *cpu, Error **errp)
     }
 
     if (ppc_hash64_has(cpu, PPC_HASH64_CI_LARGEPAGE)) {
-        /* Mostly what guest pagesizes we can use are related to the
+        /*
+         * Mostly what guest pagesizes we can use are related to the
          * host pages used to map guest RAM, which is handled in the
          * platform code. Cache-Inhibited largepages (64k) however are
          * used for I/O, so if they're mapped to the host at all it
         * will be a normal mapping, not a special hugepage one used
-         * for RAM. */
-        if (getpagesize() < 0x10000) {
+         * for RAM.
+         */
+        if (qemu_real_host_page_size() < 0x10000) {
             error_setg(errp,
                        "KVM can't supply 64kiB CI pages, which guest expects");
         }
@@ -437,9 +436,9 @@
 unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 {
     return POWERPC_CPU(cpu)->vcpu_id;
 }
 
-/* e500 supports 2 h/w breakpoint and 2 watchpoint.
- * book3s supports only 1 watchpoint, so array size
- * of 4 is sufficient for now.
+/*
+ * e500 supports 2 h/w breakpoint and 2 watchpoint. book3s supports
+ * only 1 watchpoint, so array size of 4 is sufficient for now.
  */
 #define MAX_HW_BKPTS 4
@@ -485,8 +484,6 @@ int kvm_arch_init_vcpu(CPUState *cs)
         return ret;
     }
 
-    idle_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL, kvm_kick_cpu, cpu);
-
     switch (cenv->mmu_model) {
     case POWERPC_MMU_BOOKE206:
         /* This target supports access to KVM's guest TLB */
@@ -494,9 +491,12 @@ int kvm_arch_init_vcpu(CPUState *cs)
         break;
     case POWERPC_MMU_2_07:
         if (!cap_htm && !kvmppc_is_pr(cs->kvm_state)) {
-            /* KVM-HV has transactional memory on POWER8 also without the
-             * KVM_CAP_PPC_HTM extension, so enable it here instead as
-             * long as it's availble to userspace on the host. */
+            /*
+             * KVM-HV has transactional memory on POWER8 also without
+             * the KVM_CAP_PPC_HTM extension, so enable it here
+             * instead as long as it's available to userspace on the
+             * host.
+             */
             if (qemu_getauxval(AT_HWCAP2) & PPC_FEATURE2_HAS_HTM) {
                 cap_htm = true;
             }
@@ -512,6 +512,11 @@ int kvm_arch_init_vcpu(CPUState *cs)
     return ret;
 }
 
+int kvm_arch_destroy_vcpu(CPUState *cs)
+{
+    return 0;
+}
+
 static void kvm_sw_tlb_put(PowerPCCPU *cpu)
 {
     CPUPPCState *env = &cpu->env;
@@ -541,12 +546,12 @@
 
 static void kvm_get_one_spr(CPUState *cs, uint64_t id, int spr)
 {
-    PowerPCCPU *cpu = POWERPC_CPU(cs);
-    CPUPPCState *env = &cpu->env;
+    CPUPPCState *env = cpu_env(cs);
+    /* Init 'val' to avoid "uninitialised value" Valgrind warnings */
     union {
         uint32_t u32;
         uint64_t u64;
-    } val;
+    } val = { };
     struct kvm_one_reg reg = {
         .id = id,
         .addr = (uintptr_t) &val,
@@ -575,8 +580,7 @@
 static void kvm_put_one_spr(CPUState *cs, uint64_t id, int spr)
 {
-    PowerPCCPU *cpu = POWERPC_CPU(cs);
-    CPUPPCState *env = &cpu->env;
+    CPUPPCState *env = cpu_env(cs);
     union {
         uint32_t u32;
         uint64_t u64;
@@ -609,8 +613,7 @@
 static int kvm_put_fp(CPUState *cs)
 {
-    PowerPCCPU *cpu = POWERPC_CPU(cs);
-    CPUPPCState *env = &cpu->env;
+    CPUPPCState *env = cpu_env(cs);
     struct kvm_one_reg reg;
     int i;
     int ret;
@@ -623,16 +626,16 @@ static int kvm_put_fp(CPUState *cs)
         reg.addr = (uintptr_t)&fpscr;
         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
         if (ret < 0) {
-            DPRINTF("Unable to set FPSCR to KVM: %s\n", strerror(errno));
+            trace_kvm_failed_fpscr_set(strerror(errno));
             return ret;
         }
 
         for (i = 0; i < 32; i++) {
             uint64_t vsr[2];
-            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
-            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
+            uint64_t *fpr = cpu_fpr_ptr(env, i);
+            uint64_t *vsrl = cpu_vsrl_ptr(env, i);
 
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
             vsr[0] = float64_val(*fpr);
             vsr[1] = *vsrl;
 #else
@@ -644,8 +647,8 @@ static int kvm_put_fp(CPUState *cs)
 
             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
             if (ret < 0) {
-                DPRINTF("Unable to set %s%d to KVM: %s\n", vsx ? "VSR" : "FPR",
-                        i, strerror(errno));
+                trace_kvm_failed_fp_set(vsx ? "VSR" : "FPR", i,
+                                        strerror(errno));
                 return ret;
             }
         }
@@ -656,7 +659,7 @@ static int kvm_put_fp(CPUState *cs)
         reg.addr = (uintptr_t)&env->vscr;
         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
         if (ret < 0) {
-            DPRINTF("Unable to set VSCR to KVM: %s\n", strerror(errno));
+            trace_kvm_failed_vscr_set(strerror(errno));
             return ret;
         }
 
@@ -665,7 +668,7 @@ static int kvm_put_fp(CPUState *cs)
             reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
             ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
             if (ret < 0) {
-                DPRINTF("Unable to set VR%d to KVM: %s\n", i, strerror(errno));
+                trace_kvm_failed_vr_set(i, strerror(errno));
                 return ret;
             }
         }
@@ -676,8 +679,7 @@
 static int kvm_get_fp(CPUState *cs)
 {
-    PowerPCCPU *cpu = POWERPC_CPU(cs);
-    CPUPPCState *env = &cpu->env;
+    CPUPPCState *env = cpu_env(cs);
     struct kvm_one_reg reg;
     int i;
     int ret;
@@ -690,7 +692,7 @@ static int kvm_get_fp(CPUState *cs)
         reg.addr = (uintptr_t)&fpscr;
         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
         if (ret < 0) {
-            DPRINTF("Unable to get FPSCR from KVM: %s\n", strerror(errno));
+            trace_kvm_failed_fpscr_get(strerror(errno));
            return ret;
         } else {
             env->fpscr = fpscr;
@@ -698,19 +700,19 @@ static int kvm_get_fp(CPUState *cs)
 
         for (i = 0; i < 32; i++) {
             uint64_t vsr[2];
-            uint64_t *fpr = cpu_fpr_ptr(&cpu->env, i);
-            uint64_t *vsrl = cpu_vsrl_ptr(&cpu->env, i);
+            uint64_t *fpr = cpu_fpr_ptr(env, i);
+            uint64_t *vsrl = cpu_vsrl_ptr(env, i);
 
             reg.addr = (uintptr_t) &vsr;
             reg.id = vsx ? KVM_REG_PPC_VSR(i) : KVM_REG_PPC_FPR(i);
 
             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
             if (ret < 0) {
-                DPRINTF("Unable to get %s%d from KVM: %s\n",
-                        vsx ? "VSR" : "FPR", i, strerror(errno));
+                trace_kvm_failed_fp_get(vsx ? "VSR" : "FPR", i,
+                                        strerror(errno));
                 return ret;
             } else {
-#ifdef HOST_WORDS_BIGENDIAN
+#if HOST_BIG_ENDIAN
                 *fpr = vsr[0];
                 if (vsx) {
                     *vsrl = vsr[1];
@@ -730,7 +732,7 @@ static int kvm_get_fp(CPUState *cs)
         reg.addr = (uintptr_t)&env->vscr;
         ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
         if (ret < 0) {
-            DPRINTF("Unable to get VSCR from KVM: %s\n", strerror(errno));
+            trace_kvm_failed_vscr_get(strerror(errno));
             return ret;
         }
 
@@ -739,8 +741,7 @@ static int kvm_get_fp(CPUState *cs)
             reg.addr = (uintptr_t)cpu_avr_ptr(env, i);
             ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
             if (ret < 0) {
-                DPRINTF("Unable to get VR%d from KVM: %s\n",
-                        i, strerror(errno));
+                trace_kvm_failed_vr_get(i, strerror(errno));
                 return ret;
             }
         }
@@ -753,7 +754,7 @@
 static int kvm_get_vpa(CPUState *cs)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
-    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
     struct kvm_one_reg reg;
     int ret;
@@ -761,7 +762,7 @@ static int kvm_get_vpa(CPUState *cs)
     reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
     if (ret < 0) {
-        DPRINTF("Unable to get VPA address from KVM: %s\n", strerror(errno));
+        trace_kvm_failed_vpa_addr_get(strerror(errno));
         return ret;
     }
@@ -771,8 +772,7 @@ static int kvm_get_vpa(CPUState *cs)
     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
     if (ret < 0) {
-        DPRINTF("Unable to get SLB shadow state from KVM: %s\n",
-                strerror(errno));
+        trace_kvm_failed_slb_get(strerror(errno));
         return ret;
     }
@@ -782,8 +782,7 @@ static int kvm_get_vpa(CPUState *cs)
     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
     ret = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, &reg);
     if (ret < 0) {
-        DPRINTF("Unable to get dispatch trace log state from KVM: %s\n",
-                strerror(errno));
+        trace_kvm_failed_dtl_get(strerror(errno));
         return ret;
     }
@@ -793,14 +792,16 @@
 static int kvm_put_vpa(CPUState *cs)
 {
     PowerPCCPU *cpu = POWERPC_CPU(cs);
-    sPAPRCPUState *spapr_cpu = spapr_cpu_state(cpu);
+    SpaprCpuState *spapr_cpu = spapr_cpu_state(cpu);
     struct kvm_one_reg reg;
     int ret;
 
-    /* SLB shadow or DTL can't be registered unless a master VPA is
+    /*
+     * SLB shadow or DTL can't be registered unless a master VPA is
      * registered. That means when restoring state, if a VPA *is*
      * registered, we need to set that up first. If not, we need to
-     * deregister the others before deregistering the master VPA */
+     * deregister the others before deregistering the master VPA
+     */
     assert(spapr_cpu->vpa_addr
            || !(spapr_cpu->slb_shadow_addr || spapr_cpu->dtl_addr));
@@ -809,7 +810,7 @@ static int kvm_put_vpa(CPUState *cs)
         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
         if (ret < 0) {
-            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
+            trace_kvm_failed_vpa_addr_set(strerror(errno));
             return ret;
         }
     }
@@ -820,7 +821,7 @@ static int kvm_put_vpa(CPUState *cs)
     reg.addr = (uintptr_t)&spapr_cpu->slb_shadow_addr;
     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
     if (ret < 0) {
-        DPRINTF("Unable to set SLB shadow state to KVM: %s\n", strerror(errno));
+        trace_kvm_failed_slb_set(strerror(errno));
         return ret;
     }
@@ -830,8 +831,7 @@ static int kvm_put_vpa(CPUState *cs)
     reg.addr = (uintptr_t)&spapr_cpu->dtl_addr;
     ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
     if (ret < 0) {
-        DPRINTF("Unable to set dispatch trace log state to KVM: %s\n",
-                strerror(errno));
+        trace_kvm_failed_dtl_set(strerror(errno));
         return ret;
     }
@@ -840,7 +840,7 @@ static int kvm_put_vpa(CPUState *cs)
         reg.addr = (uintptr_t)&spapr_cpu->vpa_addr;
         ret = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
         if (ret < 0) {
-            DPRINTF("Unable to set VPA address to KVM: %s\n", strerror(errno));
+            trace_kvm_failed_null_vpa_addr_set(strerror(errno));
             return ret;
         }
     }
@@ -852,7 +852,7 @@
 int kvmppc_put_books_sregs(PowerPCCPU *cpu)
 {
     CPUPPCState *env = &cpu->env;
-    struct kvm_sregs sregs;
+    struct kvm_sregs sregs = { };
     int i;
 
     sregs.pvr = env->spr[SPR_PVR];
@@ -926,17 +926,16 @@
     regs.pid = env->spr[SPR_BOOKE_PID];
 
-    for (i = 0;i < 32; i++)
+    for (i = 0; i < 32; i++) {
         regs.gpr[i] = env->gpr[i];
-
-    regs.cr = 0;
-    for (i = 0; i < 8; i++) {
-        regs.cr |= (env->crf[i] & 15) << (4 * (7 - i));
     }
 
+    regs.cr = ppc_get_cr(env);
+
     ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, &regs);
-    if (ret < 0)
+    if (ret < 0) {
         return ret;
+    }
 
     kvm_put_fp(cs);
@@ -957,12 +956,12 @@
     }
 
     if (cap_one_reg) {
-        int i;
-
-        /* We deliberately ignore errors here, for kernels which have
+        /*
+         * We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
-         * work, at least until we try to migrate. */
+         * work, at least until we try to migrate.
+         */
         for (i = 0; i < 1024; i++) {
             uint64_t id = env->spr_cb[i].one_reg_id;
@@ -972,7 +971,7 @@
         }
 
 #ifdef TARGET_PPC64
-        if (msr_ts) {
+        if (FIELD_EX64(env->msr, MSR, TS)) {
             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                 kvm_set_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
             }
@@ -993,11 +992,15 @@
         if (cap_papr) {
             if (kvm_put_vpa(cs) < 0) {
-                DPRINTF("Warning: Unable to set VPA information to KVM\n");
+                trace_kvm_failed_put_vpa();
             }
         }
 
         kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
+
+        if (level > KVM_PUT_RUNTIME_STATE) {
+            kvm_put_one_spr(cs, KVM_REG_PPC_DPDES, SPR_DPDES);
+        }
 #endif /* TARGET_PPC64 */
     }
@@ -1200,19 +1203,14 @@
     PowerPCCPU *cpu = POWERPC_CPU(cs);
     CPUPPCState *env = &cpu->env;
     struct kvm_regs regs;
-    uint32_t cr;
     int i, ret;
 
     ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, &regs);
-    if (ret < 0)
+    if (ret < 0) {
         return ret;
-
-    cr = regs.cr;
-    for (i = 7; i >= 0; i--) {
-        env->crf[i] = cr & 15;
-        cr >>= 4;
     }
 
+    ppc_set_cr(env, regs.cr);
     env->ctr = regs.ctr;
     env->lr = regs.lr;
     cpu_write_xer(env, regs.xer);
@@ -1233,8 +1231,9 @@
     env->spr[SPR_BOOKE_PID] = regs.pid;
 
-    for (i = 0;i < 32; i++)
+    for (i = 0; i < 32; i++) {
         env->gpr[i] = regs.gpr[i];
+    }
 
     kvm_get_fp(cs);
@@ -1257,12 +1256,12 @@
     }
 
     if (cap_one_reg) {
-        int i;
-
-        /* We deliberately ignore errors here, for kernels which have
+        /*
+         * We deliberately ignore errors here, for kernels which have
         * the ONE_REG calls, but don't support the specific
         * registers, there's a reasonable chance things will still
-         * work, at least until we try to migrate. */
+         * work, at least until we try to migrate.
+         */
         for (i = 0; i < 1024; i++) {
             uint64_t id = env->spr_cb[i].one_reg_id;
@@ -1272,7 +1271,7 @@
         }
 
 #ifdef TARGET_PPC64
-        if (msr_ts) {
+        if (FIELD_EX64(env->msr, MSR, TS)) {
             for (i = 0; i < ARRAY_SIZE(env->tm_gpr); i++) {
                 kvm_get_one_reg(cs, KVM_REG_PPC_TM_GPR(i), &env->tm_gpr[i]);
             }
@@ -1293,11 +1292,12 @@
         if (cap_papr) {
             if (kvm_get_vpa(cs) < 0) {
-                DPRINTF("Warning: Unable to get VPA information from KVM\n");
+                trace_kvm_failed_get_vpa();
             }
         }
 
         kvm_get_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &env->tb_env->tb_offset);
+        kvm_get_one_spr(cs, KVM_REG_PPC_DPDES, SPR_DPDES);
 #endif
     }
@@ -1312,7 +1312,7 @@
         return 0;
     }
 
-    if (!kvm_enabled() || !cap_interrupt_unset || !cap_interrupt_level) {
+    if (!cap_interrupt_unset) {
         return 0;
     }
@@ -1321,50 +1321,9 @@
     return 0;
 }
 
-#if defined(TARGET_PPC64)
-#define PPC_INPUT_INT PPC970_INPUT_INT
-#else
-#define PPC_INPUT_INT PPC6xx_INPUT_INT
-#endif
-
 void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
 {
-    PowerPCCPU *cpu = POWERPC_CPU(cs);
-    CPUPPCState *env = &cpu->env;
-    int r;
-    unsigned irq;
-
-    qemu_mutex_lock_iothread();
-
-    /* PowerPC QEMU tracks the various core input pins (interrupt, critical
-     * interrupt, reset, etc) in PPC-specific env->irq_input_state. */
-    if (!cap_interrupt_level &&
-        run->ready_for_interrupt_injection &&
-        (cs->interrupt_request & CPU_INTERRUPT_HARD) &&
-        (env->irq_input_state & (1<<PPC_INPUT_INT)))
-    {
-        /* For now KVM disregards the 'irq' argument. However, in the
-         * future KVM could cache it in-kernel to avoid a heavyweight exit
-         * when reading the UIC.
-         */
-        irq = KVM_INTERRUPT_SET;
-
-        DPRINTF("injected interrupt %d\n", irq);
-        r = kvm_vcpu_ioctl(cs, KVM_INTERRUPT, &irq);
-        if (r < 0) {
-            printf("cpu %d fail inject %x\n", cs->cpu_index, irq);
-        }
-
-        /* Always wake up soon in case the interrupt was level based */
-        timer_mod(idle_timer, qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) +
-                  (NANOSECONDS_PER_SECOND / 50));
-    }
-
-    /* We don't know if there are more interrupts pending after this. However,
-     * the guest will return to userspace in the course of handling this one
-     * anyways, so we will get a chance to deliver the rest. */
-
-    qemu_mutex_unlock_iothread();
+    return;
 }
 
 MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
@@ -1382,7 +1341,8 @@ static int kvmppc_handle_halt(PowerPCCPU *cpu)
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
 
-    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) && (msr_ee)) {
+    if (!(cs->interrupt_request & CPU_INTERRUPT_HARD) &&
+        FIELD_EX64(env->msr, MSR, EE)) {
         cs->halted = 1;
         cs->exception_index = EXCP_HLT;
     }
@@ -1391,18 +1351,22 @@
 }
 
 /* map dcr access to existing qemu dcr emulation */
-static int kvmppc_handle_dcr_read(CPUPPCState *env, uint32_t dcrn, uint32_t *data)
+static int kvmppc_handle_dcr_read(CPUPPCState *env,
+                                  uint32_t dcrn, uint32_t *data)
 {
-    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0)
+    if (ppc_dcr_read(env->dcr_env, dcrn, data) < 0) {
         fprintf(stderr, "Read to unhandled DCR (0x%x)\n", dcrn);
+    }
 
     return 0;
 }
 
-static int kvmppc_handle_dcr_write(CPUPPCState *env, uint32_t dcrn, uint32_t data)
+static int kvmppc_handle_dcr_write(CPUPPCState *env,
+                                   uint32_t dcrn, uint32_t data)
 {
-    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0)
+    if (ppc_dcr_write(env->dcr_env, dcrn, data) < 0) {
         fprintf(stderr, "Write to unhandled DCR (0x%x)\n", dcrn);
+    }
 
     return 0;
 }
@@ -1477,15 +1441,15 @@ static int find_hw_watchpoint(target_ulong addr, int *flag)
     return -1;
 }
 
-int kvm_arch_insert_hw_breakpoint(target_ulong addr,
-                                  target_ulong len, int type)
+int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type)
 {
-    if ((nb_hw_breakpoint + nb_hw_watchpoint) >= ARRAY_SIZE(hw_debug_points)) {
+    const unsigned breakpoint_index = nb_hw_breakpoint + nb_hw_watchpoint;
+    if (breakpoint_index >= ARRAY_SIZE(hw_debug_points)) {
         return -ENOBUFS;
     }
 
-    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].addr = addr;
-    hw_debug_points[nb_hw_breakpoint + nb_hw_watchpoint].type = type;
+    hw_debug_points[breakpoint_index].addr = addr;
+    hw_debug_points[breakpoint_index].type = type;
 
     switch (type) {
     case GDB_BREAKPOINT_HW:
@@ -1521,8 +1485,7 @@ int kvm_arch_insert_hw_breakpoint(vaddr addr, vaddr len, int type)
     return 0;
 }
 
-int kvm_arch_remove_hw_breakpoint(target_ulong addr,
-                                  target_ulong len, int type)
+int kvm_arch_remove_hw_breakpoint(vaddr addr, vaddr len, int type)
 {
     int n;
@@ -1594,70 +1557,93 @@ void kvm_arch_update_guest_debug(CPUState *cs, struct kvm_guest_debug *dbg)
     }
 }
 
+static int kvm_handle_hw_breakpoint(CPUState *cs,
+                                    struct kvm_debug_exit_arch *arch_info)
+{
+    int handle = DEBUG_RETURN_GUEST;
+    int n;
+    int flag = 0;
+
+    if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
+        if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
+            n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
+            if (n >= 0) {
+                handle = DEBUG_RETURN_GDB;
+            }
+        } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
+                                        KVMPPC_DEBUG_WATCH_WRITE)) {
+            n = find_hw_watchpoint(arch_info->address, &flag);
+            if (n >= 0) {
+                handle = DEBUG_RETURN_GDB;
+                cs->watchpoint_hit = &hw_watchpoint;
+                hw_watchpoint.vaddr = hw_debug_points[n].addr;
+                hw_watchpoint.flags = flag;
+            }
+        }
+    }
+    return handle;
+}
+
+static int kvm_handle_singlestep(void)
+{
+    return DEBUG_RETURN_GDB;
+}
+
+static int kvm_handle_sw_breakpoint(void)
+{
+    return DEBUG_RETURN_GDB;
+}
+
 static int kvm_handle_debug(PowerPCCPU *cpu, struct kvm_run *run)
 {
     CPUState *cs = CPU(cpu);
     CPUPPCState *env = &cpu->env;
     struct kvm_debug_exit_arch *arch_info = &run->debug.arch;
-    int handle = 0;
-    int n;
-    int flag = 0;
 
     if (cs->singlestep_enabled) {
-        handle = 1;
-    } else if (arch_info->status) {
-        if (nb_hw_breakpoint + nb_hw_watchpoint > 0) {
-            if (arch_info->status & KVMPPC_DEBUG_BREAKPOINT) {
-                n = find_hw_breakpoint(arch_info->address, GDB_BREAKPOINT_HW);
-                if (n >= 0) {
-                    handle = 1;
-                }
-            } else if (arch_info->status & (KVMPPC_DEBUG_WATCH_READ |
-                                            KVMPPC_DEBUG_WATCH_WRITE)) {
-                n = find_hw_watchpoint(arch_info->address, &flag);
-                if (n >= 0) {
-                    handle = 1;
-                    cs->watchpoint_hit = &hw_watchpoint;
-                    hw_watchpoint.vaddr = hw_debug_points[n].addr;
-                    hw_watchpoint.flags = flag;
-                }
-            }
-        }
-    } else if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
-        handle = 1;
-    } else {
-        /* QEMU is not able to handle debug exception, so inject
-         * program exception to guest;
-         * Yes program exception NOT debug exception !!
-         * When QEMU is using debug resources then debug exception must
-         * be always set. To achieve this we set MSR_DE and also set
-         * MSRP_DEP so guest cannot change MSR_DE.
-         * When emulating debug resource for guest we want guest
-         * to control MSR_DE (enable/disable debug interrupt on need).
-         * Supporting both configurations are NOT possible.
-         * So the result is that we cannot share debug resources
-         * between QEMU and Guest on BOOKE architecture.
-         * In the current design QEMU gets the priority over guest,
-         * this means that if QEMU is using debug resources then guest
-         * cannot use them;
-         * For software breakpoint QEMU uses a privileged instruction;
-         * So there cannot be any reason that we are here for guest
-         * set debug exception, only possibility is guest executed a
-         * privileged / illegal instruction and that's why we are
-         * injecting a program interrupt.
-         */
+        return kvm_handle_singlestep();
+    }
 
-        cpu_synchronize_state(cs);
-        /* env->nip is PC, so increment this by 4 to use
-         * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
-         */
-        env->nip += 4;
-        cs->exception_index = POWERPC_EXCP_PROGRAM;
-        env->error_code = POWERPC_EXCP_INVAL;
-        ppc_cpu_do_interrupt(cs);
+    if (arch_info->status) {
+        return kvm_handle_hw_breakpoint(cs, arch_info);
     }
 
-    return handle;
+    if (kvm_find_sw_breakpoint(cs, arch_info->address)) {
+        return kvm_handle_sw_breakpoint();
+    }
+
+    /*
+     * QEMU is not able to handle debug exception, so inject
+     * program exception to guest;
+     * Yes program exception NOT debug exception !!
+     * When QEMU is using debug resources then debug exception must
+     * be always set. To achieve this we set MSR_DE and also set
+     * MSRP_DEP so guest cannot change MSR_DE.
+     * When emulating debug resource for guest we want guest
+     * to control MSR_DE (enable/disable debug interrupt on need).
+     * Supporting both configurations are NOT possible.
+     * So the result is that we cannot share debug resources
+     * between QEMU and Guest on BOOKE architecture.
+     * In the current design QEMU gets the priority over guest,
+     * this means that if QEMU is using debug resources then guest
+     * cannot use them;
+     * For software breakpoint QEMU uses a privileged instruction;
+     * So there cannot be any reason that we are here for guest
+     * set debug exception, only possibility is guest executed a
+     * privileged / illegal instruction and that's why we are
+     * injecting a program interrupt.
+     */
+    cpu_synchronize_state(cs);
+    /*
+     * env->nip is PC, so increment this by 4 to use
+     * ppc_cpu_do_interrupt(), which set srr0 = env->nip - 4.
+     */
+    env->nip += 4;
+    cs->exception_index = POWERPC_EXCP_PROGRAM;
+    env->error_code = POWERPC_EXCP_INVAL;
+    ppc_cpu_do_interrupt(cs);
+
+    return DEBUG_RETURN_GUEST;
 }
 
 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
@@ -1666,25 +1652,25 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
     CPUPPCState *env = &cpu->env;
     int ret;
 
-    qemu_mutex_lock_iothread();
+    bql_lock();
 
     switch (run->exit_reason) {
     case KVM_EXIT_DCR:
         if (run->dcr.is_write) {
-            DPRINTF("handle dcr write\n");
+            trace_kvm_handle_dcr_write();
             ret = kvmppc_handle_dcr_write(env, run->dcr.dcrn, run->dcr.data);
         } else {
-            DPRINTF("handle dcr read\n");
+            trace_kvm_handle_dcr_read();
             ret = kvmppc_handle_dcr_read(env, run->dcr.dcrn, &run->dcr.data);
         }
         break;
     case KVM_EXIT_HLT:
-        DPRINTF("handle halt\n");
+        trace_kvm_handle_halt();
         ret = kvmppc_handle_halt(cpu);
         break;
 #if defined(TARGET_PPC64)
     case KVM_EXIT_PAPR_HCALL:
-        DPRINTF("handle PAPR hypercall\n");
+        trace_kvm_handle_papr_hcall(run->papr_hcall.nr);
         run->papr_hcall.ret = spapr_hypercall(cpu,
                                               run->papr_hcall.nr,
                                               run->papr_hcall.args);
@@ -1692,18 +1678,18 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         break;
 #endif
     case KVM_EXIT_EPR:
-        DPRINTF("handle epr\n");
+        trace_kvm_handle_epr();
         run->epr.epr = ldl_phys(cs->as, env->mpic_iack);
         ret = 0;
         break;
     case KVM_EXIT_WATCHDOG:
-        DPRINTF("handle watchdog expiry\n");
+        trace_kvm_handle_watchdog_expiry();
         watchdog_perform_action();
         ret = 0;
         break;
 
     case KVM_EXIT_DEBUG:
-        DPRINTF("handle debug exception\n");
+        trace_kvm_handle_debug_exception();
         if (kvm_handle_debug(cpu, run)) {
             ret = EXCP_DEBUG;
             break;
@@ -1712,13 +1698,20 @@ int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
         ret = 0;
         break;
 
+#if defined(TARGET_PPC64)
+    case KVM_EXIT_NMI:
+        trace_kvm_handle_nmi_exception();
+        ret = kvm_handle_nmi(cpu, run);
+        break;
+#endif
+
     default:
         fprintf(stderr, "KVM: unknown exit reason %d\n", run->exit_reason);
         ret = -1;
         break;
     }
 
-    qemu_mutex_unlock_iothread();
+    bql_unlock();
     return ret;
 }
@@ -1731,6 +1724,10 @@ int kvmppc_or_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
         .addr = (uintptr_t) &bits,
     };
 
+    if (!kvm_enabled()) {
+        return 0;
+    }
+
     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 }
@@ -1744,6 +1741,10 @@ int kvmppc_clear_tsr_bits(PowerPCCPU *cpu, uint32_t tsr_bits)
         .addr = (uintptr_t) &bits,
     };
 
+    if (!kvm_enabled()) {
+        return 0;
+    }
+
     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 }
@@ -1758,6 +1759,10 @@ int kvmppc_set_tcr(PowerPCCPU *cpu)
         .addr = (uintptr_t) &tcr,
     };
 
+    if (!kvm_enabled()) {
+        return 0;
+    }
+
     return kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, &reg);
 }
@@ -1806,30 +1811,44 @@ static int read_cpuinfo(const char *field, char *value, int len)
             ret = 0;
             break;
         }
-    } while(*line);
+    } while (*line);
 
     fclose(f);
 
     return ret;
 }
 
-uint32_t kvmppc_get_tbfreq(void)
+static uint32_t kvmppc_get_tbfreq_procfs(void)
 {
     char line[512];
     char *ns;
-    uint32_t retval = NANOSECONDS_PER_SECOND;
+    uint32_t tbfreq_fallback = NANOSECONDS_PER_SECOND;
+    uint32_t tbfreq_procfs;
 
     if (read_cpuinfo("timebase", line, sizeof(line))) {
-        return retval;
+        return tbfreq_fallback;
     }
 
-    if (!(ns = strchr(line, ':'))) {
-        return retval;
+    ns = strchr(line, ':');
+    if (!ns) {
+        return tbfreq_fallback;
     }
 
-    ns++;
+    tbfreq_procfs = atoi(++ns);
 
-    return atoi(ns);
+    /* 0 is certainly not acceptable by the guest, return fallback value */
+    return tbfreq_procfs ? tbfreq_procfs : tbfreq_fallback;
+}
+
+uint32_t kvmppc_get_tbfreq(void)
+{
+    static uint32_t cached_tbfreq;
+
+    if (!cached_tbfreq) {
+        cached_tbfreq = kvmppc_get_tbfreq_procfs();
+    }
+
+    return cached_tbfreq;
 }
 
 bool kvmppc_get_host_serial(char **value)
@@ -1849,7 +1868,8 @@ static int kvmppc_find_cpu_dt(char *buf, int buf_len)
     struct dirent *dirp;
     DIR *dp;
 
-    if ((dp = opendir(PROC_DEVTREE_CPU)) == NULL) {
+    dp = opendir(PROC_DEVTREE_CPU);
+    if (!dp) {
         printf("Can't open directory " PROC_DEVTREE_CPU "\n");
         return -1;
     }
@@ -1857,6 +1877,12 @@ static int kvmppc_find_cpu_dt(char *buf, int buf_len)
     buf[0] = '\0';
     while ((dirp = readdir(dp)) != NULL) {
         FILE *f;
+
+        /* Don't accidentally read from the current and parent directories */
+        if (strcmp(dirp->d_name, ".") == 0 || strcmp(dirp->d_name, "..") == 0) {
+            continue;
+        }
+
         snprintf(buf, buf_len, "%s%s/clock-frequency", PROC_DEVTREE_CPU,
                  dirp->d_name);
         f = fopen(buf, "r");
@@ -1903,10 +1929,11 @@ static uint64_t kvmppc_read_int_dt(const char *filename)
     return 0;
 }
 
-/* Read a CPU node property from the host device tree that's a single
+/*
+ * Read a CPU node property from the host device tree that's a single
  * integer (32-bit or 64-bit). Returns 0 if anything goes wrong
- * (can't find or open the property, or doesn't understand the
- * format) */
+ * (can't find or open the property, or doesn't understand the format)
+ */
 static uint64_t kvmppc_read_int_cpu_dt(const char *propname)
 {
     char buf[PATH_MAX], *tmp;
@@ -1928,10 +1955,19 @@ uint64_t kvmppc_get_clockfreq(void)
     return kvmppc_read_int_cpu_dt("clock-frequency");
 }
 
+static int kvmppc_get_dec_bits(void)
+{
+    int nr_bits = kvmppc_read_int_cpu_dt("ibm,dec-bits");
+
+    if (nr_bits > 0) {
+        return nr_bits;
+    }
+    return 0;
+}
+
 static int kvmppc_get_pvinfo(CPUPPCState *env, struct kvm_ppc_pvinfo *pvinfo)
- {
-     PowerPCCPU *cpu = ppc_env_get_cpu(env);
-     CPUState *cs = CPU(cpu);
+{
+    CPUState *cs = env_cpu(env);
 
     if (kvm_vm_check_extension(cs->kvm_state, KVM_CAP_PPC_GET_PVINFO) &&
         !kvm_vm_ioctl(cs->kvm_state, KVM_PPC_GET_PVINFO, pvinfo)) {
@@ -1955,7 +1991,7 @@ int kvmppc_get_hasidle(CPUPPCState *env)
 
 int kvmppc_get_hypercall(CPUPPCState *env, uint8_t *buf, int buf_len)
 {
-    uint32_t *hc = (uint32_t*)buf;
+    uint32_t *hc = (uint32_t *)buf;
     struct kvm_ppc_pvinfo pvinfo;
 
     if (!kvmppc_get_pvinfo(env, &pvinfo)) {
@@ -2008,6 +2044,16 @@ void kvmppc_enable_clear_ref_mod_hcalls(void)
     kvmppc_enable_hcall(kvm_state, H_CLEAR_MOD);
 }
 
+void kvmppc_enable_h_page_init(void)
+{
+    kvmppc_enable_hcall(kvm_state, H_PAGE_INIT);
+}
+
+void kvmppc_enable_h_rpt_invalidate(void)
+{
+    kvmppc_enable_hcall(kvm_state, H_RPT_INVALIDATE);
+}
+
 void kvmppc_set_papr(PowerPCCPU *cpu)
 {
     CPUState *cs = CPU(cpu);
@@ -2023,8 +2069,10 @@ void kvmppc_set_papr(PowerPCCPU *cpu)
         exit(1);
     }
 
-    /* Update the capability flag so we sync the right information
-     * with kvm */
+    /*
+     * Update the capability flag so we sync the right information
+     * with kvm
+     */
     cap_papr = 1;
 }
@@ -2045,6 +2093,18 @@ void kvmppc_set_mpic_proxy(PowerPCCPU *cpu, int mpic_proxy)
     }
 }
 
+bool kvmppc_get_fwnmi(void)
+{
+    return cap_fwnmi;
+}
+
+int kvmppc_set_fwnmi(PowerPCCPU *cpu)
+{
+    CPUState *cs = CPU(cpu);
+
+    return kvm_vcpu_enable_cap(cs, KVM_CAP_PPC_FWNMI, 0);
+}
+
 int kvmppc_smt_threads(void)
 {
     return cap_ppc_smt ? cap_ppc_smt : 1;
@@ -2061,7 +2121,7 @@ int kvmppc_set_smt_threads(int smt)
     return ret;
 }
 
-void kvmppc_hint_smt_possible(Error **errp)
+void kvmppc_error_append_smt_possible_hint(Error *const *errp)
 {
     int i;
     GString *g;
@@ -2086,16 +2146,18 @@
 #ifdef TARGET_PPC64
-uint64_t kvmppc_rma_size(uint64_t current_size, unsigned int hash_shift)
+uint64_t kvmppc_vrma_limit(unsigned int hash_shift)
 {
     struct kvm_ppc_smmu_info info;
     long rampagesize, best_page_shift;
     int i;
 
-    /* Find the largest hardware supported page size that's less than
-     * or equal to the (logical) backing page size of guest RAM */
+    /*
+     * Find the largest hardware supported page size that's less than
+     * or equal to the (logical) backing page size of guest RAM
+     */
     kvm_get_smmu_info(&info, &error_fatal);
-    rampagesize = qemu_getrampagesize();
+    rampagesize = qemu_minrampagesize();
     best_page_shift = 0;
 
     for (i = 0; i < KVM_PPC_PAGE_SIZES_MAX_SZ; i++) {
@@ -2111,8 +2173,7 @@ uint64_t kvmppc_vrma_limit(unsigned int hash_shift)
         }
     }
 
-    return MIN(current_size,
-               1ULL << (best_page_shift + hash_shift - 7));
+    return 1ULL << (best_page_shift + hash_shift - 7);
 }
 #endif
@@ -2143,7 +2204,8 @@
     int fd;
     void *table;
 
-    /* Must set fd to -1 so we don't try to munmap when called for
+    /*
+     * Must set fd to -1 so we don't try to munmap when called for
      * destroying the table, which the upper layers -will- do
      */
     *pfd = -1;
@@ -2188,7 +2250,7 @@
     len = nb_table * sizeof(uint64_t);
     /* FIXME: round this up to page size */
 
-    table = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+    table = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
     if (table == MAP_FAILED) {
         fprintf(stderr, "KVM: Failed to map TCE table for liobn 0x%x\n",
                 liobn);
@@ -2231,10 +2293,12 @@ int kvmppc_reset_htab(int shift_hint)
     int ret;
 
     ret = kvm_vm_ioctl(kvm_state, KVM_PPC_ALLOCATE_HTAB, &shift);
     if (ret == -ENOTTY) {
-        /* At least some versions of PR KVM advertise the
+        /*
+         * At least some versions of PR KVM advertise the
          * capability, but don't implement the ioctl(). Oops.
         * Return 0 so that we allocate the htab in qemu, as is
-         * correct for PR. */
+         * correct for PR.
+         */
         return 0;
     } else if (ret < 0) {
         return ret;
@@ -2242,9 +2306,12 @@ int kvmppc_reset_htab(int shift_hint)
         return shift;
     }
 
-    /* We have a kernel that predates the htab reset calls. For PR
+    /*
+     * We have a kernel that predates the htab reset calls. For PR
      * KVM, we need to allocate the htab ourselves, for an HV KVM of
-     * this era, it has allocated a 16MB fixed size hash table already. */
+     * this era, it has allocated a 16MB fixed size hash table
+     * already.
+     */
     if (kvmppc_is_pr(kvm_state)) {
         /* PR - tell caller to allocate htab */
         return 0;
@@ -2297,18 +2364,7 @@ static void kvmppc_host_cpu_class_init(ObjectClass *oc, void *data)
     }
 
 #if defined(TARGET_PPC64)
-    pcc->radix_page_info = kvm_get_radix_page_info();
-
-    if ((pcc->pvr & 0xffffff00) == CPU_POWERPC_POWER9_DD1) {
-        /*
-         * POWER9 DD1 has some bugs which make it not really ISA 3.00
-         * compliant. More importantly, advertising ISA 3.00
-         * architected mode may prevent guests from activating
-         * necessary DD1 workarounds.
-         */
-        pcc->pcr_supported &= ~(PCR_COMPAT_3_00 | PCR_COMPAT_2_07
-                                | PCR_COMPAT_2_06 | PCR_COMPAT_2_05);
-    }
+    pcc->radix_page_info = kvmppc_get_radix_page_info();
 #endif /* defined(TARGET_PPC64) */
 }
@@ -2380,7 +2436,13 @@
 static int parse_cap_ppc_safe_indirect_branch(struct kvm_ppc_cpu_char c)
 {
-    if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
+    if ((~c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) &&
+        (~c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) &&
+        (~c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED)) {
+        return SPAPR_CAP_FIXED_NA;
+    } else if (c.behaviour & c.behaviour_mask & H_CPU_BEHAV_FLUSH_COUNT_CACHE) {
+        return SPAPR_CAP_WORKAROUND;
+    } else if (c.character & c.character_mask & H_CPU_CHAR_CACHE_COUNT_DIS) {
         return SPAPR_CAP_FIXED_CCD;
     } else if (c.character & c.character_mask & H_CPU_CHAR_BCCTRL_SERIALISED) {
         return SPAPR_CAP_FIXED_IBS;
@@ -2389,6 +2451,19 @@
     return 0;
 }
 
+static int parse_cap_ppc_count_cache_flush_assist(struct kvm_ppc_cpu_char c)
+{
+    if (c.character & c.character_mask & H_CPU_CHAR_BCCTR_FLUSH_ASSIST) {
+        return 1;
+    }
+    return 0;
+}
+
+bool kvmppc_has_cap_xive(void)
+{
+    return cap_xive;
+}
+
 static void kvmppc_get_cpu_characteristics(KVMState *s)
 {
     struct kvm_ppc_cpu_char c;
@@ -2411,6 +2486,8 @@ static void kvmppc_get_cpu_characteristics(KVMState *s)
     cap_ppc_safe_cache = parse_cap_ppc_safe_cache(c);
     cap_ppc_safe_bounds_check = parse_cap_ppc_safe_bounds_check(c);
     cap_ppc_safe_indirect_branch = parse_cap_ppc_safe_indirect_branch(c);
+    cap_ppc_count_cache_flush_assist =
+        parse_cap_ppc_count_cache_flush_assist(c);
 }
 
 int kvmppc_get_cap_safe_cache(void)
@@ -2428,6 +2505,11 @@ int kvmppc_get_cap_safe_indirect_branch(void)
     return cap_ppc_safe_indirect_branch;
 }
 
+int kvmppc_get_cap_count_cache_flush_assist(void)
+{
+    return cap_ppc_count_cache_flush_assist;
+}
+
 bool kvmppc_has_cap_nested_kvm_hv(void)
 {
     return !!cap_ppc_nested_kvm_hv;
@@ -2443,6 +2525,45 @@
 bool kvmppc_has_cap_spapr_vfio(void)
 {
     return cap_spapr_vfio;
 }
 
+int kvmppc_get_cap_large_decr(void)
+{
+    return cap_large_decr;
+}
+
+int kvmppc_enable_cap_large_decr(PowerPCCPU *cpu, int enable)
+{
+    CPUState *cs = CPU(cpu);
+    uint64_t lpcr = 0;
+
+    kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
+    /* Do we need to modify the LPCR? */
+    if (!!(lpcr & LPCR_LD) != !!enable) {
+        if (enable) {
+            lpcr |= LPCR_LD;
+        } else {
+            lpcr &= ~LPCR_LD;
+        }
+        kvm_set_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
+        kvm_get_one_reg(cs, KVM_REG_PPC_LPCR_64, &lpcr);
+
+        if (!!(lpcr & LPCR_LD) != !!enable) {
+            return -1;
+        }
+    }
+
+    return 0;
+}
+
+int kvmppc_has_cap_rpt_invalidate(void)
+{
+    return cap_rpt_invalidate;
+}
+
+bool kvmppc_supports_ail_3(void)
+{
+    return cap_ail_mode_3;
+}
+
 PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
 {
     uint32_t host_pvr = mfpvr();
@@ -2456,13 +2577,19 @@ PowerPCCPUClass *kvm_ppc_get_host_cpu_class(void)
     return pvr_pcc;
 }
 
-static int kvm_ppc_register_host_cpu_type(MachineState *ms)
+static void pseries_machine_class_fixup(ObjectClass *oc, void *opaque)
+{
+    MachineClass *mc = MACHINE_CLASS(oc);
+
+    mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
+}
+
+static int kvm_ppc_register_host_cpu_type(void)
 {
     TypeInfo type_info = {
         .name = TYPE_HOST_POWERPC_CPU,
         .class_init = kvmppc_host_cpu_class_init,
     };
-    MachineClass *mc = MACHINE_GET_CLASS(ms);
     PowerPCCPUClass *pvr_pcc;
     ObjectClass *oc;
     DeviceClass *dc;
@@ -2474,10 +2601,9 @@ static int kvm_ppc_register_host_cpu_type(void)
     }
     type_info.parent = object_class_get_name(OBJECT_CLASS(pvr_pcc));
     type_register(&type_info);
-    if (object_dynamic_cast(OBJECT(ms), TYPE_SPAPR_MACHINE)) {
-        /* override TCG default cpu type with 'host' cpu model */
-        mc->default_cpu_type = TYPE_HOST_POWERPC_CPU;
-    }
+    /* override TCG default cpu type with 'host' cpu model */
+    object_class_foreach(pseries_machine_class_fixup, TYPE_SPAPR_MACHINE,
+                         false, NULL);
 
     oc = object_class_by_name(type_info.name);
     g_assert(oc);
@@ -2514,7 +2640,7 @@ int kvmppc_define_rtas_kernel_token(uint32_t token, const char *function)
         return -ENOENT;
     }
 
-    strncpy(args.name, function, sizeof(args.name));
+    strncpy(args.name, function, sizeof(args.name) - 1);
 
     return kvm_vm_ioctl(kvm_state, KVM_PPC_RTAS_DEFINE_TOKEN, &args);
 }
@@ -2547,7 +2673,7 @@
 int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
 {
     int64_t starttime = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
-    uint8_t buf[bufsize];
+    g_autofree uint8_t *buf = g_malloc(bufsize);
     ssize_t rc;
 
     do {
@@ -2576,17 +2702,17 @@ int kvmppc_save_htab(QEMUFile *f, int fd, size_t bufsize, int64_t max_ns)
             }
         }
     } while ((rc != 0)
-             && ((max_ns < 0)
-                 || ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
+             && ((max_ns < 0) ||
                 ((qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - starttime) < max_ns)));
 
     return (rc == 0) ? 1 : 0;
 }
 
 int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
-                           uint16_t n_valid, uint16_t n_invalid)
+                           uint16_t n_valid, uint16_t n_invalid, Error **errp)
 {
     struct kvm_get_htab_header *buf;
-    size_t chunksize = sizeof(*buf) + n_valid*HASH_PTE_SIZE_64;
+    size_t chunksize = sizeof(*buf) + n_valid * HASH_PTE_SIZE_64;
     ssize_t rc;
 
     buf = alloca(chunksize);
@@ -2594,18 +2720,17 @@ int kvmppc_load_htab_chunk(QEMUFile *f, int fd, uint32_t index,
     buf->n_valid = n_valid;
     buf->n_invalid = n_invalid;
 
-    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64*n_valid);
+    qemu_get_buffer(f, (void *)(buf + 1), HASH_PTE_SIZE_64 * n_valid);
 
     rc = write(fd, buf, chunksize);
     if (rc < 0) {
-        fprintf(stderr, "Error writing KVM hash table: %s\n",
-                strerror(errno));
-        return rc;
+        error_setg_errno(errp, errno, "Error writing the KVM hash table");
+        return -errno;
     }
     if (rc != chunksize) {
         /* We should never get a short write on a single chunk */
-        fprintf(stderr, "Short write, restoring KVM hash table\n");
-        return -1;
+        error_setg(errp, "Short write while restoring the KVM hash table");
+        return -ENOSPC;
     }
     return 0;
 }
@@ -2630,9 +2755,9 @@ void kvmppc_read_hptes(ppc_hash_pte64_t *hptes, hwaddr ptex, int n)
     while (i < n) {
         struct kvm_get_htab_header *hdr;
         int m = n < HPTES_PER_GROUP ? n : HPTES_PER_GROUP;
-        char buf[sizeof(*hdr) + m * HASH_PTE_SIZE_64];
+        char buf[sizeof(*hdr) + HPTES_PER_GROUP * HASH_PTE_SIZE_64];
 
-        rc = read(fd, buf, sizeof(buf));
+        rc = read(fd, buf, sizeof(*hdr) + m * HASH_PTE_SIZE_64);
         if (rc < 0) {
             hw_error("kvmppc_read_hptes: Unable to read HPTEs");
         }
@@ -2712,6 +2837,19 @@ int kvm_arch_msi_data_to_gsi(uint32_t data)
     return data & 0xffff;
 }
 
+#if defined(TARGET_PPC64)
+int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run)
+{
+    uint16_t flags = run->flags & KVM_RUN_PPC_NMI_DISP_MASK;
+
+    cpu_synchronize_state(CPU(cpu));
+
+    spapr_mce_req_event(cpu, flags == KVM_RUN_PPC_NMI_DISP_FULLY_RECOV);
+
+    return 0;
+}
+#endif
+
 int kvmppc_enable_hwrng(void)
 {
     if (!kvm_enabled() || !kvm_check_extension(kvm_state, KVM_CAP_PPC_HWRNG)) {
@@ -2808,3 +2946,21 @@ void kvmppc_set_reg_ppc_online(PowerPCCPU *cpu, unsigned int online)
         kvm_set_one_reg(cs, KVM_REG_PPC_ONLINE, &online);
     }
 }
+
+void kvmppc_set_reg_tb_offset(PowerPCCPU *cpu, int64_t tb_offset)
+{
+    CPUState *cs = CPU(cpu);
+
+    if (kvm_enabled()) {
+        kvm_set_one_reg(cs, KVM_REG_PPC_TB_OFFSET, &tb_offset);
+    }
+}
+
+bool kvm_arch_cpu_check_are_resettable(void)
+{
+    return true;
+}
+
+void kvm_arch_accel_class_init(ObjectClass *oc)
+{
+}
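A note on the condition-register hunks in kvm_arch_put_registers()/kvm_arch_get_registers(): the open-coded loops over env->crf[] are replaced by the ppc_get_cr()/ppc_set_cr() helpers. The arithmetic being encapsulated is a straight pack/unpack of eight 4-bit fields, with crf[0] in the most significant nibble of the 32-bit CR image. Below is a minimal stand-alone sketch of that layout, kept deliberately close to the removed loops (cr_pack()/cr_unpack() are illustrative names, not QEMU API):

    #include <assert.h>
    #include <stdint.h>

    /* Pack eight 4-bit CR fields into a 32-bit word; crf[0] lands in
     * bits 31:28, mirroring the loop removed from kvm_arch_put_registers(). */
    static uint32_t cr_pack(const uint32_t crf[8])
    {
        uint32_t cr = 0;
        for (int i = 0; i < 8; i++) {
            cr |= (crf[i] & 15) << (4 * (7 - i));
        }
        return cr;
    }

    /* Inverse operation, mirroring the loop removed from
     * kvm_arch_get_registers(). */
    static void cr_unpack(uint32_t cr, uint32_t crf[8])
    {
        for (int i = 7; i >= 0; i--) {
            crf[i] = cr & 15;
            cr >>= 4;
        }
    }

    int main(void)
    {
        uint32_t crf[8] = { 0x8, 0x4, 0x2, 0x1, 0x0, 0xf, 0x3, 0x5 };
        uint32_t out[8];

        cr_unpack(cr_pack(crf), out);   /* round-trips losslessly */
        for (int i = 0; i < 8; i++) {
            assert(out[i] == crf[i]);
        }
        return 0;
    }

Field i thus occupies bits (31 - 4*i) down to (28 - 4*i) of the packed word, which is why the get side walks the fields from 7 down to 0 while shifting right a nibble at a time.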
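A note on the kvmppc_get_tbfreq() rework: parsing of the host timebase frequency moves into kvmppc_get_tbfreq_procfs(), which falls back to NANOSECONDS_PER_SECOND when /proc/cpuinfo has no usable "timebase" field or the parsed value is 0, and the public function now caches the result in a function-local static. A self-contained sketch of the same parse-with-fallback idea (tbfreq_from_procfs() is a hypothetical stand-alone rework; the QEMU code goes through its read_cpuinfo() helper instead of scanning the file directly):

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define FALLBACK_HZ 1000000000u   /* NANOSECONDS_PER_SECOND in QEMU */

    /* Scan /proc/cpuinfo for a "timebase : <n>" line and parse <n>.
     * Any failure, including a parsed value of 0, yields the fallback. */
    static uint32_t tbfreq_from_procfs(void)
    {
        char line[512];
        uint32_t tbfreq = 0;
        FILE *f = fopen("/proc/cpuinfo", "r");

        if (!f) {
            return FALLBACK_HZ;
        }
        while (fgets(line, sizeof(line), f)) {
            if (strncmp(line, "timebase", strlen("timebase")) == 0) {
                char *ns = strchr(line, ':');
                if (ns) {
                    tbfreq = (uint32_t)atoi(ns + 1);
                }
                break;
            }
        }
        fclose(f);
        return tbfreq ? tbfreq : FALLBACK_HZ;
    }

    int main(void)
    {
        printf("timebase frequency: %u Hz\n", tbfreq_from_procfs());
        return 0;
    }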
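A worked example for the kvmppc_vrma_limit() hunk above: the function now returns 1ULL << (best_page_shift + hash_shift - 7) instead of clamping the caller's current_size. With illustrative values (a 64 KiB best hardware page size, so best_page_shift = 16, and a 2^28-byte hash page table, so hash_shift = 28), the limit works out to 1ULL << (16 + 28 - 7) = 2^37 bytes, i.e. 128 GiB.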