aboutsummaryrefslogtreecommitdiff
path: root/arch/powerpc
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc')
-rw-r--r--arch/powerpc/include/asm/cpuidle.h20
-rw-r--r--arch/powerpc/include/asm/kvm_book3s.h2
-rw-r--r--arch/powerpc/include/asm/kvm_book3s_64.h3
-rw-r--r--arch/powerpc/include/asm/kvm_host.h18
-rw-r--r--arch/powerpc/include/asm/kvm_ppc.h2
-rw-r--r--arch/powerpc/include/asm/opal.h42
-rw-r--r--arch/powerpc/include/asm/paca.h10
-rw-r--r--arch/powerpc/include/asm/ppc-opcode.h2
-rw-r--r--arch/powerpc/include/asm/processor.h3
-rw-r--r--arch/powerpc/include/asm/reg.h4
-rw-r--r--arch/powerpc/include/asm/syscall.h6
-rw-r--r--arch/powerpc/include/asm/uaccess.h6
-rw-r--r--arch/powerpc/kernel/asm-offsets.c13
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S35
-rw-r--r--arch/powerpc/kernel/idle_power7.S344
-rw-r--r--arch/powerpc/kernel/smp.c9
-rw-r--r--arch/powerpc/kvm/Kconfig1
-rw-r--r--arch/powerpc/kvm/book3s.c8
-rw-r--r--arch/powerpc/kvm/book3s_32_mmu.c5
-rw-r--r--arch/powerpc/kvm/book3s_64_mmu_hv.c224
-rw-r--r--arch/powerpc/kvm/book3s_hv.c438
-rw-r--r--arch/powerpc/kvm/book3s_hv_builtin.c136
-rw-r--r--arch/powerpc/kvm/book3s_hv_interrupts.S39
-rw-r--r--arch/powerpc/kvm/book3s_hv_ras.c5
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_mmu.c150
-rw-r--r--arch/powerpc/kvm/book3s_hv_rm_xics.c36
-rw-r--r--arch/powerpc/kvm/book3s_hv_rmhandlers.S251
-rw-r--r--arch/powerpc/kvm/book3s_paired_singles.c8
-rw-r--r--arch/powerpc/kvm/book3s_pr.c5
-rw-r--r--arch/powerpc/kvm/book3s_xics.c30
-rw-r--r--arch/powerpc/kvm/book3s_xics.h1
-rw-r--r--arch/powerpc/kvm/e500.c10
-rw-r--r--arch/powerpc/kvm/powerpc.c10
-rw-r--r--arch/powerpc/kvm/trace_book3s.h32
-rw-r--r--arch/powerpc/kvm/trace_booke.h47
-rw-r--r--arch/powerpc/kvm/trace_hv.h477
-rw-r--r--arch/powerpc/kvm/trace_pr.h25
-rw-r--r--arch/powerpc/perf/hv-24x7.c23
-rw-r--r--arch/powerpc/platforms/powernv/opal-wrappers.S39
-rw-r--r--arch/powerpc/platforms/powernv/opal.c50
-rw-r--r--arch/powerpc/platforms/powernv/powernv.h2
-rw-r--r--arch/powerpc/platforms/powernv/setup.c166
-rw-r--r--arch/powerpc/platforms/powernv/smp.c29
-rw-r--r--arch/powerpc/platforms/powernv/subcore.c34
-rw-r--r--arch/powerpc/platforms/powernv/subcore.h9
45 files changed, 1637 insertions, 1172 deletions
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
new file mode 100644
index 000000000000..d2f99ca1e3a6
--- /dev/null
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -0,0 +1,20 @@
+#ifndef _ASM_POWERPC_CPUIDLE_H
+#define _ASM_POWERPC_CPUIDLE_H
+
+#ifdef CONFIG_PPC_POWERNV
+/* Used in powernv idle state management */
+#define PNV_THREAD_RUNNING 0
+#define PNV_THREAD_NAP 1
+#define PNV_THREAD_SLEEP 2
+#define PNV_THREAD_WINKLE 3
+#define PNV_CORE_IDLE_LOCK_BIT 0x100
+#define PNV_CORE_IDLE_THREAD_BITS 0x0FF
+
+#ifndef __ASSEMBLY__
+extern u32 pnv_fastsleep_workaround_at_entry[];
+extern u32 pnv_fastsleep_workaround_at_exit[];
+#endif
+
+#endif
+
+#endif
diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
index 6acf0c2a0f99..942c7b1678e3 100644
--- a/arch/powerpc/include/asm/kvm_book3s.h
+++ b/arch/powerpc/include/asm/kvm_book3s.h
@@ -170,8 +170,6 @@ extern void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long addr,
unsigned long *nb_ret);
extern void kvmppc_unpin_guest_page(struct kvm *kvm, void *addr,
unsigned long gpa, bool dirty);
-extern long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
- long pte_index, unsigned long pteh, unsigned long ptel);
extern long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh, unsigned long ptel,
pgd_t *pgdir, bool realmode, unsigned long *idx_ret);
diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 0aa817933e6a..2d81e202bdcc 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -37,7 +37,6 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
-extern unsigned long kvm_rma_pages;
#endif
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */
@@ -148,7 +147,7 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r,
/* This covers 14..54 bits of va*/
rb = (v & ~0x7fUL) << 16; /* AVA field */
- rb |= v >> (62 - 8); /* B field */
+ rb |= (v >> HPTE_V_SSIZE_SHIFT) << 8; /* B field */
/*
* AVA in v had cleared lower 23 bits. We need to derive
* that from pteg index
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 047855619cc4..7efd666a3fa7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -180,11 +180,6 @@ struct kvmppc_spapr_tce_table {
struct page *pages[0];
};
-struct kvm_rma_info {
- atomic_t use_count;
- unsigned long base_pfn;
-};
-
/* XICS components, defined in book3s_xics.c */
struct kvmppc_xics;
struct kvmppc_icp;
@@ -214,16 +209,9 @@ struct revmap_entry {
#define KVMPPC_RMAP_PRESENT 0x100000000ul
#define KVMPPC_RMAP_INDEX 0xfffffffful
-/* Low-order bits in memslot->arch.slot_phys[] */
-#define KVMPPC_PAGE_ORDER_MASK 0x1f
-#define KVMPPC_PAGE_NO_CACHE HPTE_R_I /* 0x20 */
-#define KVMPPC_PAGE_WRITETHRU HPTE_R_W /* 0x40 */
-#define KVMPPC_GOT_PAGE 0x80
-
struct kvm_arch_memory_slot {
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
unsigned long *rmap;
- unsigned long *slot_phys;
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
};
@@ -242,14 +230,12 @@ struct kvm_arch {
struct kvm_rma_info *rma;
unsigned long vrma_slb_v;
int rma_setup_done;
- int using_mmu_notifiers;
u32 hpt_order;
atomic_t vcpus_running;
u32 online_vcores;
unsigned long hpt_npte;
unsigned long hpt_mask;
atomic_t hpte_mod_interest;
- spinlock_t slot_phys_lock;
cpumask_t need_tlb_flush;
int hpt_cma_alloc;
#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */
@@ -297,6 +283,7 @@ struct kvmppc_vcore {
struct list_head runnable_threads;
spinlock_t lock;
wait_queue_head_t wq;
+ spinlock_t stoltb_lock; /* protects stolen_tb and preempt_tb */
u64 stolen_tb;
u64 preempt_tb;
struct kvm_vcpu *runner;
@@ -308,6 +295,7 @@ struct kvmppc_vcore {
ulong dpdes; /* doorbell state (POWER8) */
void *mpp_buffer; /* Micro Partition Prefetch buffer */
bool mpp_buffer_is_valid;
+ ulong conferring_threads;
};
#define VCORE_ENTRY_COUNT(vc) ((vc)->entry_exit_count & 0xff)
@@ -664,6 +652,8 @@ struct kvm_vcpu_arch {
spinlock_t tbacct_lock;
u64 busy_stolen;
u64 busy_preempt;
+
+ u32 emul_inst;
#endif
};
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index a6dcdb6d13c1..46bf652c9169 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -170,8 +170,6 @@ extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba, unsigned long tce);
extern long kvmppc_h_get_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
unsigned long ioba);
-extern struct kvm_rma_info *kvm_alloc_rma(void);
-extern void kvm_release_rma(struct kvm_rma_info *ri);
extern struct page *kvm_alloc_hpt(unsigned long nr_pages);
extern void kvm_release_hpt(struct page *page, unsigned long nr_pages);
extern int kvmppc_core_init_vm(struct kvm *kvm);
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index 5cd8d2fddba9..eb95b675109b 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -56,6 +56,14 @@ struct opal_sg_list {
#define OPAL_HARDWARE_FROZEN -13
#define OPAL_WRONG_STATE -14
#define OPAL_ASYNC_COMPLETION -15
+#define OPAL_I2C_TIMEOUT -17
+#define OPAL_I2C_INVALID_CMD -18
+#define OPAL_I2C_LBUS_PARITY -19
+#define OPAL_I2C_BKEND_OVERRUN -20
+#define OPAL_I2C_BKEND_ACCESS -21
+#define OPAL_I2C_ARBT_LOST -22
+#define OPAL_I2C_NACK_RCVD -23
+#define OPAL_I2C_STOP_ERR -24
/* API Tokens (in r0) */
#define OPAL_INVALID_CALL -1
@@ -152,12 +160,25 @@ struct opal_sg_list {
#define OPAL_PCI_ERR_INJECT 96
#define OPAL_PCI_EEH_FREEZE_SET 97
#define OPAL_HANDLE_HMI 98
+#define OPAL_CONFIG_CPU_IDLE_STATE 99
+#define OPAL_SLW_SET_REG 100
#define OPAL_REGISTER_DUMP_REGION 101
#define OPAL_UNREGISTER_DUMP_REGION 102
#define OPAL_WRITE_TPO 103
#define OPAL_READ_TPO 104
#define OPAL_IPMI_SEND 107
#define OPAL_IPMI_RECV 108
+#define OPAL_I2C_REQUEST 109
+
+/* Device tree flags */
+
+/* Flags set in power-mgmt nodes in device tree if
+ * respective idle states are supported in the platform.
+ */
+#define OPAL_PM_NAP_ENABLED 0x00010000
+#define OPAL_PM_SLEEP_ENABLED 0x00020000
+#define OPAL_PM_WINKLE_ENABLED 0x00040000
+#define OPAL_PM_SLEEP_ENABLED_ER1 0x00080000
#ifndef __ASSEMBLY__
@@ -712,6 +733,24 @@ typedef struct oppanel_line {
uint64_t line_len;
} oppanel_line_t;
+/* OPAL I2C request */
+struct opal_i2c_request {
+ uint8_t type;
+#define OPAL_I2C_RAW_READ 0
+#define OPAL_I2C_RAW_WRITE 1
+#define OPAL_I2C_SM_READ 2
+#define OPAL_I2C_SM_WRITE 3
+ uint8_t flags;
+#define OPAL_I2C_ADDR_10 0x01 /* Not supported yet */
+ uint8_t subaddr_sz; /* Max 4 */
+ uint8_t reserved;
+ __be16 addr; /* 7 or 10 bit address */
+ __be16 reserved2;
+ __be32 subaddr; /* Sub-address if any */
+ __be32 size; /* Data size */
+ __be64 buffer_ra; /* Buffer real address */
+};
+
/* /sys/firmware/opal */
extern struct kobject *opal_kobj;
@@ -876,11 +915,14 @@ int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data);
int64_t opal_handle_hmi(void);
int64_t opal_register_dump_region(uint32_t id, uint64_t start, uint64_t end);
int64_t opal_unregister_dump_region(uint32_t id);
+int64_t opal_slw_set_reg(uint64_t cpu_pir, uint64_t sprn, uint64_t val);
int64_t opal_pci_set_phb_cxl_mode(uint64_t phb_id, uint64_t mode, uint64_t pe_number);
int64_t opal_ipmi_send(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t msg_len);
int64_t opal_ipmi_recv(uint64_t interface, struct opal_ipmi_msg *msg,
uint64_t *msg_len);
+int64_t opal_i2c_request(uint64_t async_token, uint32_t bus_id,
+ struct opal_i2c_request *oreq);
/* Internal functions */
extern int early_init_dt_scan_opal(unsigned long node, const char *uname,
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 24a386cbb928..e5f22c6c4bf9 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -152,6 +152,16 @@ struct paca_struct {
u64 tm_scratch; /* TM scratch area for reclaim */
#endif
+#ifdef CONFIG_PPC_POWERNV
+ /* Per-core mask tracking idle threads and a lock bit-[L][TTTTTTTT] */
+ u32 *core_idle_state_ptr;
+ u8 thread_idle_state; /* PNV_THREAD_RUNNING/NAP/SLEEP */
+ /* Mask to indicate thread id in core */
+ u8 thread_mask;
+ /* Mask to denote subcore sibling threads */
+ u8 subcore_sibling_mask;
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
/* Exclusive emergency stack pointer for machine check exception. */
void *mc_emergency_sp;
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1a5287759fc8..03cd858a401c 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -194,6 +194,7 @@
#define PPC_INST_NAP 0x4c000364
#define PPC_INST_SLEEP 0x4c0003a4
+#define PPC_INST_WINKLE 0x4c0003e4
/* A2 specific instructions */
#define PPC_INST_ERATWE 0x7c0001a6
@@ -375,6 +376,7 @@
#define PPC_NAP stringify_in_c(.long PPC_INST_NAP)
#define PPC_SLEEP stringify_in_c(.long PPC_INST_SLEEP)
+#define PPC_WINKLE stringify_in_c(.long PPC_INST_WINKLE)
/* BHRB instructions */
#define PPC_CLRBHRB stringify_in_c(.long PPC_INST_CLRBHRB)
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 29c3798cf800..bf117d8fb45f 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -452,7 +452,8 @@ enum idle_boot_override {IDLE_NO_OVERRIDE = 0, IDLE_POWERSAVE_OFF};
extern int powersave_nap; /* set if nap mode can be used in idle loop */
extern unsigned long power7_nap(int check_irq);
-extern void power7_sleep(void);
+extern unsigned long power7_sleep(void);
+extern unsigned long power7_winkle(void);
extern void flush_instruction_cache(void);
extern void hard_reset_now(void);
extern void poweroff_now(void);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index c998279bd85b..1c874fb533bb 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -118,8 +118,10 @@
#define __MSR (MSR_ME | MSR_RI | MSR_IR | MSR_DR | MSR_ISF |MSR_HV)
#ifdef __BIG_ENDIAN__
#define MSR_ __MSR
+#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV)
#else
#define MSR_ (__MSR | MSR_LE)
+#define MSR_IDLE (MSR_ME | MSR_SF | MSR_HV | MSR_LE)
#endif
#define MSR_KERNEL (MSR_ | MSR_64BIT)
#define MSR_USER32 (MSR_ | MSR_PR | MSR_EE)
@@ -371,6 +373,7 @@
#define SPRN_DBAT7L 0x23F /* Data BAT 7 Lower Register */
#define SPRN_DBAT7U 0x23E /* Data BAT 7 Upper Register */
#define SPRN_PPR 0x380 /* SMT Thread status Register */
+#define SPRN_TSCR 0x399 /* Thread Switch Control Register */
#define SPRN_DEC 0x016 /* Decrement Register */
#define SPRN_DER 0x095 /* Debug Enable Regsiter */
@@ -728,6 +731,7 @@
#define SPRN_BESCR 806 /* Branch event status and control register */
#define BESCR_GE 0x8000000000000000ULL /* Global Enable */
#define SPRN_WORT 895 /* Workload optimization register - thread */
+#define SPRN_WORC 863 /* Workload optimization register - core */
#define SPRN_PMC1 787
#define SPRN_PMC2 788
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 6240698fee9a..ff21b7a2f0cc 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -90,6 +90,10 @@ static inline void syscall_set_arguments(struct task_struct *task,
static inline int syscall_get_arch(void)
{
- return is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+ int arch = is_32bit_task() ? AUDIT_ARCH_PPC : AUDIT_ARCH_PPC64;
+#ifdef __LITTLE_ENDIAN__
+ arch |= __AUDIT_ARCH_LE;
+#endif
+ return arch;
}
#endif /* _ASM_SYSCALL_H */
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 9485b43a7c00..a0c071d24e0e 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -284,7 +284,7 @@ do { \
if (!is_kernel_addr((unsigned long)__gu_addr)) \
might_fault(); \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
__gu_err; \
})
#endif /* __powerpc64__ */
@@ -297,7 +297,7 @@ do { \
might_fault(); \
if (access_ok(VERIFY_READ, __gu_addr, (size))) \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
__gu_err; \
})
@@ -308,7 +308,7 @@ do { \
const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
__chk_user_ptr(ptr); \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
- (x) = (__typeof__(*(ptr)))__gu_val; \
+ (x) = (__force __typeof__(*(ptr)))__gu_val; \
__gu_err; \
})
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index c161ef3f28a1..e624f9646350 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -489,7 +489,6 @@ int main(void)
DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
DEFINE(KVM_HOST_SDR1, offsetof(struct kvm, arch.host_sdr1));
- DEFINE(KVM_TLBIE_LOCK, offsetof(struct kvm, arch.tlbie_lock));
DEFINE(KVM_NEED_FLUSH, offsetof(struct kvm, arch.need_tlb_flush.bits));
DEFINE(KVM_ENABLED_HCALLS, offsetof(struct kvm, arch.enabled_hcalls));
DEFINE(KVM_LPCR, offsetof(struct kvm, arch.lpcr));
@@ -499,6 +498,7 @@ int main(void)
DEFINE(VCPU_DAR, offsetof(struct kvm_vcpu, arch.shregs.dar));
DEFINE(VCPU_VPA, offsetof(struct kvm_vcpu, arch.vpa.pinned_addr));
DEFINE(VCPU_VPA_DIRTY, offsetof(struct kvm_vcpu, arch.vpa.dirty));
+ DEFINE(VCPU_HEIR, offsetof(struct kvm_vcpu, arch.emul_inst));
#endif
#ifdef CONFIG_PPC_BOOK3S
DEFINE(VCPU_VCPUID, offsetof(struct kvm_vcpu, vcpu_id));
@@ -726,5 +726,16 @@ int main(void)
arch.timing_last_enter.tv32.tbl));
#endif
+#ifdef CONFIG_PPC_POWERNV
+ DEFINE(PACA_CORE_IDLE_STATE_PTR,
+ offsetof(struct paca_struct, core_idle_state_ptr));
+ DEFINE(PACA_THREAD_IDLE_STATE,
+ offsetof(struct paca_struct, thread_idle_state));
+ DEFINE(PACA_THREAD_MASK,
+ offsetof(struct paca_struct, thread_mask));
+ DEFINE(PACA_SUBCORE_SIBLING_MASK,
+ offsetof(struct paca_struct, subcore_sibling_mask));
+#endif
+
return 0;
}
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index db08382e19f1..c2df8150bd7a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -15,6 +15,7 @@
#include <asm/hw_irq.h>
#include <asm/exception-64s.h>
#include <asm/ptrace.h>
+#include <asm/cpuidle.h>
/*
* We layout physical memory as follows:
@@ -101,23 +102,34 @@ system_reset_pSeries:
#ifdef CONFIG_PPC_P7_NAP
BEGIN_FTR_SECTION
/* Running native on arch 2.06 or later, check if we are
- * waking up from nap. We only handle no state loss and
- * supervisor state loss. We do -not- handle hypervisor
- * state loss at this time.
+ * waking up from nap/sleep/winkle.
*/
mfspr r13,SPRN_SRR1
rlwinm. r13,r13,47-31,30,31
beq 9f
- /* waking up from powersave (nap) state */
- cmpwi cr1,r13,2
- /* Total loss of HV state is fatal, we could try to use the
- * PIR to locate a PACA, then use an emergency stack etc...
- * OPAL v3 based powernv platforms have new idle states
- * which fall in this catagory.
+ cmpwi cr3,r13,2
+
+ /*
+ * Check if last bit of HSPGR0 is set. This indicates whether we are
+ * waking up from winkle.
*/
- bgt cr1,8f
GET_PACA(r13)
+ clrldi r5,r13,63
+ clrrdi r13,r13,1
+ cmpwi cr4,r5,1
+ mtspr SPRN_HSPRG0,r13
+
+ lbz r0,PACA_THREAD_IDLE_STATE(r13)
+ cmpwi cr2,r0,PNV_THREAD_NAP
+ bgt cr2,8f /* Either sleep or Winkle */
+
+ /* Waking up from nap should not cause hypervisor state loss */
+ bgt cr3,.
+
+ /* Waking up from nap */
+ li r0,PNV_THREAD_RUNNING
+ stb r0,PACA_THREAD_IDLE_STATE(r13) /* Clear thread state */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
li r0,KVM_HWTHREAD_IN_KERNEL
@@ -133,7 +145,7 @@ BEGIN_FTR_SECTION
/* Return SRR1 from power7_nap() */
mfspr r3,SPRN_SRR1
- beq cr1,2f
+ beq cr3,2f
b power7_wakeup_noloss
2: b power7_wakeup_loss
@@ -1382,6 +1394,7 @@ machine_check_handle_early:
MACHINE_CHECK_HANDLER_WINDUP
GET_PACA(r13)
ld r1,PACAR1(r13)
+ li r3,PNV_THREAD_NAP
b power7_enter_nap_mode
4:
#endif
diff --git a/arch/powerpc/kernel/idle_power7.S b/arch/powerpc/kernel/idle_power7.S
index 18c0687e5ab3..05adc8bbdef8 100644
--- a/arch/powerpc/kernel/idle_power7.S
+++ b/arch/powerpc/kernel/idle_power7.S
@@ -18,9 +18,25 @@
#include <asm/hw_irq.h>
#include <asm/kvm_book3s_asm.h>
#include <asm/opal.h>
+#include <asm/cpuidle.h>
+#include <asm/mmu-hash64.h>
#undef DEBUG
+/*
+ * Use unused space in the interrupt stack to save and restore
+ * registers for winkle support.
+ */
+#define _SDR1 GPR3
+#define _RPR GPR4
+#define _SPURR GPR5
+#define _PURR GPR6
+#define _TSCR GPR7
+#define _DSCR GPR8
+#define _AMOR GPR9
+#define _WORT GPR10
+#define _WORC GPR11
+
/* Idle state entry routines */
#define IDLE_STATE_ENTER_SEQ(IDLE_INST) \
@@ -37,8 +53,7 @@
/*
* Pass requested state in r3:
- * 0 - nap
- * 1 - sleep
+ * r3 - PNV_THREAD_NAP/SLEEP/WINKLE
*
* To check IRQ_HAPPENED in r4
* 0 - don't check
@@ -101,18 +116,105 @@ _GLOBAL(power7_powersave_common)
std r9,_MSR(r1)
std r1,PACAR1(r13)
-_GLOBAL(power7_enter_nap_mode)
+ /*
+ * Go to real mode to do the nap, as required by the architecture.
+ * Also, we need to be in real mode before setting hwthread_state,
+ * because as soon as we do that, another thread can switch
+ * the MMU context to the guest.
+ */
+ LOAD_REG_IMMEDIATE(r5, MSR_IDLE)
+ li r6, MSR_RI
+ andc r6, r9, r6
+ LOAD_REG_ADDR(r7, power7_enter_nap_mode)
+ mtmsrd r6, 1 /* clear RI before setting SRR0/1 */
+ mtspr SPRN_SRR0, r7
+ mtspr SPRN_SRR1, r5
+ rfid
+
+ .globl power7_enter_nap_mode
+power7_enter_nap_mode:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
/* Tell KVM we're napping */
li r4,KVM_HWTHREAD_IN_NAP
stb r4,HSTATE_HWTHREAD_STATE(r13)
#endif
- cmpwi cr0,r3,1
- beq 2f
+ stb r3,PACA_THREAD_IDLE_STATE(r13)
+ cmpwi cr3,r3,PNV_THREAD_SLEEP
+ bge cr3,2f
IDLE_STATE_ENTER_SEQ(PPC_NAP)
/* No return */
-2: IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
- /* No return */
+2:
+ /* Sleep or winkle */
+ lbz r7,PACA_THREAD_MASK(r13)
+ ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop1:
+ lwarx r15,0,r14
+ andc r15,r15,r7 /* Clear thread bit */
+
+ andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
+
+/*
+ * If cr0 = 0, then current thread is the last thread of the core entering
+ * sleep. Last thread needs to execute the hardware bug workaround code if
+ * required by the platform.
+ * Make the workaround call unconditionally here. The below branch call is
+ * patched out when the idle states are discovered if the platform does not
+ * require it.
+ */
+.global pnv_fastsleep_workaround_at_entry
+pnv_fastsleep_workaround_at_entry:
+ beq fastsleep_workaround_at_entry
+
+ stwcx. r15,0,r14
+ bne- lwarx_loop1
+ isync
+
+common_enter: /* common code for all the threads entering sleep or winkle */
+ bgt cr3,enter_winkle
+ IDLE_STATE_ENTER_SEQ(PPC_SLEEP)
+
+fastsleep_workaround_at_entry:
+ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
+ stwcx. r15,0,r14
+ bne- lwarx_loop1
+ isync
+
+ /* Fast sleep workaround */
+ li r3,1
+ li r4,1
+ li r0,OPAL_CONFIG_CPU_IDLE_STATE
+ bl opal_call_realmode
+
+ /* Clear Lock bit */
+ li r0,0
+ lwsync
+ stw r0,0(r14)
+ b common_enter
+
+enter_winkle:
+ /*
+ * Note all register i.e per-core, per-subcore or per-thread is saved
+ * here since any thread in the core might wake up first
+ */
+ mfspr r3,SPRN_SDR1
+ std r3,_SDR1(r1)
+ mfspr r3,SPRN_RPR
+ std r3,_RPR(r1)
+ mfspr r3,SPRN_SPURR
+ std r3,_SPURR(r1)
+ mfspr r3,SPRN_PURR
+ std r3,_PURR(r1)
+ mfspr r3,SPRN_TSCR
+ std r3,_TSCR(r1)
+ mfspr r3,SPRN_DSCR
+ std r3,_DSCR(r1)
+ mfspr r3,SPRN_AMOR
+ std r3,_AMOR(r1)
+ mfspr r3,SPRN_WORT
+ std r3,_WORT(r1)
+ mfspr r3,SPRN_WORC
+ std r3,_WORC(r1)
+ IDLE_STATE_ENTER_SEQ(PPC_WINKLE)
_GLOBAL(power7_idle)
/* Now check if user or arch enabled NAP mode */
@@ -125,48 +227,21 @@ _GLOBAL(power7_idle)
_GLOBAL(power7_nap)
mr r4,r3
- li r3,0
+ li r3,PNV_THREAD_NAP
b power7_powersave_common
/* No return */
_GLOBAL(power7_sleep)
- li r3,1
+ li r3,PNV_THREAD_SLEEP
li r4,1
b power7_powersave_common
/* No return */
-/*
- * Make opal call in realmode. This is a generic function to be called
- * from realmode from reset vector. It handles endianess.
- *
- * r13 - paca pointer
- * r1 - stack pointer
- * r3 - opal token
- */
-opal_call_realmode:
- mflr r12
- std r12,_LINK(r1)
- ld r2,PACATOC(r13)
- /* Set opal return address */
- LOAD_REG_ADDR(r0,return_from_opal_call)
- mtlr r0
- /* Handle endian-ness */
- li r0,MSR_LE
- mfmsr r12
- andc r12,r12,r0
- mtspr SPRN_HSRR1,r12
- mr r0,r3 /* Move opal token to r0 */
- LOAD_REG_ADDR(r11,opal)
- ld r12,8(r11)
- ld r2,0(r11)
- mtspr SPRN_HSRR0,r12
- hrfid
-
-return_from_opal_call:
- FIXUP_ENDIAN
- ld r0,_LINK(r1)
- mtlr r0
- blr
+_GLOBAL(power7_winkle)
+ li r3,3
+ li r4,1
+ b power7_powersave_common
+ /* No return */
#define CHECK_HMI_INTERRUPT \
mfspr r0,SPRN_SRR1; \
@@ -181,7 +256,7 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
ld r2,PACATOC(r13); \
ld r1,PACAR1(r13); \
std r3,ORIG_GPR3(r1); /* Save original r3 */ \
- li r3,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \
+ li r0,OPAL_HANDLE_HMI; /* Pass opal token argument*/ \
bl opal_call_realmode; \
ld r3,ORIG_GPR3(r1); /* Restore original r3 */ \
20: nop;
@@ -190,16 +265,190 @@ ALT_FTR_SECTION_END_NESTED_IFSET(CPU_FTR_ARCH_207S, 66); \
_GLOBAL(power7_wakeup_tb_loss)
ld r2,PACATOC(r13);
ld r1,PACAR1(r13)
+ /*
+ * Before entering any idle state, the NVGPRs are saved in the stack
+ * and they are restored before switching to the process context. Hence
+ * until they are restored, they are free to be used.
+ *
+ * Save SRR1 in a NVGPR as it might be clobbered in opal_call_realmode
+ * (called in CHECK_HMI_INTERRUPT). SRR1 is required to determine the
+ * wakeup reason if we branch to kvm_start_guest.
+ */
+ mfspr r16,SPRN_SRR1
BEGIN_FTR_SECTION
CHECK_HMI_INTERRUPT
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
+
+ lbz r7,PACA_THREAD_MASK(r13)
+ ld r14,PACA_CORE_IDLE_STATE_PTR(r13)
+lwarx_loop2:
+ lwarx r15,0,r14
+ andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
+ /*
+ * Lock bit is set in one of the 2 cases-
+ * a. In the sleep/winkle enter path, the last thread is executing
+ * fastsleep workaround code.
+ * b. In the wake up path, another thread is executing fastsleep
+ * workaround undo code or resyncing timebase or restoring context
+ * In either case loop until the lock bit is cleared.
+ */
+ bne core_idle_lock_held
+
+ cmpwi cr2,r15,0
+ lbz r4,PACA_SUBCORE_SIBLING_MASK(r13)
+ and r4,r4,r15
+ cmpwi cr1,r4,0 /* Check if first in subcore */
+
+ /*
+ * At this stage
+ * cr1 - 0b0100 if first thread to wakeup in subcore
+ * cr2 - 0b0100 if first thread to wakeup in core
+ * cr3- 0b0010 if waking up from sleep or winkle
+ * cr4 - 0b0100 if waking up from winkle
+ */
+
+ or r15,r15,r7 /* Set thread bit */
+
+ beq cr1,first_thread_in_subcore
+
+ /* Not first thread in subcore to wake up */
+ stwcx. r15,0,r14
+ bne- lwarx_loop2
+ isync
+ b common_exit
+
+core_idle_lock_held:
+ HMT_LOW
+core_idle_lock_loop:
+ lwz r15,0(14)
+ andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
+ bne core_idle_lock_loop
+ HMT_MEDIUM
+ b lwarx_loop2
+
+first_thread_in_subcore:
+ /* First thread in subcore to wakeup */
+ ori r15,r15,PNV_CORE_IDLE_LOCK_BIT
+ stwcx. r15,0,r14
+ bne- lwarx_loop2
+ isync
+
+ /*
+ * If waking up from sleep, subcore state is not lost. Hence
+ * skip subcore state restore
+ */
+ bne cr4,subcore_state_restored
+
+ /* Restore per-subcore state */
+ ld r4,_SDR1(r1)
+ mtspr SPRN_SDR1,r4
+ ld r4,_RPR(r1)
+ mtspr SPRN_RPR,r4
+ ld r4,_AMOR(r1)
+ mtspr SPRN_AMOR,r4
+
+subcore_state_restored:
+ /*
+ * Check if the thread is also the first thread in the core. If not,
+ * skip to clear_lock.
+ */
+ bne cr2,clear_lock
+
+first_thread_in_core:
+
+ /*
+ * First thread in the core waking up from fastsleep. It needs to
+ * call the fastsleep workaround code if the platform requires it.
+ * Call it unconditionally here. The below branch instruction will
+ * be patched out when the idle states are discovered if platform
+ * does not require workaround.
+ */
+.global pnv_fastsleep_workaround_at_exit
+pnv_fastsleep_workaround_at_exit:
+ b fastsleep_workaround_at_exit
+
+timebase_resync:
+ /* Do timebase resync if we are waking up from sleep. Use cr3 value
+ * set in exceptions-64s.S */
+ ble cr3,clear_lock
/* Time base re-sync */
- li r3,OPAL_RESYNC_TIMEBASE
+ li r0,OPAL_RESYNC_TIMEBASE
bl opal_call_realmode;
-
/* TODO: Check r3 for failure */
+ /*
+ * If waking up from sleep, per core state is not lost, skip to
+ * clear_lock.
+ */
+ bne cr4,clear_lock
+
+ /* Restore per core state */
+ ld r4,_TSCR(r1)
+ mtspr SPRN_TSCR,r4
+ ld r4,_WORC(r1)
+ mtspr SPRN_WORC,r4
+
+clear_lock:
+ andi. r15,r15,PNV_CORE_IDLE_THREAD_BITS
+ lwsync
+ stw r15,0(r14)
+
+common_exit:
+ /*
+ * Common to all threads.
+ *
+ * If waking up from sleep, hypervisor state is not lost. Hence
+ * skip hypervisor state restore.
+ */
+ bne cr4,hypervisor_state_restored
+
+ /* Waking up from winkle */
+
+ /* Restore per thread state */
+ bl __restore_cpu_power8
+
+ /* Restore SLB from PACA */
+ ld r8,PACA_SLBSHADOWPTR(r13)
+
+ .rept SLB_NUM_BOLTED
+ li r3, SLBSHADOW_SAVEAREA
+ LDX_BE r5, r8, r3
+ addi r3, r3, 8
+ LDX_BE r6, r8, r3
+ andis. r7,r5,SLB_ESID_V@h
+ beq 1f
+ slbmte r6,r5
+1: addi r8,r8,16
+ .endr
+
+ ld r4,_SPURR(r1)
+ mtspr SPRN_SPURR,r4
+ ld r4,_PURR(r1)
+ mtspr SPRN_PURR,r4
+ ld r4,_DSCR(r1)
+ mtspr SPRN_DSCR,r4
+ ld r4,_WORT(r1)
+ mtspr SPRN_WORT,r4
+
+hypervisor_state_restored:
+
+ li r5,PNV_THREAD_RUNNING
+ stb r5,PACA_THREAD_IDLE_STATE(r13)
+
+ mtspr SPRN_SRR1,r16
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ li r0,KVM_HWTHREAD_IN_KERNEL
+ stb r0,HSTATE_HWTHREAD_STATE(r13)
+ /* Order setting hwthread_state vs. testing hwthread_req */
+ sync
+ lbz r0,HSTATE_HWTHREAD_REQ(r13)
+ cmpwi r0,0
+ beq 6f
+ b kvm_start_guest
+6:
+#endif
+
REST_NVGPRS(r1)
REST_GPR(2, r1)
ld r3,_CCR(r1)
@@ -212,6 +461,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
mtspr SPRN_SRR0,r5
rfid
+fastsleep_workaround_at_exit:
+ li r3,1
+ li r4,0
+ li r0,OPAL_CONFIG_CPU_IDLE_STATE
+ bl opal_call_realmode
+ b timebase_resync
+
/*
* R3 here contains the value that will be returned to the caller
* of power7_nap.
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 8b2d2dc8ef10..8ec017cb4446 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -700,7 +700,6 @@ void start_secondary(void *unused)
smp_store_cpu_info(cpu);
set_dec(tb_ticks_per_jiffy);
preempt_disable();
- cpu_callin_map[cpu] = 1;
if (smp_ops->setup_cpu)
smp_ops->setup_cpu(cpu);
@@ -739,6 +738,14 @@ void start_secondary(void *unused)
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
+ /*
+ * CPU must be marked active and online before we signal back to the
+ * master, because the scheduler needs to see the cpu_online and
+ * cpu_active bits set.
+ */
+ smp_wmb();
+ cpu_callin_map[cpu] = 1;
+
local_irq_enable();
cpu_startup_entry(CPUHP_ONLINE);
diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig
index 602eb51d20bc..f5769f19ae25 100644
--- a/arch/powerpc/kvm/Kconfig
+++ b/arch/powerpc/kvm/Kconfig
@@ -172,6 +172,7 @@ config KVM_XICS
depends on KVM_BOOK3S_64 && !KVM_MPIC
select HAVE_KVM_IRQCHIP
select HAVE_KVM_IRQFD
+ default y
---help---
Include support for the XICS (eXternal Interrupt Controller
Specification) interrupt controller architecture used on
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index b32db4b95361..888bf466d8c6 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -64,14 +64,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ NULL }
};
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
void kvmppc_unfixup_split_real(struct kvm_vcpu *vcpu)
{
if (vcpu->arch.hflags & BOOK3S_HFLAG_SPLIT_HACK) {
diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c
index cd0b0730e29e..a2eb6d354a57 100644
--- a/arch/powerpc/kvm/book3s_32_mmu.c
+++ b/arch/powerpc/kvm/book3s_32_mmu.c
@@ -78,11 +78,6 @@ static inline bool sr_kp(u32 sr_raw)
return (sr_raw & 0x20000000) ? true: false;
}
-static inline bool sr_nx(u32 sr_raw)
-{
- return (sr_raw & 0x10000000) ? true: false;
-}
-
static int kvmppc_mmu_book3s_32_xlate_bat(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_pte *pte, bool data,
bool iswrite);
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index d40770248b6a..534acb3c6c3d 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -37,8 +37,7 @@
#include <asm/ppc-opcode.h>
#include <asm/cputable.h>
-/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
-#define MAX_LPID_970 63
+#include "trace_hv.h"
/* Power architecture requires HPT is at least 256kB */
#define PPC_MIN_HPT_ORDER 18
@@ -229,14 +228,9 @@ int kvmppc_mmu_hv_init(void)
if (!cpu_has_feature(CPU_FTR_HVMODE))
return -EINVAL;
- /* POWER7 has 10-bit LPIDs, PPC970 and e500mc have 6-bit LPIDs */
- if (cpu_has_feature(CPU_FTR_ARCH_206)) {
- host_lpid = mfspr(SPRN_LPID); /* POWER7 */
- rsvd_lpid = LPID_RSVD;
- } else {
- host_lpid = 0; /* PPC970 */
- rsvd_lpid = MAX_LPID_970;
- }
+ /* POWER7 has 10-bit LPIDs (12-bit in POWER8) */
+ host_lpid = mfspr(SPRN_LPID);
+ rsvd_lpid = LPID_RSVD;
kvmppc_init_lpid(rsvd_lpid + 1);
@@ -259,130 +253,12 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, msr);
}
-/*
- * This is called to get a reference to a guest page if there isn't
- * one already in the memslot->arch.slot_phys[] array.
- */
-static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
- struct kvm_memory_slot *memslot,
- unsigned long psize)
-{
- unsigned long start;
- long np, err;
- struct page *page, *hpage, *pages[1];
- unsigned long s, pgsize;
- unsigned long *physp;
- unsigned int is_io, got, pgorder;
- struct vm_area_struct *vma;
- unsigned long pfn, i, npages;
-
- physp = memslot->arch.slot_phys;
- if (!physp)
- return -EINVAL;
- if (physp[gfn - memslot->base_gfn])
- return 0;
-
- is_io = 0;
- got = 0;
- page = NULL;
- pgsize = psize;
- err = -EINVAL;
- start = gfn_to_hva_memslot(memslot, gfn);
-
- /* Instantiate and get the page we want access to */
- np = get_user_pages_fast(start, 1, 1, pages);
- if (np != 1) {
- /* Look up the vma for the page */
- down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, start);
- if (!vma || vma->vm_start > start ||
- start + psize > vma->vm_end ||
- !(vma->vm_flags & VM_PFNMAP))
- goto up_err;
- is_io = hpte_cache_bits(pgprot_val(vma->vm_page_prot));
- pfn = vma->vm_pgoff + ((start - vma->vm_start) >> PAGE_SHIFT);
- /* check alignment of pfn vs. requested page size */
- if (psize > PAGE_SIZE && (pfn & ((psize >> PAGE_SHIFT) - 1)))
- goto up_err;
- up_read(&current->mm->mmap_sem);
-
- } else {
- page = pages[0];
- got = KVMPPC_GOT_PAGE;
-
- /* See if this is a large page */
- s = PAGE_SIZE;
- if (PageHuge(page)) {
- hpage = compound_head(page);
- s <<= compound_order(hpage);
- /* Get the whole large page if slot alignment is ok */
- if (s > psize && slot_is_aligned(memslot, s) &&
- !(memslot->userspace_addr & (s - 1))) {
- start &= ~(s - 1);
- pgsize = s;
- get_page(hpage);
- put_page(page);
- page = hpage;
- }
- }
- if (s < psize)
- goto out;
- pfn = page_to_pfn(page);
- }
-
- npages = pgsize >> PAGE_SHIFT;
- pgorder = __ilog2(npages);
- physp += (gfn - memslot->base_gfn) & ~(npages - 1);
- spin_lock(&kvm->arch.slot_phys_lock);
- for (i = 0; i < npages; ++i) {
- if (!physp[i]) {
- physp[i] = ((pfn + i) << PAGE_SHIFT) +
- got + is_io + pgorder;
- got = 0;
- }
- }
- spin_unlock(&kvm->arch.slot_phys_lock);
- err = 0;
-
- out:
- if (got)
- put_page(page);
- return err;
-
- up_err:
- up_read(&current->mm->mmap_sem);
- return err;
-}
-
long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret)
{
- unsigned long psize, gpa, gfn;
- struct kvm_memory_slot *memslot;
long ret;
- if (kvm->arch.using_mmu_notifiers)
- goto do_insert;
-
- psize = hpte_page_size(pteh, ptel);
- if (!psize)
- return H_PARAMETER;
-
- pteh &= ~(HPTE_V_HVLOCK | HPTE_V_ABSENT | HPTE_V_VALID);
-
- /* Find the memslot (if any) for this address */
- gpa = (ptel & HPTE_R_RPN) & ~(psize - 1);
- gfn = gpa >> PAGE_SHIFT;
- memslot = gfn_to_memslot(kvm, gfn);
- if (memslot && !(memslot->flags & KVM_MEMSLOT_INVALID)) {
- if (!slot_is_aligned(memslot, psize))
- return H_PARAMETER;
- if (kvmppc_get_guest_page(kvm, gfn, memslot, psize) < 0)
- return H_PARAMETER;
- }
-
- do_insert:
/* Protect linux PTE lookup from page table destruction */
rcu_read_lock_sched(); /* this disables preemption too */
ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel,
@@ -397,19 +273,6 @@ long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
}
-/*
- * We come here on a H_ENTER call from the guest when we are not
- * using mmu notifiers and we don't have the requested page pinned
- * already.
- */
-long kvmppc_virtmode_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
- long pte_index, unsigned long pteh,
- unsigned long ptel)
-{
- return kvmppc_virtmode_do_h_enter(vcpu->kvm, flags, pte_index,
- pteh, ptel, &vcpu->arch.gpr[4]);
-}
-
static struct kvmppc_slb *kvmppc_mmu_book3s_hv_find_slbe(struct kvm_vcpu *vcpu,
gva_t eaddr)
{
@@ -494,7 +357,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
gpte->may_execute = gpte->may_read && !(gr & (HPTE_R_N | HPTE_R_G));
/* Storage key permission check for POWER7 */
- if (data && virtmode && cpu_has_feature(CPU_FTR_ARCH_206)) {
+ if (data && virtmode) {
int amrfield = hpte_get_skey_perm(gr, vcpu->arch.amr);
if (amrfield & 1)
gpte->may_read = 0;
@@ -622,14 +485,13 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
gfn = gpa >> PAGE_SHIFT;
memslot = gfn_to_memslot(kvm, gfn);
+ trace_kvm_page_fault_enter(vcpu, hpte, memslot, ea, dsisr);
+
/* No memslot means it's an emulated MMIO region */
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
dsisr & DSISR_ISSTORE);
- if (!kvm->arch.using_mmu_notifiers)
- return -EFAULT; /* should never get here */
-
/*
* This should never happen, because of the slot_is_aligned()
* check in kvmppc_do_h_enter().
@@ -641,6 +503,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
mmu_seq = kvm->mmu_notifier_seq;
smp_rmb();
+ ret = -EFAULT;
is_io = 0;
pfn = 0;
page = NULL;
@@ -664,7 +527,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
up_read(&current->mm->mmap_sem);
if (!pfn)
- return -EFAULT;
+ goto out_put;
} else {
page = pages[0];
pfn = page_to_pfn(page);
@@ -694,14 +557,14 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
}
}
- ret = -EFAULT;
if (psize > pte_size)
goto out_put;
/* Check WIMG vs. the actual page we're accessing */
if (!hpte_cache_flags_ok(r, is_io)) {
if (is_io)
- return -EFAULT;
+ goto out_put;
+
/*
* Allow guest to map emulated device memory as
* uncacheable, but actually make it cacheable.
@@ -765,6 +628,8 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
SetPageDirty(page);
out_put:
+ trace_kvm_page_fault_exit(vcpu, hpte, ret);
+
if (page) {
/*
* We drop pages[0] here, not page because page might
@@ -895,8 +760,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
psize = hpte_page_size(be64_to_cpu(hptep[0]), ptel);
if ((be64_to_cpu(hptep[0]) & HPTE_V_VALID) &&
hpte_rpn(ptel, psize) == gfn) {
- if (kvm->arch.using_mmu_notifiers)
- hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
+ hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
kvmppc_invalidate_hpte(kvm, hptep, i);
/* Harvest R and C */
rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
@@ -914,15 +778,13 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
int kvm_unmap_hva_hv(struct kvm *kvm, unsigned long hva)
{
- if (kvm->arch.using_mmu_notifiers)
- kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
+ kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
return 0;
}
int kvm_unmap_hva_range_hv(struct kvm *kvm, unsigned long start, unsigned long end)
{
- if (kvm->arch.using_mmu_notifiers)
- kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
+ kvm_handle_hva_range(kvm, start, end, kvm_unmap_rmapp);
return 0;
}
@@ -1004,8 +866,6 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
int kvm_age_hva_hv(struct kvm *kvm, unsigned long start, unsigned long end)
{
- if (!kvm->arch.using_mmu_notifiers)
- return 0;
return kvm_handle_hva_range(kvm, start, end, kvm_age_rmapp);
}
@@ -1042,15 +902,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, unsigned long *rmapp,
int kvm_test_age_hva_hv(struct kvm *kvm, unsigned long hva)
{
- if (!kvm->arch.using_mmu_notifiers)
- return 0;
return kvm_handle_hva(kvm, hva, kvm_test_age_rmapp);
}
void kvm_set_spte_hva_hv(struct kvm *kvm, unsigned long hva, pte_t pte)
{
- if (!kvm->arch.using_mmu_notifiers)
- return;
kvm_handle_hva(kvm, hva, kvm_unmap_rmapp);
}
@@ -1117,8 +973,11 @@ static int kvm_test_clear_dirty_npages(struct kvm *kvm, unsigned long *rmapp)
}
/* Now check and modify the HPTE */
- if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID)))
+ if (!(hptep[0] & cpu_to_be64(HPTE_V_VALID))) {
+ /* unlock and continue */
+ hptep[0] &= ~cpu_to_be64(HPTE_V_HVLOCK);
continue;
+ }
/* need to make it temporarily absent so C is stable */
hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
@@ -1206,35 +1065,17 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
struct page *page, *pages[1];
int npages;
unsigned long hva, offset;
- unsigned long pa;
- unsigned long *physp;
int srcu_idx;
srcu_idx = srcu_read_lock(&kvm->srcu);
memslot = gfn_to_memslot(kvm, gfn);
if (!memslot || (memslot->flags & KVM_MEMSLOT_INVALID))
goto err;
- if (!kvm->arch.using_mmu_notifiers) {
- physp = memslot->arch.slot_phys;
- if (!physp)
- goto err;
- physp += gfn - memslot->base_gfn;
- pa = *physp;
- if (!pa) {
- if (kvmppc_get_guest_page(kvm, gfn, memslot,
- PAGE_SIZE) < 0)
- goto err;
- pa = *physp;
- }
- page = pfn_to_page(pa >> PAGE_SHIFT);
- get_page(page);
- } else {
- hva = gfn_to_hva_memslot(memslot, gfn);
- npages = get_user_pages_fast(hva, 1, 1, pages);
- if (npages < 1)
- goto err;
- page = pages[0];
- }
+ hva = gfn_to_hva_memslot(memslot, gfn);
+ npages = get_user_pages_fast(hva, 1, 1, pages);
+ if (npages < 1)
+ goto err;
+ page = pages[0];
srcu_read_unlock(&kvm->srcu, srcu_idx);
offset = gpa & (PAGE_SIZE - 1);
@@ -1258,7 +1099,7 @@ void kvmppc_unpin_guest_page(struct kvm *kvm, void *va, unsigned long gpa,
put_page(page);
- if (!dirty || !kvm->arch.using_mmu_notifiers)
+ if (!dirty)
return;
/* We need to mark this page dirty in the rmap chain */
@@ -1539,9 +1380,15 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf,
hptp = (__be64 *)(kvm->arch.hpt_virt + (i * HPTE_SIZE));
lbuf = (unsigned long __user *)buf;
for (j = 0; j < hdr.n_valid; ++j) {
+ __be64 hpte_v;
+ __be64 hpte_r;
+
err = -EFAULT;
- if (__get_user(v, lbuf) || __get_user(r, lbuf + 1))
+ if (__get_user(hpte_v, lbuf) ||
+ __get_user(hpte_r, lbuf + 1))
goto out;
+ v = be64_to_cpu(hpte_v);
+ r = be64_to_cpu(hpte_r);
err = -EINVAL;
if (!(v & HPTE_V_VALID))
goto out;
@@ -1652,10 +1499,7 @@ void kvmppc_mmu_book3s_hv_init(struct kvm_vcpu *vcpu)
{
struct kvmppc_mmu *mmu = &vcpu->arch.mmu;
- if (cpu_has_feature(CPU_FTR_ARCH_206))
- vcpu->arch.slb_nr = 32; /* POWER7 */
- else
- vcpu->arch.slb_nr = 64;
+ vcpu->arch.slb_nr = 32; /* POWER7/POWER8 */
mmu->xlate = kvmppc_mmu_book3s_64_hv_xlate;
mmu->reset_msr = kvmppc_mmu_book3s_64_hv_reset_msr;
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index e63587d30b70..de4018a1bc4b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -58,6 +58,9 @@
#include "book3s.h"
+#define CREATE_TRACE_POINTS
+#include "trace_hv.h"
+
/* #define EXIT_DEBUG */
/* #define EXIT_DEBUG_SIMPLE */
/* #define EXIT_DEBUG_INT */
@@ -135,11 +138,10 @@ static void kvmppc_fast_vcpu_kick_hv(struct kvm_vcpu *vcpu)
* stolen.
*
* Updates to busy_stolen are protected by arch.tbacct_lock;
- * updates to vc->stolen_tb are protected by the arch.tbacct_lock
- * of the vcpu that has taken responsibility for running the vcore
- * (i.e. vc->runner). The stolen times are measured in units of
- * timebase ticks. (Note that the != TB_NIL checks below are
- * purely defensive; they should never fail.)
+ * updates to vc->stolen_tb are protected by the vcore->stoltb_lock
+ * lock. The stolen times are measured in units of timebase ticks.
+ * (Note that the != TB_NIL checks below are purely defensive;
+ * they should never fail.)
*/
static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
@@ -147,12 +149,21 @@ static void kvmppc_core_vcpu_load_hv(struct kvm_vcpu *vcpu, int cpu)
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
- spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
- if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE &&
- vc->preempt_tb != TB_NIL) {
- vc->stolen_tb += mftb() - vc->preempt_tb;
- vc->preempt_tb = TB_NIL;
+ /*
+ * We can test vc->runner without taking the vcore lock,
+ * because only this task ever sets vc->runner to this
+ * vcpu, and once it is set to this vcpu, only this task
+ * ever sets it to NULL.
+ */
+ if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+ spin_lock_irqsave(&vc->stoltb_lock, flags);
+ if (vc->preempt_tb != TB_NIL) {
+ vc->stolen_tb += mftb() - vc->preempt_tb;
+ vc->preempt_tb = TB_NIL;
+ }
+ spin_unlock_irqrestore(&vc->stoltb_lock, flags);
}
+ spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST &&
vcpu->arch.busy_preempt != TB_NIL) {
vcpu->arch.busy_stolen += mftb() - vcpu->arch.busy_preempt;
@@ -166,9 +177,12 @@ static void kvmppc_core_vcpu_put_hv(struct kvm_vcpu *vcpu)
struct kvmppc_vcore *vc = vcpu->arch.vcore;
unsigned long flags;
- spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
- if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE)
+ if (vc->runner == vcpu && vc->vcore_state != VCORE_INACTIVE) {
+ spin_lock_irqsave(&vc->stoltb_lock, flags);
vc->preempt_tb = mftb();
+ spin_unlock_irqrestore(&vc->stoltb_lock, flags);
+ }
+ spin_lock_irqsave(&vcpu->arch.tbacct_lock, flags);
if (vcpu->arch.state == KVMPPC_VCPU_BUSY_IN_HOST)
vcpu->arch.busy_preempt = mftb();
spin_unlock_irqrestore(&vcpu->arch.tbacct_lock, flags);
@@ -191,9 +205,6 @@ int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
struct kvmppc_vcore *vc = vcpu->arch.vcore;
if (arch_compat) {
- if (!cpu_has_feature(CPU_FTR_ARCH_206))
- return -EINVAL; /* 970 has no compat mode support */
-
switch (arch_compat) {
case PVR_ARCH_205:
/*
@@ -505,25 +516,14 @@ static void kvmppc_update_vpas(struct kvm_vcpu *vcpu)
static u64 vcore_stolen_time(struct kvmppc_vcore *vc, u64 now)
{
u64 p;
+ unsigned long flags;
- /*
- * If we are the task running the vcore, then since we hold
- * the vcore lock, we can't be preempted, so stolen_tb/preempt_tb
- * can't be updated, so we don't need the tbacct_lock.
- * If the vcore is inactive, it can't become active (since we
- * hold the vcore lock), so the vcpu load/put functions won't
- * update stolen_tb/preempt_tb, and we don't need tbacct_lock.
- */
+ spin_lock_irqsave(&vc->stoltb_lock, flags);
+ p = vc->stolen_tb;
if (vc->vcore_state != VCORE_INACTIVE &&
- vc->runner->arch.run_task != current) {
- spin_lock_irq(&vc->runner->arch.tbacct_lock);
- p = vc->stolen_tb;
- if (vc->preempt_tb != TB_NIL)
- p += now - vc->preempt_tb;
- spin_unlock_irq(&vc->runner->arch.tbacct_lock);
- } else {
- p = vc->stolen_tb;
- }
+ vc->preempt_tb != TB_NIL)
+ p += now - vc->preempt_tb;
+ spin_unlock_irqrestore(&vc->stoltb_lock, flags);
return p;
}
@@ -607,10 +607,45 @@ static int kvmppc_h_set_mode(struct kvm_vcpu *vcpu, unsigned long mflags,
}
}
+static int kvm_arch_vcpu_yield_to(struct kvm_vcpu *target)
+{
+ struct kvmppc_vcore *vcore = target->arch.vcore;
+
+ /*
+ * We expect to have been called by the real mode handler
+ * (kvmppc_rm_h_confer()) which would have directly returned
+ * H_SUCCESS if the source vcore wasn't idle (e.g. if it may
+ * have useful work to do and should not confer) so we don't
+ * recheck that here.
+ */
+
+ spin_lock(&vcore->lock);
+ if (target->arch.state == KVMPPC_VCPU_RUNNABLE &&
+ vcore->vcore_state != VCORE_INACTIVE)
+ target = vcore->runner;
+ spin_unlock(&vcore->lock);
+
+ return kvm_vcpu_yield_to(target);
+}
+
+static int kvmppc_get_yield_count(struct kvm_vcpu *vcpu)
+{
+ int yield_count = 0;
+ struct lppaca *lppaca;
+
+ spin_lock(&vcpu->arch.vpa_update_lock);
+ lppaca = (struct lppaca *)vcpu->arch.vpa.pinned_addr;
+ if (lppaca)
+ yield_count = lppaca->yield_count;
+ spin_unlock(&vcpu->arch.vpa_update_lock);
+ return yield_count;
+}
+
int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
{
unsigned long req = kvmppc_get_gpr(vcpu, 3);
unsigned long target, ret = H_SUCCESS;
+ int yield_count;
struct kvm_vcpu *tvcpu;
int idx, rc;
@@ -619,14 +654,6 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
return RESUME_HOST;
switch (req) {
- case H_ENTER:
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- ret = kvmppc_virtmode_h_enter(vcpu, kvmppc_get_gpr(vcpu, 4),
- kvmppc_get_gpr(vcpu, 5),
- kvmppc_get_gpr(vcpu, 6),
- kvmppc_get_gpr(vcpu, 7));
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
- break;
case H_CEDE:
break;
case H_PROD:
@@ -654,7 +681,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu)
ret = H_PARAMETER;
break;
}
- kvm_vcpu_yield_to(tvcpu);
+ yield_count = kvmppc_get_gpr(vcpu, 5);
+ if (kvmppc_get_yield_count(tvcpu) != yield_count)
+ break;
+ kvm_arch_vcpu_yield_to(tvcpu);
break;
case H_REGISTER_VPA:
ret = do_h_register_vpa(vcpu, kvmppc_get_gpr(vcpu, 4),
@@ -769,6 +799,8 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
+ /* HMI is hypervisor interrupt and host has handled it. Resume guest.*/
+ case BOOK3S_INTERRUPT_HMI:
case BOOK3S_INTERRUPT_PERFMON:
r = RESUME_GUEST;
break;
@@ -837,6 +869,10 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
* Accordingly return to Guest or Host.
*/
case BOOK3S_INTERRUPT_H_EMUL_ASSIST:
+ if (vcpu->arch.emul_inst != KVM_INST_FETCH_FAILED)
+ vcpu->arch.last_inst = kvmppc_need_byteswap(vcpu) ?
+ swab32(vcpu->arch.emul_inst) :
+ vcpu->arch.emul_inst;
if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP) {
r = kvmppc_emulate_debug_inst(run, vcpu);
} else {
@@ -1357,6 +1393,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
INIT_LIST_HEAD(&vcore->runnable_threads);
spin_lock_init(&vcore->lock);
+ spin_lock_init(&vcore->stoltb_lock);
init_waitqueue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
@@ -1694,9 +1731,11 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
vc->n_woken = 0;
vc->nap_count = 0;
vc->entry_exit_count = 0;
+ vc->preempt_tb = TB_NIL;
vc->vcore_state = VCORE_STARTING;
vc->in_guest = 0;
vc->napping_threads = 0;
+ vc->conferring_threads = 0;
/*
* Updating any of the vpas requires calling kvmppc_pin_guest_page,
@@ -1726,6 +1765,7 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
kvmppc_start_thread(vcpu);
kvmppc_create_dtl_entry(vcpu, vc);
+ trace_kvm_guest_enter(vcpu);
}
/* Set this explicitly in case thread 0 doesn't have a vcpu */
@@ -1734,6 +1774,9 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
vc->vcore_state = VCORE_RUNNING;
preempt_disable();
+
+ trace_kvmppc_run_core(vc, 0);
+
spin_unlock(&vc->lock);
kvm_guest_enter();
@@ -1779,6 +1822,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
kvmppc_core_pending_dec(vcpu))
kvmppc_core_dequeue_dec(vcpu);
+ trace_kvm_guest_exit(vcpu);
+
ret = RESUME_GUEST;
if (vcpu->arch.trap)
ret = kvmppc_handle_exit_hv(vcpu->arch.kvm_run, vcpu,
@@ -1804,6 +1849,8 @@ static void kvmppc_run_core(struct kvmppc_vcore *vc)
wake_up(&vcpu->arch.cpu_run);
}
}
+
+ trace_kvmppc_run_core(vc, 1);
}
/*
@@ -1826,15 +1873,37 @@ static void kvmppc_wait_for_exec(struct kvm_vcpu *vcpu, int wait_state)
*/
static void kvmppc_vcore_blocked(struct kvmppc_vcore *vc)
{
+ struct kvm_vcpu *vcpu;
+ int do_sleep = 1;
+
DEFINE_WAIT(wait);
prepare_to_wait(&vc->wq, &wait, TASK_INTERRUPTIBLE);
+
+ /*
+ * Check one last time for pending exceptions and ceded state after
+ * we put ourselves on the wait queue
+ */
+ list_for_each_entry(vcpu, &vc->runnable_threads, arch.run_list) {
+ if (vcpu->arch.pending_exceptions || !vcpu->arch.ceded) {
+ do_sleep = 0;
+ break;
+ }
+ }
+
+ if (!do_sleep) {
+ finish_wait(&vc->wq, &wait);
+ return;
+ }
+
vc->vcore_state = VCORE_SLEEPING;
+ trace_kvmppc_vcore_blocked(vc, 0);
spin_unlock(&vc->lock);
schedule();
finish_wait(&vc->wq, &wait);
spin_lock(&vc->lock);
vc->vcore_state = VCORE_INACTIVE;
+ trace_kvmppc_vcore_blocked(vc, 1);
}
static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
@@ -1843,6 +1912,8 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
struct kvmppc_vcore *vc;
struct kvm_vcpu *v, *vn;
+ trace_kvmppc_run_vcpu_enter(vcpu);
+
kvm_run->exit_reason = 0;
vcpu->arch.ret = RESUME_GUEST;
vcpu->arch.trap = 0;
@@ -1872,6 +1943,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
VCORE_EXIT_COUNT(vc) == 0) {
kvmppc_create_dtl_entry(vcpu, vc);
kvmppc_start_thread(vcpu);
+ trace_kvm_guest_enter(vcpu);
} else if (vc->vcore_state == VCORE_SLEEPING) {
wake_up(&vc->wq);
}
@@ -1936,6 +2008,7 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
wake_up(&v->arch.cpu_run);
}
+ trace_kvmppc_run_vcpu_exit(vcpu, kvm_run);
spin_unlock(&vc->lock);
return vcpu->arch.ret;
}
@@ -1962,7 +2035,7 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
/* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
smp_mb();
- /* On the first time here, set up HTAB and VRMA or RMA */
+ /* On the first time here, set up HTAB and VRMA */
if (!vcpu->kvm->arch.rma_setup_done) {
r = kvmppc_hv_setup_htab_rma(vcpu);
if (r)
@@ -1981,7 +2054,9 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
if (run->exit_reason == KVM_EXIT_PAPR_HCALL &&
!(vcpu->arch.shregs.msr & MSR_PR)) {
+ trace_kvm_hcall_enter(vcpu);
r = kvmppc_pseries_do_hcall(vcpu);
+ trace_kvm_hcall_exit(vcpu, r);
kvmppc_core_prepare_to_enter(vcpu);
} else if (r == RESUME_PAGE_FAULT) {
srcu_idx = srcu_read_lock(&vcpu->kvm->srcu);
@@ -1997,98 +2072,6 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
return r;
}
-
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
- Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
- switch (rma_size) {
- case 32ul << 20: /* 32 MB */
- if (cpu_has_feature(CPU_FTR_ARCH_206))
- return 8; /* only supported on POWER7 */
- return -1;
- case 64ul << 20: /* 64 MB */
- return 3;
- case 128ul << 20: /* 128 MB */
- return 7;
- case 256ul << 20: /* 256 MB */
- return 4;
- case 1ul << 30: /* 1 GB */
- return 2;
- case 16ul << 30: /* 16 GB */
- return 1;
- case 256ul << 30: /* 256 GB */
- return 0;
- default:
- return -1;
- }
-}
-
-static int kvm_rma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- struct page *page;
- struct kvm_rma_info *ri = vma->vm_file->private_data;
-
- if (vmf->pgoff >= kvm_rma_pages)
- return VM_FAULT_SIGBUS;
-
- page = pfn_to_page(ri->base_pfn + vmf->pgoff);
- get_page(page);
- vmf->page = page;
- return 0;
-}
-
-static const struct vm_operations_struct kvm_rma_vm_ops = {
- .fault = kvm_rma_fault,
-};
-
-static int kvm_rma_mmap(struct file *file, struct vm_area_struct *vma)
-{
- vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
- vma->vm_ops = &kvm_rma_vm_ops;
- return 0;
-}
-
-static int kvm_rma_release(struct inode *inode, struct file *filp)
-{
- struct kvm_rma_info *ri = filp->private_data;
-
- kvm_release_rma(ri);
- return 0;
-}
-
-static const struct file_operations kvm_rma_fops = {
- .mmap = kvm_rma_mmap,
- .release = kvm_rma_release,
-};
-
-static long kvm_vm_ioctl_allocate_rma(struct kvm *kvm,
- struct kvm_allocate_rma *ret)
-{
- long fd;
- struct kvm_rma_info *ri;
- /*
- * Only do this on PPC970 in HV mode
- */
- if (!cpu_has_feature(CPU_FTR_HVMODE) ||
- !cpu_has_feature(CPU_FTR_ARCH_201))
- return -EINVAL;
-
- if (!kvm_rma_pages)
- return -EINVAL;
-
- ri = kvm_alloc_rma();
- if (!ri)
- return -ENOMEM;
-
- fd = anon_inode_getfd("kvm-rma", &kvm_rma_fops, ri, O_RDWR | O_CLOEXEC);
- if (fd < 0)
- kvm_release_rma(ri);
-
- ret->rma_size = kvm_rma_pages << PAGE_SHIFT;
- return fd;
-}
-
static void kvmppc_add_seg_page_size(struct kvm_ppc_one_seg_page_size **sps,
int linux_psize)
{
@@ -2167,26 +2150,6 @@ out:
return r;
}
-static void unpin_slot(struct kvm_memory_slot *memslot)
-{
- unsigned long *physp;
- unsigned long j, npages, pfn;
- struct page *page;
-
- physp = memslot->arch.slot_phys;
- npages = memslot->npages;
- if (!physp)
- return;
- for (j = 0; j < npages; j++) {
- if (!(physp[j] & KVMPPC_GOT_PAGE))
- continue;
- pfn = physp[j] >> PAGE_SHIFT;
- page = pfn_to_page(pfn);
- SetPageDirty(page);
- put_page(page);
- }
-}
-
static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
struct kvm_memory_slot *dont)
{
@@ -2194,11 +2157,6 @@ static void kvmppc_core_free_memslot_hv(struct kvm_memory_slot *free,
vfree(free->arch.rmap);
free->arch.rmap = NULL;
}
- if (!dont || free->arch.slot_phys != dont->arch.slot_phys) {
- unpin_slot(free);
- vfree(free->arch.slot_phys);
- free->arch.slot_phys = NULL;
- }
}
static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
@@ -2207,7 +2165,6 @@ static int kvmppc_core_create_memslot_hv(struct kvm_memory_slot *slot,
slot->arch.rmap = vzalloc(npages * sizeof(*slot->arch.rmap));
if (!slot->arch.rmap)
return -ENOMEM;
- slot->arch.slot_phys = NULL;
return 0;
}
@@ -2216,17 +2173,6 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm,
struct kvm_memory_slot *memslot,
struct kvm_userspace_memory_region *mem)
{
- unsigned long *phys;
-
- /* Allocate a slot_phys array if needed */
- phys = memslot->arch.slot_phys;
- if (!kvm->arch.using_mmu_notifiers && !phys && memslot->npages) {
- phys = vzalloc(memslot->npages * sizeof(unsigned long));
- if (!phys)
- return -ENOMEM;
- memslot->arch.slot_phys = phys;
- }
-
return 0;
}
@@ -2284,17 +2230,11 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
int err = 0;
struct kvm *kvm = vcpu->kvm;
- struct kvm_rma_info *ri = NULL;
unsigned long hva;
struct kvm_memory_slot *memslot;
struct vm_area_struct *vma;
unsigned long lpcr = 0, senc;
- unsigned long lpcr_mask = 0;
unsigned long psize, porder;
- unsigned long rma_size;
- unsigned long rmls;
- unsigned long *physp;
- unsigned long i, npages;
int srcu_idx;
mutex_lock(&kvm->lock);
@@ -2329,88 +2269,25 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
psize = vma_kernel_pagesize(vma);
porder = __ilog2(psize);
- /* Is this one of our preallocated RMAs? */
- if (vma->vm_file && vma->vm_file->f_op == &kvm_rma_fops &&
- hva == vma->vm_start)
- ri = vma->vm_file->private_data;
-
up_read(&current->mm->mmap_sem);
- if (!ri) {
- /* On POWER7, use VRMA; on PPC970, give up */
- err = -EPERM;
- if (cpu_has_feature(CPU_FTR_ARCH_201)) {
- pr_err("KVM: CPU requires an RMO\n");
- goto out_srcu;
- }
+ /* We can handle 4k, 64k or 16M pages in the VRMA */
+ err = -EINVAL;
+ if (!(psize == 0x1000 || psize == 0x10000 ||
+ psize == 0x1000000))
+ goto out_srcu;
- /* We can handle 4k, 64k or 16M pages in the VRMA */
- err = -EINVAL;
- if (!(psize == 0x1000 || psize == 0x10000 ||
- psize == 0x1000000))
- goto out_srcu;
+ /* Update VRMASD field in the LPCR */
+ senc = slb_pgsize_encoding(psize);
+ kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
+ (VRMA_VSID << SLB_VSID_SHIFT_1T);
+ /* the -4 is to account for senc values starting at 0x10 */
+ lpcr = senc << (LPCR_VRMASD_SH - 4);
- /* Update VRMASD field in the LPCR */
- senc = slb_pgsize_encoding(psize);
- kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
- (VRMA_VSID << SLB_VSID_SHIFT_1T);
- lpcr_mask = LPCR_VRMASD;
- /* the -4 is to account for senc values starting at 0x10 */
- lpcr = senc << (LPCR_VRMASD_SH - 4);
+ /* Create HPTEs in the hash page table for the VRMA */
+ kvmppc_map_vrma(vcpu, memslot, porder);
- /* Create HPTEs in the hash page table for the VRMA */
- kvmppc_map_vrma(vcpu, memslot, porder);
-
- } else {
- /* Set up to use an RMO region */
- rma_size = kvm_rma_pages;
- if (rma_size > memslot->npages)
- rma_size = memslot->npages;
- rma_size <<= PAGE_SHIFT;
- rmls = lpcr_rmls(rma_size);
- err = -EINVAL;
- if ((long)rmls < 0) {
- pr_err("KVM: Can't use RMA of 0x%lx bytes\n", rma_size);
- goto out_srcu;
- }
- atomic_inc(&ri->use_count);
- kvm->arch.rma = ri;
-
- /* Update LPCR and RMOR */
- if (cpu_has_feature(CPU_FTR_ARCH_201)) {
- /* PPC970; insert RMLS value (split field) in HID4 */
- lpcr_mask = (1ul << HID4_RMLS0_SH) |
- (3ul << HID4_RMLS2_SH) | HID4_RMOR;
- lpcr = ((rmls >> 2) << HID4_RMLS0_SH) |
- ((rmls & 3) << HID4_RMLS2_SH);
- /* RMOR is also in HID4 */
- lpcr |= ((ri->base_pfn >> (26 - PAGE_SHIFT)) & 0xffff)
- << HID4_RMOR_SH;
- } else {
- /* POWER7 */
- lpcr_mask = LPCR_VPM0 | LPCR_VRMA_L | LPCR_RMLS;
- lpcr = rmls << LPCR_RMLS_SH;
- kvm->arch.rmor = ri->base_pfn << PAGE_SHIFT;
- }
- pr_info("KVM: Using RMO at %lx size %lx (LPCR = %lx)\n",
- ri->base_pfn << PAGE_SHIFT, rma_size, lpcr);
-
- /* Initialize phys addrs of pages in RMO */
- npages = kvm_rma_pages;
- porder = __ilog2(npages);
- physp = memslot->arch.slot_phys;
- if (physp) {
- if (npages > memslot->npages)
- npages = memslot->npages;
- spin_lock(&kvm->arch.slot_phys_lock);
- for (i = 0; i < npages; ++i)
- physp[i] = ((ri->base_pfn + i) << PAGE_SHIFT) +
- porder;
- spin_unlock(&kvm->arch.slot_phys_lock);
- }
- }
-
- kvmppc_update_lpcr(kvm, lpcr, lpcr_mask);
+ kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
/* Order updates to kvm->arch.lpcr etc. vs. rma_setup_done */
smp_wmb();
@@ -2449,35 +2326,21 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
sizeof(kvm->arch.enabled_hcalls));
- kvm->arch.rma = NULL;
-
kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
- if (cpu_has_feature(CPU_FTR_ARCH_201)) {
- /* PPC970; HID4 is effectively the LPCR */
- kvm->arch.host_lpid = 0;
- kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
- lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));
- lpcr |= ((lpid >> 4) << HID4_LPID1_SH) |
- ((lpid & 0xf) << HID4_LPID5_SH);
- } else {
- /* POWER7; init LPCR for virtual RMA mode */
- kvm->arch.host_lpid = mfspr(SPRN_LPID);
- kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
- lpcr &= LPCR_PECE | LPCR_LPES;
- lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
- LPCR_VPM0 | LPCR_VPM1;
- kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
- (VRMA_VSID << SLB_VSID_SHIFT_1T);
- /* On POWER8 turn on online bit to enable PURR/SPURR */
- if (cpu_has_feature(CPU_FTR_ARCH_207S))
- lpcr |= LPCR_ONL;
- }
+ /* Init LPCR for virtual RMA mode */
+ kvm->arch.host_lpid = mfspr(SPRN_LPID);
+ kvm->arch.host_lpcr = lpcr = mfspr(SPRN_LPCR);
+ lpcr &= LPCR_PECE | LPCR_LPES;
+ lpcr |= (4UL << LPCR_DPFD_SH) | LPCR_HDICE |
+ LPCR_VPM0 | LPCR_VPM1;
+ kvm->arch.vrma_slb_v = SLB_VSID_B_1T |
+ (VRMA_VSID << SLB_VSID_SHIFT_1T);
+ /* On POWER8 turn on online bit to enable PURR/SPURR */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ lpcr |= LPCR_ONL;
kvm->arch.lpcr = lpcr;
- kvm->arch.using_mmu_notifiers = !!cpu_has_feature(CPU_FTR_ARCH_206);
- spin_lock_init(&kvm->arch.slot_phys_lock);
-
/*
* Track that we now have a HV mode VM active. This blocks secondary
* CPU threads from coming online.
@@ -2507,10 +2370,6 @@ static void kvmppc_core_destroy_vm_hv(struct kvm *kvm)
kvm_hv_vm_deactivated();
kvmppc_free_vcores(kvm);
- if (kvm->arch.rma) {
- kvm_release_rma(kvm->arch.rma);
- kvm->arch.rma = NULL;
- }
kvmppc_free_hpt(kvm);
}
@@ -2536,7 +2395,8 @@ static int kvmppc_core_emulate_mfspr_hv(struct kvm_vcpu *vcpu, int sprn,
static int kvmppc_core_check_processor_compat_hv(void)
{
- if (!cpu_has_feature(CPU_FTR_HVMODE))
+ if (!cpu_has_feature(CPU_FTR_HVMODE) ||
+ !cpu_has_feature(CPU_FTR_ARCH_206))
return -EIO;
return 0;
}
@@ -2550,16 +2410,6 @@ static long kvm_arch_vm_ioctl_hv(struct file *filp,
switch (ioctl) {
- case KVM_ALLOCATE_RMA: {
- struct kvm_allocate_rma rma;
- struct kvm *kvm = filp->private_data;
-
- r = kvm_vm_ioctl_allocate_rma(kvm, &rma);
- if (r >= 0 && copy_to_user(argp, &rma, sizeof(rma)))
- r = -EFAULT;
- break;
- }
-
case KVM_PPC_ALLOCATE_HTAB: {
u32 htab_order;
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 3f1bb5a36c27..1f083ff8a61a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -16,6 +16,7 @@
#include <linux/memblock.h>
#include <linux/sizes.h>
#include <linux/cma.h>
+#include <linux/bitops.h>
#include <asm/cputable.h>
#include <asm/kvm_ppc.h>
@@ -32,95 +33,9 @@
* By default we reserve 5% of memory for hash pagetable allocation.
*/
static unsigned long kvm_cma_resv_ratio = 5;
-/*
- * We allocate RMAs (real mode areas) for KVM guests from the KVM CMA area.
- * Each RMA has to be physically contiguous and of a size that the
- * hardware supports. PPC970 and POWER7 support 64MB, 128MB and 256MB,
- * and other larger sizes. Since we are unlikely to be allocate that
- * much physically contiguous memory after the system is up and running,
- * we preallocate a set of RMAs in early boot using CMA.
- * should be power of 2.
- */
-unsigned long kvm_rma_pages = (1 << 27) >> PAGE_SHIFT; /* 128MB */
-EXPORT_SYMBOL_GPL(kvm_rma_pages);
static struct cma *kvm_cma;
-/* Work out RMLS (real mode limit selector) field value for a given RMA size.
- Assumes POWER7 or PPC970. */
-static inline int lpcr_rmls(unsigned long rma_size)
-{
- switch (rma_size) {
- case 32ul << 20: /* 32 MB */
- if (cpu_has_feature(CPU_FTR_ARCH_206))
- return 8; /* only supported on POWER7 */
- return -1;
- case 64ul << 20: /* 64 MB */
- return 3;
- case 128ul << 20: /* 128 MB */
- return 7;
- case 256ul << 20: /* 256 MB */
- return 4;
- case 1ul << 30: /* 1 GB */
- return 2;
- case 16ul << 30: /* 16 GB */
- return 1;
- case 256ul << 30: /* 256 GB */
- return 0;
- default:
- return -1;
- }
-}
-
-static int __init early_parse_rma_size(char *p)
-{
- unsigned long kvm_rma_size;
-
- pr_debug("%s(%s)\n", __func__, p);
- if (!p)
- return -EINVAL;
- kvm_rma_size = memparse(p, &p);
- /*
- * Check that the requested size is one supported in hardware
- */
- if (lpcr_rmls(kvm_rma_size) < 0) {
- pr_err("RMA size of 0x%lx not supported\n", kvm_rma_size);
- return -EINVAL;
- }
- kvm_rma_pages = kvm_rma_size >> PAGE_SHIFT;
- return 0;
-}
-early_param("kvm_rma_size", early_parse_rma_size);
-
-struct kvm_rma_info *kvm_alloc_rma()
-{
- struct page *page;
- struct kvm_rma_info *ri;
-
- ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
- if (!ri)
- return NULL;
- page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages));
- if (!page)
- goto err_out;
- atomic_set(&ri->use_count, 1);
- ri->base_pfn = page_to_pfn(page);
- return ri;
-err_out:
- kfree(ri);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(kvm_alloc_rma);
-
-void kvm_release_rma(struct kvm_rma_info *ri)
-{
- if (atomic_dec_and_test(&ri->use_count)) {
- cma_release(kvm_cma, pfn_to_page(ri->base_pfn), kvm_rma_pages);
- kfree(ri);
- }
-}
-EXPORT_SYMBOL_GPL(kvm_release_rma);
-
static int __init early_parse_kvm_cma_resv(char *p)
{
pr_debug("%s(%s)\n", __func__, p);
@@ -132,14 +47,9 @@ early_param("kvm_cma_resv_ratio", early_parse_kvm_cma_resv);
struct page *kvm_alloc_hpt(unsigned long nr_pages)
{
- unsigned long align_pages = HPT_ALIGN_PAGES;
-
VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
- /* Old CPUs require HPT aligned on a multiple of its size */
- if (!cpu_has_feature(CPU_FTR_ARCH_206))
- align_pages = nr_pages;
- return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages));
+ return cma_alloc(kvm_cma, nr_pages, order_base_2(HPT_ALIGN_PAGES));
}
EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
@@ -180,22 +90,44 @@ void __init kvm_cma_reserve(void)
if (selected_size) {
pr_debug("%s: reserving %ld MiB for global area\n", __func__,
(unsigned long)selected_size / SZ_1M);
- /*
- * Old CPUs require HPT aligned on a multiple of its size. So for them
- * make the alignment as max size we could request.
- */
- if (!cpu_has_feature(CPU_FTR_ARCH_206))
- align_size = __rounddown_pow_of_two(selected_size);
- else
- align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
-
- align_size = max(kvm_rma_pages << PAGE_SHIFT, align_size);
+ align_size = HPT_ALIGN_PAGES << PAGE_SHIFT;
cma_declare_contiguous(0, selected_size, 0, align_size,
KVM_CMA_CHUNK_ORDER - PAGE_SHIFT, false, &kvm_cma);
}
}
/*
+ * Real-mode H_CONFER implementation.
+ * We check if we are the only vcpu out of this virtual core
+ * still running in the guest and not ceded. If so, we pop up
+ * to the virtual-mode implementation; if not, just return to
+ * the guest.
+ */
+long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
+ unsigned int yield_count)
+{
+ struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ int threads_running;
+ int threads_ceded;
+ int threads_conferring;
+ u64 stop = get_tb() + 10 * tb_ticks_per_usec;
+ int rv = H_SUCCESS; /* => don't yield */
+
+ set_bit(vcpu->arch.ptid, &vc->conferring_threads);
+ while ((get_tb() < stop) && (VCORE_EXIT_COUNT(vc) == 0)) {
+ threads_running = VCORE_ENTRY_COUNT(vc);
+ threads_ceded = hweight32(vc->napping_threads);
+ threads_conferring = hweight32(vc->conferring_threads);
+ if (threads_ceded + threads_conferring >= threads_running) {
+ rv = H_TOO_HARD; /* => do yield */
+ break;
+ }
+ }
+ clear_bit(vcpu->arch.ptid, &vc->conferring_threads);
+ return rv;
+}
+
+/*
* When running HV mode KVM we need to block certain operations while KVM VMs
* exist in the system. We use a counter of VMs to track this.
*
diff --git a/arch/powerpc/kvm/book3s_hv_interrupts.S b/arch/powerpc/kvm/book3s_hv_interrupts.S
index 731be7478b27..36540a99d178 100644
--- a/arch/powerpc/kvm/book3s_hv_interrupts.S
+++ b/arch/powerpc/kvm/book3s_hv_interrupts.S
@@ -52,10 +52,8 @@ _GLOBAL(__kvmppc_vcore_entry)
std r3, _CCR(r1)
/* Save host DSCR */
-BEGIN_FTR_SECTION
mfspr r3, SPRN_DSCR
std r3, HSTATE_DSCR(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
BEGIN_FTR_SECTION
/* Save host DABR */
@@ -84,11 +82,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mfspr r7, SPRN_MMCR0 /* save MMCR0 */
mtspr SPRN_MMCR0, r3 /* freeze all counters, disable interrupts */
mfspr r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
- /* On P7, clear MMCRA in order to disable SDAR updates */
+ /* Clear MMCRA in order to disable SDAR updates */
li r5, 0
mtspr SPRN_MMCRA, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
isync
ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */
lbz r5, LPPACA_PMCINUSE(r3)
@@ -113,20 +109,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mfspr r7, SPRN_PMC4
mfspr r8, SPRN_PMC5
mfspr r9, SPRN_PMC6
-BEGIN_FTR_SECTION
- mfspr r10, SPRN_PMC7
- mfspr r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
stw r3, HSTATE_PMC(r13)
stw r5, HSTATE_PMC + 4(r13)
stw r6, HSTATE_PMC + 8(r13)
stw r7, HSTATE_PMC + 12(r13)
stw r8, HSTATE_PMC + 16(r13)
stw r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
- stw r10, HSTATE_PMC + 24(r13)
- stw r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
31:
/*
@@ -140,31 +128,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)
-#ifdef CONFIG_SMP
- /*
- * On PPC970, if the guest vcpu has an external interrupt pending,
- * send ourselves an IPI so as to interrupt the guest once it
- * enables interrupts. (It must have interrupts disabled,
- * otherwise we would already have delivered the interrupt.)
- *
- * XXX If this is a UP build, smp_send_reschedule is not available,
- * so the interrupt will be delayed until the next time the vcpu
- * enters the guest with interrupts enabled.
- */
-BEGIN_FTR_SECTION
- ld r4, HSTATE_KVM_VCPU(r13)
- ld r0, VCPU_PENDING_EXC(r4)
- li r7, (1 << BOOK3S_IRQPRIO_EXTERNAL)
- oris r7, r7, (1 << BOOK3S_IRQPRIO_EXTERNAL_LEVEL)@h
- and. r0, r0, r7
- beq 32f
- lhz r3, PACAPACAINDEX(r13)
- bl smp_send_reschedule
- nop
-32:
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-#endif /* CONFIG_SMP */
-
/* Jump to partition switch code */
bl kvmppc_hv_entry_trampoline
nop
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index d562c8e2bc30..60081bd75847 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -138,8 +138,5 @@ out:
long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu)
{
- if (cpu_has_feature(CPU_FTR_ARCH_206))
- return kvmppc_realmode_mc_power7(vcpu);
-
- return 0;
+ return kvmppc_realmode_mc_power7(vcpu);
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 084ad54c73cd..510bdfbc4073 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -45,16 +45,12 @@ static int global_invalidates(struct kvm *kvm, unsigned long flags)
* as indicated by local_paca->kvm_hstate.kvm_vcpu being set,
* we can use tlbiel as long as we mark all other physical
* cores as potentially having stale TLB entries for this lpid.
- * If we're not using MMU notifiers, we never take pages away
- * from the guest, so we can use tlbiel if requested.
* Otherwise, don't use tlbiel.
*/
if (kvm->arch.online_vcores == 1 && local_paca->kvm_hstate.kvm_vcpu)
global = 0;
- else if (kvm->arch.using_mmu_notifiers)
- global = 1;
else
- global = !(flags & H_LOCAL);
+ global = 1;
if (!global) {
/* any other core might now have stale TLB entries... */
@@ -170,7 +166,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
struct revmap_entry *rev;
unsigned long g_ptel;
struct kvm_memory_slot *memslot;
- unsigned long *physp, pte_size;
+ unsigned long pte_size;
unsigned long is_io;
unsigned long *rmap;
pte_t pte;
@@ -198,9 +194,6 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
is_io = ~0ul;
rmap = NULL;
if (!(memslot && !(memslot->flags & KVM_MEMSLOT_INVALID))) {
- /* PPC970 can't do emulated MMIO */
- if (!cpu_has_feature(CPU_FTR_ARCH_206))
- return H_PARAMETER;
/* Emulated MMIO - mark this with key=31 */
pteh |= HPTE_V_ABSENT;
ptel |= HPTE_R_KEY_HI | HPTE_R_KEY_LO;
@@ -213,37 +206,20 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
slot_fn = gfn - memslot->base_gfn;
rmap = &memslot->arch.rmap[slot_fn];
- if (!kvm->arch.using_mmu_notifiers) {
- physp = memslot->arch.slot_phys;
- if (!physp)
- return H_PARAMETER;
- physp += slot_fn;
- if (realmode)
- physp = real_vmalloc_addr(physp);
- pa = *physp;
- if (!pa)
- return H_TOO_HARD;
- is_io = pa & (HPTE_R_I | HPTE_R_W);
- pte_size = PAGE_SIZE << (pa & KVMPPC_PAGE_ORDER_MASK);
- pa &= PAGE_MASK;
+ /* Translate to host virtual address */
+ hva = __gfn_to_hva_memslot(memslot, gfn);
+
+ /* Look up the Linux PTE for the backing page */
+ pte_size = psize;
+ pte = lookup_linux_pte_and_update(pgdir, hva, writing, &pte_size);
+ if (pte_present(pte) && !pte_numa(pte)) {
+ if (writing && !pte_write(pte))
+ /* make the actual HPTE be read-only */
+ ptel = hpte_make_readonly(ptel);
+ is_io = hpte_cache_bits(pte_val(pte));
+ pa = pte_pfn(pte) << PAGE_SHIFT;
+ pa |= hva & (pte_size - 1);
pa |= gpa & ~PAGE_MASK;
- } else {
- /* Translate to host virtual address */
- hva = __gfn_to_hva_memslot(memslot, gfn);
-
- /* Look up the Linux PTE for the backing page */
- pte_size = psize;
- pte = lookup_linux_pte_and_update(pgdir, hva, writing,
- &pte_size);
- if (pte_present(pte) && !pte_numa(pte)) {
- if (writing && !pte_write(pte))
- /* make the actual HPTE be read-only */
- ptel = hpte_make_readonly(ptel);
- is_io = hpte_cache_bits(pte_val(pte));
- pa = pte_pfn(pte) << PAGE_SHIFT;
- pa |= hva & (pte_size - 1);
- pa |= gpa & ~PAGE_MASK;
- }
}
if (pte_size < psize)
@@ -337,8 +313,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
rmap = real_vmalloc_addr(rmap);
lock_rmap(rmap);
/* Check for pending invalidations under the rmap chain lock */
- if (kvm->arch.using_mmu_notifiers &&
- mmu_notifier_retry(kvm, mmu_seq)) {
+ if (mmu_notifier_retry(kvm, mmu_seq)) {
/* inval in progress, write a non-present HPTE */
pteh |= HPTE_V_ABSENT;
pteh &= ~HPTE_V_VALID;
@@ -395,61 +370,11 @@ static inline int try_lock_tlbie(unsigned int *lock)
return old == 0;
}
-/*
- * tlbie/tlbiel is a bit different on the PPC970 compared to later
- * processors such as POWER7; the large page bit is in the instruction
- * not RB, and the top 16 bits and the bottom 12 bits of the VA
- * in RB must be 0.
- */
-static void do_tlbies_970(struct kvm *kvm, unsigned long *rbvalues,
- long npages, int global, bool need_sync)
-{
- long i;
-
- if (global) {
- while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
- cpu_relax();
- if (need_sync)
- asm volatile("ptesync" : : : "memory");
- for (i = 0; i < npages; ++i) {
- unsigned long rb = rbvalues[i];
-
- if (rb & 1) /* large page */
- asm volatile("tlbie %0,1" : :
- "r" (rb & 0x0000fffffffff000ul));
- else
- asm volatile("tlbie %0,0" : :
- "r" (rb & 0x0000fffffffff000ul));
- }
- asm volatile("eieio; tlbsync; ptesync" : : : "memory");
- kvm->arch.tlbie_lock = 0;
- } else {
- if (need_sync)
- asm volatile("ptesync" : : : "memory");
- for (i = 0; i < npages; ++i) {
- unsigned long rb = rbvalues[i];
-
- if (rb & 1) /* large page */
- asm volatile("tlbiel %0,1" : :
- "r" (rb & 0x0000fffffffff000ul));
- else
- asm volatile("tlbiel %0,0" : :
- "r" (rb & 0x0000fffffffff000ul));
- }
- asm volatile("ptesync" : : : "memory");
- }
-}
-
static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
long npages, int global, bool need_sync)
{
long i;
- if (cpu_has_feature(CPU_FTR_ARCH_201)) {
- /* PPC970 tlbie instruction is a bit different */
- do_tlbies_970(kvm, rbvalues, npages, global, need_sync);
- return;
- }
if (global) {
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
@@ -667,40 +592,29 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
rev->guest_rpte = r;
note_hpte_modification(kvm, rev);
}
- r = (be64_to_cpu(hpte[1]) & ~mask) | bits;
/* Update HPTE */
if (v & HPTE_V_VALID) {
- rb = compute_tlbie_rb(v, r, pte_index);
- hpte[0] = cpu_to_be64(v & ~HPTE_V_VALID);
- do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
/*
- * If the host has this page as readonly but the guest
- * wants to make it read/write, reduce the permissions.
- * Checking the host permissions involves finding the
- * memslot and then the Linux PTE for the page.
+ * If the page is valid, don't let it transition from
+ * readonly to writable. If it should be writable, we'll
+ * take a trap and let the page fault code sort it out.
*/
- if (hpte_is_writable(r) && kvm->arch.using_mmu_notifiers) {
- unsigned long psize, gfn, hva;
- struct kvm_memory_slot *memslot;
- pgd_t *pgdir = vcpu->arch.pgdir;
- pte_t pte;
-
- psize = hpte_page_size(v, r);
- gfn = ((r & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT;
- memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
- if (memslot) {
- hva = __gfn_to_hva_memslot(memslot, gfn);
- pte = lookup_linux_pte_and_update(pgdir, hva,
- 1, &psize);
- if (pte_present(pte) && !pte_write(pte))
- r = hpte_make_readonly(r);
- }
+ pte = be64_to_cpu(hpte[1]);
+ r = (pte & ~mask) | bits;
+ if (hpte_is_writable(r) && !hpte_is_writable(pte))
+ r = hpte_make_readonly(r);
+ /* If the PTE is changing, invalidate it first */
+ if (r != pte) {
+ rb = compute_tlbie_rb(v, r, pte_index);
+ hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
+ HPTE_V_ABSENT);
+ do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
+ true);
+ hpte[1] = cpu_to_be64(r);
}
}
- hpte[1] = cpu_to_be64(r);
- eieio();
- hpte[0] = cpu_to_be64(v & ~HPTE_V_HVLOCK);
+ unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
asm volatile("ptesync" : : : "memory");
return H_SUCCESS;
}
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 3ee38e6e884f..7b066f6b02ad 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -183,8 +183,10 @@ static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
* state update in HW (ie bus transactions) so we can handle them
* separately here as well.
*/
- if (resend)
+ if (resend) {
icp->rm_action |= XICS_RM_CHECK_RESEND;
+ icp->rm_resend_icp = icp;
+ }
}
@@ -254,10 +256,25 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
* nothing needs to be done as there can be no XISR to
* reject.
*
+ * ICP state: Check_IPI
+ *
* If the CPPR is less favored, then we might be replacing
- * an interrupt, and thus need to possibly reject it as in
+ * an interrupt, and thus need to possibly reject it.
*
- * ICP state: Check_IPI
+ * ICP State: IPI
+ *
+ * Besides rejecting any pending interrupts, we also
+ * update XISR and pending_pri to mark IPI as pending.
+ *
+ * PAPR does not describe this state, but if the MFRR is being
+ * made less favored than its earlier value, there might be
+ * a previously-rejected interrupt needing to be resent.
+ * Ideally, we would want to resend only if
+ * prio(pending_interrupt) < mfrr &&
+ * prio(pending_interrupt) < cppr
+ * where pending interrupt is the one that was rejected. But
+ * we don't have that state, so we simply trigger a resend
+ * whenever the MFRR is made less favored.
*/
do {
old_state = new_state = ACCESS_ONCE(icp->state);
@@ -270,13 +287,14 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
resend = false;
if (mfrr < new_state.cppr) {
/* Reject a pending interrupt if not an IPI */
- if (mfrr <= new_state.pending_pri)
+ if (mfrr <= new_state.pending_pri) {
reject = new_state.xisr;
- new_state.pending_pri = mfrr;
- new_state.xisr = XICS_IPI;
+ new_state.pending_pri = mfrr;
+ new_state.xisr = XICS_IPI;
+ }
}
- if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+ if (mfrr > old_state.mfrr) {
resend = new_state.need_resend;
new_state.need_resend = 0;
}
@@ -289,8 +307,10 @@ int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
}
/* Pass resends to virtual mode */
- if (resend)
+ if (resend) {
this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+ this_icp->rm_resend_icp = icp;
+ }
return check_too_hard(xics, this_icp);
}
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index 65c105b17a25..10554df13852 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -94,20 +94,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
lwz r6, HSTATE_PMC + 12(r13)
lwz r8, HSTATE_PMC + 16(r13)
lwz r9, HSTATE_PMC + 20(r13)
-BEGIN_FTR_SECTION
- lwz r10, HSTATE_PMC + 24(r13)
- lwz r11, HSTATE_PMC + 28(r13)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
mtspr SPRN_PMC1, r3
mtspr SPRN_PMC2, r4
mtspr SPRN_PMC3, r5
mtspr SPRN_PMC4, r6
mtspr SPRN_PMC5, r8
mtspr SPRN_PMC6, r9
-BEGIN_FTR_SECTION
- mtspr SPRN_PMC7, r10
- mtspr SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
ld r3, HSTATE_MMCR(r13)
ld r4, HSTATE_MMCR + 8(r13)
ld r5, HSTATE_MMCR + 16(r13)
@@ -153,11 +145,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
-BEGIN_FTR_SECTION
beq 11f
cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI
beq cr2, 14f /* HMI check */
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* RFI into the highmem handler, or branch to interrupt handler */
mfmsr r6
@@ -166,7 +156,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
mtmsrd r6, 1 /* Clear RI in MSR */
mtsrr0 r8
mtsrr1 r7
- beqa 0x500 /* external interrupt (PPC970) */
beq cr1, 13f /* machine check */
RFI
@@ -393,11 +382,8 @@ kvmppc_hv_entry:
slbia
ptesync
-BEGIN_FTR_SECTION
- b 30f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
/*
- * POWER7 host -> guest partition switch code.
+ * POWER7/POWER8 host -> guest partition switch code.
* We don't have to lock against concurrent tlbies,
* but we do have to coordinate across hardware threads.
*/
@@ -505,97 +491,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
cmpwi r3,512 /* 1 microsecond */
li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
blt hdec_soon
- b 31f
-
- /*
- * PPC970 host -> guest partition switch code.
- * We have to lock against concurrent tlbies,
- * using native_tlbie_lock to lock against host tlbies
- * and kvm->arch.tlbie_lock to lock against guest tlbies.
- * We also have to invalidate the TLB since its
- * entries aren't tagged with the LPID.
- */
-30: ld r5,HSTATE_KVM_VCORE(r13)
- ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
-
- /* first take native_tlbie_lock */
- .section ".toc","aw"
-toc_tlbie_lock:
- .tc native_tlbie_lock[TC],native_tlbie_lock
- .previous
- ld r3,toc_tlbie_lock@toc(r2)
-#ifdef __BIG_ENDIAN__
- lwz r8,PACA_LOCK_TOKEN(r13)
-#else
- lwz r8,PACAPACAINDEX(r13)
-#endif
-24: lwarx r0,0,r3
- cmpwi r0,0
- bne 24b
- stwcx. r8,0,r3
- bne 24b
- isync
-
- ld r5,HSTATE_KVM_VCORE(r13)
- ld r7,VCORE_LPCR(r5) /* use vcore->lpcr to store HID4 */
- li r0,0x18f
- rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
- or r0,r7,r0
- ptesync
- sync
- mtspr SPRN_HID4,r0 /* switch to reserved LPID */
- isync
- li r0,0
- stw r0,0(r3) /* drop native_tlbie_lock */
-
- /* invalidate the whole TLB */
- li r0,256
- mtctr r0
- li r6,0
-25: tlbiel r6
- addi r6,r6,0x1000
- bdnz 25b
- ptesync
- /* Take the guest's tlbie_lock */
- addi r3,r9,KVM_TLBIE_LOCK
-24: lwarx r0,0,r3
- cmpwi r0,0
- bne 24b
- stwcx. r8,0,r3
- bne 24b
- isync
- ld r6,KVM_SDR1(r9)
- mtspr SPRN_SDR1,r6 /* switch to partition page table */
-
- /* Set up HID4 with the guest's LPID etc. */
- sync
- mtspr SPRN_HID4,r7
- isync
-
- /* drop the guest's tlbie_lock */
- li r0,0
- stw r0,0(r3)
-
- /* Check if HDEC expires soon */
- mfspr r3,SPRN_HDEC
- cmpwi r3,10
- li r12,BOOK3S_INTERRUPT_HV_DECREMENTER
- blt hdec_soon
-
- /* Enable HDEC interrupts */
- mfspr r0,SPRN_HID0
- li r3,1
- rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
- sync
- mtspr SPRN_HID0,r0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
-31:
/* Do we have a guest vcpu to run? */
cmpdi r4, 0
beq kvmppc_primary_no_guest
@@ -625,7 +521,6 @@ kvmppc_got_guest:
stb r6, VCPU_VPA_DIRTY(r4)
25:
-BEGIN_FTR_SECTION
/* Save purr/spurr */
mfspr r5,SPRN_PURR
mfspr r6,SPRN_SPURR
@@ -635,7 +530,6 @@ BEGIN_FTR_SECTION
ld r8,VCPU_SPURR(r4)
mtspr SPRN_PURR,r7
mtspr SPRN_SPURR,r8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
BEGIN_FTR_SECTION
/* Set partition DABR */
@@ -644,9 +538,7 @@ BEGIN_FTR_SECTION
ld r6,VCPU_DABR(r4)
mtspr SPRN_DABRX,r5
mtspr SPRN_DABR,r6
- BEGIN_FTR_SECTION_NESTED(89)
isync
- END_FTR_SECTION_NESTED(CPU_FTR_ARCH_206, CPU_FTR_ARCH_206, 89)
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
@@ -777,20 +669,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
lwz r7, VCPU_PMC + 12(r4)
lwz r8, VCPU_PMC + 16(r4)
lwz r9, VCPU_PMC + 20(r4)
-BEGIN_FTR_SECTION
- lwz r10, VCPU_PMC + 24(r4)
- lwz r11, VCPU_PMC + 28(r4)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
mtspr SPRN_PMC1, r3
mtspr SPRN_PMC2, r5
mtspr SPRN_PMC3, r6
mtspr SPRN_PMC4, r7
mtspr SPRN_PMC5, r8
mtspr SPRN_PMC6, r9
-BEGIN_FTR_SECTION
- mtspr SPRN_PMC7, r10
- mtspr SPRN_PMC8, r11
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
ld r3, VCPU_MMCR(r4)
ld r5, VCPU_MMCR + 8(r4)
ld r6, VCPU_MMCR + 16(r4)
@@ -837,14 +721,12 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
ld r30, VCPU_GPR(R30)(r4)
ld r31, VCPU_GPR(R31)(r4)
-BEGIN_FTR_SECTION
/* Switch DSCR to guest value */
ld r5, VCPU_DSCR(r4)
mtspr SPRN_DSCR, r5
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
BEGIN_FTR_SECTION
- /* Skip next section on POWER7 or PPC970 */
+ /* Skip next section on POWER7 */
b 8f
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
/* Turn on TM so we can access TFHAR/TFIAR/TEXASR */
@@ -920,7 +802,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_DAR, r5
mtspr SPRN_DSISR, r6
-BEGIN_FTR_SECTION
/* Restore AMR and UAMOR, set AMOR to all 1s */
ld r5,VCPU_AMR(r4)
ld r6,VCPU_UAMOR(r4)
@@ -928,7 +809,6 @@ BEGIN_FTR_SECTION
mtspr SPRN_AMR,r5
mtspr SPRN_UAMOR,r6
mtspr SPRN_AMOR,r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* Restore state of CTRL run bit; assume 1 on entry */
lwz r5,VCPU_CTRL(r4)
@@ -963,13 +843,11 @@ deliver_guest_interrupt:
rldicl r0, r0, 64 - BOOK3S_IRQPRIO_EXTERNAL_LEVEL, 63
cmpdi cr1, r0, 0
andi. r8, r11, MSR_EE
-BEGIN_FTR_SECTION
mfspr r8, SPRN_LPCR
/* Insert EXTERNAL_LEVEL bit into LPCR at the MER bit position */
rldimi r8, r0, LPCR_MER_SH, 63 - LPCR_MER_SH
mtspr SPRN_LPCR, r8
isync
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
beq 5f
li r0, BOOK3S_INTERRUPT_EXTERNAL
bne cr1, 12f
@@ -1124,15 +1002,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
stw r12,VCPU_TRAP(r9)
- /* Save HEIR (HV emulation assist reg) in last_inst
+ /* Save HEIR (HV emulation assist reg) in emul_inst
if this is an HEI (HV emulation interrupt, e40) */
li r3,KVM_INST_FETCH_FAILED
-BEGIN_FTR_SECTION
cmpwi r12,BOOK3S_INTERRUPT_H_EMUL_ASSIST
bne 11f
mfspr r3,SPRN_HEIR
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
-11: stw r3,VCPU_LAST_INST(r9)
+11: stw r3,VCPU_HEIR(r9)
/* these are volatile across C function calls */
mfctr r3
@@ -1140,13 +1016,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
std r3, VCPU_CTR(r9)
stw r4, VCPU_XER(r9)
-BEGIN_FTR_SECTION
/* If this is a page table miss then see if it's theirs or ours */
cmpwi r12, BOOK3S_INTERRUPT_H_DATA_STORAGE
beq kvmppc_hdsi
cmpwi r12, BOOK3S_INTERRUPT_H_INST_STORAGE
beq kvmppc_hisi
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* See if this is a leftover HDEC interrupt */
cmpwi r12,BOOK3S_INTERRUPT_HV_DECREMENTER
@@ -1159,11 +1033,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
cmpwi r12,BOOK3S_INTERRUPT_SYSCALL
beq hcall_try_real_mode
- /* Only handle external interrupts here on arch 206 and later */
-BEGIN_FTR_SECTION
- b ext_interrupt_to_host
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
-
/* External interrupt ? */
cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL
bne+ ext_interrupt_to_host
@@ -1193,11 +1062,9 @@ guest_exit_cont: /* r9 = vcpu, r12 = trap, r13 = paca */
mfdsisr r7
std r6, VCPU_DAR(r9)
stw r7, VCPU_DSISR(r9)
-BEGIN_FTR_SECTION
/* don't overwrite fault_dar/fault_dsisr if HDSI */
cmpwi r12,BOOK3S_INTERRUPT_H_DATA_STORAGE
beq 6f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
std r6, VCPU_FAULT_DAR(r9)
stw r7, VCPU_FAULT_DSISR(r9)
@@ -1236,7 +1103,6 @@ mc_cont:
/*
* Save the guest PURR/SPURR
*/
-BEGIN_FTR_SECTION
mfspr r5,SPRN_PURR
mfspr r6,SPRN_SPURR
ld r7,VCPU_PURR(r9)
@@ -1256,7 +1122,6 @@ BEGIN_FTR_SECTION
add r4,r4,r6
mtspr SPRN_PURR,r3
mtspr SPRN_SPURR,r4
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_201)
/* Save DEC */
mfspr r5,SPRN_DEC
@@ -1306,22 +1171,18 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
8:
/* Save and reset AMR and UAMOR before turning on the MMU */
-BEGIN_FTR_SECTION
mfspr r5,SPRN_AMR
mfspr r6,SPRN_UAMOR
std r5,VCPU_AMR(r9)
std r6,VCPU_UAMOR(r9)
li r6,0
mtspr SPRN_AMR,r6
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* Switch DSCR back to host value */
-BEGIN_FTR_SECTION
mfspr r8, SPRN_DSCR
ld r7, HSTATE_DSCR(r13)
std r8, VCPU_DSCR(r9)
mtspr SPRN_DSCR, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* Save non-volatile GPRs */
std r14, VCPU_GPR(R14)(r9)
@@ -1503,11 +1364,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mfspr r4, SPRN_MMCR0 /* save MMCR0 */
mtspr SPRN_MMCR0, r3 /* freeze all counters, disable ints */
mfspr r6, SPRN_MMCRA
-BEGIN_FTR_SECTION
- /* On P7, clear MMCRA in order to disable SDAR updates */
+ /* Clear MMCRA in order to disable SDAR updates */
li r7, 0
mtspr SPRN_MMCRA, r7
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
isync
beq 21f /* if no VPA, save PMU stuff anyway */
lbz r7, LPPACA_PMCINUSE(r8)
@@ -1532,10 +1391,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mfspr r6, SPRN_PMC4
mfspr r7, SPRN_PMC5
mfspr r8, SPRN_PMC6
-BEGIN_FTR_SECTION
- mfspr r10, SPRN_PMC7
- mfspr r11, SPRN_PMC8
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
stw r3, VCPU_PMC(r9)
stw r4, VCPU_PMC + 4(r9)
stw r5, VCPU_PMC + 8(r9)
@@ -1543,10 +1398,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
stw r7, VCPU_PMC + 16(r9)
stw r8, VCPU_PMC + 20(r9)
BEGIN_FTR_SECTION
- stw r10, VCPU_PMC + 24(r9)
- stw r11, VCPU_PMC + 28(r9)
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
-BEGIN_FTR_SECTION
mfspr r5, SPRN_SIER
mfspr r6, SPRN_SPMC1
mfspr r7, SPRN_SPMC2
@@ -1566,11 +1417,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
ptesync
hdec_soon: /* r12 = trap, r13 = paca */
-BEGIN_FTR_SECTION
- b 32f
-END_FTR_SECTION_IFSET(CPU_FTR_ARCH_201)
/*
- * POWER7 guest -> host partition switch code.
+ * POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do
* have to coordinate the hardware threads.
*/
@@ -1698,87 +1546,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
16: ld r8,KVM_HOST_LPCR(r4)
mtspr SPRN_LPCR,r8
isync
- b 33f
-
- /*
- * PPC970 guest -> host partition switch code.
- * We have to lock against concurrent tlbies, and
- * we have to flush the whole TLB.
- */
-32: ld r5,HSTATE_KVM_VCORE(r13)
- ld r4,VCORE_KVM(r5) /* pointer to struct kvm */
-
- /* Take the guest's tlbie_lock */
-#ifdef __BIG_ENDIAN__
- lwz r8,PACA_LOCK_TOKEN(r13)
-#else
- lwz r8,PACAPACAINDEX(r13)
-#endif
- addi r3,r4,KVM_TLBIE_LOCK
-24: lwarx r0,0,r3
- cmpwi r0,0
- bne 24b
- stwcx. r8,0,r3
- bne 24b
- isync
-
- ld r7,KVM_HOST_LPCR(r4) /* use kvm->arch.host_lpcr for HID4 */
- li r0,0x18f
- rotldi r0,r0,HID4_LPID5_SH /* all lpid bits in HID4 = 1 */
- or r0,r7,r0
- ptesync
- sync
- mtspr SPRN_HID4,r0 /* switch to reserved LPID */
- isync
- li r0,0
- stw r0,0(r3) /* drop guest tlbie_lock */
-
- /* invalidate the whole TLB */
- li r0,256
- mtctr r0
- li r6,0
-25: tlbiel r6
- addi r6,r6,0x1000
- bdnz 25b
- ptesync
-
- /* take native_tlbie_lock */
- ld r3,toc_tlbie_lock@toc(2)
-24: lwarx r0,0,r3
- cmpwi r0,0
- bne 24b
- stwcx. r8,0,r3
- bne 24b
- isync
-
- ld r6,KVM_HOST_SDR1(r4)
- mtspr SPRN_SDR1,r6 /* switch to host page table */
-
- /* Set up host HID4 value */
- sync
- mtspr SPRN_HID4,r7
- isync
- li r0,0
- stw r0,0(r3) /* drop native_tlbie_lock */
-
- lis r8,0x7fff /* MAX_INT@h */
- mtspr SPRN_HDEC,r8
-
- /* Disable HDEC interrupts */
- mfspr r0,SPRN_HID0
- li r3,0
- rldimi r0,r3, HID0_HDICE_SH, 64-HID0_HDICE_SH-1
- sync
- mtspr SPRN_HID0,r0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
- mfspr r0,SPRN_HID0
/* load host SLB entries */
-33: ld r8,PACA_SLBSHADOWPTR(r13)
+ ld r8,PACA_SLBSHADOWPTR(r13)
.rept SLB_NUM_BOLTED
li r3, SLBSHADOW_SAVEAREA
@@ -2047,7 +1817,7 @@ hcall_real_table:
.long 0 /* 0xd8 */
.long 0 /* 0xdc */
.long DOTSYM(kvmppc_h_cede) - hcall_real_table
- .long 0 /* 0xe4 */
+ .long DOTSYM(kvmppc_rm_h_confer) - hcall_real_table
.long 0 /* 0xe8 */
.long 0 /* 0xec */
.long 0 /* 0xf0 */
@@ -2126,9 +1896,6 @@ _GLOBAL(kvmppc_h_cede)
stw r0,VCPU_TRAP(r3)
li r0,H_SUCCESS
std r0,VCPU_GPR(R3)(r3)
-BEGIN_FTR_SECTION
- b kvm_cede_exit /* just send it up to host on 970 */
-END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_206)
/*
* Set our bit in the bitmask of napping threads unless all the
@@ -2455,7 +2222,6 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
mtmsrd r8
- isync
addi r3,r3,VCPU_FPRS
bl store_fp_state
#ifdef CONFIG_ALTIVEC
@@ -2491,7 +2257,6 @@ BEGIN_FTR_SECTION
END_FTR_SECTION_IFSET(CPU_FTR_VSX)
#endif
mtmsrd r8
- isync
addi r3,r4,VCPU_FPRS
bl load_fp_state
#ifdef CONFIG_ALTIVEC
diff --git a/arch/powerpc/kvm/book3s_paired_singles.c b/arch/powerpc/kvm/book3s_paired_singles.c
index bfb8035314e3..bd6ab1672ae6 100644
--- a/arch/powerpc/kvm/book3s_paired_singles.c
+++ b/arch/powerpc/kvm/book3s_paired_singles.c
@@ -352,14 +352,6 @@ static inline u32 inst_get_field(u32 inst, int msb, int lsb)
return kvmppc_get_field(inst, msb + 32, lsb + 32);
}
-/*
- * Replaces inst bits with ordering according to spec.
- */
-static inline u32 inst_set_field(u32 inst, int msb, int lsb, int value)
-{
- return kvmppc_set_field(inst, msb + 32, lsb + 32, value);
-}
-
bool kvmppc_inst_is_paired_single(struct kvm_vcpu *vcpu, u32 inst)
{
if (!(vcpu->arch.hflags & BOOK3S_HFLAG_PAIRED_SINGLE))
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index cf2eb16846d1..f57383941d03 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -644,11 +644,6 @@ int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
return r;
}
-static inline int get_fpr_index(int i)
-{
- return i * TS_FPRWIDTH;
-}
-
/* Give up external provider (FPU, Altivec, VSX) */
void kvmppc_giveup_ext(struct kvm_vcpu *vcpu, ulong msr)
{
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index eaeb78047fb8..807351f76f84 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -613,10 +613,25 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
* there might be a previously-rejected interrupt needing
* to be resent.
*
+ * ICP state: Check_IPI
+ *
* If the CPPR is less favored, then we might be replacing
- * an interrupt, and thus need to possibly reject it as in
+ * an interrupt, and thus need to possibly reject it.
*
- * ICP state: Check_IPI
+ * ICP State: IPI
+ *
+ * Besides rejecting any pending interrupts, we also
+ * update XISR and pending_pri to mark IPI as pending.
+ *
+ * PAPR does not describe this state, but if the MFRR is being
+ * made less favored than its earlier value, there might be
+ * a previously-rejected interrupt needing to be resent.
+ * Ideally, we would want to resend only if
+ * prio(pending_interrupt) < mfrr &&
+ * prio(pending_interrupt) < cppr
+ * where pending interrupt is the one that was rejected. But
+ * we don't have that state, so we simply trigger a resend
+ * whenever the MFRR is made less favored.
*/
do {
old_state = new_state = ACCESS_ONCE(icp->state);
@@ -629,13 +644,14 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
resend = false;
if (mfrr < new_state.cppr) {
/* Reject a pending interrupt if not an IPI */
- if (mfrr <= new_state.pending_pri)
+ if (mfrr <= new_state.pending_pri) {
reject = new_state.xisr;
- new_state.pending_pri = mfrr;
- new_state.xisr = XICS_IPI;
+ new_state.pending_pri = mfrr;
+ new_state.xisr = XICS_IPI;
+ }
}
- if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+ if (mfrr > old_state.mfrr) {
resend = new_state.need_resend;
new_state.need_resend = 0;
}
@@ -789,7 +805,7 @@ static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
if (icp->rm_action & XICS_RM_KICK_VCPU)
kvmppc_fast_vcpu_kick(icp->rm_kick_target);
if (icp->rm_action & XICS_RM_CHECK_RESEND)
- icp_check_resend(xics, icp);
+ icp_check_resend(xics, icp->rm_resend_icp);
if (icp->rm_action & XICS_RM_REJECT)
icp_deliver_irq(xics, icp, icp->rm_reject);
if (icp->rm_action & XICS_RM_NOTIFY_EOI)
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
index e8aaa7a3f209..73f0f2723c07 100644
--- a/arch/powerpc/kvm/book3s_xics.h
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -74,6 +74,7 @@ struct kvmppc_icp {
#define XICS_RM_NOTIFY_EOI 0x8
u32 rm_action;
struct kvm_vcpu *rm_kick_target;
+ struct kvmppc_icp *rm_resend_icp;
u32 rm_reject;
u32 rm_eoied_irq;
diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index 16095841afe1..b29ce752c7d6 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -78,7 +78,7 @@ static inline int local_sid_setup_one(struct id *entry)
sid = __this_cpu_inc_return(pcpu_last_used_sid);
if (sid < NUM_TIDS) {
- __this_cpu_write(pcpu_sids)entry[sid], entry);
+ __this_cpu_write(pcpu_sids.entry[sid], entry);
entry->val = sid;
entry->pentry = this_cpu_ptr(&pcpu_sids.entry[sid]);
ret = sid;
@@ -299,14 +299,6 @@ void kvmppc_mmu_msr_notify(struct kvm_vcpu *vcpu, u32 old_msr)
kvmppc_e500_recalc_shadow_pid(to_e500(vcpu));
}
-void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
-void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
-{
-}
-
static void kvmppc_core_vcpu_load_e500(struct kvm_vcpu *vcpu, int cpu)
{
kvmppc_booke_vcpu_load(vcpu, cpu);
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index c1f8f53cd312..c45eaab752b0 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -527,18 +527,12 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = 0;
break;
case KVM_CAP_PPC_RMA:
- r = hv_enabled;
- /* PPC970 requires an RMA */
- if (r && cpu_has_feature(CPU_FTR_ARCH_201))
- r = 2;
+ r = 0;
break;
#endif
case KVM_CAP_SYNC_MMU:
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- if (hv_enabled)
- r = cpu_has_feature(CPU_FTR_ARCH_206) ? 1 : 0;
- else
- r = 0;
+ r = hv_enabled;
#elif defined(KVM_ARCH_WANT_MMU_NOTIFIER)
r = 1;
#else
diff --git a/arch/powerpc/kvm/trace_book3s.h b/arch/powerpc/kvm/trace_book3s.h
new file mode 100644
index 000000000000..f647ce0f428b
--- /dev/null
+++ b/arch/powerpc/kvm/trace_book3s.h
@@ -0,0 +1,32 @@
+#if !defined(_TRACE_KVM_BOOK3S_H)
+#define _TRACE_KVM_BOOK3S_H
+
+/*
+ * Common defines used by the trace macros in trace_pr.h and trace_hv.h
+ */
+
+#define kvm_trace_symbol_exit \
+ {0x100, "SYSTEM_RESET"}, \
+ {0x200, "MACHINE_CHECK"}, \
+ {0x300, "DATA_STORAGE"}, \
+ {0x380, "DATA_SEGMENT"}, \
+ {0x400, "INST_STORAGE"}, \
+ {0x480, "INST_SEGMENT"}, \
+ {0x500, "EXTERNAL"}, \
+ {0x501, "EXTERNAL_LEVEL"}, \
+ {0x502, "EXTERNAL_HV"}, \
+ {0x600, "ALIGNMENT"}, \
+ {0x700, "PROGRAM"}, \
+ {0x800, "FP_UNAVAIL"}, \
+ {0x900, "DECREMENTER"}, \
+ {0x980, "HV_DECREMENTER"}, \
+ {0xc00, "SYSCALL"}, \
+ {0xd00, "TRACE"}, \
+ {0xe00, "H_DATA_STORAGE"}, \
+ {0xe20, "H_INST_STORAGE"}, \
+ {0xe40, "H_EMUL_ASSIST"}, \
+ {0xf00, "PERFMON"}, \
+ {0xf20, "ALTIVEC"}, \
+ {0xf40, "VSX"}
+
+#endif
diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h
index f7537cf26ce7..7ec534d1db9f 100644
--- a/arch/powerpc/kvm/trace_booke.h
+++ b/arch/powerpc/kvm/trace_booke.h
@@ -151,6 +151,47 @@ TRACE_EVENT(kvm_booke206_ref_release,
__entry->pfn, __entry->flags)
);
+#ifdef CONFIG_SPE_POSSIBLE
+#define kvm_trace_symbol_irqprio_spe \
+ {BOOKE_IRQPRIO_SPE_UNAVAIL, "SPE_UNAVAIL"}, \
+ {BOOKE_IRQPRIO_SPE_FP_DATA, "SPE_FP_DATA"}, \
+ {BOOKE_IRQPRIO_SPE_FP_ROUND, "SPE_FP_ROUND"},
+#else
+#define kvm_trace_symbol_irqprio_spe
+#endif
+
+#ifdef CONFIG_PPC_E500MC
+#define kvm_trace_symbol_irqprio_e500mc \
+ {BOOKE_IRQPRIO_ALTIVEC_UNAVAIL, "ALTIVEC_UNAVAIL"}, \
+ {BOOKE_IRQPRIO_ALTIVEC_ASSIST, "ALTIVEC_ASSIST"},
+#else
+#define kvm_trace_symbol_irqprio_e500mc
+#endif
+
+#define kvm_trace_symbol_irqprio \
+ kvm_trace_symbol_irqprio_spe \
+ kvm_trace_symbol_irqprio_e500mc \
+ {BOOKE_IRQPRIO_DATA_STORAGE, "DATA_STORAGE"}, \
+ {BOOKE_IRQPRIO_INST_STORAGE, "INST_STORAGE"}, \
+ {BOOKE_IRQPRIO_ALIGNMENT, "ALIGNMENT"}, \
+ {BOOKE_IRQPRIO_PROGRAM, "PROGRAM"}, \
+ {BOOKE_IRQPRIO_FP_UNAVAIL, "FP_UNAVAIL"}, \
+ {BOOKE_IRQPRIO_SYSCALL, "SYSCALL"}, \
+ {BOOKE_IRQPRIO_AP_UNAVAIL, "AP_UNAVAIL"}, \
+ {BOOKE_IRQPRIO_DTLB_MISS, "DTLB_MISS"}, \
+ {BOOKE_IRQPRIO_ITLB_MISS, "ITLB_MISS"}, \
+ {BOOKE_IRQPRIO_MACHINE_CHECK, "MACHINE_CHECK"}, \
+ {BOOKE_IRQPRIO_DEBUG, "DEBUG"}, \
+ {BOOKE_IRQPRIO_CRITICAL, "CRITICAL"}, \
+ {BOOKE_IRQPRIO_WATCHDOG, "WATCHDOG"}, \
+ {BOOKE_IRQPRIO_EXTERNAL, "EXTERNAL"}, \
+ {BOOKE_IRQPRIO_FIT, "FIT"}, \
+ {BOOKE_IRQPRIO_DECREMENTER, "DECREMENTER"}, \
+ {BOOKE_IRQPRIO_PERFORMANCE_MONITOR, "PERFORMANCE_MONITOR"}, \
+ {BOOKE_IRQPRIO_EXTERNAL_LEVEL, "EXTERNAL_LEVEL"}, \
+ {BOOKE_IRQPRIO_DBELL, "DBELL"}, \
+ {BOOKE_IRQPRIO_DBELL_CRIT, "DBELL_CRIT"} \
+
TRACE_EVENT(kvm_booke_queue_irqprio,
TP_PROTO(struct kvm_vcpu *vcpu, unsigned int priority),
TP_ARGS(vcpu, priority),
@@ -167,8 +208,10 @@ TRACE_EVENT(kvm_booke_queue_irqprio,
__entry->pending = vcpu->arch.pending_exceptions;
),
- TP_printk("vcpu=%x prio=%x pending=%lx",
- __entry->cpu_nr, __entry->priority, __entry->pending)
+ TP_printk("vcpu=%x prio=%s pending=%lx",
+ __entry->cpu_nr,
+ __print_symbolic(__entry->priority, kvm_trace_symbol_irqprio),
+ __entry->pending)
);
#endif
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
new file mode 100644
index 000000000000..33d9daff5783
--- /dev/null
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -0,0 +1,477 @@
+#if !defined(_TRACE_KVM_HV_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_HV_H
+
+#include <linux/tracepoint.h>
+#include "trace_book3s.h"
+#include <asm/hvcall.h>
+#include <asm/kvm_asm.h>
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm_hv
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE trace_hv
+
+#define kvm_trace_symbol_hcall \
+ {H_REMOVE, "H_REMOVE"}, \
+ {H_ENTER, "H_ENTER"}, \
+ {H_READ, "H_READ"}, \
+ {H_CLEAR_MOD, "H_CLEAR_MOD"}, \
+ {H_CLEAR_REF, "H_CLEAR_REF"}, \
+ {H_PROTECT, "H_PROTECT"}, \
+ {H_GET_TCE, "H_GET_TCE"}, \
+ {H_PUT_TCE, "H_PUT_TCE"}, \
+ {H_SET_SPRG0, "H_SET_SPRG0"}, \
+ {H_SET_DABR, "H_SET_DABR"}, \
+ {H_PAGE_INIT, "H_PAGE_INIT"}, \
+ {H_SET_ASR, "H_SET_ASR"}, \
+ {H_ASR_ON, "H_ASR_ON"}, \
+ {H_ASR_OFF, "H_ASR_OFF"}, \
+ {H_LOGICAL_CI_LOAD, "H_LOGICAL_CI_LOAD"}, \
+ {H_LOGICAL_CI_STORE, "H_LOGICAL_CI_STORE"}, \
+ {H_LOGICAL_CACHE_LOAD, "H_LOGICAL_CACHE_LOAD"}, \
+ {H_LOGICAL_CACHE_STORE, "H_LOGICAL_CACHE_STORE"}, \
+ {H_LOGICAL_ICBI, "H_LOGICAL_ICBI"}, \
+ {H_LOGICAL_DCBF, "H_LOGICAL_DCBF"}, \
+ {H_GET_TERM_CHAR, "H_GET_TERM_CHAR"}, \
+ {H_PUT_TERM_CHAR, "H_PUT_TERM_CHAR"}, \
+ {H_REAL_TO_LOGICAL, "H_REAL_TO_LOGICAL"}, \
+ {H_HYPERVISOR_DATA, "H_HYPERVISOR_DATA"}, \
+ {H_EOI, "H_EOI"}, \
+ {H_CPPR, "H_CPPR"}, \
+ {H_IPI, "H_IPI"}, \
+ {H_IPOLL, "H_IPOLL"}, \
+ {H_XIRR, "H_XIRR"}, \
+ {H_PERFMON, "H_PERFMON"}, \
+ {H_MIGRATE_DMA, "H_MIGRATE_DMA"}, \
+ {H_REGISTER_VPA, "H_REGISTER_VPA"}, \
+ {H_CEDE, "H_CEDE"}, \
+ {H_CONFER, "H_CONFER"}, \
+ {H_PROD, "H_PROD"}, \
+ {H_GET_PPP, "H_GET_PPP"}, \
+ {H_SET_PPP, "H_SET_PPP"}, \
+ {H_PURR, "H_PURR"}, \
+ {H_PIC, "H_PIC"}, \
+ {H_REG_CRQ, "H_REG_CRQ"}, \
+ {H_FREE_CRQ, "H_FREE_CRQ"}, \
+ {H_VIO_SIGNAL, "H_VIO_SIGNAL"}, \
+ {H_SEND_CRQ, "H_SEND_CRQ"}, \
+ {H_COPY_RDMA, "H_COPY_RDMA"}, \
+ {H_REGISTER_LOGICAL_LAN, "H_REGISTER_LOGICAL_LAN"}, \
+ {H_FREE_LOGICAL_LAN, "H_FREE_LOGICAL_LAN"}, \
+ {H_ADD_LOGICAL_LAN_BUFFER, "H_ADD_LOGICAL_LAN_BUFFER"}, \
+ {H_SEND_LOGICAL_LAN, "H_SEND_LOGICAL_LAN"}, \
+ {H_BULK_REMOVE, "H_BULK_REMOVE"}, \
+ {H_MULTICAST_CTRL, "H_MULTICAST_CTRL"}, \
+ {H_SET_XDABR, "H_SET_XDABR"}, \
+ {H_STUFF_TCE, "H_STUFF_TCE"}, \
+ {H_PUT_TCE_INDIRECT, "H_PUT_TCE_INDIRECT"}, \
+ {H_CHANGE_LOGICAL_LAN_MAC, "H_CHANGE_LOGICAL_LAN_MAC"}, \
+ {H_VTERM_PARTNER_INFO, "H_VTERM_PARTNER_INFO"}, \
+ {H_REGISTER_VTERM, "H_REGISTER_VTERM"}, \
+ {H_FREE_VTERM, "H_FREE_VTERM"}, \
+ {H_RESET_EVENTS, "H_RESET_EVENTS"}, \
+ {H_ALLOC_RESOURCE, "H_ALLOC_RESOURCE"}, \
+ {H_FREE_RESOURCE, "H_FREE_RESOURCE"}, \
+ {H_MODIFY_QP, "H_MODIFY_QP"}, \
+ {H_QUERY_QP, "H_QUERY_QP"}, \
+ {H_REREGISTER_PMR, "H_REREGISTER_PMR"}, \
+ {H_REGISTER_SMR, "H_REGISTER_SMR"}, \
+ {H_QUERY_MR, "H_QUERY_MR"}, \
+ {H_QUERY_MW, "H_QUERY_MW"}, \
+ {H_QUERY_HCA, "H_QUERY_HCA"}, \
+ {H_QUERY_PORT, "H_QUERY_PORT"}, \
+ {H_MODIFY_PORT, "H_MODIFY_PORT"}, \
+ {H_DEFINE_AQP1, "H_DEFINE_AQP1"}, \
+ {H_GET_TRACE_BUFFER, "H_GET_TRACE_BUFFER"}, \
+ {H_DEFINE_AQP0, "H_DEFINE_AQP0"}, \
+ {H_RESIZE_MR, "H_RESIZE_MR"}, \
+ {H_ATTACH_MCQP, "H_ATTACH_MCQP"}, \
+ {H_DETACH_MCQP, "H_DETACH_MCQP"}, \
+ {H_CREATE_RPT, "H_CREATE_RPT"}, \
+ {H_REMOVE_RPT, "H_REMOVE_RPT"}, \
+ {H_REGISTER_RPAGES, "H_REGISTER_RPAGES"}, \
+ {H_DISABLE_AND_GETC, "H_DISABLE_AND_GETC"}, \
+ {H_ERROR_DATA, "H_ERROR_DATA"}, \
+ {H_GET_HCA_INFO, "H_GET_HCA_INFO"}, \
+ {H_GET_PERF_COUNT, "H_GET_PERF_COUNT"}, \
+ {H_MANAGE_TRACE, "H_MANAGE_TRACE"}, \
+ {H_FREE_LOGICAL_LAN_BUFFER, "H_FREE_LOGICAL_LAN_BUFFER"}, \
+ {H_QUERY_INT_STATE, "H_QUERY_INT_STATE"}, \
+ {H_POLL_PENDING, "H_POLL_PENDING"}, \
+ {H_ILLAN_ATTRIBUTES, "H_ILLAN_ATTRIBUTES"}, \
+ {H_MODIFY_HEA_QP, "H_MODIFY_HEA_QP"}, \
+ {H_QUERY_HEA_QP, "H_QUERY_HEA_QP"}, \
+ {H_QUERY_HEA, "H_QUERY_HEA"}, \
+ {H_QUERY_HEA_PORT, "H_QUERY_HEA_PORT"}, \
+ {H_MODIFY_HEA_PORT, "H_MODIFY_HEA_PORT"}, \
+ {H_REG_BCMC, "H_REG_BCMC"}, \
+ {H_DEREG_BCMC, "H_DEREG_BCMC"}, \
+ {H_REGISTER_HEA_RPAGES, "H_REGISTER_HEA_RPAGES"}, \
+ {H_DISABLE_AND_GET_HEA, "H_DISABLE_AND_GET_HEA"}, \
+ {H_GET_HEA_INFO, "H_GET_HEA_INFO"}, \
+ {H_ALLOC_HEA_RESOURCE, "H_ALLOC_HEA_RESOURCE"}, \
+ {H_ADD_CONN, "H_ADD_CONN"}, \
+ {H_DEL_CONN, "H_DEL_CONN"}, \
+ {H_JOIN, "H_JOIN"}, \
+ {H_VASI_STATE, "H_VASI_STATE"}, \
+ {H_ENABLE_CRQ, "H_ENABLE_CRQ"}, \
+ {H_GET_EM_PARMS, "H_GET_EM_PARMS"}, \
+ {H_SET_MPP, "H_SET_MPP"}, \
+ {H_GET_MPP, "H_GET_MPP"}, \
+ {H_HOME_NODE_ASSOCIATIVITY, "H_HOME_NODE_ASSOCIATIVITY"}, \
+ {H_BEST_ENERGY, "H_BEST_ENERGY"}, \
+ {H_XIRR_X, "H_XIRR_X"}, \
+ {H_RANDOM, "H_RANDOM"}, \
+ {H_COP, "H_COP"}, \
+ {H_GET_MPP_X, "H_GET_MPP_X"}, \
+ {H_SET_MODE, "H_SET_MODE"}, \
+ {H_RTAS, "H_RTAS"}
+
+#define kvm_trace_symbol_kvmret \
+ {RESUME_GUEST, "RESUME_GUEST"}, \
+ {RESUME_GUEST_NV, "RESUME_GUEST_NV"}, \
+ {RESUME_HOST, "RESUME_HOST"}, \
+ {RESUME_HOST_NV, "RESUME_HOST_NV"}
+
+#define kvm_trace_symbol_hcall_rc \
+ {H_SUCCESS, "H_SUCCESS"}, \
+ {H_BUSY, "H_BUSY"}, \
+ {H_CLOSED, "H_CLOSED"}, \
+ {H_NOT_AVAILABLE, "H_NOT_AVAILABLE"}, \
+ {H_CONSTRAINED, "H_CONSTRAINED"}, \
+ {H_PARTIAL, "H_PARTIAL"}, \
+ {H_IN_PROGRESS, "H_IN_PROGRESS"}, \
+ {H_PAGE_REGISTERED, "H_PAGE_REGISTERED"}, \
+ {H_PARTIAL_STORE, "H_PARTIAL_STORE"}, \
+ {H_PENDING, "H_PENDING"}, \
+ {H_CONTINUE, "H_CONTINUE"}, \
+ {H_LONG_BUSY_START_RANGE, "H_LONG_BUSY_START_RANGE"}, \
+ {H_LONG_BUSY_ORDER_1_MSEC, "H_LONG_BUSY_ORDER_1_MSEC"}, \
+ {H_LONG_BUSY_ORDER_10_MSEC, "H_LONG_BUSY_ORDER_10_MSEC"}, \
+ {H_LONG_BUSY_ORDER_100_MSEC, "H_LONG_BUSY_ORDER_100_MSEC"}, \
+ {H_LONG_BUSY_ORDER_1_SEC, "H_LONG_BUSY_ORDER_1_SEC"}, \
+ {H_LONG_BUSY_ORDER_10_SEC, "H_LONG_BUSY_ORDER_10_SEC"}, \
+ {H_LONG_BUSY_ORDER_100_SEC, "H_LONG_BUSY_ORDER_100_SEC"}, \
+ {H_LONG_BUSY_END_RANGE, "H_LONG_BUSY_END_RANGE"}, \
+ {H_TOO_HARD, "H_TOO_HARD"}, \
+ {H_HARDWARE, "H_HARDWARE"}, \
+ {H_FUNCTION, "H_FUNCTION"}, \
+ {H_PRIVILEGE, "H_PRIVILEGE"}, \
+ {H_PARAMETER, "H_PARAMETER"}, \
+ {H_BAD_MODE, "H_BAD_MODE"}, \
+ {H_PTEG_FULL, "H_PTEG_FULL"}, \
+ {H_NOT_FOUND, "H_NOT_FOUND"}, \
+ {H_RESERVED_DABR, "H_RESERVED_DABR"}, \
+ {H_NO_MEM, "H_NO_MEM"}, \
+ {H_AUTHORITY, "H_AUTHORITY"}, \
+ {H_PERMISSION, "H_PERMISSION"}, \
+ {H_DROPPED, "H_DROPPED"}, \
+ {H_SOURCE_PARM, "H_SOURCE_PARM"}, \
+ {H_DEST_PARM, "H_DEST_PARM"}, \
+ {H_REMOTE_PARM, "H_REMOTE_PARM"}, \
+ {H_RESOURCE, "H_RESOURCE"}, \
+ {H_ADAPTER_PARM, "H_ADAPTER_PARM"}, \
+ {H_RH_PARM, "H_RH_PARM"}, \
+ {H_RCQ_PARM, "H_RCQ_PARM"}, \
+ {H_SCQ_PARM, "H_SCQ_PARM"}, \
+ {H_EQ_PARM, "H_EQ_PARM"}, \
+ {H_RT_PARM, "H_RT_PARM"}, \
+ {H_ST_PARM, "H_ST_PARM"}, \
+ {H_SIGT_PARM, "H_SIGT_PARM"}, \
+ {H_TOKEN_PARM, "H_TOKEN_PARM"}, \
+ {H_MLENGTH_PARM, "H_MLENGTH_PARM"}, \
+ {H_MEM_PARM, "H_MEM_PARM"}, \
+ {H_MEM_ACCESS_PARM, "H_MEM_ACCESS_PARM"}, \
+ {H_ATTR_PARM, "H_ATTR_PARM"}, \
+ {H_PORT_PARM, "H_PORT_PARM"}, \
+ {H_MCG_PARM, "H_MCG_PARM"}, \
+ {H_VL_PARM, "H_VL_PARM"}, \
+ {H_TSIZE_PARM, "H_TSIZE_PARM"}, \
+ {H_TRACE_PARM, "H_TRACE_PARM"}, \
+ {H_MASK_PARM, "H_MASK_PARM"}, \
+ {H_MCG_FULL, "H_MCG_FULL"}, \
+ {H_ALIAS_EXIST, "H_ALIAS_EXIST"}, \
+ {H_P_COUNTER, "H_P_COUNTER"}, \
+ {H_TABLE_FULL, "H_TABLE_FULL"}, \
+ {H_ALT_TABLE, "H_ALT_TABLE"}, \
+ {H_MR_CONDITION, "H_MR_CONDITION"}, \
+ {H_NOT_ENOUGH_RESOURCES, "H_NOT_ENOUGH_RESOURCES"}, \
+ {H_R_STATE, "H_R_STATE"}, \
+ {H_RESCINDED, "H_RESCINDED"}, \
+ {H_P2, "H_P2"}, \
+ {H_P3, "H_P3"}, \
+ {H_P4, "H_P4"}, \
+ {H_P5, "H_P5"}, \
+ {H_P6, "H_P6"}, \
+ {H_P7, "H_P7"}, \
+ {H_P8, "H_P8"}, \
+ {H_P9, "H_P9"}, \
+ {H_TOO_BIG, "H_TOO_BIG"}, \
+ {H_OVERLAP, "H_OVERLAP"}, \
+ {H_INTERRUPT, "H_INTERRUPT"}, \
+ {H_BAD_DATA, "H_BAD_DATA"}, \
+ {H_NOT_ACTIVE, "H_NOT_ACTIVE"}, \
+ {H_SG_LIST, "H_SG_LIST"}, \
+ {H_OP_MODE, "H_OP_MODE"}, \
+ {H_COP_HW, "H_COP_HW"}, \
+ {H_UNSUPPORTED_FLAG_START, "H_UNSUPPORTED_FLAG_START"}, \
+ {H_UNSUPPORTED_FLAG_END, "H_UNSUPPORTED_FLAG_END"}, \
+ {H_MULTI_THREADS_ACTIVE, "H_MULTI_THREADS_ACTIVE"}, \
+ {H_OUTSTANDING_COP_OPS, "H_OUTSTANDING_COP_OPS"}
+
+TRACE_EVENT(kvm_guest_enter,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(unsigned long, pc)
+ __field(unsigned long, pending_exceptions)
+ __field(u8, ceded)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ __entry->ceded = vcpu->arch.ceded;
+ __entry->pending_exceptions = vcpu->arch.pending_exceptions;
+ ),
+
+ TP_printk("VCPU %d: pc=0x%lx pexcp=0x%lx ceded=%d",
+ __entry->vcpu_id,
+ __entry->pc,
+ __entry->pending_exceptions, __entry->ceded)
+);
+
+TRACE_EVENT(kvm_guest_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+ TP_ARGS(vcpu),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(int, trap)
+ __field(unsigned long, pc)
+ __field(unsigned long, msr)
+ __field(u8, ceded)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->trap = vcpu->arch.trap;
+ __entry->ceded = vcpu->arch.ceded;
+ __entry->pc = kvmppc_get_pc(vcpu);
+ __entry->msr = vcpu->arch.shregs.msr;
+ ),
+
+ TP_printk("VCPU %d: trap=%s pc=0x%lx msr=0x%lx, ceded=%d",
+ __entry->vcpu_id,
+ __print_symbolic(__entry->trap, kvm_trace_symbol_exit),
+ __entry->pc, __entry->msr, __entry->ceded
+ )
+);
+
+TRACE_EVENT(kvm_page_fault_enter,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep,
+ struct kvm_memory_slot *memslot, unsigned long ea,
+ unsigned long dsisr),
+
+ TP_ARGS(vcpu, hptep, memslot, ea, dsisr),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(unsigned long, hpte_v)
+ __field(unsigned long, hpte_r)
+ __field(unsigned long, gpte_r)
+ __field(unsigned long, ea)
+ __field(u64, base_gfn)
+ __field(u32, slot_flags)
+ __field(u32, dsisr)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->hpte_v = hptep[0];
+ __entry->hpte_r = hptep[1];
+ __entry->gpte_r = hptep[2];
+ __entry->ea = ea;
+ __entry->dsisr = dsisr;
+ __entry->base_gfn = memslot ? memslot->base_gfn : -1UL;
+ __entry->slot_flags = memslot ? memslot->flags : 0;
+ ),
+
+ TP_printk("VCPU %d: hpte=0x%lx:0x%lx guest=0x%lx ea=0x%lx,%x slot=0x%llx,0x%x",
+ __entry->vcpu_id,
+ __entry->hpte_v, __entry->hpte_r, __entry->gpte_r,
+ __entry->ea, __entry->dsisr,
+ __entry->base_gfn, __entry->slot_flags)
+);
+
+TRACE_EVENT(kvm_page_fault_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu, unsigned long *hptep, long ret),
+
+ TP_ARGS(vcpu, hptep, ret),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(unsigned long, hpte_v)
+ __field(unsigned long, hpte_r)
+ __field(long, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->hpte_v = hptep[0];
+ __entry->hpte_r = hptep[1];
+ __entry->ret = ret;
+ ),
+
+ TP_printk("VCPU %d: hpte=0x%lx:0x%lx ret=0x%lx",
+ __entry->vcpu_id,
+ __entry->hpte_v, __entry->hpte_r, __entry->ret)
+);
+
+TRACE_EVENT(kvm_hcall_enter,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+
+ TP_ARGS(vcpu),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(unsigned long, req)
+ __field(unsigned long, gpr4)
+ __field(unsigned long, gpr5)
+ __field(unsigned long, gpr6)
+ __field(unsigned long, gpr7)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->req = kvmppc_get_gpr(vcpu, 3);
+ __entry->gpr4 = kvmppc_get_gpr(vcpu, 4);
+ __entry->gpr5 = kvmppc_get_gpr(vcpu, 5);
+ __entry->gpr6 = kvmppc_get_gpr(vcpu, 6);
+ __entry->gpr7 = kvmppc_get_gpr(vcpu, 7);
+ ),
+
+ TP_printk("VCPU %d: hcall=%s GPR4-7=0x%lx,0x%lx,0x%lx,0x%lx",
+ __entry->vcpu_id,
+ __print_symbolic(__entry->req, kvm_trace_symbol_hcall),
+ __entry->gpr4, __entry->gpr5, __entry->gpr6, __entry->gpr7)
+);
+
+TRACE_EVENT(kvm_hcall_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu, int ret),
+
+ TP_ARGS(vcpu, ret),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(unsigned long, ret)
+ __field(unsigned long, hcall_rc)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->ret = ret;
+ __entry->hcall_rc = kvmppc_get_gpr(vcpu, 3);
+ ),
+
+ TP_printk("VCPU %d: ret=%s hcall_rc=%s",
+ __entry->vcpu_id,
+ __print_symbolic(__entry->ret, kvm_trace_symbol_kvmret),
+ __print_symbolic(__entry->ret & RESUME_FLAG_HOST ?
+ H_TOO_HARD : __entry->hcall_rc,
+ kvm_trace_symbol_hcall_rc))
+);
+
+TRACE_EVENT(kvmppc_run_core,
+ TP_PROTO(struct kvmppc_vcore *vc, int where),
+
+ TP_ARGS(vc, where),
+
+ TP_STRUCT__entry(
+ __field(int, n_runnable)
+ __field(int, runner_vcpu)
+ __field(int, where)
+ __field(pid_t, tgid)
+ ),
+
+ TP_fast_assign(
+ __entry->runner_vcpu = vc->runner->vcpu_id;
+ __entry->n_runnable = vc->n_runnable;
+ __entry->where = where;
+ __entry->tgid = current->tgid;
+ ),
+
+ TP_printk("%s runner_vcpu==%d runnable=%d tgid=%d",
+ __entry->where ? "Exit" : "Enter",
+ __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_vcore_blocked,
+ TP_PROTO(struct kvmppc_vcore *vc, int where),
+
+ TP_ARGS(vc, where),
+
+ TP_STRUCT__entry(
+ __field(int, n_runnable)
+ __field(int, runner_vcpu)
+ __field(int, where)
+ __field(pid_t, tgid)
+ ),
+
+ TP_fast_assign(
+ __entry->runner_vcpu = vc->runner->vcpu_id;
+ __entry->n_runnable = vc->n_runnable;
+ __entry->where = where;
+ __entry->tgid = current->tgid;
+ ),
+
+ TP_printk("%s runner_vcpu=%d runnable=%d tgid=%d",
+ __entry->where ? "Exit" : "Enter",
+ __entry->runner_vcpu, __entry->n_runnable, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_enter,
+ TP_PROTO(struct kvm_vcpu *vcpu),
+
+ TP_ARGS(vcpu),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(pid_t, tgid)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->tgid = current->tgid;
+ ),
+
+ TP_printk("VCPU %d: tgid=%d", __entry->vcpu_id, __entry->tgid)
+);
+
+TRACE_EVENT(kvmppc_run_vcpu_exit,
+ TP_PROTO(struct kvm_vcpu *vcpu, struct kvm_run *run),
+
+ TP_ARGS(vcpu, run),
+
+ TP_STRUCT__entry(
+ __field(int, vcpu_id)
+ __field(int, exit)
+ __field(int, ret)
+ ),
+
+ TP_fast_assign(
+ __entry->vcpu_id = vcpu->vcpu_id;
+ __entry->exit = run->exit_reason;
+ __entry->ret = vcpu->arch.ret;
+ ),
+
+ TP_printk("VCPU %d: exit=%d, ret=%d",
+ __entry->vcpu_id, __entry->exit, __entry->ret)
+);
+
+#endif /* _TRACE_KVM_HV_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h
index e1357cd8dc1f..810507cb688a 100644
--- a/arch/powerpc/kvm/trace_pr.h
+++ b/arch/powerpc/kvm/trace_pr.h
@@ -3,36 +3,13 @@
#define _TRACE_KVM_PR_H
#include <linux/tracepoint.h>
+#include "trace_book3s.h"
#undef TRACE_SYSTEM
#define TRACE_SYSTEM kvm_pr
#define TRACE_INCLUDE_PATH .
#define TRACE_INCLUDE_FILE trace_pr
-#define kvm_trace_symbol_exit \
- {0x100, "SYSTEM_RESET"}, \
- {0x200, "MACHINE_CHECK"}, \
- {0x300, "DATA_STORAGE"}, \
- {0x380, "DATA_SEGMENT"}, \
- {0x400, "INST_STORAGE"}, \
- {0x480, "INST_SEGMENT"}, \
- {0x500, "EXTERNAL"}, \
- {0x501, "EXTERNAL_LEVEL"}, \
- {0x502, "EXTERNAL_HV"}, \
- {0x600, "ALIGNMENT"}, \
- {0x700, "PROGRAM"}, \
- {0x800, "FP_UNAVAIL"}, \
- {0x900, "DECREMENTER"}, \
- {0x980, "HV_DECREMENTER"}, \
- {0xc00, "SYSCALL"}, \
- {0xd00, "TRACE"}, \
- {0xe00, "H_DATA_STORAGE"}, \
- {0xe20, "H_INST_STORAGE"}, \
- {0xe40, "H_EMUL_ASSIST"}, \
- {0xf00, "PERFMON"}, \
- {0xf20, "ALTIVEC"}, \
- {0xf40, "VSX"}
-
TRACE_EVENT(kvm_book3s_reenter,
TP_PROTO(int r, struct kvm_vcpu *vcpu),
TP_ARGS(r, vcpu),
diff --git a/arch/powerpc/perf/hv-24x7.c b/arch/powerpc/perf/hv-24x7.c
index dba34088da28..f162d0b8eea3 100644
--- a/arch/powerpc/perf/hv-24x7.c
+++ b/arch/powerpc/perf/hv-24x7.c
@@ -177,7 +177,7 @@ static ssize_t _name##_show(struct device *dev, \
} \
ret = sprintf(buf, _fmt, _expr); \
e_free: \
- kfree(page); \
+ kmem_cache_free(hv_page_cache, page); \
return ret; \
} \
static DEVICE_ATTR_RO(_name)
@@ -217,11 +217,14 @@ static bool is_physical_domain(int domain)
domain == HV_24X7_PERF_DOMAIN_PHYSICAL_CORE;
}
+DEFINE_PER_CPU(char, hv_24x7_reqb[4096]) __aligned(4096);
+DEFINE_PER_CPU(char, hv_24x7_resb[4096]) __aligned(4096);
+
static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
u16 lpar, u64 *res,
bool success_expected)
{
- unsigned long ret = -ENOMEM;
+ unsigned long ret;
/*
* request_buffer and result_buffer are not required to be 4k aligned,
@@ -243,13 +246,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
BUILD_BUG_ON(sizeof(*request_buffer) > 4096);
BUILD_BUG_ON(sizeof(*result_buffer) > 4096);
- request_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
- if (!request_buffer)
- goto out;
+ request_buffer = (void *)get_cpu_var(hv_24x7_reqb);
+ result_buffer = (void *)get_cpu_var(hv_24x7_resb);
- result_buffer = kmem_cache_zalloc(hv_page_cache, GFP_USER);
- if (!result_buffer)
- goto out_free_request_buffer;
+ memset(request_buffer, 0, 4096);
+ memset(result_buffer, 0, 4096);
*request_buffer = (struct reqb) {
.buf = {
@@ -278,15 +279,11 @@ static unsigned long single_24x7_request(u8 domain, u32 offset, u16 ix,
domain, offset, ix, lpar, ret, ret,
result_buffer->buf.detailed_rc,
result_buffer->buf.failing_request_ix);
- goto out_free_result_buffer;
+ goto out;
}
*res = be64_to_cpu(result_buffer->result);
-out_free_result_buffer:
- kfree(result_buffer);
-out_free_request_buffer:
- kfree(request_buffer);
out:
return ret;
}
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 0a299be588af..54eca8b3b288 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -158,6 +158,43 @@ opal_tracepoint_return:
blr
#endif
+/*
+ * Make opal call in realmode. This is a generic function to be called
+ * from realmode. It handles endianness.
+ *
+ * r13 - paca pointer
+ * r1 - stack pointer
+ * r0 - opal token
+ */
+_GLOBAL(opal_call_realmode)
+ mflr r12
+ std r12,PPC_LR_STKOFF(r1)
+ ld r2,PACATOC(r13)
+ /* Set opal return address */
+ LOAD_REG_ADDR(r12,return_from_opal_call)
+ mtlr r12
+
+ mfmsr r12
+#ifdef __LITTLE_ENDIAN__
+ /* Handle endian-ness */
+ li r11,MSR_LE
+ andc r12,r12,r11
+#endif
+ mtspr SPRN_HSRR1,r12
+ LOAD_REG_ADDR(r11,opal)
+ ld r12,8(r11)
+ ld r2,0(r11)
+ mtspr SPRN_HSRR0,r12
+ hrfid
+
+return_from_opal_call:
+#ifdef __LITTLE_ENDIAN__
+ FIXUP_ENDIAN
+#endif
+ ld r12,PPC_LR_STKOFF(r1)
+ mtlr r12
+ blr
+
OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL);
OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE);
OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ);
@@ -247,6 +284,7 @@ OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ);
OPAL_CALL(opal_get_param, OPAL_GET_PARAM);
OPAL_CALL(opal_set_param, OPAL_SET_PARAM);
OPAL_CALL(opal_handle_hmi, OPAL_HANDLE_HMI);
+OPAL_CALL(opal_slw_set_reg, OPAL_SLW_SET_REG);
OPAL_CALL(opal_register_dump_region, OPAL_REGISTER_DUMP_REGION);
OPAL_CALL(opal_unregister_dump_region, OPAL_UNREGISTER_DUMP_REGION);
OPAL_CALL(opal_pci_set_phb_cxl_mode, OPAL_PCI_SET_PHB_CXL_MODE);
@@ -254,3 +292,4 @@ OPAL_CALL(opal_tpo_write, OPAL_WRITE_TPO);
OPAL_CALL(opal_tpo_read, OPAL_READ_TPO);
OPAL_CALL(opal_ipmi_send, OPAL_IPMI_SEND);
OPAL_CALL(opal_ipmi_recv, OPAL_IPMI_RECV);
+OPAL_CALL(opal_i2c_request, OPAL_I2C_REQUEST);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index cb0b6de79cd4..f10b9ec8c1f5 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -9,8 +9,9 @@
* 2 of the License, or (at your option) any later version.
*/
-#undef DEBUG
+#define pr_fmt(fmt) "opal: " fmt
+#include <linux/printk.h>
#include <linux/types.h>
#include <linux/of.h>
#include <linux/of_fdt.h>
@@ -625,6 +626,39 @@ static int opal_sysfs_init(void)
return 0;
}
+static ssize_t symbol_map_read(struct file *fp, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t count)
+{
+ return memory_read_from_buffer(buf, count, &off, bin_attr->private,
+ bin_attr->size);
+}
+
+static BIN_ATTR_RO(symbol_map, 0);
+
+static void opal_export_symmap(void)
+{
+ const __be64 *syms;
+ unsigned int size;
+ struct device_node *fw;
+ int rc;
+
+ fw = of_find_node_by_path("/ibm,opal/firmware");
+ if (!fw)
+ return;
+ syms = of_get_property(fw, "symbol-map", &size);
+ if (!syms || size != 2 * sizeof(__be64))
+ return;
+
+ /* Setup attributes */
+ bin_attr_symbol_map.private = __va(be64_to_cpu(syms[0]));
+ bin_attr_symbol_map.size = be64_to_cpu(syms[1]);
+
+ rc = sysfs_create_bin_file(opal_kobj, &bin_attr_symbol_map);
+ if (rc)
+ pr_warn("Error %d creating OPAL symbols file\n", rc);
+}
+
static void __init opal_dump_region_init(void)
{
void *addr;
@@ -653,6 +687,14 @@ static void opal_ipmi_init(struct device_node *opal_node)
of_platform_device_create(np, NULL, NULL);
}
+static void opal_i2c_create_devs(void)
+{
+ struct device_node *np;
+
+ for_each_compatible_node(np, NULL, "ibm,opal-i2c")
+ of_platform_device_create(np, NULL, NULL);
+}
+
static int __init opal_init(void)
{
struct device_node *np, *consoles;
@@ -679,6 +721,9 @@ static int __init opal_init(void)
of_node_put(consoles);
}
+ /* Create i2c platform devices */
+ opal_i2c_create_devs();
+
/* Find all OPAL interrupts and request them */
irqs = of_get_property(opal_node, "opal-interrupts", &irqlen);
pr_debug("opal: Found %d interrupts reserved for OPAL\n",
@@ -702,6 +747,8 @@ static int __init opal_init(void)
/* Create "opal" kobject under /sys/firmware */
rc = opal_sysfs_init();
if (rc == 0) {
+ /* Export symbol map to userspace */
+ opal_export_symmap();
/* Setup dump region interface */
opal_dump_region_init();
/* Setup error log interface */
@@ -824,3 +871,4 @@ EXPORT_SYMBOL_GPL(opal_rtc_read);
EXPORT_SYMBOL_GPL(opal_rtc_write);
EXPORT_SYMBOL_GPL(opal_tpo_read);
EXPORT_SYMBOL_GPL(opal_tpo_write);
+EXPORT_SYMBOL_GPL(opal_i2c_request);
diff --git a/arch/powerpc/platforms/powernv/powernv.h b/arch/powerpc/platforms/powernv/powernv.h
index 6c8e2d188cd0..604c48e7879a 100644
--- a/arch/powerpc/platforms/powernv/powernv.h
+++ b/arch/powerpc/platforms/powernv/powernv.h
@@ -29,6 +29,8 @@ static inline u64 pnv_pci_dma_get_required_mask(struct pci_dev *pdev)
}
#endif
+extern u32 pnv_get_supported_cpuidle_states(void);
+
extern void pnv_lpc_init(void);
bool cpu_core_split_required(void);
diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c
index 30b1c3e298a6..b700a329c31d 100644
--- a/arch/powerpc/platforms/powernv/setup.c
+++ b/arch/powerpc/platforms/powernv/setup.c
@@ -36,8 +36,12 @@
#include <asm/opal.h>
#include <asm/kexec.h>
#include <asm/smp.h>
+#include <asm/cputhreads.h>
+#include <asm/cpuidle.h>
+#include <asm/code-patching.h>
#include "powernv.h"
+#include "subcore.h"
static void __init pnv_setup_arch(void)
{
@@ -288,6 +292,168 @@ static void __init pnv_setup_machdep_rtas(void)
}
#endif /* CONFIG_PPC_POWERNV_RTAS */
+static u32 supported_cpuidle_states;
+
+int pnv_save_sprs_for_winkle(void)
+{
+ int cpu;
+ int rc;
+
+ /*
+ * hid0, hid1, hid4, hid5, hmeer and lpcr values are symmetric accross
+ * all cpus at boot. Get these reg values of current cpu and use the
+ * same accross all cpus.
+ */
+ uint64_t lpcr_val = mfspr(SPRN_LPCR);
+ uint64_t hid0_val = mfspr(SPRN_HID0);
+ uint64_t hid1_val = mfspr(SPRN_HID1);
+ uint64_t hid4_val = mfspr(SPRN_HID4);
+ uint64_t hid5_val = mfspr(SPRN_HID5);
+ uint64_t hmeer_val = mfspr(SPRN_HMEER);
+
+ for_each_possible_cpu(cpu) {
+ uint64_t pir = get_hard_smp_processor_id(cpu);
+ uint64_t hsprg0_val = (uint64_t)&paca[cpu];
+
+ /*
+ * HSPRG0 is used to store the cpu's pointer to paca. Hence last
+ * 3 bits are guaranteed to be 0. Program slw to restore HSPRG0
+ * with 63rd bit set, so that when a thread wakes up at 0x100 we
+ * can use this bit to distinguish between fastsleep and
+ * deep winkle.
+ */
+ hsprg0_val |= 1;
+
+ rc = opal_slw_set_reg(pir, SPRN_HSPRG0, hsprg0_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_LPCR, lpcr_val);
+ if (rc != 0)
+ return rc;
+
+ /* HIDs are per core registers */
+ if (cpu_thread_in_core(cpu) == 0) {
+
+ rc = opal_slw_set_reg(pir, SPRN_HMEER, hmeer_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID0, hid0_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID1, hid1_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID4, hid4_val);
+ if (rc != 0)
+ return rc;
+
+ rc = opal_slw_set_reg(pir, SPRN_HID5, hid5_val);
+ if (rc != 0)
+ return rc;
+ }
+ }
+
+ return 0;
+}
+
+static void pnv_alloc_idle_core_states(void)
+{
+ int i, j;
+ int nr_cores = cpu_nr_cores();
+ u32 *core_idle_state;
+
+ /*
+ * core_idle_state - First 8 bits track the idle state of each thread
+ * of the core. The 8th bit is the lock bit. Initially all thread bits
+ * are set. They are cleared when the thread enters deep idle state
+ * like sleep and winkle. Initially the lock bit is cleared.
+ * The lock bit has 2 purposes
+ * a. While the first thread is restoring core state, it prevents
+ * other threads in the core from switching to process context.
+ * b. While the last thread in the core is saving the core state, it
+ * prevents a different thread from waking up.
+ */
+ for (i = 0; i < nr_cores; i++) {
+ int first_cpu = i * threads_per_core;
+ int node = cpu_to_node(first_cpu);
+
+ core_idle_state = kmalloc_node(sizeof(u32), GFP_KERNEL, node);
+ *core_idle_state = PNV_CORE_IDLE_THREAD_BITS;
+
+ for (j = 0; j < threads_per_core; j++) {
+ int cpu = first_cpu + j;
+
+ paca[cpu].core_idle_state_ptr = core_idle_state;
+ paca[cpu].thread_idle_state = PNV_THREAD_RUNNING;
+ paca[cpu].thread_mask = 1 << j;
+ }
+ }
+
+ update_subcore_sibling_mask();
+
+ if (supported_cpuidle_states & OPAL_PM_WINKLE_ENABLED)
+ pnv_save_sprs_for_winkle();
+}
+
+u32 pnv_get_supported_cpuidle_states(void)
+{
+ return supported_cpuidle_states;
+}
+EXPORT_SYMBOL_GPL(pnv_get_supported_cpuidle_states);
+
+static int __init pnv_init_idle_states(void)
+{
+ struct device_node *power_mgt;
+ int dt_idle_states;
+ const __be32 *idle_state_flags;
+ u32 len_flags, flags;
+ int i;
+
+ supported_cpuidle_states = 0;
+
+ if (cpuidle_disable != IDLE_NO_OVERRIDE)
+ return 0;
+
+ if (!firmware_has_feature(FW_FEATURE_OPALv3))
+ return 0;
+
+ power_mgt = of_find_node_by_path("/ibm,opal/power-mgt");
+ if (!power_mgt) {
+ pr_warn("opal: PowerMgmt Node not found\n");
+ return 0;
+ }
+
+ idle_state_flags = of_get_property(power_mgt,
+ "ibm,cpu-idle-state-flags", &len_flags);
+ if (!idle_state_flags) {
+ pr_warn("DT-PowerMgmt: missing ibm,cpu-idle-state-flags\n");
+ return 0;
+ }
+
+ dt_idle_states = len_flags / sizeof(u32);
+
+ for (i = 0; i < dt_idle_states; i++) {
+ flags = be32_to_cpu(idle_state_flags[i]);
+ supported_cpuidle_states |= flags;
+ }
+ if (!(supported_cpuidle_states & OPAL_PM_SLEEP_ENABLED_ER1)) {
+ patch_instruction(
+ (unsigned int *)pnv_fastsleep_workaround_at_entry,
+ PPC_INST_NOP);
+ patch_instruction(
+ (unsigned int *)pnv_fastsleep_workaround_at_exit,
+ PPC_INST_NOP);
+ }
+ pnv_alloc_idle_core_states();
+ return 0;
+}
+
+subsys_initcall(pnv_init_idle_states);
+
static int __init pnv_probe(void)
{
unsigned long root = of_get_flat_dt_root();
diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c
index b716f666e48a..fc34025ef822 100644
--- a/arch/powerpc/platforms/powernv/smp.c
+++ b/arch/powerpc/platforms/powernv/smp.c
@@ -150,6 +150,7 @@ static void pnv_smp_cpu_kill_self(void)
{
unsigned int cpu;
unsigned long srr1;
+ u32 idle_states;
/* Standard hot unplug procedure */
local_irq_disable();
@@ -160,13 +161,23 @@ static void pnv_smp_cpu_kill_self(void)
generic_set_cpu_dead(cpu);
smp_wmb();
+ idle_states = pnv_get_supported_cpuidle_states();
/* We don't want to take decrementer interrupts while we are offline,
* so clear LPCR:PECE1. We keep PECE2 enabled.
*/
mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) & ~(u64)LPCR_PECE1);
while (!generic_check_cpu_restart(cpu)) {
+
ppc64_runlatch_off();
- srr1 = power7_nap(1);
+
+ if (idle_states & OPAL_PM_WINKLE_ENABLED)
+ srr1 = power7_winkle();
+ else if ((idle_states & OPAL_PM_SLEEP_ENABLED) ||
+ (idle_states & OPAL_PM_SLEEP_ENABLED_ER1))
+ srr1 = power7_sleep();
+ else
+ srr1 = power7_nap(1);
+
ppc64_runlatch_on();
/*
@@ -198,13 +209,27 @@ static void pnv_smp_cpu_kill_self(void)
#endif /* CONFIG_HOTPLUG_CPU */
+static int pnv_cpu_bootable(unsigned int nr)
+{
+ /*
+ * Starting with POWER8, the subcore logic relies on all threads of a
+ * core being booted so that they can participate in split mode
+ * switches. So on those machines we ignore the smt_enabled_at_boot
+ * setting (smt-enabled on the kernel command line).
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ return 1;
+
+ return smp_generic_cpu_bootable(nr);
+}
+
static struct smp_ops_t pnv_smp_ops = {
.message_pass = smp_muxed_ipi_message_pass,
.cause_ipi = NULL, /* Filled at runtime by xics_smp_probe() */
.probe = xics_smp_probe,
.kick_cpu = pnv_smp_kick_cpu,
.setup_cpu = pnv_smp_setup_cpu,
- .cpu_bootable = smp_generic_cpu_bootable,
+ .cpu_bootable = pnv_cpu_bootable,
#ifdef CONFIG_HOTPLUG_CPU
.cpu_disable = pnv_smp_cpu_disable,
.cpu_die = generic_cpu_die,
diff --git a/arch/powerpc/platforms/powernv/subcore.c b/arch/powerpc/platforms/powernv/subcore.c
index c87f96b79d1a..f60f80ada903 100644
--- a/arch/powerpc/platforms/powernv/subcore.c
+++ b/arch/powerpc/platforms/powernv/subcore.c
@@ -160,6 +160,18 @@ static void wait_for_sync_step(int step)
mb();
}
+static void update_hid_in_slw(u64 hid0)
+{
+ u64 idle_states = pnv_get_supported_cpuidle_states();
+
+ if (idle_states & OPAL_PM_WINKLE_ENABLED) {
+ /* OPAL call to patch slw with the new HID0 value */
+ u64 cpu_pir = hard_smp_processor_id();
+
+ opal_slw_set_reg(cpu_pir, SPRN_HID0, hid0);
+ }
+}
+
static void unsplit_core(void)
{
u64 hid0, mask;
@@ -179,6 +191,7 @@ static void unsplit_core(void)
hid0 = mfspr(SPRN_HID0);
hid0 &= ~HID0_POWER8_DYNLPARDIS;
mtspr(SPRN_HID0, hid0);
+ update_hid_in_slw(hid0);
while (mfspr(SPRN_HID0) & mask)
cpu_relax();
@@ -215,6 +228,7 @@ static void split_core(int new_mode)
hid0 = mfspr(SPRN_HID0);
hid0 |= HID0_POWER8_DYNLPARDIS | split_parms[i].value;
mtspr(SPRN_HID0, hid0);
+ update_hid_in_slw(hid0);
/* Wait for it to happen */
while (!(mfspr(SPRN_HID0) & split_parms[i].mask))
@@ -251,6 +265,25 @@ bool cpu_core_split_required(void)
return true;
}
+void update_subcore_sibling_mask(void)
+{
+ int cpu;
+ /*
+ * sibling mask for the first cpu. Left shift this by required bits
+ * to get sibling mask for the rest of the cpus.
+ */
+ int sibling_mask_first_cpu = (1 << threads_per_subcore) - 1;
+
+ for_each_possible_cpu(cpu) {
+ int tid = cpu_thread_in_core(cpu);
+ int offset = (tid / threads_per_subcore) * threads_per_subcore;
+ int mask = sibling_mask_first_cpu << offset;
+
+ paca[cpu].subcore_sibling_mask = mask;
+
+ }
+}
+
static int cpu_update_split_mode(void *data)
{
int cpu, new_mode = *(int *)data;
@@ -284,6 +317,7 @@ static int cpu_update_split_mode(void *data)
/* Make the new mode public */
subcores_per_core = new_mode;
threads_per_subcore = threads_per_core / subcores_per_core;
+ update_subcore_sibling_mask();
/* Make sure the new mode is written before we exit */
mb();
diff --git a/arch/powerpc/platforms/powernv/subcore.h b/arch/powerpc/platforms/powernv/subcore.h
index 148abc91debf..84e02ae52895 100644
--- a/arch/powerpc/platforms/powernv/subcore.h
+++ b/arch/powerpc/platforms/powernv/subcore.h
@@ -14,5 +14,12 @@
#define SYNC_STEP_FINISHED 3 /* Set by secondary when split/unsplit is done */
#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_SMP
void split_core_secondary_loop(u8 *state);
-#endif
+extern void update_subcore_sibling_mask(void);
+#else
+static inline void update_subcore_sibling_mask(void) { };
+#endif /* CONFIG_SMP */
+
+#endif /* __ASSEMBLY__ */