Diffstat (limited to 'arch/powerpc')
-rw-r--r--  arch/powerpc/Kconfig | 1
-rw-r--r--  arch/powerpc/boot/Makefile | 3
-rw-r--r--  arch/powerpc/boot/main.c | 26
-rw-r--r--  arch/powerpc/boot/opal-calls.S | 13
-rw-r--r--  arch/powerpc/boot/opal.c | 13
-rw-r--r--  arch/powerpc/boot/ops.h | 1
-rw-r--r--  arch/powerpc/configs/dpaa.config | 3
-rw-r--r--  arch/powerpc/include/asm/asm-prototypes.h | 12
-rw-r--r--  arch/powerpc/include/asm/book3s/64/mmu-hash.h | 47
-rw-r--r--  arch/powerpc/include/asm/book3s/64/pgtable.h | 13
-rw-r--r--  arch/powerpc/include/asm/checksum.h | 12
-rw-r--r--  arch/powerpc/include/asm/cpuidle.h | 2
-rw-r--r--  arch/powerpc/include/asm/cputime.h | 14
-rw-r--r--  arch/powerpc/include/asm/exception-64s.h | 31
-rw-r--r--  arch/powerpc/include/asm/kvm_asm.h | 1
-rw-r--r--  arch/powerpc/include/asm/kvm_host.h | 27
-rw-r--r--  arch/powerpc/include/asm/kvm_ppc.h | 49
-rw-r--r--  arch/powerpc/include/asm/mmu.h | 19
-rw-r--r--  arch/powerpc/include/asm/mutex.h | 132
-rw-r--r--  arch/powerpc/include/asm/opal.h | 3
-rw-r--r--  arch/powerpc/include/asm/ppc-opcode.h | 1
-rw-r--r--  arch/powerpc/include/asm/processor.h | 2
-rw-r--r--  arch/powerpc/include/asm/reg.h | 15
-rw-r--r--  arch/powerpc/include/asm/spinlock.h | 8
-rw-r--r--  arch/powerpc/include/asm/tlb.h | 28
-rw-r--r--  arch/powerpc/include/asm/unistd.h | 4
-rw-r--r--  arch/powerpc/include/asm/xilinx_intc.h | 2
-rw-r--r--  arch/powerpc/include/uapi/asm/kvm.h | 5
-rw-r--r--  arch/powerpc/include/uapi/asm/socket.h | 2
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c | 4
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.S | 10
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c | 4
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S | 57
-rw-r--r--  arch/powerpc/kernel/hw_breakpoint.c | 2
-rw-r--r--  arch/powerpc/kernel/idle_book3s.S | 35
-rw-r--r--  arch/powerpc/kernel/process.c | 44
-rw-r--r--  arch/powerpc/kernel/ptrace32.c | 5
-rw-r--r--  arch/powerpc/kernel/setup_64.c | 20
-rw-r--r--  arch/powerpc/kernel/sysfs.c | 50
-rw-r--r--  arch/powerpc/kernel/time.c | 8
-rw-r--r--  arch/powerpc/kernel/vmlinux.lds.S | 9
-rw-r--r--  arch/powerpc/kvm/book3s_64_mmu_hv.c | 63
-rw-r--r--  arch/powerpc/kvm/book3s_64_vio_hv.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_hv.c | 302
-rw-r--r--  arch/powerpc/kvm/book3s_hv_builtin.c | 72
-rw-r--r--  arch/powerpc/kvm/book3s_hv_ras.c | 1
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_mmu.c | 223
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rm_xics.c | 24
-rw-r--r--  arch/powerpc/kvm/book3s_hv_rmhandlers.S | 140
-rw-r--r--  arch/powerpc/kvm/powerpc.c | 16
-rw-r--r--  arch/powerpc/kvm/trace_hv.h | 2
-rw-r--r--  arch/powerpc/mm/copro_fault.c | 2
-rw-r--r--  arch/powerpc/mm/hash64_4k.c | 2
-rw-r--r--  arch/powerpc/mm/hash64_64k.c | 4
-rw-r--r--  arch/powerpc/mm/hash_native_64.c | 30
-rw-r--r--  arch/powerpc/mm/hash_utils_64.c | 40
-rw-r--r--  arch/powerpc/mm/numa.c | 59
-rw-r--r--  arch/powerpc/mm/pgtable-radix.c | 22
-rw-r--r--  arch/powerpc/mm/pgtable_64.c | 34
-rw-r--r--  arch/powerpc/mm/tlb-radix.c | 12
-rw-r--r--  arch/powerpc/net/bpf_jit_comp64.c | 4
-rw-r--r--  arch/powerpc/platforms/40x/Kconfig | 1
-rw-r--r--  arch/powerpc/platforms/40x/virtex.c | 2
-rw-r--r--  arch/powerpc/platforms/44x/Kconfig | 1
-rw-r--r--  arch/powerpc/platforms/44x/virtex.c | 2
-rw-r--r--  arch/powerpc/platforms/powernv/opal-wrappers.S | 3
-rw-r--r--  arch/powerpc/platforms/powernv/opal.c | 2
-rw-r--r--  arch/powerpc/platforms/ps3/htab.c | 2
-rw-r--r--  arch/powerpc/platforms/pseries/lpar.c | 2
-rw-r--r--  arch/powerpc/sysdev/xilinx_intc.c | 211
70 files changed, 1178 insertions, 838 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 65fba4c34cd7..c7f120aaa98f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -160,6 +160,7 @@ config PPC
select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
select GENERIC_CPU_AUTOPROBE
select HAVE_VIRT_CPU_ACCOUNTING
+ select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
select HAVE_ARCH_HARDENED_USERCOPY
select HAVE_KERNEL_GZIP
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index eae2dc8bc218..9d47f2efa830 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -100,7 +100,8 @@ src-wlib-y := string.S crt0.S crtsavres.S stdio.c decompress.c main.c \
ns16550.c serial.c simple_alloc.c div64.S util.S \
elf_util.c $(zlib-y) devtree.c stdlib.c \
oflib.c ofconsole.c cuboot.c mpsc.c cpm-serial.c \
- uartlite.c mpc52xx-psc.c opal.c opal-calls.S
+ uartlite.c mpc52xx-psc.c opal.c
+src-wlib-$(CONFIG_PPC64_BOOT_WRAPPER) += opal-calls.S
src-wlib-$(CONFIG_40x) += 4xx.c planetcore.c
src-wlib-$(CONFIG_44x) += 4xx.c ebony.c bamboo.c
src-wlib-$(CONFIG_8xx) += mpc8xx.c planetcore.c fsl-soc.c
diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c
index f7a184b6c35b..78aaf4ffd7ab 100644
--- a/arch/powerpc/boot/main.c
+++ b/arch/powerpc/boot/main.c
@@ -32,9 +32,16 @@ static struct addr_range prep_kernel(void)
void *addr = 0;
struct elf_info ei;
long len;
+ int uncompressed_image = 0;
- partial_decompress(vmlinuz_addr, vmlinuz_size,
+ len = partial_decompress(vmlinuz_addr, vmlinuz_size,
elfheader, sizeof(elfheader), 0);
+ /* assume uncompressed data if -1 is returned */
+ if (len == -1) {
+ uncompressed_image = 1;
+ memcpy(elfheader, vmlinuz_addr, sizeof(elfheader));
+ printf("No valid compressed data found, assume uncompressed data\n\r");
+ }
if (!parse_elf64(elfheader, &ei) && !parse_elf32(elfheader, &ei))
fatal("Error: not a valid PPC32 or PPC64 ELF file!\n\r");
@@ -67,6 +74,13 @@ static struct addr_range prep_kernel(void)
"device tree\n\r");
}
+ if (uncompressed_image) {
+ memcpy(addr, vmlinuz_addr + ei.elfoffset, ei.loadsize);
+ printf("0x%lx bytes of uncompressed data copied\n\r",
+ ei.loadsize);
+ goto out;
+ }
+
/* Finally, decompress the kernel */
printf("Decompressing (0x%p <- 0x%p:0x%p)...\n\r", addr,
vmlinuz_addr, vmlinuz_addr+vmlinuz_size);
@@ -82,7 +96,7 @@ static struct addr_range prep_kernel(void)
len, ei.loadsize);
printf("Done! Decompressed 0x%lx bytes\n\r", len);
-
+out:
flush_cache(addr, ei.loadsize);
return (struct addr_range){addr, ei.memsize};
@@ -218,8 +232,12 @@ void start(void)
console_ops.close();
kentry = (kernel_entry_t) vmlinux.addr;
- if (ft_addr)
- kentry(ft_addr, 0, NULL);
+ if (ft_addr) {
+ if(platform_ops.kentry)
+ platform_ops.kentry(ft_addr, vmlinux.addr);
+ else
+ kentry(ft_addr, 0, NULL);
+ }
else
kentry((unsigned long)initrd.addr, initrd.size,
loader_info.promptr);
diff --git a/arch/powerpc/boot/opal-calls.S b/arch/powerpc/boot/opal-calls.S
index ff2f1b97bc53..2a99fc9a3ccf 100644
--- a/arch/powerpc/boot/opal-calls.S
+++ b/arch/powerpc/boot/opal-calls.S
@@ -12,6 +12,19 @@
.text
+ .globl opal_kentry
+opal_kentry:
+ /* r3 is the fdt ptr */
+ mtctr r4
+ li r4, 0
+ li r5, 0
+ li r6, 0
+ li r7, 0
+ ld r11,opal@got(r2)
+ ld r8,0(r11)
+ ld r9,8(r11)
+ bctr
+
#define OPAL_CALL(name, token) \
.globl name; \
name: \
diff --git a/arch/powerpc/boot/opal.c b/arch/powerpc/boot/opal.c
index 1f37e1c1d6d8..0272570d02de 100644
--- a/arch/powerpc/boot/opal.c
+++ b/arch/powerpc/boot/opal.c
@@ -13,7 +13,7 @@
#include <libfdt.h>
#include "../include/asm/opal-api.h"
-#ifdef __powerpc64__
+#ifdef CONFIG_PPC64_BOOT_WRAPPER
/* Global OPAL struct used by opal-call.S */
struct opal {
@@ -23,14 +23,25 @@ struct opal {
static u32 opal_con_id;
+/* see opal-wrappers.S */
int64_t opal_console_write(int64_t term_number, u64 *length, const u8 *buffer);
int64_t opal_console_read(int64_t term_number, uint64_t *length, u8 *buffer);
int64_t opal_console_write_buffer_space(uint64_t term_number, uint64_t *length);
int64_t opal_console_flush(uint64_t term_number);
int64_t opal_poll_events(uint64_t *outstanding_event_mask);
+void opal_kentry(unsigned long fdt_addr, void *vmlinux_addr);
+
static int opal_con_open(void)
{
+ /*
+ * When OPAL loads the boot kernel it stashes the OPAL base and entry
+ * address in r8 and r9 so the kernel can use the OPAL console
+ * before unflattening the devicetree. While executing the wrapper will
+ * probably trash r8 and r9 so this kentry hook restores them before
+ * entering the decompressed kernel.
+ */
+ platform_ops.kentry = opal_kentry;
return 0;
}
diff --git a/arch/powerpc/boot/ops.h b/arch/powerpc/boot/ops.h
index 309d1b127e96..fad1862f4b2d 100644
--- a/arch/powerpc/boot/ops.h
+++ b/arch/powerpc/boot/ops.h
@@ -30,6 +30,7 @@ struct platform_ops {
void * (*realloc)(void *ptr, unsigned long size);
void (*exit)(void);
void * (*vmlinux_alloc)(unsigned long size);
+ void (*kentry)(unsigned long fdt_addr, void *vmlinux_addr);
};
extern struct platform_ops platform_ops;
diff --git a/arch/powerpc/configs/dpaa.config b/arch/powerpc/configs/dpaa.config
index efa99c048543..2fe76f5e938a 100644
--- a/arch/powerpc/configs/dpaa.config
+++ b/arch/powerpc/configs/dpaa.config
@@ -1 +1,4 @@
CONFIG_FSL_DPAA=y
+CONFIG_FSL_PAMU=y
+CONFIG_FSL_FMAN=y
+CONFIG_FSL_DPAA_ETH=y
diff --git a/arch/powerpc/include/asm/asm-prototypes.h b/arch/powerpc/include/asm/asm-prototypes.h
index d1492736d852..e0baba1535e6 100644
--- a/arch/powerpc/include/asm/asm-prototypes.h
+++ b/arch/powerpc/include/asm/asm-prototypes.h
@@ -14,6 +14,10 @@
#include <linux/threads.h>
#include <linux/kprobes.h>
+#include <asm/cacheflush.h>
+#include <asm/checksum.h>
+#include <asm/uaccess.h>
+#include <asm/epapr_hcalls.h>
#include <uapi/asm/ucontext.h>
@@ -109,4 +113,12 @@ void early_setup_secondary(void);
/* time */
void accumulate_stolen_time(void);
+/* misc runtime */
+extern u64 __bswapdi2(u64);
+extern s64 __lshrdi3(s64, int);
+extern s64 __ashldi3(s64, int);
+extern s64 __ashrdi3(s64, int);
+extern int __cmpdi2(s64, s64);
+extern int __ucmpdi2(u64, u64);
+
#endif /* _ASM_POWERPC_ASM_PROTOTYPES_H */
diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index e407af2b7333..2e6a823fa502 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -70,7 +70,9 @@
#define HPTE_V_SSIZE_SHIFT 62
#define HPTE_V_AVPN_SHIFT 7
+#define HPTE_V_COMMON_BITS ASM_CONST(0x000fffffffffffff)
#define HPTE_V_AVPN ASM_CONST(0x3fffffffffffff80)
+#define HPTE_V_AVPN_3_0 ASM_CONST(0x000fffffffffff80)
#define HPTE_V_AVPN_VAL(x) (((x) & HPTE_V_AVPN) >> HPTE_V_AVPN_SHIFT)
#define HPTE_V_COMPARE(x,y) (!(((x) ^ (y)) & 0xffffffffffffff80UL))
#define HPTE_V_BOLTED ASM_CONST(0x0000000000000010)
@@ -80,14 +82,16 @@
#define HPTE_V_VALID ASM_CONST(0x0000000000000001)
/*
- * ISA 3.0 have a different HPTE format.
+ * ISA 3.0 has a different HPTE format.
*/
#define HPTE_R_3_0_SSIZE_SHIFT 58
+#define HPTE_R_3_0_SSIZE_MASK (3ull << HPTE_R_3_0_SSIZE_SHIFT)
#define HPTE_R_PP0 ASM_CONST(0x8000000000000000)
#define HPTE_R_TS ASM_CONST(0x4000000000000000)
#define HPTE_R_KEY_HI ASM_CONST(0x3000000000000000)
#define HPTE_R_RPN_SHIFT 12
#define HPTE_R_RPN ASM_CONST(0x0ffffffffffff000)
+#define HPTE_R_RPN_3_0 ASM_CONST(0x01fffffffffff000)
#define HPTE_R_PP ASM_CONST(0x0000000000000003)
#define HPTE_R_PPP ASM_CONST(0x8000000000000003)
#define HPTE_R_N ASM_CONST(0x0000000000000004)
@@ -316,12 +320,43 @@ static inline unsigned long hpte_encode_avpn(unsigned long vpn, int psize,
*/
v = (vpn >> (23 - VPN_SHIFT)) & ~(mmu_psize_defs[psize].avpnm);
v <<= HPTE_V_AVPN_SHIFT;
- if (!cpu_has_feature(CPU_FTR_ARCH_300))
- v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
+ v |= ((unsigned long) ssize) << HPTE_V_SSIZE_SHIFT;
return v;
}
/*
+ * ISA v3.0 defines a new HPTE format, which differs from the old
+ * format in having smaller AVPN and ARPN fields, and the B field
+ * in the second dword instead of the first.
+ */
+static inline unsigned long hpte_old_to_new_v(unsigned long v)
+{
+ /* trim AVPN, drop B */
+ return v & HPTE_V_COMMON_BITS;
+}
+
+static inline unsigned long hpte_old_to_new_r(unsigned long v, unsigned long r)
+{
+ /* move B field from 1st to 2nd dword, trim ARPN */
+ return (r & ~HPTE_R_3_0_SSIZE_MASK) |
+ (((v) >> HPTE_V_SSIZE_SHIFT) << HPTE_R_3_0_SSIZE_SHIFT);
+}
+
+static inline unsigned long hpte_new_to_old_v(unsigned long v, unsigned long r)
+{
+ /* insert B field */
+ return (v & HPTE_V_COMMON_BITS) |
+ ((r & HPTE_R_3_0_SSIZE_MASK) <<
+ (HPTE_V_SSIZE_SHIFT - HPTE_R_3_0_SSIZE_SHIFT));
+}
+
+static inline unsigned long hpte_new_to_old_r(unsigned long r)
+{
+ /* clear out B field */
+ return r & ~HPTE_R_3_0_SSIZE_MASK;
+}
+
+/*
* This function sets the AVPN and L fields of the HPTE appropriately
* using the base page size and actual page size.
*/
@@ -341,12 +376,8 @@ static inline unsigned long hpte_encode_v(unsigned long vpn, int base_psize,
* aligned for the requested page size
*/
static inline unsigned long hpte_encode_r(unsigned long pa, int base_psize,
- int actual_psize, int ssize)
+ int actual_psize)
{
-
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- pa |= ((unsigned long) ssize) << HPTE_R_3_0_SSIZE_SHIFT;
-
/* A 4K page needs no special encoding */
if (actual_psize == MMU_PAGE_4K)
return pa & HPTE_R_RPN;
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 9fd77f8794a0..0ebfbc8f0449 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1009,7 +1009,8 @@ static inline void pmdp_huge_split_prepare(struct vm_area_struct *vma,
#define pmd_move_must_withdraw pmd_move_must_withdraw
struct spinlock;
static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
- struct spinlock *old_pmd_ptl)
+ struct spinlock *old_pmd_ptl,
+ struct vm_area_struct *vma)
{
if (radix_enabled())
return false;
@@ -1020,6 +1021,16 @@ static inline int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl,
*/
return true;
}
+
+
+#define arch_needs_pgtable_deposit arch_needs_pgtable_deposit
+static inline bool arch_needs_pgtable_deposit(void)
+{
+ if (radix_enabled())
+ return false;
+ return true;
+}
+
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
#endif /* __ASSEMBLY__ */
#endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index ee655ed1ff1b..1e8fceb308a5 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -53,10 +53,8 @@ static inline __sum16 csum_fold(__wsum sum)
return (__force __sum16)(~((__force u32)sum + tmp) >> 16);
}
-static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
- unsigned short len,
- unsigned short proto,
- __wsum sum)
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
{
#ifdef __powerpc64__
unsigned long s = (__force u32)sum;
@@ -83,10 +81,8 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
* computes the checksum of the TCP/UDP pseudo-header
* returns a 16-bit checksum, already complemented
*/
-static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
- unsigned short len,
- unsigned short proto,
- __wsum sum)
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
{
return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
}
diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h
index 01b8a13f0224..3919332965af 100644
--- a/arch/powerpc/include/asm/cpuidle.h
+++ b/arch/powerpc/include/asm/cpuidle.h
@@ -26,7 +26,7 @@ extern u64 pnv_first_deep_stop_state;
std r0,0(r1); \
ptesync; \
ld r0,0(r1); \
-1: cmp cr0,r0,r0; \
+1: cmpd cr0,r0,r0; \
bne 1b; \
IDLE_INST; \
b .
diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h
index 4f60db074725..aa2e6a34b872 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -46,26 +46,12 @@ extern cputime_t cputime_one_jiffy;
* Convert cputime <-> jiffies
*/
extern u64 __cputime_jiffies_factor;
-DECLARE_PER_CPU(unsigned long, cputime_last_delta);
-DECLARE_PER_CPU(unsigned long, cputime_scaled_last_delta);
static inline unsigned long cputime_to_jiffies(const cputime_t ct)
{
return mulhdu((__force u64) ct, __cputime_jiffies_factor);
}
-/* Estimate the scaled cputime by scaling the real cputime based on
- * the last scaled to real ratio */
-static inline cputime_t cputime_to_scaled(const cputime_t ct)
-{
- if (cpu_has_feature(CPU_FTR_SPURR) &&
- __this_cpu_read(cputime_last_delta))
- return (__force u64) ct *
- __this_cpu_read(cputime_scaled_last_delta) /
- __this_cpu_read(cputime_last_delta);
- return ct;
-}
-
static inline cputime_t jiffies_to_cputime(const unsigned long jif)
{
u64 ct;
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 2e4e7d878c8e..9a3eee661297 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -91,7 +91,11 @@
*/
#define LOAD_HANDLER(reg, label) \
ld reg,PACAKBASE(r13); /* get high part of &label */ \
- ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l;
+ ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label);
+
+#define __LOAD_HANDLER(reg, label) \
+ ld reg,PACAKBASE(r13); \
+ ori reg,reg,(ABS_ADDR(label))@l;
/* Exception register prefixes */
#define EXC_HV H
@@ -154,14 +158,17 @@ BEGIN_FTR_SECTION_NESTED(943) \
std ra,offset(r13); \
END_FTR_SECTION_NESTED(ftr,ftr,943)
-#define EXCEPTION_PROLOG_0(area) \
- GET_PACA(r13); \
+#define EXCEPTION_PROLOG_0_PACA(area) \
std r9,area+EX_R9(r13); /* save r9 */ \
OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \
HMT_MEDIUM; \
std r10,area+EX_R10(r13); /* save r10 - r12 */ \
OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+#define EXCEPTION_PROLOG_0(area) \
+ GET_PACA(r13); \
+ EXCEPTION_PROLOG_0_PACA(area)
+
#define __EXCEPTION_PROLOG_1(area, extra, vec) \
OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \
OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \
@@ -192,6 +199,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
EXCEPTION_PROLOG_1(area, extra, vec); \
EXCEPTION_PROLOG_PSERIES_1(label, h);
+/* Have the PACA in r13 already */
+#define EXCEPTION_PROLOG_PSERIES_PACA(area, label, h, extra, vec) \
+ EXCEPTION_PROLOG_0_PACA(area); \
+ EXCEPTION_PROLOG_1(area, extra, vec); \
+ EXCEPTION_PROLOG_PSERIES_1(label, h);
+
#define __KVMTEST(h, n) \
lbz r10,HSTATE_IN_GUEST(r13); \
cmpwi r10,0; \
@@ -208,6 +221,18 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
#define kvmppc_interrupt kvmppc_interrupt_pr
#endif
+#ifdef CONFIG_RELOCATABLE
+#define BRANCH_TO_COMMON(reg, label) \
+ __LOAD_HANDLER(reg, label); \
+ mtctr reg; \
+ bctr
+
+#else
+#define BRANCH_TO_COMMON(reg, label) \
+ b label
+
+#endif
+
#define __KVM_HANDLER_PROLOG(area, n) \
BEGIN_FTR_SECTION_NESTED(947) \
ld r10,area+EX_CFAR(r13); \
diff --git a/arch/powerpc/include/asm/kvm_asm.h b/arch/powerpc/include/asm/kvm_asm.h
index 05cabed3d1bd..09a802bb702f 100644
--- a/arch/powerpc/include/asm/kvm_asm.h
+++ b/arch/powerpc/include/asm/kvm_asm.h
@@ -99,6 +99,7 @@
#define BOOK3S_INTERRUPT_H_EMUL_ASSIST 0xe40
#define BOOK3S_INTERRUPT_HMI 0xe60
#define BOOK3S_INTERRUPT_H_DOORBELL 0xe80
+#define BOOK3S_INTERRUPT_H_VIRT 0xea0
#define BOOK3S_INTERRUPT_PERFMON 0xf00
#define BOOK3S_INTERRUPT_ALTIVEC 0xf20
#define BOOK3S_INTERRUPT_VSX 0xf40
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h
index 28350a294b1e..e59b172666cd 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -48,7 +48,7 @@
#ifdef CONFIG_KVM_MMIO
#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
#endif
-#define KVM_HALT_POLL_NS_DEFAULT 500000
+#define KVM_HALT_POLL_NS_DEFAULT 10000 /* 10 us */
/* These values are internal and can be increased later */
#define KVM_NR_IRQCHIPS 1
@@ -244,8 +244,10 @@ struct kvm_arch_memory_slot {
struct kvm_arch {
unsigned int lpid;
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ unsigned int tlb_sets;
unsigned long hpt_virt;
struct revmap_entry *revmap;
+ atomic64_t mmio_update;
unsigned int host_lpid;
unsigned long host_lpcr;
unsigned long sdr1;
@@ -408,6 +410,24 @@ struct kvmppc_passthru_irqmap {
#define KVMPPC_IRQ_MPIC 1
#define KVMPPC_IRQ_XICS 2
+#define MMIO_HPTE_CACHE_SIZE 4
+
+struct mmio_hpte_cache_entry {
+ unsigned long hpte_v;
+ unsigned long hpte_r;
+ unsigned long rpte;
+ unsigned long pte_index;
+ unsigned long eaddr;
+ unsigned long slb_v;
+ long mmio_update;
+ unsigned int slb_base_pshift;
+};
+
+struct mmio_hpte_cache {
+ struct mmio_hpte_cache_entry entry[MMIO_HPTE_CACHE_SIZE];
+ unsigned int index;
+};
+
struct openpic;
struct kvm_vcpu_arch {
@@ -498,6 +518,8 @@ struct kvm_vcpu_arch {
ulong tcscr;
ulong acop;
ulong wort;
+ ulong tid;
+ ulong psscr;
ulong shadow_srr1;
#endif
u32 vrsave; /* also USPRG0 */
@@ -546,6 +568,7 @@ struct kvm_vcpu_arch {
u64 tfiar;
u32 cr_tm;
+ u64 xer_tm;
u64 lr_tm;
u64 ctr_tm;
u64 amr_tm;
@@ -655,9 +678,11 @@ struct kvm_vcpu_arch {
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
struct kvm_vcpu_arch_shared shregs;
+ struct mmio_hpte_cache mmio_cache;
unsigned long pgfault_addr;
long pgfault_index;
unsigned long pgfault_hpte[2];
+ struct mmio_hpte_cache_entry *pgfault_cache;
struct task_struct *run_task;
struct kvm_run *kvm_run;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h
index f6e49640dbe1..2da67bf1f2ec 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -483,9 +483,10 @@ extern void kvmppc_xics_set_mapped(struct kvm *kvm, unsigned long guest_irq,
unsigned long host_irq);
extern void kvmppc_xics_clr_mapped(struct kvm *kvm, unsigned long guest_irq,
unsigned long host_irq);
-extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, u32 xirr,
- struct kvmppc_irq_map *irq_map,
- struct kvmppc_passthru_irqmap *pimap);
+extern long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu, __be32 xirr,
+ struct kvmppc_irq_map *irq_map,
+ struct kvmppc_passthru_irqmap *pimap,
+ bool *again);
extern int h_ipi_redirect;
#else
static inline struct kvmppc_passthru_irqmap *kvmppc_get_passthru_irqmap(
@@ -510,6 +511,48 @@ static inline int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd)
#endif
/*
+ * Prototypes for functions called only from assembler code.
+ * Having prototypes reduces sparse errors.
+ */
+long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn,
+ unsigned long ioba, unsigned long tce);
+long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_list, unsigned long npages);
+long kvmppc_rm_h_stuff_tce(struct kvm_vcpu *vcpu,
+ unsigned long liobn, unsigned long ioba,
+ unsigned long tce_value, unsigned long npages);
+long int kvmppc_rm_h_confer(struct kvm_vcpu *vcpu, int target,
+ unsigned int yield_count);
+long kvmppc_h_random(struct kvm_vcpu *vcpu);
+void kvmhv_commence_exit(int trap);
+long kvmppc_realmode_machine_check(struct kvm_vcpu *vcpu);
+void kvmppc_subcore_enter_guest(void);
+void kvmppc_subcore_exit_guest(void);
+long kvmppc_realmode_hmi_handler(void);
+long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
+ long pte_index, unsigned long pteh, unsigned long ptel);
+long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn);
+long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu);
+long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index, unsigned long avpn,
+ unsigned long va);
+long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index);
+long kvmppc_h_clear_ref(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index);
+long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags,
+ unsigned long pte_index);
+long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
+ unsigned long slb_v, unsigned int status, bool data);
+unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu);
+int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+ unsigned long mfrr);
+int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr);
+int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr);
+
+/*
* Host-side operations we want to set up while running in real
* mode in the guest operating on the xics.
* Currently only VCPU wakeup is supported.
diff --git a/arch/powerpc/include/asm/mmu.h b/arch/powerpc/include/asm/mmu.h
index e88368354e49..8d1499334257 100644
--- a/arch/powerpc/include/asm/mmu.h
+++ b/arch/powerpc/include/asm/mmu.h
@@ -29,6 +29,12 @@
*/
/*
+ * Kernel read only support.
+ * We added the ppp value 0b110 in ISA 2.04.
+ */
+#define MMU_FTR_KERNEL_RO ASM_CONST(0x00004000)
+
+/*
* We need to clear top 16bits of va (from the remaining 64 bits )in
* tlbie* instructions
*/
@@ -103,10 +109,10 @@
#define MMU_FTRS_POWER4 MMU_FTRS_DEFAULT_HPTE_ARCH_V2
#define MMU_FTRS_PPC970 MMU_FTRS_POWER4 | MMU_FTR_TLBIE_CROP_VA
#define MMU_FTRS_POWER5 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
-#define MMU_FTRS_POWER9 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE
+#define MMU_FTRS_POWER6 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
+#define MMU_FTRS_POWER7 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
+#define MMU_FTRS_POWER8 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
+#define MMU_FTRS_POWER9 MMU_FTRS_POWER4 | MMU_FTR_LOCKLESS_TLBIE | MMU_FTR_KERNEL_RO
#define MMU_FTRS_CELL MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
MMU_FTR_CI_LARGE_PAGE
#define MMU_FTRS_PA6T MMU_FTRS_DEFAULT_HPTE_ARCH_V2 | \
@@ -208,6 +214,11 @@ extern u64 ppc64_rma_size;
/* Cleanup function used by kexec */
extern void mmu_cleanup_all(void);
extern void radix__mmu_cleanup_all(void);
+
+/* Functions for creating and updating partition table on POWER9 */
+extern void mmu_partition_table_init(void);
+extern void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+ unsigned long dw1);
#endif /* CONFIG_PPC64 */
struct mm_struct;
diff --git a/arch/powerpc/include/asm/mutex.h b/arch/powerpc/include/asm/mutex.h
deleted file mode 100644
index 078155fa1189..000000000000
--- a/arch/powerpc/include/asm/mutex.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/*
- * Optimised mutex implementation of include/asm-generic/mutex-dec.h algorithm
- */
-#ifndef _ASM_POWERPC_MUTEX_H
-#define _ASM_POWERPC_MUTEX_H
-
-static inline int __mutex_cmpxchg_lock(atomic_t *v, int old, int new)
-{
- int t;
-
- __asm__ __volatile__ (
-"1: lwarx %0,0,%1 # mutex trylock\n\
- cmpw 0,%0,%2\n\
- bne- 2f\n"
- PPC405_ERR77(0,%1)
-" stwcx. %3,0,%1\n\
- bne- 1b"
- PPC_ACQUIRE_BARRIER
- "\n\
-2:"
- : "=&r" (t)
- : "r" (&v->counter), "r" (old), "r" (new)
- : "cc", "memory");
-
- return t;
-}
-
-static inline int __mutex_dec_return_lock(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
-"1: lwarx %0,0,%1 # mutex lock\n\
- addic %0,%0,-1\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1\n\
- bne- 1b"
- PPC_ACQUIRE_BARRIER
- : "=&r" (t)
- : "r" (&v->counter)
- : "cc", "memory");
-
- return t;
-}
-
-static inline int __mutex_inc_return_unlock(atomic_t *v)
-{
- int t;
-
- __asm__ __volatile__(
- PPC_RELEASE_BARRIER
-"1: lwarx %0,0,%1 # mutex unlock\n\
- addic %0,%0,1\n"
- PPC405_ERR77(0,%1)
-" stwcx. %0,0,%1 \n\
- bne- 1b"
- : "=&r" (t)
- : "r" (&v->counter)
- : "cc", "memory");
-
- return t;
-}
-
-/**
- * __mutex_fastpath_lock - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 1
- *
- * Change the count from 1 to a value lower than 1, and call <fail_fn> if
- * it wasn't 1 originally. This function MUST leave the value lower than
- * 1 even when the "1" assertion wasn't true.
- */
-static inline void
-__mutex_fastpath_lock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- if (unlikely(__mutex_dec_return_lock(count) < 0))
- fail_fn(count);
-}
-
-/**
- * __mutex_fastpath_lock_retval - try to take the lock by moving the count
- * from 1 to a 0 value
- * @count: pointer of type atomic_t
- *
- * Change the count from 1 to a value lower than 1. This function returns 0
- * if the fastpath succeeds, or -1 otherwise.
- */
-static inline int
-__mutex_fastpath_lock_retval(atomic_t *count)
-{
- if (unlikely(__mutex_dec_return_lock(count) < 0))
- return -1;
- return 0;
-}
-
-/**
- * __mutex_fastpath_unlock - try to promote the count from 0 to 1
- * @count: pointer of type atomic_t
- * @fail_fn: function to call if the original value was not 0
- *
- * Try to promote the count from 0 to 1. If it wasn't 0, call <fail_fn>.
- * In the failure case, this function is allowed to either set the value to
- * 1, or to set it to a value lower than 1.
- */
-static inline void
-__mutex_fastpath_unlock(atomic_t *count, void (*fail_fn)(atomic_t *))
-{
- if (unlikely(__mutex_inc_return_unlock(count) <= 0))
- fail_fn(count);
-}
-
-#define __mutex_slowpath_needs_to_unlock() 1
-
-/**
- * __mutex_fastpath_trylock - try to acquire the mutex, without waiting
- *
- * @count: pointer of type atomic_t
- * @fail_fn: fallback function
- *
- * Change the count from 1 to 0, and return 1 (success), or if the count
- * was not 1, then return 0 (failure).
- */
-static inline int
-__mutex_fastpath_trylock(atomic_t *count, int (*fail_fn)(atomic_t *))
-{
- if (likely(atomic_read(count) == 1 && __mutex_cmpxchg_lock(count, 1, 0) == 1))
- return 1;
- return 0;
-}
-
-#endif
diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h
index e958b7096f19..5c7db0f1a708 100644
--- a/arch/powerpc/include/asm/opal.h
+++ b/arch/powerpc/include/asm/opal.h
@@ -220,9 +220,12 @@ int64_t opal_pci_set_power_state(uint64_t async_token, uint64_t id,
int64_t opal_pci_poll2(uint64_t id, uint64_t data);
int64_t opal_int_get_xirr(uint32_t *out_xirr, bool just_poll);
+int64_t opal_rm_int_get_xirr(__be32 *out_xirr, bool just_poll);
int64_t opal_int_set_cppr(uint8_t cppr);
int64_t opal_int_eoi(uint32_t xirr);
+int64_t opal_rm_int_eoi(uint32_t xirr);
int64_t opal_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
+int64_t opal_rm_int_set_mfrr(uint32_t cpu, uint8_t mfrr);
int64_t opal_pci_tce_kill(uint64_t phb_id, uint32_t kill_type,
uint32_t pe_num, uint32_t tce_size,
uint64_t dma_addr, uint32_t npages);
diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 0132831b3081..c56ea8c84abb 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -460,5 +460,6 @@
#define PPC_SLBIA(IH) stringify_in_c(.long PPC_INST_SLBIA | \
((IH & 0x7) << 21))
+#define PPC_INVALIDATE_ERAT PPC_SLBIA(7)
#endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index c07c31b0e89e..dac83fcb9445 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -404,8 +404,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
#define cpu_relax() barrier()
#endif
-#define cpu_relax_lowlatency() cpu_relax()
-
/* Check that a certain kernel stack pointer is valid in task_struct p */
int validate_sp(unsigned long sp, struct task_struct *p,
unsigned long nbytes);
diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h
index 9cd4e8cbc78c..04aa1ee8cdb6 100644
--- a/arch/powerpc/include/asm/reg.h
+++ b/arch/powerpc/include/asm/reg.h
@@ -153,6 +153,8 @@
#define PSSCR_EC 0x00100000 /* Exit Criterion */
#define PSSCR_ESL 0x00200000 /* Enable State Loss */
#define PSSCR_SD 0x00400000 /* Status Disable */
+#define PSSCR_PLS 0xf000000000000000 /* Power-saving Level Status */
+#define PSSCR_GUEST_VIS 0xf0000000000003ff /* Guest-visible PSSCR fields */
/* Floating Point Status and Control Register (FPSCR) Fields */
#define FPSCR_FX 0x80000000 /* FPU exception summary */
@@ -236,6 +238,7 @@
#define SPRN_TEXASRU 0x83 /* '' '' '' Upper 32 */
#define TEXASR_FS __MASK(63-36) /* TEXASR Failure Summary */
#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
+#define SPRN_TIDR 144 /* Thread ID register */
#define SPRN_CTRLF 0x088
#define SPRN_CTRLT 0x098
#define CTRL_CT 0xc0000000 /* current thread */
@@ -294,6 +297,7 @@
#define SPRN_HSRR1 0x13B /* Hypervisor Save/Restore 1 */
#define SPRN_LMRR 0x32D /* Load Monitor Region Register */
#define SPRN_LMSER 0x32E /* Load Monitor Section Enable Register */
+#define SPRN_ASDR 0x330 /* Access segment descriptor register */
#define SPRN_IC 0x350 /* Virtual Instruction Count */
#define SPRN_VTB 0x351 /* Virtual Time Base */
#define SPRN_LDBAR 0x352 /* LD Base Address Register */
@@ -305,6 +309,7 @@
/* HFSCR and FSCR bit numbers are the same */
#define FSCR_LM_LG 11 /* Enable Load Monitor Registers */
+#define FSCR_MSGP_LG 10 /* Enable MSGP */
#define FSCR_TAR_LG 8 /* Enable Target Address Register */
#define FSCR_EBB_LG 7 /* Enable Event Based Branching */
#define FSCR_TM_LG 5 /* Enable Transactional Memory */
@@ -320,6 +325,7 @@
#define FSCR_DSCR __MASK(FSCR_DSCR_LG)
#define SPRN_HFSCR 0xbe /* HV=1 Facility Status & Control Register */
#define HFSCR_LM __MASK(FSCR_LM_LG)
+#define HFSCR_MSGP __MASK(FSCR_MSGP_LG)
#define HFSCR_TAR __MASK(FSCR_TAR_LG)
#define HFSCR_EBB __MASK(FSCR_EBB_LG)
#define HFSCR_TM __MASK(FSCR_TM_LG)
@@ -355,8 +361,10 @@
#define LPCR_PECE0 ASM_CONST(0x0000000000004000) /* ext. exceptions can cause exit */
#define LPCR_PECE1 ASM_CONST(0x0000000000002000) /* decrementer can cause exit */
#define LPCR_PECE2 ASM_CONST(0x0000000000001000) /* machine check etc can cause exit */
+#define LPCR_PECE_HVEE ASM_CONST(0x0000400000000000) /* P9 Wakeup on HV interrupts */
#define LPCR_MER ASM_CONST(0x0000000000000800) /* Mediated External Exception */
#define LPCR_MER_SH 11
+#define LPCR_GTSE ASM_CONST(0x0000000000000400) /* Guest Translation Shootdown Enable */
#define LPCR_TC ASM_CONST(0x0000000000000200) /* Translation control */
#define LPCR_LPES 0x0000000c
#define LPCR_LPES0 ASM_CONST(0x0000000000000008) /* LPAR Env selector 0 */
@@ -377,6 +385,12 @@
#define PCR_VEC_DIS (1ul << (63-0)) /* Vec. disable (bit NA since POWER8) */
#define PCR_VSX_DIS (1ul << (63-1)) /* VSX disable (bit NA since POWER8) */
#define PCR_TM_DIS (1ul << (63-2)) /* Trans. memory disable (POWER8) */
+/*
+ * These bits are used in the function kvmppc_set_arch_compat() to specify and
+ * determine both the compatibility level which we want to emulate and the
+ * compatibility level which the host is capable of emulating.
+ */
+#define PCR_ARCH_207 0x8 /* Architecture 2.07 */
#define PCR_ARCH_206 0x4 /* Architecture 2.06 */
#define PCR_ARCH_205 0x2 /* Architecture 2.05 */
#define SPRN_HEIR 0x153 /* Hypervisor Emulated Instruction Register */
@@ -1218,6 +1232,7 @@
#define PVR_ARCH_206 0x0f000003
#define PVR_ARCH_206p 0x0f100003
#define PVR_ARCH_207 0x0f000004
+#define PVR_ARCH_300 0x0f000005
/* Macros for setting and retrieving special purpose registers */
#ifndef __ASSEMBLY__
diff --git a/arch/powerpc/include/asm/spinlock.h b/arch/powerpc/include/asm/spinlock.h
index fa37fe93bc02..8c1b913de6d7 100644
--- a/arch/powerpc/include/asm/spinlock.h
+++ b/arch/powerpc/include/asm/spinlock.h
@@ -52,6 +52,14 @@
#define SYNC_IO
#endif
+#ifdef CONFIG_PPC_PSERIES
+#define vcpu_is_preempted vcpu_is_preempted
+static inline bool vcpu_is_preempted(int cpu)
+{
+ return !!(be32_to_cpu(lppaca_of(cpu).yield_count) & 1);
+}
+#endif
+
static __always_inline int arch_spin_value_unlocked(arch_spinlock_t lock)
{
return lock.slock == 0;
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index f6f68f73e858..609557569f65 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -28,6 +28,7 @@
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
+#define tlb_remove_check_page_size_change tlb_remove_check_page_size_change
extern void tlb_flush(struct mmu_gather *tlb);
@@ -46,17 +47,44 @@ static inline void __tlb_remove_tlb_entry(struct mmu_gather *tlb, pte_t *ptep,
#endif
}
+static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb,
+ unsigned int page_size)
+{
+ if (!tlb->page_size)
+ tlb->page_size = page_size;
+ else if (tlb->page_size != page_size) {
+ tlb_flush_mmu(tlb);
+ /*
+ * update the page size after flush for the new
+ * mmu_gather.
+ */
+ tlb->page_size = page_size;
+ }
+}
+
#ifdef CONFIG_SMP
static inline int mm_is_core_local(struct mm_struct *mm)
{
return cpumask_subset(mm_cpumask(mm),
topology_sibling_cpumask(smp_processor_id()));
}
+
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+ return cpumask_equal(mm_cpumask(mm),
+ cpumask_of(smp_processor_id()));
+}
+
#else
static inline int mm_is_core_local(struct mm_struct *mm)
{
return 1;
}
+
+static inline int mm_is_thread_local(struct mm_struct *mm)
+{
+ return 1;
+}
#endif
#endif /* __KERNEL__ */
diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h
index cf12c580f6b2..e8cdfec8d512 100644
--- a/arch/powerpc/include/asm/unistd.h
+++ b/arch/powerpc/include/asm/unistd.h
@@ -16,6 +16,10 @@
#define __NR__exit __NR_exit
+#define __IGNORE_pkey_mprotect
+#define __IGNORE_pkey_alloc
+#define __IGNORE_pkey_free
+
#ifndef __ASSEMBLY__
#include <linux/types.h>
diff --git a/arch/powerpc/include/asm/xilinx_intc.h b/arch/powerpc/include/asm/xilinx_intc.h
index 343612f8fece..3192d7f0a05b 100644
--- a/arch/powerpc/include/asm/xilinx_intc.h
+++ b/arch/powerpc/include/asm/xilinx_intc.h
@@ -14,7 +14,7 @@
#ifdef __KERNEL__
extern void __init xilinx_intc_init_tree(void);
-extern unsigned int xilinx_intc_get_irq(void);
+extern unsigned int xintc_get_irq(void);
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_XILINX_INTC_H */
diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index c93cf35ce379..3603b6f51b11 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -573,6 +573,10 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_SPRG9 (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xba)
#define KVM_REG_PPC_DBSR (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0xbb)
+/* POWER9 registers */
+#define KVM_REG_PPC_TIDR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbc)
+#define KVM_REG_PPC_PSSCR (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0xbd)
+
/* Transactional Memory checkpointed state:
* This is all GPRs, all VSX regs and a subset of SPRs
*/
@@ -596,6 +600,7 @@ struct kvm_get_htab_header {
#define KVM_REG_PPC_TM_VSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U32 | 0x67)
#define KVM_REG_PPC_TM_DSCR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x68)
#define KVM_REG_PPC_TM_TAR (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x69)
+#define KVM_REG_PPC_TM_XER (KVM_REG_PPC_TM | KVM_REG_SIZE_U64 | 0x6a)
/* PPC64 eXternal Interrupt Controller Specification */
#define KVM_DEV_XICS_GRP_SOURCES 1 /* 64-bit source attributes */
diff --git a/arch/powerpc/include/uapi/asm/socket.h b/arch/powerpc/include/uapi/asm/socket.h
index 1672e3398270..44583a52f882 100644
--- a/arch/powerpc/include/uapi/asm/socket.h
+++ b/arch/powerpc/include/uapi/asm/socket.h
@@ -97,4 +97,6 @@
#define SO_CNX_ADVICE 53
+#define SCM_TIMESTAMPING_OPT_STATS 54
+
#endif /* _ASM_POWERPC_SOCKET_H */
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index caec7bf3b99a..195a9fc8f81c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -487,6 +487,7 @@ int main(void)
/* book3s */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ DEFINE(KVM_TLB_SETS, offsetof(struct kvm, arch.tlb_sets));
DEFINE(KVM_SDR1, offsetof(struct kvm, arch.sdr1));
DEFINE(KVM_HOST_LPID, offsetof(struct kvm, arch.host_lpid));
DEFINE(KVM_HOST_LPCR, offsetof(struct kvm, arch.host_lpcr));
@@ -548,6 +549,8 @@ int main(void)
DEFINE(VCPU_TCSCR, offsetof(struct kvm_vcpu, arch.tcscr));
DEFINE(VCPU_ACOP, offsetof(struct kvm_vcpu, arch.acop));
DEFINE(VCPU_WORT, offsetof(struct kvm_vcpu, arch.wort));
+ DEFINE(VCPU_TID, offsetof(struct kvm_vcpu, arch.tid));
+ DEFINE(VCPU_PSSCR, offsetof(struct kvm_vcpu, arch.psscr));
DEFINE(VCORE_ENTRY_EXIT, offsetof(struct kvmppc_vcore, entry_exit_map));
DEFINE(VCORE_IN_GUEST, offsetof(struct kvmppc_vcore, in_guest));
DEFINE(VCORE_NAPPING_THREADS, offsetof(struct kvmppc_vcore, napping_threads));
@@ -569,6 +572,7 @@ int main(void)
DEFINE(VCPU_VRS_TM, offsetof(struct kvm_vcpu, arch.vr_tm.vr));
DEFINE(VCPU_VRSAVE_TM, offsetof(struct kvm_vcpu, arch.vrsave_tm));
DEFINE(VCPU_CR_TM, offsetof(struct kvm_vcpu, arch.cr_tm));
+ DEFINE(VCPU_XER_TM, offsetof(struct kvm_vcpu, arch.xer_tm));
DEFINE(VCPU_LR_TM, offsetof(struct kvm_vcpu, arch.lr_tm));
DEFINE(VCPU_CTR_TM, offsetof(struct kvm_vcpu, arch.ctr_tm));
DEFINE(VCPU_AMR_TM, offsetof(struct kvm_vcpu, arch.amr_tm));
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index 52ff3f025437..f3e1f5d29dce 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -98,8 +98,8 @@ _GLOBAL(__setup_cpu_power9)
li r0,0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
- ori r3, r3, LPCR_PECEDH
- ori r3, r3, LPCR_HVICE
+ LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
+ or r3, r3, r4
bl __init_LPCR
bl __init_HFSCR
bl __init_tlb_power9
@@ -118,8 +118,8 @@ _GLOBAL(__restore_cpu_power9)
li r0,0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
- ori r3, r3, LPCR_PECEDH
- ori r3, r3, LPCR_HVICE
+ LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
+ or r3, r3, r4
bl __init_LPCR
bl __init_HFSCR
bl __init_tlb_power9
@@ -174,7 +174,7 @@ __init_FSCR:
__init_HFSCR:
mfspr r3,SPRN_HFSCR
ori r3,r3,HFSCR_TAR|HFSCR_TM|HFSCR_BHRB|HFSCR_PM|\
- HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB
+ HFSCR_DSCR|HFSCR_VECVSX|HFSCR_FP|HFSCR_EBB|HFSCR_MSGP
mtspr SPRN_HFSCR,r3
blr
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index a62be72da274..5c31369435f2 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -671,8 +671,10 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
/* Clear frozen state */
rc = eeh_clear_pe_frozen_state(pe, false);
- if (rc)
+ if (rc) {
+ pci_unlock_rescan_remove();
return rc;
+ }
/* Give the system 5 seconds to finish running the user-space
* hotplug shutdown scripts, e.g. ifdown for ethernet. Yes,
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f129408c6022..1ba82ea90230 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -95,19 +95,40 @@ __start_interrupts:
/* No virt vectors corresponding with 0x0..0x100 */
EXC_VIRT_NONE(0x4000, 0x4100)
-EXC_REAL_BEGIN(system_reset, 0x100, 0x200)
- SET_SCRATCH0(r13)
+
#ifdef CONFIG_PPC_P7_NAP
-BEGIN_FTR_SECTION
- /* Running native on arch 2.06 or later, check if we are
- * waking up from nap/sleep/winkle.
+ /*
+ * If running native on arch 2.06 or later, check if we are waking up
+ * from nap/sleep/winkle, and branch to idle handler.
*/
- mfspr r13,SPRN_SRR1
- rlwinm. r13,r13,47-31,30,31
- beq 9f
+#define IDLETEST(n) \
+ BEGIN_FTR_SECTION ; \
+ mfspr r10,SPRN_SRR1 ; \
+ rlwinm. r10,r10,47-31,30,31 ; \
+ beq- 1f ; \
+ cmpwi cr3,r10,2 ; \
+ BRANCH_TO_COMMON(r10, system_reset_idle_common) ; \
+1: \
+ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+#else
+#define IDLETEST NOTEST
+#endif
- cmpwi cr3,r13,2
+EXC_REAL_BEGIN(system_reset, 0x100, 0x200)
+ SET_SCRATCH0(r13)
GET_PACA(r13)
+ clrrdi r13,r13,1 /* Last bit of HSPRG0 is set if waking from winkle */
+ EXCEPTION_PROLOG_PSERIES_PACA(PACA_EXGEN, system_reset_common, EXC_STD,
+ IDLETEST, 0x100)
+
+EXC_REAL_END(system_reset, 0x100, 0x200)
+EXC_VIRT_NONE(0x4100, 0x4200)
+
+#ifdef CONFIG_PPC_P7_NAP
+EXC_COMMON_BEGIN(system_reset_idle_common)
+BEGIN_FTR_SECTION
+ GET_PACA(r13) /* Restore HSPRG0 to get the winkle bit in r13 */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
bl pnv_restore_hyp_resource
li r0,PNV_THREAD_RUNNING
@@ -130,14 +151,8 @@ BEGIN_FTR_SECTION
blt cr3,2f
b pnv_wakeup_loss
2: b pnv_wakeup_noloss
+#endif
-9:
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
-#endif /* CONFIG_PPC_P7_NAP */
- EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD,
- NOTEST, 0x100)
-EXC_REAL_END(system_reset, 0x100, 0x200)
-EXC_VIRT_NONE(0x4100, 0x4200)
EXC_COMMON(system_reset_common, 0x100, system_reset_exception)
#ifdef CONFIG_PPC_PSERIES
@@ -159,7 +174,7 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x300)
SET_SCRATCH0(r13) /* save r13 */
/*
* Running native on arch 2.06 or later, we may wakeup from winkle
- * inside machine check. If yes, then last bit of HSPGR0 would be set
+ * inside machine check. If yes, then last bit of HSPRG0 would be set
* to 1. Hence clear it unconditionally.
*/
GET_PACA(r13)
@@ -378,7 +393,7 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
/*
* Go back to winkle. Please note that this thread was woken up in
* machine check from winkle and have not restored the per-subcore
- * state. Hence before going back to winkle, set last bit of HSPGR0
+ * state. Hence before going back to winkle, set last bit of HSPRG0
* to 1. This will make sure that if this thread gets woken up
* again at reset vector 0x100 then it will get chance to restore
* the subcore state.
@@ -817,10 +832,8 @@ EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00)
TRAMP_KVM(PACA_EXGEN, 0xb00)
EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
-
-#define LOAD_SYSCALL_HANDLER(reg) \
- ld reg,PACAKBASE(r13); \
- ori reg,reg,(ABS_ADDR(system_call_common))@l;
+#define LOAD_SYSCALL_HANDLER(reg) \
+ __LOAD_HANDLER(reg, system_call_common)
/* Syscall routine is used twice, in reloc-off and reloc-on paths */
#define SYSCALL_PSERIES_1 \
diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c
index 9781c69eae57..03d089b3ed72 100644
--- a/arch/powerpc/kernel/hw_breakpoint.c
+++ b/arch/powerpc/kernel/hw_breakpoint.c
@@ -275,7 +275,7 @@ int hw_breakpoint_handler(struct die_args *args)
if (!stepped) {
WARN(1, "Unable to handle hardware breakpoint. Breakpoint at "
"0x%lx will be disabled.", info->address);
- perf_event_disable(bp);
+ perf_event_disable_inatomic(bp);
goto out;
}
/*
diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S
index bd739fed26e3..72dac0b58061 100644
--- a/arch/powerpc/kernel/idle_book3s.S
+++ b/arch/powerpc/kernel/idle_book3s.S
@@ -90,6 +90,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300)
* Threads will spin in HMT_LOW until the lock bit is cleared.
* r14 - pointer to core_idle_state
* r15 - used to load contents of core_idle_state
+ * r9 - used as a temporary variable
*/
core_idle_lock_held:
@@ -99,6 +100,8 @@ core_idle_lock_held:
bne 3b
HMT_MEDIUM
lwarx r15,0,r14
+ andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT
+ bne core_idle_lock_held
blr
/*
@@ -163,12 +166,6 @@ _GLOBAL(pnv_powersave_common)
std r9,_MSR(r1)
std r1,PACAR1(r13)
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- /* Tell KVM we're entering idle */
- li r4,KVM_HWTHREAD_IN_IDLE
- stb r4,HSTATE_HWTHREAD_STATE(r13)
-#endif
-
/*
* Go to real mode to do the nap, as required by the architecture.
* Also, we need to be in real mode before setting hwthread_state,
@@ -185,6 +182,26 @@ _GLOBAL(pnv_powersave_common)
.globl pnv_enter_arch207_idle_mode
pnv_enter_arch207_idle_mode:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* Tell KVM we're entering idle */
+ li r4,KVM_HWTHREAD_IN_IDLE
+ /******************************************************/
+ /* N O T E W E L L ! ! ! N O T E W E L L */
+ /* The following store to HSTATE_HWTHREAD_STATE(r13) */
+ /* MUST occur in real mode, i.e. with the MMU off, */
+ /* and the MMU must stay off until we clear this flag */
+ /* and test HSTATE_HWTHREAD_REQ(r13) in the system */
+ /* reset interrupt vector in exceptions-64s.S. */
+ /* The reason is that another thread can switch the */
+ /* MMU to a guest context whenever this flag is set */
+ /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */
+ /* that would potentially cause this thread to start */
+ /* executing instructions from guest memory in */
+ /* hypervisor mode, leading to a host crash or data */
+ /* corruption, or worse. */
+ /******************************************************/
+ stb r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
stb r3,PACA_THREAD_IDLE_STATE(r13)
cmpwi cr3,r3,PNV_THREAD_SLEEP
bge cr3,2f
@@ -250,6 +267,12 @@ enter_winkle:
* r3 - requested stop state
*/
power_enter_stop:
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+ /* Tell KVM we're entering idle */
+ li r4,KVM_HWTHREAD_IN_IDLE
+ /* DO THIS IN REAL MODE! See comment above. */
+ stb r4,HSTATE_HWTHREAD_STATE(r13)
+#endif
/*
* Check if the requested state is a deep idle state.
*/
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 9e7c10fe205f..49a680d5ae37 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -1012,7 +1012,7 @@ void restore_tm_state(struct pt_regs *regs)
/* Ensure that restore_math() will restore */
if (msr_diff & MSR_FP)
current->thread.load_fp = 1;
-#ifdef CONFIG_ALIVEC
+#ifdef CONFIG_ALTIVEC
if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC)
current->thread.load_vec = 1;
#endif
@@ -1215,7 +1215,7 @@ static void show_instructions(struct pt_regs *regs)
int instr;
if (!(i % 8))
- printk("\n");
+ pr_cont("\n");
#if !defined(CONFIG_BOOKE)
/* If executing with the IMMU off, adjust pc rather
@@ -1227,18 +1227,18 @@ static void show_instructions(struct pt_regs *regs)
if (!__kernel_text_address(pc) ||
probe_kernel_address((unsigned int __user *)pc, instr)) {
- printk(KERN_CONT "XXXXXXXX ");
+ pr_cont("XXXXXXXX ");
} else {
if (regs->nip == pc)
- printk(KERN_CONT "<%08x> ", instr);
+ pr_cont("<%08x> ", instr);
else
- printk(KERN_CONT "%08x ", instr);
+ pr_cont("%08x ", instr);
}
pc += sizeof(int);
}
- printk("\n");
+ pr_cont("\n");
}
struct regbit {
@@ -1282,7 +1282,7 @@ static void print_bits(unsigned long val, struct regbit *bits, const char *sep)
for (; bits->bit; ++bits)
if (val & bits->bit) {
- printk("%s%s", s, bits->name);
+ pr_cont("%s%s", s, bits->name);
s = sep;
}
}
@@ -1305,9 +1305,9 @@ static void print_tm_bits(unsigned long val)
* T: Transactional (bit 34)
*/
if (val & (MSR_TM | MSR_TS_S | MSR_TS_T)) {
- printk(",TM[");
+ pr_cont(",TM[");
print_bits(val, msr_tm_bits, "");
- printk("]");
+ pr_cont("]");
}
}
#else
@@ -1316,10 +1316,10 @@ static void print_tm_bits(unsigned long val) {}
static void print_msr_bits(unsigned long val)
{
- printk("<");
+ pr_cont("<");
print_bits(val, msr_bits, ",");
print_tm_bits(val);
- printk(">");
+ pr_cont(">");
}
#ifdef CONFIG_PPC64
@@ -1347,29 +1347,29 @@ void show_regs(struct pt_regs * regs)
printk(" CR: %08lx XER: %08lx\n", regs->ccr, regs->xer);
trap = TRAP(regs);
if ((regs->trap != 0xc00) && cpu_has_feature(CPU_FTR_CFAR))
- printk("CFAR: "REG" ", regs->orig_gpr3);
+ pr_cont("CFAR: "REG" ", regs->orig_gpr3);
if (trap == 0x200 || trap == 0x300 || trap == 0x600)
#if defined(CONFIG_4xx) || defined(CONFIG_BOOKE)
- printk("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
+ pr_cont("DEAR: "REG" ESR: "REG" ", regs->dar, regs->dsisr);
#else
- printk("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
+ pr_cont("DAR: "REG" DSISR: %08lx ", regs->dar, regs->dsisr);
#endif
#ifdef CONFIG_PPC64
- printk("SOFTE: %ld ", regs->softe);
+ pr_cont("SOFTE: %ld ", regs->softe);
#endif
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (MSR_TM_ACTIVE(regs->msr))
- printk("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
+ pr_cont("\nPACATMSCRATCH: %016llx ", get_paca()->tm_scratch);
#endif
for (i = 0; i < 32; i++) {
if ((i % REGS_PER_LINE) == 0)
- printk("\nGPR%02d: ", i);
- printk(REG " ", regs->gpr[i]);
+ pr_cont("\nGPR%02d: ", i);
+ pr_cont(REG " ", regs->gpr[i]);
if (i == LAST_VOLATILE && !FULL_REGS(regs))
break;
}
- printk("\n");
+ pr_cont("\n");
#ifdef CONFIG_KALLSYMS
/*
* Lookup NIP late so we have the best change of getting the
@@ -1900,14 +1900,14 @@ void show_stack(struct task_struct *tsk, unsigned long *stack)
printk("["REG"] ["REG"] %pS", sp, ip, (void *)ip);
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if ((ip == rth) && curr_frame >= 0) {
- printk(" (%pS)",
+ pr_cont(" (%pS)",
(void *)current->ret_stack[curr_frame].ret);
curr_frame--;
}
#endif
if (firstframe)
- printk(" (unreliable)");
- printk("\n");
+ pr_cont(" (unreliable)");
+ pr_cont("\n");
}
firstframe = 0;
diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c
index f52b7db327c8..010b7b310237 100644
--- a/arch/powerpc/kernel/ptrace32.c
+++ b/arch/powerpc/kernel/ptrace32.c
@@ -74,7 +74,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
break;
copied = access_process_vm(child, (u64)addrOthers, &tmp,
- sizeof(tmp), 0);
+ sizeof(tmp), FOLL_FORCE);
if (copied != sizeof(tmp))
break;
ret = put_user(tmp, (u32 __user *)data);
@@ -179,7 +179,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
break;
ret = 0;
if (access_process_vm(child, (u64)addrOthers, &tmp,
- sizeof(tmp), 1) == sizeof(tmp))
+ sizeof(tmp),
+ FOLL_FORCE | FOLL_WRITE) == sizeof(tmp))
break;
ret = -EIO;
break;
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 7ac8e6eaab5b..8d586cff8a41 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -226,17 +226,25 @@ static void __init configure_exceptions(void)
if (firmware_has_feature(FW_FEATURE_OPAL))
opal_configure_cores();
- /* Enable AIL if supported, and we are in hypervisor mode */
- if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
- early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
- unsigned long lpcr = mfspr(SPRN_LPCR);
- mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
- }
+ /* AIL on native is done in cpu_ready_for_interrupts() */
}
}
static void cpu_ready_for_interrupts(void)
{
+ /*
+ * Enable AIL if supported, and we are in hypervisor mode. This
+ * is called once for every processor.
+ *
+ * If we are not in hypervisor mode the job is done once for
+ * the whole partition in configure_exceptions().
+ */
+ if (early_cpu_has_feature(CPU_FTR_HVMODE) &&
+ early_cpu_has_feature(CPU_FTR_ARCH_207S)) {
+ unsigned long lpcr = mfspr(SPRN_LPCR);
+ mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3);
+ }
+
/* Set IR and DR in PACA MSR */
get_paca()->kernel_msr = MSR_KERNEL;
}
diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c
index c4f1d1f7bae0..c1fb255a60d6 100644
--- a/arch/powerpc/kernel/sysfs.c
+++ b/arch/powerpc/kernel/sysfs.c
@@ -703,7 +703,7 @@ static struct device_attribute pa6t_attrs[] = {
#endif /* HAS_PPC_PMC_PA6T */
#endif /* HAS_PPC_PMC_CLASSIC */
-static void register_cpu_online(unsigned int cpu)
+static int register_cpu_online(unsigned int cpu)
{
struct cpu *c = &per_cpu(cpu_devices, cpu);
struct device *s = &c->dev;
@@ -782,11 +782,12 @@ static void register_cpu_online(unsigned int cpu)
}
#endif
cacheinfo_cpu_online(cpu);
+ return 0;
}
-#ifdef CONFIG_HOTPLUG_CPU
-static void unregister_cpu_online(unsigned int cpu)
+static int unregister_cpu_online(unsigned int cpu)
{
+#ifdef CONFIG_HOTPLUG_CPU
struct cpu *c = &per_cpu(cpu_devices, cpu);
struct device *s = &c->dev;
struct device_attribute *attrs, *pmc_attrs;
@@ -863,6 +864,8 @@ static void unregister_cpu_online(unsigned int cpu)
}
#endif
cacheinfo_cpu_offline(cpu);
+#endif /* CONFIG_HOTPLUG_CPU */
+ return 0;
}
#ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
@@ -883,32 +886,6 @@ ssize_t arch_cpu_release(const char *buf, size_t count)
}
#endif /* CONFIG_ARCH_CPU_PROBE_RELEASE */
-#endif /* CONFIG_HOTPLUG_CPU */
-
-static int sysfs_cpu_notify(struct notifier_block *self,
- unsigned long action, void *hcpu)
-{
- unsigned int cpu = (unsigned int)(long)hcpu;
-
- switch (action) {
- case CPU_ONLINE:
- case CPU_ONLINE_FROZEN:
- register_cpu_online(cpu);
- break;
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- unregister_cpu_online(cpu);
- break;
-#endif
- }
- return NOTIFY_OK;
-}
-
-static struct notifier_block sysfs_cpu_nb = {
- .notifier_call = sysfs_cpu_notify,
-};
-
static DEFINE_MUTEX(cpu_mutex);
int cpu_add_dev_attr(struct device_attribute *attr)
@@ -1023,12 +1000,10 @@ static DEVICE_ATTR(physical_id, 0444, show_physical_id, NULL);
static int __init topology_init(void)
{
- int cpu;
+ int cpu, r;
register_nodes();
- cpu_notifier_register_begin();
-
for_each_possible_cpu(cpu) {
struct cpu *c = &per_cpu(cpu_devices, cpu);
@@ -1047,15 +1022,10 @@ static int __init topology_init(void)
device_create_file(&c->dev, &dev_attr_physical_id);
}
-
- if (cpu_online(cpu))
- register_cpu_online(cpu);
}
-
- __register_cpu_notifier(&sysfs_cpu_nb);
-
- cpu_notifier_register_done();
-
+ r = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "powerpc/topology:online",
+ register_cpu_online, unregister_cpu_online);
+ WARN_ON(r < 0);
#ifdef CONFIG_PPC64
sysfs_create_dscr_default();
#endif /* CONFIG_PPC64 */
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index bc3f7d0d7b79..be9751f1cb2a 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -164,8 +164,6 @@ u64 __cputime_sec_factor;
EXPORT_SYMBOL(__cputime_sec_factor);
u64 __cputime_clockt_factor;
EXPORT_SYMBOL(__cputime_clockt_factor);
-DEFINE_PER_CPU(unsigned long, cputime_last_delta);
-DEFINE_PER_CPU(unsigned long, cputime_scaled_last_delta);
cputime_t cputime_one_jiffy;
@@ -360,7 +358,8 @@ void vtime_account_system(struct task_struct *tsk)
unsigned long delta, sys_scaled, stolen;
delta = vtime_delta(tsk, &sys_scaled, &stolen);
- account_system_time(tsk, 0, delta, sys_scaled);
+ account_system_time(tsk, 0, delta);
+ tsk->stimescaled += sys_scaled;
if (stolen)
account_steal_time(stolen);
}
@@ -393,7 +392,8 @@ void vtime_account_user(struct task_struct *tsk)
acct->user_time = 0;
acct->user_time_scaled = 0;
acct->utime_sspurr = 0;
- account_user_time(tsk, utime, utimescaled);
+ account_user_time(tsk, utime);
+ tsk->utimescaled += utimescaled;
}
#ifdef CONFIG_PPC32
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 8295f51c1a5f..7394b770ae1f 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -94,8 +94,17 @@ SECTIONS
* detected, and will result in a crash at boot due to offsets being
* wrong.
*/
+#ifdef CONFIG_PPC64
+ /*
+ * BLOCK(0) overrides the default output section alignment because
+ * this needs to start right after .head.text in order for fixed
+ * section placement to work.
+ */
+ .text BLOCK(0) : AT(ADDR(.text) - LOAD_OFFSET) {
+#else
.text : AT(ADDR(.text) - LOAD_OFFSET) {
ALIGN_FUNCTION();
+#endif
/* careful! __ftr_alt_* sections need to be close to .text */
*(.text .fixup __ftr_alt_* .ref.text)
SCHED_TEXT
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 05f09ae82587..b795dd1ac2ef 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -88,6 +88,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
/* 128 (2**7) bytes in each HPTEG */
kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
+ atomic64_set(&kvm->arch.mmio_update, 0);
+
/* Allocate reverse map array */
rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
if (!rev) {
@@ -255,7 +257,7 @@ static void kvmppc_mmu_book3s_64_hv_reset_msr(struct kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, msr);
}
-long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
+static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags,
long pte_index, unsigned long pteh,
unsigned long ptel, unsigned long *pte_idx_ret)
{
@@ -312,7 +314,7 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
struct kvmppc_slb *slbe;
unsigned long slb_v;
unsigned long pp, key;
- unsigned long v, gr;
+ unsigned long v, orig_v, gr;
__be64 *hptep;
int index;
int virtmode = vcpu->arch.shregs.msr & (data ? MSR_DR : MSR_IR);
@@ -337,10 +339,12 @@ static int kvmppc_mmu_book3s_64_hv_xlate(struct kvm_vcpu *vcpu, gva_t eaddr,
return -ENOENT;
}
hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
- v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
+ v = orig_v = be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ v = hpte_new_to_old_v(v, be64_to_cpu(hptep[1]));
gr = kvm->arch.revmap[index].guest_rpte;
- unlock_hpte(hptep, v);
+ unlock_hpte(hptep, orig_v);
preempt_enable();
gpte->eaddr = eaddr;
@@ -438,6 +442,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
{
struct kvm *kvm = vcpu->kvm;
unsigned long hpte[3], r;
+ unsigned long hnow_v, hnow_r;
__be64 *hptep;
unsigned long mmu_seq, psize, pte_size;
unsigned long gpa_base, gfn_base;
@@ -451,6 +456,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int writing, write_ok;
struct vm_area_struct *vma;
unsigned long rcbits;
+ long mmio_update;
/*
* Real-mode code has already searched the HPT and found the
@@ -460,6 +466,19 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
if (ea != vcpu->arch.pgfault_addr)
return RESUME_GUEST;
+
+ if (vcpu->arch.pgfault_cache) {
+ mmio_update = atomic64_read(&kvm->arch.mmio_update);
+ if (mmio_update == vcpu->arch.pgfault_cache->mmio_update) {
+ r = vcpu->arch.pgfault_cache->rpte;
+ psize = hpte_page_size(vcpu->arch.pgfault_hpte[0], r);
+ gpa_base = r & HPTE_R_RPN & ~(psize - 1);
+ gfn_base = gpa_base >> PAGE_SHIFT;
+ gpa = gpa_base | (ea & (psize - 1));
+ return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea,
+ dsisr & DSISR_ISSTORE);
+ }
+ }
index = vcpu->arch.pgfault_index;
hptep = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
rev = &kvm->arch.revmap[index];
@@ -472,6 +491,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
unlock_hpte(hptep, hpte[0]);
preempt_enable();
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hpte[0] = hpte_new_to_old_v(hpte[0], hpte[1]);
+ hpte[1] = hpte_new_to_old_r(hpte[1]);
+ }
if (hpte[0] != vcpu->arch.pgfault_hpte[0] ||
hpte[1] != vcpu->arch.pgfault_hpte[1])
return RESUME_GUEST;
@@ -575,16 +598,22 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
*/
if (psize < PAGE_SIZE)
psize = PAGE_SIZE;
- r = (r & ~(HPTE_R_PP0 - psize)) | ((pfn << PAGE_SHIFT) & ~(psize - 1));
+ r = (r & HPTE_R_KEY_HI) | (r & ~(HPTE_R_PP0 - psize)) |
+ ((pfn << PAGE_SHIFT) & ~(psize - 1));
if (hpte_is_writable(r) && !write_ok)
r = hpte_make_readonly(r);
ret = RESUME_GUEST;
preempt_disable();
while (!try_lock_hpte(hptep, HPTE_V_HVLOCK))
cpu_relax();
- if ((be64_to_cpu(hptep[0]) & ~HPTE_V_HVLOCK) != hpte[0] ||
- be64_to_cpu(hptep[1]) != hpte[1] ||
- rev->guest_rpte != hpte[2])
+ hnow_v = be64_to_cpu(hptep[0]);
+ hnow_r = be64_to_cpu(hptep[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hnow_v = hpte_new_to_old_v(hnow_v, hnow_r);
+ hnow_r = hpte_new_to_old_r(hnow_r);
+ }
+ if ((hnow_v & ~HPTE_V_HVLOCK) != hpte[0] || hnow_r != hpte[1] ||
+ rev->guest_rpte != hpte[2])
/* HPTE has been changed under us; let the guest retry */
goto out_unlock;
hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
@@ -615,6 +644,10 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
}
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ r = hpte_old_to_new_r(hpte[0], r);
+ hpte[0] = hpte_old_to_new_v(hpte[0]);
+ }
hptep[1] = cpu_to_be64(r);
eieio();
__unlock_hpte(hptep, hpte[0]);
@@ -758,6 +791,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp,
hpte_rpn(ptel, psize) == gfn) {
hptep[0] |= cpu_to_be64(HPTE_V_ABSENT);
kvmppc_invalidate_hpte(kvm, hptep, i);
+ hptep[1] &= ~cpu_to_be64(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
/* Harvest R and C */
rcbits = be64_to_cpu(hptep[1]) & (HPTE_R_R | HPTE_R_C);
*rmapp |= rcbits << KVMPPC_RMAP_RC_SHIFT;
@@ -1165,7 +1199,7 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
unsigned long *hpte, struct revmap_entry *revp,
int want_valid, int first_pass)
{
- unsigned long v, r;
+ unsigned long v, r, hr;
unsigned long rcbits_unset;
int ok = 1;
int valid, dirty;
@@ -1192,6 +1226,11 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
while (!try_lock_hpte(hptp, HPTE_V_HVLOCK))
cpu_relax();
v = be64_to_cpu(hptp[0]);
+ hr = be64_to_cpu(hptp[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ v = hpte_new_to_old_v(v, hr);
+ hr = hpte_new_to_old_r(hr);
+ }
/* re-evaluate valid and dirty from synchronized HPTE value */
valid = !!(v & HPTE_V_VALID);
@@ -1199,8 +1238,8 @@ static long record_hpte(unsigned long flags, __be64 *hptp,
/* Harvest R and C into guest view if necessary */
rcbits_unset = ~revp->guest_rpte & (HPTE_R_R | HPTE_R_C);
- if (valid && (rcbits_unset & be64_to_cpu(hptp[1]))) {
- revp->guest_rpte |= (be64_to_cpu(hptp[1]) &
+ if (valid && (rcbits_unset & hr)) {
+ revp->guest_rpte |= (hr &
(HPTE_R_R | HPTE_R_C)) | HPTE_GR_MODIFIED;
dirty = 1;
}
@@ -1608,7 +1647,7 @@ static ssize_t debugfs_htab_read(struct file *file, char __user *buf,
return ret;
}
-ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
+static ssize_t debugfs_htab_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos)
{
return -EACCES;
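
Note (illustration, not part of the patch above): the new kvm->arch.mmio_update counter is a cheap bulk-invalidation scheme. Every memslot change bumps the per-VM generation, and a cached emulated-MMIO translation (vcpu->arch.pgfault_cache here, the per-vcpu ring in book3s_hv_rm_mmu.c) is only trusted while the generation it recorded still matches. A standalone sketch of the pattern in userspace C; the names are made up, the kernel uses atomic64_t and the structures added by this patch:

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdint.h>

	struct cached_xlate {
		uint64_t gen;	/* generation this entry was filled under */
		uint64_t ea;	/* lookup key */
		uint64_t gpa;	/* cached result */
	};

	static _Atomic uint64_t mmio_generation;

	/* Invalidate every cached entry at once, without touching any of them. */
	static void invalidate_all(void)
	{
		atomic_fetch_add(&mmio_generation, 1);
	}

	/* An entry is usable only if it was filled under the current generation. */
	static bool lookup(const struct cached_xlate *e, uint64_t ea, uint64_t *gpa)
	{
		if (e->gen != atomic_load(&mmio_generation) || e->ea != ea)
			return false;
		*gpa = e->gpa;
		return true;
	}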
diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c
index d461c440889a..e4c4ea973e57 100644
--- a/arch/powerpc/kvm/book3s_64_vio_hv.c
+++ b/arch/powerpc/kvm/book3s_64_vio_hv.c
@@ -39,7 +39,6 @@
#include <asm/udbg.h>
#include <asm/iommu.h>
#include <asm/tce.h>
-#include <asm/iommu.h>
#define TCES_PER_PAGE (PAGE_SIZE / sizeof(u64))
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 3686471be32b..8dcbe37a4dac 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -54,6 +54,9 @@
#include <asm/dbell.h>
#include <asm/hmi.h>
#include <asm/pnv-pci.h>
+#include <asm/mmu.h>
+#include <asm/opal.h>
+#include <asm/xics.h>
#include <linux/gfp.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
@@ -62,6 +65,7 @@
#include <linux/irqbypass.h>
#include <linux/module.h>
#include <linux/compiler.h>
+#include <linux/of.h>
#include "book3s.h"
@@ -104,23 +108,6 @@ module_param_cb(h_ipi_redirect, &module_param_ops, &h_ipi_redirect,
MODULE_PARM_DESC(h_ipi_redirect, "Redirect H_IPI wakeup to a free host core");
#endif
-/* Maximum halt poll interval defaults to KVM_HALT_POLL_NS_DEFAULT */
-static unsigned int halt_poll_max_ns = KVM_HALT_POLL_NS_DEFAULT;
-module_param(halt_poll_max_ns, uint, S_IRUGO | S_IWUSR);
-MODULE_PARM_DESC(halt_poll_max_ns, "Maximum halt poll time in ns");
-
-/* Factor by which the vcore halt poll interval is grown, default is to double
- */
-static unsigned int halt_poll_ns_grow = 2;
-module_param(halt_poll_ns_grow, int, S_IRUGO);
-MODULE_PARM_DESC(halt_poll_ns_grow, "Factor halt poll time is grown by");
-
-/* Factor by which the vcore halt poll interval is shrunk, default is to reset
- */
-static unsigned int halt_poll_ns_shrink;
-module_param(halt_poll_ns_shrink, int, S_IRUGO);
-MODULE_PARM_DESC(halt_poll_ns_shrink, "Factor halt poll time is shrunk by");
-
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
@@ -146,12 +133,21 @@ static inline struct kvm_vcpu *next_runnable_thread(struct kvmppc_vcore *vc,
static bool kvmppc_ipi_thread(int cpu)
{
+ unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
+
+ /* On POWER9 we can use msgsnd to IPI any cpu */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ msg |= get_hard_smp_processor_id(cpu);
+ smp_mb();
+ __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+ return true;
+ }
+
/* On POWER8 for IPIs to threads in the same core, use msgsnd */
if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
preempt_disable();
if (cpu_first_thread_sibling(cpu) ==
cpu_first_thread_sibling(smp_processor_id())) {
- unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
msg |= cpu_thread_in_core(cpu);
smp_mb();
__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
@@ -162,8 +158,12 @@ static bool kvmppc_ipi_thread(int cpu)
}
#if defined(CONFIG_PPC_ICP_NATIVE) && defined(CONFIG_SMP)
- if (cpu >= 0 && cpu < nr_cpu_ids && paca[cpu].kvm_hstate.xics_phys) {
- xics_wake_cpu(cpu);
+ if (cpu >= 0 && cpu < nr_cpu_ids) {
+ if (paca[cpu].kvm_hstate.xics_phys) {
+ xics_wake_cpu(cpu);
+ return true;
+ }
+ opal_int_set_mfrr(get_hard_smp_processor_id(cpu), IPI_PRIORITY);
return true;
}
#endif
@@ -299,41 +299,54 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr)
vcpu->arch.pvr = pvr;
}
+/* Dummy value used in computing PCR value below */
+#define PCR_ARCH_300 (PCR_ARCH_207 << 1)
+
static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat)
{
- unsigned long pcr = 0;
+ unsigned long host_pcr_bit = 0, guest_pcr_bit = 0;
struct kvmppc_vcore *vc = vcpu->arch.vcore;
+ /* We can (emulate) our own architecture version and anything older */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ host_pcr_bit = PCR_ARCH_300;
+ else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ host_pcr_bit = PCR_ARCH_207;
+ else if (cpu_has_feature(CPU_FTR_ARCH_206))
+ host_pcr_bit = PCR_ARCH_206;
+ else
+ host_pcr_bit = PCR_ARCH_205;
+
+ /* Determine lowest PCR bit needed to run guest in given PVR level */
+ guest_pcr_bit = host_pcr_bit;
if (arch_compat) {
switch (arch_compat) {
case PVR_ARCH_205:
- /*
- * If an arch bit is set in PCR, all the defined
- * higher-order arch bits also have to be set.
- */
- pcr = PCR_ARCH_206 | PCR_ARCH_205;
+ guest_pcr_bit = PCR_ARCH_205;
break;
case PVR_ARCH_206:
case PVR_ARCH_206p:
- pcr = PCR_ARCH_206;
+ guest_pcr_bit = PCR_ARCH_206;
break;
case PVR_ARCH_207:
+ guest_pcr_bit = PCR_ARCH_207;
+ break;
+ case PVR_ARCH_300:
+ guest_pcr_bit = PCR_ARCH_300;
break;
default:
return -EINVAL;
}
-
- if (!cpu_has_feature(CPU_FTR_ARCH_207S)) {
- /* POWER7 can't emulate POWER8 */
- if (!(pcr & PCR_ARCH_206))
- return -EINVAL;
- pcr &= ~PCR_ARCH_206;
- }
}
+ /* Check requested PCR bits don't exceed our capabilities */
+ if (guest_pcr_bit > host_pcr_bit)
+ return -EINVAL;
+
spin_lock(&vc->lock);
vc->arch_compat = arch_compat;
- vc->pcr = pcr;
+ /* Set all PCR bits for which guest_pcr_bit <= bit < host_pcr_bit */
+ vc->pcr = host_pcr_bit - guest_pcr_bit;
spin_unlock(&vc->lock);
return 0;
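
Note (illustration, not part of the patch above): the new vc->pcr computation relies on the PCR_ARCH_* compatibility bits being consecutive powers of two in ascending architecture order, which is what the dummy PCR_ARCH_300 definition above assumes. Under that assumption, host_pcr_bit - guest_pcr_bit is exactly the mask of all compat bits from the guest's level up to, but not including, the host's. A small standalone check, using illustrative values rather than the real reg.h constants:

	#include <assert.h>
	#include <stdio.h>

	/* Illustrative values only -- each level is the next power of two. */
	#define X_ARCH_205	0x1ul
	#define X_ARCH_206	0x2ul
	#define X_ARCH_207	0x4ul
	#define X_ARCH_300	0x8ul

	int main(void)
	{
		/*
		 * A 3.00 host running a guest limited to ISA 2.06: the PCR mask
		 * must contain the 2.06 and 2.07 compat bits and nothing else.
		 */
		unsigned long pcr = X_ARCH_300 - X_ARCH_206;

		assert(pcr == (X_ARCH_206 | X_ARCH_207));
		printf("pcr mask = %#lx\n", pcr);
		return 0;
	}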
@@ -945,6 +958,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu,
break;
case BOOK3S_INTERRUPT_EXTERNAL:
case BOOK3S_INTERRUPT_H_DOORBELL:
+ case BOOK3S_INTERRUPT_H_VIRT:
vcpu->stat.ext_intr_exits++;
r = RESUME_GUEST;
break;
@@ -1229,6 +1243,12 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_WORT:
*val = get_reg_val(id, vcpu->arch.wort);
break;
+ case KVM_REG_PPC_TIDR:
+ *val = get_reg_val(id, vcpu->arch.tid);
+ break;
+ case KVM_REG_PPC_PSSCR:
+ *val = get_reg_val(id, vcpu->arch.psscr);
+ break;
case KVM_REG_PPC_VPA_ADDR:
spin_lock(&vcpu->arch.vpa_update_lock);
*val = get_reg_val(id, vcpu->arch.vpa.next_gpa);
@@ -1288,6 +1308,9 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_TM_CR:
*val = get_reg_val(id, vcpu->arch.cr_tm);
break;
+ case KVM_REG_PPC_TM_XER:
+ *val = get_reg_val(id, vcpu->arch.xer_tm);
+ break;
case KVM_REG_PPC_TM_LR:
*val = get_reg_val(id, vcpu->arch.lr_tm);
break;
@@ -1427,6 +1450,12 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_WORT:
vcpu->arch.wort = set_reg_val(id, *val);
break;
+ case KVM_REG_PPC_TIDR:
+ vcpu->arch.tid = set_reg_val(id, *val);
+ break;
+ case KVM_REG_PPC_PSSCR:
+ vcpu->arch.psscr = set_reg_val(id, *val) & PSSCR_GUEST_VIS;
+ break;
case KVM_REG_PPC_VPA_ADDR:
addr = set_reg_val(id, *val);
r = -EINVAL;
@@ -1498,6 +1527,9 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
case KVM_REG_PPC_TM_CR:
vcpu->arch.cr_tm = set_reg_val(id, *val);
break;
+ case KVM_REG_PPC_TM_XER:
+ vcpu->arch.xer_tm = set_reg_val(id, *val);
+ break;
case KVM_REG_PPC_TM_LR:
vcpu->arch.lr_tm = set_reg_val(id, *val);
break;
@@ -1540,6 +1572,20 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
return r;
}
+/*
+ * On POWER9, threads are independent and can be in different partitions.
+ * Therefore we consider each thread to be a subcore.
+ * There is a restriction that all threads have to be in the same
+ * There is a restriction that all threads have to be in the same
+ * MMU mode (radix or HPT); since we only support HPT guests on an
+ * HPT host so far, that is not an impediment yet.
+ */
+static int threads_per_vcore(void)
+{
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return 1;
+ return threads_per_subcore;
+}
+
static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
{
struct kvmppc_vcore *vcore;
@@ -1554,7 +1600,7 @@ static struct kvmppc_vcore *kvmppc_vcore_create(struct kvm *kvm, int core)
init_swait_queue_head(&vcore->wq);
vcore->preempt_tb = TB_NIL;
vcore->lpcr = kvm->arch.lpcr;
- vcore->first_vcpuid = core * threads_per_subcore;
+ vcore->first_vcpuid = core * threads_per_vcore();
vcore->kvm = kvm;
INIT_LIST_HEAD(&vcore->preempt_list);
@@ -1717,7 +1763,7 @@ static struct kvm_vcpu *kvmppc_core_vcpu_create_hv(struct kvm *kvm,
int core;
struct kvmppc_vcore *vcore;
- core = id / threads_per_subcore;
+ core = id / threads_per_vcore();
if (core >= KVM_MAX_VCORES)
goto out;
@@ -1935,7 +1981,10 @@ static void kvmppc_wait_for_nap(void)
{
int cpu = smp_processor_id();
int i, loops;
+ int n_threads = threads_per_vcore();
+ if (n_threads <= 1)
+ return;
for (loops = 0; loops < 1000000; ++loops) {
/*
* Check if all threads are finished.
@@ -1943,17 +1992,17 @@ static void kvmppc_wait_for_nap(void)
* and the thread clears it when finished, so we look
* for any threads that still have a non-NULL vcore ptr.
*/
- for (i = 1; i < threads_per_subcore; ++i)
+ for (i = 1; i < n_threads; ++i)
if (paca[cpu + i].kvm_hstate.kvm_vcore)
break;
- if (i == threads_per_subcore) {
+ if (i == n_threads) {
HMT_medium();
return;
}
HMT_low();
}
HMT_medium();
- for (i = 1; i < threads_per_subcore; ++i)
+ for (i = 1; i < n_threads; ++i)
if (paca[cpu + i].kvm_hstate.kvm_vcore)
pr_err("KVM: CPU %d seems to be stuck\n", cpu + i);
}
@@ -2019,7 +2068,7 @@ static void kvmppc_vcore_preempt(struct kvmppc_vcore *vc)
vc->vcore_state = VCORE_PREEMPT;
vc->pcpu = smp_processor_id();
- if (vc->num_threads < threads_per_subcore) {
+ if (vc->num_threads < threads_per_vcore()) {
spin_lock(&lp->lock);
list_add_tail(&vc->preempt_list, &lp->list);
spin_unlock(&lp->lock);
@@ -2123,8 +2172,7 @@ static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip)
cip->subcore_threads[sub] = vc->num_threads;
cip->subcore_vm[sub] = vc->kvm;
init_master_vcore(vc);
- list_del(&vc->preempt_list);
- list_add_tail(&vc->preempt_list, &cip->vcs[sub]);
+ list_move_tail(&vc->preempt_list, &cip->vcs[sub]);
return true;
}
@@ -2254,12 +2302,12 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
* enter the guest. Only do this if it is the primary thread of the
* core (not if a subcore) that is entering the guest.
*/
-static inline void kvmppc_clear_host_core(int cpu)
+static inline int kvmppc_clear_host_core(unsigned int cpu)
{
int core;
if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
- return;
+ return 0;
/*
* Memory barrier can be omitted here as we will do a smp_wmb()
* later in kvmppc_start_thread and we need ensure that state is
@@ -2267,6 +2315,7 @@ static inline void kvmppc_clear_host_core(int cpu)
*/
core = cpu >> threads_shift;
kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 0;
+ return 0;
}
/*
@@ -2274,12 +2323,12 @@ static inline void kvmppc_clear_host_core(int cpu)
* Only need to do this if it is the primary thread of the core that is
* exiting.
*/
-static inline void kvmppc_set_host_core(int cpu)
+static inline int kvmppc_set_host_core(unsigned int cpu)
{
int core;
if (!kvmppc_host_rm_ops_hv || cpu_thread_in_core(cpu))
- return;
+ return 0;
/*
* Memory barrier can be omitted here because we do a spin_unlock
@@ -2287,6 +2336,7 @@ static inline void kvmppc_set_host_core(int cpu)
*/
core = cpu >> threads_shift;
kvmppc_host_rm_ops_hv->rm_core[core].rm_state.in_host = 1;
+ return 0;
}
/*
@@ -2307,6 +2357,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
unsigned long cmd_bit, stat_bit;
int pcpu, thr;
int target_threads;
+ int controlled_threads;
/*
* Remove from the list any threads that have a signal pending
@@ -2325,11 +2376,18 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
vc->preempt_tb = TB_NIL;
/*
+ * Number of threads that we will be controlling: the same as
+ * the number of threads per subcore, except on POWER9,
+ * where it's 1 because the threads are (mostly) independent.
+ */
+ controlled_threads = threads_per_vcore();
+
+ /*
* Make sure we are running on primary threads, and that secondary
* threads are offline. Also check if the number of threads in this
* guest are greater than the current system threads per guest.
*/
- if ((threads_per_core > 1) &&
+ if ((controlled_threads > 1) &&
((vc->num_threads > threads_per_subcore) || !on_primary_thread())) {
for_each_runnable_thread(i, vcpu, vc) {
vcpu->arch.ret = -EBUSY;
@@ -2345,7 +2403,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
*/
init_core_info(&core_info, vc);
pcpu = smp_processor_id();
- target_threads = threads_per_subcore;
+ target_threads = controlled_threads;
if (target_smt_mode && target_smt_mode < target_threads)
target_threads = target_smt_mode;
if (vc->num_threads < target_threads)
@@ -2381,7 +2439,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
smp_wmb();
}
pcpu = smp_processor_id();
- for (thr = 0; thr < threads_per_subcore; ++thr)
+ for (thr = 0; thr < controlled_threads; ++thr)
paca[pcpu + thr].kvm_hstate.kvm_split_mode = sip;
/* Initiate micro-threading (split-core) if required */
@@ -2491,7 +2549,7 @@ static noinline void kvmppc_run_core(struct kvmppc_vcore *vc)
}
/* Let secondaries go back to the offline loop */
- for (i = 0; i < threads_per_subcore; ++i) {
+ for (i = 0; i < controlled_threads; ++i) {
kvmppc_release_hwthread(pcpu + i);
if (sip && sip->napped[i])
kvmppc_ipi_thread(pcpu + i);
@@ -2543,9 +2601,6 @@ static void grow_halt_poll_ns(struct kvmppc_vcore *vc)
vc->halt_poll_ns = 10000;
else
vc->halt_poll_ns *= halt_poll_ns_grow;
-
- if (vc->halt_poll_ns > halt_poll_max_ns)
- vc->halt_poll_ns = halt_poll_max_ns;
}
static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
@@ -2556,7 +2611,8 @@ static void shrink_halt_poll_ns(struct kvmppc_vcore *vc)
vc->halt_poll_ns /= halt_poll_ns_shrink;
}
-/* Check to see if any of the runnable vcpus on the vcore have pending
+/*
+ * Check to see if any of the runnable vcpus on the vcore have pending
* exceptions or are no longer ceded
*/
static int kvmppc_vcore_check_block(struct kvmppc_vcore *vc)
@@ -2655,16 +2711,18 @@ out:
}
/* Adjust poll time */
- if (halt_poll_max_ns) {
+ if (halt_poll_ns) {
if (block_ns <= vc->halt_poll_ns)
;
/* We slept and blocked for longer than the max halt time */
- else if (vc->halt_poll_ns && block_ns > halt_poll_max_ns)
+ else if (vc->halt_poll_ns && block_ns > halt_poll_ns)
shrink_halt_poll_ns(vc);
/* We slept and our poll time is too small */
- else if (vc->halt_poll_ns < halt_poll_max_ns &&
- block_ns < halt_poll_max_ns)
+ else if (vc->halt_poll_ns < halt_poll_ns &&
+ block_ns < halt_poll_ns)
grow_halt_poll_ns(vc);
+ if (vc->halt_poll_ns > halt_poll_ns)
+ vc->halt_poll_ns = halt_poll_ns;
} else
vc->halt_poll_ns = 0;
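
Note (illustration, not part of the patch above): with the vcore-local module parameters removed, the adjustment now follows the generic halt_poll_ns knob and clamps to it after growing. A condensed restatement of the policy as a hypothetical helper (parameter names are made up; block_ns is how long the vcore actually slept):

	static unsigned long adjust_halt_poll(unsigned long cur, unsigned long block_ns,
					      unsigned long limit, unsigned int grow,
					      unsigned int shrink)
	{
		if (!limit)
			return 0;			/* polling disabled */
		if (block_ns <= cur)
			;				/* poll window was enough: keep it */
		else if (cur && block_ns > limit)
			cur = shrink ? cur / shrink : 0; /* overslept badly: back off */
		else if (cur < limit && block_ns < limit)
			cur = cur ? cur * grow : 10000;	/* nearly polled long enough: grow */
		if (cur > limit)
			cur = limit;			/* never exceed the module parameter */
		return cur;
	}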
@@ -2971,6 +3029,15 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm,
struct kvm_memslots *slots;
struct kvm_memory_slot *memslot;
+ /*
+ * If we are making a new memslot, it might make
+ * If we are creating a new memslot, an address that was previously
+ * cached as emulated MMIO may no longer be emulated MMIO, so
+ * invalidate all cached emulated-MMIO translations.
+ */
+ if (npages)
+ atomic64_inc(&kvm->arch.mmio_update);
+
if (npages && old->npages) {
/*
* If modifying a memslot, reset all the rmap dirty bits.
@@ -3015,6 +3082,22 @@ static void kvmppc_mmu_destroy_hv(struct kvm_vcpu *vcpu)
return;
}
+static void kvmppc_setup_partition_table(struct kvm *kvm)
+{
+ unsigned long dw0, dw1;
+
+ /* PS field - page size for VRMA */
+ dw0 = ((kvm->arch.vrma_slb_v & SLB_VSID_L) >> 1) |
+ ((kvm->arch.vrma_slb_v & SLB_VSID_LP) << 1);
+ /* HTABSIZE and HTABORG fields */
+ dw0 |= kvm->arch.sdr1;
+
+ /* Second dword has GR=0; other fields are unused since UPRT=0 */
+ dw1 = 0;
+
+ mmu_partition_table_set_entry(kvm->arch.lpid, dw0, dw1);
+}
+
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
int err = 0;
@@ -3066,17 +3149,20 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
psize == 0x1000000))
goto out_srcu;
- /* Update VRMASD field in the LPCR */
senc = slb_pgsize_encoding(psize);
kvm->arch.vrma_slb_v = senc | SLB_VSID_B_1T |
(VRMA_VSID << SLB_VSID_SHIFT_1T);
- /* the -4 is to account for senc values starting at 0x10 */
- lpcr = senc << (LPCR_VRMASD_SH - 4);
-
/* Create HPTEs in the hash page table for the VRMA */
kvmppc_map_vrma(vcpu, memslot, porder);
- kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
+ /* Update VRMASD field in the LPCR */
+ if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /* the -4 is to account for senc values starting at 0x10 */
+ lpcr = senc << (LPCR_VRMASD_SH - 4);
+ kvmppc_update_lpcr(kvm, lpcr, LPCR_VRMASD);
+ } else {
+ kvmppc_setup_partition_table(kvm);
+ }
/* Order updates to kvm->arch.lpcr etc. vs. hpte_setup_done */
smp_wmb();
@@ -3094,36 +3180,6 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
}
#ifdef CONFIG_KVM_XICS
-static int kvmppc_cpu_notify(struct notifier_block *self, unsigned long action,
- void *hcpu)
-{
- unsigned long cpu = (long)hcpu;
-
- switch (action) {
- case CPU_UP_PREPARE:
- case CPU_UP_PREPARE_FROZEN:
- kvmppc_set_host_core(cpu);
- break;
-
-#ifdef CONFIG_HOTPLUG_CPU
- case CPU_DEAD:
- case CPU_DEAD_FROZEN:
- case CPU_UP_CANCELED:
- case CPU_UP_CANCELED_FROZEN:
- kvmppc_clear_host_core(cpu);
- break;
-#endif
- default:
- break;
- }
-
- return NOTIFY_OK;
-}
-
-static struct notifier_block kvmppc_cpu_notifier = {
- .notifier_call = kvmppc_cpu_notify,
-};
-
/*
* Allocate a per-core structure for managing state about which cores are
* running in the host versus the guest and for exchanging data between
@@ -3185,15 +3241,17 @@ void kvmppc_alloc_host_rm_ops(void)
return;
}
- register_cpu_notifier(&kvmppc_cpu_notifier);
-
+ cpuhp_setup_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE,
+ "ppc/kvm_book3s:prepare",
+ kvmppc_set_host_core,
+ kvmppc_clear_host_core);
put_online_cpus();
}
void kvmppc_free_host_rm_ops(void)
{
if (kvmppc_host_rm_ops_hv) {
- unregister_cpu_notifier(&kvmppc_cpu_notifier);
+ cpuhp_remove_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE);
kfree(kvmppc_host_rm_ops_hv->rm_core);
kfree(kvmppc_host_rm_ops_hv);
kvmppc_host_rm_ops_hv = NULL;
@@ -3219,14 +3277,18 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
* Since we don't flush the TLB when tearing down a VM,
* and this lpid might have previously been used,
* make sure we flush on each core before running the new VM.
+ * On POWER9, the tlbie in mmu_partition_table_set_entry()
+ * does this flush for us.
*/
- cpumask_setall(&kvm->arch.need_tlb_flush);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ cpumask_setall(&kvm->arch.need_tlb_flush);
/* Start out with the default set of hcalls enabled */
memcpy(kvm->arch.enabled_hcalls, default_enabled_hcalls,
sizeof(kvm->arch.enabled_hcalls));
- kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
+ if (!cpu_has_feature(CPU_FTR_ARCH_300))
+ kvm->arch.host_sdr1 = mfspr(SPRN_SDR1);
/* Init LPCR for virtual RMA mode */
kvm->arch.host_lpid = mfspr(SPRN_LPID);
@@ -3239,9 +3301,29 @@ static int kvmppc_core_init_vm_hv(struct kvm *kvm)
/* On POWER8 turn on online bit to enable PURR/SPURR */
if (cpu_has_feature(CPU_FTR_ARCH_207S))
lpcr |= LPCR_ONL;
+ /*
+ * On POWER9, VPM0 bit is reserved (VPM0=1 behaviour is assumed)
+ * Set HVICE bit to enable hypervisor virtualization interrupts.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ lpcr &= ~LPCR_VPM0;
+ lpcr |= LPCR_HVICE;
+ }
+
kvm->arch.lpcr = lpcr;
/*
+ * Work out how many sets the TLB has, for the use of
+ * the TLB invalidation loop in book3s_hv_rmhandlers.S.
+ */
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ kvm->arch.tlb_sets = POWER9_TLB_SETS_HASH; /* 256 */
+ else if (cpu_has_feature(CPU_FTR_ARCH_207S))
+ kvm->arch.tlb_sets = POWER8_TLB_SETS; /* 512 */
+ else
+ kvm->arch.tlb_sets = POWER7_TLB_SETS; /* 128 */
+
+ /*
* Track that we now have a HV mode VM active. This blocks secondary
* CPU threads from coming online.
*/
@@ -3305,9 +3387,9 @@ static int kvmppc_core_check_processor_compat_hv(void)
!cpu_has_feature(CPU_FTR_ARCH_206))
return -EIO;
/*
- * Disable KVM for Power9, untill the required bits merged.
+ * Disable KVM for Power9 in radix mode.
*/
- if (cpu_has_feature(CPU_FTR_ARCH_300))
+ if (cpu_has_feature(CPU_FTR_ARCH_300) && radix_enabled())
return -EIO;
return 0;
@@ -3661,6 +3743,23 @@ static int kvmppc_book3s_init_hv(void)
if (r)
return r;
+ /*
+ * We need a way of accessing the XICS interrupt controller,
+ * either directly, via paca[cpu].kvm_hstate.xics_phys, or
+ * indirectly, via OPAL.
+ */
+#ifdef CONFIG_SMP
+ if (!get_paca()->kvm_hstate.xics_phys) {
+ struct device_node *np;
+
+ np = of_find_compatible_node(NULL, NULL, "ibm,opal-intc");
+ if (!np) {
+ pr_err("KVM-HV: Cannot determine method for accessing XICS\n");
+ return -ENODEV;
+ }
+ }
+#endif
+
kvm_ops_hv.owner = THIS_MODULE;
kvmppc_hv_ops = &kvm_ops_hv;
@@ -3683,3 +3782,4 @@ module_exit(kvmppc_book3s_exit_hv);
MODULE_LICENSE("GPL");
MODULE_ALIAS_MISCDEV(KVM_MINOR);
MODULE_ALIAS("devname:kvm");
+
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 0c84d6bc8356..5bb24be0b346 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -26,6 +26,8 @@
#include <asm/dbell.h>
#include <asm/cputhreads.h>
#include <asm/io.h>
+#include <asm/opal.h>
+#include <asm/smp.h>
#define KVM_CMA_CHUNK_ORDER 18
@@ -205,12 +207,18 @@ static inline void rm_writeb(unsigned long paddr, u8 val)
void kvmhv_rm_send_ipi(int cpu)
{
unsigned long xics_phys;
+ unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
- /* On POWER8 for IPIs to threads in the same core, use msgsnd */
+ /* On POWER9 we can use msgsnd for any destination cpu. */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ msg |= get_hard_smp_processor_id(cpu);
+ __asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
+ return;
+ }
+ /* On POWER8 for IPIs to threads in the same core, use msgsnd. */
if (cpu_has_feature(CPU_FTR_ARCH_207S) &&
cpu_first_thread_sibling(cpu) ==
cpu_first_thread_sibling(raw_smp_processor_id())) {
- unsigned long msg = PPC_DBELL_TYPE(PPC_DBELL_SERVER);
msg |= cpu_thread_in_core(cpu);
__asm__ __volatile__ (PPC_MSGSND(%0) : : "r" (msg));
return;
@@ -218,7 +226,11 @@ void kvmhv_rm_send_ipi(int cpu)
/* Else poke the target with an IPI */
xics_phys = paca[cpu].kvm_hstate.xics_phys;
- rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+ if (xics_phys)
+ rm_writeb(xics_phys + XICS_MFRR, IPI_PRIORITY);
+ else
+ opal_rm_int_set_mfrr(get_hard_smp_processor_id(cpu),
+ IPI_PRIORITY);
}
/*
@@ -329,7 +341,7 @@ static struct kvmppc_irq_map *get_irqmap(struct kvmppc_passthru_irqmap *pimap,
* saved a copy of the XIRR in the PACA, it will be picked up by
* the host ICP driver.
*/
-static int kvmppc_check_passthru(u32 xisr, __be32 xirr)
+static int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
{
struct kvmppc_passthru_irqmap *pimap;
struct kvmppc_irq_map *irq_map;
@@ -348,11 +360,11 @@ static int kvmppc_check_passthru(u32 xisr, __be32 xirr)
/* We're handling this interrupt, generic code doesn't need to */
local_paca->kvm_hstate.saved_xirr = 0;
- return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap);
+ return kvmppc_deliver_irq_passthru(vcpu, xirr, irq_map, pimap, again);
}
#else
-static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr)
+static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr, bool *again)
{
return 1;
}
@@ -367,14 +379,31 @@ static inline int kvmppc_check_passthru(u32 xisr, __be32 xirr)
* -1 if there was a guest wakeup IPI (which has now been cleared)
* -2 if there is PCI passthrough external interrupt that was handled
*/
+static long kvmppc_read_one_intr(bool *again);
long kvmppc_read_intr(void)
{
+ long ret = 0;
+ long rc;
+ bool again;
+
+ do {
+ again = false;
+ rc = kvmppc_read_one_intr(&again);
+ if (rc && (ret == 0 || rc > ret))
+ ret = rc;
+ } while (again);
+ return ret;
+}
+
+static long kvmppc_read_one_intr(bool *again)
+{
unsigned long xics_phys;
u32 h_xirr;
__be32 xirr;
u32 xisr;
u8 host_ipi;
+ int64_t rc;
/* see if a host IPI is pending */
host_ipi = local_paca->kvm_hstate.host_ipi;
@@ -383,8 +412,14 @@ long kvmppc_read_intr(void)
/* Now read the interrupt from the ICP */
xics_phys = local_paca->kvm_hstate.xics_phys;
- if (unlikely(!xics_phys))
- return 1;
+ if (!xics_phys) {
+ /* Use OPAL to read the XIRR */
+ rc = opal_rm_int_get_xirr(&xirr, false);
+ if (rc < 0)
+ return 1;
+ } else {
+ xirr = _lwzcix(xics_phys + XICS_XIRR);
+ }
/*
* Save XIRR for later. Since we get control in reverse endian
@@ -392,7 +427,6 @@ long kvmppc_read_intr(void)
* host endian. Note that xirr is the value read from the
* XIRR register, while h_xirr is the host endian version.
*/
- xirr = _lwzcix(xics_phys + XICS_XIRR);
h_xirr = be32_to_cpu(xirr);
local_paca->kvm_hstate.saved_xirr = h_xirr;
xisr = h_xirr & 0xffffff;
@@ -411,8 +445,16 @@ long kvmppc_read_intr(void)
* If it is an IPI, clear the MFRR and EOI it.
*/
if (xisr == XICS_IPI) {
- _stbcix(xics_phys + XICS_MFRR, 0xff);
- _stwcix(xics_phys + XICS_XIRR, xirr);
+ if (xics_phys) {
+ _stbcix(xics_phys + XICS_MFRR, 0xff);
+ _stwcix(xics_phys + XICS_XIRR, xirr);
+ } else {
+ opal_rm_int_set_mfrr(hard_smp_processor_id(), 0xff);
+ rc = opal_rm_int_eoi(h_xirr);
+ /* If rc > 0, there is another interrupt pending */
+ *again = rc > 0;
+ }
+
/*
* Need to ensure side effects of above stores
* complete before proceeding.
@@ -429,7 +471,11 @@ long kvmppc_read_intr(void)
/* We raced with the host,
* we need to resend that IPI, bummer
*/
- _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
+ if (xics_phys)
+ _stbcix(xics_phys + XICS_MFRR, IPI_PRIORITY);
+ else
+ opal_rm_int_set_mfrr(hard_smp_processor_id(),
+ IPI_PRIORITY);
/* Let side effects complete */
smp_mb();
return 1;
@@ -440,5 +486,5 @@ long kvmppc_read_intr(void)
return -1;
}
- return kvmppc_check_passthru(xisr, xirr);
+ return kvmppc_check_passthru(xisr, xirr, again);
}
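
Note (illustration, not part of the patch above): two things happen in this file. Every direct XICS register access gains an OPAL fallback for hosts where the ICP is not memory-mapped, and kvmppc_read_intr() becomes a loop because opal_rm_int_eoi() can report that another interrupt is already pending. A generic sketch of that retry shape, with made-up names and a stubbed single-shot handler:

	#include <stdbool.h>

	/*
	 * Hypothetical single-shot handler: returns a status code and sets
	 * *again when the EOI path reported more work pending.
	 */
	static long read_one(bool *again)
	{
		(void)again;	/* stub for the sketch */
		return 0;
	}

	long read_all(void)
	{
		long ret = 0, rc;
		bool again;

		do {
			again = false;
			rc = read_one(&again);
			/* remember a non-zero status, preferring the larger one */
			if (rc && (ret == 0 || rc > ret))
				ret = rc;
		} while (again);
		return ret;
	}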
diff --git a/arch/powerpc/kvm/book3s_hv_ras.c b/arch/powerpc/kvm/book3s_hv_ras.c
index 0fa70a9618d7..7ef0993214f3 100644
--- a/arch/powerpc/kvm/book3s_hv_ras.c
+++ b/arch/powerpc/kvm/book3s_hv_ras.c
@@ -16,6 +16,7 @@
#include <asm/machdep.h>
#include <asm/cputhreads.h>
#include <asm/hmi.h>
+#include <asm/kvm_ppc.h>
/* SRR1 bits for machine check on POWER7 */
#define SRR1_MC_LDSTERR (1ul << (63-42))
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 99b4e9d5dd23..9ef3c4be952f 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -264,8 +264,10 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
if (pa)
pteh |= HPTE_V_VALID;
- else
+ else {
pteh |= HPTE_V_ABSENT;
+ ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
+ }
/*If we had host pte mapping then Check WIMG */
if (ptep && !hpte_cache_flags_ok(ptel, is_ci)) {
@@ -351,6 +353,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
/* inval in progress, write a non-present HPTE */
pteh |= HPTE_V_ABSENT;
pteh &= ~HPTE_V_VALID;
+ ptel &= ~(HPTE_R_KEY_HI | HPTE_R_KEY_LO);
unlock_rmap(rmap);
} else {
kvmppc_add_revmap_chain(kvm, rev, rmap, pte_index,
@@ -361,6 +364,11 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags,
}
}
+ /* Convert to new format on P9 */
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ ptel = hpte_old_to_new_r(pteh, ptel);
+ pteh = hpte_old_to_new_v(pteh);
+ }
hpte[1] = cpu_to_be64(ptel);
/* Write the first HPTE dword, unlocking the HPTE and making it valid */
@@ -386,6 +394,13 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
#define LOCK_TOKEN (*(u32 *)(&get_paca()->paca_index))
#endif
+static inline int is_mmio_hpte(unsigned long v, unsigned long r)
+{
+ return ((v & HPTE_V_ABSENT) &&
+ (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+ (HPTE_R_KEY_HI | HPTE_R_KEY_LO));
+}
+
static inline int try_lock_tlbie(unsigned int *lock)
{
unsigned int tmp, old;
@@ -409,13 +424,18 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
{
long i;
+ /*
+ * We use the POWER9 5-operand versions of tlbie and tlbiel here.
+ * Since we are using RIC=0 PRS=0 R=0, and P7/P8 tlbiel ignores
+ * the RS field, this is backwards-compatible with P7 and P8.
+ */
if (global) {
while (!try_lock_tlbie(&kvm->arch.tlbie_lock))
cpu_relax();
if (need_sync)
asm volatile("ptesync" : : : "memory");
for (i = 0; i < npages; ++i)
- asm volatile(PPC_TLBIE(%1,%0) : :
+ asm volatile(PPC_TLBIE_5(%0,%1,0,0,0) : :
"r" (rbvalues[i]), "r" (kvm->arch.lpid));
asm volatile("eieio; tlbsync; ptesync" : : : "memory");
kvm->arch.tlbie_lock = 0;
@@ -423,7 +443,8 @@ static void do_tlbies(struct kvm *kvm, unsigned long *rbvalues,
if (need_sync)
asm volatile("ptesync" : : : "memory");
for (i = 0; i < npages; ++i)
- asm volatile("tlbiel %0" : : "r" (rbvalues[i]));
+ asm volatile(PPC_TLBIEL(%0,%1,0,0,0) : :
+ "r" (rbvalues[i]), "r" (0));
asm volatile("ptesync" : : : "memory");
}
}
@@ -435,18 +456,23 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
__be64 *hpte;
unsigned long v, r, rb;
struct revmap_entry *rev;
- u64 pte;
+ u64 pte, orig_pte, pte_r;
if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
- pte = be64_to_cpu(hpte[0]);
+ pte = orig_pte = be64_to_cpu(hpte[0]);
+ pte_r = be64_to_cpu(hpte[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ pte = hpte_new_to_old_v(pte, pte_r);
+ pte_r = hpte_new_to_old_r(pte_r);
+ }
if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
((flags & H_AVPN) && (pte & ~0x7fUL) != avpn) ||
((flags & H_ANDCOND) && (pte & avpn) != 0)) {
- __unlock_hpte(hpte, pte);
+ __unlock_hpte(hpte, orig_pte);
return H_NOT_FOUND;
}
@@ -454,7 +480,7 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
v = pte & ~HPTE_V_HVLOCK;
if (v & HPTE_V_VALID) {
hpte[0] &= ~cpu_to_be64(HPTE_V_VALID);
- rb = compute_tlbie_rb(v, be64_to_cpu(hpte[1]), pte_index);
+ rb = compute_tlbie_rb(v, pte_r, pte_index);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags), true);
/*
* The reference (R) and change (C) bits in a HPT
@@ -472,6 +498,9 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags,
note_hpte_modification(kvm, rev);
unlock_hpte(hpte, 0);
+ if (is_mmio_hpte(v, pte_r))
+ atomic64_inc(&kvm->arch.mmio_update);
+
if (v & HPTE_V_ABSENT)
v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID;
hpret[0] = v;
@@ -498,7 +527,7 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
int global;
long int ret = H_SUCCESS;
struct revmap_entry *rev, *revs[4];
- u64 hp0;
+ u64 hp0, hp1;
global = global_invalidates(kvm, 0);
for (i = 0; i < 4 && ret == H_SUCCESS; ) {
@@ -531,6 +560,11 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
}
found = 0;
hp0 = be64_to_cpu(hp[0]);
+ hp1 = be64_to_cpu(hp[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hp0 = hpte_new_to_old_v(hp0, hp1);
+ hp1 = hpte_new_to_old_r(hp1);
+ }
if (hp0 & (HPTE_V_ABSENT | HPTE_V_VALID)) {
switch (flags & 3) {
case 0: /* absolute */
@@ -561,13 +595,14 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
rcbits = rev->guest_rpte & (HPTE_R_R|HPTE_R_C);
args[j] |= rcbits << (56 - 5);
hp[0] = 0;
+ if (is_mmio_hpte(hp0, hp1))
+ atomic64_inc(&kvm->arch.mmio_update);
continue;
}
/* leave it locked */
hp[0] &= ~cpu_to_be64(HPTE_V_VALID);
- tlbrb[n] = compute_tlbie_rb(be64_to_cpu(hp[0]),
- be64_to_cpu(hp[1]), pte_index);
+ tlbrb[n] = compute_tlbie_rb(hp0, hp1, pte_index);
indexes[n] = j;
hptes[n] = hp;
revs[n] = rev;
@@ -605,7 +640,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
__be64 *hpte;
struct revmap_entry *rev;
unsigned long v, r, rb, mask, bits;
- u64 pte;
+ u64 pte_v, pte_r;
if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
@@ -613,14 +648,16 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
cpu_relax();
- pte = be64_to_cpu(hpte[0]);
- if ((pte & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
- ((flags & H_AVPN) && (pte & ~0x7fUL) != avpn)) {
- __unlock_hpte(hpte, pte);
+ v = pte_v = be64_to_cpu(hpte[0]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ v = hpte_new_to_old_v(v, be64_to_cpu(hpte[1]));
+ if ((v & (HPTE_V_ABSENT | HPTE_V_VALID)) == 0 ||
+ ((flags & H_AVPN) && (v & ~0x7fUL) != avpn)) {
+ __unlock_hpte(hpte, pte_v);
return H_NOT_FOUND;
}
- v = pte;
+ pte_r = be64_to_cpu(hpte[1]);
bits = (flags << 55) & HPTE_R_PP0;
bits |= (flags << 48) & HPTE_R_KEY_HI;
bits |= flags & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO);
@@ -642,22 +679,26 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
* readonly to writable. If it should be writable, we'll
* take a trap and let the page fault code sort it out.
*/
- pte = be64_to_cpu(hpte[1]);
- r = (pte & ~mask) | bits;
- if (hpte_is_writable(r) && !hpte_is_writable(pte))
+ r = (pte_r & ~mask) | bits;
+ if (hpte_is_writable(r) && !hpte_is_writable(pte_r))
r = hpte_make_readonly(r);
/* If the PTE is changing, invalidate it first */
- if (r != pte) {
+ if (r != pte_r) {
rb = compute_tlbie_rb(v, r, pte_index);
- hpte[0] = cpu_to_be64((v & ~HPTE_V_VALID) |
+ hpte[0] = cpu_to_be64((pte_v & ~HPTE_V_VALID) |
HPTE_V_ABSENT);
do_tlbies(kvm, &rb, 1, global_invalidates(kvm, flags),
true);
+ /* Don't lose R/C bit updates done by hardware */
+ r |= be64_to_cpu(hpte[1]) & (HPTE_R_R | HPTE_R_C);
hpte[1] = cpu_to_be64(r);
}
}
- unlock_hpte(hpte, v & ~HPTE_V_HVLOCK);
+ unlock_hpte(hpte, pte_v & ~HPTE_V_HVLOCK);
asm volatile("ptesync" : : : "memory");
+ if (is_mmio_hpte(v, pte_r))
+ atomic64_inc(&kvm->arch.mmio_update);
+
return H_SUCCESS;
}
@@ -681,6 +722,10 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
hpte = (__be64 *)(kvm->arch.hpt_virt + (pte_index << 4));
v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
r = be64_to_cpu(hpte[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ v = hpte_new_to_old_v(v, r);
+ r = hpte_new_to_old_r(r);
+ }
if (v & HPTE_V_ABSENT) {
v &= ~HPTE_V_ABSENT;
v |= HPTE_V_VALID;
@@ -798,10 +843,16 @@ void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep,
unsigned long pte_index)
{
unsigned long rb;
+ u64 hp0, hp1;
hptep[0] &= ~cpu_to_be64(HPTE_V_VALID);
- rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
- pte_index);
+ hp0 = be64_to_cpu(hptep[0]);
+ hp1 = be64_to_cpu(hptep[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hp0 = hpte_new_to_old_v(hp0, hp1);
+ hp1 = hpte_new_to_old_r(hp1);
+ }
+ rb = compute_tlbie_rb(hp0, hp1, pte_index);
do_tlbies(kvm, &rb, 1, 1, true);
}
EXPORT_SYMBOL_GPL(kvmppc_invalidate_hpte);
@@ -811,9 +862,15 @@ void kvmppc_clear_ref_hpte(struct kvm *kvm, __be64 *hptep,
{
unsigned long rb;
unsigned char rbyte;
+ u64 hp0, hp1;
- rb = compute_tlbie_rb(be64_to_cpu(hptep[0]), be64_to_cpu(hptep[1]),
- pte_index);
+ hp0 = be64_to_cpu(hptep[0]);
+ hp1 = be64_to_cpu(hptep[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hp0 = hpte_new_to_old_v(hp0, hp1);
+ hp1 = hpte_new_to_old_r(hp1);
+ }
+ rb = compute_tlbie_rb(hp0, hp1, pte_index);
rbyte = (be64_to_cpu(hptep[1]) & ~HPTE_R_R) >> 8;
/* modify only the second-last byte, which contains the ref bit */
*((char *)hptep + 14) = rbyte;
@@ -828,6 +885,37 @@ static int slb_base_page_shift[4] = {
20, /* 1M, unsupported */
};
+static struct mmio_hpte_cache_entry *mmio_cache_search(struct kvm_vcpu *vcpu,
+ unsigned long eaddr, unsigned long slb_v, long mmio_update)
+{
+ struct mmio_hpte_cache_entry *entry = NULL;
+ unsigned int pshift;
+ unsigned int i;
+
+ for (i = 0; i < MMIO_HPTE_CACHE_SIZE; i++) {
+ entry = &vcpu->arch.mmio_cache.entry[i];
+ if (entry->mmio_update == mmio_update) {
+ pshift = entry->slb_base_pshift;
+ if ((entry->eaddr >> pshift) == (eaddr >> pshift) &&
+ entry->slb_v == slb_v)
+ return entry;
+ }
+ }
+ return NULL;
+}
+
+static struct mmio_hpte_cache_entry *
+ next_mmio_cache_entry(struct kvm_vcpu *vcpu)
+{
+ unsigned int index = vcpu->arch.mmio_cache.index;
+
+ vcpu->arch.mmio_cache.index++;
+ if (vcpu->arch.mmio_cache.index == MMIO_HPTE_CACHE_SIZE)
+ vcpu->arch.mmio_cache.index = 0;
+
+ return &vcpu->arch.mmio_cache.entry[index];
+}
+
/* When called from virtmode, this func should be protected by
* preempt_disable(), otherwise, the holding of HPTE_V_HVLOCK
* can trigger deadlock issue.
@@ -842,7 +930,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
unsigned long avpn;
__be64 *hpte;
unsigned long mask, val;
- unsigned long v, r;
+ unsigned long v, r, orig_v;
/* Get page shift, work out hash and AVPN etc. */
mask = SLB_VSID_B | HPTE_V_AVPN | HPTE_V_SECONDARY;
@@ -877,6 +965,8 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
for (i = 0; i < 16; i += 2) {
/* Read the PTE racily */
v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ v = hpte_new_to_old_v(v, be64_to_cpu(hpte[i+1]));
/* Check valid/absent, hash, segment size and AVPN */
if (!(v & valid) || (v & mask) != val)
@@ -885,8 +975,12 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
/* Lock the PTE and read it under the lock */
while (!try_lock_hpte(&hpte[i], HPTE_V_HVLOCK))
cpu_relax();
- v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
+ v = orig_v = be64_to_cpu(hpte[i]) & ~HPTE_V_HVLOCK;
r = be64_to_cpu(hpte[i+1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ v = hpte_new_to_old_v(v, r);
+ r = hpte_new_to_old_r(r);
+ }
/*
* Check the HPTE again, including base page size
@@ -896,7 +990,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
/* Return with the HPTE still locked */
return (hash << 3) + (i >> 1);
- __unlock_hpte(&hpte[i], v);
+ __unlock_hpte(&hpte[i], orig_v);
}
if (val & HPTE_V_SECONDARY)
@@ -924,30 +1018,45 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
{
struct kvm *kvm = vcpu->kvm;
long int index;
- unsigned long v, r, gr;
+ unsigned long v, r, gr, orig_v;
__be64 *hpte;
unsigned long valid;
struct revmap_entry *rev;
unsigned long pp, key;
+ struct mmio_hpte_cache_entry *cache_entry = NULL;
+ long mmio_update = 0;
/* For protection fault, expect to find a valid HPTE */
valid = HPTE_V_VALID;
- if (status & DSISR_NOHPTE)
+ if (status & DSISR_NOHPTE) {
valid |= HPTE_V_ABSENT;
-
- index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
- if (index < 0) {
- if (status & DSISR_NOHPTE)
- return status; /* there really was no HPTE */
- return 0; /* for prot fault, HPTE disappeared */
+ mmio_update = atomic64_read(&kvm->arch.mmio_update);
+ cache_entry = mmio_cache_search(vcpu, addr, slb_v, mmio_update);
}
- hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
- v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
- r = be64_to_cpu(hpte[1]);
- rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
- gr = rev->guest_rpte;
+ if (cache_entry) {
+ index = cache_entry->pte_index;
+ v = cache_entry->hpte_v;
+ r = cache_entry->hpte_r;
+ gr = cache_entry->rpte;
+ } else {
+ index = kvmppc_hv_find_lock_hpte(kvm, addr, slb_v, valid);
+ if (index < 0) {
+ if (status & DSISR_NOHPTE)
+ return status; /* there really was no HPTE */
+ return 0; /* for prot fault, HPTE disappeared */
+ }
+ hpte = (__be64 *)(kvm->arch.hpt_virt + (index << 4));
+ v = orig_v = be64_to_cpu(hpte[0]) & ~HPTE_V_HVLOCK;
+ r = be64_to_cpu(hpte[1]);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ v = hpte_new_to_old_v(v, r);
+ r = hpte_new_to_old_r(r);
+ }
+ rev = real_vmalloc_addr(&kvm->arch.revmap[index]);
+ gr = rev->guest_rpte;
- unlock_hpte(hpte, v);
+ unlock_hpte(hpte, orig_v);
+ }
/* For not found, if the HPTE is valid by now, retry the instruction */
if ((status & DSISR_NOHPTE) && (v & HPTE_V_VALID))
@@ -985,12 +1094,32 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr,
vcpu->arch.pgfault_index = index;
vcpu->arch.pgfault_hpte[0] = v;
vcpu->arch.pgfault_hpte[1] = r;
+ vcpu->arch.pgfault_cache = cache_entry;
/* Check the storage key to see if it is possibly emulated MMIO */
- if (data && (vcpu->arch.shregs.msr & MSR_IR) &&
- (r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
- (HPTE_R_KEY_HI | HPTE_R_KEY_LO))
- return -2; /* MMIO emulation - load instr word */
+ if ((r & (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) ==
+ (HPTE_R_KEY_HI | HPTE_R_KEY_LO)) {
+ if (!cache_entry) {
+ unsigned int pshift = 12;
+ unsigned int pshift_index;
+
+ if (slb_v & SLB_VSID_L) {
+ pshift_index = ((slb_v & SLB_VSID_LP) >> 4);
+ pshift = slb_base_page_shift[pshift_index];
+ }
+ cache_entry = next_mmio_cache_entry(vcpu);
+ cache_entry->eaddr = addr;
+ cache_entry->slb_base_pshift = pshift;
+ cache_entry->pte_index = index;
+ cache_entry->hpte_v = v;
+ cache_entry->hpte_r = r;
+ cache_entry->rpte = gr;
+ cache_entry->slb_v = slb_v;
+ cache_entry->mmio_update = mmio_update;
+ }
+ if (data && (vcpu->arch.shregs.msr & MSR_IR))
+ return -2; /* MMIO emulation - load instr word */
+ }
return -1; /* send fault up to host kernel mode */
}
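
Note (illustration, not part of the patch above): the cache added here is a tiny per-vcpu ring. Lookups match on the effective address masked to the segment base page size, the SLB value, and the generation counter described earlier; on a miss, the next slot is overwritten round-robin. A standalone sketch of that replacement policy, with an illustrative size rather than the kernel's MMIO_HPTE_CACHE_SIZE:

	#include <stdint.h>

	#define CACHE_SIZE 4	/* illustrative */

	struct entry {
		uint64_t key;
		uint64_t gen;
	};

	struct ring_cache {
		struct entry slot[CACHE_SIZE];
		unsigned int index;	/* next slot to recycle */
	};

	/* Round-robin replacement: hand back the oldest slot and advance. */
	static struct entry *next_slot(struct ring_cache *c)
	{
		struct entry *e = &c->slot[c->index];

		c->index = (c->index + 1) % CACHE_SIZE;
		return e;
	}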
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
index 82ff5de8b1e7..06edc4366639 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_xics.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -23,6 +23,7 @@
#include <asm/ppc-opcode.h>
#include <asm/pnv-pci.h>
#include <asm/opal.h>
+#include <asm/smp.h>
#include "book3s_xics.h"
@@ -69,7 +70,11 @@ static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu)
hcpu = hcore << threads_shift;
kvmppc_host_rm_ops_hv->rm_core[hcore].rm_data = vcpu;
smp_muxed_ipi_set_message(hcpu, PPC_MSG_RM_HOST_ACTION);
- icp_native_cause_ipi_rm(hcpu);
+ if (paca[hcpu].kvm_hstate.xics_phys)
+ icp_native_cause_ipi_rm(hcpu);
+ else
+ opal_rm_int_set_mfrr(get_hard_smp_processor_id(hcpu),
+ IPI_PRIORITY);
}
#else
static inline void icp_send_hcore_msg(int hcore, struct kvm_vcpu *vcpu) { }
@@ -736,7 +741,7 @@ int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
unsigned long eoi_rc;
-static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr)
+static void icp_eoi(struct irq_chip *c, u32 hwirq, __be32 xirr, bool *again)
{
unsigned long xics_phys;
int64_t rc;
@@ -750,7 +755,12 @@ static void icp_eoi(struct irq_chip *c, u32 hwirq, u32 xirr)
/* EOI it */
xics_phys = local_paca->kvm_hstate.xics_phys;
- _stwcix(xics_phys + XICS_XIRR, xirr);
+ if (xics_phys) {
+ _stwcix(xics_phys + XICS_XIRR, xirr);
+ } else {
+ rc = opal_rm_int_eoi(be32_to_cpu(xirr));
+ *again = rc > 0;
+ }
}
static int xics_opal_rm_set_server(unsigned int hw_irq, int server_cpu)
@@ -808,9 +818,10 @@ static void kvmppc_rm_handle_irq_desc(struct irq_desc *desc)
}
long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
- u32 xirr,
+ __be32 xirr,
struct kvmppc_irq_map *irq_map,
- struct kvmppc_passthru_irqmap *pimap)
+ struct kvmppc_passthru_irqmap *pimap,
+ bool *again)
{
struct kvmppc_xics *xics;
struct kvmppc_icp *icp;
@@ -824,7 +835,8 @@ long kvmppc_deliver_irq_passthru(struct kvm_vcpu *vcpu,
icp_rm_deliver_irq(xics, icp, irq);
/* EOI the interrupt */
- icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr);
+ icp_eoi(irq_desc_get_chip(irq_map->desc), irq_map->r_hwirq, xirr,
+ again);
if (check_too_hard(xics, icp) == H_TOO_HARD)
return 2;
diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
index c3c1d1bcfc67..9338a818e05c 100644
--- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S
+++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S
@@ -501,17 +501,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
cmpwi r0, 0
beq 57f
li r3, (LPCR_PECEDH | LPCR_PECE0) >> 4
- mfspr r4, SPRN_LPCR
- rlwimi r4, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
- mtspr SPRN_LPCR, r4
- isync
- std r0, HSTATE_SCRATCH0(r13)
- ptesync
- ld r0, HSTATE_SCRATCH0(r13)
-1: cmpd r0, r0
- bne 1b
- nap
- b .
+ mfspr r5, SPRN_LPCR
+ rlwimi r5, r3, 4, (LPCR_PECEDP | LPCR_PECEDH | LPCR_PECE0 | LPCR_PECE1)
+ b kvm_nap_sequence
57: li r0, 0
stbx r0, r3, r4
@@ -523,6 +515,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* *
*****************************************************************************/
+/* Stack frame offsets */
+#define STACK_SLOT_TID (112-16)
+#define STACK_SLOT_PSSCR (112-24)
+
.global kvmppc_hv_entry
kvmppc_hv_entry:
@@ -581,12 +577,14 @@ kvmppc_hv_entry:
ld r9,VCORE_KVM(r5) /* pointer to struct kvm */
cmpwi r6,0
bne 10f
- ld r6,KVM_SDR1(r9)
lwz r7,KVM_LPID(r9)
+BEGIN_FTR_SECTION
+ ld r6,KVM_SDR1(r9)
li r0,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r0
ptesync
mtspr SPRN_SDR1,r6 /* switch to partition page table */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
@@ -607,12 +605,8 @@ kvmppc_hv_entry:
stdcx. r7,0,r6
bne 23b
/* Flush the TLB of any entries for this LPID */
- /* use arch 2.07S as a proxy for POWER8 */
-BEGIN_FTR_SECTION
- li r6,512 /* POWER8 has 512 sets */
-FTR_SECTION_ELSE
- li r6,128 /* POWER7 has 128 sets */
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_207S)
+ lwz r6,KVM_TLB_SETS(r9)
+ li r0,0 /* RS for P9 version of tlbiel */
mtctr r6
li r7,0x800 /* IS field = 0b10 */
ptesync
@@ -698,6 +692,14 @@ kvmppc_got_guest:
mtspr SPRN_PURR,r7
mtspr SPRN_SPURR,r8
+ /* Save host values of some registers */
+BEGIN_FTR_SECTION
+ mfspr r5, SPRN_TIDR
+ mfspr r6, SPRN_PSSCR
+ std r5, STACK_SLOT_TID(r1)
+ std r6, STACK_SLOT_PSSCR(r1)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
BEGIN_FTR_SECTION
/* Set partition DABR */
/* Do this before re-enabling PMU to avoid P7 DABR corruption bug */
@@ -750,14 +752,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_PMAO_BUG)
BEGIN_FTR_SECTION
ld r5, VCPU_MMCR + 24(r4)
ld r6, VCPU_SIER(r4)
+ mtspr SPRN_MMCR2, r5
+ mtspr SPRN_SIER, r6
+BEGIN_FTR_SECTION_NESTED(96)
lwz r7, VCPU_PMC + 24(r4)
lwz r8, VCPU_PMC + 28(r4)
ld r9, VCPU_MMCR + 32(r4)
- mtspr SPRN_MMCR2, r5
- mtspr SPRN_SIER, r6
mtspr SPRN_SPMC1, r7
mtspr SPRN_SPMC2, r8
mtspr SPRN_MMCRS, r9
+END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
mtspr SPRN_MMCR0, r3
isync
@@ -813,20 +817,30 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_EBBHR, r8
ld r5, VCPU_EBBRR(r4)
ld r6, VCPU_BESCR(r4)
- ld r7, VCPU_CSIGR(r4)
- ld r8, VCPU_TACR(r4)
+ lwz r7, VCPU_GUEST_PID(r4)
+ ld r8, VCPU_WORT(r4)
mtspr SPRN_EBBRR, r5
mtspr SPRN_BESCR, r6
- mtspr SPRN_CSIGR, r7
- mtspr SPRN_TACR, r8
+ mtspr SPRN_PID, r7
+ mtspr SPRN_WORT, r8
+BEGIN_FTR_SECTION
+ /* POWER8-only registers */
ld r5, VCPU_TCSCR(r4)
ld r6, VCPU_ACOP(r4)
- lwz r7, VCPU_GUEST_PID(r4)
- ld r8, VCPU_WORT(r4)
+ ld r7, VCPU_CSIGR(r4)
+ ld r8, VCPU_TACR(r4)
mtspr SPRN_TCSCR, r5
mtspr SPRN_ACOP, r6
- mtspr SPRN_PID, r7
- mtspr SPRN_WORT, r8
+ mtspr SPRN_CSIGR, r7
+ mtspr SPRN_TACR, r8
+FTR_SECTION_ELSE
+ /* POWER9-only registers */
+ ld r5, VCPU_TID(r4)
+ ld r6, VCPU_PSSCR(r4)
+ oris r6, r6, PSSCR_EC@h /* This makes stop trap to HV */
+ mtspr SPRN_TIDR, r5
+ mtspr SPRN_PSSCR, r6
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
8:
/*
@@ -1341,20 +1355,29 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
std r8, VCPU_EBBHR(r9)
mfspr r5, SPRN_EBBRR
mfspr r6, SPRN_BESCR
- mfspr r7, SPRN_CSIGR
- mfspr r8, SPRN_TACR
+ mfspr r7, SPRN_PID
+ mfspr r8, SPRN_WORT
std r5, VCPU_EBBRR(r9)
std r6, VCPU_BESCR(r9)
- std r7, VCPU_CSIGR(r9)
- std r8, VCPU_TACR(r9)
+ stw r7, VCPU_GUEST_PID(r9)
+ std r8, VCPU_WORT(r9)
+BEGIN_FTR_SECTION
mfspr r5, SPRN_TCSCR
mfspr r6, SPRN_ACOP
- mfspr r7, SPRN_PID
- mfspr r8, SPRN_WORT
+ mfspr r7, SPRN_CSIGR
+ mfspr r8, SPRN_TACR
std r5, VCPU_TCSCR(r9)
std r6, VCPU_ACOP(r9)
- stw r7, VCPU_GUEST_PID(r9)
- std r8, VCPU_WORT(r9)
+ std r7, VCPU_CSIGR(r9)
+ std r8, VCPU_TACR(r9)
+FTR_SECTION_ELSE
+ mfspr r5, SPRN_TIDR
+ mfspr r6, SPRN_PSSCR
+ std r5, VCPU_TID(r9)
+ rldicl r6, r6, 4, 50 /* r6 &= PSSCR_GUEST_VIS */
+ rotldi r6, r6, 60
+ std r6, VCPU_PSSCR(r9)
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/*
* Restore various registers to 0, where non-zero values
* set by the guest could disrupt the host.
@@ -1363,12 +1386,14 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
mtspr SPRN_IAMR, r0
mtspr SPRN_CIABR, r0
mtspr SPRN_DAWRX, r0
- mtspr SPRN_TCSCR, r0
mtspr SPRN_WORT, r0
+BEGIN_FTR_SECTION
+ mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
sldi r0, r0, 31
mtspr SPRN_MMCRS, r0
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
8:
/* Save and reset AMR and UAMOR before turning on the MMU */
@@ -1502,15 +1527,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
stw r8, VCPU_PMC + 20(r9)
BEGIN_FTR_SECTION
mfspr r5, SPRN_SIER
+ std r5, VCPU_SIER(r9)
+BEGIN_FTR_SECTION_NESTED(96)
mfspr r6, SPRN_SPMC1
mfspr r7, SPRN_SPMC2
mfspr r8, SPRN_MMCRS
- std r5, VCPU_SIER(r9)
stw r6, VCPU_PMC + 24(r9)
stw r7, VCPU_PMC + 28(r9)
std r8, VCPU_MMCR + 32(r9)
lis r4, 0x8000
mtspr SPRN_MMCRS, r4
+END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
22:
/* Clear out SLB */
@@ -1519,6 +1546,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
slbia
ptesync
+ /* Restore host values of some registers */
+BEGIN_FTR_SECTION
+ ld r5, STACK_SLOT_TID(r1)
+ ld r6, STACK_SLOT_PSSCR(r1)
+ mtspr SPRN_TIDR, r5
+ mtspr SPRN_PSSCR, r6
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
+
/*
* POWER7/POWER8 guest -> host partition switch code.
* We don't have to lock against tlbies but we do
@@ -1552,12 +1587,14 @@ kvmhv_switch_to_host:
beq 19f
/* Primary thread switches back to host partition */
- ld r6,KVM_HOST_SDR1(r4)
lwz r7,KVM_HOST_LPID(r4)
+BEGIN_FTR_SECTION
+ ld r6,KVM_HOST_SDR1(r4)
li r8,LPID_RSVD /* switch to reserved LPID */
mtspr SPRN_LPID,r8
ptesync
- mtspr SPRN_SDR1,r6 /* switch to partition page table */
+ mtspr SPRN_SDR1,r6 /* switch to host page table */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
mtspr SPRN_LPID,r7
isync
@@ -2211,6 +2248,21 @@ BEGIN_FTR_SECTION
ori r5, r5, LPCR_PECEDH
rlwimi r5, r3, 0, LPCR_PECEDP
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
+
+kvm_nap_sequence: /* desired LPCR value in r5 */
+BEGIN_FTR_SECTION
+ /*
+ * PSSCR bits: exit criterion = 1 (wakeup based on LPCR at sreset)
+ * enable state loss = 1 (allow SMT mode switch)
+ * requested level = 0 (just stop dispatching)
+ */
+ lis r3, (PSSCR_EC | PSSCR_ESL)@h
+ mtspr SPRN_PSSCR, r3
+ /* Set LPCR_PECE_HVEE bit to enable wakeup by HV interrupts */
+ li r4, LPCR_PECE_HVEE@higher
+ sldi r4, r4, 32
+ or r5, r5, r4
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mtspr SPRN_LPCR,r5
isync
li r0, 0
@@ -2219,7 +2271,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
ld r0, HSTATE_SCRATCH0(r13)
1: cmpd r0, r0
bne 1b
+BEGIN_FTR_SECTION
nap
+FTR_SECTION_ELSE
+ PPC_STOP
+ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
b .
33: mr r4, r3
@@ -2600,11 +2656,13 @@ kvmppc_save_tm:
mfctr r7
mfspr r8, SPRN_AMR
mfspr r10, SPRN_TAR
+ mfxer r11
std r5, VCPU_LR_TM(r9)
stw r6, VCPU_CR_TM(r9)
std r7, VCPU_CTR_TM(r9)
std r8, VCPU_AMR_TM(r9)
std r10, VCPU_TAR_TM(r9)
+ std r11, VCPU_XER_TM(r9)
/* Restore r12 as trap number. */
lwz r12, VCPU_TRAP(r9)
@@ -2697,11 +2755,13 @@ kvmppc_restore_tm:
ld r7, VCPU_CTR_TM(r4)
ld r8, VCPU_AMR_TM(r4)
ld r9, VCPU_TAR_TM(r4)
+ ld r10, VCPU_XER_TM(r4)
mtlr r5
mtcr r6
mtctr r7
mtspr SPRN_AMR, r8
mtspr SPRN_TAR, r9
+ mtxer r10
/*
* Load up PPR and DSCR values but don't put them in the actual SPRs
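[Editor's note] The BEGIN_FTR_SECTION / FTR_SECTION_ELSE / ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) pairs above are the assembly analogue of a runtime feature test: boot-time alternative patching keeps the first arm on CPUs where the feature bit is clear (POWER7/8) and the ELSE arm where it is set (POWER9). A rough, illustrative C rendering of the SPR save split implemented above — the SPR names and the PSSCR_GUEST_VIS mask come from the hunks, while the function name and the vcpu->arch field names are assumptions for the sketch:

    /* Illustrative C rendering of the POWER8/POWER9 feature-section split
     * above; field names are assumed, not taken from the patch. */
    static void save_guest_sprs_sketch(struct kvm_vcpu *vcpu)
    {
            if (!cpu_has_feature(CPU_FTR_ARCH_300)) {
                    /* POWER8-only SPRs */
                    vcpu->arch.tcscr = mfspr(SPRN_TCSCR);
                    vcpu->arch.acop  = mfspr(SPRN_ACOP);
                    vcpu->arch.csigr = mfspr(SPRN_CSIGR);
                    vcpu->arch.tacr  = mfspr(SPRN_TACR);
            } else {
                    /* POWER9-only SPRs; only guest-visible PSSCR bits are kept */
                    vcpu->arch.tid   = mfspr(SPRN_TIDR);
                    vcpu->arch.psscr = mfspr(SPRN_PSSCR) & PSSCR_GUEST_VIS;
            }
    }
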
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 70963c845e96..efd1183a6b16 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -536,7 +536,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_SPAPR_TCE_64:
- case KVM_CAP_PPC_ALLOC_HTAB:
case KVM_CAP_PPC_RTAS:
case KVM_CAP_PPC_FIXUP_HCALL:
case KVM_CAP_PPC_ENABLE_HCALL:
@@ -545,13 +544,20 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
#endif
r = 1;
break;
+
+ case KVM_CAP_PPC_ALLOC_HTAB:
+ r = hv_enabled;
+ break;
#endif /* CONFIG_PPC_BOOK3S_64 */
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
case KVM_CAP_PPC_SMT:
- if (hv_enabled)
- r = threads_per_subcore;
- else
- r = 0;
+ r = 0;
+ if (hv_enabled) {
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ r = 1;
+ else
+ r = threads_per_subcore;
+ }
break;
case KVM_CAP_PPC_RMA:
r = 0;
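[Editor's note] With the change above, KVM_CAP_PPC_SMT reports 1 on POWER9 (CPU_FTR_ARCH_300), where HV KVM runs each vcpu on its own hardware thread, and still reports threads_per_subcore on earlier processors. A minimal userspace sketch of querying the capability through the standard KVM_CHECK_EXTENSION ioctl — the device path and error handling here are illustrative, not part of the patch:

    /* Sketch: query KVM_CAP_PPC_SMT from userspace (illustrative only). */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    int main(void)
    {
            int kvm = open("/dev/kvm", O_RDWR);   /* assumes /dev/kvm is present */
            if (kvm < 0)
                    return 1;
            int smt = ioctl(kvm, KVM_CHECK_EXTENSION, KVM_CAP_PPC_SMT);
            /* 1 on POWER9 HV, threads_per_subcore on POWER7/8 HV, 0 otherwise */
            printf("KVM_CAP_PPC_SMT = %d\n", smt);
            return 0;
    }
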
diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h
index fb21990c0fb4..ebc6dd449556 100644
--- a/arch/powerpc/kvm/trace_hv.h
+++ b/arch/powerpc/kvm/trace_hv.h
@@ -449,7 +449,7 @@ TRACE_EVENT(kvmppc_vcore_wakeup,
__entry->tgid = current->tgid;
),
- TP_printk("%s time %lld ns, tgid=%d",
+ TP_printk("%s time %llu ns, tgid=%d",
__entry->waited ? "wait" : "poll",
__entry->ns, __entry->tgid)
);
diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c
index bb0354222b11..362954f98029 100644
--- a/arch/powerpc/mm/copro_fault.c
+++ b/arch/powerpc/mm/copro_fault.c
@@ -106,6 +106,8 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb)
switch (REGION_ID(ea)) {
case USER_REGION_ID:
pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea);
+ if (mm == NULL)
+ return 1;
psize = get_slice_psize(mm, ea);
ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize);
diff --git a/arch/powerpc/mm/hash64_4k.c b/arch/powerpc/mm/hash64_4k.c
index 42c702b3be1f..6fa450c12d6d 100644
--- a/arch/powerpc/mm/hash64_4k.c
+++ b/arch/powerpc/mm/hash64_4k.c
@@ -55,7 +55,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
*/
rflags = htab_convert_pte_flags(new_pte);
- if (!cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+ if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
diff --git a/arch/powerpc/mm/hash64_64k.c b/arch/powerpc/mm/hash64_64k.c
index 3bbbea07378c..1a68cb19b0e3 100644
--- a/arch/powerpc/mm/hash64_64k.c
+++ b/arch/powerpc/mm/hash64_64k.c
@@ -87,7 +87,7 @@ int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid,
subpg_pte = new_pte & ~subpg_prot;
rflags = htab_convert_pte_flags(subpg_pte);
- if (!cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+ if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
/*
@@ -258,7 +258,7 @@ int __hash_page_64K(unsigned long ea, unsigned long access,
rflags = htab_convert_pte_flags(new_pte);
- if (!cpu_has_feature(CPU_FTR_NOEXECUTE) &&
+ if (cpu_has_feature(CPU_FTR_NOEXECUTE) &&
!cpu_has_feature(CPU_FTR_COHERENT_ICACHE))
rflags = hash_page_do_lazy_icache(rflags, __pte(old_pte), trap);
diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c
index 83ddc0e171b0..ad9fd5245be2 100644
--- a/arch/powerpc/mm/hash_native_64.c
+++ b/arch/powerpc/mm/hash_native_64.c
@@ -221,13 +221,18 @@ static long native_hpte_insert(unsigned long hpte_group, unsigned long vpn,
return -1;
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
+ hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
if (!(vflags & HPTE_V_BOLTED)) {
DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
i, hpte_v, hpte_r);
}
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hpte_r = hpte_old_to_new_r(hpte_v, hpte_r);
+ hpte_v = hpte_old_to_new_v(hpte_v);
+ }
+
hptep->r = cpu_to_be64(hpte_r);
/* Guarantee the second dword is visible before the valid bit */
eieio();
@@ -295,6 +300,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
vpn, want_v & HPTE_V_AVPN, slot, newpp);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/*
* We need to invalidate the TLB always because hpte_remove doesn't do
* a tlb invalidate. If a hash bucket gets full, we "evict" a more/less
@@ -309,6 +316,8 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
native_lock_hpte(hptep);
/* recheck with locks held */
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
if (unlikely(!HPTE_V_COMPARE(hpte_v, want_v) ||
!(hpte_v & HPTE_V_VALID))) {
ret = -1;
@@ -350,6 +359,8 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
for (i = 0; i < HPTES_PER_GROUP; i++) {
hptep = htab_address + slot;
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
if (HPTE_V_COMPARE(hpte_v, want_v) && (hpte_v & HPTE_V_VALID))
/* HPTE matches */
@@ -409,6 +420,8 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn,
want_v = hpte_encode_avpn(vpn, bpsize, ssize);
native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/*
* We need to invalidate the TLB always because hpte_remove doesn't do
@@ -467,6 +480,8 @@ static void native_hugepage_invalidate(unsigned long vsid,
want_v = hpte_encode_avpn(vpn, psize, ssize);
native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
/* Even if we miss, we need to invalidate the TLB */
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID))
@@ -504,6 +519,10 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
/* Look at the 8 bit LP value */
unsigned int lp = (hpte_r >> LP_SHIFT) & ((1 << LP_BITS) - 1);
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ hpte_v = hpte_new_to_old_v(hpte_v, hpte_r);
+ hpte_r = hpte_new_to_old_r(hpte_r);
+ }
if (!(hpte_v & HPTE_V_LARGE)) {
size = MMU_PAGE_4K;
a_size = MMU_PAGE_4K;
@@ -512,11 +531,7 @@ static void hpte_decode(struct hash_pte *hpte, unsigned long slot,
a_size = hpte_page_sizes[lp] >> 4;
}
/* This works for all page sizes, and for 256M and 1T segments */
- if (cpu_has_feature(CPU_FTR_ARCH_300))
- *ssize = hpte_r >> HPTE_R_3_0_SSIZE_SHIFT;
- else
- *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
-
+ *ssize = hpte_v >> HPTE_V_SSIZE_SHIFT;
shift = mmu_psize_defs[size].shift;
avpn = (HPTE_V_AVPN_VAL(hpte_v) & ~mmu_psize_defs[size].avpnm);
@@ -639,6 +654,9 @@ static void native_flush_hash_range(unsigned long number, int local)
want_v = hpte_encode_avpn(vpn, psize, ssize);
native_lock_hpte(hptep);
hpte_v = be64_to_cpu(hptep->v);
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ hpte_v = hpte_new_to_old_v(hpte_v,
+ be64_to_cpu(hptep->r));
if (!HPTE_V_COMPARE(hpte_v, want_v) ||
!(hpte_v & HPTE_V_VALID))
native_unlock_hpte(hptep);
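[Editor's note] On POWER9 (ISA 3.0) the HPTE format moves some fields between the two doublewords, so every native read of an HPTE above converts back to the old layout before comparing; the hunks repeat the same two-line pattern at each call site. A hypothetical helper that captures that pattern — not part of the patch, but built only from the calls the hunks already use (be64_to_cpu, cpu_has_feature, hpte_new_to_old_v):

    /* Hypothetical helper (not in the patch): read an HPTE's first doubleword
     * in the pre-ISA-3.0 layout, regardless of the CPU's native format. */
    static inline unsigned long native_hpte_read_v(struct hash_pte *hptep)
    {
            unsigned long hpte_v = be64_to_cpu(hptep->v);

            if (cpu_has_feature(CPU_FTR_ARCH_300))
                    hpte_v = hpte_new_to_old_v(hpte_v, be64_to_cpu(hptep->r));
            return hpte_v;
    }
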
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 44d3c3a38e3e..8410b4bb36ed 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -193,8 +193,12 @@ unsigned long htab_convert_pte_flags(unsigned long pteflags)
/*
* Kernel read only mapped with ppp bits 0b110
*/
- if (!(pteflags & _PAGE_WRITE))
- rflags |= (HPTE_R_PP0 | 0x2);
+ if (!(pteflags & _PAGE_WRITE)) {
+ if (mmu_has_feature(MMU_FTR_KERNEL_RO))
+ rflags |= (HPTE_R_PP0 | 0x2);
+ else
+ rflags |= 0x3;
+ }
} else {
if (pteflags & _PAGE_RWX)
rflags |= 0x2;
@@ -792,37 +796,17 @@ static void update_hid_for_hash(void)
static void __init hash_init_partition_table(phys_addr_t hash_table,
unsigned long htab_size)
{
- unsigned long ps_field;
- unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+ mmu_partition_table_init();
/*
- * slb llp encoding for the page size used in VPM real mode.
- * We can ignore that for lpid 0
+ * PS field (VRMA page size) is not used for LPID 0, hence set to 0.
+ * For now, UPRT is 0 and we have no segment table.
*/
- ps_field = 0;
htab_size = __ilog2(htab_size) - 18;
-
- BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
- partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
- MEMBLOCK_ALLOC_ANYWHERE));
-
- /* Initialize the Partition Table with no entries */
- memset((void *)partition_tb, 0, patb_size);
- partition_tb->patb0 = cpu_to_be64(ps_field | hash_table | htab_size);
- /*
- * FIXME!! This should be done via update_partition table
- * For now UPRT is 0 for us.
- */
- partition_tb->patb1 = 0;
+ mmu_partition_table_set_entry(0, hash_table | htab_size, 0);
pr_info("Partition table %p\n", partition_tb);
if (cpu_has_feature(CPU_FTR_POWER9_DD1))
update_hid_for_hash();
- /*
- * update partition table control register,
- * 64 K size.
- */
- mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
-
}
static void __init htab_initialize(void)
@@ -1029,6 +1013,10 @@ void hash__early_init_mmu_secondary(void)
{
/* Initialize hash table for that CPU */
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ update_hid_for_hash();
+
if (!cpu_has_feature(CPU_FTR_ARCH_300))
mtspr(SPRN_SDR1, _SDR1);
else
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index 75b9cd6150cc..0cb6bd8bfccf 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -845,7 +845,7 @@ void __init dump_numa_cpu_topology(void)
return;
for_each_online_node(node) {
- printk(KERN_DEBUG "Node %d CPUs:", node);
+ pr_info("Node %d CPUs:", node);
count = 0;
/*
@@ -856,52 +856,18 @@ void __init dump_numa_cpu_topology(void)
if (cpumask_test_cpu(cpu,
node_to_cpumask_map[node])) {
if (count == 0)
- printk(" %u", cpu);
+ pr_cont(" %u", cpu);
++count;
} else {
if (count > 1)
- printk("-%u", cpu - 1);
+ pr_cont("-%u", cpu - 1);
count = 0;
}
}
if (count > 1)
- printk("-%u", nr_cpu_ids - 1);
- printk("\n");
- }
-}
-
-static void __init dump_numa_memory_topology(void)
-{
- unsigned int node;
- unsigned int count;
-
- if (min_common_depth == -1 || !numa_enabled)
- return;
-
- for_each_online_node(node) {
- unsigned long i;
-
- printk(KERN_DEBUG "Node %d Memory:", node);
-
- count = 0;
-
- for (i = 0; i < memblock_end_of_DRAM();
- i += (1 << SECTION_SIZE_BITS)) {
- if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) {
- if (count == 0)
- printk(" 0x%lx", i);
- ++count;
- } else {
- if (count > 0)
- printk("-0x%lx", i);
- count = 0;
- }
- }
-
- if (count > 0)
- printk("-0x%lx", i);
- printk("\n");
+ pr_cont("-%u", nr_cpu_ids - 1);
+ pr_cont("\n");
}
}
@@ -947,8 +913,6 @@ void __init initmem_init(void)
if (parse_numa_properties())
setup_nonnuma();
- else
- dump_numa_memory_topology();
memblock_dump_all();
@@ -1121,7 +1085,7 @@ static int hot_add_node_scn_to_nid(unsigned long scn_addr)
int hot_add_scn_to_nid(unsigned long scn_addr)
{
struct device_node *memory = NULL;
- int nid, found = 0;
+ int nid;
if (!numa_enabled || (min_common_depth < 0))
return first_online_node;
@@ -1137,17 +1101,6 @@ int hot_add_scn_to_nid(unsigned long scn_addr)
if (nid < 0 || !node_online(nid))
nid = first_online_node;
- if (NODE_DATA(nid)->node_spanned_pages)
- return nid;
-
- for_each_online_node(nid) {
- if (NODE_DATA(nid)->node_spanned_pages) {
- found = 1;
- break;
- }
- }
-
- BUG_ON(!found);
return nid;
}
diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c
index ed7bddc456b7..8d941c692eb3 100644
--- a/arch/powerpc/mm/pgtable-radix.c
+++ b/arch/powerpc/mm/pgtable-radix.c
@@ -177,23 +177,15 @@ redo:
static void __init radix_init_partition_table(void)
{
- unsigned long rts_field;
+ unsigned long rts_field, dw0;
+ mmu_partition_table_init();
rts_field = radix__get_tree_size();
+ dw0 = rts_field | __pa(init_mm.pgd) | RADIX_PGD_INDEX_SIZE | PATB_HR;
+ mmu_partition_table_set_entry(0, dw0, 0);
- BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 24), "Partition table size too large.");
- partition_tb = early_alloc_pgtable(1UL << PATB_SIZE_SHIFT);
- partition_tb->patb0 = cpu_to_be64(rts_field | __pa(init_mm.pgd) |
- RADIX_PGD_INDEX_SIZE | PATB_HR);
pr_info("Initializing Radix MMU\n");
pr_info("Partition table %p\n", partition_tb);
-
- memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
- /*
- * update partition table control register,
- * 64 K size.
- */
- mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
}
void __init radix_init_native(void)
@@ -378,6 +370,8 @@ void __init radix__early_init_mmu(void)
radix_init_partition_table();
}
+ memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
+
radix_init_pgtable();
}
@@ -388,6 +382,10 @@ void radix__early_init_mmu_secondary(void)
* update partition table control register and UPRT
*/
if (!firmware_has_feature(FW_FEATURE_LPAR)) {
+
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ update_hid_for_radix();
+
lpcr = mfspr(SPRN_LPCR);
mtspr(SPRN_LPCR, lpcr | LPCR_UPRT | LPCR_HR);
diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index f5e8d4edb808..8bca7f58afc4 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -431,3 +431,37 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, int shift)
}
}
#endif
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void __init mmu_partition_table_init(void)
+{
+ unsigned long patb_size = 1UL << PATB_SIZE_SHIFT;
+
+ BUILD_BUG_ON_MSG((PATB_SIZE_SHIFT > 36), "Partition table size too large.");
+ partition_tb = __va(memblock_alloc_base(patb_size, patb_size,
+ MEMBLOCK_ALLOC_ANYWHERE));
+
+ /* Initialize the Partition Table with no entries */
+ memset((void *)partition_tb, 0, patb_size);
+
+ /*
+ * update partition table control register,
+ * 64 K size.
+ */
+ mtspr(SPRN_PTCR, __pa(partition_tb) | (PATB_SIZE_SHIFT - 12));
+}
+
+void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0,
+ unsigned long dw1)
+{
+ partition_tb[lpid].patb0 = cpu_to_be64(dw0);
+ partition_tb[lpid].patb1 = cpu_to_be64(dw1);
+
+ /* Global flush of TLBs and partition table caches for this lpid */
+ asm volatile("ptesync" : : : "memory");
+ asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : :
+ "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid));
+ asm volatile("eieio; tlbsync; ptesync" : : : "memory");
+}
+EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry);
+#endif /* CONFIG_PPC_BOOK3S_64 */
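[Editor's note] The new mmu_partition_table_set_entry() writes both doublewords of a partition-table entry and then issues a global tlbie so cached copies for that LPID are flushed; the hash and radix hunks above use it for LPID 0, and the GPL export suggests KVM will reuse it for guest LPIDs. A hedged sketch of such a caller, mirroring how radix_init_partition_table() composes dw0 — the function name and the pgd_pa parameter are illustrative, while radix__get_tree_size(), RADIX_PGD_INDEX_SIZE and PATB_HR are the kernel symbols used above:

    /* Sketch: install a radix partition-table entry for a hypothetical LPID.
     * dw0 = tree size | partition page-table base | root index size | host-radix;
     * dw1 = process-table base (0 here, as in radix_init_partition_table()). */
    static void example_set_guest_patb_entry(unsigned int lpid, unsigned long pgd_pa)
    {
            unsigned long dw0 = radix__get_tree_size() | pgd_pa |
                                RADIX_PGD_INDEX_SIZE | PATB_HR;

            mmu_partition_table_set_entry(lpid, dw0, 0 /* no process table yet */);
    }
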
diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c
index 0e49ec541ab5..3493cf4e0452 100644
--- a/arch/powerpc/mm/tlb-radix.c
+++ b/arch/powerpc/mm/tlb-radix.c
@@ -50,6 +50,8 @@ static inline void _tlbiel_pid(unsigned long pid, unsigned long ric)
for (set = 0; set < POWER9_TLB_SETS_RADIX ; set++) {
__tlbiel_pid(pid, set, ric);
}
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
return;
}
@@ -83,6 +85,8 @@ static inline void _tlbiel_va(unsigned long va, unsigned long pid,
asm volatile(PPC_TLBIEL(%0, %4, %3, %2, %1)
: : "r"(rb), "i"(r), "i"(prs), "i"(ric), "r"(rs) : "memory");
asm volatile("ptesync": : :"memory");
+ if (cpu_has_feature(CPU_FTR_POWER9_DD1))
+ asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
}
static inline void _tlbie_va(unsigned long va, unsigned long pid,
@@ -175,7 +179,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm)
if (unlikely(pid == MMU_NO_CONTEXT))
goto no_context;
- if (!mm_is_core_local(mm)) {
+ if (!mm_is_thread_local(mm)) {
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
if (lock_tlbie)
@@ -201,7 +205,7 @@ void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
if (unlikely(pid == MMU_NO_CONTEXT))
goto no_context;
- if (!mm_is_core_local(mm)) {
+ if (!mm_is_thread_local(mm)) {
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
if (lock_tlbie)
@@ -226,7 +230,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
pid = mm ? mm->context.id : 0;
if (unlikely(pid == MMU_NO_CONTEXT))
goto bail;
- if (!mm_is_core_local(mm)) {
+ if (!mm_is_thread_local(mm)) {
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
if (lock_tlbie)
@@ -321,7 +325,7 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start,
{
unsigned long pid;
unsigned long addr;
- int local = mm_is_core_local(mm);
+ int local = mm_is_thread_local(mm);
unsigned long ap = mmu_get_ap(psize);
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
unsigned long page_size = 1UL << mmu_psize_defs[psize].shift;
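[Editor's note] The tlb-radix.c hunks switch from mm_is_core_local() to mm_is_thread_local(): on radix the tlbiel instruction only affects the current hardware thread, so the local (non-broadcast) flush may only be used when the mm has run on no other thread. The helper itself is introduced elsewhere in this series (its definition is not shown here), so the following is an assumption of what it looks like, not a quotation from the patch:

    /* Assumed shape of the helper relied on above (definition not shown in
     * this diff): local flushes are safe only if the mm's cpumask contains
     * just the current CPU/thread. */
    static inline int mm_is_thread_local(struct mm_struct *mm)
    {
            return cpumask_equal(mm_cpumask(mm),
                                 cpumask_of(smp_processor_id()));
    }
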
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 0fe98a567125..73a5cf18fd84 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -766,7 +766,7 @@ emit_clear:
func = (u8 *) __bpf_call_base + imm;
/* Save skb pointer if we need to re-cache skb data */
- if (bpf_helper_changes_skb_data(func))
+ if (bpf_helper_changes_pkt_data(func))
PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -775,7 +775,7 @@ emit_clear:
PPC_MR(b2p[BPF_REG_0], 3);
/* refresh skb cache */
- if (bpf_helper_changes_skb_data(func)) {
+ if (bpf_helper_changes_pkt_data(func)) {
/* reload skb pointer to r3 */
PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx));
bpf_jit_emit_skb_loads(image, ctx);
diff --git a/arch/powerpc/platforms/40x/Kconfig b/arch/powerpc/platforms/40x/Kconfig
index e3257f24a8a1..1d7c1b142bf4 100644
--- a/arch/powerpc/platforms/40x/Kconfig
+++ b/arch/powerpc/platforms/40x/Kconfig
@@ -64,6 +64,7 @@ config XILINX_VIRTEX_GENERIC_BOARD
default n
select XILINX_VIRTEX_II_PRO
select XILINX_VIRTEX_4_FX
+ select XILINX_INTC
help
This option enables generic support for Xilinx Virtex based boards.
diff --git a/arch/powerpc/platforms/40x/virtex.c b/arch/powerpc/platforms/40x/virtex.c
index 91a08ea758a8..e3d5e095846b 100644
--- a/arch/powerpc/platforms/40x/virtex.c
+++ b/arch/powerpc/platforms/40x/virtex.c
@@ -48,7 +48,7 @@ define_machine(virtex) {
.probe = virtex_probe,
.setup_arch = xilinx_pci_init,
.init_IRQ = xilinx_intc_init_tree,
- .get_irq = xilinx_intc_get_irq,
+ .get_irq = xintc_get_irq,
.restart = ppc4xx_reset_system,
.calibrate_decr = generic_calibrate_decr,
};
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index 48fc18041ff6..25b8d641ff9f 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -241,6 +241,7 @@ config XILINX_VIRTEX440_GENERIC_BOARD
depends on 44x
default n
select XILINX_VIRTEX_5_FXT
+ select XILINX_INTC
help
This option enables generic support for Xilinx Virtex based boards
that use a 440 based processor in the Virtex 5 FXT FPGA architecture.
diff --git a/arch/powerpc/platforms/44x/virtex.c b/arch/powerpc/platforms/44x/virtex.c
index a7e08026097a..3eb13ed926ee 100644
--- a/arch/powerpc/platforms/44x/virtex.c
+++ b/arch/powerpc/platforms/44x/virtex.c
@@ -54,7 +54,7 @@ define_machine(virtex) {
.probe = virtex_probe,
.setup_arch = xilinx_pci_init,
.init_IRQ = xilinx_intc_init_tree,
- .get_irq = xilinx_intc_get_irq,
+ .get_irq = xintc_get_irq,
.calibrate_decr = generic_calibrate_decr,
.restart = ppc4xx_reset_system,
};
diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S
index 44d2d842cee7..3aa40f1b20f5 100644
--- a/arch/powerpc/platforms/powernv/opal-wrappers.S
+++ b/arch/powerpc/platforms/powernv/opal-wrappers.S
@@ -304,8 +304,11 @@ OPAL_CALL(opal_pci_get_presence_state, OPAL_PCI_GET_PRESENCE_STATE);
OPAL_CALL(opal_pci_get_power_state, OPAL_PCI_GET_POWER_STATE);
OPAL_CALL(opal_pci_set_power_state, OPAL_PCI_SET_POWER_STATE);
OPAL_CALL(opal_int_get_xirr, OPAL_INT_GET_XIRR);
+OPAL_CALL_REAL(opal_rm_int_get_xirr, OPAL_INT_GET_XIRR);
OPAL_CALL(opal_int_set_cppr, OPAL_INT_SET_CPPR);
OPAL_CALL(opal_int_eoi, OPAL_INT_EOI);
+OPAL_CALL_REAL(opal_rm_int_eoi, OPAL_INT_EOI);
OPAL_CALL(opal_int_set_mfrr, OPAL_INT_SET_MFRR);
+OPAL_CALL_REAL(opal_rm_int_set_mfrr, OPAL_INT_SET_MFRR);
OPAL_CALL(opal_pci_tce_kill, OPAL_PCI_TCE_KILL);
OPAL_CALL_REAL(opal_rm_pci_tce_kill, OPAL_PCI_TCE_KILL);
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index 6c9a65b52e63..b3b8930ac52f 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -896,3 +896,5 @@ EXPORT_SYMBOL_GPL(opal_leds_get_ind);
EXPORT_SYMBOL_GPL(opal_leds_set_ind);
/* Export this symbol for PowerNV Operator Panel class driver */
EXPORT_SYMBOL_GPL(opal_write_oppanel_async);
+/* Export this for KVM */
+EXPORT_SYMBOL_GPL(opal_int_set_mfrr);
diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c
index cb3c50328de8..cc2b281a3766 100644
--- a/arch/powerpc/platforms/ps3/htab.c
+++ b/arch/powerpc/platforms/ps3/htab.c
@@ -63,7 +63,7 @@ static long ps3_hpte_insert(unsigned long hpte_group, unsigned long vpn,
vflags &= ~HPTE_V_SECONDARY;
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize, ssize) | rflags;
+ hpte_r = hpte_encode_r(ps3_mm_phys_to_lpar(pa), psize, apsize) | rflags;
spin_lock_irqsave(&ps3_htab_lock, flags);
diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c
index aa35245d8d6d..f2c98f6c1c9c 100644
--- a/arch/powerpc/platforms/pseries/lpar.c
+++ b/arch/powerpc/platforms/pseries/lpar.c
@@ -145,7 +145,7 @@ static long pSeries_lpar_hpte_insert(unsigned long hpte_group,
hpte_group, vpn, pa, rflags, vflags, psize);
hpte_v = hpte_encode_v(vpn, psize, apsize, ssize) | vflags | HPTE_V_VALID;
- hpte_r = hpte_encode_r(pa, psize, apsize, ssize) | rflags;
+ hpte_r = hpte_encode_r(pa, psize, apsize) | rflags;
if (!(vflags & HPTE_V_BOLTED))
pr_devel(" hpte_v=%016lx, hpte_r=%016lx\n", hpte_v, hpte_r);
diff --git a/arch/powerpc/sysdev/xilinx_intc.c b/arch/powerpc/sysdev/xilinx_intc.c
index 0f52d7955796..4a86dcff3fcd 100644
--- a/arch/powerpc/sysdev/xilinx_intc.c
+++ b/arch/powerpc/sysdev/xilinx_intc.c
@@ -29,194 +29,7 @@
#include <asm/processor.h>
#include <asm/i8259.h>
#include <asm/irq.h>
-
-/*
- * INTC Registers
- */
-#define XINTC_ISR 0 /* Interrupt Status */
-#define XINTC_IPR 4 /* Interrupt Pending */
-#define XINTC_IER 8 /* Interrupt Enable */
-#define XINTC_IAR 12 /* Interrupt Acknowledge */
-#define XINTC_SIE 16 /* Set Interrupt Enable bits */
-#define XINTC_CIE 20 /* Clear Interrupt Enable bits */
-#define XINTC_IVR 24 /* Interrupt Vector */
-#define XINTC_MER 28 /* Master Enable */
-
-static struct irq_domain *master_irqhost;
-
-#define XILINX_INTC_MAXIRQS (32)
-
-/* The following table allows the interrupt type, edge or level,
- * to be cached after being read from the device tree until the interrupt
- * is mapped
- */
-static int xilinx_intc_typetable[XILINX_INTC_MAXIRQS];
-
-/* Map the interrupt type from the device tree to the interrupt types
- * used by the interrupt subsystem
- */
-static unsigned char xilinx_intc_map_senses[] = {
- IRQ_TYPE_EDGE_RISING,
- IRQ_TYPE_EDGE_FALLING,
- IRQ_TYPE_LEVEL_HIGH,
- IRQ_TYPE_LEVEL_LOW,
-};
-
-/*
- * The interrupt controller is setup such that it doesn't work well with
- * the level interrupt handler in the kernel because the handler acks the
- * interrupt before calling the application interrupt handler. To deal with
- * that, we use 2 different irq chips so that different functions can be
- * used for level and edge type interrupts.
- *
- * IRQ Chip common (across level and edge) operations
- */
-static void xilinx_intc_mask(struct irq_data *d)
-{
- int irq = irqd_to_hwirq(d);
- void * regs = irq_data_get_irq_chip_data(d);
- pr_debug("mask: %d\n", irq);
- out_be32(regs + XINTC_CIE, 1 << irq);
-}
-
-static int xilinx_intc_set_type(struct irq_data *d, unsigned int flow_type)
-{
- return 0;
-}
-
-/*
- * IRQ Chip level operations
- */
-static void xilinx_intc_level_unmask(struct irq_data *d)
-{
- int irq = irqd_to_hwirq(d);
- void * regs = irq_data_get_irq_chip_data(d);
- pr_debug("unmask: %d\n", irq);
- out_be32(regs + XINTC_SIE, 1 << irq);
-
- /* ack level irqs because they can't be acked during
- * ack function since the handle_level_irq function
- * acks the irq before calling the inerrupt handler
- */
- out_be32(regs + XINTC_IAR, 1 << irq);
-}
-
-static struct irq_chip xilinx_intc_level_irqchip = {
- .name = "Xilinx Level INTC",
- .irq_mask = xilinx_intc_mask,
- .irq_mask_ack = xilinx_intc_mask,
- .irq_unmask = xilinx_intc_level_unmask,
- .irq_set_type = xilinx_intc_set_type,
-};
-
-/*
- * IRQ Chip edge operations
- */
-static void xilinx_intc_edge_unmask(struct irq_data *d)
-{
- int irq = irqd_to_hwirq(d);
- void *regs = irq_data_get_irq_chip_data(d);
- pr_debug("unmask: %d\n", irq);
- out_be32(regs + XINTC_SIE, 1 << irq);
-}
-
-static void xilinx_intc_edge_ack(struct irq_data *d)
-{
- int irq = irqd_to_hwirq(d);
- void * regs = irq_data_get_irq_chip_data(d);
- pr_debug("ack: %d\n", irq);
- out_be32(regs + XINTC_IAR, 1 << irq);
-}
-
-static struct irq_chip xilinx_intc_edge_irqchip = {
- .name = "Xilinx Edge INTC",
- .irq_mask = xilinx_intc_mask,
- .irq_unmask = xilinx_intc_edge_unmask,
- .irq_ack = xilinx_intc_edge_ack,
- .irq_set_type = xilinx_intc_set_type,
-};
-
-/*
- * IRQ Host operations
- */
-
-/**
- * xilinx_intc_xlate - translate virq# from device tree interrupts property
- */
-static int xilinx_intc_xlate(struct irq_domain *h, struct device_node *ct,
- const u32 *intspec, unsigned int intsize,
- irq_hw_number_t *out_hwirq,
- unsigned int *out_flags)
-{
- if ((intsize < 2) || (intspec[0] >= XILINX_INTC_MAXIRQS))
- return -EINVAL;
-
- /* keep a copy of the interrupt type til the interrupt is mapped
- */
- xilinx_intc_typetable[intspec[0]] = xilinx_intc_map_senses[intspec[1]];
-
- /* Xilinx uses 2 interrupt entries, the 1st being the h/w
- * interrupt number, the 2nd being the interrupt type, edge or level
- */
- *out_hwirq = intspec[0];
- *out_flags = xilinx_intc_map_senses[intspec[1]];
-
- return 0;
-}
-static int xilinx_intc_map(struct irq_domain *h, unsigned int virq,
- irq_hw_number_t irq)
-{
- irq_set_chip_data(virq, h->host_data);
-
- if (xilinx_intc_typetable[irq] == IRQ_TYPE_LEVEL_HIGH ||
- xilinx_intc_typetable[irq] == IRQ_TYPE_LEVEL_LOW) {
- irq_set_chip_and_handler(virq, &xilinx_intc_level_irqchip,
- handle_level_irq);
- } else {
- irq_set_chip_and_handler(virq, &xilinx_intc_edge_irqchip,
- handle_edge_irq);
- }
- return 0;
-}
-
-static const struct irq_domain_ops xilinx_intc_ops = {
- .map = xilinx_intc_map,
- .xlate = xilinx_intc_xlate,
-};
-
-struct irq_domain * __init
-xilinx_intc_init(struct device_node *np)
-{
- struct irq_domain * irq;
- void * regs;
-
- /* Find and map the intc registers */
- regs = of_iomap(np, 0);
- if (!regs) {
- pr_err("xilinx_intc: could not map registers\n");
- return NULL;
- }
-
- /* Setup interrupt controller */
- out_be32(regs + XINTC_IER, 0); /* disable all irqs */
- out_be32(regs + XINTC_IAR, ~(u32) 0); /* Acknowledge pending irqs */
- out_be32(regs + XINTC_MER, 0x3UL); /* Turn on the Master Enable. */
-
- /* Allocate and initialize an irq_domain structure. */
- irq = irq_domain_add_linear(np, XILINX_INTC_MAXIRQS, &xilinx_intc_ops,
- regs);
- if (!irq)
- panic(__FILE__ ": Cannot allocate IRQ host\n");
-
- return irq;
-}
-
-int xilinx_intc_get_irq(void)
-{
- void * regs = master_irqhost->host_data;
- pr_debug("get_irq:\n");
- return irq_linear_revmap(master_irqhost, in_be32(regs + XINTC_IVR));
-}
+#include <linux/irqchip.h>
#if defined(CONFIG_PPC_I8259)
/*
@@ -265,31 +78,11 @@ static void __init xilinx_i8259_setup_cascade(void)
static inline void xilinx_i8259_setup_cascade(void) { return; }
#endif /* defined(CONFIG_PPC_I8259) */
-static const struct of_device_id xilinx_intc_match[] __initconst = {
- { .compatible = "xlnx,opb-intc-1.00.c", },
- { .compatible = "xlnx,xps-intc-1.00.a", },
- {}
-};
-
/*
* Initialize master Xilinx interrupt controller
*/
void __init xilinx_intc_init_tree(void)
{
- struct device_node *np;
-
- /* find top level interrupt controller */
- for_each_matching_node(np, xilinx_intc_match) {
- if (!of_get_property(np, "interrupts", NULL))
- break;
- }
- BUG_ON(!np);
-
- master_irqhost = xilinx_intc_init(np);
- BUG_ON(!master_irqhost);
-
- irq_set_default_host(master_irqhost);
- of_node_put(np);
-
+ irqchip_init();
xilinx_i8259_setup_cascade();
}