aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/xchg.h30
-rw-r--r--arch/arc/Kconfig1
-rw-r--r--arch/arm/boot/dts/bcm283x.dtsi4
-rw-r--r--arch/arm/boot/dts/bcm958625hr.dts2
-rw-r--r--arch/arm/boot/dts/imx7d-cl-som-imx7.dts52
-rw-r--r--arch/arm/boot/dts/r8a7791-porter.dts2
-rw-r--r--arch/arm/boot/dts/socfpga.dtsi2
-rw-r--r--arch/arm/include/asm/assembler.h10
-rw-r--r--arch/arm/include/asm/kvm_mmu.h16
-rw-r--r--arch/arm/include/asm/vdso.h2
-rw-r--r--arch/arm/kernel/traps.c5
-rw-r--r--arch/arm/kernel/vdso.c12
-rw-r--r--arch/arm/lib/getuser.S10
-rw-r--r--arch/arm/mach-omap1/clock.c6
-rw-r--r--arch/arm/mach-omap2/pm.c4
-rw-r--r--arch/arm/mach-omap2/timer.c19
-rw-r--r--arch/arm/mach-orion5x/Kconfig3
-rw-r--r--arch/arm/mach-orion5x/dns323-setup.c53
-rw-r--r--arch/arm/mach-orion5x/tsx09-common.c49
-rw-r--r--arch/arm/plat-omap/dmtimer.c7
-rw-r--r--arch/arm/probes/kprobes/opt-arm.c4
-rw-r--r--arch/arm64/boot/dts/qcom/msm8996.dtsi4
-rw-r--r--arch/arm64/include/asm/kvm_mmu.h16
-rw-r--r--arch/arm64/include/asm/spinlock.h4
-rw-r--r--arch/arm64/include/asm/stacktrace.h2
-rw-r--r--arch/arm64/kernel/cpu_errata.c4
-rw-r--r--arch/arm64/kernel/stacktrace.c5
-rw-r--r--arch/arm64/kernel/time.c2
-rw-r--r--arch/ia64/kernel/err_inject.c2
-rw-r--r--arch/m68k/coldfire/device.c12
-rw-r--r--arch/mips/cavium-octeon/octeon-irq.c10
-rw-r--r--arch/mips/include/asm/mach-ath79/ar71xx_regs.h2
-rw-r--r--arch/mips/include/asm/machine.h2
-rw-r--r--arch/mips/kernel/ptrace.c22
-rw-r--r--arch/mips/kernel/ptrace32.c4
-rw-r--r--arch/mips/kvm/mips.c2
-rw-r--r--arch/mips/mm/c-r4k.c9
-rw-r--r--arch/mips/txx9/rbtx4939/setup.c4
-rw-r--r--arch/powerpc/boot/Makefile3
-rw-r--r--arch/powerpc/include/asm/irq_work.h1
-rw-r--r--arch/powerpc/kernel/setup-common.c11
-rw-r--r--arch/powerpc/kvm/book3s_hv.c12
-rw-r--r--arch/powerpc/mm/numa.c78
-rw-r--r--arch/powerpc/net/bpf_jit_comp.c3
-rw-r--r--arch/powerpc/perf/core-book3s.c25
-rw-r--r--arch/powerpc/platforms/powernv/opal-nvram.c14
-rw-r--r--arch/powerpc/sysdev/mpic.c2
-rw-r--r--arch/s390/crypto/crc32be-vx.S5
-rw-r--r--arch/s390/crypto/crc32le-vx.S4
-rw-r--r--arch/s390/include/asm/alternative-asm.h108
-rw-r--r--arch/s390/include/asm/nospec-insn.h195
-rw-r--r--arch/s390/kernel/Makefile1
-rw-r--r--arch/s390/kernel/asm-offsets.c1
-rw-r--r--arch/s390/kernel/base.S24
-rw-r--r--arch/s390/kernel/entry.S105
-rw-r--r--arch/s390/kernel/irq.c5
-rw-r--r--arch/s390/kernel/mcount.S14
-rw-r--r--arch/s390/kernel/nospec-branch.c43
-rw-r--r--arch/s390/kernel/nospec-sysfs.c21
-rw-r--r--arch/s390/kernel/perf_cpum_sf.c4
-rw-r--r--arch/s390/kernel/reipl.S7
-rw-r--r--arch/s390/kernel/swsusp.S9
-rw-r--r--arch/s390/kvm/vsie.c2
-rw-r--r--arch/s390/lib/mem.S9
-rw-r--r--arch/s390/net/bpf_jit.S16
-rw-r--r--arch/s390/net/bpf_jit_comp.c63
-rw-r--r--arch/sh/kernel/entry-common.S2
-rw-r--r--arch/sparc/include/asm/atomic_64.h6
-rw-r--r--arch/sparc/include/asm/pgtable_64.h2
-rw-r--r--arch/sparc/mm/tlb.c23
-rw-r--r--arch/x86/boot/compressed/eboot.c6
-rw-r--r--arch/x86/events/core.c15
-rw-r--r--arch/x86/events/intel/core.c12
-rw-r--r--arch/x86/events/intel/ds.c117
-rw-r--r--arch/x86/events/perf_event.h2
-rw-r--r--arch/x86/include/asm/cpufeatures.h20
-rw-r--r--arch/x86/include/asm/i8259.h5
-rw-r--r--arch/x86/include/asm/kvm_host.h2
-rw-r--r--arch/x86/include/asm/mmu_context.h2
-rw-r--r--arch/x86/include/asm/msr-index.h10
-rw-r--r--arch/x86/include/asm/nospec-branch.h43
-rw-r--r--arch/x86/include/asm/pkeys.h18
-rw-r--r--arch/x86/include/asm/spec-ctrl.h80
-rw-r--r--arch/x86/include/asm/thread_info.h6
-rw-r--r--arch/x86/include/asm/tlbflush.h10
-rw-r--r--arch/x86/kernel/apic/apic.c2
-rw-r--r--arch/x86/kernel/cpu/amd.c38
-rw-r--r--arch/x86/kernel/cpu/bugs.c397
-rw-r--r--arch/x86/kernel/cpu/common.c77
-rw-r--r--arch/x86/kernel/cpu/cpu.h3
-rw-r--r--arch/x86/kernel/cpu/intel.c3
-rw-r--r--arch/x86/kernel/devicetree.c21
-rw-r--r--arch/x86/kernel/machine_kexec_32.c6
-rw-r--r--arch/x86/kernel/machine_kexec_64.c4
-rw-r--r--arch/x86/kernel/process.c224
-rw-r--r--arch/x86/kernel/smpboot.c6
-rw-r--r--arch/x86/kernel/tsc.c18
-rw-r--r--arch/x86/kvm/cpuid.c26
-rw-r--r--arch/x86/kvm/cpuid.h17
-rw-r--r--arch/x86/kvm/lapic.c10
-rw-r--r--arch/x86/kvm/svm.c64
-rw-r--r--arch/x86/kvm/vmx.c53
-rw-r--r--arch/x86/kvm/x86.c25
-rw-r--r--arch/x86/mm/init_64.c3
-rw-r--r--arch/x86/mm/pageattr.c6
-rw-r--r--arch/x86/mm/pgtable.c9
-rw-r--r--arch/x86/mm/pkeys.c21
-rw-r--r--arch/x86/power/hibernate_32.c2
-rw-r--r--arch/x86/power/hibernate_64.c2
-rw-r--r--arch/x86/xen/enlighten.c4
-rw-r--r--arch/x86/xen/mmu.c4
111 files changed, 2103 insertions, 507 deletions
diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
index 0ca9724597c1..7081e52291d0 100644
--- a/arch/alpha/include/asm/xchg.h
+++ b/arch/alpha/include/asm/xchg.h
@@ -11,6 +11,10 @@
* Atomic exchange.
* Since it can be used to implement critical sections
* it must clobber "memory" (also for interrupts in UP).
+ *
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
*/
static inline unsigned long
@@ -18,6 +22,7 @@ ____xchg(_u8, volatile char *m, unsigned long val)
{
unsigned long ret, tmp, addr64;
+ smp_mb();
__asm__ __volatile__(
" andnot %4,7,%3\n"
" insbl %1,%4,%1\n"
@@ -42,6 +47,7 @@ ____xchg(_u16, volatile short *m, unsigned long val)
{
unsigned long ret, tmp, addr64;
+ smp_mb();
__asm__ __volatile__(
" andnot %4,7,%3\n"
" inswl %1,%4,%1\n"
@@ -66,6 +72,7 @@ ____xchg(_u32, volatile int *m, unsigned long val)
{
unsigned long dummy;
+ smp_mb();
__asm__ __volatile__(
"1: ldl_l %0,%4\n"
" bis $31,%3,%1\n"
@@ -86,6 +93,7 @@ ____xchg(_u64, volatile long *m, unsigned long val)
{
unsigned long dummy;
+ smp_mb();
__asm__ __volatile__(
"1: ldq_l %0,%4\n"
" bis $31,%3,%1\n"
@@ -127,10 +135,12 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)
* store NEW in MEM. Return the initial value in MEM. Success is
* indicated by comparing RETURN with OLD.
*
- * The memory barrier should be placed in SMP only when we actually
- * make the change. If we don't change anything (so if the returned
- * prev is equal to old) then we aren't acquiring anything new and
- * we don't need any memory barrier as far I can tell.
+ * The leading and the trailing memory barriers guarantee that these
+ * operations are fully ordered.
+ *
+ * The trailing memory barrier is placed in SMP unconditionally, in
+ * order to guarantee that dependency ordering is preserved when a
+ * dependency is headed by an unsuccessful operation.
*/
static inline unsigned long
@@ -138,6 +148,7 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
{
unsigned long prev, tmp, cmp, addr64;
+ smp_mb();
__asm__ __volatile__(
" andnot %5,7,%4\n"
" insbl %1,%5,%1\n"
@@ -149,8 +160,8 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
" or %1,%2,%2\n"
" stq_c %2,0(%4)\n"
" beq %2,3f\n"
- __ASM__MB
"2:\n"
+ __ASM__MB
".subsection 2\n"
"3: br 1b\n"
".previous"
@@ -165,6 +176,7 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
{
unsigned long prev, tmp, cmp, addr64;
+ smp_mb();
__asm__ __volatile__(
" andnot %5,7,%4\n"
" inswl %1,%5,%1\n"
@@ -176,8 +188,8 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
" or %1,%2,%2\n"
" stq_c %2,0(%4)\n"
" beq %2,3f\n"
- __ASM__MB
"2:\n"
+ __ASM__MB
".subsection 2\n"
"3: br 1b\n"
".previous"
@@ -192,6 +204,7 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
{
unsigned long prev, cmp;
+ smp_mb();
__asm__ __volatile__(
"1: ldl_l %0,%5\n"
" cmpeq %0,%3,%1\n"
@@ -199,8 +212,8 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
" mov %4,%1\n"
" stl_c %1,%2\n"
" beq %1,3f\n"
- __ASM__MB
"2:\n"
+ __ASM__MB
".subsection 2\n"
"3: br 1b\n"
".previous"
@@ -215,6 +228,7 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
{
unsigned long prev, cmp;
+ smp_mb();
__asm__ __volatile__(
"1: ldq_l %0,%5\n"
" cmpeq %0,%3,%1\n"
@@ -222,8 +236,8 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
" mov %4,%1\n"
" stq_c %1,%2\n"
" beq %1,3f\n"
- __ASM__MB
"2:\n"
+ __ASM__MB
".subsection 2\n"
"3: br 1b\n"
".previous"
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index 249e10190d20..b7b78cb09a37 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -495,7 +495,6 @@ config ARC_CURR_IN_REG
config ARC_EMUL_UNALIGNED
bool "Emulate unaligned memory access (userspace only)"
- default N
select SYSCTL_ARCH_UNALIGN_NO_WARN
select SYSCTL_ARCH_UNALIGN_ALLOW
depends on ISA_ARCOMPACT
diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi
index 74dd21b7373c..c51b88ee3cec 100644
--- a/arch/arm/boot/dts/bcm283x.dtsi
+++ b/arch/arm/boot/dts/bcm283x.dtsi
@@ -146,8 +146,8 @@
i2s: i2s@7e203000 {
compatible = "brcm,bcm2835-i2s";
- reg = <0x7e203000 0x20>,
- <0x7e101098 0x02>;
+ reg = <0x7e203000 0x24>;
+ clocks = <&clocks BCM2835_CLOCK_PCM>;
dmas = <&dma 2>,
<&dma 3>;
diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts
index a1658d0721b8..cf0de77f09c4 100644
--- a/arch/arm/boot/dts/bcm958625hr.dts
+++ b/arch/arm/boot/dts/bcm958625hr.dts
@@ -49,7 +49,7 @@
memory {
device_type = "memory";
- reg = <0x60000000 0x80000000>;
+ reg = <0x60000000 0x20000000>;
};
gpio-restart {
diff --git a/arch/arm/boot/dts/imx7d-cl-som-imx7.dts b/arch/arm/boot/dts/imx7d-cl-som-imx7.dts
index 58b09bf1ba2d..205130600853 100644
--- a/arch/arm/boot/dts/imx7d-cl-som-imx7.dts
+++ b/arch/arm/boot/dts/imx7d-cl-som-imx7.dts
@@ -213,37 +213,37 @@
&iomuxc {
pinctrl_enet1: enet1grp {
fsl,pins = <
- MX7D_PAD_SD2_CD_B__ENET1_MDIO 0x3
- MX7D_PAD_SD2_WP__ENET1_MDC 0x3
- MX7D_PAD_ENET1_RGMII_TXC__ENET1_RGMII_TXC 0x1
- MX7D_PAD_ENET1_RGMII_TD0__ENET1_RGMII_TD0 0x1
- MX7D_PAD_ENET1_RGMII_TD1__ENET1_RGMII_TD1 0x1
- MX7D_PAD_ENET1_RGMII_TD2__ENET1_RGMII_TD2 0x1
- MX7D_PAD_ENET1_RGMII_TD3__ENET1_RGMII_TD3 0x1
- MX7D_PAD_ENET1_RGMII_TX_CTL__ENET1_RGMII_TX_CTL 0x1
- MX7D_PAD_ENET1_RGMII_RXC__ENET1_RGMII_RXC 0x1
- MX7D_PAD_ENET1_RGMII_RD0__ENET1_RGMII_RD0 0x1
- MX7D_PAD_ENET1_RGMII_RD1__ENET1_RGMII_RD1 0x1
- MX7D_PAD_ENET1_RGMII_RD2__ENET1_RGMII_RD2 0x1
- MX7D_PAD_ENET1_RGMII_RD3__ENET1_RGMII_RD3 0x1
- MX7D_PAD_ENET1_RGMII_RX_CTL__ENET1_RGMII_RX_CTL 0x1
+ MX7D_PAD_SD2_CD_B__ENET1_MDIO 0x30
+ MX7D_PAD_SD2_WP__ENET1_MDC 0x30
+ MX7D_PAD_ENET1_RGMII_TXC__ENET1_RGMII_TXC 0x11
+ MX7D_PAD_ENET1_RGMII_TD0__ENET1_RGMII_TD0 0x11
+ MX7D_PAD_ENET1_RGMII_TD1__ENET1_RGMII_TD1 0x11
+ MX7D_PAD_ENET1_RGMII_TD2__ENET1_RGMII_TD2 0x11
+ MX7D_PAD_ENET1_RGMII_TD3__ENET1_RGMII_TD3 0x11
+ MX7D_PAD_ENET1_RGMII_TX_CTL__ENET1_RGMII_TX_CTL 0x11
+ MX7D_PAD_ENET1_RGMII_RXC__ENET1_RGMII_RXC 0x11
+ MX7D_PAD_ENET1_RGMII_RD0__ENET1_RGMII_RD0 0x11
+ MX7D_PAD_ENET1_RGMII_RD1__ENET1_RGMII_RD1 0x11
+ MX7D_PAD_ENET1_RGMII_RD2__ENET1_RGMII_RD2 0x11
+ MX7D_PAD_ENET1_RGMII_RD3__ENET1_RGMII_RD3 0x11
+ MX7D_PAD_ENET1_RGMII_RX_CTL__ENET1_RGMII_RX_CTL 0x11
>;
};
pinctrl_enet2: enet2grp {
fsl,pins = <
- MX7D_PAD_EPDC_GDSP__ENET2_RGMII_TXC 0x1
- MX7D_PAD_EPDC_SDCE2__ENET2_RGMII_TD0 0x1
- MX7D_PAD_EPDC_SDCE3__ENET2_RGMII_TD1 0x1
- MX7D_PAD_EPDC_GDCLK__ENET2_RGMII_TD2 0x1
- MX7D_PAD_EPDC_GDOE__ENET2_RGMII_TD3 0x1
- MX7D_PAD_EPDC_GDRL__ENET2_RGMII_TX_CTL 0x1
- MX7D_PAD_EPDC_SDCE1__ENET2_RGMII_RXC 0x1
- MX7D_PAD_EPDC_SDCLK__ENET2_RGMII_RD0 0x1
- MX7D_PAD_EPDC_SDLE__ENET2_RGMII_RD1 0x1
- MX7D_PAD_EPDC_SDOE__ENET2_RGMII_RD2 0x1
- MX7D_PAD_EPDC_SDSHR__ENET2_RGMII_RD3 0x1
- MX7D_PAD_EPDC_SDCE0__ENET2_RGMII_RX_CTL 0x1
+ MX7D_PAD_EPDC_GDSP__ENET2_RGMII_TXC 0x11
+ MX7D_PAD_EPDC_SDCE2__ENET2_RGMII_TD0 0x11
+ MX7D_PAD_EPDC_SDCE3__ENET2_RGMII_TD1 0x11
+ MX7D_PAD_EPDC_GDCLK__ENET2_RGMII_TD2 0x11
+ MX7D_PAD_EPDC_GDOE__ENET2_RGMII_TD3 0x11
+ MX7D_PAD_EPDC_GDRL__ENET2_RGMII_TX_CTL 0x11
+ MX7D_PAD_EPDC_SDCE1__ENET2_RGMII_RXC 0x11
+ MX7D_PAD_EPDC_SDCLK__ENET2_RGMII_RD0 0x11
+ MX7D_PAD_EPDC_SDLE__ENET2_RGMII_RD1 0x11
+ MX7D_PAD_EPDC_SDOE__ENET2_RGMII_RD2 0x11
+ MX7D_PAD_EPDC_SDSHR__ENET2_RGMII_RD3 0x11
+ MX7D_PAD_EPDC_SDCE0__ENET2_RGMII_RX_CTL 0x11
>;
};
diff --git a/arch/arm/boot/dts/r8a7791-porter.dts b/arch/arm/boot/dts/r8a7791-porter.dts
index 6761d11d3f9e..db0239c7e6c7 100644
--- a/arch/arm/boot/dts/r8a7791-porter.dts
+++ b/arch/arm/boot/dts/r8a7791-porter.dts
@@ -428,7 +428,7 @@
"dclkin.0", "dclkin.1";
ports {
- port@1 {
+ port@0 {
endpoint {
remote-endpoint = <&adv7511_in>;
};
diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi
index 9f48141270b8..f0702d8063d9 100644
--- a/arch/arm/boot/dts/socfpga.dtsi
+++ b/arch/arm/boot/dts/socfpga.dtsi
@@ -759,7 +759,7 @@
timer@fffec600 {
compatible = "arm,cortex-a9-twd-timer";
reg = <0xfffec600 0x100>;
- interrupts = <1 13 0xf04>;
+ interrupts = <1 13 0xf01>;
clocks = <&mpu_periph_clk>;
};
diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h
index 12f99fd2e3b2..3aed4492c9a7 100644
--- a/arch/arm/include/asm/assembler.h
+++ b/arch/arm/include/asm/assembler.h
@@ -534,4 +534,14 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
#endif
.endm
+#ifdef CONFIG_KPROBES
+#define _ASM_NOKPROBE(entry) \
+ .pushsection "_kprobe_blacklist", "aw" ; \
+ .balign 4 ; \
+ .long entry; \
+ .popsection
+#else
+#define _ASM_NOKPROBE(entry)
+#endif
+
#endif /* __ASM_ASSEMBLER_H__ */
diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
index d10e36235438..7f66b1b3aca1 100644
--- a/arch/arm/include/asm/kvm_mmu.h
+++ b/arch/arm/include/asm/kvm_mmu.h
@@ -223,6 +223,22 @@ static inline unsigned int kvm_get_vmid_bits(void)
return 8;
}
+/*
+ * We are not in the kvm->srcu critical section most of the time, so we take
+ * the SRCU read lock here. Since we copy the data from the user page, we
+ * can immediately drop the lock again.
+ */
+static inline int kvm_read_guest_lock(struct kvm *kvm,
+ gpa_t gpa, void *data, unsigned long len)
+{
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
+ int ret = kvm_read_guest(kvm, gpa, data, len);
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ return ret;
+}
+
static inline void *kvm_get_hyp_vector(void)
{
return kvm_ksym_ref(__kvm_hyp_vector);
diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h
index d0295f1dd1a3..ff65b6d96c7e 100644
--- a/arch/arm/include/asm/vdso.h
+++ b/arch/arm/include/asm/vdso.h
@@ -11,8 +11,6 @@ struct mm_struct;
void arm_install_vdso(struct mm_struct *mm, unsigned long addr);
-extern char vdso_start, vdso_end;
-
extern unsigned int vdso_total_pages;
#else /* CONFIG_VDSO */
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 1b304897aa12..aa316a7562b1 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -19,6 +19,7 @@
#include <linux/uaccess.h>
#include <linux/hardirq.h>
#include <linux/kdebug.h>
+#include <linux/kprobes.h>
#include <linux/module.h>
#include <linux/kexec.h>
#include <linux/bug.h>
@@ -415,7 +416,8 @@ void unregister_undef_hook(struct undef_hook *hook)
raw_spin_unlock_irqrestore(&undef_lock, flags);
}
-static int call_undef_hook(struct pt_regs *regs, unsigned int instr)
+static nokprobe_inline
+int call_undef_hook(struct pt_regs *regs, unsigned int instr)
{
struct undef_hook *hook;
unsigned long flags;
@@ -488,6 +490,7 @@ die_sig:
arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6);
}
+NOKPROBE_SYMBOL(do_undefinstr)
/*
* Handle FIQ similarly to NMI on x86 systems.
diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c
index 53cf86cf2d1a..890439737374 100644
--- a/arch/arm/kernel/vdso.c
+++ b/arch/arm/kernel/vdso.c
@@ -39,6 +39,8 @@
static struct page **vdso_text_pagelist;
+extern char vdso_start[], vdso_end[];
+
/* Total number of pages needed for the data and text portions of the VDSO. */
unsigned int vdso_total_pages __ro_after_init;
@@ -179,13 +181,13 @@ static int __init vdso_init(void)
unsigned int text_pages;
int i;
- if (memcmp(&vdso_start, "\177ELF", 4)) {
+ if (memcmp(vdso_start, "\177ELF", 4)) {
pr_err("VDSO is not a valid ELF object!\n");
return -ENOEXEC;
}
- text_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT;
- pr_debug("vdso: %i text pages at base %p\n", text_pages, &vdso_start);
+ text_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
+ pr_debug("vdso: %i text pages at base %p\n", text_pages, vdso_start);
/* Allocate the VDSO text pagelist */
vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *),
@@ -200,7 +202,7 @@ static int __init vdso_init(void)
for (i = 0; i < text_pages; i++) {
struct page *page;
- page = virt_to_page(&vdso_start + i * PAGE_SIZE);
+ page = virt_to_page(vdso_start + i * PAGE_SIZE);
vdso_text_pagelist[i] = page;
}
@@ -211,7 +213,7 @@ static int __init vdso_init(void)
cntvct_ok = cntvct_functional();
- patch_vdso(&vdso_start);
+ patch_vdso(vdso_start);
return 0;
}
diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S
index df73914e81c8..746e7801dcdf 100644
--- a/arch/arm/lib/getuser.S
+++ b/arch/arm/lib/getuser.S
@@ -38,6 +38,7 @@ ENTRY(__get_user_1)
mov r0, #0
ret lr
ENDPROC(__get_user_1)
+_ASM_NOKPROBE(__get_user_1)
ENTRY(__get_user_2)
check_uaccess r0, 2, r1, r2, __get_user_bad
@@ -58,6 +59,7 @@ rb .req r0
mov r0, #0
ret lr
ENDPROC(__get_user_2)
+_ASM_NOKPROBE(__get_user_2)
ENTRY(__get_user_4)
check_uaccess r0, 4, r1, r2, __get_user_bad
@@ -65,6 +67,7 @@ ENTRY(__get_user_4)
mov r0, #0
ret lr
ENDPROC(__get_user_4)
+_ASM_NOKPROBE(__get_user_4)
ENTRY(__get_user_8)
check_uaccess r0, 8, r1, r2, __get_user_bad8
@@ -78,6 +81,7 @@ ENTRY(__get_user_8)
mov r0, #0
ret lr
ENDPROC(__get_user_8)
+_ASM_NOKPROBE(__get_user_8)
#ifdef __ARMEB__
ENTRY(__get_user_32t_8)
@@ -91,6 +95,7 @@ ENTRY(__get_user_32t_8)
mov r0, #0
ret lr
ENDPROC(__get_user_32t_8)
+_ASM_NOKPROBE(__get_user_32t_8)
ENTRY(__get_user_64t_1)
check_uaccess r0, 1, r1, r2, __get_user_bad8
@@ -98,6 +103,7 @@ ENTRY(__get_user_64t_1)
mov r0, #0
ret lr
ENDPROC(__get_user_64t_1)
+_ASM_NOKPROBE(__get_user_64t_1)
ENTRY(__get_user_64t_2)
check_uaccess r0, 2, r1, r2, __get_user_bad8
@@ -114,6 +120,7 @@ rb .req r0
mov r0, #0
ret lr
ENDPROC(__get_user_64t_2)
+_ASM_NOKPROBE(__get_user_64t_2)
ENTRY(__get_user_64t_4)
check_uaccess r0, 4, r1, r2, __get_user_bad8
@@ -121,6 +128,7 @@ ENTRY(__get_user_64t_4)
mov r0, #0
ret lr
ENDPROC(__get_user_64t_4)
+_ASM_NOKPROBE(__get_user_64t_4)
#endif
__get_user_bad8:
@@ -131,6 +139,8 @@ __get_user_bad:
ret lr
ENDPROC(__get_user_bad)
ENDPROC(__get_user_bad8)
+_ASM_NOKPROBE(__get_user_bad)
+_ASM_NOKPROBE(__get_user_bad8)
.pushsection __ex_table, "a"
.long 1b, __get_user_bad
diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c
index 4f5fd4a084c0..034b89499bd7 100644
--- a/arch/arm/mach-omap1/clock.c
+++ b/arch/arm/mach-omap1/clock.c
@@ -1031,17 +1031,17 @@ static int clk_debugfs_register_one(struct clk *c)
return -ENOMEM;
c->dent = d;
- d = debugfs_create_u8("usecount", S_IRUGO, c->dent, (u8 *)&c->usecount);
+ d = debugfs_create_u8("usecount", S_IRUGO, c->dent, &c->usecount);
if (!d) {
err = -ENOMEM;
goto err_out;
}
- d = debugfs_create_u32("rate", S_IRUGO, c->dent, (u32 *)&c->rate);
+ d = debugfs_create_ulong("rate", S_IRUGO, c->dent, &c->rate);
if (!d) {
err = -ENOMEM;
goto err_out;
}
- d = debugfs_create_x32("flags", S_IRUGO, c->dent, (u32 *)&c->flags);
+ d = debugfs_create_x8("flags", S_IRUGO, c->dent, &c->flags);
if (!d) {
err = -ENOMEM;
goto err_out;
diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c
index 678d2a31dcb8..3202015ecb83 100644
--- a/arch/arm/mach-omap2/pm.c
+++ b/arch/arm/mach-omap2/pm.c
@@ -225,7 +225,7 @@ static void omap_pm_end(void)
cpu_idle_poll_ctrl(false);
}
-static void omap_pm_finish(void)
+static void omap_pm_wake(void)
{
if (cpu_is_omap34xx())
omap_prcm_irq_complete();
@@ -235,7 +235,7 @@ static const struct platform_suspend_ops omap_pm_ops = {
.begin = omap_pm_begin,
.end = omap_pm_end,
.enter = omap_pm_enter,
- .finish = omap_pm_finish,
+ .wake = omap_pm_wake,
.valid = suspend_valid_only_mem,
};
diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c
index b2f2448bfa6d..a4cab2814655 100644
--- a/arch/arm/mach-omap2/timer.c
+++ b/arch/arm/mach-omap2/timer.c
@@ -136,12 +136,6 @@ static struct clock_event_device clockevent_gpt = {
.tick_resume = omap2_gp_timer_shutdown,
};
-static struct property device_disabled = {
- .name = "status",
- .length = sizeof("disabled"),
- .value = "disabled",
-};
-
static const struct of_device_id omap_timer_match[] __initconst = {
{ .compatible = "ti,omap2420-timer", },
{ .compatible = "ti,omap3430-timer", },
@@ -183,8 +177,17 @@ static struct device_node * __init omap_get_timer_dt(const struct of_device_id *
of_get_property(np, "ti,timer-secure", NULL)))
continue;
- if (!of_device_is_compatible(np, "ti,omap-counter32k"))
- of_add_property(np, &device_disabled);
+ if (!of_device_is_compatible(np, "ti,omap-counter32k")) {
+ struct property *prop;
+
+ prop = kzalloc(sizeof(*prop), GFP_KERNEL);
+ if (!prop)
+ return NULL;
+ prop->name = "status";
+ prop->value = "disabled";
+ prop->length = strlen(prop->value);
+ of_add_property(np, prop);
+ }
return np;
}
diff --git a/arch/arm/mach-orion5x/Kconfig b/arch/arm/mach-orion5x/Kconfig
index 89bb0fc796bd..72905a442106 100644
--- a/arch/arm/mach-orion5x/Kconfig
+++ b/arch/arm/mach-orion5x/Kconfig
@@ -57,7 +57,6 @@ config MACH_KUROBOX_PRO
config MACH_DNS323
bool "D-Link DNS-323"
- select GENERIC_NET_UTILS
select I2C_BOARDINFO if I2C
help
Say 'Y' here if you want your kernel to support the
@@ -65,7 +64,6 @@ config MACH_DNS323
config MACH_TS209
bool "QNAP TS-109/TS-209"
- select GENERIC_NET_UTILS
help
Say 'Y' here if you want your kernel to support the
QNAP TS-109/TS-209 platform.
@@ -107,7 +105,6 @@ config MACH_LINKSTATION_LS_HGL
config MACH_TS409
bool "QNAP TS-409"
- select GENERIC_NET_UTILS
help
Say 'Y' here if you want your kernel to support the
QNAP TS-409 platform.
diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c
index cd483bfb5ca8..d13344b2ddcd 100644
--- a/arch/arm/mach-orion5x/dns323-setup.c
+++ b/arch/arm/mach-orion5x/dns323-setup.c
@@ -173,10 +173,42 @@ static struct mv643xx_eth_platform_data dns323_eth_data = {
.phy_addr = MV643XX_ETH_PHY_ADDR(8),
};
+/* dns323_parse_hex_*() taken from tsx09-common.c; should a common copy of these
+ * functions be kept somewhere?
+ */
+static int __init dns323_parse_hex_nibble(char n)
+{
+ if (n >= '0' && n <= '9')
+ return n - '0';
+
+ if (n >= 'A' && n <= 'F')
+ return n - 'A' + 10;
+
+ if (n >= 'a' && n <= 'f')
+ return n - 'a' + 10;
+
+ return -1;
+}
+
+static int __init dns323_parse_hex_byte(const char *b)
+{
+ int hi;
+ int lo;
+
+ hi = dns323_parse_hex_nibble(b[0]);
+ lo = dns323_parse_hex_nibble(b[1]);
+
+ if (hi < 0 || lo < 0)
+ return -1;
+
+ return (hi << 4) | lo;
+}
+
static int __init dns323_read_mac_addr(void)
{
u_int8_t addr[6];
- void __iomem *mac_page;
+ int i;
+ char *mac_page;
/* MAC address is stored as a regular ol' string in /dev/mtdblock4
* (0x007d0000-0x00800000) starting at offset 196480 (0x2ff80).
@@ -185,8 +217,23 @@ static int __init dns323_read_mac_addr(void)
if (!mac_page)
return -ENOMEM;
- if (!mac_pton((__force const char *) mac_page, addr))
- goto error_fail;
+ /* Sanity check the string we're looking at */
+ for (i = 0; i < 5; i++) {
+ if (*(mac_page + (i * 3) + 2) != ':') {
+ goto error_fail;
+ }
+ }
+
+ for (i = 0; i < 6; i++) {
+ int byte;
+
+ byte = dns323_parse_hex_byte(mac_page + (i * 3));
+ if (byte < 0) {
+ goto error_fail;
+ }
+
+ addr[i] = byte;
+ }
iounmap(mac_page);
printk("DNS-323: Found ethernet MAC address: %pM\n", addr);
diff --git a/arch/arm/mach-orion5x/tsx09-common.c b/arch/arm/mach-orion5x/tsx09-common.c
index 89774985d380..905d4f2dd0b8 100644
--- a/arch/arm/mach-orion5x/tsx09-common.c
+++ b/arch/arm/mach-orion5x/tsx09-common.c
@@ -53,12 +53,53 @@ struct mv643xx_eth_platform_data qnap_tsx09_eth_data = {
.phy_addr = MV643XX_ETH_PHY_ADDR(8),
};
+static int __init qnap_tsx09_parse_hex_nibble(char n)
+{
+ if (n >= '0' && n <= '9')
+ return n - '0';
+
+ if (n >= 'A' && n <= 'F')
+ return n - 'A' + 10;
+
+ if (n >= 'a' && n <= 'f')
+ return n - 'a' + 10;
+
+ return -1;
+}
+
+static int __init qnap_tsx09_parse_hex_byte(const char *b)
+{
+ int hi;
+ int lo;
+
+ hi = qnap_tsx09_parse_hex_nibble(b[0]);
+ lo = qnap_tsx09_parse_hex_nibble(b[1]);
+
+ if (hi < 0 || lo < 0)
+ return -1;
+
+ return (hi << 4) | lo;
+}
+
static int __init qnap_tsx09_check_mac_addr(const char *addr_str)
{
u_int8_t addr[6];
+ int i;
- if (!mac_pton(addr_str, addr))
- return -1;
+ for (i = 0; i < 6; i++) {
+ int byte;
+
+ /*
+ * Enforce "xx:xx:xx:xx:xx:xx\n" format.
+ */
+ if (addr_str[(i * 3) + 2] != ((i < 5) ? ':' : '\n'))
+ return -1;
+
+ byte = qnap_tsx09_parse_hex_byte(addr_str + (i * 3));
+ if (byte < 0)
+ return -1;
+ addr[i] = byte;
+ }
printk(KERN_INFO "tsx09: found ethernet mac address %pM\n", addr);
@@ -77,12 +118,12 @@ void __init qnap_tsx09_find_mac_addr(u32 mem_base, u32 size)
unsigned long addr;
for (addr = mem_base; addr < (mem_base + size); addr += 1024) {
- void __iomem *nor_page;
+ char *nor_page;
int ret = 0;
nor_page = ioremap(addr, 1024);
if (nor_page != NULL) {
- ret = qnap_tsx09_check_mac_addr((__force const char *)nor_page);
+ ret = qnap_tsx09_check_mac_addr(nor_page);
iounmap(nor_page);
}
diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c
index 7a327bd32521..ebef8aacea83 100644
--- a/arch/arm/plat-omap/dmtimer.c
+++ b/arch/arm/plat-omap/dmtimer.c
@@ -890,11 +890,8 @@ static int omap_dm_timer_probe(struct platform_device *pdev)
timer->irq = irq->start;
timer->pdev = pdev;
- /* Skip pm_runtime_enable for OMAP1 */
- if (!(timer->capability & OMAP_TIMER_NEEDS_RESET)) {
- pm_runtime_enable(dev);
- pm_runtime_irq_safe(dev);
- }
+ pm_runtime_enable(dev);
+ pm_runtime_irq_safe(dev);
if (!timer->reserved) {
ret = pm_runtime_get_sync(dev);
diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c
index bcdecc25461b..b2aa9b32bff2 100644
--- a/arch/arm/probes/kprobes/opt-arm.c
+++ b/arch/arm/probes/kprobes/opt-arm.c
@@ -165,13 +165,14 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
{
unsigned long flags;
struct kprobe *p = &op->kp;
- struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+ struct kprobe_ctlblk *kcb;
/* Save skipped registers */
regs->ARM_pc = (unsigned long)op->kp.addr;
regs->ARM_ORIG_r0 = ~0UL;
local_irq_save(flags);
+ kcb = get_kprobe_ctlblk();
if (kprobe_running()) {
kprobes_inc_nmissed_count(&op->kp);
@@ -191,6 +192,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs)
local_irq_restore(flags);
}
+NOKPROBE_SYMBOL(optimized_callback)
int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig)
{
diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi
index 338f82a7fdc7..2c93de7fffe5 100644
--- a/arch/arm64/boot/dts/qcom/msm8996.dtsi
+++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi
@@ -326,8 +326,8 @@
blsp2_spi5: spi@075ba000{
compatible = "qcom,spi-qup-v2.2.1";
reg = <0x075ba000 0x600>;
- interrupts = <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&gcc GCC_BLSP2_QUP5_SPI_APPS_CLK>,
+ interrupts = <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&gcc GCC_BLSP2_QUP6_SPI_APPS_CLK>,
<&gcc GCC_BLSP2_AHB_CLK>;
clock-names = "core", "iface";
pinctrl-names = "default", "sleep";
diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h
index 80bf33715ecb..eac73a640ea7 100644
--- a/arch/arm64/include/asm/kvm_mmu.h
+++ b/arch/arm64/include/asm/kvm_mmu.h
@@ -313,6 +313,22 @@ static inline unsigned int kvm_get_vmid_bits(void)
return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8;
}
+/*
+ * We are not in the kvm->srcu critical section most of the time, so we take
+ * the SRCU read lock here. Since we copy the data from the user page, we
+ * can immediately drop the lock again.
+ */
+static inline int kvm_read_guest_lock(struct kvm *kvm,
+ gpa_t gpa, void *data, unsigned long len)
+{
+ int srcu_idx = srcu_read_lock(&kvm->srcu);
+ int ret = kvm_read_guest(kvm, gpa, data, len);
+
+ srcu_read_unlock(&kvm->srcu, srcu_idx);
+
+ return ret;
+}
+
#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
#include <asm/mmu.h>
diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h
index cae331d553f8..a9d2dd03c977 100644
--- a/arch/arm64/include/asm/spinlock.h
+++ b/arch/arm64/include/asm/spinlock.h
@@ -141,8 +141,8 @@ static inline int arch_spin_trylock(arch_spinlock_t *lock)
" cbnz %w1, 1f\n"
" add %w1, %w0, %3\n"
" casa %w0, %w1, %2\n"
- " and %w1, %w1, #0xffff\n"
- " eor %w1, %w1, %w0, lsr #16\n"
+ " sub %w1, %w1, %3\n"
+ " eor %w1, %w1, %w0\n"
"1:")
: "=&r" (lockval), "=&r" (tmp), "+Q" (*lock)
: "I" (1 << TICKET_SHIFT)
diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h
index 801a16dbbdf6..7d2a15a0f625 100644
--- a/arch/arm64/include/asm/stacktrace.h
+++ b/arch/arm64/include/asm/stacktrace.h
@@ -23,7 +23,7 @@ struct stackframe {
unsigned long sp;
unsigned long pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- unsigned int graph;
+ int graph;
#endif
};
diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c
index 74107134cc30..2de62aa91303 100644
--- a/arch/arm64/kernel/cpu_errata.c
+++ b/arch/arm64/kernel/cpu_errata.c
@@ -160,7 +160,7 @@ static int enable_smccc_arch_workaround_1(void *data)
case PSCI_CONDUIT_HVC:
arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- if (res.a0)
+ if ((int)res.a0 < 0)
return 0;
cb = call_hvc_arch_workaround_1;
smccc_start = __smccc_workaround_1_hvc_start;
@@ -170,7 +170,7 @@ static int enable_smccc_arch_workaround_1(void *data)
case PSCI_CONDUIT_SMC:
arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
ARM_SMCCC_ARCH_WORKAROUND_1, &res);
- if (res.a0)
+ if ((int)res.a0 < 0)
return 0;
cb = call_smc_arch_workaround_1;
smccc_start = __smccc_workaround_1_smc_start;
diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c
index 8a552a33c6ef..5201bebcec07 100644
--- a/arch/arm64/kernel/stacktrace.c
+++ b/arch/arm64/kernel/stacktrace.c
@@ -73,6 +73,11 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (tsk->ret_stack &&
(frame->pc == (unsigned long)return_to_handler)) {
+ if (WARN_ON_ONCE(frame->graph == -1))
+ return -EINVAL;
+ if (frame->graph < -1)
+ frame->graph += FTRACE_NOTRACE_DEPTH;
+
/*
* This is a case where function graph tracer has
* modified a return address (LR) in a stack frame
diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c
index 59779699a1a4..5d9076e86200 100644
--- a/arch/arm64/kernel/time.c
+++ b/arch/arm64/kernel/time.c
@@ -53,7 +53,7 @@ unsigned long profile_pc(struct pt_regs *regs)
frame.sp = regs->sp;
frame.pc = regs->pc;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = -1; /* no task info */
+ frame.graph = current->curr_ret_stack;
#endif
do {
int ret = unwind_frame(NULL, &frame);
diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c
index 5ed0ea92c5bf..f851c9d651f0 100644
--- a/arch/ia64/kernel/err_inject.c
+++ b/arch/ia64/kernel/err_inject.c
@@ -142,7 +142,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
u64 virt_addr=simple_strtoull(buf, NULL, 16);
int ret;
- ret = get_user_pages(virt_addr, 1, FOLL_WRITE, NULL, NULL);
+ ret = get_user_pages_fast(virt_addr, 1, FOLL_WRITE, NULL);
if (ret<=0) {
#ifdef ERR_INJ_DEBUG
printk("Virtual address %lx is not existing.\n",virt_addr);
diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c
index a0fc0c192427..3e8be0f54a44 100644
--- a/arch/m68k/coldfire/device.c
+++ b/arch/m68k/coldfire/device.c
@@ -135,7 +135,11 @@ static struct platform_device mcf_fec0 = {
.id = 0,
.num_resources = ARRAY_SIZE(mcf_fec0_resources),
.resource = mcf_fec0_resources,
- .dev.platform_data = FEC_PDATA,
+ .dev = {
+ .dma_mask = &mcf_fec0.dev.coherent_dma_mask,
+ .coherent_dma_mask = DMA_BIT_MASK(32),
+ .platform_data = FEC_PDATA,
+ }
};
#ifdef MCFFEC_BASE1
@@ -167,7 +171,11 @@ static struct platform_device mcf_fec1 = {
.id = 1,
.num_resources = ARRAY_SIZE(mcf_fec1_resources),
.resource = mcf_fec1_resources,
- .dev.platform_data = FEC_PDATA,
+ .dev = {
+ .dma_mask = &mcf_fec1.dev.coherent_dma_mask,
+ .coherent_dma_mask = DMA_BIT_MASK(32),
+ .platform_data = FEC_PDATA,
+ }
};
#endif /* MCFFEC_BASE1 */
#endif /* CONFIG_FEC */
diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c
index 6ed1ded87b8f..6420c83c29d1 100644
--- a/arch/mips/cavium-octeon/octeon-irq.c
+++ b/arch/mips/cavium-octeon/octeon-irq.c
@@ -2271,7 +2271,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
parent_irq = irq_of_parse_and_map(ciu_node, 0);
if (!parent_irq) {
- pr_err("ERROR: Couldn't acquire parent_irq for %s\n.",
+ pr_err("ERROR: Couldn't acquire parent_irq for %s\n",
ciu_node->name);
return -EINVAL;
}
@@ -2283,7 +2283,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
addr = of_get_address(ciu_node, 0, NULL, NULL);
if (!addr) {
- pr_err("ERROR: Couldn't acquire reg(0) %s\n.", ciu_node->name);
+ pr_err("ERROR: Couldn't acquire reg(0) %s\n", ciu_node->name);
return -EINVAL;
}
host_data->raw_reg = (u64)phys_to_virt(
@@ -2291,7 +2291,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
addr = of_get_address(ciu_node, 1, NULL, NULL);
if (!addr) {
- pr_err("ERROR: Couldn't acquire reg(1) %s\n.", ciu_node->name);
+ pr_err("ERROR: Couldn't acquire reg(1) %s\n", ciu_node->name);
return -EINVAL;
}
host_data->en_reg = (u64)phys_to_virt(
@@ -2299,7 +2299,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
r = of_property_read_u32(ciu_node, "cavium,max-bits", &val);
if (r) {
- pr_err("ERROR: Couldn't read cavium,max-bits from %s\n.",
+ pr_err("ERROR: Couldn't read cavium,max-bits from %s\n",
ciu_node->name);
return r;
}
@@ -2309,7 +2309,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node,
&octeon_irq_domain_cib_ops,
host_data);
if (!cib_domain) {
- pr_err("ERROR: Couldn't irq_domain_add_linear()\n.");
+ pr_err("ERROR: Couldn't irq_domain_add_linear()\n");
return -ENOMEM;
}
diff --git a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
index aa3800c82332..d99ca862dae3 100644
--- a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
+++ b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h
@@ -167,7 +167,7 @@
#define AR71XX_AHB_DIV_MASK 0x7
#define AR724X_PLL_REG_CPU_CONFIG 0x00
-#define AR724X_PLL_REG_PCIE_CONFIG 0x18
+#define AR724X_PLL_REG_PCIE_CONFIG 0x10
#define AR724X_PLL_FB_SHIFT 0
#define AR724X_PLL_FB_MASK 0x3ff
diff --git a/arch/mips/include/asm/machine.h b/arch/mips/include/asm/machine.h
index 6b444cd9526f..db930cdc715f 100644
--- a/arch/mips/include/asm/machine.h
+++ b/arch/mips/include/asm/machine.h
@@ -52,7 +52,7 @@ mips_machine_is_compatible(const struct mips_machine *mach, const void *fdt)
if (!mach->matches)
return NULL;
- for (match = mach->matches; match->compatible; match++) {
+ for (match = mach->matches; match->compatible[0]; match++) {
if (fdt_node_check_compatible(fdt, 0, match->compatible) == 0)
return match;
}
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 0c8ae2cc6380..8f7bf74d1c0b 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -483,7 +483,7 @@ static int fpr_get_msa(struct task_struct *target,
/*
* Copy the floating-point context to the supplied NT_PRFPREG buffer.
* Choose the appropriate helper for general registers, and then copy
- * the FCSR register separately.
+ * the FCSR and FIR registers separately.
*/
static int fpr_get(struct task_struct *target,
const struct user_regset *regset,
@@ -491,6 +491,7 @@ static int fpr_get(struct task_struct *target,
void *kbuf, void __user *ubuf)
{
const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+ const int fir_pos = fcr31_pos + sizeof(u32);
int err;
if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t))
@@ -503,6 +504,12 @@ static int fpr_get(struct task_struct *target,
err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.fpu.fcr31,
fcr31_pos, fcr31_pos + sizeof(u32));
+ if (err)
+ return err;
+
+ err = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
+ &boot_cpu_data.fpu_id,
+ fir_pos, fir_pos + sizeof(u32));
return err;
}
@@ -551,7 +558,8 @@ static int fpr_set_msa(struct task_struct *target,
/*
* Copy the supplied NT_PRFPREG buffer to the floating-point context.
* Choose the appropriate helper for general registers, and then copy
- * the FCSR register separately.
+ * the FCSR register separately. Ignore the incoming FIR register
+ * contents though, as the register is read-only.
*
* We optimize for the case where `count % sizeof(elf_fpreg_t) == 0',
* which is supposed to have been guaranteed by the kernel before
@@ -565,6 +573,7 @@ static int fpr_set(struct task_struct *target,
const void *kbuf, const void __user *ubuf)
{
const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t);
+ const int fir_pos = fcr31_pos + sizeof(u32);
u32 fcr31;
int err;
@@ -592,6 +601,11 @@ static int fpr_set(struct task_struct *target,
ptrace_setfcr31(target, fcr31);
}
+ if (count > 0)
+ err = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf,
+ fir_pos,
+ fir_pos + sizeof(u32));
+
return err;
}
@@ -813,7 +827,7 @@ long arch_ptrace(struct task_struct *child, long request,
fregs = get_fpu_regs(child);
#ifdef CONFIG_32BIT
- if (test_thread_flag(TIF_32BIT_FPREGS)) {
+ if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
/*
* The odd registers are actually the high
* order bits of the values stored in the even
@@ -902,7 +916,7 @@ long arch_ptrace(struct task_struct *child, long request,
init_fp_ctx(child);
#ifdef CONFIG_32BIT
- if (test_thread_flag(TIF_32BIT_FPREGS)) {
+ if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
/*
* The odd registers are actually the high
* order bits of the values stored in the even
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index 5fcbdcd7abd0..bc9afbabbe14 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -97,7 +97,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
break;
}
fregs = get_fpu_regs(child);
- if (test_thread_flag(TIF_32BIT_FPREGS)) {
+ if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
/*
* The odd registers are actually the high
* order bits of the values stored in the even
@@ -204,7 +204,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
sizeof(child->thread.fpu));
child->thread.fpu.fcr31 = 0;
}
- if (test_thread_flag(TIF_32BIT_FPREGS)) {
+ if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) {
/*
* The odd registers are actually the high
* order bits of the values stored in the even
diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c
index 29ec9ab3fd55..a2c46f539e3e 100644
--- a/arch/mips/kvm/mips.c
+++ b/arch/mips/kvm/mips.c
@@ -42,7 +42,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU },
{ "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU },
{ "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU },
- { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
+ { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU },
{ "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU },
{ "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU },
{ "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU },
diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c
index 9d0107fbb169..43fa682e55da 100644
--- a/arch/mips/mm/c-r4k.c
+++ b/arch/mips/mm/c-r4k.c
@@ -851,9 +851,12 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size)
/*
* Either no secondary cache or the available caches don't have the
* subset property so we have to flush the primary caches
- * explicitly
+ * explicitly.
+ * If we would need IPI to perform an INDEX-type operation, then
+ * we have to use the HIT-type alternative as IPI cannot be used
+ * here due to interrupts possibly being disabled.
*/
- if (size >= dcache_size) {
+ if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) {
r4k_blast_dcache();
} else {
R4600_HIT_CACHEOP_WAR_IMPL;
@@ -890,7 +893,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size)
return;
}
- if (size >= dcache_size) {
+ if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) {
r4k_blast_dcache();
} else {
R4600_HIT_CACHEOP_WAR_IMPL;
diff --git a/arch/mips/txx9/rbtx4939/setup.c b/arch/mips/txx9/rbtx4939/setup.c
index 8b937300fb7f..fd26fadc8617 100644
--- a/arch/mips/txx9/rbtx4939/setup.c
+++ b/arch/mips/txx9/rbtx4939/setup.c
@@ -186,7 +186,7 @@ static void __init rbtx4939_update_ioc_pen(void)
#define RBTX4939_MAX_7SEGLEDS 8
-#if IS_ENABLED(CONFIG_LEDS_CLASS)
+#if IS_BUILTIN(CONFIG_LEDS_CLASS)
static u8 led_val[RBTX4939_MAX_7SEGLEDS];
struct rbtx4939_led_data {
struct led_classdev cdev;
@@ -261,7 +261,7 @@ static inline void rbtx4939_led_setup(void)
static void __rbtx4939_7segled_putc(unsigned int pos, unsigned char val)
{
-#if IS_ENABLED(CONFIG_LEDS_CLASS)
+#if IS_BUILTIN(CONFIG_LEDS_CLASS)
unsigned long flags;
local_irq_save(flags);
/* bit7: reserved for LED class */
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile
index 9d47f2efa830..bb69f3955b59 100644
--- a/arch/powerpc/boot/Makefile
+++ b/arch/powerpc/boot/Makefile
@@ -92,7 +92,8 @@ $(addprefix $(obj)/,$(zlib-y)): \
libfdt := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c
libfdtheader := fdt.h libfdt.h libfdt_internal.h
-$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \
+$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o \
+ treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \
$(addprefix $(obj)/,$(libfdtheader))
src-wlib-y := string.S crt0.S crtsavres.S stdio.c decompress.c main.c \
diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h
index 744fd54de374..1bcc84903930 100644
--- a/arch/powerpc/include/asm/irq_work.h
+++ b/arch/powerpc/include/asm/irq_work.h
@@ -5,5 +5,6 @@ static inline bool arch_irq_work_has_interrupt(void)
{
return true;
}
+extern void arch_irq_work_raise(void);
#endif /* _ASM_POWERPC_IRQ_WORK_H */
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index f516ac508ae3..bf0f712ac0e0 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -228,14 +228,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
unsigned short maj;
unsigned short min;
- /* We only show online cpus: disable preempt (overzealous, I
- * knew) to prevent cpu going down. */
- preempt_disable();
- if (!cpu_online(cpu_id)) {
- preempt_enable();
- return 0;
- }
-
#ifdef CONFIG_SMP
pvr = per_cpu(cpu_pvr, cpu_id);
#else
@@ -340,9 +332,6 @@ static int show_cpuinfo(struct seq_file *m, void *v)
#ifdef CONFIG_SMP
seq_printf(m, "\n");
#endif
-
- preempt_enable();
-
/* If this is the last cpu, print the summary */
if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids)
show_cpuinfo_summary(m);
diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 218cba2f5699..0a2b247dbc6b 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3107,15 +3107,17 @@ static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
goto up_out;
psize = vma_kernel_pagesize(vma);
- porder = __ilog2(psize);
up_read(&current->mm->mmap_sem);
/* We can handle 4k, 64k or 16M pages in the VRMA */
- err = -EINVAL;
- if (!(psize == 0x1000 || psize == 0x10000 ||
- psize == 0x1000000))
- goto out_srcu;
+ if (psize >= 0x1000000)
+ psize = 0x1000000;
+ else if (psize >= 0x10000)
+ psize = 0x10000;
+ else
+ psize = 0x1000;
+ porder = __ilog2(psize);
/* Update VRMASD field in the LPCR */
senc = slb_pgsize_encoding(psize);
diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index a51c188b81f3..6cff96e0d77b 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -551,7 +551,7 @@ static int numa_setup_cpu(unsigned long lcpu)
nid = of_node_to_nid_single(cpu);
out_present:
- if (nid < 0 || !node_online(nid))
+ if (nid < 0 || !node_possible(nid))
nid = first_online_node;
map_cpu_to_node(lcpu, nid);
@@ -904,6 +904,32 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
NODE_DATA(nid)->node_spanned_pages = spanned_pages;
}
+static void __init find_possible_nodes(void)
+{
+ struct device_node *rtas;
+ u32 numnodes, i;
+
+ if (min_common_depth <= 0)
+ return;
+
+ rtas = of_find_node_by_path("/rtas");
+ if (!rtas)
+ return;
+
+ if (of_property_read_u32_index(rtas,
+ "ibm,max-associativity-domains",
+ min_common_depth, &numnodes))
+ goto out;
+
+ for (i = 0; i < numnodes; i++) {
+ if (!node_possible(i))
+ node_set(i, node_possible_map);
+ }
+
+out:
+ of_node_put(rtas);
+}
+
void __init initmem_init(void)
{
int nid, cpu;
@@ -917,12 +943,15 @@ void __init initmem_init(void)
memblock_dump_all();
/*
- * Reduce the possible NUMA nodes to the online NUMA nodes,
- * since we do not support node hotplug. This ensures that we
- * lower the maximum NUMA node ID to what is actually present.
+ * Modify the set of possible NUMA nodes to reflect information
+ * available about the set of online nodes, and the set of nodes
+ * that we expect to make use of for this platform's affinity
+ * calculations.
*/
nodes_and(node_possible_map, node_possible_map, node_online_map);
+ find_possible_nodes();
+
for_each_online_node(nid) {
unsigned long start_pfn, end_pfn;
@@ -1274,6 +1303,40 @@ static long vphn_get_associativity(unsigned long cpu,
return rc;
}
+static inline int find_and_online_cpu_nid(int cpu)
+{
+ __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
+ int new_nid;
+
+ /* Use associativity from first thread for all siblings */
+ vphn_get_associativity(cpu, associativity);
+ new_nid = associativity_to_nid(associativity);
+ if (new_nid < 0 || !node_possible(new_nid))
+ new_nid = first_online_node;
+
+ if (NODE_DATA(new_nid) == NULL) {
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /*
+ * Need to ensure that NODE_DATA is initialized for a node from
+ * available memory (see memblock_alloc_try_nid). If unable to
+ * init the node, then default to nearest node that has memory
+ * installed.
+ */
+ if (try_online_node(new_nid))
+ new_nid = first_online_node;
+#else
+ /*
+ * Default to using the nearest node that has memory installed.
+ * Otherwise, it would be necessary to patch the kernel MM code
+ * to deal with more memoryless-node error conditions.
+ */
+ new_nid = first_online_node;
+#endif
+ }
+
+ return new_nid;
+}
+
/*
* Update the CPU maps and sysfs entries for a single CPU when its NUMA
* characteristics change. This function doesn't perform any locking and is
@@ -1339,7 +1402,6 @@ int arch_update_cpu_topology(void)
{
unsigned int cpu, sibling, changed = 0;
struct topology_update_data *updates, *ud;
- __be32 associativity[VPHN_ASSOC_BUFSIZE] = {0};
cpumask_t updated_cpus;
struct device *dev;
int weight, new_nid, i = 0;
@@ -1374,11 +1436,7 @@ int arch_update_cpu_topology(void)
continue;
}
- /* Use associativity from first thread for all siblings */
- vphn_get_associativity(cpu, associativity);
- new_nid = associativity_to_nid(associativity);
- if (new_nid < 0 || !node_online(new_nid))
- new_nid = first_online_node;
+ new_nid = find_and_online_cpu_nid(cpu);
if (new_nid == numa_cpu_lookup_table[cpu]) {
cpumask_andnot(&cpu_associativity_changes_mask,
diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c
index 7e706f36e364..9c58194c7ea5 100644
--- a/arch/powerpc/net/bpf_jit_comp.c
+++ b/arch/powerpc/net/bpf_jit_comp.c
@@ -329,6 +329,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4);
PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len));
break;
+ case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */
+ PPC_LWZ_OFFS(r_A, r_skb, K);
+ break;
case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */
PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len));
break;
diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c
index bf949623de90..771edffa2d40 100644
--- a/arch/powerpc/perf/core-book3s.c
+++ b/arch/powerpc/perf/core-book3s.c
@@ -448,6 +448,16 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw)
/* invalid entry */
continue;
+ /*
+ * BHRB rolling buffer could very much contain the kernel
+ * addresses at this point. Check the privileges before
+ * exporting it to userspace (avoid exposure of regions
+ * where we could have speculative execution)
+ */
+ if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) &&
+ is_kernel_addr(addr))
+ continue;
+
/* Branches are read most recent first (ie. mfbhrb 0 is
* the most recent branch).
* There are two types of valid entries:
@@ -1188,6 +1198,7 @@ static void power_pmu_disable(struct pmu *pmu)
*/
write_mmcr0(cpuhw, val);
mb();
+ isync();
/*
* Disable instruction sampling if it was enabled
@@ -1196,12 +1207,26 @@ static void power_pmu_disable(struct pmu *pmu)
mtspr(SPRN_MMCRA,
cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE);
mb();
+ isync();
}
cpuhw->disabled = 1;
cpuhw->n_added = 0;
ebb_switch_out(mmcr0);
+
+#ifdef CONFIG_PPC64
+ /*
+ * These are readable by userspace, may contain kernel
+ * addresses and are not switched by context switch, so clear
+ * them now to avoid leaking anything to userspace in general
+ * including to another process.
+ */
+ if (ppmu->flags & PPMU_ARCH_207S) {
+ mtspr(SPRN_SDAR, 0);
+ mtspr(SPRN_SIAR, 0);
+ }
+#endif
}
local_irq_restore(flags);
diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c
index 1bceb95f422d..5584247f5029 100644
--- a/arch/powerpc/platforms/powernv/opal-nvram.c
+++ b/arch/powerpc/platforms/powernv/opal-nvram.c
@@ -44,6 +44,10 @@ static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index)
return count;
}
+/*
+ * This can be called in the panic path with interrupts off, so use
+ * mdelay in that case.
+ */
static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
{
s64 rc = OPAL_BUSY;
@@ -58,10 +62,16 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index)
while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
rc = opal_write_nvram(__pa(buf), count, off);
if (rc == OPAL_BUSY_EVENT) {
- msleep(OPAL_BUSY_DELAY_MS);
+ if (in_interrupt() || irqs_disabled())
+ mdelay(OPAL_BUSY_DELAY_MS);
+ else
+ msleep(OPAL_BUSY_DELAY_MS);
opal_poll_events(NULL);
} else if (rc == OPAL_BUSY) {
- msleep(OPAL_BUSY_DELAY_MS);
+ if (in_interrupt() || irqs_disabled())
+ mdelay(OPAL_BUSY_DELAY_MS);
+ else
+ msleep(OPAL_BUSY_DELAY_MS);
}
}
diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c
index b9aac951a90f..f37567ed640c 100644
--- a/arch/powerpc/sysdev/mpic.c
+++ b/arch/powerpc/sysdev/mpic.c
@@ -626,7 +626,7 @@ static inline u32 mpic_physmask(u32 cpumask)
int i;
u32 mask = 0;
- for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1)
+ for (i = 0; i < min(32, NR_CPUS) && cpu_possible(i); ++i, cpumask >>= 1)
mask |= (cpumask & 1) << get_hard_smp_processor_id(i);
return mask;
}
diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S
index 8013989cd2e5..096affb74446 100644
--- a/arch/s390/crypto/crc32be-vx.S
+++ b/arch/s390/crypto/crc32be-vx.S
@@ -12,6 +12,7 @@
*/
#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
#include <asm/vx-insn.h>
/* Vector register range containing CRC-32 constants */
@@ -66,6 +67,8 @@
.previous
+ GEN_BR_THUNK %r14
+
.text
/*
* The CRC-32 function(s) use these calling conventions:
@@ -202,6 +205,6 @@ ENTRY(crc32_be_vgfm_16)
.Ldone:
VLGVF %r2,%v2,3
- br %r14
+ BR_EX %r14
.previous
diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S
index 17f2504c2633..8dc98c1d7cb1 100644
--- a/arch/s390/crypto/crc32le-vx.S
+++ b/arch/s390/crypto/crc32le-vx.S
@@ -13,6 +13,7 @@
*/
#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
#include <asm/vx-insn.h>
/* Vector register range containing CRC-32 constants */
@@ -75,6 +76,7 @@
.previous
+ GEN_BR_THUNK %r14
.text
@@ -263,6 +265,6 @@ crc32_le_vgfm_generic:
.Ldone:
VLGVF %r2,%v2,2
- br %r14
+ BR_EX %r14
.previous
diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h
new file mode 100644
index 000000000000..955d620db23e
--- /dev/null
+++ b/arch/s390/include/asm/alternative-asm.h
@@ -0,0 +1,108 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_ALTERNATIVE_ASM_H
+#define _ASM_S390_ALTERNATIVE_ASM_H
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Check the length of an instruction sequence. The length may not be larger
+ * than 254 bytes and it has to be divisible by 2.
+ */
+.macro alt_len_check start,end
+ .if ( \end - \start ) > 254
+ .error "cpu alternatives does not support instructions blocks > 254 bytes\n"
+ .endif
+ .if ( \end - \start ) % 2
+ .error "cpu alternatives instructions length is odd\n"
+ .endif
+.endm
+
+/*
+ * Issue one struct alt_instr descriptor entry (need to put it into
+ * the section .altinstructions, see below). This entry contains
+ * enough information for the alternatives patching code to patch an
+ * instruction. See apply_alternatives().
+ */
+.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature
+ .long \orig_start - .
+ .long \alt_start - .
+ .word \feature
+ .byte \orig_end - \orig_start
+ .byte \alt_end - \alt_start
+.endm
+
+/*
+ * Fill up @bytes with nops. The macro emits 6-byte nop instructions
+ * for the bulk of the area, possibly followed by a 4-byte and/or
+ * a 2-byte nop if the size of the area is not divisible by 6.
+ */
+.macro alt_pad_fill bytes
+ .fill ( \bytes ) / 6, 6, 0xc0040000
+ .fill ( \bytes ) % 6 / 4, 4, 0x47000000
+ .fill ( \bytes ) % 6 % 4 / 2, 2, 0x0700
+.endm
+
+/*
+ * Fill up @bytes with nops. If the number of bytes is larger
+ * than 6, emit a jg instruction to branch over all nops, then
+ * fill an area of size (@bytes - 6) with nop instructions.
+ */
+.macro alt_pad bytes
+ .if ( \bytes > 0 )
+ .if ( \bytes > 6 )
+ jg . + \bytes
+ alt_pad_fill \bytes - 6
+ .else
+ alt_pad_fill \bytes
+ .endif
+ .endif
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ */
+.macro ALTERNATIVE oldinstr, newinstr, feature
+ .pushsection .altinstr_replacement,"ax"
+770: \newinstr
+771: .popsection
+772: \oldinstr
+773: alt_len_check 770b, 771b
+ alt_len_check 772b, 773b
+ alt_pad ( ( 771b - 770b ) - ( 773b - 772b ) )
+774: .pushsection .altinstructions,"a"
+ alt_entry 772b, 774b, 770b, 771b, \feature
+ .popsection
+.endm
+
+/*
+ * Define an alternative between two instructions. If @feature is
+ * present, early code in apply_alternatives() replaces @oldinstr with
+ * @newinstr. ".skip" directive takes care of proper instruction padding
+ * in case @newinstr is longer than @oldinstr.
+ */
+.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
+ .pushsection .altinstr_replacement,"ax"
+770: \newinstr1
+771: \newinstr2
+772: .popsection
+773: \oldinstr
+774: alt_len_check 770b, 771b
+ alt_len_check 771b, 772b
+ alt_len_check 773b, 774b
+ .if ( 771b - 770b > 772b - 771b )
+ alt_pad ( ( 771b - 770b ) - ( 774b - 773b ) )
+ .else
+ alt_pad ( ( 772b - 771b ) - ( 774b - 773b ) )
+ .endif
+775: .pushsection .altinstructions,"a"
+ alt_entry 773b, 775b, 770b, 771b,\feature1
+ alt_entry 773b, 775b, 771b, 772b,\feature2
+ .popsection
+.endm
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_ALTERNATIVE_ASM_H */
diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h
new file mode 100644
index 000000000000..9a56e738d645
--- /dev/null
+++ b/arch/s390/include/asm/nospec-insn.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_NOSPEC_ASM_H
+#define _ASM_S390_NOSPEC_ASM_H
+
+#include <asm/alternative-asm.h>
+#include <asm/asm-offsets.h>
+
+#ifdef __ASSEMBLY__
+
+#ifdef CONFIG_EXPOLINE
+
+_LC_BR_R1 = __LC_BR_R1
+
+/*
+ * The expoline macros are used to create thunks in the same format
+ * as gcc generates them. The 'comdat' section flag makes sure that
+ * the various thunks are merged into a single copy.
+ */
+ .macro __THUNK_PROLOG_NAME name
+ .pushsection .text.\name,"axG",@progbits,\name,comdat
+ .globl \name
+ .hidden \name
+ .type \name,@function
+\name:
+ .cfi_startproc
+ .endm
+
+ .macro __THUNK_EPILOG
+ .cfi_endproc
+ .popsection
+ .endm
+
+ .macro __THUNK_PROLOG_BR r1,r2
+ __THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1
+ .endm
+
+ .macro __THUNK_PROLOG_BC d0,r1,r2
+ __THUNK_PROLOG_NAME __s390x_indirect_branch_\d0\()_\r2\()use_\r1
+ .endm
+
+ .macro __THUNK_BR r1,r2
+ jg __s390x_indirect_jump_r\r2\()use_r\r1
+ .endm
+
+ .macro __THUNK_BC d0,r1,r2
+ jg __s390x_indirect_branch_\d0\()_\r2\()use_\r1
+ .endm
+
+ .macro __THUNK_BRASL r1,r2,r3
+ brasl \r1,__s390x_indirect_jump_r\r3\()use_r\r2
+ .endm
+
+ .macro __DECODE_RR expand,reg,ruse
+ .set __decode_fail,1
+ .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \reg,%r\r1
+ .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \ruse,%r\r2
+ \expand \r1,\r2
+ .set __decode_fail,0
+ .endif
+ .endr
+ .endif
+ .endr
+ .if __decode_fail == 1
+ .error "__DECODE_RR failed"
+ .endif
+ .endm
+
+ .macro __DECODE_RRR expand,rsave,rtarget,ruse
+ .set __decode_fail,1
+ .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \rsave,%r\r1
+ .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \rtarget,%r\r2
+ .irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \ruse,%r\r3
+ \expand \r1,\r2,\r3
+ .set __decode_fail,0
+ .endif
+ .endr
+ .endif
+ .endr
+ .endif
+ .endr
+ .if __decode_fail == 1
+ .error "__DECODE_RRR failed"
+ .endif
+ .endm
+
+ .macro __DECODE_DRR expand,disp,reg,ruse
+ .set __decode_fail,1
+ .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \reg,%r\r1
+ .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ .ifc \ruse,%r\r2
+ \expand \disp,\r1,\r2
+ .set __decode_fail,0
+ .endif
+ .endr
+ .endif
+ .endr
+ .if __decode_fail == 1
+ .error "__DECODE_DRR failed"
+ .endif
+ .endm
+
+ .macro __THUNK_EX_BR reg,ruse
+ # Be very careful when adding instructions to this macro!
+ # The ALTERNATIVE replacement code has a .+10 which targets
+ # the "br \reg" after the code has been patched.
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+ exrl 0,555f
+ j .
+#else
+ .ifc \reg,%r1
+ ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35
+ j .
+ .else
+ larl \ruse,555f
+ ex 0,0(\ruse)
+ j .
+ .endif
+#endif
+555: br \reg
+ .endm
+
+ .macro __THUNK_EX_BC disp,reg,ruse
+#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
+ exrl 0,556f
+ j .
+#else
+ larl \ruse,556f
+ ex 0,0(\ruse)
+ j .
+#endif
+556: b \disp(\reg)
+ .endm
+
+ .macro GEN_BR_THUNK reg,ruse=%r1
+ __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse
+ __THUNK_EX_BR \reg,\ruse
+ __THUNK_EPILOG
+ .endm
+
+ .macro GEN_B_THUNK disp,reg,ruse=%r1
+ __DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse
+ __THUNK_EX_BC \disp,\reg,\ruse
+ __THUNK_EPILOG
+ .endm
+
+ .macro BR_EX reg,ruse=%r1
+557: __DECODE_RR __THUNK_BR,\reg,\ruse
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 557b-.
+ .popsection
+ .endm
+
+ .macro B_EX disp,reg,ruse=%r1
+558: __DECODE_DRR __THUNK_BC,\disp,\reg,\ruse
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 558b-.
+ .popsection
+ .endm
+
+ .macro BASR_EX rsave,rtarget,ruse=%r1
+559: __DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse
+ .pushsection .s390_indirect_branches,"a",@progbits
+ .long 559b-.
+ .popsection
+ .endm
+
+#else
+ .macro GEN_BR_THUNK reg,ruse=%r1
+ .endm
+
+ .macro GEN_B_THUNK disp,reg,ruse=%r1
+ .endm
+
+ .macro BR_EX reg,ruse=%r1
+ br \reg
+ .endm
+
+ .macro B_EX disp,reg,ruse=%r1
+ b \disp(\reg)
+ .endm
+
+ .macro BASR_EX rsave,rtarget,ruse=%r1
+ basr \rsave,\rtarget
+ .endm
+#endif
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_S390_NOSPEC_ASM_H */
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 0501cac2ab95..5b139977a3a4 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -63,6 +63,7 @@ obj-y += nospec-branch.o
extra-y += head.o head64.o vmlinux.lds
+obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
obj-$(CONFIG_MODULES) += module.o
diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c
index f3df9e0a5dec..85c8ead29582 100644
--- a/arch/s390/kernel/asm-offsets.c
+++ b/arch/s390/kernel/asm-offsets.c
@@ -175,6 +175,7 @@ int main(void)
OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags);
OFFSET(__LC_GMAP, lowcore, gmap);
OFFSET(__LC_PASTE, lowcore, paste);
+ OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline);
/* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */
OFFSET(__LC_DUMP_REIPL, lowcore, ipib);
/* hardware defined lowcore locations 0x1000 - 0x18ff */
diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S
index 326f717df587..61fca549a93b 100644
--- a/arch/s390/kernel/base.S
+++ b/arch/s390/kernel/base.S
@@ -8,18 +8,22 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
#include <asm/ptrace.h>
#include <asm/sigp.h>
+ GEN_BR_THUNK %r9
+ GEN_BR_THUNK %r14
+
ENTRY(s390_base_mcck_handler)
basr %r13,0
0: lg %r15,__LC_PANIC_STACK # load panic stack
aghi %r15,-STACK_FRAME_OVERHEAD
larl %r1,s390_base_mcck_handler_fn
- lg %r1,0(%r1)
- ltgr %r1,%r1
+ lg %r9,0(%r1)
+ ltgr %r9,%r9
jz 1f
- basr %r14,%r1
+ BASR_EX %r14,%r9
1: la %r1,4095
lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1)
lpswe __LC_MCK_OLD_PSW
@@ -36,10 +40,10 @@ ENTRY(s390_base_ext_handler)
basr %r13,0
0: aghi %r15,-STACK_FRAME_OVERHEAD
larl %r1,s390_base_ext_handler_fn
- lg %r1,0(%r1)
- ltgr %r1,%r1
+ lg %r9,0(%r1)
+ ltgr %r9,%r9
jz 1f
- basr %r14,%r1
+ BASR_EX %r14,%r9
1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC
ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit
lpswe __LC_EXT_OLD_PSW
@@ -56,10 +60,10 @@ ENTRY(s390_base_pgm_handler)
basr %r13,0
0: aghi %r15,-STACK_FRAME_OVERHEAD
larl %r1,s390_base_pgm_handler_fn
- lg %r1,0(%r1)
- ltgr %r1,%r1
+ lg %r9,0(%r1)
+ ltgr %r9,%r9
jz 1f
- basr %r14,%r1
+ BASR_EX %r14,%r9
lmg %r0,%r15,__LC_SAVE_AREA_SYNC
lpswe __LC_PGM_OLD_PSW
1: lpswe disabled_wait_psw-0b(%r13)
@@ -116,7 +120,7 @@ ENTRY(diag308_reset)
larl %r4,.Lcontinue_psw # Restore PSW flags
lpswe 0(%r4)
.Lcontinue:
- br %r14
+ BR_EX %r14
.align 16
.Lrestart_psw:
.long 0x00080000,0x80000000 + .Lrestart_part2
diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
index 1996afeb2e81..a4fd00064c80 100644
--- a/arch/s390/kernel/entry.S
+++ b/arch/s390/kernel/entry.S
@@ -24,6 +24,7 @@
#include <asm/setup.h>
#include <asm/nmi.h>
#include <asm/export.h>
+#include <asm/nospec-insn.h>
__PT_R0 = __PT_GPRS
__PT_R1 = __PT_GPRS + 8
@@ -226,67 +227,9 @@ _PIF_WORK = (_PIF_PER_TRAP)
.popsection
.endm
-#ifdef CONFIG_EXPOLINE
-
- .macro GEN_BR_THUNK name,reg,tmp
- .section .text.\name,"axG",@progbits,\name,comdat
- .globl \name
- .hidden \name
- .type \name,@function
-\name:
- .cfi_startproc
-#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES
- exrl 0,0f
-#else
- larl \tmp,0f
- ex 0,0(\tmp)
-#endif
- j .
-0: br \reg
- .cfi_endproc
- .endm
-
- GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1
- GEN_BR_THUNK __s390x_indirect_jump_r1use_r14,%r14,%r1
- GEN_BR_THUNK __s390x_indirect_jump_r11use_r14,%r14,%r11
-
- .macro BASR_R14_R9
-0: brasl %r14,__s390x_indirect_jump_r1use_r9
- .pushsection .s390_indirect_branches,"a",@progbits
- .long 0b-.
- .popsection
- .endm
-
- .macro BR_R1USE_R14
-0: jg __s390x_indirect_jump_r1use_r14
- .pushsection .s390_indirect_branches,"a",@progbits
- .long 0b-.
- .popsection
- .endm
-
- .macro BR_R11USE_R14
-0: jg __s390x_indirect_jump_r11use_r14
- .pushsection .s390_indirect_branches,"a",@progbits
- .long 0b-.
- .popsection
- .endm
-
-#else /* CONFIG_EXPOLINE */
-
- .macro BASR_R14_R9
- basr %r14,%r9
- .endm
-
- .macro BR_R1USE_R14
- br %r14
- .endm
-
- .macro BR_R11USE_R14
- br %r14
- .endm
-
-#endif /* CONFIG_EXPOLINE */
-
+ GEN_BR_THUNK %r9
+ GEN_BR_THUNK %r14
+ GEN_BR_THUNK %r14,%r11
.section .kprobes.text, "ax"
.Ldummy:
@@ -303,7 +246,7 @@ _PIF_WORK = (_PIF_PER_TRAP)
ENTRY(__bpon)
.globl __bpon
BPON
- BR_R1USE_R14
+ BR_EX %r14
/*
* Scheduler resume function, called by switch_to
@@ -333,7 +276,7 @@ ENTRY(__switch_to)
TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP
jz 0f
.insn s,0xb2800000,__LC_LPP # set program parameter
-0: BR_R1USE_R14
+0: BR_EX %r14
.L__critical_start:
@@ -399,7 +342,7 @@ sie_exit:
xgr %r5,%r5
lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers
lg %r2,__SF_EMPTY+16(%r15) # return exit reason code
- BR_R1USE_R14
+ BR_EX %r14
.Lsie_fault:
lghi %r14,-EFAULT
stg %r14,__SF_EMPTY+16(%r15) # set exit reason code
@@ -458,7 +401,7 @@ ENTRY(system_call)
lgf %r9,0(%r8,%r10) # get system call add.
TSTMSK __TI_flags(%r12),_TIF_TRACE
jnz .Lsysc_tracesys
- BASR_R14_R9 # call sys_xxxx
+ BASR_EX %r14,%r9 # call sys_xxxx
stg %r2,__PT_R2(%r11) # store return value
.Lsysc_return:
@@ -598,7 +541,7 @@ ENTRY(system_call)
lmg %r3,%r7,__PT_R3(%r11)
stg %r7,STACK_FRAME_OVERHEAD(%r15)
lg %r2,__PT_ORIG_GPR2(%r11)
- BASR_R14_R9 # call sys_xxx
+ BASR_EX %r14,%r9 # call sys_xxx
stg %r2,__PT_R2(%r11) # store return value
.Lsysc_tracenogo:
TSTMSK __TI_flags(%r12),_TIF_TRACE
@@ -622,7 +565,7 @@ ENTRY(ret_from_fork)
lmg %r9,%r10,__PT_R9(%r11) # load gprs
ENTRY(kernel_thread_starter)
la %r2,0(%r10)
- BASR_R14_R9
+ BASR_EX %r14,%r9
j .Lsysc_tracenogo
/*
@@ -698,7 +641,7 @@ ENTRY(pgm_check_handler)
je .Lpgm_return
lgf %r9,0(%r10,%r1) # load address of handler routine
lgr %r2,%r11 # pass pointer to pt_regs
- BASR_R14_R9 # branch to interrupt-handler
+ BASR_EX %r14,%r9 # branch to interrupt-handler
.Lpgm_return:
LOCKDEP_SYS_EXIT
tm __PT_PSW+1(%r11),0x01 # returning to user ?
@@ -976,7 +919,7 @@ ENTRY(psw_idle)
stpt __TIMER_IDLE_ENTER(%r2)
.Lpsw_idle_lpsw:
lpswe __SF_EMPTY(%r15)
- BR_R1USE_R14
+ BR_EX %r14
.Lpsw_idle_end:
/*
@@ -1021,7 +964,7 @@ ENTRY(save_fpu_regs)
.Lsave_fpu_regs_done:
oi __LC_CPU_FLAGS+7,_CIF_FPU
.Lsave_fpu_regs_exit:
- BR_R1USE_R14
+ BR_EX %r14
.Lsave_fpu_regs_end:
#if IS_ENABLED(CONFIG_KVM)
EXPORT_SYMBOL(save_fpu_regs)
@@ -1071,7 +1014,7 @@ load_fpu_regs:
.Lload_fpu_regs_done:
ni __LC_CPU_FLAGS+7,255-_CIF_FPU
.Lload_fpu_regs_exit:
- BR_R1USE_R14
+ BR_EX %r14
.Lload_fpu_regs_end:
.L__critical_end:
@@ -1244,7 +1187,7 @@ cleanup_critical:
jl 0f
clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end
jl .Lcleanup_load_fpu_regs
-0: BR_R11USE_R14
+0: BR_EX %r14
.align 8
.Lcleanup_table:
@@ -1274,7 +1217,7 @@ cleanup_critical:
ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE
lctlg %c1,%c1,__LC_USER_ASCE # load primary asce
larl %r9,sie_exit # skip forward to sie_exit
- BR_R11USE_R14
+ BR_EX %r14
#endif
.Lcleanup_system_call:
@@ -1332,7 +1275,7 @@ cleanup_critical:
stg %r15,56(%r11) # r15 stack pointer
# set new psw address and exit
larl %r9,.Lsysc_do_svc
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_system_call_insn:
.quad system_call
.quad .Lsysc_stmg
@@ -1342,7 +1285,7 @@ cleanup_critical:
.Lcleanup_sysc_tif:
larl %r9,.Lsysc_tif
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_sysc_restore:
# check if stpt has been executed
@@ -1359,14 +1302,14 @@ cleanup_critical:
mvc 0(64,%r11),__PT_R8(%r9)
lmg %r0,%r7,__PT_R0(%r9)
1: lmg %r8,%r9,__LC_RETURN_PSW
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_sysc_restore_insn:
.quad .Lsysc_exit_timer
.quad .Lsysc_done - 4
.Lcleanup_io_tif:
larl %r9,.Lio_tif
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_io_restore:
# check if stpt has been executed
@@ -1380,7 +1323,7 @@ cleanup_critical:
mvc 0(64,%r11),__PT_R8(%r9)
lmg %r0,%r7,__PT_R0(%r9)
1: lmg %r8,%r9,__LC_RETURN_PSW
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_io_restore_insn:
.quad .Lio_exit_timer
.quad .Lio_done - 4
@@ -1433,17 +1376,17 @@ cleanup_critical:
# prepare return psw
nihh %r8,0xfcfd # clear irq & wait state bits
lg %r9,48(%r11) # return from psw_idle
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_idle_insn:
.quad .Lpsw_idle_lpsw
.Lcleanup_save_fpu_regs:
larl %r9,save_fpu_regs
- BR_R11USE_R14
+ BR_EX %r14,%r11
.Lcleanup_load_fpu_regs:
larl %r9,load_fpu_regs
- BR_R11USE_R14
+ BR_EX %r14,%r11
/*
* Integer constants
diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c
index 285d6561076d..7ff976737bb1 100644
--- a/arch/s390/kernel/irq.c
+++ b/arch/s390/kernel/irq.c
@@ -173,10 +173,9 @@ void do_softirq_own_stack(void)
new -= STACK_FRAME_OVERHEAD;
((struct stack_frame *) new)->back_chain = old;
asm volatile(" la 15,0(%0)\n"
- " basr 14,%2\n"
+ " brasl 14,__do_softirq\n"
" la 15,0(%1)\n"
- : : "a" (new), "a" (old),
- "a" (__do_softirq)
+ : : "a" (new), "a" (old)
: "0", "1", "2", "3", "4", "5", "14",
"cc", "memory" );
} else {
diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S
index 9a17e4475d27..be75e8e49e43 100644
--- a/arch/s390/kernel/mcount.S
+++ b/arch/s390/kernel/mcount.S
@@ -8,13 +8,17 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
#include <asm/ftrace.h>
+#include <asm/nospec-insn.h>
#include <asm/ptrace.h>
#include <asm/export.h>
+ GEN_BR_THUNK %r1
+ GEN_BR_THUNK %r14
+
.section .kprobes.text, "ax"
ENTRY(ftrace_stub)
- br %r14
+ BR_EX %r14
#define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE)
#define STACK_PTREGS (STACK_FRAME_OVERHEAD)
@@ -22,7 +26,7 @@ ENTRY(ftrace_stub)
#define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW)
ENTRY(_mcount)
- br %r14
+ BR_EX %r14
EXPORT_SYMBOL(_mcount)
@@ -52,7 +56,7 @@ ENTRY(ftrace_caller)
#endif
lgr %r3,%r14
la %r5,STACK_PTREGS(%r15)
- basr %r14,%r1
+ BASR_EX %r14,%r1
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
# The j instruction gets runtime patched to a nop instruction.
# See ftrace_enable_ftrace_graph_caller.
@@ -67,7 +71,7 @@ ftrace_graph_caller_end:
#endif
lg %r1,(STACK_PTREGS_PSW+8)(%r15)
lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15)
- br %r1
+ BR_EX %r1
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
@@ -80,6 +84,6 @@ ENTRY(return_to_handler)
aghi %r15,STACK_FRAME_OVERHEAD
lgr %r14,%r2
lmg %r2,%r5,32(%r15)
- br %r14
+ BR_EX %r14
#endif
diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c
index 9f3b5b382743..d5eed651b5ab 100644
--- a/arch/s390/kernel/nospec-branch.c
+++ b/arch/s390/kernel/nospec-branch.c
@@ -44,24 +44,6 @@ static int __init nospec_report(void)
}
arch_initcall(nospec_report);
-#ifdef CONFIG_SYSFS
-ssize_t cpu_show_spectre_v1(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
-}
-
-ssize_t cpu_show_spectre_v2(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
- return sprintf(buf, "Mitigation: execute trampolines\n");
- if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
- return sprintf(buf, "Mitigation: limited branch prediction.\n");
- return sprintf(buf, "Vulnerable\n");
-}
-#endif
-
#ifdef CONFIG_EXPOLINE
int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF);
@@ -112,7 +94,6 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
s32 *epo;
/* Second part of the instruction replace is always a nop */
- memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x00, 0x00 }, 4);
for (epo = start; epo < end; epo++) {
instr = (u8 *) epo + *epo;
if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04)
@@ -133,18 +114,34 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end)
br = thunk + (*(int *)(thunk + 2)) * 2;
else
continue;
- if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0)
+ /* Check for unconditional branch 0x07f? or 0x47f???? */
+ if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0)
continue;
+
+ memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x07, 0x00 }, 4);
switch (type) {
case BRCL_EXPOLINE:
- /* brcl to thunk, replace with br + nop */
insnbuf[0] = br[0];
insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+ if (br[0] == 0x47) {
+ /* brcl to b, replace with bc + nopr */
+ insnbuf[2] = br[2];
+ insnbuf[3] = br[3];
+ } else {
+ /* brcl to br, replace with bcr + nop */
+ }
break;
case BRASL_EXPOLINE:
- /* brasl to thunk, replace with basr + nop */
- insnbuf[0] = 0x0d;
insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f);
+ if (br[0] == 0x47) {
+ /* brasl to b, replace with bas + nopr */
+ insnbuf[0] = 0x4d;
+ insnbuf[2] = br[2];
+ insnbuf[3] = br[3];
+ } else {
+ /* brasl to br, replace with basr + nop */
+ insnbuf[0] = 0x0d;
+ }
break;
}
diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c
new file mode 100644
index 000000000000..8affad5f18cb
--- /dev/null
+++ b/arch/s390/kernel/nospec-sysfs.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/device.h>
+#include <linux/cpu.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
+
+ssize_t cpu_show_spectre_v1(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+}
+
+ssize_t cpu_show_spectre_v2(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable)
+ return sprintf(buf, "Mitigation: execute trampolines\n");
+ if (__test_facility(82, S390_lowcore.alt_stfle_fac_list))
+ return sprintf(buf, "Mitigation: limited branch prediction\n");
+ return sprintf(buf, "Vulnerable\n");
+}
diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index fcc634c1479a..96e4fcad57bf 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -739,6 +739,10 @@ static int __hw_perf_event_init(struct perf_event *event)
*/
rate = 0;
if (attr->freq) {
+ if (!attr->sample_freq) {
+ err = -EINVAL;
+ goto out;
+ }
rate = freq_to_sample_rate(&si, attr->sample_freq);
rate = hw_limit_rate(&si, rate);
attr->freq = 0;
diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S
index 89ea8c213d82..70d635da782c 100644
--- a/arch/s390/kernel/reipl.S
+++ b/arch/s390/kernel/reipl.S
@@ -6,8 +6,11 @@
#include <linux/linkage.h>
#include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
#include <asm/sigp.h>
+ GEN_BR_THUNK %r9
+
#
# Issue "store status" for the current CPU to its prefix page
# and call passed function afterwards
@@ -66,9 +69,9 @@ ENTRY(store_status)
st %r4,0(%r1)
st %r5,4(%r1)
stg %r2,8(%r1)
- lgr %r1,%r2
+ lgr %r9,%r2
lgr %r2,%r3
- br %r1
+ BR_EX %r9
.section .bss
.align 8
diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S
index 2d6b6e81f812..4e76aaf7bb38 100644
--- a/arch/s390/kernel/swsusp.S
+++ b/arch/s390/kernel/swsusp.S
@@ -12,6 +12,7 @@
#include <asm/ptrace.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
+#include <asm/nospec-insn.h>
#include <asm/sigp.h>
/*
@@ -23,6 +24,8 @@
* (see below) in the resume process.
* This function runs with disabled interrupts.
*/
+ GEN_BR_THUNK %r14
+
.section .text
ENTRY(swsusp_arch_suspend)
stmg %r6,%r15,__SF_GPRS(%r15)
@@ -102,7 +105,7 @@ ENTRY(swsusp_arch_suspend)
spx 0x318(%r1)
lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
lghi %r2,0
- br %r14
+ BR_EX %r14
/*
* Restore saved memory image to correct place and restore register context.
@@ -200,7 +203,7 @@ pgm_check_entry:
lghi %r1,0
sam31
sigp %r1,%r0,SIGP_SET_ARCHITECTURE
- basr %r14,%r3
+ brasl %r14,_sclp_print_early
larl %r3,.Ldisabled_wait_31
lpsw 0(%r3)
4:
@@ -266,7 +269,7 @@ restore_registers:
/* Return 0 */
lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15)
lghi %r2,0
- br %r14
+ BR_EX %r14
.section .data..nosave,"aw",@progbits
.align 8
diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c
index ced6c9b8f04d..51f842c0a175 100644
--- a/arch/s390/kvm/vsie.c
+++ b/arch/s390/kvm/vsie.c
@@ -549,7 +549,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page)
gpa = scb_o->itdba & ~0xffUL;
if (gpa && (scb_s->ecb & 0x10U)) {
- if (!(gpa & ~0x1fffU)) {
+ if (!(gpa & ~0x1fffUL)) {
rc = set_validity_icpt(scb_s, 0x0080U);
goto unpin;
}
diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S
index be9fa65bfac4..e7672edc284a 100644
--- a/arch/s390/lib/mem.S
+++ b/arch/s390/lib/mem.S
@@ -6,6 +6,9 @@
#include <linux/linkage.h>
#include <asm/export.h>
+#include <asm/nospec-insn.h>
+
+ GEN_BR_THUNK %r14
/*
* memset implementation
@@ -39,7 +42,7 @@ ENTRY(memset)
.Lmemset_clear_rest:
larl %r3,.Lmemset_xc
ex %r4,0(%r3)
- br %r14
+ BR_EX %r14
.Lmemset_fill:
stc %r3,0(%r2)
cghi %r4,1
@@ -56,7 +59,7 @@ ENTRY(memset)
.Lmemset_fill_rest:
larl %r3,.Lmemset_mvc
ex %r4,0(%r3)
- br %r14
+ BR_EX %r14
.Lmemset_xc:
xc 0(1,%r1),0(%r1)
.Lmemset_mvc:
@@ -79,7 +82,7 @@ ENTRY(memcpy)
.Lmemcpy_rest:
larl %r5,.Lmemcpy_mvc
ex %r4,0(%r5)
- br %r14
+ BR_EX %r14
.Lmemcpy_loop:
mvc 0(256,%r1),0(%r3)
la %r1,256(%r1)
diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S
index a1c917d881ec..fa716f2a95a7 100644
--- a/arch/s390/net/bpf_jit.S
+++ b/arch/s390/net/bpf_jit.S
@@ -8,6 +8,7 @@
*/
#include <linux/linkage.h>
+#include <asm/nospec-insn.h>
#include "bpf_jit.h"
/*
@@ -53,7 +54,7 @@ ENTRY(sk_load_##NAME##_pos); \
clg %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */ \
jh sk_load_##NAME##_slow; \
LOAD %r14,-SIZE(%r3,%r12); /* Get data from skb */ \
- b OFF_OK(%r6); /* Return */ \
+ B_EX OFF_OK,%r6; /* Return */ \
\
sk_load_##NAME##_slow:; \
lgr %r2,%r7; /* Arg1 = skb pointer */ \
@@ -63,11 +64,14 @@ sk_load_##NAME##_slow:; \
brasl %r14,skb_copy_bits; /* Get data from skb */ \
LOAD %r14,STK_OFF_TMP(%r15); /* Load from temp bufffer */ \
ltgr %r2,%r2; /* Set cc to (%r2 != 0) */ \
- br %r6; /* Return */
+ BR_EX %r6; /* Return */
sk_load_common(word, 4, llgf) /* r14 = *(u32 *) (skb->data+offset) */
sk_load_common(half, 2, llgh) /* r14 = *(u16 *) (skb->data+offset) */
+ GEN_BR_THUNK %r6
+ GEN_B_THUNK OFF_OK,%r6
+
/*
* Load 1 byte from SKB (optimized version)
*/
@@ -79,7 +83,7 @@ ENTRY(sk_load_byte_pos)
clg %r3,STK_OFF_HLEN(%r15) # Offset >= hlen?
jnl sk_load_byte_slow
llgc %r14,0(%r3,%r12) # Get byte from skb
- b OFF_OK(%r6) # Return OK
+ B_EX OFF_OK,%r6 # Return OK
sk_load_byte_slow:
lgr %r2,%r7 # Arg1 = skb pointer
@@ -89,7 +93,7 @@ sk_load_byte_slow:
brasl %r14,skb_copy_bits # Get data from skb
llgc %r14,STK_OFF_TMP(%r15) # Load result from temp buffer
ltgr %r2,%r2 # Set cc to (%r2 != 0)
- br %r6 # Return cc
+ BR_EX %r6 # Return cc
#define sk_negative_common(NAME, SIZE, LOAD) \
sk_load_##NAME##_slow_neg:; \
@@ -103,7 +107,7 @@ sk_load_##NAME##_slow_neg:; \
jz bpf_error; \
LOAD %r14,0(%r2); /* Get data from pointer */ \
xr %r3,%r3; /* Set cc to zero */ \
- br %r6; /* Return cc */
+ BR_EX %r6; /* Return cc */
sk_negative_common(word, 4, llgf)
sk_negative_common(half, 2, llgh)
@@ -112,4 +116,4 @@ sk_negative_common(byte, 1, llgc)
bpf_error:
# force a return 0 from jit handler
ltgr %r15,%r15 # Set condition code
- br %r6
+ BR_EX %r6
diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index e8dee623d545..e7ce2577f0c9 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -24,6 +24,8 @@
#include <linux/bpf.h>
#include <asm/cacheflush.h>
#include <asm/dis.h>
+#include <asm/facility.h>
+#include <asm/nospec-branch.h>
#include "bpf_jit.h"
int bpf_jit_enable __read_mostly;
@@ -41,6 +43,8 @@ struct bpf_jit {
int base_ip; /* Base address for literal pool */
int ret0_ip; /* Address of return 0 */
int exit_ip; /* Address of exit */
+ int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */
+ int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */
int tail_call_start; /* Tail call start offset */
int labels[1]; /* Labels for local jumps */
};
@@ -251,6 +255,19 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
REG_SET_SEEN(b2); \
})
+#define EMIT6_PCREL_RILB(op, b, target) \
+({ \
+ int rel = (target - jit->prg) / 2; \
+ _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff); \
+ REG_SET_SEEN(b); \
+})
+
+#define EMIT6_PCREL_RIL(op, target) \
+({ \
+ int rel = (target - jit->prg) / 2; \
+ _EMIT6(op | rel >> 16, rel & 0xffff); \
+})
+
#define _EMIT6_IMM(op, imm) \
({ \
unsigned int __imm = (imm); \
@@ -470,8 +487,45 @@ static void bpf_jit_epilogue(struct bpf_jit *jit)
EMIT4(0xb9040000, REG_2, BPF_REG_0);
/* Restore registers */
save_restore_regs(jit, REGS_RESTORE);
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+ jit->r14_thunk_ip = jit->prg;
+ /* Generate __s390_indirect_jump_r14 thunk */
+ if (test_facility(35)) {
+ /* exrl %r0,.+10 */
+ EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+ } else {
+ /* larl %r1,.+14 */
+ EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14);
+ /* ex 0,0(%r1) */
+ EMIT4_DISP(0x44000000, REG_0, REG_1, 0);
+ }
+ /* j . */
+ EMIT4_PCREL(0xa7f40000, 0);
+ }
/* br %r14 */
_EMIT2(0x07fe);
+
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable &&
+ (jit->seen & SEEN_FUNC)) {
+ jit->r1_thunk_ip = jit->prg;
+ /* Generate __s390_indirect_jump_r1 thunk */
+ if (test_facility(35)) {
+ /* exrl %r0,.+10 */
+ EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
+ /* j . */
+ EMIT4_PCREL(0xa7f40000, 0);
+ /* br %r1 */
+ _EMIT2(0x07f1);
+ } else {
+ /* larl %r1,.+14 */
+ EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14);
+ /* ex 0,S390_lowcore.br_r1_tampoline */
+ EMIT4_DISP(0x44000000, REG_0, REG_0,
+ offsetof(struct lowcore, br_r1_trampoline));
+ /* j . */
+ EMIT4_PCREL(0xa7f40000, 0);
+ }
+ }
}
/*
@@ -977,8 +1031,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i
/* lg %w1,<d(imm)>(%l) */
EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
EMIT_CONST_U64(func));
- /* basr %r14,%w1 */
- EMIT2(0x0d00, REG_14, REG_W1);
+ if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) {
+ /* brasl %r14,__s390_indirect_jump_r1 */
+ EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
+ } else {
+ /* basr %r14,%w1 */
+ EMIT2(0x0d00, REG_14, REG_W1);
+ }
/* lgr %b0,%r2: load return value into %b0 */
EMIT4(0xb9040000, BPF_REG_0, REG_2);
if (bpf_helper_changes_skb_data((void *)func)) {
diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S
index c001f782c5f1..28cc61216b64 100644
--- a/arch/sh/kernel/entry-common.S
+++ b/arch/sh/kernel/entry-common.S
@@ -255,7 +255,7 @@ debug_trap:
mov.l @r8, r8
jsr @r8
nop
- bra __restore_all
+ bra ret_from_exception
nop
CFI_ENDPROC
diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h
index 24827a3f733a..89d299ccdfa6 100644
--- a/arch/sparc/include/asm/atomic_64.h
+++ b/arch/sparc/include/asm/atomic_64.h
@@ -82,7 +82,11 @@ ATOMIC_OPS(xor)
#define atomic64_add_negative(i, v) (atomic64_add_return(i, v) < 0)
#define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n)))
-#define atomic_xchg(v, new) (xchg(&((v)->counter), new))
+
+static inline int atomic_xchg(atomic_t *v, int new)
+{
+ return xchg(&v->counter, new);
+}
static inline int __atomic_add_unless(atomic_t *v, int a, int u)
{
diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h
index b6802b978140..81ad06a1672f 100644
--- a/arch/sparc/include/asm/pgtable_64.h
+++ b/arch/sparc/include/asm/pgtable_64.h
@@ -952,7 +952,7 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
pmd_t *pmd);
#define __HAVE_ARCH_PMDP_INVALIDATE
-extern void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+extern pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp);
#define __HAVE_ARCH_PGTABLE_DEPOSIT
diff --git a/arch/sparc/mm/tlb.c b/arch/sparc/mm/tlb.c
index c56a195c9071..b2722ed31053 100644
--- a/arch/sparc/mm/tlb.c
+++ b/arch/sparc/mm/tlb.c
@@ -219,17 +219,28 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
}
}
+static inline pmd_t pmdp_establish(struct vm_area_struct *vma,
+ unsigned long address, pmd_t *pmdp, pmd_t pmd)
+{
+ pmd_t old;
+
+ do {
+ old = *pmdp;
+ } while (cmpxchg64(&pmdp->pmd, old.pmd, pmd.pmd) != old.pmd);
+
+ return old;
+}
+
/*
* This routine is only called when splitting a THP
*/
-void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
+pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
pmd_t *pmdp)
{
- pmd_t entry = *pmdp;
-
- pmd_val(entry) &= ~_PAGE_VALID;
+ pmd_t old, entry;
- set_pmd_at(vma->vm_mm, address, pmdp, entry);
+ entry = __pmd(pmd_val(*pmdp) & ~_PAGE_VALID);
+ old = pmdp_establish(vma, address, pmdp, entry);
flush_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
/*
@@ -240,6 +251,8 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address,
if ((pmd_val(entry) & _PAGE_PMD_HUGE) &&
!is_huge_zero_page(pmd_page(entry)))
(vma->vm_mm)->context.thp_pte_count--;
+
+ return old;
}
void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp,
diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c
index cc69e37548db..c0ad1bb27fa2 100644
--- a/arch/x86/boot/compressed/eboot.c
+++ b/arch/x86/boot/compressed/eboot.c
@@ -330,7 +330,8 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom)
if (status != EFI_SUCCESS)
goto free_struct;
- memcpy(rom->romdata, pci->romimage, pci->romsize);
+ memcpy(rom->romdata, (void *)(unsigned long)pci->romimage,
+ pci->romsize);
return status;
free_struct:
@@ -436,7 +437,8 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom)
if (status != EFI_SUCCESS)
goto free_struct;
- memcpy(rom->romdata, pci->romimage, pci->romsize);
+ memcpy(rom->romdata, (void *)(unsigned long)pci->romimage,
+ pci->romsize);
return status;
free_struct:
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 02e547f9ca3f..655a65eaf105 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -1155,16 +1155,13 @@ int x86_perf_event_set_period(struct perf_event *event)
per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
- if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
- local64_read(&hwc->prev_count) != (u64)-left) {
- /*
- * The hw event starts counting from this event offset,
- * mark it to be able to extra future deltas:
- */
- local64_set(&hwc->prev_count, (u64)-left);
+ /*
+ * The hw event starts counting from this event offset,
+ * mark it to be able to extra future deltas:
+ */
+ local64_set(&hwc->prev_count, (u64)-left);
- wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
- }
+ wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
/*
* Due to erratum on certan cpu we need
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index 6f353a874178..815039327932 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -2066,16 +2066,23 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
int bit, loops;
u64 status;
int handled;
+ int pmu_enabled;
cpuc = this_cpu_ptr(&cpu_hw_events);
/*
+ * Save the PMU state.
+ * It needs to be restored when leaving the handler.
+ */
+ pmu_enabled = cpuc->enabled;
+ /*
* No known reason to not always do late ACK,
* but just in case do it opt-in.
*/
if (!x86_pmu.late_ack)
apic_write(APIC_LVTPC, APIC_DM_NMI);
intel_bts_disable_local();
+ cpuc->enabled = 0;
__intel_pmu_disable_all();
handled = intel_pmu_drain_bts_buffer();
handled += intel_bts_interrupt();
@@ -2173,7 +2180,8 @@ again:
done:
/* Only restore PMU state when it's active. See x86_pmu_disable(). */
- if (cpuc->enabled)
+ cpuc->enabled = pmu_enabled;
+ if (pmu_enabled)
__intel_pmu_enable_all(0, true);
intel_bts_enable_local();
@@ -3019,7 +3027,7 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, int idx,
* Therefore the effective (average) period matches the requested period,
* despite coarser hardware granularity.
*/
-static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
+static u64 bdw_limit_period(struct perf_event *event, u64 left)
{
if ((event->hw.config & INTEL_ARCH_EVENT_MASK) ==
X86_CONFIG(.event=0xc0, .umask=0x01)) {
diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 8e7a3f1df3a5..f26e26e4d84f 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -1110,6 +1110,7 @@ static void setup_pebs_sample_data(struct perf_event *event,
if (pebs == NULL)
return;
+ regs->flags &= ~PERF_EFLAGS_EXACT;
sample_type = event->attr.sample_type;
dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
@@ -1154,7 +1155,6 @@ static void setup_pebs_sample_data(struct perf_event *event,
*/
*regs = *iregs;
regs->flags = pebs->flags;
- set_linear_ip(regs, pebs->ip);
if (sample_type & PERF_SAMPLE_REGS_INTR) {
regs->ax = pebs->ax;
@@ -1190,13 +1190,22 @@ static void setup_pebs_sample_data(struct perf_event *event,
#endif
}
- if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
- regs->ip = pebs->real_ip;
- regs->flags |= PERF_EFLAGS_EXACT;
- } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
- regs->flags |= PERF_EFLAGS_EXACT;
- else
- regs->flags &= ~PERF_EFLAGS_EXACT;
+ if (event->attr.precise_ip > 1) {
+ /* Haswell and later have the eventing IP, so use it: */
+ if (x86_pmu.intel_cap.pebs_format >= 2) {
+ set_linear_ip(regs, pebs->real_ip);
+ regs->flags |= PERF_EFLAGS_EXACT;
+ } else {
+ /* Otherwise use PEBS off-by-1 IP: */
+ set_linear_ip(regs, pebs->ip);
+
+ /* ... and try to fix it up using the LBR entries: */
+ if (intel_pmu_pebs_fixup_ip(regs))
+ regs->flags |= PERF_EFLAGS_EXACT;
+ }
+ } else
+ set_linear_ip(regs, pebs->ip);
+
if ((sample_type & PERF_SAMPLE_ADDR) &&
x86_pmu.intel_cap.pebs_format >= 1)
@@ -1263,17 +1272,84 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
return NULL;
}
+/*
+ * Special variant of intel_pmu_save_and_restart() for auto-reload.
+ */
+static int
+intel_pmu_save_and_restart_reload(struct perf_event *event, int count)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ int shift = 64 - x86_pmu.cntval_bits;
+ u64 period = hwc->sample_period;
+ u64 prev_raw_count, new_raw_count;
+ s64 new, old;
+
+ WARN_ON(!period);
+
+ /*
+ * drain_pebs() only happens when the PMU is disabled.
+ */
+ WARN_ON(this_cpu_read(cpu_hw_events.enabled));
+
+ prev_raw_count = local64_read(&hwc->prev_count);
+ rdpmcl(hwc->event_base_rdpmc, new_raw_count);
+ local64_set(&hwc->prev_count, new_raw_count);
+
+ /*
+ * Since the counter increments a negative counter value and
+ * overflows on the sign switch, giving the interval:
+ *
+ * [-period, 0]
+ *
+ * the difference between two consequtive reads is:
+ *
+ * A) value2 - value1;
+ * when no overflows have happened in between,
+ *
+ * B) (0 - value1) + (value2 - (-period));
+ * when one overflow happened in between,
+ *
+ * C) (0 - value1) + (n - 1) * (period) + (value2 - (-period));
+ * when @n overflows happened in between.
+ *
+ * Here A) is the obvious difference, B) is the extension to the
+ * discrete interval, where the first term is to the top of the
+ * interval and the second term is from the bottom of the next
+ * interval and C) the extension to multiple intervals, where the
+ * middle term is the whole intervals covered.
+ *
+ * An equivalent of C, by reduction, is:
+ *
+ * value2 - value1 + n * period
+ */
+ new = ((s64)(new_raw_count << shift) >> shift);
+ old = ((s64)(prev_raw_count << shift) >> shift);
+ local64_add(new - old + count * period, &event->count);
+
+ perf_event_update_userpage(event);
+
+ return 0;
+}
+
static void __intel_pmu_pebs_event(struct perf_event *event,
struct pt_regs *iregs,
void *base, void *top,
int bit, int count)
{
+ struct hw_perf_event *hwc = &event->hw;
struct perf_sample_data data;
struct pt_regs regs;
void *at = get_next_pebs_record_by_bit(base, top, bit);
- if (!intel_pmu_save_and_restart(event) &&
- !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
+ if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+ /*
+ * Now, auto-reload is only enabled in fixed period mode.
+ * The reload value is always hwc->sample_period.
+ * May need to change it, if auto-reload is enabled in
+ * freq mode later.
+ */
+ intel_pmu_save_and_restart_reload(event, count);
+ } else if (!intel_pmu_save_and_restart(event))
return;
while (count > 1) {
@@ -1325,8 +1401,11 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
return;
n = top - at;
- if (n <= 0)
+ if (n <= 0) {
+ if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+ intel_pmu_save_and_restart_reload(event, 0);
return;
+ }
__intel_pmu_pebs_event(event, iregs, at, top, 0, n);
}
@@ -1349,8 +1428,22 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
ds->pebs_index = ds->pebs_buffer_base;
- if (unlikely(base >= top))
+ if (unlikely(base >= top)) {
+ /*
+ * The drain_pebs() could be called twice in a short period
+ * for auto-reload event in pmu::read(). There are no
+ * overflows have happened in between.
+ * It needs to call intel_pmu_save_and_restart_reload() to
+ * update the event->count for this case.
+ */
+ for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled,
+ x86_pmu.max_pebs_events) {
+ event = cpuc->events[bit];
+ if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)
+ intel_pmu_save_and_restart_reload(event, 0);
+ }
return;
+ }
for (at = base; at < top; at += x86_pmu.pebs_record_size) {
struct pebs_record_nhm *p = at;
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index bcbb1d2ae10b..f3563179290b 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -548,7 +548,7 @@ struct x86_pmu {
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
bool late_ack;
- unsigned (*limit_period)(struct perf_event *event, unsigned l);
+ u64 (*limit_period)(struct perf_event *event, u64 l);
/*
* sysfs attrs
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index a2485311164b..c278f276c9b3 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -197,6 +197,9 @@
#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
#define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
+#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
+
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
@@ -204,6 +207,13 @@
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
+#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
+#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation */
+#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */
+
/* Virtualization flags: Linux defined, word 8 */
#define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
@@ -261,9 +271,10 @@
/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
#define X86_FEATURE_CLZERO (13*32+0) /* CLZERO instruction */
#define X86_FEATURE_IRPERF (13*32+1) /* Instructions Retired Count */
-#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
-#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
-#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */
+#define X86_FEATURE_AMD_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */
+#define X86_FEATURE_AMD_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */
+#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
@@ -299,6 +310,7 @@
#define X86_FEATURE_SUCCOR (17*32+1) /* Uncorrectable error containment and recovery */
#define X86_FEATURE_SMCA (17*32+3) /* Scalable MCA */
+
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EDX), word 18 */
#define X86_FEATURE_AVX512_4VNNIW (18*32+ 2) /* AVX-512 Neural Network Instructions */
#define X86_FEATURE_AVX512_4FMAPS (18*32+ 3) /* AVX-512 Multiply Accumulation Single precision */
@@ -306,6 +318,7 @@
#define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */
#define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */
#define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */
+#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */
/*
* BUG word(s)
@@ -335,5 +348,6 @@
#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */
#define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */
#define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */
+#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */
#endif /* _ASM_X86_CPUFEATURES_H */
diff --git a/arch/x86/include/asm/i8259.h b/arch/x86/include/asm/i8259.h
index 39bcefc20de7..bb078786a323 100644
--- a/arch/x86/include/asm/i8259.h
+++ b/arch/x86/include/asm/i8259.h
@@ -68,6 +68,11 @@ struct legacy_pic {
extern struct legacy_pic *legacy_pic;
extern struct legacy_pic null_legacy_pic;
+static inline bool has_legacy_pic(void)
+{
+ return legacy_pic != &null_legacy_pic;
+}
+
static inline int nr_legacy_irqs(void)
{
return legacy_pic->nr_legacy_irqs;
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 20cfeeb681c6..7598a6c26f76 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -864,7 +864,7 @@ struct kvm_x86_ops {
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
bool (*cpu_has_accelerated_tpr)(void);
- bool (*cpu_has_high_real_mode_segbase)(void);
+ bool (*has_emulated_msr)(int index);
void (*cpuid_update)(struct kvm_vcpu *vcpu);
int (*vm_init)(struct kvm *kvm);
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 5a295bb97103..733650874b30 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -113,7 +113,7 @@ static inline int init_new_context(struct task_struct *tsk,
#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
if (cpu_feature_enabled(X86_FEATURE_OSPKE)) {
- /* pkey 0 is the default and always allocated */
+ /* pkey 0 is the default and allocated implicitly */
mm->context.pkey_allocation_map = 0x1;
/* -1 means unallocated or invalid */
mm->context.execute_only_pkey = -1;
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index c768bc1550a1..1ec13e253174 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -40,6 +40,8 @@
#define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */
#define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */
#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */
+#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
+#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */
@@ -61,6 +63,11 @@
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */
+#define ARCH_CAP_SSB_NO (1 << 4) /*
+ * Not susceptible to Speculative Store Bypass
+ * attack, so no Speculative Store Bypass
+ * control required.
+ */
#define MSR_IA32_BBL_CR_CTL 0x00000119
#define MSR_IA32_BBL_CR_CTL3 0x0000011e
@@ -135,6 +142,7 @@
/* DEBUGCTLMSR bits (others vary by model): */
#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */
+#define DEBUGCTLMSR_BTF_SHIFT 1
#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */
#define DEBUGCTLMSR_TR (1UL << 6)
#define DEBUGCTLMSR_BTS (1UL << 7)
@@ -315,6 +323,8 @@
#define MSR_AMD64_IBSOPDATA4 0xc001103d
#define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */
+#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f
+
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index f928ad9b143f..8b38df98548e 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -217,6 +217,14 @@ enum spectre_v2_mitigation {
SPECTRE_V2_IBRS,
};
+/* The Speculative Store Bypass disable variants */
+enum ssb_mitigation {
+ SPEC_STORE_BYPASS_NONE,
+ SPEC_STORE_BYPASS_DISABLE,
+ SPEC_STORE_BYPASS_PRCTL,
+ SPEC_STORE_BYPASS_SECCOMP,
+};
+
extern char __indirect_thunk_start[];
extern char __indirect_thunk_end[];
@@ -241,22 +249,27 @@ static inline void vmexit_fill_RSB(void)
#endif
}
-#define alternative_msr_write(_msr, _val, _feature) \
- asm volatile(ALTERNATIVE("", \
- "movl %[msr], %%ecx\n\t" \
- "movl %[val], %%eax\n\t" \
- "movl $0, %%edx\n\t" \
- "wrmsr", \
- _feature) \
- : : [msr] "i" (_msr), [val] "i" (_val) \
- : "eax", "ecx", "edx", "memory")
+static __always_inline
+void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature)
+{
+ asm volatile(ALTERNATIVE("", "wrmsr", %c[feature])
+ : : "c" (msr),
+ "a" ((u32)val),
+ "d" ((u32)(val >> 32)),
+ [feature] "i" (feature)
+ : "memory");
+}
static inline void indirect_branch_prediction_barrier(void)
{
- alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB,
- X86_FEATURE_USE_IBPB);
+ u64 val = PRED_CMD_IBPB;
+
+ alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB);
}
+/* The Intel SPEC CTRL MSR base value cache */
+extern u64 x86_spec_ctrl_base;
+
/*
* With retpoline, we must use IBRS to restrict branch prediction
* before calling into firmware.
@@ -265,14 +278,18 @@ static inline void indirect_branch_prediction_barrier(void)
*/
#define firmware_restrict_branch_speculation_start() \
do { \
+ u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
+ \
preempt_disable(); \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
X86_FEATURE_USE_IBRS_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \
+ u64 val = x86_spec_ctrl_base; \
+ \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h
index b3b09b98896d..c50d6dcf4a22 100644
--- a/arch/x86/include/asm/pkeys.h
+++ b/arch/x86/include/asm/pkeys.h
@@ -1,6 +1,8 @@
#ifndef _ASM_X86_PKEYS_H
#define _ASM_X86_PKEYS_H
+#define ARCH_DEFAULT_PKEY 0
+
#define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1)
extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey,
@@ -14,7 +16,7 @@ extern int __execute_only_pkey(struct mm_struct *mm);
static inline int execute_only_pkey(struct mm_struct *mm)
{
if (!boot_cpu_has(X86_FEATURE_OSPKE))
- return 0;
+ return ARCH_DEFAULT_PKEY;
return __execute_only_pkey(mm);
}
@@ -48,13 +50,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey)
{
/*
* "Allocated" pkeys are those that have been returned
- * from pkey_alloc(). pkey 0 is special, and never
- * returned from pkey_alloc().
+ * from pkey_alloc() or pkey 0 which is allocated
+ * implicitly when the mm is created.
*/
- if (pkey <= 0)
+ if (pkey < 0)
return false;
if (pkey >= arch_max_pkey())
return false;
+ /*
+ * The exec-only pkey is set in the allocation map, but
+ * is not available to any of the user interfaces like
+ * mprotect_pkey().
+ */
+ if (pkey == mm->context.execute_only_pkey)
+ return false;
+
return mm_pkey_allocation_map(mm) & (1U << pkey);
}
diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h
new file mode 100644
index 000000000000..ae7c2c5cd7f0
--- /dev/null
+++ b/arch/x86/include/asm/spec-ctrl.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SPECCTRL_H_
+#define _ASM_X86_SPECCTRL_H_
+
+#include <linux/thread_info.h>
+#include <asm/nospec-branch.h>
+
+/*
+ * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR
+ * the guest has, while on VMEXIT we restore the host view. This
+ * would be easier if SPEC_CTRL were architecturally maskable or
+ * shadowable for guests but this is not (currently) the case.
+ * Takes the guest view of SPEC_CTRL MSR as a parameter and also
+ * the guest's version of VIRT_SPEC_CTRL, if emulated.
+ */
+extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest);
+
+/**
+ * x86_spec_ctrl_set_guest - Set speculation control registers for the guest
+ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ * (may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+static inline
+void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+{
+ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true);
+}
+
+/**
+ * x86_spec_ctrl_restore_host - Restore host speculation control registers
+ * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL
+ * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL
+ * (may get translated to MSR_AMD64_LS_CFG bits)
+ *
+ * Avoids writing to the MSR if the content/bits are the same
+ */
+static inline
+void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl)
+{
+ x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false);
+}
+
+/* AMD specific Speculative Store Bypass MSR data */
+extern u64 x86_amd_ls_cfg_base;
+extern u64 x86_amd_ls_cfg_ssbd_mask;
+
+static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn)
+{
+ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+ return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+}
+
+static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl)
+{
+ BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT);
+ return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT);
+}
+
+static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn)
+{
+ return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL;
+}
+
+#ifdef CONFIG_SMP
+extern void speculative_store_bypass_ht_init(void);
+#else
+static inline void speculative_store_bypass_ht_init(void) { }
+#endif
+
+extern void speculative_store_bypass_update(unsigned long tif);
+
+static inline void speculative_store_bypass_update_current(void)
+{
+ speculative_store_bypass_update(current_thread_info()->flags);
+}
+
+#endif
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index b65c6705d6b9..51b0c652d02f 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -90,6 +90,7 @@ struct thread_info {
#define TIF_SIGPENDING 2 /* signal pending */
#define TIF_NEED_RESCHED 3 /* rescheduling necessary */
#define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/
+#define TIF_SSBD 5 /* Reduced data speculation */
#define TIF_SYSCALL_EMU 6 /* syscall emulation active */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_SECCOMP 8 /* secure computing */
@@ -112,8 +113,9 @@ struct thread_info {
#define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
#define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME)
#define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
-#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
#define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+#define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP)
+#define _TIF_SSBD (1 << TIF_SSBD)
#define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT)
#define _TIF_SECCOMP (1 << TIF_SECCOMP)
@@ -148,7 +150,7 @@ struct thread_info {
/* flags to check in __switch_to() */
#define _TIF_WORK_CTXSW \
- (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP)
+ (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD)
#define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY)
#define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW)
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 99185a064978..686a58d793e5 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -111,6 +111,16 @@ static inline void cr4_clear_bits(unsigned long mask)
}
}
+static inline void cr4_toggle_bits(unsigned long mask)
+{
+ unsigned long cr4;
+
+ cr4 = this_cpu_read(cpu_tlbstate.cr4);
+ cr4 ^= mask;
+ this_cpu_write(cpu_tlbstate.cr4, cr4);
+ __write_cr4(cr4);
+}
+
/* Read the CR4 shadow. */
static inline unsigned long cr4_read_shadow(void)
{
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index c6583efdbdaf..76cf21f887bd 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1403,7 +1403,7 @@ void setup_local_APIC(void)
* TODO: set up through-local-APIC from through-I/O-APIC? --macro
*/
value = apic_read(APIC_LVT0) & APIC_LVT_MASKED;
- if (!cpu && (pic_mode || !value)) {
+ if (!cpu && (pic_mode || !value || skip_ioapic_setup)) {
value = APIC_DM_EXTINT;
apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu);
} else {
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index c375bc672f82..4c2be99fa0fb 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -9,6 +9,7 @@
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cpu.h>
+#include <asm/spec-ctrl.h>
#include <asm/smp.h>
#include <asm/pci-direct.h>
#include <asm/delay.h>
@@ -542,6 +543,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c)
rdmsrl(MSR_FAM10H_NODE_ID, value);
nodes_per_socket = ((value >> 3) & 7) + 1;
}
+
+ if (c->x86 >= 0x15 && c->x86 <= 0x17) {
+ unsigned int bit;
+
+ switch (c->x86) {
+ case 0x15: bit = 54; break;
+ case 0x16: bit = 33; break;
+ case 0x17: bit = 10; break;
+ default: return;
+ }
+ /*
+ * Try to cache the base value so further operations can
+ * avoid RMW. If that faults, do not enable SSBD.
+ */
+ if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) {
+ setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD);
+ setup_force_cpu_cap(X86_FEATURE_SSBD);
+ x86_amd_ls_cfg_ssbd_mask = 1ULL << bit;
+ }
+ }
}
static void early_init_amd(struct cpuinfo_x86 *c)
@@ -728,6 +749,17 @@ static void init_amd_bd(struct cpuinfo_x86 *c)
}
}
+static void init_amd_zn(struct cpuinfo_x86 *c)
+{
+ set_cpu_cap(c, X86_FEATURE_ZEN);
+ /*
+ * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects
+ * all up to and including B1.
+ */
+ if (c->x86_model <= 1 && c->x86_stepping <= 1)
+ set_cpu_cap(c, X86_FEATURE_CPB);
+}
+
static void init_amd(struct cpuinfo_x86 *c)
{
u32 dummy;
@@ -758,6 +790,7 @@ static void init_amd(struct cpuinfo_x86 *c)
case 0x10: init_amd_gh(c); break;
case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
+ case 0x17: init_amd_zn(c); break;
}
/* Enable workaround for FXSAVE leak */
@@ -824,8 +857,9 @@ static void init_amd(struct cpuinfo_x86 *c)
if (cpu_has(c, X86_FEATURE_3DNOW) || cpu_has(c, X86_FEATURE_LM))
set_cpu_cap(c, X86_FEATURE_3DNOWPREFETCH);
- /* AMD CPUs don't reset SS attributes on SYSRET */
- set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+ /* AMD CPUs don't reset SS attributes on SYSRET, Xen does. */
+ if (!cpu_has(c, X86_FEATURE_XENPV))
+ set_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index b8b0b6e78371..86af9b1b049d 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -11,8 +11,10 @@
#include <linux/utsname.h>
#include <linux/cpu.h>
#include <linux/module.h>
+#include <linux/nospec.h>
+#include <linux/prctl.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/cmdline.h>
#include <asm/bugs.h>
#include <asm/processor.h>
@@ -26,6 +28,27 @@
#include <asm/intel-family.h>
static void __init spectre_v2_select_mitigation(void);
+static void __init ssb_select_mitigation(void);
+
+/*
+ * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any
+ * writes to SPEC_CTRL contain whatever reserved bits have been set.
+ */
+u64 __ro_after_init x86_spec_ctrl_base;
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
+
+/*
+ * The vendor and possibly platform specific bits which can be modified in
+ * x86_spec_ctrl_base.
+ */
+static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+
+/*
+ * AMD specific MSR info for Speculative Store Bypass control.
+ * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu().
+ */
+u64 __ro_after_init x86_amd_ls_cfg_base;
+u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask;
void __init check_bugs(void)
{
@@ -36,9 +59,27 @@ void __init check_bugs(void)
print_cpu_info(&boot_cpu_data);
}
+ /*
+ * Read the SPEC_CTRL MSR to account for reserved bits which may
+ * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD
+ * init code as it is not enumerated and depends on the family.
+ */
+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
+ /* Allow STIBP in MSR_SPEC_CTRL if supported */
+ if (boot_cpu_has(X86_FEATURE_STIBP))
+ x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
+
/* Select the proper spectre mitigation before patching alternatives */
spectre_v2_select_mitigation();
+ /*
+ * Select proper mitigation for any exposure to the Speculative Store
+ * Bypass vulnerability.
+ */
+ ssb_select_mitigation();
+
#ifdef CONFIG_X86_32
/*
* Check whether we are able to run this kernel safely on SMP.
@@ -92,7 +133,76 @@ static const char *spectre_v2_strings[] = {
#undef pr_fmt
#define pr_fmt(fmt) "Spectre V2 : " fmt
-static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE;
+static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
+ SPECTRE_V2_NONE;
+
+void
+x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
+{
+ u64 msrval, guestval, hostval = x86_spec_ctrl_base;
+ struct thread_info *ti = current_thread_info();
+
+ /* Is MSR_SPEC_CTRL implemented ? */
+ if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
+ /*
+ * Restrict guest_spec_ctrl to supported values. Clear the
+ * modifiable bits in the host base value and or the
+ * modifiable bits from the guest value.
+ */
+ guestval = hostval & ~x86_spec_ctrl_mask;
+ guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
+
+ /* SSBD controlled in MSR_SPEC_CTRL */
+ if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD))
+ hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
+
+ if (hostval != guestval) {
+ msrval = setguest ? guestval : hostval;
+ wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
+ }
+ }
+
+ /*
+ * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update
+ * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported.
+ */
+ if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) &&
+ !static_cpu_has(X86_FEATURE_VIRT_SSBD))
+ return;
+
+ /*
+ * If the host has SSBD mitigation enabled, force it in the host's
+ * virtual MSR value. If its not permanently enabled, evaluate
+ * current's TIF_SSBD thread flag.
+ */
+ if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE))
+ hostval = SPEC_CTRL_SSBD;
+ else
+ hostval = ssbd_tif_to_spec_ctrl(ti->flags);
+
+ /* Sanitize the guest value */
+ guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD;
+
+ if (hostval != guestval) {
+ unsigned long tif;
+
+ tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) :
+ ssbd_spec_ctrl_to_tif(hostval);
+
+ speculative_store_bypass_update(tif);
+ }
+}
+EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl);
+
+static void x86_amd_ssb_disable(void)
+{
+ u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask;
+
+ if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
+ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD);
+ else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+ wrmsrl(MSR_AMD64_LS_CFG, msrval);
+}
#ifdef RETPOLINE
static bool spectre_v2_bad_module;
@@ -311,32 +421,289 @@ retpoline_auto:
}
#undef pr_fmt
+#define pr_fmt(fmt) "Speculative Store Bypass: " fmt
+
+static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE;
+
+/* The kernel command line selection */
+enum ssb_mitigation_cmd {
+ SPEC_STORE_BYPASS_CMD_NONE,
+ SPEC_STORE_BYPASS_CMD_AUTO,
+ SPEC_STORE_BYPASS_CMD_ON,
+ SPEC_STORE_BYPASS_CMD_PRCTL,
+ SPEC_STORE_BYPASS_CMD_SECCOMP,
+};
+
+static const char *ssb_strings[] = {
+ [SPEC_STORE_BYPASS_NONE] = "Vulnerable",
+ [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled",
+ [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl",
+ [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp",
+};
+
+static const struct {
+ const char *option;
+ enum ssb_mitigation_cmd cmd;
+} ssb_mitigation_options[] = {
+ { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */
+ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */
+ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */
+ { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */
+ { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */
+};
+
+static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void)
+{
+ enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO;
+ char arg[20];
+ int ret, i;
+
+ if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) {
+ return SPEC_STORE_BYPASS_CMD_NONE;
+ } else {
+ ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable",
+ arg, sizeof(arg));
+ if (ret < 0)
+ return SPEC_STORE_BYPASS_CMD_AUTO;
+
+ for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) {
+ if (!match_option(arg, ret, ssb_mitigation_options[i].option))
+ continue;
+
+ cmd = ssb_mitigation_options[i].cmd;
+ break;
+ }
+
+ if (i >= ARRAY_SIZE(ssb_mitigation_options)) {
+ pr_err("unknown option (%s). Switching to AUTO select\n", arg);
+ return SPEC_STORE_BYPASS_CMD_AUTO;
+ }
+ }
+
+ return cmd;
+}
+
+static enum ssb_mitigation __init __ssb_select_mitigation(void)
+{
+ enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE;
+ enum ssb_mitigation_cmd cmd;
+
+ if (!boot_cpu_has(X86_FEATURE_SSBD))
+ return mode;
+
+ cmd = ssb_parse_cmdline();
+ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) &&
+ (cmd == SPEC_STORE_BYPASS_CMD_NONE ||
+ cmd == SPEC_STORE_BYPASS_CMD_AUTO))
+ return mode;
+
+ switch (cmd) {
+ case SPEC_STORE_BYPASS_CMD_AUTO:
+ case SPEC_STORE_BYPASS_CMD_SECCOMP:
+ /*
+ * Choose prctl+seccomp as the default mode if seccomp is
+ * enabled.
+ */
+ if (IS_ENABLED(CONFIG_SECCOMP))
+ mode = SPEC_STORE_BYPASS_SECCOMP;
+ else
+ mode = SPEC_STORE_BYPASS_PRCTL;
+ break;
+ case SPEC_STORE_BYPASS_CMD_ON:
+ mode = SPEC_STORE_BYPASS_DISABLE;
+ break;
+ case SPEC_STORE_BYPASS_CMD_PRCTL:
+ mode = SPEC_STORE_BYPASS_PRCTL;
+ break;
+ case SPEC_STORE_BYPASS_CMD_NONE:
+ break;
+ }
+
+ /*
+ * We have three CPU feature flags that are in play here:
+ * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
+ * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass
+ * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation
+ */
+ if (mode == SPEC_STORE_BYPASS_DISABLE) {
+ setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
+ /*
+ * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
+ * a completely different MSR and bit dependent on family.
+ */
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_INTEL:
+ x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
+ x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ break;
+ case X86_VENDOR_AMD:
+ x86_amd_ssb_disable();
+ break;
+ }
+ }
+
+ return mode;
+}
+
+static void ssb_select_mitigation(void)
+{
+ ssb_mode = __ssb_select_mitigation();
+
+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ pr_info("%s\n", ssb_strings[ssb_mode]);
+}
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Speculation prctl: " fmt
+
+static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
+{
+ bool update;
+
+ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
+ ssb_mode != SPEC_STORE_BYPASS_SECCOMP)
+ return -ENXIO;
+
+ switch (ctrl) {
+ case PR_SPEC_ENABLE:
+ /* If speculation is force disabled, enable is not allowed */
+ if (task_spec_ssb_force_disable(task))
+ return -EPERM;
+ task_clear_spec_ssb_disable(task);
+ update = test_and_clear_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ case PR_SPEC_DISABLE:
+ task_set_spec_ssb_disable(task);
+ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ case PR_SPEC_FORCE_DISABLE:
+ task_set_spec_ssb_disable(task);
+ task_set_spec_ssb_force_disable(task);
+ update = !test_and_set_tsk_thread_flag(task, TIF_SSBD);
+ break;
+ default:
+ return -ERANGE;
+ }
+
+ /*
+ * If being set on non-current task, delay setting the CPU
+ * mitigation until it is next scheduled.
+ */
+ if (task == current && update)
+ speculative_store_bypass_update_current();
+
+ return 0;
+}
+
+int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which,
+ unsigned long ctrl)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_set(task, ctrl);
+ default:
+ return -ENODEV;
+ }
+}
+
+#ifdef CONFIG_SECCOMP
+void arch_seccomp_spec_mitigate(struct task_struct *task)
+{
+ if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
+ ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
+}
+#endif
+
+static int ssb_prctl_get(struct task_struct *task)
+{
+ switch (ssb_mode) {
+ case SPEC_STORE_BYPASS_DISABLE:
+ return PR_SPEC_DISABLE;
+ case SPEC_STORE_BYPASS_SECCOMP:
+ case SPEC_STORE_BYPASS_PRCTL:
+ if (task_spec_ssb_force_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
+ if (task_spec_ssb_disable(task))
+ return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
+ return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+ default:
+ if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS))
+ return PR_SPEC_ENABLE;
+ return PR_SPEC_NOT_AFFECTED;
+ }
+}
+
+int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
+{
+ switch (which) {
+ case PR_SPEC_STORE_BYPASS:
+ return ssb_prctl_get(task);
+ default:
+ return -ENODEV;
+ }
+}
+
+void x86_spec_ctrl_setup_ap(void)
+{
+ if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+
+ if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
+ x86_amd_ssb_disable();
+}
#ifdef CONFIG_SYSFS
-ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+
+static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
+ char *buf, unsigned int bug)
{
- if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN))
+ if (!boot_cpu_has_bug(bug))
return sprintf(buf, "Not affected\n");
- if (boot_cpu_has(X86_FEATURE_KAISER))
- return sprintf(buf, "Mitigation: PTI\n");
+
+ switch (bug) {
+ case X86_BUG_CPU_MELTDOWN:
+ if (boot_cpu_has(X86_FEATURE_KAISER))
+ return sprintf(buf, "Mitigation: PTI\n");
+
+ break;
+
+ case X86_BUG_SPECTRE_V1:
+ return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+
+ case X86_BUG_SPECTRE_V2:
+ return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
+ boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
+ boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
+ spectre_v2_module_string());
+
+ case X86_BUG_SPEC_STORE_BYPASS:
+ return sprintf(buf, "%s\n", ssb_strings[ssb_mode]);
+
+ default:
+ break;
+ }
+
return sprintf(buf, "Vulnerable\n");
}
+ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN);
+}
+
ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
{
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1))
- return sprintf(buf, "Not affected\n");
- return sprintf(buf, "Mitigation: __user pointer sanitization\n");
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1);
}
ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf)
{
- if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
- return sprintf(buf, "Not affected\n");
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2);
+}
- return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled],
- boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "",
- boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "",
- spectre_v2_module_string());
+ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS);
}
#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 301bbd1f2373..b0fd028b2eee 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -725,17 +725,32 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
* and they also have a different bit for STIBP support. Also,
* a hypervisor might have set the individual AMD bits even on
* Intel CPUs, for finer-grained selection of what's available.
- *
- * We use the AMD bits in 0x8000_0008 EBX as the generic hardware
- * features, which are visible in /proc/cpuinfo and used by the
- * kernel. So set those accordingly from the Intel bits.
*/
if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) {
set_cpu_cap(c, X86_FEATURE_IBRS);
set_cpu_cap(c, X86_FEATURE_IBPB);
+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
}
+
if (cpu_has(c, X86_FEATURE_INTEL_STIBP))
set_cpu_cap(c, X86_FEATURE_STIBP);
+
+ if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) ||
+ cpu_has(c, X86_FEATURE_VIRT_SSBD))
+ set_cpu_cap(c, X86_FEATURE_SSBD);
+
+ if (cpu_has(c, X86_FEATURE_AMD_IBRS)) {
+ set_cpu_cap(c, X86_FEATURE_IBRS);
+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+ }
+
+ if (cpu_has(c, X86_FEATURE_AMD_IBPB))
+ set_cpu_cap(c, X86_FEATURE_IBPB);
+
+ if (cpu_has(c, X86_FEATURE_AMD_STIBP)) {
+ set_cpu_cap(c, X86_FEATURE_STIBP);
+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+ }
}
void get_cpu_cap(struct cpuinfo_x86 *c)
@@ -879,21 +894,55 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = {
{}
};
-static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c)
+static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = {
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL },
+ { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM },
+ { X86_VENDOR_CENTAUR, 5, },
+ { X86_VENDOR_INTEL, 5, },
+ { X86_VENDOR_NSC, 5, },
+ { X86_VENDOR_AMD, 0x12, },
+ { X86_VENDOR_AMD, 0x11, },
+ { X86_VENDOR_AMD, 0x10, },
+ { X86_VENDOR_AMD, 0xf, },
+ { X86_VENDOR_ANY, 4, },
+ {}
+};
+
+static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = 0;
- if (x86_match_cpu(cpu_no_meltdown))
- return false;
-
if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES))
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
+ if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
+ !(ia32_cap & ARCH_CAP_SSB_NO))
+ setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
+
+ if (x86_match_cpu(cpu_no_speculation))
+ return;
+
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
+ setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
+
+ if (x86_match_cpu(cpu_no_meltdown))
+ return;
+
/* Rogue Data Cache Load? No! */
if (ia32_cap & ARCH_CAP_RDCL_NO)
- return false;
+ return;
- return true;
+ setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
}
/*
@@ -942,12 +991,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
setup_force_cpu_cap(X86_FEATURE_ALWAYS);
- if (!x86_match_cpu(cpu_no_speculation)) {
- if (cpu_vulnerable_to_meltdown(c))
- setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN);
- setup_force_cpu_bug(X86_BUG_SPECTRE_V1);
- setup_force_cpu_bug(X86_BUG_SPECTRE_V2);
- }
+ cpu_set_bug_bits(c);
fpu__init_system(c);
@@ -1315,6 +1359,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c)
#endif
mtrr_ap_init();
validate_apic_and_package_id(c);
+ x86_spec_ctrl_setup_ap();
}
struct msr_range {
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 2584265d4745..3b19d82f7932 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -46,4 +46,7 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+
+extern void x86_spec_ctrl_setup_ap(void);
+
#endif /* ARCH_X86_CPU_H */
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 8fb1d6522f8e..93781e3f05b2 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -153,7 +153,10 @@ static void early_init_intel(struct cpuinfo_x86 *c)
setup_clear_cpu_cap(X86_FEATURE_IBPB);
setup_clear_cpu_cap(X86_FEATURE_STIBP);
setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL);
+ setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL);
setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP);
+ setup_clear_cpu_cap(X86_FEATURE_SSBD);
+ setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD);
}
/*
diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c
index 3fe45f84ced4..7a07b15b451c 100644
--- a/arch/x86/kernel/devicetree.c
+++ b/arch/x86/kernel/devicetree.c
@@ -11,6 +11,7 @@
#include <linux/of_address.h>
#include <linux/of_platform.h>
#include <linux/of_irq.h>
+#include <linux/libfdt.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/of_pci.h>
@@ -199,19 +200,22 @@ static struct of_ioapic_type of_ioapic_type[] =
static int dt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq,
unsigned int nr_irqs, void *arg)
{
- struct of_phandle_args *irq_data = (void *)arg;
+ struct irq_fwspec *fwspec = (struct irq_fwspec *)arg;
struct of_ioapic_type *it;
struct irq_alloc_info tmp;
+ int type_index;
- if (WARN_ON(irq_data->args_count < 2))
+ if (WARN_ON(fwspec->param_count < 2))
return -EINVAL;
- if (irq_data->args[1] >= ARRAY_SIZE(of_ioapic_type))
+
+ type_index = fwspec->param[1];
+ if (type_index >= ARRAY_SIZE(of_ioapic_type))
return -EINVAL;
- it = &of_ioapic_type[irq_data->args[1]];
+ it = &of_ioapic_type[type_index];
ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->trigger, it->polarity);
tmp.ioapic_id = mpc_ioapic_id(mp_irqdomain_ioapic_idx(domain));
- tmp.ioapic_pin = irq_data->args[0];
+ tmp.ioapic_pin = fwspec->param[0];
return mp_irqdomain_alloc(domain, virq, nr_irqs, &tmp);
}
@@ -276,14 +280,15 @@ static void __init x86_flattree_get_config(void)
map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128);
- initial_boot_params = dt = early_memremap(initial_dtb, map_len);
- size = of_get_flat_dt_size();
+ dt = early_memremap(initial_dtb, map_len);
+ size = fdt_totalsize(dt);
if (map_len < size) {
early_memunmap(dt, map_len);
- initial_boot_params = dt = early_memremap(initial_dtb, size);
+ dt = early_memremap(initial_dtb, size);
map_len = size;
}
+ early_init_dt_verify(dt);
unflatten_and_copy_device_tree();
early_memunmap(dt, map_len);
}
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 469b23d6acc2..fd7e9937ddd6 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -71,12 +71,17 @@ static void load_segments(void)
static void machine_kexec_free_page_tables(struct kimage *image)
{
free_page((unsigned long)image->arch.pgd);
+ image->arch.pgd = NULL;
#ifdef CONFIG_X86_PAE
free_page((unsigned long)image->arch.pmd0);
+ image->arch.pmd0 = NULL;
free_page((unsigned long)image->arch.pmd1);
+ image->arch.pmd1 = NULL;
#endif
free_page((unsigned long)image->arch.pte0);
+ image->arch.pte0 = NULL;
free_page((unsigned long)image->arch.pte1);
+ image->arch.pte1 = NULL;
}
static int machine_kexec_alloc_page_tables(struct kimage *image)
@@ -93,7 +98,6 @@ static int machine_kexec_alloc_page_tables(struct kimage *image)
!image->arch.pmd0 || !image->arch.pmd1 ||
#endif
!image->arch.pte0 || !image->arch.pte1) {
- machine_kexec_free_page_tables(image);
return -ENOMEM;
}
return 0;
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index a5784a14f8d1..eae59cad0b07 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -37,8 +37,11 @@ static struct kexec_file_ops *kexec_file_loaders[] = {
static void free_transition_pgtable(struct kimage *image)
{
free_page((unsigned long)image->arch.pud);
+ image->arch.pud = NULL;
free_page((unsigned long)image->arch.pmd);
+ image->arch.pmd = NULL;
free_page((unsigned long)image->arch.pte);
+ image->arch.pte = NULL;
}
static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
@@ -79,7 +82,6 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
return 0;
err:
- free_transition_pgtable(image);
return result;
}
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index a55b32007785..00a9047539d7 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -33,6 +33,7 @@
#include <asm/mce.h>
#include <asm/vm86.h>
#include <asm/switch_to.h>
+#include <asm/spec-ctrl.h>
/*
* per-CPU TSS segments. Threads are completely 'soft' on Linux,
@@ -134,11 +135,6 @@ void flush_thread(void)
fpu__clear(&tsk->thread.fpu);
}
-static void hard_disable_TSC(void)
-{
- cr4_set_bits(X86_CR4_TSD);
-}
-
void disable_TSC(void)
{
preempt_disable();
@@ -147,15 +143,10 @@ void disable_TSC(void)
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
- hard_disable_TSC();
+ cr4_set_bits(X86_CR4_TSD);
preempt_enable();
}
-static void hard_enable_TSC(void)
-{
- cr4_clear_bits(X86_CR4_TSD);
-}
-
static void enable_TSC(void)
{
preempt_disable();
@@ -164,7 +155,7 @@ static void enable_TSC(void)
* Must flip the CPU state synchronously with
* TIF_NOTSC in the current running context.
*/
- hard_enable_TSC();
+ cr4_clear_bits(X86_CR4_TSD);
preempt_enable();
}
@@ -192,48 +183,199 @@ int set_tsc_mode(unsigned int val)
return 0;
}
-void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
- struct tss_struct *tss)
+static inline void switch_to_bitmap(struct tss_struct *tss,
+ struct thread_struct *prev,
+ struct thread_struct *next,
+ unsigned long tifp, unsigned long tifn)
{
- struct thread_struct *prev, *next;
-
- prev = &prev_p->thread;
- next = &next_p->thread;
-
- if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^
- test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) {
- unsigned long debugctl = get_debugctlmsr();
-
- debugctl &= ~DEBUGCTLMSR_BTF;
- if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP))
- debugctl |= DEBUGCTLMSR_BTF;
-
- update_debugctlmsr(debugctl);
- }
-
- if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^
- test_tsk_thread_flag(next_p, TIF_NOTSC)) {
- /* prev and next are different */
- if (test_tsk_thread_flag(next_p, TIF_NOTSC))
- hard_disable_TSC();
- else
- hard_enable_TSC();
- }
-
- if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) {
+ if (tifn & _TIF_IO_BITMAP) {
/*
* Copy the relevant range of the IO bitmap.
* Normally this is 128 bytes or less:
*/
memcpy(tss->io_bitmap, next->io_bitmap_ptr,
max(prev->io_bitmap_max, next->io_bitmap_max));
- } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) {
+ } else if (tifp & _TIF_IO_BITMAP) {
/*
* Clear any possible leftover bits:
*/
memset(tss->io_bitmap, 0xff, prev->io_bitmap_max);
}
+}
+
+#ifdef CONFIG_SMP
+
+struct ssb_state {
+ struct ssb_state *shared_state;
+ raw_spinlock_t lock;
+ unsigned int disable_state;
+ unsigned long local_state;
+};
+
+#define LSTATE_SSB 0
+
+static DEFINE_PER_CPU(struct ssb_state, ssb_state);
+
+void speculative_store_bypass_ht_init(void)
+{
+ struct ssb_state *st = this_cpu_ptr(&ssb_state);
+ unsigned int this_cpu = smp_processor_id();
+ unsigned int cpu;
+
+ st->local_state = 0;
+
+ /*
+ * Shared state setup happens once on the first bringup
+ * of the CPU. It's not destroyed on CPU hotunplug.
+ */
+ if (st->shared_state)
+ return;
+
+ raw_spin_lock_init(&st->lock);
+
+ /*
+ * Go over HT siblings and check whether one of them has set up the
+ * shared state pointer already.
+ */
+ for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) {
+ if (cpu == this_cpu)
+ continue;
+
+ if (!per_cpu(ssb_state, cpu).shared_state)
+ continue;
+
+ /* Link it to the state of the sibling: */
+ st->shared_state = per_cpu(ssb_state, cpu).shared_state;
+ return;
+ }
+
+ /*
+ * First HT sibling to come up on the core. Link shared state of
+ * the first HT sibling to itself. The siblings on the same core
+ * which come up later will see the shared state pointer and link
+ * themself to the state of this CPU.
+ */
+ st->shared_state = st;
+}
+
+/*
+ * Logic is: First HT sibling enables SSBD for both siblings in the core
+ * and last sibling to disable it, disables it for the whole core. This how
+ * MSR_SPEC_CTRL works in "hardware":
+ *
+ * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL
+ */
+static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
+{
+ struct ssb_state *st = this_cpu_ptr(&ssb_state);
+ u64 msr = x86_amd_ls_cfg_base;
+
+ if (!static_cpu_has(X86_FEATURE_ZEN)) {
+ msr |= ssbd_tif_to_amd_ls_cfg(tifn);
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+ return;
+ }
+
+ if (tifn & _TIF_SSBD) {
+ /*
+ * Since this can race with prctl(), block reentry on the
+ * same CPU.
+ */
+ if (__test_and_set_bit(LSTATE_SSB, &st->local_state))
+ return;
+
+ msr |= x86_amd_ls_cfg_ssbd_mask;
+
+ raw_spin_lock(&st->shared_state->lock);
+ /* First sibling enables SSBD: */
+ if (!st->shared_state->disable_state)
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+ st->shared_state->disable_state++;
+ raw_spin_unlock(&st->shared_state->lock);
+ } else {
+ if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state))
+ return;
+
+ raw_spin_lock(&st->shared_state->lock);
+ st->shared_state->disable_state--;
+ if (!st->shared_state->disable_state)
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+ raw_spin_unlock(&st->shared_state->lock);
+ }
+}
+#else
+static __always_inline void amd_set_core_ssb_state(unsigned long tifn)
+{
+ u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn);
+
+ wrmsrl(MSR_AMD64_LS_CFG, msr);
+}
+#endif
+
+static __always_inline void amd_set_ssb_virt_state(unsigned long tifn)
+{
+ /*
+ * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL,
+ * so ssbd_tif_to_spec_ctrl() just works.
+ */
+ wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn));
+}
+
+static __always_inline void intel_set_ssb_state(unsigned long tifn)
+{
+ u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn);
+
+ wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+}
+
+static __always_inline void __speculative_store_bypass_update(unsigned long tifn)
+{
+ if (static_cpu_has(X86_FEATURE_VIRT_SSBD))
+ amd_set_ssb_virt_state(tifn);
+ else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+ amd_set_core_ssb_state(tifn);
+ else
+ intel_set_ssb_state(tifn);
+}
+
+void speculative_store_bypass_update(unsigned long tif)
+{
+ preempt_disable();
+ __speculative_store_bypass_update(tif);
+ preempt_enable();
+}
+
+void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p,
+ struct tss_struct *tss)
+{
+ struct thread_struct *prev, *next;
+ unsigned long tifp, tifn;
+
+ prev = &prev_p->thread;
+ next = &next_p->thread;
+
+ tifn = READ_ONCE(task_thread_info(next_p)->flags);
+ tifp = READ_ONCE(task_thread_info(prev_p)->flags);
+ switch_to_bitmap(tss, prev, next, tifp, tifn);
+
propagate_user_return_notify(prev_p, next_p);
+
+ if ((tifp & _TIF_BLOCKSTEP || tifn & _TIF_BLOCKSTEP) &&
+ arch_has_block_step()) {
+ unsigned long debugctl, msk;
+
+ rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ debugctl &= ~DEBUGCTLMSR_BTF;
+ msk = tifn & _TIF_BLOCKSTEP;
+ debugctl |= (msk >> TIF_BLOCKSTEP) << DEBUGCTLMSR_BTF_SHIFT;
+ wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl);
+ }
+
+ if ((tifp ^ tifn) & _TIF_NOTSC)
+ cr4_toggle_bits(X86_CR4_TSD);
+
+ if ((tifp ^ tifn) & _TIF_SSBD)
+ __speculative_store_bypass_update(tifn);
}
/*
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 83929cc47a4b..10b22fc6ef5a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -75,6 +75,7 @@
#include <asm/i8259.h>
#include <asm/realmode.h>
#include <asm/misc.h>
+#include <asm/spec-ctrl.h>
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
@@ -229,6 +230,8 @@ static void notrace start_secondary(void *unused)
*/
check_tsc_sync_target();
+ speculative_store_bypass_ht_init();
+
/*
* Lock vector_lock and initialize the vectors on this cpu
* before setting the cpu online. We must set it online with
@@ -1325,6 +1328,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
set_mtrr_aps_delayed_init();
smp_quirk_init_udelay();
+
+ speculative_store_bypass_ht_init();
}
void arch_enable_nonboot_cpus_begin(void)
@@ -1492,6 +1497,7 @@ static void remove_siblinginfo(int cpu)
cpumask_clear(topology_core_cpumask(cpu));
c->phys_proc_id = 0;
c->cpu_core_id = 0;
+ c->booted_cores = 0;
cpumask_clear_cpu(cpu, cpu_sibling_setup_mask);
recompute_smt_state();
}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index da6a287a11e4..769c370011d6 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -24,6 +24,7 @@
#include <asm/geode.h>
#include <asm/apic.h>
#include <asm/intel-family.h>
+#include <asm/i8259.h>
unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */
EXPORT_SYMBOL(cpu_khz);
@@ -456,6 +457,20 @@ static unsigned long pit_calibrate_tsc(u32 latch, unsigned long ms, int loopmin)
unsigned long tscmin, tscmax;
int pitcnt;
+ if (!has_legacy_pic()) {
+ /*
+ * Relies on tsc_early_delay_calibrate() to have given us semi
+ * usable udelay(), wait for the same 50ms we would have with
+ * the PIT loop below.
+ */
+ udelay(10 * USEC_PER_MSEC);
+ udelay(10 * USEC_PER_MSEC);
+ udelay(10 * USEC_PER_MSEC);
+ udelay(10 * USEC_PER_MSEC);
+ udelay(10 * USEC_PER_MSEC);
+ return ULONG_MAX;
+ }
+
/* Set the Gate high, disable speaker */
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
@@ -580,6 +595,9 @@ static unsigned long quick_pit_calibrate(void)
u64 tsc, delta;
unsigned long d1, d2;
+ if (!has_legacy_pic())
+ return 0;
+
/* Set the Gate high, disable speaker */
outb((inb(0x61) & ~0x02) | 0x01, 0x61);
diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 93f924de06cf..7e5119c1d15c 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -357,7 +357,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
- F(IBPB) | F(IBRS);
+ F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD);
/* cpuid 0xC0000001.edx */
const u32 kvm_cpuid_C000_0001_edx_x86_features =
@@ -382,7 +382,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
- F(SPEC_CTRL) | F(ARCH_CAPABILITIES);
+ F(SPEC_CTRL) | F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@@ -468,6 +468,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ecx &= ~F(PKU);
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
cpuid_mask(&entry->edx, CPUID_7_EDX);
+ /*
+ * We emulate ARCH_CAPABILITIES in software even
+ * if the host doesn't support it.
+ */
+ entry->edx |= F(ARCH_CAPABILITIES);
} else {
entry->ebx = 0;
entry->ecx = 0;
@@ -618,13 +623,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
g_phys_as = phys_as;
entry->eax = g_phys_as | (virt_as << 8);
entry->edx = 0;
- /* IBRS and IBPB aren't necessarily present in hardware cpuid */
- if (boot_cpu_has(X86_FEATURE_IBPB))
- entry->ebx |= F(IBPB);
- if (boot_cpu_has(X86_FEATURE_IBRS))
- entry->ebx |= F(IBRS);
+ /*
+ * IBRS, IBPB and VIRT_SSBD aren't necessarily present in
+ * hardware cpuid
+ */
+ if (boot_cpu_has(X86_FEATURE_AMD_IBPB))
+ entry->ebx |= F(AMD_IBPB);
+ if (boot_cpu_has(X86_FEATURE_AMD_IBRS))
+ entry->ebx |= F(AMD_IBRS);
+ if (boot_cpu_has(X86_FEATURE_VIRT_SSBD))
+ entry->ebx |= F(VIRT_SSBD);
entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features;
cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX);
+ if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD))
+ entry->ebx |= F(VIRT_SSBD);
break;
}
case 0x80000019:
diff --git a/arch/x86/kvm/cpuid.h b/arch/x86/kvm/cpuid.h
index d1beb7156704..c38369781239 100644
--- a/arch/x86/kvm/cpuid.h
+++ b/arch/x86/kvm/cpuid.h
@@ -165,21 +165,21 @@ static inline bool guest_cpuid_has_ibpb(struct kvm_vcpu *vcpu)
struct kvm_cpuid_entry2 *best;
best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
- if (best && (best->ebx & bit(X86_FEATURE_IBPB)))
+ if (best && (best->ebx & bit(X86_FEATURE_AMD_IBPB)))
return true;
best = kvm_find_cpuid_entry(vcpu, 7, 0);
return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
}
-static inline bool guest_cpuid_has_ibrs(struct kvm_vcpu *vcpu)
+static inline bool guest_cpuid_has_spec_ctrl(struct kvm_vcpu *vcpu)
{
struct kvm_cpuid_entry2 *best;
best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
- if (best && (best->ebx & bit(X86_FEATURE_IBRS)))
+ if (best && (best->ebx & bit(X86_FEATURE_AMD_IBRS)))
return true;
best = kvm_find_cpuid_entry(vcpu, 7, 0);
- return best && (best->edx & bit(X86_FEATURE_SPEC_CTRL));
+ return best && (best->edx & (bit(X86_FEATURE_SPEC_CTRL) | bit(X86_FEATURE_SSBD)));
}
static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu)
@@ -190,6 +190,15 @@ static inline bool guest_cpuid_has_arch_capabilities(struct kvm_vcpu *vcpu)
return best && (best->edx & bit(X86_FEATURE_ARCH_CAPABILITIES));
}
+static inline bool guest_cpuid_has_virt_ssbd(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *best;
+
+ best = kvm_find_cpuid_entry(vcpu, 0x80000008, 0);
+ return best && (best->ebx & bit(X86_FEATURE_VIRT_SSBD));
+}
+
+
/*
* NRIPS is provided through cpuidfn 0x8000000a.edx bit 3
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 1015f4c3b9ed..09c6e49e459c 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -299,8 +299,16 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu)
if (!lapic_in_kernel(vcpu))
return;
+ /*
+ * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation)
+ * which doesn't have EOI register; Some buggy OSes (e.g. Windows with
+ * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC
+ * version first and level-triggered interrupts never get EOIed in
+ * IOAPIC.
+ */
feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0);
- if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))))
+ if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) &&
+ !ioapic_in_kernel(vcpu->kvm))
v |= APIC_LVR_DIRECTED_EOI;
kvm_lapic_set_reg(apic, APIC_LVR, v);
}
diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index aaa93b4b0380..a27f9e442ffc 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -45,7 +45,7 @@
#include <asm/kvm_para.h>
#include <asm/irq_remapping.h>
#include <asm/microcode.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include <asm/virtext.h>
#include "trace.h"
@@ -185,6 +185,12 @@ struct vcpu_svm {
} host;
u64 spec_ctrl;
+ /*
+ * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
+ * translated into the appropriate L2_CFG bits on the host to
+ * perform speculative control.
+ */
+ u64 virt_spec_ctrl;
u32 *msrpm;
@@ -1561,6 +1567,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
u32 eax = 1;
svm->spec_ctrl = 0;
+ svm->virt_spec_ctrl = 0;
if (!init_event) {
svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE |
@@ -3545,11 +3552,18 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has_ibrs(vcpu))
+ !guest_cpuid_has_spec_ctrl(vcpu))
return 1;
msr_info->data = svm->spec_ctrl;
break;
+ case MSR_AMD64_VIRT_SPEC_CTRL:
+ if (!msr_info->host_initiated &&
+ !guest_cpuid_has_virt_ssbd(vcpu))
+ return 1;
+
+ msr_info->data = svm->virt_spec_ctrl;
+ break;
case MSR_IA32_UCODE_REV:
msr_info->data = 0x01000065;
break;
@@ -3643,7 +3657,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr->host_initiated &&
- !guest_cpuid_has_ibrs(vcpu))
+ !guest_cpuid_has_spec_ctrl(vcpu))
return 1;
/* The STIBP bit doesn't fault even if it's not advertised */
@@ -3684,6 +3698,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
break;
set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1);
break;
+ case MSR_AMD64_VIRT_SPEC_CTRL:
+ if (!msr->host_initiated &&
+ !guest_cpuid_has_virt_ssbd(vcpu))
+ return 1;
+
+ if (data & ~SPEC_CTRL_SSBD)
+ return 1;
+
+ svm->virt_spec_ctrl = data;
+ break;
case MSR_STAR:
svm->vmcb->save.star = data;
break;
@@ -4917,8 +4941,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (svm->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
+ x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl);
asm volatile (
"push %%" _ASM_BP "; \n\t"
@@ -5012,6 +5035,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
#endif
);
+ /* Eliminate branch target predictions from guest mode */
+ vmexit_fill_RSB();
+
+#ifdef CONFIG_X86_64
+ wrmsrl(MSR_GS_BASE, svm->host.gs_base);
+#else
+ loadsegment(fs, svm->host.fs);
+#ifndef CONFIG_X86_32_LAZY_GS
+ loadsegment(gs, svm->host.gs);
+#endif
+#endif
+
/*
* We do not use IBRS in the kernel. If this vCPU has used the
* SPEC_CTRL MSR it may have left it on; save the value and
@@ -5030,20 +5065,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- if (svm->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
-
- /* Eliminate branch target predictions from guest mode */
- vmexit_fill_RSB();
-
-#ifdef CONFIG_X86_64
- wrmsrl(MSR_GS_BASE, svm->host.gs_base);
-#else
- loadsegment(fs, svm->host.fs);
-#ifndef CONFIG_X86_32_LAZY_GS
- loadsegment(gs, svm->host.gs);
-#endif
-#endif
+ x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl);
reload_tss(vcpu);
@@ -5145,7 +5167,7 @@ static bool svm_cpu_has_accelerated_tpr(void)
return false;
}
-static bool svm_has_high_real_mode_segbase(void)
+static bool svm_has_emulated_msr(int index)
{
return true;
}
@@ -5462,7 +5484,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.hardware_enable = svm_hardware_enable,
.hardware_disable = svm_hardware_disable,
.cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr,
- .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase,
+ .has_emulated_msr = svm_has_emulated_msr,
.vcpu_create = svm_create_vcpu,
.vcpu_free = svm_free_vcpu,
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b978aeccda78..2827a9622d97 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -50,7 +50,7 @@
#include <asm/apic.h>
#include <asm/irq_remapping.h>
#include <asm/microcode.h>
-#include <asm/nospec-branch.h>
+#include <asm/spec-ctrl.h>
#include "trace.h"
#include "pmu.h"
@@ -2558,6 +2558,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
return;
}
+ WARN_ON_ONCE(vmx->emulation_required);
+
if (kvm_exception_is_soft(nr)) {
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
vmx->vcpu.arch.event_exit_inst_len);
@@ -3020,7 +3022,7 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has_ibrs(vcpu))
+ !guest_cpuid_has_spec_ctrl(vcpu))
return 1;
msr_info->data = to_vmx(vcpu)->spec_ctrl;
@@ -3137,11 +3139,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
case MSR_IA32_SPEC_CTRL:
if (!msr_info->host_initiated &&
- !guest_cpuid_has_ibrs(vcpu))
+ !guest_cpuid_has_spec_ctrl(vcpu))
return 1;
/* The STIBP bit doesn't fault even if it's not advertised */
- if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP))
+ if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD))
return 1;
vmx->spec_ctrl = data;
@@ -6430,12 +6432,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
goto out;
}
- if (err != EMULATE_DONE) {
- vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
- vcpu->run->internal.ndata = 0;
- return 0;
- }
+ if (err != EMULATE_DONE)
+ goto emulation_error;
+
+ if (vmx->emulation_required && !vmx->rmode.vm86_active &&
+ vcpu->arch.exception.pending)
+ goto emulation_error;
if (vcpu->arch.halt_request) {
vcpu->arch.halt_request = 0;
@@ -6451,6 +6453,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
out:
return ret;
+
+emulation_error:
+ vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION;
+ vcpu->run->internal.ndata = 0;
+ return 0;
}
static int __grow_ple_window(int val)
@@ -8691,9 +8699,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
}
}
-static bool vmx_has_high_real_mode_segbase(void)
+static bool vmx_has_emulated_msr(int index)
{
- return enable_unrestricted_guest || emulate_invalid_guest_state;
+ switch (index) {
+ case MSR_IA32_SMBASE:
+ /*
+ * We cannot do SMM unless we can run the guest in big
+ * real mode.
+ */
+ return enable_unrestricted_guest || emulate_invalid_guest_state;
+ case MSR_AMD64_VIRT_SPEC_CTRL:
+ /* This is AMD only. */
+ return false;
+ default:
+ return true;
+ }
}
static bool vmx_mpx_supported(void)
@@ -8916,10 +8936,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
* is no need to worry about the conditional branch over the wrmsr
* being speculatively taken.
*/
- if (vmx->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl);
+ x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
vmx->__launched = vmx->loaded_vmcs->launched;
+
asm(
/* Store host registers */
"push %%" _ASM_DX "; push %%" _ASM_BP ";"
@@ -9055,8 +9075,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
- if (vmx->spec_ctrl)
- native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+ x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
/* Eliminate branch target predictions from guest mode */
vmexit_fill_RSB();
@@ -11347,7 +11366,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.hardware_enable = hardware_enable,
.hardware_disable = hardware_disable,
.cpu_has_accelerated_tpr = report_flexpriority,
- .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase,
+ .has_emulated_msr = vmx_has_emulated_msr,
.vcpu_create = vmx_create_vcpu,
.vcpu_free = vmx_free_vcpu,
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0bf6ed591f16..719a4a5f7708 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -1002,6 +1002,7 @@ static u32 emulated_msrs[] = {
MSR_IA32_MCG_CTL,
MSR_IA32_MCG_EXT_CTL,
MSR_IA32_SMBASE,
+ MSR_AMD64_VIRT_SPEC_CTRL,
};
static unsigned num_emulated_msrs;
@@ -2664,7 +2665,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
* fringe case that is not enabled except via specific settings
* of the module parameters.
*/
- r = kvm_x86_ops->cpu_has_high_real_mode_segbase();
+ r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE);
break;
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
@@ -4130,13 +4131,14 @@ long kvm_arch_vm_ioctl(struct file *filp,
mutex_unlock(&kvm->lock);
break;
case KVM_XEN_HVM_CONFIG: {
+ struct kvm_xen_hvm_config xhc;
r = -EFAULT;
- if (copy_from_user(&kvm->arch.xen_hvm_config, argp,
- sizeof(struct kvm_xen_hvm_config)))
+ if (copy_from_user(&xhc, argp, sizeof(xhc)))
goto out;
r = -EINVAL;
- if (kvm->arch.xen_hvm_config.flags)
+ if (xhc.flags)
goto out;
+ memcpy(&kvm->arch.xen_hvm_config, &xhc, sizeof(xhc));
r = 0;
break;
}
@@ -4226,14 +4228,8 @@ static void kvm_init_msr_list(void)
num_msrs_to_save = j;
for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) {
- switch (emulated_msrs[i]) {
- case MSR_IA32_SMBASE:
- if (!kvm_x86_ops->cpu_has_high_real_mode_segbase())
- continue;
- break;
- default:
- break;
- }
+ if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i]))
+ continue;
if (j < i)
emulated_msrs[j] = emulated_msrs[i];
@@ -7270,6 +7266,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
{
struct msr_data apic_base_msr;
int mmu_reset_needed = 0;
+ int cpuid_update_needed = 0;
int pending_vec, max_bits, idx;
struct desc_ptr dt;
@@ -7301,8 +7298,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
vcpu->arch.cr0 = sregs->cr0;
mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4;
+ cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) &
+ (X86_CR4_OSXSAVE | X86_CR4_PKE));
kvm_x86_ops->set_cr4(vcpu, sregs->cr4);
- if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE))
+ if (cpuid_update_needed)
kvm_update_cpuid(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 7df8e3a79dc0..d35d0e4bbf99 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1014,8 +1014,7 @@ void __init mem_init(void)
after_bootmem = 1;
/* Register memory areas for /proc/kcore */
- kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR,
- PAGE_SIZE, KCORE_OTHER);
+ kclist_add(&kcore_vsyscall, (void *)VSYSCALL_ADDR, PAGE_SIZE, KCORE_USER);
mem_init_print_info(NULL);
}
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index c1085c7ee212..1dd1c2ad4e58 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -287,9 +287,11 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address,
/*
* The .rodata section needs to be read-only. Using the pfn
- * catches all aliases.
+ * catches all aliases. This also includes __ro_after_init,
+ * so do not enforce until kernel_set_to_readonly is true.
*/
- if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
+ if (kernel_set_to_readonly &&
+ within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT,
__pa_symbol(__end_rodata) >> PAGE_SHIFT))
pgprot_val(forbidden) |= _PAGE_RW;
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index b97ef29c940f..a3b63e5a527c 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -1,5 +1,6 @@
#include <linux/mm.h>
#include <linux/gfp.h>
+#include <linux/hugetlb.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/tlb.h>
@@ -577,6 +578,10 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot)
(mtrr != MTRR_TYPE_WRBACK))
return 0;
+ /* Bail out if we are we on a populated non-leaf entry: */
+ if (pud_present(*pud) && !pud_huge(*pud))
+ return 0;
+
prot = pgprot_4k_2_large(prot);
set_pte((pte_t *)pud, pfn_pte(
@@ -605,6 +610,10 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot)
return 0;
}
+ /* Bail out if we are we on a populated non-leaf entry: */
+ if (pmd_present(*pmd) && !pmd_huge(*pmd))
+ return 0;
+
prot = pgprot_4k_2_large(prot);
set_pte((pte_t *)pmd, pfn_pte(
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index f88ce0e5efd9..0bbec041c003 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -95,26 +95,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey
*/
if (pkey != -1)
return pkey;
- /*
- * Look for a protection-key-drive execute-only mapping
- * which is now being given permissions that are not
- * execute-only. Move it back to the default pkey.
- */
- if (vma_is_pkey_exec_only(vma) &&
- (prot & (PROT_READ|PROT_WRITE))) {
- return 0;
- }
+
/*
* The mapping is execute-only. Go try to get the
* execute-only protection key. If we fail to do that,
* fall through as if we do not have execute-only
- * support.
+ * support in this mm.
*/
if (prot == PROT_EXEC) {
pkey = execute_only_pkey(vma->vm_mm);
if (pkey > 0)
return pkey;
+ } else if (vma_is_pkey_exec_only(vma)) {
+ /*
+ * Protections are *not* PROT_EXEC, but the mapping
+ * is using the exec-only pkey. This mapping was
+ * PROT_EXEC and will no longer be. Move back to
+ * the default pkey.
+ */
+ return ARCH_DEFAULT_PKEY;
}
+
/*
* This is a vanilla, non-pkey mprotect (or we failed to
* setup execute-only), inherit the pkey from the VMA we
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index 9f14bd34581d..74b516cb39df 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -142,7 +142,7 @@ static inline void resume_init_first_level_page_table(pgd_t *pg_dir)
#endif
}
-int swsusp_arch_resume(void)
+asmlinkage int swsusp_arch_resume(void)
{
int error;
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 9634557a5444..0cb1dd461529 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -149,7 +149,7 @@ static int relocate_restore_code(void)
return 0;
}
-int swsusp_arch_resume(void)
+asmlinkage int swsusp_arch_resume(void)
{
int error;
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 2bea87cc0ff2..081437b5f381 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1977,10 +1977,8 @@ EXPORT_SYMBOL_GPL(xen_hvm_need_lapic);
static void xen_set_cpu_features(struct cpuinfo_x86 *c)
{
- if (xen_pv_domain()) {
- clear_cpu_bug(c, X86_BUG_SYSRET_SS_ATTRS);
+ if (xen_pv_domain())
set_cpu_cap(c, X86_FEATURE_XENPV);
- }
}
static void xen_pin_vcpu(int cpu)
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 418f1b8576cf..c92f75f7ae33 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1317,8 +1317,6 @@ void xen_flush_tlb_all(void)
struct mmuext_op *op;
struct multicall_space mcs;
- trace_xen_mmu_flush_tlb_all(0);
-
preempt_disable();
mcs = xen_mc_entry(sizeof(*op));
@@ -1336,8 +1334,6 @@ static void xen_flush_tlb(void)
struct mmuext_op *op;
struct multicall_space mcs;
- trace_xen_mmu_flush_tlb(0);
-
preempt_disable();
mcs = xen_mc_entry(sizeof(*op));