diff options
Diffstat (limited to 'arch')
152 files changed, 3008 insertions, 740 deletions
diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h index 68dfb3cb7145..02a7c2fa6106 100644 --- a/arch/alpha/include/asm/xchg.h +++ b/arch/alpha/include/asm/xchg.h @@ -12,6 +12,10 @@ * Atomic exchange. * Since it can be used to implement critical sections * it must clobber "memory" (also for interrupts in UP). + * + * The leading and the trailing memory barriers guarantee that these + * operations are fully ordered. + * */ static inline unsigned long @@ -19,6 +23,7 @@ ____xchg(_u8, volatile char *m, unsigned long val) { unsigned long ret, tmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %4,7,%3\n" " insbl %1,%4,%1\n" @@ -43,6 +48,7 @@ ____xchg(_u16, volatile short *m, unsigned long val) { unsigned long ret, tmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %4,7,%3\n" " inswl %1,%4,%1\n" @@ -67,6 +73,7 @@ ____xchg(_u32, volatile int *m, unsigned long val) { unsigned long dummy; + smp_mb(); __asm__ __volatile__( "1: ldl_l %0,%4\n" " bis $31,%3,%1\n" @@ -87,6 +94,7 @@ ____xchg(_u64, volatile long *m, unsigned long val) { unsigned long dummy; + smp_mb(); __asm__ __volatile__( "1: ldq_l %0,%4\n" " bis $31,%3,%1\n" @@ -128,10 +136,12 @@ ____xchg(, volatile void *ptr, unsigned long x, int size) * store NEW in MEM. Return the initial value in MEM. Success is * indicated by comparing RETURN with OLD. * - * The memory barrier should be placed in SMP only when we actually - * make the change. If we don't change anything (so if the returned - * prev is equal to old) then we aren't acquiring anything new and - * we don't need any memory barrier as far I can tell. + * The leading and the trailing memory barriers guarantee that these + * operations are fully ordered. + * + * The trailing memory barrier is placed in SMP unconditionally, in + * order to guarantee that dependency ordering is preserved when a + * dependency is headed by an unsuccessful operation. */ static inline unsigned long @@ -139,6 +149,7 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new) { unsigned long prev, tmp, cmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %5,7,%4\n" " insbl %1,%5,%1\n" @@ -150,8 +161,8 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new) " or %1,%2,%2\n" " stq_c %2,0(%4)\n" " beq %2,3f\n" - __ASM__MB "2:\n" + __ASM__MB ".subsection 2\n" "3: br 1b\n" ".previous" @@ -166,6 +177,7 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new) { unsigned long prev, tmp, cmp, addr64; + smp_mb(); __asm__ __volatile__( " andnot %5,7,%4\n" " inswl %1,%5,%1\n" @@ -177,8 +189,8 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new) " or %1,%2,%2\n" " stq_c %2,0(%4)\n" " beq %2,3f\n" - __ASM__MB "2:\n" + __ASM__MB ".subsection 2\n" "3: br 1b\n" ".previous" @@ -193,6 +205,7 @@ ____cmpxchg(_u32, volatile int *m, int old, int new) { unsigned long prev, cmp; + smp_mb(); __asm__ __volatile__( "1: ldl_l %0,%5\n" " cmpeq %0,%3,%1\n" @@ -200,8 +213,8 @@ ____cmpxchg(_u32, volatile int *m, int old, int new) " mov %4,%1\n" " stl_c %1,%2\n" " beq %1,3f\n" - __ASM__MB "2:\n" + __ASM__MB ".subsection 2\n" "3: br 1b\n" ".previous" @@ -216,6 +229,7 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new) { unsigned long prev, cmp; + smp_mb(); __asm__ __volatile__( "1: ldq_l %0,%5\n" " cmpeq %0,%3,%1\n" @@ -223,8 +237,8 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new) " mov %4,%1\n" " stq_c %1,%2\n" " beq %1,3f\n" - __ASM__MB "2:\n" + __ASM__MB ".subsection 2\n" "3: br 1b\n" ".previous" diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index c84e67fdea09..4383313b064a 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -487,7 +487,6 @@ config ARC_CURR_IN_REG config ARC_EMUL_UNALIGNED bool "Emulate unaligned memory access (userspace only)" - default N select SYSCTL_ARCH_UNALIGN_NO_WARN select SYSCTL_ARCH_UNALIGN_ALLOW depends on ISA_ARCOMPACT diff --git a/arch/arc/include/asm/bug.h b/arch/arc/include/asm/bug.h index ea022d47896c..21ec82466d62 100644 --- a/arch/arc/include/asm/bug.h +++ b/arch/arc/include/asm/bug.h @@ -23,7 +23,8 @@ void die(const char *str, struct pt_regs *regs, unsigned long address); #define BUG() do { \ pr_warn("BUG: failure at %s:%d/%s()!\n", __FILE__, __LINE__, __func__); \ - dump_stack(); \ + barrier_before_unreachable(); \ + __builtin_trap(); \ } while (0) #define HAVE_ARCH_BUG diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index f61a52b01625..5fe84e481654 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -22,10 +22,79 @@ static DEFINE_RAW_SPINLOCK(mcip_lock); static char smp_cpuinfo_buf[128]; +/* + * Set mask to halt GFRC if any online core in SMP cluster is halted. + * Only works for ARC HS v3.0+, on earlier versions has no effect. + */ +static void mcip_update_gfrc_halt_mask(int cpu) +{ + struct bcr_generic gfrc; + unsigned long flags; + u32 gfrc_halt_mask; + + READ_BCR(ARC_REG_GFRC_BUILD, gfrc); + + /* + * CMD_GFRC_SET_CORE and CMD_GFRC_READ_CORE commands were added in + * GFRC 0x3 version. + */ + if (gfrc.ver < 0x3) + return; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + __mcip_cmd(CMD_GFRC_READ_CORE, 0); + gfrc_halt_mask = read_aux_reg(ARC_REG_MCIP_READBACK); + gfrc_halt_mask |= BIT(cpu); + __mcip_cmd_data(CMD_GFRC_SET_CORE, 0, gfrc_halt_mask); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + +static void mcip_update_debug_halt_mask(int cpu) +{ + u32 mcip_mask = 0; + unsigned long flags; + + raw_spin_lock_irqsave(&mcip_lock, flags); + + /* + * mcip_mask is same for CMD_DEBUG_SET_SELECT and CMD_DEBUG_SET_MASK + * commands. So read it once instead of reading both CMD_DEBUG_READ_MASK + * and CMD_DEBUG_READ_SELECT. + */ + __mcip_cmd(CMD_DEBUG_READ_SELECT, 0); + mcip_mask = read_aux_reg(ARC_REG_MCIP_READBACK); + + mcip_mask |= BIT(cpu); + + __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, mcip_mask); + /* + * Parameter specified halt cause: + * STATUS32[H]/actionpoint/breakpoint/self-halt + * We choose all of them (0xF). + */ + __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xF, mcip_mask); + + raw_spin_unlock_irqrestore(&mcip_lock, flags); +} + static void mcip_setup_per_cpu(int cpu) { + struct mcip_bcr mp; + + READ_BCR(ARC_REG_MCIP_BCR, mp); + smp_ipi_irq_setup(cpu, IPI_IRQ); smp_ipi_irq_setup(cpu, SOFTIRQ_IRQ); + + /* Update GFRC halt mask as new CPU came online */ + if (mp.gfrc) + mcip_update_gfrc_halt_mask(cpu); + + /* Update MCIP debug mask as new CPU came online */ + if (mp.dbg) + mcip_update_debug_halt_mask(cpu); } static void mcip_ipi_send(int cpu) @@ -101,11 +170,6 @@ static void mcip_probe_n_setup(void) IS_AVAIL1(mp.gfrc, "GFRC")); cpuinfo_arc700[0].extn.gfrc = mp.gfrc; - - if (mp.dbg) { - __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf); - __mcip_cmd_data(CMD_DEBUG_SET_MASK, 0xf, 0xf); - } } struct plat_smp_ops plat_smp_ops = { diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 6df9d94a9537..115eecc0d9a4 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -24,6 +24,7 @@ #include <linux/reboot.h> #include <linux/irqdomain.h> #include <linux/export.h> +#include <linux/of_fdt.h> #include <asm/processor.h> #include <asm/setup.h> @@ -47,6 +48,42 @@ void __init smp_prepare_boot_cpu(void) { } +static int __init arc_get_cpu_map(const char *name, struct cpumask *cpumask) +{ + unsigned long dt_root = of_get_flat_dt_root(); + const char *buf; + + buf = of_get_flat_dt_prop(dt_root, name, NULL); + if (!buf) + return -EINVAL; + + if (cpulist_parse(buf, cpumask)) + return -EINVAL; + + return 0; +} + +/* + * Read from DeviceTree and setup cpu possible mask. If there is no + * "possible-cpus" property in DeviceTree pretend all [0..NR_CPUS-1] exist. + */ +static void __init arc_init_cpu_possible(void) +{ + struct cpumask cpumask; + + if (arc_get_cpu_map("possible-cpus", &cpumask)) { + pr_warn("Failed to get possible-cpus from dtb, pretending all %u cpus exist\n", + NR_CPUS); + + cpumask_setall(&cpumask); + } + + if (!cpumask_test_cpu(0, &cpumask)) + panic("Master cpu (cpu[0]) is missed in cpu possible mask!"); + + init_cpu_possible(&cpumask); +} + /* * Called from setup_arch() before calling setup_processor() * @@ -58,10 +95,7 @@ void __init smp_prepare_boot_cpu(void) */ void __init smp_init_cpus(void) { - unsigned int i; - - for (i = 0; i < NR_CPUS; i++) - set_cpu_possible(i, true); + arc_init_cpu_possible(); if (plat_smp_ops.init_early_smp) plat_smp_ops.init_early_smp(); @@ -70,16 +104,12 @@ void __init smp_init_cpus(void) /* called from init ( ) => process 1 */ void __init smp_prepare_cpus(unsigned int max_cpus) { - int i; - /* * if platform didn't set the present map already, do it now * boot cpu is set to present already by init/main.c */ - if (num_present_cpus() <= 1) { - for (i = 0; i < max_cpus; i++) - set_cpu_present(i, true); - } + if (num_present_cpus() <= 1) + init_cpu_present(cpu_possible_mask); } void __init smp_cpus_done(unsigned int max_cpus) diff --git a/arch/arm/boot/dts/at91-tse850-3.dts b/arch/arm/boot/dts/at91-tse850-3.dts index 5f29010cdbd8..4ef80a703eda 100644 --- a/arch/arm/boot/dts/at91-tse850-3.dts +++ b/arch/arm/boot/dts/at91-tse850-3.dts @@ -245,7 +245,7 @@ }; eeprom@50 { - compatible = "nxp,24c02", "atmel,24c02"; + compatible = "nxp,se97b", "atmel,24c02"; reg = <0x50>; pagesize = <16>; }; diff --git a/arch/arm/boot/dts/bcm2836.dtsi b/arch/arm/boot/dts/bcm2836.dtsi index 61e158003509..168c002f0ca0 100644 --- a/arch/arm/boot/dts/bcm2836.dtsi +++ b/arch/arm/boot/dts/bcm2836.dtsi @@ -9,7 +9,7 @@ <0x40000000 0x40000000 0x00001000>; dma-ranges = <0xc0000000 0x00000000 0x3f000000>; - local_intc: local_intc { + local_intc: local_intc@40000000 { compatible = "brcm,bcm2836-l1-intc"; reg = <0x40000000 0x100>; interrupt-controller; diff --git a/arch/arm/boot/dts/bcm2837.dtsi b/arch/arm/boot/dts/bcm2837.dtsi index bc1cca5cf43c..d5d058a568c3 100644 --- a/arch/arm/boot/dts/bcm2837.dtsi +++ b/arch/arm/boot/dts/bcm2837.dtsi @@ -8,7 +8,7 @@ <0x40000000 0x40000000 0x00001000>; dma-ranges = <0xc0000000 0x00000000 0x3f000000>; - local_intc: local_intc { + local_intc: local_intc@40000000 { compatible = "brcm,bcm2836-l1-intc"; reg = <0x40000000 0x100>; interrupt-controller; diff --git a/arch/arm/boot/dts/bcm283x.dtsi b/arch/arm/boot/dts/bcm283x.dtsi index 013431e3d7c3..4745e3c7806b 100644 --- a/arch/arm/boot/dts/bcm283x.dtsi +++ b/arch/arm/boot/dts/bcm283x.dtsi @@ -251,7 +251,7 @@ jtag_gpio4: jtag_gpio4 { brcm,pins = <4 5 6 12 13>; - brcm,function = <BCM2835_FSEL_ALT4>; + brcm,function = <BCM2835_FSEL_ALT5>; }; jtag_gpio22: jtag_gpio22 { brcm,pins = <22 23 24 25 26 27>; @@ -396,8 +396,8 @@ i2s: i2s@7e203000 { compatible = "brcm,bcm2835-i2s"; - reg = <0x7e203000 0x20>, - <0x7e101098 0x02>; + reg = <0x7e203000 0x24>; + clocks = <&clocks BCM2835_CLOCK_PCM>; dmas = <&dma 2>, <&dma 3>; diff --git a/arch/arm/boot/dts/bcm958625hr.dts b/arch/arm/boot/dts/bcm958625hr.dts index 6a44b8021702..f0e2008f7490 100644 --- a/arch/arm/boot/dts/bcm958625hr.dts +++ b/arch/arm/boot/dts/bcm958625hr.dts @@ -49,7 +49,7 @@ memory { device_type = "memory"; - reg = <0x60000000 0x80000000>; + reg = <0x60000000 0x20000000>; }; gpio-restart { diff --git a/arch/arm/boot/dts/dra71-evm.dts b/arch/arm/boot/dts/dra71-evm.dts index 41c9132eb550..64363f75c01a 100644 --- a/arch/arm/boot/dts/dra71-evm.dts +++ b/arch/arm/boot/dts/dra71-evm.dts @@ -24,13 +24,13 @@ regulator-name = "vddshv8"; regulator-min-microvolt = <1800000>; - regulator-max-microvolt = <3000000>; + regulator-max-microvolt = <3300000>; regulator-boot-on; vin-supply = <&evm_5v0>; gpios = <&gpio7 11 GPIO_ACTIVE_HIGH>; states = <1800000 0x0 - 3000000 0x1>; + 3300000 0x1>; }; evm_1v8_sw: fixedregulator-evm_1v8 { diff --git a/arch/arm/boot/dts/imx6dl-icore-rqs.dts b/arch/arm/boot/dts/imx6dl-icore-rqs.dts index cf42c2f5cdc7..1281bc39b7ab 100644 --- a/arch/arm/boot/dts/imx6dl-icore-rqs.dts +++ b/arch/arm/boot/dts/imx6dl-icore-rqs.dts @@ -42,7 +42,7 @@ /dts-v1/; -#include "imx6q.dtsi" +#include "imx6dl.dtsi" #include "imx6qdl-icore-rqs.dtsi" / { diff --git a/arch/arm/boot/dts/imx7d-cl-som-imx7.dts b/arch/arm/boot/dts/imx7d-cl-som-imx7.dts index ae45af1ad062..3cc1fb9ce441 100644 --- a/arch/arm/boot/dts/imx7d-cl-som-imx7.dts +++ b/arch/arm/boot/dts/imx7d-cl-som-imx7.dts @@ -213,37 +213,37 @@ &iomuxc { pinctrl_enet1: enet1grp { fsl,pins = < - MX7D_PAD_SD2_CD_B__ENET1_MDIO 0x3 - MX7D_PAD_SD2_WP__ENET1_MDC 0x3 - MX7D_PAD_ENET1_RGMII_TXC__ENET1_RGMII_TXC 0x1 - MX7D_PAD_ENET1_RGMII_TD0__ENET1_RGMII_TD0 0x1 - MX7D_PAD_ENET1_RGMII_TD1__ENET1_RGMII_TD1 0x1 - MX7D_PAD_ENET1_RGMII_TD2__ENET1_RGMII_TD2 0x1 - MX7D_PAD_ENET1_RGMII_TD3__ENET1_RGMII_TD3 0x1 - MX7D_PAD_ENET1_RGMII_TX_CTL__ENET1_RGMII_TX_CTL 0x1 - MX7D_PAD_ENET1_RGMII_RXC__ENET1_RGMII_RXC 0x1 - MX7D_PAD_ENET1_RGMII_RD0__ENET1_RGMII_RD0 0x1 - MX7D_PAD_ENET1_RGMII_RD1__ENET1_RGMII_RD1 0x1 - MX7D_PAD_ENET1_RGMII_RD2__ENET1_RGMII_RD2 0x1 - MX7D_PAD_ENET1_RGMII_RD3__ENET1_RGMII_RD3 0x1 - MX7D_PAD_ENET1_RGMII_RX_CTL__ENET1_RGMII_RX_CTL 0x1 + MX7D_PAD_SD2_CD_B__ENET1_MDIO 0x30 + MX7D_PAD_SD2_WP__ENET1_MDC 0x30 + MX7D_PAD_ENET1_RGMII_TXC__ENET1_RGMII_TXC 0x11 + MX7D_PAD_ENET1_RGMII_TD0__ENET1_RGMII_TD0 0x11 + MX7D_PAD_ENET1_RGMII_TD1__ENET1_RGMII_TD1 0x11 + MX7D_PAD_ENET1_RGMII_TD2__ENET1_RGMII_TD2 0x11 + MX7D_PAD_ENET1_RGMII_TD3__ENET1_RGMII_TD3 0x11 + MX7D_PAD_ENET1_RGMII_TX_CTL__ENET1_RGMII_TX_CTL 0x11 + MX7D_PAD_ENET1_RGMII_RXC__ENET1_RGMII_RXC 0x11 + MX7D_PAD_ENET1_RGMII_RD0__ENET1_RGMII_RD0 0x11 + MX7D_PAD_ENET1_RGMII_RD1__ENET1_RGMII_RD1 0x11 + MX7D_PAD_ENET1_RGMII_RD2__ENET1_RGMII_RD2 0x11 + MX7D_PAD_ENET1_RGMII_RD3__ENET1_RGMII_RD3 0x11 + MX7D_PAD_ENET1_RGMII_RX_CTL__ENET1_RGMII_RX_CTL 0x11 >; }; pinctrl_enet2: enet2grp { fsl,pins = < - MX7D_PAD_EPDC_GDSP__ENET2_RGMII_TXC 0x1 - MX7D_PAD_EPDC_SDCE2__ENET2_RGMII_TD0 0x1 - MX7D_PAD_EPDC_SDCE3__ENET2_RGMII_TD1 0x1 - MX7D_PAD_EPDC_GDCLK__ENET2_RGMII_TD2 0x1 - MX7D_PAD_EPDC_GDOE__ENET2_RGMII_TD3 0x1 - MX7D_PAD_EPDC_GDRL__ENET2_RGMII_TX_CTL 0x1 - MX7D_PAD_EPDC_SDCE1__ENET2_RGMII_RXC 0x1 - MX7D_PAD_EPDC_SDCLK__ENET2_RGMII_RD0 0x1 - MX7D_PAD_EPDC_SDLE__ENET2_RGMII_RD1 0x1 - MX7D_PAD_EPDC_SDOE__ENET2_RGMII_RD2 0x1 - MX7D_PAD_EPDC_SDSHR__ENET2_RGMII_RD3 0x1 - MX7D_PAD_EPDC_SDCE0__ENET2_RGMII_RX_CTL 0x1 + MX7D_PAD_EPDC_GDSP__ENET2_RGMII_TXC 0x11 + MX7D_PAD_EPDC_SDCE2__ENET2_RGMII_TD0 0x11 + MX7D_PAD_EPDC_SDCE3__ENET2_RGMII_TD1 0x11 + MX7D_PAD_EPDC_GDCLK__ENET2_RGMII_TD2 0x11 + MX7D_PAD_EPDC_GDOE__ENET2_RGMII_TD3 0x11 + MX7D_PAD_EPDC_GDRL__ENET2_RGMII_TX_CTL 0x11 + MX7D_PAD_EPDC_SDCE1__ENET2_RGMII_RXC 0x11 + MX7D_PAD_EPDC_SDCLK__ENET2_RGMII_RD0 0x11 + MX7D_PAD_EPDC_SDLE__ENET2_RGMII_RD1 0x11 + MX7D_PAD_EPDC_SDOE__ENET2_RGMII_RD2 0x11 + MX7D_PAD_EPDC_SDSHR__ENET2_RGMII_RD3 0x11 + MX7D_PAD_EPDC_SDCE0__ENET2_RGMII_RX_CTL 0x11 >; }; diff --git a/arch/arm/boot/dts/imx7d-sdb.dts b/arch/arm/boot/dts/imx7d-sdb.dts index 44637cabcc56..255e64ba32e2 100644 --- a/arch/arm/boot/dts/imx7d-sdb.dts +++ b/arch/arm/boot/dts/imx7d-sdb.dts @@ -82,7 +82,7 @@ enable-active-high; }; - reg_usb_otg2_vbus: regulator-usb-otg1-vbus { + reg_usb_otg2_vbus: regulator-usb-otg2-vbus { compatible = "regulator-fixed"; regulator-name = "usb_otg2_vbus"; regulator-min-microvolt = <5000000>; diff --git a/arch/arm/boot/dts/r8a7791-porter.dts b/arch/arm/boot/dts/r8a7791-porter.dts index 95da5cb9d37a..b6ebe79261c6 100644 --- a/arch/arm/boot/dts/r8a7791-porter.dts +++ b/arch/arm/boot/dts/r8a7791-porter.dts @@ -427,7 +427,7 @@ "dclkin.0", "dclkin.1"; ports { - port@1 { + port@0 { endpoint { remote-endpoint = <&adv7511_in>; }; diff --git a/arch/arm/boot/dts/rk3036.dtsi b/arch/arm/boot/dts/rk3036.dtsi index 4916c65e0ace..5c0a76493d22 100644 --- a/arch/arm/boot/dts/rk3036.dtsi +++ b/arch/arm/boot/dts/rk3036.dtsi @@ -261,7 +261,7 @@ max-frequency = <37500000>; clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>, <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; interrupts = <GIC_SPI 15 IRQ_TYPE_LEVEL_HIGH>; resets = <&cru SRST_SDIO>; @@ -279,7 +279,7 @@ max-frequency = <37500000>; clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; default-sample-phase = <158>; disable-wp; dmas = <&pdma 12>; diff --git a/arch/arm/boot/dts/rk322x.dtsi b/arch/arm/boot/dts/rk322x.dtsi index 06814421eed2..f59f7cc62be6 100644 --- a/arch/arm/boot/dts/rk322x.dtsi +++ b/arch/arm/boot/dts/rk322x.dtsi @@ -600,7 +600,7 @@ interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>, <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_bus4>; @@ -613,7 +613,7 @@ interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>, <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; pinctrl-names = "default"; pinctrl-0 = <&sdio_clk &sdio_cmd &sdio_bus4>; @@ -628,7 +628,7 @@ max-frequency = <37500000>; clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; bus-width = <8>; default-sample-phase = <158>; fifo-depth = <0x100>; diff --git a/arch/arm/boot/dts/rk3288.dtsi b/arch/arm/boot/dts/rk3288.dtsi index 356ed1e62452..f7a951afd281 100644 --- a/arch/arm/boot/dts/rk3288.dtsi +++ b/arch/arm/boot/dts/rk3288.dtsi @@ -927,6 +927,7 @@ i2s: i2s@ff890000 { compatible = "rockchip,rk3288-i2s", "rockchip,rk3066-i2s"; reg = <0x0 0xff890000 0x0 0x10000>; + #sound-dai-cells = <0>; interrupts = <GIC_SPI 53 IRQ_TYPE_LEVEL_HIGH>; #address-cells = <1>; #size-cells = <0>; @@ -1122,6 +1123,7 @@ compatible = "rockchip,rk3288-dw-hdmi"; reg = <0x0 0xff980000 0x0 0x20000>; reg-io-width = <4>; + #sound-dai-cells = <0>; rockchip,grf = <&grf>; interrupts = <GIC_SPI 103 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru PCLK_HDMI_CTRL>, <&cru SCLK_HDMI_HDCP>; diff --git a/arch/arm/boot/dts/socfpga.dtsi b/arch/arm/boot/dts/socfpga.dtsi index 7e24dc8e82d4..8d9f42a422cb 100644 --- a/arch/arm/boot/dts/socfpga.dtsi +++ b/arch/arm/boot/dts/socfpga.dtsi @@ -827,7 +827,7 @@ timer@fffec600 { compatible = "arm,cortex-a9-twd-timer"; reg = <0xfffec600 0x100>; - interrupts = <1 13 0xf04>; + interrupts = <1 13 0xf01>; clocks = <&mpu_periph_clk>; }; diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index bc8d4bbd82e2..9342904cccca 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -536,4 +536,14 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm +#ifdef CONFIG_KPROBES +#define _ASM_NOKPROBE(entry) \ + .pushsection "_kprobe_blacklist", "aw" ; \ + .balign 4 ; \ + .long entry; \ + .popsection +#else +#define _ASM_NOKPROBE(entry) +#endif + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index eb46fc81a440..08cd720eae01 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -221,6 +221,22 @@ static inline unsigned int kvm_get_vmid_bits(void) return 8; } +/* + * We are not in the kvm->srcu critical section most of the time, so we take + * the SRCU read lock here. Since we copy the data from the user page, we + * can immediately drop the lock again. + */ +static inline int kvm_read_guest_lock(struct kvm *kvm, + gpa_t gpa, void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_read_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + static inline void *kvm_get_hyp_vector(void) { return kvm_ksym_ref(__kvm_hyp_vector); diff --git a/arch/arm/include/asm/vdso.h b/arch/arm/include/asm/vdso.h index 9c99e817535e..5b85889f82ee 100644 --- a/arch/arm/include/asm/vdso.h +++ b/arch/arm/include/asm/vdso.h @@ -12,8 +12,6 @@ struct mm_struct; void arm_install_vdso(struct mm_struct *mm, unsigned long addr); -extern char vdso_start, vdso_end; - extern unsigned int vdso_total_pages; #else /* CONFIG_VDSO */ diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index b8dc3b516f93..f702f2b37052 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -19,6 +19,7 @@ #include <linux/uaccess.h> #include <linux/hardirq.h> #include <linux/kdebug.h> +#include <linux/kprobes.h> #include <linux/module.h> #include <linux/kexec.h> #include <linux/bug.h> @@ -417,7 +418,8 @@ void unregister_undef_hook(struct undef_hook *hook) raw_spin_unlock_irqrestore(&undef_lock, flags); } -static int call_undef_hook(struct pt_regs *regs, unsigned int instr) +static nokprobe_inline +int call_undef_hook(struct pt_regs *regs, unsigned int instr) { struct undef_hook *hook; unsigned long flags; @@ -490,6 +492,7 @@ die_sig: arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6); } +NOKPROBE_SYMBOL(do_undefinstr) /* * Handle FIQ similarly to NMI on x86 systems. diff --git a/arch/arm/kernel/vdso.c b/arch/arm/kernel/vdso.c index a4d6dc0f2427..f4dd7f9663c1 100644 --- a/arch/arm/kernel/vdso.c +++ b/arch/arm/kernel/vdso.c @@ -39,6 +39,8 @@ static struct page **vdso_text_pagelist; +extern char vdso_start[], vdso_end[]; + /* Total number of pages needed for the data and text portions of the VDSO. */ unsigned int vdso_total_pages __ro_after_init; @@ -197,13 +199,13 @@ static int __init vdso_init(void) unsigned int text_pages; int i; - if (memcmp(&vdso_start, "\177ELF", 4)) { + if (memcmp(vdso_start, "\177ELF", 4)) { pr_err("VDSO is not a valid ELF object!\n"); return -ENOEXEC; } - text_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; - pr_debug("vdso: %i text pages at base %p\n", text_pages, &vdso_start); + text_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; + pr_debug("vdso: %i text pages at base %p\n", text_pages, vdso_start); /* Allocate the VDSO text pagelist */ vdso_text_pagelist = kcalloc(text_pages, sizeof(struct page *), @@ -218,7 +220,7 @@ static int __init vdso_init(void) for (i = 0; i < text_pages; i++) { struct page *page; - page = virt_to_page(&vdso_start + i * PAGE_SIZE); + page = virt_to_page(vdso_start + i * PAGE_SIZE); vdso_text_pagelist[i] = page; } @@ -229,7 +231,7 @@ static int __init vdso_init(void) cntvct_ok = cntvct_functional(); - patch_vdso(&vdso_start); + patch_vdso(vdso_start); return 0; } diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index df73914e81c8..746e7801dcdf 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -38,6 +38,7 @@ ENTRY(__get_user_1) mov r0, #0 ret lr ENDPROC(__get_user_1) +_ASM_NOKPROBE(__get_user_1) ENTRY(__get_user_2) check_uaccess r0, 2, r1, r2, __get_user_bad @@ -58,6 +59,7 @@ rb .req r0 mov r0, #0 ret lr ENDPROC(__get_user_2) +_ASM_NOKPROBE(__get_user_2) ENTRY(__get_user_4) check_uaccess r0, 4, r1, r2, __get_user_bad @@ -65,6 +67,7 @@ ENTRY(__get_user_4) mov r0, #0 ret lr ENDPROC(__get_user_4) +_ASM_NOKPROBE(__get_user_4) ENTRY(__get_user_8) check_uaccess r0, 8, r1, r2, __get_user_bad8 @@ -78,6 +81,7 @@ ENTRY(__get_user_8) mov r0, #0 ret lr ENDPROC(__get_user_8) +_ASM_NOKPROBE(__get_user_8) #ifdef __ARMEB__ ENTRY(__get_user_32t_8) @@ -91,6 +95,7 @@ ENTRY(__get_user_32t_8) mov r0, #0 ret lr ENDPROC(__get_user_32t_8) +_ASM_NOKPROBE(__get_user_32t_8) ENTRY(__get_user_64t_1) check_uaccess r0, 1, r1, r2, __get_user_bad8 @@ -98,6 +103,7 @@ ENTRY(__get_user_64t_1) mov r0, #0 ret lr ENDPROC(__get_user_64t_1) +_ASM_NOKPROBE(__get_user_64t_1) ENTRY(__get_user_64t_2) check_uaccess r0, 2, r1, r2, __get_user_bad8 @@ -114,6 +120,7 @@ rb .req r0 mov r0, #0 ret lr ENDPROC(__get_user_64t_2) +_ASM_NOKPROBE(__get_user_64t_2) ENTRY(__get_user_64t_4) check_uaccess r0, 4, r1, r2, __get_user_bad8 @@ -121,6 +128,7 @@ ENTRY(__get_user_64t_4) mov r0, #0 ret lr ENDPROC(__get_user_64t_4) +_ASM_NOKPROBE(__get_user_64t_4) #endif __get_user_bad8: @@ -131,6 +139,8 @@ __get_user_bad: ret lr ENDPROC(__get_user_bad) ENDPROC(__get_user_bad8) +_ASM_NOKPROBE(__get_user_bad) +_ASM_NOKPROBE(__get_user_bad8) .pushsection __ex_table, "a" .long 1b, __get_user_bad diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c index a3e78074be70..62eb7d668890 100644 --- a/arch/arm/mach-davinci/board-omapl138-hawk.c +++ b/arch/arm/mach-davinci/board-omapl138-hawk.c @@ -127,8 +127,8 @@ static struct gpiod_lookup_table mmc_gpios_table = { .dev_id = "da830-mmc.0", .table = { /* CD: gpio3_12: gpio60: chip 1 contains gpio range 32-63*/ - GPIO_LOOKUP("davinci_gpio.1", 28, "cd", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("davinci_gpio.1", 29, "wp", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", 28, "cd", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", 29, "wp", GPIO_ACTIVE_LOW), }, }; diff --git a/arch/arm/mach-omap1/clock.c b/arch/arm/mach-omap1/clock.c index 43e3e188f521..fa512413a471 100644 --- a/arch/arm/mach-omap1/clock.c +++ b/arch/arm/mach-omap1/clock.c @@ -1011,17 +1011,17 @@ static int clk_debugfs_register_one(struct clk *c) return -ENOMEM; c->dent = d; - d = debugfs_create_u8("usecount", S_IRUGO, c->dent, (u8 *)&c->usecount); + d = debugfs_create_u8("usecount", S_IRUGO, c->dent, &c->usecount); if (!d) { err = -ENOMEM; goto err_out; } - d = debugfs_create_u32("rate", S_IRUGO, c->dent, (u32 *)&c->rate); + d = debugfs_create_ulong("rate", S_IRUGO, c->dent, &c->rate); if (!d) { err = -ENOMEM; goto err_out; } - d = debugfs_create_x32("flags", S_IRUGO, c->dent, (u32 *)&c->flags); + d = debugfs_create_x8("flags", S_IRUGO, c->dent, &c->flags); if (!d) { err = -ENOMEM; goto err_out; diff --git a/arch/arm/mach-omap2/omap-wakeupgen.c b/arch/arm/mach-omap2/omap-wakeupgen.c index 4bb6751864a5..fc5fb776a710 100644 --- a/arch/arm/mach-omap2/omap-wakeupgen.c +++ b/arch/arm/mach-omap2/omap-wakeupgen.c @@ -299,8 +299,6 @@ static void irq_save_context(void) if (soc_is_dra7xx()) return; - if (!sar_base) - sar_base = omap4_get_sar_ram_base(); if (wakeupgen_ops && wakeupgen_ops->save_context) wakeupgen_ops->save_context(); } @@ -598,6 +596,8 @@ static int __init wakeupgen_init(struct device_node *node, irq_hotplug_init(); irq_pm_init(); + sar_base = omap4_get_sar_ram_base(); + return 0; } IRQCHIP_DECLARE(ti_wakeupgen, "ti,omap4-wugen-mpu", wakeupgen_init); diff --git a/arch/arm/mach-omap2/pm.c b/arch/arm/mach-omap2/pm.c index 366158a54fcd..6f68576e5695 100644 --- a/arch/arm/mach-omap2/pm.c +++ b/arch/arm/mach-omap2/pm.c @@ -186,7 +186,7 @@ static void omap_pm_end(void) cpu_idle_poll_ctrl(false); } -static void omap_pm_finish(void) +static void omap_pm_wake(void) { if (soc_is_omap34xx()) omap_prcm_irq_complete(); @@ -196,7 +196,7 @@ static const struct platform_suspend_ops omap_pm_ops = { .begin = omap_pm_begin, .end = omap_pm_end, .enter = omap_pm_enter, - .finish = omap_pm_finish, + .wake = omap_pm_wake, .valid = suspend_valid_only_mem, }; diff --git a/arch/arm/mach-omap2/timer.c b/arch/arm/mach-omap2/timer.c index ece09c9461f7..d61fbd7a2840 100644 --- a/arch/arm/mach-omap2/timer.c +++ b/arch/arm/mach-omap2/timer.c @@ -156,12 +156,6 @@ static struct clock_event_device clockevent_gpt = { .tick_resume = omap2_gp_timer_shutdown, }; -static struct property device_disabled = { - .name = "status", - .length = sizeof("disabled"), - .value = "disabled", -}; - static const struct of_device_id omap_timer_match[] __initconst = { { .compatible = "ti,omap2420-timer", }, { .compatible = "ti,omap3430-timer", }, @@ -203,8 +197,17 @@ static struct device_node * __init omap_get_timer_dt(const struct of_device_id * of_get_property(np, "ti,timer-secure", NULL))) continue; - if (!of_device_is_compatible(np, "ti,omap-counter32k")) - of_add_property(np, &device_disabled); + if (!of_device_is_compatible(np, "ti,omap-counter32k")) { + struct property *prop; + + prop = kzalloc(sizeof(*prop), GFP_KERNEL); + if (!prop) + return NULL; + prop->name = "status"; + prop->value = "disabled"; + prop->length = strlen(prop->value); + of_add_property(np, prop); + } return np; } diff --git a/arch/arm/mach-orion5x/Kconfig b/arch/arm/mach-orion5x/Kconfig index 2a7bb6ccdcb7..a810f4dd34b1 100644 --- a/arch/arm/mach-orion5x/Kconfig +++ b/arch/arm/mach-orion5x/Kconfig @@ -58,7 +58,6 @@ config MACH_KUROBOX_PRO config MACH_DNS323 bool "D-Link DNS-323" - select GENERIC_NET_UTILS select I2C_BOARDINFO if I2C help Say 'Y' here if you want your kernel to support the @@ -66,7 +65,6 @@ config MACH_DNS323 config MACH_TS209 bool "QNAP TS-109/TS-209" - select GENERIC_NET_UTILS help Say 'Y' here if you want your kernel to support the QNAP TS-109/TS-209 platform. @@ -101,7 +99,6 @@ config MACH_LINKSTATION_LS_HGL config MACH_TS409 bool "QNAP TS-409" - select GENERIC_NET_UTILS help Say 'Y' here if you want your kernel to support the QNAP TS-409 platform. diff --git a/arch/arm/mach-orion5x/dns323-setup.c b/arch/arm/mach-orion5x/dns323-setup.c index cd483bfb5ca8..d13344b2ddcd 100644 --- a/arch/arm/mach-orion5x/dns323-setup.c +++ b/arch/arm/mach-orion5x/dns323-setup.c @@ -173,10 +173,42 @@ static struct mv643xx_eth_platform_data dns323_eth_data = { .phy_addr = MV643XX_ETH_PHY_ADDR(8), }; +/* dns323_parse_hex_*() taken from tsx09-common.c; should a common copy of these + * functions be kept somewhere? + */ +static int __init dns323_parse_hex_nibble(char n) +{ + if (n >= '0' && n <= '9') + return n - '0'; + + if (n >= 'A' && n <= 'F') + return n - 'A' + 10; + + if (n >= 'a' && n <= 'f') + return n - 'a' + 10; + + return -1; +} + +static int __init dns323_parse_hex_byte(const char *b) +{ + int hi; + int lo; + + hi = dns323_parse_hex_nibble(b[0]); + lo = dns323_parse_hex_nibble(b[1]); + + if (hi < 0 || lo < 0) + return -1; + + return (hi << 4) | lo; +} + static int __init dns323_read_mac_addr(void) { u_int8_t addr[6]; - void __iomem *mac_page; + int i; + char *mac_page; /* MAC address is stored as a regular ol' string in /dev/mtdblock4 * (0x007d0000-0x00800000) starting at offset 196480 (0x2ff80). @@ -185,8 +217,23 @@ static int __init dns323_read_mac_addr(void) if (!mac_page) return -ENOMEM; - if (!mac_pton((__force const char *) mac_page, addr)) - goto error_fail; + /* Sanity check the string we're looking at */ + for (i = 0; i < 5; i++) { + if (*(mac_page + (i * 3) + 2) != ':') { + goto error_fail; + } + } + + for (i = 0; i < 6; i++) { + int byte; + + byte = dns323_parse_hex_byte(mac_page + (i * 3)); + if (byte < 0) { + goto error_fail; + } + + addr[i] = byte; + } iounmap(mac_page); printk("DNS-323: Found ethernet MAC address: %pM\n", addr); diff --git a/arch/arm/mach-orion5x/tsx09-common.c b/arch/arm/mach-orion5x/tsx09-common.c index 89774985d380..905d4f2dd0b8 100644 --- a/arch/arm/mach-orion5x/tsx09-common.c +++ b/arch/arm/mach-orion5x/tsx09-common.c @@ -53,12 +53,53 @@ struct mv643xx_eth_platform_data qnap_tsx09_eth_data = { .phy_addr = MV643XX_ETH_PHY_ADDR(8), }; +static int __init qnap_tsx09_parse_hex_nibble(char n) +{ + if (n >= '0' && n <= '9') + return n - '0'; + + if (n >= 'A' && n <= 'F') + return n - 'A' + 10; + + if (n >= 'a' && n <= 'f') + return n - 'a' + 10; + + return -1; +} + +static int __init qnap_tsx09_parse_hex_byte(const char *b) +{ + int hi; + int lo; + + hi = qnap_tsx09_parse_hex_nibble(b[0]); + lo = qnap_tsx09_parse_hex_nibble(b[1]); + + if (hi < 0 || lo < 0) + return -1; + + return (hi << 4) | lo; +} + static int __init qnap_tsx09_check_mac_addr(const char *addr_str) { u_int8_t addr[6]; + int i; - if (!mac_pton(addr_str, addr)) - return -1; + for (i = 0; i < 6; i++) { + int byte; + + /* + * Enforce "xx:xx:xx:xx:xx:xx\n" format. + */ + if (addr_str[(i * 3) + 2] != ((i < 5) ? ':' : '\n')) + return -1; + + byte = qnap_tsx09_parse_hex_byte(addr_str + (i * 3)); + if (byte < 0) + return -1; + addr[i] = byte; + } printk(KERN_INFO "tsx09: found ethernet mac address %pM\n", addr); @@ -77,12 +118,12 @@ void __init qnap_tsx09_find_mac_addr(u32 mem_base, u32 size) unsigned long addr; for (addr = mem_base; addr < (mem_base + size); addr += 1024) { - void __iomem *nor_page; + char *nor_page; int ret = 0; nor_page = ioremap(addr, 1024); if (nor_page != NULL) { - ret = qnap_tsx09_check_mac_addr((__force const char *)nor_page); + ret = qnap_tsx09_check_mac_addr(nor_page); iounmap(nor_page); } diff --git a/arch/arm/plat-omap/dmtimer.c b/arch/arm/plat-omap/dmtimer.c index 7a327bd32521..ebef8aacea83 100644 --- a/arch/arm/plat-omap/dmtimer.c +++ b/arch/arm/plat-omap/dmtimer.c @@ -890,11 +890,8 @@ static int omap_dm_timer_probe(struct platform_device *pdev) timer->irq = irq->start; timer->pdev = pdev; - /* Skip pm_runtime_enable for OMAP1 */ - if (!(timer->capability & OMAP_TIMER_NEEDS_RESET)) { - pm_runtime_enable(dev); - pm_runtime_irq_safe(dev); - } + pm_runtime_enable(dev); + pm_runtime_irq_safe(dev); if (!timer->reserved) { ret = pm_runtime_get_sync(dev); diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index bcdecc25461b..b2aa9b32bff2 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -165,13 +165,14 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { unsigned long flags; struct kprobe *p = &op->kp; - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe_ctlblk *kcb; /* Save skipped registers */ regs->ARM_pc = (unsigned long)op->kp.addr; regs->ARM_ORIG_r0 = ~0UL; local_irq_save(flags); + kcb = get_kprobe_ctlblk(); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); @@ -191,6 +192,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) local_irq_restore(flags); } +NOKPROBE_SYMBOL(optimized_callback) int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig) { diff --git a/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi b/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi index 4220fbdcb24a..ff5c4c47b22b 100644 --- a/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi +++ b/arch/arm64/boot/dts/cavium/thunder2-99xx.dtsi @@ -98,7 +98,7 @@ clock-output-names = "clk125mhz"; }; - pci { + pcie@30000000 { compatible = "pci-host-ecam-generic"; device_type = "pci"; #interrupt-cells = <1>; @@ -118,6 +118,7 @@ ranges = <0x02000000 0 0x40000000 0 0x40000000 0 0x20000000 0x43000000 0x40 0x00000000 0x40 0x00000000 0x20 0x00000000>; + bus-range = <0 0xff>; interrupt-map-mask = <0 0 0 7>; interrupt-map = /* addr pin ic icaddr icintr */ diff --git a/arch/arm64/boot/dts/qcom/msm8996.dtsi b/arch/arm64/boot/dts/qcom/msm8996.dtsi index 887b61c872dd..ab00be277c6f 100644 --- a/arch/arm64/boot/dts/qcom/msm8996.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8996.dtsi @@ -484,8 +484,8 @@ blsp2_spi5: spi@075ba000{ compatible = "qcom,spi-qup-v2.2.1"; reg = <0x075ba000 0x600>; - interrupts = <GIC_SPI 107 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&gcc GCC_BLSP2_QUP5_SPI_APPS_CLK>, + interrupts = <GIC_SPI 106 IRQ_TYPE_LEVEL_HIGH>; + clocks = <&gcc GCC_BLSP2_QUP6_SPI_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>; clock-names = "core", "iface"; pinctrl-names = "default", "sleep"; diff --git a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts index d4f80786e7c2..28257724a56e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3328-rock64.dts @@ -136,11 +136,12 @@ phy-mode = "rgmii"; pinctrl-names = "default"; pinctrl-0 = <&rgmiim1_pins>; + snps,force_thresh_dma_mode; snps,reset-gpio = <&gpio1 RK_PC2 GPIO_ACTIVE_LOW>; snps,reset-active-low; snps,reset-delays-us = <0 10000 50000>; - tx_delay = <0x26>; - rx_delay = <0x11>; + tx_delay = <0x24>; + rx_delay = <0x18>; status = "okay"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3328.dtsi b/arch/arm64/boot/dts/rockchip/rk3328.dtsi index 41d61840fb99..d70e409e2b0c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3328.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3328.dtsi @@ -683,7 +683,7 @@ interrupts = <GIC_SPI 12 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru HCLK_SDMMC>, <&cru SCLK_SDMMC>, <&cru SCLK_SDMMC_DRV>, <&cru SCLK_SDMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; status = "disabled"; }; @@ -694,7 +694,7 @@ interrupts = <GIC_SPI 13 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru HCLK_SDIO>, <&cru SCLK_SDIO>, <&cru SCLK_SDIO_DRV>, <&cru SCLK_SDIO_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; status = "disabled"; }; @@ -705,7 +705,7 @@ interrupts = <GIC_SPI 14 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cru HCLK_EMMC>, <&cru SCLK_EMMC>, <&cru SCLK_EMMC_DRV>, <&cru SCLK_EMMC_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3368.dtsi b/arch/arm64/boot/dts/rockchip/rk3368.dtsi index 1070c8264c13..2313aea0e69e 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3368.dtsi @@ -257,7 +257,7 @@ max-frequency = <150000000>; clocks = <&cru HCLK_SDIO0>, <&cru SCLK_SDIO0>, <&cru SCLK_SDIO0_DRV>, <&cru SCLK_SDIO0_SAMPLE>; - clock-names = "biu", "ciu", "ciu_drv", "ciu_sample"; + clock-names = "biu", "ciu", "ciu-drive", "ciu-sample"; fifo-depth = <0x100>; interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>; resets = <&cru SRST_SDIO0>; diff --git a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi index 199a5118b20d..264a6bb60c53 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-gru.dtsi @@ -406,8 +406,9 @@ wlan_pd_n: wlan-pd-n { compatible = "regulator-fixed"; regulator-name = "wlan_pd_n"; + pinctrl-names = "default"; + pinctrl-0 = <&wlan_module_reset_l>; - /* Note the wlan_module_reset_l pinctrl */ enable-active-high; gpio = <&gpio1 11 GPIO_ACTIVE_HIGH>; @@ -940,12 +941,6 @@ ap_i2c_audio: &i2c8 { pinctrl-0 = < &ap_pwroff /* AP will auto-assert this when in S3 */ &clk_32k /* This pin is always 32k on gru boards */ - - /* - * We want this driven low ASAP; firmware should help us, but - * we can help ourselves too. - */ - &wlan_module_reset_l >; pcfg_output_low: pcfg-output-low { @@ -1125,12 +1120,7 @@ ap_i2c_audio: &i2c8 { }; wlan_module_reset_l: wlan-module-reset-l { - /* - * We want this driven low ASAP (As {Soon,Strongly} As - * Possible), to avoid leakage through the powered-down - * WiFi. - */ - rockchip,pins = <1 11 RK_FUNC_GPIO &pcfg_output_low>; + rockchip,pins = <1 11 RK_FUNC_GPIO &pcfg_pull_none>; }; bt_host_wake_l: bt-host-wake-l { diff --git a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi index 0f873c897d0d..ce592a4c0c4c 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-sapphire.dtsi @@ -457,7 +457,7 @@ assigned-clocks = <&cru SCLK_PCIEPHY_REF>; assigned-clock-parents = <&cru SCLK_PCIEPHY_REF100M>; assigned-clock-rates = <100000000>; - ep-gpios = <&gpio3 RK_PB5 GPIO_ACTIVE_HIGH>; + ep-gpios = <&gpio2 RK_PA4 GPIO_ACTIVE_HIGH>; num-lanes = <4>; pinctrl-names = "default"; pinctrl-0 = <&pcie_clkreqn_cpm>; diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 9ef0797380cb..f9b0b09153e0 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -117,7 +117,7 @@ static inline void atomic_and(int i, atomic_t *v) /* LSE atomics */ " mvn %w[i], %w[i]\n" " stclr %w[i], %[v]") - : [i] "+r" (w0), [v] "+Q" (v->counter) + : [i] "+&r" (w0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -135,7 +135,7 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v) \ /* LSE atomics */ \ " mvn %w[i], %w[i]\n" \ " ldclr" #mb " %w[i], %w[i], %[v]") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -161,7 +161,7 @@ static inline void atomic_sub(int i, atomic_t *v) /* LSE atomics */ " neg %w[i], %w[i]\n" " stadd %w[i], %[v]") - : [i] "+r" (w0), [v] "+Q" (v->counter) + : [i] "+&r" (w0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -180,7 +180,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], w30, %[v]\n" \ " add %w[i], %w[i], w30") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS , ##cl); \ \ @@ -207,7 +207,7 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ /* LSE atomics */ \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[i], %[v]") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -314,7 +314,7 @@ static inline void atomic64_and(long i, atomic64_t *v) /* LSE atomics */ " mvn %[i], %[i]\n" " stclr %[i], %[v]") - : [i] "+r" (x0), [v] "+Q" (v->counter) + : [i] "+&r" (x0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -332,7 +332,7 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ /* LSE atomics */ \ " mvn %[i], %[i]\n" \ " ldclr" #mb " %[i], %[i], %[v]") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -358,7 +358,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) /* LSE atomics */ " neg %[i], %[i]\n" " stadd %[i], %[v]") - : [i] "+r" (x0), [v] "+Q" (v->counter) + : [i] "+&r" (x0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -377,7 +377,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], x30, %[v]\n" \ " add %[i], %[i], x30") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -404,7 +404,7 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ /* LSE atomics */ \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %[i], %[v]") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -435,7 +435,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) " sub x30, x30, %[ret]\n" " cbnz x30, 1b\n" "2:") - : [ret] "+r" (x0), [v] "+Q" (v->counter) + : [ret] "+&r" (x0), [v] "+Q" (v->counter) : : __LL_SC_CLOBBERS, "cc", "memory"); @@ -516,7 +516,7 @@ static inline long __cmpxchg_double##name(unsigned long old1, \ " eor %[old1], %[old1], %[oldval1]\n" \ " eor %[old2], %[old2], %[oldval2]\n" \ " orr %[old1], %[old1], %[old2]") \ - : [old1] "+r" (x0), [old2] "+r" (x1), \ + : [old1] "+&r" (x0), [old2] "+&r" (x1), \ [v] "+Q" (*(unsigned long *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 2d6d4bd9de52..fe55b516f018 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -309,6 +309,22 @@ static inline unsigned int kvm_get_vmid_bits(void) return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; } +/* + * We are not in the kvm->srcu critical section most of the time, so we take + * the SRCU read lock here. Since we copy the data from the user page, we + * can immediately drop the lock again. + */ +static inline int kvm_read_guest_lock(struct kvm *kvm, + gpa_t gpa, void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_read_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + #ifdef CONFIG_HARDEN_BRANCH_PREDICTOR #include <asm/mmu.h> diff --git a/arch/arm64/include/asm/stacktrace.h b/arch/arm64/include/asm/stacktrace.h index 6ad30776e984..99390755c0c4 100644 --- a/arch/arm64/include/asm/stacktrace.h +++ b/arch/arm64/include/asm/stacktrace.h @@ -27,7 +27,7 @@ struct stackframe { unsigned long fp; unsigned long pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - unsigned int graph; + int graph; #endif }; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 52f15cd896e1..b5a28336c077 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -178,7 +178,7 @@ static int enable_smccc_arch_workaround_1(void *data) case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if (res.a0) + if ((int)res.a0 < 0) return 0; cb = call_hvc_arch_workaround_1; smccc_start = __smccc_workaround_1_hvc_start; @@ -188,7 +188,7 @@ static int enable_smccc_arch_workaround_1(void *data) case PSCI_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); - if (res.a0) + if ((int)res.a0 < 0) return 0; cb = call_smc_arch_workaround_1; smccc_start = __smccc_workaround_1_smc_start; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 9eaef51f83ff..1984e739f155 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -914,9 +914,9 @@ static void __armv8pmu_probe_pmu(void *info) int pmuver; dfr0 = read_sysreg(id_aa64dfr0_el1); - pmuver = cpuid_feature_extract_signed_field(dfr0, + pmuver = cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_PMUVER_SHIFT); - if (pmuver < 1) + if (pmuver == 0xf || pmuver == 0) return; probe->present = true; diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 76809ccd309c..d5718a060672 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -59,6 +59,11 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame) #ifdef CONFIG_FUNCTION_GRAPH_TRACER if (tsk->ret_stack && (frame->pc == (unsigned long)return_to_handler)) { + if (WARN_ON_ONCE(frame->graph == -1)) + return -EINVAL; + if (frame->graph < -1) + frame->graph += FTRACE_NOTRACE_DEPTH; + /* * This is a case where function graph tracer has * modified a return address (LR) in a stack frame diff --git a/arch/arm64/kernel/time.c b/arch/arm64/kernel/time.c index a4391280fba9..f258636273c9 100644 --- a/arch/arm64/kernel/time.c +++ b/arch/arm64/kernel/time.c @@ -52,7 +52,7 @@ unsigned long profile_pc(struct pt_regs *regs) frame.fp = regs->regs[29]; frame.pc = regs->pc; #ifdef CONFIG_FUNCTION_GRAPH_TRACER - frame.graph = -1; /* no task info */ + frame.graph = current->curr_ret_stack; #endif do { int ret = unwind_frame(NULL, &frame); diff --git a/arch/cris/include/arch-v10/arch/bug.h b/arch/cris/include/arch-v10/arch/bug.h index 905afeacfedf..06da9d49152a 100644 --- a/arch/cris/include/arch-v10/arch/bug.h +++ b/arch/cris/include/arch-v10/arch/bug.h @@ -44,18 +44,25 @@ struct bug_frame { * not be used like this with newer versions of gcc. */ #define BUG() \ +do { \ __asm__ __volatile__ ("clear.d [" __stringify(BUG_MAGIC) "]\n\t"\ "movu.w " __stringify(__LINE__) ",$r0\n\t"\ "jump 0f\n\t" \ ".section .rodata\n" \ "0:\t.string \"" __FILE__ "\"\n\t" \ - ".previous") + ".previous"); \ + unreachable(); \ +} while (0) #endif #else /* This just causes an oops. */ -#define BUG() (*(int *)0 = 0) +#define BUG() \ +do { \ + barrier_before_unreachable(); \ + __builtin_trap(); \ +} while (0) #endif diff --git a/arch/ia64/include/asm/bug.h b/arch/ia64/include/asm/bug.h index bd3eeb8d1cfa..66b37a532765 100644 --- a/arch/ia64/include/asm/bug.h +++ b/arch/ia64/include/asm/bug.h @@ -4,7 +4,11 @@ #ifdef CONFIG_BUG #define ia64_abort() __builtin_trap() -#define BUG() do { printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); ia64_abort(); } while (0) +#define BUG() do { \ + printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + barrier_before_unreachable(); \ + ia64_abort(); \ +} while (0) /* should this BUG be made generic? */ #define HAVE_ARCH_BUG diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index 85bba43e7d5d..658a8e06a69b 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -142,7 +142,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr, u64 virt_addr=simple_strtoull(buf, NULL, 16); int ret; - ret = get_user_pages(virt_addr, 1, FOLL_WRITE, NULL, NULL); + ret = get_user_pages_fast(virt_addr, 1, FOLL_WRITE, NULL); if (ret<=0) { #ifdef ERR_INJ_DEBUG printk("Virtual address %lx is not existing.\n",virt_addr); diff --git a/arch/m68k/coldfire/device.c b/arch/m68k/coldfire/device.c index 84938fdbbada..908d58347790 100644 --- a/arch/m68k/coldfire/device.c +++ b/arch/m68k/coldfire/device.c @@ -135,7 +135,11 @@ static struct platform_device mcf_fec0 = { .id = 0, .num_resources = ARRAY_SIZE(mcf_fec0_resources), .resource = mcf_fec0_resources, - .dev.platform_data = FEC_PDATA, + .dev = { + .dma_mask = &mcf_fec0.dev.coherent_dma_mask, + .coherent_dma_mask = DMA_BIT_MASK(32), + .platform_data = FEC_PDATA, + } }; #ifdef MCFFEC_BASE1 @@ -167,7 +171,11 @@ static struct platform_device mcf_fec1 = { .id = 1, .num_resources = ARRAY_SIZE(mcf_fec1_resources), .resource = mcf_fec1_resources, - .dev.platform_data = FEC_PDATA, + .dev = { + .dma_mask = &mcf_fec1.dev.coherent_dma_mask, + .coherent_dma_mask = DMA_BIT_MASK(32), + .platform_data = FEC_PDATA, + } }; #endif /* MCFFEC_BASE1 */ #endif /* CONFIG_FEC */ diff --git a/arch/m68k/include/asm/bug.h b/arch/m68k/include/asm/bug.h index b7e2bf1ba4a6..275dca1435bf 100644 --- a/arch/m68k/include/asm/bug.h +++ b/arch/m68k/include/asm/bug.h @@ -8,16 +8,19 @@ #ifndef CONFIG_SUN3 #define BUG() do { \ pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + barrier_before_unreachable(); \ __builtin_trap(); \ } while (0) #else #define BUG() do { \ pr_crit("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + barrier_before_unreachable(); \ panic("BUG!"); \ } while (0) #endif #else #define BUG() do { \ + barrier_before_unreachable(); \ __builtin_trap(); \ } while (0) #endif diff --git a/arch/mips/cavium-octeon/octeon-irq.c b/arch/mips/cavium-octeon/octeon-irq.c index d99f5242169e..b3aec101a65d 100644 --- a/arch/mips/cavium-octeon/octeon-irq.c +++ b/arch/mips/cavium-octeon/octeon-irq.c @@ -2271,7 +2271,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, parent_irq = irq_of_parse_and_map(ciu_node, 0); if (!parent_irq) { - pr_err("ERROR: Couldn't acquire parent_irq for %s\n.", + pr_err("ERROR: Couldn't acquire parent_irq for %s\n", ciu_node->name); return -EINVAL; } @@ -2283,7 +2283,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, addr = of_get_address(ciu_node, 0, NULL, NULL); if (!addr) { - pr_err("ERROR: Couldn't acquire reg(0) %s\n.", ciu_node->name); + pr_err("ERROR: Couldn't acquire reg(0) %s\n", ciu_node->name); return -EINVAL; } host_data->raw_reg = (u64)phys_to_virt( @@ -2291,7 +2291,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, addr = of_get_address(ciu_node, 1, NULL, NULL); if (!addr) { - pr_err("ERROR: Couldn't acquire reg(1) %s\n.", ciu_node->name); + pr_err("ERROR: Couldn't acquire reg(1) %s\n", ciu_node->name); return -EINVAL; } host_data->en_reg = (u64)phys_to_virt( @@ -2299,7 +2299,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, r = of_property_read_u32(ciu_node, "cavium,max-bits", &val); if (r) { - pr_err("ERROR: Couldn't read cavium,max-bits from %s\n.", + pr_err("ERROR: Couldn't read cavium,max-bits from %s\n", ciu_node->name); return r; } @@ -2309,7 +2309,7 @@ static int __init octeon_irq_init_cib(struct device_node *ciu_node, &octeon_irq_domain_cib_ops, host_data); if (!cib_domain) { - pr_err("ERROR: Couldn't irq_domain_add_linear()\n."); + pr_err("ERROR: Couldn't irq_domain_add_linear()\n"); return -ENOMEM; } diff --git a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h index aa3800c82332..d99ca862dae3 100644 --- a/arch/mips/include/asm/mach-ath79/ar71xx_regs.h +++ b/arch/mips/include/asm/mach-ath79/ar71xx_regs.h @@ -167,7 +167,7 @@ #define AR71XX_AHB_DIV_MASK 0x7 #define AR724X_PLL_REG_CPU_CONFIG 0x00 -#define AR724X_PLL_REG_PCIE_CONFIG 0x18 +#define AR724X_PLL_REG_PCIE_CONFIG 0x10 #define AR724X_PLL_FB_SHIFT 0 #define AR724X_PLL_FB_MASK 0x3ff diff --git a/arch/mips/include/asm/machine.h b/arch/mips/include/asm/machine.h index e0d9b373d415..f83879dadd1e 100644 --- a/arch/mips/include/asm/machine.h +++ b/arch/mips/include/asm/machine.h @@ -52,7 +52,7 @@ mips_machine_is_compatible(const struct mips_machine *mach, const void *fdt) if (!mach->matches) return NULL; - for (match = mach->matches; match->compatible; match++) { + for (match = mach->matches; match->compatible[0]; match++) { if (fdt_node_check_compatible(fdt, 0, match->compatible) == 0) return match; } diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index c552c20237d4..006105fb12fe 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -454,7 +454,7 @@ static int fpr_get_msa(struct task_struct *target, /* * Copy the floating-point context to the supplied NT_PRFPREG buffer. * Choose the appropriate helper for general registers, and then copy - * the FCSR register separately. + * the FCSR and FIR registers separately. */ static int fpr_get(struct task_struct *target, const struct user_regset *regset, @@ -462,6 +462,7 @@ static int fpr_get(struct task_struct *target, void *kbuf, void __user *ubuf) { const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); + const int fir_pos = fcr31_pos + sizeof(u32); int err; if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) @@ -474,6 +475,12 @@ static int fpr_get(struct task_struct *target, err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fpu.fcr31, fcr31_pos, fcr31_pos + sizeof(u32)); + if (err) + return err; + + err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &boot_cpu_data.fpu_id, + fir_pos, fir_pos + sizeof(u32)); return err; } @@ -522,7 +529,8 @@ static int fpr_set_msa(struct task_struct *target, /* * Copy the supplied NT_PRFPREG buffer to the floating-point context. * Choose the appropriate helper for general registers, and then copy - * the FCSR register separately. + * the FCSR register separately. Ignore the incoming FIR register + * contents though, as the register is read-only. * * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0', * which is supposed to have been guaranteed by the kernel before @@ -536,6 +544,7 @@ static int fpr_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); + const int fir_pos = fcr31_pos + sizeof(u32); u32 fcr31; int err; @@ -563,6 +572,11 @@ static int fpr_set(struct task_struct *target, ptrace_setfcr31(target, fcr31); } + if (count > 0) + err = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + fir_pos, + fir_pos + sizeof(u32)); + return err; } @@ -784,7 +798,7 @@ long arch_ptrace(struct task_struct *child, long request, fregs = get_fpu_regs(child); #ifdef CONFIG_32BIT - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even @@ -873,7 +887,7 @@ long arch_ptrace(struct task_struct *child, long request, init_fp_ctx(child); #ifdef CONFIG_32BIT - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 40e212d6b26b..4a157d3249ac 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -98,7 +98,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; } fregs = get_fpu_regs(child); - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even @@ -205,7 +205,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, sizeof(child->thread.fpu)); child->thread.fpu.fcr31 = 0; } - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 75fdeaa8c62f..9730ba734afe 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -45,7 +45,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU }, { "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU }, { "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU }, - { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, + { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, { "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU }, { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU }, { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU }, diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 6f534b209971..e12dfa48b478 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -851,9 +851,12 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) /* * Either no secondary cache or the available caches don't have the * subset property so we have to flush the primary caches - * explicitly + * explicitly. + * If we would need IPI to perform an INDEX-type operation, then + * we have to use the HIT-type alternative as IPI cannot be used + * here due to interrupts possibly being disabled. */ - if (size >= dcache_size) { + if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; @@ -890,7 +893,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) return; } - if (size >= dcache_size) { + if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 651974192c4d..b479926f0167 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -101,7 +101,8 @@ $(addprefix $(obj)/,$(zlib-y)): \ libfdt := fdt.c fdt_ro.c fdt_wip.c fdt_sw.c fdt_rw.c fdt_strerror.c libfdtheader := fdt.h libfdt.h libfdt_internal.h -$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o): \ +$(addprefix $(obj)/,$(libfdt) libfdt-wrapper.o simpleboot.o epapr.o opal.o \ + treeboot-akebono.o treeboot-currituck.o treeboot-iss4xx.o): \ $(addprefix $(obj)/,$(libfdtheader)) src-wlib-y := string.S crt0.S stdio.c decompress.c main.c \ diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index ccf10c2f8899..c3bdd2d8ec90 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -69,6 +69,27 @@ */ #define EX_R3 EX_DAR +#define STF_ENTRY_BARRIER_SLOT \ + STF_ENTRY_BARRIER_FIXUP_SECTION; \ + nop; \ + nop; \ + nop + +#define STF_EXIT_BARRIER_SLOT \ + STF_EXIT_BARRIER_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop + +/* + * r10 must be free to use, r13 must be paca + */ +#define INTERRUPT_TO_KERNEL \ + STF_ENTRY_BARRIER_SLOT + /* * Macros for annotating the expected destination of (h)rfid * @@ -85,16 +106,19 @@ rfid #define RFI_TO_USER \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback #define RFI_TO_USER_OR_KERNEL \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback #define RFI_TO_GUEST \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback @@ -103,21 +127,25 @@ hrfid #define HRFI_TO_USER \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_USER_OR_KERNEL \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_GUEST \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_UNKNOWN \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback @@ -249,6 +277,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define __EXCEPTION_PROLOG_1(area, extra, vec) \ OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ + INTERRUPT_TO_KERNEL; \ SAVE_CTR(r10, area); \ mfcr r9; \ extra(vec); \ diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 1e82eb3caabd..a9b64df34e2a 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -187,6 +187,22 @@ label##3: \ FTR_ENTRY_OFFSET label##1b-label##3b; \ .popsection; +#define STF_ENTRY_BARRIER_FIXUP_SECTION \ +953: \ + .pushsection __stf_entry_barrier_fixup,"a"; \ + .align 2; \ +954: \ + FTR_ENTRY_OFFSET 953b-954b; \ + .popsection; + +#define STF_EXIT_BARRIER_FIXUP_SECTION \ +955: \ + .pushsection __stf_exit_barrier_fixup,"a"; \ + .align 2; \ +956: \ + FTR_ENTRY_OFFSET 955b-956b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -199,6 +215,9 @@ label##3: \ #ifndef __ASSEMBLY__ #include <linux/types.h> +extern long stf_barrier_fallback; +extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; +extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; void apply_feature_fixups(void); diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index eca3f9c68907..5a740feb7bd7 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -337,6 +337,9 @@ #define H_CPU_CHAR_L1D_FLUSH_ORI30 (1ull << 61) // IBM bit 2 #define H_CPU_CHAR_L1D_FLUSH_TRIG2 (1ull << 60) // IBM bit 3 #define H_CPU_CHAR_L1D_THREAD_PRIV (1ull << 59) // IBM bit 4 +#define H_CPU_CHAR_BRANCH_HINTS_HONORED (1ull << 58) // IBM bit 5 +#define H_CPU_CHAR_THREAD_RECONFIG_CTRL (1ull << 57) // IBM bit 6 +#define H_CPU_CHAR_COUNT_CACHE_DISABLED (1ull << 56) // IBM bit 7 #define H_CPU_BEHAV_FAVOUR_SECURITY (1ull << 63) // IBM bit 0 #define H_CPU_BEHAV_L1D_FLUSH_PR (1ull << 62) // IBM bit 1 diff --git a/arch/powerpc/include/asm/irq_work.h b/arch/powerpc/include/asm/irq_work.h index c6d3078bd8c3..b8b0be8f1a07 100644 --- a/arch/powerpc/include/asm/irq_work.h +++ b/arch/powerpc/include/asm/irq_work.h @@ -6,5 +6,6 @@ static inline bool arch_irq_work_has_interrupt(void) { return true; } +extern void arch_irq_work_raise(void); #endif /* _ASM_POWERPC_IRQ_WORK_H */ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index b8366df50d19..e6bd59353e40 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -238,8 +238,7 @@ struct paca_struct { */ u64 exrfi[EX_SIZE] __aligned(0x80); void *rfi_flush_fallback_area; - u64 l1d_flush_congruence; - u64 l1d_flush_sets; + u64 l1d_flush_size; #endif }; diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h new file mode 100644 index 000000000000..44989b22383c --- /dev/null +++ b/arch/powerpc/include/asm/security_features.h @@ -0,0 +1,85 @@ +/* SPDX-License-Identifier: GPL-2.0+ */ +/* + * Security related feature bit definitions. + * + * Copyright 2018, Michael Ellerman, IBM Corporation. + */ + +#ifndef _ASM_POWERPC_SECURITY_FEATURES_H +#define _ASM_POWERPC_SECURITY_FEATURES_H + + +extern unsigned long powerpc_security_features; +extern bool rfi_flush; + +/* These are bit flags */ +enum stf_barrier_type { + STF_BARRIER_NONE = 0x1, + STF_BARRIER_FALLBACK = 0x2, + STF_BARRIER_EIEIO = 0x4, + STF_BARRIER_SYNC_ORI = 0x8, +}; + +void setup_stf_barrier(void); +void do_stf_barrier_fixups(enum stf_barrier_type types); + +static inline void security_ftr_set(unsigned long feature) +{ + powerpc_security_features |= feature; +} + +static inline void security_ftr_clear(unsigned long feature) +{ + powerpc_security_features &= ~feature; +} + +static inline bool security_ftr_enabled(unsigned long feature) +{ + return !!(powerpc_security_features & feature); +} + + +// Features indicating support for Spectre/Meltdown mitigations + +// The L1-D cache can be flushed with ori r30,r30,0 +#define SEC_FTR_L1D_FLUSH_ORI30 0x0000000000000001ull + +// The L1-D cache can be flushed with mtspr 882,r0 (aka SPRN_TRIG2) +#define SEC_FTR_L1D_FLUSH_TRIG2 0x0000000000000002ull + +// ori r31,r31,0 acts as a speculation barrier +#define SEC_FTR_SPEC_BAR_ORI31 0x0000000000000004ull + +// Speculation past bctr is disabled +#define SEC_FTR_BCCTRL_SERIALISED 0x0000000000000008ull + +// Entries in L1-D are private to a SMT thread +#define SEC_FTR_L1D_THREAD_PRIV 0x0000000000000010ull + +// Indirect branch prediction cache disabled +#define SEC_FTR_COUNT_CACHE_DISABLED 0x0000000000000020ull + + +// Features indicating need for Spectre/Meltdown mitigations + +// The L1-D cache should be flushed on MSR[HV] 1->0 transition (hypervisor to guest) +#define SEC_FTR_L1D_FLUSH_HV 0x0000000000000040ull + +// The L1-D cache should be flushed on MSR[PR] 0->1 transition (kernel to userspace) +#define SEC_FTR_L1D_FLUSH_PR 0x0000000000000080ull + +// A speculation barrier should be used for bounds checks (Spectre variant 1) +#define SEC_FTR_BNDS_CHK_SPEC_BAR 0x0000000000000100ull + +// Firmware configuration indicates user favours security over performance +#define SEC_FTR_FAVOUR_SECURITY 0x0000000000000200ull + + +// Features enabled by default +#define SEC_FTR_DEFAULT \ + (SEC_FTR_L1D_FLUSH_HV | \ + SEC_FTR_L1D_FLUSH_PR | \ + SEC_FTR_BNDS_CHK_SPEC_BAR | \ + SEC_FTR_FAVOUR_SECURITY) + +#endif /* _ASM_POWERPC_SECURITY_FEATURES_H */ diff --git a/arch/powerpc/include/asm/setup.h b/arch/powerpc/include/asm/setup.h index 469b7fdc9be4..bbcdf929be54 100644 --- a/arch/powerpc/include/asm/setup.h +++ b/arch/powerpc/include/asm/setup.h @@ -49,7 +49,7 @@ enum l1d_flush_type { L1D_FLUSH_MTTRIG = 0x8, }; -void __init setup_rfi_flush(enum l1d_flush_type, bool enable); +void setup_rfi_flush(enum l1d_flush_type, bool enable); void do_rfi_flush_fixups(enum l1d_flush_type types); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 6c6cce937dd8..1479c61e29c5 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -42,7 +42,7 @@ obj-$(CONFIG_VDSO32) += vdso32/ obj-$(CONFIG_PPC_WATCHDOG) += watchdog.o obj-$(CONFIG_HAVE_HW_BREAKPOINT) += hw_breakpoint.o obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_ppc970.o cpu_setup_pa6t.o -obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o +obj-$(CONFIG_PPC_BOOK3S_64) += cpu_setup_power.o security.o obj-$(CONFIG_PPC_BOOK3S_64) += mce.o mce_power.o obj-$(CONFIG_PPC_BOOK3E_64) += exceptions-64e.o idle_book3e.o obj-$(CONFIG_PPC64) += vdso64/ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 90c2d96aff6f..a2cb40098d7c 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -240,8 +240,7 @@ int main(void) OFFSET(PACA_IN_NMI, paca_struct, in_nmi); OFFSET(PACA_RFI_FLUSH_FALLBACK_AREA, paca_struct, rfi_flush_fallback_area); OFFSET(PACA_EXRFI, paca_struct, exrfi); - OFFSET(PACA_L1D_FLUSH_CONGRUENCE, paca_struct, l1d_flush_congruence); - OFFSET(PACA_L1D_FLUSH_SETS, paca_struct, l1d_flush_sets); + OFFSET(PACA_L1D_FLUSH_SIZE, paca_struct, l1d_flush_size); #endif OFFSET(PACAHWCPUID, paca_struct, hw_cpu_id); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 679bbe714e85..9daede99c131 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -28,6 +28,7 @@ _GLOBAL(__setup_cpu_power7) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 @@ -42,6 +43,7 @@ _GLOBAL(__restore_cpu_power7) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 @@ -59,6 +61,7 @@ _GLOBAL(__setup_cpu_power8) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH li r4,0 /* LPES = 0 */ @@ -81,6 +84,7 @@ _GLOBAL(__restore_cpu_power8) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH li r4,0 /* LPES = 0 */ @@ -103,6 +107,7 @@ _GLOBAL(__setup_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mtspr SPRN_PID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 @@ -128,6 +133,7 @@ _GLOBAL(__restore_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mtspr SPRN_PID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index f047ae1b6271..2dba206b065a 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -137,6 +137,7 @@ static void __restore_cpu_cpufeatures(void) if (hv_mode) { mtspr(SPRN_LPID, 0); mtspr(SPRN_HFSCR, system_registers.hfscr); + mtspr(SPRN_PCR, 0); } mtspr(SPRN_FSCR, system_registers.fscr); diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f9ca4bb3d48e..c09f0a6f8495 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -825,7 +825,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -EXC_REAL_MASKABLE(decrementer, 0x900, 0x80) +EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80) EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900) TRAMP_KVM(PACA_EXGEN, 0x900) EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) @@ -901,6 +901,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) mtctr r13; \ GET_PACA(r13); \ std r10,PACA_EXGEN+EX_R10(r13); \ + INTERRUPT_TO_KERNEL; \ KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \ HMT_MEDIUM; \ mfctr r9; @@ -909,7 +910,8 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) #define SYSCALL_KVMTEST \ HMT_MEDIUM; \ mr r9,r13; \ - GET_PACA(r13); + GET_PACA(r13); \ + INTERRUPT_TO_KERNEL; #endif #define LOAD_SYSCALL_HANDLER(reg) \ @@ -1434,45 +1436,56 @@ masked_##_H##interrupt: \ b .; \ MASKED_DEC_HANDLER(_H) +TRAMP_REAL_BEGIN(stf_barrier_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + sync + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ori 31,31,0 + .rept 14 + b 1f +1: + .endr + blr + TRAMP_REAL_BEGIN(rfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); std r9,PACA_EXRFI+EX_R9(r13) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) - std r12,PACA_EXRFI+EX_R12(r13) - std r8,PACA_EXRFI+EX_R13(r13) mfctr r9 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SETS(r13) - ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ - addi r12,r12,8 + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ mtctr r11 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ /* order ld/st prior to dcbt stop all streams with flushing */ sync -1: li r8,0 - .rept 8 /* 8-way set associative */ - ldx r11,r10,r8 - add r8,r8,r12 - xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not - add r8,r8,r11 // Add 0, this creates a dependency on the ldx - .endr - addi r10,r10,128 /* 128 byte cache line */ + + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 bdnz 1b mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) ld r11,PACA_EXRFI+EX_R11(r13) - ld r12,PACA_EXRFI+EX_R12(r13) - ld r8,PACA_EXRFI+EX_R13(r13) GET_SCRATCH0(r13); rfid @@ -1482,39 +1495,37 @@ TRAMP_REAL_BEGIN(hrfi_flush_fallback) std r9,PACA_EXRFI+EX_R9(r13) std r10,PACA_EXRFI+EX_R10(r13) std r11,PACA_EXRFI+EX_R11(r13) - std r12,PACA_EXRFI+EX_R12(r13) - std r8,PACA_EXRFI+EX_R13(r13) mfctr r9 ld r10,PACA_RFI_FLUSH_FALLBACK_AREA(r13) - ld r11,PACA_L1D_FLUSH_SETS(r13) - ld r12,PACA_L1D_FLUSH_CONGRUENCE(r13) - /* - * The load adresses are at staggered offsets within cachelines, - * which suits some pipelines better (on others it should not - * hurt). - */ - addi r12,r12,8 + ld r11,PACA_L1D_FLUSH_SIZE(r13) + srdi r11,r11,(7 + 3) /* 128 byte lines, unrolled 8x */ mtctr r11 DCBT_STOP_ALL_STREAM_IDS(r11) /* Stop prefetch streams */ /* order ld/st prior to dcbt stop all streams with flushing */ sync -1: li r8,0 - .rept 8 /* 8-way set associative */ - ldx r11,r10,r8 - add r8,r8,r12 - xor r11,r11,r11 // Ensure r11 is 0 even if fallback area is not - add r8,r8,r11 // Add 0, this creates a dependency on the ldx - .endr - addi r10,r10,128 /* 128 byte cache line */ + + /* + * The load adresses are at staggered offsets within cachelines, + * which suits some pipelines better (on others it should not + * hurt). + */ +1: + ld r11,(0x80 + 8)*0(r10) + ld r11,(0x80 + 8)*1(r10) + ld r11,(0x80 + 8)*2(r10) + ld r11,(0x80 + 8)*3(r10) + ld r11,(0x80 + 8)*4(r10) + ld r11,(0x80 + 8)*5(r10) + ld r11,(0x80 + 8)*6(r10) + ld r11,(0x80 + 8)*7(r10) + addi r10,r10,0x80*8 bdnz 1b mtctr r9 ld r9,PACA_EXRFI+EX_R9(r13) ld r10,PACA_EXRFI+EX_R10(r13) ld r11,PACA_EXRFI+EX_R11(r13) - ld r12,PACA_EXRFI+EX_R12(r13) - ld r8,PACA_EXRFI+EX_R13(r13) GET_SCRATCH0(r13); hrfid diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 1125c9be9e06..e35cebd45c35 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -838,6 +838,8 @@ BEGIN_FTR_SECTION mtspr SPRN_PTCR,r4 ld r4,_RPR(r1) mtspr SPRN_RPR,r4 + ld r4,_AMOR(r1) + mtspr SPRN_AMOR,r4 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) ld r4,_TSCR(r1) diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c new file mode 100644 index 000000000000..b98a722da915 --- /dev/null +++ b/arch/powerpc/kernel/security.c @@ -0,0 +1,237 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Security related flags and so on. +// +// Copyright 2018, Michael Ellerman, IBM Corporation. + +#include <linux/kernel.h> +#include <linux/device.h> +#include <linux/seq_buf.h> + +#include <asm/debugfs.h> +#include <asm/security_features.h> + + +unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT; + +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + bool thread_priv; + + thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV); + + if (rfi_flush || thread_priv) { + struct seq_buf s; + seq_buf_init(&s, buf, PAGE_SIZE - 1); + + seq_buf_printf(&s, "Mitigation: "); + + if (rfi_flush) + seq_buf_printf(&s, "RFI Flush"); + + if (rfi_flush && thread_priv) + seq_buf_printf(&s, ", "); + + if (thread_priv) + seq_buf_printf(&s, "L1D private per thread"); + + seq_buf_printf(&s, "\n"); + + return s.len; + } + + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && + !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (!security_ftr_enabled(SEC_FTR_BNDS_CHK_SPEC_BAR)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) +{ + bool bcs, ccd, ori; + struct seq_buf s; + + seq_buf_init(&s, buf, PAGE_SIZE - 1); + + bcs = security_ftr_enabled(SEC_FTR_BCCTRL_SERIALISED); + ccd = security_ftr_enabled(SEC_FTR_COUNT_CACHE_DISABLED); + ori = security_ftr_enabled(SEC_FTR_SPEC_BAR_ORI31); + + if (bcs || ccd) { + seq_buf_printf(&s, "Mitigation: "); + + if (bcs) + seq_buf_printf(&s, "Indirect branch serialisation (kernel only)"); + + if (bcs && ccd) + seq_buf_printf(&s, ", "); + + if (ccd) + seq_buf_printf(&s, "Indirect branch cache disabled"); + } else + seq_buf_printf(&s, "Vulnerable"); + + if (ori) + seq_buf_printf(&s, ", ori31 speculation barrier enabled"); + + seq_buf_printf(&s, "\n"); + + return s.len; +} + +/* + * Store-forwarding barrier support. + */ + +static enum stf_barrier_type stf_enabled_flush_types; +static bool no_stf_barrier; +bool stf_barrier; + +static int __init handle_no_stf_barrier(char *p) +{ + pr_info("stf-barrier: disabled on command line."); + no_stf_barrier = true; + return 0; +} + +early_param("no_stf_barrier", handle_no_stf_barrier); + +/* This is the generic flag used by other architectures */ +static int __init handle_ssbd(char *p) +{ + if (!p || strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0 ) { + /* Until firmware tells us, we have the barrier with auto */ + return 0; + } else if (strncmp(p, "off", 3) == 0) { + handle_no_stf_barrier(NULL); + return 0; + } else + return 1; + + return 0; +} +early_param("spec_store_bypass_disable", handle_ssbd); + +/* This is the generic flag used by other architectures */ +static int __init handle_no_ssbd(char *p) +{ + handle_no_stf_barrier(NULL); + return 0; +} +early_param("nospec_store_bypass_disable", handle_no_ssbd); + +static void stf_barrier_enable(bool enable) +{ + if (enable) + do_stf_barrier_fixups(stf_enabled_flush_types); + else + do_stf_barrier_fixups(STF_BARRIER_NONE); + + stf_barrier = enable; +} + +void setup_stf_barrier(void) +{ + enum stf_barrier_type type; + bool enable, hv; + + hv = cpu_has_feature(CPU_FTR_HVMODE); + + /* Default to fallback in case fw-features are not available */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + type = STF_BARRIER_EIEIO; + else if (cpu_has_feature(CPU_FTR_ARCH_207S)) + type = STF_BARRIER_SYNC_ORI; + else if (cpu_has_feature(CPU_FTR_ARCH_206)) + type = STF_BARRIER_FALLBACK; + else + type = STF_BARRIER_NONE; + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv)); + + if (type == STF_BARRIER_FALLBACK) { + pr_info("stf-barrier: fallback barrier available\n"); + } else if (type == STF_BARRIER_SYNC_ORI) { + pr_info("stf-barrier: hwsync barrier available\n"); + } else if (type == STF_BARRIER_EIEIO) { + pr_info("stf-barrier: eieio barrier available\n"); + } + + stf_enabled_flush_types = type; + + if (!no_stf_barrier) + stf_barrier_enable(enable); +} + +ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (stf_barrier && stf_enabled_flush_types != STF_BARRIER_NONE) { + const char *type; + switch (stf_enabled_flush_types) { + case STF_BARRIER_EIEIO: + type = "eieio"; + break; + case STF_BARRIER_SYNC_ORI: + type = "hwsync"; + break; + case STF_BARRIER_FALLBACK: + type = "fallback"; + break; + default: + type = "unknown"; + } + return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", type); + } + + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && + !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +#ifdef CONFIG_DEBUG_FS +static int stf_barrier_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != stf_barrier) + stf_barrier_enable(enable); + + return 0; +} + +static int stf_barrier_get(void *data, u64 *val) +{ + *val = stf_barrier ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n"); + +static __init int stf_barrier_debugfs_init(void) +{ + debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier); + return 0; +} +device_initcall(stf_barrier_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index 90bc20efb4c7..b4fcb54b9686 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -242,14 +242,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) unsigned short maj; unsigned short min; - /* We only show online cpus: disable preempt (overzealous, I - * knew) to prevent cpu going down. */ - preempt_disable(); - if (!cpu_online(cpu_id)) { - preempt_enable(); - return 0; - } - #ifdef CONFIG_SMP pvr = per_cpu(cpu_pvr, cpu_id); #else @@ -358,9 +350,6 @@ static int show_cpuinfo(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "\n"); #endif - - preempt_enable(); - /* If this is the last cpu, print the summary */ if (cpumask_next(cpu_id, cpu_online_mask) >= nr_cpu_ids) show_cpuinfo_summary(m); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 9527a4c6cbc2..0618aa61b26a 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -822,9 +822,6 @@ static void do_nothing(void *unused) void rfi_flush_enable(bool enable) { - if (rfi_flush == enable) - return; - if (enable) { do_rfi_flush_fixups(enabled_flush_types); on_each_cpu(do_nothing, NULL, 1); @@ -834,11 +831,15 @@ void rfi_flush_enable(bool enable) rfi_flush = enable; } -static void init_fallback_flush(void) +static void __ref init_fallback_flush(void) { u64 l1d_size, limit; int cpu; + /* Only allocate the fallback flush area once (at boot time). */ + if (l1d_flush_fallback_area) + return; + l1d_size = ppc64_caches.l1d.size; limit = min(safe_stack_limit(), ppc64_rma_size); @@ -851,34 +852,23 @@ static void init_fallback_flush(void) memset(l1d_flush_fallback_area, 0, l1d_size * 2); for_each_possible_cpu(cpu) { - /* - * The fallback flush is currently coded for 8-way - * associativity. Different associativity is possible, but it - * will be treated as 8-way and may not evict the lines as - * effectively. - * - * 128 byte lines are mandatory. - */ - u64 c = l1d_size / 8; - paca[cpu].rfi_flush_fallback_area = l1d_flush_fallback_area; - paca[cpu].l1d_flush_congruence = c; - paca[cpu].l1d_flush_sets = c / 128; + paca[cpu].l1d_flush_size = l1d_size; } } -void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) +void setup_rfi_flush(enum l1d_flush_type types, bool enable) { if (types & L1D_FLUSH_FALLBACK) { - pr_info("rfi-flush: Using fallback displacement flush\n"); + pr_info("rfi-flush: fallback displacement flush available\n"); init_fallback_flush(); } if (types & L1D_FLUSH_ORI) - pr_info("rfi-flush: Using ori type flush\n"); + pr_info("rfi-flush: ori type flush available\n"); if (types & L1D_FLUSH_MTTRIG) - pr_info("rfi-flush: Using mttrig type flush\n"); + pr_info("rfi-flush: mttrig type flush available\n"); enabled_flush_types = types; @@ -889,13 +879,19 @@ void __init setup_rfi_flush(enum l1d_flush_type types, bool enable) #ifdef CONFIG_DEBUG_FS static int rfi_flush_set(void *data, u64 val) { + bool enable; + if (val == 1) - rfi_flush_enable(true); + enable = true; else if (val == 0) - rfi_flush_enable(false); + enable = false; else return -EINVAL; + /* Only do anything if we're changing state */ + if (enable != rfi_flush) + rfi_flush_enable(enable); + return 0; } @@ -914,12 +910,4 @@ static __init int rfi_flush_debugfs_init(void) } device_initcall(rfi_flush_debugfs_init); #endif - -ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) -{ - if (rfi_flush) - return sprintf(buf, "Mitigation: RFI Flush\n"); - - return sprintf(buf, "Vulnerable\n"); -} #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index d17007451f62..ac2e5e56a9f0 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -182,6 +182,12 @@ static void oops_end(unsigned long flags, struct pt_regs *regs, } raw_local_irq_restore(flags); + /* + * system_reset_excption handles debugger, crash dump, panic, for 0x100 + */ + if (TRAP(regs) == 0x100) + return; + crash_fadump(regs, "die oops"); if (kexec_should_crash(current)) @@ -246,8 +252,13 @@ void die(const char *str, struct pt_regs *regs, long err) { unsigned long flags; - if (debugger(regs)) - return; + /* + * system_reset_excption handles debugger, crash dump, panic, for 0x100 + */ + if (TRAP(regs) != 0x100) { + if (debugger(regs)) + return; + } flags = oops_begin(regs); if (__die(str, regs, err)) @@ -1379,6 +1390,22 @@ void facility_unavailable_exception(struct pt_regs *regs) value = mfspr(SPRN_FSCR); status = value >> 56; + if ((hv || status >= 2) && + (status < ARRAY_SIZE(facility_strings)) && + facility_strings[status]) + facility = facility_strings[status]; + + /* We should not have taken this interrupt in kernel */ + if (!user_mode(regs)) { + pr_emerg("Facility '%s' unavailable (%d) exception in kernel mode at %lx\n", + facility, status, regs->nip); + die("Unexpected facility unavailable exception", regs, SIGABRT); + } + + /* We restore the interrupt state now */ + if (!arch_irq_disabled_regs(regs)) + local_irq_enable(); + if (status == FSCR_DSCR_LG) { /* * User is accessing the DSCR register using the problem @@ -1445,25 +1472,11 @@ void facility_unavailable_exception(struct pt_regs *regs) return; } - if ((hv || status >= 2) && - (status < ARRAY_SIZE(facility_strings)) && - facility_strings[status]) - facility = facility_strings[status]; - - /* We restore the interrupt state now */ - if (!arch_irq_disabled_regs(regs)) - local_irq_enable(); - pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n", hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr); out: - if (user_mode(regs)) { - _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); - return; - } - - die("Unexpected facility unavailable exception", regs, SIGABRT); + _exception(SIGILL, regs, ILL_ILLOPC, regs->nip); } #endif diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 307843d23682..c89ffb88fa3b 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -134,6 +134,20 @@ SECTIONS #ifdef CONFIG_PPC64 . = ALIGN(8); + __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) { + __start___stf_entry_barrier_fixup = .; + *(__stf_entry_barrier_fixup) + __stop___stf_entry_barrier_fixup = .; + } + + . = ALIGN(8); + __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { + __start___stf_exit_barrier_fixup = .; + *(__stf_exit_barrier_fixup) + __stop___stf_exit_barrier_fixup = .; + } + + . = ALIGN(8); __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { __start___rfi_flush_fixup = .; *(__rfi_flush_fixup) diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index d0c0b8443dcf..762a899e85a4 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -23,6 +23,7 @@ #include <asm/page.h> #include <asm/sections.h> #include <asm/setup.h> +#include <asm/security_features.h> #include <asm/firmware.h> struct fixup_entry { @@ -117,6 +118,120 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } #ifdef CONFIG_PPC_BOOK3S_64 +void do_stf_entry_barrier_fixups(enum stf_barrier_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___stf_entry_barrier_fixup), + end = PTRRELOC(&__stop___stf_entry_barrier_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + i = 0; + if (types & STF_BARRIER_FALLBACK) { + instrs[i++] = 0x7d4802a6; /* mflr r10 */ + instrs[i++] = 0x60000000; /* branch patched below */ + instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + } else if (types & STF_BARRIER_EIEIO) { + instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + } else if (types & STF_BARRIER_SYNC_ORI) { + instrs[i++] = 0x7c0004ac; /* hwsync */ + instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */ + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + if (types & STF_BARRIER_FALLBACK) + patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback, + BRANCH_SET_LINK); + else + patch_instruction(dest + 1, instrs[1]); + + patch_instruction(dest + 2, instrs[2]); + } + + printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, + (types == STF_BARRIER_NONE) ? "no" : + (types == STF_BARRIER_FALLBACK) ? "fallback" : + (types == STF_BARRIER_EIEIO) ? "eieio" : + (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync" + : "unknown"); +} + +void do_stf_exit_barrier_fixups(enum stf_barrier_type types) +{ + unsigned int instrs[6], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___stf_exit_barrier_fixup), + end = PTRRELOC(&__stop___stf_exit_barrier_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + instrs[3] = 0x60000000; /* nop */ + instrs[4] = 0x60000000; /* nop */ + instrs[5] = 0x60000000; /* nop */ + + i = 0; + if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) { + if (cpu_has_feature(CPU_FTR_HVMODE)) { + instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */ + instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */ + } else { + instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */ + instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */ + } + instrs[i++] = 0x7c0004ac; /* hwsync */ + instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */ + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */ + } else { + instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */ + } + } else if (types & STF_BARRIER_EIEIO) { + instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + patch_instruction(dest + 1, instrs[1]); + patch_instruction(dest + 2, instrs[2]); + patch_instruction(dest + 3, instrs[3]); + patch_instruction(dest + 4, instrs[4]); + patch_instruction(dest + 5, instrs[5]); + } + printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, + (types == STF_BARRIER_NONE) ? "no" : + (types == STF_BARRIER_FALLBACK) ? "fallback" : + (types == STF_BARRIER_EIEIO) ? "eieio" : + (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync" + : "unknown"); +} + + +void do_stf_barrier_fixups(enum stf_barrier_type types) +{ + do_stf_entry_barrier_fixups(types); + do_stf_exit_barrier_fixups(types); +} + void do_rfi_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; @@ -153,7 +268,14 @@ void do_rfi_flush_fixups(enum l1d_flush_type types) patch_instruction(dest + 2, instrs[2]); } - printk(KERN_DEBUG "rfi-flush: patched %d locations\n", i); + printk(KERN_DEBUG "rfi-flush: patched %d locations (%s flush)\n", i, + (types == L1D_FLUSH_NONE) ? "no" : + (types == L1D_FLUSH_FALLBACK) ? "fallback displacement" : + (types & L1D_FLUSH_ORI) ? (types & L1D_FLUSH_MTTRIG) + ? "ori+mttrig type" + : "ori type" : + (types & L1D_FLUSH_MTTRIG) ? "mttrig type" + : "unknown"); } #endif /* CONFIG_PPC_BOOK3S_64 */ diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index f9941b3b5770..f760494ecd66 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -329,6 +329,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, BUILD_BUG_ON(FIELD_SIZEOF(struct sk_buff, len) != 4); PPC_LWZ_OFFS(r_A, r_skb, offsetof(struct sk_buff, len)); break; + case BPF_LDX | BPF_W | BPF_ABS: /* A = *((u32 *)(seccomp_data + K)); */ + PPC_LWZ_OFFS(r_A, r_skb, K); + break; case BPF_LDX | BPF_W | BPF_LEN: /* X = skb->len; */ PPC_LWZ_OFFS(r_X, r_skb, offsetof(struct sk_buff, len)); break; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index fce545774d50..b7a6044161e8 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -457,6 +457,16 @@ static void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) /* invalid entry */ continue; + /* + * BHRB rolling buffer could very much contain the kernel + * addresses at this point. Check the privileges before + * exporting it to userspace (avoid exposure of regions + * where we could have speculative execution) + */ + if (perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN) && + is_kernel_addr(addr)) + continue; + /* Branches are read most recent first (ie. mfbhrb 0 is * the most recent branch). * There are two types of valid entries: @@ -1226,6 +1236,7 @@ static void power_pmu_disable(struct pmu *pmu) */ write_mmcr0(cpuhw, val); mb(); + isync(); /* * Disable instruction sampling if it was enabled @@ -1234,12 +1245,26 @@ static void power_pmu_disable(struct pmu *pmu) mtspr(SPRN_MMCRA, cpuhw->mmcr[2] & ~MMCRA_SAMPLE_ENABLE); mb(); + isync(); } cpuhw->disabled = 1; cpuhw->n_added = 0; ebb_switch_out(mmcr0); + +#ifdef CONFIG_PPC64 + /* + * These are readable by userspace, may contain kernel + * addresses and are not switched by context switch, so clear + * them now to avoid leaking anything to userspace in general + * including to another process. + */ + if (ppmu->flags & PPMU_ARCH_207S) { + mtspr(SPRN_SDAR, 0); + mtspr(SPRN_SIAR, 0); + } +#endif } local_irq_restore(flags); diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 4043109f4051..63f007f2de7e 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -413,6 +413,11 @@ struct npu_context { void *priv; }; +struct mmio_atsd_reg { + struct npu *npu; + int reg; +}; + /* * Find a free MMIO ATSD register and mark it in use. Return -ENOSPC * if none are available. @@ -422,7 +427,7 @@ static int get_mmio_atsd_reg(struct npu *npu) int i; for (i = 0; i < npu->mmio_atsd_count; i++) { - if (!test_and_set_bit(i, &npu->mmio_atsd_usage)) + if (!test_and_set_bit_lock(i, &npu->mmio_atsd_usage)) return i; } @@ -431,86 +436,90 @@ static int get_mmio_atsd_reg(struct npu *npu) static void put_mmio_atsd_reg(struct npu *npu, int reg) { - clear_bit(reg, &npu->mmio_atsd_usage); + clear_bit_unlock(reg, &npu->mmio_atsd_usage); } /* MMIO ATSD register offsets */ #define XTS_ATSD_AVA 1 #define XTS_ATSD_STAT 2 -static int mmio_launch_invalidate(struct npu *npu, unsigned long launch, - unsigned long va) +static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg, + unsigned long launch, unsigned long va) { - int mmio_atsd_reg; - - do { - mmio_atsd_reg = get_mmio_atsd_reg(npu); - cpu_relax(); - } while (mmio_atsd_reg < 0); + struct npu *npu = mmio_atsd_reg->npu; + int reg = mmio_atsd_reg->reg; __raw_writeq(cpu_to_be64(va), - npu->mmio_atsd_regs[mmio_atsd_reg] + XTS_ATSD_AVA); + npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA); eieio(); - __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[mmio_atsd_reg]); - - return mmio_atsd_reg; + __raw_writeq(cpu_to_be64(launch), npu->mmio_atsd_regs[reg]); } -static int mmio_invalidate_pid(struct npu *npu, unsigned long pid, bool flush) +static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], + unsigned long pid, bool flush) { + int i; unsigned long launch; - /* IS set to invalidate matching PID */ - launch = PPC_BIT(12); + for (i = 0; i <= max_npu2_index; i++) { + if (mmio_atsd_reg[i].reg < 0) + continue; + + /* IS set to invalidate matching PID */ + launch = PPC_BIT(12); - /* PRS set to process-scoped */ - launch |= PPC_BIT(13); + /* PRS set to process-scoped */ + launch |= PPC_BIT(13); - /* AP */ - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); + /* AP */ + launch |= (u64) + mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); - /* PID */ - launch |= pid << PPC_BITLSHIFT(38); + /* PID */ + launch |= pid << PPC_BITLSHIFT(38); - /* No flush */ - launch |= !flush << PPC_BITLSHIFT(39); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); - /* Invalidating the entire process doesn't use a va */ - return mmio_launch_invalidate(npu, launch, 0); + /* Invalidating the entire process doesn't use a va */ + mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0); + } } -static int mmio_invalidate_va(struct npu *npu, unsigned long va, - unsigned long pid, bool flush) +static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], + unsigned long va, unsigned long pid, bool flush) { + int i; unsigned long launch; - /* IS set to invalidate target VA */ - launch = 0; + for (i = 0; i <= max_npu2_index; i++) { + if (mmio_atsd_reg[i].reg < 0) + continue; + + /* IS set to invalidate target VA */ + launch = 0; - /* PRS set to process scoped */ - launch |= PPC_BIT(13); + /* PRS set to process scoped */ + launch |= PPC_BIT(13); - /* AP */ - launch |= (u64) mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); + /* AP */ + launch |= (u64) + mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17); - /* PID */ - launch |= pid << PPC_BITLSHIFT(38); + /* PID */ + launch |= pid << PPC_BITLSHIFT(38); - /* No flush */ - launch |= !flush << PPC_BITLSHIFT(39); + /* No flush */ + launch |= !flush << PPC_BITLSHIFT(39); - return mmio_launch_invalidate(npu, launch, va); + mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va); + } } #define mn_to_npu_context(x) container_of(x, struct npu_context, mn) -struct mmio_atsd_reg { - struct npu *npu; - int reg; -}; - static void mmio_invalidate_wait( - struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS], bool flush) + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) { struct npu *npu; int i, reg; @@ -525,16 +534,67 @@ static void mmio_invalidate_wait( reg = mmio_atsd_reg[i].reg; while (__raw_readq(npu->mmio_atsd_regs[reg] + XTS_ATSD_STAT)) cpu_relax(); + } +} + +/* + * Acquires all the address translation shootdown (ATSD) registers required to + * launch an ATSD on all links this npu_context is active on. + */ +static void acquire_atsd_reg(struct npu_context *npu_context, + struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) +{ + int i, j; + struct npu *npu; + struct pci_dev *npdev; + struct pnv_phb *nphb; - put_mmio_atsd_reg(npu, reg); + for (i = 0; i <= max_npu2_index; i++) { + mmio_atsd_reg[i].reg = -1; + for (j = 0; j < NV_MAX_LINKS; j++) { + /* + * There are no ordering requirements with respect to + * the setup of struct npu_context, but to ensure + * consistent behaviour we need to ensure npdev[][] is + * only read once. + */ + npdev = READ_ONCE(npu_context->npdev[i][j]); + if (!npdev) + continue; + nphb = pci_bus_to_host(npdev->bus)->private_data; + npu = &nphb->npu; + mmio_atsd_reg[i].npu = npu; + mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); + while (mmio_atsd_reg[i].reg < 0) { + mmio_atsd_reg[i].reg = get_mmio_atsd_reg(npu); + cpu_relax(); + } + break; + } + } +} + +/* + * Release previously acquired ATSD registers. To avoid deadlocks the registers + * must be released in the same order they were acquired above in + * acquire_atsd_reg. + */ +static void release_atsd_reg(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]) +{ + int i; + + for (i = 0; i <= max_npu2_index; i++) { /* - * The GPU requires two flush ATSDs to ensure all entries have - * been flushed. We use PID 0 as it will never be used for a - * process on the GPU. + * We can't rely on npu_context->npdev[][] being the same here + * as when acquire_atsd_reg() was called, hence we use the + * values stored in mmio_atsd_reg during the acquire phase + * rather than re-reading npdev[][]. */ - if (flush) - mmio_invalidate_pid(npu, 0, true); + if (mmio_atsd_reg[i].reg < 0) + continue; + + put_mmio_atsd_reg(mmio_atsd_reg[i].npu, mmio_atsd_reg[i].reg); } } @@ -545,10 +605,6 @@ static void mmio_invalidate_wait( static void mmio_invalidate(struct npu_context *npu_context, int va, unsigned long address, bool flush) { - int i, j; - struct npu *npu; - struct pnv_phb *nphb; - struct pci_dev *npdev; struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS]; unsigned long pid = npu_context->mm->context.id; @@ -562,37 +618,25 @@ static void mmio_invalidate(struct npu_context *npu_context, int va, * Loop over all the NPUs this process is active on and launch * an invalidate. */ - for (i = 0; i <= max_npu2_index; i++) { - mmio_atsd_reg[i].reg = -1; - for (j = 0; j < NV_MAX_LINKS; j++) { - npdev = npu_context->npdev[i][j]; - if (!npdev) - continue; - - nphb = pci_bus_to_host(npdev->bus)->private_data; - npu = &nphb->npu; - mmio_atsd_reg[i].npu = npu; - - if (va) - mmio_atsd_reg[i].reg = - mmio_invalidate_va(npu, address, pid, - flush); - else - mmio_atsd_reg[i].reg = - mmio_invalidate_pid(npu, pid, flush); - - /* - * The NPU hardware forwards the shootdown to all GPUs - * so we only have to launch one shootdown per NPU. - */ - break; - } + acquire_atsd_reg(npu_context, mmio_atsd_reg); + if (va) + mmio_invalidate_va(mmio_atsd_reg, address, pid, flush); + else + mmio_invalidate_pid(mmio_atsd_reg, pid, flush); + + mmio_invalidate_wait(mmio_atsd_reg); + if (flush) { + /* + * The GPU requires two flush ATSDs to ensure all entries have + * been flushed. We use PID 0 as it will never be used for a + * process on the GPU. + */ + mmio_invalidate_pid(mmio_atsd_reg, 0, true); + mmio_invalidate_wait(mmio_atsd_reg); + mmio_invalidate_pid(mmio_atsd_reg, 0, true); + mmio_invalidate_wait(mmio_atsd_reg); } - - mmio_invalidate_wait(mmio_atsd_reg, flush); - if (flush) - /* Wait for the flush to complete */ - mmio_invalidate_wait(mmio_atsd_reg, false); + release_atsd_reg(mmio_atsd_reg); } static void pnv_npu2_mn_release(struct mmu_notifier *mn, @@ -735,7 +779,16 @@ struct npu_context *pnv_npu2_init_context(struct pci_dev *gpdev, if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", &nvlink_index))) return ERR_PTR(-ENODEV); - npu_context->npdev[npu->index][nvlink_index] = npdev; + + /* + * npdev is a pci_dev pointer setup by the PCI code. We assign it to + * npdev[][] to indicate to the mmu notifiers that an invalidation + * should also be sent over this nvlink. The notifiers don't use any + * other fields in npu_context, so we just need to ensure that when they + * deference npu_context->npdev[][] it is either a valid pointer or + * NULL. + */ + WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], npdev); return npu_context; } @@ -774,7 +827,7 @@ void pnv_npu2_destroy_context(struct npu_context *npu_context, if (WARN_ON(of_property_read_u32(nvlink_dn, "ibm,npu-link-index", &nvlink_index))) return; - npu_context->npdev[npu->index][nvlink_index] = NULL; + WRITE_ONCE(npu_context->npdev[npu->index][nvlink_index], NULL); opal_npu_destroy_context(nphb->opal_id, npu_context->mm->context.id, PCI_DEVID(gpdev->bus->number, gpdev->devfn)); kref_put(&npu_context->kref, pnv_npu2_release_context); diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index 1bceb95f422d..5584247f5029 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -44,6 +44,10 @@ static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index) return count; } +/* + * This can be called in the panic path with interrupts off, so use + * mdelay in that case. + */ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) { s64 rc = OPAL_BUSY; @@ -58,10 +62,16 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_write_nvram(__pa(buf), count, off); if (rc == OPAL_BUSY_EVENT) { - msleep(OPAL_BUSY_DELAY_MS); + if (in_interrupt() || irqs_disabled()) + mdelay(OPAL_BUSY_DELAY_MS); + else + msleep(OPAL_BUSY_DELAY_MS); opal_poll_events(NULL); } else if (rc == OPAL_BUSY) { - msleep(OPAL_BUSY_DELAY_MS); + if (in_interrupt() || irqs_disabled()) + mdelay(OPAL_BUSY_DELAY_MS); + else + msleep(OPAL_BUSY_DELAY_MS); } } diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 7966a314d93a..fd143c934768 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -37,53 +37,92 @@ #include <asm/kexec.h> #include <asm/smp.h> #include <asm/setup.h> +#include <asm/security_features.h> #include "powernv.h" + +static bool fw_feature_is(const char *state, const char *name, + struct device_node *fw_features) +{ + struct device_node *np; + bool rc = false; + + np = of_get_child_by_name(fw_features, name); + if (np) { + rc = of_property_read_bool(np, state); + of_node_put(np); + } + + return rc; +} + +static void init_fw_feat_flags(struct device_node *np) +{ + if (fw_feature_is("enabled", "inst-spec-barrier-ori31,31,0", np)) + security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); + + if (fw_feature_is("enabled", "fw-bcctrl-serialized", np)) + security_ftr_set(SEC_FTR_BCCTRL_SERIALISED); + + if (fw_feature_is("enabled", "inst-l1d-flush-ori30,30,0", np)) + security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30); + + if (fw_feature_is("enabled", "inst-l1d-flush-trig2", np)) + security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2); + + if (fw_feature_is("enabled", "fw-l1d-thread-split", np)) + security_ftr_set(SEC_FTR_L1D_THREAD_PRIV); + + if (fw_feature_is("enabled", "fw-count-cache-disabled", np)) + security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + + /* + * The features below are enabled by default, so we instead look to see + * if firmware has *disabled* them, and clear them if so. + */ + if (fw_feature_is("disabled", "speculation-policy-favor-security", np)) + security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); + + if (fw_feature_is("disabled", "needs-l1d-flush-msr-pr-0-to-1", np)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + + if (fw_feature_is("disabled", "needs-l1d-flush-msr-hv-1-to-0", np)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_HV); + + if (fw_feature_is("disabled", "needs-spec-barrier-for-bound-checks", np)) + security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); +} + static void pnv_setup_rfi_flush(void) { struct device_node *np, *fw_features; enum l1d_flush_type type; - int enable; + bool enable; /* Default to fallback in case fw-features are not available */ type = L1D_FLUSH_FALLBACK; - enable = 1; np = of_find_node_by_name(NULL, "ibm,opal"); fw_features = of_get_child_by_name(np, "fw-features"); of_node_put(np); if (fw_features) { - np = of_get_child_by_name(fw_features, "inst-l1d-flush-trig2"); - if (np && of_property_read_bool(np, "enabled")) - type = L1D_FLUSH_MTTRIG; + init_fw_feat_flags(fw_features); + of_node_put(fw_features); - of_node_put(np); + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2)) + type = L1D_FLUSH_MTTRIG; - np = of_get_child_by_name(fw_features, "inst-l1d-flush-ori30,30,0"); - if (np && of_property_read_bool(np, "enabled")) + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30)) type = L1D_FLUSH_ORI; - - of_node_put(np); - - /* Enable unless firmware says NOT to */ - enable = 2; - np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-hv-1-to-0"); - if (np && of_property_read_bool(np, "disabled")) - enable--; - - of_node_put(np); - - np = of_get_child_by_name(fw_features, "needs-l1d-flush-msr-pr-0-to-1"); - if (np && of_property_read_bool(np, "disabled")) - enable--; - - of_node_put(np); - of_node_put(fw_features); } - setup_rfi_flush(type, enable > 0); + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || \ + security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV)); + + setup_rfi_flush(type, enable); } static void __init pnv_setup_arch(void) @@ -91,6 +130,7 @@ static void __init pnv_setup_arch(void) set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); pnv_setup_rfi_flush(); + setup_stf_barrier(); /* Initialize SMP */ pnv_smp_init(); diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c index f7042ad492ba..fbea7db043fa 100644 --- a/arch/powerpc/platforms/pseries/mobility.c +++ b/arch/powerpc/platforms/pseries/mobility.c @@ -348,6 +348,9 @@ void post_mobility_fixup(void) printk(KERN_ERR "Post-mobility device tree update " "failed: %d\n", rc); + /* Possibly switch to a new RFI flush type */ + pseries_setup_rfi_flush(); + return; } diff --git a/arch/powerpc/platforms/pseries/pseries.h b/arch/powerpc/platforms/pseries/pseries.h index 1ae1d9f4dbe9..27cdcb69fd18 100644 --- a/arch/powerpc/platforms/pseries/pseries.h +++ b/arch/powerpc/platforms/pseries/pseries.h @@ -100,4 +100,6 @@ static inline unsigned long cmo_get_page_size(void) int dlpar_workqueue_init(void); +void pseries_setup_rfi_flush(void); + #endif /* _PSERIES_PSERIES_H */ diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index ae4f596273b5..45f814041448 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -68,6 +68,7 @@ #include <asm/plpar_wrappers.h> #include <asm/kexec.h> #include <asm/isa-bridge.h> +#include <asm/security_features.h> #include "pseries.h" @@ -459,35 +460,78 @@ static void __init find_and_init_phbs(void) of_pci_check_probe_only(); } -static void pseries_setup_rfi_flush(void) +static void init_cpu_char_feature_flags(struct h_cpu_char_result *result) +{ + /* + * The features below are disabled by default, so we instead look to see + * if firmware has *enabled* them, and set them if so. + */ + if (result->character & H_CPU_CHAR_SPEC_BAR_ORI31) + security_ftr_set(SEC_FTR_SPEC_BAR_ORI31); + + if (result->character & H_CPU_CHAR_BCCTRL_SERIALISED) + security_ftr_set(SEC_FTR_BCCTRL_SERIALISED); + + if (result->character & H_CPU_CHAR_L1D_FLUSH_ORI30) + security_ftr_set(SEC_FTR_L1D_FLUSH_ORI30); + + if (result->character & H_CPU_CHAR_L1D_FLUSH_TRIG2) + security_ftr_set(SEC_FTR_L1D_FLUSH_TRIG2); + + if (result->character & H_CPU_CHAR_L1D_THREAD_PRIV) + security_ftr_set(SEC_FTR_L1D_THREAD_PRIV); + + if (result->character & H_CPU_CHAR_COUNT_CACHE_DISABLED) + security_ftr_set(SEC_FTR_COUNT_CACHE_DISABLED); + + /* + * The features below are enabled by default, so we instead look to see + * if firmware has *disabled* them, and clear them if so. + */ + if (!(result->behaviour & H_CPU_BEHAV_FAVOUR_SECURITY)) + security_ftr_clear(SEC_FTR_FAVOUR_SECURITY); + + if (!(result->behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) + security_ftr_clear(SEC_FTR_L1D_FLUSH_PR); + + if (!(result->behaviour & H_CPU_BEHAV_BNDS_CHK_SPEC_BAR)) + security_ftr_clear(SEC_FTR_BNDS_CHK_SPEC_BAR); +} + +void pseries_setup_rfi_flush(void) { struct h_cpu_char_result result; enum l1d_flush_type types; bool enable; long rc; - /* Enable by default */ - enable = true; + /* + * Set features to the defaults assumed by init_cpu_char_feature_flags() + * so it can set/clear again any features that might have changed after + * migration, and in case the hypercall fails and it is not even called. + */ + powerpc_security_features = SEC_FTR_DEFAULT; rc = plpar_get_cpu_characteristics(&result); - if (rc == H_SUCCESS) { - types = L1D_FLUSH_NONE; + if (rc == H_SUCCESS) + init_cpu_char_feature_flags(&result); - if (result.character & H_CPU_CHAR_L1D_FLUSH_TRIG2) - types |= L1D_FLUSH_MTTRIG; - if (result.character & H_CPU_CHAR_L1D_FLUSH_ORI30) - types |= L1D_FLUSH_ORI; + /* + * We're the guest so this doesn't apply to us, clear it to simplify + * handling of it elsewhere. + */ + security_ftr_clear(SEC_FTR_L1D_FLUSH_HV); - /* Use fallback if nothing set in hcall */ - if (types == L1D_FLUSH_NONE) - types = L1D_FLUSH_FALLBACK; + types = L1D_FLUSH_FALLBACK; - if (!(result.behaviour & H_CPU_BEHAV_L1D_FLUSH_PR)) - enable = false; - } else { - /* Default to fallback if case hcall is not available */ - types = L1D_FLUSH_FALLBACK; - } + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_TRIG2)) + types |= L1D_FLUSH_MTTRIG; + + if (security_ftr_enabled(SEC_FTR_L1D_FLUSH_ORI30)) + types |= L1D_FLUSH_ORI; + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && \ + security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR); setup_rfi_flush(types, enable); } @@ -510,6 +554,7 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); pseries_setup_rfi_flush(); + setup_stf_barrier(); /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index ead3e2549ebf..205dec18d6b5 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -626,7 +626,7 @@ static inline u32 mpic_physmask(u32 cpumask) int i; u32 mask = 0; - for (i = 0; i < min(32, NR_CPUS); ++i, cpumask >>= 1) + for (i = 0; i < min(32, NR_CPUS) && cpu_possible(i); ++i, cpumask >>= 1) mask |= (cpumask & 1) << get_hard_smp_processor_id(i); return mask; } diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 2c8b325591cc..a5938fadd031 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -2348,6 +2348,8 @@ static void dump_one_paca(int cpu) DUMP(p, slb_cache_ptr, "x"); for (i = 0; i < SLB_CACHE_ENTRIES; i++) printf(" slb_cache[%d]: = 0x%016lx\n", i, p->slb_cache[i]); + + DUMP(p, rfi_flush_fallback_area, "px"); #endif DUMP(p, dscr_default, "llx"); #ifdef CONFIG_PPC_BOOK3E diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S index e8077f0971f8..2bf01ba44107 100644 --- a/arch/s390/crypto/crc32be-vx.S +++ b/arch/s390/crypto/crc32be-vx.S @@ -13,6 +13,7 @@ */ #include <linux/linkage.h> +#include <asm/nospec-insn.h> #include <asm/vx-insn.h> /* Vector register range containing CRC-32 constants */ @@ -67,6 +68,8 @@ .previous + GEN_BR_THUNK %r14 + .text /* * The CRC-32 function(s) use these calling conventions: @@ -203,6 +206,6 @@ ENTRY(crc32_be_vgfm_16) .Ldone: VLGVF %r2,%v2,3 - br %r14 + BR_EX %r14 .previous diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S index d8c67a58c0c5..7d6f568bd3ad 100644 --- a/arch/s390/crypto/crc32le-vx.S +++ b/arch/s390/crypto/crc32le-vx.S @@ -14,6 +14,7 @@ */ #include <linux/linkage.h> +#include <asm/nospec-insn.h> #include <asm/vx-insn.h> /* Vector register range containing CRC-32 constants */ @@ -76,6 +77,7 @@ .previous + GEN_BR_THUNK %r14 .text @@ -264,6 +266,6 @@ crc32_le_vgfm_generic: .Ldone: VLGVF %r2,%v2,2 - br %r14 + BR_EX %r14 .previous diff --git a/arch/s390/include/asm/alternative-asm.h b/arch/s390/include/asm/alternative-asm.h new file mode 100644 index 000000000000..955d620db23e --- /dev/null +++ b/arch/s390/include/asm/alternative-asm.h @@ -0,0 +1,108 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_ALTERNATIVE_ASM_H +#define _ASM_S390_ALTERNATIVE_ASM_H + +#ifdef __ASSEMBLY__ + +/* + * Check the length of an instruction sequence. The length may not be larger + * than 254 bytes and it has to be divisible by 2. + */ +.macro alt_len_check start,end + .if ( \end - \start ) > 254 + .error "cpu alternatives does not support instructions blocks > 254 bytes\n" + .endif + .if ( \end - \start ) % 2 + .error "cpu alternatives instructions length is odd\n" + .endif +.endm + +/* + * Issue one struct alt_instr descriptor entry (need to put it into + * the section .altinstructions, see below). This entry contains + * enough information for the alternatives patching code to patch an + * instruction. See apply_alternatives(). + */ +.macro alt_entry orig_start, orig_end, alt_start, alt_end, feature + .long \orig_start - . + .long \alt_start - . + .word \feature + .byte \orig_end - \orig_start + .byte \alt_end - \alt_start +.endm + +/* + * Fill up @bytes with nops. The macro emits 6-byte nop instructions + * for the bulk of the area, possibly followed by a 4-byte and/or + * a 2-byte nop if the size of the area is not divisible by 6. + */ +.macro alt_pad_fill bytes + .fill ( \bytes ) / 6, 6, 0xc0040000 + .fill ( \bytes ) % 6 / 4, 4, 0x47000000 + .fill ( \bytes ) % 6 % 4 / 2, 2, 0x0700 +.endm + +/* + * Fill up @bytes with nops. If the number of bytes is larger + * than 6, emit a jg instruction to branch over all nops, then + * fill an area of size (@bytes - 6) with nop instructions. + */ +.macro alt_pad bytes + .if ( \bytes > 0 ) + .if ( \bytes > 6 ) + jg . + \bytes + alt_pad_fill \bytes - 6 + .else + alt_pad_fill \bytes + .endif + .endif +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ +.macro ALTERNATIVE oldinstr, newinstr, feature + .pushsection .altinstr_replacement,"ax" +770: \newinstr +771: .popsection +772: \oldinstr +773: alt_len_check 770b, 771b + alt_len_check 772b, 773b + alt_pad ( ( 771b - 770b ) - ( 773b - 772b ) ) +774: .pushsection .altinstructions,"a" + alt_entry 772b, 774b, 770b, 771b, \feature + .popsection +.endm + +/* + * Define an alternative between two instructions. If @feature is + * present, early code in apply_alternatives() replaces @oldinstr with + * @newinstr. ".skip" directive takes care of proper instruction padding + * in case @newinstr is longer than @oldinstr. + */ +.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2 + .pushsection .altinstr_replacement,"ax" +770: \newinstr1 +771: \newinstr2 +772: .popsection +773: \oldinstr +774: alt_len_check 770b, 771b + alt_len_check 771b, 772b + alt_len_check 773b, 774b + .if ( 771b - 770b > 772b - 771b ) + alt_pad ( ( 771b - 770b ) - ( 774b - 773b ) ) + .else + alt_pad ( ( 772b - 771b ) - ( 774b - 773b ) ) + .endif +775: .pushsection .altinstructions,"a" + alt_entry 773b, 775b, 770b, 771b,\feature1 + alt_entry 773b, 775b, 771b, 772b,\feature2 + .popsection +.endm + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_S390_ALTERNATIVE_ASM_H */ diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h new file mode 100644 index 000000000000..9a56e738d645 --- /dev/null +++ b/arch/s390/include/asm/nospec-insn.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_NOSPEC_ASM_H +#define _ASM_S390_NOSPEC_ASM_H + +#include <asm/alternative-asm.h> +#include <asm/asm-offsets.h> + +#ifdef __ASSEMBLY__ + +#ifdef CONFIG_EXPOLINE + +_LC_BR_R1 = __LC_BR_R1 + +/* + * The expoline macros are used to create thunks in the same format + * as gcc generates them. The 'comdat' section flag makes sure that + * the various thunks are merged into a single copy. + */ + .macro __THUNK_PROLOG_NAME name + .pushsection .text.\name,"axG",@progbits,\name,comdat + .globl \name + .hidden \name + .type \name,@function +\name: + .cfi_startproc + .endm + + .macro __THUNK_EPILOG + .cfi_endproc + .popsection + .endm + + .macro __THUNK_PROLOG_BR r1,r2 + __THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1 + .endm + + .macro __THUNK_PROLOG_BC d0,r1,r2 + __THUNK_PROLOG_NAME __s390x_indirect_branch_\d0\()_\r2\()use_\r1 + .endm + + .macro __THUNK_BR r1,r2 + jg __s390x_indirect_jump_r\r2\()use_r\r1 + .endm + + .macro __THUNK_BC d0,r1,r2 + jg __s390x_indirect_branch_\d0\()_\r2\()use_\r1 + .endm + + .macro __THUNK_BRASL r1,r2,r3 + brasl \r1,__s390x_indirect_jump_r\r3\()use_r\r2 + .endm + + .macro __DECODE_RR expand,reg,ruse + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \reg,%r\r1 + .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \ruse,%r\r2 + \expand \r1,\r2 + .set __decode_fail,0 + .endif + .endr + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_RR failed" + .endif + .endm + + .macro __DECODE_RRR expand,rsave,rtarget,ruse + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \rsave,%r\r1 + .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \rtarget,%r\r2 + .irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \ruse,%r\r3 + \expand \r1,\r2,\r3 + .set __decode_fail,0 + .endif + .endr + .endif + .endr + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_RRR failed" + .endif + .endm + + .macro __DECODE_DRR expand,disp,reg,ruse + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \reg,%r\r1 + .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \ruse,%r\r2 + \expand \disp,\r1,\r2 + .set __decode_fail,0 + .endif + .endr + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_DRR failed" + .endif + .endm + + .macro __THUNK_EX_BR reg,ruse + # Be very careful when adding instructions to this macro! + # The ALTERNATIVE replacement code has a .+10 which targets + # the "br \reg" after the code has been patched. +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + exrl 0,555f + j . +#else + .ifc \reg,%r1 + ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35 + j . + .else + larl \ruse,555f + ex 0,0(\ruse) + j . + .endif +#endif +555: br \reg + .endm + + .macro __THUNK_EX_BC disp,reg,ruse +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + exrl 0,556f + j . +#else + larl \ruse,556f + ex 0,0(\ruse) + j . +#endif +556: b \disp(\reg) + .endm + + .macro GEN_BR_THUNK reg,ruse=%r1 + __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse + __THUNK_EX_BR \reg,\ruse + __THUNK_EPILOG + .endm + + .macro GEN_B_THUNK disp,reg,ruse=%r1 + __DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse + __THUNK_EX_BC \disp,\reg,\ruse + __THUNK_EPILOG + .endm + + .macro BR_EX reg,ruse=%r1 +557: __DECODE_RR __THUNK_BR,\reg,\ruse + .pushsection .s390_indirect_branches,"a",@progbits + .long 557b-. + .popsection + .endm + + .macro B_EX disp,reg,ruse=%r1 +558: __DECODE_DRR __THUNK_BC,\disp,\reg,\ruse + .pushsection .s390_indirect_branches,"a",@progbits + .long 558b-. + .popsection + .endm + + .macro BASR_EX rsave,rtarget,ruse=%r1 +559: __DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse + .pushsection .s390_indirect_branches,"a",@progbits + .long 559b-. + .popsection + .endm + +#else + .macro GEN_BR_THUNK reg,ruse=%r1 + .endm + + .macro GEN_B_THUNK disp,reg,ruse=%r1 + .endm + + .macro BR_EX reg,ruse=%r1 + br \reg + .endm + + .macro B_EX disp,reg,ruse=%r1 + b \disp(\reg) + .endm + + .macro BASR_EX rsave,rtarget,ruse=%r1 + basr \rsave,\rtarget + .endm +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_S390_NOSPEC_ASM_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index a3a4cafb6080..e0784fff07f5 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -63,6 +63,7 @@ obj-y += nospec-branch.o extra-y += head.o head64.o vmlinux.lds +obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index 0e6d2b032484..4e69bf909e87 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -177,6 +177,7 @@ int main(void) OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); OFFSET(__LC_GMAP, lowcore, gmap); OFFSET(__LC_PASTE, lowcore, paste); + OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index f6c56009e822..b65874b0b412 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -9,18 +9,22 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/nospec-insn.h> #include <asm/ptrace.h> #include <asm/sigp.h> + GEN_BR_THUNK %r9 + GEN_BR_THUNK %r14 + ENTRY(s390_base_mcck_handler) basr %r13,0 0: lg %r15,__LC_PANIC_STACK # load panic stack aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_mcck_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 + lg %r9,0(%r1) + ltgr %r9,%r9 jz 1f - basr %r14,%r1 + BASR_EX %r14,%r9 1: la %r1,4095 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) lpswe __LC_MCK_OLD_PSW @@ -37,10 +41,10 @@ ENTRY(s390_base_ext_handler) basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_ext_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 + lg %r9,0(%r1) + ltgr %r9,%r9 jz 1f - basr %r14,%r1 + BASR_EX %r14,%r9 1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit lpswe __LC_EXT_OLD_PSW @@ -57,10 +61,10 @@ ENTRY(s390_base_pgm_handler) basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_pgm_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 + lg %r9,0(%r1) + ltgr %r9,%r9 jz 1f - basr %r14,%r1 + BASR_EX %r14,%r9 lmg %r0,%r15,__LC_SAVE_AREA_SYNC lpswe __LC_PGM_OLD_PSW 1: lpswe disabled_wait_psw-0b(%r13) @@ -117,7 +121,7 @@ ENTRY(diag308_reset) larl %r4,.Lcontinue_psw # Restore PSW flags lpswe 0(%r4) .Lcontinue: - br %r14 + BR_EX %r14 .align 16 .Lrestart_psw: .long 0x00080000,0x80000000 + .Lrestart_part2 diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index ed9aaa212d4a..be20b1f73384 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -25,6 +25,7 @@ #include <asm/setup.h> #include <asm/nmi.h> #include <asm/export.h> +#include <asm/nospec-insn.h> __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 8 @@ -221,67 +222,9 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) .popsection .endm -#ifdef CONFIG_EXPOLINE - - .macro GEN_BR_THUNK name,reg,tmp - .section .text.\name,"axG",@progbits,\name,comdat - .globl \name - .hidden \name - .type \name,@function -\name: - .cfi_startproc -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES - exrl 0,0f -#else - larl \tmp,0f - ex 0,0(\tmp) -#endif - j . -0: br \reg - .cfi_endproc - .endm - - GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1 - GEN_BR_THUNK __s390x_indirect_jump_r1use_r14,%r14,%r1 - GEN_BR_THUNK __s390x_indirect_jump_r11use_r14,%r14,%r11 - - .macro BASR_R14_R9 -0: brasl %r14,__s390x_indirect_jump_r1use_r9 - .pushsection .s390_indirect_branches,"a",@progbits - .long 0b-. - .popsection - .endm - - .macro BR_R1USE_R14 -0: jg __s390x_indirect_jump_r1use_r14 - .pushsection .s390_indirect_branches,"a",@progbits - .long 0b-. - .popsection - .endm - - .macro BR_R11USE_R14 -0: jg __s390x_indirect_jump_r11use_r14 - .pushsection .s390_indirect_branches,"a",@progbits - .long 0b-. - .popsection - .endm - -#else /* CONFIG_EXPOLINE */ - - .macro BASR_R14_R9 - basr %r14,%r9 - .endm - - .macro BR_R1USE_R14 - br %r14 - .endm - - .macro BR_R11USE_R14 - br %r14 - .endm - -#endif /* CONFIG_EXPOLINE */ - + GEN_BR_THUNK %r9 + GEN_BR_THUNK %r14 + GEN_BR_THUNK %r14,%r11 .section .kprobes.text, "ax" .Ldummy: @@ -298,7 +241,7 @@ _PIF_WORK = (_PIF_PER_TRAP | _PIF_SYSCALL_RESTART) ENTRY(__bpon) .globl __bpon BPON - BR_R1USE_R14 + BR_EX %r14 /* * Scheduler resume function, called by switch_to @@ -325,7 +268,7 @@ ENTRY(__switch_to) TSTMSK __LC_MACHINE_FLAGS,MACHINE_FLAG_LPP jz 0f .insn s,0xb2800000,__LC_LPP # set program parameter -0: BR_R1USE_R14 +0: BR_EX %r14 .L__critical_start: @@ -392,7 +335,7 @@ sie_exit: xgr %r5,%r5 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers lg %r2,__SF_EMPTY+16(%r15) # return exit reason code - BR_R1USE_R14 + BR_EX %r14 .Lsie_fault: lghi %r14,-EFAULT stg %r14,__SF_EMPTY+16(%r15) # set exit reason code @@ -451,7 +394,7 @@ ENTRY(system_call) lgf %r9,0(%r8,%r10) # get system call add. TSTMSK __TI_flags(%r12),_TIF_TRACE jnz .Lsysc_tracesys - BASR_R14_R9 # call sys_xxxx + BASR_EX %r14,%r9 # call sys_xxxx stg %r2,__PT_R2(%r11) # store return value .Lsysc_return: @@ -628,7 +571,7 @@ ENTRY(system_call) lmg %r3,%r7,__PT_R3(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) lg %r2,__PT_ORIG_GPR2(%r11) - BASR_R14_R9 # call sys_xxx + BASR_EX %r14,%r9 # call sys_xxx stg %r2,__PT_R2(%r11) # store return value .Lsysc_tracenogo: TSTMSK __TI_flags(%r12),_TIF_TRACE @@ -652,7 +595,7 @@ ENTRY(ret_from_fork) lmg %r9,%r10,__PT_R9(%r11) # load gprs ENTRY(kernel_thread_starter) la %r2,0(%r10) - BASR_R14_R9 + BASR_EX %r14,%r9 j .Lsysc_tracenogo /* @@ -731,7 +674,7 @@ ENTRY(pgm_check_handler) je .Lpgm_return lgf %r9,0(%r10,%r1) # load address of handler routine lgr %r2,%r11 # pass pointer to pt_regs - BASR_R14_R9 # branch to interrupt-handler + BASR_EX %r14,%r9 # branch to interrupt-handler .Lpgm_return: LOCKDEP_SYS_EXIT tm __PT_PSW+1(%r11),0x01 # returning to user ? @@ -1041,7 +984,7 @@ ENTRY(psw_idle) stpt __TIMER_IDLE_ENTER(%r2) .Lpsw_idle_lpsw: lpswe __SF_EMPTY(%r15) - BR_R1USE_R14 + BR_EX %r14 .Lpsw_idle_end: /* @@ -1083,7 +1026,7 @@ ENTRY(save_fpu_regs) .Lsave_fpu_regs_done: oi __LC_CPU_FLAGS+7,_CIF_FPU .Lsave_fpu_regs_exit: - BR_R1USE_R14 + BR_EX %r14 .Lsave_fpu_regs_end: EXPORT_SYMBOL(save_fpu_regs) @@ -1129,7 +1072,7 @@ load_fpu_regs: .Lload_fpu_regs_done: ni __LC_CPU_FLAGS+7,255-_CIF_FPU .Lload_fpu_regs_exit: - BR_R1USE_R14 + BR_EX %r14 .Lload_fpu_regs_end: .L__critical_end: @@ -1301,7 +1244,7 @@ cleanup_critical: jl 0f clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end jl .Lcleanup_load_fpu_regs -0: BR_R11USE_R14 +0: BR_EX %r14 .align 8 .Lcleanup_table: @@ -1337,7 +1280,7 @@ cleanup_critical: ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit - BR_R11USE_R14 + BR_EX %r14 #endif .Lcleanup_system_call: @@ -1391,7 +1334,7 @@ cleanup_critical: stg %r15,56(%r11) # r15 stack pointer # set new psw address and exit larl %r9,.Lsysc_do_svc - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_system_call_insn: .quad system_call .quad .Lsysc_stmg @@ -1403,7 +1346,7 @@ cleanup_critical: .Lcleanup_sysc_tif: larl %r9,.Lsysc_tif - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_sysc_restore: # check if stpt has been executed @@ -1420,14 +1363,14 @@ cleanup_critical: mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 1: lmg %r8,%r9,__LC_RETURN_PSW - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_sysc_restore_insn: .quad .Lsysc_exit_timer .quad .Lsysc_done - 4 .Lcleanup_io_tif: larl %r9,.Lio_tif - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_io_restore: # check if stpt has been executed @@ -1441,7 +1384,7 @@ cleanup_critical: mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 1: lmg %r8,%r9,__LC_RETURN_PSW - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_io_restore_insn: .quad .Lio_exit_timer .quad .Lio_done - 4 @@ -1494,17 +1437,17 @@ cleanup_critical: # prepare return psw nihh %r8,0xfcfd # clear irq & wait state bits lg %r9,48(%r11) # return from psw_idle - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_idle_insn: .quad .Lpsw_idle_lpsw .Lcleanup_save_fpu_regs: larl %r9,save_fpu_regs - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_load_fpu_regs: larl %r9,load_fpu_regs - BR_R11USE_R14 + BR_EX %r14,%r11 /* * Integer constants diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 94f2099bceb0..3d17c41074ca 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -176,10 +176,9 @@ void do_softirq_own_stack(void) new -= STACK_FRAME_OVERHEAD; ((struct stack_frame *) new)->back_chain = old; asm volatile(" la 15,0(%0)\n" - " basr 14,%2\n" + " brasl 14,__do_softirq\n" " la 15,0(%1)\n" - : : "a" (new), "a" (old), - "a" (__do_softirq) + : : "a" (new), "a" (old) : "0", "1", "2", "3", "4", "5", "14", "cc", "memory" ); } else { diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 82df7d80fab2..27110f3294ed 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -9,13 +9,17 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/ftrace.h> +#include <asm/nospec-insn.h> #include <asm/ptrace.h> #include <asm/export.h> + GEN_BR_THUNK %r1 + GEN_BR_THUNK %r14 + .section .kprobes.text, "ax" ENTRY(ftrace_stub) - br %r14 + BR_EX %r14 #define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) @@ -23,7 +27,7 @@ ENTRY(ftrace_stub) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) ENTRY(_mcount) - br %r14 + BR_EX %r14 EXPORT_SYMBOL(_mcount) @@ -53,7 +57,7 @@ ENTRY(ftrace_caller) #endif lgr %r3,%r14 la %r5,STACK_PTREGS(%r15) - basr %r14,%r1 + BASR_EX %r14,%r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER # The j instruction gets runtime patched to a nop instruction. # See ftrace_enable_ftrace_graph_caller. @@ -68,7 +72,7 @@ ftrace_graph_caller_end: #endif lg %r1,(STACK_PTREGS_PSW+8)(%r15) lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) - br %r1 + BR_EX %r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -81,6 +85,6 @@ ENTRY(return_to_handler) aghi %r15,STACK_FRAME_OVERHEAD lgr %r14,%r2 lmg %r2,%r5,32(%r15) - br %r14 + BR_EX %r14 #endif diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 9f3b5b382743..d5eed651b5ab 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -44,24 +44,6 @@ static int __init nospec_report(void) } arch_initcall(nospec_report); -#ifdef CONFIG_SYSFS -ssize_t cpu_show_spectre_v1(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); -} - -ssize_t cpu_show_spectre_v2(struct device *dev, - struct device_attribute *attr, char *buf) -{ - if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) - return sprintf(buf, "Mitigation: execute trampolines\n"); - if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) - return sprintf(buf, "Mitigation: limited branch prediction.\n"); - return sprintf(buf, "Vulnerable\n"); -} -#endif - #ifdef CONFIG_EXPOLINE int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); @@ -112,7 +94,6 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) s32 *epo; /* Second part of the instruction replace is always a nop */ - memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x00, 0x00 }, 4); for (epo = start; epo < end; epo++) { instr = (u8 *) epo + *epo; if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04) @@ -133,18 +114,34 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) br = thunk + (*(int *)(thunk + 2)) * 2; else continue; - if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0) + /* Check for unconditional branch 0x07f? or 0x47f???? */ + if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0) continue; + + memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x07, 0x00 }, 4); switch (type) { case BRCL_EXPOLINE: - /* brcl to thunk, replace with br + nop */ insnbuf[0] = br[0]; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + if (br[0] == 0x47) { + /* brcl to b, replace with bc + nopr */ + insnbuf[2] = br[2]; + insnbuf[3] = br[3]; + } else { + /* brcl to br, replace with bcr + nop */ + } break; case BRASL_EXPOLINE: - /* brasl to thunk, replace with basr + nop */ - insnbuf[0] = 0x0d; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + if (br[0] == 0x47) { + /* brasl to b, replace with bas + nopr */ + insnbuf[0] = 0x4d; + insnbuf[2] = br[2]; + insnbuf[3] = br[3]; + } else { + /* brasl to br, replace with basr + nop */ + insnbuf[0] = 0x0d; + } break; } diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c new file mode 100644 index 000000000000..8affad5f18cb --- /dev/null +++ b/arch/s390/kernel/nospec-sysfs.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/device.h> +#include <linux/cpu.h> +#include <asm/facility.h> +#include <asm/nospec-branch.h> + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) + return sprintf(buf, "Mitigation: execute trampolines\n"); + if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) + return sprintf(buf, "Mitigation: limited branch prediction\n"); + return sprintf(buf, "Vulnerable\n"); +} diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 7e1e40323b78..d99155793c26 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -739,6 +739,10 @@ static int __hw_perf_event_init(struct perf_event *event) */ rate = 0; if (attr->freq) { + if (!attr->sample_freq) { + err = -EINVAL; + goto out; + } rate = freq_to_sample_rate(&si, attr->sample_freq); rate = hw_limit_rate(&si, rate); attr->freq = 0; diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index a40ebd1d29d0..8e954c102639 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -7,8 +7,11 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/nospec-insn.h> #include <asm/sigp.h> + GEN_BR_THUNK %r9 + # # Issue "store status" for the current CPU to its prefix page # and call passed function afterwards @@ -67,9 +70,9 @@ ENTRY(store_status) st %r4,0(%r1) st %r5,4(%r1) stg %r2,8(%r1) - lgr %r1,%r2 + lgr %r9,%r2 lgr %r2,%r3 - br %r1 + BR_EX %r9 .section .bss .align 8 diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index e99187149f17..a049a7b9d6e8 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -13,6 +13,7 @@ #include <asm/ptrace.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> +#include <asm/nospec-insn.h> #include <asm/sigp.h> /* @@ -24,6 +25,8 @@ * (see below) in the resume process. * This function runs with disabled interrupts. */ + GEN_BR_THUNK %r14 + .section .text ENTRY(swsusp_arch_suspend) stmg %r6,%r15,__SF_GPRS(%r15) @@ -103,7 +106,7 @@ ENTRY(swsusp_arch_suspend) spx 0x318(%r1) lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) lghi %r2,0 - br %r14 + BR_EX %r14 /* * Restore saved memory image to correct place and restore register context. @@ -197,11 +200,10 @@ pgm_check_entry: larl %r15,init_thread_union ahi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) larl %r2,.Lpanic_string - larl %r3,sclp_early_printk lghi %r1,0 sam31 sigp %r1,%r0,SIGP_SET_ARCHITECTURE - basr %r14,%r3 + brasl %r14,sclp_early_printk larl %r3,.Ldisabled_wait_31 lpsw 0(%r3) 4: @@ -267,7 +269,7 @@ restore_registers: /* Return 0 */ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) lghi %r2,0 - br %r14 + BR_EX %r14 .section .data..nosave,"aw",@progbits .align 8 diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index eb7b530d1783..4f1f5fc8139d 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -590,7 +590,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) gpa = READ_ONCE(scb_o->itdba) & ~0xffUL; if (gpa && (scb_s->ecb & ECB_TE)) { - if (!(gpa & ~0x1fffU)) { + if (!(gpa & ~0x1fffUL)) { rc = set_validity_icpt(scb_s, 0x0080U); goto unpin; } diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index d66751397e72..e1fa974ac500 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -7,6 +7,9 @@ #include <linux/linkage.h> #include <asm/export.h> +#include <asm/nospec-insn.h> + + GEN_BR_THUNK %r14 /* * void *memmove(void *dest, const void *src, size_t n) @@ -33,14 +36,14 @@ ENTRY(memmove) .Lmemmove_forward_remainder: larl %r5,.Lmemmove_mvc ex %r4,0(%r5) - br %r14 + BR_EX %r14 .Lmemmove_reverse: ic %r0,0(%r4,%r3) stc %r0,0(%r4,%r1) brctg %r4,.Lmemmove_reverse ic %r0,0(%r4,%r3) stc %r0,0(%r4,%r1) - br %r14 + BR_EX %r14 .Lmemmove_mvc: mvc 0(1,%r1),0(%r3) EXPORT_SYMBOL(memmove) @@ -77,7 +80,7 @@ ENTRY(memset) .Lmemset_clear_remainder: larl %r3,.Lmemset_xc ex %r4,0(%r3) - br %r14 + BR_EX %r14 .Lmemset_fill: stc %r3,0(%r2) cghi %r4,1 @@ -94,7 +97,7 @@ ENTRY(memset) .Lmemset_fill_remainder: larl %r3,.Lmemset_mvc ex %r4,0(%r3) - br %r14 + BR_EX %r14 .Lmemset_xc: xc 0(1,%r1),0(%r1) .Lmemset_mvc: @@ -117,7 +120,7 @@ ENTRY(memcpy) .Lmemcpy_remainder: larl %r5,.Lmemcpy_mvc ex %r4,0(%r5) - br %r14 + BR_EX %r14 .Lmemcpy_loop: mvc 0(256,%r1),0(%r3) la %r1,256(%r1) diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S index 25bb4643c4f4..9f794869c1b0 100644 --- a/arch/s390/net/bpf_jit.S +++ b/arch/s390/net/bpf_jit.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> +#include <asm/nospec-insn.h> #include "bpf_jit.h" /* @@ -54,7 +55,7 @@ ENTRY(sk_load_##NAME##_pos); \ clg %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */ \ jh sk_load_##NAME##_slow; \ LOAD %r14,-SIZE(%r3,%r12); /* Get data from skb */ \ - b OFF_OK(%r6); /* Return */ \ + B_EX OFF_OK,%r6; /* Return */ \ \ sk_load_##NAME##_slow:; \ lgr %r2,%r7; /* Arg1 = skb pointer */ \ @@ -64,11 +65,14 @@ sk_load_##NAME##_slow:; \ brasl %r14,skb_copy_bits; /* Get data from skb */ \ LOAD %r14,STK_OFF_TMP(%r15); /* Load from temp bufffer */ \ ltgr %r2,%r2; /* Set cc to (%r2 != 0) */ \ - br %r6; /* Return */ + BR_EX %r6; /* Return */ sk_load_common(word, 4, llgf) /* r14 = *(u32 *) (skb->data+offset) */ sk_load_common(half, 2, llgh) /* r14 = *(u16 *) (skb->data+offset) */ + GEN_BR_THUNK %r6 + GEN_B_THUNK OFF_OK,%r6 + /* * Load 1 byte from SKB (optimized version) */ @@ -80,7 +84,7 @@ ENTRY(sk_load_byte_pos) clg %r3,STK_OFF_HLEN(%r15) # Offset >= hlen? jnl sk_load_byte_slow llgc %r14,0(%r3,%r12) # Get byte from skb - b OFF_OK(%r6) # Return OK + B_EX OFF_OK,%r6 # Return OK sk_load_byte_slow: lgr %r2,%r7 # Arg1 = skb pointer @@ -90,7 +94,7 @@ sk_load_byte_slow: brasl %r14,skb_copy_bits # Get data from skb llgc %r14,STK_OFF_TMP(%r15) # Load result from temp buffer ltgr %r2,%r2 # Set cc to (%r2 != 0) - br %r6 # Return cc + BR_EX %r6 # Return cc #define sk_negative_common(NAME, SIZE, LOAD) \ sk_load_##NAME##_slow_neg:; \ @@ -104,7 +108,7 @@ sk_load_##NAME##_slow_neg:; \ jz bpf_error; \ LOAD %r14,0(%r2); /* Get data from pointer */ \ xr %r3,%r3; /* Set cc to zero */ \ - br %r6; /* Return cc */ + BR_EX %r6; /* Return cc */ sk_negative_common(word, 4, llgf) sk_negative_common(half, 2, llgh) @@ -113,4 +117,4 @@ sk_negative_common(byte, 1, llgc) bpf_error: # force a return 0 from jit handler ltgr %r15,%r15 # Set condition code - br %r6 + BR_EX %r6 diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 33e2785f6842..11cd151733d4 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -25,6 +25,8 @@ #include <linux/bpf.h> #include <asm/cacheflush.h> #include <asm/dis.h> +#include <asm/facility.h> +#include <asm/nospec-branch.h> #include <asm/set_memory.h> #include "bpf_jit.h" @@ -43,6 +45,8 @@ struct bpf_jit { int base_ip; /* Base address for literal pool */ int ret0_ip; /* Address of return 0 */ int exit_ip; /* Address of exit */ + int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */ + int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */ int tail_call_start; /* Tail call start offset */ int labels[1]; /* Labels for local jumps */ }; @@ -252,6 +256,19 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) REG_SET_SEEN(b2); \ }) +#define EMIT6_PCREL_RILB(op, b, target) \ +({ \ + int rel = (target - jit->prg) / 2; \ + _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff); \ + REG_SET_SEEN(b); \ +}) + +#define EMIT6_PCREL_RIL(op, target) \ +({ \ + int rel = (target - jit->prg) / 2; \ + _EMIT6(op | rel >> 16, rel & 0xffff); \ +}) + #define _EMIT6_IMM(op, imm) \ ({ \ unsigned int __imm = (imm); \ @@ -471,8 +488,45 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) EMIT4(0xb9040000, REG_2, BPF_REG_0); /* Restore registers */ save_restore_regs(jit, REGS_RESTORE); + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) { + jit->r14_thunk_ip = jit->prg; + /* Generate __s390_indirect_jump_r14 thunk */ + if (test_facility(35)) { + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); + } else { + /* larl %r1,.+14 */ + EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); + /* ex 0,0(%r1) */ + EMIT4_DISP(0x44000000, REG_0, REG_1, 0); + } + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + } /* br %r14 */ _EMIT2(0x07fe); + + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable && + (jit->seen & SEEN_FUNC)) { + jit->r1_thunk_ip = jit->prg; + /* Generate __s390_indirect_jump_r1 thunk */ + if (test_facility(35)) { + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + /* br %r1 */ + _EMIT2(0x07f1); + } else { + /* larl %r1,.+14 */ + EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); + /* ex 0,S390_lowcore.br_r1_tampoline */ + EMIT4_DISP(0x44000000, REG_0, REG_0, + offsetof(struct lowcore, br_r1_trampoline)); + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + } + } } /* @@ -978,8 +1032,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i /* lg %w1,<d(imm)>(%l) */ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L, EMIT_CONST_U64(func)); - /* basr %r14,%w1 */ - EMIT2(0x0d00, REG_14, REG_W1); + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) { + /* brasl %r14,__s390_indirect_jump_r1 */ + EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip); + } else { + /* basr %r14,%w1 */ + EMIT2(0x0d00, REG_14, REG_W1); + } /* lgr %b0,%r2: load return value into %b0 */ EMIT4(0xb9040000, BPF_REG_0, REG_2); if ((jit->seen & SEEN_SKB) && diff --git a/arch/sh/kernel/entry-common.S b/arch/sh/kernel/entry-common.S index c001f782c5f1..28cc61216b64 100644 --- a/arch/sh/kernel/entry-common.S +++ b/arch/sh/kernel/entry-common.S @@ -255,7 +255,7 @@ debug_trap: mov.l @r8, r8 jsr @r8 nop - bra __restore_all + bra ret_from_exception nop CFI_ENDPROC diff --git a/arch/sparc/include/asm/atomic_64.h b/arch/sparc/include/asm/atomic_64.h index abad97edf736..28db058d471b 100644 --- a/arch/sparc/include/asm/atomic_64.h +++ b/arch/sparc/include/asm/atomic_64.h @@ -83,7 +83,11 @@ ATOMIC_OPS(xor) #define atomic64_add_negative(i, v) (atomic64_add_return(i, v) < 0) #define atomic_cmpxchg(v, o, n) (cmpxchg(&((v)->counter), (o), (n))) -#define atomic_xchg(v, new) (xchg(&((v)->counter), new)) + +static inline int atomic_xchg(atomic_t *v, int new) +{ + return xchg(&v->counter, new); +} static inline int __atomic_add_unless(atomic_t *v, int a, int u) { diff --git a/arch/sparc/include/asm/bug.h b/arch/sparc/include/asm/bug.h index 6f17528356b2..ea53e418f6c0 100644 --- a/arch/sparc/include/asm/bug.h +++ b/arch/sparc/include/asm/bug.h @@ -9,10 +9,14 @@ void do_BUG(const char *file, int line); #define BUG() do { \ do_BUG(__FILE__, __LINE__); \ + barrier_before_unreachable(); \ __builtin_trap(); \ } while (0) #else -#define BUG() __builtin_trap() +#define BUG() do { \ + barrier_before_unreachable(); \ + __builtin_trap(); \ +} while (0) #endif #define HAVE_ARCH_BUG diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c index 1a0fa10cb6b7..32bae68e34c1 100644 --- a/arch/sparc/kernel/vio.c +++ b/arch/sparc/kernel/vio.c @@ -403,7 +403,7 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, if (err) { printk(KERN_ERR "VIO: Could not register device %s, err=%d\n", dev_name(&vdev->dev), err); - kfree(vdev); + put_device(&vdev->dev); return NULL; } if (vdev->dp) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index e56dbc67e837..97c57b5f8d57 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -163,7 +163,8 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom) if (status != EFI_SUCCESS) goto free_struct; - memcpy(rom->romdata, pci->romimage, pci->romsize); + memcpy(rom->romdata, (void *)(unsigned long)pci->romimage, + pci->romsize); return status; free_struct: @@ -269,7 +270,8 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom) if (status != EFI_SUCCESS) goto free_struct; - memcpy(rom->romdata, pci->romimage, pci->romsize); + memcpy(rom->romdata, (void *)(unsigned long)pci->romimage, + pci->romsize); return status; free_struct: diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 011a47b4587c..717c9219d00e 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1162,16 +1162,13 @@ int x86_perf_event_set_period(struct perf_event *event) per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; - if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) || - local64_read(&hwc->prev_count) != (u64)-left) { - /* - * The hw event starts counting from this event offset, - * mark it to be able to extra future deltas: - */ - local64_set(&hwc->prev_count, (u64)-left); + /* + * The hw event starts counting from this event offset, + * mark it to be able to extra future deltas: + */ + local64_set(&hwc->prev_count, (u64)-left); - wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); - } + wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask); /* * Due to erratum on certan cpu we need diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 9b18a227fff7..6965ee8c4b8a 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2201,16 +2201,23 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) int bit, loops; u64 status; int handled; + int pmu_enabled; cpuc = this_cpu_ptr(&cpu_hw_events); /* + * Save the PMU state. + * It needs to be restored when leaving the handler. + */ + pmu_enabled = cpuc->enabled; + /* * No known reason to not always do late ACK, * but just in case do it opt-in. */ if (!x86_pmu.late_ack) apic_write(APIC_LVTPC, APIC_DM_NMI); intel_bts_disable_local(); + cpuc->enabled = 0; __intel_pmu_disable_all(); handled = intel_pmu_drain_bts_buffer(); handled += intel_bts_interrupt(); @@ -2320,7 +2327,8 @@ again: done: /* Only restore PMU state when it's active. See x86_pmu_disable(). */ - if (cpuc->enabled) + cpuc->enabled = pmu_enabled; + if (pmu_enabled) __intel_pmu_enable_all(0, true); intel_bts_enable_local(); @@ -3188,7 +3196,7 @@ glp_get_event_constraints(struct cpu_hw_events *cpuc, int idx, * Therefore the effective (average) period matches the requested period, * despite coarser hardware granularity. */ -static unsigned bdw_limit_period(struct perf_event *event, unsigned left) +static u64 bdw_limit_period(struct perf_event *event, u64 left) { if ((event->hw.config & INTEL_ARCH_EVENT_MASK) == X86_CONFIG(.event=0xc0, .umask=0x01)) { diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 8156e47da7ba..10b39d44981c 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1150,6 +1150,7 @@ static void setup_pebs_sample_data(struct perf_event *event, if (pebs == NULL) return; + regs->flags &= ~PERF_EFLAGS_EXACT; sample_type = event->attr.sample_type; dsrc = sample_type & PERF_SAMPLE_DATA_SRC; @@ -1194,7 +1195,6 @@ static void setup_pebs_sample_data(struct perf_event *event, */ *regs = *iregs; regs->flags = pebs->flags; - set_linear_ip(regs, pebs->ip); if (sample_type & PERF_SAMPLE_REGS_INTR) { regs->ax = pebs->ax; @@ -1230,13 +1230,22 @@ static void setup_pebs_sample_data(struct perf_event *event, #endif } - if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) { - regs->ip = pebs->real_ip; - regs->flags |= PERF_EFLAGS_EXACT; - } else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs)) - regs->flags |= PERF_EFLAGS_EXACT; - else - regs->flags &= ~PERF_EFLAGS_EXACT; + if (event->attr.precise_ip > 1) { + /* Haswell and later have the eventing IP, so use it: */ + if (x86_pmu.intel_cap.pebs_format >= 2) { + set_linear_ip(regs, pebs->real_ip); + regs->flags |= PERF_EFLAGS_EXACT; + } else { + /* Otherwise use PEBS off-by-1 IP: */ + set_linear_ip(regs, pebs->ip); + + /* ... and try to fix it up using the LBR entries: */ + if (intel_pmu_pebs_fixup_ip(regs)) + regs->flags |= PERF_EFLAGS_EXACT; + } + } else + set_linear_ip(regs, pebs->ip); + if ((sample_type & (PERF_SAMPLE_ADDR | PERF_SAMPLE_PHYS_ADDR)) && x86_pmu.intel_cap.pebs_format >= 1) @@ -1303,17 +1312,84 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit) return NULL; } +/* + * Special variant of intel_pmu_save_and_restart() for auto-reload. + */ +static int +intel_pmu_save_and_restart_reload(struct perf_event *event, int count) +{ + struct hw_perf_event *hwc = &event->hw; + int shift = 64 - x86_pmu.cntval_bits; + u64 period = hwc->sample_period; + u64 prev_raw_count, new_raw_count; + s64 new, old; + + WARN_ON(!period); + + /* + * drain_pebs() only happens when the PMU is disabled. + */ + WARN_ON(this_cpu_read(cpu_hw_events.enabled)); + + prev_raw_count = local64_read(&hwc->prev_count); + rdpmcl(hwc->event_base_rdpmc, new_raw_count); + local64_set(&hwc->prev_count, new_raw_count); + + /* + * Since the counter increments a negative counter value and + * overflows on the sign switch, giving the interval: + * + * [-period, 0] + * + * the difference between two consequtive reads is: + * + * A) value2 - value1; + * when no overflows have happened in between, + * + * B) (0 - value1) + (value2 - (-period)); + * when one overflow happened in between, + * + * C) (0 - value1) + (n - 1) * (period) + (value2 - (-period)); + * when @n overflows happened in between. + * + * Here A) is the obvious difference, B) is the extension to the + * discrete interval, where the first term is to the top of the + * interval and the second term is from the bottom of the next + * interval and C) the extension to multiple intervals, where the + * middle term is the whole intervals covered. + * + * An equivalent of C, by reduction, is: + * + * value2 - value1 + n * period + */ + new = ((s64)(new_raw_count << shift) >> shift); + old = ((s64)(prev_raw_count << shift) >> shift); + local64_add(new - old + count * period, &event->count); + + perf_event_update_userpage(event); + + return 0; +} + static void __intel_pmu_pebs_event(struct perf_event *event, struct pt_regs *iregs, void *base, void *top, int bit, int count) { + struct hw_perf_event *hwc = &event->hw; struct perf_sample_data data; struct pt_regs regs; void *at = get_next_pebs_record_by_bit(base, top, bit); - if (!intel_pmu_save_and_restart(event) && - !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD)) + if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) { + /* + * Now, auto-reload is only enabled in fixed period mode. + * The reload value is always hwc->sample_period. + * May need to change it, if auto-reload is enabled in + * freq mode later. + */ + intel_pmu_save_and_restart_reload(event, count); + } else if (!intel_pmu_save_and_restart(event)) return; while (count > 1) { @@ -1365,8 +1441,11 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) return; n = top - at; - if (n <= 0) + if (n <= 0) { + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_save_and_restart_reload(event, 0); return; + } __intel_pmu_pebs_event(event, iregs, at, top, 0, n); } @@ -1389,8 +1468,22 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) ds->pebs_index = ds->pebs_buffer_base; - if (unlikely(base >= top)) + if (unlikely(base >= top)) { + /* + * The drain_pebs() could be called twice in a short period + * for auto-reload event in pmu::read(). There are no + * overflows have happened in between. + * It needs to call intel_pmu_save_and_restart_reload() to + * update the event->count for this case. + */ + for_each_set_bit(bit, (unsigned long *)&cpuc->pebs_enabled, + x86_pmu.max_pebs_events) { + event = cpuc->events[bit]; + if (event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD) + intel_pmu_save_and_restart_reload(event, 0); + } return; + } for (at = base; at < top; at += x86_pmu.pebs_record_size) { struct pebs_record_nhm *p = at; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 8e4ea143ed96..dc4728eccfd8 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -556,7 +556,7 @@ struct x86_pmu { struct x86_pmu_quirk *quirks; int perfctr_second_write; bool late_ack; - unsigned (*limit_period)(struct perf_event *event, unsigned l); + u64 (*limit_period)(struct perf_event *event, u64 l); /* * sysfs attrs diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index cf5961ca8677..4cd6a3b71824 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -218,13 +218,11 @@ static inline int alternatives_text_reserved(void *start, void *end) */ #define alternative_call_2(oldfunc, newfunc1, feature1, newfunc2, feature2, \ output, input...) \ -{ \ asm volatile (ALTERNATIVE_2("call %P[old]", "call %P[new1]", feature1,\ "call %P[new2]", feature2) \ : output, ASM_CALL_CONSTRAINT \ : [old] "i" (oldfunc), [new1] "i" (newfunc1), \ - [new2] "i" (newfunc2), ## input); \ -} + [new2] "i" (newfunc2), ## input) /* * use this macro(s) if you need more than one output parameter diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 23a65439c37c..403e97d5e243 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -198,7 +198,6 @@ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ - #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ @@ -207,11 +206,19 @@ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -272,9 +279,10 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -331,6 +339,7 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ /* * BUG word(s) @@ -360,5 +369,6 @@ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 4f8b80199672..174b9c41efce 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -908,7 +908,7 @@ struct kvm_x86_ops { int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ bool (*cpu_has_accelerated_tpr)(void); - bool (*cpu_has_high_real_mode_segbase)(void); + bool (*has_emulated_msr)(int index); void (*cpuid_update)(struct kvm_vcpu *vcpu); int (*vm_init)(struct kvm *kvm); diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 1de72ce514cd..ed97ef3b48a7 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -192,7 +192,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { - /* pkey 0 is the default and always allocated */ + /* pkey 0 is the default and allocated implicitly */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index eb83ff1bae8f..504b21692d32 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -42,6 +42,8 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ +#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ @@ -68,6 +70,11 @@ #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ +#define ARCH_CAP_SSB_NO (1 << 4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e @@ -337,6 +344,8 @@ #define MSR_AMD64_IBSOPDATA4 0xc001103d #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f928ad9b143f..8b38df98548e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -217,6 +217,14 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +/* The Speculative Store Bypass disable variants */ +enum ssb_mitigation { + SPEC_STORE_BYPASS_NONE, + SPEC_STORE_BYPASS_DISABLE, + SPEC_STORE_BYPASS_PRCTL, + SPEC_STORE_BYPASS_SECCOMP, +}; + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; @@ -241,22 +249,27 @@ static inline void vmexit_fill_RSB(void) #endif } -#define alternative_msr_write(_msr, _val, _feature) \ - asm volatile(ALTERNATIVE("", \ - "movl %[msr], %%ecx\n\t" \ - "movl %[val], %%eax\n\t" \ - "movl $0, %%edx\n\t" \ - "wrmsr", \ - _feature) \ - : : [msr] "i" (_msr), [val] "i" (_val) \ - : "eax", "ecx", "edx", "memory") +static __always_inline +void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) +{ + asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) + : : "c" (msr), + "a" ((u32)val), + "d" ((u32)(val >> 32)), + [feature] "i" (feature) + : "memory"); +} static inline void indirect_branch_prediction_barrier(void) { - alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, - X86_FEATURE_USE_IBPB); + u64 val = PRED_CMD_IBPB; + + alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); } +/* The Intel SPEC CTRL MSR base value cache */ +extern u64 x86_spec_ctrl_base; + /* * With retpoline, we must use IBRS to restrict branch prediction * before calling into firmware. @@ -265,14 +278,18 @@ static inline void indirect_branch_prediction_barrier(void) */ #define firmware_restrict_branch_speculation_start() \ do { \ + u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ + \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ + u64 val = x86_spec_ctrl_base; \ + \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index a0ba1ffda0df..851c04b7a092 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_PKEYS_H #define _ASM_X86_PKEYS_H +#define ARCH_DEFAULT_PKEY 0 + #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, @@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm_struct *mm); static inline int execute_only_pkey(struct mm_struct *mm) { if (!boot_cpu_has(X86_FEATURE_OSPKE)) - return 0; + return ARCH_DEFAULT_PKEY; return __execute_only_pkey(mm); } @@ -49,13 +51,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { /* * "Allocated" pkeys are those that have been returned - * from pkey_alloc(). pkey 0 is special, and never - * returned from pkey_alloc(). + * from pkey_alloc() or pkey 0 which is allocated + * implicitly when the mm is created. */ - if (pkey <= 0) + if (pkey < 0) return false; if (pkey >= arch_max_pkey()) return false; + /* + * The exec-only pkey is set in the allocation map, but + * is not available to any of the user interfaces like + * mprotect_pkey(). + */ + if (pkey == mm->context.execute_only_pkey) + return false; + return mm_pkey_allocation_map(mm) & (1U << pkey); } diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h new file mode 100644 index 000000000000..ae7c2c5cd7f0 --- /dev/null +++ b/arch/x86/include/asm/spec-ctrl.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SPECCTRL_H_ +#define _ASM_X86_SPECCTRL_H_ + +#include <linux/thread_info.h> +#include <asm/nospec-branch.h> + +/* + * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR + * the guest has, while on VMEXIT we restore the host view. This + * would be easier if SPEC_CTRL were architecturally maskable or + * shadowable for guests but this is not (currently) the case. + * Takes the guest view of SPEC_CTRL MSR as a parameter and also + * the guest's version of VIRT_SPEC_CTRL, if emulated. + */ +extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); + +/** + * x86_spec_ctrl_set_guest - Set speculation control registers for the guest + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); +} + +/** + * x86_spec_ctrl_restore_host - Restore host speculation control registers + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); +} + +/* AMD specific Speculative Store Bypass MSR data */ +extern u64 x86_amd_ls_cfg_base; +extern u64 x86_amd_ls_cfg_ssbd_mask; + +static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); +} + +static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); +} + +static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) +{ + return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; +} + +#ifdef CONFIG_SMP +extern void speculative_store_bypass_ht_init(void); +#else +static inline void speculative_store_bypass_ht_init(void) { } +#endif + +extern void speculative_store_bypass_update(unsigned long tif); + +static inline void speculative_store_bypass_update_current(void) +{ + speculative_store_bypass_update(current_thread_info()->flags); +} + +#endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 0242d91734ee..b1c9129f64fc 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -88,6 +88,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_SSBD 5 /* Reduced data speculation */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -115,6 +116,7 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_SSBD (1 << TIF_SSBD) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -155,7 +157,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 704f31315dde..875ca99b82ee 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -131,7 +131,12 @@ static inline unsigned long build_cr3(pgd_t *pgd, u16 asid) static inline unsigned long build_cr3_noflush(pgd_t *pgd, u16 asid) { VM_WARN_ON_ONCE(asid > MAX_ASID_AVAILABLE); - VM_WARN_ON_ONCE(!this_cpu_has(X86_FEATURE_PCID)); + /* + * Use boot_cpu_has() instead of this_cpu_has() as this function + * might be called during early boot. This should work even after + * boot because all CPU's the have same capabilities: + */ + VM_WARN_ON_ONCE(!boot_cpu_has(X86_FEATURE_PCID)); return __sme_pa(pgd) | kern_pcid(asid) | CR3_NOFLUSH; } diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5942aa5f569b..ebdcc368a2d3 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1481,7 +1481,7 @@ void setup_local_APIC(void) * TODO: set up through-local-APIC from through-I/O-APIC? --macro */ value = apic_read(APIC_LVT0) & APIC_LVT_MASKED; - if (!cpu && (pic_mode || !value)) { + if (!cpu && (pic_mode || !value || skip_ioapic_setup)) { value = APIC_DM_EXTINT; apic_printk(APIC_VERBOSE, "enabled ExtINT on CPU#%d\n", cpu); } else { diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index e7d5a7883632..90574f731c05 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -10,6 +10,7 @@ #include <asm/processor.h> #include <asm/apic.h> #include <asm/cpu.h> +#include <asm/spec-ctrl.h> #include <asm/smp.h> #include <asm/pci-direct.h> #include <asm/delay.h> @@ -554,6 +555,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) rdmsrl(MSR_FAM10H_NODE_ID, value); nodes_per_socket = ((value >> 3) & 7) + 1; } + + if (c->x86 >= 0x15 && c->x86 <= 0x17) { + unsigned int bit; + + switch (c->x86) { + case 0x15: bit = 54; break; + case 0x16: bit = 33; break; + case 0x17: bit = 10; break; + default: return; + } + /* + * Try to cache the base value so further operations can + * avoid RMW. If that faults, do not enable SSBD. + */ + if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { + setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD); + setup_force_cpu_cap(X86_FEATURE_SSBD); + x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; + } + } } static void early_init_amd(struct cpuinfo_x86 *c) @@ -765,6 +786,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static void init_amd_zn(struct cpuinfo_x86 *c) { + set_cpu_cap(c, X86_FEATURE_ZEN); /* * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects * all up to and including B1. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bfca937bdcc3..7416fc206b4a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -12,8 +12,10 @@ #include <linux/utsname.h> #include <linux/cpu.h> #include <linux/module.h> +#include <linux/nospec.h> +#include <linux/prctl.h> -#include <asm/nospec-branch.h> +#include <asm/spec-ctrl.h> #include <asm/cmdline.h> #include <asm/bugs.h> #include <asm/processor.h> @@ -27,6 +29,27 @@ #include <asm/intel-family.h> static void __init spectre_v2_select_mitigation(void); +static void __init ssb_select_mitigation(void); + +/* + * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any + * writes to SPEC_CTRL contain whatever reserved bits have been set. + */ +u64 __ro_after_init x86_spec_ctrl_base; +EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); + +/* + * The vendor and possibly platform specific bits which can be modified in + * x86_spec_ctrl_base. + */ +static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; + +/* + * AMD specific MSR info for Speculative Store Bypass control. + * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu(). + */ +u64 __ro_after_init x86_amd_ls_cfg_base; +u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; void __init check_bugs(void) { @@ -37,9 +60,27 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); } + /* + * Read the SPEC_CTRL MSR to account for reserved bits which may + * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD + * init code as it is not enumerated and depends on the family. + */ + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + + /* Allow STIBP in MSR_SPEC_CTRL if supported */ + if (boot_cpu_has(X86_FEATURE_STIBP)) + x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; + /* Select the proper spectre mitigation before patching alternatives */ spectre_v2_select_mitigation(); + /* + * Select proper mitigation for any exposure to the Speculative Store + * Bypass vulnerability. + */ + ssb_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -93,7 +134,76 @@ static const char *spectre_v2_strings[] = { #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt -static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + +void +x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) +{ + u64 msrval, guestval, hostval = x86_spec_ctrl_base; + struct thread_info *ti = current_thread_info(); + + /* Is MSR_SPEC_CTRL implemented ? */ + if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { + /* + * Restrict guest_spec_ctrl to supported values. Clear the + * modifiable bits in the host base value and or the + * modifiable bits from the guest value. + */ + guestval = hostval & ~x86_spec_ctrl_mask; + guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; + + /* SSBD controlled in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + hostval |= ssbd_tif_to_spec_ctrl(ti->flags); + + if (hostval != guestval) { + msrval = setguest ? guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); + } + } + + /* + * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update + * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported. + */ + if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) && + !static_cpu_has(X86_FEATURE_VIRT_SSBD)) + return; + + /* + * If the host has SSBD mitigation enabled, force it in the host's + * virtual MSR value. If its not permanently enabled, evaluate + * current's TIF_SSBD thread flag. + */ + if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE)) + hostval = SPEC_CTRL_SSBD; + else + hostval = ssbd_tif_to_spec_ctrl(ti->flags); + + /* Sanitize the guest value */ + guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD; + + if (hostval != guestval) { + unsigned long tif; + + tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : + ssbd_spec_ctrl_to_tif(hostval); + + speculative_store_bypass_update(tif); + } +} +EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); + +static void x86_amd_ssb_disable(void) +{ + u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; + + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD); + else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + wrmsrl(MSR_AMD64_LS_CFG, msrval); +} #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -312,32 +422,289 @@ retpoline_auto: } #undef pr_fmt +#define pr_fmt(fmt) "Speculative Store Bypass: " fmt + +static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE; + +/* The kernel command line selection */ +enum ssb_mitigation_cmd { + SPEC_STORE_BYPASS_CMD_NONE, + SPEC_STORE_BYPASS_CMD_AUTO, + SPEC_STORE_BYPASS_CMD_ON, + SPEC_STORE_BYPASS_CMD_PRCTL, + SPEC_STORE_BYPASS_CMD_SECCOMP, +}; + +static const char *ssb_strings[] = { + [SPEC_STORE_BYPASS_NONE] = "Vulnerable", + [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", + [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", + [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp", +}; + +static const struct { + const char *option; + enum ssb_mitigation_cmd cmd; +} ssb_mitigation_options[] = { + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ + { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ + { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */ +}; + +static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) +{ + enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO; + char arg[20]; + int ret, i; + + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { + return SPEC_STORE_BYPASS_CMD_NONE; + } else { + ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", + arg, sizeof(arg)); + if (ret < 0) + return SPEC_STORE_BYPASS_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) { + if (!match_option(arg, ret, ssb_mitigation_options[i].option)) + continue; + + cmd = ssb_mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(ssb_mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", arg); + return SPEC_STORE_BYPASS_CMD_AUTO; + } + } + + return cmd; +} + +static enum ssb_mitigation __init __ssb_select_mitigation(void) +{ + enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; + enum ssb_mitigation_cmd cmd; + + if (!boot_cpu_has(X86_FEATURE_SSBD)) + return mode; + + cmd = ssb_parse_cmdline(); + if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) && + (cmd == SPEC_STORE_BYPASS_CMD_NONE || + cmd == SPEC_STORE_BYPASS_CMD_AUTO)) + return mode; + + switch (cmd) { + case SPEC_STORE_BYPASS_CMD_AUTO: + case SPEC_STORE_BYPASS_CMD_SECCOMP: + /* + * Choose prctl+seccomp as the default mode if seccomp is + * enabled. + */ + if (IS_ENABLED(CONFIG_SECCOMP)) + mode = SPEC_STORE_BYPASS_SECCOMP; + else + mode = SPEC_STORE_BYPASS_PRCTL; + break; + case SPEC_STORE_BYPASS_CMD_ON: + mode = SPEC_STORE_BYPASS_DISABLE; + break; + case SPEC_STORE_BYPASS_CMD_PRCTL: + mode = SPEC_STORE_BYPASS_PRCTL; + break; + case SPEC_STORE_BYPASS_CMD_NONE: + break; + } + + /* + * We have three CPU feature flags that are in play here: + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. + * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass + * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation + */ + if (mode == SPEC_STORE_BYPASS_DISABLE) { + setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); + /* + * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses + * a completely different MSR and bit dependent on family. + */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + break; + case X86_VENDOR_AMD: + x86_amd_ssb_disable(); + break; + } + } + + return mode; +} + +static void ssb_select_mitigation(void) +{ + ssb_mode = __ssb_select_mitigation(); + + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + pr_info("%s\n", ssb_strings[ssb_mode]); +} + +#undef pr_fmt +#define pr_fmt(fmt) "Speculation prctl: " fmt + +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + bool update; + + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && + ssb_mode != SPEC_STORE_BYPASS_SECCOMP) + return -ENXIO; + + switch (ctrl) { + case PR_SPEC_ENABLE: + /* If speculation is force disabled, enable is not allowed */ + if (task_spec_ssb_force_disable(task)) + return -EPERM; + task_clear_spec_ssb_disable(task); + update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_DISABLE: + task_set_spec_ssb_disable(task); + update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_FORCE_DISABLE: + task_set_spec_ssb_disable(task); + task_set_spec_ssb_force_disable(task); + update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + break; + default: + return -ERANGE; + } + + /* + * If being set on non-current task, delay setting the CPU + * mitigation until it is next scheduled. + */ + if (task == current && update) + speculative_store_bypass_update_current(); + + return 0; +} + +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_set(task, ctrl); + default: + return -ENODEV; + } +} + +#ifdef CONFIG_SECCOMP +void arch_seccomp_spec_mitigate(struct task_struct *task) +{ + if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) + ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); +} +#endif + +static int ssb_prctl_get(struct task_struct *task) +{ + switch (ssb_mode) { + case SPEC_STORE_BYPASS_DISABLE: + return PR_SPEC_DISABLE; + case SPEC_STORE_BYPASS_SECCOMP: + case SPEC_STORE_BYPASS_PRCTL: + if (task_spec_ssb_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ssb_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + default: + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + return PR_SPEC_ENABLE; + return PR_SPEC_NOT_AFFECTED; + } +} + +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_get(task); + default: + return -ENODEV; + } +} + +void x86_spec_ctrl_setup_ap(void) +{ + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_ssb_disable(); +} #ifdef CONFIG_SYSFS -ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) + +static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) { - if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); - if (boot_cpu_has(X86_FEATURE_PTI)) - return sprintf(buf, "Mitigation: PTI\n"); + + switch (bug) { + case X86_BUG_CPU_MELTDOWN: + if (boot_cpu_has(X86_FEATURE_PTI)) + return sprintf(buf, "Mitigation: PTI\n"); + + break; + + case X86_BUG_SPECTRE_V1: + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + + case X86_BUG_SPECTRE_V2: + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + spectre_v2_module_string()); + + case X86_BUG_SPEC_STORE_BYPASS: + return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); + + default: + break; + } + return sprintf(buf, "Vulnerable\n"); } +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN); +} + ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) - return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1); } ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - return sprintf(buf, "Not affected\n"); + return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); +} - return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - spectre_v2_module_string()); +ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS); } #endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cf6380200dc2..48e98964ecad 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -733,17 +733,32 @@ static void init_speculation_control(struct cpuinfo_x86 *c) * and they also have a different bit for STIBP support. Also, * a hypervisor might have set the individual AMD bits even on * Intel CPUs, for finer-grained selection of what's available. - * - * We use the AMD bits in 0x8000_0008 EBX as the generic hardware - * features, which are visible in /proc/cpuinfo and used by the - * kernel. So set those accordingly from the Intel bits. */ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { set_cpu_cap(c, X86_FEATURE_IBRS); set_cpu_cap(c, X86_FEATURE_IBPB); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); + + if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) || + cpu_has(c, X86_FEATURE_VIRT_SSBD)) + set_cpu_cap(c, X86_FEATURE_SSBD); + + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } + + if (cpu_has(c, X86_FEATURE_AMD_IBPB)) + set_cpu_cap(c, X86_FEATURE_IBPB); + + if (cpu_has(c, X86_FEATURE_AMD_STIBP)) { + set_cpu_cap(c, X86_FEATURE_STIBP); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } } void get_cpu_cap(struct cpuinfo_x86 *c) @@ -894,21 +909,55 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { {} }; -static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) +static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PINEVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_LINCROFT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_PENWELL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CLOVERVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_CEDARVIEW }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, + { X86_VENDOR_CENTAUR, 5, }, + { X86_VENDOR_INTEL, 5, }, + { X86_VENDOR_NSC, 5, }, + { X86_VENDOR_AMD, 0x12, }, + { X86_VENDOR_AMD, 0x11, }, + { X86_VENDOR_AMD, 0x10, }, + { X86_VENDOR_AMD, 0xf, }, + { X86_VENDOR_ANY, 4, }, + {} +}; + +static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; - if (x86_match_cpu(cpu_no_meltdown)) - return false; - if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + if (!x86_match_cpu(cpu_no_spec_store_bypass) && + !(ia32_cap & ARCH_CAP_SSB_NO)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); + + if (x86_match_cpu(cpu_no_speculation)) + return; + + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + + if (x86_match_cpu(cpu_no_meltdown)) + return; + /* Rogue Data Cache Load? No! */ if (ia32_cap & ARCH_CAP_RDCL_NO) - return false; + return; - return true; + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); } /* @@ -958,12 +1007,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - if (!x86_match_cpu(cpu_no_speculation)) { - if (cpu_vulnerable_to_meltdown(c)) - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - } + cpu_set_bug_bits(c); fpu__init_system(c); @@ -1322,6 +1366,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) #endif mtrr_ap_init(); validate_apic_and_package_id(c); + x86_spec_ctrl_setup_ap(); } static __init int setup_noclflush(char *arg) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index e806b11a99af..37672d299e35 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -50,4 +50,6 @@ extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); +extern void x86_spec_ctrl_setup_ap(void); + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index c3af167d0a70..c895f38a7a5e 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -188,7 +188,10 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_IBPB); setup_clear_cpu_cap(X86_FEATURE_STIBP); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SSBD); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD); } /* diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 7be35b600299..2dae1b3c42fc 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1657,6 +1657,7 @@ static int rdtgroup_mkdir_ctrl_mon(struct kernfs_node *parent_kn, if (ret < 0) goto out_common_fail; closid = ret; + ret = 0; rdtgrp->closid = closid; list_add(&rdtgrp->rdtgroup_list, &rdt_all_groups); diff --git a/arch/x86/kernel/devicetree.c b/arch/x86/kernel/devicetree.c index 76e07698e6d1..7fa0855e4b9a 100644 --- a/arch/x86/kernel/devicetree.c +++ b/arch/x86/kernel/devicetree.c @@ -12,6 +12,7 @@ #include <linux/of_address.h> #include <linux/of_platform.h> #include <linux/of_irq.h> +#include <linux/libfdt.h> #include <linux/slab.h> #include <linux/pci.h> #include <linux/of_pci.h> @@ -200,19 +201,22 @@ static struct of_ioapic_type of_ioapic_type[] = static int dt_irqdomain_alloc(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs, void *arg) { - struct of_phandle_args *irq_data = (void *)arg; + struct irq_fwspec *fwspec = (struct irq_fwspec *)arg; struct of_ioapic_type *it; struct irq_alloc_info tmp; + int type_index; - if (WARN_ON(irq_data->args_count < 2)) + if (WARN_ON(fwspec->param_count < 2)) return -EINVAL; - if (irq_data->args[1] >= ARRAY_SIZE(of_ioapic_type)) + + type_index = fwspec->param[1]; + if (type_index >= ARRAY_SIZE(of_ioapic_type)) return -EINVAL; - it = &of_ioapic_type[irq_data->args[1]]; + it = &of_ioapic_type[type_index]; ioapic_set_alloc_attr(&tmp, NUMA_NO_NODE, it->trigger, it->polarity); tmp.ioapic_id = mpc_ioapic_id(mp_irqdomain_ioapic_idx(domain)); - tmp.ioapic_pin = irq_data->args[0]; + tmp.ioapic_pin = fwspec->param[0]; return mp_irqdomain_alloc(domain, virq, nr_irqs, &tmp); } @@ -276,14 +280,15 @@ static void __init x86_flattree_get_config(void) map_len = max(PAGE_SIZE - (initial_dtb & ~PAGE_MASK), (u64)128); - initial_boot_params = dt = early_memremap(initial_dtb, map_len); - size = of_get_flat_dt_size(); + dt = early_memremap(initial_dtb, map_len); + size = fdt_totalsize(dt); if (map_len < size) { early_memunmap(dt, map_len); - initial_boot_params = dt = early_memremap(initial_dtb, size); + dt = early_memremap(initial_dtb, size); map_len = size; } + early_init_dt_verify(dt); unflatten_and_copy_device_tree(); early_memunmap(dt, map_len); } diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index edfede768688..5167f3f74136 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -57,12 +57,17 @@ static void load_segments(void) static void machine_kexec_free_page_tables(struct kimage *image) { free_page((unsigned long)image->arch.pgd); + image->arch.pgd = NULL; #ifdef CONFIG_X86_PAE free_page((unsigned long)image->arch.pmd0); + image->arch.pmd0 = NULL; free_page((unsigned long)image->arch.pmd1); + image->arch.pmd1 = NULL; #endif free_page((unsigned long)image->arch.pte0); + image->arch.pte0 = NULL; free_page((unsigned long)image->arch.pte1); + image->arch.pte1 = NULL; } static int machine_kexec_alloc_page_tables(struct kimage *image) @@ -79,7 +84,6 @@ static int machine_kexec_alloc_page_tables(struct kimage *image) !image->arch.pmd0 || !image->arch.pmd1 || #endif !image->arch.pte0 || !image->arch.pte1) { - machine_kexec_free_page_tables(image); return -ENOMEM; } return 0; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 3b7427aa7d85..5bce2a88e8a3 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -38,9 +38,13 @@ static struct kexec_file_ops *kexec_file_loaders[] = { static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.p4d); + image->arch.p4d = NULL; free_page((unsigned long)image->arch.pud); + image->arch.pud = NULL; free_page((unsigned long)image->arch.pmd); + image->arch.pmd = NULL; free_page((unsigned long)image->arch.pte); + image->arch.pte = NULL; } static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) @@ -90,7 +94,6 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC)); return 0; err: - free_transition_pgtable(image); return result; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8bd1d8292cf7..988a98f34c66 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -39,6 +39,7 @@ #include <asm/switch_to.h> #include <asm/desc.h> #include <asm/prctl.h> +#include <asm/spec-ctrl.h> /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -279,6 +280,148 @@ static inline void switch_to_bitmap(struct tss_struct *tss, } } +#ifdef CONFIG_SMP + +struct ssb_state { + struct ssb_state *shared_state; + raw_spinlock_t lock; + unsigned int disable_state; + unsigned long local_state; +}; + +#define LSTATE_SSB 0 + +static DEFINE_PER_CPU(struct ssb_state, ssb_state); + +void speculative_store_bypass_ht_init(void) +{ + struct ssb_state *st = this_cpu_ptr(&ssb_state); + unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; + + st->local_state = 0; + + /* + * Shared state setup happens once on the first bringup + * of the CPU. It's not destroyed on CPU hotunplug. + */ + if (st->shared_state) + return; + + raw_spin_lock_init(&st->lock); + + /* + * Go over HT siblings and check whether one of them has set up the + * shared state pointer already. + */ + for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) { + if (cpu == this_cpu) + continue; + + if (!per_cpu(ssb_state, cpu).shared_state) + continue; + + /* Link it to the state of the sibling: */ + st->shared_state = per_cpu(ssb_state, cpu).shared_state; + return; + } + + /* + * First HT sibling to come up on the core. Link shared state of + * the first HT sibling to itself. The siblings on the same core + * which come up later will see the shared state pointer and link + * themself to the state of this CPU. + */ + st->shared_state = st; +} + +/* + * Logic is: First HT sibling enables SSBD for both siblings in the core + * and last sibling to disable it, disables it for the whole core. This how + * MSR_SPEC_CTRL works in "hardware": + * + * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL + */ +static __always_inline void amd_set_core_ssb_state(unsigned long tifn) +{ + struct ssb_state *st = this_cpu_ptr(&ssb_state); + u64 msr = x86_amd_ls_cfg_base; + + if (!static_cpu_has(X86_FEATURE_ZEN)) { + msr |= ssbd_tif_to_amd_ls_cfg(tifn); + wrmsrl(MSR_AMD64_LS_CFG, msr); + return; + } + + if (tifn & _TIF_SSBD) { + /* + * Since this can race with prctl(), block reentry on the + * same CPU. + */ + if (__test_and_set_bit(LSTATE_SSB, &st->local_state)) + return; + + msr |= x86_amd_ls_cfg_ssbd_mask; + + raw_spin_lock(&st->shared_state->lock); + /* First sibling enables SSBD: */ + if (!st->shared_state->disable_state) + wrmsrl(MSR_AMD64_LS_CFG, msr); + st->shared_state->disable_state++; + raw_spin_unlock(&st->shared_state->lock); + } else { + if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state)) + return; + + raw_spin_lock(&st->shared_state->lock); + st->shared_state->disable_state--; + if (!st->shared_state->disable_state) + wrmsrl(MSR_AMD64_LS_CFG, msr); + raw_spin_unlock(&st->shared_state->lock); + } +} +#else +static __always_inline void amd_set_core_ssb_state(unsigned long tifn) +{ + u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); + + wrmsrl(MSR_AMD64_LS_CFG, msr); +} +#endif + +static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) +{ + /* + * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL, + * so ssbd_tif_to_spec_ctrl() just works. + */ + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); +} + +static __always_inline void intel_set_ssb_state(unsigned long tifn) +{ + u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); + + wrmsrl(MSR_IA32_SPEC_CTRL, msr); +} + +static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +{ + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) + amd_set_ssb_virt_state(tifn); + else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + amd_set_core_ssb_state(tifn); + else + intel_set_ssb_state(tifn); +} + +void speculative_store_bypass_update(unsigned long tif) +{ + preempt_disable(); + __speculative_store_bypass_update(tif); + preempt_enable(); +} + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss) { @@ -310,6 +453,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); + + if ((tifp ^ tifn) & _TIF_SSBD) + __speculative_store_bypass_update(tifn); } /* diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 9eb448c7859d..fa093b77689f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -528,6 +528,7 @@ void set_personality_64bit(void) clear_thread_flag(TIF_X32); /* Pretend that this comes from a 64bit execve */ task_pt_regs(current)->orig_ax = __NR_execve; + current_thread_info()->status &= ~TS_COMPAT; /* Ensure the corresponding mm is not marked. */ if (current->mm) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 6b841262b790..344d3c160f8d 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -77,6 +77,7 @@ #include <asm/i8259.h> #include <asm/realmode.h> #include <asm/misc.h> +#include <asm/spec-ctrl.h> /* Number of siblings per CPU package */ int smp_num_siblings = 1; @@ -245,6 +246,8 @@ static void notrace start_secondary(void *unused) */ check_tsc_sync_target(); + speculative_store_bypass_ht_init(); + /* * Lock vector_lock and initialize the vectors on this cpu * before setting the cpu online. We must set it online with @@ -1349,6 +1352,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_mtrr_aps_delayed_init(); smp_quirk_init_udelay(); + + speculative_store_bypass_ht_init(); } void arch_enable_nonboot_cpus_begin(void) @@ -1516,6 +1521,7 @@ static void remove_siblinginfo(int cpu) cpumask_clear(topology_core_cpumask(cpu)); c->phys_proc_id = 0; c->cpu_core_id = 0; + c->booted_cores = 0; cpumask_clear_cpu(cpu, cpu_sibling_setup_mask); recompute_smt_state(); } diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 4f544f2a7b06..d1f5c744142b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -367,7 +367,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(IBPB) | F(IBRS); + F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -395,7 +395,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(ARCH_CAPABILITIES); + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -481,6 +481,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= ~F(PKU); entry->edx &= kvm_cpuid_7_0_edx_x86_features; cpuid_mask(&entry->edx, CPUID_7_EDX); + /* + * We emulate ARCH_CAPABILITIES in software even + * if the host doesn't support it. + */ + entry->edx |= F(ARCH_CAPABILITIES); } else { entry->ebx = 0; entry->ecx = 0; @@ -632,13 +637,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; - /* IBRS and IBPB aren't necessarily present in hardware cpuid */ - if (boot_cpu_has(X86_FEATURE_IBPB)) - entry->ebx |= F(IBPB); - if (boot_cpu_has(X86_FEATURE_IBRS)) - entry->ebx |= F(IBRS); + /* + * IBRS, IBPB and VIRT_SSBD aren't necessarily present in + * hardware cpuid + */ + if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) + entry->ebx |= F(AMD_IBPB); + if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) + entry->ebx |= F(AMD_IBRS); + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + entry->ebx |= F(VIRT_SSBD); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + entry->ebx |= F(VIRT_SSBD); break; } case 0x80000019: diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index ddd83fb65da5..92f13ac70ad4 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -321,8 +321,16 @@ void kvm_apic_set_version(struct kvm_vcpu *vcpu) if (!lapic_in_kernel(vcpu)) return; + /* + * KVM emulates 82093AA datasheet (with in-kernel IOAPIC implementation) + * which doesn't have EOI register; Some buggy OSes (e.g. Windows with + * Hyper-V role) disable EOI broadcast in lapic not checking for IOAPIC + * version first and level-triggered interrupts never get EOIed in + * IOAPIC. + */ feat = kvm_find_cpuid_entry(apic->vcpu, 0x1, 0); - if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31)))) + if (feat && (feat->ecx & (1 << (X86_FEATURE_X2APIC & 31))) && + !ioapic_in_kernel(vcpu->kvm)) v |= APIC_LVR_DIRECTED_EOI; kvm_lapic_set_reg(apic, APIC_LVR, v); } @@ -1467,11 +1475,23 @@ static bool set_target_expiration(struct kvm_lapic *apic) static void advance_periodic_target_expiration(struct kvm_lapic *apic) { - apic->lapic_timer.tscdeadline += - nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); + ktime_t now = ktime_get(); + u64 tscl = rdtsc(); + ktime_t delta; + + /* + * Synchronize both deadlines to the same time source or + * differences in the periods (caused by differences in the + * underlying clocks or numerical approximation errors) will + * cause the two to drift apart over time as the errors + * accumulate. + */ apic->lapic_timer.target_expiration = ktime_add_ns(apic->lapic_timer.target_expiration, apic->lapic_timer.period); + delta = ktime_sub(apic->lapic_timer.target_expiration, now); + apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + + nsec_to_cycles(apic->vcpu, delta); } static void start_sw_period(struct kvm_lapic *apic) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9fb0daf628cb..029aa1318874 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -46,7 +46,7 @@ #include <asm/kvm_para.h> #include <asm/irq_remapping.h> #include <asm/microcode.h> -#include <asm/nospec-branch.h> +#include <asm/spec-ctrl.h> #include <asm/virtext.h> #include "trace.h" @@ -186,6 +186,12 @@ struct vcpu_svm { } host; u64 spec_ctrl; + /* + * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be + * translated into the appropriate L2_CFG bits on the host to + * perform speculative control. + */ + u64 virt_spec_ctrl; u32 *msrpm; @@ -1611,6 +1617,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) u32 eax = 1; svm->spec_ctrl = 0; + svm->virt_spec_ctrl = 0; if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | @@ -3618,11 +3625,18 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) return 1; msr_info->data = svm->spec_ctrl; break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) + return 1; + + msr_info->data = svm->virt_spec_ctrl; + break; case MSR_IA32_UCODE_REV: msr_info->data = 0x01000065; break; @@ -3716,7 +3730,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ @@ -3743,7 +3757,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBPB)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)) return 1; if (data & ~PRED_CMD_IBPB) @@ -3757,6 +3771,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) + return 1; + + if (data & ~SPEC_CTRL_SSBD) + return 1; + + svm->virt_spec_ctrl = data; + break; case MSR_STAR: svm->vmcb->save.star = data; break; @@ -5015,8 +5039,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5110,6 +5133,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, svm->host.gs_base); +#else + loadsegment(fs, svm->host.fs); +#ifndef CONFIG_X86_32_LAZY_GS + loadsegment(gs, svm->host.gs); +#endif +#endif + /* * We do not use IBRS in the kernel. If this vCPU has used the * SPEC_CTRL MSR it may have left it on; save the value and @@ -5128,20 +5163,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); - - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); - -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, svm->host.gs_base); -#else - loadsegment(fs, svm->host.fs); -#ifndef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); -#endif -#endif + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); reload_tss(vcpu); @@ -5244,7 +5266,7 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_high_real_mode_segbase(void) +static bool svm_has_emulated_msr(int index) { return true; } @@ -5551,7 +5573,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, - .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase, + .has_emulated_msr = svm_has_emulated_msr, .vcpu_create = svm_create_vcpu, .vcpu_free = svm_free_vcpu, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bdd84ce4491e..4c88572d2b81 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -52,7 +52,7 @@ #include <asm/irq_remapping.h> #include <asm/mmu_context.h> #include <asm/microcode.h> -#include <asm/nospec-branch.h> +#include <asm/spec-ctrl.h> #include "trace.h" #include "pmu.h" @@ -2583,6 +2583,8 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu) return; } + WARN_ON_ONCE(vmx->emulation_required); + if (kvm_exception_is_soft(nr)) { vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, vmx->vcpu.arch.event_exit_inst_len); @@ -3293,7 +3295,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; @@ -3414,12 +3415,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) return 1; vmx->spec_ctrl = data; @@ -3445,7 +3445,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; @@ -6832,12 +6831,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) goto out; } - if (err != EMULATE_DONE) { - vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; - vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; - vcpu->run->internal.ndata = 0; - return 0; - } + if (err != EMULATE_DONE) + goto emulation_error; + + if (vmx->emulation_required && !vmx->rmode.vm86_active && + vcpu->arch.exception.pending) + goto emulation_error; if (vcpu->arch.halt_request) { vcpu->arch.halt_request = 0; @@ -6853,6 +6852,12 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu) out: return ret; + +emulation_error: + vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; + vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_EMULATION; + vcpu->run->internal.ndata = 0; + return 0; } static int __grow_ple_window(int val) @@ -9217,9 +9222,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); -static bool vmx_has_high_real_mode_segbase(void) +static bool vmx_has_emulated_msr(int index) { - return enable_unrestricted_guest || emulate_invalid_guest_state; + switch (index) { + case MSR_IA32_SMBASE: + /* + * We cannot do SMM unless we can run the guest in big + * real mode. + */ + return enable_unrestricted_guest || emulate_invalid_guest_state; + case MSR_AMD64_VIRT_SPEC_CTRL: + /* This is AMD only. */ + return false; + default: + return true; + } } static bool vmx_mpx_supported(void) @@ -9452,10 +9469,10 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); vmx->__launched = vmx->loaded_vmcs->launched; + asm( /* Store host registers */ "push %%" _ASM_DX "; push %%" _ASM_BP ";" @@ -9591,8 +9608,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); + x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -11166,7 +11182,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (ret) return ret; - if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) + /* + * If we're entering a halted L2 vcpu and the L2 vcpu won't be woken + * by event injection, halt vcpu. + */ + if ((vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) && + !(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK)) return kvm_vcpu_halt(vcpu); vmx->nested.nested_run_pending = 1; @@ -12182,7 +12203,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, .cpu_has_accelerated_tpr = report_flexpriority, - .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, + .has_emulated_msr = vmx_has_emulated_msr, .vcpu_create = vmx_create_vcpu, .vcpu_free = vmx_free_vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ba5b6693fbe7..9fc1712523d7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1036,6 +1036,7 @@ static u32 emulated_msrs[] = { MSR_IA32_SMBASE, MSR_PLATFORM_INFO, MSR_MISC_FEATURES_ENABLES, + MSR_AMD64_VIRT_SPEC_CTRL, }; static unsigned num_emulated_msrs; @@ -2721,7 +2722,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * fringe case that is not enabled except via specific settings * of the module parameters. */ - r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); + r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE); break; case KVM_CAP_VAPIC: r = !kvm_x86_ops->cpu_has_accelerated_tpr(); @@ -4324,14 +4325,8 @@ static void kvm_init_msr_list(void) num_msrs_to_save = j; for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { - switch (emulated_msrs[i]) { - case MSR_IA32_SMBASE: - if (!kvm_x86_ops->cpu_has_high_real_mode_segbase()) - continue; - break; - default: - break; - } + if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) + continue; if (j < i) emulated_msrs[j] = emulated_msrs[i]; @@ -7517,6 +7512,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, { struct msr_data apic_base_msr; int mmu_reset_needed = 0; + int cpuid_update_needed = 0; int pending_vec, max_bits, idx; struct desc_ptr dt; @@ -7554,8 +7550,10 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu, vcpu->arch.cr0 = sregs->cr0; mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; + cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) & + (X86_CR4_OSXSAVE | X86_CR4_PKE)); kvm_x86_ops->set_cr4(vcpu, sregs->cr4); - if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE)) + if (cpuid_update_needed) kvm_update_cpuid(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 3ed9a08885c5..4085897fef64 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -298,9 +298,11 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address, /* * The .rodata section needs to be read-only. Using the pfn - * catches all aliases. + * catches all aliases. This also includes __ro_after_init, + * so do not enforce until kernel_set_to_readonly is true. */ - if (within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, + if (kernel_set_to_readonly && + within(pfn, __pa_symbol(__start_rodata) >> PAGE_SHIFT, __pa_symbol(__end_rodata) >> PAGE_SHIFT)) pgprot_val(forbidden) |= _PAGE_RW; diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 34cda7e0551b..c03c85e4fb6a 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/mm.h> #include <linux/gfp.h> +#include <linux/hugetlb.h> #include <asm/pgalloc.h> #include <asm/pgtable.h> #include <asm/tlb.h> @@ -636,6 +637,10 @@ int pud_set_huge(pud_t *pud, phys_addr_t addr, pgprot_t prot) (mtrr != MTRR_TYPE_WRBACK)) return 0; + /* Bail out if we are we on a populated non-leaf entry: */ + if (pud_present(*pud) && !pud_huge(*pud)) + return 0; + prot = pgprot_4k_2_large(prot); set_pte((pte_t *)pud, pfn_pte( @@ -664,6 +669,10 @@ int pmd_set_huge(pmd_t *pmd, phys_addr_t addr, pgprot_t prot) return 0; } + /* Bail out if we are we on a populated non-leaf entry: */ + if (pmd_present(*pmd) && !pmd_huge(*pmd)) + return 0; + prot = pgprot_4k_2_large(prot); set_pte((pte_t *)pmd, pfn_pte( diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index d7bc0eea20a5..6e98e0a7c923 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey */ if (pkey != -1) return pkey; - /* - * Look for a protection-key-drive execute-only mapping - * which is now being given permissions that are not - * execute-only. Move it back to the default pkey. - */ - if (vma_is_pkey_exec_only(vma) && - (prot & (PROT_READ|PROT_WRITE))) { - return 0; - } + /* * The mapping is execute-only. Go try to get the * execute-only protection key. If we fail to do that, * fall through as if we do not have execute-only - * support. + * support in this mm. */ if (prot == PROT_EXEC) { pkey = execute_only_pkey(vma->vm_mm); if (pkey > 0) return pkey; + } else if (vma_is_pkey_exec_only(vma)) { + /* + * Protections are *not* PROT_EXEC, but the mapping + * is using the exec-only pkey. This mapping was + * PROT_EXEC and will no longer be. Move back to + * the default pkey. + */ + return ARCH_DEFAULT_PKEY; } + /* * This is a vanilla, non-pkey mprotect (or we failed to * setup execute-only), inherit the pkey from the VMA we diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 3e15345abfe7..de0263348f2d 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -42,13 +42,11 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) } EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); -static void xen_flush_tlb_all(void) +static noinline void xen_flush_tlb_all(void) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb_all(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 042e9c422b21..b3526a98a5a5 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1280,13 +1280,11 @@ unsigned long xen_read_cr2_direct(void) return this_cpu_read(xen_vcpu_info.arch.cr2); } -static void xen_flush_tlb(void) +static noinline void xen_flush_tlb(void) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); |