From eef3dc34a1e0b01d53328b88c25237bcc7323777 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 17 Oct 2018 17:54:00 -0700 Subject: ARM: OMAP2+: prm44xx: Fix section annotation on omap44xx_prm_enable_io_wakeup When building the kernel with Clang, the following section mismatch warning appears: WARNING: vmlinux.o(.text+0x38b3c): Section mismatch in reference from the function omap44xx_prm_late_init() to the function .init.text:omap44xx_prm_enable_io_wakeup() The function omap44xx_prm_late_init() references the function __init omap44xx_prm_enable_io_wakeup(). This is often because omap44xx_prm_late_init lacks a __init annotation or the annotation of omap44xx_prm_enable_io_wakeup is wrong. Remove the __init annotation from omap44xx_prm_enable_io_wakeup so there is no more mismatch. Signed-off-by: Nathan Chancellor Signed-off-by: Tony Lindgren --- arch/arm/mach-omap2/prm44xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/prm44xx.c b/arch/arm/mach-omap2/prm44xx.c index 7b95729e8359..38a1be6c3694 100644 --- a/arch/arm/mach-omap2/prm44xx.c +++ b/arch/arm/mach-omap2/prm44xx.c @@ -351,7 +351,7 @@ static void omap44xx_prm_reconfigure_io_chain(void) * to occur, WAKEUPENABLE bits must be set in the pad mux registers, and * omap44xx_prm_reconfigure_io_chain() must be called. No return value. */ -static void __init omap44xx_prm_enable_io_wakeup(void) +static void omap44xx_prm_enable_io_wakeup(void) { s32 inst = omap4_prmst_get_prm_dev_inst(); -- cgit v1.2.3 From 3182215dd0b2120fb942ed88430cfb7c12d583e0 Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Tue, 30 Oct 2018 22:02:03 +1100 Subject: powerpc/powernv/npu: Remove NPU DMA ops The NPU IOMMU is setup to mirror the parent PCIe device IOMMU setup. Therefore it does not make sense to call dma operations such as dma_map_page(), etc. directly on these devices. The existing dma_ops simply print a warning if they are ever called, however this is unnecessary and the warnings are likely to go unnoticed. It is instead simpler to remove these operations and let the generic DMA code print warnings (eg. via a NULL pointer deref) in cases of buggy drivers attempting dma operations on NVLink devices. Signed-off-by: Alistair Popple Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/npu-dma.c | 64 ++------------------------------ 1 file changed, 4 insertions(+), 60 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 6f60e0931922..75b935252981 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -102,63 +102,6 @@ struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index) } EXPORT_SYMBOL(pnv_pci_get_npu_dev); -#define NPU_DMA_OP_UNSUPPORTED() \ - dev_err_once(dev, "%s operation unsupported for NVLink devices\n", \ - __func__) - -static void *dma_npu_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return NULL; -} - -static void dma_npu_free(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); -} - -static dma_addr_t dma_npu_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction direction, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static int dma_npu_map_sg(struct device *dev, struct scatterlist *sglist, - int nelems, enum dma_data_direction direction, - unsigned long attrs) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static int dma_npu_dma_supported(struct device *dev, u64 mask) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static u64 dma_npu_get_required_mask(struct device *dev) -{ - NPU_DMA_OP_UNSUPPORTED(); - return 0; -} - -static const struct dma_map_ops dma_npu_ops = { - .map_page = dma_npu_map_page, - .map_sg = dma_npu_map_sg, - .alloc = dma_npu_alloc, - .free = dma_npu_free, - .dma_supported = dma_npu_dma_supported, - .get_required_mask = dma_npu_get_required_mask, -}; - /* * Returns the PE assoicated with the PCI device of the given * NPU. Returns the linked pci device if pci_dev != NULL. @@ -270,10 +213,11 @@ static void pnv_npu_dma_set_32(struct pnv_ioda_pe *npe) rc = pnv_npu_set_window(npe, 0, gpe->table_group.tables[0]); /* - * We don't initialise npu_pe->tce32_table as we always use - * dma_npu_ops which are nops. + * NVLink devices use the same TCE table configuration as + * their parent device so drivers shouldn't be doing DMA + * operations directly on these devices. */ - set_dma_ops(&npe->pdev->dev, &dma_npu_ops); + set_dma_ops(&npe->pdev->dev, NULL); } /* -- cgit v1.2.3 From 13682e524167cbd7e2a26c5e91bec765f0f96273 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Wed, 17 Oct 2018 11:18:30 +0200 Subject: arm64: dts: rockchip: remove vdd_log from rock960 to fix a stability issues When the performance governor is set as default, the rock960 hangs around one minute after booting, whatever the activity is (idle, key pressed, loaded, ...). Based on the commit log found at https://patchwork.kernel.org/patch/10092377/ "vdd_log has no consumer and therefore will not be set to a specific voltage. Still the PWM output pin gets configured and thence the vdd_log output voltage will changed from it's default. Depending on the idle state of the PWM this will slightly over or undervoltage the logic supply of the RK3399 and cause instability with GbE (undervoltage) and PCIe (overvoltage). Since the default value set by a voltage divider is the correct supply voltage and we don't need to change it during runtime we remove the rail from the devicetree completely so the PWM pin will not be configured." After removing the vdd-log from the rock960's specific DT, the board does no longer hang and shows a stable behavior. Apply the same change for the rock960 by removing the vdd-log from the DT. Fixes: 874846f1fccd ("arm64: dts: rockchip: add 96boards RK3399 Ficus board") Cc: stable@vger.kernel.org Tested-by: Manivannan Sadhasivam Signed-off-by: Daniel Lezcano Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi index 6c8c4ab044aa..56abbb08c133 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi +++ b/arch/arm64/boot/dts/rockchip/rk3399-rock960.dtsi @@ -57,18 +57,6 @@ regulator-always-on; vin-supply = <&vcc_sys>; }; - - vdd_log: vdd-log { - compatible = "pwm-regulator"; - pwms = <&pwm2 0 25000 0>; - regulator-name = "vdd_log"; - regulator-min-microvolt = <800000>; - regulator-max-microvolt = <1400000>; - regulator-always-on; - regulator-boot-on; - vin-supply = <&vcc_sys>; - }; - }; &cpu_l0 { -- cgit v1.2.3 From 0773495b1f5f1c5e23551843f87b5ff37e7af8f7 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Sun, 4 Nov 2018 01:46:00 -0700 Subject: xtensa: make sure bFLT stack is 16 byte aligned Xtensa ABI requires stack alignment to be at least 16. In noMMU configuration ARCH_SLAB_MINALIGN is used to align stack. Make it at least 16. This fixes the following runtime error in noMMU configuration, caused by interaction between insufficiently aligned stack and alloca function, that results in corruption of on-stack variable in the libc function glob: Caught unhandled exception in 'sh' (pid = 47, pc = 0x02d05d65) - should not happen EXCCAUSE is 15 Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/include/asm/processor.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/xtensa/include/asm/processor.h b/arch/xtensa/include/asm/processor.h index e4ccb88b7996..677bc76c1d70 100644 --- a/arch/xtensa/include/asm/processor.h +++ b/arch/xtensa/include/asm/processor.h @@ -23,7 +23,11 @@ # error Linux requires the Xtensa Windowed Registers Option. #endif -#define ARCH_SLAB_MINALIGN XCHAL_DATA_WIDTH +/* Xtensa ABI requires stack alignment to be at least 16 */ + +#define STACK_ALIGN (XCHAL_DATA_WIDTH > 16 ? XCHAL_DATA_WIDTH : 16) + +#define ARCH_SLAB_MINALIGN STACK_ALIGN /* * User space process size: 1 GB. -- cgit v1.2.3 From 0ae790683fc28bb718d74f87cdf753c6445fe28d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 6 Nov 2018 19:23:28 +1100 Subject: powerpc/mm/64s: Consolidate SLB assertions The code for assert_slb_exists() and assert_slb_notexists() is almost identical, except for the polarity of the WARN_ON(). In a future patch we'll need to modify this code, so consolidate it now into a single function. Signed-off-by: Michael Ellerman --- arch/powerpc/mm/slb.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index c3fdf2969d9f..f3e002ee457b 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -58,7 +58,7 @@ static inline unsigned long mk_vsid_data(unsigned long ea, int ssize, return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags); } -static void assert_slb_exists(unsigned long ea) +static void assert_slb_presence(bool present, unsigned long ea) { #ifdef CONFIG_DEBUG_VM unsigned long tmp; @@ -66,19 +66,8 @@ static void assert_slb_exists(unsigned long ea) WARN_ON_ONCE(mfmsr() & MSR_EE); asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); - WARN_ON(tmp == 0); -#endif -} -static void assert_slb_notexists(unsigned long ea) -{ -#ifdef CONFIG_DEBUG_VM - unsigned long tmp; - - WARN_ON_ONCE(mfmsr() & MSR_EE); - - asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); - WARN_ON(tmp != 0); + WARN_ON(present == (tmp == 0)); #endif } @@ -114,7 +103,7 @@ static inline void create_shadowed_slbe(unsigned long ea, int ssize, */ slb_shadow_update(ea, ssize, flags, index); - assert_slb_notexists(ea); + assert_slb_presence(false, ea); asm volatile("slbmte %0,%1" : : "r" (mk_vsid_data(ea, ssize, flags)), "r" (mk_esid_data(ea, ssize, index)) @@ -137,7 +126,7 @@ void __slb_restore_bolted_realmode(void) "r" (be64_to_cpu(p->save_area[index].esid))); } - assert_slb_exists(local_paca->kstack); + assert_slb_presence(true, local_paca->kstack); } /* @@ -185,7 +174,7 @@ void slb_flush_and_restore_bolted(void) :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)), "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid)) : "memory"); - assert_slb_exists(get_paca()->kstack); + assert_slb_presence(true, get_paca()->kstack); get_paca()->slb_cache_ptr = 0; @@ -443,9 +432,9 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm) ea = (unsigned long) get_paca()->slb_cache[i] << SID_SHIFT; /* - * Could assert_slb_exists here, but hypervisor - * or machine check could have come in and - * removed the entry at this point. + * Could assert_slb_presence(true) here, but + * hypervisor or machine check could have come + * in and removed the entry at this point. */ slbie_data = ea; @@ -676,7 +665,7 @@ static long slb_insert_entry(unsigned long ea, unsigned long context, * User preloads should add isync afterwards in case the kernel * accesses user memory before it returns to userspace with rfid. */ - assert_slb_notexists(ea); + assert_slb_presence(false, ea); asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)); barrier(); -- cgit v1.2.3 From 08e6a3434e2125e4b21d0d3f84678d427345bc0d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 6 Nov 2018 19:25:18 +1100 Subject: powerpc/mm/64s: Use PPC_SLBFEE macro Old toolchains don't know about slbfee and break the build, eg: {standard input}:37: Error: Unrecognized opcode: `slbfee.' Fix it by using the macro version. We need to add an underscore version that takes raw register numbers from the inline asm, rather than our Rx macros. Fixes: e15a4fea4dee ("powerpc/64s/hash: Add some SLB debugging tests") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 2 ++ arch/powerpc/mm/slb.c | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index 6093bc8f74e5..a6e9e314c707 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -493,6 +493,8 @@ __PPC_RS(t) | __PPC_RA0(a) | __PPC_RB(b)) #define PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ __PPC_RT(t) | __PPC_RB(b)) +#define __PPC_SLBFEE_DOT(t, b) stringify_in_c(.long PPC_INST_SLBFEE | \ + ___PPC_RT(t) | ___PPC_RB(b)) #define PPC_ICBT(c,a,b) stringify_in_c(.long PPC_INST_ICBT | \ __PPC_CT(c) | __PPC_RA0(a) | __PPC_RB(b)) /* PASemi instructions */ diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index f3e002ee457b..457fd29448b1 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -65,7 +66,7 @@ static void assert_slb_presence(bool present, unsigned long ea) WARN_ON_ONCE(mfmsr() & MSR_EE); - asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0"); + asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0"); WARN_ON(present == (tmp == 0)); #endif -- cgit v1.2.3 From 9586d569a369dc585a3e191dcabd72748e3c9c5c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 6 Nov 2018 19:25:38 +1100 Subject: powerpc/mm/64s: Only use slbfee on CPUs that support it The slbfee instruction was only added in ISA 2.05 (Power6), it's not supported on older CPUs. We don't have a CPU feature for that ISA version though, so just use the ISA 2.06 feature flag. Fixes: e15a4fea4dee ("powerpc/64s/hash: Add some SLB debugging tests") Signed-off-by: Michael Ellerman --- arch/powerpc/mm/slb.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 457fd29448b1..b663a36f9ada 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -66,6 +66,9 @@ static void assert_slb_presence(bool present, unsigned long ea) WARN_ON_ONCE(mfmsr() & MSR_EE); + if (!cpu_has_feature(CPU_FTR_ARCH_206)) + return; + asm volatile(__PPC_SLBFEE_DOT(%0, %1) : "=r"(tmp) : "r"(ea) : "cr0"); WARN_ON(present == (tmp == 0)); -- cgit v1.2.3 From 86d4d068df573a8c2105554624796c086d6bec3d Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Tue, 6 Nov 2018 12:00:01 +0100 Subject: parisc: Revert "Release spinlocks using ordered store" This reverts commit d27dfa13b9f77ae7e6ed09d70a0426ed26c1a8f9. Unfortunately, this patch needs to be reverted. We need the full sync barrier and not the limited barrier provided by using an ordered store. The sync ensures that all accesses and cache purge instructions that follow the sync are performed after all such instructions prior the sync instruction have completed executing. The patch breaks the rwlock implementation in glibc. This caused the test-lock application in the libprelude testsuite to hang. With the change reverted, the test runs correctly and the libprelude package builds successfully. Signed-off-by: John David Anglin Signed-off-by: Helge Deller --- arch/parisc/include/asm/spinlock.h | 4 ++-- arch/parisc/kernel/syscall.S | 12 ++++++++---- 2 files changed, 10 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/parisc/include/asm/spinlock.h b/arch/parisc/include/asm/spinlock.h index 16aec9ba2580..8a63515f03bf 100644 --- a/arch/parisc/include/asm/spinlock.h +++ b/arch/parisc/include/asm/spinlock.h @@ -37,8 +37,8 @@ static inline void arch_spin_unlock(arch_spinlock_t *x) volatile unsigned int *a; a = __ldcw_align(x); - /* Release with ordered store. */ - __asm__ __volatile__("stw,ma %0,0(%1)" : : "r"(1), "r"(a) : "memory"); + mb(); + *a = 1; } static inline int arch_spin_trylock(arch_spinlock_t *x) diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 9505c317818d..a9bc90dc4ae7 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -640,7 +640,8 @@ cas_action: sub,<> %r28, %r25, %r0 2: stw %r24, 0(%r26) /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG /* Clear thread register indicator */ stw %r0, 4(%sr2,%r20) @@ -654,7 +655,8 @@ cas_action: 3: /* Error occurred on load or store */ /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) #if ENABLE_LWS_DEBUG stw %r0, 4(%sr2,%r20) #endif @@ -855,7 +857,8 @@ cas2_action: cas2_end: /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) /* Enable interrupts */ ssm PSW_SM_I, %r0 /* Return to userspace, set no error */ @@ -865,7 +868,8 @@ cas2_end: 22: /* Error occurred on load or store */ /* Free lock */ - stw,ma %r20, 0(%sr2,%r20) + sync + stw %r20, 0(%sr2,%r20) ssm PSW_SM_I, %r0 ldo 1(%r0),%r28 b lws_exit -- cgit v1.2.3 From 28c5bcf74fa07c25d5bd118d1271920f51ce2a98 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Tue, 6 Nov 2018 19:49:34 -0600 Subject: KVM: PPC: Move and undef TRACE_INCLUDE_PATH/FILE TRACE_INCLUDE_PATH and TRACE_INCLUDE_FILE are used by , so like that #include, they should be outside #ifdef protection. They also need to be #undefed before defining, in case multiple trace headers are included by the same C file. This became the case on book3e after commit cf4a6085151a ("powerpc/mm: Add missing tracepoint for tlbie"), leading to the following build error: CC arch/powerpc/kvm/powerpc.o In file included from arch/powerpc/kvm/powerpc.c:51:0: arch/powerpc/kvm/trace.h:9:0: error: "TRACE_INCLUDE_PATH" redefined [-Werror] #define TRACE_INCLUDE_PATH . ^ In file included from arch/powerpc/kvm/../mm/mmu_decl.h:25:0, from arch/powerpc/kvm/powerpc.c:48: ./arch/powerpc/include/asm/trace.h:224:0: note: this is the location of the previous definition #define TRACE_INCLUDE_PATH asm ^ cc1: all warnings being treated as errors Reported-by: Christian Zigotzky Signed-off-by: Scott Wood Signed-off-by: Michael Ellerman --- arch/powerpc/kvm/trace.h | 8 ++++++-- arch/powerpc/kvm/trace_booke.h | 9 +++++++-- arch/powerpc/kvm/trace_hv.h | 9 +++++++-- arch/powerpc/kvm/trace_pr.h | 9 +++++++-- 4 files changed, 27 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kvm/trace.h b/arch/powerpc/kvm/trace.h index 491b0f715d6b..ea1d7c808319 100644 --- a/arch/powerpc/kvm/trace.h +++ b/arch/powerpc/kvm/trace.h @@ -6,8 +6,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace /* * Tracepoint for guest mode entry. @@ -120,4 +118,10 @@ TRACE_EVENT(kvm_check_requests, #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace + #include diff --git a/arch/powerpc/kvm/trace_booke.h b/arch/powerpc/kvm/trace_booke.h index ac640e81fdc5..3837842986aa 100644 --- a/arch/powerpc/kvm/trace_booke.h +++ b/arch/powerpc/kvm/trace_booke.h @@ -6,8 +6,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_booke -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_booke #define kvm_trace_symbol_exit \ {0, "CRITICAL"}, \ @@ -218,4 +216,11 @@ TRACE_EVENT(kvm_booke_queue_irqprio, #endif /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_booke + #include diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index bcfe8a987f6a..8a1e3b0047f1 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -9,8 +9,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_hv -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_hv #define kvm_trace_symbol_hcall \ {H_REMOVE, "H_REMOVE"}, \ @@ -497,4 +495,11 @@ TRACE_EVENT(kvmppc_run_vcpu_exit, #endif /* _TRACE_KVM_HV_H */ /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_hv + #include diff --git a/arch/powerpc/kvm/trace_pr.h b/arch/powerpc/kvm/trace_pr.h index 2f9a8829552b..46a46d328fbf 100644 --- a/arch/powerpc/kvm/trace_pr.h +++ b/arch/powerpc/kvm/trace_pr.h @@ -8,8 +8,6 @@ #undef TRACE_SYSTEM #define TRACE_SYSTEM kvm_pr -#define TRACE_INCLUDE_PATH . -#define TRACE_INCLUDE_FILE trace_pr TRACE_EVENT(kvm_book3s_reenter, TP_PROTO(int r, struct kvm_vcpu *vcpu), @@ -257,4 +255,11 @@ TRACE_EVENT(kvm_exit, #endif /* _TRACE_KVM_H */ /* This part must be outside protection */ + +#undef TRACE_INCLUDE_PATH +#undef TRACE_INCLUDE_FILE + +#define TRACE_INCLUDE_PATH . +#define TRACE_INCLUDE_FILE trace_pr + #include -- cgit v1.2.3 From e7f4ffffa972db4820c23ff9831a6a4f3f6d8cc5 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 28 Oct 2018 15:13:48 -0500 Subject: ARM: dts: am3517: Fix pinmuxing for CD on MMC1 The MMC1 is active low, not active high. For some reason, this worked with different combination of U-Boot and kernels, but it's supposed to be active low and is currently broken. Fixes: cfaa856a2510 ("ARM: dts: am3517: Add pinmuxing, CD and WP for MMC1") #kernel 4.18+ Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am3517-evm.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am3517-evm.dts b/arch/arm/boot/dts/am3517-evm.dts index d4d33cd7adad..1e2bb68231ad 100644 --- a/arch/arm/boot/dts/am3517-evm.dts +++ b/arch/arm/boot/dts/am3517-evm.dts @@ -228,7 +228,7 @@ vmmc-supply = <&vmmc_fixed>; bus-width = <4>; wp-gpios = <&gpio4 30 GPIO_ACTIVE_HIGH>; /* gpio_126 */ - cd-gpios = <&gpio4 31 GPIO_ACTIVE_HIGH>; /* gpio_127 */ + cd-gpios = <&gpio4 31 GPIO_ACTIVE_LOW>; /* gpio_127 */ }; &mmc3 { -- cgit v1.2.3 From 6809564d64ff1301d44c13334cc0e8369e825a20 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 28 Oct 2018 15:28:32 -0500 Subject: ARM: dts: LogicPD Torpedo: Fix mmc3_dat1 interrupt When the Torpedo was first introduced back at Kernel 4.2, the interrupt extended flag has been set incorrectly. It was subsequently moved, so this patch corrects Kernel 4.18+ Fixes: a38867305203 ("ARM: dts: Move move WiFi bindings to logicpd-torpedo-37xx-devkit") # v4.18+ Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts index 9d5d53fbe9c0..c39cf2ca54da 100644 --- a/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts +++ b/arch/arm/boot/dts/logicpd-torpedo-37xx-devkit.dts @@ -35,7 +35,7 @@ * jumpering combinations for the long run. */ &mmc3 { - interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>; + interrupts-extended = <&intc 94 &omap3_pmx_core 0x136>; pinctrl-0 = <&mmc3_pins &mmc3_core2_pins>; pinctrl-names = "default"; vmmc-supply = <&wl12xx_vmmc>; -- cgit v1.2.3 From 3d8b804bc528d3720ec0c39c212af92dafaf6e84 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 28 Oct 2018 15:29:27 -0500 Subject: ARM: dts: logicpd-somlv: Fix interrupt on mmc3_dat1 The interrupt on mmc3_dat1 is wrong which prevents this from appearing in /proc/interrupts. Fixes: ab8dd3aed011 ("ARM: DTS: Add minimal Support for Logic PD DM3730 SOM-LV") #Kernel 4.9+ Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/logicpd-som-lv.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index ac343330d0c8..98b682a8080c 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -129,7 +129,7 @@ }; &mmc3 { - interrupts-extended = <&intc 94 &omap3_pmx_core2 0x46>; + interrupts-extended = <&intc 94 &omap3_pmx_core 0x136>; pinctrl-0 = <&mmc3_pins &wl127x_gpio>; pinctrl-names = "default"; vmmc-supply = <&wl12xx_vmmc>; -- cgit v1.2.3 From 419b194cdedc76d0d3cd5b0900db0fa8177c4a52 Mon Sep 17 00:00:00 2001 From: Adam Ford Date: Sun, 28 Oct 2018 15:34:21 -0500 Subject: ARM: dts: am3517-som: Fix WL127x Wifi interrupt At the same time the AM3517 EVM was gaining WiFi support, separate patches were introduced to move the interrupt from HIGH to RISING. Because they overlapped, this was not done to the AM3517-EVM. This patch fixes Kernel 4.19+ Fixes: 6bf5e3410f19 ("ARM: dts: am3517-som: Add WL127x Wifi") Signed-off-by: Adam Ford Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am3517-som.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am3517-som.dtsi b/arch/arm/boot/dts/am3517-som.dtsi index dae6e458e59f..b1c988eed87c 100644 --- a/arch/arm/boot/dts/am3517-som.dtsi +++ b/arch/arm/boot/dts/am3517-som.dtsi @@ -163,7 +163,7 @@ compatible = "ti,wl1271"; reg = <2>; interrupt-parent = <&gpio6>; - interrupts = <10 IRQ_TYPE_LEVEL_HIGH>; /* gpio_170 */ + interrupts = <10 IRQ_TYPE_EDGE_RISING>; /* gpio_170 */ ref-clock-frequency = <26000000>; tcxo-clock-frequency = <26000000>; }; -- cgit v1.2.3 From cec83ff1241ec98113a19385ea9e9cfa9aa4125b Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Wed, 7 Nov 2018 22:30:31 +0100 Subject: ARM: OMAP1: ams-delta: Fix possible use of uninitialized field While playing with initialization order of modem device, it has been discovered that under some circumstances (early console init, I believe) its .pm() callback may be called before the uart_port->private_data pointer is initialized from plat_serial8250_port->private_data, resulting in NULL pointer dereference. Fix it by checking for uninitialized pointer before using it in modem_pm(). Fixes: aabf31737a6a ("ARM: OMAP1: ams-delta: update the modem to use regulator API") Signed-off-by: Janusz Krzysztofik Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/board-ams-delta.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 3d191fd52910..17886744dbe6 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -750,6 +750,9 @@ static void modem_pm(struct uart_port *port, unsigned int state, unsigned old) struct modem_private_data *priv = port->private_data; int ret; + if (!priv) + return; + if (IS_ERR(priv->regulator)) return; -- cgit v1.2.3 From 8588eac3ff66e77fb681591714fd76da3a4da80d Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Thu, 27 Sep 2018 10:31:46 +0530 Subject: arm64: dts: ti: k3-am654: Fix wakeup_uart reg address cbass_wakeup interconnect which is the parent of wakeup_uart node defines address-cells=1 and size-cells=1, therefore fix up reg property of wakeup_uart node accordingly. Otherwise, this UART instance fails to probe if enabled. Fixes: 4201af2544b3 ("arm64: dts: ti: am654: Add uart nodes") Signed-off-by: Vignesh R Reviewed-by: Lokesh Vutla Signed-off-by: Tero Kristo --- arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi index affc3c309353..8d7b47f9dfbf 100644 --- a/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi +++ b/arch/arm64/boot/dts/ti/k3-am65-wakeup.dtsi @@ -36,7 +36,7 @@ wkup_uart0: serial@42300000 { compatible = "ti,am654-uart"; - reg = <0x00 0x42300000 0x00 0x100>; + reg = <0x42300000 0x100>; reg-shift = <2>; reg-io-width = <4>; interrupts = ; -- cgit v1.2.3 From 5f8d3ab136d0ccb59c4d628d8f85e0d8f2761d07 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 2 Nov 2018 14:45:32 -0700 Subject: arm64: dts: sdm845-mtp: Reserve reserved gpios With the introduction of commit 3edfb7bd76bd ("gpiolib: Show correct direction from the beginning") the gpiolib will attempt to read the direction of all pins, which triggers a read from protected register regions. The pins 0 through 3 and 81 through 84 are protected, so mark these as reserved. Signed-off-by: Bjorn Andersson Reviewed-by: Stephen Boyd Reviewed-by: Linus Walleij Signed-off-by: Andy Gross --- arch/arm64/boot/dts/qcom/sdm845-mtp.dts | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts index eedfaf8922e2..d667eee4e6d0 100644 --- a/arch/arm64/boot/dts/qcom/sdm845-mtp.dts +++ b/arch/arm64/boot/dts/qcom/sdm845-mtp.dts @@ -352,6 +352,10 @@ status = "okay"; }; +&tlmm { + gpio-reserved-ranges = <0 4>, <81 4>; +}; + &uart9 { status = "okay"; }; -- cgit v1.2.3 From 9134586715e389fe90d9d28cb37a668f374d686a Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 29 Oct 2018 22:45:54 -0700 Subject: arm64: dts: qcom: msm8998: Reserve gpio ranges on MTP GPIOs 0 through 3 and 81 through 84 are configured to not be accessible from the application CPUs. Mark them as reserved to allow the MSM8998 MTP to boot after the introduction of 3edfb7bd76bd ("gpiolib: Show correct direction from the beginning"). Signed-off-by: Bjorn Andersson Reviewed-by: Jeffrey Hugo Signed-off-by: Andy Gross --- arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi index b4276da1fb0d..11fd1fe8bdb5 100644 --- a/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi +++ b/arch/arm64/boot/dts/qcom/msm8998-mtp.dtsi @@ -241,3 +241,7 @@ }; }; }; + +&tlmm { + gpio-reserved-ranges = <0 4>, <81 4>; +}; -- cgit v1.2.3 From c8b00bb742dd036388f37d019dbb9db177f3e66c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 1 Nov 2018 16:21:05 +1100 Subject: powerpc/mm/64s: Fix preempt warning in slb_allocate_kernel() With preempt enabled we see warnings in do_slb_fault(): BUG: using smp_processor_id() in preemptible [00000000] code: kworker/u33:0/98 futex hash table entries: 4096 (order: 3, 524288 bytes) caller is do_slb_fault+0x204/0x230 CPU: 5 PID: 98 Comm: kworker/u33:0 Not tainted 4.19.0-rc3-gcc-7.3.1-00022-g1936f094e164 #138 Call Trace: dump_stack+0xb4/0x104 (unreliable) check_preemption_disabled+0x148/0x150 do_slb_fault+0x204/0x230 data_access_slb_common+0x138/0x180 This is caused by the get_paca() in slb_allocate_kernel(), which includes a call to debug_smp_processor_id(). slb_allocate_kernel() can only be called from do_slb_fault(), and in that path interrupts are hard disabled and so we can't be preempted, but we can't update the preempt flags (in thread_info) because that could cause an SLB fault. So just use local_paca which is safe and doesn't cause the warning. Fixes: 48e7b7695745 ("powerpc/64s/hash: Convert SLB miss handlers to C") Signed-off-by: Michael Ellerman --- arch/powerpc/mm/slb.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index b663a36f9ada..bc3914d54e26 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -708,7 +708,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id) return -EFAULT; if (ea < H_VMALLOC_END) - flags = get_paca()->vmalloc_sllp; + flags = local_paca->vmalloc_sllp; else flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp; } else { -- cgit v1.2.3 From 43c6494fa1499912c8177e71450c0279041152a6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 6 Nov 2018 23:37:58 +1100 Subject: powerpc/io: Fix the IO workarounds code to work with Radix Back in 2006 Ben added some workarounds for a misbehaviour in the Spider IO bridge used on early Cell machines, see commit 014da7ff47b5 ("[POWERPC] Cell "Spider" MMIO workarounds"). Later these were made to be generic, ie. not tied specifically to Spider. The code stashes a token in the high bits (59-48) of virtual addresses used for IO (eg. returned from ioremap()). This works fine when using the Hash MMU, but when we're using the Radix MMU the bits used for the token overlap with some of the bits of the virtual address. This is because the maximum virtual address is larger with Radix, up to c00fffffffffffff, and in fact we use that high part of the address range for ioremap(), see RADIX_KERN_IO_START. As it happens the bits that are used overlap with the bits that differentiate an IO address vs a linear map address. If the resulting address lies outside the linear mapping we will crash (see below), if not we just corrupt memory. virtio-pci 0000:00:00.0: Using 64-bit direct DMA at offset 800000000000000 Unable to handle kernel paging request for data at address 0xc000000080000014 ... CFAR: c000000000626b98 DAR: c000000080000014 DSISR: 42000000 IRQMASK: 0 GPR00: c0000000006c54fc c00000003e523378 c0000000016de600 0000000000000000 GPR04: c00c000080000014 0000000000000007 0fffffff000affff 0000000000000030 ^^^^ ... NIP [c000000000626c5c] .iowrite8+0xec/0x100 LR [c0000000006c992c] .vp_reset+0x2c/0x90 Call Trace: .pci_bus_read_config_dword+0xc4/0x120 (unreliable) .register_virtio_device+0x13c/0x1c0 .virtio_pci_probe+0x148/0x1f0 .local_pci_probe+0x68/0x140 .pci_device_probe+0x164/0x220 .really_probe+0x274/0x3b0 .driver_probe_device+0x80/0x170 .__driver_attach+0x14c/0x150 .bus_for_each_dev+0xb8/0x130 .driver_attach+0x34/0x50 .bus_add_driver+0x178/0x2f0 .driver_register+0x90/0x1a0 .__pci_register_driver+0x6c/0x90 .virtio_pci_driver_init+0x2c/0x40 .do_one_initcall+0x64/0x280 .kernel_init_freeable+0x36c/0x474 .kernel_init+0x24/0x160 .ret_from_kernel_thread+0x58/0x7c This hasn't been a problem because CONFIG_PPC_IO_WORKAROUNDS which enables this code is usually not enabled. It is only enabled when it's selected by PPC_CELL_NATIVE which is only selected by PPC_IBM_CELL_BLADE and that in turn depends on BIG_ENDIAN. So in order to hit the bug you need to build a big endian kernel, with IBM Cell Blade support enabled, as well as Radix MMU support, and then boot that on Power9 using Radix MMU. Still we can fix the bug, so let's do that. We simply use fewer bits for the token, taking the union of the restrictions on the address from both Hash and Radix, we end up with 8 bits we can use for the token. The only user of the token is iowa_mem_find_bus() which only supports 8 token values, so 8 bits is plenty for that. Fixes: 566ca99af026 ("powerpc/mm/radix: Add dummy radix_enabled()") Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/io.h | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/include/asm/io.h b/arch/powerpc/include/asm/io.h index 3ef40b703c4a..e746becd9d6f 100644 --- a/arch/powerpc/include/asm/io.h +++ b/arch/powerpc/include/asm/io.h @@ -268,19 +268,13 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, * their hooks, a bitfield is reserved for use by the platform near the * top of MMIO addresses (not PIO, those have to cope the hard way). * - * This bit field is 12 bits and is at the top of the IO virtual - * addresses PCI_IO_INDIRECT_TOKEN_MASK. + * The highest address in the kernel virtual space are: * - * The kernel virtual space is thus: + * d0003fffffffffff # with Hash MMU + * c00fffffffffffff # with Radix MMU * - * 0xD000000000000000 : vmalloc - * 0xD000080000000000 : PCI PHB IO space - * 0xD000080080000000 : ioremap - * 0xD0000fffffffffff : end of ioremap region - * - * Since the top 4 bits are reserved as the region ID, we use thus - * the next 12 bits and keep 4 bits available for the future if the - * virtual address space is ever to be extended. + * The top 4 bits are reserved as the region ID on hash, leaving us 8 bits + * that can be used for the field. * * The direct IO mapping operations will then mask off those bits * before doing the actual access, though that only happen when @@ -292,8 +286,8 @@ extern void _memcpy_toio(volatile void __iomem *dest, const void *src, */ #ifdef CONFIG_PPC_INDIRECT_MMIO -#define PCI_IO_IND_TOKEN_MASK 0x0fff000000000000ul -#define PCI_IO_IND_TOKEN_SHIFT 48 +#define PCI_IO_IND_TOKEN_SHIFT 52 +#define PCI_IO_IND_TOKEN_MASK (0xfful << PCI_IO_IND_TOKEN_SHIFT) #define PCI_FIX_ADDR(addr) \ ((PCI_IO_ADDR)(((unsigned long)(addr)) & ~PCI_IO_IND_TOKEN_MASK)) #define PCI_GET_ADDR_TOKEN(addr) \ -- cgit v1.2.3 From c10a8de0d32e95b0b8c7c17b6dc09baea5a5a899 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Oct 2018 10:04:18 -0700 Subject: perf/x86/intel/uncore: Add more IMC PCI IDs for KabyLake and CoffeeLake CPUs KabyLake and CoffeeLake CPUs have the same client uncore events as SkyLake. Add the PCI IDs for the KabyLake Y, U, S processor lines and CoffeeLake U, H, S processor lines. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181019170419.378-1-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snb.c | 115 ++++++++++++++++++++++++++++++++++++- 1 file changed, 114 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 8527c3e1038b..bfa25814fe5f 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -15,6 +15,25 @@ #define PCI_DEVICE_ID_INTEL_SKL_HQ_IMC 0x1910 #define PCI_DEVICE_ID_INTEL_SKL_SD_IMC 0x190f #define PCI_DEVICE_ID_INTEL_SKL_SQ_IMC 0x191f +#define PCI_DEVICE_ID_INTEL_KBL_Y_IMC 0x590c +#define PCI_DEVICE_ID_INTEL_KBL_U_IMC 0x5904 +#define PCI_DEVICE_ID_INTEL_KBL_UQ_IMC 0x5914 +#define PCI_DEVICE_ID_INTEL_KBL_SD_IMC 0x590f +#define PCI_DEVICE_ID_INTEL_KBL_SQ_IMC 0x591f +#define PCI_DEVICE_ID_INTEL_CFL_2U_IMC 0x3ecc +#define PCI_DEVICE_ID_INTEL_CFL_4U_IMC 0x3ed0 +#define PCI_DEVICE_ID_INTEL_CFL_4H_IMC 0x3e10 +#define PCI_DEVICE_ID_INTEL_CFL_6H_IMC 0x3ec4 +#define PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC 0x3e0f +#define PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC 0x3e1f +#define PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC 0x3ec2 +#define PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC 0x3e30 +#define PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC 0x3e18 +#define PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC 0x3ec6 +#define PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC 0x3e31 +#define PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC 0x3e33 +#define PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC 0x3eca +#define PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC 0x3e32 /* SNB event control */ #define SNB_UNC_CTL_EV_SEL_MASK 0x000000ff @@ -569,7 +588,82 @@ static const struct pci_device_id skl_uncore_pci_ids[] = { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SKL_SQ_IMC), .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), }, - + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_Y_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_UQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SD_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_KBL_SQ_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4U_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6H_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_2S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_D_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_W_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_4S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_6S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, + { /* IMC */ + PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_CFL_8S_S_IMC), + .driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0), + }, { /* end: all zeroes */ }, }; @@ -618,6 +712,25 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = { IMC_DEV(SKL_HQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core H Quad Core */ IMC_DEV(SKL_SD_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Dual Core */ IMC_DEV(SKL_SQ_IMC, &skl_uncore_pci_driver), /* 6th Gen Core S Quad Core */ + IMC_DEV(KBL_Y_IMC, &skl_uncore_pci_driver), /* 7th Gen Core Y */ + IMC_DEV(KBL_U_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U */ + IMC_DEV(KBL_UQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core U Quad Core */ + IMC_DEV(KBL_SD_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Dual Core */ + IMC_DEV(KBL_SQ_IMC, &skl_uncore_pci_driver), /* 7th Gen Core S Quad Core */ + IMC_DEV(CFL_2U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 2 Cores */ + IMC_DEV(CFL_4U_IMC, &skl_uncore_pci_driver), /* 8th Gen Core U 4 Cores */ + IMC_DEV(CFL_4H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 4 Cores */ + IMC_DEV(CFL_6H_IMC, &skl_uncore_pci_driver), /* 8th Gen Core H 6 Cores */ + IMC_DEV(CFL_2S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 2 Cores Desktop */ + IMC_DEV(CFL_4S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Desktop */ + IMC_DEV(CFL_6S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Desktop */ + IMC_DEV(CFL_8S_D_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Desktop */ + IMC_DEV(CFL_4S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Work Station */ + IMC_DEV(CFL_6S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Work Station */ + IMC_DEV(CFL_8S_W_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Work Station */ + IMC_DEV(CFL_4S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 4 Cores Server */ + IMC_DEV(CFL_6S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 6 Cores Server */ + IMC_DEV(CFL_8S_S_IMC, &skl_uncore_pci_driver), /* 8th Gen Core S 8 Cores Server */ { /* end marker */ } }; -- cgit v1.2.3 From 4d47d6407ac7b4b442a4e717488a3bb137398b6c Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Fri, 19 Oct 2018 10:04:19 -0700 Subject: perf/x86/intel/uncore: Support CoffeeLake 8th CBOX Coffee Lake has 8 core products which has 8 Cboxes. The 8th CBOX is mapped into different MSR space. Increase the num_boxes to 8 to handle the new products. It will not impact the previous platforms, SkyLake, KabyLake and earlier CoffeeLake. Because the num_boxes will be recalculated in uncore_cpu_init and doesn't exceed the x86_max_cores. Introduce a new box flag bit to indicate the 8th CBOX. Signed-off-by: Kan Liang Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181019170419.378-2-kan.liang@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.h | 33 +++++++++++++++++++++++++-------- arch/x86/events/intel/uncore_snb.c | 6 +++++- 2 files changed, 30 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index e17ab885b1e9..cb46d602a6b8 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -129,8 +129,15 @@ struct intel_uncore_box { struct intel_uncore_extra_reg shared_regs[0]; }; -#define UNCORE_BOX_FLAG_INITIATED 0 -#define UNCORE_BOX_FLAG_CTL_OFFS8 1 /* event config registers are 8-byte apart */ +/* CFL uncore 8th cbox MSRs */ +#define CFL_UNC_CBO_7_PERFEVTSEL0 0xf70 +#define CFL_UNC_CBO_7_PER_CTR0 0xf76 + +#define UNCORE_BOX_FLAG_INITIATED 0 +/* event config registers are 8-byte apart */ +#define UNCORE_BOX_FLAG_CTL_OFFS8 1 +/* CFL 8th CBOX has different MSR space */ +#define UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS 2 struct uncore_event_desc { struct kobj_attribute attr; @@ -297,17 +304,27 @@ unsigned int uncore_freerunning_counter(struct intel_uncore_box *box, static inline unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx) { - return box->pmu->type->event_ctl + - (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + - uncore_msr_box_offset(box); + if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) { + return CFL_UNC_CBO_7_PERFEVTSEL0 + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx); + } else { + return box->pmu->type->event_ctl + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + + uncore_msr_box_offset(box); + } } static inline unsigned uncore_msr_perf_ctr(struct intel_uncore_box *box, int idx) { - return box->pmu->type->perf_ctr + - (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + - uncore_msr_box_offset(box); + if (test_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags)) { + return CFL_UNC_CBO_7_PER_CTR0 + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx); + } else { + return box->pmu->type->perf_ctr + + (box->pmu->type->pair_ctr_ctl ? 2 * idx : idx) + + uncore_msr_box_offset(box); + } } static inline diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index bfa25814fe5f..2593b0d7aeee 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -221,6 +221,10 @@ static void skl_uncore_msr_init_box(struct intel_uncore_box *box) wrmsrl(SKL_UNC_PERF_GLOBAL_CTL, SNB_UNC_GLOBAL_CTL_EN | SKL_UNC_GLOBAL_CTL_CORE_ALL); } + + /* The 8th CBOX has different MSR space */ + if (box->pmu->pmu_idx == 7) + __set_bit(UNCORE_BOX_FLAG_CFL8_CBOX_MSR_OFFS, &box->flags); } static void skl_uncore_msr_enable_box(struct intel_uncore_box *box) @@ -247,7 +251,7 @@ static struct intel_uncore_ops skl_uncore_msr_ops = { static struct intel_uncore_type skl_uncore_cbox = { .name = "cbox", .num_counters = 4, - .num_boxes = 5, + .num_boxes = 8, .perf_ctr_bits = 44, .fixed_ctr_bits = 48, .perf_ctr = SNB_UNC_CBO_0_PER_CTR0, -- cgit v1.2.3 From e0c827aca0730b51f38081aa4e8ecf0912aab55f Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Sat, 10 Nov 2018 13:16:51 +0200 Subject: drm/omap: Populate DSS children in omapdss driver The DSS DT node contains children that describe the DSS components (DISPC and internal encoders). Each of those components is handled by a platform driver, and thus needs to be backed by a platform device. The corresponding platform devices are created in mach-omap2 code by a call to of_platform_populate(). While this approach has worked so far, it doesn't model the hardware architecture very well, as it creates child devices before the parent is ready to handle them. This would be akin to creating I2C slaves before the I2C master is available. The task can be easily performed in the omapdss driver code instead, simplifying mach-omap2 code. We however can't remove the mach-omap2 code completely as the omap2fb driver still depends on it, but we can move it to the omap2fb-specific section, where it can stay until the omap2fb driver gets removed. This has the added benefit of not allowing DSS components to probe before the DSS itself, which led to runtime PM issues when the DSS probe is deferred. Fixes: 27d624527d99 ("drm/omap: dss: Acquire next dssdev at probe time") Signed-off-by: Laurent Pinchart Acked-by: Tony Lindgren Reviewed-by: Sebastian Reichel Signed-off-by: Tomi Valkeinen Link: https://patchwork.freedesktop.org/patch/msgid/20181110111654.4387-2-laurent.pinchart@ideasonboard.com --- arch/arm/mach-omap2/display.c | 111 ++++++++++++++++++++---------------------- 1 file changed, 53 insertions(+), 58 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap2/display.c b/arch/arm/mach-omap2/display.c index 9500b6e27380..f86b72d1d59e 100644 --- a/arch/arm/mach-omap2/display.c +++ b/arch/arm/mach-omap2/display.c @@ -209,11 +209,61 @@ static int __init omapdss_init_fbdev(void) return 0; } -#else -static inline int omapdss_init_fbdev(void) + +static const char * const omapdss_compat_names[] __initconst = { + "ti,omap2-dss", + "ti,omap3-dss", + "ti,omap4-dss", + "ti,omap5-dss", + "ti,dra7-dss", +}; + +static struct device_node * __init omapdss_find_dss_of_node(void) { - return 0; + struct device_node *node; + int i; + + for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) { + node = of_find_compatible_node(NULL, NULL, + omapdss_compat_names[i]); + if (node) + return node; + } + + return NULL; } + +static int __init omapdss_init_of(void) +{ + int r; + struct device_node *node; + struct platform_device *pdev; + + /* only create dss helper devices if dss is enabled in the .dts */ + + node = omapdss_find_dss_of_node(); + if (!node) + return 0; + + if (!of_device_is_available(node)) + return 0; + + pdev = of_find_device_by_node(node); + + if (!pdev) { + pr_err("Unable to find DSS platform device\n"); + return -ENODEV; + } + + r = of_platform_populate(node, NULL, NULL, &pdev->dev); + if (r) { + pr_err("Unable to populate DSS submodule devices\n"); + return r; + } + + return omapdss_init_fbdev(); +} +omap_device_initcall(omapdss_init_of); #endif /* CONFIG_FB_OMAP2 */ static void dispc_disable_outputs(void) @@ -361,58 +411,3 @@ int omap_dss_reset(struct omap_hwmod *oh) return r; } - -static const char * const omapdss_compat_names[] __initconst = { - "ti,omap2-dss", - "ti,omap3-dss", - "ti,omap4-dss", - "ti,omap5-dss", - "ti,dra7-dss", -}; - -static struct device_node * __init omapdss_find_dss_of_node(void) -{ - struct device_node *node; - int i; - - for (i = 0; i < ARRAY_SIZE(omapdss_compat_names); ++i) { - node = of_find_compatible_node(NULL, NULL, - omapdss_compat_names[i]); - if (node) - return node; - } - - return NULL; -} - -static int __init omapdss_init_of(void) -{ - int r; - struct device_node *node; - struct platform_device *pdev; - - /* only create dss helper devices if dss is enabled in the .dts */ - - node = omapdss_find_dss_of_node(); - if (!node) - return 0; - - if (!of_device_is_available(node)) - return 0; - - pdev = of_find_device_by_node(node); - - if (!pdev) { - pr_err("Unable to find DSS platform device\n"); - return -ENODEV; - } - - r = of_platform_populate(node, NULL, NULL, &pdev->dev); - if (r) { - pr_err("Unable to populate DSS submodule devices\n"); - return r; - } - - return omapdss_init_fbdev(); -} -omap_device_initcall(omapdss_init_of); -- cgit v1.2.3 From 899a42f836678a595f7d2bc36a5a0c2b03d08cbc Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Jul 2018 11:42:36 +0100 Subject: ARM: make lookup_processor_type() non-__init Move lookup_processor_type() out of the __init section so it is callable from (eg) the secondary startup code during hotplug. Reviewed-by: Julien Thierry Signed-off-by: Russell King --- arch/arm/kernel/head-common.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/head-common.S b/arch/arm/kernel/head-common.S index 6e0375e7db05..997b02302c31 100644 --- a/arch/arm/kernel/head-common.S +++ b/arch/arm/kernel/head-common.S @@ -145,6 +145,9 @@ __mmap_switched_data: #endif .size __mmap_switched_data, . - __mmap_switched_data + __FINIT + .text + /* * This provides a C-API version of __lookup_processor_type */ @@ -156,9 +159,6 @@ ENTRY(lookup_processor_type) ldmfd sp!, {r4 - r6, r9, pc} ENDPROC(lookup_processor_type) - __FINIT - .text - /* * Read processor ID register (CP#15, CR0), and look up in the linker-built * supported processor list. Note that we can't use the absolute addresses -- cgit v1.2.3 From 65987a8553061515b5851b472081aedb9837a391 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Jul 2018 11:59:56 +0100 Subject: ARM: split out processor lookup Split out the lookup of the processor type and associated error handling from the rest of setup_processor() - we will need to use this in the secondary CPU bringup path for big.Little Spectre variant 2 mitigation. Reviewed-by: Julien Thierry Signed-off-by: Russell King --- arch/arm/include/asm/cputype.h | 1 + arch/arm/kernel/setup.c | 31 +++++++++++++++++++------------ 2 files changed, 20 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 26021980504d..f6df4bb4e543 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -107,6 +107,7 @@ #define ARM_CPU_PART_SCORPION 0x510002d0 extern unsigned int processor_id; +struct proc_info_list *lookup_processor(u32 midr); #ifdef CONFIG_CPU_CP15 #define read_cpuid(reg) \ diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index fc40a2b40595..05a4eb6b0d01 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -667,22 +667,29 @@ static void __init smp_build_mpidr_hash(void) } #endif -static void __init setup_processor(void) +/* + * locate processor in the list of supported processor types. The linker + * builds this table for us from the entries in arch/arm/mm/proc-*.S + */ +struct proc_info_list *lookup_processor(u32 midr) { - struct proc_info_list *list; + struct proc_info_list *list = lookup_processor_type(midr); - /* - * locate processor in the list of supported processor - * types. The linker builds this table for us from the - * entries in arch/arm/mm/proc-*.S - */ - list = lookup_processor_type(read_cpuid_id()); if (!list) { - pr_err("CPU configuration botched (ID %08x), unable to continue.\n", - read_cpuid_id()); - while (1); + pr_err("CPU%u: configuration botched (ID %08x), CPU halted\n", + smp_processor_id(), midr); + while (1) + /* can't use cpu_relax() here as it may require MMU setup */; } + return list; +} + +static void __init setup_processor(void) +{ + unsigned int midr = read_cpuid_id(); + struct proc_info_list *list = lookup_processor(midr); + cpu_name = list->cpu_name; __cpu_architecture = __get_cpu_architecture(); @@ -700,7 +707,7 @@ static void __init setup_processor(void) #endif pr_info("CPU: %s [%08x] revision %d (ARMv%s), cr=%08lx\n", - cpu_name, read_cpuid_id(), read_cpuid_id() & 15, + list->cpu_name, midr, midr & 15, proc_arch[cpu_architecture()], get_cr()); snprintf(init_utsname()->machine, __NEW_UTS_LEN + 1, "%s%c", -- cgit v1.2.3 From 945aceb1db8885d3a35790cf2e810f681db52756 Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Jul 2018 12:43:03 +0100 Subject: ARM: clean up per-processor check_bugs method call Call the per-processor type check_bugs() method in the same way as we do other per-processor functions - move the "processor." detail into proc-fns.h. Reviewed-by: Julien Thierry Signed-off-by: Russell King --- arch/arm/include/asm/proc-fns.h | 1 + arch/arm/kernel/bugs.c | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index e25f4392e1b2..30c499146320 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -99,6 +99,7 @@ extern void cpu_do_suspend(void *); extern void cpu_do_resume(void *); #else #define cpu_proc_init processor._proc_init +#define cpu_check_bugs processor.check_bugs #define cpu_proc_fin processor._proc_fin #define cpu_reset processor.reset #define cpu_do_idle processor._do_idle diff --git a/arch/arm/kernel/bugs.c b/arch/arm/kernel/bugs.c index 7be511310191..d41d3598e5e5 100644 --- a/arch/arm/kernel/bugs.c +++ b/arch/arm/kernel/bugs.c @@ -6,8 +6,8 @@ void check_other_bugs(void) { #ifdef MULTI_CPU - if (processor.check_bugs) - processor.check_bugs(); + if (cpu_check_bugs) + cpu_check_bugs(); #endif } -- cgit v1.2.3 From e209950fdd065d2cc46e6338e47e52841b830cba Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Jul 2018 12:17:38 +0100 Subject: ARM: add PROC_VTABLE and PROC_TABLE macros Allow the way we access members of the processor vtable to be changed at compile time. We will need to move to per-CPU vtables to fix the Spectre variant 2 issues on big.Little systems. However, we have a couple of calls that do not need the vtable treatment, and indeed cause a kernel warning due to the (later) use of smp_processor_id(), so also introduce the PROC_TABLE macro for these which always use CPU 0's function pointers. Reviewed-by: Julien Thierry Signed-off-by: Russell King --- arch/arm/include/asm/proc-fns.h | 39 ++++++++++++++++++++++++++------------- arch/arm/kernel/setup.c | 4 +--- 2 files changed, 27 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index 30c499146320..c259cc49c641 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -23,7 +23,7 @@ struct mm_struct; /* * Don't change this structure - ASM code relies on it. */ -extern struct processor { +struct processor { /* MISC * get data abort address/flags */ @@ -79,9 +79,13 @@ extern struct processor { unsigned int suspend_size; void (*do_suspend)(void *); void (*do_resume)(void *); -} processor; +}; #ifndef MULTI_CPU +static inline void init_proc_vtable(const struct processor *p) +{ +} + extern void cpu_proc_init(void); extern void cpu_proc_fin(void); extern int cpu_do_idle(void); @@ -98,18 +102,27 @@ extern void cpu_reset(unsigned long addr, bool hvc) __attribute__((noreturn)); extern void cpu_do_suspend(void *); extern void cpu_do_resume(void *); #else -#define cpu_proc_init processor._proc_init -#define cpu_check_bugs processor.check_bugs -#define cpu_proc_fin processor._proc_fin -#define cpu_reset processor.reset -#define cpu_do_idle processor._do_idle -#define cpu_dcache_clean_area processor.dcache_clean_area -#define cpu_set_pte_ext processor.set_pte_ext -#define cpu_do_switch_mm processor.switch_mm -/* These three are private to arch/arm/kernel/suspend.c */ -#define cpu_do_suspend processor.do_suspend -#define cpu_do_resume processor.do_resume +extern struct processor processor; +#define PROC_VTABLE(f) processor.f +#define PROC_TABLE(f) processor.f +static inline void init_proc_vtable(const struct processor *p) +{ + processor = *p; +} + +#define cpu_proc_init PROC_VTABLE(_proc_init) +#define cpu_check_bugs PROC_VTABLE(check_bugs) +#define cpu_proc_fin PROC_VTABLE(_proc_fin) +#define cpu_reset PROC_VTABLE(reset) +#define cpu_do_idle PROC_VTABLE(_do_idle) +#define cpu_dcache_clean_area PROC_TABLE(dcache_clean_area) +#define cpu_set_pte_ext PROC_TABLE(set_pte_ext) +#define cpu_do_switch_mm PROC_VTABLE(switch_mm) + +/* These two are private to arch/arm/kernel/suspend.c */ +#define cpu_do_suspend PROC_VTABLE(do_suspend) +#define cpu_do_resume PROC_VTABLE(do_resume) #endif extern void cpu_resume(void); diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 05a4eb6b0d01..c214bd14a1fe 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -693,9 +693,7 @@ static void __init setup_processor(void) cpu_name = list->cpu_name; __cpu_architecture = __get_cpu_architecture(); -#ifdef MULTI_CPU - processor = *list->proc; -#endif + init_proc_vtable(list->proc); #ifdef MULTI_TLB cpu_tlb = *list->tlb; #endif -- cgit v1.2.3 From 5df7a99bdd0de4a0480320264c44c04543c29d5a Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Thu, 8 Nov 2018 17:25:28 +0100 Subject: ARM: 8810/1: vfp: Fix wrong assignement to ufp_exc In vfp_preserve_user_clear_hwstate, ufp_exc->fpinst2 gets assigned to itself. It should actually be hwstate->fpinst2 that gets assigned to the ufp_exc field. Fixes commit 3aa2df6ec2ca6bc143a65351cca4266d03a8bc41 ("ARM: 8791/1: vfp: use __copy_to_user() when saving VFP state"). Reported-by: David Binderman Signed-off-by: Julien Thierry Signed-off-by: Russell King --- arch/arm/vfp/vfpmodule.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 3b75f1d8a491..15bc3cf2a7fd 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -579,7 +579,7 @@ int vfp_preserve_user_clear_hwstate(struct user_vfp *ufp, */ ufp_exc->fpexc = hwstate->fpexc; ufp_exc->fpinst = hwstate->fpinst; - ufp_exc->fpinst2 = ufp_exc->fpinst2; + ufp_exc->fpinst2 = hwstate->fpinst2; /* Ensure that VFP is disabled. */ vfp_flush_hwstate(thread); -- cgit v1.2.3 From 383fb3ee8024d596f488d2dbaf45e572897acbdb Mon Sep 17 00:00:00 2001 From: Russell King Date: Thu, 19 Jul 2018 12:21:31 +0100 Subject: ARM: spectre-v2: per-CPU vtables to work around big.Little systems In big.Little systems, some CPUs require the Spectre workarounds in paths such as the context switch, but other CPUs do not. In order to handle these differences, we need per-CPU vtables. We are unable to use the kernel's per-CPU variables to support this as per-CPU is not initialised at times when we need access to the vtables, so we have to use an array indexed by logical CPU number. We use an array-of-pointers to avoid having function pointers in the kernel's read/write .data section. Reviewed-by: Julien Thierry Signed-off-by: Russell King --- arch/arm/include/asm/proc-fns.h | 23 +++++++++++++++++++++++ arch/arm/kernel/setup.c | 5 +++++ arch/arm/kernel/smp.c | 31 +++++++++++++++++++++++++++++++ arch/arm/mm/proc-v7-bugs.c | 17 ++--------------- 4 files changed, 61 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/arm/include/asm/proc-fns.h b/arch/arm/include/asm/proc-fns.h index c259cc49c641..e1b6f280ab08 100644 --- a/arch/arm/include/asm/proc-fns.h +++ b/arch/arm/include/asm/proc-fns.h @@ -104,12 +104,35 @@ extern void cpu_do_resume(void *); #else extern struct processor processor; +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +#include +/* + * This can't be a per-cpu variable because we need to access it before + * per-cpu has been initialised. We have a couple of functions that are + * called in a pre-emptible context, and so can't use smp_processor_id() + * there, hence PROC_TABLE(). We insist in init_proc_vtable() that the + * function pointers for these are identical across all CPUs. + */ +extern struct processor *cpu_vtable[]; +#define PROC_VTABLE(f) cpu_vtable[smp_processor_id()]->f +#define PROC_TABLE(f) cpu_vtable[0]->f +static inline void init_proc_vtable(const struct processor *p) +{ + unsigned int cpu = smp_processor_id(); + *cpu_vtable[cpu] = *p; + WARN_ON_ONCE(cpu_vtable[cpu]->dcache_clean_area != + cpu_vtable[0]->dcache_clean_area); + WARN_ON_ONCE(cpu_vtable[cpu]->set_pte_ext != + cpu_vtable[0]->set_pte_ext); +} +#else #define PROC_VTABLE(f) processor.f #define PROC_TABLE(f) processor.f static inline void init_proc_vtable(const struct processor *p) { processor = *p; } +#endif #define cpu_proc_init PROC_VTABLE(_proc_init) #define cpu_check_bugs PROC_VTABLE(check_bugs) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index c214bd14a1fe..cd46a595422c 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -115,6 +115,11 @@ EXPORT_SYMBOL(elf_hwcap2); #ifdef MULTI_CPU struct processor processor __ro_after_init; +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +struct processor *cpu_vtable[NR_CPUS] = { + [0] = &processor, +}; +#endif #endif #ifdef MULTI_TLB struct cpu_tlb_fns cpu_tlb __ro_after_init; diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5ad0b67b9e33..82b879db32ee 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include #include @@ -102,6 +103,30 @@ static unsigned long get_arch_pgd(pgd_t *pgd) #endif } +#if defined(CONFIG_BIG_LITTLE) && defined(CONFIG_HARDEN_BRANCH_PREDICTOR) +static int secondary_biglittle_prepare(unsigned int cpu) +{ + if (!cpu_vtable[cpu]) + cpu_vtable[cpu] = kzalloc(sizeof(*cpu_vtable[cpu]), GFP_KERNEL); + + return cpu_vtable[cpu] ? 0 : -ENOMEM; +} + +static void secondary_biglittle_init(void) +{ + init_proc_vtable(lookup_processor(read_cpuid_id())->proc); +} +#else +static int secondary_biglittle_prepare(unsigned int cpu) +{ + return 0; +} + +static void secondary_biglittle_init(void) +{ +} +#endif + int __cpu_up(unsigned int cpu, struct task_struct *idle) { int ret; @@ -109,6 +134,10 @@ int __cpu_up(unsigned int cpu, struct task_struct *idle) if (!smp_ops.smp_boot_secondary) return -ENOSYS; + ret = secondary_biglittle_prepare(cpu); + if (ret) + return ret; + /* * We need to tell the secondary core where to find * its stack and the page tables. @@ -360,6 +389,8 @@ asmlinkage void secondary_start_kernel(void) struct mm_struct *mm = &init_mm; unsigned int cpu; + secondary_biglittle_init(); + /* * The identity mapping is uncached (strongly ordered), so * switch away from it before attempting any exclusive accesses. diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 5544b82a2e7a..9a07916af8dd 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -52,8 +52,6 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A17: case ARM_CPU_PART_CORTEX_A73: case ARM_CPU_PART_CORTEX_A75: - if (processor.switch_mm != cpu_v7_bpiall_switch_mm) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_bpiall; spectre_v2_method = "BPIALL"; @@ -61,8 +59,6 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A15: case ARM_CPU_PART_BRAHMA_B15: - if (processor.switch_mm != cpu_v7_iciallu_switch_mm) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = harden_branch_predictor_iciallu; spectre_v2_method = "ICIALLU"; @@ -88,11 +84,9 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; - if (processor.switch_mm != cpu_v7_hvc_switch_mm && cpu) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_hvc_arch_workaround_1; - processor.switch_mm = cpu_v7_hvc_switch_mm; + cpu_do_switch_mm = cpu_v7_hvc_switch_mm; spectre_v2_method = "hypervisor"; break; @@ -101,11 +95,9 @@ static void cpu_v7_spectre_init(void) ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) break; - if (processor.switch_mm != cpu_v7_smc_switch_mm && cpu) - goto bl_error; per_cpu(harden_branch_predictor_fn, cpu) = call_smc_arch_workaround_1; - processor.switch_mm = cpu_v7_smc_switch_mm; + cpu_do_switch_mm = cpu_v7_smc_switch_mm; spectre_v2_method = "firmware"; break; @@ -119,11 +111,6 @@ static void cpu_v7_spectre_init(void) if (spectre_v2_method) pr_info("CPU%u: Spectre v2: using %s workaround\n", smp_processor_id(), spectre_v2_method); - return; - -bl_error: - pr_err("CPU%u: Spectre v2: incorrect context switching function, system vulnerable\n", - cpu); } #else static void cpu_v7_spectre_init(void) -- cgit v1.2.3 From 82fba2df7f7c019627f24c5036dc99f41731d770 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Sun, 11 Nov 2018 00:06:12 +0200 Subject: MIPS: OCTEON: cavium_octeon_defconfig: re-enable OCTEON USB driver Re-enable OCTEON USB driver which is needed on older hardware (e.g. EdgeRouter Lite) for mass storage etc. This got accidentally deleted when config options were changed for OCTEON2/3 USB. Signed-off-by: Aaro Koskinen Signed-off-by: Paul Burton Fixes: f922bc0ad08b ("MIPS: Octeon: cavium_octeon_defconfig: Enable more drivers") Patchwork: https://patchwork.linux-mips.org/patch/21077/ Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # 4.14+ --- arch/mips/configs/cavium_octeon_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/mips/configs/cavium_octeon_defconfig b/arch/mips/configs/cavium_octeon_defconfig index 490b12af103c..c52d0efacd14 100644 --- a/arch/mips/configs/cavium_octeon_defconfig +++ b/arch/mips/configs/cavium_octeon_defconfig @@ -140,6 +140,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_DS1307=y CONFIG_STAGING=y CONFIG_OCTEON_ETHERNET=y +CONFIG_OCTEON_USB=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_RAS=y CONFIG_EXT4_FS=y -- cgit v1.2.3 From 25517ed4e99b3be4244dfd61d1e5c753b09faf2c Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Sat, 10 Nov 2018 11:50:14 +0800 Subject: MIPS: Let early memblock_alloc*() allocate memories bottom-up After switched to NO_BOOTMEM, there are several boot failures. Some of them have been fixed and some of them haven't. I find that many of them are because of memory allocations are top-down, while the old behavior is bottom-up. This patch let early memblock_alloc*() allocate memories bottom-up to avoid some potential problems. Signed-off-by: Huacai Chen Signed-off-by: Paul Burton Fixes: bcec54bf3118 ("mips: switch to NO_BOOTMEM") Patchwork: https://patchwork.linux-mips.org/patch/21069/ References: https://patchwork.linux-mips.org/patch/21031/ Cc: Ralf Baechle Cc: James Hogan Cc: Steven J . Hill Cc: linux-mips@linux-mips.org Cc: Fuxin Zhang Cc: Zhangjin Wu --- arch/mips/kernel/setup.c | 1 + arch/mips/kernel/traps.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/setup.c b/arch/mips/kernel/setup.c index ea09ed6a80a9..8c6c48ed786a 100644 --- a/arch/mips/kernel/setup.c +++ b/arch/mips/kernel/setup.c @@ -794,6 +794,7 @@ static void __init arch_mem_init(char **cmdline_p) /* call board setup routine */ plat_mem_setup(); + memblock_set_bottom_up(true); /* * Make sure all kernel memory is in the maps. The "UP" and diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 0f852e1b5891..15e103c6d799 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2260,10 +2260,8 @@ void __init trap_init(void) unsigned long size = 0x200 + VECTORSPACING*64; phys_addr_t ebase_pa; - memblock_set_bottom_up(true); ebase = (unsigned long) memblock_alloc_from(size, 1 << fls(size), 0); - memblock_set_bottom_up(false); /* * Try to ensure ebase resides in KSeg0 if possible. @@ -2307,6 +2305,7 @@ void __init trap_init(void) if (board_ebase_setup) board_ebase_setup(); per_cpu_trap_init(true); + memblock_set_bottom_up(false); /* * Copy the generic exception handlers to their final destination. -- cgit v1.2.3 From 4ab49461d9d9b8274cc72d8656fe685ed394b8f7 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 1 Nov 2018 10:40:33 +0530 Subject: RISC-V: defconfig: Enable printk timestamps The printk timestamps are very useful information to visually see where kernel is spending time during boot. It also helps us see the timing of hotplug events at runtime. This patch enables printk timestamps in RISC-V defconfig so that we have it enabled by default (similar to other architectures such as x86_64, arm64, etc). Signed-off-by: Anup Patel Acked-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 07fa9ea75fea..ef4f15df9adf 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -76,4 +76,5 @@ CONFIG_NFS_V4_1=y CONFIG_NFS_V4_2=y CONFIG_ROOT_NFS=y CONFIG_CRYPTO_USER_API_HASH=y +CONFIG_PRINTK_TIME=y # CONFIG_RCU_TRACE is not set -- cgit v1.2.3 From 10febb3ecace4b557eaa0d52c9d2c3531c1a715a Mon Sep 17 00:00:00 2001 From: David Abdurachmanov Date: Mon, 5 Nov 2018 15:40:04 +0100 Subject: riscv: fix spacing in struct pt_regs Replace 8 spaces with tab to match styling. Signed-off-by: David Abdurachmanov Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ptrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/ptrace.h b/arch/riscv/include/asm/ptrace.h index 2c5df945d43c..bbe1862e8f80 100644 --- a/arch/riscv/include/asm/ptrace.h +++ b/arch/riscv/include/asm/ptrace.h @@ -56,8 +56,8 @@ struct pt_regs { unsigned long sstatus; unsigned long sbadaddr; unsigned long scause; - /* a0 value before the syscall */ - unsigned long orig_a0; + /* a0 value before the syscall */ + unsigned long orig_a0; }; #ifdef CONFIG_64BIT -- cgit v1.2.3 From f157d411a9eb170d2ee6b766da7a381962017cc9 Mon Sep 17 00:00:00 2001 From: David Abdurachmanov Date: Mon, 5 Nov 2018 15:35:37 +0100 Subject: riscv: add missing vdso_install target Building kernel 4.20 for Fedora as RPM fails, because riscv is missing vdso_install target in arch/riscv/Makefile. Signed-off-by: David Abdurachmanov Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index d10146197533..4af153a182b0 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -77,4 +77,8 @@ core-y += arch/riscv/kernel/ arch/riscv/mm/ libs-y += arch/riscv/lib/ +PHONY += vdso_install +vdso_install: + $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ + all: vmlinux -- cgit v1.2.3 From 85d90b91807bb0c4a0fcff6a144e73f11cda782a Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Tue, 30 Oct 2018 23:47:07 -0700 Subject: RISC-V: lib: Fix build error for 64-bit Fixes the following build error from tinyconfig: riscv64-unknown-linux-gnu-ld: kernel/sched/fair.o: in function `.L8': fair.c:(.text+0x70): undefined reference to `__lshrti3' riscv64-unknown-linux-gnu-ld: kernel/time/clocksource.o: in function `.L0 ': clocksource.c:(.text+0x334): undefined reference to `__lshrti3' Fixes: 7f47c73b355f ("RISC-V: Build tishift only on 64-bit") Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/lib/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/riscv/lib/Makefile b/arch/riscv/lib/Makefile index 5739bd05d289..4e2e600f7d53 100644 --- a/arch/riscv/lib/Makefile +++ b/arch/riscv/lib/Makefile @@ -3,6 +3,6 @@ lib-y += memcpy.o lib-y += memset.o lib-y += uaccess.o -lib-(CONFIG_64BIT) += tishift.o +lib-$(CONFIG_64BIT) += tishift.o lib-$(CONFIG_32BIT) += udivdi3.o -- cgit v1.2.3 From ef3a61406618291c46da168ff91acaa28d85944c Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Tue, 30 Oct 2018 23:47:09 -0700 Subject: RISC-V: Silence some module warnings on 32-bit Fixes: arch/riscv/kernel/module.c: In function 'apply_r_riscv_32_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:23:27: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_pcrel_hi20_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:104:23: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_hi20_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:146:23: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_got_hi20_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:190:60: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_call_plt_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:214:24: note: format string is defined here arch/riscv/kernel/module.c: In function 'apply_r_riscv_call_rela': ./include/linux/kern_levels.h:5:18: warning: format '%llx' expects argument of type 'long long unsigned int', but argument 3 has type 'Elf32_Addr' {aka 'unsigned int'} [-Wformat=] arch/riscv/kernel/module.c:236:23: note: format string is defined here Signed-off-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 3303ed2cd419..7dd308129b40 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -21,7 +21,7 @@ static int apply_r_riscv_32_rela(struct module *me, u32 *location, Elf_Addr v) { if (v != (u32)v) { pr_err("%s: value %016llx out of range for 32-bit field\n", - me->name, v); + me->name, (long long)v); return -EINVAL; } *location = v; @@ -102,7 +102,7 @@ static int apply_r_riscv_pcrel_hi20_rela(struct module *me, u32 *location, if (offset != (s32)offset) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } @@ -144,7 +144,7 @@ static int apply_r_riscv_hi20_rela(struct module *me, u32 *location, if (IS_ENABLED(CMODEL_MEDLOW)) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } @@ -188,7 +188,7 @@ static int apply_r_riscv_got_hi20_rela(struct module *me, u32 *location, } else { pr_err( "%s: can not generate the GOT entry for symbol = %016llx from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } @@ -212,7 +212,7 @@ static int apply_r_riscv_call_plt_rela(struct module *me, u32 *location, } else { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } } @@ -234,7 +234,7 @@ static int apply_r_riscv_call_rela(struct module *me, u32 *location, if (offset != fill_v) { pr_err( "%s: target %016llx can not be addressed by the 32-bit offset from PC = %p\n", - me->name, v, location); + me->name, (long long)v, location); return -EINVAL; } -- cgit v1.2.3 From c1d91f86a1b4c9c05854d59c6a0abd5d0f75b849 Mon Sep 17 00:00:00 2001 From: Christoph Muellner Date: Tue, 13 Nov 2018 11:25:35 +0100 Subject: arm64: dts: rockchip: Fix PCIe reset polarity for rk3399-puma-haikou. This patch fixes the wrong polarity setting for the PCIe host driver's pre-reset pin for rk3399-puma-haikou. Without this patch link training will most likely fail. Fixes: 60fd9f72ce8a ("arm64: dts: rockchip: add Haikou baseboard with RK3399-Q7 SoM") Cc: stable@vger.kernel.org Signed-off-by: Christoph Muellner Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts index 2dceeea29b83..1e6a71066c16 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-puma-haikou.dts @@ -153,7 +153,7 @@ }; &pcie0 { - ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_LOW>; + ep-gpios = <&gpio4 RK_PC6 GPIO_ACTIVE_HIGH>; num-lanes = <4>; pinctrl-names = "default"; pinctrl-0 = <&pcie_clkreqn_cpm>; -- cgit v1.2.3 From 437ccdc8ce629470babdda1a7086e2f477048cbd Mon Sep 17 00:00:00 2001 From: Satheesh Rajendran Date: Thu, 8 Nov 2018 10:47:56 +0530 Subject: powerpc/numa: Suppress "VPHN is not supported" messages When VPHN function is not supported and during cpu hotplug event, kernel prints message 'VPHN function not supported. Disabling polling...'. Currently it prints on every hotplug event, it floods dmesg when a KVM guest tries to hotplug huge number of vcpus, let's just print once and suppress further kernel prints. Signed-off-by: Satheesh Rajendran Signed-off-by: Michael Ellerman --- arch/powerpc/mm/numa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 3a048e98a132..ce28ae5ca080 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1178,7 +1178,7 @@ static long vphn_get_associativity(unsigned long cpu, switch (rc) { case H_FUNCTION: - printk(KERN_INFO + printk_once(KERN_INFO "VPHN is not supported. Disabling polling...\n"); stop_topology_update(); break; -- cgit v1.2.3 From 5f8208f557065163f9a8089ea2ea7888f9d96922 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 9 Jul 2018 19:51:54 +0000 Subject: ARM: dts: sun8i: a83t: bananapi-m3: increase vcc-pd voltage to 3.3V Since commit d7c5f6863550 ("ARM: dts: sun8i: a83t: bananapi-m3: Add AXP813 regulator nodes") my BPIM3 no longer works at gigabit speed. With the default setting, dldo3 is regulated at 2.9v which seems sufficient for the PHY but the aforementioned commit drops it to 2.5V which is insufficient. Note that this behaviour is random for all BPIM3. Some work with 2.5V, but some don't. Finnaly, someone from Bananapi confirmed that this regulator must be set to 3.3V. Fixes: d7c5f6863550 ("ARM: dts: sun8i: a83t: bananapi-m3: Add AXP813 regulator nodes") Signed-off-by: Corentin Labbe [wens@csie.org: Reworked commit message] Signed-off-by: Chen-Yu Tsai --- arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts index 742d2946b08b..583a5a01642f 100644 --- a/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts +++ b/arch/arm/boot/dts/sun8i-a83t-bananapi-m3.dts @@ -314,8 +314,8 @@ ®_dldo3 { regulator-always-on; - regulator-min-microvolt = <2500000>; - regulator-max-microvolt = <2500000>; + regulator-min-microvolt = <3300000>; + regulator-max-microvolt = <3300000>; regulator-name = "vcc-pd"; }; -- cgit v1.2.3 From 40dc948f234b73497c3278875eb08a01d5854d3f Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Tue, 13 Nov 2018 23:46:42 -0800 Subject: xtensa: fix boot parameters address translation The bootloader may pass physical address of the boot parameters structure to the MMUv3 kernel in the register a2. Code in the _SetupMMU block in the arch/xtensa/kernel/head.S is supposed to map that physical address to the virtual address in the configured virtual memory layout. This code haven't been updated when additional 256+256 and 512+512 memory layouts were introduced and it may produce wrong addresses when used with these layouts. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/head.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/kernel/head.S b/arch/xtensa/kernel/head.S index 2f76118ecf62..9053a5622d2c 100644 --- a/arch/xtensa/kernel/head.S +++ b/arch/xtensa/kernel/head.S @@ -88,9 +88,12 @@ _SetupMMU: initialize_mmu #if defined(CONFIG_MMU) && XCHAL_HAVE_PTP_MMU && XCHAL_HAVE_SPANNING_WAY rsr a2, excsave1 - movi a3, 0x08000000 + movi a3, XCHAL_KSEG_PADDR + bltu a2, a3, 1f + sub a2, a2, a3 + movi a3, XCHAL_KSEG_SIZE bgeu a2, a3, 1f - movi a3, 0xd0000000 + movi a3, XCHAL_KSEG_CACHED_VADDR add a2, a2, a3 wsr a2, excsave1 1: -- cgit v1.2.3 From 613a41b0d16e617f46776a93b975a1eeea96417c Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 13 Nov 2018 15:38:22 +0000 Subject: s390/cpum_cf: Reject request for sampling in event initialization On s390 command perf top fails [root@s35lp76 perf] # ./perf top -F100000 --stdio Error: cycles: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' [root@s35lp76 perf] # Using event -e rb0000 works as designed. Event rb0000 is the event number of the sampling facility for basic sampling. During system start up the following PMUs are installed in the kernel's PMU list (from head to tail): cpum_cf --> s390 PMU counter facility device driver cpum_sf --> s390 PMU sampling facility device driver uprobe kprobe tracepoint task_clock cpu_clock Perf top executes following functions and calls perf_event_open(2) system call with different parameters many times: cmd_top --> __cmd_top --> perf_evlist__add_default --> __perf_evlist__add_default --> perf_evlist__new_cycles (creates event type:0 (HW) config 0 (CPU_CYCLES) --> perf_event_attr__set_max_precise_ip Uses perf_event_open(2) to detect correct precise_ip level. Fails 3 times on s390 which is ok. Then functions cmd_top --> __cmd_top --> perf_top__start_counters -->perf_evlist__config --> perf_can_comm_exec --> perf_probe_api This functions test support for the following events: "cycles:u", "instructions:u", "cpu-clock:u" using --> perf_do_probe_api --> perf_event_open_cloexec Test the close on exec flag support with perf_event_open(2). perf_do_probe_api returns true if the event is supported. The function returns true because event cpu-clock is supported by the PMU cpu_clock. This is achieved by many calls to perf_event_open(2). Function perf_top__start_counters now calls perf_evsel__open() for every event, which is the default event cpu_cycles (config:0) and type HARDWARE (type:0) which a predfined frequence of 4000. Given the above order of the PMU list, the PMU cpum_cf gets called first and returns 0, which indicates support for this sampling. The event is fully allocated in the function perf_event_open (file kernel/event/core.c near line 10521 and the following check fails: event = perf_event_alloc(&attr, cpu, task, group_leader, NULL, NULL, NULL, cgroup_fd); if (IS_ERR(event)) { err = PTR_ERR(event); goto err_cred; } if (is_sampling_event(event)) { if (event->pmu->capabilities & PERF_PMU_CAP_NO_INTERRUPT) { err = -EOPNOTSUPP; goto err_alloc; } } The check for the interrupt capabilities fails and the system call perf_event_open() returns -EOPNOTSUPP (-95). Add a check to return -ENODEV when sampling is requested in PMU cpum_cf. This allows common kernel code in the perf_event_open() system call to test the next PMU in above list. Fixes: 97b1198fece0 (" "s390, perf: Use common PMU interrupt disabled code") Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Signed-off-by: Martin Schwidefsky --- arch/s390/kernel/perf_cpum_cf.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/s390/kernel/perf_cpum_cf.c b/arch/s390/kernel/perf_cpum_cf.c index 74091fd3101e..d5523adeddbf 100644 --- a/arch/s390/kernel/perf_cpum_cf.c +++ b/arch/s390/kernel/perf_cpum_cf.c @@ -346,6 +346,8 @@ static int __hw_perf_event_init(struct perf_event *event) break; case PERF_TYPE_HARDWARE: + if (is_sampling_event(event)) /* No sampling support */ + return -ENOENT; ev = attr->config; /* Count user space (problem-state) only */ if (!attr->exclude_user && attr->exclude_kernel) { -- cgit v1.2.3 From 6c08ec1216b7b2f84b9755f339d6009768599256 Mon Sep 17 00:00:00 2001 From: Michael Roth Date: Thu, 8 Nov 2018 21:27:23 -0600 Subject: KVM: PPC: Book3S HV: Fix handling for interrupted H_ENTER_NESTED While running a nested guest VCPU on L0 via H_ENTER_NESTED hcall, a pending signal in the L0 QEMU process can generate the following sequence: ret0 = kvmppc_pseries_do_hcall() ret1 = kvmhv_enter_nested_guest() ret2 = kvmhv_run_single_vcpu() if (ret2 == -EINTR) return H_INTERRUPT if (ret1 == H_INTERRUPT) kvmppc_set_gpr(vcpu, 3, 0) return -EINTR /* skipped: */ kvmppc_set_gpr(vcpu, 3, ret) vcpu->arch.hcall_needed = 0 return RESUME_GUEST which causes an exit to L0 userspace with ret0 == -EINTR. The intention seems to be to set the hcall return value to 0 (via VCPU r3) so that L1 will see a successful return from H_ENTER_NESTED once we resume executing the VCPU. However, because we don't set vcpu->arch.hcall_needed = 0, we do the following once userspace resumes execution via kvm_arch_vcpu_ioctl_run(): ... } else if (vcpu->arch.hcall_needed) { int i kvmppc_set_gpr(vcpu, 3, run->papr_hcall.ret); for (i = 0; i < 9; ++i) kvmppc_set_gpr(vcpu, 4 + i, run->papr_hcall.args[i]); vcpu->arch.hcall_needed = 0; since vcpu->arch.hcall_needed == 1 indicates that userspace should have handled the hcall and stored the return value in run->papr_hcall.ret. Since that's not the case here, we can get an unexpected value in VCPU r3, which can result in kvmhv_p9_guest_entry() reporting an unexpected trap value when it returns from H_ENTER_NESTED, causing the following register dump to console via subsequent call to kvmppc_handle_exit_hv() in L1: [ 350.612854] vcpu 00000000f9564cf8 (0): [ 350.612915] pc = c00000000013eb98 msr = 8000000000009033 trap = 1 [ 350.613020] r 0 = c0000000004b9044 r16 = 0000000000000000 [ 350.613075] r 1 = c00000007cffba30 r17 = 0000000000000000 [ 350.613120] r 2 = c00000000178c100 r18 = 00007fffc24f3b50 [ 350.613166] r 3 = c00000007ef52480 r19 = 00007fffc24fff58 [ 350.613212] r 4 = 0000000000000000 r20 = 00000a1e96ece9d0 [ 350.613253] r 5 = 70616d00746f6f72 r21 = 00000a1ea117c9b0 [ 350.613295] r 6 = 0000000000000020 r22 = 00000a1ea1184360 [ 350.613338] r 7 = c0000000783be440 r23 = 0000000000000003 [ 350.613380] r 8 = fffffffffffffffc r24 = 00000a1e96e9e124 [ 350.613423] r 9 = c00000007ef52490 r25 = 00000000000007ff [ 350.613469] r10 = 0000000000000004 r26 = c00000007eb2f7a0 [ 350.613513] r11 = b0616d0009eccdb2 r27 = c00000007cffbb10 [ 350.613556] r12 = c0000000004b9000 r28 = c00000007d83a2c0 [ 350.613597] r13 = c000000001b00000 r29 = c0000000783cdf68 [ 350.613639] r14 = 0000000000000000 r30 = 0000000000000000 [ 350.613681] r15 = 0000000000000000 r31 = c00000007cffbbf0 [ 350.613723] ctr = c0000000004b9000 lr = c0000000004b9044 [ 350.613765] srr0 = 0000772f954dd48c srr1 = 800000000280f033 [ 350.613808] sprg0 = 0000000000000000 sprg1 = c000000001b00000 [ 350.613859] sprg2 = 0000772f9565a280 sprg3 = 0000000000000000 [ 350.613911] cr = 88002848 xer = 0000000020040000 dsisr = 42000000 [ 350.613962] dar = 0000772f95390000 [ 350.614031] fault dar = c000000244b278c0 dsisr = 00000000 [ 350.614073] SLB (0 entries): [ 350.614157] lpcr = 0040000003d40413 sdr1 = 0000000000000000 last_inst = ffffffff [ 350.614252] trap=0x1 | pc=0xc00000000013eb98 | msr=0x8000000000009033 followed by L1's QEMU reporting the following before stopping execution of the nested guest: KVM: unknown exit, hardware reason 1 NIP c00000000013eb98 LR c0000000004b9044 CTR c0000000004b9000 XER 0000000020040000 CPU#0 MSR 8000000000009033 HID0 0000000000000000 HF 8000000000000000 iidx 3 didx 3 TB 00000000 00000000 DECR 00000000 GPR00 c0000000004b9044 c00000007cffba30 c00000000178c100 c00000007ef52480 GPR04 0000000000000000 70616d00746f6f72 0000000000000020 c0000000783be440 GPR08 fffffffffffffffc c00000007ef52490 0000000000000004 b0616d0009eccdb2 GPR12 c0000000004b9000 c000000001b00000 0000000000000000 0000000000000000 GPR16 0000000000000000 0000000000000000 00007fffc24f3b50 00007fffc24fff58 GPR20 00000a1e96ece9d0 00000a1ea117c9b0 00000a1ea1184360 0000000000000003 GPR24 00000a1e96e9e124 00000000000007ff c00000007eb2f7a0 c00000007cffbb10 GPR28 c00000007d83a2c0 c0000000783cdf68 0000000000000000 c00000007cffbbf0 CR 88002848 [ L L - - E L G L ] RES ffffffffffffffff SRR0 0000772f954dd48c SRR1 800000000280f033 PVR 00000000004e1202 VRSAVE 0000000000000000 SPRG0 0000000000000000 SPRG1 c000000001b00000 SPRG2 0000772f9565a280 SPRG3 0000000000000000 SPRG4 0000000000000000 SPRG5 0000000000000000 SPRG6 0000000000000000 SPRG7 0000000000000000 HSRR0 0000000000000000 HSRR1 0000000000000000 CFAR 0000000000000000 LPCR 0000000003d40413 PTCR 0000000000000000 DAR 0000772f95390000 DSISR 0000000042000000 Fix this by setting vcpu->arch.hcall_needed = 0 to indicate completion of H_ENTER_NESTED before we exit to L0 userspace. Fixes: 360cae313702 ("KVM: PPC: Book3S HV: Nested guest entry via hypercall") Cc: linuxppc-dev@ozlabs.org Cc: David Gibson Signed-off-by: Michael Roth Reviewed-by: Suraj Jitindar Singh Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index d65b961661fb..a56f8413758a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -983,6 +983,7 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) ret = kvmhv_enter_nested_guest(vcpu); if (ret == H_INTERRUPT) { kvmppc_set_gpr(vcpu, 3, 0); + vcpu->arch.hcall_needed = 0; return -EINTR; } break; -- cgit v1.2.3 From 66f93c5a02d5ba6ef17fef459143961382593212 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 15 Nov 2018 12:34:27 +1000 Subject: powerpc/64: Fix kernel stack 16-byte alignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 4c2de74cc869 ("powerpc/64: Interrupts save PPR on stack rather than thread_struct") changed sizeof(struct pt_regs) % 16 from 0 to 8, which causes the interrupt frame allocation on kernel entry to put the kernel stack out of alignment. Quadword (16-byte) alignment for the stack is required by both the 64-bit v1 ABI (v1.9 § 3.2.2) and the 64-bit v2 ABI (v1.1 § 2.2.2.1). Add a pad field to fix alignment, and add a BUILD_BUG_ON to catch this in future. Fixes: 4c2de74cc869 ("powerpc/64: Interrupts save PPR on stack rather than thread_struct") Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ptrace.h | 1 + arch/powerpc/kernel/setup_64.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch') diff --git a/arch/powerpc/include/asm/ptrace.h b/arch/powerpc/include/asm/ptrace.h index f73886a1a7f5..0b8a735b6d85 100644 --- a/arch/powerpc/include/asm/ptrace.h +++ b/arch/powerpc/include/asm/ptrace.h @@ -54,6 +54,7 @@ struct pt_regs #ifdef CONFIG_PPC64 unsigned long ppr; + unsigned long __pad; /* Maintain 16 byte interrupt stack alignment */ #endif }; #endif diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 2a51e4cc8246..236c1151a3a7 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -636,6 +636,8 @@ static void *__init alloc_stack(unsigned long limit, int cpu) { unsigned long pa; + BUILD_BUG_ON(STACK_INT_FRAME_SIZE % 16); + pa = memblock_alloc_base_nid(THREAD_SIZE, THREAD_SIZE, limit, early_cpu_to_node(cpu), MEMBLOCK_NONE); if (!pa) { -- cgit v1.2.3 From eff896288872d687d9662000ec9ae11b6d61766f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 14 Nov 2018 09:55:43 -0800 Subject: efi/arm: Defer persistent reservations until after paging_init() The new memory EFI reservation feature we introduced to allow memory reservations to persist across kexec may trigger an unbounded number of calls to memblock_reserve(). The memblock subsystem can deal with this fine, but not before memblock resizing is enabled, which we can only do after paging_init(), when the memory we reallocate the array into is actually mapped. So break out the memreserve table processing into a separate routine and call it after paging_init() on arm64. On ARM, because of limited reviewing bandwidth of the maintainer, we cannot currently fix this, so instead, disable the EFI persistent memreserve entirely on ARM so we can fix it later. Tested-by: Marc Zyngier Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20181114175544.12860-5-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/arm64/kernel/setup.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 953e316521fc..f4fc1e0544b7 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -313,6 +313,7 @@ void __init setup_arch(char **cmdline_p) arm64_memblock_init(); paging_init(); + efi_apply_persistent_mem_reservations(); acpi_table_upgrade(); -- cgit v1.2.3 From 1229ace4a4a2e2c982a32fb075dc1bf95423924f Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Mon, 12 Nov 2018 22:18:01 +0000 Subject: MIPS: Loongson3,SGI-IP27: Simplify max_low_pfn calculation Both the Loongson3 & SGI-IP27 platforms set max_low_pfn to the last available PFN describing memory. They both do it in paging_init() which is later than ideal since max_low_pfn is used before that function is called. Simplify both platforms to trivially initialize max_low_pfn using the end address of DRAM, and do it earlier in prom_meminit(). Signed-off-by: Paul Burton Suggested-by: Mike Rapoport Tested-by: Thomas Bogendoerfer Patchwork: https://patchwork.linux-mips.org/patch/21104/ References: https://patchwork.linux-mips.org/patch/21031/ Cc: Huacai Chen Cc: Mike Rapoport Cc: Thomas Bogendoerfer Cc: linux-mips@linux-mips.org --- arch/mips/loongson64/loongson-3/numa.c | 12 ++---------- arch/mips/sgi-ip27/ip27-memory.c | 11 +---------- 2 files changed, 3 insertions(+), 20 deletions(-) (limited to 'arch') diff --git a/arch/mips/loongson64/loongson-3/numa.c b/arch/mips/loongson64/loongson-3/numa.c index 622761878cd1..60bf0a1cb757 100644 --- a/arch/mips/loongson64/loongson-3/numa.c +++ b/arch/mips/loongson64/loongson-3/numa.c @@ -231,6 +231,8 @@ static __init void prom_meminit(void) cpumask_clear(&__node_data[(node)]->cpumask); } } + max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); + for (cpu = 0; cpu < loongson_sysconf.nr_cpus; cpu++) { node = cpu / loongson_sysconf.cores_per_node; if (node >= num_online_nodes()) @@ -248,19 +250,9 @@ static __init void prom_meminit(void) void __init paging_init(void) { - unsigned node; unsigned long zones_size[MAX_NR_ZONES] = {0, }; pagetable_init(); - - for_each_online_node(node) { - unsigned long start_pfn, end_pfn; - - get_pfn_range_for_nid(node, &start_pfn, &end_pfn); - - if (end_pfn > max_low_pfn) - max_low_pfn = end_pfn; - } #ifdef CONFIG_ZONE_DMA32 zones_size[ZONE_DMA32] = MAX_DMA32_PFN; #endif diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index d8b8444d6795..813d13f92957 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -435,6 +435,7 @@ void __init prom_meminit(void) mlreset(); szmem(); + max_low_pfn = PHYS_PFN(memblock_end_of_DRAM()); for (node = 0; node < MAX_COMPACT_NODES; node++) { if (node_online(node)) { @@ -455,18 +456,8 @@ extern void setup_zero_pages(void); void __init paging_init(void) { unsigned long zones_size[MAX_NR_ZONES] = {0, }; - unsigned node; pagetable_init(); - - for_each_online_node(node) { - unsigned long start_pfn, end_pfn; - - get_pfn_range_for_nid(node, &start_pfn, &end_pfn); - - if (end_pfn > max_low_pfn) - max_low_pfn = end_pfn; - } zones_size[ZONE_NORMAL] = max_low_pfn; free_area_init_nodes(zones_size); } -- cgit v1.2.3 From 672e60b72bbe7aace88721db55b380b6a51fb8f9 Mon Sep 17 00:00:00 2001 From: Heiko Stuebner Date: Sun, 18 Nov 2018 20:03:02 +0100 Subject: ARM: dts: rockchip: Remove @0 from the veyron memory node The Coreboot version on veyron ChromeOS devices seems to ignore memory@0 nodes when updating the available memory and instead inserts another memory node without the address. This leads to 4GB systems only ever be using 2GB as the memory@0 node takes precedence. So remove the @0 for veyron devices. Fixes: 0b639b815f15 ("ARM: dts: rockchip: Add missing unit name to memory nodes in rk3288 boards") Cc: stable@vger.kernel.org Reported-by: Heikki Lindholm Signed-off-by: Heiko Stuebner --- arch/arm/boot/dts/rk3288-veyron.dtsi | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/rk3288-veyron.dtsi b/arch/arm/boot/dts/rk3288-veyron.dtsi index 2075120cfc4d..d8bf939a3aff 100644 --- a/arch/arm/boot/dts/rk3288-veyron.dtsi +++ b/arch/arm/boot/dts/rk3288-veyron.dtsi @@ -10,7 +10,11 @@ #include "rk3288.dtsi" / { - memory@0 { + /* + * The default coreboot on veyron devices ignores memory@0 nodes + * and would instead create another memory node. + */ + memory { device_type = "memory"; reg = <0x0 0x0 0x0 0x80000000>; }; -- cgit v1.2.3 From 512cab3e7e0be11234f965d9898936dce2325382 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Sat, 10 Nov 2018 15:53:59 -0200 Subject: ARM: dts: imx51-zii-rdu1: Remove EEPROM node The EEPROM under I2C2 was put by mistake in the dts. Remove it as it is not really present on the real hardware. Fixes: ceef0396f367 ("ARM: dts: imx: add ZII RDU1 board") Reported-by: Chris Healy Signed-off-by: Fabio Estevam Reviewed-by: Chris Healy Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx51-zii-rdu1.dts | 6 ------ 1 file changed, 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx51-zii-rdu1.dts b/arch/arm/boot/dts/imx51-zii-rdu1.dts index e45a15ceb94b..69d753cac89a 100644 --- a/arch/arm/boot/dts/imx51-zii-rdu1.dts +++ b/arch/arm/boot/dts/imx51-zii-rdu1.dts @@ -492,12 +492,6 @@ pinctrl-0 = <&pinctrl_i2c2>; status = "okay"; - eeprom@50 { - compatible = "atmel,24c04"; - pagesize = <16>; - reg = <0x50>; - }; - hpa1: amp@60 { compatible = "ti,tpa6130a2"; reg = <0x60>; -- cgit v1.2.3 From 3841840449817ba6cf3e636008bc4e1061a03388 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 20 Nov 2018 08:25:28 +0100 Subject: x86/boot: Mostly revert commit ae7e1238e68f2a ("Add ACPI RSDP address to setup_header") Peter Anvin pointed out that commit: ae7e1238e68f2a ("x86/boot: Add ACPI RSDP address to setup_header") should be reverted as setup_header should only contain items set by the legacy BIOS. So revert said commit. Instead of fully reverting the dependent commit of: e7b66d16fe4172 ("x86/acpi, x86/boot: Take RSDP address for boot params if available") just remove the setup_header reference in order to replace it by a boot_params in a followup patch. Suggested-by: "H. Peter Anvin" Signed-off-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boris.ostrovsky@oracle.com Cc: bp@alien8.de Cc: daniel.kiper@oracle.com Cc: sstabellini@kernel.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20181120072529.5489-2-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/boot/header.S | 6 +----- arch/x86/include/asm/x86_init.h | 2 -- arch/x86/include/uapi/asm/bootparam.h | 4 ---- arch/x86/kernel/acpi/boot.c | 2 +- arch/x86/kernel/head32.c | 1 - arch/x86/kernel/head64.c | 2 -- arch/x86/kernel/setup.c | 17 ----------------- 7 files changed, 2 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/x86/boot/header.S b/arch/x86/boot/header.S index 4c881c850125..850b8762e889 100644 --- a/arch/x86/boot/header.S +++ b/arch/x86/boot/header.S @@ -300,7 +300,7 @@ _start: # Part 2 of the header, from the old setup.S .ascii "HdrS" # header signature - .word 0x020e # header version number (>= 0x0105) + .word 0x020d # header version number (>= 0x0105) # or else old loadlin-1.5 will fail) .globl realmode_swtch realmode_swtch: .word 0, 0 # default_switch, SETUPSEG @@ -558,10 +558,6 @@ pref_address: .quad LOAD_PHYSICAL_ADDR # preferred load addr init_size: .long INIT_SIZE # kernel initialization size handover_offset: .long 0 # Filled in by build.c -acpi_rsdp_addr: .quad 0 # 64-bit physical pointer to the - # ACPI RSDP table, added with - # version 2.14 - # End of setup header ##################################################### .section ".entrytext", "ax" diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 0f842104862c..b85a7c54c6a1 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -303,6 +303,4 @@ extern void x86_init_noop(void); extern void x86_init_uint_noop(unsigned int unused); extern bool x86_pnpbios_disabled(void); -void x86_verify_bootdata_version(void); - #endif diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 22f89d040ddd..a06cbf019744 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -16,9 +16,6 @@ #define RAMDISK_PROMPT_FLAG 0x8000 #define RAMDISK_LOAD_FLAG 0x4000 -/* version flags */ -#define VERSION_WRITTEN 0x8000 - /* loadflags */ #define LOADED_HIGH (1<<0) #define KASLR_FLAG (1<<1) @@ -89,7 +86,6 @@ struct setup_header { __u64 pref_address; __u32 init_size; __u32 handover_offset; - __u64 acpi_rsdp_addr; } __attribute__((packed)); struct sys_desc_table { diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 92c76bf97ad8..fb3b1f3a5aba 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1776,5 +1776,5 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) u64 x86_default_get_root_pointer(void) { - return boot_params.hdr.acpi_rsdp_addr; + return 0; } diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 76fa3b836598..ec6fefbfd3c0 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -37,7 +37,6 @@ asmlinkage __visible void __init i386_start_kernel(void) cr4_init_shadow(); sanitize_boot_params(&boot_params); - x86_verify_bootdata_version(); x86_early_init_platform_quirks(); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 7663a8eb602b..16b1cbd3a61e 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -457,8 +457,6 @@ void __init x86_64_start_reservations(char *real_mode_data) if (!boot_params.hdr.version) copy_bootdata(__va(real_mode_data)); - x86_verify_bootdata_version(); - x86_early_init_platform_quirks(); switch (boot_params.hdr.hardware_subarch) { diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index b74e7bfed6ab..d494b9bfe618 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1280,23 +1280,6 @@ void __init setup_arch(char **cmdline_p) unwind_init(); } -/* - * From boot protocol 2.14 onwards we expect the bootloader to set the - * version to "0x8000 | ". In case we find a version >= 2.14 - * without the 0x8000 we assume the boot loader supports 2.13 only and - * reset the version accordingly. The 0x8000 flag is removed in any case. - */ -void __init x86_verify_bootdata_version(void) -{ - if (boot_params.hdr.version & VERSION_WRITTEN) - boot_params.hdr.version &= ~VERSION_WRITTEN; - else if (boot_params.hdr.version >= 0x020e) - boot_params.hdr.version = 0x020d; - - if (boot_params.hdr.version < 0x020e) - boot_params.hdr.acpi_rsdp_addr = 0; -} - #ifdef CONFIG_X86_32 static struct resource video_ram_resource = { -- cgit v1.2.3 From e6e094e053af75cbc164e950814d3d084fb1e698 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 20 Nov 2018 08:25:29 +0100 Subject: x86/acpi, x86/boot: Take RSDP address from boot params if available In case the RSDP address in struct boot_params is specified don't try to find the table by searching, but take the address directly as set by the boot loader. Suggested-by: "H. Peter Anvin" Signed-off-by: Juergen Gross Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: boris.ostrovsky@oracle.com Cc: bp@alien8.de Cc: daniel.kiper@oracle.com Cc: sstabellini@kernel.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/20181120072529.5489-3-jgross@suse.com Signed-off-by: Ingo Molnar --- arch/x86/include/uapi/asm/bootparam.h | 3 ++- arch/x86/kernel/acpi/boot.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index a06cbf019744..60733f137e9a 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -155,7 +155,8 @@ struct boot_params { __u8 _pad2[4]; /* 0x054 */ __u64 tboot_addr; /* 0x058 */ struct ist_info ist_info; /* 0x060 */ - __u8 _pad3[16]; /* 0x070 */ + __u64 acpi_rsdp_addr; /* 0x070 */ + __u8 _pad3[8]; /* 0x078 */ __u8 hd0_info[16]; /* obsolete! */ /* 0x080 */ __u8 hd1_info[16]; /* obsolete! */ /* 0x090 */ struct sys_desc_table sys_desc_table; /* obsolete! */ /* 0x0a0 */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fb3b1f3a5aba..06635fbca81c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1776,5 +1776,5 @@ void __init arch_reserve_mem_area(acpi_physical_address addr, size_t size) u64 x86_default_get_root_pointer(void) { - return 0; + return boot_params.acpi_rsdp_addr; } -- cgit v1.2.3 From 21f70d4abf9e17c2e3d7e64b7bfa3424e017f176 Mon Sep 17 00:00:00 2001 From: Olof Johansson Date: Wed, 14 Nov 2018 16:27:55 -0800 Subject: RISC-V: Fix raw_copy_{to,from}_user() Sparse highlighted it, and appears to be a pure bug (from vs to). ./arch/riscv/include/asm/uaccess.h:403:35: warning: incorrect type in argument 1 (different address spaces) ./arch/riscv/include/asm/uaccess.h:403:39: warning: incorrect type in argument 2 (different address spaces) ./arch/riscv/include/asm/uaccess.h:409:37: warning: incorrect type in argument 1 (different address spaces) ./arch/riscv/include/asm/uaccess.h:409:41: warning: incorrect type in argument 2 (different address spaces) Signed-off-by: Olof Johansson Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/uaccess.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/riscv/include/asm/uaccess.h b/arch/riscv/include/asm/uaccess.h index 473cfc84e412..8c3e3e3c8be1 100644 --- a/arch/riscv/include/asm/uaccess.h +++ b/arch/riscv/include/asm/uaccess.h @@ -400,13 +400,13 @@ extern unsigned long __must_check __asm_copy_from_user(void *to, static inline unsigned long raw_copy_from_user(void *to, const void __user *from, unsigned long n) { - return __asm_copy_to_user(to, from, n); + return __asm_copy_from_user(to, from, n); } static inline unsigned long raw_copy_to_user(void __user *to, const void *from, unsigned long n) { - return __asm_copy_from_user(to, from, n); + return __asm_copy_to_user(to, from, n); } extern long strncpy_from_user(char *dest, const char __user *src, long count); -- cgit v1.2.3 From c0fbcd9918607e85c9598bfa3dd0a84ed77ea210 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 12 Nov 2018 11:25:15 +0530 Subject: RISC-V: Build flat and compressed kernel images This patch extends Linux RISC-V build system to build and install: Image - Flat uncompressed kernel image Image.gz - Flat and GZip compressed kernel image Quiet a few bootloaders (such as Uboot, UEFI, etc) are capable of booting flat and compressed kernel images. In case of Uboot, booting Image or Image.gz is achieved using bootm command. The flat and uncompressed kernel image (i.e. Image) is very useful in pre-silicon developent and testing because we can create back-door HEX files for RAM on FPGAs from Image. Signed-off-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/Makefile | 15 ++++++++++- arch/riscv/boot/.gitignore | 2 ++ arch/riscv/boot/Makefile | 33 +++++++++++++++++++++++ arch/riscv/boot/install.sh | 60 +++++++++++++++++++++++++++++++++++++++++ arch/riscv/kernel/head.S | 10 +++++++ arch/riscv/kernel/vmlinux.lds.S | 2 +- 6 files changed, 120 insertions(+), 2 deletions(-) create mode 100644 arch/riscv/boot/.gitignore create mode 100644 arch/riscv/boot/Makefile create mode 100644 arch/riscv/boot/install.sh (limited to 'arch') diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 4af153a182b0..4b594f2e4f7e 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -71,6 +71,10 @@ KBUILD_CFLAGS += $(call cc-option,-mstrict-align) # arch specific predefines for sparse CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS) +# Default target when executing plain make +boot := arch/riscv/boot +KBUILD_IMAGE := $(boot)/Image.gz + head-y := arch/riscv/kernel/head.o core-y += arch/riscv/kernel/ arch/riscv/mm/ @@ -81,4 +85,13 @@ PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ -all: vmlinux +all: Image.gz + +Image: vmlinux + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +Image.%: Image + $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ + +zinstall install: + $(Q)$(MAKE) $(build)=$(boot) $@ diff --git a/arch/riscv/boot/.gitignore b/arch/riscv/boot/.gitignore new file mode 100644 index 000000000000..8dab0bb6ae66 --- /dev/null +++ b/arch/riscv/boot/.gitignore @@ -0,0 +1,2 @@ +Image +Image.gz diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile new file mode 100644 index 000000000000..0990a9fdbe5d --- /dev/null +++ b/arch/riscv/boot/Makefile @@ -0,0 +1,33 @@ +# +# arch/riscv/boot/Makefile +# +# This file is included by the global makefile so that you can add your own +# architecture-specific flags and dependencies. +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 2018, Anup Patel. +# Author: Anup Patel +# +# Based on the ia64 and arm64 boot/Makefile. +# + +OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S + +targets := Image + +$(obj)/Image: vmlinux FORCE + $(call if_changed,objcopy) + +$(obj)/Image.gz: $(obj)/Image FORCE + $(call if_changed,gzip) + +install: + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ + $(obj)/Image System.map "$(INSTALL_PATH)" + +zinstall: + $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \ + $(obj)/Image.gz System.map "$(INSTALL_PATH)" diff --git a/arch/riscv/boot/install.sh b/arch/riscv/boot/install.sh new file mode 100644 index 000000000000..18c39159c0ff --- /dev/null +++ b/arch/riscv/boot/install.sh @@ -0,0 +1,60 @@ +#!/bin/sh +# +# arch/riscv/boot/install.sh +# +# This file is subject to the terms and conditions of the GNU General Public +# License. See the file "COPYING" in the main directory of this archive +# for more details. +# +# Copyright (C) 1995 by Linus Torvalds +# +# Adapted from code in arch/i386/boot/Makefile by H. Peter Anvin +# Adapted from code in arch/i386/boot/install.sh by Russell King +# +# "make install" script for the RISC-V Linux port +# +# Arguments: +# $1 - kernel version +# $2 - kernel image file +# $3 - kernel map file +# $4 - default install path (blank if root directory) +# + +verify () { + if [ ! -f "$1" ]; then + echo "" 1>&2 + echo " *** Missing file: $1" 1>&2 + echo ' *** You need to run "make" before "make install".' 1>&2 + echo "" 1>&2 + exit 1 + fi +} + +# Make sure the files actually exist +verify "$2" +verify "$3" + +# User may have a custom install script +if [ -x ~/bin/${INSTALLKERNEL} ]; then exec ~/bin/${INSTALLKERNEL} "$@"; fi +if [ -x /sbin/${INSTALLKERNEL} ]; then exec /sbin/${INSTALLKERNEL} "$@"; fi + +if [ "$(basename $2)" = "Image.gz" ]; then +# Compressed install + echo "Installing compressed kernel" + base=vmlinuz +else +# Normal install + echo "Installing normal kernel" + base=vmlinux +fi + +if [ -f $4/$base-$1 ]; then + mv $4/$base-$1 $4/$base-$1.old +fi +cat $2 > $4/$base-$1 + +# Install system map file +if [ -f $4/System.map-$1 ]; then + mv $4/System.map-$1 $4/System.map-$1.old +fi +cp $3 $4/System.map-$1 diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 711190d473d4..fe884cd69abd 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -44,6 +44,16 @@ ENTRY(_start) amoadd.w a3, a2, (a3) bnez a3, .Lsecondary_start + /* Clear BSS for flat non-ELF images */ + la a3, __bss_start + la a4, __bss_stop + ble a4, a3, clear_bss_done +clear_bss: + REG_S zero, (a3) + add a3, a3, RISCV_SZPTR + blt a3, a4, clear_bss +clear_bss_done: + /* Save hart ID and DTB physical address */ mv s0, a0 mv s1, a1 diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index ece84991609c..65df1dfdc303 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -74,7 +74,7 @@ SECTIONS *(.sbss*) } - BSS_SECTION(0, 0, 0) + BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) EXCEPTION_TABLE(0x10) NOTES -- cgit v1.2.3 From 0138ebb90c633f76bc71617f8f23635ce41c84fd Mon Sep 17 00:00:00 2001 From: David Abdurachmanov Date: Thu, 8 Nov 2018 20:07:00 +0100 Subject: riscv: fix warning in arch/riscv/include/asm/module.h Fixes warning: 'struct module' declared inside parameter list will not be visible outside of this definition or declaration Signed-off-by: David Abdurachmanov Acked-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/module.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/riscv/include/asm/module.h b/arch/riscv/include/asm/module.h index 349df33808c4..cd2af4b013e3 100644 --- a/arch/riscv/include/asm/module.h +++ b/arch/riscv/include/asm/module.h @@ -8,6 +8,7 @@ #define MODULE_ARCH_VERMAGIC "riscv" +struct module; u64 module_emit_got_entry(struct module *mod, u64 val); u64 module_emit_plt_entry(struct module *mod, u64 val); -- cgit v1.2.3 From 27f8899d6002e11a6e2d995e29b8deab5aa9cc25 Mon Sep 17 00:00:00 2001 From: David Abdurachmanov Date: Thu, 8 Nov 2018 20:02:39 +0100 Subject: riscv: add asm/unistd.h UAPI header Marcin Juszkiewicz reported issues while generating syscall table for riscv using 4.20-rc1. The patch refactors our unistd.h files to match some other architectures. - Add asm/unistd.h UAPI header, which has __ARCH_WANT_NEW_STAT only for 64-bit - Remove asm/syscalls.h UAPI header and merge to asm/unistd.h - Adjust kernel asm/unistd.h So now asm/unistd.h UAPI header should show all syscalls for riscv. Before this, Makefile simply put `#include ` into generated asm/unistd.h UAPI header thus user didn't see: - __NR_riscv_flush_icache - __NR_newfstatat - __NR_fstat which are supported by riscv kernel. Signed-off-by: David Abdurachmanov Cc: Arnd Bergmann Cc: Marcin Juszkiewicz Cc: Guenter Roeck Fixes: 67314ec7b025 ("RISC-V: Request newstat syscalls") Signed-off-by: David Abdurachmanov Acked-by: Olof Johansson Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/unistd.h | 5 ++--- arch/riscv/include/uapi/asm/syscalls.h | 29 ------------------------ arch/riscv/include/uapi/asm/unistd.h | 41 ++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 32 deletions(-) delete mode 100644 arch/riscv/include/uapi/asm/syscalls.h create mode 100644 arch/riscv/include/uapi/asm/unistd.h (limited to 'arch') diff --git a/arch/riscv/include/asm/unistd.h b/arch/riscv/include/asm/unistd.h index eff7aa9aa163..fef96f117b4d 100644 --- a/arch/riscv/include/asm/unistd.h +++ b/arch/riscv/include/asm/unistd.h @@ -13,10 +13,9 @@ /* * There is explicitly no include guard here because this file is expected to - * be included multiple times. See uapi/asm/syscalls.h for more info. + * be included multiple times. */ -#define __ARCH_WANT_NEW_STAT #define __ARCH_WANT_SYS_CLONE + #include -#include diff --git a/arch/riscv/include/uapi/asm/syscalls.h b/arch/riscv/include/uapi/asm/syscalls.h deleted file mode 100644 index 206dc4b0f6ea..000000000000 --- a/arch/riscv/include/uapi/asm/syscalls.h +++ /dev/null @@ -1,29 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2017-2018 SiFive - */ - -/* - * There is explicitly no include guard here because this file is expected to - * be included multiple times in order to define the syscall macros via - * __SYSCALL. - */ - -/* - * Allows the instruction cache to be flushed from userspace. Despite RISC-V - * having a direct 'fence.i' instruction available to userspace (which we - * can't trap!), that's not actually viable when running on Linux because the - * kernel might schedule a process on another hart. There is no way for - * userspace to handle this without invoking the kernel (as it doesn't know the - * thread->hart mappings), so we've defined a RISC-V specific system call to - * flush the instruction cache. - * - * __NR_riscv_flush_icache is defined to flush the instruction cache over an - * address range, with the flush applying to either all threads or just the - * caller. We don't currently do anything with the address range, that's just - * in there for forwards compatibility. - */ -#ifndef __NR_riscv_flush_icache -#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) -#endif -__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) diff --git a/arch/riscv/include/uapi/asm/unistd.h b/arch/riscv/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..1f3bd3ebbb0d --- /dev/null +++ b/arch/riscv/include/uapi/asm/unistd.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * Copyright (C) 2018 David Abdurachmanov + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifdef __LP64__ +#define __ARCH_WANT_NEW_STAT +#endif /* __LP64__ */ + +#include + +/* + * Allows the instruction cache to be flushed from userspace. Despite RISC-V + * having a direct 'fence.i' instruction available to userspace (which we + * can't trap!), that's not actually viable when running on Linux because the + * kernel might schedule a process on another hart. There is no way for + * userspace to handle this without invoking the kernel (as it doesn't know the + * thread->hart mappings), so we've defined a RISC-V specific system call to + * flush the instruction cache. + * + * __NR_riscv_flush_icache is defined to flush the instruction cache over an + * address range, with the flush applying to either all threads or just the + * caller. We don't currently do anything with the address range, that's just + * in there for forwards compatibility. + */ +#ifndef __NR_riscv_flush_icache +#define __NR_riscv_flush_icache (__NR_arch_specific_syscall + 15) +#endif +__SYSCALL(__NR_riscv_flush_icache, sys_riscv_flush_icache) -- cgit v1.2.3 From 5d8f81ba1da55210123b9595e87b913c79579d02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Patrick=20St=C3=A4hlin?= Date: Fri, 9 Nov 2018 22:42:16 +0100 Subject: RISC-V: recognize S/U mode bits in print_isa MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removes the warning about an unsupported ISA when reading /proc/cpuinfo on QEMU. The "S" extension is not being returned as it is not accessible from userspace. Signed-off-by: Patrick Stählin Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/cpu.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/cpu.c b/arch/riscv/kernel/cpu.c index 3a5a2ee31547..b4a7d4427fbb 100644 --- a/arch/riscv/kernel/cpu.c +++ b/arch/riscv/kernel/cpu.c @@ -64,7 +64,7 @@ int riscv_of_processor_hartid(struct device_node *node) static void print_isa(struct seq_file *f, const char *orig_isa) { - static const char *ext = "mafdc"; + static const char *ext = "mafdcsu"; const char *isa = orig_isa; const char *e; @@ -88,11 +88,14 @@ static void print_isa(struct seq_file *f, const char *orig_isa) /* * Check the rest of the ISA string for valid extensions, printing those * we find. RISC-V ISA strings define an order, so we only print the - * extension bits when they're in order. + * extension bits when they're in order. Hide the supervisor (S) + * extension from userspace as it's not accessible from there. */ for (e = ext; *e != '\0'; ++e) { if (isa[0] == e[0]) { - seq_write(f, isa, 1); + if (isa[0] != 's') + seq_write(f, isa, 1); + isa++; } } -- cgit v1.2.3 From 68239654acafe6aad5a3c1dc7237e60accfebc03 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Tue, 20 Nov 2018 11:26:35 +0100 Subject: x86/fpu: Disable bottom halves while loading FPU registers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The sequence fpu->initialized = 1; /* step A */ preempt_disable(); /* step B */ fpu__restore(fpu); preempt_enable(); in __fpu__restore_sig() is racy in regard to a context switch. For 32bit frames, __fpu__restore_sig() prepares the FPU state within fpu->state. To ensure that a context switch (switch_fpu_prepare() in particular) does not modify fpu->state it uses fpu__drop() which sets fpu->initialized to 0. After fpu->initialized is cleared, the CPU's FPU state is not saved to fpu->state during a context switch. The new state is loaded via fpu__restore(). It gets loaded into fpu->state from userland and ensured it is sane. fpu->initialized is then set to 1 in order to avoid fpu__initialize() doing anything (overwrite the new state) which is part of fpu__restore(). A context switch between step A and B above would save CPU's current FPU registers to fpu->state and overwrite the newly prepared state. This looks like a tiny race window but the Kernel Test Robot reported this back in 2016 while we had lazy FPU support. Borislav Petkov made the link between that report and another patch that has been posted. Since the removal of the lazy FPU support, this race goes unnoticed because the warning has been removed. Disable bottom halves around the restore sequence to avoid the race. BH need to be disabled because BH is allowed to run (even with preemption disabled) and might invoke kernel_fpu_begin() by doing IPsec. [ bp: massage commit message a bit. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Borislav Petkov Acked-by: Ingo Molnar Acked-by: Thomas Gleixner Cc: Andy Lutomirski Cc: Dave Hansen Cc: "H. Peter Anvin" Cc: "Jason A. Donenfeld" Cc: kvm ML Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Rik van Riel Cc: stable@vger.kernel.org Cc: x86-ml Link: http://lkml.kernel.org/r/20181120102635.ddv3fvavxajjlfqk@linutronix.de Link: https://lkml.kernel.org/r/20160226074940.GA28911@pd.tnic --- arch/x86/kernel/fpu/signal.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c index 61a949d84dfa..d99a8ee9e185 100644 --- a/arch/x86/kernel/fpu/signal.c +++ b/arch/x86/kernel/fpu/signal.c @@ -344,10 +344,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size) sanitize_restored_xstate(tsk, &env, xfeatures, fx_only); } + local_bh_disable(); fpu->initialized = 1; - preempt_disable(); fpu__restore(fpu); - preempt_enable(); + local_bh_enable(); return err; } else { -- cgit v1.2.3 From 2a5bf23d5b795d5df33dc284e8f5cf8b6a5b4042 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 20 Nov 2018 18:08:42 +0100 Subject: perf/x86/intel: Fix regression by default disabling perfmon v4 interrupt handling Kyle Huey reported that 'rr', a replay debugger, broke due to the following commit: af3bdb991a5c ("perf/x86/intel: Add a separate Arch Perfmon v4 PMI handler") Rework the 'disable_counter_freezing' __setup() parameter such that we can explicitly enable/disable it and switch to default disabled. To this purpose, rename the parameter to "perf_v4_pmi=" which is a much better description and allows requiring a bool argument. [ mingo: Improved the changelog some more. ] Reported-by: Kyle Huey Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Robert O'Callahan Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: acme@kernel.org Link: http://lkml.kernel.org/r/20181120170842.GZ2131@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index 273c62e81546..af8bea9d4006 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2306,14 +2306,18 @@ static int handle_pmi_common(struct pt_regs *regs, u64 status) return handled; } -static bool disable_counter_freezing; +static bool disable_counter_freezing = true; static int __init intel_perf_counter_freezing_setup(char *s) { - disable_counter_freezing = true; - pr_info("Intel PMU Counter freezing feature disabled\n"); + bool res; + + if (kstrtobool(s, &res)) + return -EINVAL; + + disable_counter_freezing = !res; return 1; } -__setup("disable_counter_freezing", intel_perf_counter_freezing_setup); +__setup("perf_v4_pmi=", intel_perf_counter_freezing_setup); /* * Simplified handler for Arch Perfmon v4: -- cgit v1.2.3 From 4ab7ca092c3c7ac8b16aa28eba723a8868f82f14 Mon Sep 17 00:00:00 2001 From: Romain Izard Date: Tue, 20 Nov 2018 17:57:37 +0100 Subject: ARM: dts: at91: sama5d2: use the divided clock for SMC The SAMA5D2 is different from SAMA5D3 and SAMA5D4, as there are two different clocks for the peripherals in the SoC. The Static Memory controller is connected to the divided master clock. Unfortunately, the device tree does not correctly show this and uses the master clock directly. This clock is then used by the code for the NAND controller to calculate the timings for the controller, and we end up with slow NAND Flash access. Fix the device tree, and the performance of Flash access is improved. Signed-off-by: Romain Izard Signed-off-by: Alexandre Belloni --- arch/arm/boot/dts/sama5d2.dtsi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/sama5d2.dtsi b/arch/arm/boot/dts/sama5d2.dtsi index 843052f14f1c..dd0dda6ed44b 100644 --- a/arch/arm/boot/dts/sama5d2.dtsi +++ b/arch/arm/boot/dts/sama5d2.dtsi @@ -314,7 +314,7 @@ 0x1 0x0 0x60000000 0x10000000 0x2 0x0 0x70000000 0x10000000 0x3 0x0 0x80000000 0x10000000>; - clocks = <&mck>; + clocks = <&h32ck>; status = "disabled"; nand_controller: nand-controller { -- cgit v1.2.3 From c50cbd85cd7027d32ac5945bb60217936b4f7eaf Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Wed, 21 Nov 2018 22:14:39 +0300 Subject: mips: fix mips_get_syscall_arg o32 check When checking for TIF_32BIT_REGS flag, mips_get_syscall_arg() should use the task specified as its argument instead of the current task. This potentially affects all syscall_get_arguments() users who specify tasks different from the current. Fixes: c0ff3c53d4f99 ("MIPS: Enable HAVE_ARCH_TRACEHOOK.") Signed-off-by: Dmitry V. Levin Signed-off-by: Paul Burton Patchwork: https://patchwork.linux-mips.org/patch/21185/ Cc: Elvira Khabirova Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.13+ --- arch/mips/include/asm/syscall.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h index 0170602a1e4e..6cf8ffb5367e 100644 --- a/arch/mips/include/asm/syscall.h +++ b/arch/mips/include/asm/syscall.h @@ -73,7 +73,7 @@ static inline unsigned long mips_get_syscall_arg(unsigned long *arg, #ifdef CONFIG_64BIT case 4: case 5: case 6: case 7: #ifdef CONFIG_MIPS32_O32 - if (test_thread_flag(TIF_32BIT_REGS)) + if (test_tsk_thread_flag(task, TIF_32BIT_REGS)) return get_user(*arg, (int *)usp + n); else #endif -- cgit v1.2.3 From ed6101bbf6266ee83e620b19faa7c6ad56bb41ab Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 21 Nov 2018 11:16:10 +0100 Subject: perf/x86/intel: Move branch tracing setup to the Intel-specific source file Moving branch tracing setup to Intel core object into separate intel_pmu_bts_config function, because it's Intel specific. Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181121101612.16272-1-jolsa@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 20 -------------------- arch/x86/events/intel/core.c | 41 ++++++++++++++++++++++++++++++++++++++++- 2 files changed, 40 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 106911b603bd..374a19712e20 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -438,26 +438,6 @@ int x86_setup_perfctr(struct perf_event *event) if (config == -1LL) return -EINVAL; - /* - * Branch tracing: - */ - if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !attr->freq && hwc->sample_period == 1) { - /* BTS is not supported by this architecture. */ - if (!x86_pmu.bts_active) - return -EOPNOTSUPP; - - /* BTS is currently only allowed for user-mode. */ - if (!attr->exclude_kernel) - return -EOPNOTSUPP; - - /* disallow bts if conflicting events are present */ - if (x86_add_exclusive(x86_lbr_exclusive_lbr)) - return -EBUSY; - - event->destroy = hw_perf_lbr_event_destroy; - } - hwc->config |= config; return 0; diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index af8bea9d4006..a95079abeb03 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3102,10 +3102,49 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) return flags; } +static int intel_pmu_bts_config(struct perf_event *event) +{ + struct perf_event_attr *attr = &event->attr; + struct hw_perf_event *hwc = &event->hw; + + if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && + !attr->freq && hwc->sample_period == 1) { + /* BTS is not supported by this architecture. */ + if (!x86_pmu.bts_active) + return -EOPNOTSUPP; + + /* BTS is currently only allowed for user-mode. */ + if (!attr->exclude_kernel) + return -EOPNOTSUPP; + + /* disallow bts if conflicting events are present */ + if (x86_add_exclusive(x86_lbr_exclusive_lbr)) + return -EBUSY; + + event->destroy = hw_perf_lbr_event_destroy; + } + + return 0; +} + +static int core_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + return intel_pmu_bts_config(event); +} + static int intel_pmu_hw_config(struct perf_event *event) { int ret = x86_pmu_hw_config(event); + if (ret) + return ret; + + ret = intel_pmu_bts_config(event); if (ret) return ret; @@ -3600,7 +3639,7 @@ static __initconst const struct x86_pmu core_pmu = { .enable_all = core_pmu_enable_all, .enable = core_pmu_enable_event, .disable = x86_pmu_disable_event, - .hw_config = x86_pmu_hw_config, + .hw_config = core_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, -- cgit v1.2.3 From 67266c1080ad56c31af72b9c18355fde8ccc124a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 21 Nov 2018 11:16:11 +0100 Subject: perf/x86/intel: Add generic branch tracing check to intel_pmu_has_bts() Currently we check the branch tracing only by checking for the PERF_COUNT_HW_BRANCH_INSTRUCTIONS event of PERF_TYPE_HARDWARE type. But we can define the same event with the PERF_TYPE_RAW type. Changing the intel_pmu_has_bts() code to check on event's final hw config value, so both HW types are covered. Adding unlikely to intel_pmu_has_bts() condition calls, because it was used in the original code in intel_bts_constraints. Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20181121101612.16272-2-jolsa@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 17 +++-------------- arch/x86/events/perf_event.h | 13 +++++++++---- 2 files changed, 12 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a95079abeb03..a15a73788693 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2474,16 +2474,7 @@ done: static struct event_constraint * intel_bts_constraints(struct perf_event *event) { - struct hw_perf_event *hwc = &event->hw; - unsigned int hw_event, bts_event; - - if (event->attr.freq) - return NULL; - - hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; - bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - - if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) + if (unlikely(intel_pmu_has_bts(event))) return &bts_constraint; return NULL; @@ -3105,10 +3096,8 @@ static unsigned long intel_pmu_large_pebs_flags(struct perf_event *event) static int intel_pmu_bts_config(struct perf_event *event) { struct perf_event_attr *attr = &event->attr; - struct hw_perf_event *hwc = &event->hw; - if (attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !attr->freq && hwc->sample_period == 1) { + if (unlikely(intel_pmu_has_bts(event))) { /* BTS is not supported by this architecture. */ if (!x86_pmu.bts_active) return -EOPNOTSUPP; @@ -3170,7 +3159,7 @@ static int intel_pmu_hw_config(struct perf_event *event) /* * BTS is set up earlier in this path, so don't account twice */ - if (!intel_pmu_has_bts(event)) { + if (!unlikely(intel_pmu_has_bts(event))) { /* disallow lbr if conflicting events are present */ if (x86_add_exclusive(x86_lbr_exclusive_lbr)) return -EBUSY; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index adae087cecdd..78d7b7031bfc 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -859,11 +859,16 @@ static inline int amd_pmu_init(void) static inline bool intel_pmu_has_bts(struct perf_event *event) { - if (event->attr.config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS && - !event->attr.freq && event->hw.sample_period == 1) - return true; + struct hw_perf_event *hwc = &event->hw; + unsigned int hw_event, bts_event; + + if (event->attr.freq) + return false; + + hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; + bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); - return false; + return hw_event == bts_event && hwc->sample_period == 1; } int intel_pmu_save_and_restart(struct perf_event *event); -- cgit v1.2.3 From 472de49fdc53365c880ab81ae2b5cfdd83db0b06 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 21 Nov 2018 11:16:12 +0100 Subject: perf/x86/intel: Disallow precise_ip on BTS events Vince reported a crash in the BTS flush code when touching the callchain data, which was supposed to be initialized as an 'early' callchain, but intel_pmu_drain_bts_buffer() does not do that: BUG: unable to handle kernel NULL pointer dereference at 0000000000000000 ... Call Trace: intel_pmu_drain_bts_buffer+0x151/0x220 ? intel_get_event_constraints+0x219/0x360 ? perf_assign_events+0xe2/0x2a0 ? select_idle_sibling+0x22/0x3a0 ? __update_load_avg_se+0x1ec/0x270 ? enqueue_task_fair+0x377/0xdd0 ? cpumask_next_and+0x19/0x20 ? load_balance+0x134/0x950 ? check_preempt_curr+0x7a/0x90 ? ttwu_do_wakeup+0x19/0x140 x86_pmu_stop+0x3b/0x90 x86_pmu_del+0x57/0x160 event_sched_out.isra.106+0x81/0x170 group_sched_out.part.108+0x51/0xc0 __perf_event_disable+0x7f/0x160 event_function+0x8c/0xd0 remote_function+0x3c/0x50 flush_smp_call_function_queue+0x35/0xe0 smp_call_function_single_interrupt+0x3a/0xd0 call_function_single_interrupt+0xf/0x20 It was triggered by fuzzer but can be easily reproduced by: # perf record -e cpu/branch-instructions/pu -g -c 1 Peter suggested not to allow branch tracing for precise events: > Now arguably, this is really stupid behaviour. Who in his right mind > wants callchain output on BTS entries. And even if they do, BTS + > precise_ip is nonsensical. > > So in my mind disallowing precise_ip on BTS would be the simplest fix. Suggested-by: Peter Zijlstra Reported-by: Vince Weaver Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Stephane Eranian Cc: Thomas Gleixner Fixes: 6cbc304f2f36 ("perf/x86/intel: Fix unwind errors from PEBS entries (mk-II)") Link: http://lkml.kernel.org/r/20181121101612.16272-3-jolsa@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/intel/core.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a15a73788693..ecc3e34ca955 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3106,6 +3106,10 @@ static int intel_pmu_bts_config(struct perf_event *event) if (!attr->exclude_kernel) return -EOPNOTSUPP; + /* BTS is not allowed for precise events. */ + if (attr->precise_ip) + return -EOPNOTSUPP; + /* disallow bts if conflicting events are present */ if (x86_add_exclusive(x86_lbr_exclusive_lbr)) return -EBUSY; -- cgit v1.2.3 From f2a5fef1248beccacec0deecb67c1be693d72ae6 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Mon, 19 Nov 2018 14:59:45 +0100 Subject: x86/xen: cleanup includes in arch/x86/xen/spinlock.c arch/x86/xen/spinlock.c includes several headers which are not needed. Remove the #includes. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/spinlock.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index 1c8a8816a402..3776122c87cc 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -3,22 +3,17 @@ * Split spinlock implementation out into its own file, so it can be * compiled in a FTRACE-compatible way. */ -#include +#include #include -#include -#include -#include #include #include #include #include -#include #include #include "xen-ops.h" -#include "debugfs.h" static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; static DEFINE_PER_CPU(char *, irq_name); -- cgit v1.2.3 From 6c05946e349d92f527d98644fbc9c41f06312c00 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Mon, 12 Nov 2018 09:28:06 +0800 Subject: arm64: dts: mt7622: fix no more console output on rfb1 No default serial console on boot. Fix this by using a 'stdout-path' property that points to the device. Fixes: c0d9f9ad4f76 ("arm64: dts: mt7622: add earlycon to mt7622-rfb1 board") Signed-off-by: Ryder Lee Tested-by: Kevin Hilman [mb: Fix commit message] Signed-off-by: Matthias Brugger --- arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts index dcad0869b84c..3f783348c66a 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-rfb1.dts @@ -17,8 +17,13 @@ model = "MediaTek MT7622 RFB1 board"; compatible = "mediatek,mt7622-rfb1", "mediatek,mt7622"; + aliases { + serial0 = &uart0; + }; + chosen { - bootargs = "earlycon=uart8250,mmio32,0x11002000 console=ttyS0,115200n1 swiotlb=512"; + stdout-path = "serial0:115200n8"; + bootargs = "earlycon=uart8250,mmio32,0x11002000 swiotlb=512"; }; cpus { -- cgit v1.2.3 From 396defa8523372645d6d5a8b7f4b5403b119e360 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Mon, 12 Nov 2018 09:28:07 +0800 Subject: arm64: dts: mt7622: fix no more console output on BPI-R64 board Fix this by using a 'stdout-path' property that points to the device. Fixes: 0b6286dd96c0 ("arm64: dts: mt7622: add bananapi BPI-R64 board") Signed-off-by: Ryder Lee Signed-off-by: Matthias Brugger --- arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts index 5d6005c9b097..710c5c3d87d3 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts +++ b/arch/arm64/boot/dts/mediatek/mt7622-bananapi-bpi-r64.dts @@ -16,8 +16,13 @@ model = "Bananapi BPI-R64"; compatible = "bananapi,bpi-r64", "mediatek,mt7622"; + aliases { + serial0 = &uart0; + }; + chosen { - bootargs = "earlycon=uart8250,mmio32,0x11002000 console=ttyS0,115200n1 swiotlb=512"; + stdout-path = "serial0:115200n8"; + bootargs = "earlycon=uart8250,mmio32,0x11002000 swiotlb=512"; }; cpus { -- cgit v1.2.3 From b5d9a07ef7736b2456b9d3c90568de25e43d8ec3 Mon Sep 17 00:00:00 2001 From: Sergey Matyukevich Date: Fri, 16 Nov 2018 21:21:30 +0300 Subject: arm64: sysreg: fix sparse warnings Specify correct type for the constants to avoid the following sparse complaints: ./arch/arm64/include/asm/sysreg.h:471:42: warning: constant 0xffffffffffffffff is so big it is unsigned long ./arch/arm64/include/asm/sysreg.h:512:42: warning: constant 0xffffffffffffffff is so big it is unsigned long Acked-by: Will Deacon Acked-by: Olof Johansson Acked-by: Luc Van Oostenryck Signed-off-by: Sergey Matyukevich Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/sysreg.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 0c909c4a932f..842fb9572661 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -468,7 +468,7 @@ SCTLR_ELx_SA | SCTLR_ELx_I | SCTLR_ELx_WXN | \ SCTLR_ELx_DSSBS | ENDIAN_CLEAR_EL2 | SCTLR_EL2_RES0) -#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffff +#if (SCTLR_EL2_SET ^ SCTLR_EL2_CLEAR) != 0xffffffffffffffffUL #error "Inconsistent SCTLR_EL2 set/clear bits" #endif @@ -509,7 +509,7 @@ SCTLR_EL1_UMA | SCTLR_ELx_WXN | ENDIAN_CLEAR_EL1 |\ SCTLR_ELx_DSSBS | SCTLR_EL1_NTWI | SCTLR_EL1_RES0) -#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffff +#if (SCTLR_EL1_SET ^ SCTLR_EL1_CLEAR) != 0xffffffffffffffffUL #error "Inconsistent SCTLR_EL1 set/clear bits" #endif -- cgit v1.2.3 From 4f9f49646a5733c0c2bd49940673dde89a9c5add Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 21 Nov 2018 15:07:00 +0000 Subject: arm64: cpufeature: Fix mismerge of CONFIG_ARM64_SSBD block When merging support for SSBD and the CRC32 instructions, the conflict resolution for the new capability entries in arm64_features[] inadvertedly predicated the availability of the CRC32 instructions on CONFIG_ARM64_SSBD, despite the functionality being entirely unrelated. Move the #ifdef CONFIG_ARM64_SSBD down so that it only covers the SSBD capability. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index af50064dea51..aec5ecb85737 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1333,7 +1333,6 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = cpu_enable_hw_dbm, }, #endif -#ifdef CONFIG_ARM64_SSBD { .desc = "CRC32 instructions", .capability = ARM64_HAS_CRC32, @@ -1343,6 +1342,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .field_pos = ID_AA64ISAR0_CRC32_SHIFT, .min_field_value = 1, }, +#ifdef CONFIG_ARM64_SSBD { .desc = "Speculative Store Bypassing Safe (SSBS)", .capability = ARM64_SSBS, -- cgit v1.2.3 From ce68cc6fad893eb33b69ef7ec186233a51696236 Mon Sep 17 00:00:00 2001 From: Ryder Lee Date: Mon, 12 Nov 2018 09:28:08 +0800 Subject: arm64: dts: mt7622: Drop the general purpose timer node MediaTeks general purpose timer register into system in early phase during kernel boot, but the clock sources aren't probed at this point. The system has the ARM architecture timer, so we don't need the GPT timer from mediatek. Drop the DT node for it. Fixes: 9cc7f0de9e67 ("arm64: dts: mt7622: add timer, CCI-400 and PMU nodes") Signed-off-by: Ryder Lee [mb: fix commit message] Signed-off-by: Matthias Brugger --- arch/arm64/boot/dts/mediatek/mt7622.dtsi | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/mediatek/mt7622.dtsi b/arch/arm64/boot/dts/mediatek/mt7622.dtsi index fe0c875f1d95..14a1028ca3a6 100644 --- a/arch/arm64/boot/dts/mediatek/mt7622.dtsi +++ b/arch/arm64/boot/dts/mediatek/mt7622.dtsi @@ -227,16 +227,6 @@ #reset-cells = <1>; }; - timer: timer@10004000 { - compatible = "mediatek,mt7622-timer", - "mediatek,mt6577-timer"; - reg = <0 0x10004000 0 0x80>; - interrupts = ; - clocks = <&infracfg CLK_INFRA_APXGPT_PD>, - <&topckgen CLK_TOP_RTC>; - clock-names = "system-clk", "rtc-clk"; - }; - scpsys: scpsys@10006000 { compatible = "mediatek,mt7622-scpsys", "syscon"; -- cgit v1.2.3 From 58a0afbf4c99ac355df16773af835b919b9432ee Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:12 +0100 Subject: ARM: davinci: da8xx: define gpio interrupts as separate resources Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") the davinci GPIO driver fails to probe if we boot in legacy mode from any of the board files. Since the driver now expects every interrupt to be defined as a separate resource, split the definition of IRQ resources instead of having a single continuous interrupt range. Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/devices-da8xx.c | 40 +++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/devices-da8xx.c b/arch/arm/mach-davinci/devices-da8xx.c index 1fd3619f6a09..cf78da5ab054 100644 --- a/arch/arm/mach-davinci/devices-da8xx.c +++ b/arch/arm/mach-davinci/devices-da8xx.c @@ -701,6 +701,46 @@ static struct resource da8xx_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_DA8XX_GPIO0, + .end = IRQ_DA8XX_GPIO0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO1, + .end = IRQ_DA8XX_GPIO1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO2, + .end = IRQ_DA8XX_GPIO2, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO3, + .end = IRQ_DA8XX_GPIO3, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO4, + .end = IRQ_DA8XX_GPIO4, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO5, + .end = IRQ_DA8XX_GPIO5, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO6, + .end = IRQ_DA8XX_GPIO6, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO7, + .end = IRQ_DA8XX_GPIO7, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DA8XX_GPIO8, .end = IRQ_DA8XX_GPIO8, .flags = IORESOURCE_IRQ, }, -- cgit v1.2.3 From 193c04374e281a56c7d4f96e66d329671945bebe Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:13 +0100 Subject: ARM: davinci: dm365: define gpio interrupts as separate resources Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") the davinci GPIO driver fails to probe if we boot in legacy mode from any of the board files. Since the driver now expects every interrupt to be defined as a separate resource, split the definition of IRQ resources instead of having a single continuous interrupt range. Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm365.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index abcf2a5ed89b..42665914166a 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -267,6 +267,41 @@ static struct resource dm365_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_DM365_GPIO0, + .end = IRQ_DM365_GPIO0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO1, + .end = IRQ_DM365_GPIO1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO2, + .end = IRQ_DM365_GPIO2, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO3, + .end = IRQ_DM365_GPIO3, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO4, + .end = IRQ_DM365_GPIO4, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO5, + .end = IRQ_DM365_GPIO5, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO6, + .end = IRQ_DM365_GPIO6, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM365_GPIO7, .end = IRQ_DM365_GPIO7, .flags = IORESOURCE_IRQ, }, -- cgit v1.2.3 From 2c9c83491f30afbce25796e185cd4d5e36080e31 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:14 +0100 Subject: ARM: davinci: dm646x: define gpio interrupts as separate resources Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") the davinci GPIO driver fails to probe if we boot in legacy mode from any of the board files. Since the driver now expects every interrupt to be defined as a separate resource, split the definition of IRQ resources instead of having a single continuous interrupt range. Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm646x.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c index 6bd2ed069d0d..d9b93e2806d2 100644 --- a/arch/arm/mach-davinci/dm646x.c +++ b/arch/arm/mach-davinci/dm646x.c @@ -442,6 +442,16 @@ static struct resource dm646x_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_DM646X_GPIOBNK0, + .end = IRQ_DM646X_GPIOBNK0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM646X_GPIOBNK1, + .end = IRQ_DM646X_GPIOBNK1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM646X_GPIOBNK2, .end = IRQ_DM646X_GPIOBNK2, .flags = IORESOURCE_IRQ, }, -- cgit v1.2.3 From 27db7baab640ea28d7994eda943fef170e347081 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:15 +0100 Subject: ARM: davinci: dm355: define gpio interrupts as separate resources Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") the davinci GPIO driver fails to probe if we boot in legacy mode from any of the board files. Since the driver now expects every interrupt to be defined as a separate resource, split the definition of IRQ resources instead of having a single continuous interrupt range. Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm355.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c index 9f7d38d12c88..2b0f5d97ab7c 100644 --- a/arch/arm/mach-davinci/dm355.c +++ b/arch/arm/mach-davinci/dm355.c @@ -548,6 +548,36 @@ static struct resource dm355_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_DM355_GPIOBNK0, + .end = IRQ_DM355_GPIOBNK0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK1, + .end = IRQ_DM355_GPIOBNK1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK2, + .end = IRQ_DM355_GPIOBNK2, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK3, + .end = IRQ_DM355_GPIOBNK3, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK4, + .end = IRQ_DM355_GPIOBNK4, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK5, + .end = IRQ_DM355_GPIOBNK5, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_DM355_GPIOBNK6, .end = IRQ_DM355_GPIOBNK6, .flags = IORESOURCE_IRQ, }, -- cgit v1.2.3 From adcf60ce14c8250761af9de907eb6c7d096c26d3 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:16 +0100 Subject: ARM: davinci: dm644x: define gpio interrupts as separate resources Since commit eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") the davinci GPIO driver fails to probe if we boot in legacy mode from any of the board files. Since the driver now expects every interrupt to be defined as a separate resource, split the definition of IRQ resources instead of having a single continuous interrupt range. Fixes: eb3744a2dd01 ("gpio: davinci: Do not assume continuous IRQ numbering") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm644x.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c index 0720da7809a6..de1ec6dc01e9 100644 --- a/arch/arm/mach-davinci/dm644x.c +++ b/arch/arm/mach-davinci/dm644x.c @@ -492,6 +492,26 @@ static struct resource dm644_gpio_resources[] = { }, { /* interrupt */ .start = IRQ_GPIOBNK0, + .end = IRQ_GPIOBNK0, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_GPIOBNK1, + .end = IRQ_GPIOBNK1, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_GPIOBNK2, + .end = IRQ_GPIOBNK2, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_GPIOBNK3, + .end = IRQ_GPIOBNK3, + .flags = IORESOURCE_IRQ, + }, + { + .start = IRQ_GPIOBNK4, .end = IRQ_GPIOBNK4, .flags = IORESOURCE_IRQ, }, -- cgit v1.2.3 From 45ed94b9e2d2e310fa7fb884b26080dfd4f0e31d Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:18 +0100 Subject: ARM: davinci: da850: set the GPIO base to 0 Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") broke the network support in legacy boot mode for da850-evm since we can no longer request the MDIO clock GPIO. We now have the option to specify the GPIO base manually for davinci, so add the relevant fields to platform data. Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/da850.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-davinci/da850.c b/arch/arm/mach-davinci/da850.c index 4528bbf0c861..e7b78df2bfef 100644 --- a/arch/arm/mach-davinci/da850.c +++ b/arch/arm/mach-davinci/da850.c @@ -719,7 +719,9 @@ int __init da850_register_vpif_capture(struct vpif_capture_config } static struct davinci_gpio_platform_data da850_gpio_platform_data = { - .ngpio = 144, + .no_auto_base = true, + .base = 0, + .ngpio = 144, }; int __init da850_register_gpio(void) -- cgit v1.2.3 From 133cd2e48305887709675847da1f6ee2b7363929 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:19 +0100 Subject: ARM: davinci: dm365: set the GPIO base to 0 Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") broke the GPIO support on DaVinci boards in legacy mode by allowing gpiolib to set the GPIO base automatically. DaVinci board files use the legacy GPIO API with hard-coded GPIO line numbers. Use the new fields in struct davinci_gpio_platform_data to manually set the GPIO base to 0. Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm365.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm365.c b/arch/arm/mach-davinci/dm365.c index 42665914166a..01fb2b0c82de 100644 --- a/arch/arm/mach-davinci/dm365.c +++ b/arch/arm/mach-davinci/dm365.c @@ -308,6 +308,8 @@ static struct resource dm365_gpio_resources[] = { }; static struct davinci_gpio_platform_data dm365_gpio_platform_data = { + .no_auto_base = true, + .base = 0, .ngpio = 104, .gpio_unbanked = 8, }; -- cgit v1.2.3 From fb9e7f0bba151281d7587c1262fae8bdf0497296 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:20 +0100 Subject: ARM: davinci: dm646x: set the GPIO base to 0 Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") broke the GPIO support on DaVinci boards in legacy mode by allowing gpiolib to set the GPIO base automatically. DaVinci board files use the legacy GPIO API with hard-coded GPIO line numbers. Use the new fields in struct davinci_gpio_platform_data to manually set the GPIO base to 0. Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm646x.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c index d9b93e2806d2..7dc54b2a610f 100644 --- a/arch/arm/mach-davinci/dm646x.c +++ b/arch/arm/mach-davinci/dm646x.c @@ -458,6 +458,8 @@ static struct resource dm646x_gpio_resources[] = { }; static struct davinci_gpio_platform_data dm646x_gpio_platform_data = { + .no_auto_base = true, + .base = 0, .ngpio = 43, }; -- cgit v1.2.3 From 7d35baa4e9ec4b717bc0e58a39cdb6a1c50f5465 Mon Sep 17 00:00:00 2001 From: Mathias Kresin Date: Mon, 26 Nov 2018 11:25:40 +0100 Subject: MIPS: ralink: Fix mt7620 nd_sd pinmux In case the nd_sd group is set to the sd-card function, Pins 45 + 46 are configured as GPIOs. If they are blocked by the sd function, they can't be used as GPIOs. Reported-by: Kristian Evensen Signed-off-by: Mathias Kresin Signed-off-by: Paul Burton Fixes: f576fb6a0700 ("MIPS: ralink: cleanup the soc specific pinmux data") Patchwork: https://patchwork.linux-mips.org/patch/21220/ Cc: John Crispin Cc: Ralf Baechle Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: linux-kernel@vger.kernel.org Cc: stable@vger.kernel.org # v3.18+ --- arch/mips/ralink/mt7620.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c index 41b71c4352c2..c1ce6f43642b 100644 --- a/arch/mips/ralink/mt7620.c +++ b/arch/mips/ralink/mt7620.c @@ -84,7 +84,7 @@ static struct rt2880_pmx_func pcie_rst_grp[] = { }; static struct rt2880_pmx_func nd_sd_grp[] = { FUNC("nand", MT7620_GPIO_MODE_NAND, 45, 15), - FUNC("sd", MT7620_GPIO_MODE_SD, 45, 15) + FUNC("sd", MT7620_GPIO_MODE_SD, 47, 13) }; static struct rt2880_pmx_group mt7620a_pinmux_data[] = { -- cgit v1.2.3 From e2c95a61656d29ceaac97b6a975c8a1f26e26f15 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 26 Nov 2018 14:05:38 +0100 Subject: bpf, ppc64: generalize fetching subprog into bpf_jit_get_func_addr Make fetching of the BPF call address from ppc64 JIT generic. ppc64 was using a slightly different variant rather than through the insns' imm field encoding as the target address would not fit into that space. Therefore, the target subprog number was encoded into the insns' offset and fetched through fp->aux->func[off]->bpf_func instead. Given there are other JITs with this issue and the mechanism of fetching the address is JIT-generic, move it into the core as a helper instead. On the JIT side, we get information on whether the retrieved address is a fixed one, that is, not changing through JIT passes, or a dynamic one. For the former, JITs can optimize their imm emission because this doesn't change jump offsets throughout JIT process. Signed-off-by: Daniel Borkmann Reviewed-by: Sandipan Das Tested-by: Sandipan Das Signed-off-by: Alexei Starovoitov --- arch/powerpc/net/bpf_jit_comp64.c | 57 ++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 19 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 50b129785aee..17482f5de3e2 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -166,7 +166,33 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) PPC_BLR(); } -static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func) +static void bpf_jit_emit_func_call_hlp(u32 *image, struct codegen_context *ctx, + u64 func) +{ +#ifdef PPC64_ELF_ABI_v1 + /* func points to the function descriptor */ + PPC_LI64(b2p[TMP_REG_2], func); + /* Load actual entry point from function descriptor */ + PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); + /* ... and move it to LR */ + PPC_MTLR(b2p[TMP_REG_1]); + /* + * Load TOC from function descriptor at offset 8. + * We can clobber r2 since we get called through a + * function pointer (so caller will save/restore r2) + * and since we don't use a TOC ourself. + */ + PPC_BPF_LL(2, b2p[TMP_REG_2], 8); +#else + /* We can clobber r12 */ + PPC_FUNC_ADDR(12, func); + PPC_MTLR(12); +#endif + PPC_BLRL(); +} + +static void bpf_jit_emit_func_call_rel(u32 *image, struct codegen_context *ctx, + u64 func) { unsigned int i, ctx_idx = ctx->idx; @@ -273,7 +299,7 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, { const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; - int i; + int i, ret; /* Start of epilogue code - will only be valid 2nd pass onwards */ u32 exit_addr = addrs[flen]; @@ -284,8 +310,9 @@ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 src_reg = b2p[insn[i].src_reg]; s16 off = insn[i].off; s32 imm = insn[i].imm; + bool func_addr_fixed; + u64 func_addr; u64 imm64; - u8 *func; u32 true_cond; u32 tmp_idx; @@ -711,23 +738,15 @@ emit_clear: case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - /* bpf function call */ - if (insn[i].src_reg == BPF_PSEUDO_CALL) - if (!extra_pass) - func = NULL; - else if (fp->aux->func && off < fp->aux->func_cnt) - /* use the subprog id from the off - * field to lookup the callee address - */ - func = (u8 *) fp->aux->func[off]->bpf_func; - else - return -EINVAL; - /* kernel helper call */ - else - func = (u8 *) __bpf_call_base + imm; - - bpf_jit_emit_func_call(image, ctx, (u64)func); + ret = bpf_jit_get_func_addr(fp, &insn[i], extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + if (func_addr_fixed) + bpf_jit_emit_func_call_hlp(image, ctx, func_addr); + else + bpf_jit_emit_func_call_rel(image, ctx, func_addr); /* move return value from r3 to BPF_REG_0 */ PPC_MR(b2p[BPF_REG_0], 3); break; -- cgit v1.2.3 From 8c11ea5ce13da0252fc92f91e90b0cb0c8fe5619 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 26 Nov 2018 14:05:39 +0100 Subject: bpf, arm64: fix getting subprog addr from aux for calls The arm64 JIT has the same issue as ppc64 JIT in that the relative BPF to BPF call offset can be too far away from core kernel in that relative encoding into imm is not sufficient and could potentially be truncated, see also fd045f6cd98e ("arm64: add support for module PLTs") which adds spill-over space for module_alloc() and therefore bpf_jit_binary_alloc(). Therefore, use the recently added bpf_jit_get_func_addr() helper for properly fetching the address through prog->aux->func[off]->bpf_func instead. This also has the benefit to optimize normal helper calls since their address can use the optimized emission. Tested on Cavium ThunderX CN8890. Fixes: db496944fdaa ("bpf: arm64: add JIT support for multi-function programs") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- arch/arm64/net/bpf_jit_comp.c | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index a6fdaea07c63..89198017e8e6 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -351,7 +351,8 @@ static void build_epilogue(struct jit_ctx *ctx) * >0 - successfully JITed a 16-byte eBPF instruction. * <0 - failed to JIT. */ -static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) +static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx, + bool extra_pass) { const u8 code = insn->code; const u8 dst = bpf2a64[insn->dst_reg]; @@ -625,12 +626,19 @@ emit_cond_jmp: case BPF_JMP | BPF_CALL: { const u8 r0 = bpf2a64[BPF_REG_0]; - const u64 func = (u64)__bpf_call_base + imm; + bool func_addr_fixed; + u64 func_addr; + int ret; - if (ctx->prog->is_func) - emit_addr_mov_i64(tmp, func, ctx); + ret = bpf_jit_get_func_addr(ctx->prog, insn, extra_pass, + &func_addr, &func_addr_fixed); + if (ret < 0) + return ret; + if (func_addr_fixed) + /* We can use optimized emission here. */ + emit_a64_mov_i64(tmp, func_addr, ctx); else - emit_a64_mov_i64(tmp, func, ctx); + emit_addr_mov_i64(tmp, func_addr, ctx); emit(A64_BLR(tmp), ctx); emit(A64_MOV(1, r0, A64_R(0)), ctx); break; @@ -753,7 +761,7 @@ emit_cond_jmp: return 0; } -static int build_body(struct jit_ctx *ctx) +static int build_body(struct jit_ctx *ctx, bool extra_pass) { const struct bpf_prog *prog = ctx->prog; int i; @@ -762,7 +770,7 @@ static int build_body(struct jit_ctx *ctx) const struct bpf_insn *insn = &prog->insnsi[i]; int ret; - ret = build_insn(insn, ctx); + ret = build_insn(insn, ctx, extra_pass); if (ret > 0) { i++; if (ctx->image == NULL) @@ -858,7 +866,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) /* 1. Initial fake pass to compute ctx->idx. */ /* Fake pass to fill in ctx->offset. */ - if (build_body(&ctx)) { + if (build_body(&ctx, extra_pass)) { prog = orig_prog; goto out_off; } @@ -888,7 +896,7 @@ skip_init_ctx: build_prologue(&ctx, was_classic); - if (build_body(&ctx)) { + if (build_body(&ctx, extra_pass)) { bpf_jit_binary_free(header); prog = orig_prog; goto out_off; -- cgit v1.2.3 From c44768a33da81b4a0986e79bbf0588f1a0651dec Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 26 Nov 2018 13:03:46 -0800 Subject: sparc: Fix JIT fused branch convergance. On T4 and later sparc64 cpus we can use the fused compare and branch instruction. However, it can only be used if the branch destination is in the range of a signed 10-bit immediate offset. This amounts to 1024 instructions forwards or backwards. After the commit referenced in the Fixes: tag, the largest possible size program seen by the JIT explodes by a significant factor. As a result of this convergance takes many more passes since the expanded "BPF_LDX | BPF_MSH | BPF_B" code sequence, for example, contains several embedded branch on condition instructions. On each pass, as suddenly new fused compare and branch instances become valid, this makes thousands more in range for the next pass. And so on and so forth. This is most greatly exemplified by "BPF_MAXINSNS: exec all MSH" which takes 35 passes to converge, and shrinks the image by about 64K. To decrease the cost of this number of convergance passes, do the convergance pass before we have the program image allocated, just like other JITs (such as x86) do. Fixes: e0cea7ce988c ("bpf: implement ld_abs/ld_ind in native bpf") Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov --- arch/sparc/net/bpf_jit_comp_64.c | 77 +++++++++++++++++++++++++--------------- 1 file changed, 49 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 222785af550b..7217d6359643 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1425,12 +1425,12 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) struct bpf_prog *tmp, *orig_prog = prog; struct sparc64_jit_data *jit_data; struct bpf_binary_header *header; + u32 prev_image_size, image_size; bool tmp_blinded = false; bool extra_pass = false; struct jit_ctx ctx; - u32 image_size; u8 *image_ptr; - int pass; + int pass, i; if (!prog->jit_requested) return orig_prog; @@ -1461,61 +1461,82 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) header = jit_data->header; extra_pass = true; image_size = sizeof(u32) * ctx.idx; + prev_image_size = image_size; + pass = 1; goto skip_init_ctx; } memset(&ctx, 0, sizeof(ctx)); ctx.prog = prog; - ctx.offset = kcalloc(prog->len, sizeof(unsigned int), GFP_KERNEL); + ctx.offset = kmalloc_array(prog->len, sizeof(unsigned int), GFP_KERNEL); if (ctx.offset == NULL) { prog = orig_prog; goto out_off; } - /* Fake pass to detect features used, and get an accurate assessment - * of what the final image size will be. + /* Longest sequence emitted is for bswap32, 12 instructions. Pre-cook + * the offset array so that we converge faster. */ - if (build_body(&ctx)) { - prog = orig_prog; - goto out_off; - } - build_prologue(&ctx); - build_epilogue(&ctx); + for (i = 0; i < prog->len; i++) + ctx.offset[i] = i * (12 * 4); - /* Now we know the actual image size. */ - image_size = sizeof(u32) * ctx.idx; - header = bpf_jit_binary_alloc(image_size, &image_ptr, - sizeof(u32), jit_fill_hole); - if (header == NULL) { - prog = orig_prog; - goto out_off; - } - - ctx.image = (u32 *)image_ptr; -skip_init_ctx: - for (pass = 1; pass < 3; pass++) { + prev_image_size = ~0U; + for (pass = 1; pass < 40; pass++) { ctx.idx = 0; build_prologue(&ctx); - if (build_body(&ctx)) { - bpf_jit_binary_free(header); prog = orig_prog; goto out_off; } - build_epilogue(&ctx); if (bpf_jit_enable > 1) - pr_info("Pass %d: shrink = %d, seen = [%c%c%c%c%c%c]\n", pass, - image_size - (ctx.idx * 4), + pr_info("Pass %d: size = %u, seen = [%c%c%c%c%c%c]\n", pass, + ctx.idx * 4, ctx.tmp_1_used ? '1' : ' ', ctx.tmp_2_used ? '2' : ' ', ctx.tmp_3_used ? '3' : ' ', ctx.saw_frame_pointer ? 'F' : ' ', ctx.saw_call ? 'C' : ' ', ctx.saw_tail_call ? 'T' : ' '); + + if (ctx.idx * 4 == prev_image_size) + break; + prev_image_size = ctx.idx * 4; + cond_resched(); + } + + /* Now we know the actual image size. */ + image_size = sizeof(u32) * ctx.idx; + header = bpf_jit_binary_alloc(image_size, &image_ptr, + sizeof(u32), jit_fill_hole); + if (header == NULL) { + prog = orig_prog; + goto out_off; + } + + ctx.image = (u32 *)image_ptr; +skip_init_ctx: + ctx.idx = 0; + + build_prologue(&ctx); + + if (build_body(&ctx)) { + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_off; + } + + build_epilogue(&ctx); + + if (ctx.idx * 4 != prev_image_size) { + pr_err("bpf_jit: Failed to converge, prev_size=%u size=%d\n", + prev_image_size, ctx.idx * 4); + bpf_jit_binary_free(header); + prog = orig_prog; + goto out_off; } if (bpf_jit_enable > 1) -- cgit v1.2.3 From e2ac579a7a18bcd9e8cf14cf42eac0b8a2ba6c4b Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 26 Nov 2018 14:52:18 -0800 Subject: sparc: Correct ctx->saw_frame_pointer logic. We need to initialize the frame pointer register not just if it is seen as a source operand, but also if it is seen as the destination operand of a store or an atomic instruction (which effectively is a source operand). This is exercised by test_verifier's "non-invalid fp arithmetic" Signed-off-by: David S. Miller Signed-off-by: Alexei Starovoitov --- arch/sparc/net/bpf_jit_comp_64.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch') diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index 7217d6359643..ec4da4dc98f1 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -1270,6 +1270,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp2 = bpf2sparc[TMP_REG_2]; u32 opcode = 0, rs2; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + ctx->tmp_2_used = true; emit_loadimm(imm, tmp2, ctx); @@ -1308,6 +1311,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp = bpf2sparc[TMP_REG_1]; u32 opcode = 0, rs2; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + switch (BPF_SIZE(code)) { case BPF_W: opcode = ST32; @@ -1340,6 +1346,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp2 = bpf2sparc[TMP_REG_2]; const u8 tmp3 = bpf2sparc[TMP_REG_3]; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + ctx->tmp_1_used = true; ctx->tmp_2_used = true; ctx->tmp_3_used = true; @@ -1360,6 +1369,9 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx) const u8 tmp2 = bpf2sparc[TMP_REG_2]; const u8 tmp3 = bpf2sparc[TMP_REG_3]; + if (insn->dst_reg == BPF_REG_FP) + ctx->saw_frame_pointer = true; + ctx->tmp_1_used = true; ctx->tmp_2_used = true; ctx->tmp_3_used = true; -- cgit v1.2.3 From ef78e5ec9214376c5cb989f5da70b02d0c117b66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 26 Nov 2018 11:27:03 -0800 Subject: ia64: export node_distance function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The numa_slit variable used by node_distance is available to a module as long as it is linked at compile-time. However, it is not available to loadable modules. Leading to errors such as: ERROR: "numa_slit" [drivers/nvme/host/nvme-core.ko] undefined! The error above is caused by the nvme multipath code that makes use of node_distance for its path calculation. When the patch was added, the lightnvm subsystem would select nvme and always compile it in, leading to the node_distance call to always succeed. However, when this requirement was removed, nvme could be compiled in as a module, which exposed this bug. This patch extracts node_distance to a function and exports it. Since ACPI is depending on node_distance being a simple lookup to numa_slit, the previous behavior is exposed as slit_distance and its users updated. Fixes: f333444708f82 "nvme: take node locality into account when selecting a path" Fixes: 73569e11032f "lightnvm: remove dependencies on BLK_DEV_NVME and PCI" Signed-off-by: Matias Bjøring Signed-off-by: Tony Luck Signed-off-by: Linus Torvalds --- arch/ia64/include/asm/numa.h | 4 +++- arch/ia64/kernel/acpi.c | 6 +++--- arch/ia64/mm/numa.c | 6 ++++++ 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/ia64/include/asm/numa.h b/arch/ia64/include/asm/numa.h index ebef7f40aabb..c5c253cb9bd6 100644 --- a/arch/ia64/include/asm/numa.h +++ b/arch/ia64/include/asm/numa.h @@ -59,7 +59,9 @@ extern struct node_cpuid_s node_cpuid[NR_CPUS]; */ extern u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES]; -#define node_distance(from,to) (numa_slit[(from) * MAX_NUMNODES + (to)]) +#define slit_distance(from,to) (numa_slit[(from) * MAX_NUMNODES + (to)]) +extern int __node_distance(int from, int to); +#define node_distance(from,to) __node_distance(from, to) extern int paddr_to_nid(unsigned long paddr); diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 1dacbf5e9e09..41eb281709da 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -578,8 +578,8 @@ void __init acpi_numa_fixup(void) if (!slit_table) { for (i = 0; i < MAX_NUMNODES; i++) for (j = 0; j < MAX_NUMNODES; j++) - node_distance(i, j) = i == j ? LOCAL_DISTANCE : - REMOTE_DISTANCE; + slit_distance(i, j) = i == j ? + LOCAL_DISTANCE : REMOTE_DISTANCE; return; } @@ -592,7 +592,7 @@ void __init acpi_numa_fixup(void) if (!pxm_bit_test(j)) continue; node_to = pxm_to_node(j); - node_distance(node_from, node_to) = + slit_distance(node_from, node_to) = slit_table->entry[i * slit_table->locality_count + j]; } } diff --git a/arch/ia64/mm/numa.c b/arch/ia64/mm/numa.c index 3861d6e32d5f..a03803506b0c 100644 --- a/arch/ia64/mm/numa.c +++ b/arch/ia64/mm/numa.c @@ -36,6 +36,12 @@ struct node_cpuid_s node_cpuid[NR_CPUS] = */ u8 numa_slit[MAX_NUMNODES * MAX_NUMNODES]; +int __node_distance(int from, int to) +{ + return slit_distance(from, to); +} +EXPORT_SYMBOL(__node_distance); + /* Identify which cnode a physical address resides on */ int paddr_to_nid(unsigned long paddr) -- cgit v1.2.3 From 2958b66694e018c552be0b60521fec27e8d12988 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 26 Nov 2018 13:29:41 -0800 Subject: xtensa: enable coprocessors that are being flushed coprocessor_flush_all may be called from a context of a thread that is different from the thread being flushed. In that case contents of the cpenable special register may not match ti->cpenable of the target thread, resulting in unhandled coprocessor exception in the kernel context. Set cpenable special register to the ti->cpenable of the target register for the duration of the flush and restore it afterwards. This fixes the following crash caused by coprocessor register inspection in native gdb: (gdb) p/x $w0 Illegal instruction in kernel: sig: 9 [#1] PREEMPT Call Trace: ___might_sleep+0x184/0x1a4 __might_sleep+0x41/0xac exit_signals+0x14/0x218 do_exit+0xc9/0x8b8 die+0x99/0xa0 do_illegal_instruction+0x18/0x6c common_exception+0x77/0x77 coprocessor_flush+0x16/0x3c arch_ptrace+0x46c/0x674 sys_ptrace+0x2ce/0x3b4 system_call+0x54/0x80 common_exception+0x77/0x77 note: gdb[100] exited with preempt_count 1 Killed Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/process.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 483dcfb6e681..4bb68133a72a 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -94,18 +94,21 @@ void coprocessor_release_all(struct thread_info *ti) void coprocessor_flush_all(struct thread_info *ti) { - unsigned long cpenable; + unsigned long cpenable, old_cpenable; int i; preempt_disable(); + RSR_CPENABLE(old_cpenable); cpenable = ti->cpenable; + WSR_CPENABLE(cpenable); for (i = 0; i < XCHAL_CP_MAX; i++) { if ((cpenable & 1) != 0 && coprocessor_owner[i] == ti) coprocessor_flush(ti, i); cpenable >>= 1; } + WSR_CPENABLE(old_cpenable); preempt_enable(); } -- cgit v1.2.3 From 03bc996af0cc71c7f30c384d8ce7260172423b34 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 26 Nov 2018 15:18:26 -0800 Subject: xtensa: fix coprocessor context offset definitions Coprocessor context offsets are used by the assembly code that moves coprocessor context between the individual fields of the thread_info::xtregs_cp structure and coprocessor registers. This fixes coprocessor context clobbering on flushing and reloading during normal user code execution and user process debugging in the presence of more than one coprocessor in the core configuration. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/asm-offsets.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/kernel/asm-offsets.c b/arch/xtensa/kernel/asm-offsets.c index 67904f55f188..120dd746a147 100644 --- a/arch/xtensa/kernel/asm-offsets.c +++ b/arch/xtensa/kernel/asm-offsets.c @@ -94,14 +94,14 @@ int main(void) DEFINE(THREAD_SP, offsetof (struct task_struct, thread.sp)); DEFINE(THREAD_CPENABLE, offsetof (struct thread_info, cpenable)); #if XTENSA_HAVE_COPROCESSORS - DEFINE(THREAD_XTREGS_CP0, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP1, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP2, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP3, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP4, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP5, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP6, offsetof (struct thread_info, xtregs_cp)); - DEFINE(THREAD_XTREGS_CP7, offsetof (struct thread_info, xtregs_cp)); + DEFINE(THREAD_XTREGS_CP0, offsetof(struct thread_info, xtregs_cp.cp0)); + DEFINE(THREAD_XTREGS_CP1, offsetof(struct thread_info, xtregs_cp.cp1)); + DEFINE(THREAD_XTREGS_CP2, offsetof(struct thread_info, xtregs_cp.cp2)); + DEFINE(THREAD_XTREGS_CP3, offsetof(struct thread_info, xtregs_cp.cp3)); + DEFINE(THREAD_XTREGS_CP4, offsetof(struct thread_info, xtregs_cp.cp4)); + DEFINE(THREAD_XTREGS_CP5, offsetof(struct thread_info, xtregs_cp.cp5)); + DEFINE(THREAD_XTREGS_CP6, offsetof(struct thread_info, xtregs_cp.cp6)); + DEFINE(THREAD_XTREGS_CP7, offsetof(struct thread_info, xtregs_cp.cp7)); #endif DEFINE(THREAD_XTREGS_USER, offsetof (struct thread_info, xtregs_user)); DEFINE(XTREGS_USER_SIZE, sizeof(xtregs_user_t)); -- cgit v1.2.3 From 38a35a78c5e270cbe53c4fef6b0d3c2da90dd849 Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Mon, 26 Nov 2018 18:06:01 -0800 Subject: xtensa: fix coprocessor part of ptrace_{get,set}xregs Layout of coprocessor registers in the elf_xtregs_t and xtregs_coprocessor_t may be different due to alignment. Thus it is not always possible to copy data between the xtregs_coprocessor_t structure and the elf_xtregs_t and get correct values for all registers. Use a table of offsets and sizes of individual coprocessor register groups to do coprocessor context copying in the ptrace_getxregs and ptrace_setxregs. This fixes incorrect coprocessor register values reading from the user process by the native gdb on an xtensa core with multiple coprocessors and registers with high alignment requirements. Cc: stable@vger.kernel.org Signed-off-by: Max Filippov --- arch/xtensa/kernel/ptrace.c | 42 ++++++++++++++++++++++++++++++++++++++---- 1 file changed, 38 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index c0845cb1cbb9..d9541be0605a 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -127,12 +127,37 @@ static int ptrace_setregs(struct task_struct *child, void __user *uregs) } +#if XTENSA_HAVE_COPROCESSORS +#define CP_OFFSETS(cp) \ + { \ + .elf_xtregs_offset = offsetof(elf_xtregs_t, cp), \ + .ti_offset = offsetof(struct thread_info, xtregs_cp.cp), \ + .sz = sizeof(xtregs_ ## cp ## _t), \ + } + +static const struct { + size_t elf_xtregs_offset; + size_t ti_offset; + size_t sz; +} cp_offsets[] = { + CP_OFFSETS(cp0), + CP_OFFSETS(cp1), + CP_OFFSETS(cp2), + CP_OFFSETS(cp3), + CP_OFFSETS(cp4), + CP_OFFSETS(cp5), + CP_OFFSETS(cp6), + CP_OFFSETS(cp7), +}; +#endif + static int ptrace_getxregs(struct task_struct *child, void __user *uregs) { struct pt_regs *regs = task_pt_regs(child); struct thread_info *ti = task_thread_info(child); elf_xtregs_t __user *xtregs = uregs; int ret = 0; + int i __maybe_unused; if (!access_ok(VERIFY_WRITE, uregs, sizeof(elf_xtregs_t))) return -EIO; @@ -140,8 +165,13 @@ static int ptrace_getxregs(struct task_struct *child, void __user *uregs) #if XTENSA_HAVE_COPROCESSORS /* Flush all coprocessor registers to memory. */ coprocessor_flush_all(ti); - ret |= __copy_to_user(&xtregs->cp0, &ti->xtregs_cp, - sizeof(xtregs_coprocessor_t)); + + for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i) + ret |= __copy_to_user((char __user *)xtregs + + cp_offsets[i].elf_xtregs_offset, + (const char *)ti + + cp_offsets[i].ti_offset, + cp_offsets[i].sz); #endif ret |= __copy_to_user(&xtregs->opt, ®s->xtregs_opt, sizeof(xtregs->opt)); @@ -157,6 +187,7 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs) struct pt_regs *regs = task_pt_regs(child); elf_xtregs_t *xtregs = uregs; int ret = 0; + int i __maybe_unused; if (!access_ok(VERIFY_READ, uregs, sizeof(elf_xtregs_t))) return -EFAULT; @@ -166,8 +197,11 @@ static int ptrace_setxregs(struct task_struct *child, void __user *uregs) coprocessor_flush_all(ti); coprocessor_release_all(ti); - ret |= __copy_from_user(&ti->xtregs_cp, &xtregs->cp0, - sizeof(xtregs_coprocessor_t)); + for (i = 0; i < ARRAY_SIZE(cp_offsets); ++i) + ret |= __copy_from_user((char *)ti + cp_offsets[i].ti_offset, + (const char __user *)xtregs + + cp_offsets[i].elf_xtregs_offset, + cp_offsets[i].sz); #endif ret |= __copy_from_user(®s->xtregs_opt, &xtregs->opt, sizeof(xtregs->opt)); -- cgit v1.2.3 From a6ca633e13f888cbb0a92309a1e090818cffcc86 Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:21 +0100 Subject: ARM: davinci: dm355: set the GPIO base to 0 Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") broke the GPIO support on DaVinci boards in legacy mode by allowing gpiolib to set the GPIO base automatically. DaVinci board files use the legacy GPIO API with hard-coded GPIO line numbers. Use the new fields in struct davinci_gpio_platform_data to manually set the GPIO base to 0. Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm355.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm355.c b/arch/arm/mach-davinci/dm355.c index 2b0f5d97ab7c..4c6e0bef4509 100644 --- a/arch/arm/mach-davinci/dm355.c +++ b/arch/arm/mach-davinci/dm355.c @@ -584,6 +584,8 @@ static struct resource dm355_gpio_resources[] = { }; static struct davinci_gpio_platform_data dm355_gpio_platform_data = { + .no_auto_base = true, + .base = 0, .ngpio = 104, }; -- cgit v1.2.3 From 55a891d0d0bd3db9303c580857147c37e2c76a5a Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:22 +0100 Subject: ARM: davinci: da830: set the GPIO base to 0 Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") broke the GPIO support on DaVinci boards in legacy mode by allowing gpiolib to set the GPIO base automatically. DaVinci board files use the legacy GPIO API with hard-coded GPIO line numbers. Use the new fields in struct davinci_gpio_platform_data to manually set the GPIO base to 0. Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/da830.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-davinci/da830.c b/arch/arm/mach-davinci/da830.c index 0bc5bd2665df..2cc9fe4c3a91 100644 --- a/arch/arm/mach-davinci/da830.c +++ b/arch/arm/mach-davinci/da830.c @@ -759,7 +759,9 @@ static struct davinci_id da830_ids[] = { }; static struct davinci_gpio_platform_data da830_gpio_platform_data = { - .ngpio = 128, + .no_auto_base = true, + .base = 0, + .ngpio = 128, }; int __init da830_register_gpio(void) -- cgit v1.2.3 From 27df7977099c2b8d32399cd1752f527c5a343dfa Mon Sep 17 00:00:00 2001 From: Bartosz Golaszewski Date: Wed, 21 Nov 2018 10:35:23 +0100 Subject: ARM: davinci: dm644x: set the GPIO base to 0 Commit 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") broke the GPIO support on DaVinci boards in legacy mode by allowing gpiolib to set the GPIO base automatically. DaVinci board files use the legacy GPIO API with hard-coded GPIO line numbers. Use the new fields in struct davinci_gpio_platform_data to manually set the GPIO base to 0. Fixes: 587f7a694f01 ("gpio: davinci: Use dev name for label and automatic base selection") Cc: stable@vger.kernel.org Signed-off-by: Bartosz Golaszewski Signed-off-by: Sekhar Nori --- arch/arm/mach-davinci/dm644x.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/mach-davinci/dm644x.c b/arch/arm/mach-davinci/dm644x.c index de1ec6dc01e9..38f92b7d413e 100644 --- a/arch/arm/mach-davinci/dm644x.c +++ b/arch/arm/mach-davinci/dm644x.c @@ -518,6 +518,8 @@ static struct resource dm644_gpio_resources[] = { }; static struct davinci_gpio_platform_data dm644_gpio_platform_data = { + .no_auto_base = true, + .base = 0, .ngpio = 71, }; -- cgit v1.2.3 From 2b9034b5eaddc09c0e9529b93446eb975f97f814 Mon Sep 17 00:00:00 2001 From: David Miller Date: Mon, 26 Nov 2018 21:55:09 -0800 Subject: sparc: Adjust bpf JIT prologue for PSEUDO calls. Move all arguments into output registers from input registers. This path is exercised by test_verifier.c's "calls: two calls with args" test. Adjust BPF_TAILCALL_PROLOGUE_SKIP as needed. Let's also make the prologue length a constant size regardless of the combination of ->saw_frame_pointer and ->saw_tail_call settings. Signed-off-by: David S. Miller Signed-off-by: Daniel Borkmann --- arch/sparc/net/bpf_jit_comp_64.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/net/bpf_jit_comp_64.c b/arch/sparc/net/bpf_jit_comp_64.c index ec4da4dc98f1..5fda4f7bf15d 100644 --- a/arch/sparc/net/bpf_jit_comp_64.c +++ b/arch/sparc/net/bpf_jit_comp_64.c @@ -791,7 +791,7 @@ static int emit_compare_and_branch(const u8 code, const u8 dst, u8 src, } /* Just skip the save instruction and the ctx register move. */ -#define BPF_TAILCALL_PROLOGUE_SKIP 16 +#define BPF_TAILCALL_PROLOGUE_SKIP 32 #define BPF_TAILCALL_CNT_SP_OFF (STACK_BIAS + 128) static void build_prologue(struct jit_ctx *ctx) @@ -824,9 +824,15 @@ static void build_prologue(struct jit_ctx *ctx) const u8 vfp = bpf2sparc[BPF_REG_FP]; emit(ADD | IMMED | RS1(FP) | S13(STACK_BIAS) | RD(vfp), ctx); + } else { + emit_nop(ctx); } emit_reg_move(I0, O0, ctx); + emit_reg_move(I1, O1, ctx); + emit_reg_move(I2, O2, ctx); + emit_reg_move(I3, O3, ctx); + emit_reg_move(I4, O4, ctx); /* If you add anything here, adjust BPF_TAILCALL_PROLOGUE_SKIP above. */ } -- cgit v1.2.3 From a87c99e61236ba8ca962ce97a19fab5ebd588d35 Mon Sep 17 00:00:00 2001 From: Luiz Capitulino Date: Fri, 23 Nov 2018 12:02:14 -0500 Subject: KVM: VMX: re-add ple_gap module parameter Apparently, the ple_gap parameter was accidentally removed by commit c8e88717cfc6b36bedea22368d97667446318291. Add it back. Signed-off-by: Luiz Capitulino Cc: stable@vger.kernel.org Fixes: c8e88717cfc6b36bedea22368d97667446318291 Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4555077d69ce..be6f13f1c25f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -174,6 +174,7 @@ module_param_named(preemption_timer, enable_preemption_timer, bool, S_IRUGO); * refer SDM volume 3b section 21.6.13 & 22.1.3. */ static unsigned int ple_gap = KVM_DEFAULT_PLE_GAP; +module_param(ple_gap, uint, 0444); static unsigned int ple_window = KVM_VMX_DEFAULT_PLE_WINDOW; module_param(ple_window, uint, 0444); -- cgit v1.2.3 From 38ab012f109caf10f471db1adf284e620dd8d701 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 20 Nov 2018 09:39:30 +0800 Subject: KVM: LAPIC: Fix pv ipis use-before-initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: BUG: unable to handle kernel NULL pointer dereference at 0000000000000014 PGD 800000040410c067 P4D 800000040410c067 PUD 40410d067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 3 PID: 2567 Comm: poc Tainted: G OE 4.19.0-rc5 #16 RIP: 0010:kvm_pv_send_ipi+0x94/0x350 [kvm] Call Trace: kvm_emulate_hypercall+0x3cc/0x700 [kvm] handle_vmcall+0xe/0x10 [kvm_intel] vmx_handle_exit+0xc1/0x11b0 [kvm_intel] vcpu_enter_guest+0x9fb/0x1910 [kvm] kvm_arch_vcpu_ioctl_run+0x35c/0x610 [kvm] kvm_vcpu_ioctl+0x3e9/0x6d0 [kvm] do_vfs_ioctl+0xa5/0x690 ksys_ioctl+0x6d/0x80 __x64_sys_ioctl+0x1a/0x20 do_syscall_64+0x83/0x6e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe The reason is that the apic map has not yet been initialized, the testcase triggers pv_send_ipi interface by vmcall which results in kvm->arch.apic_map is dereferenced. This patch fixes it by checking whether or not apic map is NULL and bailing out immediately if that is the case. Fixes: 4180bf1b65 (KVM: X86: Implement "send IPI" hypercall) Reported-by: Wei Wu Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Wei Wu Signed-off-by: Wanpeng Li Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 89db20f8cb70..02f2291dcf7e 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -576,6 +576,11 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low, rcu_read_lock(); map = rcu_dereference(kvm->arch.apic_map); + if (unlikely(!map)) { + count = -EOPNOTSUPP; + goto out; + } + if (min > map->max_apic_id) goto out; /* Bits above cluster_size are masked in the caller. */ -- cgit v1.2.3 From e97f852fd4561e77721bb9a4e0ea9d98305b1e93 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Tue, 20 Nov 2018 16:34:18 +0800 Subject: KVM: X86: Fix scan ioapic use-before-initialization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: BUG: unable to handle kernel NULL pointer dereference at 00000000000001c8 PGD 80000003ec4da067 P4D 80000003ec4da067 PUD 3f7bfa067 PMD 0 Oops: 0000 [#1] PREEMPT SMP PTI CPU: 7 PID: 5059 Comm: debug Tainted: G OE 4.19.0-rc5 #16 RIP: 0010:__lock_acquire+0x1a6/0x1990 Call Trace: lock_acquire+0xdb/0x210 _raw_spin_lock+0x38/0x70 kvm_ioapic_scan_entry+0x3e/0x110 [kvm] vcpu_enter_guest+0x167e/0x1910 [kvm] kvm_arch_vcpu_ioctl_run+0x35c/0x610 [kvm] kvm_vcpu_ioctl+0x3e9/0x6d0 [kvm] do_vfs_ioctl+0xa5/0x690 ksys_ioctl+0x6d/0x80 __x64_sys_ioctl+0x1a/0x20 do_syscall_64+0x83/0x6e0 entry_SYSCALL_64_after_hwframe+0x49/0xbe The reason is that the testcase writes hyperv synic HV_X64_MSR_SINT6 msr and triggers scan ioapic logic to load synic vectors into EOI exit bitmap. However, irqchip is not initialized by this simple testcase, ioapic/apic objects should not be accessed. This can be triggered by the following program: #define _GNU_SOURCE #include #include #include #include #include #include #include #include uint64_t r[3] = {0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff}; int main(void) { syscall(__NR_mmap, 0x20000000, 0x1000000, 3, 0x32, -1, 0); long res = 0; memcpy((void*)0x20000040, "/dev/kvm", 9); res = syscall(__NR_openat, 0xffffffffffffff9c, 0x20000040, 0, 0); if (res != -1) r[0] = res; res = syscall(__NR_ioctl, r[0], 0xae01, 0); if (res != -1) r[1] = res; res = syscall(__NR_ioctl, r[1], 0xae41, 0); if (res != -1) r[2] = res; memcpy( (void*)0x20000080, "\x01\x00\x00\x00\x00\x5b\x61\xbb\x96\x00\x00\x40\x00\x00\x00\x00\x01\x00" "\x08\x00\x00\x00\x00\x00\x0b\x77\xd1\x78\x4d\xd8\x3a\xed\xb1\x5c\x2e\x43" "\xaa\x43\x39\xd6\xff\xf5\xf0\xa8\x98\xf2\x3e\x37\x29\x89\xde\x88\xc6\x33" "\xfc\x2a\xdb\xb7\xe1\x4c\xac\x28\x61\x7b\x9c\xa9\xbc\x0d\xa0\x63\xfe\xfe" "\xe8\x75\xde\xdd\x19\x38\xdc\x34\xf5\xec\x05\xfd\xeb\x5d\xed\x2e\xaf\x22" "\xfa\xab\xb7\xe4\x42\x67\xd0\xaf\x06\x1c\x6a\x35\x67\x10\x55\xcb", 106); syscall(__NR_ioctl, r[2], 0x4008ae89, 0x20000080); syscall(__NR_ioctl, r[2], 0xae80, 0); return 0; } This patch fixes it by bailing out scan ioapic if ioapic is not initialized in kernel. Reported-by: Wei Wu Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Wei Wu Signed-off-by: Wanpeng Li Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5cd5647120f2..64cae03b2c20 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7455,7 +7455,8 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) else { if (vcpu->arch.apicv_active) kvm_x86_ops->sync_pir_to_irr(vcpu); - kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); + if (ioapic_in_kernel(vcpu->kvm)) + kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } if (is_guest_mode(vcpu)) -- cgit v1.2.3 From 30510387a5e45bfcf8190e03ec7aa15b295828e2 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Mon, 12 Nov 2018 12:23:14 +0000 Subject: svm: Add mutex_lock to protect apic_access_page_done on AMD systems There is a race condition when accessing kvm->arch.apic_access_page_done. Due to it, x86_set_memory_region will fail when creating the second vcpu for a svm guest. Add a mutex_lock to serialize the accesses to apic_access_page_done. This lock is also used by vmx for the same purpose. Signed-off-by: Wei Wang Signed-off-by: Amadeusz Juskowiak Signed-off-by: Julian Stecklina Signed-off-by: Suravee Suthikulpanit Reviewed-by: Joerg Roedel Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 0e21ccc46792..033ec01661b9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1664,20 +1664,23 @@ static u64 *avic_get_physical_id_entry(struct kvm_vcpu *vcpu, static int avic_init_access_page(struct kvm_vcpu *vcpu) { struct kvm *kvm = vcpu->kvm; - int ret; + int ret = 0; + mutex_lock(&kvm->slots_lock); if (kvm->arch.apic_access_page_done) - return 0; + goto out; - ret = x86_set_memory_region(kvm, - APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, - APIC_DEFAULT_PHYS_BASE, - PAGE_SIZE); + ret = __x86_set_memory_region(kvm, + APIC_ACCESS_PAGE_PRIVATE_MEMSLOT, + APIC_DEFAULT_PHYS_BASE, + PAGE_SIZE); if (ret) - return ret; + goto out; kvm->arch.apic_access_page_done = true; - return 0; +out: + mutex_unlock(&kvm->slots_lock); + return ret; } static int avic_init_backing_page(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 7f9ad1dfa3c768d1116c2dbacd7a09f9a871534e Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Sat, 17 Nov 2018 14:05:06 +0200 Subject: KVM: nVMX: Fix kernel info-leak when enabling KVM_CAP_HYPERV_ENLIGHTENED_VMCS more than once Consider the case that userspace enables KVM_CAP_HYPERV_ENLIGHTENED_VMCS twice: 1) kvm_vcpu_ioctl_enable_cap() is called to enable KVM_CAP_HYPERV_ENLIGHTENED_VMCS which calls nested_enable_evmcs(). 2) nested_enable_evmcs() sets enlightened_vmcs_enabled to true and fills vmcs_version which is then copied to userspace. 3) kvm_vcpu_ioctl_enable_cap() is called again to enable KVM_CAP_HYPERV_ENLIGHTENED_VMCS which calls nested_enable_evmcs(). 4) This time nested_enable_evmcs() just returns 0 as enlightened_vmcs_enabled is already true. *Without filling vmcs_version*. 5) kvm_vcpu_ioctl_enable_cap() continues as usual and copies *uninitialized* vmcs_version to userspace which leads to kernel info-leak. Fix this issue by simply changing nested_enable_evmcs() to always fill vmcs_version output argument. Even when enlightened_vmcs_enabled is already set to true. Note that SVM's nested_enable_evmcs() should not be modified because it always returns a non-zero value (-ENODEV) which results in kvm_vcpu_ioctl_enable_cap() skipping the copy of vmcs_version to userspace (as it should). Fixes: 57b119da3594 ("KVM: nVMX: add KVM_CAP_HYPERV_ENLIGHTENED_VMCS capability") Reported-by: syzbot+cfbc368e283d381f8cef@syzkaller.appspotmail.com Reviewed-by: Krish Sadhukhan Reviewed-by: Vitaly Kuznetsov Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index be6f13f1c25f..065f1df74000 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1611,12 +1611,6 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu, { struct vcpu_vmx *vmx = to_vmx(vcpu); - /* We don't support disabling the feature for simplicity. */ - if (vmx->nested.enlightened_vmcs_enabled) - return 0; - - vmx->nested.enlightened_vmcs_enabled = true; - /* * vmcs_version represents the range of supported Enlightened VMCS * versions: lower 8 bits is the minimal version, higher 8 bits is the @@ -1626,6 +1620,12 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu, if (vmcs_version) *vmcs_version = (KVM_EVMCS_VERSION << 8) | 1; + /* We don't support disabling the feature for simplicity. */ + if (vmx->nested.enlightened_vmcs_enabled) + return 0; + + vmx->nested.enlightened_vmcs_enabled = true; + vmx->nested.msrs.pinbased_ctls_high &= ~EVMCS1_UNSUPPORTED_PINCTRL; vmx->nested.msrs.entry_ctls_high &= ~EVMCS1_UNSUPPORTED_VMENTRY_CTRL; vmx->nested.msrs.exit_ctls_high &= ~EVMCS1_UNSUPPORTED_VMEXIT_CTRL; -- cgit v1.2.3 From bcbfbd8ec21096027f1ee13ce6c185e8175166f6 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Thu, 8 Nov 2018 00:43:06 +0200 Subject: KVM: x86: Fix kernel info-leak in KVM_HC_CLOCK_PAIRING hypercall kvm_pv_clock_pairing() allocates local var "struct kvm_clock_pairing clock_pairing" on stack and initializes all it's fields besides padding (clock_pairing.pad[]). Because clock_pairing var is written completely (including padding) to guest memory, failure to init struct padding results in kernel info-leak. Fix the issue by making sure to also init the padding with zeroes. Fixes: 55dd00a73a51 ("KVM: x86: add KVM_HC_CLOCK_PAIRING hypercall") Reported-by: syzbot+a8ef68d71211ba264f56@syzkaller.appspotmail.com Reviewed-by: Mark Kanda Signed-off-by: Liran Alon Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 64cae03b2c20..7e4be1f2f253 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6918,6 +6918,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, clock_pairing.nsec = ts.tv_nsec; clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle); clock_pairing.flags = 0; + memset(&clock_pairing.pad, 0, sizeof(clock_pairing.pad)); ret = 0; if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing, -- cgit v1.2.3 From f48b4711dd6e1cf282f9dfd159c14a305909c97c Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Tue, 20 Nov 2018 18:03:25 +0200 Subject: KVM: VMX: Update shared MSRs to be saved/restored on MSR_EFER.LMA changes When guest transitions from/to long-mode by modifying MSR_EFER.LMA, the list of shared MSRs to be saved/restored on guest<->host transitions is updated (See vmx_set_efer() call to setup_msrs()). On every entry to guest, vcpu_enter_guest() calls vmx_prepare_switch_to_guest(). This function should also take care of setting the shared MSRs to be saved/restored. However, the function does nothing in case we are already running with loaded guest state (vmx->loaded_cpu_state != NULL). This means that even when guest modifies MSR_EFER.LMA which results in updating the list of shared MSRs, it isn't being taken into account by vmx_prepare_switch_to_guest() because it happens while we are running with loaded guest state. To fix above mentioned issue, add a flag to mark that the list of shared MSRs has been updated and modify vmx_prepare_switch_to_guest() to set shared MSRs when running with host state *OR* list of shared MSRs has been updated. Note that this issue was mistakenly introduced by commit 678e315e78a7 ("KVM: vmx: add dedicated utility to access guest's kernel_gs_base") because previously vmx_set_efer() always called vmx_load_host_state() which resulted in vmx_prepare_switch_to_guest() to set shared MSRs. Fixes: 678e315e78a7 ("KVM: vmx: add dedicated utility to access guest's kernel_gs_base") Reported-by: Eyal Moscovici Reviewed-by: Mihai Carabas Reviewed-by: Liam Merwick Reviewed-by: Jim Mattson Signed-off-by: Liran Alon Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 065f1df74000..d09d67310012 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -985,6 +985,7 @@ struct vcpu_vmx { struct shared_msr_entry *guest_msrs; int nmsrs; int save_nmsrs; + bool guest_msrs_dirty; unsigned long host_idt_base; #ifdef CONFIG_X86_64 u64 msr_host_kernel_gs_base; @@ -2898,6 +2899,20 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) vmx->req_immediate_exit = false; + /* + * Note that guest MSRs to be saved/restored can also be changed + * when guest state is loaded. This happens when guest transitions + * to/from long-mode by setting MSR_EFER.LMA. + */ + if (!vmx->loaded_cpu_state || vmx->guest_msrs_dirty) { + vmx->guest_msrs_dirty = false; + for (i = 0; i < vmx->save_nmsrs; ++i) + kvm_set_shared_msr(vmx->guest_msrs[i].index, + vmx->guest_msrs[i].data, + vmx->guest_msrs[i].mask); + + } + if (vmx->loaded_cpu_state) return; @@ -2958,11 +2973,6 @@ static void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu) vmcs_writel(HOST_GS_BASE, gs_base); host_state->gs_base = gs_base; } - - for (i = 0; i < vmx->save_nmsrs; ++i) - kvm_set_shared_msr(vmx->guest_msrs[i].index, - vmx->guest_msrs[i].data, - vmx->guest_msrs[i].mask); } static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx) @@ -3437,6 +3447,7 @@ static void setup_msrs(struct vcpu_vmx *vmx) move_msr_up(vmx, index, save_nmsrs++); vmx->save_nmsrs = save_nmsrs; + vmx->guest_msrs_dirty = true; if (cpu_has_vmx_msr_bitmap()) vmx_update_msr_bitmap(&vmx->vcpu); -- cgit v1.2.3 From 354cb410d87314e2eda344feea84809e4261570a Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 8 Nov 2018 16:48:36 +0800 Subject: KVM: x86: fix empty-body warnings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We get the following warnings about empty statements when building with 'W=1': arch/x86/kvm/lapic.c:632:53: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] arch/x86/kvm/lapic.c:1907:42: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] arch/x86/kvm/lapic.c:1936:65: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] arch/x86/kvm/lapic.c:1975:44: warning: suggest braces around empty body in an ‘if’ statement [-Wempty-body] Rework the debug helper macro to get rid of these warnings. Signed-off-by: Yi Wang Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 02f2291dcf7e..c4533d05c214 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -55,7 +55,7 @@ #define PRIo64 "o" /* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */ -#define apic_debug(fmt, arg...) +#define apic_debug(fmt, arg...) do {} while (0) /* 14 is the version for Xeon and Pentium 8.4.8*/ #define APIC_VERSION (0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16)) -- cgit v1.2.3 From 1e4329ee2c52692ea42cc677fb2133519718b34a Mon Sep 17 00:00:00 2001 From: Yi Wang Date: Thu, 8 Nov 2018 11:22:21 +0800 Subject: x86/kvm/vmx: fix old-style function declaration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The inline keyword which is not at the beginning of the function declaration may trigger the following build warnings, so let's fix it: arch/x86/kvm/vmx.c:1309:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] arch/x86/kvm/vmx.c:5947:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] arch/x86/kvm/vmx.c:5985:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] arch/x86/kvm/vmx.c:6023:1: warning: ‘inline’ is not at beginning of declaration [-Wold-style-declaration] Signed-off-by: Yi Wang Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d09d67310012..5f43fcfc225b 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1308,7 +1308,7 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked); static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12, u16 error_code); static void vmx_update_msr_bitmap(struct kvm_vcpu *vcpu); -static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type); static DEFINE_PER_CPU(struct vmcs *, vmxarea); @@ -5956,7 +5956,7 @@ static void free_vpid(int vpid) spin_unlock(&vmx_vpid_lock); } -static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_disable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type) { int f = sizeof(unsigned long); @@ -5994,7 +5994,7 @@ static void __always_inline vmx_disable_intercept_for_msr(unsigned long *msr_bit } } -static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_enable_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type) { int f = sizeof(unsigned long); @@ -6032,7 +6032,7 @@ static void __always_inline vmx_enable_intercept_for_msr(unsigned long *msr_bitm } } -static void __always_inline vmx_set_intercept_for_msr(unsigned long *msr_bitmap, +static __always_inline void vmx_set_intercept_for_msr(unsigned long *msr_bitmap, u32 msr, int type, bool value) { if (value) -- cgit v1.2.3 From 326e742533bf0a23f0127d8ea62fb558ba665f08 Mon Sep 17 00:00:00 2001 From: Leonid Shatz Date: Tue, 6 Nov 2018 12:14:25 +0200 Subject: KVM: nVMX/nSVM: Fix bug which sets vcpu->arch.tsc_offset to L1 tsc_offset Since commit e79f245ddec1 ("X86/KVM: Properly update 'tsc_offset' to represent the running guest"), vcpu->arch.tsc_offset meaning was changed to always reflect the tsc_offset value set on active VMCS. Regardless if vCPU is currently running L1 or L2. However, above mentioned commit failed to also change kvm_vcpu_write_tsc_offset() to set vcpu->arch.tsc_offset correctly. This is because vmx_write_tsc_offset() could set the tsc_offset value in active VMCS to given offset parameter *plus vmcs12->tsc_offset*. However, kvm_vcpu_write_tsc_offset() just sets vcpu->arch.tsc_offset to given offset parameter. Without taking into account the possible addition of vmcs12->tsc_offset. (Same is true for SVM case). Fix this issue by changing kvm_x86_ops->write_tsc_offset() to return actually set tsc_offset in active VMCS and modify kvm_vcpu_write_tsc_offset() to set returned value in vcpu->arch.tsc_offset. In addition, rename write_tsc_offset() callback to write_l1_tsc_offset() to make it clear that it is meant to set L1 TSC offset. Fixes: e79f245ddec1 ("X86/KVM: Properly update 'tsc_offset' to represent the running guest") Reviewed-by: Liran Alon Reviewed-by: Mihai Carabas Reviewed-by: Krish Sadhukhan Signed-off-by: Leonid Shatz Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 ++- arch/x86/kvm/svm.c | 5 +++-- arch/x86/kvm/vmx.c | 21 +++++++++------------ arch/x86/kvm/x86.c | 6 +++--- 4 files changed, 17 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 55e51ff7e421..fbda5a917c5b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1094,7 +1094,8 @@ struct kvm_x86_ops { bool (*has_wbinvd_exit)(void); u64 (*read_l1_tsc_offset)(struct kvm_vcpu *vcpu); - void (*write_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); + /* Returns actual tsc_offset set in active VMCS */ + u64 (*write_l1_tsc_offset)(struct kvm_vcpu *vcpu, u64 offset); void (*get_exit_info)(struct kvm_vcpu *vcpu, u64 *info1, u64 *info2); diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 033ec01661b9..a24733aade4c 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1446,7 +1446,7 @@ static u64 svm_read_l1_tsc_offset(struct kvm_vcpu *vcpu) return vcpu->arch.tsc_offset; } -static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) +static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { struct vcpu_svm *svm = to_svm(vcpu); u64 g_tsc_offset = 0; @@ -1464,6 +1464,7 @@ static void svm_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) svm->vmcb->control.tsc_offset = offset + g_tsc_offset; mark_dirty(svm->vmcb, VMCB_INTERCEPTS); + return svm->vmcb->control.tsc_offset; } static void avic_init_vmcb(struct vcpu_svm *svm) @@ -7152,7 +7153,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .has_wbinvd_exit = svm_has_wbinvd_exit, .read_l1_tsc_offset = svm_read_l1_tsc_offset, - .write_tsc_offset = svm_write_tsc_offset, + .write_l1_tsc_offset = svm_write_l1_tsc_offset, .set_tdp_cr3 = set_tdp_cr3, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5f43fcfc225b..764c23dc444f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3464,11 +3464,9 @@ static u64 vmx_read_l1_tsc_offset(struct kvm_vcpu *vcpu) return vcpu->arch.tsc_offset; } -/* - * writes 'offset' into guest's timestamp counter offset register - */ -static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) +static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { + u64 active_offset = offset; if (is_guest_mode(vcpu)) { /* * We're here if L1 chose not to trap WRMSR to TSC. According @@ -3476,17 +3474,16 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) * set for L2 remains unchanged, and still needs to be added * to the newly set TSC to get L2's TSC. */ - struct vmcs12 *vmcs12; - /* recalculate vmcs02.TSC_OFFSET: */ - vmcs12 = get_vmcs12(vcpu); - vmcs_write64(TSC_OFFSET, offset + - (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ? - vmcs12->tsc_offset : 0)); + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING)) + active_offset += vmcs12->tsc_offset; } else { trace_kvm_write_tsc_offset(vcpu->vcpu_id, vmcs_read64(TSC_OFFSET), offset); - vmcs_write64(TSC_OFFSET, offset); } + + vmcs_write64(TSC_OFFSET, active_offset); + return active_offset; } /* @@ -15074,7 +15071,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .has_wbinvd_exit = cpu_has_vmx_wbinvd_exit, .read_l1_tsc_offset = vmx_read_l1_tsc_offset, - .write_tsc_offset = vmx_write_tsc_offset, + .write_l1_tsc_offset = vmx_write_l1_tsc_offset, .set_tdp_cr3 = vmx_set_cr3, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 7e4be1f2f253..d02937760c3b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1665,8 +1665,7 @@ EXPORT_SYMBOL_GPL(kvm_read_l1_tsc); static void kvm_vcpu_write_tsc_offset(struct kvm_vcpu *vcpu, u64 offset) { - kvm_x86_ops->write_tsc_offset(vcpu, offset); - vcpu->arch.tsc_offset = offset; + vcpu->arch.tsc_offset = kvm_x86_ops->write_l1_tsc_offset(vcpu, offset); } static inline bool kvm_check_tsc_unstable(void) @@ -1794,7 +1793,8 @@ EXPORT_SYMBOL_GPL(kvm_write_tsc); static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu, s64 adjustment) { - kvm_vcpu_write_tsc_offset(vcpu, vcpu->arch.tsc_offset + adjustment); + u64 tsc_offset = kvm_x86_ops->read_l1_tsc_offset(vcpu); + kvm_vcpu_write_tsc_offset(vcpu, tsc_offset + adjustment); } static inline void adjust_tsc_offset_host(struct kvm_vcpu *vcpu, s64 adjustment) -- cgit v1.2.3 From 72aeb60c52bf74a0eeec77d6b41ce40145697d76 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Thu, 1 Nov 2018 10:57:39 +0200 Subject: KVM: nVMX: Verify eVMCS revision id match supported eVMCS version on eVMCS VMPTRLD According to TLFS section 16.11.2 Enlightened VMCS, the first u32 field of eVMCS should specify eVMCS VersionNumber. This version should be in the range of supported eVMCS versions exposed to guest via CPUID.0x4000000A.EAX[0:15]. The range which KVM expose to guest in this CPUID field should be the same as the value returned in vmcs_version by nested_enable_evmcs(). According to the above, eVMCS VMPTRLD should verify that version specified in given eVMCS is in the supported range. However, current code mistakenly verfies this field against VMCS12_REVISION. One can also see that when KVM use eVMCS, it makes sure that alloc_vmcs_cpu() sets allocated eVMCS revision_id to KVM_EVMCS_VERSION. Obvious fix should just change eVMCS VMPTRLD to verify first u32 field of eVMCS is equal to KVM_EVMCS_VERSION. However, it turns out that Microsoft Hyper-V fails to comply to their own invented interface: When Hyper-V use eVMCS, it just sets first u32 field of eVMCS to revision_id specified in MSR_IA32_VMX_BASIC (In our case: VMCS12_REVISION). Instead of used eVMCS version number which is one of the supported versions specified in CPUID.0x4000000A.EAX[0:15]. To overcome Hyper-V bug, we accept either a supported eVMCS version or VMCS12_REVISION as valid values for first u32 field of eVMCS. Cc: Vitaly Kuznetsov Reviewed-by: Nikita Leshenko Reviewed-by: Mark Kanda Signed-off-by: Liran Alon Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 764c23dc444f..0ffd8b2dbfe2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9378,7 +9378,30 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, vmx->nested.hv_evmcs = kmap(vmx->nested.hv_evmcs_page); - if (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION) { + /* + * Currently, KVM only supports eVMCS version 1 + * (== KVM_EVMCS_VERSION) and thus we expect guest to set this + * value to first u32 field of eVMCS which should specify eVMCS + * VersionNumber. + * + * Guest should be aware of supported eVMCS versions by host by + * examining CPUID.0x4000000A.EAX[0:15]. Host userspace VMM is + * expected to set this CPUID leaf according to the value + * returned in vmcs_version from nested_enable_evmcs(). + * + * However, it turns out that Microsoft Hyper-V fails to comply + * to their own invented interface: When Hyper-V use eVMCS, it + * just sets first u32 field of eVMCS to revision_id specified + * in MSR_IA32_VMX_BASIC. Instead of used eVMCS version number + * which is one of the supported versions specified in + * CPUID.0x4000000A.EAX[0:15]. + * + * To overcome Hyper-V bug, we accept here either a supported + * eVMCS version or VMCS12 revision_id as valid values for first + * u32 field of eVMCS. + */ + if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) && + (vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) { nested_release_evmcs(vcpu); return 0; } -- cgit v1.2.3 From 52ad7eb3d668867f9ee2e34d715cfb4860d36a93 Mon Sep 17 00:00:00 2001 From: Liran Alon Date: Tue, 13 Nov 2018 17:44:46 +0200 Subject: KVM: nVMX: vmcs12 revision_id is always VMCS12_REVISION even when copied from eVMCS vmcs12 represents the per-CPU cache of L1 active vmcs12. This cache can be loaded by one of the following: 1) Guest making a vmcs12 active by exeucting VMPTRLD 2) Guest specifying eVMCS in VP assist page and executing VMLAUNCH/VMRESUME. Either way, vmcs12 should have revision_id of VMCS12_REVISION. Which is not equal to eVMCS revision_id which specifies used VersionNumber of eVMCS struct (e.g. KVM_EVMCS_VERSION). Specifically, this causes an issue in restoring a nested VM state because vmx_set_nested_state() verifies that vmcs12->revision_id is equal to VMCS12_REVISION which was not true in case vmcs12 was populated from an eVMCS by vmx_get_nested_state() which calls copy_enlightened_to_vmcs12(). Reviewed-by: Darren Kenny Signed-off-by: Liran Alon Reviewed-by: Vitaly Kuznetsov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0ffd8b2dbfe2..02edd9960e9d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8673,8 +8673,6 @@ static int copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx) struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12; struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs; - vmcs12->hdr.revision_id = evmcs->revision_id; - /* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE */ vmcs12->tpr_threshold = evmcs->tpr_threshold; vmcs12->guest_rip = evmcs->guest_rip; @@ -9422,9 +9420,11 @@ static int nested_vmx_handle_enlightened_vmptrld(struct kvm_vcpu *vcpu, * present in struct hv_enlightened_vmcs, ...). Make sure there * are no leftovers. */ - if (from_launch) - memset(vmx->nested.cached_vmcs12, 0, - sizeof(*vmx->nested.cached_vmcs12)); + if (from_launch) { + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + memset(vmcs12, 0, sizeof(*vmcs12)); + vmcs12->hdr.revision_id = VMCS12_REVISION; + } } return 1; -- cgit v1.2.3 From 0e0fee5c539b61fdd098332e0e2cc375d9073706 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Wed, 31 Oct 2018 14:53:57 -0700 Subject: kvm: mmu: Fix race in emulated page table writes When a guest page table is updated via an emulated write, kvm_mmu_pte_write() is called to update the shadow PTE using the just written guest PTE value. But if two emulated guest PTE writes happened concurrently, it is possible that the guest PTE and the shadow PTE end up being out of sync. Emulated writes do not mark the shadow page as unsync-ed, so this inconsistency will not be resolved even by a guest TLB flush (unless the page was marked as unsync-ed at some other point). This is fixed by re-reading the current value of the guest PTE after the MMU lock has been acquired instead of just using the value that was written prior to calling kvm_mmu_pte_write(). Signed-off-by: Junaid Shahid Reviewed-by: Wanpeng Li Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 27 +++++++++------------------ 1 file changed, 9 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cf5f572f2305..7c03c0f35444 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -5074,9 +5074,9 @@ static bool need_remote_flush(u64 old, u64 new) } static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, - const u8 *new, int *bytes) + int *bytes) { - u64 gentry; + u64 gentry = 0; int r; /* @@ -5088,22 +5088,12 @@ static u64 mmu_pte_write_fetch_gpte(struct kvm_vcpu *vcpu, gpa_t *gpa, /* Handle a 32-bit guest writing two halves of a 64-bit gpte */ *gpa &= ~(gpa_t)7; *bytes = 8; - r = kvm_vcpu_read_guest(vcpu, *gpa, &gentry, 8); - if (r) - gentry = 0; - new = (const u8 *)&gentry; } - switch (*bytes) { - case 4: - gentry = *(const u32 *)new; - break; - case 8: - gentry = *(const u64 *)new; - break; - default: - gentry = 0; - break; + if (*bytes == 4 || *bytes == 8) { + r = kvm_vcpu_read_guest_atomic(vcpu, *gpa, &gentry, *bytes); + if (r) + gentry = 0; } return gentry; @@ -5207,8 +5197,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, pgprintk("%s: gpa %llx bytes %d\n", __func__, gpa, bytes); - gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, new, &bytes); - /* * No need to care whether allocation memory is successful * or not since pte prefetch is skiped if it does not have @@ -5217,6 +5205,9 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, mmu_topup_memory_caches(vcpu); spin_lock(&vcpu->kvm->mmu_lock); + + gentry = mmu_pte_write_fetch_gpte(vcpu, &gpa, &bytes); + ++vcpu->kvm->stat.mmu_pte_write; kvm_mmu_audit(vcpu, AUDIT_PRE_PTE_WRITE); -- cgit v1.2.3 From fd65d3142f734bc4376053c8d75670041903134d Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 22 May 2018 09:54:20 -0700 Subject: kvm: svm: Ensure an IBPB on all affected CPUs when freeing a vmcb Previously, we only called indirect_branch_prediction_barrier on the logical CPU that freed a vmcb. This function should be called on all logical CPUs that last loaded the vmcb in question. Fixes: 15d45071523d ("KVM/x86: Add IBPB support") Reported-by: Neel Natu Signed-off-by: Jim Mattson Reviewed-by: Konrad Rzeszutek Wilk Cc: stable@vger.kernel.org Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a24733aade4c..cc6467b35a85 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2193,21 +2193,31 @@ out: return ERR_PTR(err); } +static void svm_clear_current_vmcb(struct vmcb *vmcb) +{ + int i; + + for_each_online_cpu(i) + cmpxchg(&per_cpu(svm_data, i)->current_vmcb, vmcb, NULL); +} + static void svm_free_vcpu(struct kvm_vcpu *vcpu) { struct vcpu_svm *svm = to_svm(vcpu); + /* + * The vmcb page can be recycled, causing a false negative in + * svm_vcpu_load(). So, ensure that no logical CPU has this + * vmcb page recorded as its current vmcb. + */ + svm_clear_current_vmcb(svm->vmcb); + __free_page(pfn_to_page(__sme_clr(svm->vmcb_pa) >> PAGE_SHIFT)); __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); __free_page(virt_to_page(svm->nested.hsave)); __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); kvm_vcpu_uninit(vcpu); kmem_cache_free(kvm_vcpu_cache, svm); - /* - * The vmcb page can be recycled, causing a false negative in - * svm_vcpu_load(). So do a full IBPB now. - */ - indirect_branch_prediction_barrier(); } static void svm_vcpu_load(struct kvm_vcpu *vcpu, int cpu) -- cgit v1.2.3 From 814cedbc0b78d75e335c96da9b9391142eab5600 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Tue, 27 Nov 2018 14:04:04 +0100 Subject: s390/mm: correct pgtable_bytes on page table downgrade The downgrade of a page table from 3 levels to 2 levels for a 31-bit compat process removes a pmd table which has to be counted against pgtable_bytes. Signed-off-by: Martin Schwidefsky --- arch/s390/mm/pgalloc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 814f26520aa2..6791562779ee 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -131,6 +131,7 @@ void crst_table_downgrade(struct mm_struct *mm) } pgd = mm->pgd; + mm_dec_nr_pmds(mm); mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN); mm->context.asce_limit = _REGION3_SIZE; mm->context.asce = __pa(mm->pgd) | _ASCE_TABLE_LENGTH | -- cgit v1.2.3 From ac26d1f74cfc19c8dc9d533b5f20e99dbee3d9bd Mon Sep 17 00:00:00 2001 From: Jann Horn Date: Tue, 27 Nov 2018 14:32:00 +0100 Subject: x86/fpu: Use the correct exception table macro in the XSTATE_OP wrapper Commit 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess fixups") incorrectly replaced the fixup entry for XSTATE_OP with a user-#PF-only fixup. XRSTOR can also raise #GP if the xstate content is invalid, and _ASM_EXTABLE_UA doesn't expect that. Change this fixup back to _ASM_EXTABLE so that #GP gets fixed up. Fixes: 75045f77f7a7 ("x86/extable: Introduce _ASM_EXTABLE_UA for uaccess fixups") Reported-by: Sebastian Andrzej Siewior Signed-off-by: Jann Horn Signed-off-by: Borislav Petkov Acked-by: Sebastian Andrzej Siewior Tested-by: Sebastian Andrzej Siewior Cc: "H. Peter Anvin" Cc: "Naveen N. Rao" Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Thomas Gleixner Cc: kernel-hardening@lists.openwall.com Cc: x86-ml Link: https://lkml.kernel.org/r/20181126165957.xhsyu2dhyy45mrjo@linutronix.de Link: https://lkml.kernel.org/r/20181127133200.38322-1-jannh@google.com --- arch/x86/include/asm/fpu/internal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index 5f7290e6e954..69dcdf195b61 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -226,7 +226,7 @@ static inline void copy_fxregs_to_kernel(struct fpu *fpu) "3: movl $-2,%[err]\n\t" \ "jmp 2b\n\t" \ ".popsection\n\t" \ - _ASM_EXTABLE_UA(1b, 3b) \ + _ASM_EXTABLE(1b, 3b) \ : [err] "=r" (err) \ : "D" (st), "m" (*st), "a" (lmask), "d" (hmask) \ : "memory") -- cgit v1.2.3 From 07f7175b43827640d1e69c9eded89aa089a234b4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:14:10 -0500 Subject: x86/function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have x86 use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: x86@kernel.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/x86/kernel/ftrace.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 01ebcb6f263e..7ee8067cbf45 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -994,7 +994,6 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, { unsigned long old; int faulted; - struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long) &return_to_handler; @@ -1046,19 +1045,7 @@ void prepare_ftrace_return(unsigned long self_addr, unsigned long *parent, return; } - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { + if (function_graph_enter(old, self_addr, frame_pointer, parent)) *parent = old; - return; - } - - if (ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, parent) == -EBUSY) { - *parent = old; - return; - } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3 From f1f5b14afd7cce39e6a9b25c685e1ea34c231096 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:19:26 -0500 Subject: ARM: function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have ARM use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/arm/kernel/ftrace.c | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/arm/kernel/ftrace.c b/arch/arm/kernel/ftrace.c index 0142fcfcc3d3..bda949fd84e8 100644 --- a/arch/arm/kernel/ftrace.c +++ b/arch/arm/kernel/ftrace.c @@ -183,9 +183,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long frame_pointer) { unsigned long return_hooker = (unsigned long) &return_to_handler; - struct ftrace_graph_ent trace; unsigned long old; - int err; if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; @@ -193,21 +191,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, old = *parent; *parent = return_hooker; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { + if (function_graph_enter(old, self_addr, frame_pointer, NULL)) *parent = old; - return; - } - - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, NULL); - if (err == -EBUSY) { - *parent = old; - return; - } } #ifdef CONFIG_DYNAMIC_FTRACE -- cgit v1.2.3 From 01e0ab2c4ff12358f15a856fd1a7bbea0670972b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:21:51 -0500 Subject: arm64: function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have arm64 use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: Catalin Marinas Cc: linux-arm-kernel@lists.infradead.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Acked-by: Will Deacon Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/arm64/kernel/ftrace.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 50986e388d2b..57e962290df3 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -216,8 +216,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, { unsigned long return_hooker = (unsigned long)&return_to_handler; unsigned long old; - struct ftrace_graph_ent trace; - int err; if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; @@ -229,18 +227,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, */ old = *parent; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - return; - - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, NULL); - if (err == -EBUSY) - return; - else + if (!function_graph_enter(old, self_addr, frame_pointer, NULL)) *parent = return_hooker; } -- cgit v1.2.3 From 556763e5a500d71879d632867b75826551acd49c Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:23:30 -0500 Subject: microblaze: function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have microblaze use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: Michal Simek Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/microblaze/kernel/ftrace.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/microblaze/kernel/ftrace.c b/arch/microblaze/kernel/ftrace.c index d57563c58a26..224eea40e1ee 100644 --- a/arch/microblaze/kernel/ftrace.c +++ b/arch/microblaze/kernel/ftrace.c @@ -22,8 +22,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) { unsigned long old; - int faulted, err; - struct ftrace_graph_ent trace; + int faulted; unsigned long return_hooker = (unsigned long) &return_to_handler; @@ -63,18 +62,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL); - if (err == -EBUSY) { + if (function_graph_enter(old, self_addr, 0, NULL)) *parent = old; - return; - } - - trace.func = self_addr; - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; - *parent = old; - } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3 From 8712b27c5723c26400a2b350faf1d6d9fd7ffaad Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:25:18 -0500 Subject: MIPS: function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have MIPS use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: Ralf Baechle Cc: Paul Burton Cc: James Hogan Cc: linux-mips@linux-mips.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/mips/kernel/ftrace.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/mips/kernel/ftrace.c b/arch/mips/kernel/ftrace.c index 7f3dfdbc3657..b122cbb4aad1 100644 --- a/arch/mips/kernel/ftrace.c +++ b/arch/mips/kernel/ftrace.c @@ -322,7 +322,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, unsigned long fp) { unsigned long old_parent_ra; - struct ftrace_graph_ent trace; unsigned long return_hooker = (unsigned long) &return_to_handler; int faulted, insns; @@ -369,12 +368,6 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, if (unlikely(faulted)) goto out; - if (ftrace_push_return_trace(old_parent_ra, self_ra, &trace.depth, fp, - NULL) == -EBUSY) { - *parent_ra_addr = old_parent_ra; - return; - } - /* * Get the recorded ip of the current mcount calling site in the * __mcount_loc section, which will be used to filter the function @@ -382,13 +375,10 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra, */ insns = core_kernel_text(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1; - trace.func = self_ra - (MCOUNT_INSN_SIZE * insns); + self_ra -= (MCOUNT_INSN_SIZE * insns); - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; + if (function_graph_enter(old_parent_ra, self_ra, fp, NULL)) *parent_ra_addr = old_parent_ra; - } return; out: ftrace_graph_stop(); -- cgit v1.2.3 From d48ebb24866edea2c35be02a878f25bc65529370 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:26:35 -0500 Subject: nds32: function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have nds32 use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: Greentime Hu Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/nds32/kernel/ftrace.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/nds32/kernel/ftrace.c b/arch/nds32/kernel/ftrace.c index a0a9679ad5de..8a41372551ff 100644 --- a/arch/nds32/kernel/ftrace.c +++ b/arch/nds32/kernel/ftrace.c @@ -211,29 +211,15 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, unsigned long frame_pointer) { unsigned long return_hooker = (unsigned long)&return_to_handler; - struct ftrace_graph_ent trace; unsigned long old; - int err; if (unlikely(atomic_read(¤t->tracing_graph_pause))) return; old = *parent; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - return; - - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, NULL); - - if (err == -EBUSY) - return; - - *parent = return_hooker; + if (!function_graph_enter(old, self_addr, frame_pointer, NULL)) + *parent = return_hooker; } noinline void ftrace_graph_caller(void) -- cgit v1.2.3 From a87532c78d291265efadc4b20a8c7a70cd59ea29 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:27:43 -0500 Subject: parisc: function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have parisc use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: linux-parisc@vger.kernel.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/parisc/kernel/ftrace.c | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/parisc/kernel/ftrace.c b/arch/parisc/kernel/ftrace.c index 6fa8535d3cce..e46a4157a894 100644 --- a/arch/parisc/kernel/ftrace.c +++ b/arch/parisc/kernel/ftrace.c @@ -30,7 +30,6 @@ static void __hot prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) { unsigned long old; - struct ftrace_graph_ent trace; extern int parisc_return_to_handler; if (unlikely(ftrace_graph_is_dead())) @@ -41,19 +40,9 @@ static void __hot prepare_ftrace_return(unsigned long *parent, old = *parent; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - return; - - if (ftrace_push_return_trace(old, self_addr, &trace.depth, - 0, NULL) == -EBUSY) - return; - - /* activate parisc_return_to_handler() as return point */ - *parent = (unsigned long) &parisc_return_to_handler; + if (!function_graph_enter(old, self_addr, 0, NULL)) + /* activate parisc_return_to_handler() as return point */ + *parent = (unsigned long) &parisc_return_to_handler; } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3 From fe60522ec60082a1dd735691b82c64f65d4ad15e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:28:53 -0500 Subject: powerpc/function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have powerpc use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: linuxppc-dev@lists.ozlabs.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/powerpc/kernel/trace/ftrace.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 4bf051d3e21e..b65c8a34ad6e 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -950,7 +950,6 @@ int ftrace_disable_ftrace_graph_caller(void) */ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) { - struct ftrace_graph_ent trace; unsigned long return_hooker; if (unlikely(ftrace_graph_is_dead())) @@ -961,18 +960,8 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) return_hooker = ppc_function_entry(return_to_handler); - trace.func = ip; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - goto out; - - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0, - NULL) == -EBUSY) - goto out; - - parent = return_hooker; + if (!function_graph_enter(parent, ip, 0, NULL)) + parent = return_hooker; out: return parent; } -- cgit v1.2.3 From e949b6db51dc172a35c962bc4414ca148315fe21 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:31:44 -0500 Subject: riscv/function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have riscv use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: Greentime Hu Cc: Alan Kao Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Palmer Dabbelt Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/riscv/kernel/ftrace.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/riscv/kernel/ftrace.c b/arch/riscv/kernel/ftrace.c index 1157b6b52d25..c433f6d3dd64 100644 --- a/arch/riscv/kernel/ftrace.c +++ b/arch/riscv/kernel/ftrace.c @@ -132,7 +132,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, { unsigned long return_hooker = (unsigned long)&return_to_handler; unsigned long old; - struct ftrace_graph_ent trace; int err; if (unlikely(atomic_read(¤t->tracing_graph_pause))) @@ -144,17 +143,8 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, */ old = *parent; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - if (!ftrace_graph_entry(&trace)) - return; - - err = ftrace_push_return_trace(old, self_addr, &trace.depth, - frame_pointer, parent); - if (err == -EBUSY) - return; - *parent = return_hooker; + if (function_graph_enter(old, self_addr, frame_pointer, parent)) + *parent = return_hooker; } #ifdef CONFIG_DYNAMIC_FTRACE -- cgit v1.2.3 From 18588e1487b19e45bd90bd55ec8d3a1d44f3257f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:33:17 -0500 Subject: s390/function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have s390 use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Acked-by: Martin Schwidefsky Cc: Heiko Carstens Cc: Julian Wiedmann Cc: linux-s390@vger.kernel.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/s390/kernel/ftrace.c | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 84be7f02d0c2..39b13d71a8fe 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -203,22 +203,13 @@ device_initcall(ftrace_plt_init); */ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip) { - struct ftrace_graph_ent trace; - if (unlikely(ftrace_graph_is_dead())) goto out; if (unlikely(atomic_read(¤t->tracing_graph_pause))) goto out; ip -= MCOUNT_INSN_SIZE; - trace.func = ip; - trace.depth = current->curr_ret_stack + 1; - /* Only trace if the calling function expects to. */ - if (!ftrace_graph_entry(&trace)) - goto out; - if (ftrace_push_return_trace(parent, ip, &trace.depth, 0, - NULL) == -EBUSY) - goto out; - parent = (unsigned long) return_to_handler; + if (!function_graph_enter(parent, ip, 0, NULL)) + parent = (unsigned long) return_to_handler; out: return parent; } -- cgit v1.2.3 From bc715ee4dbc5db462c59b9cfba92d31b3274fe3a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:35:37 -0500 Subject: sh/function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have superh use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: Yoshinori Sato Cc: Rich Felker Cc: linux-sh@vger.kernel.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/sh/kernel/ftrace.c | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/sh/kernel/ftrace.c b/arch/sh/kernel/ftrace.c index 96dd9f7da250..1b04270e5460 100644 --- a/arch/sh/kernel/ftrace.c +++ b/arch/sh/kernel/ftrace.c @@ -321,8 +321,7 @@ int ftrace_disable_ftrace_graph_caller(void) void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) { unsigned long old; - int faulted, err; - struct ftrace_graph_ent trace; + int faulted; unsigned long return_hooker = (unsigned long)&return_to_handler; if (unlikely(ftrace_graph_is_dead())) @@ -365,18 +364,7 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr) return; } - err = ftrace_push_return_trace(old, self_addr, &trace.depth, 0, NULL); - if (err == -EBUSY) { + if (function_graph_enter(old, self_addr, 0, NULL)) __raw_writel(old, parent); - return; - } - - trace.func = self_addr; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) { - current->curr_ret_stack--; - __raw_writel(old, parent); - } } #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ -- cgit v1.2.3 From 9c4bf5e0db164f330a2d3e128e9832661f69f0e9 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Sun, 18 Nov 2018 17:37:40 -0500 Subject: sparc/function_graph: Simplify with function_graph_enter() The function_graph_enter() function does the work of calling the function graph hook function and the management of the shadow stack, simplifying the work done in the architecture dependent prepare_ftrace_return(). Have sparc use the new code, and remove the shadow stack management as well as having to set up the trace structure. This is needed to prepare for a fix of a design bug on how the curr_ret_stack is used. Cc: "David S. Miller" Cc: sparclinux@vger.kernel.org Cc: stable@kernel.org Fixes: 03274a3ffb449 ("tracing/fgraph: Adjust fgraph depth before calling trace return callback") Reviewed-by: Masami Hiramatsu Signed-off-by: Steven Rostedt (VMware) --- arch/sparc/kernel/ftrace.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/ftrace.c b/arch/sparc/kernel/ftrace.c index 915dda4ae412..684b84ce397f 100644 --- a/arch/sparc/kernel/ftrace.c +++ b/arch/sparc/kernel/ftrace.c @@ -126,20 +126,11 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long frame_pointer) { unsigned long return_hooker = (unsigned long) &return_to_handler; - struct ftrace_graph_ent trace; if (unlikely(atomic_read(¤t->tracing_graph_pause))) return parent + 8UL; - trace.func = self_addr; - trace.depth = current->curr_ret_stack + 1; - - /* Only trace if the calling function expects to */ - if (!ftrace_graph_entry(&trace)) - return parent + 8UL; - - if (ftrace_push_return_trace(parent, self_addr, &trace.depth, - frame_pointer, NULL) == -EBUSY) + if (function_graph_enter(parent, self_addr, frame_pointer, NULL)) return parent + 8UL; return return_hooker; -- cgit v1.2.3 From 60c8144afc287ef09ce8c1230c6aa972659ba1bb Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 27 Nov 2018 14:41:37 +0100 Subject: x86/MCE/AMD: Fix the thresholding machinery initialization order MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the code sets up the thresholding interrupt vector and only then goes about initializing the thresholding banks. Which is wrong, because an early thresholding interrupt would cause a NULL pointer dereference when accessing those banks and prevent the machine from booting. Therefore, set the thresholding interrupt vector only *after* having initialized the banks successfully. Fixes: 18807ddb7f88 ("x86/mce/AMD: Reset Threshold Limit after logging error") Reported-by: Rafał Miłecki Reported-by: John Clemens Signed-off-by: Borislav Petkov Tested-by: Rafał Miłecki Tested-by: John Clemens Cc: Aravind Gopalakrishnan Cc: linux-edac@vger.kernel.org Cc: stable@vger.kernel.org Cc: Tony Luck Cc: x86@kernel.org Cc: Yazen Ghannam Link: https://lkml.kernel.org/r/20181127101700.2964-1-zajec5@gmail.com Link: https://bugzilla.kernel.org/show_bug.cgi?id=201291 --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index dd33c357548f..e12454e21b8a 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -56,7 +56,7 @@ /* Threshold LVT offset is at MSR0xC0000410[15:12] */ #define SMCA_THR_LVT_OFF 0xF000 -static bool thresholding_en; +static bool thresholding_irq_en; static const char * const th_names[] = { "load_store", @@ -534,9 +534,8 @@ prepare_threshold_block(unsigned int bank, unsigned int block, u32 addr, set_offset: offset = setup_APIC_mce_threshold(offset, new); - - if ((offset == new) && (mce_threshold_vector != amd_threshold_interrupt)) - mce_threshold_vector = amd_threshold_interrupt; + if (offset == new) + thresholding_irq_en = true; done: mce_threshold_block_init(&b, offset); @@ -1357,9 +1356,6 @@ int mce_threshold_remove_device(unsigned int cpu) { unsigned int bank; - if (!thresholding_en) - return 0; - for (bank = 0; bank < mca_cfg.banks; ++bank) { if (!(per_cpu(bank_map, cpu) & (1 << bank))) continue; @@ -1377,9 +1373,6 @@ int mce_threshold_create_device(unsigned int cpu) struct threshold_bank **bp; int err = 0; - if (!thresholding_en) - return 0; - bp = per_cpu(threshold_banks, cpu); if (bp) return 0; @@ -1408,9 +1401,6 @@ static __init int threshold_init_device(void) { unsigned lcpu = 0; - if (mce_threshold_vector == amd_threshold_interrupt) - thresholding_en = true; - /* to hit CPUs online before the notifier is up */ for_each_online_cpu(lcpu) { int err = mce_threshold_create_device(lcpu); @@ -1419,6 +1409,9 @@ static __init int threshold_init_device(void) return err; } + if (thresholding_irq_en) + mce_threshold_vector = amd_threshold_interrupt; + return 0; } /* -- cgit v1.2.3 From 4cd24de3a0980bf3100c9dcb08ef65ca7c31af48 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Fri, 2 Nov 2018 01:45:41 -0700 Subject: x86/retpoline: Make CONFIG_RETPOLINE depend on compiler support Since retpoline capable compilers are widely available, make CONFIG_RETPOLINE hard depend on the compiler capability. Break the build when CONFIG_RETPOLINE is enabled and the compiler does not support it. Emit an error message in that case: "arch/x86/Makefile:226: *** You are building kernel with non-retpoline compiler, please update your compiler.. Stop." [dwmw: Fail the build with non-retpoline compiler] Suggested-by: Peter Zijlstra Signed-off-by: Zhenzhong Duan Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: Borislav Petkov Cc: Daniel Borkmann Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Andy Lutomirski Cc: Masahiro Yamada Cc: Michal Marek Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/cca0cb20-f9e2-4094-840b-fb0f8810cd34@default --- arch/x86/Kconfig | 4 ---- arch/x86/Makefile | 5 +++-- arch/x86/include/asm/nospec-branch.h | 10 ++++++---- arch/x86/kernel/cpu/bugs.c | 2 +- 4 files changed, 10 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9d734f3c8234..b5286ad2a982 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -444,10 +444,6 @@ config RETPOLINE branches. Requires a compiler with -mindirect-branch=thunk-extern support for full protection. The kernel may run slower. - Without compiler support, at least indirect branches in assembler - code are eliminated. Since this includes the syscall entry path, - it is not entirely pointless. - config INTEL_RDT bool "Intel Resource Director Technology support" depends on X86 && CPU_SUP_INTEL diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 88398fdf8129..f5d7f4134524 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -220,9 +220,10 @@ KBUILD_CFLAGS += -fno-asynchronous-unwind-tables # Avoid indirect branches in kernel to deal with Spectre ifdef CONFIG_RETPOLINE -ifneq ($(RETPOLINE_CFLAGS),) - KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) -DRETPOLINE +ifeq ($(RETPOLINE_CFLAGS),) + $(error You are building kernel with non-retpoline compiler, please update your compiler.) endif + KBUILD_CFLAGS += $(RETPOLINE_CFLAGS) endif archscripts: scripts_basic diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 80dc14422495..8b09cbb2d52c 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -162,11 +162,12 @@ _ASM_PTR " 999b\n\t" \ ".popsection\n\t" -#if defined(CONFIG_X86_64) && defined(RETPOLINE) +#ifdef CONFIG_RETPOLINE +#ifdef CONFIG_X86_64 /* - * Since the inline asm uses the %V modifier which is only in newer GCC, - * the 64-bit one is dependent on RETPOLINE not CONFIG_RETPOLINE. + * Inline asm uses the %V modifier which is only in newer GCC + * which is ensured when CONFIG_RETPOLINE is defined. */ # define CALL_NOSPEC \ ANNOTATE_NOSPEC_ALTERNATIVE \ @@ -181,7 +182,7 @@ X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "r" (addr) -#elif defined(CONFIG_X86_32) && defined(CONFIG_RETPOLINE) +#else /* CONFIG_X86_32 */ /* * For i386 we use the original ret-equivalent retpoline, because * otherwise we'll run out of registers. We don't care about CET @@ -211,6 +212,7 @@ X86_FEATURE_RETPOLINE_AMD) # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) +#endif #else /* No retpoline for C / inline asm */ # define CALL_NOSPEC "call *%[thunk_target]\n" # define THUNK_TARGET(addr) [thunk_target] "rm" (addr) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c37e66e493bf..d0108fb6e4dd 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -252,7 +252,7 @@ static void __init spec2_print_if_secure(const char *reason) static inline bool retp_compiler(void) { - return __is_defined(RETPOLINE); + return __is_defined(CONFIG_RETPOLINE); } static inline bool match_option(const char *arg, int arglen, const char *opt) -- cgit v1.2.3 From ef014aae8f1cd2793e4e014bbb102bed53f852b7 Mon Sep 17 00:00:00 2001 From: Zhenzhong Duan Date: Fri, 2 Nov 2018 01:45:41 -0700 Subject: x86/retpoline: Remove minimal retpoline support Now that CONFIG_RETPOLINE hard depends on compiler support, there is no reason to keep the minimal retpoline support around which only provided basic protection in the assembly files. Suggested-by: Peter Zijlstra Signed-off-by: Zhenzhong Duan Signed-off-by: Thomas Gleixner Cc: David Woodhouse Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Konrad Rzeszutek Wilk Cc: Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/f06f0a89-5587-45db-8ed2-0a9d6638d5c0@default --- arch/x86/include/asm/nospec-branch.h | 2 -- arch/x86/kernel/cpu/bugs.c | 13 ++----------- 2 files changed, 2 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8b09cbb2d52c..c202a64edd95 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -221,8 +221,6 @@ /* The Spectre V2 mitigation variants */ enum spectre_v2_mitigation { SPECTRE_V2_NONE, - SPECTRE_V2_RETPOLINE_MINIMAL, - SPECTRE_V2_RETPOLINE_MINIMAL_AMD, SPECTRE_V2_RETPOLINE_GENERIC, SPECTRE_V2_RETPOLINE_AMD, SPECTRE_V2_IBRS_ENHANCED, diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d0108fb6e4dd..7f6d8159398e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -135,8 +135,6 @@ enum spectre_v2_mitigation_cmd { static const char *spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_MINIMAL] = "Vulnerable: Minimal generic ASM retpoline", - [SPECTRE_V2_RETPOLINE_MINIMAL_AMD] = "Vulnerable: Minimal AMD ASM retpoline", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", @@ -250,11 +248,6 @@ static void __init spec2_print_if_secure(const char *reason) pr_info("%s selected on command line.\n", reason); } -static inline bool retp_compiler(void) -{ - return __is_defined(CONFIG_RETPOLINE); -} - static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -417,14 +410,12 @@ retpoline_auto: pr_err("Spectre mitigation: LFENCE not serializing, switching to generic retpoline\n"); goto retpoline_generic; } - mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_AMD : - SPECTRE_V2_RETPOLINE_MINIMAL_AMD; + mode = SPECTRE_V2_RETPOLINE_AMD; setup_force_cpu_cap(X86_FEATURE_RETPOLINE_AMD); setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } else { retpoline_generic: - mode = retp_compiler() ? SPECTRE_V2_RETPOLINE_GENERIC : - SPECTRE_V2_RETPOLINE_MINIMAL; + mode = SPECTRE_V2_RETPOLINE_GENERIC; setup_force_cpu_cap(X86_FEATURE_RETPOLINE); } -- cgit v1.2.3 From 8eb729b77faf83ac4c1f363a9ad68d042415f24c Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:29 +0100 Subject: x86/speculation: Update the TIF_SSBD comment "Reduced Data Speculation" is an obsolete term. The correct new name is "Speculative store bypass disable" - which is abbreviated into SSBD. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.593893901@linutronix.de --- arch/x86/include/asm/thread_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2ff2a30a264f..523c69efc38a 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -79,7 +79,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ -#define TIF_SSBD 5 /* Reduced data speculation */ +#define TIF_SSBD 5 /* Speculative store bypass disable */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ -- cgit v1.2.3 From 24848509aa55eac39d524b587b051f4e86df3c12 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:30 +0100 Subject: x86/speculation: Clean up spectre_v2_parse_cmdline() Remove the unnecessary 'else' statement in spectre_v2_parse_cmdline() to save an indentation level. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.688010903@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7f6d8159398e..839ab4103e89 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -276,22 +276,21 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; - else { - ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); - if (ret < 0) - return SPECTRE_V2_CMD_AUTO; - for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { - if (!match_option(arg, ret, mitigation_options[i].option)) - continue; - cmd = mitigation_options[i].cmd; - break; - } + ret = cmdline_find_option(boot_command_line, "spectre_v2", arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_CMD_AUTO; - if (i >= ARRAY_SIZE(mitigation_options)) { - pr_err("unknown option (%s). Switching to AUTO select\n", arg); - return SPECTRE_V2_CMD_AUTO; - } + for (i = 0; i < ARRAY_SIZE(mitigation_options); i++) { + if (!match_option(arg, ret, mitigation_options[i].option)) + continue; + cmd = mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", arg); + return SPECTRE_V2_CMD_AUTO; } if ((cmd == SPECTRE_V2_CMD_RETPOLINE || -- cgit v1.2.3 From b86bda0426853bfe8a3506c7d2a5b332760ae46b Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:31 +0100 Subject: x86/speculation: Remove unnecessary ret variable in cpu_show_common() Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.783903657@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 839ab4103e89..b52a48966e01 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -847,8 +847,6 @@ static ssize_t l1tf_show_state(char *buf) static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { - int ret; - if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); @@ -866,13 +864,12 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return sprintf(buf, "Mitigation: __user pointer sanitization\n"); case X86_BUG_SPECTRE_V2: - ret = sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); - return ret; case X86_BUG_SPEC_STORE_BYPASS: return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); -- cgit v1.2.3 From a8f76ae41cd633ac00be1b3019b1eb4741be3828 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:32 +0100 Subject: x86/speculation: Move STIPB/IBPB string conditionals out of cpu_show_common() The Spectre V2 printout in cpu_show_common() handles conditionals for the various mitigation methods directly in the sprintf() argument list. That's hard to read and will become unreadable if more complex decisions need to be made for a particular method. Move the conditionals for STIBP and IBPB string selection into helper functions, so they can be extended later on. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.874479208@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index b52a48966e01..a1502bce9eb8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -844,6 +844,22 @@ static ssize_t l1tf_show_state(char *buf) } #endif +static char *stibp_state(void) +{ + if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) + return ", STIBP"; + else + return ""; +} + +static char *ibpb_state(void) +{ + if (boot_cpu_has(X86_FEATURE_USE_IBPB)) + return ", IBPB"; + else + return ""; +} + static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, char *buf, unsigned int bug) { @@ -865,9 +881,9 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr case X86_BUG_SPECTRE_V2: return sprintf(buf, "%s%s%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + ibpb_state(), boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - (x86_spec_ctrl_base & SPEC_CTRL_STIBP) ? ", STIBP" : "", + stibp_state(), boot_cpu_has(X86_FEATURE_RSB_CTXSW) ? ", RSB filling" : "", spectre_v2_module_string()); -- cgit v1.2.3 From 34bce7c9690b1d897686aac89604ba7adc365556 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:33 +0100 Subject: x86/speculation: Disable STIBP when enhanced IBRS is in use If enhanced IBRS is active, STIBP is redundant for mitigating Spectre v2 user space exploits from hyperthread sibling. Disable STIBP when enhanced IBRS is used. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185003.966801480@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a1502bce9eb8..924cd06dd43b 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -321,6 +321,10 @@ static bool stibp_needed(void) if (spectre_v2_enabled == SPECTRE_V2_NONE) return false; + /* Enhanced IBRS makes using STIBP unnecessary. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return false; + if (!boot_cpu_has(X86_FEATURE_STIBP)) return false; @@ -846,6 +850,9 @@ static ssize_t l1tf_show_state(char *buf) static char *stibp_state(void) { + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return ""; + if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) return ", STIBP"; else -- cgit v1.2.3 From 26c4d75b234040c11728a8acb796b3a85ba7507c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:34 +0100 Subject: x86/speculation: Rename SSBD update functions During context switch, the SSBD bit in SPEC_CTRL MSR is updated according to changes of the TIF_SSBD flag in the current and next running task. Currently, only the bit controlling speculative store bypass disable in SPEC_CTRL MSR is updated and the related update functions all have "speculative_store" or "ssb" in their names. For enhanced mitigation control other bits in SPEC_CTRL MSR need to be updated as well, which makes the SSB names inadequate. Rename the "speculative_store*" functions to a more generic name. No functional change. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.058866968@linutronix.de --- arch/x86/include/asm/spec-ctrl.h | 6 +++--- arch/x86/kernel/cpu/bugs.c | 4 ++-- arch/x86/kernel/process.c | 12 ++++++------ 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index ae7c2c5cd7f0..8e2f8411c7a7 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -70,11 +70,11 @@ extern void speculative_store_bypass_ht_init(void); static inline void speculative_store_bypass_ht_init(void) { } #endif -extern void speculative_store_bypass_update(unsigned long tif); +extern void speculation_ctrl_update(unsigned long tif); -static inline void speculative_store_bypass_update_current(void) +static inline void speculation_ctrl_update_current(void) { - speculative_store_bypass_update(current_thread_info()->flags); + speculation_ctrl_update(current_thread_info()->flags); } #endif diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 924cd06dd43b..a723af0c4400 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -200,7 +200,7 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : ssbd_spec_ctrl_to_tif(hostval); - speculative_store_bypass_update(tif); + speculation_ctrl_update(tif); } } EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); @@ -632,7 +632,7 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) * mitigation until it is next scheduled. */ if (task == current && update) - speculative_store_bypass_update_current(); + speculation_ctrl_update_current(); return 0; } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index c93fcfdf1673..8aa49604f9ae 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -395,27 +395,27 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); } -static __always_inline void intel_set_ssb_state(unsigned long tifn) +static __always_inline void spec_ctrl_update_msr(unsigned long tifn) { u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); wrmsrl(MSR_IA32_SPEC_CTRL, msr); } -static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +static __always_inline void __speculation_ctrl_update(unsigned long tifn) { if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) amd_set_ssb_virt_state(tifn); else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) amd_set_core_ssb_state(tifn); else - intel_set_ssb_state(tifn); + spec_ctrl_update_msr(tifn); } -void speculative_store_bypass_update(unsigned long tif) +void speculation_ctrl_update(unsigned long tif) { preempt_disable(); - __speculative_store_bypass_update(tif); + __speculation_ctrl_update(tif); preempt_enable(); } @@ -452,7 +452,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); if ((tifp ^ tifn) & _TIF_SSBD) - __speculative_store_bypass_update(tifn); + __speculation_ctrl_update(tifn); } /* -- cgit v1.2.3 From 01daf56875ee0cd50ed496a09b20eb369b45dfa5 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:35 +0100 Subject: x86/speculation: Reorganize speculation control MSRs update The logic to detect whether there's a change in the previous and next task's flag relevant to update speculation control MSRs is spread out across multiple functions. Consolidate all checks needed for updating speculation control MSRs into the new __speculation_ctrl_update() helper function. This makes it easy to pick the right speculation control MSR and the bits in MSR_IA32_SPEC_CTRL that need updating based on TIF flags changes. Originally-by: Thomas Lendacky Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.151077005@linutronix.de --- arch/x86/kernel/process.c | 46 +++++++++++++++++++++++++++++----------------- 1 file changed, 29 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 8aa49604f9ae..70e9832379e1 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -395,27 +395,40 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); } -static __always_inline void spec_ctrl_update_msr(unsigned long tifn) -{ - u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); - - wrmsrl(MSR_IA32_SPEC_CTRL, msr); -} +/* + * Update the MSRs managing speculation control, during context switch. + * + * tifp: Previous task's thread flags + * tifn: Next task's thread flags + */ +static __always_inline void __speculation_ctrl_update(unsigned long tifp, + unsigned long tifn) +{ + u64 msr = x86_spec_ctrl_base; + bool updmsr = false; + + /* If TIF_SSBD is different, select the proper mitigation method */ + if ((tifp ^ tifn) & _TIF_SSBD) { + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { + amd_set_ssb_virt_state(tifn); + } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { + amd_set_core_ssb_state(tifn); + } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) || + static_cpu_has(X86_FEATURE_AMD_SSBD)) { + msr |= ssbd_tif_to_spec_ctrl(tifn); + updmsr = true; + } + } -static __always_inline void __speculation_ctrl_update(unsigned long tifn) -{ - if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) - amd_set_ssb_virt_state(tifn); - else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) - amd_set_core_ssb_state(tifn); - else - spec_ctrl_update_msr(tifn); + if (updmsr) + wrmsrl(MSR_IA32_SPEC_CTRL, msr); } void speculation_ctrl_update(unsigned long tif) { + /* Forced update. Make sure all relevant TIF flags are different */ preempt_disable(); - __speculation_ctrl_update(tif); + __speculation_ctrl_update(~tif, tif); preempt_enable(); } @@ -451,8 +464,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); - if ((tifp ^ tifn) & _TIF_SSBD) - __speculation_ctrl_update(tifn); + __speculation_ctrl_update(tifp, tifn); } /* -- cgit v1.2.3 From dbe733642e01dd108f71436aaea7b328cb28fd87 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:37 +0100 Subject: x86/Kconfig: Select SCHED_SMT if SMP enabled CONFIG_SCHED_SMT is enabled by all distros, so there is not a real point to have it configurable. The runtime overhead in the core scheduler code is minimal because the actual SMT scheduling parts are conditional on a static key. This allows to expose the scheduler's SMT state static key to the speculation control code. Alternatively the scheduler's static key could be made always available when CONFIG_SMP is enabled, but that's just adding an unused static key to every other architecture for nothing. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.337452245@linutronix.de --- arch/x86/Kconfig | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index b5286ad2a982..8689e794a43c 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1000,13 +1000,7 @@ config NR_CPUS to the kernel image. config SCHED_SMT - bool "SMT (Hyperthreading) scheduler support" - depends on SMP - ---help--- - SMT scheduler support improves the CPU scheduler's decision making - when dealing with Intel Pentium 4 chips with HyperThreading at a - cost of slightly increased overhead in some places. If unsure say - N here. + def_bool y if SMP config SCHED_MC def_bool y -- cgit v1.2.3 From a74cfffb03b73d41e08f84c2e5c87dec0ce3db9f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:39 +0100 Subject: x86/speculation: Rework SMT state change arch_smt_update() is only called when the sysfs SMT control knob is changed. This means that when SMT is enabled in the sysfs control knob the system is considered to have SMT active even if all siblings are offline. To allow finegrained control of the speculation mitigations, the actual SMT state is more interesting than the fact that siblings could be enabled. Rework the code, so arch_smt_update() is invoked from each individual CPU hotplug function, and simplify the update function while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.521974984@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a723af0c4400..5625b323ff32 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -344,16 +345,14 @@ void arch_smt_update(void) return; mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base; - if (cpu_smt_control == CPU_SMT_ENABLED) + + mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + if (sched_smt_active()) mask |= SPEC_CTRL_STIBP; - else - mask &= ~SPEC_CTRL_STIBP; if (mask != x86_spec_ctrl_base) { pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - cpu_smt_control == CPU_SMT_ENABLED ? - "Enabling" : "Disabling"); + mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); x86_spec_ctrl_base = mask; on_each_cpu(update_stibp_msr, NULL, 1); } -- cgit v1.2.3 From 130d6f946f6f2a972ee3ec8540b7243ab99abe97 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:40 +0100 Subject: x86/l1tf: Show actual SMT state Use the now exposed real SMT state, not the SMT sysfs control knob state. This reflects the state of the system when the mitigation status is queried. This does not change the warning in the VMX launch code. There the dependency on the control knob makes sense because siblings could be brought online anytime after launching the VM. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.613357354@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 5625b323ff32..2dc4ee2bedcb 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -832,13 +832,14 @@ static ssize_t l1tf_show_state(char *buf) if (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_EPT_DISABLED || (l1tf_vmx_mitigation == VMENTER_L1D_FLUSH_NEVER && - cpu_smt_control == CPU_SMT_ENABLED)) + sched_smt_active())) { return sprintf(buf, "%s; VMX: %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation]); + } return sprintf(buf, "%s; VMX: %s, SMT %s\n", L1TF_DEFAULT_MSG, l1tf_vmx_states[l1tf_vmx_mitigation], - cpu_smt_control == CPU_SMT_ENABLED ? "vulnerable" : "disabled"); + sched_smt_active() ? "vulnerable" : "disabled"); } #else static ssize_t l1tf_show_state(char *buf) -- cgit v1.2.3 From 15d6b7aab0793b2de8a05d8a828777dd24db424e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:41 +0100 Subject: x86/speculation: Reorder the spec_v2 code Reorder the code so it is better grouped. No functional change. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.707122879@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 168 ++++++++++++++++++++++----------------------- 1 file changed, 84 insertions(+), 84 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 2dc4ee2bedcb..c9542b9fb329 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -124,29 +124,6 @@ void __init check_bugs(void) #endif } -/* The kernel command line selection */ -enum spectre_v2_mitigation_cmd { - SPECTRE_V2_CMD_NONE, - SPECTRE_V2_CMD_AUTO, - SPECTRE_V2_CMD_FORCE, - SPECTRE_V2_CMD_RETPOLINE, - SPECTRE_V2_CMD_RETPOLINE_GENERIC, - SPECTRE_V2_CMD_RETPOLINE_AMD, -}; - -static const char *spectre_v2_strings[] = { - [SPECTRE_V2_NONE] = "Vulnerable", - [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", - [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", - [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", -}; - -#undef pr_fmt -#define pr_fmt(fmt) "Spectre V2 : " fmt - -static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = - SPECTRE_V2_NONE; - void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) { @@ -216,6 +193,12 @@ static void x86_amd_ssb_disable(void) wrmsrl(MSR_AMD64_LS_CFG, msrval); } +#undef pr_fmt +#define pr_fmt(fmt) "Spectre V2 : " fmt + +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -237,18 +220,6 @@ static inline const char *spectre_v2_module_string(void) static inline const char *spectre_v2_module_string(void) { return ""; } #endif -static void __init spec2_print_if_insecure(const char *reason) -{ - if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); -} - -static void __init spec2_print_if_secure(const char *reason) -{ - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); -} - static inline bool match_option(const char *arg, int arglen, const char *opt) { int len = strlen(opt); @@ -256,24 +227,53 @@ static inline bool match_option(const char *arg, int arglen, const char *opt) return len == arglen && !strncmp(arg, opt, len); } +/* The kernel command line selection for spectre v2 */ +enum spectre_v2_mitigation_cmd { + SPECTRE_V2_CMD_NONE, + SPECTRE_V2_CMD_AUTO, + SPECTRE_V2_CMD_FORCE, + SPECTRE_V2_CMD_RETPOLINE, + SPECTRE_V2_CMD_RETPOLINE_GENERIC, + SPECTRE_V2_CMD_RETPOLINE_AMD, +}; + +static const char *spectre_v2_strings[] = { + [SPECTRE_V2_NONE] = "Vulnerable", + [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", + [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", + [SPECTRE_V2_IBRS_ENHANCED] = "Mitigation: Enhanced IBRS", +}; + static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; } mitigation_options[] = { - { "off", SPECTRE_V2_CMD_NONE, false }, - { "on", SPECTRE_V2_CMD_FORCE, true }, - { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, - { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, - { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, - { "auto", SPECTRE_V2_CMD_AUTO, false }, + { "off", SPECTRE_V2_CMD_NONE, false }, + { "on", SPECTRE_V2_CMD_FORCE, true }, + { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, + { "retpoline,amd", SPECTRE_V2_CMD_RETPOLINE_AMD, false }, + { "retpoline,generic", SPECTRE_V2_CMD_RETPOLINE_GENERIC, false }, + { "auto", SPECTRE_V2_CMD_AUTO, false }, }; +static void __init spec2_print_if_insecure(const char *reason) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s selected on command line.\n", reason); +} + +static void __init spec2_print_if_secure(const char *reason) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + pr_info("%s selected on command line.\n", reason); +} + static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) { + enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; char arg[20]; int ret, i; - enum spectre_v2_mitigation_cmd cmd = SPECTRE_V2_CMD_AUTO; if (cmdline_find_option_bool(boot_command_line, "nospectre_v2")) return SPECTRE_V2_CMD_NONE; @@ -317,48 +317,6 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return cmd; } -static bool stibp_needed(void) -{ - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - - /* Enhanced IBRS makes using STIBP unnecessary. */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return false; - - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; -} - -static void update_stibp_msr(void *info) -{ - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); -} - -void arch_smt_update(void) -{ - u64 mask; - - if (!stibp_needed()) - return; - - mutex_lock(&spec_ctrl_mutex); - - mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; - if (sched_smt_active()) - mask |= SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); - } - mutex_unlock(&spec_ctrl_mutex); -} - static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -462,6 +420,48 @@ specv2_set_mode: arch_smt_update(); } +static bool stibp_needed(void) +{ + if (spectre_v2_enabled == SPECTRE_V2_NONE) + return false; + + /* Enhanced IBRS makes using STIBP unnecessary. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return false; + + if (!boot_cpu_has(X86_FEATURE_STIBP)) + return false; + + return true; +} + +static void update_stibp_msr(void *info) +{ + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); +} + +void arch_smt_update(void) +{ + u64 mask; + + if (!stibp_needed()) + return; + + mutex_lock(&spec_ctrl_mutex); + + mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + if (sched_smt_active()) + mask |= SPEC_CTRL_STIBP; + + if (mask != x86_spec_ctrl_base) { + pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", + mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); + } + mutex_unlock(&spec_ctrl_mutex); +} + #undef pr_fmt #define pr_fmt(fmt) "Speculative Store Bypass: " fmt -- cgit v1.2.3 From 8770709f411763884535662744a3786a1806afd3 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:42 +0100 Subject: x86/speculation: Mark string arrays const correctly checkpatch.pl muttered when reshuffling the code: WARNING: static const char * array should probably be static const char * const Fix up all the string arrays. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.800018931@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c9542b9fb329..4fcbccb16200 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -237,7 +237,7 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_RETPOLINE_AMD, }; -static const char *spectre_v2_strings[] = { +static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", [SPECTRE_V2_RETPOLINE_AMD] = "Mitigation: Full AMD retpoline", @@ -476,7 +476,7 @@ enum ssb_mitigation_cmd { SPEC_STORE_BYPASS_CMD_SECCOMP, }; -static const char *ssb_strings[] = { +static const char * const ssb_strings[] = { [SPEC_STORE_BYPASS_NONE] = "Vulnerable", [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", @@ -816,7 +816,7 @@ early_param("l1tf", l1tf_cmdline); #define L1TF_DEFAULT_MSG "Mitigation: PTE Inversion" #if IS_ENABLED(CONFIG_KVM_INTEL) -static const char *l1tf_vmx_states[] = { +static const char * const l1tf_vmx_states[] = { [VMENTER_L1D_FLUSH_AUTO] = "auto", [VMENTER_L1D_FLUSH_NEVER] = "vulnerable", [VMENTER_L1D_FLUSH_COND] = "conditional cache flushes", -- cgit v1.2.3 From 30ba72a990f5096ae08f284de17986461efcc408 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:43 +0100 Subject: x86/speculataion: Mark command line parser data __initdata No point to keep that around. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.893886356@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4fcbccb16200..9279cbabe16e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -248,7 +248,7 @@ static const struct { const char *option; enum spectre_v2_mitigation_cmd cmd; bool secure; -} mitigation_options[] = { +} mitigation_options[] __initdata = { { "off", SPECTRE_V2_CMD_NONE, false }, { "on", SPECTRE_V2_CMD_FORCE, true }, { "retpoline", SPECTRE_V2_CMD_RETPOLINE, false }, @@ -486,7 +486,7 @@ static const char * const ssb_strings[] = { static const struct { const char *option; enum ssb_mitigation_cmd cmd; -} ssb_mitigation_options[] = { +} ssb_mitigation_options[] __initdata = { { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ -- cgit v1.2.3 From 495d470e9828500e0155027f230449ac5e29c025 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:44 +0100 Subject: x86/speculation: Unify conditional spectre v2 print functions There is no point in having two functions and a conditional at the call site. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185004.986890749@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9279cbabe16e..4f5a6319dca6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -257,15 +257,9 @@ static const struct { { "auto", SPECTRE_V2_CMD_AUTO, false }, }; -static void __init spec2_print_if_insecure(const char *reason) +static void __init spec_v2_print_cond(const char *reason, bool secure) { - if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - pr_info("%s selected on command line.\n", reason); -} - -static void __init spec2_print_if_secure(const char *reason) -{ - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) pr_info("%s selected on command line.\n", reason); } @@ -309,11 +303,8 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_AUTO; } - if (mitigation_options[i].secure) - spec2_print_if_secure(mitigation_options[i].option); - else - spec2_print_if_insecure(mitigation_options[i].option); - + spec_v2_print_cond(mitigation_options[i].option, + mitigation_options[i].secure); return cmd; } -- cgit v1.2.3 From fa1202ef224391b6f5b26cdd44cc50495e8fab54 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:45 +0100 Subject: x86/speculation: Add command line control for indirect branch speculation Add command line control for user space indirect branch speculation mitigations. The new option is: spectre_v2_user= The initial options are: - on: Unconditionally enabled - off: Unconditionally disabled -auto: Kernel selects mitigation (default off for now) When the spectre_v2= command line argument is either 'on' or 'off' this implies that the application to application control follows that state even if a contradicting spectre_v2_user= argument is supplied. Originally-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.082720373@linutronix.de --- arch/x86/include/asm/nospec-branch.h | 10 +++ arch/x86/kernel/cpu/bugs.c | 133 ++++++++++++++++++++++++++++++----- 2 files changed, 126 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index c202a64edd95..be0b0aa780e2 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -3,6 +3,8 @@ #ifndef _ASM_X86_NOSPEC_BRANCH_H_ #define _ASM_X86_NOSPEC_BRANCH_H_ +#include + #include #include #include @@ -226,6 +228,12 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS_ENHANCED, }; +/* The indirect branch speculation control variants */ +enum spectre_v2_user_mitigation { + SPECTRE_V2_USER_NONE, + SPECTRE_V2_USER_STRICT, +}; + /* The Speculative Store Bypass disable variants */ enum ssb_mitigation { SPEC_STORE_BYPASS_NONE, @@ -303,6 +311,8 @@ do { \ preempt_enable(); \ } while (0) +DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); + #endif /* __ASSEMBLY__ */ /* diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4f5a6319dca6..3a223cce1fac 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -54,6 +54,9 @@ static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; u64 __ro_after_init x86_amd_ls_cfg_base; u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; +/* Control conditional STIPB in switch_to() */ +DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); + void __init check_bugs(void) { identify_boot_cpu(); @@ -199,6 +202,9 @@ static void x86_amd_ssb_disable(void) static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = SPECTRE_V2_NONE; +static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init = + SPECTRE_V2_USER_NONE; + #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -237,6 +243,104 @@ enum spectre_v2_mitigation_cmd { SPECTRE_V2_CMD_RETPOLINE_AMD, }; +enum spectre_v2_user_cmd { + SPECTRE_V2_USER_CMD_NONE, + SPECTRE_V2_USER_CMD_AUTO, + SPECTRE_V2_USER_CMD_FORCE, +}; + +static const char * const spectre_v2_user_strings[] = { + [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", + [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", +}; + +static const struct { + const char *option; + enum spectre_v2_user_cmd cmd; + bool secure; +} v2_user_options[] __initdata = { + { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, + { "off", SPECTRE_V2_USER_CMD_NONE, false }, + { "on", SPECTRE_V2_USER_CMD_FORCE, true }, +}; + +static void __init spec_v2_user_print_cond(const char *reason, bool secure) +{ + if (boot_cpu_has_bug(X86_BUG_SPECTRE_V2) != secure) + pr_info("spectre_v2_user=%s forced on command line.\n", reason); +} + +static enum spectre_v2_user_cmd __init +spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd) +{ + char arg[20]; + int ret, i; + + switch (v2_cmd) { + case SPECTRE_V2_CMD_NONE: + return SPECTRE_V2_USER_CMD_NONE; + case SPECTRE_V2_CMD_FORCE: + return SPECTRE_V2_USER_CMD_FORCE; + default: + break; + } + + ret = cmdline_find_option(boot_command_line, "spectre_v2_user", + arg, sizeof(arg)); + if (ret < 0) + return SPECTRE_V2_USER_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(v2_user_options); i++) { + if (match_option(arg, ret, v2_user_options[i].option)) { + spec_v2_user_print_cond(v2_user_options[i].option, + v2_user_options[i].secure); + return v2_user_options[i].cmd; + } + } + + pr_err("Unknown user space protection option (%s). Switching to AUTO select\n", arg); + return SPECTRE_V2_USER_CMD_AUTO; +} + +static void __init +spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) +{ + enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; + bool smt_possible = IS_ENABLED(CONFIG_SMP); + + if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) + return; + + if (cpu_smt_control == CPU_SMT_FORCE_DISABLED || + cpu_smt_control == CPU_SMT_NOT_SUPPORTED) + smt_possible = false; + + switch (spectre_v2_parse_user_cmdline(v2_cmd)) { + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_NONE: + goto set_mode; + case SPECTRE_V2_USER_CMD_FORCE: + mode = SPECTRE_V2_USER_STRICT; + break; + } + + /* Initialize Indirect Branch Prediction Barrier */ + if (boot_cpu_has(X86_FEATURE_IBPB)) { + setup_force_cpu_cap(X86_FEATURE_USE_IBPB); + pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); + } + + /* If enhanced IBRS is enabled no STIPB required */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) + return; + +set_mode: + spectre_v2_user = mode; + /* Only print the STIBP mode when SMT possible */ + if (smt_possible) + pr_info("%s\n", spectre_v2_user_strings[mode]); +} + static const char * const spectre_v2_strings[] = { [SPECTRE_V2_NONE] = "Vulnerable", [SPECTRE_V2_RETPOLINE_GENERIC] = "Mitigation: Full generic retpoline", @@ -385,12 +489,6 @@ specv2_set_mode: setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n"); - /* Initialize Indirect Branch Prediction Barrier if supported */ - if (boot_cpu_has(X86_FEATURE_IBPB)) { - setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); - } - /* * Retpoline means the kernel is safe because it has no indirect * branches. Enhanced IBRS protects firmware too, so, enable restricted @@ -407,23 +505,21 @@ specv2_set_mode: pr_info("Enabling Restricted Speculation for firmware calls\n"); } + /* Set up IBPB and STIBP depending on the general spectre V2 command */ + spectre_v2_user_select_mitigation(cmd); + /* Enable STIBP if appropriate */ arch_smt_update(); } static bool stibp_needed(void) { - if (spectre_v2_enabled == SPECTRE_V2_NONE) - return false; - /* Enhanced IBRS makes using STIBP unnecessary. */ if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return false; - if (!boot_cpu_has(X86_FEATURE_STIBP)) - return false; - - return true; + /* Check for strict user mitigation mode */ + return spectre_v2_user == SPECTRE_V2_USER_STRICT; } static void update_stibp_msr(void *info) @@ -844,10 +940,13 @@ static char *stibp_state(void) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return ""; - if (x86_spec_ctrl_base & SPEC_CTRL_STIBP) - return ", STIBP"; - else - return ""; + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return ", STIBP: disabled"; + case SPECTRE_V2_USER_STRICT: + return ", STIBP: forced"; + } + return ""; } static char *ibpb_state(void) -- cgit v1.2.3 From 5bfbe3ad5840d941b89bcac54b821ba14f50a0ba Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Sun, 25 Nov 2018 19:33:46 +0100 Subject: x86/speculation: Prepare for per task indirect branch speculation control To avoid the overhead of STIBP always on, it's necessary to allow per task control of STIBP. Add a new task flag TIF_SPEC_IB and evaluate it during context switch if SMT is active and flag evaluation is enabled by the speculation control code. Add the conditional evaluation to x86_virt_spec_ctrl() as well so the guest/host switch works properly. This has no effect because TIF_SPEC_IB cannot be set yet and the static key which controls evaluation is off. Preparatory patch for adding the control code. [ tglx: Simplify the context switch logic and make the TIF evaluation depend on SMP=y and on the static key controlling the conditional update. Rename it to TIF_SPEC_IB because it controls both STIBP and IBPB ] Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.176917199@linutronix.de --- arch/x86/include/asm/msr-index.h | 5 +++-- arch/x86/include/asm/spec-ctrl.h | 12 ++++++++++++ arch/x86/include/asm/thread_info.h | 5 ++++- arch/x86/kernel/cpu/bugs.c | 4 ++++ arch/x86/kernel/process.c | 20 ++++++++++++++++++-- 5 files changed, 41 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 80f4a4f38c79..c8f73efb4ece 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -41,9 +41,10 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ -#define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_STIBP_SHIFT 1 /* Single Thread Indirect Branch Predictor (STIBP) bit */ +#define SPEC_CTRL_STIBP (1 << SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */ #define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ -#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ +#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 8e2f8411c7a7..27b0bce3933b 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -53,12 +53,24 @@ static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline u64 stibp_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (tifn & _TIF_SPEC_IB) >> (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) { BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); } +static inline unsigned long stibp_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SPEC_IB < SPEC_CTRL_STIBP_SHIFT); + return (spec_ctrl & SPEC_CTRL_STIBP) << (TIF_SPEC_IB - SPEC_CTRL_STIBP_SHIFT); +} + static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) { return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 523c69efc38a..fa583ec99e3e 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -83,6 +83,7 @@ struct thread_info { #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ +#define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ @@ -110,6 +111,7 @@ struct thread_info { #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) +#define _TIF_SPEC_IB (1 << TIF_SPEC_IB) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) @@ -146,7 +148,8 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ + _TIF_SSBD|_TIF_SPEC_IB) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3a223cce1fac..1e13dbfc0919 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -148,6 +148,10 @@ x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) static_cpu_has(X86_FEATURE_AMD_SSBD)) hostval |= ssbd_tif_to_spec_ctrl(ti->flags); + /* Conditional STIBP enabled? */ + if (static_branch_unlikely(&switch_to_cond_stibp)) + hostval |= stibp_tif_to_spec_ctrl(ti->flags); + if (hostval != guestval) { msrval = setguest ? guestval : hostval; wrmsrl(MSR_IA32_SPEC_CTRL, msrval); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 70e9832379e1..574b144d2b53 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -404,11 +404,17 @@ static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) static __always_inline void __speculation_ctrl_update(unsigned long tifp, unsigned long tifn) { + unsigned long tif_diff = tifp ^ tifn; u64 msr = x86_spec_ctrl_base; bool updmsr = false; - /* If TIF_SSBD is different, select the proper mitigation method */ - if ((tifp ^ tifn) & _TIF_SSBD) { + /* + * If TIF_SSBD is different, select the proper mitigation + * method. Note that if SSBD mitigation is disabled or permanentely + * enabled this branch can't be taken because nothing can set + * TIF_SSBD. + */ + if (tif_diff & _TIF_SSBD) { if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) { amd_set_ssb_virt_state(tifn); } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) { @@ -420,6 +426,16 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, } } + /* + * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled, + * otherwise avoid the MSR write. + */ + if (IS_ENABLED(CONFIG_SMP) && + static_branch_unlikely(&switch_to_cond_stibp)) { + updmsr |= !!(tif_diff & _TIF_SPEC_IB); + msr |= stibp_tif_to_spec_ctrl(tifn); + } + if (updmsr) wrmsrl(MSR_IA32_SPEC_CTRL, msr); } -- cgit v1.2.3 From ff16701a29cba3aafa0bd1656d766813b2d0a811 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:47 +0100 Subject: x86/process: Consolidate and simplify switch_to_xtra() code Move the conditional invocation of __switch_to_xtra() into an inline function so the logic can be shared between 32 and 64 bit. Remove the handthrough of the TSS pointer and retrieve the pointer directly in the bitmap handling function. Use this_cpu_ptr() instead of the per_cpu() indirection. This is a preparatory change so integration of conditional indirect branch speculation optimization happens only in one place. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.280855518@linutronix.de --- arch/x86/include/asm/switch_to.h | 3 --- arch/x86/kernel/process.c | 12 +++++++----- arch/x86/kernel/process.h | 24 ++++++++++++++++++++++++ arch/x86/kernel/process_32.c | 10 +++------- arch/x86/kernel/process_64.c | 10 +++------- 5 files changed, 37 insertions(+), 22 deletions(-) create mode 100644 arch/x86/kernel/process.h (limited to 'arch') diff --git a/arch/x86/include/asm/switch_to.h b/arch/x86/include/asm/switch_to.h index 36bd243843d6..7cf1a270d891 100644 --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -11,9 +11,6 @@ struct task_struct *__switch_to_asm(struct task_struct *prev, __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); -struct tss_struct; -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss); /* This runs runs on the previous thread's stack. */ static inline void prepare_switch_to(struct task_struct *next) diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 574b144d2b53..cdf8e6694f71 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -40,6 +40,8 @@ #include #include +#include "process.h" + /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, * no more per-task TSS's. The TSS size is kept cacheline-aligned @@ -252,11 +254,12 @@ void arch_setup_new_exec(void) enable_cpuid(); } -static inline void switch_to_bitmap(struct tss_struct *tss, - struct thread_struct *prev, +static inline void switch_to_bitmap(struct thread_struct *prev, struct thread_struct *next, unsigned long tifp, unsigned long tifn) { + struct tss_struct *tss = this_cpu_ptr(&cpu_tss_rw); + if (tifn & _TIF_IO_BITMAP) { /* * Copy the relevant range of the IO bitmap. @@ -448,8 +451,7 @@ void speculation_ctrl_update(unsigned long tif) preempt_enable(); } -void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, - struct tss_struct *tss) +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev, *next; unsigned long tifp, tifn; @@ -459,7 +461,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, tifn = READ_ONCE(task_thread_info(next_p)->flags); tifp = READ_ONCE(task_thread_info(prev_p)->flags); - switch_to_bitmap(tss, prev, next, tifp, tifn); + switch_to_bitmap(prev, next, tifp, tifn); propagate_user_return_notify(prev_p, next_p); diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h new file mode 100644 index 000000000000..020fbfac3a27 --- /dev/null +++ b/arch/x86/kernel/process.h @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Code shared between 32 and 64 bit + +void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); + +/* + * This needs to be inline to optimize for the common case where no extra + * work needs to be done. + */ +static inline void switch_to_extra(struct task_struct *prev, + struct task_struct *next) +{ + unsigned long next_tif = task_thread_info(next)->flags; + unsigned long prev_tif = task_thread_info(prev)->flags; + + /* + * __switch_to_xtra() handles debug registers, i/o bitmaps, + * speculation mitigations etc. + */ + if (unlikely(next_tif & _TIF_WORK_CTXSW_NEXT || + prev_tif & _TIF_WORK_CTXSW_PREV)) + __switch_to_xtra(prev, next); +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5046a3c9dec2..d3e593eb189f 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -59,6 +59,8 @@ #include #include +#include "process.h" + void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { unsigned long cr0 = 0L, cr2 = 0L, cr3 = 0L, cr4 = 0L; @@ -232,7 +234,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ @@ -264,12 +265,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) if (get_kernel_rpl() && unlikely(prev->iopl != next->iopl)) set_iopl_mask(next->iopl); - /* - * Now maybe handle debug registers and/or IO bitmaps - */ - if (unlikely(task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV || - task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) - __switch_to_xtra(prev_p, next_p, tss); + switch_to_extra(prev_p, next_p); /* * Leave lazy mode, flushing any hypercalls made here. diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 0e0b4288a4b2..bbfbf017065c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -60,6 +60,8 @@ #include #endif +#include "process.h" + /* Prints also some state that isn't saved in the pt_regs */ void __show_regs(struct pt_regs *regs, enum show_regs_mode mode) { @@ -553,7 +555,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) struct fpu *prev_fpu = &prev->fpu; struct fpu *next_fpu = &next->fpu; int cpu = smp_processor_id(); - struct tss_struct *tss = &per_cpu(cpu_tss_rw, cpu); WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) && this_cpu_read(irq_count) != -1); @@ -617,12 +618,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* Reload sp0. */ update_task_stack(next_p); - /* - * Now maybe reload the debug registers and handle I/O bitmaps - */ - if (unlikely(task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT || - task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) - __switch_to_xtra(prev_p, next_p, tss); + switch_to_extra(prev_p, next_p); #ifdef CONFIG_XEN_PV /* -- cgit v1.2.3 From 5635d99953f04b550738f6f4c1c532667c3fd872 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:48 +0100 Subject: x86/speculation: Avoid __switch_to_xtra() calls The TIF_SPEC_IB bit does not need to be evaluated in the decision to invoke __switch_to_xtra() when: - CONFIG_SMP is disabled - The conditional STIPB mode is disabled The TIF_SPEC_IB bit still controls IBPB in both cases so the TIF work mask checks might invoke __switch_to_xtra() for nothing if TIF_SPEC_IB is the only set bit in the work masks. Optimize it out by masking the bit at compile time for CONFIG_SMP=n and at run time when the static key controlling the conditional STIBP mode is disabled. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.374062201@linutronix.de --- arch/x86/include/asm/thread_info.h | 13 +++++++++++-- arch/x86/kernel/process.h | 15 +++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index fa583ec99e3e..6d201699c651 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -147,9 +147,18 @@ struct thread_info { _TIF_FSCHECK) /* flags to check in __switch_to() */ -#define _TIF_WORK_CTXSW \ +#define _TIF_WORK_CTXSW_BASE \ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ - _TIF_SSBD|_TIF_SPEC_IB) + _TIF_SSBD) + +/* + * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. + */ +#ifdef CONFIG_SMP +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE | _TIF_SPEC_IB) +#else +# define _TIF_WORK_CTXSW (_TIF_WORK_CTXSW_BASE) +#endif #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/kernel/process.h b/arch/x86/kernel/process.h index 020fbfac3a27..898e97cf6629 100644 --- a/arch/x86/kernel/process.h +++ b/arch/x86/kernel/process.h @@ -2,6 +2,8 @@ // // Code shared between 32 and 64 bit +#include + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p); /* @@ -14,6 +16,19 @@ static inline void switch_to_extra(struct task_struct *prev, unsigned long next_tif = task_thread_info(next)->flags; unsigned long prev_tif = task_thread_info(prev)->flags; + if (IS_ENABLED(CONFIG_SMP)) { + /* + * Avoid __switch_to_xtra() invocation when conditional + * STIPB is disabled and the only different bit is + * TIF_SPEC_IB. For CONFIG_SMP=n TIF_SPEC_IB is not + * in the TIF_WORK_CTXSW masks. + */ + if (!static_branch_likely(&switch_to_cond_stibp)) { + prev_tif &= ~_TIF_SPEC_IB; + next_tif &= ~_TIF_SPEC_IB; + } + } + /* * __switch_to_xtra() handles debug registers, i/o bitmaps, * speculation mitigations etc. -- cgit v1.2.3 From 4c71a2b6fd7e42814aa68a6dec88abf3b42ea573 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:49 +0100 Subject: x86/speculation: Prepare for conditional IBPB in switch_mm() The IBPB speculation barrier is issued from switch_mm() when the kernel switches to a user space task with a different mm than the user space task which ran last on the same CPU. An additional optimization is to avoid IBPB when the incoming task can be ptraced by the outgoing task. This optimization only works when switching directly between two user space tasks. When switching from a kernel task to a user space task the optimization fails because the previous task cannot be accessed anymore. So for quite some scenarios the optimization is just adding overhead. The upcoming conditional IBPB support will issue IBPB only for user space tasks which have the TIF_SPEC_IB bit set. This requires to handle the following cases: 1) Switch from a user space task (potential attacker) which has TIF_SPEC_IB set to a user space task (potential victim) which has TIF_SPEC_IB not set. 2) Switch from a user space task (potential attacker) which has TIF_SPEC_IB not set to a user space task (potential victim) which has TIF_SPEC_IB set. This needs to be optimized for the case where the IBPB can be avoided when only kernel threads ran in between user space tasks which belong to the same process. The current check whether two tasks belong to the same context is using the tasks context id. While correct, it's simpler to use the mm pointer because it allows to mangle the TIF_SPEC_IB bit into it. The context id based mechanism requires extra storage, which creates worse code. When a task is scheduled out its TIF_SPEC_IB bit is mangled as bit 0 into the per CPU storage which is used to track the last user space mm which was running on a CPU. This bit can be used together with the TIF_SPEC_IB bit of the incoming task to make the decision whether IBPB needs to be issued or not to cover the two cases above. As conditional IBPB is going to be the default, remove the dubious ptrace check for the IBPB always case and simply issue IBPB always when the process changes. Move the storage to a different place in the struct as the original one created a hole. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.466447057@linutronix.de --- arch/x86/include/asm/nospec-branch.h | 2 + arch/x86/include/asm/tlbflush.h | 8 ++- arch/x86/kernel/cpu/bugs.c | 29 +++++++-- arch/x86/mm/tlb.c | 115 ++++++++++++++++++++++++++--------- 4 files changed, 118 insertions(+), 36 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index be0b0aa780e2..d4d35baf0430 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -312,6 +312,8 @@ do { \ } while (0) DECLARE_STATIC_KEY_FALSE(switch_to_cond_stibp); +DECLARE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +DECLARE_STATIC_KEY_FALSE(switch_mm_always_ibpb); #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index d760611cfc35..f4204bf377fc 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -169,10 +169,14 @@ struct tlb_state { #define LOADED_MM_SWITCHING ((struct mm_struct *)1) + /* Last user mm for optimizing IBPB */ + union { + struct mm_struct *last_user_mm; + unsigned long last_user_mm_ibpb; + }; + u16 loaded_mm_asid; u16 next_asid; - /* last user mm's ctx id */ - u64 last_ctx_id; /* * We can be in one of several states: diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 1e13dbfc0919..7c946a9af947 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -56,6 +56,10 @@ u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; /* Control conditional STIPB in switch_to() */ DEFINE_STATIC_KEY_FALSE(switch_to_cond_stibp); +/* Control conditional IBPB in switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_cond_ibpb); +/* Control unconditional IBPB in switch_mm() */ +DEFINE_STATIC_KEY_FALSE(switch_mm_always_ibpb); void __init check_bugs(void) { @@ -331,7 +335,17 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) /* Initialize Indirect Branch Prediction Barrier */ if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - pr_info("Spectre v2 mitigation: Enabling Indirect Branch Prediction Barrier\n"); + + switch (mode) { + case SPECTRE_V2_USER_STRICT: + static_branch_enable(&switch_mm_always_ibpb); + break; + default: + break; + } + + pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", + mode == SPECTRE_V2_USER_STRICT ? "always-on" : "conditional"); } /* If enhanced IBRS is enabled no STIPB required */ @@ -955,10 +969,15 @@ static char *stibp_state(void) static char *ibpb_state(void) { - if (boot_cpu_has(X86_FEATURE_USE_IBPB)) - return ", IBPB"; - else - return ""; + if (boot_cpu_has(X86_FEATURE_IBPB)) { + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return ", IBPB: disabled"; + case SPECTRE_V2_USER_STRICT: + return ", IBPB: always-on"; + } + } + return ""; } static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index bddd6b3cee1d..03b6b4c2238d 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -7,7 +7,6 @@ #include #include #include -#include #include #include @@ -30,6 +29,12 @@ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi */ +/* + * Use bit 0 to mangle the TIF_SPEC_IB state into the mm pointer which is + * stored in cpu_tlb_state.last_user_mm_ibpb. + */ +#define LAST_USER_MM_IBPB 0x1UL + /* * We get here when we do something requiring a TLB invalidation * but could not go invalidate all of the contexts. We do the @@ -181,17 +186,87 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) } } -static bool ibpb_needed(struct task_struct *tsk, u64 last_ctx_id) +static inline unsigned long mm_mangle_tif_spec_ib(struct task_struct *next) +{ + unsigned long next_tif = task_thread_info(next)->flags; + unsigned long ibpb = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_IBPB; + + return (unsigned long)next->mm | ibpb; +} + +static void cond_ibpb(struct task_struct *next) { + if (!next || !next->mm) + return; + /* - * Check if the current (previous) task has access to the memory - * of the @tsk (next) task. If access is denied, make sure to - * issue a IBPB to stop user->user Spectre-v2 attacks. - * - * Note: __ptrace_may_access() returns 0 or -ERRNO. + * Both, the conditional and the always IBPB mode use the mm + * pointer to avoid the IBPB when switching between tasks of the + * same process. Using the mm pointer instead of mm->context.ctx_id + * opens a hypothetical hole vs. mm_struct reuse, which is more or + * less impossible to control by an attacker. Aside of that it + * would only affect the first schedule so the theoretically + * exposed data is not really interesting. */ - return (tsk && tsk->mm && tsk->mm->context.ctx_id != last_ctx_id && - ptrace_may_access_sched(tsk, PTRACE_MODE_SPEC_IBPB)); + if (static_branch_likely(&switch_mm_cond_ibpb)) { + unsigned long prev_mm, next_mm; + + /* + * This is a bit more complex than the always mode because + * it has to handle two cases: + * + * 1) Switch from a user space task (potential attacker) + * which has TIF_SPEC_IB set to a user space task + * (potential victim) which has TIF_SPEC_IB not set. + * + * 2) Switch from a user space task (potential attacker) + * which has TIF_SPEC_IB not set to a user space task + * (potential victim) which has TIF_SPEC_IB set. + * + * This could be done by unconditionally issuing IBPB when + * a task which has TIF_SPEC_IB set is either scheduled in + * or out. Though that results in two flushes when: + * + * - the same user space task is scheduled out and later + * scheduled in again and only a kernel thread ran in + * between. + * + * - a user space task belonging to the same process is + * scheduled in after a kernel thread ran in between + * + * - a user space task belonging to the same process is + * scheduled in immediately. + * + * Optimize this with reasonably small overhead for the + * above cases. Mangle the TIF_SPEC_IB bit into the mm + * pointer of the incoming task which is stored in + * cpu_tlbstate.last_user_mm_ibpb for comparison. + */ + next_mm = mm_mangle_tif_spec_ib(next); + prev_mm = this_cpu_read(cpu_tlbstate.last_user_mm_ibpb); + + /* + * Issue IBPB only if the mm's are different and one or + * both have the IBPB bit set. + */ + if (next_mm != prev_mm && + (next_mm | prev_mm) & LAST_USER_MM_IBPB) + indirect_branch_prediction_barrier(); + + this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, next_mm); + } + + if (static_branch_unlikely(&switch_mm_always_ibpb)) { + /* + * Only flush when switching to a user space task with a + * different context than the user space task which ran + * last on this CPU. + */ + if (this_cpu_read(cpu_tlbstate.last_user_mm) != next->mm) { + indirect_branch_prediction_barrier(); + this_cpu_write(cpu_tlbstate.last_user_mm, next->mm); + } + } } void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, @@ -292,22 +367,12 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, new_asid = prev_asid; need_flush = true; } else { - u64 last_ctx_id = this_cpu_read(cpu_tlbstate.last_ctx_id); - /* * Avoid user/user BTB poisoning by flushing the branch * predictor when switching between processes. This stops * one process from doing Spectre-v2 attacks on another. - * - * As an optimization, flush indirect branches only when - * switching into a processes that can't be ptrace by the - * current one (as in such case, attacker has much more - * convenient way how to tamper with the next process than - * branch buffer poisoning). */ - if (static_cpu_has(X86_FEATURE_USE_IBPB) && - ibpb_needed(tsk, last_ctx_id)) - indirect_branch_prediction_barrier(); + cond_ibpb(tsk); if (IS_ENABLED(CONFIG_VMAP_STACK)) { /* @@ -365,14 +430,6 @@ void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, trace_tlb_flush_rcuidle(TLB_FLUSH_ON_TASK_SWITCH, 0); } - /* - * Record last user mm's context id, so we can avoid - * flushing branch buffer with IBPB if we switch back - * to the same user. - */ - if (next != &init_mm) - this_cpu_write(cpu_tlbstate.last_ctx_id, next->context.ctx_id); - /* Make sure we write CR3 before loaded_mm. */ barrier(); @@ -441,7 +498,7 @@ void initialize_tlbstate_and_flush(void) write_cr3(build_cr3(mm->pgd, 0)); /* Reinitialize tlbstate. */ - this_cpu_write(cpu_tlbstate.last_ctx_id, mm->context.ctx_id); + this_cpu_write(cpu_tlbstate.last_user_mm_ibpb, LAST_USER_MM_IBPB); this_cpu_write(cpu_tlbstate.loaded_mm_asid, 0); this_cpu_write(cpu_tlbstate.next_asid, 1); this_cpu_write(cpu_tlbstate.ctxs[0].ctx_id, mm->context.ctx_id); -- cgit v1.2.3 From e6da8bb6f9abb2628381904b24163c770e630bac Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:51 +0100 Subject: x86/speculation: Split out TIF update The update of the TIF_SSBD flag and the conditional speculation control MSR update is done in the ssb_prctl_set() function directly. The upcoming prctl support for controlling indirect branch speculation via STIBP needs the same mechanism. Split the code out and make it reusable. Reword the comment about updates for other tasks. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.652305076@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 35 +++++++++++++++++++++++------------ 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 7c946a9af947..3b65a53d2c33 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -702,10 +702,29 @@ static void ssb_select_mitigation(void) #undef pr_fmt #define pr_fmt(fmt) "Speculation prctl: " fmt -static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) { bool update; + if (on) + update = !test_and_set_tsk_thread_flag(tsk, tifbit); + else + update = test_and_clear_tsk_thread_flag(tsk, tifbit); + + /* + * Immediately update the speculation control MSRs for the current + * task, but for a non-current task delay setting the CPU + * mitigation until it is scheduled next. + * + * This can only happen for SECCOMP mitigation. For PRCTL it's + * always the current task. + */ + if (tsk == current && update) + speculation_ctrl_update_current(); +} + +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +{ if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && ssb_mode != SPEC_STORE_BYPASS_SECCOMP) return -ENXIO; @@ -716,28 +735,20 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, false); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, true); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + task_update_spec_tif(task, TIF_SSBD, true); break; default: return -ERANGE; } - - /* - * If being set on non-current task, delay setting the CPU - * mitigation until it is next scheduled. - */ - if (task == current && update) - speculation_ctrl_update_current(); - return 0; } -- cgit v1.2.3 From 6d991ba509ebcfcc908e009d1db51972a4f7a064 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 28 Nov 2018 10:56:57 +0100 Subject: x86/speculation: Prevent stale SPEC_CTRL msr content The seccomp speculation control operates on all tasks of a process, but only the current task of a process can update the MSR immediately. For the other threads the update is deferred to the next context switch. This creates the following situation with Process A and B: Process A task 2 and Process B task 1 are pinned on CPU1. Process A task 2 does not have the speculation control TIF bit set. Process B task 1 has the speculation control TIF bit set. CPU0 CPU1 MSR bit is set ProcB.T1 schedules out ProcA.T2 schedules in MSR bit is cleared ProcA.T1 seccomp_update() set TIF bit on ProcA.T2 ProcB.T1 schedules in MSR is not updated <-- FAIL This happens because the context switch code tries to avoid the MSR update if the speculation control TIF bits of the incoming and the outgoing task are the same. In the worst case ProcB.T1 and ProcA.T2 are the only tasks scheduling back and forth on CPU1, which keeps the MSR stale forever. In theory this could be remedied by IPIs, but chasing the remote task which could be migrated is complex and full of races. The straight forward solution is to avoid the asychronous update of the TIF bit and defer it to the next context switch. The speculation control state is stored in task_struct::atomic_flags by the prctl and seccomp updates already. Add a new TIF_SPEC_FORCE_UPDATE bit and set this after updating the atomic_flags. Check the bit on context switch and force a synchronous update of the speculation control if set. Use the same mechanism for updating the current task. Reported-by: Tim Chen Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.21.1811272247140.1875@nanos.tec.linutronix.de --- arch/x86/include/asm/spec-ctrl.h | 6 +----- arch/x86/include/asm/thread_info.h | 4 +++- arch/x86/kernel/cpu/bugs.c | 18 +++++++----------- arch/x86/kernel/process.c | 30 +++++++++++++++++++++++++++++- 4 files changed, 40 insertions(+), 18 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h index 27b0bce3933b..5393babc0598 100644 --- a/arch/x86/include/asm/spec-ctrl.h +++ b/arch/x86/include/asm/spec-ctrl.h @@ -83,10 +83,6 @@ static inline void speculative_store_bypass_ht_init(void) { } #endif extern void speculation_ctrl_update(unsigned long tif); - -static inline void speculation_ctrl_update_current(void) -{ - speculation_ctrl_update(current_thread_info()->flags); -} +extern void speculation_ctrl_update_current(void); #endif diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 6d201699c651..82b73b75d67c 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -84,6 +84,7 @@ struct thread_info { #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ #define TIF_SPEC_IB 9 /* Indirect branch speculation mitigation */ +#define TIF_SPEC_FORCE_UPDATE 10 /* Force speculation MSR update in context switch */ #define TIF_USER_RETURN_NOTIFY 11 /* notify kernel of userspace return */ #define TIF_UPROBE 12 /* breakpointed or singlestepping */ #define TIF_PATCH_PENDING 13 /* pending live patching update */ @@ -112,6 +113,7 @@ struct thread_info { #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) #define _TIF_SPEC_IB (1 << TIF_SPEC_IB) +#define _TIF_SPEC_FORCE_UPDATE (1 << TIF_SPEC_FORCE_UPDATE) #define _TIF_USER_RETURN_NOTIFY (1 << TIF_USER_RETURN_NOTIFY) #define _TIF_UPROBE (1 << TIF_UPROBE) #define _TIF_PATCH_PENDING (1 << TIF_PATCH_PENDING) @@ -149,7 +151,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW_BASE \ (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP| \ - _TIF_SSBD) + _TIF_SSBD | _TIF_SPEC_FORCE_UPDATE) /* * Avoid calls to __switch_to_xtra() on UP as STIBP is not evaluated. diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 3b65a53d2c33..29f40a92f5a8 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -702,14 +702,10 @@ static void ssb_select_mitigation(void) #undef pr_fmt #define pr_fmt(fmt) "Speculation prctl: " fmt -static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) +static void task_update_spec_tif(struct task_struct *tsk) { - bool update; - - if (on) - update = !test_and_set_tsk_thread_flag(tsk, tifbit); - else - update = test_and_clear_tsk_thread_flag(tsk, tifbit); + /* Force the update of the real TIF bits */ + set_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE); /* * Immediately update the speculation control MSRs for the current @@ -719,7 +715,7 @@ static void task_update_spec_tif(struct task_struct *tsk, int tifbit, bool on) * This can only happen for SECCOMP mitigation. For PRCTL it's * always the current task. */ - if (tsk == current && update) + if (tsk == current) speculation_ctrl_update_current(); } @@ -735,16 +731,16 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) if (task_spec_ssb_force_disable(task)) return -EPERM; task_clear_spec_ssb_disable(task); - task_update_spec_tif(task, TIF_SSBD, false); + task_update_spec_tif(task); break; case PR_SPEC_DISABLE: task_set_spec_ssb_disable(task); - task_update_spec_tif(task, TIF_SSBD, true); + task_update_spec_tif(task); break; case PR_SPEC_FORCE_DISABLE: task_set_spec_ssb_disable(task); task_set_spec_ssb_force_disable(task); - task_update_spec_tif(task, TIF_SSBD, true); + task_update_spec_tif(task); break; default: return -ERANGE; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index cdf8e6694f71..afbe2eb4a1c6 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -443,6 +443,18 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp, wrmsrl(MSR_IA32_SPEC_CTRL, msr); } +static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) +{ + if (test_and_clear_tsk_thread_flag(tsk, TIF_SPEC_FORCE_UPDATE)) { + if (task_spec_ssb_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SSBD); + else + clear_tsk_thread_flag(tsk, TIF_SSBD); + } + /* Return the updated threadinfo flags*/ + return task_thread_info(tsk)->flags; +} + void speculation_ctrl_update(unsigned long tif) { /* Forced update. Make sure all relevant TIF flags are different */ @@ -451,6 +463,14 @@ void speculation_ctrl_update(unsigned long tif) preempt_enable(); } +/* Called from seccomp/prctl update */ +void speculation_ctrl_update_current(void) +{ + preempt_disable(); + speculation_ctrl_update(speculation_ctrl_update_tif(current)); + preempt_enable(); +} + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) { struct thread_struct *prev, *next; @@ -482,7 +502,15 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p) if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); - __speculation_ctrl_update(tifp, tifn); + if (likely(!((tifp | tifn) & _TIF_SPEC_FORCE_UPDATE))) { + __speculation_ctrl_update(tifp, tifn); + } else { + speculation_ctrl_update_tif(prev_p); + tifn = speculation_ctrl_update_tif(next_p); + + /* Enforce MSR update to ensure consistent state */ + __speculation_ctrl_update(~tifn, tifn); + } } /* -- cgit v1.2.3 From 6893a959d7fdebbab5f5aa112c277d5a44435ba1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:52 +0100 Subject: x86/speculation: Prepare arch_smt_update() for PRCTL mode The upcoming fine grained per task STIBP control needs to be updated on CPU hotplug as well. Split out the code which controls the strict mode so the prctl control code can be added later. Mark the SMP function call argument __unused while at it. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.759457117@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 46 +++++++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 21 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 29f40a92f5a8..9cab538e10f1 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -530,40 +530,44 @@ specv2_set_mode: arch_smt_update(); } -static bool stibp_needed(void) +static void update_stibp_msr(void * __unused) { - /* Enhanced IBRS makes using STIBP unnecessary. */ - if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) - return false; - - /* Check for strict user mitigation mode */ - return spectre_v2_user == SPECTRE_V2_USER_STRICT; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); } -static void update_stibp_msr(void *info) +/* Update x86_spec_ctrl_base in case SMT state changed. */ +static void update_stibp_strict(void) { - wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + u64 mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; + + if (sched_smt_active()) + mask |= SPEC_CTRL_STIBP; + + if (mask == x86_spec_ctrl_base) + return; + + pr_info("Update user space SMT mitigation: STIBP %s\n", + mask & SPEC_CTRL_STIBP ? "always-on" : "off"); + x86_spec_ctrl_base = mask; + on_each_cpu(update_stibp_msr, NULL, 1); } void arch_smt_update(void) { - u64 mask; - - if (!stibp_needed()) + /* Enhanced IBRS implies STIBP. No update required. */ + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; mutex_lock(&spec_ctrl_mutex); - mask = x86_spec_ctrl_base & ~SPEC_CTRL_STIBP; - if (sched_smt_active()) - mask |= SPEC_CTRL_STIBP; - - if (mask != x86_spec_ctrl_base) { - pr_info("Spectre v2 cross-process SMT mitigation: %s STIBP\n", - mask & SPEC_CTRL_STIBP ? "Enabling" : "Disabling"); - x86_spec_ctrl_base = mask; - on_each_cpu(update_stibp_msr, NULL, 1); + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + break; + case SPECTRE_V2_USER_STRICT: + update_stibp_strict(); + break; } + mutex_unlock(&spec_ctrl_mutex); } -- cgit v1.2.3 From 9137bb27e60e554dab694eafa4cca241fa3a694f Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:53 +0100 Subject: x86/speculation: Add prctl() control for indirect branch speculation Add the PR_SPEC_INDIRECT_BRANCH option for the PR_GET_SPECULATION_CTRL and PR_SET_SPECULATION_CTRL prctls to allow fine grained per task control of indirect branch speculation via STIBP and IBPB. Invocations: Check indirect branch speculation status with - prctl(PR_GET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, 0, 0, 0); Enable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_ENABLE, 0, 0); Disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_DISABLE, 0, 0); Force disable indirect branch speculation with - prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH, PR_SPEC_FORCE_DISABLE, 0, 0); See Documentation/userspace-api/spec_ctrl.rst. Signed-off-by: Tim Chen Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.866780996@linutronix.de --- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 67 ++++++++++++++++++++++++++++++++++++ arch/x86/kernel/process.c | 5 +++ 3 files changed, 73 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index d4d35baf0430..2adbe7b047fa 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -232,6 +232,7 @@ enum spectre_v2_mitigation { enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, + SPECTRE_V2_USER_PRCTL, }; /* The Speculative Store Bypass disable variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 9cab538e10f1..74359fff87fd 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -566,6 +566,8 @@ void arch_smt_update(void) case SPECTRE_V2_USER_STRICT: update_stibp_strict(); break; + case SPECTRE_V2_USER_PRCTL: + break; } mutex_unlock(&spec_ctrl_mutex); @@ -752,12 +754,50 @@ static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) return 0; } +static int ib_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + switch (ctrl) { + case PR_SPEC_ENABLE: + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return 0; + /* + * Indirect branch speculation is always disabled in strict + * mode. + */ + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return -EPERM; + task_clear_spec_ib_disable(task); + task_update_spec_tif(task); + break; + case PR_SPEC_DISABLE: + case PR_SPEC_FORCE_DISABLE: + /* + * Indirect branch speculation is always allowed when + * mitigation is force disabled. + */ + if (spectre_v2_user == SPECTRE_V2_USER_NONE) + return -EPERM; + if (spectre_v2_user == SPECTRE_V2_USER_STRICT) + return 0; + task_set_spec_ib_disable(task); + if (ctrl == PR_SPEC_FORCE_DISABLE) + task_set_spec_ib_force_disable(task); + task_update_spec_tif(task); + break; + default: + return -ERANGE; + } + return 0; +} + int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_set(task, ctrl); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_set(task, ctrl); default: return -ENODEV; } @@ -790,11 +830,34 @@ static int ssb_prctl_get(struct task_struct *task) } } +static int ib_prctl_get(struct task_struct *task) +{ + if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) + return PR_SPEC_NOT_AFFECTED; + + switch (spectre_v2_user) { + case SPECTRE_V2_USER_NONE: + return PR_SPEC_ENABLE; + case SPECTRE_V2_USER_PRCTL: + if (task_spec_ib_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ib_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + case SPECTRE_V2_USER_STRICT: + return PR_SPEC_DISABLE; + default: + return PR_SPEC_NOT_AFFECTED; + } +} + int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) { switch (which) { case PR_SPEC_STORE_BYPASS: return ssb_prctl_get(task); + case PR_SPEC_INDIRECT_BRANCH: + return ib_prctl_get(task); default: return -ENODEV; } @@ -974,6 +1037,8 @@ static char *stibp_state(void) return ", STIBP: disabled"; case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; + case SPECTRE_V2_USER_PRCTL: + return ""; } return ""; } @@ -986,6 +1051,8 @@ static char *ibpb_state(void) return ", IBPB: disabled"; case SPECTRE_V2_USER_STRICT: return ", IBPB: always-on"; + case SPECTRE_V2_USER_PRCTL: + return ""; } } return ""; diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index afbe2eb4a1c6..7d31192296a8 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -450,6 +450,11 @@ static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk) set_tsk_thread_flag(tsk, TIF_SSBD); else clear_tsk_thread_flag(tsk, TIF_SSBD); + + if (task_spec_ib_disable(tsk)) + set_tsk_thread_flag(tsk, TIF_SPEC_IB); + else + clear_tsk_thread_flag(tsk, TIF_SPEC_IB); } /* Return the updated threadinfo flags*/ return task_thread_info(tsk)->flags; -- cgit v1.2.3 From 7cc765a67d8e04ef7d772425ca5a2a1e2b894c15 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:54 +0100 Subject: x86/speculation: Enable prctl mode for spectre_v2_user Now that all prerequisites are in place: - Add the prctl command line option - Default the 'auto' mode to 'prctl' - When SMT state changes, update the static key which controls the conditional STIBP evaluation on context switch. - At init update the static key which controls the conditional IBPB evaluation on context switch. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185005.958421388@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 41 ++++++++++++++++++++++++++++++++--------- 1 file changed, 32 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 74359fff87fd..d0137d10f9a6 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -255,11 +255,13 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_NONE, SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, + SPECTRE_V2_USER_CMD_PRCTL, }; static const char * const spectre_v2_user_strings[] = { [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", + [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", }; static const struct { @@ -270,6 +272,7 @@ static const struct { { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, + { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -324,12 +327,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) smt_possible = false; switch (spectre_v2_parse_user_cmdline(v2_cmd)) { - case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_NONE: goto set_mode; case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_PRCTL: + mode = SPECTRE_V2_USER_PRCTL; + break; } /* Initialize Indirect Branch Prediction Barrier */ @@ -340,6 +346,9 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) case SPECTRE_V2_USER_STRICT: static_branch_enable(&switch_mm_always_ibpb); break; + case SPECTRE_V2_USER_PRCTL: + static_branch_enable(&switch_mm_cond_ibpb); + break; default: break; } @@ -352,6 +361,12 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) return; + /* + * If SMT is not possible or STIBP is not available clear the STIPB + * mode. + */ + if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP)) + mode = SPECTRE_V2_USER_NONE; set_mode: spectre_v2_user = mode; /* Only print the STIBP mode when SMT possible */ @@ -552,6 +567,15 @@ static void update_stibp_strict(void) on_each_cpu(update_stibp_msr, NULL, 1); } +/* Update the static key controlling the evaluation of TIF_SPEC_IB */ +static void update_indir_branch_cond(void) +{ + if (sched_smt_active()) + static_branch_enable(&switch_to_cond_stibp); + else + static_branch_disable(&switch_to_cond_stibp); +} + void arch_smt_update(void) { /* Enhanced IBRS implies STIBP. No update required. */ @@ -567,6 +591,7 @@ void arch_smt_update(void) update_stibp_strict(); break; case SPECTRE_V2_USER_PRCTL: + update_indir_branch_cond(); break; } @@ -1038,7 +1063,8 @@ static char *stibp_state(void) case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; case SPECTRE_V2_USER_PRCTL: - return ""; + if (static_key_enabled(&switch_to_cond_stibp)) + return ", STIBP: conditional"; } return ""; } @@ -1046,14 +1072,11 @@ static char *stibp_state(void) static char *ibpb_state(void) { if (boot_cpu_has(X86_FEATURE_IBPB)) { - switch (spectre_v2_user) { - case SPECTRE_V2_USER_NONE: - return ", IBPB: disabled"; - case SPECTRE_V2_USER_STRICT: + if (static_key_enabled(&switch_mm_always_ibpb)) return ", IBPB: always-on"; - case SPECTRE_V2_USER_PRCTL: - return ""; - } + if (static_key_enabled(&switch_mm_cond_ibpb)) + return ", IBPB: conditional"; + return ", IBPB: disabled"; } return ""; } -- cgit v1.2.3 From 6b3e64c237c072797a9ec918654a60e3a46488e2 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:55 +0100 Subject: x86/speculation: Add seccomp Spectre v2 user space protection mode If 'prctl' mode of user space protection from spectre v2 is selected on the kernel command-line, STIBP and IBPB are applied on tasks which restrict their indirect branch speculation via prctl. SECCOMP enables the SSBD mitigation for sandboxed tasks already, so it makes sense to prevent spectre v2 user space to user space attacks as well. The Intel mitigation guide documents how STIPB works: Setting bit 1 (STIBP) of the IA32_SPEC_CTRL MSR on a logical processor prevents the predicted targets of indirect branches on any logical processor of that core from being controlled by software that executes (or executed previously) on another logical processor of the same core. Ergo setting STIBP protects the task itself from being attacked from a task running on a different hyper-thread and protects the tasks running on different hyper-threads from being attacked. While the document suggests that the branch predictors are shielded between the logical processors, the observed performance regressions suggest that STIBP simply disables the branch predictor more or less completely. Of course the document wording is vague, but the fact that there is also no requirement for issuing IBPB when STIBP is used points clearly in that direction. The kernel still issues IBPB even when STIBP is used until Intel clarifies the whole mechanism. IBPB is issued when the task switches out, so malicious sandbox code cannot mistrain the branch predictor for the next user space task on the same logical processor. Signed-off-by: Jiri Kosina Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185006.051663132@linutronix.de --- arch/x86/include/asm/nospec-branch.h | 1 + arch/x86/kernel/cpu/bugs.c | 17 ++++++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 2adbe7b047fa..032b6009baab 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -233,6 +233,7 @@ enum spectre_v2_user_mitigation { SPECTRE_V2_USER_NONE, SPECTRE_V2_USER_STRICT, SPECTRE_V2_USER_PRCTL, + SPECTRE_V2_USER_SECCOMP, }; /* The Speculative Store Bypass disable variants */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index d0137d10f9a6..c9e304960534 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -256,12 +256,14 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, SPECTRE_V2_USER_CMD_PRCTL, + SPECTRE_V2_USER_CMD_SECCOMP, }; static const char * const spectre_v2_user_strings[] = { [SPECTRE_V2_USER_NONE] = "User space: Vulnerable", [SPECTRE_V2_USER_STRICT] = "User space: Mitigation: STIBP protection", [SPECTRE_V2_USER_PRCTL] = "User space: Mitigation: STIBP via prctl", + [SPECTRE_V2_USER_SECCOMP] = "User space: Mitigation: STIBP via seccomp and prctl", }; static const struct { @@ -273,6 +275,7 @@ static const struct { { "off", SPECTRE_V2_USER_CMD_NONE, false }, { "on", SPECTRE_V2_USER_CMD_FORCE, true }, { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, + { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -332,10 +335,16 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; - case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_PRCTL: mode = SPECTRE_V2_USER_PRCTL; break; + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: + if (IS_ENABLED(CONFIG_SECCOMP)) + mode = SPECTRE_V2_USER_SECCOMP; + else + mode = SPECTRE_V2_USER_PRCTL; + break; } /* Initialize Indirect Branch Prediction Barrier */ @@ -347,6 +356,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) static_branch_enable(&switch_mm_always_ibpb); break; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: static_branch_enable(&switch_mm_cond_ibpb); break; default: @@ -591,6 +601,7 @@ void arch_smt_update(void) update_stibp_strict(); break; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: update_indir_branch_cond(); break; } @@ -833,6 +844,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task) { if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); + if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP) + ib_prctl_set(task, PR_SPEC_FORCE_DISABLE); } #endif @@ -864,6 +877,7 @@ static int ib_prctl_get(struct task_struct *task) case SPECTRE_V2_USER_NONE: return PR_SPEC_ENABLE; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: if (task_spec_ib_force_disable(task)) return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; if (task_spec_ib_disable(task)) @@ -1063,6 +1077,7 @@ static char *stibp_state(void) case SPECTRE_V2_USER_STRICT: return ", STIBP: forced"; case SPECTRE_V2_USER_PRCTL: + case SPECTRE_V2_USER_SECCOMP: if (static_key_enabled(&switch_to_cond_stibp)) return ", STIBP: conditional"; } -- cgit v1.2.3 From 55a974021ec952ee460dc31ca08722158639de72 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 25 Nov 2018 19:33:56 +0100 Subject: x86/speculation: Provide IBPB always command line options Provide the possibility to enable IBPB always in combination with 'prctl' and 'seccomp'. Add the extra command line options and rework the IBPB selection to evaluate the command instead of the mode selected by the STIPB switch case. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Cc: Peter Zijlstra Cc: Andy Lutomirski Cc: Linus Torvalds Cc: Jiri Kosina Cc: Tom Lendacky Cc: Josh Poimboeuf Cc: Andrea Arcangeli Cc: David Woodhouse Cc: Tim Chen Cc: Andi Kleen Cc: Dave Hansen Cc: Casey Schaufler Cc: Asit Mallick Cc: Arjan van de Ven Cc: Jon Masters Cc: Waiman Long Cc: Greg KH Cc: Dave Stewart Cc: Kees Cook Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/20181125185006.144047038@linutronix.de --- arch/x86/kernel/cpu/bugs.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c9e304960534..500278f5308e 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -256,7 +256,9 @@ enum spectre_v2_user_cmd { SPECTRE_V2_USER_CMD_AUTO, SPECTRE_V2_USER_CMD_FORCE, SPECTRE_V2_USER_CMD_PRCTL, + SPECTRE_V2_USER_CMD_PRCTL_IBPB, SPECTRE_V2_USER_CMD_SECCOMP, + SPECTRE_V2_USER_CMD_SECCOMP_IBPB, }; static const char * const spectre_v2_user_strings[] = { @@ -271,11 +273,13 @@ static const struct { enum spectre_v2_user_cmd cmd; bool secure; } v2_user_options[] __initdata = { - { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, - { "off", SPECTRE_V2_USER_CMD_NONE, false }, - { "on", SPECTRE_V2_USER_CMD_FORCE, true }, - { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, - { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, + { "auto", SPECTRE_V2_USER_CMD_AUTO, false }, + { "off", SPECTRE_V2_USER_CMD_NONE, false }, + { "on", SPECTRE_V2_USER_CMD_FORCE, true }, + { "prctl", SPECTRE_V2_USER_CMD_PRCTL, false }, + { "prctl,ibpb", SPECTRE_V2_USER_CMD_PRCTL_IBPB, false }, + { "seccomp", SPECTRE_V2_USER_CMD_SECCOMP, false }, + { "seccomp,ibpb", SPECTRE_V2_USER_CMD_SECCOMP_IBPB, false }, }; static void __init spec_v2_user_print_cond(const char *reason, bool secure) @@ -321,6 +325,7 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) { enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE; bool smt_possible = IS_ENABLED(CONFIG_SMP); + enum spectre_v2_user_cmd cmd; if (!boot_cpu_has(X86_FEATURE_IBPB) && !boot_cpu_has(X86_FEATURE_STIBP)) return; @@ -329,17 +334,20 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) cpu_smt_control == CPU_SMT_NOT_SUPPORTED) smt_possible = false; - switch (spectre_v2_parse_user_cmdline(v2_cmd)) { + cmd = spectre_v2_parse_user_cmdline(v2_cmd); + switch (cmd) { case SPECTRE_V2_USER_CMD_NONE: goto set_mode; case SPECTRE_V2_USER_CMD_FORCE: mode = SPECTRE_V2_USER_STRICT; break; case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: mode = SPECTRE_V2_USER_PRCTL; break; case SPECTRE_V2_USER_CMD_AUTO: case SPECTRE_V2_USER_CMD_SECCOMP: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: if (IS_ENABLED(CONFIG_SECCOMP)) mode = SPECTRE_V2_USER_SECCOMP; else @@ -351,12 +359,15 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) if (boot_cpu_has(X86_FEATURE_IBPB)) { setup_force_cpu_cap(X86_FEATURE_USE_IBPB); - switch (mode) { - case SPECTRE_V2_USER_STRICT: + switch (cmd) { + case SPECTRE_V2_USER_CMD_FORCE: + case SPECTRE_V2_USER_CMD_PRCTL_IBPB: + case SPECTRE_V2_USER_CMD_SECCOMP_IBPB: static_branch_enable(&switch_mm_always_ibpb); break; - case SPECTRE_V2_USER_PRCTL: - case SPECTRE_V2_USER_SECCOMP: + case SPECTRE_V2_USER_CMD_PRCTL: + case SPECTRE_V2_USER_CMD_AUTO: + case SPECTRE_V2_USER_CMD_SECCOMP: static_branch_enable(&switch_mm_cond_ibpb); break; default: @@ -364,7 +375,8 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd) } pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n", - mode == SPECTRE_V2_USER_STRICT ? "always-on" : "conditional"); + static_key_enabled(&switch_mm_always_ibpb) ? + "always-on" : "conditional"); } /* If enhanced IBRS is enabled no STIPB required */ -- cgit v1.2.3 From ce8c80c536dac9f325a051b30bf7730ee505eddc Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 19 Nov 2018 11:27:28 +0000 Subject: arm64: Add workaround for Cortex-A76 erratum 1286807 On the affected Cortex-A76 cores (r0p0 to r3p0), if a virtual address for a cacheable mapping of a location is being accessed by a core while another core is remapping the virtual address to a new physical page using the recommended break-before-make sequence, then under very rare circumstances TLBI+DSB completes before a read using the translation being invalidated has been observed by other observers. The workaround repeats the TLBI+DSB operation and is shared with the Qualcomm Falkor erratum 1009 Reviewed-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 25 +++++++++++++++++++++++++ arch/arm64/include/asm/tlbflush.h | 4 ++-- arch/arm64/kernel/cpu_errata.c | 20 +++++++++++++++++--- 3 files changed, 44 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 787d7850e064..ea2ab0330e3a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -497,6 +497,24 @@ config ARM64_ERRATUM_1188873 If unsure, say Y. +config ARM64_ERRATUM_1286807 + bool "Cortex-A76: Modification of the translation table for a virtual address might lead to read-after-read ordering violation" + default y + select ARM64_WORKAROUND_REPEAT_TLBI + help + This option adds workaround for ARM Cortex-A76 erratum 1286807 + + On the affected Cortex-A76 cores (r0p0 to r3p0), if a virtual + address for a cacheable mapping of a location is being + accessed by a core while another core is remapping the virtual + address to a new physical page using the recommended + break-before-make sequence, then under very rare circumstances + TLBI+DSB completes before a read using the translation being + invalidated has been observed by other observers. The + workaround repeats the TLBI+DSB operation. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y @@ -566,9 +584,16 @@ config QCOM_FALKOR_ERRATUM_1003 is unchanged. Work around the erratum by invalidating the walk cache entries for the trampoline before entering the kernel proper. +config ARM64_WORKAROUND_REPEAT_TLBI + bool + help + Enable the repeat TLBI workaround for Falkor erratum 1009 and + Cortex-A76 erratum 1286807. + config QCOM_FALKOR_ERRATUM_1009 bool "Falkor E1009: Prematurely complete a DSB after a TLBI" default y + select ARM64_WORKAROUND_REPEAT_TLBI help On Falkor v1, the CPU may prematurely complete a DSB following a TLBI xxIS invalidate maintenance operation. Repeat the TLBI operation diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index c3c0387aee18..5dfd23897dea 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -41,14 +41,14 @@ ALTERNATIVE("nop\n nop", \ "dsb ish\n tlbi " #op, \ ARM64_WORKAROUND_REPEAT_TLBI, \ - CONFIG_QCOM_FALKOR_ERRATUM_1009) \ + CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ : : ) #define __TLBI_1(op, arg) asm ("tlbi " #op ", %0\n" \ ALTERNATIVE("nop\n nop", \ "dsb ish\n tlbi " #op ", %0", \ ARM64_WORKAROUND_REPEAT_TLBI, \ - CONFIG_QCOM_FALKOR_ERRATUM_1009) \ + CONFIG_ARM64_WORKAROUND_REPEAT_TLBI) \ : : "r" (arg)) #define __TLBI_N(op, arg, n, ...) __TLBI_##n(op, arg) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a509e35132d2..6ad715d67df8 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -570,6 +570,20 @@ static const struct midr_range arm64_harden_el2_vectors[] = { #endif +#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI + +static const struct midr_range arm64_repeat_tlbi_cpus[] = { +#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 + MIDR_RANGE(MIDR_QCOM_FALKOR_V1, 0, 0, 0, 0), +#endif +#ifdef CONFIG_ARM64_ERRATUM_1286807 + MIDR_RANGE(MIDR_CORTEX_A76, 0, 0, 3, 0), +#endif + {}, +}; + +#endif + const struct arm64_cpu_capabilities arm64_errata[] = { #if defined(CONFIG_ARM64_ERRATUM_826319) || \ defined(CONFIG_ARM64_ERRATUM_827319) || \ @@ -695,11 +709,11 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .matches = is_kryo_midr, }, #endif -#ifdef CONFIG_QCOM_FALKOR_ERRATUM_1009 +#ifdef CONFIG_ARM64_WORKAROUND_REPEAT_TLBI { - .desc = "Qualcomm Technologies Falkor erratum 1009", + .desc = "Qualcomm erratum 1009, ARM erratum 1286807", .capability = ARM64_WORKAROUND_REPEAT_TLBI, - ERRATA_MIDR_REV(MIDR_QCOM_FALKOR_V1, 0, 0), + ERRATA_MIDR_RANGE_LIST(arm64_repeat_tlbi_cpus), }, #endif #ifdef CONFIG_ARM64_ERRATUM_858921 -- cgit v1.2.3 From 874bfc6e5422d2421f7e4d5ea318d30e91679dfe Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Thu, 29 Nov 2018 14:39:33 +0900 Subject: arm64: ftrace: Fix to enable syscall events on arm64 Since commit 4378a7d4be30 ("arm64: implement syscall wrappers") introduced "__arm64_" prefix to all syscall wrapper symbols in sys_call_table, syscall tracer can not find corresponding metadata from syscall name. In the result, we have no syscall ftrace events on arm64 kernel, and some bpf testcases are failed on arm64. To fix this issue, this introduces custom arch_syscall_match_sym_name() which skips first 8 bytes when comparing the syscall and symbol names. Fixes: 4378a7d4be30 ("arm64: implement syscall wrappers") Reported-by: Naresh Kamboju Signed-off-by: Masami Hiramatsu Acked-by: Will Deacon Tested-by: Naresh Kamboju Cc: stable@vger.kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/ftrace.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch') diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index caa955f10e19..fac54fb050d0 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -56,6 +56,19 @@ static inline bool arch_trace_is_compat_syscall(struct pt_regs *regs) { return is_compat_task(); } + +#define ARCH_HAS_SYSCALL_MATCH_SYM_NAME + +static inline bool arch_syscall_match_sym_name(const char *sym, + const char *name) +{ + /* + * Since all syscall functions have __arm64_ prefix, we must skip it. + * However, as we described above, we decided to ignore compat + * syscalls, so we don't care about __arm64_compat_ prefix here. + */ + return !strcmp(sym + 8, name); +} #endif /* ifndef __ASSEMBLY__ */ #endif /* __ASM_FTRACE_H */ -- cgit v1.2.3 From a7b403104e17209ea71eea59d4a71bf9e0d8cb83 Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Fri, 23 Nov 2018 17:24:51 +0100 Subject: xen/x86: add diagnostic printout to xen_mc_flush() in case of error Failure of an element of a Xen multicall is signalled via a WARN() only if the kernel is compiled with MC_DEBUG. It is impossible to know which element failed and why it did so. Change that by printing the related information even without MC_DEBUG, even if maybe in some limited form (e.g. without information which caller produced the failing element). Move the printing out of the switch statement in order to have the same information for a single call. Signed-off-by: Juergen Gross Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/multicalls.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/multicalls.c b/arch/x86/xen/multicalls.c index 2bce7958ce8b..0766a08bdf45 100644 --- a/arch/x86/xen/multicalls.c +++ b/arch/x86/xen/multicalls.c @@ -69,6 +69,11 @@ void xen_mc_flush(void) trace_xen_mc_flush(b->mcidx, b->argidx, b->cbidx); +#if MC_DEBUG + memcpy(b->debug, b->entries, + b->mcidx * sizeof(struct multicall_entry)); +#endif + switch (b->mcidx) { case 0: /* no-op */ @@ -87,32 +92,34 @@ void xen_mc_flush(void) break; default: -#if MC_DEBUG - memcpy(b->debug, b->entries, - b->mcidx * sizeof(struct multicall_entry)); -#endif - if (HYPERVISOR_multicall(b->entries, b->mcidx) != 0) BUG(); for (i = 0; i < b->mcidx; i++) if (b->entries[i].result < 0) ret++; + } + if (WARN_ON(ret)) { + pr_err("%d of %d multicall(s) failed: cpu %d\n", + ret, b->mcidx, smp_processor_id()); + for (i = 0; i < b->mcidx; i++) { + if (b->entries[i].result < 0) { #if MC_DEBUG - if (ret) { - printk(KERN_ERR "%d multicall(s) failed: cpu %d\n", - ret, smp_processor_id()); - dump_stack(); - for (i = 0; i < b->mcidx; i++) { - printk(KERN_DEBUG " call %2d/%d: op=%lu arg=[%lx] result=%ld\t%pF\n", - i+1, b->mcidx, + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\t%pF\n", + i + 1, b->debug[i].op, b->debug[i].args[0], b->entries[i].result, b->caller[i]); +#else + pr_err(" call %2d: op=%lu arg=[%lx] result=%ld\n", + i + 1, + b->entries[i].op, + b->entries[i].args[0], + b->entries[i].result); +#endif } } -#endif } b->mcidx = 0; @@ -126,8 +133,6 @@ void xen_mc_flush(void) b->cbidx = 0; local_irq_restore(flags); - - WARN_ON(ret); } struct multicall_space __xen_mc_entry(size_t args) -- cgit v1.2.3 From 123664101aa2156d05251704fc63f9bcbf77741a Mon Sep 17 00:00:00 2001 From: Igor Druzhinin Date: Tue, 27 Nov 2018 20:58:21 +0000 Subject: Revert "xen/balloon: Mark unallocated host memory as UNUSABLE" This reverts commit b3cf8528bb21febb650a7ecbf080d0647be40b9f. That commit unintentionally broke Xen balloon memory hotplug with "hotplug_unpopulated" set to 1. As long as "System RAM" resource got assigned under a new "Unusable memory" resource in IO/Mem tree any attempt to online this memory would fail due to general kernel restrictions on having "System RAM" resources as 1st level only. The original issue that commit has tried to workaround fa564ad96366 ("x86/PCI: Enable a 64bit BAR on AMD Family 15h (Models 00-1f, 30-3f, 60-7f)") also got amended by the following 03a551734 ("x86/PCI: Move and shrink AMD 64-bit window to avoid conflict") which made the original fix to Xen ballooning unnecessary. Signed-off-by: Igor Druzhinin Reviewed-by: Boris Ostrovsky Signed-off-by: Juergen Gross --- arch/x86/xen/enlighten.c | 78 ------------------------------------------------ arch/x86/xen/setup.c | 6 ++-- 2 files changed, 4 insertions(+), 80 deletions(-) (limited to 'arch') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 67b2f31a1265..aa1cc483bd2a 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -10,7 +10,6 @@ #include #include #include -#include #include #include @@ -346,80 +345,3 @@ void xen_arch_unregister_cpu(int num) } EXPORT_SYMBOL(xen_arch_unregister_cpu); #endif - -#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG -void __init arch_xen_balloon_init(struct resource *hostmem_resource) -{ - struct xen_memory_map memmap; - int rc; - unsigned int i, last_guest_ram; - phys_addr_t max_addr = PFN_PHYS(max_pfn); - struct e820_table *xen_e820_table; - const struct e820_entry *entry; - struct resource *res; - - if (!xen_initial_domain()) - return; - - xen_e820_table = kmalloc(sizeof(*xen_e820_table), GFP_KERNEL); - if (!xen_e820_table) - return; - - memmap.nr_entries = ARRAY_SIZE(xen_e820_table->entries); - set_xen_guest_handle(memmap.buffer, xen_e820_table->entries); - rc = HYPERVISOR_memory_op(XENMEM_machine_memory_map, &memmap); - if (rc) { - pr_warn("%s: Can't read host e820 (%d)\n", __func__, rc); - goto out; - } - - last_guest_ram = 0; - for (i = 0; i < memmap.nr_entries; i++) { - if (xen_e820_table->entries[i].addr >= max_addr) - break; - if (xen_e820_table->entries[i].type == E820_TYPE_RAM) - last_guest_ram = i; - } - - entry = &xen_e820_table->entries[last_guest_ram]; - if (max_addr >= entry->addr + entry->size) - goto out; /* No unallocated host RAM. */ - - hostmem_resource->start = max_addr; - hostmem_resource->end = entry->addr + entry->size; - - /* - * Mark non-RAM regions between the end of dom0 RAM and end of host RAM - * as unavailable. The rest of that region can be used for hotplug-based - * ballooning. - */ - for (; i < memmap.nr_entries; i++) { - entry = &xen_e820_table->entries[i]; - - if (entry->type == E820_TYPE_RAM) - continue; - - if (entry->addr >= hostmem_resource->end) - break; - - res = kzalloc(sizeof(*res), GFP_KERNEL); - if (!res) - goto out; - - res->name = "Unavailable host RAM"; - res->start = entry->addr; - res->end = (entry->addr + entry->size < hostmem_resource->end) ? - entry->addr + entry->size : hostmem_resource->end; - rc = insert_resource(hostmem_resource, res); - if (rc) { - pr_warn("%s: Can't insert [%llx - %llx) (%d)\n", - __func__, res->start, res->end, rc); - kfree(res); - goto out; - } - } - - out: - kfree(xen_e820_table); -} -#endif /* CONFIG_XEN_BALLOON_MEMORY_HOTPLUG */ diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index 1163e33121fb..075ed47993bb 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -808,6 +808,7 @@ char * __init xen_memory_setup(void) addr = xen_e820_table.entries[0].addr; size = xen_e820_table.entries[0].size; while (i < xen_e820_table.nr_entries) { + bool discard = false; chunk_size = size; type = xen_e820_table.entries[i].type; @@ -823,10 +824,11 @@ char * __init xen_memory_setup(void) xen_add_extra_mem(pfn_s, n_pfns); xen_max_p2m_pfn = pfn_s + n_pfns; } else - type = E820_TYPE_UNUSABLE; + discard = true; } - xen_align_and_add_e820_region(addr, chunk_size, type); + if (!discard) + xen_align_and_add_e820_region(addr, chunk_size, type); addr += chunk_size; size -= chunk_size; -- cgit v1.2.3 From e25b6783c7b1bb79103d4617336879423f86b05e Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Mon, 3 Dec 2018 19:37:08 +0100 Subject: ARM: dts: bcm2837: Fix polarity of wifi reset GPIOs The commit b1b8f45b3130 ("ARM: dts: bcm2837: Add missing GPIOs of Expander") introduced a wifi power sequence. Unfortunately the polarity of the reset GPIOs were wrong and broke the wifi support on Raspberry Pi 3 B and later in 3 B+. This wasn't discovered before since the power sequence takes only effect in case the relevant MMC driver is compiled as a module. Fixes: b1b8f45b3130 ("ARM: dts: bcm2837: Add missing GPIOs of Expander") Cc: stable@vger.kernel.org Reported-by: Matthias Lueschner Link: https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=911443 Signed-off-by: Stefan Wahren Reviewed-by: Eric Anholt Signed-off-by: Florian Fainelli --- arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts | 2 +- arch/arm/boot/dts/bcm2837-rpi-3-b.dts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts index 4adb85e66be3..93762244be7f 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-b-plus.dts @@ -31,7 +31,7 @@ wifi_pwrseq: wifi-pwrseq { compatible = "mmc-pwrseq-simple"; - reset-gpios = <&expgpio 1 GPIO_ACTIVE_HIGH>; + reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; }; }; diff --git a/arch/arm/boot/dts/bcm2837-rpi-3-b.dts b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts index c318bcbc6ba7..89e6fd547c75 100644 --- a/arch/arm/boot/dts/bcm2837-rpi-3-b.dts +++ b/arch/arm/boot/dts/bcm2837-rpi-3-b.dts @@ -26,7 +26,7 @@ wifi_pwrseq: wifi-pwrseq { compatible = "mmc-pwrseq-simple"; - reset-gpios = <&expgpio 1 GPIO_ACTIVE_HIGH>; + reset-gpios = <&expgpio 1 GPIO_ACTIVE_LOW>; }; }; -- cgit v1.2.3 From 76f4e2c3b6a560cdd7a75b87df543e04d05a9e5f Mon Sep 17 00:00:00 2001 From: Lubomir Rintel Date: Sun, 2 Dec 2018 12:12:24 +0100 Subject: ARM: mmp/mmp2: fix cpu_is_mmp2() on mmp2-dt cpu_is_mmp2() was equivalent to cpu_is_pj4(), wouldn't be correct for multiplatform kernels. Fix it by also considering mmp_chip_id, as is done for cpu_is_pxa168() and cpu_is_pxa910() above. Moreover, it is only available with CONFIG_CPU_MMP2 and thus doesn't work on DT-based MMP2 machines. Enable it on CONFIG_MACH_MMP2_DT too. Note: CONFIG_CPU_MMP2 is only used for machines that use board files instead of DT. It should perhaps be renamed. I'm not doing it now, because I don't have a better idea. Signed-off-by: Lubomir Rintel Acked-by: Arnd Bergmann Cc: stable@vger.kernel.org Signed-off-by: Olof Johansson --- arch/arm/mach-mmp/cputype.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-mmp/cputype.h b/arch/arm/mach-mmp/cputype.h index 446edaeb78a7..a96abcf521b4 100644 --- a/arch/arm/mach-mmp/cputype.h +++ b/arch/arm/mach-mmp/cputype.h @@ -44,10 +44,12 @@ static inline int cpu_is_pxa910(void) #define cpu_is_pxa910() (0) #endif -#ifdef CONFIG_CPU_MMP2 +#if defined(CONFIG_CPU_MMP2) || defined(CONFIG_MACH_MMP2_DT) static inline int cpu_is_mmp2(void) { - return (((read_cpuid_id() >> 8) & 0xff) == 0x58); + return (((read_cpuid_id() >> 8) & 0xff) == 0x58) && + (((mmp_chip_id & 0xfff) == 0x410) || + ((mmp_chip_id & 0xfff) == 0x610)); } #else #define cpu_is_mmp2() (0) -- cgit v1.2.3 From f3b2f758ec1e6cdb13c925647cbd8ad4938b78fb Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 3 Dec 2018 13:12:48 -0600 Subject: ARM: dts: realview: Fix some more duplicate regulator nodes There's a bug in dtc in checking for duplicate node names when there's another section (e.g. "/ { };"). In this case, skeleton.dtsi provides another section. Upon removal of skeleton.dtsi, the dtb fails to build due to a duplicate node 'fixedregulator@0'. As both nodes were pretty much the same 3.3V fixed regulator, it hasn't really mattered. Fix this by renaming the nodes to something unique. In the process, drop the unit-address which shouldn't be present wtihout reg property. Signed-off-by: Rob Herring Reviewed-by: Linus Walleij Signed-off-by: Olof Johansson --- arch/arm/boot/dts/arm-realview-pb1176.dts | 4 ++-- arch/arm/boot/dts/arm-realview-pb11mp.dts | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/arm-realview-pb1176.dts b/arch/arm/boot/dts/arm-realview-pb1176.dts index f2a1d25eb6cf..83e0fbc4a1a1 100644 --- a/arch/arm/boot/dts/arm-realview-pb1176.dts +++ b/arch/arm/boot/dts/arm-realview-pb1176.dts @@ -45,7 +45,7 @@ }; /* The voltage to the MMC card is hardwired at 3.3V */ - vmmc: fixedregulator@0 { + vmmc: regulator-vmmc { compatible = "regulator-fixed"; regulator-name = "vmmc"; regulator-min-microvolt = <3300000>; @@ -53,7 +53,7 @@ regulator-boot-on; }; - veth: fixedregulator@0 { + veth: regulator-veth { compatible = "regulator-fixed"; regulator-name = "veth"; regulator-min-microvolt = <3300000>; diff --git a/arch/arm/boot/dts/arm-realview-pb11mp.dts b/arch/arm/boot/dts/arm-realview-pb11mp.dts index 7f9cbdf33a51..2f6aa24a0b67 100644 --- a/arch/arm/boot/dts/arm-realview-pb11mp.dts +++ b/arch/arm/boot/dts/arm-realview-pb11mp.dts @@ -145,7 +145,7 @@ }; /* The voltage to the MMC card is hardwired at 3.3V */ - vmmc: fixedregulator@0 { + vmmc: regulator-vmmc { compatible = "regulator-fixed"; regulator-name = "vmmc"; regulator-min-microvolt = <3300000>; @@ -153,7 +153,7 @@ regulator-boot-on; }; - veth: fixedregulator@0 { + veth: regulator-veth { compatible = "regulator-fixed"; regulator-name = "veth"; regulator-min-microvolt = <3300000>; -- cgit v1.2.3 From c3b9ab5db11d8098ca7674175f12ab21cdce1bbb Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 30 Nov 2018 08:31:29 -0200 Subject: ARM: dts: imx7d-pico: Describe the Wifi clock The Wifi chip should be clocked by a 32kHz clock coming from i.MX7D CLKO2 output pin, so describe the pinmux and clock hierarchy in the device tree to allow the Wifi chip to be properly clocked. Managed to successfully test Wifi with such change. Used the standard nvram.txt file provided by TechNexion, which selects an external 32kHz clock for the Wifi chip by default. Fixes: 99a52450c707 ("ARM: dts: imx7d-pico: Add Wifi support") Suggested-by: Arend van Spriel Tested-by: Otavio Salvador Signed-off-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7d-pico.dtsi | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx7d-pico.dtsi b/arch/arm/boot/dts/imx7d-pico.dtsi index 21973eb55671..f27b3849d3ff 100644 --- a/arch/arm/boot/dts/imx7d-pico.dtsi +++ b/arch/arm/boot/dts/imx7d-pico.dtsi @@ -100,6 +100,19 @@ regulator-min-microvolt = <1800000>; regulator-max-microvolt = <1800000>; }; + + usdhc2_pwrseq: usdhc2_pwrseq { + compatible = "mmc-pwrseq-simple"; + clocks = <&clks IMX7D_CLKO2_ROOT_DIV>; + clock-names = "ext_clock"; + }; +}; + +&clks { + assigned-clocks = <&clks IMX7D_CLKO2_ROOT_SRC>, + <&clks IMX7D_CLKO2_ROOT_DIV>; + assigned-clock-parents = <&clks IMX7D_CKIL>; + assigned-clock-rates = <0>, <32768>; }; &i2c4 { @@ -199,12 +212,13 @@ &usdhc2 { /* Wifi SDIO */ pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usdhc2>; + pinctrl-0 = <&pinctrl_usdhc2 &pinctrl_wifi_clk>; no-1-8-v; non-removable; keep-power-in-suspend; wakeup-source; vmmc-supply = <®_ap6212>; + mmc-pwrseq = <&usdhc2_pwrseq>; status = "okay"; }; @@ -301,6 +315,12 @@ }; &iomuxc_lpsr { + pinctrl_wifi_clk: wificlkgrp { + fsl,pins = < + MX7D_PAD_LPSR_GPIO1_IO03__CCM_CLKO2 0x7d + >; + }; + pinctrl_wdog: wdoggrp { fsl,pins = < MX7D_PAD_LPSR_GPIO1_IO00__WDOG1_WDOG_B 0x74 -- cgit v1.2.3 From dae522045094ebfa9a10cf7951a8f79b02da8e15 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Tue, 4 Dec 2018 17:46:02 +0200 Subject: Revert "arm64: dts: marvell: add CPU Idle power state support on Armada 7K/8K" This reverts commit 8ed46368776b3bc93d74c1f8f2bfb9fd8a9ad805. This commit breaks boot on Armada 8K based systems. Reverting it makes affected systems boot again. Reported-by: Sergey Matyukevich Signed-off-by: Baruch Siach Signed-off-by: Gregory CLEMENT --- arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi | 4 ---- arch/arm64/boot/dts/marvell/armada-ap806.dtsi | 27 ---------------------- 2 files changed, 31 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi index 64632c873888..01ea662afba8 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap806-quad.dtsi @@ -20,28 +20,24 @@ compatible = "arm,cortex-a72", "arm,armv8"; reg = <0x000>; enable-method = "psci"; - cpu-idle-states = <&CPU_SLEEP_0>; }; cpu1: cpu@1 { device_type = "cpu"; compatible = "arm,cortex-a72", "arm,armv8"; reg = <0x001>; enable-method = "psci"; - cpu-idle-states = <&CPU_SLEEP_0>; }; cpu2: cpu@100 { device_type = "cpu"; compatible = "arm,cortex-a72", "arm,armv8"; reg = <0x100>; enable-method = "psci"; - cpu-idle-states = <&CPU_SLEEP_0>; }; cpu3: cpu@101 { device_type = "cpu"; compatible = "arm,cortex-a72", "arm,armv8"; reg = <0x101>; enable-method = "psci"; - cpu-idle-states = <&CPU_SLEEP_0>; }; }; }; diff --git a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi index 073610ac0a53..7d94c1fa592a 100644 --- a/arch/arm64/boot/dts/marvell/armada-ap806.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-ap806.dtsi @@ -28,33 +28,6 @@ method = "smc"; }; - cpus { - #address-cells = <1>; - #size-cells = <0>; - - idle_states { - entry_method = "arm,pcsi"; - - CPU_SLEEP_0: cpu-sleep-0 { - compatible = "arm,idle-state"; - local-timer-stop; - arm,psci-suspend-param = <0x0010000>; - entry-latency-us = <80>; - exit-latency-us = <160>; - min-residency-us = <320>; - }; - - CLUSTER_SLEEP_0: cluster-sleep-0 { - compatible = "arm,idle-state"; - local-timer-stop; - arm,psci-suspend-param = <0x1010000>; - entry-latency-us = <500>; - exit-latency-us = <1000>; - min-residency-us = <2500>; - }; - }; - }; - ap806 { #address-cells = <2>; #size-cells = <2>; -- cgit v1.2.3 From 1e434b703248580b7aaaf8a115d93e682f57d29f Mon Sep 17 00:00:00 2001 From: Anson Huang Date: Tue, 4 Dec 2018 03:17:45 +0000 Subject: ARM: imx: update the cpu power up timing setting on i.mx6sx The sw2iso count should cover ARM LDO ramp-up time, the MAX ARM LDO ramp-up time may be up to more than 100us on some boards, this patch sets sw2iso to 0xf (~384us) which is the reset value, and it is much more safe to cover different boards, since we have observed that some customer boards failed with current setting of 0x2. Fixes: 05136f0897b5 ("ARM: imx: support arm power off in cpuidle for i.mx6sx") Signed-off-by: Anson Huang Reviewed-by: Fabio Estevam Signed-off-by: Shawn Guo --- arch/arm/mach-imx/cpuidle-imx6sx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/mach-imx/cpuidle-imx6sx.c b/arch/arm/mach-imx/cpuidle-imx6sx.c index 243a108a940b..fd0053e47a15 100644 --- a/arch/arm/mach-imx/cpuidle-imx6sx.c +++ b/arch/arm/mach-imx/cpuidle-imx6sx.c @@ -110,7 +110,7 @@ int __init imx6sx_cpuidle_init(void) * except for power up sw2iso which need to be * larger than LDO ramp up time. */ - imx_gpc_set_arm_power_up_timing(2, 1); + imx_gpc_set_arm_power_up_timing(0xf, 1); imx_gpc_set_arm_power_down_timing(1, 1); return cpuidle_register(&imx6sx_cpuidle_driver, NULL); -- cgit v1.2.3 From f15096f12a4e9340168df5fdd9201aa8ed60d59e Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Wed, 5 Dec 2018 09:05:30 -0200 Subject: ARM: dts: imx7d-nitrogen7: Fix the description of the Wifi clock According to bindings/regulator/fixed-regulator.txt the 'clocks' and 'clock-names' properties are not valid ones. In order to turn on the Wifi clock the correct location for describing the CLKO2 clock is via a mmc-pwrseq handle, so do it accordingly. Fixes: 56354959cfec ("ARM: dts: imx: add Boundary Devices Nitrogen7 board") Signed-off-by: Fabio Estevam Acked-by: Troy Kisky Signed-off-by: Shawn Guo --- arch/arm/boot/dts/imx7d-nitrogen7.dts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx7d-nitrogen7.dts b/arch/arm/boot/dts/imx7d-nitrogen7.dts index d8aac4a2d02a..177d21fdeb28 100644 --- a/arch/arm/boot/dts/imx7d-nitrogen7.dts +++ b/arch/arm/boot/dts/imx7d-nitrogen7.dts @@ -86,13 +86,17 @@ compatible = "regulator-fixed"; regulator-min-microvolt = <3300000>; regulator-max-microvolt = <3300000>; - clocks = <&clks IMX7D_CLKO2_ROOT_DIV>; - clock-names = "slow"; regulator-name = "reg_wlan"; startup-delay-us = <70000>; gpio = <&gpio4 21 GPIO_ACTIVE_HIGH>; enable-active-high; }; + + usdhc2_pwrseq: usdhc2_pwrseq { + compatible = "mmc-pwrseq-simple"; + clocks = <&clks IMX7D_CLKO2_ROOT_DIV>; + clock-names = "ext_clock"; + }; }; &adc1 { @@ -375,6 +379,7 @@ bus-width = <4>; non-removable; vmmc-supply = <®_wlan>; + mmc-pwrseq = <&usdhc2_pwrseq>; cap-power-off-card; keep-power-in-suspend; status = "okay"; -- cgit v1.2.3 From 690e16bada6029694740d5501025faf483d14339 Mon Sep 17 00:00:00 2001 From: Oskari Lemmela Date: Sat, 1 Dec 2018 12:08:16 +0200 Subject: arm64: dts: rockchip: fix rk3399-rockpro64 regulator gpios Rockpro64 is not able boot if GPIO1_C1 pin is pulled high before loading linux kernel. In rockpro64 GPIO1_C1 pin is connected vdd_cpu_b regulator VSEL pin. Pin should be pulled down in normal operation and pulled high in suspend. PMIC LDO_REG2 is connected to touch panel connector. Rename regulator and set it to correct voltage. PCIe power is controller by GPIO1_D0. Schematics can be downloaded from: http://files.pine64.org/doc/rockpro64/rockpro64_v21-SCH.pdf Signed-off-by: Oskari Lemmela Acked-by: Akash Gajjar Signed-off-by: Heiko Stuebner --- arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts index 1d35f5406b5e..5bd4d69914bd 100644 --- a/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts +++ b/arch/arm64/boot/dts/rockchip/rk3399-rockpro64.dts @@ -97,7 +97,7 @@ vcc3v3_pcie: vcc3v3-pcie-regulator { compatible = "regulator-fixed"; enable-active-high; - gpio = <&gpio1 RK_PC1 GPIO_ACTIVE_HIGH>; + gpio = <&gpio1 RK_PD0 GPIO_ACTIVE_HIGH>; pinctrl-names = "default"; pinctrl-0 = <&pcie_pwr_en>; regulator-name = "vcc3v3_pcie"; @@ -293,12 +293,12 @@ }; }; - vcc2v8_dvp: LDO_REG2 { - regulator-name = "vcc2v8_dvp"; + vcc3v0_touch: LDO_REG2 { + regulator-name = "vcc3v0_touch"; regulator-always-on; regulator-boot-on; - regulator-min-microvolt = <2800000>; - regulator-max-microvolt = <2800000>; + regulator-min-microvolt = <3000000>; + regulator-max-microvolt = <3000000>; regulator-state-mem { regulator-off-in-suspend; }; @@ -397,7 +397,9 @@ vdd_cpu_b: regulator@40 { compatible = "silergy,syr827"; reg = <0x40>; - fcs,suspend-voltage-selector = <0>; + fcs,suspend-voltage-selector = <1>; + pinctrl-names = "default"; + pinctrl-0 = <&vsel1_gpio>; regulator-name = "vdd_cpu_b"; regulator-min-microvolt = <712500>; regulator-max-microvolt = <1500000>; @@ -415,6 +417,8 @@ compatible = "silergy,syr828"; reg = <0x41>; fcs,suspend-voltage-selector = <1>; + pinctrl-names = "default"; + pinctrl-0 = <&vsel2_gpio>; regulator-name = "vdd_gpu"; regulator-min-microvolt = <712500>; regulator-max-microvolt = <1500000>; @@ -519,7 +523,7 @@ pcie { pcie_pwr_en: pcie-pwr-en { - rockchip,pins = <1 RK_PC1 RK_FUNC_GPIO &pcfg_pull_none>; + rockchip,pins = <1 RK_PD0 RK_FUNC_GPIO &pcfg_pull_none>; }; }; @@ -529,7 +533,7 @@ }; vsel1_gpio: vsel1-gpio { - rockchip,pins = <1 RK_PC2 RK_FUNC_GPIO &pcfg_pull_down>; + rockchip,pins = <1 RK_PC1 RK_FUNC_GPIO &pcfg_pull_down>; }; vsel2_gpio: vsel2-gpio { -- cgit v1.2.3 From a763ecc15d0e37c3a15ff6825183061209832685 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Wed, 5 Dec 2018 19:27:44 +0200 Subject: ARM: dts: omap5: Fix dual-role mode on Super-Speed port OMAP5's Super-Speed USB port has a software mailbox register that needs to be fed with VBUS and ID events from an external VBUS/ID comparator. Without this, Host role will not work correctly. Fixes: 656c1a65ab55 ("ARM: dts: omap5: enable OTG role for DWC3 controller") Reported-by: H. Nikolaus Schaller Signed-off-by: Roger Quadros Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap5-board-common.dtsi | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap5-board-common.dtsi b/arch/arm/boot/dts/omap5-board-common.dtsi index bf7ca00f4c21..bc853ebeda22 100644 --- a/arch/arm/boot/dts/omap5-board-common.dtsi +++ b/arch/arm/boot/dts/omap5-board-common.dtsi @@ -701,6 +701,7 @@ }; &dwc3 { + extcon = <&extcon_usb3>; dr_mode = "otg"; }; -- cgit v1.2.3 From 2afdb4c41d7876e430b9bc6e2d7e2fe28609fd6a Mon Sep 17 00:00:00 2001 From: Janusz Krzysztofik Date: Fri, 23 Nov 2018 12:19:45 +0100 Subject: ARM: OMAP1: ams-delta: Fix audio permanently muted Since commit 1137ceee76ba ("ARM: OMAP1: ams-delta: Don't request unused GPIOs"), on-board audio has appeared muted. Believed to be unused GPIO pin "hookflash1", apparently set high regardless of the corresponding bit of "latch2" port attempted to be set low during .init_machine(), has been identified as the reason. According to Amstrad E3 wiki, the purpose of the pin hasn't been clearly identified. Original Amstrad software used to produce a high pulse on it when the phone was taken off hook or recall was pressed. With the current finding, we can assume the pin provides a kind of audio mute function. Proper resolution of the issue should be done in two steps: - resolution of an issue with the pin state not reflecting the value the corresponding bit of the port was attempted to be initialized with, - extension of on-board audio driver with a new control. For now, rename the pin to "audio_mute" to reflect its function and, as a quick fix, hogg it as output low so on-board audio can produce audible sound again. Fixes: 1137ceee76ba ("ARM: OMAP1: ams-delta: Don't request unused GPIOs") Signed-off-by: Janusz Krzysztofik Signed-off-by: Tony Lindgren --- arch/arm/mach-omap1/board-ams-delta.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/arm/mach-omap1/board-ams-delta.c b/arch/arm/mach-omap1/board-ams-delta.c index 3d191fd52910..1d801f5e8308 100644 --- a/arch/arm/mach-omap1/board-ams-delta.c +++ b/arch/arm/mach-omap1/board-ams-delta.c @@ -247,8 +247,8 @@ static struct platform_device latch2_gpio_device = { #define LATCH2_PIN_SCARD_CMDVCC 11 #define LATCH2_PIN_MODEM_NRESET 12 #define LATCH2_PIN_MODEM_CODEC 13 -#define LATCH2_PIN_HOOKFLASH1 14 -#define LATCH2_PIN_HOOKFLASH2 15 +#define LATCH2_PIN_AUDIO_MUTE 14 +#define LATCH2_PIN_HOOKFLASH 15 static struct regulator_consumer_supply modem_nreset_consumers[] = { REGULATOR_SUPPLY("RESET#", "serial8250.1"), @@ -588,6 +588,8 @@ static int gpiochip_match_by_label(struct gpio_chip *chip, void *data) static struct gpiod_hog ams_delta_gpio_hogs[] = { GPIO_HOG(LATCH2_LABEL, LATCH2_PIN_KEYBRD_DATAOUT, "keybrd_dataout", GPIO_ACTIVE_HIGH, GPIOD_OUT_LOW), + GPIO_HOG(LATCH2_LABEL, LATCH2_PIN_AUDIO_MUTE, "audio_mute", + GPIO_ACTIVE_HIGH, GPIOD_OUT_LOW), {}, }; -- cgit v1.2.3 From 5760367298a37c459ef0b1364463d70fd9a1f972 Mon Sep 17 00:00:00 2001 From: Felix Brack Date: Fri, 30 Nov 2018 15:54:46 +0100 Subject: ARM: dts: am335x-pdu001: Fix polarity of card detection input When a micro SD card is inserted in the PDU001 card cage, the card detection switch is opened and the corresponding GPIO input is driven by a pull-up. Hence change the active level of the card detection input from low to high. Signed-off-by: Felix Brack Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/am335x-pdu001.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm/boot/dts/am335x-pdu001.dts b/arch/arm/boot/dts/am335x-pdu001.dts index 6dd9d487aaeb..ae43d61f4e8b 100644 --- a/arch/arm/boot/dts/am335x-pdu001.dts +++ b/arch/arm/boot/dts/am335x-pdu001.dts @@ -585,7 +585,7 @@ bus-width = <4>; pinctrl-names = "default"; pinctrl-0 = <&mmc2_pins>; - cd-gpios = <&gpio2 2 GPIO_ACTIVE_LOW>; + cd-gpios = <&gpio2 2 GPIO_ACTIVE_HIGH>; }; &sham { -- cgit v1.2.3 From 84fb6c7feb1494ebb7d1ec8b95cfb7ada0264465 Mon Sep 17 00:00:00 2001 From: Russell King - ARM Linux Date: Fri, 7 Dec 2018 09:17:07 -0800 Subject: ARM: dts: Fix OMAP4430 SDP Ethernet startup It was noticed that unbinding and rebinding the KSZ8851 ethernet resulted in the driver reporting "failed to read device ID" at probe. Probing the reset line with a 'scope while repeatedly attempting to bind the driver in a shell loop revealed that the KSZ8851 RSTN pin is constantly held at zero, meaning the device is held in reset, and does not respond on the SPI bus. Experimentation with the startup delay on the regulator set to 50ms shows that the reset is positively released after 20ms. Schematics for this board are not available, and the traces are buried in the inner layers of the board which makes tracing where the RSTN pin extremely difficult. We can only guess that the RSTN pin is wired to a reset generator chip driven off the ethernet supply, which fits the observed behaviour. Include this delay in the regulator startup delay - effectively treating the reset as a "supply stable" indicator. This can not be modelled as a delay in the KSZ8851 driver since the reset generation is board specific - if the RSTN pin had been wired to a GPIO, reset could be released earlier via the already provided support in the KSZ8851 driver. This also got confirmed by Peter Ujfalusi based on Blaze schematics that should be very close to SDP4430: TPS22902YFPR is used as the regulator switch (gpio48 controlled): Convert arm boot_lock to raw The VOUT is routed to TPS3808G01DBV. (SCH Note: Threshold set at 90%. Vsense: 0.405V). According to the TPS3808 data sheet the RESET delay time when Ct is open (this is the case in the schema): MIN/TYP/MAX: 12/20/28 ms. Signed-off-by: Russell King Reviewed-by: Peter Ujfalusi [tony@atomide.com: updated with notes from schematics from Peter] Signed-off-by: Tony Lindgren --- arch/arm/boot/dts/omap4-sdp.dts | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/omap4-sdp.dts b/arch/arm/boot/dts/omap4-sdp.dts index 490726b52216..9dc7ec7655cb 100644 --- a/arch/arm/boot/dts/omap4-sdp.dts +++ b/arch/arm/boot/dts/omap4-sdp.dts @@ -33,6 +33,7 @@ gpio = <&gpio2 16 GPIO_ACTIVE_HIGH>; /* gpio line 48 */ enable-active-high; regulator-boot-on; + startup-delay-us = <25000>; }; vbat: fixedregulator-vbat { -- cgit v1.2.3 From 6f61a2c8f1f6163c7e08c77c5f71df0427e4d2f6 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 10 Dec 2018 11:43:55 +0200 Subject: arm64: dts: renesas: draak: Fix CVBS input A typo in the adv7180 DT node prevents successful probing of the VIN. Fix it. Fixes: 6a0942c20f5c ("arm64: dts: renesas: draak: Describe CVBS input") Signed-off-by: Laurent Pinchart Acked-by: Jacopo Mondi Signed-off-by: Simon Horman --- arch/arm64/boot/dts/renesas/r8a77995-draak.dts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/boot/dts/renesas/r8a77995-draak.dts b/arch/arm64/boot/dts/renesas/r8a77995-draak.dts index 2405eaad0296..0f2523296b8a 100644 --- a/arch/arm64/boot/dts/renesas/r8a77995-draak.dts +++ b/arch/arm64/boot/dts/renesas/r8a77995-draak.dts @@ -195,7 +195,7 @@ compatible = "adi,adv7180cp"; reg = <0x20>; - port { + ports { #address-cells = <1>; #size-cells = <0>; -- cgit v1.2.3