diff options
author | Kevin Hilman <khilman@linaro.org> | 2015-11-04 09:14:16 -0800 |
---|---|---|
committer | Kevin Hilman <khilman@linaro.org> | 2015-11-04 09:14:16 -0800 |
commit | 9ce0d731981a1489331e12bb7f94907438515622 (patch) | |
tree | dbaf88cffe4c6207b21fc39061fae46f38b6fb2c /arch | |
parent | c1fa16368f309a6f636ad193a47cde9253db97a7 (diff) | |
parent | b12403044336e7d567f309eb443aa9acf76380af (diff) |
Merge tag 'v3.18.24' of git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable into linux-linaro-lsk-v3.18
Linux 3.18.24
# gpg: Signature made Sat Oct 31 13:44:30 2015 PDT using RSA key ID 97772CDC
# gpg: Can't check signature: public key not found
* tag 'v3.18.24' of git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable: (234 commits)
Linux 3.18.24
tty: fix stall caused by missing memory barrier in drivers/tty/n_tty.c
Revert "tty: fix stall caused by missing memory barrier in drivers/tty/n_tty.c"
Linux 3.18.23
x86: Init per-cpu shadow copy of CR4 on 32-bit CPUs too
3w-9xxx: don't unmap bounce buffered commands
fib_rules: Fix dump_rules() not to exit early
vfs: Test for and handle paths that are unreachable from their mnt_root
md: flush ->event_work before stopping array.
x86/nmi/64: Fix a paravirt stack-clobbering bug in the NMI code
Revert "iio: bmg160: IIO_BUFFER and IIO_TRIGGERED_BUFFER are required"
net: Fix skb_set_peeked use-after-free bug
mm: check if section present during memory block registering
hpfs: update ctime and mtime on directory modification
drivercore: Fix unregistration path of platform devices
ARM: OMAP2+: DRA7: clockdomain: change l4per2_7xx_clkdm to SW_WKUP
of/address: Don't loop forever in of_find_matching_node_by_address().
auxdisplay: ks0108: fix refcount
Doc: ABI: testing: configfs-usb-gadget-sourcesink
Doc: ABI: testing: configfs-usb-gadget-loopback
...
Diffstat (limited to 'arch')
60 files changed, 615 insertions, 236 deletions
diff --git a/arch/arm/Makefile b/arch/arm/Makefile index b61adfae29d1..93a30a285ad2 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -50,6 +50,14 @@ AS += -EL LD += -EL endif +# +# The Scalar Replacement of Aggregates (SRA) optimization pass in GCC 4.9 and +# later may result in code being generated that handles signed short and signed +# char struct members incorrectly. So disable it. +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65932) +# +KBUILD_CFLAGS += $(call cc-option,-fno-ipa-sra) + # This selects which instruction set is used. # Note that GCC does not numerically define an architecture version # macro, but instead defines a whole series of macros which makes diff --git a/arch/arm/boot/dts/imx25-pdk.dts b/arch/arm/boot/dts/imx25-pdk.dts index 9c21b1583762..300507fc722f 100644 --- a/arch/arm/boot/dts/imx25-pdk.dts +++ b/arch/arm/boot/dts/imx25-pdk.dts @@ -10,6 +10,7 @@ */ /dts-v1/; +#include <dt-bindings/gpio/gpio.h> #include <dt-bindings/input/input.h> #include "imx25.dtsi" @@ -93,8 +94,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio2 1 0>; - wp-gpios = <&gpio2 0 0>; + cd-gpios = <&gpio2 1 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 0 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx51-apf51dev.dts b/arch/arm/boot/dts/imx51-apf51dev.dts index c5a9a24c280a..cdd72e0eb4d4 100644 --- a/arch/arm/boot/dts/imx51-apf51dev.dts +++ b/arch/arm/boot/dts/imx51-apf51dev.dts @@ -90,7 +90,7 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio2 29 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio2 29 GPIO_ACTIVE_LOW>; bus-width = <4>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-ard.dts b/arch/arm/boot/dts/imx53-ard.dts index e9337ad52f59..3bc18835fb4b 100644 --- a/arch/arm/boot/dts/imx53-ard.dts +++ b/arch/arm/boot/dts/imx53-ard.dts @@ -103,8 +103,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio1 1 0>; - wp-gpios = <&gpio1 9 0>; + cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 9 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-m53evk.dts b/arch/arm/boot/dts/imx53-m53evk.dts index d0e0f57eb432..53f40885c530 100644 --- a/arch/arm/boot/dts/imx53-m53evk.dts +++ b/arch/arm/boot/dts/imx53-m53evk.dts @@ -124,8 +124,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio1 1 0>; - wp-gpios = <&gpio1 9 0>; + cd-gpios = <&gpio1 1 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio1 9 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-qsb-common.dtsi b/arch/arm/boot/dts/imx53-qsb-common.dtsi index 181ae5ebf23f..1f55187ed9ce 100644 --- a/arch/arm/boot/dts/imx53-qsb-common.dtsi +++ b/arch/arm/boot/dts/imx53-qsb-common.dtsi @@ -147,8 +147,8 @@ &esdhc3 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc3>; - cd-gpios = <&gpio3 11 0>; - wp-gpios = <&gpio3 12 0>; + cd-gpios = <&gpio3 11 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio3 12 GPIO_ACTIVE_HIGH>; bus-width = <8>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-smd.dts b/arch/arm/boot/dts/imx53-smd.dts index 1d325576bcc0..fc89ce1e5763 100644 --- a/arch/arm/boot/dts/imx53-smd.dts +++ b/arch/arm/boot/dts/imx53-smd.dts @@ -41,8 +41,8 @@ &esdhc1 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; - cd-gpios = <&gpio3 13 0>; - wp-gpios = <&gpio4 11 0>; + cd-gpios = <&gpio3 13 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio4 11 GPIO_ACTIVE_HIGH>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx53-tqma53.dtsi b/arch/arm/boot/dts/imx53-tqma53.dtsi index 4f1f0e2868bf..e03373a58760 100644 --- a/arch/arm/boot/dts/imx53-tqma53.dtsi +++ b/arch/arm/boot/dts/imx53-tqma53.dtsi @@ -41,8 +41,8 @@ pinctrl-0 = <&pinctrl_esdhc2>, <&pinctrl_esdhc2_cdwp>; vmmc-supply = <®_3p3v>; - wp-gpios = <&gpio1 2 0>; - cd-gpios = <&gpio1 4 0>; + wp-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>; status = "disabled"; }; diff --git a/arch/arm/boot/dts/imx53-tx53.dtsi b/arch/arm/boot/dts/imx53-tx53.dtsi index 704bd72cbfec..d3e50b22064f 100644 --- a/arch/arm/boot/dts/imx53-tx53.dtsi +++ b/arch/arm/boot/dts/imx53-tx53.dtsi @@ -183,7 +183,7 @@ }; &esdhc1 { - cd-gpios = <&gpio3 24 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>; fsl,wp-controller; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc1>; @@ -191,7 +191,7 @@ }; &esdhc2 { - cd-gpios = <&gpio3 25 GPIO_ACTIVE_HIGH>; + cd-gpios = <&gpio3 25 GPIO_ACTIVE_LOW>; fsl,wp-controller; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc2>; diff --git a/arch/arm/boot/dts/imx53-voipac-bsb.dts b/arch/arm/boot/dts/imx53-voipac-bsb.dts index c17d3ad6dba5..fc51b87ad208 100644 --- a/arch/arm/boot/dts/imx53-voipac-bsb.dts +++ b/arch/arm/boot/dts/imx53-voipac-bsb.dts @@ -119,8 +119,8 @@ &esdhc2 { pinctrl-names = "default"; pinctrl-0 = <&pinctrl_esdhc2>; - cd-gpios = <&gpio3 25 0>; - wp-gpios = <&gpio2 19 0>; + cd-gpios = <&gpio3 25 GPIO_ACTIVE_LOW>; + wp-gpios = <&gpio2 19 GPIO_ACTIVE_HIGH>; vmmc-supply = <®_3p3v>; status = "okay"; }; diff --git a/arch/arm/boot/dts/imx6qdl-rex.dtsi b/arch/arm/boot/dts/imx6qdl-rex.dtsi index df7bcf86c156..4b5e8c87e53f 100644 --- a/arch/arm/boot/dts/imx6qdl-rex.dtsi +++ b/arch/arm/boot/dts/imx6qdl-rex.dtsi @@ -35,7 +35,6 @@ compatible = "regulator-fixed"; reg = <1>; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usbh1>; regulator-name = "usbh1_vbus"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; @@ -47,7 +46,6 @@ compatible = "regulator-fixed"; reg = <2>; pinctrl-names = "default"; - pinctrl-0 = <&pinctrl_usbotg>; regulator-name = "usb_otg_vbus"; regulator-min-microvolt = <5000000>; regulator-max-microvolt = <5000000>; diff --git a/arch/arm/boot/dts/omap3-beagle.dts b/arch/arm/boot/dts/omap3-beagle.dts index a9aae88b74f5..bd603aa2cd82 100644 --- a/arch/arm/boot/dts/omap3-beagle.dts +++ b/arch/arm/boot/dts/omap3-beagle.dts @@ -176,7 +176,7 @@ tfp410_pins: pinmux_tfp410_pins { pinctrl-single,pins = < - 0x194 (PIN_OUTPUT | MUX_MODE4) /* hdq_sio.gpio_170 */ + 0x196 (PIN_OUTPUT | MUX_MODE4) /* hdq_sio.gpio_170 */ >; }; diff --git a/arch/arm/boot/dts/omap5-uevm.dts b/arch/arm/boot/dts/omap5-uevm.dts index 159720d6c956..ec23e86e7e4f 100644 --- a/arch/arm/boot/dts/omap5-uevm.dts +++ b/arch/arm/boot/dts/omap5-uevm.dts @@ -174,8 +174,8 @@ i2c5_pins: pinmux_i2c5_pins { pinctrl-single,pins = < - 0x184 (PIN_INPUT | MUX_MODE0) /* i2c5_scl */ - 0x186 (PIN_INPUT | MUX_MODE0) /* i2c5_sda */ + 0x186 (PIN_INPUT | MUX_MODE0) /* i2c5_scl */ + 0x188 (PIN_INPUT | MUX_MODE0) /* i2c5_sda */ >; }; diff --git a/arch/arm/kernel/signal.c b/arch/arm/kernel/signal.c index bd1983437205..ea6d69125dde 100644 --- a/arch/arm/kernel/signal.c +++ b/arch/arm/kernel/signal.c @@ -354,12 +354,17 @@ setup_return(struct pt_regs *regs, struct ksignal *ksig, */ thumb = handler & 1; -#if __LINUX_ARM_ARCH__ >= 7 +#if __LINUX_ARM_ARCH__ >= 6 /* - * Clear the If-Then Thumb-2 execution state - * ARM spec requires this to be all 000s in ARM mode - * Snapdragon S4/Krait misbehaves on a Thumb=>ARM - * signal transition without this. + * Clear the If-Then Thumb-2 execution state. ARM spec + * requires this to be all 000s in ARM mode. Snapdragon + * S4/Krait misbehaves on a Thumb=>ARM signal transition + * without this. + * + * We must do this whenever we are running on a Thumb-2 + * capable CPU, which includes ARMv6T2. However, we elect + * to do this whenever we're on an ARMv6 or later CPU for + * simplicity. */ cpsr &= ~PSR_IT_MASK; #endif diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index cba52cf6ed3f..3535480e0e6b 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -1439,8 +1439,10 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm, if (vma->vm_flags & VM_PFNMAP) { gpa_t gpa = mem->guest_phys_addr + (vm_start - mem->userspace_addr); - phys_addr_t pa = (vma->vm_pgoff << PAGE_SHIFT) + - vm_start - vma->vm_start; + phys_addr_t pa; + + pa = (phys_addr_t)vma->vm_pgoff << PAGE_SHIFT; + pa += vm_start - vma->vm_start; ret = kvm_phys_addr_ioremap(kvm, gpa, pa, vm_end - vm_start, diff --git a/arch/arm/mach-omap2/clockdomains7xx_data.c b/arch/arm/mach-omap2/clockdomains7xx_data.c index 57d5df0c1fbd..7581e036bda6 100644 --- a/arch/arm/mach-omap2/clockdomains7xx_data.c +++ b/arch/arm/mach-omap2/clockdomains7xx_data.c @@ -331,7 +331,7 @@ static struct clockdomain l4per2_7xx_clkdm = { .dep_bit = DRA7XX_L4PER2_STATDEP_SHIFT, .wkdep_srcs = l4per2_wkup_sleep_deps, .sleepdep_srcs = l4per2_wkup_sleep_deps, - .flags = CLKDM_CAN_HWSUP_SWSUP, + .flags = CLKDM_CAN_SWSUP, }; static struct clockdomain mpu0_7xx_clkdm = { diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index dc2d66cdf311..00b9c4870230 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -91,6 +91,10 @@ config NO_IOPORT_MAP config STACKTRACE_SUPPORT def_bool y +config ILLEGAL_POINTER_VALUE + hex + default 0xdead000000000000 + config LOCKDEP_SUPPORT def_bool y @@ -575,6 +579,22 @@ source "drivers/cpuidle/Kconfig" source "drivers/cpufreq/Kconfig" +config ARM64_ERRATUM_843419 + bool "Cortex-A53: 843419: A load or store might access an incorrect address" + depends on MODULES + default y + help + This option builds kernel modules using the large memory model in + order to avoid the use of the ADRP instruction, which can cause + a subsequent memory access to use an incorrect address on Cortex-A53 + parts up to r0p4. + + Note that the kernel itself must be linked with a version of ld + which fixes potentially affected ADRP instructions through the + use of veneers. + + If unsure, say Y. + endmenu source "net/Kconfig" diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 1c43cec971b5..aba8de0cf4c0 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -32,6 +32,10 @@ endif CHECKFLAGS += -D__aarch64__ +ifeq ($(CONFIG_ARM64_ERRATUM_843419), y) +CFLAGS_MODULE += -mcmodel=large +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 38e704e597f7..c85a02b6cca0 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -177,6 +177,24 @@ ENTRY(ftrace_stub) ENDPROC(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER + /* save return value regs*/ + .macro save_return_regs + sub sp, sp, #64 + stp x0, x1, [sp] + stp x2, x3, [sp, #16] + stp x4, x5, [sp, #32] + stp x6, x7, [sp, #48] + .endm + + /* restore return value regs*/ + .macro restore_return_regs + ldp x0, x1, [sp] + ldp x2, x3, [sp, #16] + ldp x4, x5, [sp, #32] + ldp x6, x7, [sp, #48] + add sp, sp, #64 + .endm + /* * void ftrace_graph_caller(void) * @@ -203,11 +221,11 @@ ENDPROC(ftrace_graph_caller) * only when CONFIG_HAVE_FUNCTION_GRAPH_FP_TEST is enabled. */ ENTRY(return_to_handler) - str x0, [sp, #-16]! + save_return_regs mov x0, x29 // parent's fp bl ftrace_return_to_handler// addr = ftrace_return_to_hander(fp); mov x30, x0 // restore the original return address - ldr x0, [sp], #16 + restore_return_regs ret END(return_to_handler) #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 0a6e4f924df8..2877dd818977 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -327,6 +327,11 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems msr hstr_el2, xzr // Disable CP15 traps to EL2 #endif + /* EL2 debug */ + mrs x0, pmcr_el0 // Disable debug access traps + ubfx x0, x0, #11, #5 // to EL2 and allow access to + msr mdcr_el2, x0 // all PMU counters from EL1 + /* Stage-2 translation */ msr vttbr_el2, xzr diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 1eb1cc955139..e366329d96d8 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -330,12 +330,14 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, ovf = reloc_insn_imm(RELOC_OP_PREL, loc, val, 0, 21, AARCH64_INSN_IMM_ADR); break; +#ifndef CONFIG_ARM64_ERRATUM_843419 case R_AARCH64_ADR_PREL_PG_HI21_NC: overflow_check = false; case R_AARCH64_ADR_PREL_PG_HI21: ovf = reloc_insn_imm(RELOC_OP_PAGE, loc, val, 12, 21, AARCH64_INSN_IMM_ADR); break; +#endif case R_AARCH64_ADD_ABS_LO12_NC: case R_AARCH64_LDST8_ABS_LO12_NC: overflow_check = false; diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index b4efc2e38336..15dd021b0025 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -206,14 +206,32 @@ int copy_siginfo_from_user32(siginfo_t *to, compat_siginfo_t __user *from) /* * VFP save/restore code. + * + * We have to be careful with endianness, since the fpsimd context-switch + * code operates on 128-bit (Q) register values whereas the compat ABI + * uses an array of 64-bit (D) registers. Consequently, we need to swap + * the two halves of each Q register when running on a big-endian CPU. */ +union __fpsimd_vreg { + __uint128_t raw; + struct { +#ifdef __AARCH64EB__ + u64 hi; + u64 lo; +#else + u64 lo; + u64 hi; +#endif + }; +}; + static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) { struct fpsimd_state *fpsimd = ¤t->thread.fpsimd_state; compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr, fpexc; - int err = 0; + int i, err = 0; /* * Save the hardware registers to the fpsimd_state structure. @@ -229,10 +247,15 @@ static int compat_preserve_vfp_context(struct compat_vfp_sigframe __user *frame) /* * Now copy the FP registers. Since the registers are packed, * we can copy the prefix we want (V0-V15) as it is. - * FIXME: Won't work if big endian. */ - err |= __copy_to_user(&frame->ufp.fpregs, fpsimd->vregs, - sizeof(frame->ufp.fpregs)); + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg = { + .raw = fpsimd->vregs[i >> 1], + }; + + __put_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __put_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + } /* Create an AArch32 fpscr from the fpsr and the fpcr. */ fpscr = (fpsimd->fpsr & VFP_FPSCR_STAT_MASK) | @@ -257,7 +280,7 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) compat_ulong_t magic = VFP_MAGIC; compat_ulong_t size = VFP_STORAGE_SIZE; compat_ulong_t fpscr; - int err = 0; + int i, err = 0; __get_user_error(magic, &frame->magic, err); __get_user_error(size, &frame->size, err); @@ -267,12 +290,14 @@ static int compat_restore_vfp_context(struct compat_vfp_sigframe __user *frame) if (magic != VFP_MAGIC || size != VFP_STORAGE_SIZE) return -EINVAL; - /* - * Copy the FP registers into the start of the fpsimd_state. - * FIXME: Won't work if big endian. - */ - err |= __copy_from_user(fpsimd.vregs, frame->ufp.fpregs, - sizeof(frame->ufp.fpregs)); + /* Copy the FP registers into the start of the fpsimd_state. */ + for (i = 0; i < ARRAY_SIZE(frame->ufp.fpregs); i += 2) { + union __fpsimd_vreg vreg; + + __get_user_error(vreg.lo, &frame->ufp.fpregs[i], err); + __get_user_error(vreg.hi, &frame->ufp.fpregs[i + 1], err); + fpsimd.vregs[i >> 1] = vreg.raw; + } /* Extract the fpsr and the fpcr from the fpscr */ __get_user_error(fpscr, &frame->ufp.fpscr, err); diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index a767f6a4ce54..566a457d1803 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -843,8 +843,6 @@ mrs x3, cntv_ctl_el0 and x3, x3, #3 str w3, [x0, #VCPU_TIMER_CNTV_CTL] - bic x3, x3, #1 // Clear Enable - msr cntv_ctl_el0, x3 isb @@ -852,6 +850,9 @@ str x3, [x0, #VCPU_TIMER_CNTV_CVAL] 1: + // Disable the virtual timer + msr cntv_ctl_el0, xzr + // Allow physical timer/counter access for the host mrs x2, cnthctl_el2 orr x2, x2, #3 diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 41cb6d3d6075..6094c64b3380 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -279,6 +279,7 @@ retry: * starvation. */ mm_flags &= ~FAULT_FLAG_ALLOW_RETRY; + mm_flags |= FAULT_FLAG_TRIED; goto retry; } } diff --git a/arch/m68k/include/asm/linkage.h b/arch/m68k/include/asm/linkage.h index 5a822bb790f7..066e74f666ae 100644 --- a/arch/m68k/include/asm/linkage.h +++ b/arch/m68k/include/asm/linkage.h @@ -4,4 +4,34 @@ #define __ALIGN .align 4 #define __ALIGN_STR ".align 4" +/* + * Make sure the compiler doesn't do anything stupid with the + * arguments on the stack - they are owned by the *caller*, not + * the callee. This just fools gcc into not spilling into them, + * and keeps it from doing tailcall recursion and/or using the + * stack slots for temporaries, since they are live and "used" + * all the way to the end of the function. + */ +#define asmlinkage_protect(n, ret, args...) \ + __asmlinkage_protect##n(ret, ##args) +#define __asmlinkage_protect_n(ret, args...) \ + __asm__ __volatile__ ("" : "=r" (ret) : "0" (ret), ##args) +#define __asmlinkage_protect0(ret) \ + __asmlinkage_protect_n(ret) +#define __asmlinkage_protect1(ret, arg1) \ + __asmlinkage_protect_n(ret, "m" (arg1)) +#define __asmlinkage_protect2(ret, arg1, arg2) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2)) +#define __asmlinkage_protect3(ret, arg1, arg2, arg3) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3)) +#define __asmlinkage_protect4(ret, arg1, arg2, arg3, arg4) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4)) +#define __asmlinkage_protect5(ret, arg1, arg2, arg3, arg4, arg5) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5)) +#define __asmlinkage_protect6(ret, arg1, arg2, arg3, arg4, arg5, arg6) \ + __asmlinkage_protect_n(ret, "m" (arg1), "m" (arg2), "m" (arg3), \ + "m" (arg4), "m" (arg5), "m" (arg6)) + #endif diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index 33ba3c558fe4..027ad1f24e32 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -95,7 +95,7 @@ static gfp_t massage_gfp_flags(const struct device *dev, gfp_t gfp) else #endif #if defined(CONFIG_ZONE_DMA) && !defined(CONFIG_ZONE_DMA32) - if (dev->coherent_dma_mask < DMA_BIT_MASK(64)) + if (dev->coherent_dma_mask < DMA_BIT_MASK(sizeof(phys_addr_t) * 8)) dma_flag = __GFP_DMA; else #endif diff --git a/arch/parisc/kernel/irq.c b/arch/parisc/kernel/irq.c index cfe056fe7f5c..34f06be569d9 100644 --- a/arch/parisc/kernel/irq.c +++ b/arch/parisc/kernel/irq.c @@ -507,8 +507,8 @@ void do_cpu_irq_mask(struct pt_regs *regs) struct pt_regs *old_regs; unsigned long eirr_val; int irq, cpu = smp_processor_id(); -#ifdef CONFIG_SMP struct irq_desc *desc; +#ifdef CONFIG_SMP cpumask_t dest; #endif @@ -521,8 +521,12 @@ void do_cpu_irq_mask(struct pt_regs *regs) goto set_out; irq = eirr_to_irq(eirr_val); -#ifdef CONFIG_SMP + /* Filter out spurious interrupts, mostly from serial port at bootup */ desc = irq_to_desc(irq); + if (unlikely(!desc->action)) + goto set_out; + +#ifdef CONFIG_SMP cpumask_copy(&dest, desc->irq_data.affinity); if (irqd_is_per_cpu(&desc->irq_data) && !cpu_isset(smp_processor_id(), dest)) { diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S index 7ef22e3387e0..0b8d26d3ba43 100644 --- a/arch/parisc/kernel/syscall.S +++ b/arch/parisc/kernel/syscall.S @@ -821,7 +821,7 @@ cas2_action: /* 64bit CAS */ #ifdef CONFIG_64BIT 19: ldd,ma 0(%sr3,%r26), %r29 - sub,= %r29, %r25, %r0 + sub,*= %r29, %r25, %r0 b,n cas2_end 20: std,ma %r24, 0(%sr3,%r26) copy %r0, %r28 diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h index ae153c40ab7c..daf4add50743 100644 --- a/arch/powerpc/include/asm/pgtable-ppc64.h +++ b/arch/powerpc/include/asm/pgtable-ppc64.h @@ -135,7 +135,19 @@ #define pte_iterate_hashed_end() } while(0) #ifdef CONFIG_PPC_HAS_HASH_64K -#define pte_pagesize_index(mm, addr, pte) get_slice_psize(mm, addr) +/* + * We expect this to be called only for user addresses or kernel virtual + * addresses other than the linear mapping. + */ +#define pte_pagesize_index(mm, addr, pte) \ + ({ \ + unsigned int psize; \ + if (is_kernel_addr(addr)) \ + psize = MMU_PAGE_4K; \ + else \ + psize = get_slice_psize(mm, addr); \ + psize; \ + }) #else #define pte_pagesize_index(mm, addr, pte) MMU_PAGE_4K #endif diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index b390f55b0df1..af37e69b3b74 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -316,6 +316,7 @@ extern void rtas_power_off(void); extern void rtas_halt(void); extern void rtas_os_term(char *str); extern int rtas_get_sensor(int sensor, int index, int *state); +extern int rtas_get_sensor_fast(int sensor, int index, int *state); extern int rtas_get_power_level(int powerdomain, int *level); extern int rtas_set_power_level(int powerdomain, int level, int *setlevel); extern bool rtas_indicator_present(int token, int *maxindex); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 8b4c857c1421..af0dafab5807 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -584,6 +584,23 @@ int rtas_get_sensor(int sensor, int index, int *state) } EXPORT_SYMBOL(rtas_get_sensor); +int rtas_get_sensor_fast(int sensor, int index, int *state) +{ + int token = rtas_token("get-sensor-state"); + int rc; + + if (token == RTAS_UNKNOWN_SERVICE) + return -ENOENT; + + rc = rtas_call(token, 2, 2, state, sensor, index); + WARN_ON(rc == RTAS_BUSY || (rc >= RTAS_EXTENDED_DELAY_MIN && + rc <= RTAS_EXTENDED_DELAY_MAX)); + + if (rc < 0) + return rtas_error_rc(rc); + return rc; +} + bool rtas_indicator_present(int token, int *maxindex) { int proplen, count, i; diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 5f5e6328c21c..5061c6f676da 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -136,7 +136,6 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, BUG_ON(index >= 4096); vpn = hpt_vpn(ea, vsid, ssize); - hash = hpt_hash(vpn, shift, ssize); hpte_slot_array = get_hpte_slot_array(pmdp); if (psize == MMU_PAGE_4K) { /* @@ -151,6 +150,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, valid = hpte_valid(hpte_slot_array, index); if (valid) { /* update the hpte bits */ + hash = hpt_hash(vpn, shift, ssize); hidx = hpte_hash_index(hpte_slot_array, index); if (hidx & _PTEIDX_SECONDARY) hash = ~hash; @@ -176,6 +176,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, if (!valid) { unsigned long hpte_group; + hash = hpt_hash(vpn, shift, ssize); /* insert new entry */ pa = pmd_pfn(__pmd(old_pmd)) << PAGE_SHIFT; new_pmd |= _PAGE_HASHPTE; diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index 4b20f2c6b3b2..9ff55d59ac76 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -100,6 +100,7 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev) struct pci_controller *hose = pci_bus_to_host(pdev->bus); struct pnv_phb *phb = hose->private_data; struct msi_desc *entry; + irq_hw_number_t hwirq; if (WARN_ON(!phb)) return; @@ -107,10 +108,10 @@ static void pnv_teardown_msi_irqs(struct pci_dev *pdev) list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&phb->msi_bmp, - virq_to_hw(entry->irq) - phb->msi_base, 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&phb->msi_bmp, hwirq - phb->msi_base, 1); } } #endif /* CONFIG_PCI_MSI */ diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 5a4d0fc03b03..d263f7bc80fc 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -187,7 +187,8 @@ static irqreturn_t ras_epow_interrupt(int irq, void *dev_id) int state; int critical; - status = rtas_get_sensor(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, &state); + status = rtas_get_sensor_fast(EPOW_SENSOR_TOKEN, EPOW_SENSOR_INDEX, + &state); if (state > 3) critical = 1; /* Time Critical */ diff --git a/arch/powerpc/sysdev/fsl_msi.c b/arch/powerpc/sysdev/fsl_msi.c index da08ed088157..ea6b3a1c79d8 100644 --- a/arch/powerpc/sysdev/fsl_msi.c +++ b/arch/powerpc/sysdev/fsl_msi.c @@ -129,15 +129,16 @@ static void fsl_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; struct fsl_msi *msi_data; + irq_hw_number_t hwirq; list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); msi_data = irq_get_chip_data(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/mpic_pasemi_msi.c b/arch/powerpc/sysdev/mpic_pasemi_msi.c index 15dccd35fa11..a6add4ae6c5a 100644 --- a/arch/powerpc/sysdev/mpic_pasemi_msi.c +++ b/arch/powerpc/sysdev/mpic_pasemi_msi.c @@ -66,6 +66,7 @@ static struct irq_chip mpic_pasemi_msi_chip = { static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; pr_debug("pasemi_msi_teardown_msi_irqs, pdev %p\n", pdev); @@ -73,10 +74,11 @@ static void pasemi_msi_teardown_msi_irqs(struct pci_dev *pdev) if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), ALLOC_CHUNK); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, + hwirq, ALLOC_CHUNK); } return; diff --git a/arch/powerpc/sysdev/mpic_u3msi.c b/arch/powerpc/sysdev/mpic_u3msi.c index 623d7fba15b4..db35a4073127 100644 --- a/arch/powerpc/sysdev/mpic_u3msi.c +++ b/arch/powerpc/sysdev/mpic_u3msi.c @@ -108,15 +108,16 @@ static u64 find_u4_magic_addr(struct pci_dev *pdev, unsigned int hwirq) static void u3msi_teardown_msi_irqs(struct pci_dev *pdev) { struct msi_desc *entry; + irq_hw_number_t hwirq; list_for_each_entry(entry, &pdev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_mpic->msi_bitmap, hwirq, 1); } return; diff --git a/arch/powerpc/sysdev/ppc4xx_msi.c b/arch/powerpc/sysdev/ppc4xx_msi.c index 22b5200636e7..85d9c1852d19 100644 --- a/arch/powerpc/sysdev/ppc4xx_msi.c +++ b/arch/powerpc/sysdev/ppc4xx_msi.c @@ -125,16 +125,17 @@ void ppc4xx_teardown_msi_irqs(struct pci_dev *dev) { struct msi_desc *entry; struct ppc4xx_msi *msi_data = &ppc4xx_msi; + irq_hw_number_t hwirq; dev_dbg(&dev->dev, "PCIE-MSI: tearing down msi irqs\n"); list_for_each_entry(entry, &dev->msi_list, list) { if (entry->irq == NO_IRQ) continue; + hwirq = virq_to_hw(entry->irq); irq_set_msi_desc(entry->irq, NULL); - msi_bitmap_free_hwirqs(&msi_data->bitmap, - virq_to_hw(entry->irq), 1); irq_dispose_mapping(entry->irq); + msi_bitmap_free_hwirqs(&msi_data->bitmap, hwirq, 1); } } diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile index f90d1fc6d603..f70b2321071e 100644 --- a/arch/s390/boot/compressed/Makefile +++ b/arch/s390/boot/compressed/Makefile @@ -12,7 +12,7 @@ targets += misc.o piggy.o sizes.h head$(BITS).o KBUILD_CFLAGS := -m$(BITS) -D__KERNEL__ $(LINUX_INCLUDE) -O2 KBUILD_CFLAGS += -DDISABLE_BRANCH_PROFILING -KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks +KBUILD_CFLAGS += $(cflags-y) -fno-delete-null-pointer-checks -msoft-float KBUILD_CFLAGS += $(call cc-option,-mpacked-stack) KBUILD_CFLAGS += $(call cc-option,-ffreestanding) diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 009f5eb11125..14c3e80e003a 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -48,6 +48,19 @@ typedef struct struct ucontext32 uc; } rt_sigframe32; +static inline void sigset_to_sigset32(unsigned long *set64, + compat_sigset_word *set32) +{ + set32[0] = (compat_sigset_word) set64[0]; + set32[1] = (compat_sigset_word)(set64[0] >> 32); +} + +static inline void sigset32_to_sigset(compat_sigset_word *set32, + unsigned long *set64) +{ + set64[0] = (unsigned long) set32[0] | ((unsigned long) set32[1] << 32); +} + int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err; @@ -303,10 +316,12 @@ COMPAT_SYSCALL_DEFINE0(sigreturn) { struct pt_regs *regs = task_pt_regs(current); sigframe32 __user *frame = (sigframe32 __user *)regs->gprs[15]; + compat_sigset_t cset; sigset_t set; - if (__copy_from_user(&set.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) + if (__copy_from_user(&cset.sig, &frame->sc.oldmask, _SIGMASK_COPY_SIZE32)) goto badframe; + sigset32_to_sigset(cset.sig, set.sig); set_current_blocked(&set); if (restore_sigregs32(regs, &frame->sregs)) goto badframe; @@ -323,10 +338,12 @@ COMPAT_SYSCALL_DEFINE0(rt_sigreturn) { struct pt_regs *regs = task_pt_regs(current); rt_sigframe32 __user *frame = (rt_sigframe32 __user *)regs->gprs[15]; + compat_sigset_t cset; sigset_t set; - if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) + if (__copy_from_user(&cset, &frame->uc.uc_sigmask, sizeof(cset))) goto badframe; + sigset32_to_sigset(cset.sig, set.sig); set_current_blocked(&set); if (compat_restore_altstack(&frame->uc.uc_stack)) goto badframe; @@ -407,7 +424,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, return -EFAULT; /* Create struct sigcontext32 on the signal stack */ - memcpy(&sc.oldmask, &set->sig, _SIGMASK_COPY_SIZE32); + sigset_to_sigset32(set->sig, sc.oldmask); sc.sregs = (__u32)(unsigned long __force) &frame->sregs; if (__copy_to_user(&frame->sc, &sc, sizeof(frame->sc))) return -EFAULT; @@ -468,6 +485,7 @@ static int setup_frame32(struct ksignal *ksig, sigset_t *set, static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { + compat_sigset_t cset; rt_sigframe32 __user *frame; unsigned long restorer; size_t frame_size; @@ -515,11 +533,12 @@ static int setup_rt_frame32(struct ksignal *ksig, sigset_t *set, store_sigregs(); /* Create ucontext on the signal stack. */ + sigset_to_sigset32(set->sig, cset.sig); if (__put_user(uc_flags, &frame->uc.uc_flags) || __put_user(0, &frame->uc.uc_link) || __compat_save_altstack(&frame->uc.uc_stack, regs->gprs[15]) || save_sigregs32(regs, &frame->uc.uc_mcontext) || - __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)) || + __copy_to_user(&frame->uc.uc_sigmask, &cset, sizeof(cset)) || save_sigregs_ext32(regs, &frame->uc.uc_mcontext_ext)) return -EFAULT; diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 8253d85aa165..de1d72e3ec59 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -291,6 +291,7 @@ static struct ahash_alg ghash_async_alg = { .cra_name = "ghash", .cra_driver_name = "ghash-clmulni", .cra_priority = 400, + .cra_ctxsize = sizeof(struct ghash_async_ctx), .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC, .cra_blocksize = GHASH_BLOCK_SIZE, .cra_type = &crypto_ahash_type, diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index ddd8d13a010f..26d5e05a7def 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -852,7 +852,8 @@ extern unsigned long thread_saved_pc(struct task_struct *tsk); #define task_pt_regs(task) \ ({ \ struct pt_regs *__regs__; \ - __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task))-8); \ + __regs__ = (struct pt_regs *)(KSTK_TOP(task_stack_page(task)) - \ + TOP_OF_KERNEL_STACK_PADDING); \ __regs__ - 1; \ }) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 547e344a6dc6..c4d96943e666 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -13,6 +13,33 @@ #include <asm/types.h> /* + * TOP_OF_KERNEL_STACK_PADDING is a number of unused bytes that we + * reserve at the top of the kernel stack. We do it because of a nasty + * 32-bit corner case. On x86_32, the hardware stack frame is + * variable-length. Except for vm86 mode, struct pt_regs assumes a + * maximum-length frame. If we enter from CPL 0, the top 8 bytes of + * pt_regs don't actually exist. Ordinarily this doesn't matter, but it + * does in at least one case: + * + * If we take an NMI early enough in SYSENTER, then we can end up with + * pt_regs that extends above sp0. On the way out, in the espfix code, + * we can read the saved SS value, but that value will be above sp0. + * Without this offset, that can result in a page fault. (We are + * careful that, in this case, the value we read doesn't matter.) + * + * In vm86 mode, the hardware frame is much longer still, but we neither + * access the extra members from NMI context, nor do we write such a + * frame at sp0 at all. + * + * x86_64 has a fixed-length stack frame. + */ +#ifdef CONFIG_X86_32 +# define TOP_OF_KERNEL_STACK_PADDING 8 +#else +# define TOP_OF_KERNEL_STACK_PADDING 0 +#endif + +/* * low level task data that entry.S needs immediate access to * - this struct should fit entirely inside of one cache line * - this struct shares the supervisor stack pages diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index ba6cc041edb1..f7eef03fd4b3 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -366,6 +366,13 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) apic_write(APIC_LVTT, lvtt_value); if (lvtt_value & APIC_LVT_TIMER_TSCDEADLINE) { + /* + * See Intel SDM: TSC-Deadline Mode chapter. In xAPIC mode, + * writing to the APIC LVTT and TSC_DEADLINE MSR isn't serialized. + * According to Intel, MFENCE can do the serialization here. + */ + asm volatile("mfence" : : : "memory"); + printk_once(KERN_DEBUG "TSC deadline timer enabled\n"); return; } diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index e757fcbe90db..88635b301694 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1405,6 +1405,12 @@ void cpu_init(void) wait_for_master_cpu(cpu); + /* + * Initialize the CR4 shadow before doing anything that could + * try to read it. + */ + cr4_init_shadow(); + show_ucode_info_early(); printk(KERN_INFO "Initializing CPU#%d\n", cpu); diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index f5ab56d14287..3af40315a127 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -183,10 +183,9 @@ void native_machine_crash_shutdown(struct pt_regs *regs) } #ifdef CONFIG_KEXEC_FILE -static int get_nr_ram_ranges_callback(unsigned long start_pfn, - unsigned long nr_pfn, void *arg) +static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) { - int *nr_ranges = arg; + unsigned int *nr_ranges = arg; (*nr_ranges)++; return 0; @@ -212,7 +211,7 @@ static void fill_up_crash_elf_data(struct crash_elf_data *ced, ced->image = image; - walk_system_ram_range(0, -1, &nr_ranges, + walk_system_ram_res(0, -1, &nr_ranges, get_nr_ram_ranges_callback); ced->max_nr_ranges = nr_ranges; diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 3dddb89ba320..fe611c4ae3ff 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -398,7 +398,7 @@ sysenter_past_esp: * A tiny bit of offset fixup is necessary - 4*4 means the 4 words * pushed above; +8 corresponds to copy_thread's esp0 setting. */ - pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+8+4*4)(%esp) + pushl_cfi ((TI_sysenter_return)-THREAD_SIZE+TOP_OF_KERNEL_STACK_PADDING+4*4)(%esp) CFI_REL_OFFSET eip, 0 pushl_cfi %eax diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index fad5cd9d7c4b..a3255ca219ea 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1428,7 +1428,18 @@ END(error_exit) /* runs on exception stack */ ENTRY(nmi) INTR_FRAME + /* + * Fix up the exception frame if we're on Xen. + * PARAVIRT_ADJUST_EXCEPTION_FRAME is guaranteed to push at most + * one value to the stack on native, so it may clobber the rdx + * scratch slot, but it won't clobber any of the important + * slots past it. + * + * Xen is a different story, because the Xen frame itself overlaps + * the "NMI executing" variable. + */ PARAVIRT_ADJUST_EXCEPTION_FRAME + /* * We allow breakpoints in NMIs. If a breakpoint occurs, then * the iretq it performs will take us out of NMI context. @@ -1446,11 +1457,12 @@ ENTRY(nmi) * If the variable is not set and the stack is not the NMI * stack then: * o Set the special variable on the stack - * o Copy the interrupt frame into a "saved" location on the stack - * o Copy the interrupt frame into a "copy" location on the stack + * o Copy the interrupt frame into an "outermost" location on the + * stack + * o Copy the interrupt frame into an "iret" location on the stack * o Continue processing the NMI * If the variable is set or the previous stack is the NMI stack: - * o Modify the "copy" location to jump to the repeate_nmi + * o Modify the "iret" location to jump to the repeat_nmi * o return back to the first NMI * * Now on exit of the first NMI, we first clear the stack variable @@ -1479,9 +1491,11 @@ ENTRY(nmi) * we don't want to enable interrupts, because then we'll end * up in an awkward situation in which IRQs are on but NMIs * are off. + * + * We also must not push anything to the stack before switching + * stacks lest we corrupt the "NMI executing" variable. */ - - SWAPGS + SWAPGS_UNSAFE_STACK cld movq %rsp, %rdx movq PER_CPU_VAR(kernel_stack), %rsp @@ -1530,38 +1544,101 @@ ENTRY(nmi) .Lnmi_from_kernel: /* - * Check the special variable on the stack to see if NMIs are - * executing. + * Here's what our stack frame will look like: + * +---------------------------------------------------------+ + * | original SS | + * | original Return RSP | + * | original RFLAGS | + * | original CS | + * | original RIP | + * +---------------------------------------------------------+ + * | temp storage for rdx | + * +---------------------------------------------------------+ + * | "NMI executing" variable | + * +---------------------------------------------------------+ + * | iret SS } Copied from "outermost" frame | + * | iret Return RSP } on each loop iteration; overwritten | + * | iret RFLAGS } by a nested NMI to force another | + * | iret CS } iteration if needed. | + * | iret RIP } | + * +---------------------------------------------------------+ + * | outermost SS } initialized in first_nmi; | + * | outermost Return RSP } will not be changed before | + * | outermost RFLAGS } NMI processing is done. | + * | outermost CS } Copied to "iret" frame on each | + * | outermost RIP } iteration. | + * +---------------------------------------------------------+ + * | pt_regs | + * +---------------------------------------------------------+ + * + * The "original" frame is used by hardware. Before re-enabling + * NMIs, we need to be done with it, and we need to leave enough + * space for the asm code here. + * + * We return by executing IRET while RSP points to the "iret" frame. + * That will either return for real or it will loop back into NMI + * processing. + * + * The "outermost" frame is copied to the "iret" frame on each + * iteration of the loop, so each iteration starts with the "iret" + * frame pointing to the final return target. + */ + + /* + * Determine whether we're a nested NMI. + * + * If we interrupted kernel code between repeat_nmi and + * end_repeat_nmi, then we are a nested NMI. We must not + * modify the "iret" frame because it's being written by + * the outer NMI. That's okay; the outer NMI handler is + * about to about to call do_nmi anyway, so we can just + * resume the outer NMI. + */ + movq $repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja 1f + movq $end_repeat_nmi, %rdx + cmpq 8(%rsp), %rdx + ja nested_nmi_out +1: + + /* + * Now check "NMI executing". If it's set, then we're nested. + * This will not detect if we interrupted an outer NMI just + * before IRET. */ cmpl $1, -8(%rsp) je nested_nmi /* - * Now test if the previous stack was an NMI stack. - * We need the double check. We check the NMI stack to satisfy the - * race when the first NMI clears the variable before returning. - * We check the variable because the first NMI could be in a - * breakpoint routine using a breakpoint stack. + * Now test if the previous stack was an NMI stack. This covers + * the case where we interrupt an outer NMI after it clears + * "NMI executing" but before IRET. We need to be careful, though: + * there is one case in which RSP could point to the NMI stack + * despite there being no NMI active: naughty userspace controls + * RSP at the very beginning of the SYSCALL targets. We can + * pull a fast one on naughty userspace, though: we program + * SYSCALL to mask DF, so userspace cannot cause DF to be set + * if it controls the kernel's RSP. We set DF before we clear + * "NMI executing". */ lea 6*8(%rsp), %rdx test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi + + /* Ah, it is within the NMI stack. */ + + testb $(X86_EFLAGS_DF >> 8), (3*8 + 1)(%rsp) + jz first_nmi /* RSP was user controlled. */ + + /* This is a nested NMI. */ + CFI_REMEMBER_STATE nested_nmi: /* - * Do nothing if we interrupted the fixup in repeat_nmi. - * It's about to repeat the NMI handler, so we are fine - * with ignoring this one. + * Modify the "iret" frame to point to repeat_nmi, forcing another + * iteration of NMI handling. */ - movq $repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja 1f - movq $end_repeat_nmi, %rdx - cmpq 8(%rsp), %rdx - ja nested_nmi_out - -1: - /* Set up the interrupted NMIs stack to jump to repeat_nmi */ leaq -1*8(%rsp), %rdx movq %rdx, %rsp CFI_ADJUST_CFA_OFFSET 1*8 @@ -1580,60 +1657,23 @@ nested_nmi_out: popq_cfi %rdx CFI_RESTORE rdx - /* No need to check faults here */ + /* We are returning to kernel mode, so this cannot result in a fault. */ INTERRUPT_RETURN CFI_RESTORE_STATE first_nmi: - /* - * Because nested NMIs will use the pushed location that we - * stored in rdx, we must keep that space available. - * Here's what our stack frame will look like: - * +-------------------------+ - * | original SS | - * | original Return RSP | - * | original RFLAGS | - * | original CS | - * | original RIP | - * +-------------------------+ - * | temp storage for rdx | - * +-------------------------+ - * | NMI executing variable | - * +-------------------------+ - * | copied SS | - * | copied Return RSP | - * | copied RFLAGS | - * | copied CS | - * | copied RIP | - * +-------------------------+ - * | Saved SS | - * | Saved Return RSP | - * | Saved RFLAGS | - * | Saved CS | - * | Saved RIP | - * +-------------------------+ - * | pt_regs | - * +-------------------------+ - * - * The saved stack frame is used to fix up the copied stack frame - * that a nested NMI may change to make the interrupted NMI iret jump - * to the repeat_nmi. The original stack frame and the temp storage - * is also used by nested NMIs and can not be trusted on exit. - */ - /* Do not pop rdx, nested NMIs will corrupt that part of the stack */ + /* Restore rdx. */ movq (%rsp), %rdx CFI_RESTORE rdx - /* Set the NMI executing variable on the stack. */ + /* Set "NMI executing" on the stack. */ pushq_cfi $1 - /* - * Leave room for the "copied" frame - */ + /* Leave room for the "iret" frame */ subq $(5*8), %rsp CFI_ADJUST_CFA_OFFSET 5*8 - /* Copy the stack frame to the Saved frame */ + /* Copy the "original" frame to the "outermost" frame */ .rept 5 pushq_cfi 11*8(%rsp) .endr @@ -1641,6 +1681,7 @@ first_nmi: /* Everything up to here is safe from nested NMIs */ +repeat_nmi: /* * If there was a nested NMI, the first NMI's iret will return * here. But NMIs are still enabled and we can take another @@ -1649,16 +1690,21 @@ first_nmi: * it will just return, as we are about to repeat an NMI anyway. * This makes it safe to copy to the stack frame that a nested * NMI will update. - */ -repeat_nmi: - /* - * Update the stack variable to say we are still in NMI (the update - * is benign for the non-repeat case, where 1 was pushed just above - * to this very stack slot). + * + * RSP is pointing to "outermost RIP". gsbase is unknown, but, if + * we're repeating an NMI, gsbase has the same value that it had on + * the first iteration. paranoid_entry will load the kernel + * gsbase if needed before we call do_nmi. + * + * Set "NMI executing" in case we came back here via IRET. */ movq $1, 10*8(%rsp) - /* Make another copy, this one may be modified by nested NMIs */ + /* + * Copy the "outermost" frame to the "iret" frame. NMIs that nest + * here must not modify the "iret" frame while we're writing to + * it or it will end up containing garbage. + */ addq $(10*8), %rsp CFI_ADJUST_CFA_OFFSET -10*8 .rept 5 @@ -1669,9 +1715,9 @@ repeat_nmi: end_repeat_nmi: /* - * Everything below this point can be preempted by a nested - * NMI if the first NMI took an exception and reset our iret stack - * so that we repeat another NMI. + * Everything below this point can be preempted by a nested NMI. + * If this happens, then the inner NMI will change the "iret" + * frame to point back to repeat_nmi. */ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ subq $ORIG_RAX-R15, %rsp @@ -1699,9 +1745,23 @@ nmi_restore: /* Pop the extra iret frame at once */ RESTORE_ALL 6*8 - /* Clear the NMI executing stack variable */ - movq $0, 5*8(%rsp) - jmp irq_return + /* + * Clear "NMI executing". Set DF first so that we can easily + * distinguish the remaining code between here and IRET from + * the SYSCALL entry and exit paths. On a native kernel, we + * could just inspect RIP, but, on paravirt kernels, + * INTERRUPT_RETURN can translate into a jump into a + * hypercall page. + */ + std + movq $0, 5*8(%rsp) /* clear "NMI executing" */ + + /* + * INTERRUPT_RETURN reads the "iret" frame and exits the NMI + * stack in a single instruction. We are returning to kernel + * mode, so this cannot result in a fault. + */ + INTERRUPT_RETURN CFI_ENDPROC END(nmi) diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 5c5ec7d28d9b..a701b49e8c87 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -408,8 +408,8 @@ static void default_do_nmi(struct pt_regs *regs) NOKPROBE_SYMBOL(default_do_nmi); /* - * NMIs can hit breakpoints which will cause it to lose its NMI context - * with the CPU when the breakpoint or page fault does an IRET. + * NMIs can page fault or hit breakpoints which will cause it to lose + * its NMI context with the CPU when the breakpoint or page fault does an IRET. * * As a result, NMIs can nest if NMIs get unmasked due an IRET during * NMI processing. On x86_64, the asm glue protects us from nested NMIs diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 548d25f00c90..8d12f0546dfc 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -41,10 +41,18 @@ #include <asm/timer.h> #include <asm/special_insns.h> -/* nop stub */ -void _paravirt_nop(void) -{ -} +/* + * nop stub, which must not clobber anything *including the stack* to + * avoid confusing the entry prologues. + */ +extern void _paravirt_nop(void); +asm (".pushsection .entry.text, \"ax\"\n" + ".global _paravirt_nop\n" + "_paravirt_nop:\n\t" + "ret\n\t" + ".size _paravirt_nop, . - _paravirt_nop\n\t" + ".type _paravirt_nop, @function\n\t" + ".popsection"); /* identity function, which can be inlined */ u32 _paravirt_ident_32(u32 x) diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 63a4b5092203..54cfd5ebd96c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -476,27 +476,59 @@ void set_personality_ia32(bool x32) } EXPORT_SYMBOL_GPL(set_personality_ia32); +/* + * Called from fs/proc with a reference on @p to find the function + * which called into schedule(). This needs to be done carefully + * because the task might wake up and we might look at a stack + * changing under us. + */ unsigned long get_wchan(struct task_struct *p) { - unsigned long stack; - u64 fp, ip; + unsigned long start, bottom, top, sp, fp, ip; int count = 0; if (!p || p == current || p->state == TASK_RUNNING) return 0; - stack = (unsigned long)task_stack_page(p); - if (p->thread.sp < stack || p->thread.sp >= stack+THREAD_SIZE) + + start = (unsigned long)task_stack_page(p); + if (!start) + return 0; + + /* + * Layout of the stack page: + * + * ----------- topmax = start + THREAD_SIZE - sizeof(unsigned long) + * PADDING + * ----------- top = topmax - TOP_OF_KERNEL_STACK_PADDING + * stack + * ----------- bottom = start + sizeof(thread_info) + * thread_info + * ----------- start + * + * The tasks stack pointer points at the location where the + * framepointer is stored. The data on the stack is: + * ... IP FP ... IP FP + * + * We need to read FP and IP, so we need to adjust the upper + * bound by another unsigned long. + */ + top = start + THREAD_SIZE - TOP_OF_KERNEL_STACK_PADDING; + top -= 2 * sizeof(unsigned long); + bottom = start + sizeof(struct thread_info); + + sp = READ_ONCE(p->thread.sp); + if (sp < bottom || sp > top) return 0; - fp = *(u64 *)(p->thread.sp); + + fp = READ_ONCE(*(unsigned long *)sp); do { - if (fp < (unsigned long)stack || - fp >= (unsigned long)stack+THREAD_SIZE) + if (fp < bottom || fp > top) return 0; - ip = *(u64 *)(fp+8); + ip = READ_ONCE(*(unsigned long *)(fp + sizeof(unsigned long))); if (!in_sched_functions(ip)) return ip; - fp = *(u64 *)fp; - } while (count++ < 16); + fp = READ_ONCE(*(unsigned long *)fp); + } while (count++ < 16 && p->state != TASK_RUNNING); return 0; } diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 505449700e0c..21187ebee7d0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -21,6 +21,7 @@ #include <asm/hypervisor.h> #include <asm/nmi.h> #include <asm/x86_init.h> +#include <asm/geode.h> unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ EXPORT_SYMBOL(cpu_khz); @@ -1004,15 +1005,17 @@ EXPORT_SYMBOL_GPL(mark_tsc_unstable); static void __init check_system_tsc_reliable(void) { -#ifdef CONFIG_MGEODE_LX - /* RTSC counts during suspend */ +#if defined(CONFIG_MGEODEGX1) || defined(CONFIG_MGEODE_LX) || defined(CONFIG_X86_GENERIC) + if (is_geode_lx()) { + /* RTSC counts during suspend */ #define RTSC_SUSP 0x100 - unsigned long res_low, res_high; + unsigned long res_low, res_high; - rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); - /* Geode_LX - the OLPC CPU has a very reliable TSC */ - if (res_low & RTSC_SUSP) - tsc_clocksource_reliable = 1; + rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high); + /* Geode_LX - the OLPC CPU has a very reliable TSC */ + if (res_low & RTSC_SUSP) + tsc_clocksource_reliable = 1; + } #endif if (boot_cpu_has(X86_FEATURE_TSC_RELIABLE)) tsc_clocksource_reliable = 1; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f696dedb0fa7..23875c26fb34 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -372,12 +372,6 @@ static u64 __get_spte_lockless(u64 *sptep) { return ACCESS_ONCE(*sptep); } - -static bool __check_direct_spte_mmio_pf(u64 spte) -{ - /* It is valid if the spte is zapped. */ - return spte == 0ull; -} #else union split_spte { struct { @@ -493,23 +487,6 @@ retry: return spte.spte; } - -static bool __check_direct_spte_mmio_pf(u64 spte) -{ - union split_spte sspte = (union split_spte)spte; - u32 high_mmio_mask = shadow_mmio_mask >> 32; - - /* It is valid if the spte is zapped. */ - if (spte == 0ull) - return true; - - /* It is valid if the spte is being zapped. */ - if (sspte.spte_low == 0ull && - (sspte.spte_high & high_mmio_mask) == high_mmio_mask) - return true; - - return false; -} #endif static bool spte_is_locklessly_modifiable(u64 spte) @@ -3230,21 +3207,6 @@ static bool quickly_check_mmio_pf(struct kvm_vcpu *vcpu, u64 addr, bool direct) return vcpu_match_mmio_gva(vcpu, addr); } - -/* - * On direct hosts, the last spte is only allows two states - * for mmio page fault: - * - It is the mmio spte - * - It is zapped or it is being zapped. - * - * This function completely checks the spte when the last spte - * is not the mmio spte. - */ -static bool check_direct_spte_mmio_pf(u64 spte) -{ - return __check_direct_spte_mmio_pf(spte); -} - static u64 walk_shadow_page_get_mmio_spte(struct kvm_vcpu *vcpu, u64 addr) { struct kvm_shadow_walk_iterator iterator; @@ -3287,13 +3249,6 @@ int handle_mmio_page_fault_common(struct kvm_vcpu *vcpu, u64 addr, bool direct) } /* - * It's ok if the gva is remapped by other cpus on shadow guest, - * it's a BUG if the gfn is not a mmio page. - */ - if (direct && !check_direct_spte_mmio_pf(spte)) - return RET_MMIO_PF_BUG; - - /* * If the page table is zapped by other cpus, let CPU fault again on * the address. */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index b83bff87408f..f98baebfa9a7 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -512,7 +512,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu) struct vcpu_svm *svm = to_svm(vcpu); if (svm->vmcb->control.next_rip != 0) { - WARN_ON(!static_cpu_has(X86_FEATURE_NRIPS)); + WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS)); svm->next_rip = svm->vmcb->control.next_rip; } diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c8140e12816a..c23ab1ee3a9a 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -137,6 +137,7 @@ page_table_range_init_count(unsigned long start, unsigned long end) vaddr = start; pgd_idx = pgd_index(vaddr); + pmd_idx = pmd_index(vaddr); for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd_idx++) { for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 4e5dfec750fc..fa77995b62a4 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1144,7 +1144,7 @@ void mark_rodata_ro(void) * has been zapped already via cleanup_highmem(). */ all_end = roundup((unsigned long)_brk_end, PMD_SIZE); - set_memory_nx(rodata_start, (all_end - rodata_start) >> PAGE_SHIFT); + set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT); rodata_test(); diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index dbc8627a5cdf..6d6080f3fa35 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -670,6 +670,70 @@ out: } /* + * Iterate the EFI memory map in reverse order because the regions + * will be mapped top-down. The end result is the same as if we had + * mapped things forward, but doesn't require us to change the + * existing implementation of efi_map_region(). + */ +static inline void *efi_map_next_entry_reverse(void *entry) +{ + /* Initial call */ + if (!entry) + return memmap.map_end - memmap.desc_size; + + entry -= memmap.desc_size; + if (entry < memmap.map) + return NULL; + + return entry; +} + +/* + * efi_map_next_entry - Return the next EFI memory map descriptor + * @entry: Previous EFI memory map descriptor + * + * This is a helper function to iterate over the EFI memory map, which + * we do in different orders depending on the current configuration. + * + * To begin traversing the memory map @entry must be %NULL. + * + * Returns %NULL when we reach the end of the memory map. + */ +static void *efi_map_next_entry(void *entry) +{ + if (!efi_enabled(EFI_OLD_MEMMAP) && efi_enabled(EFI_64BIT)) { + /* + * Starting in UEFI v2.5 the EFI_PROPERTIES_TABLE + * config table feature requires us to map all entries + * in the same order as they appear in the EFI memory + * map. That is to say, entry N must have a lower + * virtual address than entry N+1. This is because the + * firmware toolchain leaves relative references in + * the code/data sections, which are split and become + * separate EFI memory regions. Mapping things + * out-of-order leads to the firmware accessing + * unmapped addresses. + * + * Since we need to map things this way whether or not + * the kernel actually makes use of + * EFI_PROPERTIES_TABLE, let's just switch to this + * scheme by default for 64-bit. + */ + return efi_map_next_entry_reverse(entry); + } + + /* Initial call */ + if (!entry) + return memmap.map; + + entry += memmap.desc_size; + if (entry >= memmap.map_end) + return NULL; + + return entry; +} + +/* * Map the efi memory ranges of the runtime services and update new_mmap with * virtual addresses. */ @@ -679,7 +743,8 @@ static void * __init efi_map_regions(int *count, int *pg_shift) unsigned long left = 0; efi_memory_desc_t *md; - for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { + p = NULL; + while ((p = efi_map_next_entry(p))) { md = p; if (!(md->attribute & EFI_MEMORY_RUNTIME)) { #ifdef CONFIG_X86_64 diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index d8d81d1aa1d5..7e365d231a93 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -33,6 +33,10 @@ #include <linux/memblock.h> #include <linux/edd.h> +#ifdef CONFIG_KEXEC_CORE +#include <linux/kexec.h> +#endif + #include <xen/xen.h> #include <xen/events.h> #include <xen/interface/xen.h> @@ -1859,6 +1863,21 @@ static struct notifier_block xen_hvm_cpu_notifier = { .notifier_call = xen_hvm_cpu_notify, }; +#ifdef CONFIG_KEXEC_CORE +static void xen_hvm_shutdown(void) +{ + native_machine_shutdown(); + if (kexec_in_progress) + xen_reboot(SHUTDOWN_soft_reset); +} + +static void xen_hvm_crash_shutdown(struct pt_regs *regs) +{ + native_machine_crash_shutdown(regs); + xen_reboot(SHUTDOWN_soft_reset); +} +#endif + static void __init xen_hvm_guest_init(void) { init_hvm_pv_info(); @@ -1875,6 +1894,10 @@ static void __init xen_hvm_guest_init(void) x86_init.irqs.intr_init = xen_init_IRQ; xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); +#ifdef CONFIG_KEXEC_CORE + machine_ops.shutdown = xen_hvm_shutdown; + machine_ops.crash_shutdown = xen_hvm_crash_shutdown; +#endif } static bool xen_nopv = false; diff --git a/arch/xtensa/include/asm/traps.h b/arch/xtensa/include/asm/traps.h index 677bfcf4ee5d..28f33a8b7f5f 100644 --- a/arch/xtensa/include/asm/traps.h +++ b/arch/xtensa/include/asm/traps.h @@ -25,30 +25,39 @@ static inline void spill_registers(void) { #if XCHAL_NUM_AREGS > 16 __asm__ __volatile__ ( - " call12 1f\n" + " call8 1f\n" " _j 2f\n" " retw\n" " .align 4\n" "1:\n" +#if XCHAL_NUM_AREGS == 32 + " _entry a1, 32\n" + " addi a8, a0, 3\n" + " _entry a1, 16\n" + " mov a12, a12\n" + " retw\n" +#else " _entry a1, 48\n" - " addi a12, a0, 3\n" -#if XCHAL_NUM_AREGS > 32 - " .rept (" __stringify(XCHAL_NUM_AREGS) " - 32) / 12\n" + " call12 1f\n" + " retw\n" + " .align 4\n" + "1:\n" + " .rept (" __stringify(XCHAL_NUM_AREGS) " - 16) / 12\n" " _entry a1, 48\n" " mov a12, a0\n" " .endr\n" -#endif - " _entry a1, 48\n" + " _entry a1, 16\n" #if XCHAL_NUM_AREGS % 12 == 0 - " mov a8, a8\n" -#elif XCHAL_NUM_AREGS % 12 == 4 " mov a12, a12\n" -#elif XCHAL_NUM_AREGS % 12 == 8 +#elif XCHAL_NUM_AREGS % 12 == 4 " mov a4, a4\n" +#elif XCHAL_NUM_AREGS % 12 == 8 + " mov a8, a8\n" #endif " retw\n" +#endif "2:\n" - : : : "a12", "a13", "memory"); + : : : "a8", "a9", "memory"); #else __asm__ __volatile__ ( " mov a12, a12\n" diff --git a/arch/xtensa/kernel/entry.S b/arch/xtensa/kernel/entry.S index 82bbfa5a05b3..a2a902140c4e 100644 --- a/arch/xtensa/kernel/entry.S +++ b/arch/xtensa/kernel/entry.S @@ -568,12 +568,13 @@ user_exception_exit: * (if we have restored WSBITS-1 frames). */ +2: #if XCHAL_HAVE_THREADPTR l32i a3, a1, PT_THREADPTR wur a3, threadptr #endif -2: j common_exception_exit + j common_exception_exit /* This is the kernel exception exit. * We avoided to do a MOVSP when we entered the exception, but we @@ -1820,7 +1821,7 @@ ENDPROC(system_call) mov a12, a0 .endr #endif - _entry a1, 48 + _entry a1, 16 #if XCHAL_NUM_AREGS % 12 == 0 mov a8, a8 #elif XCHAL_NUM_AREGS % 12 == 4 @@ -1844,7 +1845,7 @@ ENDPROC(system_call) ENTRY(_switch_to) - entry a1, 16 + entry a1, 48 mov a11, a3 # and 'next' (a3) |