aboutsummaryrefslogtreecommitdiff
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r--arch/alpha/include/asm/types.h2
-rw-r--r--arch/alpha/include/uapi/asm/types.h12
-rw-r--r--arch/arc/kernel/entry.S6
-rw-r--r--arch/arc/mm/tlb.c3
-rw-r--r--arch/arm/Kconfig-nommu3
-rw-r--r--arch/arm/boot/dts/am335x-chilisom.dtsi8
-rw-r--r--arch/arm/boot/dts/armada-375.dtsi4
-rw-r--r--arch/arm/boot/dts/armada-38x.dtsi4
-rw-r--r--arch/arm/boot/dts/armada-39x.dtsi4
-rw-r--r--arch/arm/boot/dts/bcm953012k.dts2
-rw-r--r--arch/arm/boot/dts/exynos4412-odroid-common.dtsi4
-rw-r--r--arch/arm/boot/dts/exynos4412-odroidu3.dts5
-rw-r--r--arch/arm/boot/dts/exynos4412-odroidx2.dts1
-rw-r--r--arch/arm/boot/dts/exynos4412-prime.dtsi41
-rw-r--r--arch/arm/boot/dts/exynos4412.dtsi2
-rw-r--r--arch/arm/boot/dts/mt2701.dtsi36
-rw-r--r--arch/arm/boot/dts/r8a7790.dtsi12
-rw-r--r--arch/arm/include/asm/Kbuild1
-rw-r--r--arch/arm/include/asm/unaligned.h27
-rw-r--r--arch/arm/kvm/emulate.c6
-rw-r--r--arch/arm/kvm/hyp/Makefile2
-rw-r--r--arch/arm/kvm/mmu.c2
-rw-r--r--arch/arm/mach-at91/pm.c2
-rw-r--r--arch/arm/mach-bcm/bcm_kona_smc.c2
-rw-r--r--arch/arm/mach-cns3xxx/core.c2
-rw-r--r--arch/arm/mach-omap2/prm_common.c2
-rw-r--r--arch/arm/mach-omap2/vc.c2
-rw-r--r--arch/arm/mach-spear/time.c2
-rw-r--r--arch/arm/mm/fault.c5
-rw-r--r--arch/arm/xen/mm.c1
-rw-r--r--arch/arm64/boot/dts/marvell/armada-37xx.dtsi1
-rw-r--r--arch/arm64/kernel/head.S1
-rw-r--r--arch/arm64/kernel/traps.c2
-rw-r--r--arch/arm64/kvm/hyp/Makefile2
-rw-r--r--arch/arm64/kvm/inject_fault.c16
-rw-r--r--arch/arm64/mm/fault.c2
-rw-r--r--arch/mips/ath79/clock.c7
-rw-r--r--arch/mips/include/asm/irq.h15
-rw-r--r--arch/mips/kernel/asm-offsets.c1
-rw-r--r--arch/mips/kernel/cps-vec.S2
-rw-r--r--arch/mips/kernel/genex.S8
-rw-r--r--arch/mips/kernel/process.c56
-rw-r--r--arch/mips/kernel/vmlinux.lds.S2
-rw-r--r--arch/mips/lantiq/xway/sysctrl.c4
-rw-r--r--arch/mips/math-emu/cp1emu.c2
-rw-r--r--arch/mips/math-emu/dp_fmax.c84
-rw-r--r--arch/mips/math-emu/dp_fmin.c86
-rw-r--r--arch/mips/math-emu/dp_maddf.c245
-rw-r--r--arch/mips/math-emu/ieee754int.h4
-rw-r--r--arch/mips/math-emu/ieee754sp.h4
-rw-r--r--arch/mips/math-emu/sp_fmax.c84
-rw-r--r--arch/mips/math-emu/sp_fmin.c86
-rw-r--r--arch/mips/math-emu/sp_maddf.c226
-rw-r--r--arch/mips/ralink/mt7620.c18
-rw-r--r--arch/mips/ralink/rt3883.c2
-rw-r--r--arch/parisc/kernel/perf.c94
-rw-r--r--arch/parisc/kernel/syscall.S6
-rw-r--r--arch/powerpc/kernel/align.c119
-rw-r--r--arch/powerpc/kernel/entry_64.S20
-rw-r--r--arch/powerpc/kernel/exceptions-64s.S24
-rw-r--r--arch/powerpc/kernel/ptrace.c2
-rw-r--r--arch/powerpc/kernel/signal_64.c13
-rw-r--r--arch/powerpc/kvm/book3s_64_vio.c57
-rw-r--r--arch/powerpc/kvm/powerpc.c3
-rw-r--r--arch/powerpc/mm/init_64.c33
-rw-r--r--arch/powerpc/perf/isa207-common.h4
-rw-r--r--arch/powerpc/perf/power9-pmu.c2
-rw-r--r--arch/powerpc/platforms/pseries/mobility.c4
-rw-r--r--arch/s390/crypto/aes_s390.c7
-rw-r--r--arch/s390/crypto/prng.c40
-rw-r--r--arch/s390/include/asm/mmu.h2
-rw-r--r--arch/s390/include/asm/mmu_context.h8
-rw-r--r--arch/s390/include/asm/pgtable.h6
-rw-r--r--arch/s390/include/asm/tlbflush.h30
-rw-r--r--arch/s390/mm/gmap.c39
-rw-r--r--arch/s390/mm/gup.c7
-rw-r--r--arch/sparc/include/asm/setup.h5
-rw-r--r--arch/sparc/kernel/smp_64.c6
-rw-r--r--arch/x86/include/asm/alternative-asm.h4
-rw-r--r--arch/x86/include/asm/alternative.h6
-rw-r--r--arch/x86/include/asm/elf.h5
-rw-r--r--arch/x86/include/asm/kvm_emulate.h1
-rw-r--r--arch/x86/kernel/acpi/boot.c7
-rw-r--r--arch/x86/kernel/apic/apic.c26
-rw-r--r--arch/x86/kernel/cpu/microcode/intel.c19
-rw-r--r--arch/x86/kernel/fpu/regset.c9
-rw-r--r--arch/x86/kernel/fpu/signal.c4
-rw-r--r--arch/x86/kernel/kvm.c3
-rw-r--r--arch/x86/kernel/process_64.c236
-rw-r--r--arch/x86/kvm/emulate.c1
-rw-r--r--arch/x86/kvm/mmu.c15
-rw-r--r--arch/x86/kvm/paging_tmpl.h3
-rw-r--r--arch/x86/kvm/vmx.c242
-rw-r--r--arch/x86/kvm/x86.c52
-rw-r--r--arch/x86/mm/fault.c47
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/purgatory/Makefile1
97 files changed, 1505 insertions, 874 deletions
diff --git a/arch/alpha/include/asm/types.h b/arch/alpha/include/asm/types.h
index 4cb4b6d3452c..0bc66e1d3a7e 100644
--- a/arch/alpha/include/asm/types.h
+++ b/arch/alpha/include/asm/types.h
@@ -1,6 +1,6 @@
#ifndef _ALPHA_TYPES_H
#define _ALPHA_TYPES_H
-#include <asm-generic/int-ll64.h>
+#include <uapi/asm/types.h>
#endif /* _ALPHA_TYPES_H */
diff --git a/arch/alpha/include/uapi/asm/types.h b/arch/alpha/include/uapi/asm/types.h
index 9fd3cd459777..8d1024d7be05 100644
--- a/arch/alpha/include/uapi/asm/types.h
+++ b/arch/alpha/include/uapi/asm/types.h
@@ -9,8 +9,18 @@
* need to be careful to avoid a name clashes.
*/
-#ifndef __KERNEL__
+/*
+ * This is here because we used to use l64 for alpha
+ * and we don't want to impact user mode with our change to ll64
+ * in the kernel.
+ *
+ * However, some user programs are fine with this. They can
+ * flag __SANE_USERSPACE_TYPES__ to get int-ll64.h here.
+ */
+#if !defined(__SANE_USERSPACE_TYPES__) && !defined(__KERNEL__)
#include <asm-generic/int-l64.h>
+#else
+#include <asm-generic/int-ll64.h>
#endif
#endif /* _UAPI_ALPHA_TYPES_H */
diff --git a/arch/arc/kernel/entry.S b/arch/arc/kernel/entry.S
index 1eea99beecc3..85d9ea4a0acc 100644
--- a/arch/arc/kernel/entry.S
+++ b/arch/arc/kernel/entry.S
@@ -92,6 +92,12 @@ ENTRY(EV_MachineCheck)
lr r0, [efa]
mov r1, sp
+ ; hardware auto-disables MMU, re-enable it to allow kernel vaddr
+ ; access for say stack unwinding of modules for crash dumps
+ lr r3, [ARC_REG_PID]
+ or r3, r3, MMU_ENABLE
+ sr r3, [ARC_REG_PID]
+
lsr r3, r2, 8
bmsk r3, r3, 7
brne r3, ECR_C_MCHK_DUP_TLB, 1f
diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
index bdb295e09160..a4dc881da277 100644
--- a/arch/arc/mm/tlb.c
+++ b/arch/arc/mm/tlb.c
@@ -896,9 +896,6 @@ void do_tlb_overlap_fault(unsigned long cause, unsigned long address,
local_irq_save(flags);
- /* re-enable the MMU */
- write_aux_reg(ARC_REG_PID, MMU_ENABLE | read_aux_reg(ARC_REG_PID));
-
/* loop thru all sets of TLB */
for (set = 0; set < mmu->sets; set++) {
diff --git a/arch/arm/Kconfig-nommu b/arch/arm/Kconfig-nommu
index aed66d5df7f1..b7576349528c 100644
--- a/arch/arm/Kconfig-nommu
+++ b/arch/arm/Kconfig-nommu
@@ -34,8 +34,7 @@ config PROCESSOR_ID
used instead of the auto-probing which utilizes the register.
config REMAP_VECTORS_TO_RAM
- bool 'Install vectors to the beginning of RAM' if DRAM_BASE
- depends on DRAM_BASE
+ bool 'Install vectors to the beginning of RAM'
help
The kernel needs to change the hardware exception vectors.
In nommu mode, the hardware exception vectors are normally
diff --git a/arch/arm/boot/dts/am335x-chilisom.dtsi b/arch/arm/boot/dts/am335x-chilisom.dtsi
index f9ee5859c154..1b43ebd08b38 100644
--- a/arch/arm/boot/dts/am335x-chilisom.dtsi
+++ b/arch/arm/boot/dts/am335x-chilisom.dtsi
@@ -124,6 +124,14 @@
&rtc {
system-power-controller;
+
+ pinctrl-0 = <&ext_wakeup>;
+ pinctrl-names = "default";
+
+ ext_wakeup: ext-wakeup {
+ pins = "ext_wakeup0";
+ input-enable;
+ };
};
/* NAND Flash */
diff --git a/arch/arm/boot/dts/armada-375.dtsi b/arch/arm/boot/dts/armada-375.dtsi
index cc952cf8ec30..024f1b75b0a3 100644
--- a/arch/arm/boot/dts/armada-375.dtsi
+++ b/arch/arm/boot/dts/armada-375.dtsi
@@ -176,9 +176,9 @@
reg = <0x8000 0x1000>;
cache-unified;
cache-level = <2>;
- arm,double-linefill-incr = <1>;
+ arm,double-linefill-incr = <0>;
arm,double-linefill-wrap = <0>;
- arm,double-linefill = <1>;
+ arm,double-linefill = <0>;
prefetch-data = <1>;
};
diff --git a/arch/arm/boot/dts/armada-38x.dtsi b/arch/arm/boot/dts/armada-38x.dtsi
index 2d7668848c5a..c60cfe9fd033 100644
--- a/arch/arm/boot/dts/armada-38x.dtsi
+++ b/arch/arm/boot/dts/armada-38x.dtsi
@@ -143,9 +143,9 @@
reg = <0x8000 0x1000>;
cache-unified;
cache-level = <2>;
- arm,double-linefill-incr = <1>;
+ arm,double-linefill-incr = <0>;
arm,double-linefill-wrap = <0>;
- arm,double-linefill = <1>;
+ arm,double-linefill = <0>;
prefetch-data = <1>;
};
diff --git a/arch/arm/boot/dts/armada-39x.dtsi b/arch/arm/boot/dts/armada-39x.dtsi
index 34cba87f9200..aeecfa7e5ea3 100644
--- a/arch/arm/boot/dts/armada-39x.dtsi
+++ b/arch/arm/boot/dts/armada-39x.dtsi
@@ -111,9 +111,9 @@
reg = <0x8000 0x1000>;
cache-unified;
cache-level = <2>;
- arm,double-linefill-incr = <1>;
+ arm,double-linefill-incr = <0>;
arm,double-linefill-wrap = <0>;
- arm,double-linefill = <1>;
+ arm,double-linefill = <0>;
prefetch-data = <1>;
};
diff --git a/arch/arm/boot/dts/bcm953012k.dts b/arch/arm/boot/dts/bcm953012k.dts
index 05a985a20378..6208e85acd9d 100644
--- a/arch/arm/boot/dts/bcm953012k.dts
+++ b/arch/arm/boot/dts/bcm953012k.dts
@@ -48,7 +48,7 @@
};
memory {
- reg = <0x00000000 0x10000000>;
+ reg = <0x80000000 0x10000000>;
};
};
diff --git a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
index 8aa19ba14436..5282d69e55bd 100644
--- a/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
+++ b/arch/arm/boot/dts/exynos4412-odroid-common.dtsi
@@ -97,11 +97,11 @@
thermal-zones {
cpu_thermal: cpu-thermal {
cooling-maps {
- map0 {
+ cooling_map0: map0 {
/* Corresponds to 800MHz at freq_table */
cooling-device = <&cpu0 7 7>;
};
- map1 {
+ cooling_map1: map1 {
/* Corresponds to 200MHz at freq_table */
cooling-device = <&cpu0 13 13>;
};
diff --git a/arch/arm/boot/dts/exynos4412-odroidu3.dts b/arch/arm/boot/dts/exynos4412-odroidu3.dts
index 99634c54dca9..7504a5aa538e 100644
--- a/arch/arm/boot/dts/exynos4412-odroidu3.dts
+++ b/arch/arm/boot/dts/exynos4412-odroidu3.dts
@@ -13,6 +13,7 @@
/dts-v1/;
#include "exynos4412-odroid-common.dtsi"
+#include "exynos4412-prime.dtsi"
/ {
model = "Hardkernel ODROID-U3 board based on Exynos4412";
@@ -47,11 +48,11 @@
cooling-maps {
map0 {
trip = <&cpu_alert1>;
- cooling-device = <&cpu0 7 7>;
+ cooling-device = <&cpu0 9 9>;
};
map1 {
trip = <&cpu_alert2>;
- cooling-device = <&cpu0 13 13>;
+ cooling-device = <&cpu0 15 15>;
};
map2 {
trip = <&cpu_alert0>;
diff --git a/arch/arm/boot/dts/exynos4412-odroidx2.dts b/arch/arm/boot/dts/exynos4412-odroidx2.dts
index 4d228858f172..d6e92ebc3874 100644
--- a/arch/arm/boot/dts/exynos4412-odroidx2.dts
+++ b/arch/arm/boot/dts/exynos4412-odroidx2.dts
@@ -12,6 +12,7 @@
*/
#include "exynos4412-odroidx.dts"
+#include "exynos4412-prime.dtsi"
/ {
model = "Hardkernel ODROID-X2 board based on Exynos4412";
diff --git a/arch/arm/boot/dts/exynos4412-prime.dtsi b/arch/arm/boot/dts/exynos4412-prime.dtsi
new file mode 100644
index 000000000000..e75bc170c89c
--- /dev/null
+++ b/arch/arm/boot/dts/exynos4412-prime.dtsi
@@ -0,0 +1,41 @@
+/*
+ * Samsung's Exynos4412 Prime SoC device tree source
+ *
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ * http://www.samsung.com
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+/*
+ * Exynos4412 Prime SoC revision supports higher CPU frequencies than
+ * non-Prime version. Therefore we need to update OPPs table and
+ * thermal maps accordingly.
+ */
+
+&cpu0_opp_1500 {
+ /delete-property/turbo-mode;
+};
+
+&cpu0_opp_table {
+ opp@1600000000 {
+ opp-hz = /bits/ 64 <1600000000>;
+ opp-microvolt = <1350000>;
+ clock-latency-ns = <200000>;
+ };
+ opp@1704000000 {
+ opp-hz = /bits/ 64 <1704000000>;
+ opp-microvolt = <1350000>;
+ clock-latency-ns = <200000>;
+ };
+};
+
+&cooling_map0 {
+ cooling-device = <&cpu0 9 9>;
+};
+
+&cooling_map1 {
+ cooling-device = <&cpu0 15 15>;
+};
diff --git a/arch/arm/boot/dts/exynos4412.dtsi b/arch/arm/boot/dts/exynos4412.dtsi
index 40beede46e55..3ebdf01d814c 100644
--- a/arch/arm/boot/dts/exynos4412.dtsi
+++ b/arch/arm/boot/dts/exynos4412.dtsi
@@ -130,7 +130,7 @@
opp-microvolt = <1287500>;
clock-latency-ns = <200000>;
};
- opp@1500000000 {
+ cpu0_opp_1500: opp@1500000000 {
opp-hz = /bits/ 64 <1500000000>;
opp-microvolt = <1350000>;
clock-latency-ns = <200000>;
diff --git a/arch/arm/boot/dts/mt2701.dtsi b/arch/arm/boot/dts/mt2701.dtsi
index 18596a2c58a1..77c6b931dc24 100644
--- a/arch/arm/boot/dts/mt2701.dtsi
+++ b/arch/arm/boot/dts/mt2701.dtsi
@@ -174,4 +174,40 @@
clocks = <&uart_clk>;
status = "disabled";
};
+
+ mmsys: syscon@14000000 {
+ compatible = "mediatek,mt2701-mmsys", "syscon";
+ reg = <0 0x14000000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ imgsys: syscon@15000000 {
+ compatible = "mediatek,mt2701-imgsys", "syscon";
+ reg = <0 0x15000000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ vdecsys: syscon@16000000 {
+ compatible = "mediatek,mt2701-vdecsys", "syscon";
+ reg = <0 0x16000000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ hifsys: syscon@1a000000 {
+ compatible = "mediatek,mt2701-hifsys", "syscon";
+ reg = <0 0x1a000000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ ethsys: syscon@1b000000 {
+ compatible = "mediatek,mt2701-ethsys", "syscon";
+ reg = <0 0x1b000000 0 0x1000>;
+ #clock-cells = <1>;
+ };
+
+ bdpsys: syscon@1c000000 {
+ compatible = "mediatek,mt2701-bdpsys", "syscon";
+ reg = <0 0x1c000000 0 0x1000>;
+ #clock-cells = <1>;
+ };
};
diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi
index 351fcc2f87df..b6c6410ca384 100644
--- a/arch/arm/boot/dts/r8a7790.dtsi
+++ b/arch/arm/boot/dts/r8a7790.dtsi
@@ -1493,7 +1493,8 @@
};
msiof0: spi@e6e20000 {
- compatible = "renesas,msiof-r8a7790";
+ compatible = "renesas,msiof-r8a7790",
+ "renesas,rcar-gen2-msiof";
reg = <0 0xe6e20000 0 0x0064>;
interrupts = <GIC_SPI 156 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&mstp0_clks R8A7790_CLK_MSIOF0>;
@@ -1507,7 +1508,8 @@
};
msiof1: spi@e6e10000 {
- compatible = "renesas,msiof-r8a7790";
+ compatible = "renesas,msiof-r8a7790",
+ "renesas,rcar-gen2-msiof";
reg = <0 0xe6e10000 0 0x0064>;
interrupts = <GIC_SPI 157 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&mstp2_clks R8A7790_CLK_MSIOF1>;
@@ -1521,7 +1523,8 @@
};
msiof2: spi@e6e00000 {
- compatible = "renesas,msiof-r8a7790";
+ compatible = "renesas,msiof-r8a7790",
+ "renesas,rcar-gen2-msiof";
reg = <0 0xe6e00000 0 0x0064>;
interrupts = <GIC_SPI 158 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&mstp2_clks R8A7790_CLK_MSIOF2>;
@@ -1535,7 +1538,8 @@
};
msiof3: spi@e6c90000 {
- compatible = "renesas,msiof-r8a7790";
+ compatible = "renesas,msiof-r8a7790",
+ "renesas,rcar-gen2-msiof";
reg = <0 0xe6c90000 0 0x0064>;
interrupts = <GIC_SPI 159 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&mstp2_clks R8A7790_CLK_MSIOF3>;
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index 55e0e3ea9cb6..bd12b98e2589 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -37,4 +37,3 @@ generic-y += termbits.h
generic-y += termios.h
generic-y += timex.h
generic-y += trace_clock.h
-generic-y += unaligned.h
diff --git a/arch/arm/include/asm/unaligned.h b/arch/arm/include/asm/unaligned.h
new file mode 100644
index 000000000000..ab905ffcf193
--- /dev/null
+++ b/arch/arm/include/asm/unaligned.h
@@ -0,0 +1,27 @@
+#ifndef __ASM_ARM_UNALIGNED_H
+#define __ASM_ARM_UNALIGNED_H
+
+/*
+ * We generally want to set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS on ARMv6+,
+ * but we don't want to use linux/unaligned/access_ok.h since that can lead
+ * to traps on unaligned stm/ldm or strd/ldrd.
+ */
+#include <asm/byteorder.h>
+
+#if defined(__LITTLE_ENDIAN)
+# include <linux/unaligned/le_struct.h>
+# include <linux/unaligned/be_byteshift.h>
+# include <linux/unaligned/generic.h>
+# define get_unaligned __get_unaligned_le
+# define put_unaligned __put_unaligned_le
+#elif defined(__BIG_ENDIAN)
+# include <linux/unaligned/be_struct.h>
+# include <linux/unaligned/le_byteshift.h>
+# include <linux/unaligned/generic.h>
+# define get_unaligned __get_unaligned_be
+# define put_unaligned __put_unaligned_be
+#else
+# error need to define endianess
+#endif
+
+#endif /* __ASM_ARM_UNALIGNED_H */
diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c
index 0064b86a2c87..30a13647c54c 100644
--- a/arch/arm/kvm/emulate.c
+++ b/arch/arm/kvm/emulate.c
@@ -227,7 +227,7 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
u32 return_offset = (is_thumb) ? 2 : 4;
kvm_update_psr(vcpu, UND_MODE);
- *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) - return_offset;
+ *vcpu_reg(vcpu, 14) = *vcpu_pc(vcpu) + return_offset;
/* Branch to exception vector */
*vcpu_pc(vcpu) = exc_vector_base(vcpu) + vect_offset;
@@ -239,10 +239,8 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu)
*/
static void inject_abt(struct kvm_vcpu *vcpu, bool is_pabt, unsigned long addr)
{
- unsigned long cpsr = *vcpu_cpsr(vcpu);
- bool is_thumb = (cpsr & PSR_T_BIT);
u32 vect_offset;
- u32 return_offset = (is_thumb) ? 4 : 0;
+ u32 return_offset = (is_pabt) ? 4 : 8;
bool is_lpae;
kvm_update_psr(vcpu, ABT_MODE);
diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile
index 8679405b0b2b..92eab1d51785 100644
--- a/arch/arm/kvm/hyp/Makefile
+++ b/arch/arm/kvm/hyp/Makefile
@@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module, HYP part
#
-ccflags-y += -fno-stack-protector
+ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
KVM=../../../../virt/kvm
diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
index 0c060c5e844a..2206e0e00934 100644
--- a/arch/arm/kvm/mmu.c
+++ b/arch/arm/kvm/mmu.c
@@ -837,7 +837,7 @@ void kvm_free_stage2_pgd(struct kvm *kvm)
spin_lock(&kvm->mmu_lock);
if (kvm->arch.pgd) {
unmap_stage2_range(kvm, 0, KVM_PHYS_SIZE);
- pgd = kvm->arch.pgd;
+ pgd = READ_ONCE(kvm->arch.pgd);
kvm->arch.pgd = NULL;
}
spin_unlock(&kvm->mmu_lock);
diff --git a/arch/arm/mach-at91/pm.c b/arch/arm/mach-at91/pm.c
index 31dde8b6f2ea..8ba0e2e5ad97 100644
--- a/arch/arm/mach-at91/pm.c
+++ b/arch/arm/mach-at91/pm.c
@@ -335,7 +335,7 @@ static void at91sam9_sdram_standby(void)
at91_ramc_write(1, AT91_SDRAMC_LPR, saved_lpr1);
}
-static const struct of_device_id const ramc_ids[] __initconst = {
+static const struct of_device_id ramc_ids[] __initconst = {
{ .compatible = "atmel,at91rm9200-sdramc", .data = at91rm9200_standby },
{ .compatible = "atmel,at91sam9260-sdramc", .data = at91sam9_sdram_standby },
{ .compatible = "atmel,at91sam9g45-ddramc", .data = at91_ddr_standby },
diff --git a/arch/arm/mach-bcm/bcm_kona_smc.c b/arch/arm/mach-bcm/bcm_kona_smc.c
index cf3f8658f0e5..a55a7ecf146a 100644
--- a/arch/arm/mach-bcm/bcm_kona_smc.c
+++ b/arch/arm/mach-bcm/bcm_kona_smc.c
@@ -33,7 +33,7 @@ struct bcm_kona_smc_data {
unsigned result;
};
-static const struct of_device_id const bcm_kona_smc_ids[] __initconst = {
+static const struct of_device_id bcm_kona_smc_ids[] __initconst = {
{.compatible = "brcm,kona-smc"},
{.compatible = "bcm,kona-smc"}, /* deprecated name */
{},
diff --git a/arch/arm/mach-cns3xxx/core.c b/arch/arm/mach-cns3xxx/core.c
index 03da3813f1ab..7d5a44a06648 100644
--- a/arch/arm/mach-cns3xxx/core.c
+++ b/arch/arm/mach-cns3xxx/core.c
@@ -346,7 +346,7 @@ static struct usb_ohci_pdata cns3xxx_usb_ohci_pdata = {
.power_off = csn3xxx_usb_power_off,
};
-static const struct of_dev_auxdata const cns3xxx_auxdata[] __initconst = {
+static const struct of_dev_auxdata cns3xxx_auxdata[] __initconst = {
{ "intel,usb-ehci", CNS3XXX_USB_BASE, "ehci-platform", &cns3xxx_usb_ehci_pdata },
{ "intel,usb-ohci", CNS3XXX_USB_OHCI_BASE, "ohci-platform", &cns3xxx_usb_ohci_pdata },
{ "cavium,cns3420-ahci", CNS3XXX_SATA2_BASE, "ahci", NULL },
diff --git a/arch/arm/mach-omap2/prm_common.c b/arch/arm/mach-omap2/prm_common.c
index 5b2f5138d938..f1ca9479491b 100644
--- a/arch/arm/mach-omap2/prm_common.c
+++ b/arch/arm/mach-omap2/prm_common.c
@@ -713,7 +713,7 @@ static struct omap_prcm_init_data scrm_data __initdata = {
};
#endif
-static const struct of_device_id const omap_prcm_dt_match_table[] __initconst = {
+static const struct of_device_id omap_prcm_dt_match_table[] __initconst = {
#ifdef CONFIG_SOC_AM33XX
{ .compatible = "ti,am3-prcm", .data = &am3_prm_data },
#endif
diff --git a/arch/arm/mach-omap2/vc.c b/arch/arm/mach-omap2/vc.c
index 2028167fff31..d76b1e5eb8ba 100644
--- a/arch/arm/mach-omap2/vc.c
+++ b/arch/arm/mach-omap2/vc.c
@@ -559,7 +559,7 @@ struct i2c_init_data {
u8 hsscll_12;
};
-static const struct i2c_init_data const omap4_i2c_timing_data[] __initconst = {
+static const struct i2c_init_data omap4_i2c_timing_data[] __initconst = {
{
.load = 50,
.loadbits = 0x3,
diff --git a/arch/arm/mach-spear/time.c b/arch/arm/mach-spear/time.c
index 9ccffc1d0f28..aaaa6781b9fe 100644
--- a/arch/arm/mach-spear/time.c
+++ b/arch/arm/mach-spear/time.c
@@ -204,7 +204,7 @@ static void __init spear_clockevent_init(int irq)
setup_irq(irq, &spear_timer_irq);
}
-static const struct of_device_id const timer_of_match[] __initconst = {
+static const struct of_device_id timer_of_match[] __initconst = {
{ .compatible = "st,spear-timer", },
{ },
};
diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 926b1be48043..ce47dfe25fb0 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -314,8 +314,11 @@ retry:
* signal first. We do not need to release the mmap_sem because
* it would already be released in __lock_page_or_retry in
* mm/filemap.c. */
- if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current))
+ if ((fault & VM_FAULT_RETRY) && fatal_signal_pending(current)) {
+ if (!user_mode(regs))
+ goto no_context;
return 0;
+ }
/*
* Major/minor page fault accounting is only done on the
diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c
index d062f08f5020..4b24964a520a 100644
--- a/arch/arm/xen/mm.c
+++ b/arch/arm/xen/mm.c
@@ -199,6 +199,7 @@ static struct dma_map_ops xen_swiotlb_dma_ops = {
.unmap_page = xen_swiotlb_unmap_page,
.dma_supported = xen_swiotlb_dma_supported,
.set_dma_mask = xen_swiotlb_set_dma_mask,
+ .mmap = xen_swiotlb_dma_mmap,
};
int __init xen_mm_init(void)
diff --git a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
index 49a5d8ccae27..68e6f88bdcfe 100644
--- a/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
+++ b/arch/arm64/boot/dts/marvell/armada-37xx.dtsi
@@ -170,6 +170,7 @@
interrupt-controller;
reg = <0x1d00000 0x10000>, /* GICD */
<0x1d40000 0x40000>; /* GICR */
+ interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
};
};
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index 7ee6d74101cf..c18658690993 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -487,6 +487,7 @@ ENTRY(kimage_vaddr)
* booted in EL1 or EL2 respectively.
*/
ENTRY(el2_setup)
+ msr SPsel, #1 // We want to use SP_EL{1,2}
mrs x0, CurrentEL
cmp x0, #CurrentEL_EL2
b.ne 1f
diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 7501b4097c7a..f6c003e9edbf 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -113,7 +113,7 @@ static void __dump_instr(const char *lvl, struct pt_regs *regs)
for (i = -4; i < 1; i++) {
unsigned int val, bad;
- bad = __get_user(val, &((u32 *)addr)[i]);
+ bad = get_user(val, &((u32 *)addr)[i]);
if (!bad)
p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile
index 14c4e3b14bcb..48b03547a969 100644
--- a/arch/arm64/kvm/hyp/Makefile
+++ b/arch/arm64/kvm/hyp/Makefile
@@ -2,7 +2,7 @@
# Makefile for Kernel-based Virtual Machine module, HYP part
#
-ccflags-y += -fno-stack-protector
+ccflags-y += -fno-stack-protector -DDISABLE_BRANCH_PROFILING
KVM=../../../../virt/kvm
diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c
index da6a8cfa54a0..3556715a774e 100644
--- a/arch/arm64/kvm/inject_fault.c
+++ b/arch/arm64/kvm/inject_fault.c
@@ -33,12 +33,26 @@
#define LOWER_EL_AArch64_VECTOR 0x400
#define LOWER_EL_AArch32_VECTOR 0x600
+/*
+ * Table taken from ARMv8 ARM DDI0487B-B, table G1-10.
+ */
+static const u8 return_offsets[8][2] = {
+ [0] = { 0, 0 }, /* Reset, unused */
+ [1] = { 4, 2 }, /* Undefined */
+ [2] = { 0, 0 }, /* SVC, unused */
+ [3] = { 4, 4 }, /* Prefetch abort */
+ [4] = { 8, 8 }, /* Data abort */
+ [5] = { 0, 0 }, /* HVC, unused */
+ [6] = { 4, 4 }, /* IRQ, unused */
+ [7] = { 4, 4 }, /* FIQ, unused */
+};
+
static void prepare_fault32(struct kvm_vcpu *vcpu, u32 mode, u32 vect_offset)
{
unsigned long cpsr;
unsigned long new_spsr_value = *vcpu_cpsr(vcpu);
bool is_thumb = (new_spsr_value & COMPAT_PSR_T_BIT);
- u32 return_offset = (is_thumb) ? 4 : 0;
+ u32 return_offset = return_offsets[vect_offset >> 2][is_thumb];
u32 sctlr = vcpu_cp15(vcpu, c1_SCTLR);
cpsr = mode | COMPAT_PSR_I_BIT;
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 523b9d3c8143..cf4e3f53a1e3 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -515,7 +515,7 @@ static const struct fault_info fault_info[] = {
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 0 translation fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 1 translation fault" },
{ do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 2 translation fault" },
- { do_page_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
+ { do_translation_fault, SIGSEGV, SEGV_MAPERR, "level 3 translation fault" },
{ do_bad, SIGBUS, 0, "unknown 8" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 1 access flag fault" },
{ do_page_fault, SIGSEGV, SEGV_ACCERR, "level 2 access flag fault" },
diff --git a/arch/mips/ath79/clock.c b/arch/mips/ath79/clock.c
index cc3a1e33a600..7e2bb12b64ea 100644
--- a/arch/mips/ath79/clock.c
+++ b/arch/mips/ath79/clock.c
@@ -508,16 +508,19 @@ static void __init ath79_clocks_init_dt_ng(struct device_node *np)
ar9330_clk_init(ref_clk, pll_base);
else {
pr_err("%s: could not find any appropriate clk_init()\n", dnfn);
- goto err_clk;
+ goto err_iounmap;
}
if (of_clk_add_provider(np, of_clk_src_onecell_get, &clk_data)) {
pr_err("%s: could not register clk provider\n", dnfn);
- goto err_clk;
+ goto err_iounmap;
}
return;
+err_iounmap:
+ iounmap(pll_base);
+
err_clk:
clk_put(ref_clk);
diff --git a/arch/mips/include/asm/irq.h b/arch/mips/include/asm/irq.h
index 956db6e201d1..c5d351786416 100644
--- a/arch/mips/include/asm/irq.h
+++ b/arch/mips/include/asm/irq.h
@@ -18,9 +18,24 @@
#include <irq.h>
#define IRQ_STACK_SIZE THREAD_SIZE
+#define IRQ_STACK_START (IRQ_STACK_SIZE - 16)
extern void *irq_stack[NR_CPUS];
+/*
+ * The highest address on the IRQ stack contains a dummy frame put down in
+ * genex.S (handle_int & except_vec_vi_handler) which is structured as follows:
+ *
+ * top ------------
+ * | task sp | <- irq_stack[cpu] + IRQ_STACK_START
+ * ------------
+ * | | <- First frame of IRQ context
+ * ------------
+ *
+ * task sp holds a copy of the task stack pointer where the struct pt_regs
+ * from exception entry can be found.
+ */
+
static inline bool on_irq_stack(int cpu, unsigned long sp)
{
unsigned long low = (unsigned long)irq_stack[cpu];
diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c
index 4be2763f835d..bfff6ea45d51 100644
--- a/arch/mips/kernel/asm-offsets.c
+++ b/arch/mips/kernel/asm-offsets.c
@@ -103,6 +103,7 @@ void output_thread_info_defines(void)
DEFINE(_THREAD_SIZE, THREAD_SIZE);
DEFINE(_THREAD_MASK, THREAD_MASK);
DEFINE(_IRQ_STACK_SIZE, IRQ_STACK_SIZE);
+ DEFINE(_IRQ_STACK_START, IRQ_STACK_START);
BLANK();
}
diff --git a/arch/mips/kernel/cps-vec.S b/arch/mips/kernel/cps-vec.S
index 59476a607add..a00e87b0256d 100644
--- a/arch/mips/kernel/cps-vec.S
+++ b/arch/mips/kernel/cps-vec.S
@@ -361,7 +361,7 @@ LEAF(mips_cps_get_bootcfg)
END(mips_cps_get_bootcfg)
LEAF(mips_cps_boot_vpes)
- PTR_L ta2, COREBOOTCFG_VPEMASK(a0)
+ lw ta2, COREBOOTCFG_VPEMASK(a0)
PTR_L ta3, COREBOOTCFG_VPECONFIG(a0)
#if defined(CONFIG_CPU_MIPSR6)
diff --git a/arch/mips/kernel/genex.S b/arch/mips/kernel/genex.S
index 2ac6c2625c13..ae810da4d499 100644
--- a/arch/mips/kernel/genex.S
+++ b/arch/mips/kernel/genex.S
@@ -215,9 +215,11 @@ NESTED(handle_int, PT_SIZE, sp)
beq t0, t1, 2f
/* Switch to IRQ stack */
- li t1, _IRQ_STACK_SIZE
+ li t1, _IRQ_STACK_START
PTR_ADD sp, t0, t1
+ /* Save task's sp on IRQ stack so that unwinding can follow it */
+ LONG_S s1, 0(sp)
2:
jal plat_irq_dispatch
@@ -325,9 +327,11 @@ NESTED(except_vec_vi_handler, 0, sp)
beq t0, t1, 2f
/* Switch to IRQ stack */
- li t1, _IRQ_STACK_SIZE
+ li t1, _IRQ_STACK_START
PTR_ADD sp, t0, t1
+ /* Save task's sp on IRQ stack so that unwinding can follow it */
+ LONG_S s1, 0(sp)
2:
jalr v0
diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c
index fbbf5fcc695a..1b50958a1373 100644
--- a/arch/mips/kernel/process.c
+++ b/arch/mips/kernel/process.c
@@ -487,31 +487,52 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page,
unsigned long pc,
unsigned long *ra)
{
+ unsigned long low, high, irq_stack_high;
struct mips_frame_info info;
unsigned long size, ofs;
+ struct pt_regs *regs;
int leaf;
- extern void ret_from_irq(void);
- extern void ret_from_exception(void);
if (!stack_page)
return 0;
/*
- * If we reached the bottom of interrupt context,
- * return saved pc in pt_regs.
+ * IRQ stacks start at IRQ_STACK_START
+ * task stacks at THREAD_SIZE - 32
*/
- if (pc == (unsigned long)ret_from_irq ||
- pc == (unsigned long)ret_from_exception) {
- struct pt_regs *regs;
- if (*sp >= stack_page &&
- *sp + sizeof(*regs) <= stack_page + THREAD_SIZE - 32) {
- regs = (struct pt_regs *)*sp;
- pc = regs->cp0_epc;
- if (!user_mode(regs) && __kernel_text_address(pc)) {
- *sp = regs->regs[29];
- *ra = regs->regs[31];
- return pc;
- }
+ low = stack_page;
+ if (!preemptible() && on_irq_stack(raw_smp_processor_id(), *sp)) {
+ high = stack_page + IRQ_STACK_START;
+ irq_stack_high = high;
+ } else {
+ high = stack_page + THREAD_SIZE - 32;
+ irq_stack_high = 0;
+ }
+
+ /*
+ * If we reached the top of the interrupt stack, start unwinding
+ * the interrupted task stack.
+ */
+ if (unlikely(*sp == irq_stack_high)) {
+ unsigned long task_sp = *(unsigned long *)*sp;
+
+ /*
+ * Check that the pointer saved in the IRQ stack head points to
+ * something within the stack of the current task
+ */
+ if (!object_is_on_stack((void *)task_sp))
+ return 0;
+
+ /*
+ * Follow pointer to tasks kernel stack frame where interrupted
+ * state was saved.
+ */
+ regs = (struct pt_regs *)task_sp;
+ pc = regs->cp0_epc;
+ if (!user_mode(regs) && __kernel_text_address(pc)) {
+ *sp = regs->regs[29];
+ *ra = regs->regs[31];
+ return pc;
}
return 0;
}
@@ -532,8 +553,7 @@ unsigned long notrace unwind_stack_by_address(unsigned long stack_page,
if (leaf < 0)
return 0;
- if (*sp < stack_page ||
- *sp + info.frame_size > stack_page + THREAD_SIZE - 32)
+ if (*sp < low || *sp + info.frame_size > high)
return 0;
if (leaf)
diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S
index d5de67591735..f0a0e6d62be3 100644
--- a/arch/mips/kernel/vmlinux.lds.S
+++ b/arch/mips/kernel/vmlinux.lds.S
@@ -182,7 +182,7 @@ SECTIONS
* Force .bss to 64K alignment so that .bss..swapper_pg_dir
* gets that alignment. .sbss should be empty, so there will be
* no holes after __init_end. */
- BSS_SECTION(0, 0x10000, 0)
+ BSS_SECTION(0, 0x10000, 8)
_end = . ;
diff --git a/arch/mips/lantiq/xway/sysctrl.c b/arch/mips/lantiq/xway/sysctrl.c
index 90565477dfbd..95bec460b651 100644
--- a/arch/mips/lantiq/xway/sysctrl.c
+++ b/arch/mips/lantiq/xway/sysctrl.c
@@ -469,8 +469,8 @@ void __init ltq_soc_init(void)
panic("Failed to load xbar nodes from devicetree");
if (of_address_to_resource(np_xbar, 0, &res_xbar))
panic("Failed to get xbar resources");
- if (request_mem_region(res_xbar.start, resource_size(&res_xbar),
- res_xbar.name) < 0)
+ if (!request_mem_region(res_xbar.start, resource_size(&res_xbar),
+ res_xbar.name))
panic("Failed to get xbar resources");
ltq_xbar_membase = ioremap_nocache(res_xbar.start,
diff --git a/arch/mips/math-emu/cp1emu.c b/arch/mips/math-emu/cp1emu.c
index e9385bcd9723..9ade60ca08e0 100644
--- a/arch/mips/math-emu/cp1emu.c
+++ b/arch/mips/math-emu/cp1emu.c
@@ -2386,7 +2386,6 @@ dcopuop:
break;
default:
/* Reserved R6 ops */
- pr_err("Reserved MIPS R6 CMP.condn.S operation\n");
return SIGILL;
}
}
@@ -2460,7 +2459,6 @@ dcopuop:
break;
default:
/* Reserved R6 ops */
- pr_err("Reserved MIPS R6 CMP.condn.D operation\n");
return SIGILL;
}
}
diff --git a/arch/mips/math-emu/dp_fmax.c b/arch/mips/math-emu/dp_fmax.c
index fd71b8daaaf2..5bec64f2884e 100644
--- a/arch/mips/math-emu/dp_fmax.c
+++ b/arch/mips/math-emu/dp_fmax.c
@@ -47,14 +47,26 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754dp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -80,9 +92,7 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y)
return ys ? x : y;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754dp_zero(1);
+ return ieee754dp_zero(xs & ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
@@ -106,16 +116,32 @@ union ieee754dp ieee754dp_fmax(union ieee754dp x, union ieee754dp y)
else if (xs < ys)
return x;
- /* Compare exponent */
- if (xe > ye)
- return x;
- else if (xe < ye)
- return y;
+ /* Signs of inputs are equal, let's compare exponents */
+ if (xs == 0) {
+ /* Inputs are both positive */
+ if (xe > ye)
+ return x;
+ else if (xe < ye)
+ return y;
+ } else {
+ /* Inputs are both negative */
+ if (xe > ye)
+ return y;
+ else if (xe < ye)
+ return x;
+ }
- /* Compare mantissa */
+ /* Signs and exponents of inputs are equal, let's compare mantissas */
+ if (xs == 0) {
+ /* Inputs are both positive, with equal signs and exponents */
+ if (xm <= ym)
+ return y;
+ return x;
+ }
+ /* Inputs are both negative, with equal signs and exponents */
if (xm <= ym)
- return y;
- return x;
+ return x;
+ return y;
}
union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
@@ -147,14 +173,26 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754dp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -164,6 +202,9 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
/*
* Infinity and zero handling
*/
+ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
+ return ieee754dp_inf(xs & ys);
+
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
@@ -171,7 +212,6 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
return x;
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
@@ -180,9 +220,7 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
return y;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754dp_zero(1);
+ return ieee754dp_zero(xs & ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
@@ -207,7 +245,11 @@ union ieee754dp ieee754dp_fmaxa(union ieee754dp x, union ieee754dp y)
return y;
/* Compare mantissa */
- if (xm <= ym)
+ if (xm < ym)
return y;
- return x;
+ else if (xm > ym)
+ return x;
+ else if (xs == 0)
+ return x;
+ return y;
}
diff --git a/arch/mips/math-emu/dp_fmin.c b/arch/mips/math-emu/dp_fmin.c
index c1072b0dfb95..a287b23818d8 100644
--- a/arch/mips/math-emu/dp_fmin.c
+++ b/arch/mips/math-emu/dp_fmin.c
@@ -47,14 +47,26 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754dp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -80,9 +92,7 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y)
return ys ? y : x;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754dp_zero(1);
+ return ieee754dp_zero(xs | ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
@@ -106,16 +116,32 @@ union ieee754dp ieee754dp_fmin(union ieee754dp x, union ieee754dp y)
else if (xs < ys)
return y;
- /* Compare exponent */
- if (xe > ye)
- return y;
- else if (xe < ye)
- return x;
+ /* Signs of inputs are the same, let's compare exponents */
+ if (xs == 0) {
+ /* Inputs are both positive */
+ if (xe > ye)
+ return y;
+ else if (xe < ye)
+ return x;
+ } else {
+ /* Inputs are both negative */
+ if (xe > ye)
+ return x;
+ else if (xe < ye)
+ return y;
+ }
- /* Compare mantissa */
+ /* Signs and exponents of inputs are equal, let's compare mantissas */
+ if (xs == 0) {
+ /* Inputs are both positive, with equal signs and exponents */
+ if (xm <= ym)
+ return x;
+ return y;
+ }
+ /* Inputs are both negative, with equal signs and exponents */
if (xm <= ym)
- return x;
- return y;
+ return y;
+ return x;
}
union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
@@ -147,14 +173,26 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754dp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -164,25 +202,25 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
/*
* Infinity and zero handling
*/
+ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
+ return ieee754dp_inf(xs | ys);
+
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
- return x;
+ return y;
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
- return y;
+ return x;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754dp_zero(1);
+ return ieee754dp_zero(xs | ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
@@ -207,7 +245,11 @@ union ieee754dp ieee754dp_fmina(union ieee754dp x, union ieee754dp y)
return x;
/* Compare mantissa */
- if (xm <= ym)
+ if (xm < ym)
+ return x;
+ else if (xm > ym)
+ return y;
+ else if (xs == 1)
return x;
return y;
}
diff --git a/arch/mips/math-emu/dp_maddf.c b/arch/mips/math-emu/dp_maddf.c
index 4a2d03c72959..e0d9be5fbf4c 100644
--- a/arch/mips/math-emu/dp_maddf.c
+++ b/arch/mips/math-emu/dp_maddf.c
@@ -14,22 +14,45 @@
#include "ieee754dp.h"
-enum maddf_flags {
- maddf_negate_product = 1 << 0,
-};
+
+/* 128 bits shift right logical with rounding. */
+void srl128(u64 *hptr, u64 *lptr, int count)
+{
+ u64 low;
+
+ if (count >= 128) {
+ *lptr = *hptr != 0 || *lptr != 0;
+ *hptr = 0;
+ } else if (count >= 64) {
+ if (count == 64) {
+ *lptr = *hptr | (*lptr != 0);
+ } else {
+ low = *lptr;
+ *lptr = *hptr >> (count - 64);
+ *lptr |= (*hptr << (128 - count)) != 0 || low != 0;
+ }
+ *hptr = 0;
+ } else {
+ low = *lptr;
+ *lptr = low >> count | *hptr << (64 - count);
+ *lptr |= (low << (64 - count)) != 0;
+ *hptr = *hptr >> count;
+ }
+}
static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
union ieee754dp y, enum maddf_flags flags)
{
int re;
int rs;
- u64 rm;
unsigned lxm;
unsigned hxm;
unsigned lym;
unsigned hym;
u64 lrm;
u64 hrm;
+ u64 lzm;
+ u64 hzm;
u64 t;
u64 at;
int s;
@@ -48,52 +71,34 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
ieee754_clearcx();
- switch (zc) {
- case IEEE754_CLASS_SNAN:
- ieee754_setcx(IEEE754_INVALID_OPERATION);
+ /*
+ * Handle the cases when at least one of x, y or z is a NaN.
+ * Order of precedence is sNaN, qNaN and z, x, y.
+ */
+ if (zc == IEEE754_CLASS_SNAN)
return ieee754dp_nanxcpt(z);
- case IEEE754_CLASS_DNORM:
- DPDNORMZ;
- /* QNAN is handled separately below */
- }
-
- switch (CLPAIR(xc, yc)) {
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
- return ieee754dp_nanxcpt(y);
-
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
+ if (xc == IEEE754_CLASS_SNAN)
return ieee754dp_nanxcpt(x);
-
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
+ if (yc == IEEE754_CLASS_SNAN)
+ return ieee754dp_nanxcpt(y);
+ if (zc == IEEE754_CLASS_QNAN)
+ return z;
+ if (xc == IEEE754_CLASS_QNAN)
+ return x;
+ if (yc == IEEE754_CLASS_QNAN)
return y;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
- return x;
+ if (zc == IEEE754_CLASS_DNORM)
+ DPDNORMZ;
+ /* ZERO z cases are handled separately below */
+ switch (CLPAIR(xc, yc)) {
/*
* Infinity handling
*/
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754dp_indef();
@@ -102,9 +107,27 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- return ieee754dp_inf(xs ^ ys);
+ if ((zc == IEEE754_CLASS_INF) &&
+ ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+ /*
+ * Cases of addition of infinities with opposite signs
+ * or subtraction of infinities with same signs.
+ */
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754dp_indef();
+ }
+ /*
+ * z is here either not an infinity, or an infinity having the
+ * same sign as product (x*y) (in case of MADDF.D instruction)
+ * or product -(x*y) (in MSUBF.D case). The result must be an
+ * infinity, and its sign is determined only by the value of
+ * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+ */
+ if (flags & MADDF_NEGATE_PRODUCT)
+ return ieee754dp_inf(1 ^ (xs ^ ys));
+ else
+ return ieee754dp_inf(xs ^ ys);
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -113,32 +136,42 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
- /* Multiplication is 0 so just return z */
+ if (zc == IEEE754_CLASS_ZERO) {
+ /* Handle cases +0 + (-0) and similar ones. */
+ if ((!(flags & MADDF_NEGATE_PRODUCT)
+ && (zs == (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT)
+ && (zs != (xs ^ ys))))
+ /*
+ * Cases of addition of zeros of equal signs
+ * or subtraction of zeroes of opposite signs.
+ * The sign of the resulting zero is in any
+ * such case determined only by the sign of z.
+ */
+ return z;
+
+ return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
+ }
+ /* x*y is here 0, and z is not 0, so just return z */
return z;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
DPDNORMX;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
DPDNORMY;
break;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
DPDNORMX;
break;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754dp_inf(zs);
/* fall through to real computations */
}
@@ -157,7 +190,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
re = xe + ye;
rs = xs ^ ys;
- if (flags & maddf_negate_product)
+ if (flags & MADDF_NEGATE_PRODUCT)
rs ^= 1;
/* shunt to top of word */
@@ -165,7 +198,7 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
ym <<= 64 - (DP_FBITS + 1);
/*
- * Multiply 64 bits xm, ym to give high 64 bits rm with stickness.
+ * Multiply 64 bits xm and ym to give 128 bits result in hrm:lrm.
*/
/* 32 * 32 => 64 */
@@ -195,78 +228,110 @@ static union ieee754dp _dp_maddf(union ieee754dp z, union ieee754dp x,
hrm = hrm + (t >> 32);
- rm = hrm | (lrm != 0);
-
- /*
- * Sticky shift down to normal rounding precision.
- */
- if ((s64) rm < 0) {
- rm = (rm >> (64 - (DP_FBITS + 1 + 3))) |
- ((rm << (DP_FBITS + 1 + 3)) != 0);
+ /* Put explicit bit at bit 126 if necessary */
+ if ((int64_t)hrm < 0) {
+ lrm = (hrm << 63) | (lrm >> 1);
+ hrm = hrm >> 1;
re++;
- } else {
- rm = (rm >> (64 - (DP_FBITS + 1 + 3 + 1))) |
- ((rm << (DP_FBITS + 1 + 3 + 1)) != 0);
}
- assert(rm & (DP_HIDDEN_BIT << 3));
- /* And now the addition */
- assert(zm & DP_HIDDEN_BIT);
+ assert(hrm & (1 << 62));
- /*
- * Provide guard,round and stick bit space.
- */
- zm <<= 3;
+ if (zc == IEEE754_CLASS_ZERO) {
+ /*
+ * Move explicit bit from bit 126 to bit 55 since the
+ * ieee754dp_format code expects the mantissa to be
+ * 56 bits wide (53 + 3 rounding bits).
+ */
+ srl128(&hrm, &lrm, (126 - 55));
+ return ieee754dp_format(rs, re, lrm);
+ }
+
+ /* Move explicit bit from bit 52 to bit 126 */
+ lzm = 0;
+ hzm = zm << 10;
+ assert(hzm & (1 << 62));
+ /* Make the exponents the same */
if (ze > re) {
/*
* Have to shift y fraction right to align.
*/
s = ze - re;
- rm = XDPSRS(rm, s);
+ srl128(&hrm, &lrm, s);
re += s;
} else if (re > ze) {
/*
* Have to shift x fraction right to align.
*/
s = re - ze;
- zm = XDPSRS(zm, s);
+ srl128(&hzm, &lzm, s);
ze += s;
}
assert(ze == re);
assert(ze <= DP_EMAX);
+ /* Do the addition */
if (zs == rs) {
/*
- * Generate 28 bit result of adding two 27 bit numbers
- * leaving result in xm, xs and xe.
+ * Generate 128 bit result by adding two 127 bit numbers
+ * leaving result in hzm:lzm, zs and ze.
*/
- zm = zm + rm;
-
- if (zm >> (DP_FBITS + 1 + 3)) { /* carry out */
- zm = XDPSRS1(zm);
+ hzm = hzm + hrm + (lzm > (lzm + lrm));
+ lzm = lzm + lrm;
+ if ((int64_t)hzm < 0) { /* carry out */
+ srl128(&hzm, &lzm, 1);
ze++;
}
} else {
- if (zm >= rm) {
- zm = zm - rm;
+ if (hzm > hrm || (hzm == hrm && lzm >= lrm)) {
+ hzm = hzm - hrm - (lzm < lrm);
+ lzm = lzm - lrm;
} else {
- zm = rm - zm;
+ hzm = hrm - hzm - (lrm < lzm);
+ lzm = lrm - lzm;
zs = rs;
}
- if (zm == 0)
+ if (lzm == 0 && hzm == 0)
return ieee754dp_zero(ieee754_csr.rm == FPU_CSR_RD);
/*
- * Normalize to rounding precision.
+ * Put explicit bit at bit 126 if necessary.
*/
- while ((zm >> (DP_FBITS + 3)) == 0) {
- zm <<= 1;
- ze--;
+ if (hzm == 0) {
+ /* left shift by 63 or 64 bits */
+ if ((int64_t)lzm < 0) {
+ /* MSB of lzm is the explicit bit */
+ hzm = lzm >> 1;
+ lzm = lzm << 63;
+ ze -= 63;
+ } else {
+ hzm = lzm;
+ lzm = 0;
+ ze -= 64;
+ }
+ }
+
+ t = 0;
+ while ((hzm >> (62 - t)) == 0)
+ t++;
+
+ assert(t <= 62);
+ if (t) {
+ hzm = hzm << t | lzm >> (64 - t);
+ lzm = lzm << t;
+ ze -= t;
}
}
- return ieee754dp_format(zs, ze, zm);
+ /*
+ * Move explicit bit from bit 126 to bit 55 since the
+ * ieee754dp_format code expects the mantissa to be
+ * 56 bits wide (53 + 3 rounding bits).
+ */
+ srl128(&hzm, &lzm, (126 - 55));
+
+ return ieee754dp_format(zs, ze, lzm);
}
union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
@@ -278,5 +343,5 @@ union ieee754dp ieee754dp_maddf(union ieee754dp z, union ieee754dp x,
union ieee754dp ieee754dp_msubf(union ieee754dp z, union ieee754dp x,
union ieee754dp y)
{
- return _dp_maddf(z, x, y, maddf_negate_product);
+ return _dp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
}
diff --git a/arch/mips/math-emu/ieee754int.h b/arch/mips/math-emu/ieee754int.h
index 8bc2f6963324..dd2071f430e0 100644
--- a/arch/mips/math-emu/ieee754int.h
+++ b/arch/mips/math-emu/ieee754int.h
@@ -26,6 +26,10 @@
#define CLPAIR(x, y) ((x)*6+(y))
+enum maddf_flags {
+ MADDF_NEGATE_PRODUCT = 1 << 0,
+};
+
static inline void ieee754_clearcx(void)
{
ieee754_csr.cx = 0;
diff --git a/arch/mips/math-emu/ieee754sp.h b/arch/mips/math-emu/ieee754sp.h
index 8476067075fe..0f63e4202cff 100644
--- a/arch/mips/math-emu/ieee754sp.h
+++ b/arch/mips/math-emu/ieee754sp.h
@@ -45,6 +45,10 @@ static inline int ieee754sp_finite(union ieee754sp x)
return SPBEXP(x) != SP_EMAX + 1 + SP_EBIAS;
}
+/* 64 bit right shift with rounding */
+#define XSPSRS64(v, rs) \
+ (((rs) >= 64) ? ((v) != 0) : ((v) >> (rs)) | ((v) << (64-(rs)) != 0))
+
/* 3bit extended single precision sticky right shift */
#define XSPSRS(v, rs) \
((rs > (SP_FBITS+3))?1:((v) >> (rs)) | ((v) << (32-(rs)) != 0))
diff --git a/arch/mips/math-emu/sp_fmax.c b/arch/mips/math-emu/sp_fmax.c
index 4d000844e48e..74a5a00d2f22 100644
--- a/arch/mips/math-emu/sp_fmax.c
+++ b/arch/mips/math-emu/sp_fmax.c
@@ -47,14 +47,26 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754sp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -80,9 +92,7 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y)
return ys ? x : y;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754sp_zero(1);
+ return ieee754sp_zero(xs & ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
@@ -106,16 +116,32 @@ union ieee754sp ieee754sp_fmax(union ieee754sp x, union ieee754sp y)
else if (xs < ys)
return x;
- /* Compare exponent */
- if (xe > ye)
- return x;
- else if (xe < ye)
- return y;
+ /* Signs of inputs are equal, let's compare exponents */
+ if (xs == 0) {
+ /* Inputs are both positive */
+ if (xe > ye)
+ return x;
+ else if (xe < ye)
+ return y;
+ } else {
+ /* Inputs are both negative */
+ if (xe > ye)
+ return y;
+ else if (xe < ye)
+ return x;
+ }
- /* Compare mantissa */
+ /* Signs and exponents of inputs are equal, let's compare mantissas */
+ if (xs == 0) {
+ /* Inputs are both positive, with equal signs and exponents */
+ if (xm <= ym)
+ return y;
+ return x;
+ }
+ /* Inputs are both negative, with equal signs and exponents */
if (xm <= ym)
- return y;
- return x;
+ return x;
+ return y;
}
union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
@@ -147,14 +173,26 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754sp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -164,6 +202,9 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
/*
* Infinity and zero handling
*/
+ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
+ return ieee754sp_inf(xs & ys);
+
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
@@ -171,7 +212,6 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
return x;
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
@@ -180,9 +220,7 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
return y;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754sp_zero(1);
+ return ieee754sp_zero(xs & ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
@@ -207,7 +245,11 @@ union ieee754sp ieee754sp_fmaxa(union ieee754sp x, union ieee754sp y)
return y;
/* Compare mantissa */
- if (xm <= ym)
+ if (xm < ym)
return y;
- return x;
+ else if (xm > ym)
+ return x;
+ else if (xs == 0)
+ return x;
+ return y;
}
diff --git a/arch/mips/math-emu/sp_fmin.c b/arch/mips/math-emu/sp_fmin.c
index 4eb1bb9e9dec..c51385f46b09 100644
--- a/arch/mips/math-emu/sp_fmin.c
+++ b/arch/mips/math-emu/sp_fmin.c
@@ -47,14 +47,26 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754sp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -80,9 +92,7 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y)
return ys ? y : x;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754sp_zero(1);
+ return ieee754sp_zero(xs | ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
@@ -106,16 +116,32 @@ union ieee754sp ieee754sp_fmin(union ieee754sp x, union ieee754sp y)
else if (xs < ys)
return y;
- /* Compare exponent */
- if (xe > ye)
- return y;
- else if (xe < ye)
- return x;
+ /* Signs of inputs are the same, let's compare exponents */
+ if (xs == 0) {
+ /* Inputs are both positive */
+ if (xe > ye)
+ return y;
+ else if (xe < ye)
+ return x;
+ } else {
+ /* Inputs are both negative */
+ if (xe > ye)
+ return x;
+ else if (xe < ye)
+ return y;
+ }
- /* Compare mantissa */
+ /* Signs and exponents of inputs are equal, let's compare mantissas */
+ if (xs == 0) {
+ /* Inputs are both positive, with equal signs and exponents */
+ if (xm <= ym)
+ return x;
+ return y;
+ }
+ /* Inputs are both negative, with equal signs and exponents */
if (xm <= ym)
- return x;
- return y;
+ return y;
+ return x;
}
union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
@@ -147,14 +173,26 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
return ieee754sp_nanxcpt(x);
- /* numbers are preferred to NaNs */
+ /*
+ * Quiet NaN handling
+ */
+
+ /*
+ * The case of both inputs quiet NaNs
+ */
+ case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
+ return x;
+
+ /*
+ * The cases of exactly one input quiet NaN (numbers
+ * are here preferred as returned values to NaNs)
+ */
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
return x;
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
@@ -164,25 +202,25 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
/*
* Infinity and zero handling
*/
+ case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
+ return ieee754sp_inf(xs | ys);
+
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
- return x;
+ return y;
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_DNORM):
- return y;
+ return x;
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
- if (xs == ys)
- return x;
- return ieee754sp_zero(1);
+ return ieee754sp_zero(xs | ys);
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
@@ -207,7 +245,11 @@ union ieee754sp ieee754sp_fmina(union ieee754sp x, union ieee754sp y)
return x;
/* Compare mantissa */
- if (xm <= ym)
+ if (xm < ym)
+ return x;
+ else if (xm > ym)
+ return y;
+ else if (xs == 1)
return x;
return y;
}
diff --git a/arch/mips/math-emu/sp_maddf.c b/arch/mips/math-emu/sp_maddf.c
index a8cd8b4f235e..7195fe785d81 100644
--- a/arch/mips/math-emu/sp_maddf.c
+++ b/arch/mips/math-emu/sp_maddf.c
@@ -14,9 +14,6 @@
#include "ieee754sp.h"
-enum maddf_flags {
- maddf_negate_product = 1 << 0,
-};
static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
union ieee754sp y, enum maddf_flags flags)
@@ -24,14 +21,8 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
int re;
int rs;
unsigned rm;
- unsigned short lxm;
- unsigned short hxm;
- unsigned short lym;
- unsigned short hym;
- unsigned lrm;
- unsigned hrm;
- unsigned t;
- unsigned at;
+ uint64_t rm64;
+ uint64_t zm64;
int s;
COMPXSP;
@@ -48,51 +39,35 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
ieee754_clearcx();
- switch (zc) {
- case IEEE754_CLASS_SNAN:
- ieee754_setcx(IEEE754_INVALID_OPERATION);
+ /*
+ * Handle the cases when at least one of x, y or z is a NaN.
+ * Order of precedence is sNaN, qNaN and z, x, y.
+ */
+ if (zc == IEEE754_CLASS_SNAN)
return ieee754sp_nanxcpt(z);
- case IEEE754_CLASS_DNORM:
- SPDNORMZ;
- /* QNAN is handled separately below */
- }
-
- switch (CLPAIR(xc, yc)) {
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_SNAN):
+ if (xc == IEEE754_CLASS_SNAN)
+ return ieee754sp_nanxcpt(x);
+ if (yc == IEEE754_CLASS_SNAN)
return ieee754sp_nanxcpt(y);
+ if (zc == IEEE754_CLASS_QNAN)
+ return z;
+ if (xc == IEEE754_CLASS_QNAN)
+ return x;
+ if (yc == IEEE754_CLASS_QNAN)
+ return y;
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_SNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_SNAN, IEEE754_CLASS_INF):
- return ieee754sp_nanxcpt(x);
+ if (zc == IEEE754_CLASS_DNORM)
+ SPDNORMZ;
+ /* ZERO z cases are handled separately below */
- case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_QNAN):
- return y;
+ switch (CLPAIR(xc, yc)) {
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_QNAN):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_ZERO):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_NORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_DNORM):
- case CLPAIR(IEEE754_CLASS_QNAN, IEEE754_CLASS_INF):
- return x;
/*
* Infinity handling
*/
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
ieee754_setcx(IEEE754_INVALID_OPERATION);
return ieee754sp_indef();
@@ -101,9 +76,27 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_NORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_DNORM):
case CLPAIR(IEEE754_CLASS_INF, IEEE754_CLASS_INF):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- return ieee754sp_inf(xs ^ ys);
+ if ((zc == IEEE754_CLASS_INF) &&
+ ((!(flags & MADDF_NEGATE_PRODUCT) && (zs != (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT) && (zs == (xs ^ ys))))) {
+ /*
+ * Cases of addition of infinities with opposite signs
+ * or subtraction of infinities with same signs.
+ */
+ ieee754_setcx(IEEE754_INVALID_OPERATION);
+ return ieee754sp_indef();
+ }
+ /*
+ * z is here either not an infinity, or an infinity having the
+ * same sign as product (x*y) (in case of MADDF.D instruction)
+ * or product -(x*y) (in MSUBF.D case). The result must be an
+ * infinity, and its sign is determined only by the value of
+ * (flags & MADDF_NEGATE_PRODUCT) and the signs of x and y.
+ */
+ if (flags & MADDF_NEGATE_PRODUCT)
+ return ieee754sp_inf(1 ^ (xs ^ ys));
+ else
+ return ieee754sp_inf(xs ^ ys);
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_ZERO):
case CLPAIR(IEEE754_CLASS_ZERO, IEEE754_CLASS_NORM):
@@ -112,32 +105,42 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_ZERO):
if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
- /* Multiplication is 0 so just return z */
+ if (zc == IEEE754_CLASS_ZERO) {
+ /* Handle cases +0 + (-0) and similar ones. */
+ if ((!(flags & MADDF_NEGATE_PRODUCT)
+ && (zs == (xs ^ ys))) ||
+ ((flags & MADDF_NEGATE_PRODUCT)
+ && (zs != (xs ^ ys))))
+ /*
+ * Cases of addition of zeros of equal signs
+ * or subtraction of zeroes of opposite signs.
+ * The sign of the resulting zero is in any
+ * such case determined only by the sign of z.
+ */
+ return z;
+
+ return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
+ }
+ /* x*y is here 0, and z is not 0, so just return z */
return z;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_DNORM):
SPDNORMX;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_DNORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
SPDNORMY;
break;
case CLPAIR(IEEE754_CLASS_DNORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
SPDNORMX;
break;
case CLPAIR(IEEE754_CLASS_NORM, IEEE754_CLASS_NORM):
- if (zc == IEEE754_CLASS_QNAN)
- return z;
- else if (zc == IEEE754_CLASS_INF)
+ if (zc == IEEE754_CLASS_INF)
return ieee754sp_inf(zs);
/* fall through to real computations */
}
@@ -158,108 +161,93 @@ static union ieee754sp _sp_maddf(union ieee754sp z, union ieee754sp x,
re = xe + ye;
rs = xs ^ ys;
- if (flags & maddf_negate_product)
+ if (flags & MADDF_NEGATE_PRODUCT)
rs ^= 1;
- /* shunt to top of word */
- xm <<= 32 - (SP_FBITS + 1);
- ym <<= 32 - (SP_FBITS + 1);
-
- /*
- * Multiply 32 bits xm, ym to give high 32 bits rm with stickness.
- */
- lxm = xm & 0xffff;
- hxm = xm >> 16;
- lym = ym & 0xffff;
- hym = ym >> 16;
-
- lrm = lxm * lym; /* 16 * 16 => 32 */
- hrm = hxm * hym; /* 16 * 16 => 32 */
+ /* Multiple 24 bit xm and ym to give 48 bit results */
+ rm64 = (uint64_t)xm * ym;
- t = lxm * hym; /* 16 * 16 => 32 */
- at = lrm + (t << 16);
- hrm += at < lrm;
- lrm = at;
- hrm = hrm + (t >> 16);
+ /* Shunt to top of word */
+ rm64 = rm64 << 16;
- t = hxm * lym; /* 16 * 16 => 32 */
- at = lrm + (t << 16);
- hrm += at < lrm;
- lrm = at;
- hrm = hrm + (t >> 16);
-
- rm = hrm | (lrm != 0);
-
- /*
- * Sticky shift down to normal rounding precision.
- */
- if ((int) rm < 0) {
- rm = (rm >> (32 - (SP_FBITS + 1 + 3))) |
- ((rm << (SP_FBITS + 1 + 3)) != 0);
+ /* Put explicit bit at bit 62 if necessary */
+ if ((int64_t) rm64 < 0) {
+ rm64 = rm64 >> 1;
re++;
- } else {
- rm = (rm >> (32 - (SP_FBITS + 1 + 3 + 1))) |
- ((rm << (SP_FBITS + 1 + 3 + 1)) != 0);
}
- assert(rm & (SP_HIDDEN_BIT << 3));
- /* And now the addition */
+ assert(rm64 & (1 << 62));
- assert(zm & SP_HIDDEN_BIT);
+ if (zc == IEEE754_CLASS_ZERO) {
+ /*
+ * Move explicit bit from bit 62 to bit 26 since the
+ * ieee754sp_format code expects the mantissa to be
+ * 27 bits wide (24 + 3 rounding bits).
+ */
+ rm = XSPSRS64(rm64, (62 - 26));
+ return ieee754sp_format(rs, re, rm);
+ }
- /*
- * Provide guard,round and stick bit space.
- */
- zm <<= 3;
+ /* Move explicit bit from bit 23 to bit 62 */
+ zm64 = (uint64_t)zm << (62 - 23);
+ assert(zm64 & (1 << 62));
+ /* Make the exponents the same */
if (ze > re) {
/*
* Have to shift r fraction right to align.
*/
s = ze - re;
- rm = XSPSRS(rm, s);
+ rm64 = XSPSRS64(rm64, s);
re += s;
} else if (re > ze) {
/*
* Have to shift z fraction right to align.
*/
s = re - ze;
- zm = XSPSRS(zm, s);
+ zm64 = XSPSRS64(zm64, s);
ze += s;
}
assert(ze == re);
assert(ze <= SP_EMAX);
+ /* Do the addition */
if (zs == rs) {
/*
- * Generate 28 bit result of adding two 27 bit numbers
- * leaving result in zm, zs and ze.
+ * Generate 64 bit result by adding two 63 bit numbers
+ * leaving result in zm64, zs and ze.
*/
- zm = zm + rm;
-
- if (zm >> (SP_FBITS + 1 + 3)) { /* carry out */
- zm = XSPSRS1(zm);
+ zm64 = zm64 + rm64;
+ if ((int64_t)zm64 < 0) { /* carry out */
+ zm64 = XSPSRS1(zm64);
ze++;
}
} else {
- if (zm >= rm) {
- zm = zm - rm;
+ if (zm64 >= rm64) {
+ zm64 = zm64 - rm64;
} else {
- zm = rm - zm;
+ zm64 = rm64 - zm64;
zs = rs;
}
- if (zm == 0)
+ if (zm64 == 0)
return ieee754sp_zero(ieee754_csr.rm == FPU_CSR_RD);
/*
- * Normalize in extended single precision
+ * Put explicit bit at bit 62 if necessary.
*/
- while ((zm >> (SP_MBITS + 3)) == 0) {
- zm <<= 1;
+ while ((zm64 >> 62) == 0) {
+ zm64 <<= 1;
ze--;
}
-
}
+
+ /*
+ * Move explicit bit from bit 62 to bit 26 since the
+ * ieee754sp_format code expects the mantissa to be
+ * 27 bits wide (24 + 3 rounding bits).
+ */
+ zm = XSPSRS64(zm64, (62 - 26));
+
return ieee754sp_format(zs, ze, zm);
}
@@ -272,5 +260,5 @@ union ieee754sp ieee754sp_maddf(union ieee754sp z, union ieee754sp x,
union ieee754sp ieee754sp_msubf(union ieee754sp z, union ieee754sp x,
union ieee754sp y)
{
- return _sp_maddf(z, x, y, maddf_negate_product);
+ return _sp_maddf(z, x, y, MADDF_NEGATE_PRODUCT);
}
diff --git a/arch/mips/ralink/mt7620.c b/arch/mips/ralink/mt7620.c
index 3c7c9bf57bf3..6f892c1f3ad7 100644
--- a/arch/mips/ralink/mt7620.c
+++ b/arch/mips/ralink/mt7620.c
@@ -176,7 +176,7 @@ static struct rt2880_pmx_func spi_cs1_grp_mt7628[] = {
static struct rt2880_pmx_func spis_grp_mt7628[] = {
FUNC("pwm_uart2", 3, 14, 4),
- FUNC("util", 2, 14, 4),
+ FUNC("utif", 2, 14, 4),
FUNC("gpio", 1, 14, 4),
FUNC("spis", 0, 14, 4),
};
@@ -190,28 +190,28 @@ static struct rt2880_pmx_func gpio_grp_mt7628[] = {
static struct rt2880_pmx_func p4led_kn_grp_mt7628[] = {
FUNC("jtag", 3, 30, 1),
- FUNC("util", 2, 30, 1),
+ FUNC("utif", 2, 30, 1),
FUNC("gpio", 1, 30, 1),
FUNC("p4led_kn", 0, 30, 1),
};
static struct rt2880_pmx_func p3led_kn_grp_mt7628[] = {
FUNC("jtag", 3, 31, 1),
- FUNC("util", 2, 31, 1),
+ FUNC("utif", 2, 31, 1),
FUNC("gpio", 1, 31, 1),
FUNC("p3led_kn", 0, 31, 1),
};
static struct rt2880_pmx_func p2led_kn_grp_mt7628[] = {
FUNC("jtag", 3, 32, 1),
- FUNC("util", 2, 32, 1),
+ FUNC("utif", 2, 32, 1),
FUNC("gpio", 1, 32, 1),
FUNC("p2led_kn", 0, 32, 1),
};
static struct rt2880_pmx_func p1led_kn_grp_mt7628[] = {
FUNC("jtag", 3, 33, 1),
- FUNC("util", 2, 33, 1),
+ FUNC("utif", 2, 33, 1),
FUNC("gpio", 1, 33, 1),
FUNC("p1led_kn", 0, 33, 1),
};
@@ -232,28 +232,28 @@ static struct rt2880_pmx_func wled_kn_grp_mt7628[] = {
static struct rt2880_pmx_func p4led_an_grp_mt7628[] = {
FUNC("jtag", 3, 39, 1),
- FUNC("util", 2, 39, 1),
+ FUNC("utif", 2, 39, 1),
FUNC("gpio", 1, 39, 1),
FUNC("p4led_an", 0, 39, 1),
};
static struct rt2880_pmx_func p3led_an_grp_mt7628[] = {
FUNC("jtag", 3, 40, 1),
- FUNC("util", 2, 40, 1),
+ FUNC("utif", 2, 40, 1),
FUNC("gpio", 1, 40, 1),
FUNC("p3led_an", 0, 40, 1),
};
static struct rt2880_pmx_func p2led_an_grp_mt7628[] = {
FUNC("jtag", 3, 41, 1),
- FUNC("util", 2, 41, 1),
+ FUNC("utif", 2, 41, 1),
FUNC("gpio", 1, 41, 1),
FUNC("p2led_an", 0, 41, 1),
};
static struct rt2880_pmx_func p1led_an_grp_mt7628[] = {
FUNC("jtag", 3, 42, 1),
- FUNC("util", 2, 42, 1),
+ FUNC("utif", 2, 42, 1),
FUNC("gpio", 1, 42, 1),
FUNC("p1led_an", 0, 42, 1),
};
diff --git a/arch/mips/ralink/rt3883.c b/arch/mips/ralink/rt3883.c
index 9e4631acfcb5..3e68e35daf21 100644
--- a/arch/mips/ralink/rt3883.c
+++ b/arch/mips/ralink/rt3883.c
@@ -145,5 +145,5 @@ void prom_soc_init(struct ralink_soc_info *soc_info)
rt2880_pinmux_data = rt3883_pinmux_data;
- ralink_soc == RT3883_SOC;
+ ralink_soc = RT3883_SOC;
}
diff --git a/arch/parisc/kernel/perf.c b/arch/parisc/kernel/perf.c
index 518f4f5f1f43..d63d42533133 100644
--- a/arch/parisc/kernel/perf.c
+++ b/arch/parisc/kernel/perf.c
@@ -39,7 +39,7 @@
* the PDC INTRIGUE calls. This is done to eliminate bugs introduced
* in various PDC revisions. The code is much more maintainable
* and reliable this way vs having to debug on every version of PDC
- * on every box.
+ * on every box.
*/
#include <linux/capability.h>
@@ -195,8 +195,8 @@ static int perf_config(uint32_t *image_ptr);
static int perf_release(struct inode *inode, struct file *file);
static int perf_open(struct inode *inode, struct file *file);
static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t *ppos);
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count,
- loff_t *ppos);
+static ssize_t perf_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos);
static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
static void perf_start_counters(void);
static int perf_stop_counters(uint32_t *raddr);
@@ -222,7 +222,7 @@ extern void perf_intrigue_disable_perf_counters (void);
/*
* configure:
*
- * Configure the cpu with a given data image. First turn off the counters,
+ * Configure the cpu with a given data image. First turn off the counters,
* then download the image, then turn the counters back on.
*/
static int perf_config(uint32_t *image_ptr)
@@ -234,7 +234,7 @@ static int perf_config(uint32_t *image_ptr)
error = perf_stop_counters(raddr);
if (error != 0) {
printk("perf_config: perf_stop_counters = %ld\n", error);
- return -EINVAL;
+ return -EINVAL;
}
printk("Preparing to write image\n");
@@ -242,7 +242,7 @@ printk("Preparing to write image\n");
error = perf_write_image((uint64_t *)image_ptr);
if (error != 0) {
printk("perf_config: DOWNLOAD = %ld\n", error);
- return -EINVAL;
+ return -EINVAL;
}
printk("Preparing to start counters\n");
@@ -254,7 +254,7 @@ printk("Preparing to start counters\n");
}
/*
- * Open the device and initialize all of its memory. The device is only
+ * Open the device and initialize all of its memory. The device is only
* opened once, but can be "queried" by multiple processes that know its
* file descriptor.
*/
@@ -298,8 +298,8 @@ static ssize_t perf_read(struct file *file, char __user *buf, size_t cnt, loff_t
* called on the processor that the download should happen
* on.
*/
-static ssize_t perf_write(struct file *file, const char __user *buf, size_t count,
- loff_t *ppos)
+static ssize_t perf_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppos)
{
int err;
size_t image_size;
@@ -307,11 +307,11 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
uint32_t interface_type;
uint32_t test;
- if (perf_processor_interface == ONYX_INTF)
+ if (perf_processor_interface == ONYX_INTF)
image_size = PCXU_IMAGE_SIZE;
- else if (perf_processor_interface == CUDA_INTF)
+ else if (perf_processor_interface == CUDA_INTF)
image_size = PCXW_IMAGE_SIZE;
- else
+ else
return -EFAULT;
if (!capable(CAP_SYS_ADMIN))
@@ -331,22 +331,22 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
/* First check the machine type is correct for
the requested image */
- if (((perf_processor_interface == CUDA_INTF) &&
- (interface_type != CUDA_INTF)) ||
- ((perf_processor_interface == ONYX_INTF) &&
- (interface_type != ONYX_INTF)))
+ if (((perf_processor_interface == CUDA_INTF) &&
+ (interface_type != CUDA_INTF)) ||
+ ((perf_processor_interface == ONYX_INTF) &&
+ (interface_type != ONYX_INTF)))
return -EINVAL;
/* Next check to make sure the requested image
is valid */
- if (((interface_type == CUDA_INTF) &&
+ if (((interface_type == CUDA_INTF) &&
(test >= MAX_CUDA_IMAGES)) ||
- ((interface_type == ONYX_INTF) &&
- (test >= MAX_ONYX_IMAGES)))
+ ((interface_type == ONYX_INTF) &&
+ (test >= MAX_ONYX_IMAGES)))
return -EINVAL;
/* Copy the image into the processor */
- if (interface_type == CUDA_INTF)
+ if (interface_type == CUDA_INTF)
return perf_config(cuda_images[test]);
else
return perf_config(onyx_images[test]);
@@ -360,7 +360,7 @@ static ssize_t perf_write(struct file *file, const char __user *buf, size_t coun
static void perf_patch_images(void)
{
#if 0 /* FIXME!! */
-/*
+/*
* NOTE: this routine is VERY specific to the current TLB image.
* If the image is changed, this routine might also need to be changed.
*/
@@ -368,9 +368,9 @@ static void perf_patch_images(void)
extern void $i_dtlb_miss_2_0();
extern void PA2_0_iva();
- /*
+ /*
* We can only use the lower 32-bits, the upper 32-bits should be 0
- * anyway given this is in the kernel
+ * anyway given this is in the kernel
*/
uint32_t itlb_addr = (uint32_t)&($i_itlb_miss_2_0);
uint32_t dtlb_addr = (uint32_t)&($i_dtlb_miss_2_0);
@@ -378,21 +378,21 @@ static void perf_patch_images(void)
if (perf_processor_interface == ONYX_INTF) {
/* clear last 2 bytes */
- onyx_images[TLBMISS][15] &= 0xffffff00;
+ onyx_images[TLBMISS][15] &= 0xffffff00;
/* set 2 bytes */
onyx_images[TLBMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
onyx_images[TLBMISS][16] = (dtlb_addr << 8)&0xffffff00;
onyx_images[TLBMISS][17] = itlb_addr;
/* clear last 2 bytes */
- onyx_images[TLBHANDMISS][15] &= 0xffffff00;
+ onyx_images[TLBHANDMISS][15] &= 0xffffff00;
/* set 2 bytes */
onyx_images[TLBHANDMISS][15] |= (0x000000ff&((dtlb_addr) >> 24));
onyx_images[TLBHANDMISS][16] = (dtlb_addr << 8)&0xffffff00;
onyx_images[TLBHANDMISS][17] = itlb_addr;
/* clear last 2 bytes */
- onyx_images[BIG_CPI][15] &= 0xffffff00;
+ onyx_images[BIG_CPI][15] &= 0xffffff00;
/* set 2 bytes */
onyx_images[BIG_CPI][15] |= (0x000000ff&((dtlb_addr) >> 24));
onyx_images[BIG_CPI][16] = (dtlb_addr << 8)&0xffffff00;
@@ -405,24 +405,24 @@ static void perf_patch_images(void)
} else if (perf_processor_interface == CUDA_INTF) {
/* Cuda interface */
- cuda_images[TLBMISS][16] =
+ cuda_images[TLBMISS][16] =
(cuda_images[TLBMISS][16]&0xffff0000) |
((dtlb_addr >> 8)&0x0000ffff);
- cuda_images[TLBMISS][17] =
+ cuda_images[TLBMISS][17] =
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
cuda_images[TLBMISS][18] = (itlb_addr << 16)&0xffff0000;
- cuda_images[TLBHANDMISS][16] =
+ cuda_images[TLBHANDMISS][16] =
(cuda_images[TLBHANDMISS][16]&0xffff0000) |
((dtlb_addr >> 8)&0x0000ffff);
- cuda_images[TLBHANDMISS][17] =
+ cuda_images[TLBHANDMISS][17] =
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
cuda_images[TLBHANDMISS][18] = (itlb_addr << 16)&0xffff0000;
- cuda_images[BIG_CPI][16] =
+ cuda_images[BIG_CPI][16] =
(cuda_images[BIG_CPI][16]&0xffff0000) |
((dtlb_addr >> 8)&0x0000ffff);
- cuda_images[BIG_CPI][17] =
+ cuda_images[BIG_CPI][17] =
((dtlb_addr << 24)&0xff000000) | ((itlb_addr >> 16)&0x000000ff);
cuda_images[BIG_CPI][18] = (itlb_addr << 16)&0xffff0000;
} else {
@@ -434,7 +434,7 @@ static void perf_patch_images(void)
/*
* ioctl routine
- * All routines effect the processor that they are executed on. Thus you
+ * All routines effect the processor that they are executed on. Thus you
* must be running on the processor that you wish to change.
*/
@@ -460,7 +460,7 @@ static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
}
/* copy out the Counters */
- if (copy_to_user((void __user *)arg, raddr,
+ if (copy_to_user((void __user *)arg, raddr,
sizeof (raddr)) != 0) {
error = -EFAULT;
break;
@@ -488,7 +488,7 @@ static const struct file_operations perf_fops = {
.open = perf_open,
.release = perf_release
};
-
+
static struct miscdevice perf_dev = {
MISC_DYNAMIC_MINOR,
PA_PERF_DEV,
@@ -596,7 +596,7 @@ static int perf_stop_counters(uint32_t *raddr)
/* OR sticky2 (bit 1496) to counter2 bit 32 */
tmp64 |= (userbuf[23] >> 8) & 0x0000000080000000;
raddr[2] = (uint32_t)tmp64;
-
+
/* Counter3 is bits 1497 to 1528 */
tmp64 = (userbuf[23] >> 7) & 0x00000000ffffffff;
/* OR sticky3 (bit 1529) to counter3 bit 32 */
@@ -618,7 +618,7 @@ static int perf_stop_counters(uint32_t *raddr)
userbuf[22] = 0;
userbuf[23] = 0;
- /*
+ /*
* Write back the zeroed bytes + the image given
* the read was destructive.
*/
@@ -626,13 +626,13 @@ static int perf_stop_counters(uint32_t *raddr)
} else {
/*
- * Read RDR-15 which contains the counters and sticky bits
+ * Read RDR-15 which contains the counters and sticky bits
*/
if (!perf_rdr_read_ubuf(15, userbuf)) {
return -13;
}
- /*
+ /*
* Clear out the counters
*/
perf_rdr_clear(15);
@@ -645,7 +645,7 @@ static int perf_stop_counters(uint32_t *raddr)
raddr[2] = (uint32_t)((userbuf[1] >> 32) & 0x00000000ffffffffUL);
raddr[3] = (uint32_t)(userbuf[1] & 0x00000000ffffffffUL);
}
-
+
return 0;
}
@@ -683,7 +683,7 @@ static int perf_rdr_read_ubuf(uint32_t rdr_num, uint64_t *buffer)
i = tentry->num_words;
while (i--) {
buffer[i] = 0;
- }
+ }
/* Check for bits an even number of 64 */
if ((xbits = width & 0x03f) != 0) {
@@ -809,18 +809,22 @@ static int perf_write_image(uint64_t *memaddr)
}
runway = ioremap_nocache(cpu_device->hpa.start, 4096);
+ if (!runway) {
+ pr_err("perf_write_image: ioremap failed!\n");
+ return -ENOMEM;
+ }
/* Merge intrigue bits into Runway STATUS 0 */
tmp64 = __raw_readq(runway + RUNWAY_STATUS) & 0xffecfffffffffffful;
- __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
+ __raw_writeq(tmp64 | (*memaddr++ & 0x0013000000000000ul),
runway + RUNWAY_STATUS);
-
+
/* Write RUNWAY DEBUG registers */
for (i = 0; i < 8; i++) {
__raw_writeq(*memaddr++, runway + RUNWAY_DEBUG);
}
- return 0;
+ return 0;
}
/*
@@ -844,7 +848,7 @@ printk("perf_rdr_write\n");
perf_rdr_shift_out_U(rdr_num, buffer[i]);
} else {
perf_rdr_shift_out_W(rdr_num, buffer[i]);
- }
+ }
}
printk("perf_rdr_write done\n");
}
diff --git a/arch/parisc/kernel/syscall.S b/arch/parisc/kernel/syscall.S
index 23de307c3052..41e60a9c7db2 100644
--- a/arch/parisc/kernel/syscall.S
+++ b/arch/parisc/kernel/syscall.S
@@ -742,7 +742,7 @@ lws_compare_and_swap_2:
10: ldd 0(%r25), %r25
11: ldd 0(%r24), %r24
#else
- /* Load new value into r22/r23 - high/low */
+ /* Load old value into r22/r23 - high/low */
10: ldw 0(%r25), %r22
11: ldw 4(%r25), %r23
/* Load new value into fr4 for atomic store later */
@@ -834,11 +834,11 @@ cas2_action:
copy %r0, %r28
#else
/* Compare first word */
-19: ldw,ma 0(%r26), %r29
+19: ldw 0(%r26), %r29
sub,= %r29, %r22, %r0
b,n cas2_end
/* Compare second word */
-20: ldw,ma 4(%r26), %r29
+20: ldw 4(%r26), %r29
sub,= %r29, %r23, %r0
b,n cas2_end
/* Perform the store */
diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c
index b2da7c8baed7..292458b694fb 100644
--- a/arch/powerpc/kernel/align.c
+++ b/arch/powerpc/kernel/align.c
@@ -235,6 +235,28 @@ static int emulate_dcbz(struct pt_regs *regs, unsigned char __user *addr)
#define SWIZ_PTR(p) ((unsigned char __user *)((p) ^ swiz))
+#define __get_user_or_set_dar(_regs, _dest, _addr) \
+ ({ \
+ int rc = 0; \
+ typeof(_addr) __addr = (_addr); \
+ if (__get_user_inatomic(_dest, __addr)) { \
+ _regs->dar = (unsigned long)__addr; \
+ rc = -EFAULT; \
+ } \
+ rc; \
+ })
+
+#define __put_user_or_set_dar(_regs, _src, _addr) \
+ ({ \
+ int rc = 0; \
+ typeof(_addr) __addr = (_addr); \
+ if (__put_user_inatomic(_src, __addr)) { \
+ _regs->dar = (unsigned long)__addr; \
+ rc = -EFAULT; \
+ } \
+ rc; \
+ })
+
static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
unsigned int reg, unsigned int nb,
unsigned int flags, unsigned int instr,
@@ -263,9 +285,10 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
} else {
unsigned long pc = regs->nip ^ (swiz & 4);
- if (__get_user_inatomic(instr,
- (unsigned int __user *)pc))
+ if (__get_user_or_set_dar(regs, instr,
+ (unsigned int __user *)pc))
return -EFAULT;
+
if (swiz == 0 && (flags & SW))
instr = cpu_to_le32(instr);
nb = (instr >> 11) & 0x1f;
@@ -309,31 +332,31 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
((nb0 + 3) / 4) * sizeof(unsigned long));
for (i = 0; i < nb; ++i, ++p)
- if (__get_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
- SWIZ_PTR(p)))
+ if (__get_user_or_set_dar(regs, REG_BYTE(rptr, i ^ bswiz),
+ SWIZ_PTR(p)))
return -EFAULT;
if (nb0 > 0) {
rptr = &regs->gpr[0];
addr += nb;
for (i = 0; i < nb0; ++i, ++p)
- if (__get_user_inatomic(REG_BYTE(rptr,
- i ^ bswiz),
- SWIZ_PTR(p)))
+ if (__get_user_or_set_dar(regs,
+ REG_BYTE(rptr, i ^ bswiz),
+ SWIZ_PTR(p)))
return -EFAULT;
}
} else {
for (i = 0; i < nb; ++i, ++p)
- if (__put_user_inatomic(REG_BYTE(rptr, i ^ bswiz),
- SWIZ_PTR(p)))
+ if (__put_user_or_set_dar(regs, REG_BYTE(rptr, i ^ bswiz),
+ SWIZ_PTR(p)))
return -EFAULT;
if (nb0 > 0) {
rptr = &regs->gpr[0];
addr += nb;
for (i = 0; i < nb0; ++i, ++p)
- if (__put_user_inatomic(REG_BYTE(rptr,
- i ^ bswiz),
- SWIZ_PTR(p)))
+ if (__put_user_or_set_dar(regs,
+ REG_BYTE(rptr, i ^ bswiz),
+ SWIZ_PTR(p)))
return -EFAULT;
}
}
@@ -345,29 +368,32 @@ static int emulate_multiple(struct pt_regs *regs, unsigned char __user *addr,
* Only POWER6 has these instructions, and it does true little-endian,
* so we don't need the address swizzling.
*/
-static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg,
- unsigned int flags)
+static int emulate_fp_pair(struct pt_regs *regs, unsigned char __user *addr,
+ unsigned int reg, unsigned int flags)
{
char *ptr0 = (char *) &current->thread.TS_FPR(reg);
char *ptr1 = (char *) &current->thread.TS_FPR(reg+1);
- int i, ret, sw = 0;
+ int i, sw = 0;
if (reg & 1)
return 0; /* invalid form: FRS/FRT must be even */
if (flags & SW)
sw = 7;
- ret = 0;
+
for (i = 0; i < 8; ++i) {
if (!(flags & ST)) {
- ret |= __get_user(ptr0[i^sw], addr + i);
- ret |= __get_user(ptr1[i^sw], addr + i + 8);
+ if (__get_user_or_set_dar(regs, ptr0[i^sw], addr + i))
+ return -EFAULT;
+ if (__get_user_or_set_dar(regs, ptr1[i^sw], addr + i + 8))
+ return -EFAULT;
} else {
- ret |= __put_user(ptr0[i^sw], addr + i);
- ret |= __put_user(ptr1[i^sw], addr + i + 8);
+ if (__put_user_or_set_dar(regs, ptr0[i^sw], addr + i))
+ return -EFAULT;
+ if (__put_user_or_set_dar(regs, ptr1[i^sw], addr + i + 8))
+ return -EFAULT;
}
}
- if (ret)
- return -EFAULT;
+
return 1; /* exception handled and fixed up */
}
@@ -377,24 +403,27 @@ static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr,
{
char *ptr0 = (char *)&regs->gpr[reg];
char *ptr1 = (char *)&regs->gpr[reg+1];
- int i, ret, sw = 0;
+ int i, sw = 0;
if (reg & 1)
return 0; /* invalid form: GPR must be even */
if (flags & SW)
sw = 7;
- ret = 0;
+
for (i = 0; i < 8; ++i) {
if (!(flags & ST)) {
- ret |= __get_user(ptr0[i^sw], addr + i);
- ret |= __get_user(ptr1[i^sw], addr + i + 8);
+ if (__get_user_or_set_dar(regs, ptr0[i^sw], addr + i))
+ return -EFAULT;
+ if (__get_user_or_set_dar(regs, ptr1[i^sw], addr + i + 8))
+ return -EFAULT;
} else {
- ret |= __put_user(ptr0[i^sw], addr + i);
- ret |= __put_user(ptr1[i^sw], addr + i + 8);
+ if (__put_user_or_set_dar(regs, ptr0[i^sw], addr + i))
+ return -EFAULT;
+ if (__put_user_or_set_dar(regs, ptr1[i^sw], addr + i + 8))
+ return -EFAULT;
}
}
- if (ret)
- return -EFAULT;
+
return 1; /* exception handled and fixed up */
}
#endif /* CONFIG_PPC64 */
@@ -687,9 +716,14 @@ static int emulate_vsx(unsigned char __user *addr, unsigned int reg,
for (j = 0; j < length; j += elsize) {
for (i = 0; i < elsize; ++i) {
if (flags & ST)
- ret |= __put_user(ptr[i^sw], addr + i);
+ ret = __put_user_or_set_dar(regs, ptr[i^sw],
+ addr + i);
else
- ret |= __get_user(ptr[i^sw], addr + i);
+ ret = __get_user_or_set_dar(regs, ptr[i^sw],
+ addr + i);
+
+ if (ret)
+ return ret;
}
ptr += elsize;
#ifdef __LITTLE_ENDIAN__
@@ -739,7 +773,7 @@ int fix_alignment(struct pt_regs *regs)
unsigned int dsisr;
unsigned char __user *addr;
unsigned long p, swiz;
- int ret, i;
+ int i;
union data {
u64 ll;
double dd;
@@ -936,7 +970,7 @@ int fix_alignment(struct pt_regs *regs)
if (flags & F) {
/* Special case for 16-byte FP loads and stores */
PPC_WARN_ALIGNMENT(fp_pair, regs);
- return emulate_fp_pair(addr, reg, flags);
+ return emulate_fp_pair(regs, addr, reg, flags);
} else {
#ifdef CONFIG_PPC64
/* Special case for 16-byte loads and stores */
@@ -966,15 +1000,12 @@ int fix_alignment(struct pt_regs *regs)
}
data.ll = 0;
- ret = 0;
p = (unsigned long)addr;
for (i = 0; i < nb; i++)
- ret |= __get_user_inatomic(data.v[start + i],
- SWIZ_PTR(p++));
-
- if (unlikely(ret))
- return -EFAULT;
+ if (__get_user_or_set_dar(regs, data.v[start + i],
+ SWIZ_PTR(p++)))
+ return -EFAULT;
} else if (flags & F) {
data.ll = current->thread.TS_FPR(reg);
@@ -1046,15 +1077,13 @@ int fix_alignment(struct pt_regs *regs)
break;
}
- ret = 0;
p = (unsigned long)addr;
for (i = 0; i < nb; i++)
- ret |= __put_user_inatomic(data.v[start + i],
- SWIZ_PTR(p++));
+ if (__put_user_or_set_dar(regs, data.v[start + i],
+ SWIZ_PTR(p++)))
+ return -EFAULT;
- if (unlikely(ret))
- return -EFAULT;
} else if (flags & F)
current->thread.TS_FPR(reg) = data.ll;
else
diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S
index 2cb4d5552319..891080c4a41e 100644
--- a/arch/powerpc/kernel/entry_64.S
+++ b/arch/powerpc/kernel/entry_64.S
@@ -1243,10 +1243,14 @@ _GLOBAL(ftrace_caller)
stdu r1,-SWITCH_FRAME_SIZE(r1)
/* Save all gprs to pt_regs */
- SAVE_8GPRS(0,r1)
- SAVE_8GPRS(8,r1)
- SAVE_8GPRS(16,r1)
- SAVE_8GPRS(24,r1)
+ SAVE_GPR(0, r1)
+ SAVE_10GPRS(2, r1)
+ SAVE_10GPRS(12, r1)
+ SAVE_10GPRS(22, r1)
+
+ /* Save previous stack pointer (r1) */
+ addi r8, r1, SWITCH_FRAME_SIZE
+ std r8, GPR1(r1)
/* Load special regs for save below */
mfmsr r8
@@ -1300,10 +1304,10 @@ ftrace_call:
#endif
/* Restore gprs */
- REST_8GPRS(0,r1)
- REST_8GPRS(8,r1)
- REST_8GPRS(16,r1)
- REST_8GPRS(24,r1)
+ REST_GPR(0,r1)
+ REST_10GPRS(2,r1)
+ REST_10GPRS(12,r1)
+ REST_10GPRS(22,r1)
/* Restore callee's TOC */
ld r2, 24(r1)
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2e2fc1e37715..fd68e19b9ef7 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -764,7 +764,29 @@ EXC_REAL(program_check, 0x700, 0x800)
EXC_VIRT(program_check, 0x4700, 0x4800, 0x700)
TRAMP_KVM(PACA_EXGEN, 0x700)
EXC_COMMON_BEGIN(program_check_common)
- EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
+ /*
+ * It's possible to receive a TM Bad Thing type program check with
+ * userspace register values (in particular r1), but with SRR1 reporting
+ * that we came from the kernel. Normally that would confuse the bad
+ * stack logic, and we would report a bad kernel stack pointer. Instead
+ * we switch to the emergency stack if we're taking a TM Bad Thing from
+ * the kernel.
+ */
+ li r10,MSR_PR /* Build a mask of MSR_PR .. */
+ oris r10,r10,0x200000@h /* .. and SRR1_PROGTM */
+ and r10,r10,r12 /* Mask SRR1 with that. */
+ srdi r10,r10,8 /* Shift it so we can compare */
+ cmpldi r10,(0x200000 >> 8) /* .. with an immediate. */
+ bne 1f /* If != go to normal path. */
+
+ /* SRR1 had PR=0 and SRR1_PROGTM=1, so use the emergency stack */
+ andi. r10,r12,MSR_PR; /* Set CR0 correctly for label */
+ /* 3 in EXCEPTION_PROLOG_COMMON */
+ mr r10,r1 /* Save r1 */
+ ld r1,PACAEMERGSP(r13) /* Use emergency stack */
+ subi r1,r1,INT_FRAME_SIZE /* alloc stack frame */
+ b 3f /* Jump into the macro !! */
+1: EXCEPTION_PROLOG_COMMON(0x700, PACA_EXGEN)
bl save_nvgprs
RECONCILE_IRQ_STATE(r10, r11)
addi r3,r1,STACK_FRAME_OVERHEAD
diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c
index dcbb9144c16d..d97370866a5f 100644
--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -131,7 +131,7 @@ static void flush_tmregs_to_thread(struct task_struct *tsk)
* in the appropriate thread structures from live.
*/
- if (tsk != current)
+ if ((!cpu_has_feature(CPU_FTR_TM)) || (tsk != current))
return;
if (MSR_TM_SUSPENDED(mfmsr())) {
diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c
index 96698fdf93b4..04e92257fd69 100644
--- a/arch/powerpc/kernel/signal_64.c
+++ b/arch/powerpc/kernel/signal_64.c
@@ -452,9 +452,20 @@ static long restore_tm_sigcontexts(struct task_struct *tsk,
if (MSR_TM_RESV(msr))
return -EINVAL;
- /* pull in MSR TM from user context */
+ /* pull in MSR TS bits from user context */
regs->msr = (regs->msr & ~MSR_TS_MASK) | (msr & MSR_TS_MASK);
+ /*
+ * Ensure that TM is enabled in regs->msr before we leave the signal
+ * handler. It could be the case that (a) user disabled the TM bit
+ * through the manipulation of the MSR bits in uc_mcontext or (b) the
+ * TM bit was disabled because a sufficient number of context switches
+ * happened whilst in the signal handler and load_tm overflowed,
+ * disabling the TM bit. In either case we can end up with an illegal
+ * TM state leading to a TM Bad Thing when we return to userspace.
+ */
+ regs->msr |= MSR_TM;
+
/* pull in MSR LE from user context */
regs->msr = (regs->msr & ~MSR_LE) | (msr & MSR_LE);
diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c
index c379ff5a4438..da2a7eccb10a 100644
--- a/arch/powerpc/kvm/book3s_64_vio.c
+++ b/arch/powerpc/kvm/book3s_64_vio.c
@@ -129,8 +129,11 @@ static int kvm_spapr_tce_mmap(struct file *file, struct vm_area_struct *vma)
static int kvm_spapr_tce_release(struct inode *inode, struct file *filp)
{
struct kvmppc_spapr_tce_table *stt = filp->private_data;
+ struct kvm *kvm = stt->kvm;
+ mutex_lock(&kvm->lock);
list_del_rcu(&stt->list);
+ mutex_unlock(&kvm->lock);
kvm_put_kvm(stt->kvm);
@@ -150,6 +153,7 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
struct kvm_create_spapr_tce_64 *args)
{
struct kvmppc_spapr_tce_table *stt = NULL;
+ struct kvmppc_spapr_tce_table *siter;
unsigned long npages, size;
int ret = -ENOMEM;
int i;
@@ -157,24 +161,16 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
if (!args->size)
return -EINVAL;
- /* Check this LIOBN hasn't been previously allocated */
- list_for_each_entry(stt, &kvm->arch.spapr_tce_tables, list) {
- if (stt->liobn == args->liobn)
- return -EBUSY;
- }
-
size = args->size;
npages = kvmppc_tce_pages(size);
ret = kvmppc_account_memlimit(kvmppc_stt_pages(npages), true);
- if (ret) {
- stt = NULL;
- goto fail;
- }
+ if (ret)
+ return ret;
stt = kzalloc(sizeof(*stt) + npages * sizeof(struct page *),
GFP_KERNEL);
if (!stt)
- goto fail;
+ goto fail_acct;
stt->liobn = args->liobn;
stt->page_shift = args->page_shift;
@@ -188,24 +184,39 @@ long kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm,
goto fail;
}
- kvm_get_kvm(kvm);
-
mutex_lock(&kvm->lock);
- list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+
+ /* Check this LIOBN hasn't been previously allocated */
+ ret = 0;
+ list_for_each_entry(siter, &kvm->arch.spapr_tce_tables, list) {
+ if (siter->liobn == args->liobn) {
+ ret = -EBUSY;
+ break;
+ }
+ }
+
+ if (!ret)
+ ret = anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
+ stt, O_RDWR | O_CLOEXEC);
+
+ if (ret >= 0) {
+ list_add_rcu(&stt->list, &kvm->arch.spapr_tce_tables);
+ kvm_get_kvm(kvm);
+ }
mutex_unlock(&kvm->lock);
- return anon_inode_getfd("kvm-spapr-tce", &kvm_spapr_tce_fops,
- stt, O_RDWR | O_CLOEXEC);
+ if (ret >= 0)
+ return ret;
-fail:
- if (stt) {
- for (i = 0; i < npages; i++)
- if (stt->pages[i])
- __free_page(stt->pages[i]);
+ fail:
+ for (i = 0; i < npages; i++)
+ if (stt->pages[i])
+ __free_page(stt->pages[i]);
- kfree(stt);
- }
+ kfree(stt);
+ fail_acct:
+ kvmppc_account_memlimit(kvmppc_stt_pages(npages), false);
return ret;
}
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 70963c845e96..fc0df0f6fe88 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -601,8 +601,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
break;
#endif
case KVM_CAP_PPC_HTM:
- r = cpu_has_feature(CPU_FTR_TM_COMP) &&
- is_kvmppc_hv_enabled(kvm);
+ r = cpu_has_feature(CPU_FTR_TM_COMP) && hv_enabled;
break;
default:
r = 0;
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index d5ce34dcf4d9..1e28747d677f 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -42,6 +42,8 @@
#include <linux/memblock.h>
#include <linux/hugetlb.h>
#include <linux/slab.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
#include <asm/pgalloc.h>
#include <asm/page.h>
@@ -421,6 +423,28 @@ static int __init parse_disable_radix(char *p)
}
early_param("disable_radix", parse_disable_radix);
+/*
+ * If we're running under a hypervisor, we currently can't do radix
+ * since we don't have the code to do the H_REGISTER_PROC_TBL hcall.
+ * We tell that we're running under a hypervisor by looking for the
+ * /chosen/ibm,architecture-vec-5 property.
+ */
+static void early_check_vec5(void)
+{
+ unsigned long root, chosen;
+ int size;
+ const u8 *vec5;
+
+ root = of_get_flat_dt_root();
+ chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
+ if (chosen == -FDT_ERR_NOTFOUND)
+ return;
+ vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size);
+ if (!vec5)
+ return;
+ cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+}
+
void __init mmu_early_init_devtree(void)
{
/* Disable radix mode based on kernel command line. */
@@ -428,6 +452,15 @@ void __init mmu_early_init_devtree(void)
if (disable_radix || !(mfmsr() & MSR_HV))
cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX;
+ /*
+ * Check /chosen/ibm,architecture-vec-5 if running as a guest.
+ * When running bare-metal, we can use radix if we like
+ * even though the ibm,architecture-vec-5 property created by
+ * skiboot doesn't have the necessary bits set.
+ */
+ if (early_radix_enabled() && !(mfmsr() & MSR_HV))
+ early_check_vec5();
+
if (early_radix_enabled())
radix__early_init_devtree();
else
diff --git a/arch/powerpc/perf/isa207-common.h b/arch/powerpc/perf/isa207-common.h
index 4d0a4e5017c2..8e6dd17fe603 100644
--- a/arch/powerpc/perf/isa207-common.h
+++ b/arch/powerpc/perf/isa207-common.h
@@ -201,6 +201,10 @@
CNST_PMC_VAL(1) | CNST_PMC_VAL(2) | CNST_PMC_VAL(3) | \
CNST_PMC_VAL(4) | CNST_PMC_VAL(5) | CNST_PMC_VAL(6) | CNST_NC_VAL
+/*
+ * Lets restrict use of PMC5 for instruction counting.
+ */
+#define P9_DD1_TEST_ADDER (ISA207_TEST_ADDER | CNST_PMC_VAL(5))
/* Bits in MMCR1 for PowerISA v2.07 */
#define MMCR1_UNIT_SHIFT(pmc) (60 - (4 * ((pmc) - 1)))
diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c
index 8e9a81967ff8..9abcd8f65504 100644
--- a/arch/powerpc/perf/power9-pmu.c
+++ b/arch/powerpc/perf/power9-pmu.c
@@ -295,7 +295,7 @@ static struct power_pmu power9_pmu = {
.name = "POWER9",
.n_counter = MAX_PMU_COUNTERS,
.add_fields = ISA207_ADD_FIELDS,
- .test_adder = ISA207_TEST_ADDER,
+ .test_adder = P9_DD1_TEST_ADDER,
.compute_mmcr = isa207_compute_mmcr,
.config_bhrb = power9_config_bhrb,
.bhrb_filter_map = power9_bhrb_filter_map,
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index a560a98bcf3b..6a5e7467445c 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -225,8 +225,10 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
return -ENOENT;
dn = dlpar_configure_connector(drc_index, parent_dn);
- if (!dn)
+ if (!dn) {
+ of_node_put(parent_dn);
return -ENOENT;
+ }
rc = dlpar_attach_node(dn);
if (rc)
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 303d28eb03a2..591cbdf615af 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -28,6 +28,7 @@
#include <linux/cpufeature.h>
#include <linux/init.h>
#include <linux/spinlock.h>
+#include <linux/fips.h>
#include <crypto/xts.h>
#include <asm/cpacf.h>
@@ -501,6 +502,12 @@ static int xts_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
if (err)
return err;
+ /* In fips mode only 128 bit or 256 bit keys are valid */
+ if (fips_enabled && key_len != 32 && key_len != 64) {
+ tfm->crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
+ return -EINVAL;
+ }
+
/* Pick the correct function code based on the key length */
fc = (key_len == 32) ? CPACF_KM_XTS_128 :
(key_len == 64) ? CPACF_KM_XTS_256 : 0;
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 1113389d0a39..fe7368a41aa8 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -110,22 +110,30 @@ static const u8 initial_parm_block[32] __initconst = {
/*** helper functions ***/
+/*
+ * generate_entropy:
+ * This algorithm produces 64 bytes of entropy data based on 1024
+ * individual stckf() invocations assuming that each stckf() value
+ * contributes 0.25 bits of entropy. So the caller gets 256 bit
+ * entropy per 64 byte or 4 bits entropy per byte.
+ */
static int generate_entropy(u8 *ebuf, size_t nbytes)
{
int n, ret = 0;
- u8 *pg, *h, hash[32];
+ u8 *pg, *h, hash[64];
- pg = (u8 *) __get_free_page(GFP_KERNEL);
+ /* allocate 2 pages */
+ pg = (u8 *) __get_free_pages(GFP_KERNEL, 1);
if (!pg) {
prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
return -ENOMEM;
}
while (nbytes) {
- /* fill page with urandom bytes */
- get_random_bytes(pg, PAGE_SIZE);
- /* exor page with stckf values */
- for (n = 0; n < PAGE_SIZE / sizeof(u64); n++) {
+ /* fill pages with urandom bytes */
+ get_random_bytes(pg, 2*PAGE_SIZE);
+ /* exor pages with 1024 stckf values */
+ for (n = 0; n < 2 * PAGE_SIZE / sizeof(u64); n++) {
u64 *p = ((u64 *)pg) + n;
*p ^= get_tod_clock_fast();
}
@@ -134,8 +142,8 @@ static int generate_entropy(u8 *ebuf, size_t nbytes)
h = hash;
else
h = ebuf;
- /* generate sha256 from this page */
- cpacf_kimd(CPACF_KIMD_SHA_256, h, pg, PAGE_SIZE);
+ /* hash over the filled pages */
+ cpacf_kimd(CPACF_KIMD_SHA_512, h, pg, 2*PAGE_SIZE);
if (n < sizeof(hash))
memcpy(ebuf, hash, n);
ret += n;
@@ -143,7 +151,7 @@ static int generate_entropy(u8 *ebuf, size_t nbytes)
nbytes -= n;
}
- free_page((unsigned long)pg);
+ free_pages((unsigned long)pg, 1);
return ret;
}
@@ -334,7 +342,7 @@ static int __init prng_sha512_selftest(void)
static int __init prng_sha512_instantiate(void)
{
int ret, datalen;
- u8 seed[64];
+ u8 seed[64 + 32 + 16];
pr_debug("prng runs in SHA-512 mode "
"with chunksize=%d and reseed_limit=%u\n",
@@ -357,12 +365,12 @@ static int __init prng_sha512_instantiate(void)
if (ret)
goto outfree;
- /* generate initial seed bytestring, first 48 bytes of entropy */
- ret = generate_entropy(seed, 48);
- if (ret != 48)
+ /* generate initial seed bytestring, with 256 + 128 bits entropy */
+ ret = generate_entropy(seed, 64 + 32);
+ if (ret != 64 + 32)
goto outfree;
/* followed by 16 bytes of unique nonce */
- get_tod_clock_ext(seed + 48);
+ get_tod_clock_ext(seed + 64 + 32);
/* initial seed of the ppno drng */
cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
@@ -395,9 +403,9 @@ static void prng_sha512_deinstantiate(void)
static int prng_sha512_reseed(void)
{
int ret;
- u8 seed[32];
+ u8 seed[64];
- /* generate 32 bytes of fresh entropy */
+ /* fetch 256 bits of fresh entropy */
ret = generate_entropy(seed, sizeof(seed));
if (ret != sizeof(seed))
return ret;
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index bea785d7f853..af85d6b12028 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -5,6 +5,7 @@
#include <linux/errno.h>
typedef struct {
+ spinlock_t lock;
cpumask_t cpu_attach_mask;
atomic_t flush_count;
unsigned int flush_mm;
@@ -25,6 +26,7 @@ typedef struct {
} mm_context_t;
#define INIT_MM_CONTEXT(name) \
+ .context.lock = __SPIN_LOCK_UNLOCKED(name.context.lock), \
.context.pgtable_lock = \
__SPIN_LOCK_UNLOCKED(name.context.pgtable_lock), \
.context.pgtable_list = LIST_HEAD_INIT(name.context.pgtable_list), \
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index 515fea5a3fc4..f65a708ac395 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -15,6 +15,7 @@
static inline int init_new_context(struct task_struct *tsk,
struct mm_struct *mm)
{
+ spin_lock_init(&mm->context.lock);
spin_lock_init(&mm->context.pgtable_lock);
INIT_LIST_HEAD(&mm->context.pgtable_list);
spin_lock_init(&mm->context.gmap_lock);
@@ -93,7 +94,6 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
if (prev == next)
return;
cpumask_set_cpu(cpu, &next->context.cpu_attach_mask);
- cpumask_set_cpu(cpu, mm_cpumask(next));
/* Clear old ASCE by loading the kernel ASCE. */
__ctl_load(S390_lowcore.kernel_asce, 1, 1);
__ctl_load(S390_lowcore.kernel_asce, 7, 7);
@@ -111,9 +111,8 @@ static inline void finish_arch_post_lock_switch(void)
preempt_disable();
while (atomic_read(&mm->context.flush_count))
cpu_relax();
-
- if (mm->context.flush_mm)
- __tlb_flush_mm(mm);
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(mm));
+ __tlb_flush_mm_lazy(mm);
preempt_enable();
}
set_fs(current->thread.mm_segment);
@@ -126,6 +125,7 @@ static inline void activate_mm(struct mm_struct *prev,
struct mm_struct *next)
{
switch_mm(prev, next, current);
+ cpumask_set_cpu(smp_processor_id(), mm_cpumask(next));
set_user_asce(next);
}
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 0cea7026e4ff..db74d398a443 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -480,7 +480,7 @@ static inline int mm_alloc_pgste(struct mm_struct *mm)
* In the case that a guest uses storage keys
* faults should no longer be backed by zero pages
*/
-#define mm_forbids_zeropage mm_use_skey
+#define mm_forbids_zeropage mm_has_pgste
static inline int mm_use_skey(struct mm_struct *mm)
{
#ifdef CONFIG_PGSTE
@@ -1359,7 +1359,9 @@ static inline pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma,
static inline void pmdp_invalidate(struct vm_area_struct *vma,
unsigned long addr, pmd_t *pmdp)
{
- pmdp_xchg_direct(vma->vm_mm, addr, pmdp, __pmd(_SEGMENT_ENTRY_INVALID));
+ pmd_t pmd = __pmd(pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
+
+ pmdp_xchg_direct(vma->vm_mm, addr, pmdp, pmd);
}
#define __HAVE_ARCH_PMDP_SET_WRPROTECT
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 39846100682a..eed927aeb08f 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -43,23 +43,6 @@ static inline void __tlb_flush_global(void)
* Flush TLB entries for a specific mm on all CPUs (in case gmap is used
* this implicates multiple ASCEs!).
*/
-static inline void __tlb_flush_full(struct mm_struct *mm)
-{
- preempt_disable();
- atomic_inc(&mm->context.flush_count);
- if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
- /* Local TLB flush */
- __tlb_flush_local();
- } else {
- /* Global TLB flush */
- __tlb_flush_global();
- /* Reset TLB flush mask */
- cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
- }
- atomic_dec(&mm->context.flush_count);
- preempt_enable();
-}
-
static inline void __tlb_flush_mm(struct mm_struct *mm)
{
unsigned long gmap_asce;
@@ -71,16 +54,18 @@ static inline void __tlb_flush_mm(struct mm_struct *mm)
*/
preempt_disable();
atomic_inc(&mm->context.flush_count);
+ /* Reset TLB flush mask */
+ cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
+ barrier();
gmap_asce = READ_ONCE(mm->context.gmap_asce);
if (MACHINE_HAS_IDTE && gmap_asce != -1UL) {
if (gmap_asce)
__tlb_flush_idte(gmap_asce);
__tlb_flush_idte(mm->context.asce);
} else {
- __tlb_flush_full(mm);
+ /* Global TLB flush */
+ __tlb_flush_global();
}
- /* Reset TLB flush mask */
- cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
atomic_dec(&mm->context.flush_count);
preempt_enable();
}
@@ -94,7 +79,6 @@ static inline void __tlb_flush_kernel(void)
}
#else
#define __tlb_flush_global() __tlb_flush_local()
-#define __tlb_flush_full(mm) __tlb_flush_local()
/*
* Flush TLB entries for a specific ASCE on all CPUs.
@@ -112,10 +96,12 @@ static inline void __tlb_flush_kernel(void)
static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
{
+ spin_lock(&mm->context.lock);
if (mm->context.flush_mm) {
- __tlb_flush_mm(mm);
mm->context.flush_mm = 0;
+ __tlb_flush_mm(mm);
}
+ spin_unlock(&mm->context.lock);
}
/*
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 3ba622702ce4..cb2cd04e6698 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -2125,6 +2125,37 @@ static inline void thp_split_mm(struct mm_struct *mm)
}
/*
+ * Remove all empty zero pages from the mapping for lazy refaulting
+ * - This must be called after mm->context.has_pgste is set, to avoid
+ * future creation of zero pages
+ * - This must be called after THP was enabled
+ */
+static int __zap_zero_pages(pmd_t *pmd, unsigned long start,
+ unsigned long end, struct mm_walk *walk)
+{
+ unsigned long addr;
+
+ for (addr = start; addr != end; addr += PAGE_SIZE) {
+ pte_t *ptep;
+ spinlock_t *ptl;
+
+ ptep = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
+ if (is_zero_pfn(pte_pfn(*ptep)))
+ ptep_xchg_direct(walk->mm, addr, ptep, __pte(_PAGE_INVALID));
+ pte_unmap_unlock(ptep, ptl);
+ }
+ return 0;
+}
+
+static inline void zap_zero_pages(struct mm_struct *mm)
+{
+ struct mm_walk walk = { .pmd_entry = __zap_zero_pages };
+
+ walk.mm = mm;
+ walk_page_range(0, TASK_SIZE, &walk);
+}
+
+/*
* switch on pgstes for its userspace process (for kvm)
*/
int s390_enable_sie(void)
@@ -2141,6 +2172,7 @@ int s390_enable_sie(void)
mm->context.has_pgste = 1;
/* split thp mappings and disable thp for future mappings */
thp_split_mm(mm);
+ zap_zero_pages(mm);
up_write(&mm->mmap_sem);
return 0;
}
@@ -2153,13 +2185,6 @@ EXPORT_SYMBOL_GPL(s390_enable_sie);
static int __s390_enable_skey(pte_t *pte, unsigned long addr,
unsigned long next, struct mm_walk *walk)
{
- /*
- * Remove all zero page mappings,
- * after establishing a policy to forbid zero page mappings
- * following faults for that page will get fresh anonymous pages
- */
- if (is_zero_pfn(pte_pfn(*pte)))
- ptep_xchg_direct(walk->mm, addr, pte, __pte(_PAGE_INVALID));
/* Clear storage key */
ptep_zap_key(walk->mm, addr, pte);
return 0;
diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c
index 18d4107e10ee..97fc449a7470 100644
--- a/arch/s390/mm/gup.c
+++ b/arch/s390/mm/gup.c
@@ -56,13 +56,12 @@ static inline int gup_pte_range(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
static inline int gup_huge_pmd(pmd_t *pmdp, pmd_t pmd, unsigned long addr,
unsigned long end, int write, struct page **pages, int *nr)
{
- unsigned long mask, result;
struct page *head, *page;
+ unsigned long mask;
int refs;
- result = write ? 0 : _SEGMENT_ENTRY_PROTECT;
- mask = result | _SEGMENT_ENTRY_INVALID;
- if ((pmd_val(pmd) & mask) != result)
+ mask = (write ? _SEGMENT_ENTRY_PROTECT : 0) | _SEGMENT_ENTRY_INVALID;
+ if ((pmd_val(pmd) & mask) != 0)
return 0;
VM_BUG_ON(!pfn_valid(pmd_val(pmd) >> PAGE_SHIFT));
diff --git a/arch/sparc/include/asm/setup.h b/arch/sparc/include/asm/setup.h
index be0cc1beed41..3fae200dd251 100644
--- a/arch/sparc/include/asm/setup.h
+++ b/arch/sparc/include/asm/setup.h
@@ -59,8 +59,11 @@ extern atomic_t dcpage_flushes;
extern atomic_t dcpage_flushes_xcall;
extern int sysctl_tsb_ratio;
-#endif
+#ifdef CONFIG_SERIAL_SUNHV
+void sunhv_migrate_hvcons_irq(int cpu);
+#endif
+#endif
void sun_do_break(void);
extern int stop_a_enabled;
extern int scons_pwroff;
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index 2deb89ef1d5f..ca7cb8e57ab0 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -1465,8 +1465,12 @@ void smp_send_stop(void)
int cpu;
if (tlb_type == hypervisor) {
+ int this_cpu = smp_processor_id();
+#ifdef CONFIG_SERIAL_SUNHV
+ sunhv_migrate_hvcons_irq(this_cpu);
+#endif
for_each_online_cpu(cpu) {
- if (cpu == smp_processor_id())
+ if (cpu == this_cpu)
continue;
#ifdef CONFIG_SUN_LDOMS
if (ldom_domaining_enabled) {
diff --git a/arch/x86/include/asm/alternative-asm.h b/arch/x86/include/asm/alternative-asm.h
index e7636bac7372..6c98821fef5e 100644
--- a/arch/x86/include/asm/alternative-asm.h
+++ b/arch/x86/include/asm/alternative-asm.h
@@ -62,8 +62,10 @@
#define new_len2 145f-144f
/*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
+ *
+ * The additional "-" is needed because gas uses a "true" value of -1.
*/
#define alt_max_short(a, b) ((a) ^ (((a) ^ (b)) & -(-((a) < (b)))))
diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h
index 1b020381ab38..d4aea31eec03 100644
--- a/arch/x86/include/asm/alternative.h
+++ b/arch/x86/include/asm/alternative.h
@@ -103,12 +103,12 @@ static inline int alternatives_text_reserved(void *start, void *end)
alt_end_marker ":\n"
/*
- * max without conditionals. Idea adapted from:
+ * gas compatible max based on the idea from:
* http://graphics.stanford.edu/~seander/bithacks.html#IntegerMinOrMax
*
- * The additional "-" is needed because gas works with s32s.
+ * The additional "-" is needed because gas uses a "true" value of -1.
*/
-#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") - (" b ")))))"
+#define alt_max_short(a, b) "((" a ") ^ (((" a ") ^ (" b ")) & -(-((" a ") < (" b ")))))"
/*
* Pad the second replacement alternative with additional NOPs if it is
diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h
index b31761ecce63..7bcd138c3aa9 100644
--- a/arch/x86/include/asm/elf.h
+++ b/arch/x86/include/asm/elf.h
@@ -204,6 +204,7 @@ void set_personality_ia32(bool);
#define ELF_CORE_COPY_REGS(pr_reg, regs) \
do { \
+ unsigned long base; \
unsigned v; \
(pr_reg)[0] = (regs)->r15; \
(pr_reg)[1] = (regs)->r14; \
@@ -226,8 +227,8 @@ do { \
(pr_reg)[18] = (regs)->flags; \
(pr_reg)[19] = (regs)->sp; \
(pr_reg)[20] = (regs)->ss; \
- (pr_reg)[21] = current->thread.fsbase; \
- (pr_reg)[22] = current->thread.gsbase; \
+ rdmsrl(MSR_FS_BASE, base); (pr_reg)[21] = base; \
+ rdmsrl(MSR_KERNEL_GS_BASE, base); (pr_reg)[22] = base; \
asm("movl %%ds,%0" : "=r" (v)); (pr_reg)[23] = v; \
asm("movl %%es,%0" : "=r" (v)); (pr_reg)[24] = v; \
asm("movl %%fs,%0" : "=r" (v)); (pr_reg)[25] = v; \
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 19d14ac23ef9..fc3c7e49c8e4 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -296,6 +296,7 @@ struct x86_emulate_ctxt {
bool perm_ok; /* do not check permissions if true */
bool ud; /* inject an #UD if host doesn't support insn */
+ bool tf; /* TF value before instruction (after for syscall/sysret) */
bool have_exception;
struct x86_exception exception;
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 11e572561960..c3c1ad2fce5c 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -178,10 +178,15 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled)
return -EINVAL;
}
+ if (!enabled) {
+ ++disabled_cpus;
+ return -EINVAL;
+ }
+
if (boot_cpu_physical_apicid != -1U)
ver = boot_cpu_apic_version;
- cpu = __generic_processor_info(id, ver, enabled);
+ cpu = generic_processor_info(id, ver);
if (cpu >= 0)
early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index f2234918e494..e2ead34da465 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2070,7 +2070,7 @@ static int allocate_logical_cpuid(int apicid)
return nr_logical_cpuids++;
}
-int __generic_processor_info(int apicid, int version, bool enabled)
+int generic_processor_info(int apicid, int version)
{
int cpu, max = nr_cpu_ids;
bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid,
@@ -2128,11 +2128,9 @@ int __generic_processor_info(int apicid, int version, bool enabled)
if (num_processors >= nr_cpu_ids) {
int thiscpu = max + disabled_cpus;
- if (enabled) {
- pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
- "reached. Processor %d/0x%x ignored.\n",
- max, thiscpu, apicid);
- }
+ pr_warning("APIC: NR_CPUS/possible_cpus limit of %i "
+ "reached. Processor %d/0x%x ignored.\n",
+ max, thiscpu, apicid);
disabled_cpus++;
return -EINVAL;
@@ -2184,23 +2182,13 @@ int __generic_processor_info(int apicid, int version, bool enabled)
apic->x86_32_early_logical_apicid(cpu);
#endif
set_cpu_possible(cpu, true);
-
- if (enabled) {
- num_processors++;
- physid_set(apicid, phys_cpu_present_map);
- set_cpu_present(cpu, true);
- } else {
- disabled_cpus++;
- }
+ physid_set(apicid, phys_cpu_present_map);
+ set_cpu_present(cpu, true);
+ num_processors++;
return cpu;
}
-int generic_processor_info(int apicid, int version)
-{
- return __generic_processor_info(apicid, version, true);
-}
-
int hard_smp_processor_id(void)
{
return read_apic_id();
diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c
index cdc0deab00c9..13dbcc0f9d03 100644
--- a/arch/x86/kernel/cpu/microcode/intel.c
+++ b/arch/x86/kernel/cpu/microcode/intel.c
@@ -34,6 +34,7 @@
#include <linux/mm.h>
#include <asm/microcode_intel.h>
+#include <asm/intel-family.h>
#include <asm/processor.h>
#include <asm/tlbflush.h>
#include <asm/setup.h>
@@ -1046,6 +1047,18 @@ static int get_ucode_fw(void *to, const void *from, size_t n)
return 0;
}
+static bool is_blacklisted(unsigned int cpu)
+{
+ struct cpuinfo_x86 *c = &cpu_data(cpu);
+
+ if (c->x86 == 6 && c->x86_model == INTEL_FAM6_BROADWELL_X) {
+ pr_err_once("late loading on model 79 is disabled.\n");
+ return true;
+ }
+
+ return false;
+}
+
static enum ucode_state request_microcode_fw(int cpu, struct device *device,
bool refresh_fw)
{
@@ -1054,6 +1067,9 @@ static enum ucode_state request_microcode_fw(int cpu, struct device *device,
const struct firmware *firmware;
enum ucode_state ret;
+ if (is_blacklisted(cpu))
+ return UCODE_NFOUND;
+
sprintf(name, "intel-ucode/%02x-%02x-%02x",
c->x86, c->x86_model, c->x86_mask);
@@ -1078,6 +1094,9 @@ static int get_ucode_user(void *to, const void *from, size_t n)
static enum ucode_state
request_microcode_user(int cpu, const void __user *buf, size_t size)
{
+ if (is_blacklisted(cpu))
+ return UCODE_NFOUND;
+
return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
}
diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c
index c114b132d121..7052d9a65fe9 100644
--- a/arch/x86/kernel/fpu/regset.c
+++ b/arch/x86/kernel/fpu/regset.c
@@ -130,11 +130,16 @@ int xstateregs_set(struct task_struct *target, const struct user_regset *regset,
fpu__activate_fpstate_write(fpu);
- if (boot_cpu_has(X86_FEATURE_XSAVES))
+ if (boot_cpu_has(X86_FEATURE_XSAVES)) {
ret = copyin_to_xsaves(kbuf, ubuf, xsave);
- else
+ } else {
ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, xsave, 0, -1);
+ /* xcomp_bv must be 0 when using uncompacted format */
+ if (!ret && xsave->header.xcomp_bv)
+ ret = -EINVAL;
+ }
+
/*
* In case of failure, mark all states as init:
*/
diff --git a/arch/x86/kernel/fpu/signal.c b/arch/x86/kernel/fpu/signal.c
index a184c210efba..3ec0d2d64601 100644
--- a/arch/x86/kernel/fpu/signal.c
+++ b/arch/x86/kernel/fpu/signal.c
@@ -329,6 +329,10 @@ static int __fpu__restore_sig(void __user *buf, void __user *buf_fx, int size)
} else {
err = __copy_from_user(&fpu->state.xsave,
buf_fx, state_size);
+
+ /* xcomp_bv must be 0 when using uncompacted format */
+ if (!err && state_size > offsetof(struct xregs_state, header) && fpu->state.xsave.header.xcomp_bv)
+ err = -EINVAL;
}
if (err || __copy_from_user(&env, buf, sizeof(env))) {
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 55ffd9dc2258..77f17cbfe271 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -141,7 +141,8 @@ void kvm_async_pf_task_wait(u32 token)
n.token = token;
n.cpu = smp_processor_id();
- n.halted = is_idle_task(current) || preempt_count() > 1;
+ n.halted = is_idle_task(current) || preempt_count() > 1 ||
+ rcu_preempt_depth();
init_swait_queue_head(&n.wq);
hlist_add_head(&n.link, &b->list);
raw_spin_unlock(&b->lock);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index b3760b3c1ca0..0887d2ae3797 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -136,6 +136,123 @@ void release_thread(struct task_struct *dead_task)
}
}
+enum which_selector {
+ FS,
+ GS
+};
+
+/*
+ * Saves the FS or GS base for an outgoing thread if FSGSBASE extensions are
+ * not available. The goal is to be reasonably fast on non-FSGSBASE systems.
+ * It's forcibly inlined because it'll generate better code and this function
+ * is hot.
+ */
+static __always_inline void save_base_legacy(struct task_struct *prev_p,
+ unsigned short selector,
+ enum which_selector which)
+{
+ if (likely(selector == 0)) {
+ /*
+ * On Intel (without X86_BUG_NULL_SEG), the segment base could
+ * be the pre-existing saved base or it could be zero. On AMD
+ * (with X86_BUG_NULL_SEG), the segment base could be almost
+ * anything.
+ *
+ * This branch is very hot (it's hit twice on almost every
+ * context switch between 64-bit programs), and avoiding
+ * the RDMSR helps a lot, so we just assume that whatever
+ * value is already saved is correct. This matches historical
+ * Linux behavior, so it won't break existing applications.
+ *
+ * To avoid leaking state, on non-X86_BUG_NULL_SEG CPUs, if we
+ * report that the base is zero, it needs to actually be zero:
+ * see the corresponding logic in load_seg_legacy.
+ */
+ } else {
+ /*
+ * If the selector is 1, 2, or 3, then the base is zero on
+ * !X86_BUG_NULL_SEG CPUs and could be anything on
+ * X86_BUG_NULL_SEG CPUs. In the latter case, Linux
+ * has never attempted to preserve the base across context
+ * switches.
+ *
+ * If selector > 3, then it refers to a real segment, and
+ * saving the base isn't necessary.
+ */
+ if (which == FS)
+ prev_p->thread.fsbase = 0;
+ else
+ prev_p->thread.gsbase = 0;
+ }
+}
+
+static __always_inline void save_fsgs(struct task_struct *task)
+{
+ savesegment(fs, task->thread.fsindex);
+ savesegment(gs, task->thread.gsindex);
+ save_base_legacy(task, task->thread.fsindex, FS);
+ save_base_legacy(task, task->thread.gsindex, GS);
+}
+
+static __always_inline void loadseg(enum which_selector which,
+ unsigned short sel)
+{
+ if (which == FS)
+ loadsegment(fs, sel);
+ else
+ load_gs_index(sel);
+}
+
+static __always_inline void load_seg_legacy(unsigned short prev_index,
+ unsigned long prev_base,
+ unsigned short next_index,
+ unsigned long next_base,
+ enum which_selector which)
+{
+ if (likely(next_index <= 3)) {
+ /*
+ * The next task is using 64-bit TLS, is not using this
+ * segment at all, or is having fun with arcane CPU features.
+ */
+ if (next_base == 0) {
+ /*
+ * Nasty case: on AMD CPUs, we need to forcibly zero
+ * the base.
+ */
+ if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
+ loadseg(which, __USER_DS);
+ loadseg(which, next_index);
+ } else {
+ /*
+ * We could try to exhaustively detect cases
+ * under which we can skip the segment load,
+ * but there's really only one case that matters
+ * for performance: if both the previous and
+ * next states are fully zeroed, we can skip
+ * the load.
+ *
+ * (This assumes that prev_base == 0 has no
+ * false positives. This is the case on
+ * Intel-style CPUs.)
+ */
+ if (likely(prev_index | next_index | prev_base))
+ loadseg(which, next_index);
+ }
+ } else {
+ if (prev_index != next_index)
+ loadseg(which, next_index);
+ wrmsrl(which == FS ? MSR_FS_BASE : MSR_KERNEL_GS_BASE,
+ next_base);
+ }
+ } else {
+ /*
+ * The next task is using a real segment. Loading the selector
+ * is sufficient.
+ */
+ loadseg(which, next_index);
+ }
+}
+
int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
unsigned long arg, struct task_struct *p, unsigned long tls)
{
@@ -216,10 +333,19 @@ start_thread_common(struct pt_regs *regs, unsigned long new_ip,
unsigned long new_sp,
unsigned int _cs, unsigned int _ss, unsigned int _ds)
{
+ WARN_ON_ONCE(regs != current_pt_regs());
+
+ if (static_cpu_has(X86_BUG_NULL_SEG)) {
+ /* Loading zero below won't clear the base. */
+ loadsegment(fs, __USER_DS);
+ load_gs_index(__USER_DS);
+ }
+
loadsegment(fs, 0);
loadsegment(es, _ds);
loadsegment(ds, _ds);
load_gs_index(0);
+
regs->ip = new_ip;
regs->sp = new_sp;
regs->cs = _cs;
@@ -264,7 +390,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
struct fpu *next_fpu = &next->fpu;
int cpu = smp_processor_id();
struct tss_struct *tss = &per_cpu(cpu_tss, cpu);
- unsigned prev_fsindex, prev_gsindex;
fpu_switch_t fpu_switch;
fpu_switch = switch_fpu_prepare(prev_fpu, next_fpu, cpu);
@@ -274,8 +399,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
*
* (e.g. xen_load_tls())
*/
- savesegment(fs, prev_fsindex);
- savesegment(gs, prev_gsindex);
+ save_fsgs(prev_p);
/*
* Load TLS before restoring any segments so that segment loads
@@ -314,108 +438,10 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
if (unlikely(next->ds | prev->ds))
loadsegment(ds, next->ds);
- /*
- * Switch FS and GS.
- *
- * These are even more complicated than DS and ES: they have
- * 64-bit bases are that controlled by arch_prctl. The bases
- * don't necessarily match the selectors, as user code can do
- * any number of things to cause them to be inconsistent.
- *
- * We don't promise to preserve the bases if the selectors are
- * nonzero. We also don't promise to preserve the base if the
- * selector is zero and the base doesn't match whatever was
- * most recently passed to ARCH_SET_FS/GS. (If/when the
- * FSGSBASE instructions are enabled, we'll need to offer
- * stronger guarantees.)
- *
- * As an invariant,
- * (fsbase != 0 && fsindex != 0) || (gsbase != 0 && gsindex != 0) is
- * impossible.
- */
- if (next->fsindex) {
- /* Loading a nonzero value into FS sets the index and base. */
- loadsegment(fs, next->fsindex);
- } else {
- if (next->fsbase) {
- /* Next index is zero but next base is nonzero. */
- if (prev_fsindex)
- loadsegment(fs, 0);
- wrmsrl(MSR_FS_BASE, next->fsbase);
- } else {
- /* Next base and index are both zero. */
- if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
- /*
- * We don't know the previous base and can't
- * find out without RDMSR. Forcibly clear it.
- */
- loadsegment(fs, __USER_DS);
- loadsegment(fs, 0);
- } else {
- /*
- * If the previous index is zero and ARCH_SET_FS
- * didn't change the base, then the base is
- * also zero and we don't need to do anything.
- */
- if (prev->fsbase || prev_fsindex)
- loadsegment(fs, 0);
- }
- }
- }
- /*
- * Save the old state and preserve the invariant.
- * NB: if prev_fsindex == 0, then we can't reliably learn the base
- * without RDMSR because Intel user code can zero it without telling
- * us and AMD user code can program any 32-bit value without telling
- * us.
- */
- if (prev_fsindex)
- prev->fsbase = 0;
- prev->fsindex = prev_fsindex;
-
- if (next->gsindex) {
- /* Loading a nonzero value into GS sets the index and base. */
- load_gs_index(next->gsindex);
- } else {
- if (next->gsbase) {
- /* Next index is zero but next base is nonzero. */
- if (prev_gsindex)
- load_gs_index(0);
- wrmsrl(MSR_KERNEL_GS_BASE, next->gsbase);
- } else {
- /* Next base and index are both zero. */
- if (static_cpu_has_bug(X86_BUG_NULL_SEG)) {
- /*
- * We don't know the previous base and can't
- * find out without RDMSR. Forcibly clear it.
- *
- * This contains a pointless SWAPGS pair.
- * Fixing it would involve an explicit check
- * for Xen or a new pvop.
- */
- load_gs_index(__USER_DS);
- load_gs_index(0);
- } else {
- /*
- * If the previous index is zero and ARCH_SET_GS
- * didn't change the base, then the base is
- * also zero and we don't need to do anything.
- */
- if (prev->gsbase || prev_gsindex)
- load_gs_index(0);
- }
- }
- }
- /*
- * Save the old state and preserve the invariant.
- * NB: if prev_gsindex == 0, then we can't reliably learn the base
- * without RDMSR because Intel user code can zero it without telling
- * us and AMD user code can program any 32-bit value without telling
- * us.
- */
- if (prev_gsindex)
- prev->gsbase = 0;
- prev->gsindex = prev_gsindex;
+ load_seg_legacy(prev->fsindex, prev->fsbase,
+ next->fsindex, next->fsbase, FS);
+ load_seg_legacy(prev->gsindex, prev->gsbase,
+ next->gsindex, next->gsbase, GS);
switch_fpu_finish(next_fpu, fpu_switch);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index de36660751b5..72b737b8c9d6 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2738,6 +2738,7 @@ static int em_syscall(struct x86_emulate_ctxt *ctxt)
ctxt->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IF);
}
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
return X86EMUL_CONTINUE;
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 5f2412704b81..d29c745f10ad 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -3649,19 +3649,19 @@ static inline bool is_last_gpte(struct kvm_mmu *mmu,
unsigned level, unsigned gpte)
{
/*
- * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set
- * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means
- * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then.
- */
- gpte |= level - PT_PAGE_TABLE_LEVEL - 1;
-
- /*
* The RHS has bit 7 set iff level < mmu->last_nonleaf_level.
* If it is clear, there are no large pages at this level, so clear
* PT_PAGE_SIZE_MASK in gpte if that is the case.
*/
gpte &= level - mmu->last_nonleaf_level;
+ /*
+ * PT_PAGE_TABLE_LEVEL always terminates. The RHS has bit 7 set
+ * iff level <= PT_PAGE_TABLE_LEVEL, which for our purpose means
+ * level == PT_PAGE_TABLE_LEVEL; set PT_PAGE_SIZE_MASK in gpte then.
+ */
+ gpte |= level - PT_PAGE_TABLE_LEVEL - 1;
+
return gpte & PT_PAGE_SIZE_MASK;
}
@@ -4169,6 +4169,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
update_permission_bitmask(vcpu, context, true);
update_pkru_bitmask(vcpu, context, true);
+ update_last_nonleaf_level(vcpu, context);
reset_rsvds_bits_mask_ept(vcpu, context, execonly);
reset_ept_shadow_zero_bits_mask(vcpu, context, execonly);
}
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index a01105485315..37363900297d 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -324,10 +324,11 @@ retry_walk:
--walker->level;
index = PT_INDEX(addr, walker->level);
-
table_gfn = gpte_to_gfn(pte);
offset = index * sizeof(pt_element_t);
pte_gpa = gfn_to_gpa(table_gfn) + offset;
+
+ BUG_ON(walker->level < 1);
walker->table_gfn[walker->level - 1] = table_gfn;
walker->pte_gpa[walker->level - 1] = pte_gpa;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 3dc6d8017ce9..a8ae57acb6f6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2167,46 +2167,44 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
struct pi_desc old, new;
unsigned int dest;
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
+ /*
+ * In case of hot-plug or hot-unplug, we may have to undo
+ * vmx_vcpu_pi_put even if there is no assigned device. And we
+ * always keep PI.NDST up to date for simplicity: it makes the
+ * code easier, and CPU migration is not a fast path.
+ */
+ if (!pi_test_sn(pi_desc) && vcpu->cpu == cpu)
+ return;
+
+ /*
+ * First handle the simple case where no cmpxchg is necessary; just
+ * allow posting non-urgent interrupts.
+ *
+ * If the 'nv' field is POSTED_INTR_WAKEUP_VECTOR, do not change
+ * PI.NDST: pi_post_block will do it for us and the wakeup_handler
+ * expects the VCPU to be on the blocked_vcpu_list that matches
+ * PI.NDST.
+ */
+ if (pi_desc->nv == POSTED_INTR_WAKEUP_VECTOR ||
+ vcpu->cpu == cpu) {
+ pi_clear_sn(pi_desc);
return;
+ }
+ /* The full case. */
do {
old.control = new.control = pi_desc->control;
- /*
- * If 'nv' field is POSTED_INTR_WAKEUP_VECTOR, there
- * are two possible cases:
- * 1. After running 'pre_block', context switch
- * happened. For this case, 'sn' was set in
- * vmx_vcpu_put(), so we need to clear it here.
- * 2. After running 'pre_block', we were blocked,
- * and woken up by some other guy. For this case,
- * we don't need to do anything, 'pi_post_block'
- * will do everything for us. However, we cannot
- * check whether it is case #1 or case #2 here
- * (maybe, not needed), so we also clear sn here,
- * I think it is not a big deal.
- */
- if (pi_desc->nv != POSTED_INTR_WAKEUP_VECTOR) {
- if (vcpu->cpu != cpu) {
- dest = cpu_physical_id(cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
- }
+ dest = cpu_physical_id(cpu);
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- }
+ if (x2apic_enabled())
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
- /* Allow posting non-urgent interrupts */
new.sn = 0;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
}
static void decache_tsc_multiplier(struct vcpu_vmx *vmx)
@@ -4761,21 +4759,30 @@ static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu)
{
#ifdef CONFIG_SMP
if (vcpu->mode == IN_GUEST_MODE) {
- struct vcpu_vmx *vmx = to_vmx(vcpu);
-
/*
- * Currently, we don't support urgent interrupt,
- * all interrupts are recognized as non-urgent
- * interrupt, so we cannot post interrupts when
- * 'SN' is set.
+ * The vector of interrupt to be delivered to vcpu had
+ * been set in PIR before this function.
*
- * If the vcpu is in guest mode, it means it is
- * running instead of being scheduled out and
- * waiting in the run queue, and that's the only
- * case when 'SN' is set currently, warning if
- * 'SN' is set.
+ * Following cases will be reached in this block, and
+ * we always send a notification event in all cases as
+ * explained below.
+ *
+ * Case 1: vcpu keeps in non-root mode. Sending a
+ * notification event posts the interrupt to vcpu.
+ *
+ * Case 2: vcpu exits to root mode and is still
+ * runnable. PIR will be synced to vIRR before the
+ * next vcpu entry. Sending a notification event in
+ * this case has no effect, as vcpu is not in root
+ * mode.
+ *
+ * Case 3: vcpu exits to root mode and is blocked.
+ * vcpu_block() has already synced PIR to vIRR and
+ * never blocks vcpu if vIRR is not cleared. Therefore,
+ * a blocked vcpu here does not wait for any requested
+ * interrupts in PIR, and sending a notification event
+ * which has no effect is safe here.
*/
- WARN_ON_ONCE(pi_test_sn(&vmx->pi_desc));
apic->send_IPI_mask(get_cpu_mask(vcpu->cpu),
POSTED_INTR_VECTOR);
@@ -9187,6 +9194,13 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx->msr_ia32_feature_control_valid_bits = FEATURE_CONTROL_LOCKED;
+ /*
+ * Enforce invariant: pi_desc.nv is always either POSTED_INTR_VECTOR
+ * or POSTED_INTR_WAKEUP_VECTOR.
+ */
+ vmx->pi_desc.nv = POSTED_INTR_VECTOR;
+ vmx->pi_desc.sn = 1;
+
return &vmx->vcpu;
free_vmcs:
@@ -9996,6 +10010,11 @@ static void prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
page_to_phys(vmx->nested.virtual_apic_page));
vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
+ } else {
+#ifdef CONFIG_X86_64
+ exec_control |= CPU_BASED_CR8_LOAD_EXITING |
+ CPU_BASED_CR8_STORE_EXITING;
+#endif
}
if (cpu_has_vmx_msr_bitmap() &&
@@ -10671,7 +10690,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
* (KVM doesn't change it)- no reason to call set_cr4_guest_host_mask();
*/
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
- kvm_set_cr4(vcpu, vmcs12->host_cr4);
+ vmx_set_cr4(vcpu, vmcs12->host_cr4);
nested_ept_uninit_mmu_context(vcpu);
@@ -11000,6 +11019,37 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
}
+static void __pi_post_block(struct kvm_vcpu *vcpu)
+{
+ struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
+ struct pi_desc old, new;
+ unsigned int dest;
+
+ do {
+ old.control = new.control = pi_desc->control;
+ WARN(old.nv != POSTED_INTR_WAKEUP_VECTOR,
+ "Wakeup handler not enabled while the VCPU is blocked\n");
+
+ dest = cpu_physical_id(vcpu->cpu);
+
+ if (x2apic_enabled())
+ new.ndst = dest;
+ else
+ new.ndst = (dest << 8) & 0xFF00;
+
+ /* set 'NV' to 'notification vector' */
+ new.nv = POSTED_INTR_VECTOR;
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
+
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu == -1)) {
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_del(&vcpu->blocked_vcpu_list);
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ vcpu->pre_pcpu = -1;
+ }
+}
+
/*
* This routine does the following things for vCPU which is going
* to be blocked if VT-d PI is enabled.
@@ -11015,7 +11065,6 @@ static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
*/
static int pi_pre_block(struct kvm_vcpu *vcpu)
{
- unsigned long flags;
unsigned int dest;
struct pi_desc old, new;
struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
@@ -11025,34 +11074,20 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
!kvm_vcpu_apicv_active(vcpu))
return 0;
- vcpu->pre_pcpu = vcpu->cpu;
- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_add_tail(&vcpu->blocked_vcpu_list,
- &per_cpu(blocked_vcpu_on_cpu,
- vcpu->pre_pcpu));
- spin_unlock_irqrestore(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
+ WARN_ON(irqs_disabled());
+ local_irq_disable();
+ if (!WARN_ON_ONCE(vcpu->pre_pcpu != -1)) {
+ vcpu->pre_pcpu = vcpu->cpu;
+ spin_lock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ list_add_tail(&vcpu->blocked_vcpu_list,
+ &per_cpu(blocked_vcpu_on_cpu,
+ vcpu->pre_pcpu));
+ spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, vcpu->pre_pcpu));
+ }
do {
old.control = new.control = pi_desc->control;
- /*
- * We should not block the vCPU if
- * an interrupt is posted for it.
- */
- if (pi_test_on(pi_desc) == 1) {
- spin_lock_irqsave(&per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock_irqrestore(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- vcpu->pre_pcpu = -1;
-
- return 1;
- }
-
WARN((pi_desc->sn == 1),
"Warning: SN field of posted-interrupts "
"is set before blocking\n");
@@ -11074,10 +11109,15 @@ static int pi_pre_block(struct kvm_vcpu *vcpu)
/* set 'NV' to 'wakeup vector' */
new.nv = POSTED_INTR_WAKEUP_VECTOR;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
+ } while (cmpxchg64(&pi_desc->control, old.control,
+ new.control) != old.control);
- return 0;
+ /* We should not block the vCPU if an interrupt is posted for it. */
+ if (pi_test_on(pi_desc) == 1)
+ __pi_post_block(vcpu);
+
+ local_irq_enable();
+ return (vcpu->pre_pcpu == -1);
}
static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -11093,44 +11133,13 @@ static int vmx_pre_block(struct kvm_vcpu *vcpu)
static void pi_post_block(struct kvm_vcpu *vcpu)
{
- struct pi_desc *pi_desc = vcpu_to_pi_desc(vcpu);
- struct pi_desc old, new;
- unsigned int dest;
- unsigned long flags;
-
- if (!kvm_arch_has_assigned_device(vcpu->kvm) ||
- !irq_remapping_cap(IRQ_POSTING_CAP) ||
- !kvm_vcpu_apicv_active(vcpu))
+ if (vcpu->pre_pcpu == -1)
return;
- do {
- old.control = new.control = pi_desc->control;
-
- dest = cpu_physical_id(vcpu->cpu);
-
- if (x2apic_enabled())
- new.ndst = dest;
- else
- new.ndst = (dest << 8) & 0xFF00;
-
- /* Allow posting non-urgent interrupts */
- new.sn = 0;
-
- /* set 'NV' to 'notification vector' */
- new.nv = POSTED_INTR_VECTOR;
- } while (cmpxchg(&pi_desc->control, old.control,
- new.control) != old.control);
-
- if(vcpu->pre_pcpu != -1) {
- spin_lock_irqsave(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- list_del(&vcpu->blocked_vcpu_list);
- spin_unlock_irqrestore(
- &per_cpu(blocked_vcpu_on_cpu_lock,
- vcpu->pre_pcpu), flags);
- vcpu->pre_pcpu = -1;
- }
+ WARN_ON(irqs_disabled());
+ local_irq_disable();
+ __pi_post_block(vcpu);
+ local_irq_enable();
}
static void vmx_post_block(struct kvm_vcpu *vcpu)
@@ -11158,7 +11167,7 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
struct kvm_lapic_irq irq;
struct kvm_vcpu *vcpu;
struct vcpu_data vcpu_info;
- int idx, ret = -EINVAL;
+ int idx, ret = 0;
if (!kvm_arch_has_assigned_device(kvm) ||
!irq_remapping_cap(IRQ_POSTING_CAP) ||
@@ -11167,7 +11176,12 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
idx = srcu_read_lock(&kvm->irq_srcu);
irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu);
- BUG_ON(guest_irq >= irq_rt->nr_rt_entries);
+ if (guest_irq >= irq_rt->nr_rt_entries ||
+ hlist_empty(&irq_rt->map[guest_irq])) {
+ pr_warn_once("no route for guest_irq %u/%u (broken user space?)\n",
+ guest_irq, irq_rt->nr_rt_entries);
+ goto out;
+ }
hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) {
if (e->type != KVM_IRQ_ROUTING_MSI)
@@ -11210,12 +11224,8 @@ static int vmx_update_pi_irte(struct kvm *kvm, unsigned int host_irq,
if (set)
ret = irq_set_vcpu_affinity(host_irq, &vcpu_info);
- else {
- /* suppress notification event before unposting */
- pi_set_sn(vcpu_to_pi_desc(vcpu));
+ else
ret = irq_set_vcpu_affinity(host_irq, NULL);
- pi_clear_sn(vcpu_to_pi_desc(vcpu));
- }
if (ret < 0) {
printk(KERN_INFO "%s: failed to update PI IRTE\n",
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 9e2c2b00215b..31b15149f412 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5250,6 +5250,8 @@ static void init_emulate_ctxt(struct kvm_vcpu *vcpu)
kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
ctxt->eflags = kvm_get_rflags(vcpu);
+ ctxt->tf = (ctxt->eflags & X86_EFLAGS_TF) != 0;
+
ctxt->eip = kvm_rip_read(vcpu);
ctxt->mode = (!is_protmode(vcpu)) ? X86EMUL_MODE_REAL :
(ctxt->eflags & X86_EFLAGS_VM) ? X86EMUL_MODE_VM86 :
@@ -5465,37 +5467,26 @@ static int kvm_vcpu_check_hw_bp(unsigned long addr, u32 type, u32 dr7,
return dr6;
}
-static void kvm_vcpu_check_singlestep(struct kvm_vcpu *vcpu, unsigned long rflags, int *r)
+static void kvm_vcpu_do_singlestep(struct kvm_vcpu *vcpu, int *r)
{
struct kvm_run *kvm_run = vcpu->run;
- /*
- * rflags is the old, "raw" value of the flags. The new value has
- * not been saved yet.
- *
- * This is correct even for TF set by the guest, because "the
- * processor will not generate this exception after the instruction
- * that sets the TF flag".
- */
- if (unlikely(rflags & X86_EFLAGS_TF)) {
- if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
- kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 |
- DR6_RTM;
- kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
- kvm_run->debug.arch.exception = DB_VECTOR;
- kvm_run->exit_reason = KVM_EXIT_DEBUG;
- *r = EMULATE_USER_EXIT;
- } else {
- vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
- /*
- * "Certain debug exceptions may clear bit 0-3. The
- * remaining contents of the DR6 register are never
- * cleared by the processor".
- */
- vcpu->arch.dr6 &= ~15;
- vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
- kvm_queue_exception(vcpu, DB_VECTOR);
- }
+ if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP) {
+ kvm_run->debug.arch.dr6 = DR6_BS | DR6_FIXED_1 | DR6_RTM;
+ kvm_run->debug.arch.pc = vcpu->arch.singlestep_rip;
+ kvm_run->debug.arch.exception = DB_VECTOR;
+ kvm_run->exit_reason = KVM_EXIT_DEBUG;
+ *r = EMULATE_USER_EXIT;
+ } else {
+ vcpu->arch.emulate_ctxt.eflags &= ~X86_EFLAGS_TF;
+ /*
+ * "Certain debug exceptions may clear bit 0-3. The
+ * remaining contents of the DR6 register are never
+ * cleared by the processor".
+ */
+ vcpu->arch.dr6 &= ~15;
+ vcpu->arch.dr6 |= DR6_BS | DR6_RTM;
+ kvm_queue_exception(vcpu, DB_VECTOR);
}
}
@@ -5650,8 +5641,9 @@ restart:
toggle_interruptibility(vcpu, ctxt->interruptibility);
vcpu->arch.emulate_regs_need_sync_to_vcpu = false;
kvm_rip_write(vcpu, ctxt->eip);
- if (r == EMULATE_DONE)
- kvm_vcpu_check_singlestep(vcpu, rflags, &r);
+ if (r == EMULATE_DONE &&
+ (ctxt->tf || (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)))
+ kvm_vcpu_do_singlestep(vcpu, &r);
if (!ctxt->have_exception ||
exception_type(ctxt->exception.vector) == EXCPT_TRAP)
__kvm_set_rflags(vcpu, ctxt->eflags);
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9f72ca3b2669..1dd796025472 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -191,8 +191,7 @@ is_prefetch(struct pt_regs *regs, unsigned long error_code, unsigned long addr)
* 6. T1 : reaches here, sees vma_pkey(vma)=5, when we really
* faulted on a pte with its pkey=4.
*/
-static void fill_sig_info_pkey(int si_code, siginfo_t *info,
- struct vm_area_struct *vma)
+static void fill_sig_info_pkey(int si_code, siginfo_t *info, u32 *pkey)
{
/* This is effectively an #ifdef */
if (!boot_cpu_has(X86_FEATURE_OSPKE))
@@ -208,7 +207,7 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
* valid VMA, so we should never reach this without a
* valid VMA.
*/
- if (!vma) {
+ if (!pkey) {
WARN_ONCE(1, "PKU fault with no VMA passed in");
info->si_pkey = 0;
return;
@@ -218,13 +217,12 @@ static void fill_sig_info_pkey(int si_code, siginfo_t *info,
* absolutely guranteed to be 100% accurate because of
* the race explained above.
*/
- info->si_pkey = vma_pkey(vma);
+ info->si_pkey = *pkey;
}
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
- struct task_struct *tsk, struct vm_area_struct *vma,
- int fault)
+ struct task_struct *tsk, u32 *pkey, int fault)
{
unsigned lsb = 0;
siginfo_t info;
@@ -239,7 +237,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address,
lsb = PAGE_SHIFT;
info.si_addr_lsb = lsb;
- fill_sig_info_pkey(si_code, &info, vma);
+ fill_sig_info_pkey(si_code, &info, pkey);
force_sig_info(si_signo, &info, tsk);
}
@@ -718,8 +716,6 @@ no_context(struct pt_regs *regs, unsigned long error_code,
struct task_struct *tsk = current;
unsigned long flags;
int sig;
- /* No context means no VMA to pass down */
- struct vm_area_struct *vma = NULL;
/* Are we prepared to handle this kernel fault? */
if (fixup_exception(regs, X86_TRAP_PF)) {
@@ -744,7 +740,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
/* XXX: hwpoison faults will set the wrong code. */
force_sig_info_fault(signal, si_code, address,
- tsk, vma, 0);
+ tsk, NULL, 0);
}
/*
@@ -853,8 +849,7 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code,
static void
__bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma,
- int si_code)
+ unsigned long address, u32 *pkey, int si_code)
{
struct task_struct *tsk = current;
@@ -902,7 +897,7 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
tsk->thread.error_code = error_code;
tsk->thread.trap_nr = X86_TRAP_PF;
- force_sig_info_fault(SIGSEGV, si_code, address, tsk, vma, 0);
+ force_sig_info_fault(SIGSEGV, si_code, address, tsk, pkey, 0);
return;
}
@@ -915,9 +910,9 @@ __bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
static noinline void
bad_area_nosemaphore(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma)
+ unsigned long address, u32 *pkey)
{
- __bad_area_nosemaphore(regs, error_code, address, vma, SEGV_MAPERR);
+ __bad_area_nosemaphore(regs, error_code, address, pkey, SEGV_MAPERR);
}
static void
@@ -925,6 +920,10 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
unsigned long address, struct vm_area_struct *vma, int si_code)
{
struct mm_struct *mm = current->mm;
+ u32 pkey;
+
+ if (vma)
+ pkey = vma_pkey(vma);
/*
* Something tried to access memory that isn't in our memory map..
@@ -932,7 +931,8 @@ __bad_area(struct pt_regs *regs, unsigned long error_code,
*/
up_read(&mm->mmap_sem);
- __bad_area_nosemaphore(regs, error_code, address, vma, si_code);
+ __bad_area_nosemaphore(regs, error_code, address,
+ (vma) ? &pkey : NULL, si_code);
}
static noinline void
@@ -975,7 +975,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code,
static void
do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
- struct vm_area_struct *vma, unsigned int fault)
+ u32 *pkey, unsigned int fault)
{
struct task_struct *tsk = current;
int code = BUS_ADRERR;
@@ -1002,13 +1002,12 @@ do_sigbus(struct pt_regs *regs, unsigned long error_code, unsigned long address,
code = BUS_MCEERR_AR;
}
#endif
- force_sig_info_fault(SIGBUS, code, address, tsk, vma, fault);
+ force_sig_info_fault(SIGBUS, code, address, tsk, pkey, fault);
}
static noinline void
mm_fault_error(struct pt_regs *regs, unsigned long error_code,
- unsigned long address, struct vm_area_struct *vma,
- unsigned int fault)
+ unsigned long address, u32 *pkey, unsigned int fault)
{
if (fatal_signal_pending(current) && !(error_code & PF_USER)) {
no_context(regs, error_code, address, 0, 0);
@@ -1032,9 +1031,9 @@ mm_fault_error(struct pt_regs *regs, unsigned long error_code,
} else {
if (fault & (VM_FAULT_SIGBUS|VM_FAULT_HWPOISON|
VM_FAULT_HWPOISON_LARGE))
- do_sigbus(regs, error_code, address, vma, fault);
+ do_sigbus(regs, error_code, address, pkey, fault);
else if (fault & VM_FAULT_SIGSEGV)
- bad_area_nosemaphore(regs, error_code, address, vma);
+ bad_area_nosemaphore(regs, error_code, address, pkey);
else
BUG();
}
@@ -1220,6 +1219,7 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code,
struct mm_struct *mm;
int fault, major = 0;
unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
+ u32 pkey;
tsk = current;
mm = tsk->mm;
@@ -1420,9 +1420,10 @@ good_area:
return;
}
+ pkey = vma_pkey(vma);
up_read(&mm->mmap_sem);
if (unlikely(fault & VM_FAULT_ERROR)) {
- mm_fault_error(regs, error_code, address, vma, fault);
+ mm_fault_error(regs, error_code, address, &pkey, fault);
return;
}
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9a324fc8bed8..3e27ded6ac65 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -689,7 +689,7 @@ static void __meminit free_pagetable(struct page *page, int order)
if (PageReserved(page)) {
__ClearPageReserved(page);
- magic = (unsigned long)page->lru.next;
+ magic = (unsigned long)page->freelist;
if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) {
while (nr_pages--)
put_page_bootmem(page++);
diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile
index 555b9fa0ad43..7dbdb780264d 100644
--- a/arch/x86/purgatory/Makefile
+++ b/arch/x86/purgatory/Makefile
@@ -8,6 +8,7 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y))
LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib
targets += purgatory.ro
+KASAN_SANITIZE := n
KCOV_INSTRUMENT := n
# Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That